首页 / PYTHON / python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100)
python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100)
内容导读
互联网集市收集整理的这篇技术教程文章主要介绍了python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100),小编现在分享给大家,供广大互联网技能从业者学习和参考。文章包含9237字,纯文字阅读大概需要14分钟。
内容图文
![python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100)](/upload/InfoBanner/zyjiaocheng/781/0e893d7ab8bf41239f7cc23e4a44b7ed.jpg)
该系统的目标是对发音单词的视频输入进行分类.每个样本都是一组90,100×100,灰度(1色通道帧,尺寸为(1,90,100,100).以前,训练数据直接加载到内存并训练,但有效,但不是为了解决这个问题,系统被修改为预处理并将训练数据保存到HDF5文件中,然后使用生成器将训练数据拟合到模型中以进行按需加载.但是,由于此修改,现在会生成以下错误:
Exception: Error when checking model input: expected
convolution3d_input_1 to have 5 dimensions, but got array with shape
(1, 90, 100, 100)
这是系统的代码:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")
import cv2
import h5py
import json
import os
import sys
import numpy as np
class OpticalSpeechRecognizer(object):
def __init__(self, rows, columns, frames_per_sequence):
self.rows = rows
self.columns = columns
self.frames_per_sequence = frames_per_sequence
self.osr = None
def train_osr_model(self, training_save_fn):
""" Train the optical speech recognizer
"""
print "\nTraining OSR"
validation_ratio = 0.3
training_sequence_generator = self.generate_training_sequences(training_save_fn)
validation_sequence_generator = self.generate_training_sequences(training_save_fn, validation_ratio=validation_ratio)
training_save_file = h5py.File(training_save_fn, "r")
sample_count = training_save_file.attrs["sample_count"]
pbi = PrintBatchInfo()
self.osr.fit_generator(generator=training_sequence_generator,
validation_data=validation_sequence_generator,
samples_per_epoch=sample_count,
nb_val_samples=int(round(validation_ratio*sample_count)),
nb_epoch=10,
verbose=2,
callbacks=[pbi],
class_weight=None,
nb_worker=1)
def generate_osr_model(self, training_save_fn):
""" Builds the optical speech recognizer model
"""
print "".join(["Generating OSR model\n",
"-"*40])
training_save_file = h5py.File(training_save_fn, "r")
osr = Sequential()
print " - Adding convolution layers"
osr.add(Convolution3D(nb_filter=32,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Convolution3D(nb_filter=32,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Convolution3D(nb_filter=64,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Convolution3D(nb_filter=64,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Convolution3D(nb_filter=128,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Convolution3D(nb_filter=128,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Flatten())
osr.add(Dropout(0.2))
print " - Adding fully connected layers"
osr.add(Dense(output_dim=128,
init="normal",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Dense(output_dim=64,
init="normal",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Dense(output_dim=32,
init="normal",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Dense(output_dim=len(training_save_file.attrs["training_classes"].split(",")),
init="normal",
activation="softmax"))
print " - Compiling model"
sgd = SGD(lr=0.01,
decay=1e-6,
momentum=0.9,
nesterov=True)
osr.compile(loss="categorical_crossentropy",
optimizer=sgd,
metrics=["accuracy"])
self.osr = osr
print " * OSR MODEL GENERATED * "
def generate_training_sequences(self, training_save_fn, validation_ratio=0):
while True:
training_save_file = h5py.File(training_save_fn, "r")
sample_count = int(training_save_file.attrs["sample_count"])
# generate sequences for validation
if validation_ratio:
validation_sample_count = int(round(validation_ratio*sample_count))
validation_sample_idxs = np.random.randint(low=0, high=sample_count, size=validation_sample_count)
for idx in validation_sample_idxs:
X = training_save_file["X"][idx]
Y = training_save_file["Y"][idx]
yield (X, Y)
# generate sequences for training
else:
for idx in range(0, sample_count):
X = training_save_file["X"][idx]
Y = training_save_file["Y"][idx]
yield (X, Y)
def process_training_data(self, config_file, training_save_fn):
""" Preprocesses training data and saves them into an HDF5 file
"""
# load training metadata from config file
training_metadata = {}
training_classes = []
with open(config_file) as training_config:
training_metadata = json.load(training_config)
training_classes = sorted(list(training_metadata.keys()))
print "".join(["\n",
"Found {0} training classes!\n".format(len(training_classes)),
"-"*40])
for class_label, training_class in enumerate(training_classes):
print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
print ""
# count number of samples
sample_count = 0
sample_count_by_class = [0]*len(training_classes)
for class_label, training_class in enumerate(training_classes):
# get training class sequeunce paths
training_class_data_path = training_metadata[training_class]
training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
for file_name in os.listdir(training_class_data_path)
if (os.path.isfile(os.path.join(training_class_data_path, file_name))
and ".mov" in file_name)]
# update sample count
sample_count += len(training_class_sequence_paths)
sample_count_by_class[class_label] = len(training_class_sequence_paths)
print "".join(["\n",
"Found {0} training samples!\n".format(sample_count),
"-"*40])
for class_label, training_class in enumerate(training_classes):
print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
print ""
# initialize HDF5 save file, but clear older duplicate first if it exists
try:
print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(training_save_fn)
os.remove(training_save_fn)
except OSError:
pass
training_save_file = h5py.File(training_save_fn, "w")
training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
training_save_file.attrs["sample_count"] = sample_count
x_training_dataset = training_save_file.create_dataset("X",
shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
dtype="f")
y_training_dataset = training_save_file.create_dataset("Y",
shape=(sample_count, len(training_classes)),
dtype="i")
# iterate through each class data
sample_idx = 0
for class_label, training_class in enumerate(training_classes):
# get training class sequeunce paths
training_class_data_path = training_metadata[training_class]
training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
for file_name in os.listdir(training_class_data_path)
if (os.path.isfile(os.path.join(training_class_data_path, file_name))
and ".mov" in file_name)]
# iterate through each sequence
for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
.format(training_class, idx+1, len(training_class_sequence_paths)))
sys.stdout.flush()
# append grayscale, normalized sample frames
frames = self.process_frames(training_class_sequence_path)
x_training_dataset[sample_idx] = [frames]
# append one-hot encoded sample label
label = [0]*len(training_classes)
label[class_label] = 1
y_training_dataset[sample_idx] = label
# update sample index
sample_idx += 1
print "\n"
training_save_file.close()
print "Training data processed and saved to {0}".format(training_save_fn)
def process_frames(self, video_file_path):
""" Splits frames, resizes frames, converts RGB frames to greyscale, and normalizes frames
"""
video = cv2.VideoCapture(video_file_path)
success, frame = video.read()
frames = []
success = True
# resize, convert to grayscale, normalize, and collect valid frames
while success:
success, frame = video.read()
if success:
frame = cv2.resize(frame, (self.rows, self.columns))
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
frame = frame.astype('float32') / 255.0
frames.append(frame)
# pre-pad short sequences and equalize frame lengths
if len(frames) < self.frames_per_sequence:
frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
frames = frames[0:self.frames_per_sequence]
return frames
class PrintBatchInfo(Callback):
def on_batch_end(self, epoch, logs={}):
print logs
if __name__ == "__main__":
osr = OpticalSpeechRecognizer(100, 100, 90)
osr.process_training_data("training_config.json", "training_data.h5")
osr.generate_osr_model("training_data.h5")
osr.train_osr_model("training_data.h5")
让我感到困惑的是,报告的输入维度是预期的输入维度,但它抱怨缺少第五维度.对于每次迭代,生成器是否应该生成一批样本而不是单个样本以生成5维输出?
解决方法:
如果您要返回一个简单示例,则需要确保输出为5维且形状为:(batch_size,channels,frames,height,width).这仅仅是因为每一层的维度都应该是固定的.使这项工作最简单的方法是:
X = training_save_file["X"][[idx]]
使用此修复程序,您的输出应与预期形状匹配.
内容总结
以上是互联网集市为您收集整理的python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100)全部内容,希望文章能够帮你解决python – Keras 3D Convolution:检查模型输入时出错:预期covolution3d_input_1有5个维度,但得到数组形状(1,90,100,100)所遇到的程序开发问题。 如果觉得互联网集市技术教程内容还不错,欢迎将互联网集市网站推荐给程序员好友。
内容备注
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 gblab@vip.qq.com 举报,一经查实,本站将立刻删除。
内容手机端
扫描二维码推送至手机访问。