当我尝试通过迁移学习一个我已经训练过的模型来训练我的模型时,我得到了这个错误:
ValueError:调用图层“密集”(类型为密集)时遇到异常。
维度必须相等,但对于具有输入形状的“{{node dense/MatMul}} = MatMul[T=DT_FLOAT,transpose_a=false,transpose_b=false](占位符,dense/MatMul/ReadVariableOp)”,维度必须为100352和2048:[?,100352],[2048,256]。
调用层“密集”(密集型)接收的参数:输入=tf.Tensor(形状=(无,100352),数据类型= float 32)
这是我的模型代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import matplotlib as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.model_selection import train_test_split
import pickle
#loading data
IMG_SIZE = 225
pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)
pickle_in = open("Y.pickle", "rb")
Y = pickle.load(pickle_in)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
#normalize
X_train /= 255.
X_test /= 255.
#loading VGG16
VGG16 = keras.models.load_model("VGG16.h5", compile=False, custom_objects = {"sin": tf.math.sin})
#model
model = keras.Sequential()
model.add(keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
for idx, layer in enumerate(VGG16.layers[0:-1]):
model.add(layer)
layer._name = "VGG16" + str(idx)
for layer in model.layers[1:]:
layer.trainable = False
model.add(layers.Flatten())
model.add(layers.Conv2D(64, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME"), strides=2)
model.add(layers.Conv2D(32, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME"), strides=2)
model.add(layers.Flatten())
model.add(layers.Dense(1, activation="sigmoid"))
print(model.summary())
#training model
loss = keras.losses.BinaryCrossentropy()
optim = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
model.fit(X_train, Y_train, epochs=15, validation_data=(X_val, Y_val))
model.evaluate(X_test, Y_test, verbose=2)
这是我已经训练过的模型的代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow import math
from keras import layers
from keras.datasets import cifar10
def main():
#loading data
(X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
X_test, X_train = X_test.astype("float32") / 255., X_train.astype("float32") / 255.
Y_train, Y_test = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10)
#VGG16 model with SIREN
model = keras.Sequential()
model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_initializer="he_uniform", activation=tf.math.sin, input_shape=(32,32,1)))
model.add(layers.Conv2D(64, (3, 3), padding='same', activation=math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin))
model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Flatten())
model.add(layers.Dense(256, activation=math.sin))
model.add(layers.Dense(128, activation=math.sin))
model.add(layers.Dense(10, activation="softmax"))
#training model
lr = 0.0001
loss = keras.losses.CategoricalCrossentropy()
decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
optim = keras.optimizers.Adam(decayed_lr)
model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
checkpoint = keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq ="epoch")
try:
model.load_weights(checkpoint_filepath)
except Exception as e:
pass
model.fit(X_train, Y_train, batch_size=128, epochs=25, callbacks = [checkpoint])
model.evaluate(X_test, Y_test, verbose=2)
#saving model
model.save("VGG16.h5")
if __name__ == '__main__':
main()
我在两个不同的模型中使用了不同大小的输入,但是我已经了解到只要步长相同就不会有问题,我不能100%确定步长相同,但是我认为它们是相同的。
1条答案
按热度按时间yhuiod9q1#
因为如果你想用不同的输入形状进行迁移学习,你可以使用conv2d层,但不能使用密集层。如果你使用
VGG16.summary()
,你会看到最后4层不是conv。所以第一个错误是for idx, layer in enumerate(VGG16.layers[0:-1]):
应该是for idx, layer in enumerate(VGG16.layers[0:-4]):
添加vgg16图层后,您也添加了一些conv图层。在此之前,您不应该使用flat。此外,一些
strides
prop 不在正确的位置。如果我们修复所有错误,它将变为: