我正在训练一个基于多标签VGG-16的分类模型。这个任务有25个标签。我试图在https://towardsdatascience.com/multi-label-classification-and-class-activation-map-on-fashion-mnist-1454f09f5925复制这个代码,使用训练好的模型生成类激活图。
model = load_model('weights/vgg16_multilabel.09-0.3833.h5')
model.summary()
sgd = SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='binary_crossentropy',
metrics=['accuracy'])
#labels
columns=['Action', 'Adventure', 'Animation', 'Biography', 'Comedy',
'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy',
'History', 'Horror', 'Music', 'Musical', 'Mystery',
'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
'Sport', 'Thriller', 'War', 'Western']
gap_weights = model.layers[-1].get_weights()[0] #final dense layer
print(" >>> size(gap_weights) = ", gap_weights.size)
#extract from the deepest convolutional layer
cam_model = Model(inputs=model.input,
outputs=(model.layers[-3].output,
model.layers[-1].output))
print(" >>> K.int_shape(model.layers[-3].output) = ", K.int_shape(model.layers[-3].output))
print(" >>> K.int_shape(model.layers[-1].output) = ", K.int_shape(model.layers[-1].output))
#--- make the prediction
features, results = cam_model.predict(X_test)
# check the CAM activations for 10 test images
for idx in range(10):
# get the feature map of the test image
features_for_one_img = features[idx, :, :, :]
# map the feature map to the original size
height_roomout = train_img_size_h / features_for_one_img.shape[0]
width_roomout = train_img_size_w / features_for_one_img.shape[1]
cam_features = sp.ndimage.zoom(features_for_one_img, (height_roomout, width_roomout, 1), order=2)
# get the predicted label with the maximum probability
pred = np.argmax(results[idx])
# prepare the final display
plt.figure(facecolor='white')
# get the weights of class activation map
cam_weights = gap_weights[:, pred]
# create the class activation map
cam_output = np.dot(cam_features, cam_weights)
# draw the class activation map
ax.set_xticklabels([])
ax.set_yticklabels([])
buf = 'Predicted Class = ' + columns[pred] + ', Probability = ' + str(results[idx][pred])
plt.xlabel(buf)
plt.imshow(t_pic[idx], alpha=0.5)
plt.imshow(cam_output, cmap='jet', alpha=0.5)
plt.show()
这是输出
size(gap_weights) = 12800
K.int_shape(model.layers[-4].output) = (None, 512)
K.int_shape(model.layers[-1].output) = (None, 25)
出现以下错误:
Traceback (most recent call last):
File "/project/1/complete_code.py", line 1295, in <module>
features_for_one_img = features[idx, :, :, :]
IndexError: too many indices for array: array is 2-dimensional, but 4 were indexed
我在Tensorflow 2.X中遇到这个错误,但在Tensorflow 1.X中没有遇到任何问题。
1条答案
按热度按时间tct7dpnv1#
使用VGG 16作为模型时,model.layers[-3].output将给予密集图层的输出,即中的Tensor(无,512)。但是,CAM需要的是最后一个MaxPooling 2D层的输出,作为(无,7,7,512)。请打印model.summary以获得正确的输出层。我认为您应该在cam_model中使用model.layers[-6].output。