我正在使用gradcam来查看测试图像的哪些区域对于resnet50
的预测最重要。我得到的输出有一些错误。
Code Snippets:毕业证
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import cv2
class GradCAM:
def __init__(self, model, classIdx, layerName=None):
# store the model, the class index used to measure the class
# activation map, and the layer to be used when visualizing
# the class activation map
self.model = model
self.classIdx = classIdx
self.layerName = layerName
# if the layer name is None, attempt to automatically find
# the target output layer
if self.layerName is None:
self.layerName = self.find_target_layer()
def find_target_layer(self):
# attempt to find the final convolutional layer in the network
# by looping over the layers of the network in reverse order
for layer in reversed(self.model.layers):
# check to see if the layer has a 4D output
if len(layer.output_shape) == 4:
return layer.name
# otherwise, we could not find a 4D layer so the GradCAM
# algorithm cannot be applied
raise ValueError("Could not find 4D layer. Cannot apply GradCAM.")
def compute_heatmap(self, image, eps=1e-8):
# construct our gradient model by supplying (1) the inputs
# to our pre-trained model, (2) the output of the (presumably)
# final 4D layer in the network, and (3) the output of the
# softmax activations from the model
gradModel = Model(
inputs=[self.model.inputs],
outputs=[self.model.get_layer(self.layerName).output, self.model.output])
# record operations for automatic differentiation
with tf.GradientTape() as tape:
# cast the image tensor to a float-32 data type, pass the
# image through the gradient model, and grab the loss
# associated with the specific class index
inputs = tf.cast(image, tf.float32)
(convOutputs, predictions) = gradModel(inputs)
loss = predictions[:, tf.argmax(predictions[0])]
# use automatic differentiation to compute the gradients
grads = tape.gradient(loss, convOutputs)
# compute the guided gradients
castConvOutputs = tf.cast(convOutputs > 0, "float32")
castGrads = tf.cast(grads > 0, "float32")
guidedGrads = castConvOutputs * castGrads * grads
# the convolution and guided gradients have a batch dimension
# (which we don't need) so let's grab the volume itself and
# discard the batch
convOutputs = convOutputs[0]
guidedGrads = guidedGrads[0]
# compute the average of the gradient values, and using them
# as weights, compute the ponderation of the filters with
# respect to the weights
weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)
# grab the spatial dimensions of the input image and resize
# the output class activation map to match the input image
# dimensions
(w, h) = (image.shape[2], image.shape[1])
heatmap = cv2.resize(cam.numpy(), (w, h))
# normalize the heatmap such that all values lie in the range
# [0, 1], scale the resulting values to the range [0, 255],
# and then convert to an unsigned 8-bit integer
numer = heatmap - np.min(heatmap)
denom = (heatmap.max() - heatmap.min()) + eps
heatmap = numer / denom
heatmap = (heatmap * 255).astype("uint8")
# return the resulting heatmap to the calling function
return heatmap
def overlay_heatmap(self, heatmap, image, alpha=0.5,
colormap=cv2.COLORMAP_VIRIDIS):
# apply the supplied color map to the heatmap and then
# overlay the heatmap on the input image
heatmap = cv2.applyColorMap(heatmap, colormap)
output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
# return a 2-tuple of the color mapped heatmap and the output,
# overlaid image
return (heatmap, output)
用于可视化热图的代码片段:
import random
num_images = 5
random_indices = random.sample(range(len(X_test)), num_images)
for idx in random_indices:
image = X_test[idx] #assuming the image array is the first element in the tuple
# print(image)
# image = cv2.resize(image, (224, 224))
image1 = image.astype('float32') / 255
image1 = np.expand_dims(image1, axis=0)
preds = model.predict(image1)
i = np.argmax(preds[0])
icam = GradCAM(model, i, 'conv5_block3_out')
heatmap = icam.compute_heatmap(image1)
heatmap = cv2.resize(heatmap, (224, 224))
(heatmap, output) = icam.overlay_heatmap(heatmap, image, alpha=0.5)
fig, ax = plt.subplots(1, 3)
ax[0].imshow(heatmap)
ax[1].imshow(image)
ax[2].imshow(output)
输出:
我面临的问题是,在输出中,您可以看到原始图像是不同的,但热图,图像和gradcam对于所有图像都是相同的。我不知道这背后的原因是什么。
1条答案
按热度按时间epggiuax1#
这个问题看起来有点老,但这里是我的答案,以防其他人在GradCAM输出方面遇到类似的问题。
我在使用ResNet50时看到了类似的问题。但是,通过打印各个GradCAM,我能够看到它们具有不同的值,尽管绘制的GradCAM输出看起来相似。正如您在Colab链接上看到的,您的GradCAM计算看起来很好。
尽管从你附上的图片中很难发现,但我可以看出至少前两个GradCAMs是不同的。因此,显着图通常对于不同的图像是不同的,它们只是不够好。我通过用MobileNetV2模型替换ResNet50解决了我的问题,它带来了更好的显着性和分类性能。
正如No Free Lunch theorem中所述,没有一个模型适合所有问题或数据集,因此您必须使用不同的模型进行实验。