Different results from PyTorch and TensorFlow

Asked by nhjlsmyf on 2022-11-09

I am implementing a "perceptual loss" function, but PyTorch and TensorFlow give different results for the same pair of images. Could someone tell me why?

TensorFlow

import tensorflow as tf

class FeatureExtractor(tf.keras.Model):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = tf.keras.applications.VGG16(weights="imagenet",
                                                include_top=False,
                                                input_shape=(256, 256, 3))
        extractor.trainable = True

        # Collect the outputs of the requested layers, looked up by name.
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)

def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0

    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss
    return mse_loss, perceptual_loss

Running it:

feature_extractor = FeatureExtractor(n_layers=["block1_conv1","block1_conv2", 
  "block3_conv2","block4_conv2"])

mse_loss, perceptual_loss = loss_function(image1, image2,
                                          feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tf.Tensor(0.0014001362, shape=(), dtype=float32)
tf.Tensor(0.030578917, shape=(), dtype=float32)
tf.Tensor(2.6163354, shape=(), dtype=float32)
tf.Tensor(0.842701, shape=(), dtype=float32)
0.002584027126431465 3.4910154342651367 3.4935994148254395

PyTorch

import torch
import torch.nn as nn
import torchvision.models as models

class FeatureExtractor(torch.nn.Module):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = models.vgg16(pretrained=True).features
        index = 0
        self.layers = nn.ModuleList([])

        # Split VGG16's feature stack into sequential chunks, each ending
        # at one of the requested layer indices.
        for i in range(len(n_layers)):
            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1

        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []

        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        return result

def loss_function(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0

    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)

    return mse_loss, perceptual_loss

Running it:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
feature_extractor = FeatureExtractor(n_layers=[1, 3, 13, 20]).to(device)
mse_loss, perceptual_loss = loss_function(image1, image2,
                                          feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tensor(0.0003)
tensor(0.0029)
tensor(0.2467)
tensor(0.2311)
0.002584027359262109 0.4810013473033905 0.483585387468338

Answer #1 (vu8f3i0k)

Although it is the same model, the final parameters can differ because the weights are initialized and trained separately for each framework. In addition, frameworks such as Keras and PyTorch preprocess input images differently before they reach the network, so even the same image yields different tensor values after preprocessing. The code below is an example that may help illustrate this.

from abc import ABC

import torch
import numpy as np
import tensorflow as tf

from torch import nn
from PIL import Image
from torch.autograd import Variable
import torchvision.models as models
import torchvision.transforms as transforms
from keras.preprocessing.image import load_img
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import img_to_array

# 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg'
IMG_URL1 = 'the local path of 1200px-Cat03.jpg'

# 'https://upload.wikimedia.org/wikipedia/commons/b/bb/Kittyply_edit1.jpg'
IMG_URL2 = 'the local path of Kittyply_edit1.jpg'

# preprocess in keras

image1_tf = load_img(IMG_URL1, target_size=(224, 224))
image1_tf = img_to_array(image1_tf)
image1_tf = image1_tf.reshape((1, image1_tf.shape[0], image1_tf.shape[1], image1_tf.shape[2]))
image1_tf = preprocess_input(image1_tf)

image2_tf = load_img(IMG_URL2, target_size=(224, 224))
image2_tf = img_to_array(image2_tf)
image2_tf = image2_tf.reshape((1, image2_tf.shape[0], image2_tf.shape[1], image2_tf.shape[2]))
image2_tf = preprocess_input(image2_tf)

# preprocess in pytorch

image1_torch = Image.open(IMG_URL1)
image2_torch = Image.open(IMG_URL2)
image1_torch = image1_torch.resize((224, 224))
image2_torch = image2_torch.resize((224, 224))

min_img_size = 224
transform_pipeline = transforms.Compose([transforms.Resize(min_img_size),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                              std=[0.229, 0.224, 0.225])])
image1_torch = transform_pipeline(image1_torch)
image2_torch = transform_pipeline(image2_torch)
image1_torch = image1_torch.unsqueeze(0)
image2_torch = image2_torch.unsqueeze(0)
image1_torch = Variable(image1_torch)
image2_torch = Variable(image2_torch)
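
# Illustrative sanity check (not in the original answer): the two pipelines
# yield very different tensors for the same image. Keras' preprocess_input
# follows VGG16's "caffe" convention (RGB -> BGR, ImageNet channel means
# subtracted), while the torchvision pipeline scales pixels to [0, 1] and
# then normalizes with per-channel mean/std.
print(image1_tf.min(), image1_tf.max())                      # roughly -124 .. 152
print(image1_torch.min().item(), image1_torch.max().item())  # roughly -2.1 .. 2.6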

class FeatureExtractor(tf.keras.Model, ABC):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()

        extractor = tf.keras.applications.VGG16(weights="imagenet", input_shape=(224, 224, 3))
        extractor.trainable = True
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)

def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)

    print("tf prediction:", np.argmax(generated_features[-1].numpy()[0]))
    print("tf prediction:", np.argmax(real_features[-1].numpy()[0]))

    perceptual_loss = 0

    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss

    return mse_loss, perceptual_loss

feature_extractor = FeatureExtractor(n_layers=["block1_conv1", "block1_conv2", "block3_conv2",
                                               "block4_conv2", "predictions"])
print("tensorflow: ")
mse_loss, perceptual_loss = loss_function(image1_tf, image2_tf, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")

class FeatureExtractor1(torch.nn.Module):

    def __init__(self, n_layers):
        super(FeatureExtractor1, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        extractor = self.vgg.features
        index = 0
        self.layers = nn.ModuleList([])
        for i in range(len(n_layers)):
            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1

        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []
        predict = self.vgg(x)
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        result.append(predict)
        return result

def loss_function1(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)

    print("torch prediction:", np.argmax(generated_features[-1].numpy()[0]))
    print("torch prediction:", np.argmax(real_features[-1].numpy()[0]))
    perceptual_loss = 0

    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)

    return mse_loss, perceptual_loss

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
feature_extractor = FeatureExtractor1(n_layers=[1, 3, 13, 20]).to(device)
image1_torch = image1_torch.to(device)  # keep the inputs on the same device as the model
image2_torch = image2_torch.to(device)
print("pytorch: ")
mse_loss, perceptual_loss = loss_function1(image1_torch, image2_torch, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")

In addition, the model was trained for classification accuracy, so differences between the feature maps in the middle of the two networks are to be expected.
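
As a further illustration (a minimal sketch, not part of the original answer), one can remove preprocessing from the comparison entirely by feeding numerically identical inputs to both extractors; any remaining gap then comes from the separately trained "imagenet" checkpoints. Note that the torchvision indices [1, 3, 13, 20] select the post-ReLU outputs of conv1_1, conv1_2, conv3_2 and conv4_2, which line up with Keras' block1_conv1, block1_conv2, block3_conv2 and block4_conv2 (whose outputs are also post-activation), so the compared layers do match. The names extractor_tf and extractor_torch below are placeholders for the two FeatureExtractor instances built above, both assumed to be on the CPU:

# Minimal sketch: the same random input for both frameworks.
x = np.random.rand(1, 224, 224, 3).astype("float32")

feats_tf = extractor_tf(x)                                               # NHWC feature maps
feats_torch = extractor_torch(torch.from_numpy(x).permute(0, 3, 1, 2))   # NCHW feature maps

for ft, pt in zip(feats_tf[:-1], feats_torch[:-1]):
    pt = pt.permute(0, 2, 3, 1).numpy()  # NCHW -> NHWC before comparing
    print(np.abs(ft.numpy() - pt).mean())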
