Paddle 论文复现：LSTM修改属性名后无法前向对齐

5tmbdcev 于 2022-10-20 发布在其他

关注(0)|答案(4)|浏览(214)

paddlepaddle版本：2.2.1
pytorch版本：1.10.0+cu113

torch.nn.LSTM 和 paddle.nn.LSTM 在经过相同的修改属性名操作后，前向结果会有 diff

代码如下：

import paddle
import paddle.nn as nn
import torch
from torch.nn import Module
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# 有diff的版本

class TorchWeightDropout(Module):
    """Wrapper that re-routes one weight of ``module`` through a ``<name>_raw`` parameter.

    For the attribute named by ``layer_names`` the constructor:
      1. removes the attribute from the wrapped module,
      2. registers its data on this wrapper as the parameter ``<name>_raw``,
      3. puts a clone of the original tensor back on the wrapped module
         under the original name.

    Every forward pass then overwrites that attribute on the wrapped module
    with a fresh clone of ``<name>_raw``. No dropout is applied in this
    version; the raw weight is only cloned.

    Args:
        module: the module to wrap.
        layer_names: name of the attribute to re-route. It is stored wrapped
            in a one-element list.
    """

    def __init__(self, module, layer_names='weight_hh_l0'):
        super().__init__()
        self.module = module
        self.layer_names = [layer_names]
        for name in self.layer_names:
            original = getattr(self.module, name)
            # Drop the attribute from the wrapped module before re-adding it,
            # so the wrapper's ``*_raw`` parameter replaces it.
            delattr(self.module, name)
            raw_param = torch.nn.Parameter(original.data)
            self.register_parameter(f'{name}_raw', raw_param)
            setattr(self.module, name, original.clone())

    def _setweights(self):
        """Copy each ``<name>_raw`` parameter onto the wrapped module as ``<name>``."""
        for name in self.layer_names:
            refreshed = getattr(self, f'{name}_raw').clone()
            setattr(self.module, name, refreshed)

    def forward(self, *args):
        """Refresh the re-routed weights, then call the wrapped module with ``args``."""
        self._setweights()
        outputs = self.module(*args)
        return outputs

class PaddleWeightDropout(nn.Layer):
    """Paddle counterpart of the torch wrapper: re-route one weight through ``<name>_raw``.

    For the attribute named by ``layer_names`` the constructor removes it from
    the wrapped layer, creates a new parameter initialised from its detached
    value, registers that parameter on this wrapper as ``<name>_raw``, and puts
    a clone of the original tensor back on the wrapped layer under the
    original name. Each forward pass overwrites that attribute with a clone of
    ``<name>_raw`` before calling the wrapped layer.

    NOTE(review): whether the wrapped layer's forward actually reads the
    attribute re-assigned here cannot be told from this code - confirm
    against the Paddle layer implementation.

    Args:
        module: the layer to wrap.
        layer_names: name of the attribute to re-route. It is stored wrapped
            in a one-element list.
    """

    def __init__(self, module, layer_names='weight_hh_l0'):
        super().__init__()
        self.module = module
        self.layer_names = [layer_names]
        for name in self.layer_names:
            original = getattr(self.module, name)
            delattr(self.module, name)
            detached = original.detach()
            # Build a new parameter with the same shape/dtype, initialised
            # from the current values of the original weight.
            raw_param = paddle.create_parameter(
                shape=detached.shape,
                dtype=str(detached.numpy().dtype),
                default_initializer=nn.initializer.Assign(detached),
            )
            self.add_parameter(f'{name}_raw', raw_param)
            setattr(self.module, name, original.clone())

    def _setweights(self):
        """Copy each ``<name>_raw`` parameter onto the wrapped layer as ``<name>``."""
        for name in self.layer_names:
            refreshed = getattr(self, f'{name}_raw').clone()
            setattr(self.module, name, refreshed)

    def forward(self, *args):
        """Refresh the re-routed weights, then call the wrapped layer with ``args``."""
        self._setweights()
        outputs = self.module(*args)
        return outputs

# Pre-build the LSTMs and the matching weights

# Single-layer, unidirectional, batch-first LSTMs with identical sizes in both frameworks.
n_in = 400
n_out = 1152
paddle_lstm = paddle.nn.LSTM(n_in,n_out,1,time_major=False,direction="forward")
torch_lstm = torch.nn.LSTM(n_in,n_out,1,batch_first=True,bidirectional=False)

# Random integer weights in [1, 100); 4608 == 4 * n_out.
weight1 = np.random.randint(1,100,size=(4608,400))
weight2 = np.random.randint(1,100,size=(4608,1152))
weight3 = np.random.randint(1,100,size=(4608))
weight4 = np.random.randint(1,100,size=(4608))

# Note: the original weight name is changed to weight_hh_l0_raw here

weight_dict = {"module.weight_ih_l0":weight1,"weight_hh_l0_raw":weight2,"module.bias_ih_l0":weight3,"module.bias_hh_l0":weight4}
paddle_dict = {}
torch_dict = {}
# Convert the same numpy arrays to float32 tensors for each framework.
for key in weight_dict.keys():
    paddle_dict[key] = paddle.to_tensor(weight_dict[key],dtype=paddle.float32)
    torch_dict[key] = torch.tensor(weight_dict[key],dtype=torch.float32)

# Wrap both LSTMs, load the shared weights and switch to eval mode.
paddle_model = PaddleWeightDropout(paddle_lstm)
torch_model = TorchWeightDropout(torch_lstm)
paddle_model.load_dict(paddle_dict)
torch_model.load_state_dict(torch_dict)
paddle_model.eval()
torch_model.eval()

# One shared random input of shape (1, 4, 400), fed to both models.
x = np.random.randn(1,4,400).astype(np.float32)
paddle_x = paddle.to_tensor(x)
torch_x = torch.tensor(x)

# Compare the first element returned by each model.
paddle_out = paddle_model(paddle_x)[0].detach().numpy()
torch_out = torch_model(torch_x)[0].cpu().detach().numpy()
# `diff` is True when the outputs agree within atol; `diff2` is the mean absolute difference.
diff = np.allclose(paddle_out,torch_out,atol = 1e-6)
diff2 = np.mean(np.abs(paddle_out - torch_out))

# Reported result: the outputs differ

print(diff,diff2)

Paddle

来源：https://github.com/PaddlePaddle/Paddle/issues/39074

4条答案

按热度按时间

relj7zay1#

您好，我们已经收到了您的问题，会安排技术人员尽快解答您的问题，请耐心等待。请您再次检查是否提供了清晰的问题描述、复现代码、环境&版本、报错信息等。同时，您也可以通过查看官网API文档、常见问题、历史Issue 、 AI社区来寻求解答。祝您生活愉快～

Hi! We've received your issue; please be patient while waiting for a response. We will arrange for technicians to answer your questions as soon as possible. Please make sure that you have posted enough information to demonstrate your request. You may also check out the API docs, FAQ, GitHub Issues and the AI community to get the answer. Have a nice day!

赞(0）回复(0）举报 2022-10-20

fjaof16o2#


# 没有diff的版本

class TorchWeightDropoutNew(Module):
    """Pass-through wrapper: stores ``module`` and forwards calls to it unchanged.

    Unlike the variant that renames a weight, this class leaves every
    attribute of the wrapped module untouched.

    Args:
        module: the module to wrap.
        layer_names: attribute name, stored wrapped in a one-element list;
            not otherwise used by this class.
    """

    def __init__(self, module, layer_names='weight_hh_l0'):
        super().__init__()
        self.module = module
        self.layer_names = [layer_names]

    def forward(self, *args):
        """Call the wrapped module with ``args`` and return its result."""
        outputs = self.module(*args)
        return outputs

class PaddleWeightDropoutNew(nn.Layer):
    """Pass-through wrapper: stores ``module`` and forwards calls to it unchanged.

    Unlike the variant that renames a weight, this class leaves every
    attribute of the wrapped layer untouched.

    Args:
        module: the layer to wrap.
        layer_names: attribute name, stored wrapped in a one-element list;
            not otherwise used by this class.
    """

    def __init__(self, module, layer_names='weight_hh_l0'):
        super().__init__()
        self.module = module
        self.layer_names = [layer_names]

    def forward(self, *args):
        """Call the wrapped layer with ``args`` and return its result."""
        outputs = self.module(*args)
        return outputs

# Pre-build the LSTMs and the matching weights

# Single-layer, unidirectional, batch-first LSTMs with identical sizes in both frameworks.
n_in = 400
n_out = 1152
paddle_lstm = paddle.nn.LSTM(n_in,n_out,1,time_major=False,direction="forward")
torch_lstm = torch.nn.LSTM(n_in,n_out,1,batch_first=True,bidirectional=False)

# Random integer weights in [1, 100); 4608 == 4 * n_out.
weight1 = np.random.randint(1,100,size=(4608,400))
weight2 = np.random.randint(1,100,size=(4608,1152))
weight3 = np.random.randint(1,100,size=(4608))
weight4 = np.random.randint(1,100,size=(4608))

# Weight names are left unchanged

weight_dict = {"module.weight_ih_l0":weight1,"module.weight_hh_l0":weight2,"module.bias_ih_l0":weight3,"module.bias_hh_l0":weight4}
paddle_dict = {}
torch_dict = {}
# Convert the same numpy arrays to float32 tensors for each framework.
for key in weight_dict.keys():
    paddle_dict[key] = paddle.to_tensor(weight_dict[key],dtype=paddle.float32)
    torch_dict[key] = torch.tensor(weight_dict[key],dtype=torch.float32)

# Wrap both LSTMs in the pass-through wrappers, load the shared weights and switch to eval mode.
paddle_model = PaddleWeightDropoutNew(paddle_lstm)
torch_model = TorchWeightDropoutNew(torch_lstm)
paddle_model.load_dict(paddle_dict)
torch_model.load_state_dict(torch_dict)
paddle_model.eval()
torch_model.eval()

# One shared random input of shape (1, 4, 400), fed to both models.
x = np.random.randn(1,4,400).astype(np.float32)
paddle_x = paddle.to_tensor(x)
torch_x = torch.tensor(x)

# Compare the first element returned by each model.
paddle_out = paddle_model(paddle_x)[0].detach().numpy()
torch_out = torch_model(torch_x)[0].cpu().detach().numpy()
# `diff` is True when the outputs agree within atol; `diff2` is the mean absolute difference.
diff = np.allclose(paddle_out,torch_out,atol = 1e-6)
diff2 = np.mean(np.abs(paddle_out - torch_out))

# Reported result: no forward diff

print(diff,diff2)

赞(0）回复(0）举报 2022-10-20

u5rb5r593#

你好，我们会有相关开发同学回应，请耐心等待，谢谢。

赞(0）回复(0）举报 2022-10-20

xmjla07d4#

@akari0216 你好，可以看看有没有给新名称正确地赋值？把两个版本的 'weight_hh_l0' 和 'weight_hh_l0_raw' 都打印一下吧

赞(0）回复(0）举报 2022-10-20