Paddle paddle 1.6版本中,动态图,反向传播后,梯度无法获取

k5ifujac  于 2022-10-20  发布在  其他
关注(0)|答案(2)|浏览(180)

使用paddle1.6版本动态图,经过反向传播后,无法正确获取梯度,网络层权重无变化。
main方法内容如下:

  1. import paddle.fluid as fl
  2. hr = fl.dygraph.to_variable(hr_image) # hr_image is the real image
  3. lr = fl.dygraph.to_variable(lr_image)
  4. hr.stop_gradient = False
  5. lr.stop_gradient = False
  6. sr = model.forward(lr)
  7. # L1 loss
  8. loss = fl.layers.mse_loss(sr,hr)
  9. # 计算梯度更新参数
  10. start = print_net_params_value(model)
  11. loss.backward()
  12. optimizer.minimize(loss,parameter_list=model.parameters())
  13. end = print_net_params_value(model)
  14. a = print_net_params(model)
  15. model.clear_gradients()

获取网络参数的方法如下:

  1. def print_net_params(net):
  2. # dy_param_value= {}
  3. dy_param_value= []
  4. for param in net.parameters():
  5. if param.trainable:
  6. # if param._grad_ivar() is not None:
  7. same_value = param.gradient()
  8. same_value = same_value.reshape(same_value.shape[0],-1)
  9. # gradients = fl.layers.reshape(param,[param.shape[0],-1])
  10. some1 = np.linalg.norm(same_value,ord=2,axis=1)
  11. # dy_param_value[param.name] = np.mean(some1)
  12. dy_param_value.append(np.mean(some1))
  13. a = [dy_param_value[0],dy_param_value[-2]]
  14. return(a)

网络内容如下:

  1. # !/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import paddle
  4. import paddle.fluid as fl
  5. from paddle.fluid.dygraph import Conv2D
  6. from option import opt
  7. import numpy as np
  8. class ResBlock(fl.dygraph.Layer):
  9. def __init__(self,namescope,n_feats,expansion_ratio,res_scale = 1.0):
  10. super(ResBlock, self).__init__(namescope)
  11. self.res_scale = res_scale
  12. self.conv1 = Conv2D(namescope,n_feats*expansion_ratio,filter_size=3,padding=1)
  13. self.conv2 = Conv2D(namescope,n_feats,filter_size=3,padding=1)
  14. def forward(self, y):
  15. x = self.conv1(y)
  16. x = fl.layers.clip_by_norm(x,max_norm=1.0)
  17. x = fl.layers.relu(x)
  18. x = self.conv2(x)
  19. x = fl.layers.clip_by_norm(x,max_norm=1.0)
  20. return(y+x*self.res_scale)
  21. def print_net_params(net):
  22. # dy_param_value= {}
  23. dy_param_value= []
  24. for param in net.parameters():
  25. if param.trainable:
  26. same_value = param.gradient()
  27. same_value = same_value.reshape(same_value.shape[0],-1)
  28. # gradients = fl.layers.reshape(param,[param.shape[0],-1])
  29. some1 = np.linalg.norm(same_value,ord=2,axis=1)
  30. # dy_param_value[param.name] = np.mean(some1)
  31. dy_param_value.append(np.mean(some1))
  32. a = [dy_param_value[0],dy_param_value[-2]]
  33. return(a)
  34. class Wdsr_A(fl.dygraph.Layer):
  35. def __init__(self,namescope):
  36. super(Wdsr_A,self).__init__(namescope)
  37. # skip
  38. self.skip = Conv2D(namescope,3*(opt.scale**2),filter_size=5,padding=2)
  39. # head
  40. self.head = Conv2D(namescope,opt.n_feats,filter_size=3,padding=1)
  41. # body
  42. for i in range(opt.n_res_blocks):
  43. self.add_sublayer('body'+str(i+1),ResBlock(namescope,opt.n_feats,opt.expansion_ratio,opt.res_scale))
  44. # tail
  45. self.tail = Conv2D(namescope,3*(opt.scale**2),filter_size=3,padding=1)
  46. def forward(self, x):
  47. # if self.subtract_mean:
  48. # 第一步就开始上采样了
  49. # 单独增加drop层,因为通常上来说,显示中的低阶图片都是缺少关键的像素点\
  50. # 先看看这个效果怎么样之后在增加内容
  51. # skip
  52. x = self.skip(x)
  53. x = fl.layers.clip_by_norm(x,max_norm=1.0)
  54. s = fl.layers.pixel_shuffle((x),upscale_factor = opt.scale)
  55. # head
  56. x = self.head(x)
  57. x = fl.layers.clip_by_norm(x,max_norm=1.0)
  58. # body
  59. for i in range(opt.n_res_blocks):
  60. x = self.__getattr__('body'+str(i+1)).forward(x)
  61. # tail
  62. x = fl.layers.clip_by_norm(self.tail(x),max_norm=1.0)
  63. x = fl.layers.pixel_shuffle((x),upscale_factor = opt.scale)
  64. # residual
  65. x += s
  66. return(x)
nom7f22z

nom7f22z1#

代码格式能容易阅读的版本么,或者提供一个链接

jgovgodb

jgovgodb2#

代码格式能容易阅读的版本么,或者提供一个链接

您好,修复了粘贴代码格式的问题。经过调试我发现 fl.layers.clip_by_norm 这行代码阻止了反向传播,把它去掉之后就可以了。

因为这里应用了很多处 fl.layers.clip_by_norm 这个方法,去掉不同位置时,有的时候会阻止反向传播,有的时候不会。

相关问题