keras 使用TensorArray存储中间结果时,不为任何变量提供梯度

91zkwejq  于 2023-04-06  发布在  其他
关注(0)|答案(1)|浏览(168)

For practice, I am working on a feedback recurrent autoencoder in Keras. The code I am using is:

import tensorflow as tf
import keras
import os

class Linear(keras.layers.Layer):
    """Fully connected layer computing ``inputs @ w + b``.

    Args:
        units: Size of the last dimension of the output.
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``), forwarded unchanged.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the input's last dimension is known."""
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config
    

class FRAE(tf.keras.Model):
    """Feedback recurrent autoencoder as posted in the question.

    Each sample is encoded together with the previously decoded output, and
    the latent code is decoded together with that same feedback. This is the
    listing that produces the "No gradients provided for any variable" error
    quoted in the question, so the code is intentionally left unchanged.
    """

    def __init__(self):
        super(FRAE, self).__init__()
        # Encoder layers: 4 -> 3 -> 1 (latent) units.
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent   = Linear(1)
        # Decoder layers: 3 -> 2 units.
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state: the most recent decoder output, zeros at the start.
        self.decoded  = tf.zeros(shape=(1, 2))
        
    def call(self, inputs):
        """Run the samples of ``inputs`` one by one and return the stacked outputs."""
        #x = self.flatten(inputs)
        
        batch_size = inputs.shape[0]
        # NOTE(review): input_dim is only referenced by the commented-out line below.
        input_dim = inputs.shape[1]
        # output_list = [None] * batch_size #tf.zeros(shape = (batch_size, input_dim))
        output_list = tf.TensorArray(tf.float32, size=batch_size, clear_after_read=False)

        for i in range(batch_size):
            # Encoder input: current sample (as a single row) joined with the feedback.
            x = tf.concat((tf.expand_dims(inputs[i], axis=0),self.decoded),axis=1)
            x = self.linear_1(x)
            x = tf.nn.swish(x)
            x = self.linear_2(x)
            x = tf.nn.swish(x)
            x = self.latent(x)
            x = tf.nn.swish(x)
            # Decoder input: latent code joined with the same feedback.
            x = tf.concat((x,self.decoded),axis=1)
            x = self.linear_3(x)
            x = tf.nn.swish(x)
            x = self.linear_4(x)
            x = tf.nn.swish(x)
            # The new output becomes the feedback for the next sample (and the next call).
            self.decoded = tf.identity(x)
            # NOTE(review): the TensorArray returned by write() is discarded here.
            # TensorFlow's documentation says the return value of write() should be
            # used; this is presumably why stack() below is disconnected from the
            # layer weights and no gradients are found -- confirm against the
            # tf.TensorArray documentation.
            output_list.write(i,  x)
        y = output_list.stack()
        return y

        
# Intended to hide all GPUs from TensorFlow so the example runs on the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


# Ten random two-dimensional samples; they serve as both input and target.
xtrain = tf.random.uniform(shape=(10,2))
model = FRAE()
# One forward pass over all samples, which also creates the layer weights.
y = model(xtrain)
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,loss="mse")
model.fit(x=xtrain,y=xtrain, epochs=50, batch_size=1)

When I run this code, I get the following error:
ValueError: No gradients provided for any variable: (['frae_13/linear_65/Variable:0', 'frae_13/linear_65/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0'],). Provided grads_and_vars is ((None, <tf.Variable 'frae_13/linear_65/Variable:0' shape=(4, 4) dtype=float32>), (None, <tf.Variable 'frae_13/linear_65/Variable:0' shape=(4,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_66/Variable:0' shape=(4, 3) dtype=float32>), (None, <tf.Variable 'frae_13/linear_66/Variable:0' shape=(3,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_67/Variable:0' shape=(3, 1) dtype=float32>), (None, <tf.Variable 'frae_13/linear_67/Variable:0' shape=(1,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_68/Variable:0' shape=(3, 3) dtype=float32>), (None, <tf.Variable 'frae_13/linear_68/Variable:0' shape=(3,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_69/Variable:0' shape=(3, 2) dtype=float32>), (None, <tf.Variable 'frae_13/linear_69/Variable:0' shape=(2,) dtype=float32>)).
This probably stems from using the TensorArray to store the output of the batch samples. Somehow, the gradients get lost or cannot be computed.
Does anyone know how to compute the gradients in this case?
I tried to google for common issues, but this one is rather special, so the solutions I found were not really helpful.

7ajki6be

7ajki6be1#

多亏了Alberto关于使用列表的评论,我才能做到这一点。
我是这样做的:

import tensorflow as tf
import keras
import os

class Linear(keras.layers.Layer):
    """Fully connected layer computing ``inputs @ w + b``.

    Args:
        units: Size of the last dimension of the output.
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``), forwarded unchanged.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the input's last dimension is known."""
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config
    

class FRAE(tf.keras.Model):
    """Feedback recurrent autoencoder that has to be run eagerly.

    Each sample is encoded together with the previously decoded output, and
    the latent code is decoded together with that same feedback. The feedback
    is kept in ``self.decoded`` as a plain nested list, so it persists across
    ``call`` invocations and carries no gradient history. Because the state is
    refreshed through ``Tensor.numpy()``, the model only works in eager mode
    (compile it with ``run_eagerly=True``).
    """

    def __init__(self):
        super().__init__()
        # Encoder: (sample + feedback) -> 4 -> 3 -> 1 latent unit.
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent = Linear(1)
        # Decoder: (latent + feedback) -> 3 -> 2 outputs.
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state; zeros until the first sample has been decoded.
        self.decoded = [[0, 0]]

    def call(self, inputs):
        """Process the samples of ``inputs`` one at a time.

        Args:
            inputs: Tensor whose first dimension indexes the samples; that
                dimension must be statically known because it drives a
                Python loop.

        Returns:
            A tensor built from the per-sample outputs. Each output has shape
            ``(1, 2)``, so the result has shape ``(batch, 1, 2)``.
        """
        batch_size = inputs.shape[0]
        outputs = []
        for i in range(batch_size):
            # The same feedback feeds both encoder and decoder in this step,
            # so it is converted to a tensor once.
            feedback = tf.convert_to_tensor(self.decoded, dtype=tf.float32)
            sample = tf.expand_dims(inputs[i], axis=0)

            x = tf.concat((sample, feedback), axis=1)
            x = self.linear_1(x)
            x = tf.nn.swish(x)
            x = self.linear_2(x)
            x = tf.nn.swish(x)
            x = self.latent(x)
            x = tf.nn.swish(x)

            x = tf.concat((x, feedback), axis=1)
            x = self.linear_3(x)
            x = tf.nn.swish(x)
            x = self.linear_4(x)
            x = tf.nn.swish(x)

            # Store plain Python numbers: the next step then sees the values
            # only, not the operations that produced them.
            self.decoded = x.numpy().tolist()
            outputs.append(x)
        return tf.convert_to_tensor(outputs)

        
# Intended to hide all GPUs from TensorFlow so the example runs on the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


# Ten random two-dimensional samples; they serve as both input and target.
xtrain = tf.random.uniform(shape=(10,2))
model = FRAE()
# One forward pass over all samples, which also creates the layer weights.
y = model(xtrain)
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
# run_eagerly=True is required because FRAE.call reads tensor values via .numpy().
model.compile(optimizer=optimizer,loss="mse", run_eagerly=True)
model.fit(x=xtrain,y=xtrain, epochs=50, batch_size=1)

这当然是一个原型,所以对于随机数据,输出的反馈根本不会有好处。对于相关数据,它应该会产生一些好处。

相关问题