For practice, I am working on a feedback recurrent autoencoder in Keras. The code I am using is:
import tensorflow as tf
import keras
import os
class Linear(keras.layers.Layer):
    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b
class FRAE(tf.keras.Model):
    def __init__(self):
        super(FRAE, self).__init__()
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent = Linear(1)
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        self.decoded = tf.zeros(shape=(1, 2))  # feedback state: the previous decoder output

    def call(self, inputs):
        batch_size = inputs.shape[0]
        output_list = tf.TensorArray(tf.float32, size=batch_size, clear_after_read=False)
        for i in range(batch_size):
            # concatenate the current sample with the previous decoder output
            x = tf.concat((tf.expand_dims(inputs[i], axis=0), self.decoded), axis=1)
            x = self.linear_1(x)
            x = tf.nn.swish(x)
            x = self.linear_2(x)
            x = tf.nn.swish(x)
            x = self.latent(x)
            x = tf.nn.swish(x)
            x = tf.concat((x, self.decoded), axis=1)
            x = self.linear_3(x)
            x = tf.nn.swish(x)
            x = self.linear_4(x)
            x = tf.nn.swish(x)
            self.decoded = tf.identity(x)  # remember this output for the next step
            output_list.write(i, x)
        y = output_list.stack()
        return y
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
xtrain = tf.random.uniform(shape=(10,2))
model = FRAE()
y = model(xtrain)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,loss="mse")
model.fit(x=xtrain,y=xtrain, epochs=50, batch_size=1)
When I run this code, I get the following error:
ValueError: No gradients provided for any variable: (['frae_13/linear_65/Variable:0', 'frae_13/linear_65/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0'],). Provided grads_and_vars
is ((None, <tf.Variable 'frae_13/linear_65/Variable:0' shape=(4, 4) dtype=float32>), (None, <tf.Variable 'frae_13/linear_65/Variable:0' shape=(4,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_66/Variable:0' shape=(4, 3) dtype=float32>), (None, <tf.Variable 'frae_13/linear_66/Variable:0' shape=(3,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_67/Variable:0' shape=(3, 1) dtype=float32>), (None, <tf.Variable 'frae_13/linear_67/Variable:0' shape=(1,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_68/Variable:0' shape=(3, 3) dtype=float32>), (None, <tf.Variable 'frae_13/linear_68/Variable:0' shape=(3,) dtype=float32>), (None, <tf.Variable 'frae_13/linear_69/Variable:0' shape=(3, 2) dtype=float32>), (None, <tf.Variable 'frae_13/linear_69/Variable:0' shape=(2,) dtype=float32>)).
This probably stems from using the TensorArray to store the outputs of the individual batch samples: somehow the gradients get lost or cannot be computed.
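One detail worth noting about the loop above: according to the TensorFlow documentation, tf.TensorArray.write returns a new TensorArray with the value written rather than mutating the array in place, so the result normally has to be reassigned, otherwise the write can be dropped once the function is traced into a graph:

output_list = output_list.write(i, x)  # keep the returned TensorArray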
Does anyone know how to compute the gradients in this case?
I tried googling for common issues, but this one is rather specific, so the solutions I found were not really helpful.
1 Answer
Thanks to Alberto's comment about using lists, I was able to get this working. I did it like this:
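A minimal sketch of that list-based call method, assuming the rest of the model stays exactly as in the question and that the model runs eagerly (e.g. model.compile(optimizer=optimizer, loss="mse", run_eagerly=True)), since the Python loop needs a concrete batch size:

def call(self, inputs):
    batch_size = inputs.shape[0]
    decoded = tf.zeros(shape=(1, 2))  # feedback state as a local tensor, reset on every call
    outputs = []                      # plain Python list instead of tf.TensorArray
    for i in range(batch_size):
        x = tf.concat((tf.expand_dims(inputs[i], axis=0), decoded), axis=1)
        x = tf.nn.swish(self.linear_1(x))
        x = tf.nn.swish(self.linear_2(x))
        x = tf.nn.swish(self.latent(x))
        x = tf.concat((x, decoded), axis=1)
        x = tf.nn.swish(self.linear_3(x))
        x = tf.nn.swish(self.linear_4(x))
        decoded = x                   # feed this output back into the next step
        outputs.append(x)
    return tf.concat(outputs, axis=0)  # shape (batch_size, 2)

Because the per-step outputs stay ordinary tensors in a Python list, the gradient tape can trace them back to the layer weights, and keeping decoded as a local variable instead of assigning it to self.decoded avoids mutating model state inside call.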
This is of course just a prototype, so with random data the output feedback will not help at all. With correlated data it should provide some benefit.