I have set up custom training and testing functions in my project so that I can customise the training procedure in detail. I use k-fold cross-validation to evaluate my model. For whatever reason, the model trains correctly on the first fold and then throws this error on the second fold:

tensorflow.python.framework.errors_impl.FailedPreconditionError: Could not find variable _AnonymousVar13. This could mean that the variable has been deleted. In TF1, it can also mean the variable is uninitialized. Debug info: container=localhost, status=Not found: Resource localhost/_AnonymousVar13/N10tensorflow3VarE does not exist. [[node test_model/dense_2/Tensordot/ReadVariableOp (defined at training_example.py:33) ]] [Op:__inference__train_step_1082]
I have no idea what is going on. I thought the error was caused by bad initialisation, so I called model.build() with the input shape. I also tried initialising the graph's weights with a blank tensor, but with no success. I also reset the backend on the last line of the fold loop (tf.keras.backend.clear_session()) in case it was a naming clash, but that does not solve the problem either.
import numpy as np
import sklearn.model_selection
import tensorflow as tf
from tensorflow.keras.metrics import Mean, Precision, Recall
from tensorflow.keras.optimizers import Adam
n_splits = 5
batch_size = 16
n_epochs = 2
loss_function = tf.keras.losses.BinaryCrossentropy(from_logits=True)  # the model outputs logits
optimiser_fn = Adam
metrics = [
    Mean(name='loss'),
    Precision(name='prec'),
    Recall(name='recall'),
]
learning_rate = 1e-2
dense_outputs = [10,10]
activation = 'relu'
class TestModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        # Stack of hidden dense layers followed by a single-logit output layer.
        self._dense_ops = [
            tf.keras.layers.Dense(o, activation=activation) for o in dense_outputs
        ]
        self._output = tf.keras.layers.Dense(1)

    def call(self, inputs):
        hidden = inputs
        for layer in self._dense_ops:
            hidden = layer(hidden)
        return self._output(hidden)
def _load_fold_sets_for_training(fold, fold_idcs, features, labels, batch_size):
    # Get the indices for the sets.
    train_idcs, validation_idcs, _ = fold_idcs[fold]
    # Get the training data and labels.
    training_data = features[train_idcs]
    training_labels = labels[train_idcs]
    # Build the batched training and validation sets.
    training_set = tf.data.Dataset.from_tensor_slices(
        (training_data, training_labels)
    )
    training_set = training_set.batch(batch_size, drop_remainder=False)
    validation_set = tf.data.Dataset.from_tensor_slices(
        (features[validation_idcs], labels[validation_idcs])
    )
    validation_set = validation_set.batch(batch_size, drop_remainder=False)
    return training_set, validation_set
@tf.function
def _train_step(batch_samples, batch_labels):
    batch_predictions = model(batch_samples, training=True)
    # Keras losses take (y_true, y_pred).
    loss = loss_function(batch_labels, batch_predictions)
    gradients = tf.gradients(loss, model.trainable_variables)
    optimiser.apply_gradients(
        zip(gradients, model.trainable_variables)
    )
    batch_predictions = tf.sigmoid(batch_predictions)
    metrics[0].update_state(loss)
    for m in metrics[1:]:
        m.update_state(batch_labels, batch_predictions)

@tf.function
def _inference_step(batch_samples, batch_labels):
    batch_predictions = model(batch_samples, training=False)
    loss = loss_function(batch_labels, batch_predictions)
    batch_predictions = tf.sigmoid(batch_predictions)
    metrics[0].update_state(loss)
    for m in metrics[1:]:
        m.update_state(batch_labels, batch_predictions)
# Generate dataset.
features = np.random.rand(15,1440,1)
labels = np.random.rand(15,1440)
# Set up splits.
kfold = sklearn.model_selection.KFold(n_splits=n_splits, shuffle=True)
splits = []
for train_idcs, test_idcs in kfold.split(features):
    train_idcs, val_idcs = sklearn.model_selection.train_test_split(train_idcs)
    splits += [[train_idcs, val_idcs, test_idcs]]
fold = 0
while fold < n_splits:
    # Load datasets for fold.
    training_set, validation_set = _load_fold_sets_for_training(
        fold, splits, features, labels, batch_size
    )
    # Load model.
    model = TestModel()
    # Build model.
    model.build((1440, 1))
    # Initialise Adam optimiser.
    optimiser = optimiser_fn(learning_rate)
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        # Training.
        for batch_features, batch_labels in training_set:
            _train_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'train_{m.name}: {m.result():0.05f}' for m in metrics))
        # Validation.
        for batch_features, batch_labels in validation_set:
            _inference_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'val_{m.name}: {m.result():0.05f}' for m in metrics))
    tf.keras.backend.clear_session()
    fold += 1
Any ideas?
1 Answer
The problem is the location of the _train_step and _inference_step definitions. If the two functions are redefined on every fold iteration, the error disappears and the model trains. I don't know why they have to be redefined on every fold.
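A plausible explanation, not confirmed in the original answer: a @tf.function is traced once per input signature, and the trace captures references to the variables of whichever model and optimiser existed at trace time. On the second fold the cached trace is reused, so it tries to read the first fold's variables, which no longer exist after the model is rebuilt and clear_session() is called, hence "Could not find variable". A minimal sketch of the rearranged fold loop under that assumption, reusing the names from the question:

fold = 0
while fold < n_splits:
    training_set, validation_set = _load_fold_sets_for_training(
        fold, splits, features, labels, batch_size
    )
    model = TestModel()
    model.build((1440, 1))
    optimiser = optimiser_fn(learning_rate)

    # Defined inside the loop: each fold gets a fresh @tf.function whose
    # trace captures this fold's newly created model and optimiser variables.
    @tf.function
    def _train_step(batch_samples, batch_labels):
        batch_predictions = model(batch_samples, training=True)
        loss = loss_function(batch_labels, batch_predictions)
        gradients = tf.gradients(loss, model.trainable_variables)
        optimiser.apply_gradients(zip(gradients, model.trainable_variables))
        metrics[0].update_state(loss)
        for m in metrics[1:]:
            m.update_state(batch_labels, tf.sigmoid(batch_predictions))

    @tf.function
    def _inference_step(batch_samples, batch_labels):
        batch_predictions = model(batch_samples, training=False)
        loss = loss_function(batch_labels, batch_predictions)
        metrics[0].update_state(loss)
        for m in metrics[1:]:
            m.update_state(batch_labels, tf.sigmoid(batch_predictions))

    # ... epoch/training/validation loops exactly as in the question ...
    tf.keras.backend.clear_session()
    fold += 1

An equivalent approach is to wrap the whole per-fold setup (model, optimiser and the two decorated functions) in a helper function and call it once per fold, so each call creates fresh traces.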