我对ML和tensorflow非常陌生,所以我正在学习tensorflow的教程,并使用自定义训练循环复制/粘贴其中一个模型,并进行了一些修改。在cifar 10数据集上训练3个epoch后,它实现了60%的训练accc和54%的验证accc。然而,当我将它加载到另一个脚本中,试图用在线图像测试它时,logits有一个几乎相同的错误传播。测试的图像中有2个来自在线的船和飞机,看起来与其他cifar 10图像相似,而另外2个来自数据集本身,是马和船。所有产生几乎相同的logits。真正奇怪的是,logit似乎总是在第四个标签(cat)处达到峰值,无论我使用哪种模型、架构或图像。我也试过切换优化器,但没有用。我遗漏了什么(非常感谢关于问题本身或代码的任何其他反馈)?
产品型号:
import tensorflow as tf
import keras
from keras import layers
import numpy as np
import wandb
import datetime
from datetime import time
from PIL import Image
wandb.init(project='cifar10_seq', sync_tensorboard=True)
model = tf.keras.Sequential([
layers.Conv2D(64, 3, activation='relu', input_shape=(32, 32, 3)),
layers.BatchNormalization(),
layers.Conv2D(64, 3, activation='relu'),
layers.BatchNormalization(),
layers.Conv2D(64, 3, activation='relu'),
layers.BatchNormalization(),
layers.Conv2D(64, 3, activation='relu'),
layers.BatchNormalization(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(10)
])
model.summary()
# Instantiate an optimizer.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Prepare the metrics.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()
# dataset
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
# x_train = np.reshape(x_train, (-1, 784))
# x_test = np.reshape(x_test, (-1, 784))
# Reserve 10,000 samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)
epochs = 3
training_on = True
if training_on:
for epoch in range(epochs):
print("Start of epoch %d" % (epoch+1,))
# Iterate over dataset in batches
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
with tf.GradientTape() as tape:
# Run the forward pass of the layer.
# The operations that the layer applies
# to its inputs are going to be recorded
# on the GradientTape.
logits = model(x_batch_train, training=True)
# Compute the loss value for this minibatch.
loss_value = loss_fn(y_batch_train, logits)
wandb.log({"loss": loss_value})
wandb.log({"train_acc": train_acc_metric.result()})
# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)
# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))
# Update training metric.
train_acc_metric.update_state(y_batch_train, logits)
# Log every 200 batches.
if step % 200 == 0:
print(
"Training loss (for one batch) at step %d: %.4f"
% (step, float(loss_value))
)
print("Seen so far: %s samples" % ((step + 1) * batch_size))
# Display metrics at the end of each epoch.
train_acc = train_acc_metric.result()
print("Training acc over epoch: %.4f" % (float(train_acc),))
# Reset training metrics at the end of each epoch
train_acc_metric.reset_states()
# Run a validation loop at the end of each epoch.
for x_batch_val, y_batch_val in val_dataset:
val_logits = model(x_batch_val, training=False)
# Update val metrics
val_acc_metric.update_state(y_batch_val, val_logits)
wandb.log({"val_acc": val_acc_metric.result()})
val_acc = val_acc_metric.result()
val_acc_metric.reset_states()
print("Validation acc: %.4f" % (float(val_acc),))
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model.save("cifar10_new.keras")
wandb.finish()
字符串
测试脚本:
from PIL import Image
import tensorflow as tf
import keras
from keras import layers
import numpy as np
def rescale_images(directory, size):
im = Image.open(directory)
im_resized = im.resize(size, Image.LANCZOS)
im_resized.save(directory)
def prepare_image(directory):
img = tf.io.read_file(directory)
tensor = tf.io.decode_image(img, channels=3, dtype=tf.dtypes.float32)
input_tensor = tf.expand_dims(tensor, axis=0)
return input_tensor
model = keras.models.load_model("cifar10_new.keras")
rescale_images("random_images/car.png", (32, 32))
prediction = model.predict(prepare_image("random_images/car.png"))
prediction = tf.nn.softmax(prediction)
print(prediction)
型
每个图像的输出(softmax):
car:tf.Tensor(1.0266284e-03 7.7120079e-05 7.0860572e-02 8.8686663e-01 1.8580483e-02 1.7540520e-02 9.1350032e-04 8.7989774e-04 3.2326749e-03 2.1997652e-05,shape=(1,10),dtype=float32)
boat:tf.Tensor(1.0634739e-03 7.8063604e-05 7.0776239e-02 8.8636857e-01 1.8690629e-02 1.7929520e-02 8.5596397e-04 8.9970016e-04 3.3151936e-03 2.2658043e-05,shape=(1,10),dtype=float32)
horse:tf.Tensor(9.7531546e-04 6.9031390e-05 6.4809047e-02 8.9467043e-01 1.7074293e-02 1.7835772e-02 7.6558901e-04 8.5785246e-04 2.9216956e-03 2.0959489e-05,shape=(1,10),dtype=float32)
飞机:tf.Tensor(1.0483327e-03 7.7148259e-05 7.0324250e-02 8.8746655e-01 1.8424451e-02 1.7627772e-02 8.5348362e-04 8.9448754e-04 3.2609249e-03 2.2576383e-05,shape=(1,10),dtype=float32)
图像是截然不同的,所以我不相信这是因为模型实际上认为这些图像中的每一个都是猫。任何帮助都是感激的。
1条答案
按热度按时间oo7oh9g91#
显然,我在解码图像时将图像转换为float32,这意味着它是从0,1而不是我的模型训练的0,255缩放的。当我将其转换为uint8时,模型工作了。