我是ML和AI的新手,我试图建立一个阅读文本图像的模型,我有点迷失在这里。这是我用来建立模型的代码,但我不能。
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
# Defines the path to your images folder
folder_path = r"C:\Users\cuell.DESKTOP-1DJM07S\Desktop\MLTU\train"
# Defines the characters that can appear in the images
characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
# Define the number of images to use for training
num_train_images = 85
# Define the size of the images
image_width, image_height, num_channels = 180, 40, 3
# Defines the number of characters in each image
num_characters = 6
# Defines the batch size and number of epochs to use for training
batch_size, num_epochs = 10, 1
# Load the images and their labels
def load_images(folder_path):
images, labels = [], []
for filename in os.listdir(folder_path):
if filename.endswith(".jpg"):
image_path = os.path.join(folder_path, filename)
image = tf.io.decode_png(tf.io.read_file(image_path), channels=num_channels)
label = filename.split(".")[0]
images.append(image)
labels.append(label)
return images, labels
images, labels = load_images(folder_path)
# Convert the images and labels to numpy arrays
def convert_to_arrays(images, labels):
images_array = np.array(images) / 255.0
labels_array = np.zeros((num_train_images, num_characters, len(characters)))
for i, label in enumerate(labels):
for j, char in enumerate(label):
labels_array[i, j, characters.index(char)] = 1
return images_array, labels_array
images_array, labels_array = convert_to_arrays(images, labels)
# Shuffle the images and labels
def shuffle_arrays(images_array, labels_array):
permutation = np.random.permutation(len(images_array))
return images_array[permutation], labels_array[permutation]
images_array, labels_array = shuffle_arrays(images_array, labels_array)
# Split the images and labels into training and validation sets
def split_arrays(images_array, labels_array):
num_train = int(num_train_images * 0.8)
train_images, train_labels = images_array[:num_train], labels_array[:num_train]
val_images, val_labels = images_array[num_train:], labels_array[num_train:]
return train_images, train_labels, val_images, val_labels
train_images, train_labels, val_images, val_labels = split_arrays(images_array, labels_array)
# Define the TensorFlow model
model = Sequential([
Conv2D(32, (3, 3), activation="relu", input_shape=(40, 180, 3)),
MaxPooling2D(pool_size=(2, 2)),
Conv2D(64, (3, 3), activation="relu"),
MaxPooling2D(pool_size=(2, 2)),
Flatten(),
Dense(128, activation="relu"),
Dense(num_characters * len(characters), activation="relu")
])
# Compile the TensorFlow model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
# Train the TensorFlow model
model.fit(train_images, train_labels, batch_size=batch_size, epochs=num_epochs, validation_data=(val_images, val_labels))
#model.save("text_model.h5")
当我点击www.example.com时model.fit,我得到了ValueError的答案:形状(无,6,36)和(无,216)是不兼容的,我的猜测是,我做错了什么,对分裂的20%的测试和培训,和他们的维度变化,但我仍然不确定。
我期待着我的第一个文本阅读模型运行。:)
1条答案
按热度按时间ozxc1zmp1#
你定义
拆分为批次后,标签的形状为
(batch_size, 6, 36)
。同时,模型的最后一层是
其形状为
(batch_size, 216)
。这种不匹配会导致错误。要解决这个问题,您需要重塑模型的输出。将这个额外的图层添加到您的Sequential:
编辑:也从
tensorflow.keras.layers
导入Reshape
。