Keras: concatenate the original image training dataset with the augmented training dataset

oyt4ldly · posted 2023-04-06 in Other

I have written the code below to load the training and test data, and I have augmented the training dataset. However, I want to concatenate the original training dataset with the augmented one. How can I do that?

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
        rescale=1./255, 
        rotation_range=5,
        zoom_range = 0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_dir = 'train_separated'
test_dir = 'test_separated'
batch_size = 128
img_height = 100
img_width = 100
num_classes = 10

# load train and test data
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical', 
    subset='training')

# after that I have the augmented train_data, but how do I concatenate the new augmented data with the original training data?

val_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical', 
    subset='validation')

test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

I want my training data to contain both the augmented training images and the original ones.
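
For reference, the un-augmented half of such a combined dataset can come from a second generator over the same directory that only rescales, keeping the same validation_split so it selects the same training subset. A minimal sketch reusing the variables above (the name plain_datagen is only illustrative):

plain_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# same directory and the same 'training' subset, but no random transforms,
# so this stream yields the original images
plain_train_data = plain_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training')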

j8ag8udp (answer #1)

I found a way to do this. Here is an example:

import tensorflow as tf
train_dir = "images/"
img_height = 32
img_width = 32
batch_size = 16

#build the generators
train_data = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

train_generator = train_data.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size, 
    class_mode='binary',
    shuffle=True
)

aug_train_data = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255, 
    rotation_range=5,
    zoom_range = 0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)

aug_train_generator = aug_train_data.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True
)

#now let's combine these
train_ds = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_height, img_width, 3], [None,])  #here are the shapes
)

aug_train_ds = tf.data.Dataset.from_generator(
    lambda: aug_train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_height, img_width, 3], [None,])
)

# concatenate the two datasets
train_ds = train_ds.concatenate(aug_train_ds)

# shuffle (this shuffles whole batches from the two streams, not individual images)
train_ds = train_ds.shuffle(buffer_size=5)

#classification example
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalMaxPool2D

model = Sequential([
    Conv2D(4, (3,3), activation='relu', input_shape=(img_height, img_width,3)),
    MaxPooling2D((2,2)),
    Conv2D(8, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(16, (3,3), activation='relu'),
    GlobalMaxPool2D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

steps_per_epoch = len(train_generator) + len(aug_train_generator)  # required: the generators loop forever, so fit() needs to know where each epoch ends

model.fit(train_ds, steps_per_epoch=steps_per_epoch, epochs=5)
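
A side note: on newer TensorFlow releases (2.4 and later) the output_types/output_shapes arguments of tf.data.Dataset.from_generator are deprecated in favor of a single output_signature, so the wrapping step above could also be written like this (a sketch, behaviour is otherwise unchanged):

train_ds = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_signature=(
        tf.TensorSpec(shape=(None, img_height, img_width, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32)  # binary labels come out as floats
    )
)

Either way, steps_per_epoch still has to be passed to model.fit(), because the underlying Keras generators yield batches indefinitely.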
