keras Tensorflow:如何压缩多个串联数据集?

ymdaylpp  于 2022-11-13  发布在  其他
关注(0)|答案(1)|浏览(158)

我试图为一个多输入keras模型压缩多个tf.data.Dataset对象。每个tf.data.Dataset对象都是多个 Dataframe 的串联,每个 Dataframe 都有相同的列数,但不一定有相同的行数。我可以创建完整的数据集,但当我试图在keras模型中传递数据集时,我得到一个错误:

TypeError: Inputs to a layer should be tensors. Got: <tensorflow.python.data.ops.dataset_ops._NestedVariant object

问题是,我真的很想利用tf.data.dataset的懒惰结构,因为我正在使用窗口函数,但是我很难将所有的数据集聚合在一起。
如何将多个数据集压缩在一起,以便可以将其传递到model.fit()函数中?
任何帮助都将不胜感激。
下面是一个简单的函数代码,它可以重现我的问题:

import pandas as pd
import numpy as np

import tensorflow as tf

# Create two dataframes sharing the same schema (4 feature columns plus a
# target column) but with different row counts (1000 vs 800) — this mirrors
# the "same columns, different number of rows" situation from the question.

dataframe1 = pd.DataFrame(np.random.randn(1000, 5), columns=["feature1", "feature2", "feature3", "feature4", "target"])
dataframe2 = pd.DataFrame(np.random.randn(800, 5), columns=["feature1", "feature2", "feature3", "feature4", "target"])

# Convert dataframes to datasets

def get_dataset(df: pd.DataFrame, features):
    """Build a tf.data.Dataset over the selected feature columns.

    The first 4 rows are dropped so these per-row elements line up with the
    5-row sliding windows produced elsewhere (a window covering rows i..i+4
    pairs with the static features/target of row i+4, the window's last row).
    """
    values = df[features].to_numpy()[4:]
    return tf.data.Dataset.from_tensor_slices(values)

def get_dataset_windowed(df: pd.DataFrame, features):
    """Return a dataset of overlapping 5-row windows over the chosen columns.

    Note: each element yielded by ``window`` is itself a nested dataset of 5
    consecutive rows, not a tensor. ``shift=1`` makes consecutive windows
    overlap and ``drop_remainder=True`` discards incomplete tail windows.
    """
    row_slices = tf.data.Dataset.from_tensor_slices(df[features].to_numpy())
    return row_slices.window(5, shift=1, stride=1, drop_remainder=True)

windowed_dataset = [get_dataset_windowed(x, ["feature3", "feature4"]) for x in [dataframe1, dataframe2]]

# BUG (this is what the question is about): interleave concatenates the two
# window datasets, but every element is STILL a nested tf.data.Dataset of 5
# rows rather than a (5, 2) tensor. Feeding such elements to model.fit is
# what raises "Inputs to a layer should be tensors. Got: ..._NestedVariant".
# The windows need to be flattened to tensors, e.g. .flat_map(lambda w: w.batch(5)).
windowed_dataset = tf.data.Dataset.from_tensor_slices(windowed_dataset)
windowed_dataset = windowed_dataset.interleave(lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE)

# Static (per-row) inputs: concatenation of the two dataframes' rows.
static_dataset = [get_dataset(x, ["feature1", "feature2"]) for x in [dataframe1, dataframe2]]

static_dataset = tf.data.Dataset.from_tensor_slices(static_dataset)
static_dataset = static_dataset.interleave(lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE)

# Regression targets, aligned with the static inputs (both skip the first 4 rows).
targets = [get_dataset(x, ["target"]) for x in [dataframe1, dataframe2]]

targets = tf.data.Dataset.from_tensor_slices(targets)
targets = targets.interleave(lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE)

# Zip datasets together
# Dict keys must match the `name=` of the Keras Input layers below.

full_dataset = tf.data.Dataset.zip(
    (
        {
            "short_term_ts_input": windowed_dataset,
            "static_input": static_dataset,
        },
        {
             "output": targets,
        }
    )
)
full_dataset = full_dataset.shuffle(buffer_size=1024).batch(128)

# Creating, compiling and fitting model

short_term_ts_input = tf.keras.Input(shape=(5, 2), name="short_term_ts_input")
static_input = tf.keras.Input(shape=(2), name="static_input")
# NOTE(review): this Input is never connected to the model and it rebinds the
# module-level name `targets` (previously the targets dataset) — it is dead code.
targets = tf.keras.Input(shape=(1,), name="output")

short_term_ts_features = tf.keras.layers.LSTM(32, return_sequences=False)(short_term_ts_input)
short_term_ts_features = tf.keras.layers.Dense(8)(short_term_ts_features)
static_features = tf.keras.layers.Dense(16)(static_input)
x_concat = tf.keras.layers.concatenate([short_term_ts_features, static_features])
x_concat = tf.keras.layers.Dense(32)(x_concat)

output = tf.keras.layers.Dense(1)(x_concat)

model = tf.keras.Model(inputs=[short_term_ts_input, static_input], outputs=[output])
# NOTE(review): compile() is missing a loss, so even with correct data,
# fit() would have nothing to optimize.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

tf.keras.utils.plot_model(model, "model_test.png", show_shapes=True)

model.fit(full_dataset)`
m3eecexj

m3eecexj1#

也许是这样的:

import pandas as pd
import numpy as np

import tensorflow as tf

# Create two dataframes with the same columns (4 features + target) but
# different lengths (1000 vs 800 rows), matching the question's setup.

dataframe1 = pd.DataFrame(np.random.randn(1000, 5), columns=["feature1", "feature2", "feature3", "feature4", "target"])
dataframe2 = pd.DataFrame(np.random.randn(800, 5), columns=["feature1", "feature2", "feature3", "feature4", "target"])

# Convert dataframes to datasets

def get_dataset(df: pd.DataFrame, features):
    """Dataset of per-row feature vectors for the given columns.

    Skips the first 4 rows so each element aligns with the 5-row window
    that ends on the same row.
    """
    selected = df.loc[:, features].iloc[4:]
    return tf.data.Dataset.from_tensor_slices(selected.values)

def get_dataset_windowed(df: pd.DataFrame, features):
    """Dataset of sliding 5-row windows (stride 1, overlapping) over *features*.

    Elements are nested datasets of 5 rows each; incomplete trailing windows
    are dropped. Callers must flatten them (e.g. flat_map + batch) before
    feeding a Keras model.
    """
    base = tf.data.Dataset.from_tensor_slices(df.loc[:, features].to_numpy())
    windows = base.window(5, shift=1, stride=1, drop_remainder=True)
    return windows

windowed_dataset = [get_dataset_windowed(x, ["feature3", "feature4"]) for x in [dataframe1, dataframe2]]

# Concatenate the per-dataframe window datasets, then flatten them: each
# window is itself a nested dataset of 5 rows, so batching each window by 5
# inside flat_map turns it into a single (5, 2) tensor. This is the key fix
# for the "_NestedVariant" TypeError from the question.
windowed_dataset = tf.data.Dataset.from_tensor_slices(windowed_dataset)
windowed_dataset = windowed_dataset.interleave(
    lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE
).flat_map(lambda z: z.batch(5))

# Static per-row inputs, concatenated across dataframes (first 4 rows of each
# dataframe are skipped inside get_dataset so they align with the windows).
static_dataset = [get_dataset(x, ["feature1", "feature2"]) for x in [dataframe1, dataframe2]]

static_dataset = tf.data.Dataset.from_tensor_slices(static_dataset)
static_dataset = static_dataset.interleave(lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE)

# Regression targets, aligned the same way as the static inputs.
targets = [get_dataset(x, ["target"]) for x in [dataframe1, dataframe2]]

targets = tf.data.Dataset.from_tensor_slices(targets)
targets = targets.interleave(lambda x: x, cycle_length=1, num_parallel_calls=tf.data.AUTOTUNE)

# Zip inputs and targets together in a single step. Zipping the input dict
# inside a 1-tuple and then zipping again (as the original did) wraps x in a
# redundant extra tuple level; one zip yields the standard ({inputs}, target)
# structure that model.fit expects. Dict keys must match the Input `name=`s.
full_dataset = tf.data.Dataset.zip(
    (
        {
            "short_term_ts_input": windowed_dataset,
            "static_input": static_dataset,
        },
        targets,
    )
)
full_dataset = full_dataset.shuffle(buffer_size=1024).batch(128)

# Creating, compiling and fitting model
# The Input `name=`s below must match the dict keys used when zipping the
# dataset ("short_term_ts_input", "static_input") so Keras can route each
# dataset component to the right input.

short_term_ts_input = tf.keras.Input(shape=(5, 2), name="short_term_ts_input")  # (window_len=5, 2 features)
static_input = tf.keras.Input(shape=(2), name="static_input")  # 2 static features per example

# Time-series branch: LSTM summarizes each 5-step window into one vector.
short_term_ts_features = tf.keras.layers.LSTM(32, return_sequences=False)(short_term_ts_input)
short_term_ts_features = tf.keras.layers.Dense(8)(short_term_ts_features)
static_features = tf.keras.layers.Dense(16)(static_input)
x_concat = tf.keras.layers.concatenate([short_term_ts_features, static_features])
x_concat = tf.keras.layers.Dense(32)(x_concat)

output = tf.keras.layers.Dense(1)(x_concat)

model = tf.keras.Model(inputs=[short_term_ts_input, static_input], outputs=[output])
# Unlike the question's code, a loss is supplied so fit() can actually train.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

tf.keras.utils.plot_model(model, "model_test.png", show_shapes=True)

model.fit(full_dataset)

相关问题