I am adapting the "Timeseries classification with a Transformer model" example found here; my dataset has 41671 rows and 43 columns. Below is the code I am using:
# Model 5: Time-Series Transformer for Classification
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd

tickers = ['AAPL', 'GOOG', 'MSFT', 'INTC', 'AMZN']
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    print(inputs.shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
        print(x.shape)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    print(x.shape)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        print(x.shape)
        x = layers.Dropout(mlp_dropout)(x)
        print(x.shape)
    outputs = layers.Dense(1, activation="softmax")(x)
    print(outputs.shape)
    return keras.Model(inputs, outputs)
# Model Training
for i in range(len(tickers)):
    # Leave-one-ticker-out cross-validation: train on four tickers,
    # validate on the held-out one.
    train_tickers = tickers.copy()
    train_tickers.pop(i)
    print(train_tickers)
    df_train = pd.DataFrame()
    for train_ticker in train_tickers:
        df = pd.read_csv(f"Spoofing-Injected DataFrames/{train_ticker}_segmentsummary_spoofed_bidside_{FACTOR}_{INTERVAL_START}_{INTERVAL_END}.csv", index_col=0)
        df_train = pd.concat([df_train, df], axis=0)
    X_train = df_train.drop("Classification", axis=1)
    y_train = df_train["Classification"]
    df_valid = pd.read_csv(f"Spoofing-Injected DataFrames/{tickers[i]}_segmentsummary_spoofed_bidside_{FACTOR}_{INTERVAL_START}_{INTERVAL_END}.csv", index_col=0)
    X_valid = df_valid.drop("Classification", axis=1)
    y_valid = df_valid["Classification"]
    # batch_size = None
    model5 = build_model(
        input_shape=X_train.shape,
        head_size=256,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=4,
        mlp_units=[128],
        mlp_dropout=0.4,
        dropout=0.25
    )
    model5.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['sparse_categorical_accuracy']
    )
    model5.summary()
    callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
    history = model5.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=200,
        batch_size=64,
        callbacks=callbacks,
        verbose=0
    )
    model5.evaluate(X_valid, y_valid, verbose=1)
    history_df = pd.DataFrame(history.history)
    # Start the plot at epoch 5
    history_df.loc[5:, ['loss', 'val_loss']].plot()
    history_df.loc[5:, ['sparse_categorical_accuracy', 'val_sparse_categorical_accuracy']].plot()
    print(("Best Validation Loss: {:0.4f}" +
           "\nBest Validation Accuracy: {:0.4f}")
          .format(history_df['val_loss'].min(),
                  history_df['val_sparse_categorical_accuracy'].max()))
But I am getting the error in the title: the model is detecting an input shape of (None, 43) when it should be (None, 41671, 43). I suspect it is related to the Conv1D layers, but I cannot find where an input shape is specified after those layers.
How can I make sure the input shape stays consistent while allowing the number of rows to vary? (I am cross-validating with my data.)
I tried converting the data to a TensorFlow Dataset instead of a pandas DataFrame, but when I did that, it did not detect the input shape at all.
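The conversion I tried was roughly along these lines (a sketch; the exact call and shapes are the part I am unsure about):

# Sketch of the tf.data attempt: wrap the pandas data in a Dataset.
# Note that from_tensor_slices treats each row as one sample of
# shape (43,), which may be why the expected (timesteps, features)
# input shape is never seen.
train_ds = tf.data.Dataset.from_tensor_slices(
    (X_train.values.astype("float32"), y_train.values)
).batch(64)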
Below is my model summary:
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                                 Output Shape         Param #   Connected to
==================================================================================================
 input_1 (InputLayer)                         [(None, 41671, 43)]  0         []
 layer_normalization (LayerNormalization)     (None, 41671, 43)    86        ['input_1[0][0]']
 multi_head_attention (MultiHeadAttention)    (None, 41671, 43)    179243    ['layer_normalization[0][0]', 'layer_normalization[0][0]']
 dropout (Dropout)                            (None, 41671, 43)    0         ['multi_head_attention[0][0]']
 tf.__operators__.add (TFOpLambda)            (None, 41671, 43)    0         ['dropout[0][0]', 'input_1[0][0]']
 layer_normalization_1 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add[0][0]']
 conv1d (Conv1D)                              (None, 41671, 4)     176       ['layer_normalization_1[0][0]']
 dropout_1 (Dropout)                          (None, 41671, 4)     0         ['conv1d[0][0]']
 conv1d_1 (Conv1D)                            (None, 41671, 43)    215       ['dropout_1[0][0]']
 tf.__operators__.add_1 (TFOpLambda)          (None, 41671, 43)    0         ['conv1d_1[0][0]', 'tf.__operators__.add[0][0]']
 layer_normalization_2 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_1[0][0]']
 multi_head_attention_1 (MultiHeadAttention)  (None, 41671, 43)    179243    ['layer_normalization_2[0][0]', 'layer_normalization_2[0][0]']
 dropout_2 (Dropout)                          (None, 41671, 43)    0         ['multi_head_attention_1[0][0]']
 tf.__operators__.add_2 (TFOpLambda)          (None, 41671, 43)    0         ['dropout_2[0][0]', 'tf.__operators__.add_1[0][0]']
 layer_normalization_3 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_2[0][0]']
 conv1d_2 (Conv1D)                            (None, 41671, 4)     176       ['layer_normalization_3[0][0]']
 dropout_3 (Dropout)                          (None, 41671, 4)     0         ['conv1d_2[0][0]']
 conv1d_3 (Conv1D)                            (None, 41671, 43)    215       ['dropout_3[0][0]']
 tf.__operators__.add_3 (TFOpLambda)          (None, 41671, 43)    0         ['conv1d_3[0][0]', 'tf.__operators__.add_2[0][0]']
 layer_normalization_4 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_3[0][0]']
 multi_head_attention_2 (MultiHeadAttention)  (None, 41671, 43)    179243    ['layer_normalization_4[0][0]', 'layer_normalization_4[0][0]']
 dropout_4 (Dropout)                          (None, 41671, 43)    0         ['multi_head_attention_2[0][0]']
 tf.__operators__.add_4 (TFOpLambda)          (None, 41671, 43)    0         ['dropout_4[0][0]', 'tf.__operators__.add_3[0][0]']
 layer_normalization_5 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_4[0][0]']
 conv1d_4 (Conv1D)                            (None, 41671, 4)     176       ['layer_normalization_5[0][0]']
 dropout_5 (Dropout)                          (None, 41671, 4)     0         ['conv1d_4[0][0]']
 conv1d_5 (Conv1D)                            (None, 41671, 43)    215       ['dropout_5[0][0]']
 tf.__operators__.add_5 (TFOpLambda)          (None, 41671, 43)    0         ['conv1d_5[0][0]', 'tf.__operators__.add_4[0][0]']
 layer_normalization_6 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_5[0][0]']
 multi_head_attention_3 (MultiHeadAttention)  (None, 41671, 43)    179243    ['layer_normalization_6[0][0]', 'layer_normalization_6[0][0]']
 dropout_6 (Dropout)                          (None, 41671, 43)    0         ['multi_head_attention_3[0][0]']
 tf.__operators__.add_6 (TFOpLambda)          (None, 41671, 43)    0         ['dropout_6[0][0]', 'tf.__operators__.add_5[0][0]']
 layer_normalization_7 (LayerNormalization)   (None, 41671, 43)    86        ['tf.__operators__.add_6[0][0]']
 conv1d_6 (Conv1D)                            (None, 41671, 4)     176       ['layer_normalization_7[0][0]']
 dropout_7 (Dropout)                          (None, 41671, 4)     0         ['conv1d_6[0][0]']
 conv1d_7 (Conv1D)                            (None, 41671, 43)    215       ['dropout_7[0][0]']
 tf.__operators__.add_7 (TFOpLambda)          (None, 41671, 43)    0         ['conv1d_7[0][0]', 'tf.__operators__.add_6[0][0]']
 global_average_pooling1d (GlobalAveragePooling1D)  (None, 41671)  0         ['tf.__operators__.add_7[0][0]']
 dense (Dense)                                (None, 128)          5334016   ['global_average_pooling1d[0][0]']
 dropout_8 (Dropout)                          (None, 128)          0         ['dense[0][0]']
 dense_1 (Dense)                              (None, 1)            129       ['dropout_8[0][0]']
==================================================================================================
Total params: 6,053,369
Trainable params: 6,053,369
Non-trainable params: 0
My training data shape is (41671, 43) and my label shape is (41671, 1).
1 Answer
Conv1D accepts as input a 3+D tensor with shape batch_shape + (steps, input_dim), but your training dataset has the 2D shape (41671, 43). You therefore need to expand the dimensions of your training data to make it compatible with the Conv1D layers. You can do that with code along the following lines:
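A minimal sketch, assuming each of the 43 columns is treated as one timestep with a single feature:

# Add a trailing feature axis: (41671, 43) -> (41671, 43, 1),
# i.e. 41671 samples of 43 timesteps with 1 feature each.
# The validation data must be expanded the same way.
X_train = np.expand_dims(X_train, axis=-1)
X_valid = np.expand_dims(X_valid, axis=-1)
print(X_train.shape)  # (41671, 43, 1)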
and then pass input_shape to the model like this:
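A sketch of the corresponding call; only input_shape changes from the question's code, passing the per-sample shape rather than the whole array's shape:

# Pass (timesteps, features) = (43, 1); Keras adds the variable
# batch dimension (None) by itself.
model5 = build_model(
    input_shape=X_train.shape[1:],
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

Because the batch dimension stays None, the training and validation sets can have different numbers of rows, which is exactly what the cross-validation loop needs.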