keras ValueError:层“model”的输入0与层不兼容:预期形状=(None,41671,43),找到的形状=(None,43)

ux6nzvsh  于 2023-08-06  发布在  其他
关注(0)|答案(1)|浏览(124)

我正在修改“Timeseries classification with a Transformer model”找到的here,数据集有41671行和43列。下面是我使用的代码:

# Model 5: Time-Series Transformer for Classification
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np

tickers = ['AAPL', 'GOOG', 'MSFT', 'INTC', 'AMZN']

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    print(inputs.shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
        print(x.shape)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    print(x.shape)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        print(x.shape)
        x = layers.Dropout(mlp_dropout)(x)
        print(x.shape)
    outputs = layers.Dense(1, activation="softmax")(x)
    print(outputs.shape)
    return keras.Model(inputs, outputs)

# Model Training
for i in range(len(tickers)):
    train_tickers = tickers.copy()
    train_tickers.pop(i)
    print(train_tickers)
    df_train = pd.DataFrame()
    for train_ticker in train_tickers:
        df = pd.read_csv(f"Spoofing-Injected DataFrames/{train_ticker}_segmentsummary_spoofed_bidside_{FACTOR}_{INTERVAL_START}_{INTERVAL_END}.csv", index_col=0)
        df_train = pd.concat([df_train, df], axis=0)
    X_train = df_train.drop("Classification", axis=1)
    y_train = df_train["Classification"]
    df_valid = pd.read_csv(f"Spoofing-Injected DataFrames/{tickers[i]}_segmentsummary_spoofed_bidside_{FACTOR}_{INTERVAL_START}_{INTERVAL_END}.csv", index_col=0)
    X_valid = df_valid.drop("Classification", axis=1)
    y_valid = df_valid["Classification"]
    # batch_size = None
    model5 = build_model(
        input_shape=X_train.shape,
        head_size=256,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=4,
        mlp_units=[128],
        mlp_dropout=0.4,
        dropout=0.25
    )

    model5.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['sparse_categorical_accuracy']
    )

    model5.summary()

    callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
    history = model5.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=200,
        batch_size=64,
        callbacks=callbacks,
        verbose=0
    )

    model.evaluate(X_valid, y_valid, verbose=1)
    history_df = pd.DataFrame(history.history)
    # Start the plot at epoch 5
    history_df.loc[5:, ['loss', 'val_loss']].plot()
    history_df.loc[5:, ['binary_accuracy', 'val_binary_accuracy']].plot()

    print(("Best Validation Loss: {:0.4f}" +\
        "\nBest Validation Accuracy: {:0.4f}")\
        .format(history_df['val_loss'].min(), 
                history_df['val_binary_accuracy'].max()))

字符串
但我收到了标题中的错误。它正在检测输入形状(None,43),而实际上应该是(None,41671,43)。我怀疑它与Conv1D层有关,但我找不到它在这些层之后指定输入形状的位置。
如何确保输入形状保持一致,同时允许行数变化?(我正在与我的数据进行交叉验证)
我尝试将数据集更改为TensorFlow数据集而不是Pandas数据集,但当我这样做时,它根本没有检测到输入的形状。
下面是我的模型总结

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
 input_1 (InputLayer)           [(None, 41671, 43)]  0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 41671, 43)   86          ['input_1[0][0]']                
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 41671, 43)   179243      ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                                                  
 dropout (Dropout)              (None, 41671, 43)    0           ['multi_head_attention[0][0]']   
                                                                                                  
 tf.__operators__.add (TFOpLamb  (None, 41671, 43)   0           ['dropout[0][0]',                
 da)                                                              'input_1[0][0]']                
                                                                                                  
 layer_normalization_1 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add[0][0]']   
 rmalization)                                                                                     
                                                                                                  
 conv1d (Conv1D)                (None, 41671, 4)     176         ['layer_normalization_1[0][0]']  
                                                                                                  
 dropout_1 (Dropout)            (None, 41671, 4)     0           ['conv1d[0][0]']                 
                                                                                                  
 conv1d_1 (Conv1D)              (None, 41671, 43)    215         ['dropout_1[0][0]']              
                                                                                                  
 tf.__operators__.add_1 (TFOpLa  (None, 41671, 43)   0           ['conv1d_1[0][0]',               
 mbda)                                                            'tf.__operators__.add[0][0]']   
                                                                                                  
 layer_normalization_2 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_1[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 multi_head_attention_1 (MultiH  (None, 41671, 43)   179243      ['layer_normalization_2[0][0]',  
 eadAttention)                                                    'layer_normalization_2[0][0]']  
                                                                                                  
 dropout_2 (Dropout)            (None, 41671, 43)    0           ['multi_head_attention_1[0][0]'] 
                                                                                                  
 tf.__operators__.add_2 (TFOpLa  (None, 41671, 43)   0           ['dropout_2[0][0]',              
 mbda)                                                            'tf.__operators__.add_1[0][0]'] 
                                                                                                  
 layer_normalization_3 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_2[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv1d_2 (Conv1D)              (None, 41671, 4)     176         ['layer_normalization_3[0][0]']  
                                                                                                  
 dropout_3 (Dropout)            (None, 41671, 4)     0           ['conv1d_2[0][0]']               
                                                                                                  
 conv1d_3 (Conv1D)              (None, 41671, 43)    215         ['dropout_3[0][0]']              
                                                                                                  
 tf.__operators__.add_3 (TFOpLa  (None, 41671, 43)   0           ['conv1d_3[0][0]',               
 mbda)                                                            'tf.__operators__.add_2[0][0]'] 
                                                                                                  
 layer_normalization_4 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_3[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 multi_head_attention_2 (MultiH  (None, 41671, 43)   179243      ['layer_normalization_4[0][0]',  
 eadAttention)                                                    'layer_normalization_4[0][0]']  
                                                                                                  
 dropout_4 (Dropout)            (None, 41671, 43)    0           ['multi_head_attention_2[0][0]'] 
                                                                                                  
 tf.__operators__.add_4 (TFOpLa  (None, 41671, 43)   0           ['dropout_4[0][0]',              
 mbda)                                                            'tf.__operators__.add_3[0][0]'] 
                                                                                                  
 layer_normalization_5 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_4[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv1d_4 (Conv1D)              (None, 41671, 4)     176         ['layer_normalization_5[0][0]']  
                                                                                                  
 dropout_5 (Dropout)            (None, 41671, 4)     0           ['conv1d_4[0][0]']               
                                                                                                  
 conv1d_5 (Conv1D)              (None, 41671, 43)    215         ['dropout_5[0][0]']              
                                                                                                  
 tf.__operators__.add_5 (TFOpLa  (None, 41671, 43)   0           ['conv1d_5[0][0]',               
 mbda)                                                            'tf.__operators__.add_4[0][0]'] 
                                                                                                  
 layer_normalization_6 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_5[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 multi_head_attention_3 (MultiH  (None, 41671, 43)   179243      ['layer_normalization_6[0][0]',  
 eadAttention)                                                    'layer_normalization_6[0][0]']  
                                                                                                  
 dropout_6 (Dropout)            (None, 41671, 43)    0           ['multi_head_attention_3[0][0]'] 
                                                                                                  
 tf.__operators__.add_6 (TFOpLa  (None, 41671, 43)   0           ['dropout_6[0][0]',              
 mbda)                                                            'tf.__operators__.add_5[0][0]'] 
                                                                                                  
 layer_normalization_7 (LayerNo  (None, 41671, 43)   86          ['tf.__operators__.add_6[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv1d_6 (Conv1D)              (None, 41671, 4)     176         ['layer_normalization_7[0][0]']  
                                                                                                  
 dropout_7 (Dropout)            (None, 41671, 4)     0           ['conv1d_6[0][0]']               
                                                                                                  
 conv1d_7 (Conv1D)              (None, 41671, 43)    215         ['dropout_7[0][0]']              
                                                                                                  
 tf.__operators__.add_7 (TFOpLa  (None, 41671, 43)   0           ['conv1d_7[0][0]',               
 mbda)                                                            'tf.__operators__.add_6[0][0]'] 
                                                                                                  
 global_average_pooling1d (Glob  (None, 41671)       0           ['tf.__operators__.add_7[0][0]'] 
 alAveragePooling1D)                                                                              
                                                                                                  
 dense (Dense)                  (None, 128)          5334016     ['global_average_pooling1d[0][0]'
                                                                 ]                                
                                                                                                  
 dropout_8 (Dropout)            (None, 128)          0           ['dense[0][0]']                  
                                                                                                  
 dense_1 (Dense)                (None, 1)            129         ['dropout_8[0][0]']              
                                                                                                  
==================================================================================================
Total params: 6,053,369
Trainable params: 6,053,369
Non-trainable params: 0


我的训练数据形状是(41671,43),标签形状是(41671,1)。

hk8txs48

hk8txs481#

Conv1D接受3+D tensor中的输入形状,形状为:(batch_shape + (steps, input_dim)),但您的训练数据集具有2D形状(41671,43)。因此,您需要扩展训练数据集的维度,使其与Conv1D层兼容。
你可以使用下面的代码来做到这一点:

import numpy as np
#X_train = np.random.random_sample(size=(41671, 43))
X_train.shape #Output : (41671, 43)
X_train = np.expand_dims(X_train, axis=0) #or X_train = np.expand_dims(X_train, axis=-1)
X_train.shape

字符串
输出量:

(1, 41671, 43)


并将input_shape提供给模型,如下所示

input_shape=X_train.shape[1:]

相关问题