我试着将我的代码转换为使用data.xml。我离得不远,但我的功能和模型输入层仍然有问题,在我使用数据之前从未见过。
我加载了很多.csv,其中有很多功能列,csv中有石楠和name字符串。
我的简单测试代码是:
import tensorflow as tf
import pandas as pd
bd_path = 'C:/Users/my doc/Python/mini_test/'
keep_columns = ['precipitation', 'temperature_min', 'temperature_max',
'snow_depth_water_equivalent_max', 'streamflow']
name_columns = pd.read_csv(bd_path + 'camels_01022500+attributs_mensuels.csv').columns
# Enable eager execution
tf.config.run_functions_eagerly(True)
# Load a single CSV file and preprocess it
def load_and_preprocess_csv(filename):
columns = name_columns
dataset = tf.data.experimental.make_csv_dataset(
file_pattern=filename,
num_parallel_reads=2,
batch_size=32,
num_epochs=1,
label_name='streamflow',
column_names=columns,
select_columns=keep_columns,
shuffle_buffer_size=10000,
header=True,
field_delim=','
)
# Apply preprocessing to the dataset
def preprocess_fn(features, label):
# Normalize the features (example: scaling to [0, 1])
features['precipitation'] /= 100.0
features['temperature_min'] /= 100.0
features['temperature_max'] /= 100.0
features['snow_depth_water_equivalent_max'] /= 100.0
# last trial I did
# Create a 'main_inputs' feature by stacking the selected columns
features['main_inputs'] = tf.stack([
features['precipitation'],
features['temperature_min'],
features['temperature_max'],
features['snow_depth_water_equivalent_max']
], axis=-1)
# here an other trial without sucess...
# Rename the columns to match the model's input layer
#features['main_inputs'] = tf.cast(features['main_inputs'], tf.float32) # Ensure the dtype is correct
#features['main_inputs'] = tf.identity(features['main_inputs'], name='main_inputs') # Rename the feature
return features, label
dataset = dataset.map(preprocess_fn)
return dataset
# Create a list of file paths matching pattern
file_paths = tf.io.gfile.glob(bd_path + '*.csv')
# Load and preprocess CSV files in parallel
building_datasets = []
for file_path in file_paths:
dataset = load_and_preprocess_csv(file_path)
building_datasets.append(dataset)
# Combine the individual datasets into a single dataset
combined_dataset = tf.data.Dataset.sample_from_datasets(building_datasets)
# Optional, further transform, shuffle, and batch the dataset as needed
# For example:
combined_dataset = combined_dataset.shuffle(buffer_size=10000)
#combined_dataset = combined_dataset.batch(64)
# model
tensor_input = tf.keras.layers.Input(shape=(4,), name='main_inputs')
xy = tf.keras.layers.Dense(10, activation='linear')(tensor_input)
xy = tf.keras.layers.Dropout(rate=0.2)(xy)
out = tf.keras.layers.Dense(1, activation='linear')(xy)
model = tf.keras.Model(inputs=tensor_input, outputs=out)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')
# Train the model
history = model.fit(combined_dataset, epochs=1)
我得到的警告是
... \keras\engine\functional.py:637: UserWarning: Input dict contained keys ['temperature_min', 'snow_depth_water_equivalent_max', 'temperature_max', 'precipitation'] which did not match any model input. They will be ignored by the model.
我的经验是直接传递数组到模型,是输入层必须修改还是这是我的数据集谁需要更多的修改?
1条答案
按热度按时间tvokkenx1#
在
preprocess_fn
中,请确保使用相同的字符串引用特性索引-