I have a multimodal model for emotion recognition that classifies 6 emotion classes. The architecture is two LSTMs, one per modality (audio, text). I want to obtain the confusion matrix, classification_report, and precision_recall_fscore_support for this model. Here is the code:
if self.modality == "bimodal_":
    # Split the fused feature tensor: the first 50 columns per timestep are
    # text token ids, the remaining columns are audio features.
    self.train_x_text = self.train_x[:, :, :50].astype('int64')
    self.train_x_audio = self.train_x[:, :, 50:]
    self.val_x_text = self.val_x[:, :, :50].astype('int64')
    self.val_x_audio = self.val_x[:, :, 50:]
    self.test_x_text = self.test_x[:, :, :50].astype('int64')
    self.test_x_audio = self.test_x[:, :, 50:]
def get_bimodal_model_(self):
    # Modality-specific hyperparameters
    self.epochs = 90
    self.batch_size = 80

    # Text modality: min-max scale to [0, 1] using the training-set range.
    # NB: the val/test splits should be scaled with these same statistics.
    min_vals = np.min(self.train_x_text)
    max_vals = np.max(self.train_x_text)
    self.train_x_text = (self.train_x_text - min_vals) / (max_vals - min_vals)
    self.sentence_length = self.train_x_text.shape[2]  # 50

    text_input = Input(shape=(self.sequence_length, self.sentence_length), dtype='float32')
    cnn_outputs = self.cnn_text_feature_extractor(text_input)  # returns shape (33, 100)
    lstm_text = Bidirectional(LSTM(500, return_sequences=True, dropout=0.7, recurrent_dropout=0.8))(cnn_outputs)
    lstm_text = Dense(128, activation='relu')(lstm_text)
    lstm_text = BatchNormalization()(lstm_text)

    # Audio modality: same min-max scaling scheme
    min_vals_a = np.min(self.train_x_audio)
    max_vals_a = np.max(self.train_x_audio)
    self.train_x_audio = (self.train_x_audio - min_vals_a) / (max_vals_a - min_vals_a)
    self.features_audio_dim = self.train_x_audio.shape[2]  # 1611

    audio_input = Input(shape=(self.sequence_length, self.features_audio_dim), dtype='float32')
    lstm_audio = Bidirectional(LSTM(500, return_sequences=True, dropout=0.3, recurrent_dropout=0.8))(audio_input)
    lstm_audio = Dense(128, activation='relu')(lstm_audio)
    lstm_audio = BatchNormalization()(lstm_audio)

    # Fusion: concatenate both branches, run a third BiLSTM, then self-attention
    concatenated_output = Concatenate()([lstm_text, lstm_audio])
    concatenated_output = Bidirectional(LSTM(500, return_sequences=True, dropout=0.3, recurrent_dropout=0.8))(concatenated_output)
    attention = Attention()([concatenated_output, concatenated_output])

    # Classification head: per-timestep softmax over the emotion classes
    dense_1 = Dense(128, activation='sigmoid')(attention)
    dense_1 = Dropout(0.1)(dense_1)
    output_layer = Dense(self.classes, activation='softmax')(dense_1)

    model = Model(inputs=[text_input, audio_input], outputs=output_layer)
    return model
if self.modality == "bimodal_":
    self.train_x_ = [self.train_x_text, self.train_x_audio]
    self.val_x_ = [self.val_x_text, self.val_x_audio]
    self.test_x_ = [self.test_x_text, self.test_x_audio]
    model = self.get_bimodal_model_()
    # 'temporal' sample weighting lets the mask zero out padded timesteps;
    # sgd and the custom f1_m metric are defined elsewhere.
    model.compile(optimizer=sgd, metrics=['acc', f1_m],
                  loss='categorical_crossentropy', sample_weight_mode='temporal')
    model.fit(self.train_x_, self.train_y,
              epochs=self.epochs, batch_size=self.batch_size,
              sample_weight=self.train_mask, shuffle=True,
              validation_data=(self.val_x_, self.val_y, self.val_mask))
1 Answer
The network ends in a per-timestep softmax, so its output is a probability distribution over the classes, not class labels. First convert those probabilities into discrete class predictions (for single-label classification take the most probable class; a probability threshold is only needed if you treat the output as multi-label), then compute the confusion matrix and the other metrics on the resulting labels.
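A minimal sketch of that idea, assuming the model emits predictions of shape (samples, sequence_length, n_classes), that self.test_y is one-hot encoded with the same shape, and that a self.test_mask exists alongside the train_mask/val_mask from your training code (it is not shown in the question, so treat it as hypothetical). For this single-label setup, argmax plays the role of the threshold:

import numpy as np
from sklearn.metrics import (confusion_matrix, classification_report,
                             precision_recall_fscore_support)

# Per-timestep class probabilities: (samples, sequence_length, n_classes)
probs = model.predict(self.test_x_)

# Collapse probabilities into one predicted label per timestep
y_pred = np.argmax(probs, axis=-1).ravel()
# The ground truth is one-hot encoded, so argmax it the same way
y_true = np.argmax(self.test_y, axis=-1).ravel()

# Drop padded timesteps (assumes a test mask analogous to train_mask)
mask = self.test_mask.ravel().astype(bool)
y_true, y_pred = y_true[mask], y_pred[mask]

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, digits=4))
print(precision_recall_fscore_support(y_true, y_pred, average='weighted'))

With 6 emotion classes you can also pass target_names=[...] to classification_report to label each row, and average='weighted' in precision_recall_fscore_support aggregates the per-class scores while accounting for class imbalance.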