我构建了一个 PyTorch 模型来执行命名实体识别任务。训练 40 个 epoch 后,它的验证准确率约为 90%,在测试数据上约为 87%。然而,当我保存模型并重新加载后,准确率几乎为零。
模型定义如下:
class NERModel(nn.Module):
    """Two-branch span classifier used for named entity recognition.

    The input is split into a span half and a sentence half. Each half goes
    through its own embedding, bidirectional LSTM and feed-forward head; the
    two results are concatenated and fed to a classifier that ends in a
    sigmoid, producing one score per entry of ``label_idx``.

    The constructor reads the module-level names ``char2idx``,
    ``EMBEDDING_DIM``, ``embed_matrix`` and ``label_idx``. Output column ``i``
    corresponds to whichever label maps to ``i`` in ``label_idx``, so that
    mapping has to be the same when a saved ``state_dict`` is reloaded.
    """

    @staticmethod
    def _pretrained_embedding():
        """Build an embedding table whose weights start from ``embed_matrix``."""
        # NOTE(review): torch.from_numpy shares memory with embed_matrix. If the
        # matrix is already float32 the cast below may not copy, in which case
        # both embedding tables could alias the same storage -- confirm.
        table = nn.Embedding(
            num_embeddings=len(char2idx),
            embedding_dim=EMBEDDING_DIM,
            _weight=torch.from_numpy(embed_matrix),
        )
        return table.type(torch.float)

    @staticmethod
    def _encoder_lstm():
        """Build the bidirectional, batch-first LSTM used by both branches."""
        lstm = nn.LSTM(
            input_size=EMBEDDING_DIM,
            hidden_size=1800,
            bidirectional=True,
            batch_first=True,
        )
        return lstm.type(torch.float)

    @staticmethod
    def _branch_head():
        """Return the dropout + 3600 -> 1920 -> 640 -> 1280 layers of a branch."""
        return [
            nn.Dropout(0.5),
            nn.Linear(in_features=3600, out_features=1920),
            nn.ReLU(),
            nn.Linear(in_features=1920, out_features=640),
            nn.ReLU(),
            nn.Linear(in_features=640, out_features=1280),
        ]

    def __init__(self):
        super().__init__()
        self.base_count = 128
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        # 100 is just a placeholder which will be updated
        self.BatchNorm = nn.BatchNorm1d(num_features=100)

        self.SpanEmbed = self._pretrained_embedding()
        self.SentEmbed = self._pretrained_embedding()

        # Span branch: embedding + LSTM, then the feed-forward head.
        self.Span_Layer_Stack_PreS = nn.Sequential(
            self.SpanEmbed,
            self._encoder_lstm(),
        )
        self.Span_Layer_Stack_PostS = nn.Sequential(*self._branch_head())

        # Sentence branch: same shape, with batch norm in front of the head.
        self.Sent_Layer_Stack_PreS = nn.Sequential(
            self.SentEmbed,
            self._encoder_lstm(),
        )
        self.Sent_Layer_Stack_PostS = nn.Sequential(
            nn.BatchNorm1d(num_features=3600),
            *self._branch_head(),
        )

        # Classifier over the concatenated branch outputs (1280 + 1280 = 2560).
        widths = [2560, 2048, 1024, 512, 512, 256, 128]
        classifier = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            classifier.append(nn.Linear(in_features=fan_in, out_features=fan_out))
            classifier.append(nn.ReLU())
        classifier.append(nn.Linear(in_features=128, out_features=len(label_idx)))
        classifier.append(nn.Sigmoid())
        self.Comb_Layer_Stack = nn.Sequential(*classifier)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Score every label for each row of ``x``.

        Args:
            x: 2-D tensor whose columns are split in half along dim 1; the
                first half feeds the span branch and the second half the
                sentence branch. Values are cast to ``torch.long`` before the
                embedding lookup.

        Returns:
            Sigmoid activations with ``len(label_idx)`` columns per row.
        """
        half = x.shape[1] // 2
        span_ids, sent_ids = torch.split(x, [half, half], dim=1)

        # Only the final hidden states of the LSTMs are used downstream.
        _, (span_hidden, _) = self.Span_Layer_Stack_PreS(span_ids.type(torch.long))
        _, (sent_hidden, _) = self.Sent_Layer_Stack_PreS(sent_ids.type(torch.long))

        # Join the two direction states (indices 0 and 1) into 3600 features.
        span_feat = torch.hstack([span_hidden[0], span_hidden[1]])
        sent_feat = torch.hstack([sent_hidden[0], sent_hidden[1]])

        span_feat = self.Span_Layer_Stack_PostS(span_feat.type(torch.float))
        sent_feat = self.Sent_Layer_Stack_PostS(sent_feat.type(torch.float))

        merged = torch.hstack([span_feat, sent_feat])
        return self.Comb_Layer_Stack(merged)
字符串
我的训练代码(也包含在检查点保存模型的代码)如下:
def train_model(model: NERModel, num_epochs=10, lrate=0.005, save_freq=2):
    """Train ``model`` and periodically write checkpoints.

    Uses binary cross-entropy loss, Adam over the parameters that require
    gradients, and ``ReduceLROnPlateau`` stepped on the validation loss.
    Batches come from the module-level ``train_dataloader`` and
    ``val_dataloader``; accuracy is computed with ``get_accuracy`` on
    predictions thresholded at 0.5.

    Args:
        model: The network to train (updated in place).
        num_epochs: Number of passes over ``train_dataloader``.
        lrate: Initial learning rate for Adam.
        save_freq: A checkpoint is written after every ``save_freq``-th epoch.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    torch.manual_seed(42)
    loss_fn = nn.BCELoss()
    trainable_params = [param for param in model.parameters() if param.requires_grad]
    opt = torch.optim.Adam(params=trainable_params, lr=lrate)
    scheduler = lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.8, patience=2, verbose=True)
    mb = master_bar(range(num_epochs))
    for epoch in mb:
        train_loss = 0.0
        train_acc = 0
        model.train()
        print(f'Epoch {epoch + 1}')
        start_time = timer.perf_counter()
        for _, (X, y) in progress_bar(enumerate(train_dataloader), total=len(train_dataloader), parent=mb):
            y_pred = model(X).to(device)
            loss = loss_fn(y_pred.type(torch.float), y.type(torch.float))
            # Accumulate a plain float: adding the tensor itself would keep
            # autograd history alive for every batch of the epoch.
            train_loss += loss.item()
            opt.zero_grad()
            loss.backward()
            opt.step()
            train_acc += get_accuracy(torch.where(y_pred > 0.5, 1.0, 0.0), y)

        model.eval()
        with torch.inference_mode():
            total_val_loss, total_val_acc = 0, 0
            for X_val, y_val in val_dataloader:
                y_val_pred = model(X_val)
                total_val_loss += loss_fn(y_val_pred.type(torch.float), y_val.type(torch.float))
                total_val_acc += get_accuracy(torch.where(y_val_pred > 0.5, 1.0, 0.0), y_val)
            val_loss = total_val_loss / len(val_dataloader)
            val_acc = total_val_acc / len(val_dataloader)
        scheduler.step(val_loss)
        print(
            'Avg. Training Loss:{:.4f} | Val Loss:{:.4f} | Avg. Training Accuracy:{:.4f} | Val Accuracy:{:.4f}'
            .format(train_loss / len(train_dataloader), val_loss, train_acc / len(train_dataloader), val_acc)
        )
        end_time = timer.perf_counter()
        print('Epoch {:2d} | Elapsed Time:{:.3f}s'.format(epoch + 1, end_time - start_time))
        mb.write('Epoch {:2d} completed | Elapsed Time:{:.3f}s'.format(epoch + 1, end_time - start_time))

        if (epoch + 1) % save_freq == 0:
            filepath = config.environ_path[config.environ]['save'] + "Char_Tok_Models/Models/cp{:02d}.pth".format(epoch + 1)
            print('Saving model to:', filepath)
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'loss': val_loss,
                'accuracy': val_acc,
                # The output columns are only meaningful together with the
                # label-to-index mapping used during training, so keep it
                # next to the weights.
                'label_idx': label_idx,
            }, filepath)
            print('Save completed')
def test_model(model_file: str, d_loader: DataLoader):
    """Load a checkpoint into a fresh ``NERModel`` and evaluate it on ``d_loader``.

    Args:
        model_file: Checkpoint file name inside the ``Char_Tok_Models/Models/``
            directory under the configured save path.
        d_loader: Data loader yielding ``(X, y)`` batches to evaluate on.

    Returns:
        None. The mean per-batch loss and accuracy are printed.
    """
    torch.manual_seed(42)
    model = NERModel().to(device)
    loss_fn = nn.BCELoss()
    checkpoint_path = config.environ_path[config.environ]['save'] + "Char_Tok_Models/Models/" + model_file
    # map_location keeps loading consistent with init_model and lets a
    # checkpoint saved on another device be restored onto the current one.
    checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    with torch.inference_mode():
        total_test_loss, total_test_acc = 0, 0
        for _, (X_test, y_test) in progress_bar(enumerate(d_loader), total=len(d_loader)):
            y_test_pred = model(X_test)
            total_test_loss += loss_fn(y_test_pred.type(torch.float), y_test.type(torch.float))
            total_test_acc += get_accuracy(torch.where(y_test_pred > 0.5, 1.0, 0.0), y_test)
        # Average over the loader that was actually iterated, not a global one.
        test_loss = total_test_loss / len(d_loader)
        test_acc = total_test_acc / len(d_loader)
    print('Test Loss:{:.4f} | Test Accuracy:{:.4f}'.format(test_loss, test_acc))
型
进行预测的代码如下:
def init_model(model_file: str):
    """Create a ``NERModel`` and restore its weights from a checkpoint.

    Args:
        model_file: Checkpoint file name inside the ``Char_Tok_Models/Models/``
            directory under the configured save path.

    Returns:
        The model on ``device`` with the checkpoint's ``model_state_dict``
        loaded, already switched to evaluation mode.
    """
    torch.manual_seed(42)
    new_model = NERModel().to(device)
    checkpoint_path = config.environ_path[config.environ]['save'] + "Char_Tok_Models/Models/" + model_file
    checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
    new_model.load_state_dict(checkpoint['model_state_dict'])
    # A freshly built module is in training mode; switch Dropout and BatchNorm
    # to inference behaviour so callers that skip eval() still get stable output.
    new_model.eval()
    return new_model
def predict(input_sent: str, model: NERModel):
    """Run the model on every candidate span of ``input_sent``.

    Each candidate span is encoded and paired with the encoded sentence, the
    pairs are scored by ``model`` in evaluation mode, and the scores are
    thresholded at 0.5.

    Args:
        input_sent: The sentence to analyse.
        model: The network used for scoring.

    Returns:
        The thresholded predictions, one row per candidate span. The number
        of rows before and after dropping rows flagged with the ``''`` label
        is only printed; the returned tensor is not filtered.
    """
    torch.manual_seed(42)

    # Element 2 of each candidate is what gets encoded -- presumably the span
    # text; verify against get_can_spans.
    span_texts = [candidate[2] for candidate in get_can_spans(input_sent)]
    enc_spans = pad_encode_text(span_texts)
    enc_sentence = pad_encode_text([input_sent])[0]

    # One row per span: [encoded span | encoded sentence].
    sentence_block = np.repeat([np.array(enc_sentence)], len(enc_spans), axis=0)
    span_block = np.array(enc_spans)
    stacked = np.concatenate([span_block, sentence_block], axis=1)
    pred_input = torch.from_numpy(stacked).to(device)

    model.eval()
    with torch.no_grad():
        pred_out = model(pred_input)
        pred_out = torch.where(pred_out > 0.5, 1.0, 0.0)

    print('initial outcome length', len(pred_input))
    kept = [
        pair for pair in zip(pred_input, pred_out)
        if pair[1][label_idx['']] != 1
    ]
    print('final outcome length', len(kept))
    return pred_out
型
我用来计算准确率指标的函数如下:
def get_accuracy(y_pred, y_true):
    """Return the fraction of rows whose predictions match the targets exactly.

    Args:
        y_pred: 2-D tensor of predicted label indicators.
        y_true: 2-D tensor of target label indicators.

    Returns:
        The number of rows in which every column of ``y_pred`` equals
        ``y_true``, divided by the number of rows.
    """
    pred_rows = y_pred.cpu().numpy()
    true_rows = y_true.cpu().numpy()
    # A row counts only when all of its label columns agree.
    matches_per_row = (pred_rows == true_rows).sum(axis=1)
    row_is_exact = (matches_per_row >= true_rows.shape[1]).astype(int)
    return row_is_exact.sum() / row_is_exact.shape[0]
型
我也已经固定了 Torch 的随机种子,但没有效果。可能是什么问题?如有任何帮助,不胜感激。谢谢。
我期望重新加载后的模型能给出接近测试期间所达到的准确率,但实际并非如此。
2条答案
按热度按时间ngynwnxp1#
您可能需要将模型输出更改为索引类别,而不是原始输出:
字符串
与NumPy相同
2w2cym1i2#
问题解决了。每次重启内核时,标签编码都会重新生成,每个标签每次都会被分配到新的编号。我通过硬编码标签映射解决了这个问题。是否有更优雅的解决方案?请告诉我。