我尝试使用一个多头关注层和两个线性层以及一些表格数据来训练一个多类分类模型(包含3个类),然后我得到了这个错误:
IndexError:维度超出范围(应在[-1,0]范围内,但得到1)
我已经复制了我的模型/数据集类和下面的训练循环;这个错误似乎与我传递给损失函数(criterion
)的数据有关,损失函数如下所示:
y_预测值:tensor([-115.7523, -113.5820, 37.0307], dtype=torch.float64, grad_fn=<SqueezeBackward0>)
以及
y = tensor(0)
。
我无法解决这个错误,所以任何帮助这将是非常感谢。
下面是数据集和模型类:
class GeneExpressionDataset(torch.utils.data.Dataset):
def __init__(self, data):
self.data = data
self.features = self.data.iloc[:, 2:].values
self.labels = self.data.iloc[:, 1].values
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
features = torch.tensor(self.features[idx], dtype=torch.double)
labels = torch.tensor(self.labels[idx], dtype=torch.long)
return features, labels
class MultiheadAttention(nn.Module):
def __init__(self, input_dim, num_heads, dropout_rate):
super(MultiheadAttention, self).__init__()
self.input_dim = input_dim
self.num_heads = num_heads
self.dropout_rate = dropout_rate
self.q_linear = nn.Linear(input_dim, input_dim)
self.k_linear = nn.Linear(input_dim, input_dim)
self.v_linear = nn.Linear(input_dim, input_dim)
self.dropout = nn.Dropout(dropout_rate)
self.out_linear = nn.Linear(input_dim, input_dim)
def forward(self, query, key, value, mask=None):
batch_size = query.size(0)
# Apply linear transformations to obtain query, key, and value representations
q = self.q_linear(query).view(batch_size, -1, self.num_heads)
k = self.k_linear(key).view(batch_size, -1, self.num_heads)
v = self.v_linear(value).view(batch_size, -1, self.num_heads)
# Compute scaled dot-product attention scores
scores = torch.matmul(q, k.transpose(1, 2)) / (self.input_dim ** 0.5)
if mask is not None:
mask = mask.unsqueeze(1)
scores = scores.masked_fill(mask == 0, -1e9)
# Apply softmax to obtain attention weights
attn_weights = torch.softmax(scores, dim=-1)
# Apply dropout to the attention weights
attn_weights = self.dropout(attn_weights)
# Compute the attention output
attn_output = torch.matmul(attn_weights, v)
# Concatenate the attention output from different heads
attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, -1, self.num_heads * (self.input_dim // self.num_heads))
# Apply linear transformation to obtain the final attention output
out = self.out_linear(attn_output)
return out
class geneGPT(nn.Module):
def __init__(self, input_dim, hid_dim, output_dim, num_heads, dropout_rate):
super().__init__()
self.attention = MultiheadAttention(input_dim, num_heads, dropout_rate)
self.fc1 = nn.Linear(num_heads * (input_dim//num_heads), hid_dim)
self.relu = nn.ReLU()
self.out = nn.Linear(hid_dim, output_dim)
def forward(self, x, mask=None):
x = self.attention(x, x, x, mask)
x = self.relu(self.fc1(x))
x = self.out(x)
return x
这是训练循环:
print('Training...')
model = geneGPT(INPUT_DIM, HID_DIM, OUTPUT_DIM, NUM_HEADS, DROPOUT_RATE).double().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(NUM_EPOCHS):
train_losses = 0.0
valid_losses = 0.0
train_accs = 0.0
valid_accs = 0.0
for i, (x, y) in enumerate(train_dl):
x, y = x.to(device), y.to(device)
optimizer.zero_grad()
y_pred = model(x).squeeze()
y = y.squeeze()
print(y_pred, y)
train_loss = criterion(y_pred, y)
train_acc = multi_acc(y_pred, y)
train_loss.backward()
optimizer.step()
train_losses += train_loss.item()
train_accs += train_acc.item()
for i, (x, y) in enumerate(val_dl):
x, y = x.to(device), y.to(device)
y_pred = model(x).squeeze()
y = y.squeeze()
valid_loss = criterion(y_pred, y)
valid_acc = multi_acc(y_pred, y)
valid_losses += valid_loss.item()
valid_accs += valid_acc.item()
print("Epoch {}/{} | Loss: {:.4f} | Train Loss:{:.4f} | Valid Loss".format(epoch + 1, NUM_EPOCHS, train_loss / len(train_dl), valid_loss / len(val_dl)))
print("Training Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format(train_accs / len(train_dl), valid_accs / len(val_dl)))
test_accs = 0.0
for i, (x, y) in enumerate(test_dl):
x, y = x.to(device), y.to(device)
y_pred = model(x).squeeze()
y = y.squeeze()
test_acc = multi_acc(y_pred, y)
test_accs += test_acc.item()
print("Testing Accuracy: {:.4f}".format(test_accs / len(test_dl)))
torch.save(model.state_dict(), "model.pth")
1条答案
按热度按时间w8f9ii691#
在您的训练循环中
您更改了两者的尺寸,并在后面的步骤中
您再次更改了y_pred维度,但y dim保持不变,因此我假设y和y_pred的dim的相对差异导致了错误“Expected input batch_size(1)to match target batch_size(0)"。