import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Conv2d(input channels, output channels, kernel size).
        # A channel is one dimension of a tensor, a container that holds data in N dimensions (matrices).
        self.conv1 = nn.Conv2d(3, 6, 5)
        # Shrink the image stack by pooling (kernel size, stride); take the max value per window.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # TODO: add conv3
        self.conv3 = nn.Conv2d(16, 32, 5)
        # Dropout zeroes out 20% of the features to help prevent overfitting.
        self.drop = nn.Dropout2d(p=0.2)
        # A linear layer predicts the output as a linear function of its inputs.
        # Linear(in features, out features); in features = channels * height * width after flattening.
        # TODO: this size no longer matches the output of conv3
        self.fc1 = nn.Linear(16 * 16 * 5, 120)
        # TODO: what sizes go here?
        self.fc1_5 = nn.Linear()
        # layer(size of input, size of output)
        # Linear layer = fully connected layer
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    def forward(self, x):
        # F.relu changes negative values to 0 and is applied to the whole stack of feature maps.
        # ReLU is an activation function; we apply it after each conv/linear layer,
        # but only in the hidden layers.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Drop some features after the 3rd conv to prevent overfitting.
        x = self.drop(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        # TODO: add fc1_5
        x = F.relu(self.fc1_5(x))
        x = F.relu(self.fc2(x))
        # Feed into the final fully connected layer to predict the class.
        x = self.fc3(x)  # no ReLU because it's the last layer
        return x
I am using images from CIFAR-10, which are 3 x 32 x 32. When I ran the code earlier, it stopped because the self.fc1 linear layer size doesn't work with the self.conv3 I added. I also don't know what to write for self.fc1_5. Can someone explain how this works and what the solution is? Thanks!

I added an extra convolutional layer, which you can see as self.conv3 = nn.Conv2d(16, 32, 5). The lines below the TODOs are where I am stuck. I updated the fc1 line to self.fc1 = nn.Linear(16 * 16 * 5, 120); before, it was self.fc1 = nn.Linear(16 * 5 * 5, 120).
1 Answer
When you build a CNN classifier with a fixed input size, it is easy to calculate the size of your image at each layer. We start from images of size [32, 32] (channels don't matter for now). A Conv2d with kernel size K and no padding shrinks each spatial dimension by K - 1, and a MaxPool2d(2, 2) halves it. The size loss from a convolution can be offset by using a padding of (K-1)//2, where K = kernel_size.
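As a minimal sketch of that arithmetic (assuming you keep conv3 as defined and don't pool after it; the 120 -> 120 shape for fc1_5 is just one workable choice, not fixed by anything in the question):

import torch
import torch.nn as nn

# Walk the CIFAR-10 input shape (3, 32, 32) through the layers above:
#   conv1: Conv2d(3, 6, 5)   -> 6  x 28 x 28   (32 - 5 + 1 = 28)
#   pool:  MaxPool2d(2, 2)   -> 6  x 14 x 14
#   conv2: Conv2d(6, 16, 5)  -> 16 x 10 x 10   (14 - 5 + 1 = 10)
#   pool:  MaxPool2d(2, 2)   -> 16 x 5  x 5
#   conv3: Conv2d(16, 32, 5) -> 32 x 1  x 1    (5 - 5 + 1 = 1)
# After flattening, fc1 therefore takes 32 * 1 * 1 = 32 input features:
fc1 = nn.Linear(32 * 1 * 1, 120)
# fc1_5 sits between fc1 (120 outputs) and fc2 (120 inputs), so 120 -> 120 works:
fc1_5 = nn.Linear(120, 120)

# Quick shape check (ReLU and dropout don't change shapes):
x = torch.randn(1, 3, 32, 32)  # one fake CIFAR-10 image
convs = nn.Sequential(
    nn.Conv2d(3, 6, 5), nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 16, 5), nn.MaxPool2d(2, 2),
    nn.Conv2d(16, 32, 5),
)
print(convs(x).shape)  # torch.Size([1, 32, 1, 1])

# Alternatively, padding=(K-1)//2 keeps the spatial size unchanged:
conv3_padded = nn.Conv2d(16, 32, 5, padding=2)  # (5 - 1)//2 = 2
print(conv3_padded(torch.randn(1, 16, 5, 5)).shape)  # torch.Size([1, 32, 5, 5])
# In that case fc1 would need 32 * 5 * 5 = 800 input features instead.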