I'm running PyTorch 2.0.0 on CPU. My Python version is 3.10.10, 64-bit. Here is my code:
import torch
import torch.nn as nn
import string
# define the vocabulary of characters
vocab = string.ascii_letters + " ."
# define the size of the vocabulary and the hidden state
vocab_size = len(vocab)
hidden_size = 16
# define a mapping from characters to indices and vice versa
char_to_index = {c: i for i, c in enumerate(vocab)}
index_to_char = {i: c for i, c in enumerate(vocab)}
class RNN(nn.Module):
    def __init__(self, n):
        # initialize the parent class
        super(RNN, self).__init__()
        self.n = n
        # define the embedding layer that maps indices to vectors
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        # define the recurrent layer that updates the hidden state
        self.recurrent = nn.Linear(hidden_size, hidden_size)
        # define the output layer that maps hidden state to logits
        self.output = nn.Linear(hidden_size, vocab_size)
        torch.autograd.set_detect_anomaly(True)

    def forward(self, x, h):
        # x is a tensor of shape (self.n) containing indices
        # h is a tensor of shape (1, hidden_size) containing a hidden state
        # embed x into a vector of shape (1, hidden_size)
        x = self.embedding(x)
        # update h with x using a tanh activation function and non-inplace addition
        h_new = torch.tanh(self.recurrent(x).add(h))
        # compute logits from h_new using a linear layer
        logits = self.output(h_new)
        return logits, h_new

    def update(self, text):
        # text is a string containing user input
        # initialize an optimizer and a loss function
        optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        # loop through each character in text except the last self.n ones
        for i in range(len(text) - self.n):
            # get the current and next characters as indices
            current_chars = [char_to_index[c] for c in text[i:i+self.n]]
            next_char = char_to_index[text[i+self.n]]
            # convert them to tensors of shape (self.n) and (1) respectively
            current_chars = torch.tensor(current_chars)
            next_char = torch.tensor([next_char])
            # zero out the gradients from previous step
            optimizer.zero_grad()
            # forward pass through the model and get logits and new hidden state
            logits, self.h = self.forward(current_chars, self.h)
            # compute loss between logits and next_char
            loss = criterion(logits.view(1, -1), next_char.view(1))
            print(f"Loss: {loss.item():.4f}")
            # backward pass to compute gradients
            loss.backward()
            # update parameters with gradient descent
            optimizer.step()

    def generate(self, start):
        # start is a string of length self.n to start with
        # get the indices of the start characters
        start_indices = [char_to_index[c] for c in start]
        # convert them to a tensor of shape (self.n)
        start_indices = torch.tensor(start_indices)
        # initialize the output with the start characters
        output = [c for c in start]
        # loop until reaching a period or a maximum length
        while output[-1] != "." and len(output) < 100:
            # forward pass through the model and get logits and new hidden state
            logits, self.h = self.forward(start_indices, self.h)
            # apply softmax to get probabilities
            probs = torch.softmax(logits.view(-1), dim=0)
            # sample a next index from the probabilities
            next_index = torch.multinomial(probs, 1).item()
            # get the next character from the index
            next_char = index_to_char[next_index]
            # append it to the output
            output.append(next_char)
            # update the start indices with the next index
            start_indices[:-1] = start_indices[1:]
            start_indices[-1] = next_index
        # join and return the output as a string
        return "".join(output)

if __name__ == '__main__':
    # create a new RNN model with context size 1
    model = RNN(1)
    # initialize a random hidden state of shape (1, hidden_size)
    model.h = torch.randn(1, hidden_size)
    # update the model with some user input
    model.update("hello world.")
    # generate some text starting with "he"
    print(model.generate("he"))
Here is the output when I run it:
Loss: 4.5443
Loss: 4.4064
C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py:200: UserWarning: Error detected in TanhBackward0. Traceback of forward call that caused the error:
File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
model.update("hello world.") #open('english.txt', 'r').read())
File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 57, in update
logits, self.h = self.forward(current_chars, self.h)
File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 34, in forward
h_new = torch.tanh(self.recurrent(x).add(h))
(Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:119.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
model.update("hello world.") #open('english.txt', 'r').read())
File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 62, in update
loss.backward()
File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\_tensor.py", line 487, in backward
torch.autograd.backward(
File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py", line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
I'm not very experienced with Python or Torch, so forgive me if this is a silly question. All the research I've done suggests that I'm modifying a graph variable in the wrong place (or something along those lines).
I tried loss.backward(retain_graph=True) and got:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [16, 16]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
The "Good luck!" makes me a little worried XD
1 Answer
Fixed it. Here are the modified forward and update functions. It turns out I needed to .detach() and .clone() the h variable rather than overwrite it, and I also needed retain_graph=True to make it work. This may use a lot of memory, but I haven't run into that problem yet.