python-3.x RuntimeError: Trying to backward through the graph a second time - loss.backward() or similar?

nimxete2  posted on 2023-03-24 in Python
Follow (0) | Answers (1) | Views (243)

I'm running PyTorch 2.0.0, CPU only. My Python version is 3.10.10, 64-bit. Here is my code:

import torch
import torch.nn as nn
import string

# define the vocabulary of characters
vocab = string.ascii_letters + " ."
# define the size of the vocabulary and the hidden state
vocab_size = len(vocab)
hidden_size = 16
# define a mapping from characters to indices and vice versa
char_to_index = {c: i for i, c in enumerate(vocab)}
index_to_char = {i: c for i, c in enumerate(vocab)}

class RNN(nn.Module):

    def __init__(self, n):
        # initialize the parent class
        super(RNN, self).__init__()
        self.n = n
        # define the embedding layer that maps indices to vectors
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        # define the recurrent layer that updates the hidden state
        self.recurrent = nn.Linear(hidden_size, hidden_size)
        # define the output layer that maps hidden state to logits
        self.output = nn.Linear(hidden_size, vocab_size)
        torch.autograd.set_detect_anomaly(True)

    def forward(self, x, h):
        # x is a tensor of shape (self.n) containing indices
        # h is a tensor of shape (1, hidden_size) containing a hidden state
        # embed x into a vector of shape (1, hidden_size)
        x = self.embedding(x)
        # update h with x using a tanh activation function and non-inplace addition
        h_new = torch.tanh(self.recurrent(x).add(h))
        # compute logits from h_new using a linear layer
        logits = self.output(h_new)
        return logits, h_new

    def update(self, text):
        # text is a string containing user input
        # initialize an optimizer and a loss function
        optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        # loop through each character in text except the last self.n ones
        for i in range(len(text) - self.n):
            # get the current and next characters as indices
            current_chars = [char_to_index[c] for c in text[i:i+self.n]]
            next_char = char_to_index[text[i+self.n]]
            # convert them to tensors of shape (self.n) and (1) respectively
            current_chars = torch.tensor(current_chars)
            next_char = torch.tensor([next_char])
            # zero out the gradients from previous step
            optimizer.zero_grad()
            # forward pass through the model and get logits and new hidden state 
            logits, self.h = self.forward(current_chars, self.h)
            # compute loss between logits and next_char 
            loss = criterion(logits.view(1, -1), next_char.view(1))
            print(f"Loss: {loss.item():.4f}")
            # backward pass to compute gradients
            loss.backward()
            # update parameters with gradient descent 
            optimizer.step()
        
    def generate(self, start):
        # start is a string of length self.n to start with
        # get the indices of the start characters
        start_indices = [char_to_index[c] for c in start]
        # convert them to a tensor of shape (self.n)
        start_indices = torch.tensor(start_indices)
        # initialize the output with the start characters
        output = [c for c in start]
        # loop until reaching a period or a maximum length
        while output[-1] != "." and len(output) < 100:
            # forward pass through the model and get logits and new hidden state 
            logits, self.h = self.forward(start_indices, self.h)
            # apply softmax to get probabilities 
            probs = torch.softmax(logits.view(-1), dim=0)
            # sample a next index from the probabilities 
            next_index = torch.multinomial(probs, 1).item()
            # get the next character from the index
            next_char = index_to_char[next_index]
            # append it to the output 
            output.append(next_char)
            # update the start indices with the next index 
            start_indices[:-1] = start_indices[1:]
            start_indices[-1] = next_index
        # join and return the output as a string
        return "".join(output)

if __name__ == '__main__':
    # create a new RNN model with context size
    model = RNN(1)
    # initialize a random hidden state of shape (1, hidden_size)
    model.h = torch.randn(1, hidden_size)
    # update the model with some user input
    model.update("hello world.")
    # generate some text starting with "he"
    print(model.generate("he"))

Here is the output when I run it:

Loss: 4.5443
Loss: 4.4064
C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py:200: UserWarning: Error detected in TanhBackward0. Traceback of forward call that caused the error:
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
    model.update("hello world.") #open('english.txt', 'r').read())
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 57, in update
    logits, self.h = self.forward(current_chars, self.h)
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 34, in forward
    h_new = torch.tanh(self.recurrent(x).add(h))
 (Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:119.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
    model.update("hello world.") #open('english.txt', 'r').read())
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 62, in update
    loss.backward()
  File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

I'm not very experienced with Python or Torch, so forgive me if this is a silly question. All the research I've done suggests that I'm modifying a graph variable in the wrong place (or something along those lines).
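As far as I can tell, the failing pattern boils down to something like this (a minimal sketch of the same issue, not my actual code; w stands in for a model parameter and h for the hidden state):

import torch

w = torch.randn(1, requires_grad=True)
h = torch.randn(1)           # hidden state carried across iterations
for step in range(2):
    h = torch.tanh(w * h)    # h stays attached to the previous step's graph
    loss = h.sum()
    loss.backward()          # the second iteration walks back into the first
                             # step's graph, whose buffers were already freed

Each new h keeps a grad_fn pointing into the previous iteration's graph, so the second backward() revisits saved tensors that the first backward() already freed.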
I tried loss.backward(retain_graph=True) and got:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [16, 16]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

That "Good luck!" worries me a little XD
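For reference, the [torch.FloatTensor [16, 16]] in that message matches the shape of self.recurrent.weight (an nn.Linear(16, 16)): with the graph retained across iterations, optimizer.step() updates that weight in place, and the next backward() finds it at a newer version than the one the retained graph saved. The same version-counter check can be tripped in isolation (again just a sketch, unrelated to the model above):

import torch

a = torch.randn(3, requires_grad=True)
b = a.exp()          # exp() saves its output for use in the backward pass
b.add_(1.0)          # in-place change bumps the saved tensor's version counter
b.sum().backward()   # RuntimeError: one of the variables needed for gradient
                     # computation has been modified by an inplace operation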

3bygqnnd 1#

Fixed it. Here are the modified forward and update functions:

def forward(self, x, h):
    # embed the input indices into vectors
    x = self.embedding(x)
    # clone h so the addition never aliases the caller's hidden state
    h_new = torch.tanh(self.recurrent(x) + h.clone())
    return self.output(h_new), h_new

def update(self, text):
    optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    for i in range(len(text) - self.n):
        optimizer.zero_grad()
        current_chars = [char_to_index[c] for c in text[i:i+self.n]]
        next_char = char_to_index[text[i+self.n]]
        current_chars = torch.tensor(current_chars)
        next_char = torch.tensor([next_char])
        # pass a detached copy of the hidden state so this step's graph
        # cannot reach back into previous iterations
        logits, h_new = self.forward(current_chars, self.h.detach())
        loss = criterion(logits.view(1, -1), next_char.view(1))
        loss.backward(retain_graph=True)
        optimizer.step()
        # store the new hidden state detached from the current graph
        self.h = h_new.detach()
    print(f"Loss: {loss.item():.4f}")

It turns out I needed to .detach() and .clone() the h variable rather than overwrite it. I also needed retain_graph=True to make it work. This may use a lot of memory, but I haven't hit that problem yet.
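(If memory ever does become a problem: since self.h is detached both on the way into forward and when it is stored back, each iteration's graph should be self-contained, so a plain backward call ought to work as well and lets PyTorch free each step's buffers. Untested in this exact setup, just the usual truncated-BPTT pattern:)

        loss.backward()  # no retain_graph needed once h is detached every step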
