I have a PyTorch training model and I'm getting the following error:
Traceback (most recent call last):
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/routes.py", line 442, in run_predict
output = await app.get_blocks().process_api(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/blocks.py", line 1392, in process_api
result = await self.call_function(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/blocks.py", line 1097, in call_function
prediction = await anyio.to_thread.run_sync(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/utils.py", line 703, in wrapper
response = f(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/utils.py", line 703, in wrapper
response = f(*args, **kwargs)
File "app.py", line 277, in generate
return self.trainer.generate(
File "/home/ramin.mardani/simple-llm-finetuner/trainer.py", line 108, in generate
assert self.model is not None
AssertionError
/opt/pyai-3.8/lib64/python3.8/site-packages/peft/utils/other.py:119: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
warnings.warn(
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
/opt/pyai-3.8/lib64/python3.8/site-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization
warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization")
{'train_runtime': 9.6476, 'train_samples_per_second': 18.658, 'train_steps_per_second': 0.311, 'train_loss': 1.3394749959309895, 'epoch': 2.0}
Traceback (most recent call last):
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/routes.py", line 442, in run_predict
output = await app.get_blocks().process_api(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/blocks.py", line 1392, in process_api
result = await self.call_function(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/blocks.py", line 1097, in call_function
prediction = await anyio.to_thread.run_sync(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/opt/pyai-3.8/lib64/python3.8/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/utils.py", line 703, in wrapper
response = f(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/gradio/utils.py", line 703, in wrapper
response = f(*args, **kwargs)
File "app.py", line 277, in generate
return self.trainer.generate(
File "/home/ramin.mardani/simple-llm-finetuner/trainer.py", line 133, in generate
output = self.model.generate(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/peft/peft_model.py", line 971, in generate
outputs = self.base_model.generate(**kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/generation/utils.py", line 1642, in generate
return self.sample(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/generation/utils.py", line 2724, in sample
outputs = self(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1076, in forward
transformer_outputs = self.transformer(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 900, in forward
outputs = block(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 390, in forward
attn_outputs = self.attn(
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 312, in forward
query, key, value = self.c_attn(hidden_states).split(self.split_size, dim=2)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/peft/tuners/lora.py", line 1078, in forward
self.lora_A[self.active_adapter](self.lora_dropout[self.active_adapter](x))
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/pyai-3.8/lib64/python3.8/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: expected scalar type Float but found Half
I've tried changing the training file, but it doesn't work. Here is my trainer.py file:
import os
import gc
import torch
import transformers
import peft
import datasets
from contextlib import nullcontext
from config import (
    HAS_CUDA,
    MODEL,
    DEVICE_MAP,
    TRAINING_PARAMS,
    LORA_TRAINING_PARAMS,
    GENERATION_PARAMS
)


class Trainer():
    def __init__(self):
        self.model = None
        self.model_name = None
        self.lora_name = None
        self.loras = {}
        self.tokenizer = None
        self.trainer = None
        self.should_abort = False

    def unload_model(self):
        del self.model
        del self.tokenizer

        self.model = None
        self.model_name = None
        self.tokenizer = None

        if (HAS_CUDA):
            with torch.no_grad():
                torch.cuda.empty_cache()

        gc.collect()

    def load_model(self, model_name, force=False, **kwargs):
        assert model_name is not None

        if (model_name == self.model_name and not force):
            return

        if (self.model is not None):
            self.unload_model()

        self.model = transformers.AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=DEVICE_MAP,
            load_in_8bit=True,
            torch_dtype=torch.float16,
        )

        # Clear the collection that tracks which adapters are loaded, as they are associated with self.model
        self.loras = {}

        if model_name.startswith('decapoda-research/llama'):
            self.tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
        else:
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

        self.tokenizer.pad_token_id = 0
        self.model_name = model_name

    def load_lora(self, lora_name, replace_model=True):
        assert self.model is not None
        assert lora_name is not None

        if (lora_name == self.lora_name):
            return

        if lora_name in self.loras:
            self.lora_name = lora_name
            self.model.set_adapter(lora_name)
            return

        peft_config = peft.PeftConfig.from_pretrained(lora_name)
        if not replace_model:
            assert peft_config.base_model_name_or_path == self.model_name

        if peft_config.base_model_name_or_path != self.model_name:
            self.load_model(peft_config.base_model_name_or_path)

        assert self.model_name is not None
        assert self.model is not None

        if hasattr(self.model, 'load_adapter'):
            self.model.load_adapter(lora_name, adapter_name=lora_name)
        else:
            self.model = peft.PeftModel.from_pretrained(self.model, lora_name, adapter_name=lora_name)

        self.model.set_adapter(lora_name)
        if (self.model_name.startswith('cerebras')):
            self.model.half()

        self.lora_name = lora_name
        self.loras[lora_name] = True

    def unload_lora(self):
        self.lora_name = None

    def generate(self, prompt, **kwargs):
        assert self.model is not None
        assert self.model_name is not None
        assert self.tokenizer is not None

        kwargs = { **GENERATION_PARAMS, **kwargs }

        inputs = self.tokenizer(str(prompt), return_tensors="pt")
        input_ids = inputs["input_ids"].to(self.model.device)

        if self.model.config.pad_token_id is None:
            kwargs['pad_token_id'] = self.model.config.eos_token_id

        if (kwargs['do_sample']):
            del kwargs['num_beams']

        generation_config = transformers.GenerationConfig(
            use_cache=False,
            **kwargs
        )

        disable_lora = nullcontext()
        if self.lora_name is None and hasattr(self.model, 'disable_adapter'):
            disable_lora = self.model.disable_adapter()

        with torch.no_grad(), disable_lora:
            output = self.model.generate(
                input_ids=input_ids,
                attention_mask=torch.ones_like(input_ids),
                generation_config=generation_config
            )[0].to(self.model.device)

        return self.tokenizer.decode(output, skip_special_tokens=True).strip()

    def tokenize_sample(self, item, max_seq_length, add_eos_token=True):
        assert self.tokenizer is not None

        result = self.tokenizer(
            item["text"],
            truncation=True,
            max_length=max_seq_length,
            padding="max_length",
        )

        result = {
            "input_ids": result["input_ids"][:-1],
            "attention_mask": result["attention_mask"][:-1],
        }

        if (
            result["input_ids"][-1] != self.tokenizer.eos_token_id
            and len(result["input_ids"]) < max_seq_length
            and add_eos_token
        ):
            result["input_ids"].append(self.tokenizer.eos_token_id)
            result["attention_mask"].append(1)

        return result

    def tokenize_training_text(self, training_text, max_seq_length, separator="\n\n\n", **kwargs):
        samples = training_text.split(separator)
        samples = [x.strip() for x in samples]

        def to_dict(text):
            return { 'text': text }

        samples = [to_dict(x) for x in samples]
        training_dataset = datasets.Dataset.from_list(samples)
        training_dataset = training_dataset.shuffle().map(
            lambda x: self.tokenize_sample(x, max_seq_length),
            batched=False
        )

        return training_dataset

    def train(self, training_text=None, new_peft_model_name=None, **kwargs):
        assert self.should_abort is False
        assert self.model is not None
        assert self.model_name is not None
        assert self.tokenizer is not None

        kwargs = { **TRAINING_PARAMS, **LORA_TRAINING_PARAMS, **kwargs }

        self.lora_name = None
        self.loras = {}

        train_dataset = self.tokenize_training_text(training_text, **kwargs)

        if hasattr(self.model, 'disable_adapter'):
            self.load_model(self.model_name, force=True)

        self.model = peft.prepare_model_for_int8_training(self.model)
        self.model = peft.get_peft_model(self.model, peft.LoraConfig(
            r=kwargs['lora_r'],
            lora_alpha=kwargs['lora_alpha'],
            lora_dropout=kwargs['lora_dropout'],
            bias="none",
            task_type="CAUSAL_LM",
        ))

        if not os.path.exists('lora'):
            os.makedirs('lora')

        sanitized_model_name = self.model_name.replace('/', '_').replace('.', '_')
        output_dir = f"lora/{sanitized_model_name}_{new_peft_model_name}"

        training_args = transformers.TrainingArguments(
            per_device_train_batch_size=kwargs['micro_batch_size'],
            gradient_accumulation_steps=kwargs['gradient_accumulation_steps'],
            num_train_epochs=kwargs['epochs'],
            learning_rate=kwargs['learning_rate'],
            fp16=True,
            optim='adamw_torch',
            logging_steps=20,
            save_total_limit=3,
            output_dir=output_dir,
        )

        # _trainer = self
        # class LoggingCallback(transformers.TrainerCallback):
        #     def on_log(self, args, state, control, logs=None, **kwargs):
        #         _trainer.log += json.dumps(logs) + '\n'

        def should_abort():
            return self.should_abort

        def reset_abort():
            self.should_abort = False

        class AbortCallback(transformers.TrainerCallback):
            def on_step_end(self, args, state, control, **kwargs):
                if should_abort():
                    print("Stopping training...")
                    control.should_training_stop = True

            def on_train_end(self, args, state, control, **kwargs):
                if should_abort():
                    control.should_save = False

        # class CustomTrainer(transformers.Trainer):
        #     def __init__(self, *args, **kwargs):
        #         super().__init__(*args, **kwargs)
        #         self.abort_training = False
        #
        #     def stop_training(self):
        #         print("Stopping training...")
        #         self.abort_training = True
        #
        #     def training_step(self, model, inputs):
        #         if self.abort_training:
        #             raise RuntimeError("Training aborted.")
        #         return super().training_step(model, inputs)

        self.trainer = transformers.Trainer(
            model=self.model,
            train_dataset=train_dataset,
            args=training_args,
            data_collator=transformers.DataCollatorForLanguageModeling(
                self.tokenizer,
                mlm=False,
            ),
            callbacks=[AbortCallback()]
        )

        self.model.config.use_cache = False
        result = self.trainer.train(resume_from_checkpoint=False)

        if not should_abort():
            self.model.save_pretrained(output_dir)

        reset_abort()
        return result

    def abort_training(self):
        self.should_abort = True


if __name__ == '__main__':
    t = Trainer()
    t.load_model(MODEL)

    prompt = "Human: How is cheese made?\n\nAssistant:"
    print(t.generate(prompt))

    t.load_lora('lora/melon-mango-orange')
    print(t.generate(prompt))

    t.unload_lora()
    print(t.generate(prompt))
This is the repo I got this file from: https://github.com/lxe/simple-llm-finetuner/blob/master/trainer.py
It seems others have run into this problem before: https://github.com/lxe/simple-llm-finetuner/issues/52
dtype=torch.float is used for the inputs and outputs.
1 Answer
You have quite a lot of code here, which makes it hard to pin down the problem. If you can't reduce it to a minimal example, it will be difficult to help.
That said, you force the torch.half or torch.float16 dtype in several places throughout the code, yet you state that your inputs are float32. For example, torch_dtype=torch.float16 in load_model and self.model.half() in load_lora.
I'm not familiar with how your model is supposed to work, and I don't know where you got it from, but I would try not forcing these dtypes, or try specifying them as torch.float32.
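For illustration only, here is a minimal sketch of loading without the float16 casts; whether the 8-bit path in this repo still behaves well with full-precision non-quantized weights is an assumption you would have to verify, and "gpt2" is just a stand-in model name.

import torch
import transformers

# Hypothetical standalone load, mirroring the posted load_model but without
# forcing float16; "gpt2" stands in for whatever model name the app passes.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "gpt2",
    device_map="auto",
    load_in_8bit=True,
    torch_dtype=torch.float32,  # was torch.float16 in the posted code
)

# The corresponding change in load_lora would be to drop the extra downcast,
# so the LoRA layers stay in float32:
#     if (self.model_name.startswith('cerebras')):
#         self.model.half()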
If you need mixed-precision training to fit within your compute constraints, try looking into torch Automatic Mixed Precision.
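For reference, the basic torch AMP pattern looks roughly like the following self-contained toy sketch; the linear model, optimizer, and random data are placeholders for illustration, not anything from the posted trainer.

import torch
import torch.nn as nn

# Toy setup purely to show the autocast/GradScaler pattern.
model = nn.Linear(16, 4).cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()

for step in range(10):
    x = torch.randn(8, 16, device="cuda")
    y = torch.randint(0, 4, (8,), device="cuda")
    optimizer.zero_grad()
    # Ops inside autocast run in float16 where it is safe; the weights stay float32.
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = nn.functional.cross_entropy(model(x), y)
    scaler.scale(loss).backward()  # scale the loss to avoid float16 gradient underflow
    scaler.step(optimizer)         # unscales gradients, then steps the optimizer
    scaler.update()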