在这段代码中,`max_prompt_len` 可能为 0;此时它作为 `torch.arange` 的步长(step),会触发下面的 “step must be nonzero” 错误:
vllm/vllm/worker/model_runner.py
第 232 行 264017a
start_loc_tensor = torch.arange(0,
| File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 29, in _raise_exception_on_finish
| task.result()
| File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 411, in run_engine_loop
| has_requests_in_progress = await self.engine_step()
| File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 390, in engine_step
| request_outputs = await self.engine.step_async()
| File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 189, in step_async
| all_outputs = await self._run_workers_async(
| File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 276, in _run_workers_async
| all_outputs = await asyncio.gather(*coros)
| File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
| result = self.fn(*self.args, **self.kwargs)
| File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
| return func(*args, **kwargs)
| File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 225, in execute_model
| output = self.model_runner.execute_model(seq_group_metadata_list,
| File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
| return func(*args, **kwargs)
| File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 553, in execute_model
| lora_mapping) = self.prepare_input_tensors(seq_group_metadata_list)
| File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 473, in prepare_input_tensors
| lora_requests) = self._prepare_prompt(seq_group_metadata_list)
| File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 232, in _prepare_prompt
| start_loc_tensor = torch.arange(0,
| RuntimeError: step must be nonzero
4条答案
按热度按时间vsnjm48y1#
我遇到了同样的问题。
模型:qwen-72b-chat-int4
vLLM 版本:0.3.1
zd287kbt2#
我解决了。原因是我不小心传入了一个空的提示词(prompt)。
zxlwwiss3#
我可以确认,即使输入不为空(长度不为零),这个问题依然存在。
以下是我的请求内容(payload):
@WoosukKwon
emeijp434#
+1