- 训练信息
1)单机单卡
2)显存信息,本地K40机器
- 问题描述:
Traceback (most recent call last):
File "./train.py", line 296, in <module>
main()
File "./train.py", line 292, in main
train(args, data_reader=data_reader)
File "./train.py", line 229, in train
results = parallel_exe.run(var_names, feed=get_feeder_data(data, place))
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/parallel_executor.py", line 311, in run
return_numpy=return_numpy)
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/executor.py", line 775, in run
six.reraise(*sys.exc_info())
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/executor.py", line 770, in run
use_program_cache=use_program_cache)
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/executor.py", line 829, in _run_impl
return_numpy=return_numpy)
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/executor.py", line 669, in _run_parallel
tensors = exe.run(fetch_var_names)._move_to_list()
paddle.fluid.core_avx.EnforceNotMet:
--堆栈信息
`C++ Call Stacks (More useful to developers):
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int)
2 paddle::framework::Tensor::check_memory_size() const
3 paddle::framework::EigenVector<long, 1, long>::Flatten(paddle::framework::Tensor&)
4 void paddle::operators::SumToLoDTensor(paddle::framework::ExecutionContext const&)
5 std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 3ul, paddle::operators::SumKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::SumKernel<paddle::platform::CUDADeviceContext, double>, paddle::operators::SumKernel<paddle::platform::CUDADeviceContext, int>, paddle::operators::SumKernel<paddle::platform::CUDADeviceContext, long>, paddle::operators::SumKernel<paddle::platform::CUDADeviceContext, paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
6 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const
7 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const
8 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&)
9 paddle::framework::details::ComputationOpHandle::RunImpl()
10 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*)
11 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue > const&, unsigned long*)
12 std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&)
13 std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&)
14 ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const
Python Call Stacks (More useful to users):
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/framework.py", line 2423, in append_op
attrs=kwargs.get("attrs", None))
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/layers/tensor.py", line 443, in sums
attrs={'use_mkldnn': False})
File "/home/vis/liushanshan/common_env/anaconda2_cuda90_fix_dev/lib/python2.7/site-packages/paddle/fluid/evaluator.py", line 277, in __init__
out=self.instance_error)
File "/home/vis/liushanshan/chinese/model/ctc_attention/1_base/thirdparty/ctc_attention_model.py", line 251, in ctc_attention_train_net
error_evaluator = fluid.evaluator.EditDistance(input=decoded_out, label=casted_label)
File "./train.py", line 74, in train
images, label, label_in, label_out, args, num_classes, data_shape)
File "./train.py", line 292, in main
train(args, data_reader=data_reader)
File "./train.py", line 296, in <module>
main()
Error Message Summary:
PaddleCheckError: holder_ should not be null
Tensor holds no memory. Call Tensor::mutable_data first. at [/root/Paddle/paddle/fluid/framework/tensor.cc:23]
[operator < sum > error]`
3条答案
按热度按时间kmynzznz1#
求官方给个答复。
今日值班同学反馈是官网没有对应api介绍,也找不到对应api负责同学。那我们使用遇到问题到底应该找谁??
imzjd6km2#
还是代码简化一下,提供一个简单可复现的Demo吧。
7qhs6swi3#
谢谢回复。已经解决。解决思路是
1、用当前paddle版本训练最简单的ctc模型,check可以跑。paddle版本和接口没问题。
2、对齐配置,发现代码中缺少error_evaluator.reset(exe),导致了这个问题Tensor holds no memory. Call Tensor::mutable_data first