我正在尝试使用CUDA V12.2.128在Ubuntu 22.04上的GeForce RTX 2060 GPU上学习CUDA编程。
当我编译下面的代码并尝试运行它时,
#include "cuda_runtime.h"
#include <iostream>
#include <stdexcept>
#include <cstdlib>
#include <cmath>
#define CUDACHECK(E) if (E != cudaSuccess) \
{ \
std::cerr << "Fatal error [line " << __LINE__ << "]: " << cudaGetErrorString(E) << ".\n"; \
std::abort(); \
}
#define ASSERT(B) if (!(B)) \
{ \
std::cerr << "Fatal error: test failed in line " << __LINE__ << ".\n"; \
std::abort(); \
}
__global__ void fill(float* x, const std::size_t& size, const float& value)
{
const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < size)
{
x[tid] = value;
}
}
int main()
{
// allocate device data
const auto size = std::size_t{10000};
auto* xD = static_cast<float*>(nullptr);
CUDACHECK(cudaMalloc(&xD, size * sizeof(float)));
ASSERT(xD != nullptr);
// run fill kernel
const auto fill_value = static_cast<float>(-1.0);
fill<<<1,1>>>(xD, size, fill_value);
CUDACHECK(cudaDeviceSynchronize());
// <--- control doesn't get past here!
// copy to host
//auto* xH = reinterpret_cast<float*>(std::malloc(size * sizeof(float)));
//ASSERT(xH != nullptr);
//CUDACHECK(cudaMemcpy(xH, xD, size * sizeof(float), cudaMemcpyDeviceToHost));
//constexpr auto tol = std::is_same<float, float>::value ? static_cast<float>(1E-5) : static_cast<float>(1E-10);
//for (auto ii = std::size_t{}; ii < size; ++ii)
//{
// ASSERT(std::fabs(fill_value - xH[ii]) < tol);
//}
// free
CUDACHECK(cudaFree(reinterpret_cast<void*>(xD)));
//std::free(xH);
// tests have been passed
std::cout << "Tests passed! Hallo, CUDA world!" << std::endl;
return 0;
}
字符串
我得到以下输出:
Fatal error [line 42]: an illegal memory access was encountered.
Aborted (core dumped)
型
然后,我使用compute-sanitizer和--tool memcheck
,得到以下输出:
========= COMPUTE-SANITIZER
========= Invalid __global__ read of size 8 bytes
========= at 0x20 in fill(float *, const unsigned long &, const float &)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x7ffc902b1f30 is out of bounds
========= and is 639.439.150.385 bytes after the nearest allocation at 0x7f67aea00000 of size 512 bytes
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x32e950]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame:libcudart_static_4d8b33a106dceb3c07a56e26de61f2d53bb62a68 [0x1093e]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:cudaLaunchKernel [0x70b4e]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*) [0xb235]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:__device_stub__Z4fillPfRKmRKf(float*, unsigned long const*, float const*) [0xb094]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:fill(float*, unsigned long const&, float const&) [0xb0f7]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:main [0xadaa]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:../sysdeps/nptl/libc_start_call_main.h:58:__libc_start_call_main [0x29d90]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:../csu/libc-start.c:379:__libc_start_main [0x29e40]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xab15]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
=========
========= Program hit cudaErrorLaunchFailure (error 719) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: [0x47e786]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame:cudaDeviceSynchronize [0x48a64]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:main [0xadaf]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:../sysdeps/nptl/libc_start_call_main.h:58:__libc_start_call_main [0x29d90]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:../csu/libc-start.c:379:__libc_start_main [0x29e40]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xab15]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
=========
Fatal error [line 42]: ========= Program hit cudaErrorLaunchFailure (error 719) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: [0x47e786]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame:cudaDeviceSynchronize [0x48a64]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:main [0xadfb]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
========= Host Frame:../sysdeps/nptl/libc_start_call_main.h:58:__libc_start_call_main [0x29d90]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:../csu/libc-start.c:379:__libc_start_main [0x29e40]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xab15]
========= in /home/nitin/Documents/code/gpu/device/./hw_cuda
=========
unspecified launch failure.
========= Error: process didn't terminate successfully
========= Target application returned an error
========= ERROR SUMMARY: 3 errors
型
问题:
1.此错误的原因可能是什么?
1.如何继续调试它?
我会很乐意提供更多的信息,当要求它。先谢了。
1条答案
按热度按时间yhxst69z1#
在@AbatorAbetor在评论中提到它之后,我能够回到我的代码来验证传递对自动变量的引用会导致这种行为。所需的校正为:
字符串
非常感谢你的帮助。