C++ OpenCL构建错误:内核源未声明

y1aodyip  于 2023-02-20  发布在  其他
关注(0)|答案(1)|浏览(148)

我正在尝试运行一个来自互联网的OpenCL示例。它看起来像这样:
VecAdd.c

#define PROGRAM_FILE "vecAdd.cl"
#define KERNEL_FUNC "vecAdd"

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

int main( int argc, char* argv[] )
{
    // Length of vectors
    unsigned int n = 100000;

    // Host input vectors
    double *h_a;
    double *h_b;
    // Host output vector
    double *h_c;

    // Device input buffers
    cl_mem d_a;
    cl_mem d_b;
    // Device output buffer
    cl_mem d_c;

    cl_platform_id cpPlatform;        // OpenCL platform
    cl_device_id device_id;           // device ID
    cl_context context;               // context
    cl_command_queue queue;           // command queue
    cl_program program;               // program
    cl_kernel kernel;                 // kernel

    // Size, in bytes, of each vector
    size_t bytes = n*sizeof(double);

    // Allocate memory for each vector on host
    h_a = (double*)malloc(bytes);
    h_b = (double*)malloc(bytes);
    h_c = (double*)malloc(bytes);

    // Initialize vectors on host
    int i;
    for( i = 0; i < n; i++ )
    {
        h_a[i] = sinf(i)*sinf(i);
        h_b[i] = cosf(i)*cosf(i);
    }

    size_t globalSize, localSize;
    cl_int err;

    // Number of work items in each local work group
    localSize = 64;

    // Number of total work items - localSize must be devisor
    globalSize = ceil(n/(float)localSize)*localSize;

    // Bind to platform
    err = clGetPlatformIDs(1, &cpPlatform, NULL);

    // Get ID for the device
    err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

    // Create a context
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);

    // Create a command queue
    queue = clCreateCommandQueue(context, device_id, 0, &err);

    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1,
                            (const char **) & kernelSource, NULL, &err);

    // Build the program executable
    clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

    // Create the compute kernel in the program we wish to run
    kernel = clCreateKernel(program, "vecAdd", &err);

    // Create the input and output arrays in device memory for our calculation
    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);

    // Write our data set into the input array in device memory
    err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
                                   bytes, h_a, 0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
                                   bytes, h_b, 0, NULL, NULL);

    // Set the arguments to our compute kernel
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
    err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);

    // Execute the kernel over the entire range of the data set
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
                                                              0, NULL, NULL);

    // Wait for the command queue to get serviced before reading back results
    clFinish(queue);

    // Read the results from the device
    clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
                                bytes, h_c, 0, NULL, NULL );

    //Sum up vector c and print result divided by n, this should equal 1 within error
    double sum = 0;
    for(i=0; i<n; i++)
        sum += h_c[i];
    printf("final result: %f\n", sum/n);

    // release OpenCL resources
    clReleaseMemObject(d_a);
    clReleaseMemObject(d_b);
    clReleaseMemObject(d_c);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    //release host memory
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}

VecAdd.cl

// Double precision is an optional extension; OpenCL versions before 1.2
// require it to be enabled explicitly before `double` may be used.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// OpenCL kernel. Each work item takes care of one element of c:
//   c[id] = a[id] + b[id]   for id in [0, n)
// a, b : input vectors (read only)
// c    : output vector
// n    : number of elements in each vector
__kernel void vecAdd(  __global const double *a,
                       __global const double *b,
                       __global double *c,
                       const unsigned int n)
{
    // Get our global thread ID. get_global_id returns size_t, so keep that
    // type instead of narrowing to int and comparing signed with unsigned.
    const size_t id = get_global_id(0);

    // Make sure we do not go out of bounds: the host rounds the global size
    // up to a multiple of the work-group size, so surplus work items exist.
    if (id < n)
        c[id] = a[id] + b[id];
}

当我尝试使用CodeBlocks编译并运行VecAdd.c时,下面这一行报错:program = clCreateProgramWithSource(context, 1, (const char **) & kernelSource, NULL, &err);
错误如下所示:vecAdd.c|79|错误:未声明'kernelSource'(首次在此函数中使用)
由于print_info.cpp示例可以正常运行并打印出如下设备信息,我原本以为不会出现错误:
OpenCL设备信息:名称:英特尔® UHD显卡620供应商:英特尔®公司版本:OpenCL 3.0 NEO工作项的最大大小:(256,256,256)工作组的最大规模:256计算单元数量:24全局内存大小(字节):6762340352每个计算单位的本地内存大小(字节):2730

kcrjzv8t

kcrjzv8t1#

示例代码不完整。它缺少将VecAdd.cl文件读取为字符串kernelSource的部分。您可以添加:

#include <fstream> // read/write files
#include <iostream> // write to console
#include <iterator> // std::istreambuf_iterator
#include <string> // std::string

// ...

int main( int argc, char* argv[] )
{

    // ...

    // Read the entire kernel file into a string.
    std::string kernelSource = "";
    {
        std::ifstream file("./VecAdd.cl", std::ios::in); // path might be different for you
        if(file.fail()) {
            std::cout << "Error: File does not exist!\n";
            return 1; // nothing to compile without the kernel source
        }
        kernelSource = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        file.close();
    }

    // Create the compute program from the source buffer.
    // clCreateProgramWithSource expects an array of C strings, so pass the
    // address of a pointer to the string's characters; the address of the
    // std::string object itself is not a `const char*` array.
    const char* kernelSourcePtr = kernelSource.c_str();
    program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, NULL, &err);

    // ...

}

为了更容易地上手OpenCL,可以看看这个OpenCL-Wrapper。它大大简化了API的使用,同时不牺牲任何功能或性能,并且默认附带一个向量加法示例。请注意,与常规OpenCL冗长的样板代码相比,它的代码要短得多,也简单得多。

相关问题