最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

c++ - How to speed up string search using openCL - Stack Overflow

programmeradmin5浏览0评论

I am attempting to utilize opencl to accelerate string searching. However I am currently having a hard time making it faster then std::find. Here is code that is preforming the string search for reference.


#define CL_HPP_TARGET_OPENCL_VERSION 300
#define CL_HPP_ENABLE_EXCEPTIONS

//This function can throw exceptions
int clSearch(const std::string& str, const std::string& substr) {
    PROFILE_FUNCTION();
    int hostResult = -1;

    Timer* setupTimer = new Timer("Setup Context and Queue");
    // Create context (first available device)
    cl::Context context(CL_DEVICE_TYPE_DEFAULT);

    // Create a command queue
    cl::CommandQueue queue(context);
    delete setupTimer;

    //Each work-item checks if 'substr' occurs at position `i` of 'str'.
    std::string kernelSource = R"(
    __kernel void searchNaive(
        __global const char* text,
        __global const char* pattern,
        const int textLen,
        const int patternLen,
        __global int* result
    )
    {
        int i = get_global_id(0);

        // If there's room for the pattern starting at i:
        if (i + patternLen <= textLen)
        {
            // Check each character in the pattern
            for (int j = 0; j < patternLen; j++) {
                if (text[i + j] != pattern[j]) {
                    return;  // Mismatch, so no write to result
                }
            }
            // If we got here, we found a match at position i.
            // Store i in *result. This overwrites previous matches.
            // If you only want the first match, you could avoid overwriting
            // if *result != -1 or use an atomic.
            *result = i;
        }
    }
    )";

    // build the program
    Timer* buildTimer = new Timer("Build Program");
    cl::Program program(context, kernelSource, true);
    program.build();
    delete buildTimer;

    Timer *bufferTimer = new Timer("Create Buffers");
    int textLen    = static_cast<int>(str.size());
    int patternLen = static_cast<int>(substr.size());

    // Copy the text and pattern data to device
    cl::Buffer d_text(
        context,
        CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR,
        sizeof(char) * textLen,
        (void*)str.data()
    );
    cl::Buffer d_pattern(
        context,
        CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR,
        sizeof(char) * patternLen,
        (void*)substr.data()
    );

    cl::Buffer d_result(
        context,
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        sizeof(int),
        &hostResult
    );
    delete bufferTimer;

    // run kernel
    {
        PROFILE_SCOPE("Run Kernel");

        cl::Kernel kernel(program, "searchNaive");
        kernel.setArg(0, d_text);
        kernel.setArg(1, d_pattern);
        kernel.setArg(2, textLen);
        kernel.setArg(3, patternLen);
        kernel.setArg(4, d_result);

        // Enqueue kernel:
        queue.enqueueNDRangeKernel(
            kernel,
            cl::NullRange,
            cl::NDRange(textLen),
            cl::NullRange
        );
    }


    // Read back the result
    Timer* readTimer = new Timer("Read Result");
    cl::copy(queue, d_result, &hostResult, &hostResult + 1);
    delete readTimer;
    return hostResult;
}

I have tried switching to CL_MEM_USE_HOST_PTR which gave a small improvement in performance.

Currently I am seeing these times for my code

Create Buffers: 178 ms, Run Kernel: 373 ms, Read Result 39 ms

Currently all other times are very small. What can I do to speed up these sections? I am processing a 2gb string for this example.

发布评论

评论列表(0)

  1. 暂无评论