I'm working on a project where I need to perform image inversion using GPU with OpenCL, but it's not working as expected. The goal is to invert the colors of an image using a kernel and then retrieve the result. However, the output isn't correct, and I need help troubleshooting.
Here's the structure of my code:
using OpenCL.Net;
namespace miniphotoshop_be.Services
{
    /// <summary>
    /// Runs the <c>invert_image</c> OpenCL kernel on the first GPU device of the
    /// first OpenCL platform reported by the driver.
    /// </summary>
    public class GpuOpenCLTransformationService
    {
        // The host-side size check below assumes the kernel addresses pixels as
        // (y * width + x) * 3, i.e. three bytes per pixel with no row padding.
        private const int BytesPerPixel = 3;

        private readonly OpenCL.Net.Context _context;
        private readonly OpenCL.Net.CommandQueue _queue;
        private readonly OpenCL.Net.Device _device;
        private readonly OpenCL.Net.Program _program;

        /// <summary>
        /// Selects a GPU device, creates the context and command queue, then loads and
        /// builds the kernel source from <c>Services/opencl_kernels/invert_image.cl</c>
        /// (resolved relative to the current working directory).
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No platform or GPU device is available, or any OpenCL setup call fails.
        /// A build failure includes the compiler's build log in the message.
        /// </exception>
        public GpuOpenCLTransformationService()
        {
            var platformIds = Cl.GetPlatformIDs(out var errorCode);
            if (errorCode != ErrorCode.Success || platformIds == null || platformIds.Length == 0)
            {
                throw new InvalidOperationException($"No OpenCL platform is available. (OpenCL error: {errorCode})");
            }

            var devices = Cl.GetDeviceIDs(platformIds[0], DeviceType.Gpu, out errorCode);
            if (errorCode != ErrorCode.Success || devices == null || devices.Length == 0)
            {
                throw new InvalidOperationException($"No OpenCL GPU device was found on the first platform. (OpenCL error: {errorCode})");
            }

            _device = devices[0];

            _context = Cl.CreateContext(null, 1, new[] { _device }, null, IntPtr.Zero, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL context.");

            _queue = Cl.CreateCommandQueue(_context, _device, CommandQueueProperties.None, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL command queue.");

            var kernelSource = System.IO.File.ReadAllText("Services/opencl_kernels/invert_image.cl");
            _program = Cl.CreateProgramWithSource(_context, 1, new[] { kernelSource }, null, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL program.");

            errorCode = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            if (errorCode != ErrorCode.Success)
            {
                // Without the build log a kernel compile error only shows up later
                // as an unexplained "Failed to create OpenCL kernel."
                var buildLog = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Log, out _);
                throw new InvalidOperationException($"Failed to build OpenCL program. (OpenCL error: {errorCode}) Build log: {buildLog}");
            }
        }

        /// <summary>
        /// Uploads <paramref name="imageData"/> to the device, runs the
        /// <c>invert_image</c> kernel over a <paramref name="width"/> x
        /// <paramref name="height"/> grid and returns the buffer read back from the device.
        /// </summary>
        /// <param name="imageData">
        /// Raw, decoded pixel bytes: three bytes per pixel, rows tightly packed.
        /// An encoded file (JPEG, PNG, ...) is not valid input.
        /// </param>
        /// <param name="width">Image width in pixels; must be positive.</param>
        /// <param name="height">Image height in pixels; must be positive.</param>
        /// <returns>A new array of the same length as <paramref name="imageData"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="imageData"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A dimension is zero or negative.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="imageData"/> is shorter than width * height * 3 bytes.
        /// </exception>
        /// <exception cref="InvalidOperationException">An OpenCL call fails.</exception>
        public byte[] Invert(byte[] imageData, int width, int height)
        {
            if (imageData == null)
            {
                throw new ArgumentNullException(nameof(imageData));
            }

            if (width <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(width), width, "Width must be positive.");
            }

            if (height <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(height), height, "Height must be positive.");
            }

            // Computed in 64-bit so a large image cannot wrap around and slip past the check.
            long requiredBytes = (long)width * height * BytesPerPixel;
            if (imageData.Length < requiredBytes)
            {
                // A too-short buffer (typically a compressed file passed instead of decoded
                // pixels) would make the kernel read and write past the end of the device buffer.
                throw new ArgumentException(
                    $"Expected at least {requiredBytes} bytes of raw pixel data for a {width}x{height} image but got {imageData.Length}.",
                    nameof(imageData));
            }

            var result = new byte[imageData.Length];

            var kernel = Cl.CreateKernel(_program, "invert_image", out var errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL kernel.");
            try
            {
                var imageBuffer = Cl.CreateBuffer(_context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)imageData.Length, imageData, out errorCode);
                EnsureSuccess(errorCode, "Failed to create OpenCL buffer.");
                try
                {
                    EnsureSuccess(Cl.SetKernelArg(kernel, 0, imageBuffer), "Failed to set OpenCL kernel arguments.");
                    EnsureSuccess(Cl.SetKernelArg(kernel, 1, width), "Failed to set OpenCL kernel arguments.");
                    EnsureSuccess(Cl.SetKernelArg(kernel, 2, height), "Failed to set OpenCL kernel arguments.");

                    var globalWorkSize = new IntPtr[] { (IntPtr)width, (IntPtr)height };
                    errorCode = Cl.EnqueueNDRangeKernel(_queue, kernel, 2, null, globalWorkSize, null, 0, null, out _);
                    EnsureSuccess(errorCode, "Failed to enqueue OpenCL kernel.");

                    // Blocking read (Bool.True): the call returns only after the data is in 'result'.
                    errorCode = Cl.EnqueueReadBuffer(_queue, imageBuffer, Bool.True, IntPtr.Zero, (IntPtr)result.Length, result, 0, null, out _);
                    EnsureSuccess(errorCode, "Failed to read OpenCL buffer.");
                }
                finally
                {
                    // Released on every path so a failed call does not leak device memory.
                    Cl.ReleaseMemObject(imageBuffer);
                }
            }
            finally
            {
                Cl.ReleaseKernel(kernel);
            }

            Console.WriteLine("-----------------");
            Console.WriteLine("First bytes before kernel execution: " + string.Join(", ", imageData.Take(10)));
            Console.WriteLine("First bytes after kernel execution: " + string.Join(", ", result.Take(10)));
            Console.WriteLine("-----------------");
            Console.WriteLine("-----------------");
            return result;
        }

        // Throws with the OpenCL error code attached when a call did not succeed.
        private static void EnsureSuccess(ErrorCode errorCode, string message)
        {
            if (errorCode != ErrorCode.Success)
            {
                throw new InvalidOperationException($"{message} (OpenCL error: {errorCode})");
            }
        }
    }
}
And the used kernel is:
// Each work-item replaces three consecutive bytes b with 255 - b, starting at
// byte offset (row * width + col) * 3 of the image buffer.
__kernel void invert_image(__global unsigned char* image, int width, int height)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the width x height rectangle do nothing.
    if (col >= width || row >= height)
    {
        return;
    }

    __global unsigned char* px = image + (row * width + col) * 3;
    for (int c = 0; c < 3; ++c)
    {
        px[c] = 255 - px[c];
    }
}
I've already tried the CPU-based image inversion using Bitmap and MemoryStream, and it works as expected. However, when I try to perform the same operation using OpenCL on the GPU, the output is either not changing or the image is corrupted.
I'm working on a project where I need to perform image inversion using GPU with OpenCL, but it's not working as expected. The goal is to invert the colors of an image using a kernel and then retrieve the result. However, the output isn't correct, and I need help troubleshooting.
Here's the structure of my code:
using OpenCL.Net;
namespace miniphotoshop_be.Services
{
    /// <summary>
    /// Runs the <c>invert_image</c> OpenCL kernel on the first GPU device of the
    /// first OpenCL platform reported by the driver.
    /// </summary>
    public class GpuOpenCLTransformationService
    {
        // The host-side size check below assumes the kernel addresses pixels as
        // (y * width + x) * 3, i.e. three bytes per pixel with no row padding.
        private const int BytesPerPixel = 3;

        private readonly OpenCL.Net.Context _context;
        private readonly OpenCL.Net.CommandQueue _queue;
        private readonly OpenCL.Net.Device _device;
        private readonly OpenCL.Net.Program _program;

        /// <summary>
        /// Selects a GPU device, creates the context and command queue, then loads and
        /// builds the kernel source from <c>Services/opencl_kernels/invert_image.cl</c>
        /// (resolved relative to the current working directory).
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No platform or GPU device is available, or any OpenCL setup call fails.
        /// A build failure includes the compiler's build log in the message.
        /// </exception>
        public GpuOpenCLTransformationService()
        {
            var platformIds = Cl.GetPlatformIDs(out var errorCode);
            if (errorCode != ErrorCode.Success || platformIds == null || platformIds.Length == 0)
            {
                throw new InvalidOperationException($"No OpenCL platform is available. (OpenCL error: {errorCode})");
            }

            var devices = Cl.GetDeviceIDs(platformIds[0], DeviceType.Gpu, out errorCode);
            if (errorCode != ErrorCode.Success || devices == null || devices.Length == 0)
            {
                throw new InvalidOperationException($"No OpenCL GPU device was found on the first platform. (OpenCL error: {errorCode})");
            }

            _device = devices[0];

            _context = Cl.CreateContext(null, 1, new[] { _device }, null, IntPtr.Zero, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL context.");

            _queue = Cl.CreateCommandQueue(_context, _device, CommandQueueProperties.None, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL command queue.");

            var kernelSource = System.IO.File.ReadAllText("Services/opencl_kernels/invert_image.cl");
            _program = Cl.CreateProgramWithSource(_context, 1, new[] { kernelSource }, null, out errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL program.");

            errorCode = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            if (errorCode != ErrorCode.Success)
            {
                // Without the build log a kernel compile error only shows up later
                // as an unexplained "Failed to create OpenCL kernel."
                var buildLog = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Log, out _);
                throw new InvalidOperationException($"Failed to build OpenCL program. (OpenCL error: {errorCode}) Build log: {buildLog}");
            }
        }

        /// <summary>
        /// Uploads <paramref name="imageData"/> to the device, runs the
        /// <c>invert_image</c> kernel over a <paramref name="width"/> x
        /// <paramref name="height"/> grid and returns the buffer read back from the device.
        /// </summary>
        /// <param name="imageData">
        /// Raw, decoded pixel bytes: three bytes per pixel, rows tightly packed.
        /// An encoded file (JPEG, PNG, ...) is not valid input.
        /// </param>
        /// <param name="width">Image width in pixels; must be positive.</param>
        /// <param name="height">Image height in pixels; must be positive.</param>
        /// <returns>A new array of the same length as <paramref name="imageData"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="imageData"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A dimension is zero or negative.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="imageData"/> is shorter than width * height * 3 bytes.
        /// </exception>
        /// <exception cref="InvalidOperationException">An OpenCL call fails.</exception>
        public byte[] Invert(byte[] imageData, int width, int height)
        {
            if (imageData == null)
            {
                throw new ArgumentNullException(nameof(imageData));
            }

            if (width <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(width), width, "Width must be positive.");
            }

            if (height <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(height), height, "Height must be positive.");
            }

            // Computed in 64-bit so a large image cannot wrap around and slip past the check.
            long requiredBytes = (long)width * height * BytesPerPixel;
            if (imageData.Length < requiredBytes)
            {
                // A too-short buffer (typically a compressed file passed instead of decoded
                // pixels) would make the kernel read and write past the end of the device buffer.
                throw new ArgumentException(
                    $"Expected at least {requiredBytes} bytes of raw pixel data for a {width}x{height} image but got {imageData.Length}.",
                    nameof(imageData));
            }

            var result = new byte[imageData.Length];

            var kernel = Cl.CreateKernel(_program, "invert_image", out var errorCode);
            EnsureSuccess(errorCode, "Failed to create OpenCL kernel.");
            try
            {
                var imageBuffer = Cl.CreateBuffer(_context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)imageData.Length, imageData, out errorCode);
                EnsureSuccess(errorCode, "Failed to create OpenCL buffer.");
                try
                {
                    EnsureSuccess(Cl.SetKernelArg(kernel, 0, imageBuffer), "Failed to set OpenCL kernel arguments.");
                    EnsureSuccess(Cl.SetKernelArg(kernel, 1, width), "Failed to set OpenCL kernel arguments.");
                    EnsureSuccess(Cl.SetKernelArg(kernel, 2, height), "Failed to set OpenCL kernel arguments.");

                    var globalWorkSize = new IntPtr[] { (IntPtr)width, (IntPtr)height };
                    errorCode = Cl.EnqueueNDRangeKernel(_queue, kernel, 2, null, globalWorkSize, null, 0, null, out _);
                    EnsureSuccess(errorCode, "Failed to enqueue OpenCL kernel.");

                    // Blocking read (Bool.True): the call returns only after the data is in 'result'.
                    errorCode = Cl.EnqueueReadBuffer(_queue, imageBuffer, Bool.True, IntPtr.Zero, (IntPtr)result.Length, result, 0, null, out _);
                    EnsureSuccess(errorCode, "Failed to read OpenCL buffer.");
                }
                finally
                {
                    // Released on every path so a failed call does not leak device memory.
                    Cl.ReleaseMemObject(imageBuffer);
                }
            }
            finally
            {
                Cl.ReleaseKernel(kernel);
            }

            Console.WriteLine("-----------------");
            Console.WriteLine("First bytes before kernel execution: " + string.Join(", ", imageData.Take(10)));
            Console.WriteLine("First bytes after kernel execution: " + string.Join(", ", result.Take(10)));
            Console.WriteLine("-----------------");
            Console.WriteLine("-----------------");
            return result;
        }

        // Throws with the OpenCL error code attached when a call did not succeed.
        private static void EnsureSuccess(ErrorCode errorCode, string message)
        {
            if (errorCode != ErrorCode.Success)
            {
                throw new InvalidOperationException($"{message} (OpenCL error: {errorCode})");
            }
        }
    }
}
And the used kernel is:
// Each work-item replaces three consecutive bytes b with 255 - b, starting at
// byte offset (row * width + col) * 3 of the image buffer.
__kernel void invert_image(__global unsigned char* image, int width, int height)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the width x height rectangle do nothing.
    if (col >= width || row >= height)
    {
        return;
    }

    __global unsigned char* px = image + (row * width + col) * 3;
    for (int c = 0; c < 3; ++c)
    {
        px[c] = 255 - px[c];
    }
}
I've already tried the CPU-based image inversion using Bitmap and MemoryStream, and it works as expected. However, when I try to perform the same operation using OpenCL on the GPU, the output is either not changing or the image is corrupted.
Share Improve this question edited Mar 31 at 11:41 DarkBee 15.5k8 gold badges72 silver badges117 bronze badges asked Mar 31 at 11:30 SzPeter-9923SzPeter-9923 113 bronze badges 5 |2 Answers
Reset to default 1SOLUTION
Thanks for the tip about the bitwise XOR operator, that's definitely a good idea!
However, in my case the images don't have an alpha channel, so using RGBA (*4) doesn't work – it's actually just RGB, so I need to use *3 instead.
Also, I figured out what was wrong – I was accidentally working with the JPEG byte array instead of the raw pixelBuffer, so that’s why the transformation didn’t work as expected. It’s all good now!
// Each work-item owns a 4-byte group starting at (row * width + col) * 4 and
// replaces its first three bytes b with 255 - b; the fourth byte is left as is.
__kernel void invert_image(__global unsigned char* image, int width, int height)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the width x height rectangle do nothing.
    if (col >= width || row >= height)
    {
        return;
    }

    const int base = (row * width + col) * 4;
    for (int c = 0; c < 3; ++c)
    {
        image[base + c] = 255 - image[base + c];
    }
}
Use *4 instead of *3 if the image is RGBA, which I would assume is the case without having the image to verify. Another note: instead of using 255 - image[idx], use image[idx] ^ 255. For 8-bit channel values the two expressions give the same result, and the bitwise XOR (^) is typically faster than subtraction because it avoids arithmetic calculations. Here is the final example:
// Each work-item owns a 4-byte group starting at (row * width + col) * 4 and
// XORs its first three bytes with 255; the fourth byte is left as is.
__kernel void invert_image(__global unsigned char* image, int width, int height)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the width x height rectangle do nothing.
    if (col >= width || row >= height)
    {
        return;
    }

    __global unsigned char* px = image + (row * width + col) * 4;
    px[0] ^= 255;
    px[1] ^= 255;
    px[2] ^= 255;
}
`(y * width + x) * 3`. It should be `y * stride + x * 3`. This is likely not the main error, but this can be a problem for images with uneven width. – JonasH Commented Mar 31 at 11:59