Improved OpenCL userspace performance by replacing byte-wise copy loops with Genode::memcpy

This commit is contained in:
Marcel Lütke Dreimann
2025-09-18 09:04:55 +02:00
parent f72d10d279
commit b9a2fa13b7

View File

@@ -1,6 +1,7 @@
#define CL_TARGET_OPENCL_VERSION 100 #define CL_TARGET_OPENCL_VERSION 100
#include "cl.h" #include "cl.h"
#include <gpgpu/gpgpu.h> #include <gpgpu/gpgpu.h>
#include <util/string.h>
#pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-parameter"
#ifdef __cplusplus #ifdef __cplusplus
@@ -67,24 +68,21 @@ clGetPlatformInfo(cl_platform_id platform,
case CL_PLATFORM_NAME: case CL_PLATFORM_NAME:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(name) && i < param_value_size; i++) Genode::memcpy(val, name, Genode::min(sizeof(name), param_value_size));
val[i] = name[i];
break; break;
} }
case CL_PLATFORM_VERSION: case CL_PLATFORM_VERSION:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(ver) && i < param_value_size; i++) Genode::memcpy(val, ver, Genode::min(sizeof(ver), param_value_size));
val[i] = ver[i];
break; break;
} }
case CL_PLATFORM_VENDOR: case CL_PLATFORM_VENDOR:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(vendor) && i < param_value_size; i++) Genode::memcpy(val, vendor, Genode::min(sizeof(vendor), param_value_size));
val[i] = vendor[i];
break; break;
} }
@@ -132,8 +130,7 @@ clGetDeviceInfo(cl_device_id device,
case CL_DEVICE_NAME: case CL_DEVICE_NAME:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(name) && i < param_value_size; i++) Genode::memcpy(val, name, Genode::min(sizeof(name), param_value_size));
val[i] = name[i];
break; break;
} }
@@ -141,16 +138,14 @@ clGetDeviceInfo(cl_device_id device,
case CL_DEVICE_VERSION: case CL_DEVICE_VERSION:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(ver) && i < param_value_size; i++) Genode::memcpy(val, ver, Genode::min(sizeof(ver), param_value_size));
val[i] = ver[i];
break; break;
} }
case CL_DEVICE_VENDOR: case CL_DEVICE_VENDOR:
{ {
char* val = (char*)param_value; char* val = (char*)param_value;
for(size_t i = 0; i < sizeof(vendor) && i < param_value_size; i++) Genode::memcpy(val, vendor, Genode::min(sizeof(vendor), param_value_size));
val[i] = vendor[i];
break; break;
} }
@@ -792,23 +787,15 @@ clCreateKernel(cl_program program,
// we cannot just set the binary, because it's not in shared mem => copy it // we cannot just set the binary, because it's not in shared mem => copy it
kc->binary = (uint8_t*)g_cl_genode->alloc(program->size); kc->binary = (uint8_t*)g_cl_genode->alloc(program->size);
uint8_t* bin = (uint8_t*)program->binary; uint8_t* bin = (uint8_t*)program->binary;
for(size_t i = 0; i < program->size; i++) Genode::memcpy(kc->binary, bin, program->size);
{
kc->binary[i] = bin[i];
}
// preallocated 32 buff configs; // preallocated 32 buff configs;
kc->buffConfigs = new(g_cl_genode->getAlloc()) buffer_config[CL_MAX_KERNEL_ARGS]; kc->buffConfigs = new(g_cl_genode->getAlloc()) buffer_config[CL_MAX_KERNEL_ARGS];
// get name size // get name size
size_t size = 0; size_t size = Genode::strlen(kernel_name);
for(; kernel_name[size] != '\0'; size++);
size++; // add '\0'
kc->kernelName = (char*)g_cl_genode->alloc(size * sizeof(char)); kc->kernelName = (char*)g_cl_genode->alloc(size * sizeof(char));
for(size_t i = 0; i < size; i++) Genode::memcpy(kc->kernelName, kernel_name, size);
{
kc->kernelName[i] = kernel_name[i];
}
*errcode_ret |= CL_SUCCESS; *errcode_ret |= CL_SUCCESS;
return (cl_kernel)kc; return (cl_kernel)kc;
@@ -880,10 +867,7 @@ clSetKernelArg(cl_kernel kernel,
bc.non_pointer_type = true; bc.non_pointer_type = true;
// copy value to shared mem // copy value to shared mem
uint8_t* src = (uint8_t*)arg_value; Genode::memcpy(bc.buffer, arg_value, arg_size);
uint8_t* dst = (uint8_t*)bc.buffer;
for(size_t i = 0; i < arg_size; i++)
dst[i] = src[i];
} }
if(kc->buffCount < (arg_index + 1)) if(kc->buffCount < (arg_index + 1))
@@ -1099,12 +1083,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
return CL_INVALID_VALUE; return CL_INVALID_VALUE;
} }
uint8_t* src = (uint8_t*)buffer->virt_vm; Genode::memcpy(ptr, buffer->virt_vm, size);
uint8_t* dst = (uint8_t*)ptr;
for(size_t i = 0; i < size; i++)
{
dst[i] = src[i];
}
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -1149,12 +1128,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
return CL_INVALID_VALUE; return CL_INVALID_VALUE;
} }
uint8_t* src = (uint8_t*)ptr; Genode::memcpy(buffer->virt_vm, ptr, size);
uint8_t* dst = (uint8_t*)buffer->virt_vm;
for(size_t i = 0; i < size; i++)
{
dst[i] = src[i];
}
return CL_SUCCESS; return CL_SUCCESS;
} }