no SLM + host map

This commit is contained in:
Marcel Lütke Dreimann
2025-06-23 13:57:49 +02:00
parent 73dd7cedb5
commit 93ef189be3

View File

@@ -43,6 +43,8 @@ using namespace std;
// Profile the result of each step to a file
// #define profile
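// Pass kernel scratch space as OpenCL local memory (size-only argument, NULL value);
// when left undefined, cl_mem buffers (fake_slm) are created and passed instead
// #define SLM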
///////////////////////////////////////////////////////////
#include "SURF_kernel.h"
@@ -121,6 +123,10 @@ cl_mem rrx, rry;
// OpenCL memory objects; des and ndes are bound as arguments of the
// ckcomputeDes / cknormalDes kernels.
cl_mem des;
cl_mem mid;
cl_mem ndes;

#if !defined(SLM)
// Buffers passed to the kernels in place of the local-memory arguments when
// SLM is not defined (slots 0..9 are assigned where the kernel arguments are set).
cl_mem fake_slm[10];
#endif // !SLM
///////////////////////////////////////////////////////////
// NOTE(review): presumably per-stage timing accumulators — confirm against their use.
double cRow, cCol, cInt, cBui;
double cExt, cOut, cMov, cRnum;
double cOri, cDes, cnDes, cCom;
// OpenCL event handles, one per named stage/transfer.
cl_event RowEvent, ColEvent, BuiEvent, ExtEvent, WriteOut;
cl_event WriteMipts, OriEvent, DesEvent, nDesEvent, comEvent;
@@ -461,13 +467,16 @@ int main(int argc, char **argv)
#endif
*/
// Upload the input image: allocate d_Input once, without a host pointer, then
// copy `data` into it with an explicit write.
//  - CL_MEM_COPY_HOST_PTR must not be combined with a NULL host_ptr
//    (clCreateBuffer fails with CL_INVALID_HOST_PTR), so the flag is dropped.
//  - d_Input is created exactly once; assigning it twice would leak the first buffer.
//d_Input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, imgSize * sizeof(float), data, &ciErrNum);
d_Input = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum);
// oclCheckError(ciErrNum, CL_SUCCESS);
// Non-blocking write (CL_FALSE): `data` must stay valid and unmodified until
// the queue has executed this command.
ciErrNum = clEnqueueWriteBuffer(clqueue, d_Input, CL_FALSE, 0, imgSize * sizeof(float), data, 0, NULL, NULL);
// oclCheckError(ciErrNum, CL_SUCCESS);
// Device buffers of the same size as the input, allocated uninitialised.
d_Output = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum);
// oclCheckError(ciErrNum, CL_SUCCESS);
intImage = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum);
// oclCheckError(ciErrNum, CL_SUCCESS);
#ifdef profile
int N = imgSize;
FILE *hin = fopen("in.dat", "w");
@@ -489,7 +498,12 @@ int main(int argc, char **argv)
ciErrNum = clSetKernelArg(ckRowIntegral, 0, sizeof(cl_mem), (void *)&d_Input);
ciErrNum |= clSetKernelArg(ckRowIntegral, 1, sizeof(cl_mem), (void *)&d_Output);
#ifdef SLM
ciErrNum |= clSetKernelArg(ckRowIntegral, 2, sharedMemSize * sizeof(float), NULL);
#else // SLM
fake_slm[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sharedMemSize * sizeof(float), NULL, &ciErrNum);
ciErrNum |= clSetKernelArg(ckRowIntegral, 2, sharedMemSize * sizeof(float), &fake_slm[0]);
#endif // SLM
ciErrNum |= clSetKernelArg(ckRowIntegral, 3, sizeof(int), (void *)&width);
/*
@@ -538,7 +552,12 @@ int main(int argc, char **argv)
ciErrNum = clSetKernelArg(ckColIntegral, 0, sizeof(cl_mem), (void *)&d_Output);
ciErrNum |= clSetKernelArg(ckColIntegral, 1, sizeof(cl_mem), (void *)&intImage);
#ifdef SLM
ciErrNum |= clSetKernelArg(ckColIntegral, 2, sharedMemSize * sizeof(float), NULL);
#else // SLM
fake_slm[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sharedMemSize * sizeof(float), NULL, &ciErrNum);
ciErrNum |= clSetKernelArg(ckColIntegral, 2, sharedMemSize * sizeof(float), &fake_slm[1]);
#endif // SLM
ciErrNum |= clSetKernelArg(ckColIntegral, 3, sizeof(int), (void *)&height);
ciErrNum |= clSetKernelArg(ckColIntegral, 4, sizeof(int), (void *)&width);
@@ -780,11 +799,24 @@ int main(int argc, char **argv)
// Remaining ckGetOrientation arguments: stride, orientation buffer, cmn, then
// five scratch arrays (three of 109 floats, two of 48 floats) at indices 7..11.
clSetKernelArg(ckGetOrientation, 4, sizeof(int), (void *)&stride);
clSetKernelArg(ckGetOrientation, 5, sizeof(cl_mem), (void *)&orientation);
clSetKernelArg(ckGetOrientation, 6, sizeof(int), (void *)&cmn);
#ifdef SLM
// Local-memory arguments: only the byte size is supplied, the value is NULL.
clSetKernelArg(ckGetOrientation, 7, 109 * sizeof(float), 0);
clSetKernelArg(ckGetOrientation, 8, 109 * sizeof(float), 0);
clSetKernelArg(ckGetOrientation, 9, 109 * sizeof(float), 0);
clSetKernelArg(ckGetOrientation, 10, 48 * sizeof(float), 0);
clSetKernelArg(ckGetOrientation, 11, 48 * sizeof(float), 0);
#else // SLM
// Scratch arrays are created as buffers of the same byte sizes instead.
fake_slm[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
fake_slm[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
fake_slm[4] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
fake_slm[5] = clCreateBuffer(context, CL_MEM_READ_WRITE, 48 * sizeof(float), NULL, &ciErrNum);
fake_slm[6] = clCreateBuffer(context, CL_MEM_READ_WRITE, 48 * sizeof(float), NULL, &ciErrNum);
// Buffer arguments are passed as cl_mem handles, so arg_size is sizeof(cl_mem),
// not the byte size of the buffer.
// NOTE(review): unlike local memory, each buffer is shared by every
// work-group — confirm the kernel indexes them accordingly.
clSetKernelArg(ckGetOrientation, 7, sizeof(cl_mem), (void *)&fake_slm[2]);
clSetKernelArg(ckGetOrientation, 8, sizeof(cl_mem), (void *)&fake_slm[3]);
clSetKernelArg(ckGetOrientation, 9, sizeof(cl_mem), (void *)&fake_slm[4]);
clSetKernelArg(ckGetOrientation, 10, sizeof(cl_mem), (void *)&fake_slm[5]);
clSetKernelArg(ckGetOrientation, 11, sizeof(cl_mem), (void *)&fake_slm[6]);
#endif // SLM
// clSetKernelArg(ckGetOrientation, 12,sizeof(cl_mem), (void *)&test);
SHOWINFO(clEnqueueNDRangeKernel);
@@ -950,8 +982,15 @@ int main(int argc, char **argv)
// ckcomputeDes arguments 7..12: stride, two scratch arrays of localWorkSize
// floats, gauss_s2, des, group.
clSetKernelArg(ckcomputeDes, 7, sizeof(int), (void *)&stride);
// clSetKernelArg(ckcomputeDes, 8, sizeof(cl_mem), (void *)&rrx);
// clSetKernelArg(ckcomputeDes, 9, sizeof(cl_mem), (void *)&rry);
#ifdef SLM
// Local-memory arguments: only the byte size is supplied, the value is NULL.
clSetKernelArg(ckcomputeDes, 8, localWorkSize * sizeof(float), 0);
clSetKernelArg(ckcomputeDes, 9, localWorkSize * sizeof(float), 0);
#else // SLM
// Scratch arrays are created as buffers of the same byte size instead.
fake_slm[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, localWorkSize * sizeof(float), NULL, &ciErrNum);
fake_slm[8] = clCreateBuffer(context, CL_MEM_READ_WRITE, localWorkSize * sizeof(float), NULL, &ciErrNum);
// Buffer arguments are passed as cl_mem handles, so arg_size is sizeof(cl_mem),
// not the byte size of the buffer.
// NOTE(review): unlike local memory, each buffer is shared by every
// work-group — confirm the kernel indexes them accordingly.
clSetKernelArg(ckcomputeDes, 8, sizeof(cl_mem), (void *)&fake_slm[7]);
clSetKernelArg(ckcomputeDes, 9, sizeof(cl_mem), (void *)&fake_slm[8]);
#endif // SLM
clSetKernelArg(ckcomputeDes, 10, sizeof(cl_mem), (void *)&gauss_s2);
clSetKernelArg(ckcomputeDes, 11, sizeof(cl_mem), (void *)&des);
clSetKernelArg(ckcomputeDes, 12, sizeof(int), (void *)&group);
@@ -1038,7 +1077,12 @@ mid = clCreateBuffer(context,
// cknormalDes arguments: des, ndes, and a scratch array of NDES_BLOCK floats.
clSetKernelArg(cknormalDes, 0, sizeof(cl_mem), (void *)&des);
clSetKernelArg(cknormalDes, 1, sizeof(cl_mem), (void *)&ndes);
#ifdef SLM
// Local-memory argument: only the byte size is supplied, the value is NULL.
clSetKernelArg(cknormalDes, 2, NDES_BLOCK * sizeof(float), 0);
#else // SLM
// Scratch array is created as a buffer of the same byte size instead.
fake_slm[9] = clCreateBuffer(context, CL_MEM_READ_WRITE, NDES_BLOCK * sizeof(float), NULL, &ciErrNum);
// A buffer argument is passed as a cl_mem handle, so arg_size is sizeof(cl_mem),
// not the byte size of the buffer.
// NOTE(review): unlike local memory, this one buffer is shared by every
// work-group — confirm the kernel indexes it accordingly.
clSetKernelArg(cknormalDes, 2, sizeof(cl_mem), (void *)&fake_slm[9]);
#endif // SLM
SHOWINFO(clEnqueueNDRangeKernel);
ciErrNum = clEnqueueNDRangeKernel(clqueue,