diff --git a/repos/hello_gpgpu/src/hello_gpgpu/benchmark/SURF/OpenSurf.cpp b/repos/hello_gpgpu/src/hello_gpgpu/benchmark/SURF/OpenSurf.cpp
index 34d64d62f7..138f545c48 100644
--- a/repos/hello_gpgpu/src/hello_gpgpu/benchmark/SURF/OpenSurf.cpp
+++ b/repos/hello_gpgpu/src/hello_gpgpu/benchmark/SURF/OpenSurf.cpp
@@ -43,6 +43,8 @@ using namespace std;
 // Profile the result of each step to a file
 // #define profile
 
+// #define SLM
+
 ///////////////////////////////////////////////////////////
 
 #include "SURF_kernel.h"
@@ -121,6 +123,12 @@ cl_mem rrx, rry;
 cl_mem des;
 cl_mem mid, ndes;
 
+#ifndef SLM
+// Global-memory buffers bound in place of the kernels' local-memory (SLM) arguments when SLM is undefined.
+// NOTE(review): assumes the non-SLM kernels declare these arguments as __global pointers and tolerate one buffer shared by all work-groups - confirm against the kernel source.
+cl_mem fake_slm[10];
+#endif // SLM
+
 ///////////////////////////////////////////////////////////
 double cRow, cCol, cInt, cBui, cExt, cOut, cMov, cRnum, cOri, cDes, cnDes, cCom;
 cl_event RowEvent, ColEvent, BuiEvent, ExtEvent, WriteOut, WriteMipts, OriEvent, DesEvent, nDesEvent, comEvent;
@@ -461,13 +469,16 @@ int main(int argc, char **argv)
 #endif
 */
 
-    d_Input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, imgSize * sizeof(float), data, &ciErrNum);
+    //d_Input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, imgSize * sizeof(float), data, &ciErrNum);
+    d_Input = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum); // host_ptr is NULL, so CL_MEM_COPY_HOST_PTR must not be set; the write below uploads data
+    ciErrNum = clEnqueueWriteBuffer(clqueue, d_Input, CL_FALSE, 0, imgSize * sizeof(float), (int *)data, 0, NULL, NULL);
     // oclCheckError(ciErrNum, CL_SUCCESS);
     d_Output = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum);
     // oclCheckError(ciErrNum, CL_SUCCESS);
     intImage = clCreateBuffer(context, CL_MEM_READ_WRITE, imgSize * sizeof(float), NULL, &ciErrNum);
     // oclCheckError(ciErrNum, CL_SUCCESS);
 
+
 #ifdef profile
     int N = imgSize;
     FILE *hin = fopen("in.dat", "w");
@@ -489,7 +500,12 @@ int main(int argc, char **argv)
 
     ciErrNum = clSetKernelArg(ckRowIntegral, 0, sizeof(cl_mem), (void *)&d_Input);
     ciErrNum |= clSetKernelArg(ckRowIntegral, 1, sizeof(cl_mem), (void *)&d_Output);
+#ifdef SLM
     ciErrNum |= clSetKernelArg(ckRowIntegral, 2, sharedMemSize * sizeof(float), NULL);
+#else // SLM
+    fake_slm[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sharedMemSize * sizeof(float), NULL, &ciErrNum);
+    ciErrNum |= clSetKernelArg(ckRowIntegral, 2, sizeof(cl_mem), (void *)&fake_slm[0]); // buffer argument: arg_size is sizeof(cl_mem), not the buffer's byte size
+#endif // SLM
     ciErrNum |= clSetKernelArg(ckRowIntegral, 3, sizeof(int), (void *)&width);
 
     /*
@@ -538,7 +554,12 @@ int main(int argc, char **argv)
 
     ciErrNum = clSetKernelArg(ckColIntegral, 0, sizeof(cl_mem), (void *)&d_Output);
     ciErrNum |= clSetKernelArg(ckColIntegral, 1, sizeof(cl_mem), (void *)&intImage);
+#ifdef SLM
     ciErrNum |= clSetKernelArg(ckColIntegral, 2, sharedMemSize * sizeof(float), NULL);
+#else // SLM
+    fake_slm[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sharedMemSize * sizeof(float), NULL, &ciErrNum);
+    ciErrNum |= clSetKernelArg(ckColIntegral, 2, sizeof(cl_mem), (void *)&fake_slm[1]); // buffer argument: arg_size is sizeof(cl_mem)
+#endif // SLM
     ciErrNum |= clSetKernelArg(ckColIntegral, 3, sizeof(int), (void *)&height);
     ciErrNum |= clSetKernelArg(ckColIntegral, 4, sizeof(int), (void *)&width);
 
@@ -780,11 +801,24 @@ int main(int argc, char **argv)
     clSetKernelArg(ckGetOrientation, 4, sizeof(int), (void *)&stride);
     clSetKernelArg(ckGetOrientation, 5, sizeof(cl_mem), (void *)&orientation);
     clSetKernelArg(ckGetOrientation, 6, sizeof(int), (void *)&cmn);
+#ifdef SLM
     clSetKernelArg(ckGetOrientation, 7, 109 * sizeof(float), 0);
     clSetKernelArg(ckGetOrientation, 8, 109 * sizeof(float), 0);
     clSetKernelArg(ckGetOrientation, 9, 109 * sizeof(float), 0);
     clSetKernelArg(ckGetOrientation, 10, 48 * sizeof(float), 0);
     clSetKernelArg(ckGetOrientation, 11, 48 * sizeof(float), 0);
+#else // SLM
+    fake_slm[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
+    fake_slm[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
+    fake_slm[4] = clCreateBuffer(context, CL_MEM_READ_WRITE, 109 * sizeof(float), NULL, &ciErrNum);
+    fake_slm[5] = clCreateBuffer(context, CL_MEM_READ_WRITE, 48 * sizeof(float), NULL, &ciErrNum);
+    fake_slm[6] = clCreateBuffer(context, CL_MEM_READ_WRITE, 48 * sizeof(float), NULL, &ciErrNum);
+    clSetKernelArg(ckGetOrientation, 7, sizeof(cl_mem), (void *)&fake_slm[2]); // buffer arguments: arg_size is sizeof(cl_mem)
+    clSetKernelArg(ckGetOrientation, 8, sizeof(cl_mem), (void *)&fake_slm[3]);
+    clSetKernelArg(ckGetOrientation, 9, sizeof(cl_mem), (void *)&fake_slm[4]);
+    clSetKernelArg(ckGetOrientation, 10, sizeof(cl_mem), (void *)&fake_slm[5]);
+    clSetKernelArg(ckGetOrientation, 11, sizeof(cl_mem), (void *)&fake_slm[6]);
+#endif // SLM
     // clSetKernelArg(ckGetOrientation, 12,sizeof(cl_mem), (void *)&test);
 
     SHOWINFO(clEnqueueNDRangeKernel);
@@ -950,8 +984,15 @@ int main(int argc, char **argv)
     clSetKernelArg(ckcomputeDes, 7, sizeof(int), (void *)&stride);
     // clSetKernelArg(ckcomputeDes, 8, sizeof(cl_mem), (void *)&rrx);
     // clSetKernelArg(ckcomputeDes, 9, sizeof(cl_mem), (void *)&rry);
+#ifdef SLM
     clSetKernelArg(ckcomputeDes, 8, localWorkSize * sizeof(float), 0);
     clSetKernelArg(ckcomputeDes, 9, localWorkSize * sizeof(float), 0);
+#else // SLM
+    fake_slm[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, localWorkSize * sizeof(float), NULL, &ciErrNum);
+    fake_slm[8] = clCreateBuffer(context, CL_MEM_READ_WRITE, localWorkSize * sizeof(float), NULL, &ciErrNum);
+    clSetKernelArg(ckcomputeDes, 8, sizeof(cl_mem), (void *)&fake_slm[7]); // buffer arguments: arg_size is sizeof(cl_mem)
+    clSetKernelArg(ckcomputeDes, 9, sizeof(cl_mem), (void *)&fake_slm[8]);
+#endif // SLM
     clSetKernelArg(ckcomputeDes, 10, sizeof(cl_mem), (void *)&gauss_s2);
     clSetKernelArg(ckcomputeDes, 11, sizeof(cl_mem), (void *)&des);
     clSetKernelArg(ckcomputeDes, 12, sizeof(int), (void *)&group);
@@ -1038,7 +1079,12 @@ mid = clCreateBuffer(context,
 
     clSetKernelArg(cknormalDes, 0, sizeof(cl_mem), (void *)&des);
     clSetKernelArg(cknormalDes, 1, sizeof(cl_mem), (void *)&ndes);
+#ifdef SLM
     clSetKernelArg(cknormalDes, 2, NDES_BLOCK * sizeof(float), 0);
+#else // SLM
+    fake_slm[9] = clCreateBuffer(context, CL_MEM_READ_WRITE, NDES_BLOCK * sizeof(float), NULL, &ciErrNum);
+    clSetKernelArg(cknormalDes, 2, sizeof(cl_mem), (void *)&fake_slm[9]); // buffer argument: arg_size is sizeof(cl_mem)
+#endif // SLM
 
     SHOWINFO(clEnqueueNDRangeKernel);
     ciErrNum = clEnqueueNDRangeKernel(clqueue,