// Preprocessor configuration for the OpenCL wrapper header.
//  - Opens the include guard TESSERACT_OPENCL_OPENCLWRAPPER_H_.
//  - IF_WINDOWS(X) / NOT_WINDOWS(X): in the Windows branch IF_WINDOWS(X)
//    expands to X and NOT_WINDOWS(X) expands to nothing; the other visible
//    branches define NOT_WINDOWS(X) as X.
//  - PERF_COUNT_VERBOSE is 1. PERF_COUNT_START / PERF_COUNT_END only get a
//    body when PERF_COUNT_VERBOSE >= 2 and PERF_COUNT_SUB only when it is
//    >= 3; empty definitions of all three are also visible (presumably the
//    #else arms -- the #else/#endif lines are not part of this listing).
//  - Two timing variants exist: one built on QueryPerformanceCounter and one
//    built on clock_gettime(CLOCK_MONOTONIC). Both report through tprintf
//    using PERF_COUNT_REPORT_STR.
//  - The non-empty PERF_COUNT_START declares locals (funct_name,
//    elapsed_time_sec, the time_* variables) that PERF_COUNT_END and
//    PERF_COUNT_SUB reference by name, so they have to be expanded in the
//    same scope as PERF_COUNT_START.
//  - `#define strcasecmp strcmp` replaces strcasecmp with strcmp wherever it
//    is active. NOTE(review): the condition guarding it is not visible here;
//    confirm it is limited to platforms that lack strcasecmp.
//  - MAX_* and GROUPSIZE_* are compile-time size constants.
// NOTE(review): the trailing `cl_context mpkContext;` looks like the first
// member of a struct whose opening line is missing from this listing.
11 #ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_ 12 #define TESSERACT_OPENCL_OPENCLWRAPPER_H_ 15 #include "allheaders.h" 22 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ 23 defined(__CYGWIN__) || defined(__MINGW32__) 28 #define IF_WINDOWS(X) X 32 #define NOT_WINDOWS(X) 33 #elif defined(__linux__) 42 #define NOT_WINDOWS(X) X 43 #elif defined(__APPLE__) 52 #define NOT_WINDOWS(X) X 62 #define NOT_WINDOWS(X) X 78 #define PERF_COUNT_VERBOSE 1 79 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" 83 #if PERF_COUNT_VERBOSE >= 2 84 #define PERF_COUNT_START(FUNCT_NAME) \ 85 char* funct_name = FUNCT_NAME; \ 86 double elapsed_time_sec; \ 87 LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \ 89 QueryPerformanceFrequency(&freq); \ 90 QueryPerformanceCounter(&time_funct_start); \ 91 time_sub_start = time_funct_start; \ 92 time_sub_end = time_funct_start; 94 #define PERF_COUNT_END \ 95 QueryPerformanceCounter(&time_funct_end); \ 96 elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \ 97 (double)(freq.QuadPart); \ 98 tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); 100 #define PERF_COUNT_START(FUNCT_NAME) 101 #define PERF_COUNT_END 104 #if PERF_COUNT_VERBOSE >= 3 105 #define PERF_COUNT_SUB(SUB) \ 106 QueryPerformanceCounter(&time_sub_end); \ 107 elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \ 108 (double)(freq.QuadPart); \ 109 tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ 110 time_sub_start = time_sub_end; 112 #define PERF_COUNT_SUB(SUB) 118 #if PERF_COUNT_VERBOSE >= 2 119 #define PERF_COUNT_START(FUNCT_NAME) \ 120 char* funct_name = FUNCT_NAME; \ 121 double elapsed_time_sec; \ 122 timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ 123 clock_gettime(CLOCK_MONOTONIC, &time_funct_start); \ 124 time_sub_start = time_funct_start; \ 125 time_sub_end = time_funct_start; 127 #define PERF_COUNT_END \ 128 
clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ 130 (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \ 131 (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \ 132 tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); 134 #define PERF_COUNT_START(FUNCT_NAME) 135 #define PERF_COUNT_END 138 #if PERF_COUNT_VERBOSE >= 3 139 #define PERF_COUNT_SUB(SUB) \ 140 clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \ 142 (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \ 143 (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \ 144 tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ 145 time_sub_start = time_sub_end; 147 #define PERF_COUNT_SUB(SUB) 156 #include "opencl_device_selection.h" 159 #define strcasecmp strcmp 162 #define MAX_KERNEL_STRING_LEN 64 163 #define MAX_CLFILE_NUM 50 164 #define MAX_CLKERNEL_NUM 200 165 #define MAX_KERNEL_NAME_LEN 64 166 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E 167 #define GROUPSIZE_X 16 168 #define GROUPSIZE_Y 16 169 #define GROUPSIZE_HMORX 256 170 #define GROUPSIZE_HMORY 1 173 cl_context mpkContext;
// Per-kernel environment members: an OpenCL command queue, an OpenCL program
// and a fixed-size kernel-name buffer.
// NOTE(review): the enclosing struct header is not visible in this listing;
// presumably this is the KernelEnv type used by the function declarations
// further down -- confirm.
174 cl_command_queue mpkCmdQueue;
175 cl_program mpkProgram;
// NOTE(review): the buffer size is hard-coded to 150 although
// MAX_KERNEL_NAME_LEN (64) is defined in this header -- confirm which limit
// is intended.
177 char mckKernelName[150];
// Bundle of OpenCL handles: platform id, context, device id and command
// queue.
// NOTE(review): the enclosing struct header is not visible in this listing;
// the "mpOcl" prefix suggests a dedicated OpenCL environment record --
// confirm the type name against the full file.
181 cl_platform_id mpOclPlatformID;
182 cl_context mpOclContext;
183 cl_device_id mpOclDevsID;
184 cl_command_queue mpOclCmdQueue;
// Function-pointer type for kernel entry points: takes a pointer to an
// array of untyped user-data pointers plus the KernelEnv to run against, and
// returns an int.
// NOTE(review): the meaning of the int result is not defined in this
// chunk -- confirm against the implementations.
186 typedef int (*cl_kernel_function)(
void** userdata, KernelEnv* kenv);
// CHECK_OPENCL(status, name): when `status` differs from CL_SUCCESS, reports
// the numeric status and the `name` string through tprintf.
// NOTE(review): the rest of the macro body (anything after the tprintf call,
// including the closing brace) is missing from this listing, so whether it
// also returns or aborts cannot be determined here.
// NOTE(review): the message text reads "at when" -- likely a wording slip;
// it is a runtime string and is left untouched.
// NOTE(review): the trailing `cl_platform_id mpPlatformID;` looks like the
// first member of the next struct, whose header line is not visible.
188 #define CHECK_OPENCL(status, name) \ 189 if (status != CL_SUCCESS) { \ 190 tprintf("OpenCL error code is %d at when %s .\n", status, name); \ 195 cl_platform_id mpPlatformID;
// Members of the GPU environment record: device type, context, device
// list / selected device, command queue, and per-file kernel and program
// tables.
// NOTE(review): the enclosing struct header is not visible; presumably this
// is the GPUEnv type used by the `gpuEnv` declaration below -- confirm.
196 cl_device_type mDevType;
197 cl_context mpContext;
198 cl_device_id* mpArryDevsID;
199 cl_device_id mpDevID;
200 cl_command_queue mpCmdQueue;
// Both tables are sized by MAX_CLFILE_NUM (50).
201 cl_kernel mpArryKernels[MAX_CLFILE_NUM];
202 cl_program mpArryPrograms[MAX_CLFILE_NUM];
// NOTE(review): the next two declarators are incomplete in this listing
// (the second dimension of mArryKnelSrcFile and the element type of
// mArrykernelNames are missing); consult the full file before editing.
204 char mArryKnelSrcFile[MAX_CLFILE_NUM]
206 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
// Table of kernel entry points, sized by MAX_CLKERNEL_NUM (200).
207 cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
// Counters; the declaration continues past what is visible here.
208 int mnKernelCount, mnFileCount,
// A GPUEnv object declared `static`.
// NOTE(review): the enclosing class/namespace header is not visible in this
// listing. If these declarations are class members, `static` makes them
// class-level; if they are at file scope in a header, every translation
// unit would get its own copy -- confirm against the full file.
217 static GPUEnv gpuEnv;
// Environment set-up and tear-down entry points. All return int; the meaning
// of the returned value is not defined in this chunk.
// NOTE(review): the parameter lists of InitOpenclRunEnv and
// InitOpenclRunEnv_DeviceSelection are cut off in this listing.
221 static int InitEnv();
222 static int InitOpenclRunEnv(
224 static int InitOpenclRunEnv_DeviceSelection(
226 static int RegistOpenclKernel();
227 static int ReleaseOpenclRunEnv();
// Takes the GPUEnv to operate on explicitly, as a mutable pointer.
228 static int ReleaseOpenclEnv(GPUEnv* gpuInfo);
// Kernel compilation and binary-file helpers. All return int; the meaning of
// the returned value is not defined in this chunk.
//
// CompileKernelFile: takes a mutable GPUEnv and a build-option string.
229 static int CompileKernelFile(GPUEnv* gpuInfo,
const char* buildOption);
// CachedOfKernerPrg: takes a read-only GPUEnv and a .cl file name.
// NOTE(review): presumably a lookup for an already-built program -- confirm.
230 static int CachedOfKernerPrg(
const GPUEnv* gpuEnvCached,
231 const char* clFileName);
// GeneratBinFromKernelSource: takes a cl_program and a .cl file name.
232 static int GeneratBinFromKernelSource(cl_program program,
233 const char* clFileName);
// WriteBinaryToFile: takes a file name and a byte buffer (`birary`).
// NOTE(review): the remaining parameter(s) are cut off in this listing.
234 static int WriteBinaryToFile(
const char* fileName,
const char* birary,
// BinaryGenerated: takes a .cl file name and an out-parameter FILE handle.
// NOTE(review): who closes *fhandle is not visible here -- confirm.
236 static int BinaryGenerated(
const char* clFileName, FILE** fhandle);
// pixReadFromTiffKernel: takes a buffer of l_uint32 values plus w, h and wpl
// as l_int32, and returns an l_uint32 pointer.
// NOTE(review): the declaration is cut off after `wpl` in this listing;
// remaining parameters and ownership of the returned pointer must be
// checked in the full file.
239 static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
240 l_int32 h, l_int32 wpl,
// composeRGBPixelCl: takes two int buffers (`tiffdata`, `line`) and the
// dimensions `h` and `w` (note the order: height first, then width).
// Returns int; the meaning of the returned value is not defined in this
// chunk.
// NOTE(review): which buffer is input and which is output is not visible
// here -- confirm against the implementation.
242 static int composeRGBPixelCl(
int* tiffdata,
int* line,
int h,
int w);
// initMorphCLAllocations: takes wpl and h as l_int32 plus a source Pix.
// Returns int; the meaning of the returned value is not defined in this
// chunk.
// NOTE(review): the names suggest these two form an allocate/release pair
// for morphology buffers -- confirm the pairing in the implementation.
247 static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs);
248 static void releaseMorphCLBuffers();
// pixGetLinesCL: takes destination and source Pix pointers, three Pix**
// out-parameters (pix_vline, pix_hline, pixClosed), a flag `getpixClosed`,
// and horizontal/vertical size pairs for close, open and line. Returns
// nothing.
// NOTE(review): presumably `getpixClosed` controls whether *pixClosed is
// produced, and the size pairs are structuring-element dimensions -- confirm
// against the implementation. Ownership of the Pix objects returned through
// the out-parameters is not visible here.
250 static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
251 Pix** pix_hline, Pix** pixClosed,
bool getpixClosed,
252 l_int32 close_hsize, l_int32 close_vsize,
253 l_int32 open_hsize, l_int32 open_vsize,
254 l_int32 line_hsize, l_int32 line_vsize);
// SetKernelEnv: takes a mutable KernelEnv pointer. Returns int; the meaning
// of the returned value is not defined in this chunk.
// NOTE(review): presumably fills *envInfo from the shared GPU state --
// confirm.
258 static int SetKernelEnv(KernelEnv* envInfo);
// LoadOpencl / FreeOpenclDll: no parameters.
// NOTE(review): the names suggest dynamic loading and unloading of the
// OpenCL library; any platform guard around them is not visible in this
// listing.
268 static int LoadOpencl();
271 static void FreeOpenclDll();
// AddKernelConfig: takes a kernel count/index and a kernel name string.
// NOTE(review): declared `inline` but no body is visible in this chunk;
// confirm the definition is available wherever it is used.
274 inline static int AddKernelConfig(
int kCount,
const char* kName);
// HistogramRectOCL: histogram entry point over a rectangle of an image
// buffer.
//   imagedata            untyped pointer to the image buffer
//   bytes_per_pixel      pixel size in bytes
//   bytes_per_line       line size in bytes
//   left, top            rectangle origin
//   width, height        rectangle extent
//   kHistogramSize       histogram size
//   histogramAllChannels int buffer for the result
// Returns int; the meaning of the returned value is not defined in this
// chunk.
// NOTE(review): the parameter descriptions above are read off the names
// only. The required length of histogramAllChannels (presumably
// kHistogramSize per channel) must be confirmed against the implementation.
277 static int HistogramRectOCL(
void* imagedata,
int bytes_per_pixel,
278 int bytes_per_line,
int left,
int top,
int width,
279 int height,
int kHistogramSize,
280 int* histogramAllChannels);
// ThresholdRectToPixOCL: thresholding entry point that hands back a Pix
// through the `pix` out-parameter.
//   imagedata        byte buffer holding the image
//   bytes_per_pixel  pixel size in bytes
//   bytes_per_line   line size in bytes
//   thresholds       int buffer of threshold values
//   hi_values        int buffer of hi values
//   pix              Pix** out-parameter
//   rect_height, rect_width, rect_top, rect_left   rectangle geometry
// Returns int; the meaning of the returned value is not defined in this
// chunk.
// NOTE(review): the parameter descriptions above are read off the names
// only. The required lengths of `thresholds` / `hi_values` (presumably one
// entry per channel) and whether *pix is allocated by the callee must be
// confirmed against the implementation.
// NOTE(review): the rectangle order here is height, width, top, left --
// different from HistogramRectOCL (left, top, width, height); take care at
// call sites.
282 static int ThresholdRectToPixOCL(
unsigned char* imagedata,
283 int bytes_per_pixel,
int bytes_per_line,
284 int* thresholds,
int* hi_values, Pix** pix,
285 int rect_height,
int rect_width,
286 int rect_top,
int rect_left);
// Device-selection state and accessors.
// getDeviceSelection returns a ds_device by value; selectedDevice and
// deviceIsSelected are static state of the same types.
// NOTE(review): presumably getDeviceSelection caches its result in
// selectedDevice and marks it with deviceIsSelected -- confirm against the
// implementation.
288 static ds_device getDeviceSelection();
289 static ds_device selectedDevice;
290 static bool deviceIsSelected;
// Takes no arguments and returns bool.
// NOTE(review): presumably reports whether the selected device is an OpenCL
// device -- confirm.
291 static bool selectedDeviceIsOpenCL();
295 #endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_