From d76fcd23645e8498cd481416eee2ee16f8cd5d71 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Mon, 19 Feb 2024 11:12:03 +0100 Subject: [PATCH] [primitives,opencl] fix alpha handling --- libfreerdp/primitives/prim_YUV_opencl.c | 288 +++++++++----- libfreerdp/primitives/primitives.cl | 367 +++++++++++++++++- .../primitives/test/TestPrimitivesYUV.c | 6 +- 3 files changed, 550 insertions(+), 111 deletions(-) diff --git a/libfreerdp/primitives/prim_YUV_opencl.c b/libfreerdp/primitives/prim_YUV_opencl.c index 7745e96be..2ca1b31d8 100644 --- a/libfreerdp/primitives/prim_YUV_opencl.c +++ b/libfreerdp/primitives/prim_YUV_opencl.c @@ -45,114 +45,191 @@ typedef struct cl_program program; } primitives_opencl_context; +typedef struct +{ + primitives_opencl_context* cl; + cl_kernel kernel; + cl_mem srcObjs[3]; + cl_mem dstObj; + prim_size_t roi; + size_t dstStep; +} primitives_cl_kernel; + static primitives_opencl_context* primitives_get_opencl_context(void); -static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const pSrc[3], - const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, - const prim_size_t* roi) +static void cl_kernel_free(primitives_cl_kernel* kernel) { - cl_int ret = 0; - cl_mem objs[3] = { NULL, NULL, NULL }; - cl_mem destObj = NULL; - cl_kernel kernel = NULL; - size_t indexes[2] = { 0 }; - const char* sourceNames[] = { "Y", "U", "V" }; - primitives_opencl_context* cl = primitives_get_opencl_context(); + if (!kernel) + return; - kernel = clCreateKernel(cl->program, kernelName, &ret); + if (kernel->dstObj) + clReleaseMemObject(kernel->dstObj); + + for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++) + { + cl_mem obj = kernel->srcObjs[i]; + kernel->srcObjs[i] = NULL; + if (obj) + clReleaseMemObject(obj); + } + + if (kernel->kernel) + clReleaseKernel(kernel->kernel); + + free(kernel); +} + +static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi) +{ + WINPR_ASSERT(kernelName); + WINPR_ASSERT(roi); + + primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel)); + if (!kernel) + goto fail; + + kernel->roi = *roi; + kernel->cl = primitives_get_opencl_context(); + if (!kernel->cl) + goto fail; + + cl_int ret = CL_INVALID_VALUE; + kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName); - return -1; + goto fail; } - for (cl_uint i = 0; i < 3; i++) + return kernel; +fail: + cl_kernel_free(kernel); + return NULL; +} + +static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx, + const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3]) +{ + const char* sourceNames[] = { "Y", "U", "V" }; + + WINPR_ASSERT(ctx); + WINPR_ASSERT(pSrc); + WINPR_ASSERT(srcStep); + + for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++) { - objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - srcStep[i] * roi->height, (char*)pSrc[i], &ret); + cl_int ret = CL_INVALID_VALUE; + ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + 1ull * srcStep[i] * ctx->roi.height, pSrc[i], &ret); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]); - goto error_objs; + return FALSE; } - } - destObj = clCreateBuffer(cl->context, CL_MEM_WRITE_ONLY, dstStep * roi->height, NULL, &ret); - if (ret != CL_SUCCESS) - { - WLog_ERR(TAG, "unable to create dest obj"); - goto error_objs; - } - - /* push source + stride arguments*/ - for (cl_uint i = 0; i < 3; i++) - { - ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]); + ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]); - goto error_set_args; + return FALSE; } - ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]); + ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]); - goto error_set_args; + return FALSE; } } - ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj); + return TRUE; +} + +static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep) +{ + + WINPR_ASSERT(ctx); + + ctx->dstStep = dstStep; + cl_int ret = CL_INVALID_VALUE; + ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY, + 1ull * dstStep * ctx->roi.height, NULL, &ret); + if (ret != CL_SUCCESS) + { + WLog_ERR(TAG, "unable to create dest obj"); + return FALSE; + } + + ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg destObj"); - goto error_set_args; + return FALSE; } - ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep); + ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg dstStep"); - goto error_set_args; + return FALSE; } - indexes[0] = roi->width; - indexes[1] = roi->height; - ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 0, NULL, NULL); + return TRUE; +} + +static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst) +{ + WINPR_ASSERT(ctx); + WINPR_ASSERT(pDst); + + size_t indexes[2] = { 0 }; + indexes[0] = ctx->roi.width; + indexes[1] = ctx->roi.height; + + cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL, + 0, NULL, NULL); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to enqueue call kernel"); - goto error_set_args; + return FALSE; } /* Transfer result to host */ - ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0, - NULL, NULL); + ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0, + ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to read back buffer"); - goto error_set_args; + return FALSE; } - /* cleanup things */ - clReleaseMemObject(destObj); - for (cl_uint i = 0; i < 3; i++) - if (objs[i]) - clReleaseMemObject(objs[i]); - clReleaseKernel(kernel); + return TRUE; +} - return PRIMITIVES_SUCCESS; +static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, + const prim_size_t* WINPR_RESTRICT roi) +{ + pstatus_t res = -1; -error_set_args: - clReleaseMemObject(destObj); -error_objs: - for (cl_uint i = 0; i < 3; i++) - { - if (objs[i]) - clReleaseMemObject(objs[i]); - } - clReleaseKernel(kernel); - return -1; + primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi); + if (!ctx) + goto fail; + + if (!cl_kernel_set_sources(ctx, pSrc, srcStep)) + goto fail; + + if (!cl_kernel_set_destination(ctx, dstStep)) + goto fail; + + if (!cl_kernel_process(ctx, pDst)) + goto fail; + + res = PRIMITIVES_SUCCESS; + +fail: + cl_kernel_free(ctx); + return res; } static primitives_opencl_context openclContext = { 0 }; @@ -162,20 +239,27 @@ static primitives_opencl_context* primitives_get_opencl_context(void) return &openclContext; } +static void cl_context_free(primitives_opencl_context* ctx) +{ + if (!ctx) + return; + clReleaseProgram(ctx->program); + clReleaseCommandQueue(ctx->commandQueue); + clReleaseContext(ctx->context); + clReleaseDevice(ctx->deviceId); + ctx->support = FALSE; +} + static pstatus_t primitives_uninit_opencl(void) { if (!openclContext.support) return PRIMITIVES_SUCCESS; - clReleaseProgram(openclContext.program); - clReleaseCommandQueue(openclContext.commandQueue); - clReleaseContext(openclContext.context); - clReleaseDevice(openclContext.deviceId); - openclContext.support = FALSE; + cl_context_free(&openclContext); return PRIMITIVES_SUCCESS; } -static const char* openclProgram = +static const char openclProgram[] = #include "primitives.cl" ; @@ -209,8 +293,8 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl) { cl_device_id device_id = NULL; cl_context context = NULL; - char platformName[1000]; - char deviceName[1000]; + char platformName[1000] = { 0 }; + char deviceName[1000] = { 0 }; ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL); @@ -263,13 +347,13 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl) return FALSE; } - programLen = strlen(openclProgram); - cl->program = - clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret); + programLen = strnlen(openclProgram, sizeof(openclProgram)); + const char* ptr = openclProgram; + cl->program = clCreateProgramWithSource(cl->context, 1, &ptr, &programLen, &ret); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "openCL: unable to create program"); - goto out_program_create; + goto fail; } ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL); @@ -289,42 +373,55 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl) { WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer); } - goto out_program_build; + goto fail; } kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel"); - goto out_program_build; + goto fail; } clReleaseKernel(kernel); cl->support = TRUE; return TRUE; -out_program_build: - clReleaseProgram(cl->program); -out_program_create: - clReleaseCommandQueue(cl->commandQueue); - clReleaseContext(cl->context); - clReleaseDevice(cl->deviceId); +fail: + cl_context_free(cl); return FALSE; } -static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3], - BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, - const prim_size_t* roi) +static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi) { const char* kernel_name = NULL; switch (DstFormat) { + case PIXEL_FORMAT_ABGR32: + kernel_name = "yuv420_to_abgr_1b"; + break; + case PIXEL_FORMAT_XBGR32: + kernel_name = "yuv420_to_xbgr_1b"; + break; + case PIXEL_FORMAT_RGBX32: + kernel_name = "yuv420_to_rgba_1b"; + break; + case PIXEL_FORMAT_RGBA32: + kernel_name = "yuv420_to_rgbx_1b"; + break; case PIXEL_FORMAT_BGRA32: - case PIXEL_FORMAT_BGRX32: kernel_name = "yuv420_to_bgra_1b"; break; + case PIXEL_FORMAT_BGRX32: + kernel_name = "yuv420_to_bgrx_1b"; + break; case PIXEL_FORMAT_XRGB32: + kernel_name = "yuv420_to_xrgb_1b"; + break; case PIXEL_FORMAT_ARGB32: kernel_name = "yuv420_to_argb_1b"; break; @@ -340,19 +437,36 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const U return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi); } -static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3], - BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, - const prim_size_t* roi) +static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi) { const char* kernel_name = NULL; switch (DstFormat) { + case PIXEL_FORMAT_ABGR32: + kernel_name = "yuv444_to_abgr_1b"; + break; + case PIXEL_FORMAT_XBGR32: + kernel_name = "yuv444_to_xbgr_1b"; + break; + case PIXEL_FORMAT_RGBX32: + kernel_name = "yuv444_to_rgba_1b"; + break; + case PIXEL_FORMAT_RGBA32: + kernel_name = "yuv444_to_rgbx_1b"; + break; case PIXEL_FORMAT_BGRA32: - case PIXEL_FORMAT_BGRX32: kernel_name = "yuv444_to_bgra_1b"; break; + case PIXEL_FORMAT_BGRX32: + kernel_name = "yuv444_to_bgrx_1b"; + break; case PIXEL_FORMAT_XRGB32: + kernel_name = "yuv444_to_xrgb_1b"; + break; case PIXEL_FORMAT_ARGB32: kernel_name = "yuv444_to_argb_1b"; break; diff --git a/libfreerdp/primitives/primitives.cl b/libfreerdp/primitives/primitives.cl index b8790ac3b..5e094df28 100644 --- a/libfreerdp/primitives/primitives.cl +++ b/libfreerdp/primitives/primitives.cl @@ -28,11 +28,11 @@ uchar clamp_uc(int v, short l, short h) return (uchar)v; } -__kernel void yuv420_to_argb_1b( - __global const uchar *bufY, int strideY, - __global const uchar *bufU, int strideU, - __global const uchar *bufV, int strideV, - __global uchar *dest, int strideDest) +__kernel void yuv420_to_rgba_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -49,17 +49,234 @@ __kernel void yuv420_to_argb_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[0] = 0xff; /* A */ + destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + /* A */ +} + +__kernel void yuv420_to_abgr_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[(y / 2) * strideU + (x / 2)] - 128; + short V = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ +} + +__kernel void yuv444_to_abgr_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ +} + +__kernel void yuv444_to_rgba_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + /* A */ +} + +__kernel void yuv420_to_rgbx_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; + short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv420_to_xbgr_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[(y / 2) * strideU + (x / 2)] - 128; + short V = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ +} + +__kernel void yuv444_to_xbgr_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ +} + +__kernel void yuv444_to_rgbx_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[3] = 0xff; /* A */ +} + + +__kernel void yuv420_to_argb_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; + short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + /* A */ destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ } __kernel void yuv420_to_bgra_1b( - __global const uchar *bufY, int strideY, - __global const uchar *bufU, int strideU, - __global const uchar *bufV, int strideV, - __global uchar *dest, int strideDest) + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -79,14 +296,14 @@ __kernel void yuv420_to_bgra_1b( destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[3] = 0xff; /* A */ + /* A */ } __kernel void yuv444_to_bgra_1b( - __global const uchar *bufY, int strideY, - __global const uchar *bufU, int strideU, - __global const uchar *bufV, int strideV, - __global uchar *dest, int strideDest) + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -106,14 +323,14 @@ __kernel void yuv444_to_bgra_1b( destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[3] = 0xff; /* A */ + /* A */ } __kernel void yuv444_to_argb_1b( - __global const uchar *bufY, int strideY, - __global const uchar *bufU, int strideU, - __global const uchar *bufV, int strideV, - __global uchar *dest, int strideDest) + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -133,6 +350,114 @@ __kernel void yuv444_to_argb_1b( destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[0] = 0xff; /* A */ + /* A */ +} + +__kernel void yuv420_to_xrgb_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; + short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ +} + +__kernel void yuv420_to_bgrx_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[(y / 2) * strideU + (x / 2)] - 128; + short V = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv444_to_bgrx_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv444_to_xrgb_1b( + __global const uchar *bufY, unsigned strideY, + __global const uchar *bufU, unsigned strideU, + __global const uchar *bufV, unsigned strideV, + __global uchar *dest, unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[y * strideU + x] - 128; + short V = bufV[y * strideV + x] - 128; + + __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ + destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[0] = 0xff; /* A */ } ) diff --git a/libfreerdp/primitives/test/TestPrimitivesYUV.c b/libfreerdp/primitives/test/TestPrimitivesYUV.c index 4d17865ff..46fba2efd 100644 --- a/libfreerdp/primitives/test/TestPrimitivesYUV.c +++ b/libfreerdp/primitives/test/TestPrimitivesYUV.c @@ -45,7 +45,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for for (x = 0; x < size; x++) { - const double maxDiff = 4.0; + const LONG maxDiff = 4; UINT32 sColor = 0; UINT32 dColor = 0; BYTE sR = 0; @@ -63,7 +63,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for FreeRDPSplitColor(sColor, format, &sR, &sG, &sB, &sA, NULL); FreeRDPSplitColor(dColor, format, &dR, &dG, &dB, &dA, NULL); - if ((abs(sR - dR) > maxDiff) || (abs(sG - dG) > maxDiff) || (abs(sB - dB) > maxDiff)) + if ((labs(sR - dR) > maxDiff) || (labs(sG - dG) > maxDiff) || (labs(sB - dB) > maxDiff)) { fprintf( stderr, @@ -477,7 +477,7 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444) yuv_step[1] = uvwidth; yuv_step[2] = uvwidth; - for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++) + for (x = 0; x < ARRAYSIZE(formats); x++) { pstatus_t rc = 0; const UINT32 DstFormat = formats[x];