[primitives,opencl] fix alpha handling

This commit is contained in:
akallabeth
2024-02-19 11:12:03 +01:00
committed by akallabeth
parent 1a42cf4f26
commit d76fcd2364
3 changed files with 550 additions and 111 deletions

View File

@@ -45,114 +45,191 @@ typedef struct
cl_program program;
} primitives_opencl_context;
typedef struct
{
primitives_opencl_context* cl;
cl_kernel kernel;
cl_mem srcObjs[3];
cl_mem dstObj;
prim_size_t roi;
size_t dstStep;
} primitives_cl_kernel;
static primitives_opencl_context* primitives_get_opencl_context(void);
static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const pSrc[3],
const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep,
const prim_size_t* roi)
static void cl_kernel_free(primitives_cl_kernel* kernel)
{
cl_int ret = 0;
cl_mem objs[3] = { NULL, NULL, NULL };
cl_mem destObj = NULL;
cl_kernel kernel = NULL;
size_t indexes[2] = { 0 };
const char* sourceNames[] = { "Y", "U", "V" };
primitives_opencl_context* cl = primitives_get_opencl_context();
if (!kernel)
return;
kernel = clCreateKernel(cl->program, kernelName, &ret);
if (kernel->dstObj)
clReleaseMemObject(kernel->dstObj);
for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
{
cl_mem obj = kernel->srcObjs[i];
kernel->srcObjs[i] = NULL;
if (obj)
clReleaseMemObject(obj);
}
if (kernel->kernel)
clReleaseKernel(kernel->kernel);
free(kernel);
}
static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi)
{
WINPR_ASSERT(kernelName);
WINPR_ASSERT(roi);
primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel));
if (!kernel)
goto fail;
kernel->roi = *roi;
kernel->cl = primitives_get_opencl_context();
if (!kernel->cl)
goto fail;
cl_int ret = CL_INVALID_VALUE;
kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
return -1;
goto fail;
}
for (cl_uint i = 0; i < 3; i++)
return kernel;
fail:
cl_kernel_free(kernel);
return NULL;
}
static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx,
const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3])
{
const char* sourceNames[] = { "Y", "U", "V" };
WINPR_ASSERT(ctx);
WINPR_ASSERT(pSrc);
WINPR_ASSERT(srcStep);
for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
{
objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
srcStep[i] * roi->height, (char*)pSrc[i], &ret);
cl_int ret = CL_INVALID_VALUE;
ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1ull * srcStep[i] * ctx->roi.height, pSrc[i], &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
goto error_objs;
return FALSE;
}
}
destObj = clCreateBuffer(cl->context, CL_MEM_WRITE_ONLY, dstStep * roi->height, NULL, &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to create dest obj");
goto error_objs;
}
/* push source + stride arguments*/
for (cl_uint i = 0; i < 3; i++)
{
ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]);
ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
goto error_set_args;
return FALSE;
}
ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]);
ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
goto error_set_args;
return FALSE;
}
}
ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj);
return TRUE;
}
static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
{
WINPR_ASSERT(ctx);
ctx->dstStep = dstStep;
cl_int ret = CL_INVALID_VALUE;
ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
1ull * dstStep * ctx->roi.height, NULL, &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to create dest obj");
return FALSE;
}
ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to set arg destObj");
goto error_set_args;
return FALSE;
}
ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep);
ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to set arg dstStep");
goto error_set_args;
return FALSE;
}
indexes[0] = roi->width;
indexes[1] = roi->height;
ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 0, NULL, NULL);
return TRUE;
}
static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
{
WINPR_ASSERT(ctx);
WINPR_ASSERT(pDst);
size_t indexes[2] = { 0 };
indexes[0] = ctx->roi.width;
indexes[1] = ctx->roi.height;
cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
0, NULL, NULL);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to enqueue call kernel");
goto error_set_args;
return FALSE;
}
/* Transfer result to host */
ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0,
NULL, NULL);
ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "unable to read back buffer");
goto error_set_args;
return FALSE;
}
/* cleanup things */
clReleaseMemObject(destObj);
for (cl_uint i = 0; i < 3; i++)
if (objs[i])
clReleaseMemObject(objs[i]);
clReleaseKernel(kernel);
return TRUE;
}
return PRIMITIVES_SUCCESS;
static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const WINPR_RESTRICT pSrc[3],
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
const prim_size_t* WINPR_RESTRICT roi)
{
pstatus_t res = -1;
error_set_args:
clReleaseMemObject(destObj);
error_objs:
for (cl_uint i = 0; i < 3; i++)
{
if (objs[i])
clReleaseMemObject(objs[i]);
}
clReleaseKernel(kernel);
return -1;
primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
if (!ctx)
goto fail;
if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
goto fail;
if (!cl_kernel_set_destination(ctx, dstStep))
goto fail;
if (!cl_kernel_process(ctx, pDst))
goto fail;
res = PRIMITIVES_SUCCESS;
fail:
cl_kernel_free(ctx);
return res;
}
static primitives_opencl_context openclContext = { 0 };
@@ -162,20 +239,27 @@ static primitives_opencl_context* primitives_get_opencl_context(void)
return &openclContext;
}
static void cl_context_free(primitives_opencl_context* ctx)
{
if (!ctx)
return;
clReleaseProgram(ctx->program);
clReleaseCommandQueue(ctx->commandQueue);
clReleaseContext(ctx->context);
clReleaseDevice(ctx->deviceId);
ctx->support = FALSE;
}
static pstatus_t primitives_uninit_opencl(void)
{
if (!openclContext.support)
return PRIMITIVES_SUCCESS;
clReleaseProgram(openclContext.program);
clReleaseCommandQueue(openclContext.commandQueue);
clReleaseContext(openclContext.context);
clReleaseDevice(openclContext.deviceId);
openclContext.support = FALSE;
cl_context_free(&openclContext);
return PRIMITIVES_SUCCESS;
}
static const char* openclProgram =
static const char openclProgram[] =
#include "primitives.cl"
;
@@ -209,8 +293,8 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
{
cl_device_id device_id = NULL;
cl_context context = NULL;
char platformName[1000];
char deviceName[1000];
char platformName[1000] = { 0 };
char deviceName[1000] = { 0 };
ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
platformName, NULL);
@@ -263,13 +347,13 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
return FALSE;
}
programLen = strlen(openclProgram);
cl->program =
clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret);
programLen = strnlen(openclProgram, sizeof(openclProgram));
const char* ptr = openclProgram;
cl->program = clCreateProgramWithSource(cl->context, 1, &ptr, &programLen, &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "openCL: unable to create program");
goto out_program_create;
goto fail;
}
ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
@@ -289,42 +373,55 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
{
WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
}
goto out_program_build;
goto fail;
}
kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret);
if (ret != CL_SUCCESS)
{
WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
goto out_program_build;
goto fail;
}
clReleaseKernel(kernel);
cl->support = TRUE;
return TRUE;
out_program_build:
clReleaseProgram(cl->program);
out_program_create:
clReleaseCommandQueue(cl->commandQueue);
clReleaseContext(cl->context);
clReleaseDevice(cl->deviceId);
fail:
cl_context_free(cl);
return FALSE;
}
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* WINPR_RESTRICT roi)
{
const char* kernel_name = NULL;
switch (DstFormat)
{
case PIXEL_FORMAT_ABGR32:
kernel_name = "yuv420_to_abgr_1b";
break;
case PIXEL_FORMAT_XBGR32:
kernel_name = "yuv420_to_xbgr_1b";
break;
case PIXEL_FORMAT_RGBX32:
kernel_name = "yuv420_to_rgba_1b";
break;
case PIXEL_FORMAT_RGBA32:
kernel_name = "yuv420_to_rgbx_1b";
break;
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
kernel_name = "yuv420_to_bgra_1b";
break;
case PIXEL_FORMAT_BGRX32:
kernel_name = "yuv420_to_bgrx_1b";
break;
case PIXEL_FORMAT_XRGB32:
kernel_name = "yuv420_to_xrgb_1b";
break;
case PIXEL_FORMAT_ARGB32:
kernel_name = "yuv420_to_argb_1b";
break;
@@ -340,19 +437,36 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const U
return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
}
static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* WINPR_RESTRICT roi)
{
const char* kernel_name = NULL;
switch (DstFormat)
{
case PIXEL_FORMAT_ABGR32:
kernel_name = "yuv444_to_abgr_1b";
break;
case PIXEL_FORMAT_XBGR32:
kernel_name = "yuv444_to_xbgr_1b";
break;
case PIXEL_FORMAT_RGBX32:
kernel_name = "yuv444_to_rgba_1b";
break;
case PIXEL_FORMAT_RGBA32:
kernel_name = "yuv444_to_rgbx_1b";
break;
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
kernel_name = "yuv444_to_bgra_1b";
break;
case PIXEL_FORMAT_BGRX32:
kernel_name = "yuv444_to_bgrx_1b";
break;
case PIXEL_FORMAT_XRGB32:
kernel_name = "yuv444_to_xrgb_1b";
break;
case PIXEL_FORMAT_ARGB32:
kernel_name = "yuv444_to_argb_1b";
break;

View File

@@ -28,11 +28,11 @@ uchar clamp_uc(int v, short l, short h)
return (uchar)v;
}
__kernel void yuv420_to_argb_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
__kernel void yuv420_to_rgba_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
@@ -49,17 +49,234 @@ __kernel void yuv420_to_argb_1b(
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = 0xff; /* A */
destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
/* A */
}
__kernel void yuv420_to_abgr_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
/* A */
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
}
__kernel void yuv444_to_abgr_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
/* A */
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
}
__kernel void yuv444_to_rgba_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
/* A */
}
__kernel void yuv420_to_rgbx_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv420_to_xbgr_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = 0xff; /* A */
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
}
__kernel void yuv444_to_xbgr_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = 0xff; /* A */
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
}
__kernel void yuv444_to_rgbx_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv420_to_argb_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
/* A */
destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
}
__kernel void yuv420_to_bgra_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
@@ -79,14 +296,14 @@ __kernel void yuv420_to_bgra_1b(
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
/* A */
}
__kernel void yuv444_to_bgra_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
@@ -106,14 +323,14 @@ __kernel void yuv444_to_bgra_1b(
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
/* A */
}
__kernel void yuv444_to_argb_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
@@ -133,6 +350,114 @@ __kernel void yuv444_to_argb_1b(
destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[0] = 0xff; /* A */
/* A */
}
__kernel void yuv420_to_xrgb_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = 0xff; /* A */
destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
}
__kernel void yuv420_to_bgrx_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv444_to_bgrx_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv444_to_xrgb_1b(
__global const uchar *bufY, unsigned strideY,
__global const uchar *bufU, unsigned strideU,
__global const uchar *bufV, unsigned strideV,
__global uchar *dest, unsigned strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[0] = 0xff; /* A */
}
)

View File

@@ -45,7 +45,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for
for (x = 0; x < size; x++)
{
const double maxDiff = 4.0;
const LONG maxDiff = 4;
UINT32 sColor = 0;
UINT32 dColor = 0;
BYTE sR = 0;
@@ -63,7 +63,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for
FreeRDPSplitColor(sColor, format, &sR, &sG, &sB, &sA, NULL);
FreeRDPSplitColor(dColor, format, &dR, &dG, &dB, &dA, NULL);
if ((abs(sR - dR) > maxDiff) || (abs(sG - dG) > maxDiff) || (abs(sB - dB) > maxDiff))
if ((labs(sR - dR) > maxDiff) || (labs(sG - dG) > maxDiff) || (labs(sB - dB) > maxDiff))
{
fprintf(
stderr,
@@ -477,7 +477,7 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
yuv_step[1] = uvwidth;
yuv_step[2] = uvwidth;
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
for (x = 0; x < ARRAYSIZE(formats); x++)
{
pstatus_t rc = 0;
const UINT32 DstFormat = formats[x];