diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index f8737703e..5b02d23d4 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -84,24 +84,24 @@ typedef enum } avc444_frame_type; /* Function prototypes for all of the supported primitives. */ -typedef pstatus_t (*__copy_t)(const void* WINPR_RESTRICT pSrc, void* WINPR_RESTRICT pDst, - INT32 bytes); -typedef pstatus_t (*__copy_8u_t)(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst, - INT32 len); -typedef pstatus_t (*__copy_8u_AC4r_t)(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, /* bytes */ - BYTE* WINPR_RESTRICT pDst, INT32 dstStep, /* bytes */ - INT32 width, INT32 height); /* pixels */ -typedef pstatus_t (*__set_8u_t)(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len); -typedef pstatus_t (*__set_32s_t)(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len); -typedef pstatus_t (*__set_32u_t)(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len); -typedef pstatus_t (*__zero_t)(void* WINPR_RESTRICT pDst, size_t bytes); -typedef pstatus_t (*__alphaComp_argb_t)(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step, - const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step, - BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width, - UINT32 height); -typedef pstatus_t (*__add_16s_t)(const INT16* WINPR_RESTRICT pSrc1, - const INT16* WINPR_RESTRICT pSrc2, INT16* WINPR_RESTRICT pDst, - UINT32 len); +typedef pstatus_t (*fn_copy_t)(const void* WINPR_RESTRICT pSrc, void* WINPR_RESTRICT pDst, + INT32 bytes); +typedef pstatus_t (*fn_copy_8u_t)(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst, + INT32 len); +typedef pstatus_t (*fn_copy_8u_AC4r_t)(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, /* bytes */ + BYTE* WINPR_RESTRICT pDst, INT32 dstStep, /* bytes */ + INT32 width, INT32 height); /* pixels */ +typedef pstatus_t (*fn_set_8u_t)(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len); +typedef pstatus_t (*fn_set_32s_t)(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len); +typedef pstatus_t (*fn_set_32u_t)(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len); +typedef pstatus_t (*fn_zero_t)(void* WINPR_RESTRICT pDst, size_t bytes); +typedef pstatus_t (*fn_alphaComp_argb_t)(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step, + const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step, + BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width, + UINT32 height); +typedef pstatus_t (*fn_add_16s_t)(const INT16* WINPR_RESTRICT pSrc1, + const INT16* WINPR_RESTRICT pSrc2, INT16* WINPR_RESTRICT pDst, + UINT32 len); /** * @brief Add INT16 from pSrcDst2 to pSrcDst1 and store in both arrays * @param pSrcDst1 A pointer to the array of INT16 to add to @@ -110,8 +110,8 @@ typedef pstatus_t (*__add_16s_t)(const INT16* WINPR_RESTRICT pSrc1, * @return \b <=0 for failure, success otherwise * @since version 3.6.0 */ -typedef pstatus_t (*__add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1, - INT16* WINPR_RESTRICT pSrcDst2, UINT32 len); +typedef pstatus_t (*fn_add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1, + INT16* WINPR_RESTRICT pSrcDst2, UINT32 len); /** * @brief Copy (sub)image data without overlapping @@ -133,126 +133,165 @@ typedef pstatus_t (*__add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1, * @return \b <=0 for failure, success otherwise * @since version 3.6.0 */ -typedef pstatus_t (*__copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, - UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, - UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, - DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, - UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, - UINT32 flags); -typedef pstatus_t (*__lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, UINT32 len); -typedef pstatus_t (*__lShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val, +typedef pstatus_t (*fn_copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + const gdiPalette* WINPR_RESTRICT palette, UINT32 flags); +typedef pstatus_t (*fn_lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, + UINT32 len); +typedef pstatus_t (*fn_lShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val, + INT16* WINPR_RESTRICT pSrcDst, UINT32 len); +typedef pstatus_t (*fn_lShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val, + UINT16* WINPR_RESTRICT pSrcDst, UINT32 len); +typedef pstatus_t (*fn_rShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val, + INT16* WINPR_RESTRICT pSrcDst, UINT32 len); +typedef pstatus_t (*fn_rShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val, + UINT16* WINPR_RESTRICT pSrcDst, UINT32 len); +typedef pstatus_t (*fn_shiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT32 val, INT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val, +typedef pstatus_t (*fn_shiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, INT32 val, UINT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__rShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val, - INT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__rShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val, - UINT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__shiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT32 val, - INT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__shiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, INT32 val, - UINT16* WINPR_RESTRICT pSrcDst, UINT32 len); -typedef pstatus_t (*__sign_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pSrcDst, - UINT32 len); -typedef pstatus_t (*__yCbCrToRGB_16s8u_P3AC4R_t)(const INT16* WINPR_RESTRICT pSrc[3], - UINT32 srcStep, BYTE* WINPR_RESTRICT pDst, - UINT32 dstStep, UINT32 DstFormat, +typedef pstatus_t (*fn_sign_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pSrcDst, + UINT32 len); +typedef pstatus_t (*fn_yCbCrToRGB_16s8u_P3AC4R_t)(const INT16* WINPR_RESTRICT pSrc[3], + UINT32 srcStep, BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_yCbCrToRGB_16s16s_P3P3_t)(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep, + INT16* WINPR_RESTRICT pDst[3], INT32 dstStep, const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__yCbCrToRGB_16s16s_P3P3_t)(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep, - INT16* WINPR_RESTRICT pDst[3], INT32 dstStep, +typedef pstatus_t (*fn_RGBToYCbCr_16s16s_P3P3_t)(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep, + INT16* WINPR_RESTRICT pDst[3], INT32 dstStep, + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_RGBToRGB_16s8u_P3AC4R_t)(const INT16* WINPR_RESTRICT pSrc[3], UINT32 srcStep, + BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, + UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__RGBToYCbCr_16s16s_P3P3_t)(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep, - INT16* WINPR_RESTRICT pDst[3], INT32 dstStep, +typedef pstatus_t (*fn_YCoCgToRGB_8u_AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, + BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, + INT32 dstStep, UINT32 width, UINT32 height, + UINT8 shift, BOOL withAlpha); +typedef pstatus_t (*fn_RGB565ToARGB_16u32u_C3C4_t)(const UINT16* WINPR_RESTRICT pSrc, INT32 srcStep, + UINT32* WINPR_RESTRICT pDst, INT32 dstStep, + UINT32 width, UINT32 height, UINT32 format); +typedef pstatus_t (*fn_YUV420ToRGB_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)(const INT16* WINPR_RESTRICT pSrc[3], UINT32 srcStep, - BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, - UINT32 DstFormat, - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__YCoCgToRGB_8u_AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, - BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, - INT32 dstStep, UINT32 width, UINT32 height, UINT8 shift, - BOOL withAlpha); -typedef pstatus_t (*__RGB565ToARGB_16u32u_C3C4_t)(const UINT16* WINPR_RESTRICT pSrc, INT32 srcStep, - UINT32* WINPR_RESTRICT pDst, INT32 dstStep, - UINT32 width, UINT32 height, UINT32 format); -typedef pstatus_t (*__YUV420ToRGB_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc[3], - const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, - UINT32 dstStep, UINT32 DstFormat, - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__YUV444ToRGB_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc[3], - const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, - UINT32 dstStep, UINT32 DstFormat, - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__RGBToYUV420_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat, - UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], - const UINT32 dstStep[3], - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat, - UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], - const UINT32 dstStep[3], - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__YUV420CombineToYUV444_t)(avc444_frame_type type, - const BYTE* WINPR_RESTRICT pSrc[3], - const UINT32 srcStep[3], UINT32 nWidth, - UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3], - const UINT32 dstStep[3], - const RECTANGLE_16* WINPR_RESTRICT roi); -typedef pstatus_t (*__YUV444SplitToYUV420_t)( +typedef pstatus_t (*fn_YUV444ToRGB_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, + UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_RGBToYUV420_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat, + UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], + const UINT32 dstStep[3], + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_RGBToYUV444_8u_P3AC4R_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat, + UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], + const UINT32 dstStep[3], + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_YUV420CombineToYUV444_t)(avc444_frame_type type, + const BYTE* WINPR_RESTRICT pSrc[3], + const UINT32 srcStep[3], UINT32 nWidth, + UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3], + const UINT32 dstStep[3], + const RECTANGLE_16* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_YUV444SplitToYUV420_t)( const BYTE* WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pMainDst[3], const UINT32 dstMainStep[3], BYTE* WINPR_RESTRICT pAuxDst[3], const UINT32 srcAuxStep[3], const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__RGBToAVC444YUV_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, - UINT32 srcStep, BYTE* WINPR_RESTRICT pMainDst[3], - const UINT32 dstMainStep[3], - BYTE* WINPR_RESTRICT pAuxDst[3], const UINT32 dstAuxStep[3], - const prim_size_t* WINPR_RESTRICT roi); -typedef pstatus_t (*__andC_32u_t)(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, +typedef pstatus_t (*fn_RGBToAVC444YUV_t)(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, + UINT32 srcStep, BYTE* WINPR_RESTRICT pMainDst[3], + const UINT32 dstMainStep[3], + BYTE* WINPR_RESTRICT pAuxDst[3], + const UINT32 dstAuxStep[3], + const prim_size_t* WINPR_RESTRICT roi); +typedef pstatus_t (*fn_andC_32u_t)(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, + UINT32* WINPR_RESTRICT pDst, INT32 len); +typedef pstatus_t (*fn_orC_32u_t)(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, UINT32* WINPR_RESTRICT pDst, INT32 len); -typedef pstatus_t (*__orC_32u_t)(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, - UINT32* WINPR_RESTRICT pDst, INT32 len); typedef pstatus_t (*primitives_uninit_t)(void); +#if defined(WITH_FREERDP_3x_DEPRECATED) +typedef fn_copy_t __copy_t; +typedef fn_copy_8u_t __copy_8u_t; +typedef fn_copy_8u_AC4r_t __copy_8u_AC4r_t; +typedef fn_set_8u_t __set_8u_t; +typedef fn_set_32s_t __set_32s_t; +typedef fn_set_32u_t __set_32u_t; +typedef fn_zero_t __zero_t; +typedef fn_alphaComp_argb_t __alphaComp_argb_t; +typedef fn_add_16s_t __add_16s_t; +typedef fn_add_16s_inplace_t __add_16s_inplace_t; +typedef fn_copy_no_overlap_t __copy_no_overlap_t; +typedef fn_lShiftC_16s_inplace_t __lShiftC_16s_inplace_t; +typedef fn_lShiftC_16s_t __lShiftC_16s_t; +typedef fn_lShiftC_16u_t __lShiftC_16u_t; +typedef fn_rShiftC_16s_t __rShiftC_16s_t; +typedef fn_rShiftC_16u_t __rShiftC_16u_t; +typedef fn_shiftC_16s_t __shiftC_16s_t; +typedef fn_shiftC_16u_t __shiftC_16u_t; +typedef fn_sign_16s_t __sign_16s_t; +typedef fn_yCbCrToRGB_16s8u_P3AC4R_t __yCbCrToRGB_16s8u_P3AC4R_t; +typedef fn_yCbCrToRGB_16s16s_P3P3_t __yCbCrToRGB_16s16s_P3P3_t; +typedef fn_RGBToYCbCr_16s16s_P3P3_t __RGBToYCbCr_16s16s_P3P3_t; +typedef fn_RGBToRGB_16s8u_P3AC4R_t __RGBToRGB_16s8u_P3AC4R_t; +typedef fn_YCoCgToRGB_8u_AC4R_t __YCoCgToRGB_8u_AC4R_t; +typedef fn_RGB565ToARGB_16u32u_C3C4_t __RGB565ToARGB_16u32u_C3C4_t; +typedef fn_YUV420ToRGB_8u_P3AC4R_t __YUV420ToRGB_8u_P3AC4R_t; +typedef fn_YUV444ToRGB_8u_P3AC4R_t __YUV444ToRGB_8u_P3AC4R_t; +typedef fn_RGBToYUV420_8u_P3AC4R_t __RGBToYUV420_8u_P3AC4R_t; +typedef fn_RGBToYUV444_8u_P3AC4R_t __RGBToYUV444_8u_P3AC4R_t; +typedef fn_YUV420CombineToYUV444_t __YUV420CombineToYUV444_t; +typedef fn_YUV444SplitToYUV420_t __YUV444SplitToYUV420_t; +typedef fn_RGBToAVC444YUV_t __RGBToAVC444YUV_t; +typedef fn_andC_32u_t __andC_32u_t; +typedef fn_orC_32u_t __orC_32u_t; +#endif + typedef struct { /* Memory-to-memory copy routines */ - __copy_t copy; /* memcpy/memmove, basically */ - __copy_8u_t copy_8u; /* more strongly typed */ - __copy_8u_AC4r_t copy_8u_AC4r; /* pixel copy function */ + fn_copy_t copy; /* memcpy/memmove, basically */ + fn_copy_8u_t copy_8u; /* more strongly typed */ + fn_copy_8u_AC4r_t copy_8u_AC4r; /* pixel copy function */ /* Memory setting routines */ - __set_8u_t set_8u; /* memset, basically */ - __set_32s_t set_32s; - __set_32u_t set_32u; - __zero_t zero; /* bzero or faster */ + fn_set_8u_t set_8u; /* memset, basically */ + fn_set_32s_t set_32s; + fn_set_32u_t set_32u; + fn_zero_t zero; /* bzero or faster */ /* Arithmetic functions */ - __add_16s_t add_16s; + fn_add_16s_t add_16s; /* And/or */ - __andC_32u_t andC_32u; - __orC_32u_t orC_32u; + fn_andC_32u_t andC_32u; + fn_orC_32u_t orC_32u; /* Shifts */ - __lShiftC_16s_t lShiftC_16s; - __lShiftC_16u_t lShiftC_16u; - __rShiftC_16s_t rShiftC_16s; - __rShiftC_16u_t rShiftC_16u; - __shiftC_16s_t shiftC_16s; - __shiftC_16u_t shiftC_16u; + fn_lShiftC_16s_t lShiftC_16s; + fn_lShiftC_16u_t lShiftC_16u; + fn_rShiftC_16s_t rShiftC_16s; + fn_rShiftC_16u_t rShiftC_16u; + fn_shiftC_16s_t shiftC_16s; + fn_shiftC_16u_t shiftC_16u; /* Alpha Composition */ - __alphaComp_argb_t alphaComp_argb; + fn_alphaComp_argb_t alphaComp_argb; /* Sign */ - __sign_16s_t sign_16s; + fn_sign_16s_t sign_16s; /* Color conversions */ - __yCbCrToRGB_16s8u_P3AC4R_t yCbCrToRGB_16s8u_P3AC4R; - __yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3; - __RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3; - __RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R; - __YCoCgToRGB_8u_AC4R_t YCoCgToRGB_8u_AC4R; - __YUV420ToRGB_8u_P3AC4R_t YUV420ToRGB_8u_P3AC4R; - __RGBToYUV420_8u_P3AC4R_t RGBToYUV420_8u_P3AC4R; - __RGBToYUV444_8u_P3AC4R_t RGBToYUV444_8u_P3AC4R; - __YUV420CombineToYUV444_t YUV420CombineToYUV444; - __YUV444SplitToYUV420_t YUV444SplitToYUV420; - __YUV444ToRGB_8u_P3AC4R_t YUV444ToRGB_8u_P3AC4R; - __RGBToAVC444YUV_t RGBToAVC444YUV; - __RGBToAVC444YUV_t RGBToAVC444YUVv2; + fn_yCbCrToRGB_16s8u_P3AC4R_t yCbCrToRGB_16s8u_P3AC4R; + fn_yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3; + fn_RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3; + fn_RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R; + fn_YCoCgToRGB_8u_AC4R_t YCoCgToRGB_8u_AC4R; + fn_YUV420ToRGB_8u_P3AC4R_t YUV420ToRGB_8u_P3AC4R; + fn_RGBToYUV420_8u_P3AC4R_t RGBToYUV420_8u_P3AC4R; + fn_RGBToYUV444_8u_P3AC4R_t RGBToYUV444_8u_P3AC4R; + fn_YUV420CombineToYUV444_t YUV420CombineToYUV444; + fn_YUV444SplitToYUV420_t YUV444SplitToYUV420; + fn_YUV444ToRGB_8u_P3AC4R_t YUV444ToRGB_8u_P3AC4R; + fn_RGBToAVC444YUV_t RGBToAVC444YUV; + fn_RGBToAVC444YUV_t RGBToAVC444YUVv2; /* flags */ DWORD flags; primitives_uninit_t uninit; @@ -260,9 +299,9 @@ typedef struct /** \brief Do vecotor addition, store result in both input buffers * pSrcDst1 = pSrcDst2 = pSrcDst1 + pSrcDst2 */ - __add_16s_inplace_t add_16s_inplace; /** @since version 3.6.0 */ - __lShiftC_16s_inplace_t lShiftC_16s_inplace; /** @since version 3.6.0 */ - __copy_no_overlap_t copy_no_overlap; /** @since version 3.6.0 */ + fn_add_16s_inplace_t add_16s_inplace; /** @since version 3.6.0 */ + fn_lShiftC_16s_inplace_t lShiftC_16s_inplace; /** @since version 3.6.0 */ + fn_copy_no_overlap_t copy_no_overlap; /** @since version 3.6.0 */ } primitives_t; typedef enum diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c index 5089245a2..2211d1eed 100644 --- a/libfreerdp/primitives/prim_copy.c +++ b/libfreerdp/primitives/prim_copy.c @@ -418,7 +418,7 @@ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims) /* Start with the default. */ prims->copy_8u = general_copy_8u; prims->copy_8u_AC4r = general_copy_8u_AC4r; - prims->copy = WINPR_FUNC_PTR_CAST(prims->copy_8u, __copy_t); + prims->copy = WINPR_FUNC_PTR_CAST(prims->copy_8u, fn_copy_t); prims->copy_no_overlap = generic_image_copy_no_overlap; } diff --git a/libfreerdp/primitives/sse/prim_templates.h b/libfreerdp/primitives/sse/prim_templates.h index bef91571c..f277fdb37 100644 --- a/libfreerdp/primitives/sse/prim_templates.h +++ b/libfreerdp/primitives/sse/prim_templates.h @@ -52,10 +52,6 @@ return PRIMITIVES_SUCCESS; \ if (val >= 16) \ return -1; \ - if (len < 16) /* pointless if too small */ \ - { \ - return _fallback_(pSrc, val, pDst, ulen); \ - } \ if (sizeof(_type_) == 1) \ shifts = 1; \ else if (sizeof(_type_) == 2) \ @@ -135,101 +131,68 @@ * SCD = Source, Constant, Destination * PRE = preload xmm0 with the constant. */ -#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ - static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \ - _type_* WINPR_RESTRICT pDst, INT32 ilen) \ - { \ - size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen); \ - int shifts = 0; \ - const _type_* sptr = pSrc; \ - _type_* dptr = pDst; \ - size_t count; \ - __m128i xmm0; \ - if (len < 16) /* pointless if too small */ \ - { \ - return _fallback_(pSrc, val, pDst, WINPR_ASSERTING_INT_CAST(int32_t, len)); \ - } \ - if (sizeof(_type_) == 1) \ - shifts = 1; \ - else if (sizeof(_type_) == 2) \ - shifts = 2; \ - else if (sizeof(_type_) == 4) \ - shifts = 3; \ - else if (sizeof(_type_) == 8) \ - shifts = 4; \ - /* Use 4 128-bit SSE registers. */ \ - count = len >> (7 - shifts); \ - len -= count << (7 - shifts); \ - xmm0 = mm_set1_epu32(val); \ - if ((const ULONG_PTR)sptr & 0x0f) \ - { \ - while (count--) \ - { \ - __m128i xmm1 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm2 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm3 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm4 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - xmm1 = _op_(xmm1, xmm0); \ - xmm2 = _op_(xmm2, xmm0); \ - xmm3 = _op_(xmm3, xmm0); \ - xmm4 = _op_(xmm4, xmm0); \ - STORE_SI128(dptr, xmm1); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm2); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm3); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm4); \ - dptr += (16 / sizeof(_type_)); \ - } \ - } \ - else \ - { \ - while (count--) \ - { \ - __m128i xmm1 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm2 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm3 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - __m128i xmm4 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - xmm1 = _op_(xmm1, xmm0); \ - xmm2 = _op_(xmm2, xmm0); \ - xmm3 = _op_(xmm3, xmm0); \ - xmm4 = _op_(xmm4, xmm0); \ - STORE_SI128(dptr, xmm1); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm2); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm3); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm4); \ - dptr += (16 / sizeof(_type_)); \ - } \ - } \ - /* Use a single 128-bit SSE register. */ \ - count = len >> (5 - shifts); \ - len -= count << (5 - shifts); \ - while (count--) \ - { \ - __m128i xmm1 = LOAD_SI128(sptr); \ - sptr += (16 / sizeof(_type_)); \ - xmm1 = _op_(xmm1, xmm0); \ - STORE_SI128(dptr, xmm1); \ - dptr += (16 / sizeof(_type_)); \ - } \ - /* Finish off the remainder. */ \ - while (len--) \ - { \ - _slowWay_; \ - } \ - return PRIMITIVES_SUCCESS; \ +#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ + static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \ + _type_* WINPR_RESTRICT pDst, INT32 ilen) \ + { \ + size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen); \ + int shifts = 0; \ + const _type_* sptr = pSrc; \ + _type_* dptr = pDst; \ + size_t count; \ + __m128i xmm0; \ + if (sizeof(_type_) == 1) \ + shifts = 1; \ + else if (sizeof(_type_) == 2) \ + shifts = 2; \ + else if (sizeof(_type_) == 4) \ + shifts = 3; \ + else if (sizeof(_type_) == 8) \ + shifts = 4; \ + /* Use 4 128-bit SSE registers. */ \ + count = len >> (7 - shifts); \ + len -= count << (7 - shifts); \ + xmm0 = mm_set1_epu32(val); \ + while (count--) \ + { \ + __m128i xmm1 = LOAD_SI128(sptr); \ + sptr += (16 / sizeof(_type_)); \ + __m128i xmm2 = LOAD_SI128(sptr); \ + sptr += (16 / sizeof(_type_)); \ + __m128i xmm3 = LOAD_SI128(sptr); \ + sptr += (16 / sizeof(_type_)); \ + __m128i xmm4 = LOAD_SI128(sptr); \ + sptr += (16 / sizeof(_type_)); \ + xmm1 = _op_(xmm1, xmm0); \ + xmm2 = _op_(xmm2, xmm0); \ + xmm3 = _op_(xmm3, xmm0); \ + xmm4 = _op_(xmm4, xmm0); \ + STORE_SI128(dptr, xmm1); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm2); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm3); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm4); \ + dptr += (16 / sizeof(_type_)); \ + } \ + /* Use a single 128-bit SSE register. */ \ + count = len >> (5 - shifts); \ + len -= count << (5 - shifts); \ + while (count--) \ + { \ + __m128i xmm1 = LOAD_SI128(sptr); \ + sptr += (16 / sizeof(_type_)); \ + xmm1 = _op_(xmm1, xmm0); \ + STORE_SI128(dptr, xmm1); \ + dptr += (16 / sizeof(_type_)); \ + } \ + /* Finish off the remainder. */ \ + while (len--) \ + { \ + _slowWay_; \ + } \ + return PRIMITIVES_SUCCESS; \ } /* ---------------------------------------------------------------------------- @@ -245,10 +208,6 @@ const _type_* sptr2 = pSrc2; \ _type_* dptr = pDst; \ size_t count; \ - if (len < 16) /* pointless if too small */ \ - { \ - return _fallback_(pSrc1, pSrc2, pDst, len); \ - } \ if (sizeof(_type_) == 1) \ shifts = 1; \ else if (sizeof(_type_) == 2) \ diff --git a/libfreerdp/primitives/test/TestPrimitivesAndOr.c b/libfreerdp/primitives/test/TestPrimitivesAndOr.c index 8f3b141de..372f7b1c1 100644 --- a/libfreerdp/primitives/test/TestPrimitivesAndOr.c +++ b/libfreerdp/primitives/test/TestPrimitivesAndOr.c @@ -23,7 +23,7 @@ #define VALUE (0xA5A5A5A5U) /* ========================================================================= */ -static BOOL test_and_32u_impl(const char* name, __andC_32u_t fkt, const UINT32* src, +static BOOL test_and_32u_impl(const char* name, fn_andC_32u_t fkt, const UINT32* src, const UINT32 val, UINT32* dst, size_t size) { pstatus_t status = fkt(src, val, dst, WINPR_ASSERTING_INT_CAST(int32_t, size)); diff --git a/libfreerdp/primitives/test/TestPrimitivesYUV.c b/libfreerdp/primitives/test/TestPrimitivesYUV.c index 4f808615d..24c56bd79 100644 --- a/libfreerdp/primitives/test/TestPrimitivesYUV.c +++ b/libfreerdp/primitives/test/TestPrimitivesYUV.c @@ -742,8 +742,8 @@ static BOOL TestPrimitiveRgbToLumaChroma(primitives_t* prims, prim_size_t roi, U size_t uvwidth = 0; const size_t padding = 0x1000; UINT32 stride = 0; - __RGBToAVC444YUV_t fkt = NULL; - __RGBToAVC444YUV_t gen = NULL; + fn_RGBToAVC444YUV_t fkt = NULL; + fn_RGBToAVC444YUV_t gen = NULL; const UINT32 formats[] = { PIXEL_FORMAT_XRGB32, PIXEL_FORMAT_XBGR32, PIXEL_FORMAT_ARGB32, PIXEL_FORMAT_ABGR32, PIXEL_FORMAT_RGBA32, PIXEL_FORMAT_RGBX32, PIXEL_FORMAT_BGRA32, PIXEL_FORMAT_BGRX32 };