mirror of
https://github.com/morgan9e/FreeRDP
synced 2026-04-14 00:14:11 +09:00
[restrict] unify restrict keyword usage
This commit is contained in:
@@ -80,7 +80,8 @@ extern "C"
|
||||
* @return \b TRUE in case of success, \b FALSE for any error
|
||||
*/
|
||||
FREERDP_API BOOL progressive_rfx_write_message_progressive_simple(
|
||||
PROGRESSIVE_CONTEXT* progressive, wStream* s, const RFX_MESSAGE* msg);
|
||||
PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s,
|
||||
const RFX_MESSAGE* WINPR_RESTRICT msg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -75,43 +75,51 @@ extern "C"
|
||||
typedef struct S_RFX_MESSAGE RFX_MESSAGE;
|
||||
typedef struct S_RFX_CONTEXT RFX_CONTEXT;
|
||||
|
||||
FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* context, const BYTE* data, UINT32 length,
|
||||
UINT32 left, UINT32 top, BYTE* dst, UINT32 dstFormat,
|
||||
UINT32 dstStride, UINT32 dstHeight,
|
||||
REGION16* invalidRegion);
|
||||
FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* WINPR_RESTRICT context,
|
||||
const BYTE* WINPR_RESTRICT data, UINT32 length,
|
||||
UINT32 left, UINT32 top, BYTE* WINPR_RESTRICT dst,
|
||||
UINT32 dstFormat, UINT32 dstStride, UINT32 dstHeight,
|
||||
REGION16* WINPR_RESTRICT invalidRegion);
|
||||
|
||||
FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* message);
|
||||
FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* message,
|
||||
UINT16* numQuantVals);
|
||||
FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* WINPR_RESTRICT message);
|
||||
FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* WINPR_RESTRICT message,
|
||||
UINT16* WINPR_RESTRICT numQuantVals);
|
||||
|
||||
FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* message,
|
||||
UINT16* numTiles);
|
||||
FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* message);
|
||||
FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* WINPR_RESTRICT message,
|
||||
UINT16* WINPR_RESTRICT numTiles);
|
||||
FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* WINPR_RESTRICT message);
|
||||
|
||||
FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* message, UINT16* numRects);
|
||||
FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* message);
|
||||
FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* WINPR_RESTRICT message,
|
||||
UINT16* WINPR_RESTRICT numRects);
|
||||
FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* WINPR_RESTRICT message);
|
||||
|
||||
FREERDP_API void rfx_message_free(RFX_CONTEXT* context, RFX_MESSAGE* message);
|
||||
FREERDP_API void rfx_message_free(RFX_CONTEXT* WINPR_RESTRICT context,
|
||||
RFX_MESSAGE* WINPR_RESTRICT message);
|
||||
|
||||
FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* context, wStream* s, const RFX_RECT* rects,
|
||||
size_t num_rects, const BYTE* image_data, UINT32 width,
|
||||
FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* WINPR_RESTRICT context,
|
||||
wStream* WINPR_RESTRICT s,
|
||||
const RFX_RECT* WINPR_RESTRICT rects, size_t num_rects,
|
||||
const BYTE* WINPR_RESTRICT image_data, UINT32 width,
|
||||
UINT32 height, UINT32 rowstride);
|
||||
|
||||
FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects,
|
||||
size_t numRects, const BYTE* data, UINT32 width,
|
||||
UINT32 height, size_t scanline);
|
||||
FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* WINPR_RESTRICT context,
|
||||
const RFX_RECT* WINPR_RESTRICT rects,
|
||||
size_t numRects, const BYTE* WINPR_RESTRICT data,
|
||||
UINT32 width, UINT32 height, size_t scanline);
|
||||
|
||||
FREERDP_API RFX_MESSAGE_LIST* rfx_encode_messages(RFX_CONTEXT* context, const RFX_RECT* rects,
|
||||
size_t numRects, const BYTE* data,
|
||||
UINT32 width, UINT32 height, UINT32 scanline,
|
||||
size_t* numMessages, size_t maxDataSize);
|
||||
FREERDP_API RFX_MESSAGE_LIST*
|
||||
rfx_encode_messages(RFX_CONTEXT* WINPR_RESTRICT context, const RFX_RECT* WINPR_RESTRICT rects,
|
||||
size_t numRects, const BYTE* WINPR_RESTRICT data, UINT32 width,
|
||||
UINT32 height, UINT32 scanline, size_t* WINPR_RESTRICT numMessages,
|
||||
size_t maxDataSize);
|
||||
FREERDP_API void rfx_message_list_free(RFX_MESSAGE_LIST* messages);
|
||||
|
||||
FREERDP_API const RFX_MESSAGE* rfx_message_list_get(const RFX_MESSAGE_LIST* messages,
|
||||
size_t idx);
|
||||
FREERDP_API const RFX_MESSAGE*
|
||||
rfx_message_list_get(const RFX_MESSAGE_LIST* WINPR_RESTRICT messages, size_t idx);
|
||||
|
||||
FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* context, wStream* s,
|
||||
const RFX_MESSAGE* message);
|
||||
FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* WINPR_RESTRICT context,
|
||||
wStream* WINPR_RESTRICT s,
|
||||
const RFX_MESSAGE* WINPR_RESTRICT message);
|
||||
|
||||
FREERDP_API void rfx_context_free(RFX_CONTEXT* context);
|
||||
|
||||
@@ -124,7 +132,7 @@ extern "C"
|
||||
FREERDP_API BOOL rfx_context_reset(RFX_CONTEXT* WINPR_RESTRICT context, UINT32 width,
|
||||
UINT32 height);
|
||||
|
||||
FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* context, RLGR_MODE mode);
|
||||
FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* WINPR_RESTRICT context, RLGR_MODE mode);
|
||||
|
||||
/** Getter for RFX mode
|
||||
* @param context The RFX context to query
|
||||
@@ -170,8 +178,9 @@ extern "C"
|
||||
* @since version 3.0.0
|
||||
* @return \b TRUE in case of success, \b FALSE for any error
|
||||
*/
|
||||
FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* rfx, wStream* s,
|
||||
const RFX_MESSAGE* msg);
|
||||
FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* WINPR_RESTRICT rfx,
|
||||
wStream* WINPR_RESTRICT s,
|
||||
const RFX_MESSAGE* WINPR_RESTRICT msg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ extern "C"
|
||||
UINT32* WINPR_RESTRICT pDstSize, UINT32 flags);
|
||||
FREERDP_API int zgfx_compress(ZGFX_CONTEXT* WINPR_RESTRICT zgfx,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
|
||||
BYTE** WINPR_RESTRICT ppDstData, UINT32* pDstSize,
|
||||
BYTE** WINPR_RESTRICT ppDstData, UINT32* WINPR_RESTRICT pDstSize,
|
||||
UINT32* WINPR_RESTRICT pFlags);
|
||||
FREERDP_API int zgfx_compress_to_stream(ZGFX_CONTEXT* WINPR_RESTRICT zgfx,
|
||||
wStream* WINPR_RESTRICT sDst,
|
||||
|
||||
@@ -1207,7 +1207,7 @@ error_nsc:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void clear_context_free(CLEAR_CONTEXT* clear)
|
||||
void clear_context_free(CLEAR_CONTEXT* WINPR_RESTRICT clear)
|
||||
{
|
||||
if (!clear)
|
||||
return;
|
||||
|
||||
@@ -1127,8 +1127,9 @@ BOOL freerdp_image_fill(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 n
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
BOOL freerdp_image_fill_ex(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
|
||||
UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color, UINT32 flags)
|
||||
BOOL freerdp_image_fill_ex(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep,
|
||||
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color,
|
||||
UINT32 flags)
|
||||
{
|
||||
if (FreeRDPColorHasAlpha(DstFormat) && ((flags & FREERDP_IMAGE_FILL_IGNORE_ALPHA) != 0))
|
||||
{
|
||||
|
||||
@@ -2379,8 +2379,9 @@ fail:
|
||||
return rc;
|
||||
}
|
||||
|
||||
BOOL progressive_rfx_write_message_progressive_simple(PROGRESSIVE_CONTEXT* progressive, wStream* s,
|
||||
const RFX_MESSAGE* msg)
|
||||
BOOL progressive_rfx_write_message_progressive_simple(
|
||||
PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s,
|
||||
const RFX_MESSAGE* WINPR_RESTRICT msg)
|
||||
{
|
||||
RFX_CONTEXT* context = NULL;
|
||||
|
||||
|
||||
@@ -377,7 +377,8 @@ static void nsc_encode_subsampling_sse2(NSC_CONTEXT* context)
|
||||
}
|
||||
}
|
||||
|
||||
static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanline)
|
||||
static BOOL nsc_encode_sse2(NSC_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT data,
|
||||
UINT32 scanline)
|
||||
{
|
||||
if (!nsc_encode_argb_to_aycocg_sse2(context, data, scanline))
|
||||
return FALSE;
|
||||
|
||||
@@ -32,14 +32,18 @@ extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE* pDstBuffer, const BYTE** ppDstData, UINT32* pDstSize,
|
||||
UINT32* pFlags);
|
||||
FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData,
|
||||
UINT32 SrcSize, const BYTE** ppDstData, UINT32* pDstSize,
|
||||
UINT32 flags);
|
||||
FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
|
||||
BYTE* WINPR_RESTRICT pDstBuffer,
|
||||
const BYTE** WINPR_RESTRICT ppDstData,
|
||||
UINT32* WINPR_RESTRICT pDstSize,
|
||||
UINT32* WINPR_RESTRICT pFlags);
|
||||
FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
|
||||
const BYTE** WINPR_RESTRICT ppDstData,
|
||||
UINT32* WINPR_RESTRICT pDstSize, UINT32 flags);
|
||||
|
||||
FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* xcrush, BOOL flush);
|
||||
FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush, BOOL flush);
|
||||
|
||||
FREERDP_LOCAL XCRUSH_CONTEXT* xcrush_context_new(BOOL Compressor);
|
||||
FREERDP_LOCAL void xcrush_context_free(XCRUSH_CONTEXT* xcrush);
|
||||
|
||||
@@ -34,9 +34,10 @@ static INT16 convert(UINT8 raw, int shift)
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* pSrc, INT32 srcStep, BYTE* pDst,
|
||||
UINT32 DstFormat, INT32 dstStep, UINT32 width,
|
||||
UINT32 height, UINT8 shift, BOOL withAlpha)
|
||||
static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
|
||||
BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat,
|
||||
INT32 dstStep, UINT32 width, UINT32 height, UINT8 shift,
|
||||
BOOL withAlpha)
|
||||
{
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, TRUE);
|
||||
|
||||
@@ -30,8 +30,9 @@
|
||||
#define ALPHA(_k_) (((_k_)&0xFF000000U) >> 24)
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_alphaComp_argb(const BYTE* pSrc1, UINT32 src1Step, const BYTE* pSrc2,
|
||||
UINT32 src2Step, BYTE* pDst, UINT32 dstStep, UINT32 width,
|
||||
static pstatus_t general_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step,
|
||||
const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step,
|
||||
BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width,
|
||||
UINT32 height)
|
||||
{
|
||||
for (size_t y = 0; y < height; y++)
|
||||
|
||||
@@ -24,7 +24,8 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
* 32-bit AND with a constant.
|
||||
*/
|
||||
static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len)
|
||||
static pstatus_t general_andC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val,
|
||||
UINT32* WINPR_RESTRICT pDst, INT32 len)
|
||||
{
|
||||
if (val == 0)
|
||||
return PRIMITIVES_SUCCESS;
|
||||
@@ -38,7 +39,8 @@ static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst,
|
||||
/* ----------------------------------------------------------------------------
|
||||
* 32-bit OR with a constant.
|
||||
*/
|
||||
static pstatus_t general_orC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len)
|
||||
static pstatus_t general_orC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val,
|
||||
UINT32* WINPR_RESTRICT pDst, INT32 len)
|
||||
{
|
||||
if (val == 0)
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
||||
@@ -84,7 +84,8 @@ static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, co
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
|
||||
static pstatus_t general_copy_8u(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst,
|
||||
INT32 len)
|
||||
{
|
||||
if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len))
|
||||
{
|
||||
@@ -103,8 +104,9 @@ static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
|
||||
* The addresses are assumed to have been already offset to the upper-left
|
||||
* corners of the source and destination region of interest.
|
||||
*/
|
||||
static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
|
||||
INT32 width, INT32 height)
|
||||
static pstatus_t general_copy_8u_AC4r(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
|
||||
BYTE* WINPR_RESTRICT pDst, INT32 dstStep, INT32 width,
|
||||
INT32 height)
|
||||
{
|
||||
const BYTE* src = pSrc;
|
||||
BYTE* dst = pDst;
|
||||
@@ -420,7 +422,7 @@ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims)
|
||||
prims->copy_no_overlap = generic_image_copy_no_overlap;
|
||||
}
|
||||
|
||||
void primitives_init_copy_opt(primitives_t* prims)
|
||||
void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_copy_sse41(prims);
|
||||
#if defined(WITH_AVX2)
|
||||
|
||||
@@ -25,21 +25,21 @@
|
||||
#include "prim_set.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
static pstatus_t general_set_8u(BYTE val, BYTE* pDst, UINT32 len)
|
||||
static pstatus_t general_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len)
|
||||
{
|
||||
memset((void*)pDst, (int)val, (size_t)len);
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_zero(void* pDst, size_t len)
|
||||
static pstatus_t general_zero(void* WINPR_RESTRICT pDst, size_t len)
|
||||
{
|
||||
memset(pDst, 0, len);
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len)
|
||||
static pstatus_t general_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len)
|
||||
{
|
||||
INT32* dptr = pDst;
|
||||
size_t span = 0;
|
||||
@@ -78,7 +78,7 @@ static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len)
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_set_32u(UINT32 val, UINT32* pDst, UINT32 len)
|
||||
static pstatus_t general_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len)
|
||||
{
|
||||
UINT32* dptr = pDst;
|
||||
size_t span = 0;
|
||||
|
||||
@@ -31,8 +31,7 @@
|
||||
static primitives_t* generic = NULL;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pDst,
|
||||
UINT32 len)
|
||||
static pstatus_t ssse3_sign_16s(const INT16* pSrc, INT16* pDst, UINT32 len)
|
||||
{
|
||||
const INT16* sptr = pSrc;
|
||||
INT16* dptr = pDst;
|
||||
|
||||
@@ -134,7 +134,8 @@
|
||||
* PRE = preload xmm0 with the constant.
|
||||
*/
|
||||
#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
|
||||
static pstatus_t _name_(const _type_* pSrc, _type_ val, _type_* pDst, INT32 ilen) \
|
||||
static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \
|
||||
_type_* WINPR_RESTRICT pDst, INT32 ilen) \
|
||||
{ \
|
||||
size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen); \
|
||||
int shifts = 0; \
|
||||
@@ -232,78 +233,80 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
* SSD = Source1, Source2, Destination
|
||||
*/
|
||||
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
|
||||
static pstatus_t _name_(const _type_* pSrc1, const _type_* pSrc2, _type_* pDst, UINT32 len) \
|
||||
{ \
|
||||
int shifts = 0; \
|
||||
const _type_* sptr1 = pSrc1; \
|
||||
const _type_* sptr2 = pSrc2; \
|
||||
_type_* dptr = pDst; \
|
||||
size_t count; \
|
||||
if (len < 16) /* pointless if too small */ \
|
||||
{ \
|
||||
return _fallback_(pSrc1, pSrc2, pDst, len); \
|
||||
} \
|
||||
if (sizeof(_type_) == 1) \
|
||||
shifts = 1; \
|
||||
else if (sizeof(_type_) == 2) \
|
||||
shifts = 2; \
|
||||
else if (sizeof(_type_) == 4) \
|
||||
shifts = 3; \
|
||||
else if (sizeof(_type_) == 8) \
|
||||
shifts = 4; \
|
||||
/* Use 4 128-bit SSE registers. */ \
|
||||
count = len >> (7 - shifts); \
|
||||
len -= count << (7 - shifts); \
|
||||
/* Aligned loads */ \
|
||||
while (count--) \
|
||||
{ \
|
||||
__m128i xmm0 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm1 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm2 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm3 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm4 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm5 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm6 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm7 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
xmm0 = _op_(xmm0, xmm4); \
|
||||
xmm1 = _op_(xmm1, xmm5); \
|
||||
xmm2 = _op_(xmm2, xmm6); \
|
||||
xmm3 = _op_(xmm3, xmm7); \
|
||||
STORE_SI128(dptr, xmm0); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm1); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm2); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm3); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
} \
|
||||
/* Use a single 128-bit SSE register. */ \
|
||||
count = len >> (5 - shifts); \
|
||||
len -= count << (5 - shifts); \
|
||||
while (count--) \
|
||||
{ \
|
||||
__m128i xmm0 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm1 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
xmm0 = _op_(xmm0, xmm1); \
|
||||
STORE_SI128(dptr, xmm0); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
} \
|
||||
/* Finish off the remainder. */ \
|
||||
while (len--) \
|
||||
{ \
|
||||
_slowWay_; \
|
||||
} \
|
||||
return PRIMITIVES_SUCCESS; \
|
||||
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
|
||||
static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc1, \
|
||||
const _type_* WINPR_RESTRICT pSrc2, _type_* WINPR_RESTRICT pDst, \
|
||||
UINT32 len) \
|
||||
{ \
|
||||
int shifts = 0; \
|
||||
const _type_* sptr1 = pSrc1; \
|
||||
const _type_* sptr2 = pSrc2; \
|
||||
_type_* dptr = pDst; \
|
||||
size_t count; \
|
||||
if (len < 16) /* pointless if too small */ \
|
||||
{ \
|
||||
return _fallback_(pSrc1, pSrc2, pDst, len); \
|
||||
} \
|
||||
if (sizeof(_type_) == 1) \
|
||||
shifts = 1; \
|
||||
else if (sizeof(_type_) == 2) \
|
||||
shifts = 2; \
|
||||
else if (sizeof(_type_) == 4) \
|
||||
shifts = 3; \
|
||||
else if (sizeof(_type_) == 8) \
|
||||
shifts = 4; \
|
||||
/* Use 4 128-bit SSE registers. */ \
|
||||
count = len >> (7 - shifts); \
|
||||
len -= count << (7 - shifts); \
|
||||
/* Aligned loads */ \
|
||||
while (count--) \
|
||||
{ \
|
||||
__m128i xmm0 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm1 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm2 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm3 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm4 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm5 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm6 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm7 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
xmm0 = _op_(xmm0, xmm4); \
|
||||
xmm1 = _op_(xmm1, xmm5); \
|
||||
xmm2 = _op_(xmm2, xmm6); \
|
||||
xmm3 = _op_(xmm3, xmm7); \
|
||||
STORE_SI128(dptr, xmm0); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm1); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm2); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
STORE_SI128(dptr, xmm3); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
} \
|
||||
/* Use a single 128-bit SSE register. */ \
|
||||
count = len >> (5 - shifts); \
|
||||
len -= count << (5 - shifts); \
|
||||
while (count--) \
|
||||
{ \
|
||||
__m128i xmm0 = LOAD_SI128(sptr1); \
|
||||
sptr1 += (16 / sizeof(_type_)); \
|
||||
__m128i xmm1 = LOAD_SI128(sptr2); \
|
||||
sptr2 += (16 / sizeof(_type_)); \
|
||||
xmm0 = _op_(xmm0, xmm1); \
|
||||
STORE_SI128(dptr, xmm0); \
|
||||
dptr += (16 / sizeof(_type_)); \
|
||||
} \
|
||||
/* Finish off the remainder. */ \
|
||||
while (len--) \
|
||||
{ \
|
||||
_slowWay_; \
|
||||
} \
|
||||
return PRIMITIVES_SUCCESS; \
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user