[restrict] unify restrict keyword usage

This commit is contained in:
akallabeth
2025-03-10 18:17:49 +01:00
parent 918f5d5af9
commit 0bb49c94c9
15 changed files with 161 additions and 136 deletions

View File

@@ -80,7 +80,8 @@ extern "C"
* @return \b TRUE in case of success, \b FALSE for any error
*/
FREERDP_API BOOL progressive_rfx_write_message_progressive_simple(
PROGRESSIVE_CONTEXT* progressive, wStream* s, const RFX_MESSAGE* msg);
PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s,
const RFX_MESSAGE* WINPR_RESTRICT msg);
#ifdef __cplusplus
}

View File

@@ -75,43 +75,51 @@ extern "C"
typedef struct S_RFX_MESSAGE RFX_MESSAGE;
typedef struct S_RFX_CONTEXT RFX_CONTEXT;
FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* context, const BYTE* data, UINT32 length,
UINT32 left, UINT32 top, BYTE* dst, UINT32 dstFormat,
UINT32 dstStride, UINT32 dstHeight,
REGION16* invalidRegion);
FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* WINPR_RESTRICT context,
const BYTE* WINPR_RESTRICT data, UINT32 length,
UINT32 left, UINT32 top, BYTE* WINPR_RESTRICT dst,
UINT32 dstFormat, UINT32 dstStride, UINT32 dstHeight,
REGION16* WINPR_RESTRICT invalidRegion);
FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* message);
FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* message,
UINT16* numQuantVals);
FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* WINPR_RESTRICT message);
FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* WINPR_RESTRICT message,
UINT16* WINPR_RESTRICT numQuantVals);
FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* message,
UINT16* numTiles);
FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* message);
FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* WINPR_RESTRICT message,
UINT16* WINPR_RESTRICT numTiles);
FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* WINPR_RESTRICT message);
FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* message, UINT16* numRects);
FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* message);
FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* WINPR_RESTRICT message,
UINT16* WINPR_RESTRICT numRects);
FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* WINPR_RESTRICT message);
FREERDP_API void rfx_message_free(RFX_CONTEXT* context, RFX_MESSAGE* message);
FREERDP_API void rfx_message_free(RFX_CONTEXT* WINPR_RESTRICT context,
RFX_MESSAGE* WINPR_RESTRICT message);
FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* context, wStream* s, const RFX_RECT* rects,
size_t num_rects, const BYTE* image_data, UINT32 width,
FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* WINPR_RESTRICT context,
wStream* WINPR_RESTRICT s,
const RFX_RECT* WINPR_RESTRICT rects, size_t num_rects,
const BYTE* WINPR_RESTRICT image_data, UINT32 width,
UINT32 height, UINT32 rowstride);
FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects,
size_t numRects, const BYTE* data, UINT32 width,
UINT32 height, size_t scanline);
FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* WINPR_RESTRICT context,
const RFX_RECT* WINPR_RESTRICT rects,
size_t numRects, const BYTE* WINPR_RESTRICT data,
UINT32 width, UINT32 height, size_t scanline);
FREERDP_API RFX_MESSAGE_LIST* rfx_encode_messages(RFX_CONTEXT* context, const RFX_RECT* rects,
size_t numRects, const BYTE* data,
UINT32 width, UINT32 height, UINT32 scanline,
size_t* numMessages, size_t maxDataSize);
FREERDP_API RFX_MESSAGE_LIST*
rfx_encode_messages(RFX_CONTEXT* WINPR_RESTRICT context, const RFX_RECT* WINPR_RESTRICT rects,
size_t numRects, const BYTE* WINPR_RESTRICT data, UINT32 width,
UINT32 height, UINT32 scanline, size_t* WINPR_RESTRICT numMessages,
size_t maxDataSize);
FREERDP_API void rfx_message_list_free(RFX_MESSAGE_LIST* messages);
FREERDP_API const RFX_MESSAGE* rfx_message_list_get(const RFX_MESSAGE_LIST* messages,
size_t idx);
FREERDP_API const RFX_MESSAGE*
rfx_message_list_get(const RFX_MESSAGE_LIST* WINPR_RESTRICT messages, size_t idx);
FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* context, wStream* s,
const RFX_MESSAGE* message);
FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* WINPR_RESTRICT context,
wStream* WINPR_RESTRICT s,
const RFX_MESSAGE* WINPR_RESTRICT message);
FREERDP_API void rfx_context_free(RFX_CONTEXT* context);
@@ -124,7 +132,7 @@ extern "C"
FREERDP_API BOOL rfx_context_reset(RFX_CONTEXT* WINPR_RESTRICT context, UINT32 width,
UINT32 height);
FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* context, RLGR_MODE mode);
FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* WINPR_RESTRICT context, RLGR_MODE mode);
/** Getter for RFX mode
* @param context The RFX context to query
@@ -170,8 +178,9 @@ extern "C"
* @since version 3.0.0
* @return \b TRUE in case of success, \b FALSE for any error
*/
FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* rfx, wStream* s,
const RFX_MESSAGE* msg);
FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* WINPR_RESTRICT rfx,
wStream* WINPR_RESTRICT s,
const RFX_MESSAGE* WINPR_RESTRICT msg);
#ifdef __cplusplus
}

View File

@@ -45,7 +45,7 @@ extern "C"
UINT32* WINPR_RESTRICT pDstSize, UINT32 flags);
FREERDP_API int zgfx_compress(ZGFX_CONTEXT* WINPR_RESTRICT zgfx,
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
BYTE** WINPR_RESTRICT ppDstData, UINT32* pDstSize,
BYTE** WINPR_RESTRICT ppDstData, UINT32* WINPR_RESTRICT pDstSize,
UINT32* WINPR_RESTRICT pFlags);
FREERDP_API int zgfx_compress_to_stream(ZGFX_CONTEXT* WINPR_RESTRICT zgfx,
wStream* WINPR_RESTRICT sDst,

View File

@@ -1207,7 +1207,7 @@ error_nsc:
return NULL;
}
void clear_context_free(CLEAR_CONTEXT* clear)
void clear_context_free(CLEAR_CONTEXT* WINPR_RESTRICT clear)
{
if (!clear)
return;

View File

@@ -1127,8 +1127,9 @@ BOOL freerdp_image_fill(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 n
return TRUE;
}
BOOL freerdp_image_fill_ex(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color, UINT32 flags)
BOOL freerdp_image_fill_ex(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep,
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color,
UINT32 flags)
{
if (FreeRDPColorHasAlpha(DstFormat) && ((flags & FREERDP_IMAGE_FILL_IGNORE_ALPHA) != 0))
{

View File

@@ -2379,8 +2379,9 @@ fail:
return rc;
}
BOOL progressive_rfx_write_message_progressive_simple(PROGRESSIVE_CONTEXT* progressive, wStream* s,
const RFX_MESSAGE* msg)
BOOL progressive_rfx_write_message_progressive_simple(
PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s,
const RFX_MESSAGE* WINPR_RESTRICT msg)
{
RFX_CONTEXT* context = NULL;

View File

@@ -377,7 +377,8 @@ static void nsc_encode_subsampling_sse2(NSC_CONTEXT* context)
}
}
static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanline)
static BOOL nsc_encode_sse2(NSC_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT data,
UINT32 scanline)
{
if (!nsc_encode_argb_to_aycocg_sse2(context, data, scanline))
return FALSE;

View File

@@ -32,14 +32,18 @@ extern "C"
{
#endif
FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData, UINT32 SrcSize,
BYTE* pDstBuffer, const BYTE** ppDstData, UINT32* pDstSize,
UINT32* pFlags);
FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData,
UINT32 SrcSize, const BYTE** ppDstData, UINT32* pDstSize,
UINT32 flags);
FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush,
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
BYTE* WINPR_RESTRICT pDstBuffer,
const BYTE** WINPR_RESTRICT ppDstData,
UINT32* WINPR_RESTRICT pDstSize,
UINT32* WINPR_RESTRICT pFlags);
FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush,
const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize,
const BYTE** WINPR_RESTRICT ppDstData,
UINT32* WINPR_RESTRICT pDstSize, UINT32 flags);
FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* xcrush, BOOL flush);
FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush, BOOL flush);
FREERDP_LOCAL XCRUSH_CONTEXT* xcrush_context_new(BOOL Compressor);
FREERDP_LOCAL void xcrush_context_free(XCRUSH_CONTEXT* xcrush);

View File

@@ -34,9 +34,10 @@ static INT16 convert(UINT8 raw, int shift)
}
/* ------------------------------------------------------------------------- */
static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* pSrc, INT32 srcStep, BYTE* pDst,
UINT32 DstFormat, INT32 dstStep, UINT32 width,
UINT32 height, UINT8 shift, BOOL withAlpha)
static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat,
INT32 dstStep, UINT32 width, UINT32 height, UINT8 shift,
BOOL withAlpha)
{
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, TRUE);

View File

@@ -30,8 +30,9 @@
#define ALPHA(_k_) (((_k_)&0xFF000000U) >> 24)
/* ------------------------------------------------------------------------- */
static pstatus_t general_alphaComp_argb(const BYTE* pSrc1, UINT32 src1Step, const BYTE* pSrc2,
UINT32 src2Step, BYTE* pDst, UINT32 dstStep, UINT32 width,
static pstatus_t general_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step,
const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step,
BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width,
UINT32 height)
{
for (size_t y = 0; y < height; y++)

View File

@@ -24,7 +24,8 @@
/* ----------------------------------------------------------------------------
* 32-bit AND with a constant.
*/
static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len)
static pstatus_t general_andC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val,
UINT32* WINPR_RESTRICT pDst, INT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -38,7 +39,8 @@ static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst,
/* ----------------------------------------------------------------------------
* 32-bit OR with a constant.
*/
static pstatus_t general_orC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len)
static pstatus_t general_orC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val,
UINT32* WINPR_RESTRICT pDst, INT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;

View File

@@ -84,7 +84,8 @@ static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, co
}
/* ------------------------------------------------------------------------- */
static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
static pstatus_t general_copy_8u(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst,
INT32 len)
{
if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len))
{
@@ -103,8 +104,9 @@ static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
* The addresses are assumed to have been already offset to the upper-left
* corners of the source and destination region of interest.
*/
static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
INT32 width, INT32 height)
static pstatus_t general_copy_8u_AC4r(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
BYTE* WINPR_RESTRICT pDst, INT32 dstStep, INT32 width,
INT32 height)
{
const BYTE* src = pSrc;
BYTE* dst = pDst;
@@ -420,7 +422,7 @@ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims)
prims->copy_no_overlap = generic_image_copy_no_overlap;
}
void primitives_init_copy_opt(primitives_t* prims)
void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims)
{
primitives_init_copy_sse41(prims);
#if defined(WITH_AVX2)

View File

@@ -25,21 +25,21 @@
#include "prim_set.h"
/* ========================================================================= */
static pstatus_t general_set_8u(BYTE val, BYTE* pDst, UINT32 len)
static pstatus_t general_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len)
{
memset((void*)pDst, (int)val, (size_t)len);
return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
static pstatus_t general_zero(void* pDst, size_t len)
static pstatus_t general_zero(void* WINPR_RESTRICT pDst, size_t len)
{
memset(pDst, 0, len);
return PRIMITIVES_SUCCESS;
}
/* ========================================================================= */
static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len)
static pstatus_t general_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len)
{
INT32* dptr = pDst;
size_t span = 0;
@@ -78,7 +78,7 @@ static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len)
}
/* ------------------------------------------------------------------------- */
static pstatus_t general_set_32u(UINT32 val, UINT32* pDst, UINT32 len)
static pstatus_t general_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len)
{
UINT32* dptr = pDst;
size_t span = 0;

View File

@@ -31,8 +31,7 @@
static primitives_t* generic = NULL;
/* ------------------------------------------------------------------------- */
static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pDst,
UINT32 len)
static pstatus_t ssse3_sign_16s(const INT16* pSrc, INT16* pDst, UINT32 len)
{
const INT16* sptr = pSrc;
INT16* dptr = pDst;

View File

@@ -134,7 +134,8 @@
* PRE = preload xmm0 with the constant.
*/
#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
static pstatus_t _name_(const _type_* pSrc, _type_ val, _type_* pDst, INT32 ilen) \
static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \
_type_* WINPR_RESTRICT pDst, INT32 ilen) \
{ \
size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen); \
int shifts = 0; \
@@ -232,78 +233,80 @@
/* ----------------------------------------------------------------------------
* SSD = Source1, Source2, Destination
*/
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
static pstatus_t _name_(const _type_* pSrc1, const _type_* pSrc2, _type_* pDst, UINT32 len) \
{ \
int shifts = 0; \
const _type_* sptr1 = pSrc1; \
const _type_* sptr2 = pSrc2; \
_type_* dptr = pDst; \
size_t count; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc1, pSrc2, pDst, len); \
} \
if (sizeof(_type_) == 1) \
shifts = 1; \
else if (sizeof(_type_) == 2) \
shifts = 2; \
else if (sizeof(_type_) == 4) \
shifts = 3; \
else if (sizeof(_type_) == 8) \
shifts = 4; \
/* Use 4 128-bit SSE registers. */ \
count = len >> (7 - shifts); \
len -= count << (7 - shifts); \
/* Aligned loads */ \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm2 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm3 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm4 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm5 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm6 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm7 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm4); \
xmm1 = _op_(xmm1, xmm5); \
xmm2 = _op_(xmm2, xmm6); \
xmm3 = _op_(xmm3, xmm7); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm1); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm2); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm3); \
dptr += (16 / sizeof(_type_)); \
} \
/* Use a single 128-bit SSE register. */ \
count = len >> (5 - shifts); \
len -= count << (5 - shifts); \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm1); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) \
{ \
_slowWay_; \
} \
return PRIMITIVES_SUCCESS; \
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc1, \
const _type_* WINPR_RESTRICT pSrc2, _type_* WINPR_RESTRICT pDst, \
UINT32 len) \
{ \
int shifts = 0; \
const _type_* sptr1 = pSrc1; \
const _type_* sptr2 = pSrc2; \
_type_* dptr = pDst; \
size_t count; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc1, pSrc2, pDst, len); \
} \
if (sizeof(_type_) == 1) \
shifts = 1; \
else if (sizeof(_type_) == 2) \
shifts = 2; \
else if (sizeof(_type_) == 4) \
shifts = 3; \
else if (sizeof(_type_) == 8) \
shifts = 4; \
/* Use 4 128-bit SSE registers. */ \
count = len >> (7 - shifts); \
len -= count << (7 - shifts); \
/* Aligned loads */ \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm2 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm3 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm4 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm5 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm6 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
__m128i xmm7 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm4); \
xmm1 = _op_(xmm1, xmm5); \
xmm2 = _op_(xmm2, xmm6); \
xmm3 = _op_(xmm3, xmm7); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm1); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm2); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm3); \
dptr += (16 / sizeof(_type_)); \
} \
/* Use a single 128-bit SSE register. */ \
count = len >> (5 - shifts); \
len -= count << (5 - shifts); \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr1); \
sptr1 += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr2); \
sptr2 += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm1); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) \
{ \
_slowWay_; \
} \
return PRIMITIVES_SUCCESS; \
}