diff --git a/libfreerdp/codec/neon/nsc_neon.c b/libfreerdp/codec/neon/nsc_neon.c index cfa7b7f58..ca5474891 100644 --- a/libfreerdp/codec/neon/nsc_neon.c +++ b/libfreerdp/codec/neon/nsc_neon.c @@ -32,12 +32,9 @@ #define TAG FREERDP_TAG("codec.nsc.neon") #endif -void nsc_init_neon(NSC_CONTEXT* context) +void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context) { #if defined(NEON_INTRINSICS_ENABLED) - if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) - return; - WLog_WARN(TAG, "TODO: Implement neon optimized version of this function"); #endif } diff --git a/libfreerdp/codec/neon/nsc_neon.h b/libfreerdp/codec/neon/nsc_neon.h index 159ab246a..a5eaeb6dd 100644 --- a/libfreerdp/codec/neon/nsc_neon.h +++ b/libfreerdp/codec/neon/nsc_neon.h @@ -20,9 +20,18 @@ #ifndef FREERDP_LIB_CODEC_NSC_NEON_H #define FREERDP_LIB_CODEC_NSC_NEON_H +#include + #include #include -FREERDP_LOCAL void nsc_init_neon(NSC_CONTEXT* context); +FREERDP_LOCAL void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context); +static inline void nsc_init_neon(NSC_CONTEXT* WINPR_RESTRICT context) +{ + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + + nsc_init_neon_int(context); +} #endif /* FREERDP_LIB_CODEC_NSC_NEON_H */ diff --git a/libfreerdp/codec/neon/rfx_neon.c b/libfreerdp/codec/neon/rfx_neon.c index 131d91d94..45a8d7a2a 100644 --- a/libfreerdp/codec/neon/rfx_neon.c +++ b/libfreerdp/codec/neon/rfx_neon.c @@ -523,20 +523,16 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp) } #endif // NEON_INTRINSICS_ENABLED -void rfx_init_neon(RFX_CONTEXT* context) +void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context) { #if defined(NEON_INTRINSICS_ENABLED) - if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) - { - DEBUG_RFX("Using NEON optimizations"); - PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON"); - PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, - 
"rfx_quantization_decode_NEON"); - PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON"); - context->quantization_decode = rfx_quantization_decode_NEON; - context->dwt_2d_decode = rfx_dwt_2d_decode_NEON; - context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon; - } + DEBUG_RFX("Using NEON optimizations"); + PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON"); + PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON"); + PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON"); + context->quantization_decode = rfx_quantization_decode_NEON; + context->dwt_2d_decode = rfx_dwt_2d_decode_NEON; + context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon; #else WINPR_UNUSED(context); #endif diff --git a/libfreerdp/codec/neon/rfx_neon.h b/libfreerdp/codec/neon/rfx_neon.h index 472d260bb..a17101d10 100644 --- a/libfreerdp/codec/neon/rfx_neon.h +++ b/libfreerdp/codec/neon/rfx_neon.h @@ -20,9 +20,18 @@ #ifndef FREERDP_LIB_CODEC_RFX_NEON_H #define FREERDP_LIB_CODEC_RFX_NEON_H +#include + #include #include -FREERDP_LOCAL void rfx_init_neon(RFX_CONTEXT* context); +FREERDP_LOCAL void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context); +static inline void rfx_init_neon(RFX_CONTEXT* WINPR_RESTRICT context) +{ + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + + rfx_init_neon_int(context); +} #endif /* FREERDP_LIB_CODEC_RFX_NEON_H */ diff --git a/libfreerdp/codec/sse/nsc_sse2.c b/libfreerdp/codec/sse/nsc_sse2.c index f3ef4ce5a..784fe4211 100644 --- a/libfreerdp/codec/sse/nsc_sse2.c +++ b/libfreerdp/codec/sse/nsc_sse2.c @@ -389,13 +389,9 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl } #endif -void nsc_init_sse2(NSC_CONTEXT* context) +void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context) { #if defined(SSE_AVX_INTRINSICS_ENABLED) - if 
(!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || - !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - return; - PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2") context->encode = nsc_encode_sse2; #else diff --git a/libfreerdp/codec/sse/nsc_sse2.h b/libfreerdp/codec/sse/nsc_sse2.h index 8b74cabf8..d15857c62 100644 --- a/libfreerdp/codec/sse/nsc_sse2.h +++ b/libfreerdp/codec/sse/nsc_sse2.h @@ -20,9 +20,19 @@ #ifndef FREERDP_LIB_CODEC_NSC_SSE2_H #define FREERDP_LIB_CODEC_NSC_SSE2_H +#include + #include #include -FREERDP_LOCAL void nsc_init_sse2(NSC_CONTEXT* context); +FREERDP_LOCAL void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context); +static inline void nsc_init_sse2(NSC_CONTEXT* WINPR_RESTRICT context) +{ + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + nsc_init_sse2_int(context); +} #endif /* FREERDP_LIB_CODEC_NSC_SSE2_H */ diff --git a/libfreerdp/codec/sse/rfx_sse2.c b/libfreerdp/codec/sse/rfx_sse2.c index 6eee9c957..9f76d38e8 100644 --- a/libfreerdp/codec/sse/rfx_sse2.c +++ b/libfreerdp/codec/sse/rfx_sse2.c @@ -451,13 +451,9 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE } #endif -void rfx_init_sse2(RFX_CONTEXT* context) +void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context) { #if defined(SSE_AVX_INTRINSICS_ENABLED) - if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || - !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - return; - PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2") PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2") PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2") diff --git a/libfreerdp/codec/sse/rfx_sse2.h b/libfreerdp/codec/sse/rfx_sse2.h index d15c1854d..d852ca5bd 100644 --- a/libfreerdp/codec/sse/rfx_sse2.h +++ 
b/libfreerdp/codec/sse/rfx_sse2.h @@ -20,9 +20,20 @@ #ifndef FREERDP_LIB_CODEC_RFX_SSE2_H #define FREERDP_LIB_CODEC_RFX_SSE2_H +#include + #include #include -FREERDP_LOCAL void rfx_init_sse2(RFX_CONTEXT* context); +FREERDP_LOCAL void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context); + +static inline void rfx_init_sse2(RFX_CONTEXT* WINPR_RESTRICT context) +{ + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + rfx_init_sse2_int(context); +} #endif /* FREERDP_LIB_CODEC_RFX_SSE2_H */ diff --git a/libfreerdp/primitives/neon/prim_YCoCg_neon.c b/libfreerdp/primitives/neon/prim_YCoCg_neon.c index 1edc24ecc..1970f1500 100644 --- a/libfreerdp/primitives/neon/prim_YCoCg_neon.c +++ b/libfreerdp/primitives/neon/prim_YCoCg_neon.c @@ -154,17 +154,13 @@ static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 #endif /* ------------------------------------------------------------------------- */ -void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims) +void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims) { #if defined(NEON_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_YCoCg(prims); - if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "NEON optimizations"); - prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R; - } + WLog_VRB(PRIM_TAG, "NEON optimizations"); + prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/neon/prim_YUV_neon.c b/libfreerdp/primitives/neon/prim_YUV_neon.c index 1ef3d6f92..53ce9c5be 100644 --- a/libfreerdp/primitives/neon/prim_YUV_neon.c +++ b/libfreerdp/primitives/neon/prim_YUV_neon.c @@ -776,19 +776,14 @@ static pstatus_t neon_YUV420CombineToYUV444(avc444_frame_type type, } #endif -void 
primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims) +void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims) { #if defined(NEON_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_YUV(prims); - - if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "NEON optimizations"); - prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R; - prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R; - prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444; - } + WLog_VRB(PRIM_TAG, "NEON optimizations"); + prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R; + prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R; + prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/neon/prim_colors_neon.c b/libfreerdp/primitives/neon/prim_colors_neon.c index 6f2715f13..f471a1955 100644 --- a/libfreerdp/primitives/neon/prim_colors_neon.c +++ b/libfreerdp/primitives/neon/prim_colors_neon.c @@ -343,19 +343,15 @@ neon_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a #endif /* NEON_INTRINSICS_ENABLED */ /* ------------------------------------------------------------------------- */ -void primitives_init_colors_neon(primitives_t* prims) +void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims) { #if defined(NEON_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_colors(prims); - if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "NEON optimizations"); - prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R; - prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R; - prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; - } + WLog_VRB(PRIM_TAG, "NEON optimizations"); + prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R; + 
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R; + prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/prim_YCoCg.h b/libfreerdp/primitives/prim_YCoCg.h index bd878d5b6..d411a95bb 100644 --- a/libfreerdp/primitives/prim_YCoCg.h +++ b/libfreerdp/primitives/prim_YCoCg.h @@ -22,10 +22,34 @@ #define FREERDP_LIB_PRIM_YCoCg_H #include +#include + #include #include -void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims); -void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_YCoCg(prims); + + if (!IsProcessorFeaturePresentEx(PF_EX_SSSE3) || + !IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_YCoCg_ssse3_int(prims); +} + +FREERDP_LOCAL void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_YCoCg(prims); + + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + primitives_init_YCoCg_neon_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_YUV.h b/libfreerdp/primitives/prim_YUV.h index b441c3d53..de7d0fe42 100644 --- a/libfreerdp/primitives/prim_YUV.h +++ b/libfreerdp/primitives/prim_YUV.h @@ -22,10 +22,34 @@ #define FREERDP_LIB_PRIM_YUV_H #include +#include + #include #include -void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims); -void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_YUV_sse41_int(primitives_t* 
WINPR_RESTRICT prims); +static inline void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_YUV(prims); + + if (!IsProcessorFeaturePresentEx(PF_EX_SSE41) || + !IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_YUV_sse41_int(prims); +} + +FREERDP_LOCAL void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_YUV(prims); + + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_YUV_neon_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_add.h b/libfreerdp/primitives/prim_add.h index 845759380..9949e1821 100644 --- a/libfreerdp/primitives/prim_add.h +++ b/libfreerdp/primitives/prim_add.h @@ -22,9 +22,23 @@ #define FREERDP_LIB_PRIM_ADD_H #include +#include + #include #include -void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_add(prims); + + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ + return; + + primitives_init_add_sse3_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_alphaComp.h b/libfreerdp/primitives/prim_alphaComp.h index b49460f73..94d6452e9 100644 --- a/libfreerdp/primitives/prim_alphaComp.h +++ b/libfreerdp/primitives/prim_alphaComp.h @@ -22,9 +22,23 @@ #define FREERDP_LIB_PRIM_ALPHA_COMP_H #include +#include + #include #include -void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_alphaComp_sse3(primitives_t*
WINPR_RESTRICT prims) +{ + primitives_init_alphaComp(prims); + + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ + return; + + primitives_init_alphaComp_sse3_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_andor.h b/libfreerdp/primitives/prim_andor.h index c82305b00..324a6a660 100644 --- a/libfreerdp/primitives/prim_andor.h +++ b/libfreerdp/primitives/prim_andor.h @@ -22,9 +22,23 @@ #define FREERDP_LIB_PRIM_ANDOR_H #include +#include + #include #include -void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_andor(prims); + + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_andor_sse3_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_colors.h b/libfreerdp/primitives/prim_colors.h index 65bbd43cf..a433a0ad3 100644 --- a/libfreerdp/primitives/prim_colors.h +++ b/libfreerdp/primitives/prim_colors.h @@ -22,10 +22,33 @@ #define FREERDP_LIB_PRIM_COLORS_H #include +#include + #include #include -void primitives_init_colors_sse2(primitives_t* prims); -void primitives_init_colors_neon(primitives_t* prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_colors_sse2(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_colors(prims); + + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_colors_sse2_int(prims); +} + +FREERDP_LOCAL void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims); 
+static inline void primitives_init_colors_neon(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_colors(prims); + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_colors_neon_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_copy.h b/libfreerdp/primitives/prim_copy.h index 8275adc9d..f33a75102 100644 --- a/libfreerdp/primitives/prim_copy.h +++ b/libfreerdp/primitives/prim_copy.h @@ -22,6 +22,8 @@ #define FREERDP_LIB_PRIM_COPY_H #include +#include + #include #include @@ -38,10 +40,24 @@ pstatus_t generic_image_copy_no_overlap_memcpy( SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset, UINT32 flags); -void primitives_init_copy_sse41(primitives_t* prims); +FREERDP_LOCAL void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_copy_sse41(primitives_t* WINPR_RESTRICT prims) +{ + if (!IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_copy_sse41_int(prims); +} #if defined(WITH_AVX2) -void primitives_init_copy_avx2(primitives_t* prims); +FREERDP_LOCAL void primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_copy_avx2(primitives_t* WINPR_RESTRICT prims) +{ + if (!IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_copy_avx2_int(prims); +} #endif #endif diff --git a/libfreerdp/primitives/prim_set.h b/libfreerdp/primitives/prim_set.h index 779d042cc..5d0aa5cc2 100644 --- a/libfreerdp/primitives/prim_set.h +++ b/libfreerdp/primitives/prim_set.h @@ -22,9 +22,22 @@ #define FREERDP_LIB_PRIM_SET_H #include +#include + #include #include -void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims) +{ + 
primitives_init_set(prims); + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_set_sse2_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_shift.h b/libfreerdp/primitives/prim_shift.h index ac594ea3c..91fe8fa19 100644 --- a/libfreerdp/primitives/prim_shift.h +++ b/libfreerdp/primitives/prim_shift.h @@ -22,9 +22,22 @@ #define FREERDP_LIB_PRIM_SHIFT_H #include +#include + #include #include +#include "prim_internal.h" -extern void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims); +FREERDP_LOCAL void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_shift(prims); + + if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_shift_sse3_int(prims); +} #endif diff --git a/libfreerdp/primitives/prim_sign.h b/libfreerdp/primitives/prim_sign.h index 092bb8d38..f1017b04c 100644 --- a/libfreerdp/primitives/prim_sign.h +++ b/libfreerdp/primitives/prim_sign.h @@ -22,9 +22,22 @@ #define FREERDP_LIB_PRIM_SIGN_H #include +#include + #include #include -void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims); +#include "prim_internal.h" + +FREERDP_LOCAL void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims); +static inline void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims) +{ + primitives_init_sign(prims); + if (!IsProcessorFeaturePresentEx(PF_EX_SSSE3) || + !IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) + return; + + primitives_init_sign_ssse3_int(prims); +} #endif diff --git a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c index f9bf13cfa..c1faaf223 100644 --- a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c +++ 
b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c @@ -411,19 +411,13 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT3 #endif /* ------------------------------------------------------------------------- */ -void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_YCoCg(prims); - if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && - IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); - prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R; - } + WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); + prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/sse/prim_YUV_sse4.1.c b/libfreerdp/primitives/sse/prim_YUV_sse4.1.c index db5b5251a..e0e77cd7c 100644 --- a/libfreerdp/primitives/sse/prim_YUV_sse4.1.c +++ b/libfreerdp/primitives/sse/prim_YUV_sse4.1.c @@ -1749,23 +1749,18 @@ static pstatus_t sse41_YUV420CombineToYUV444(avc444_frame_type type, } #endif -void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims) +void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_YUV(prims); - if (IsProcessorFeaturePresentEx(PF_EX_SSE41) && - IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations"); - prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420; - prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV; - prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2; - prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB; - prims->YUV444ToRGB_8u_P3AC4R = 
sse41_YUV444ToRGB_8u_P3AC4R; - prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444; - } + WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations"); + prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420; + prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV; + prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2; + prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB; + prims->YUV444ToRGB_8u_P3AC4R = sse41_YUV444ToRGB_8u_P3AC4R; + prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or sse41 intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/sse/prim_add_sse3.c b/libfreerdp/primitives/sse/prim_add_sse3.c index 67908bc2b..7d2ebc689 100644 --- a/libfreerdp/primitives/sse/prim_add_sse3.c +++ b/libfreerdp/primitives/sse/prim_add_sse3.c @@ -171,20 +171,14 @@ static pstatus_t sse3_add_16s_inplace(INT16* WINPR_RESTRICT pSrcDst1, #endif /* ------------------------------------------------------------------------- */ -void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_add(prims); - - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ - { - WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); - prims->add_16s = sse3_add_16s; - prims->add_16s_inplace = sse3_add_16s_inplace; - } + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); + prims->add_16s = sse3_add_16s; + prims->add_16s_inplace = sse3_add_16s_inplace; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c index f48e66ec3..84f3e15a4 100644 --- a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c +++ 
b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c @@ -201,18 +201,12 @@ static pstatus_t sse2_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 sr #endif /* ------------------------------------------------------------------------- */ -void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_alphaComp(prims); - - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ - { - WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); - prims->alphaComp_argb = sse2_alphaComp_argb; - } + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); + prims->alphaComp_argb = sse2_alphaComp_argb; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available"); diff --git a/libfreerdp/primitives/sse/prim_andor_sse3.c b/libfreerdp/primitives/sse/prim_andor_sse3.c index 04b485167..d1c66d38f 100644 --- a/libfreerdp/primitives/sse/prim_andor_sse3.c +++ b/libfreerdp/primitives/sse/prim_andor_sse3.c @@ -38,19 +38,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, generic->orC_32u, _mm_or_si128, *dptr #endif /* ------------------------------------------------------------------------- */ -void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_andor(prims); - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); - prims->andC_32u = sse3_andC_32u; - prims->orC_32u = sse3_orC_32u; - } + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); + prims->andC_32u = sse3_andC_32u; + prims->orC_32u = sse3_orC_32u; #else 
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available"); diff --git a/libfreerdp/primitives/sse/prim_colors_sse2.c b/libfreerdp/primitives/sse/prim_colors_sse2.c index 38520e020..9ffc1ebac 100644 --- a/libfreerdp/primitives/sse/prim_colors_sse2.c +++ b/libfreerdp/primitives/sse/prim_colors_sse2.c @@ -1211,21 +1211,16 @@ sse2_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a } #endif -void primitives_init_colors_sse2(primitives_t* prims) +void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_colors(prims); - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE2 optimizations"); - prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; - prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; - prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R; - prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3; - } + WLog_VRB(PRIM_TAG, "SSE2 optimizations"); + prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; + prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; + prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R; + prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available"); diff --git a/libfreerdp/primitives/sse/prim_copy_avx2.c b/libfreerdp/primitives/sse/prim_copy_avx2.c index 0baed39db..c88ad9fe9 100644 --- a/libfreerdp/primitives/sse/prim_copy_avx2.c +++ b/libfreerdp/primitives/sse/prim_copy_avx2.c @@ -273,14 +273,11 @@ static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD #endif /* ------------------------------------------------------------------------- */ -void primitives_init_copy_avx2(primitives_t* prims) +void 
primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) - if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "AVX2 optimizations"); - prims->copy_no_overlap = avx2_image_copy_no_overlap; - } + WLog_VRB(PRIM_TAG, "AVX2 optimizations"); + prims->copy_no_overlap = avx2_image_copy_no_overlap; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or WITH_AVX2 or AVX2 intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/sse/prim_copy_sse4_1.c b/libfreerdp/primitives/sse/prim_copy_sse4_1.c index d7dad682a..d2acddd7a 100644 --- a/libfreerdp/primitives/sse/prim_copy_sse4_1.c +++ b/libfreerdp/primitives/sse/prim_copy_sse4_1.c @@ -251,14 +251,11 @@ static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD #endif /* ------------------------------------------------------------------------- */ -void primitives_init_copy_sse41(primitives_t* prims) +void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) - if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE4.1 optimizations"); - prims->copy_no_overlap = sse_image_copy_no_overlap; - } + WLog_VRB(PRIM_TAG, "SSE4.1 optimizations"); + prims->copy_no_overlap = sse_image_copy_no_overlap; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE4.1 intrinsics not available"); WINPR_UNUSED(prims); diff --git a/libfreerdp/primitives/sse/prim_set_sse2.c b/libfreerdp/primitives/sse/prim_set_sse2.c index 45dc0f9ff..f7233856e 100644 --- a/libfreerdp/primitives/sse/prim_set_sse2.c +++ b/libfreerdp/primitives/sse/prim_set_sse2.c @@ -214,21 +214,17 @@ static pstatus_t sse2_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len) #endif /* ------------------------------------------------------------------------- */ -void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims) +void 
primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_set(prims); + /* Pick tuned versions if possible. */ - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE2 optimizations"); - prims->set_8u = sse2_set_8u; - prims->set_32s = sse2_set_32s; - prims->set_32u = sse2_set_32u; - } + WLog_VRB(PRIM_TAG, "SSE2 optimizations"); + prims->set_8u = sse2_set_8u; + prims->set_32s = sse2_set_32s; + prims->set_32u = sse2_set_32u; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available"); diff --git a/libfreerdp/primitives/sse/prim_shift_sse3.c b/libfreerdp/primitives/sse/prim_shift_sse3.c index 7709dba0e..4f9a73a2e 100644 --- a/libfreerdp/primitives/sse/prim_shift_sse3.c +++ b/libfreerdp/primitives/sse/prim_shift_sse3.c @@ -140,22 +140,17 @@ static pstatus_t sse2_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDst, UINT32 */ /* ------------------------------------------------------------------------- */ -void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_shift(prims); - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); - prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace; - prims->lShiftC_16s = sse2_lShiftC_16s; - prims->rShiftC_16s = sse2_rShiftC_16s; - prims->lShiftC_16u = sse2_lShiftC_16u; - prims->rShiftC_16u = sse2_rShiftC_16u; - } + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); + prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace; + prims->lShiftC_16s = sse2_lShiftC_16s; + prims->rShiftC_16s = sse2_rShiftC_16s; + 
prims->lShiftC_16u = sse2_lShiftC_16u; + prims->rShiftC_16u = sse2_rShiftC_16u; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available"); diff --git a/libfreerdp/primitives/sse/prim_sign_ssse3.c b/libfreerdp/primitives/sse/prim_sign_ssse3.c index 8bd41d045..57646f290 100644 --- a/libfreerdp/primitives/sse/prim_sign_ssse3.c +++ b/libfreerdp/primitives/sse/prim_sign_ssse3.c @@ -169,20 +169,16 @@ static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_R #endif /* SSE_AVX_INTRINSICS_ENABLED */ /* ------------------------------------------------------------------------- */ -void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims) +void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims) { #if defined(SSE_AVX_INTRINSICS_ENABLED) generic = primitives_get_generic(); - primitives_init_sign(prims); + /* Pick tuned versions if possible. */ /* I didn't spot an IPP version of this. */ - if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && - IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - { - WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); - prims->sign_16s = ssse3_sign_16s; - } + WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); + prims->sign_16s = ssse3_sign_16s; #else WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSSE3/SSE3 intrinsics not available");