[simd] move CPU feature detection

Use static inline functions in the headers to do CPU feature detection.
The C files are already compiled/linked with SIMD support, so code in them
might already use instructions from that feature set before the check runs.
This commit is contained in:
akallabeth
2025-02-11 20:22:14 +01:00
committed by Armin Novak
parent fd13e9b919
commit 22efdfbc8f
32 changed files with 300 additions and 173 deletions

View File

@@ -32,12 +32,9 @@
#define TAG FREERDP_TAG("codec.nsc.neon")
#endif
void nsc_init_neon(NSC_CONTEXT* context)
void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context)
{
#if defined(NEON_INTRINSICS_ENABLED)
if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
return;
WLog_WARN(TAG, "TODO: Implement neon optimized version of this function");
#endif
}

View File

@@ -20,9 +20,18 @@
#ifndef FREERDP_LIB_CODEC_NSC_NEON_H
#define FREERDP_LIB_CODEC_NSC_NEON_H
#include <winpr/sysinfo.h>
#include <freerdp/codec/nsc.h>
#include <freerdp/api.h>
FREERDP_LOCAL void nsc_init_neon(NSC_CONTEXT* context);
FREERDP_LOCAL void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context);
/* Runtime gate for the NSC NEON setup: nsc_init_neon_int() is called only when
 * the CPU reports PF_ARM_NEON_INSTRUCTIONS_AVAILABLE; otherwise nothing is done. */
static inline void nsc_init_neon(NSC_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		nsc_init_neon_int(context);
}
#endif /* FREERDP_LIB_CODEC_NSC_NEON_H */

View File

@@ -523,20 +523,16 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp)
}
#endif // NEON_INTRINSICS_ENABLED
void rfx_init_neon(RFX_CONTEXT* context)
void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context)
{
#if defined(NEON_INTRINSICS_ENABLED)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
DEBUG_RFX("Using NEON optimizations");
PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode,
"rfx_quantization_decode_NEON");
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
context->quantization_decode = rfx_quantization_decode_NEON;
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
}
DEBUG_RFX("Using NEON optimizations");
PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON");
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
context->quantization_decode = rfx_quantization_decode_NEON;
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
#else
WINPR_UNUSED(context);
#endif

View File

@@ -20,9 +20,18 @@
#ifndef FREERDP_LIB_CODEC_RFX_NEON_H
#define FREERDP_LIB_CODEC_RFX_NEON_H
#include <winpr/sysinfo.h>
#include <freerdp/codec/rfx.h>
#include <freerdp/api.h>
FREERDP_LOCAL void rfx_init_neon(RFX_CONTEXT* context);
FREERDP_LOCAL void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context);
/* Forwards to rfx_init_neon_int() only if the processor advertises NEON
 * (PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); without it the context is not touched. */
static inline void rfx_init_neon(RFX_CONTEXT* WINPR_RESTRICT context)
{
	const BOOL haveNeon = IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);

	if (haveNeon)
		rfx_init_neon_int(context);
}
#endif /* FREERDP_LIB_CODEC_RFX_NEON_H */

View File

@@ -389,13 +389,9 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl
}
#endif
void nsc_init_sse2(NSC_CONTEXT* context)
void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
return;
PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2")
context->encode = nsc_encode_sse2;
#else

View File

@@ -20,9 +20,19 @@
#ifndef FREERDP_LIB_CODEC_NSC_SSE2_H
#define FREERDP_LIB_CODEC_NSC_SSE2_H
#include <winpr/sysinfo.h>
#include <freerdp/codec/nsc.h>
#include <freerdp/api.h>
FREERDP_LOCAL void nsc_init_sse2(NSC_CONTEXT* context);
FREERDP_LOCAL void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context);
/* Runtime gate for the NSC SSE setup: nsc_init_sse2_int() runs only when both
 * the SSE2 and SSE3 feature checks succeed (checked in that order). */
static inline void nsc_init_sse2(NSC_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
	{
		nsc_init_sse2_int(context);
	}
}
#endif /* FREERDP_LIB_CODEC_NSC_SSE2_H */

View File

@@ -451,13 +451,9 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE
}
#endif
void rfx_init_sse2(RFX_CONTEXT* context)
void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
return;
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2")
PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2")
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2")

View File

@@ -20,9 +20,20 @@
#ifndef FREERDP_LIB_CODEC_RFX_SSE2_H
#define FREERDP_LIB_CODEC_RFX_SSE2_H
#include <winpr/sysinfo.h>
#include <freerdp/codec/rfx.h>
#include <freerdp/api.h>
FREERDP_LOCAL void rfx_init_sse2(RFX_CONTEXT* context);
FREERDP_LOCAL void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context);
/* Calls rfx_init_sse2_int() only if the CPU reports both SSE2 and SSE3;
 * otherwise returns without modifying the context. */
static inline void rfx_init_sse2(RFX_CONTEXT* WINPR_RESTRICT context)
{
	const BOOL haveSse = IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	                     IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE);

	if (haveSse)
		rfx_init_sse2_int(context);
}
#endif /* FREERDP_LIB_CODEC_RFX_SSE2_H */

View File

@@ -154,17 +154,13 @@ static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(NEON_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_YCoCg(prims);
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
}
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -776,19 +776,14 @@ static pstatus_t neon_YUV420CombineToYUV444(avc444_frame_type type,
}
#endif
void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims)
void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(NEON_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_YUV(prims);
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R;
prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R;
prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444;
}
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R;
prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R;
prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -343,19 +343,15 @@ neon_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a
#endif /* NEON_INTRINSICS_ENABLED */
/* ------------------------------------------------------------------------- */
void primitives_init_colors_neon(primitives_t* prims)
void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(NEON_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_colors(prims);
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
}
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -22,10 +22,34 @@
#define FREERDP_LIB_PRIM_YCoCg_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims);
void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_YCoCg() first, then calls
 * primitives_init_YCoCg_ssse3_int() only when the SSSE3, SSE2 and SSE3
 * feature checks all pass (evaluated in that order). */
static inline void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YCoCg(prims);

	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
	    IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
	{
		primitives_init_YCoCg_ssse3_int(prims);
	}
}
FREERDP_LOCAL void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_YCoCg() first, then calls
 * primitives_init_YCoCg_neon_int() only when the CPU reports
 * PF_ARM_NEON_INSTRUCTIONS_AVAILABLE. */
static inline void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YCoCg(prims);

	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		primitives_init_YCoCg_neon_int(prims);
}
#endif

View File

@@ -22,10 +22,34 @@
#define FREERDP_LIB_PRIM_YUV_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims);
void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_YUV() first, then calls
 * primitives_init_YUV_sse41_int() only when both SSE4.1 checks
 * (PF_EX_SSE41 and PF_SSE4_1_INSTRUCTIONS_AVAILABLE) succeed. */
static inline void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YUV(prims);

	const BOOL haveSse41 = IsProcessorFeaturePresentEx(PF_EX_SSE41) &&
	                       IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE);
	if (haveSse41)
		primitives_init_YUV_sse41_int(prims);
}
FREERDP_LOCAL void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_YUV() first, then calls
 * primitives_init_YUV_neon_int() only when the CPU reports NEON support.
 *
 * The gate must test PF_ARM_NEON_INSTRUCTIONS_AVAILABLE. Testing the x86
 * SSE4.1 flag here would skip the NEON setup, because that flag does not
 * describe an ARM feature. */
static inline void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YUV(prims);
	if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		return;

	primitives_init_YUV_neon_int(prims);
}
#endif

View File

@@ -22,9 +22,23 @@
#define FREERDP_LIB_PRIM_ADD_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_add() first, then calls
 * primitives_init_add_sse3_int() only when SSE2 and SSE3 are both reported
 * (SSE3 is required for LDDQU). */
static inline void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_add(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
	{
		primitives_init_add_sse3_int(prims);
	}
}
#endif

View File

@@ -22,9 +22,23 @@
#define FREERDP_LIB_PRIM_ALPHA_COMP_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_alphaComp() first, then calls
 * primitives_init_alphaComp_sse3_int() only when SSE2 and SSE3 are both
 * reported (SSE3 is required for LDDQU). */
static inline void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_alphaComp(prims);

	const BOOL haveSse3 =
	    IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); /* for LDDQU */
	if (haveSse3)
		primitives_init_alphaComp_sse3_int(prims);
}
#endif

View File

@@ -22,9 +22,23 @@
#define FREERDP_LIB_PRIM_ANDOR_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_andor() first, then calls
 * primitives_init_andor_sse3_int() only when both the SSE2 and SSE3
 * feature checks pass. */
static inline void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_andor(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
	{
		primitives_init_andor_sse3_int(prims);
	}
}
#endif

View File

@@ -22,10 +22,33 @@
#define FREERDP_LIB_PRIM_COLORS_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_colors_sse2(primitives_t* prims);
void primitives_init_colors_neon(primitives_t* prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_colors() first, then calls
 * primitives_init_colors_sse2_int() only when both the SSE2 and SSE3
 * feature checks pass. */
static inline void primitives_init_colors_sse2(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_colors(prims);

	const BOOL haveSse = IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	                     IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE);
	if (haveSse)
		primitives_init_colors_sse2_int(prims);
}
FREERDP_LOCAL void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_colors() first, then calls
 * primitives_init_colors_neon_int() only when the CPU reports
 * PF_ARM_NEON_INSTRUCTIONS_AVAILABLE. */
static inline void primitives_init_colors_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_colors(prims);

	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		primitives_init_colors_neon_int(prims);
}
#endif

View File

@@ -22,6 +22,8 @@
#define FREERDP_LIB_PRIM_COPY_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
@@ -38,10 +40,24 @@ pstatus_t generic_image_copy_no_overlap_memcpy(
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
UINT32 flags);
void primitives_init_copy_sse41(primitives_t* prims);
FREERDP_LOCAL void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims);
/* Calls primitives_init_copy_sse41_int() only when the CPU reports
 * PF_SSE4_1_INSTRUCTIONS_AVAILABLE; otherwise prims is left unchanged. */
static inline void primitives_init_copy_sse41(primitives_t* WINPR_RESTRICT prims)
{
	if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
		primitives_init_copy_sse41_int(prims);
}
#if defined(WITH_AVX2)
void primitives_init_copy_avx2(primitives_t* prims);
FREERDP_LOCAL void primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims);
/* Calls primitives_init_copy_avx2_int() only when the CPU reports
 * PF_AVX2_INSTRUCTIONS_AVAILABLE; otherwise prims is left unchanged. */
static inline void primitives_init_copy_avx2(primitives_t* WINPR_RESTRICT prims)
{
	const BOOL haveAvx2 = IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE);

	if (haveAvx2)
		primitives_init_copy_avx2_int(prims);
}
#endif
#endif

View File

@@ -22,9 +22,22 @@
#define FREERDP_LIB_PRIM_SET_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_set() first, then calls
 * primitives_init_set_sse2_int() only when both the SSE2 and SSE3
 * feature checks pass. */
static inline void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_set(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
	{
		primitives_init_set_sse2_int(prims);
	}
}
#endif

View File

@@ -22,9 +22,22 @@
#define FREERDP_LIB_PRIM_SHIFT_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
#include "prim_internal.h"
extern void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_shift() first, then calls
 * primitives_init_shift_sse3_int() only when both the SSE2 and SSE3
 * feature checks pass. */
static inline void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_shift(prims);

	const BOOL haveSse3 = IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	                      IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE);
	if (haveSse3)
		primitives_init_shift_sse3_int(prims);
}
#endif

View File

@@ -22,9 +22,22 @@
#define FREERDP_LIB_PRIM_SIGN_H
#include <winpr/wtypes.h>
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/primitives.h>
void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims);
#include "prim_internal.h"
FREERDP_LOCAL void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims);
/* Always runs primitives_init_sign() first, then calls
 * primitives_init_sign_ssse3_int() only when the SSSE3 (PF_EX_SSSE3) and
 * SSE3 feature checks both pass, evaluated in that order. */
static inline void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_sign(prims);

	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
	{
		primitives_init_sign_ssse3_int(prims);
	}
}
#endif

View File

@@ -411,19 +411,13 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT3
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_YCoCg(prims);
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R;
}
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -1749,23 +1749,18 @@ static pstatus_t sse41_YUV420CombineToYUV444(avc444_frame_type type,
}
#endif
void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_YUV(prims);
if (IsProcessorFeaturePresentEx(PF_EX_SSE41) &&
IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations");
prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420;
prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV;
prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2;
prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB;
prims->YUV444ToRGB_8u_P3AC4R = sse41_YUV444ToRGB_8u_P3AC4R;
prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444;
}
WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations");
prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420;
prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV;
prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2;
prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB;
prims->YUV444ToRGB_8u_P3AC4R = sse41_YUV444ToRGB_8u_P3AC4R;
prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or sse41 intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -171,20 +171,14 @@ static pstatus_t sse3_add_16s_inplace(INT16* WINPR_RESTRICT pSrcDst1,
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_add(prims);
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->add_16s = sse3_add_16s;
prims->add_16s_inplace = sse3_add_16s_inplace;
}
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->add_16s = sse3_add_16s;
prims->add_16s_inplace = sse3_add_16s_inplace;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -201,18 +201,12 @@ static pstatus_t sse2_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 sr
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_alphaComp(prims);
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->alphaComp_argb = sse2_alphaComp_argb;
}
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->alphaComp_argb = sse2_alphaComp_argb;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");

View File

@@ -38,19 +38,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, generic->orC_32u, _mm_or_si128, *dptr
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_andor(prims);
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;
}
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");

View File

@@ -1211,21 +1211,16 @@ sse2_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a
}
#endif
void primitives_init_colors_sse2(primitives_t* prims)
void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_colors(prims);
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
}
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");

View File

@@ -273,14 +273,11 @@ static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_copy_avx2(primitives_t* prims)
void primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "AVX2 optimizations");
prims->copy_no_overlap = avx2_image_copy_no_overlap;
}
WLog_VRB(PRIM_TAG, "AVX2 optimizations");
prims->copy_no_overlap = avx2_image_copy_no_overlap;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or WITH_AVX2 or AVX2 intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -251,14 +251,11 @@ static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_copy_sse41(primitives_t* prims)
void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE4.1 optimizations");
prims->copy_no_overlap = sse_image_copy_no_overlap;
}
WLog_VRB(PRIM_TAG, "SSE4.1 optimizations");
prims->copy_no_overlap = sse_image_copy_no_overlap;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE4.1 intrinsics not available");
WINPR_UNUSED(prims);

View File

@@ -214,21 +214,17 @@ static pstatus_t sse2_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len)
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_set(prims);
/* Pick tuned versions if possible. */
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;
prims->set_32u = sse2_set_32u;
}
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;
prims->set_32u = sse2_set_32u;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");

View File

@@ -140,22 +140,17 @@ static pstatus_t sse2_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDst, UINT32
*/
/* ------------------------------------------------------------------------- */
void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_shift(prims);
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace;
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;
prims->lShiftC_16u = sse2_lShiftC_16u;
prims->rShiftC_16u = sse2_rShiftC_16u;
}
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace;
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;
prims->lShiftC_16u = sse2_lShiftC_16u;
prims->rShiftC_16u = sse2_rShiftC_16u;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");

View File

@@ -169,20 +169,16 @@ static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_R
#endif /* SSE_AVX_INTRINSICS_ENABLED */
/* ------------------------------------------------------------------------- */
void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims)
{
#if defined(SSE_AVX_INTRINSICS_ENABLED)
generic = primitives_get_generic();
primitives_init_sign(prims);
/* Pick tuned versions if possible. */
/* I didn't spot an IPP version of this. */
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->sign_16s = ssse3_sign_16s;
}
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->sign_16s = ssse3_sign_16s;
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSSE3/SSE3 intrinsics not available");