mirror of
https://github.com/morgan9e/FreeRDP
synced 2026-04-14 00:14:11 +09:00
[simd] move CPU feature detection
Use static inline functions in the headers to do CPU feature detection. The .c files are already compiled/linked with SIMD support, so code inside them might already have used instructions from that feature set before the check runs.
This commit is contained in:
@@ -32,12 +32,9 @@
|
||||
#define TAG FREERDP_TAG("codec.nsc.neon")
|
||||
#endif
|
||||
|
||||
void nsc_init_neon(NSC_CONTEXT* context)
|
||||
void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
||||
WLog_WARN(TAG, "TODO: Implement neon optimized version of this function");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -20,9 +20,18 @@
|
||||
#ifndef FREERDP_LIB_CODEC_NSC_NEON_H
|
||||
#define FREERDP_LIB_CODEC_NSC_NEON_H
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/codec/nsc.h>
|
||||
#include <freerdp/api.h>
|
||||
|
||||
FREERDP_LOCAL void nsc_init_neon(NSC_CONTEXT* context);
|
||||
FREERDP_LOCAL void nsc_init_neon_int(NSC_CONTEXT* WINPR_RESTRICT context);
|
||||
/**
 * Header-side entry point for the NSC NEON initialization.
 *
 * Queries the CPU for NEON support and only forwards to
 * nsc_init_neon_int() when the feature is reported as present;
 * otherwise the context is left untouched.
 *
 * @param context NSC context handed on to nsc_init_neon_int()
 */
static inline void nsc_init_neon(NSC_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		nsc_init_neon_int(context);
}
|
||||
|
||||
#endif /* FREERDP_LIB_CODEC_NSC_NEON_H */
|
||||
|
||||
@@ -523,20 +523,16 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp)
|
||||
}
|
||||
#endif // NEON_INTRINSICS_ENABLED
|
||||
|
||||
void rfx_init_neon(RFX_CONTEXT* context)
|
||||
void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
DEBUG_RFX("Using NEON optimizations");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode,
|
||||
"rfx_quantization_decode_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
|
||||
context->quantization_decode = rfx_quantization_decode_NEON;
|
||||
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
|
||||
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
|
||||
}
|
||||
DEBUG_RFX("Using NEON optimizations");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
|
||||
context->quantization_decode = rfx_quantization_decode_NEON;
|
||||
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
|
||||
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
|
||||
#else
|
||||
WINPR_UNUSED(context);
|
||||
#endif
|
||||
|
||||
@@ -20,9 +20,18 @@
|
||||
#ifndef FREERDP_LIB_CODEC_RFX_NEON_H
|
||||
#define FREERDP_LIB_CODEC_RFX_NEON_H
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/codec/rfx.h>
|
||||
#include <freerdp/api.h>
|
||||
|
||||
FREERDP_LOCAL void rfx_init_neon(RFX_CONTEXT* context);
|
||||
FREERDP_LOCAL void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context);
|
||||
/**
 * Header-side entry point for the RFX NEON initialization.
 *
 * The NEON feature test happens here; rfx_init_neon_int() is called
 * only if the CPU reports NEON instructions as available. Without NEON
 * the context is not modified.
 *
 * @param context RFX context handed on to rfx_init_neon_int()
 */
static inline void rfx_init_neon(RFX_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		rfx_init_neon_int(context);
}
|
||||
|
||||
#endif /* FREERDP_LIB_CODEC_RFX_NEON_H */
|
||||
|
||||
@@ -389,13 +389,9 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl
|
||||
}
|
||||
#endif
|
||||
|
||||
void nsc_init_sse2(NSC_CONTEXT* context)
|
||||
void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
||||
PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2")
|
||||
context->encode = nsc_encode_sse2;
|
||||
#else
|
||||
|
||||
@@ -20,9 +20,19 @@
|
||||
#ifndef FREERDP_LIB_CODEC_NSC_SSE2_H
|
||||
#define FREERDP_LIB_CODEC_NSC_SSE2_H
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/codec/nsc.h>
|
||||
#include <freerdp/api.h>
|
||||
|
||||
FREERDP_LOCAL void nsc_init_sse2(NSC_CONTEXT* context);
|
||||
FREERDP_LOCAL void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context);
|
||||
/**
 * Header-side entry point for the NSC SSE2 initialization.
 *
 * Forwards to nsc_init_sse2_int() only when the CPU reports both SSE2
 * and SSE3; if either is missing the context is left untouched.
 *
 * @param context NSC context handed on to nsc_init_sse2_int()
 */
static inline void nsc_init_sse2(NSC_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		nsc_init_sse2_int(context);
}
|
||||
|
||||
#endif /* FREERDP_LIB_CODEC_NSC_SSE2_H */
|
||||
|
||||
@@ -451,13 +451,9 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE
|
||||
}
|
||||
#endif
|
||||
|
||||
void rfx_init_sse2(RFX_CONTEXT* context)
|
||||
void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2")
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2")
|
||||
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2")
|
||||
|
||||
@@ -20,9 +20,20 @@
|
||||
#ifndef FREERDP_LIB_CODEC_RFX_SSE2_H
|
||||
#define FREERDP_LIB_CODEC_RFX_SSE2_H
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/codec/rfx.h>
|
||||
#include <freerdp/api.h>
|
||||
|
||||
FREERDP_LOCAL void rfx_init_sse2(RFX_CONTEXT* context);
|
||||
FREERDP_LOCAL void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context);
|
||||
|
||||
/**
 * Header-side entry point for the RFX SSE2 initialization.
 *
 * Forwards to rfx_init_sse2_int() only when the CPU reports both SSE2
 * and SSE3; if either is missing the context is left untouched.
 *
 * @param context RFX context handed on to rfx_init_sse2_int()
 */
static inline void rfx_init_sse2(RFX_CONTEXT* WINPR_RESTRICT context)
{
	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		rfx_init_sse2_int(context);
}
|
||||
|
||||
#endif /* FREERDP_LIB_CODEC_RFX_SSE2_H */
|
||||
|
||||
@@ -154,17 +154,13 @@ static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_YCoCg(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -776,19 +776,14 @@ static pstatus_t neon_YUV420CombineToYUV444(avc444_frame_type type,
|
||||
}
|
||||
#endif
|
||||
|
||||
void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_YUV(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R;
|
||||
prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R;
|
||||
prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R;
|
||||
prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R;
|
||||
prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -343,19 +343,15 @@ neon_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a
|
||||
#endif /* NEON_INTRINSICS_ENABLED */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_colors_neon(primitives_t* prims)
|
||||
void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_colors(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -22,10 +22,34 @@
|
||||
#define FREERDP_LIB_PRIM_YCoCg_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims);
|
||||
void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the YCoCg primitives, preferring the SSSE3 variant.
 *
 * primitives_init_YCoCg() is always run first. The SSSE3 specific
 * initializer is called afterwards only if the CPU reports SSSE3, SSE2
 * and SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YCoCg(prims);

	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
	    IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_YCoCg_ssse3_int(prims);
}
|
||||
|
||||
FREERDP_LOCAL void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the YCoCg primitives, preferring the NEON variant.
 *
 * primitives_init_YCoCg() is always run first. The NEON specific
 * initializer is called afterwards only if the CPU reports NEON
 * support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YCoCg(prims);

	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		primitives_init_YCoCg_neon_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,10 +22,34 @@
|
||||
#define FREERDP_LIB_PRIM_YUV_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims);
|
||||
void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the YUV primitives, preferring the SSE4.1 variant.
 *
 * primitives_init_YUV() is always run first. The SSE4.1 specific
 * initializer is called afterwards only if both the extended
 * (PF_EX_SSE41) and the regular (PF_SSE4_1_INSTRUCTIONS_AVAILABLE)
 * feature queries succeed.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YUV(prims);

	if (IsProcessorFeaturePresentEx(PF_EX_SSE41) &&
	    IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
		primitives_init_YUV_sse41_int(prims);
}
|
||||
|
||||
FREERDP_LOCAL void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the YUV primitives, preferring the NEON variant.
 *
 * primitives_init_YUV() is always run first. The NEON specific
 * initializer is called afterwards only if the CPU reports NEON
 * support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_YUV(prims);

	/* Gate on the NEON feature flag, like the other *_neon wrappers do.
	 * Testing the x86 SSE4.1 flag here would keep the NEON code path from
	 * ever being selected. */
	if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		return;

	primitives_init_YUV_neon_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,23 @@
|
||||
#define FREERDP_LIB_PRIM_ADD_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the add primitives, preferring the SSE3 variant.
 *
 * primitives_init_add() is always run first. The SSE3 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support (SSE3 is required for LDDQU).
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_add(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
		primitives_init_add_sse3_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,23 @@
|
||||
#define FREERDP_LIB_PRIM_ALPHA_COMP_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the alpha composition primitives, preferring the SSE3 variant.
 *
 * primitives_init_alphaComp() is always run first. The SSE3 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support (SSE3 is required for LDDQU).
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_alphaComp(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
		primitives_init_alphaComp_sse3_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,23 @@
|
||||
#define FREERDP_LIB_PRIM_ANDOR_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the and/or primitives, preferring the SSE3 variant.
 *
 * primitives_init_andor() is always run first. The SSE3 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_andor(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_andor_sse3_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,10 +22,33 @@
|
||||
#define FREERDP_LIB_PRIM_COLORS_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_colors_sse2(primitives_t* prims);
|
||||
void primitives_init_colors_neon(primitives_t* prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the color conversion primitives, preferring the SSE2 variant.
 *
 * primitives_init_colors() is always run first. The SSE2 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_colors_sse2(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_colors(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_colors_sse2_int(prims);
}
|
||||
|
||||
FREERDP_LOCAL void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the color conversion primitives, preferring the NEON variant.
 *
 * primitives_init_colors() is always run first. The NEON specific
 * initializer is called afterwards only if the CPU reports NEON
 * support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_colors_neon(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_colors(prims);

	if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
		primitives_init_colors_neon_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
#define FREERDP_LIB_PRIM_COPY_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
@@ -38,10 +40,24 @@ pstatus_t generic_image_copy_no_overlap_memcpy(
|
||||
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
|
||||
UINT32 flags);
|
||||
|
||||
void primitives_init_copy_sse41(primitives_t* prims);
|
||||
FREERDP_LOCAL void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Header-side entry point for the SSE4.1 copy primitives.
 *
 * Calls primitives_init_copy_sse41_int() only if the CPU reports
 * SSE4.1 support; otherwise the primitives table is not modified by
 * this function.
 *
 * @param prims primitives table handed on to the SSE4.1 initializer
 */
static inline void primitives_init_copy_sse41(primitives_t* WINPR_RESTRICT prims)
{
	if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
		primitives_init_copy_sse41_int(prims);
}
|
||||
|
||||
#if defined(WITH_AVX2)
|
||||
void primitives_init_copy_avx2(primitives_t* prims);
|
||||
FREERDP_LOCAL void primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Header-side entry point for the AVX2 copy primitives.
 *
 * Calls primitives_init_copy_avx2_int() only if the CPU reports AVX2
 * support; otherwise the primitives table is not modified by this
 * function.
 *
 * @param prims primitives table handed on to the AVX2 initializer
 */
static inline void primitives_init_copy_avx2(primitives_t* WINPR_RESTRICT prims)
{
	if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
		primitives_init_copy_avx2_int(prims);
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,22 @@
|
||||
#define FREERDP_LIB_PRIM_SET_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the set primitives, preferring the SSE2 variant.
 *
 * primitives_init_set() is always run first. The SSE2 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_set(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_set_sse2_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,22 @@
|
||||
#define FREERDP_LIB_PRIM_SHIFT_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include "prim_internal.h"
|
||||
|
||||
extern void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims);
|
||||
FREERDP_LOCAL void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the shift primitives, preferring the SSE3 variant.
 *
 * primitives_init_shift() is always run first. The SSE3 specific
 * initializer is called afterwards only if the CPU reports SSE2 and
 * SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_shift(prims);

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_shift_sse3_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,9 +22,22 @@
|
||||
#define FREERDP_LIB_PRIM_SIGN_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims);
|
||||
#include "prim_internal.h"
|
||||
|
||||
FREERDP_LOCAL void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
/**
 * Set up the sign primitives, preferring the SSSE3 variant.
 *
 * primitives_init_sign() is always run first. The SSSE3 specific
 * initializer is called afterwards only if the CPU reports SSSE3 and
 * SSE3 support.
 *
 * @param prims primitives table to initialize
 */
static inline void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
{
	primitives_init_sign(prims);

	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
	    IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
		primitives_init_sign_ssse3_int(prims);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -411,19 +411,13 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT3
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_YCoCg(prims);
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
|
||||
IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
|
||||
prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
|
||||
prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -1749,23 +1749,18 @@ static pstatus_t sse41_YUV420CombineToYUV444(avc444_frame_type type,
|
||||
}
|
||||
#endif
|
||||
|
||||
void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_YUV(prims);
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSE41) &&
|
||||
IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations");
|
||||
prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420;
|
||||
prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV;
|
||||
prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2;
|
||||
prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB;
|
||||
prims->YUV444ToRGB_8u_P3AC4R = sse41_YUV444ToRGB_8u_P3AC4R;
|
||||
prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE3/sse41 optimizations");
|
||||
prims->RGBToYUV420_8u_P3AC4R = sse41_RGBToYUV420;
|
||||
prims->RGBToAVC444YUV = sse41_RGBToAVC444YUV;
|
||||
prims->RGBToAVC444YUVv2 = sse41_RGBToAVC444YUVv2;
|
||||
prims->YUV420ToRGB_8u_P3AC4R = sse41_YUV420ToRGB;
|
||||
prims->YUV444ToRGB_8u_P3AC4R = sse41_YUV444ToRGB_8u_P3AC4R;
|
||||
prims->YUV420CombineToYUV444 = sse41_YUV420CombineToYUV444;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or sse41 intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -171,20 +171,14 @@ static pstatus_t sse3_add_16s_inplace(INT16* WINPR_RESTRICT pSrcDst1,
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_add(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->add_16s = sse3_add_16s;
|
||||
prims->add_16s_inplace = sse3_add_16s_inplace;
|
||||
}
|
||||
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->add_16s = sse3_add_16s;
|
||||
prims->add_16s_inplace = sse3_add_16s_inplace;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -201,18 +201,12 @@ static pstatus_t sse2_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 sr
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_alphaComp(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->alphaComp_argb = sse2_alphaComp_argb;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->alphaComp_argb = sse2_alphaComp_argb;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");
|
||||
|
||||
@@ -38,19 +38,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, generic->orC_32u, _mm_or_si128, *dptr
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_andor(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->andC_32u = sse3_andC_32u;
|
||||
prims->orC_32u = sse3_orC_32u;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->andC_32u = sse3_andC_32u;
|
||||
prims->orC_32u = sse3_orC_32u;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");
|
||||
|
||||
@@ -1211,21 +1211,16 @@ sse2_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, a
|
||||
}
|
||||
#endif
|
||||
|
||||
void primitives_init_colors_sse2(primitives_t* prims)
|
||||
void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_colors(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
|
||||
|
||||
@@ -273,14 +273,11 @@ static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_copy_avx2(primitives_t* prims)
|
||||
void primitives_init_copy_avx2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "AVX2 optimizations");
|
||||
prims->copy_no_overlap = avx2_image_copy_no_overlap;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "AVX2 optimizations");
|
||||
prims->copy_no_overlap = avx2_image_copy_no_overlap;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or WITH_AVX2 or AVX2 intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -251,14 +251,11 @@ static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_copy_sse41(primitives_t* prims)
|
||||
void primitives_init_copy_sse41_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE4.1 optimizations");
|
||||
prims->copy_no_overlap = sse_image_copy_no_overlap;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE4.1 optimizations");
|
||||
prims->copy_no_overlap = sse_image_copy_no_overlap;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE4.1 intrinsics not available");
|
||||
WINPR_UNUSED(prims);
|
||||
|
||||
@@ -214,21 +214,17 @@ static pstatus_t sse2_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len)
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_set(prims);
|
||||
|
||||
/* Pick tuned versions if possible. */
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
prims->set_8u = sse2_set_8u;
|
||||
prims->set_32s = sse2_set_32s;
|
||||
prims->set_32u = sse2_set_32u;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
prims->set_8u = sse2_set_8u;
|
||||
prims->set_32s = sse2_set_32s;
|
||||
prims->set_32u = sse2_set_32u;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
|
||||
|
||||
@@ -140,22 +140,17 @@ static pstatus_t sse2_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDst, UINT32
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_shift(prims);
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace;
|
||||
prims->lShiftC_16s = sse2_lShiftC_16s;
|
||||
prims->rShiftC_16s = sse2_rShiftC_16s;
|
||||
prims->lShiftC_16u = sse2_lShiftC_16u;
|
||||
prims->rShiftC_16u = sse2_rShiftC_16u;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace;
|
||||
prims->lShiftC_16s = sse2_lShiftC_16s;
|
||||
prims->rShiftC_16s = sse2_rShiftC_16s;
|
||||
prims->lShiftC_16u = sse2_lShiftC_16u;
|
||||
prims->rShiftC_16u = sse2_rShiftC_16u;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE3 intrinsics not available");
|
||||
|
||||
@@ -169,20 +169,16 @@ static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_R
|
||||
#endif /* SSE_AVX_INTRINSICS_ENABLED */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
|
||||
void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
primitives_init_sign(prims);
|
||||
|
||||
/* Pick tuned versions if possible. */
|
||||
/* I didn't spot an IPP version of this. */
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
|
||||
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
|
||||
prims->sign_16s = ssse3_sign_16s;
|
||||
}
|
||||
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
|
||||
prims->sign_16s = ssse3_sign_16s;
|
||||
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSSE3/SSE3 intrinsics not available");
|
||||
|
||||
Reference in New Issue
Block a user