From 84a3dbc1158d8aac72af9b5fc3e268d69ceb269c Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Wed, 27 Feb 2013 15:58:06 +0100 Subject: [PATCH] primitives: make use of winprs processor feature detection - Removed the cpu flag detection - Fixed and updated the tests --- include/freerdp/primitives.h | 9 - libfreerdp/primitives/README.txt | 14 +- libfreerdp/primitives/prim_add.c | 3 +- libfreerdp/primitives/prim_add.h | 2 +- libfreerdp/primitives/prim_add_opt.c | 8 +- libfreerdp/primitives/prim_alphaComp.c | 4 +- libfreerdp/primitives/prim_alphaComp.h | 2 +- libfreerdp/primitives/prim_alphaComp_opt.c | 7 +- libfreerdp/primitives/prim_andor.c | 3 +- libfreerdp/primitives/prim_andor.h | 2 +- libfreerdp/primitives/prim_andor_opt.c | 7 +- libfreerdp/primitives/prim_colors.c | 4 +- libfreerdp/primitives/prim_colors.h | 2 +- libfreerdp/primitives/prim_colors_opt.c | 7 +- libfreerdp/primitives/prim_copy.c | 1 - libfreerdp/primitives/prim_internal.h | 18 -- libfreerdp/primitives/prim_set.c | 3 +- libfreerdp/primitives/prim_set.h | 2 +- libfreerdp/primitives/prim_set_opt.c | 5 +- libfreerdp/primitives/prim_shift.c | 3 +- libfreerdp/primitives/prim_shift.h | 2 +- libfreerdp/primitives/prim_shift_opt.c | 7 +- libfreerdp/primitives/prim_sign.c | 3 +- libfreerdp/primitives/prim_sign.h | 2 +- libfreerdp/primitives/prim_sign_opt.c | 7 +- libfreerdp/primitives/primitives.c | 276 +------------------- libfreerdp/primitives/test/CMakeLists.txt | 19 +- libfreerdp/primitives/test/prim_test.c | 88 ++++++- libfreerdp/primitives/test/prim_test.h | 67 ++--- libfreerdp/primitives/test/test_add.c | 7 +- libfreerdp/primitives/test/test_alphaComp.c | 9 +- libfreerdp/primitives/test/test_andor.c | 13 +- libfreerdp/primitives/test/test_colors.c | 13 +- libfreerdp/primitives/test/test_copy.c | 4 +- libfreerdp/primitives/test/test_set.c | 21 +- libfreerdp/primitives/test/test_shift.c | 16 +- libfreerdp/primitives/test/test_sign.c | 9 +- 37 files changed, 200 insertions(+), 469 deletions(-) diff 
--git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index 8bcd14c7b..24bac4748 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -190,9 +190,6 @@ typedef struct __yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3; __RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3; __RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R; - - /* internal use for CPU flags and such. */ - void *hints; } primitives_t; #ifdef __cplusplus @@ -202,12 +199,6 @@ extern "C" { /* Prototypes for the externally-visible entrypoints. */ FREERDP_API void primitives_init(void); FREERDP_API primitives_t *primitives_get(void); -FREERDP_API UINT32 primitives_get_flags( - const primitives_t *prims); -FREERDP_API void primitives_flags_str( - const primitives_t *prims, - char *str, - size_t len); FREERDP_API void primitives_deinit(void); #ifdef __cplusplus diff --git a/libfreerdp/primitives/README.txt b/libfreerdp/primitives/README.txt index 369102c0d..81c7e9754 100644 --- a/libfreerdp/primitives/README.txt +++ b/libfreerdp/primitives/README.txt @@ -62,10 +62,7 @@ New Optimizations ----------------- As the need arises, new optimizations can be added to the library, including NEON, AVX, and perhaps OpenCL or other SIMD implementations. -The initialization routine is free to do any quick run-time test to -determine which features are available before hooking the operation's -function pointer, or it can simply look at the processor features list -from the hints passed to the initialization routine. +The CPU feature detection is done in winpr/sysinfo. Adding Entrypoints @@ -85,15 +82,6 @@ be added. The template functions can frequently be used to extend the operations without writing a lot of new code. - -Flags ------ -The entrypoint primitives_get_flags() returns a bitfield of processor flags -(as defined in primitives.h) and primitives_flag_str() returns a string -related to those processor flags, for debugging and information. 
The -bitfield can be used elsewhere in the code as needed. - - Cache Management ---------------- I haven't found a lot of speed improvement by attempting prefetch, and diff --git a/libfreerdp/primitives/prim_add.c b/libfreerdp/primitives/prim_add.c index 258bcc6ea..4d5525bdc 100644 --- a/libfreerdp/primitives/prim_add.c +++ b/libfreerdp/primitives/prim_add.c @@ -46,12 +46,11 @@ pstatus_t general_add_16s( /* ------------------------------------------------------------------------- */ void primitives_init_add( - const primitives_hints_t *hints, primitives_t *prims) { prims->add_16s = general_add_16s; - primitives_init_add_opt(hints, prims); + primitives_init_add_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_add.h b/libfreerdp/primitives/prim_add.h index 4ad460279..f1e143cd1 100644 --- a/libfreerdp/primitives/prim_add.h +++ b/libfreerdp/primitives/prim_add.h @@ -24,7 +24,7 @@ pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len); -void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_add_opt(primitives_t *prims); #endif /* !__PRIM_ADD_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_add_opt.c b/libfreerdp/primitives/prim_add_opt.c index 2de0b8fc6..88a4fbc93 100644 --- a/libfreerdp/primitives/prim_add_opt.c +++ b/libfreerdp/primitives/prim_add_opt.c @@ -20,6 +20,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s, /* ------------------------------------------------------------------------- */ void primitives_init_add_opt( - const primitives_hints_t *hints, primitives_t *prims) { #ifdef WITH_IPP prims->add_16s = (__add_16s_t) ippsAdd_16s; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + if 
(IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->add_16s = sse3_add_16s; } #endif } - - diff --git a/libfreerdp/primitives/prim_alphaComp.c b/libfreerdp/primitives/prim_alphaComp.c index 24f916770..e48ce9d61 100644 --- a/libfreerdp/primitives/prim_alphaComp.c +++ b/libfreerdp/primitives/prim_alphaComp.c @@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb( } /* ------------------------------------------------------------------------- */ -void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_alphaComp(primitives_t* prims) { prims->alphaComp_argb = general_alphaComp_argb; - primitives_init_alphaComp_opt(hints, prims); + primitives_init_alphaComp_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_alphaComp.h b/libfreerdp/primitives/prim_alphaComp.h index 50591162d..a16335711 100644 --- a/libfreerdp/primitives/prim_alphaComp.h +++ b/libfreerdp/primitives/prim_alphaComp.h @@ -24,7 +24,7 @@ pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height); -void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims); +void primitives_init_alphaComp_opt(primitives_t* prims); #endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_alphaComp_opt.c b/libfreerdp/primitives/prim_alphaComp_opt.c index 5550fcbc1..52e33fbc9 100644 --- a/libfreerdp/primitives/prim_alphaComp_opt.c +++ b/libfreerdp/primitives/prim_alphaComp_opt.c @@ -26,6 +26,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb( #endif /* ------------------------------------------------------------------------- */ -void primitives_init_alphaComp_opt(const primitives_hints_t* 
hints, primitives_t* prims) +void primitives_init_alphaComp_opt(primitives_t* prims) { #ifdef WITH_IPP prims->alphaComp_argb = ipp_alphaComp_argb; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->alphaComp_argb = sse2_alphaComp_argb; } diff --git a/libfreerdp/primitives/prim_andor.c b/libfreerdp/primitives/prim_andor.c index 0b8092ff2..4c1923f46 100644 --- a/libfreerdp/primitives/prim_andor.c +++ b/libfreerdp/primitives/prim_andor.c @@ -61,14 +61,13 @@ pstatus_t general_orC_32u( /* ------------------------------------------------------------------------- */ void primitives_init_andor( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ prims->andC_32u = general_andC_32u; prims->orC_32u = general_orC_32u; - primitives_init_andor_opt(hints, prims); + primitives_init_andor_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_andor.h b/libfreerdp/primitives/prim_andor.h index 6a2e7ac46..9762f22aa 100644 --- a/libfreerdp/primitives/prim_andor.h +++ b/libfreerdp/primitives/prim_andor.h @@ -25,7 +25,7 @@ pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len); pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len); -void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_andor_opt(primitives_t *prims); #endif /* !__PRIM_ANDOR_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_andor_opt.c b/libfreerdp/primitives/prim_andor_opt.c index e0ce1ea5b..8d74f30fa 100644 --- a/libfreerdp/primitives/prim_andor_opt.c +++ b/libfreerdp/primitives/prim_andor_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef 
WITH_SSE2 #include @@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u, /* ------------------------------------------------------------------------- */ -void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_andor_opt(primitives_t *prims) { #if defined(WITH_IPP) prims->andC_32u = (__andC_32u_t) ippsAndC_32u; prims->orC_32u = (__orC_32u_t) ippsOrC_32u; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->andC_32u = sse3_andC_32u; prims->orC_32u = sse3_orC_32u; diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c index 179e569b7..aae98e2d8 100644 --- a/libfreerdp/primitives/prim_colors.c +++ b/libfreerdp/primitives/prim_colors.c @@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R( } /* ------------------------------------------------------------------------- */ -void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_colors(primitives_t* prims) { prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3; prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3; - primitives_init_colors_opt(hints, prims); + primitives_init_colors_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_colors.h b/libfreerdp/primitives/prim_colors.h index 70f478547..15b76d997 100644 --- a/libfreerdp/primitives/prim_colors.h +++ b/libfreerdp/primitives/prim_colors.h @@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 
dstStep, const prim_size_t *roi); pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi); -void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims); +void primitives_init_colors_opt(primitives_t* prims); #endif /* !__PRIM_COLORS_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c index cfc87414e..3dcd8895b 100644 --- a/libfreerdp/primitives/prim_colors_opt.c +++ b/libfreerdp/primitives/prim_colors_opt.c @@ -23,6 +23,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3( */ /* ------------------------------------------------------------------------- */ -void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_colors_opt(primitives_t* prims) { #if defined(WITH_SSE2) - if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3; } #elif defined(WITH_NEON) - if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; } diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c index 4198f2d8f..95755a09c 100644 --- a/libfreerdp/primitives/prim_copy.c +++ b/libfreerdp/primitives/prim_copy.c @@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r( /* ------------------------------------------------------------------------- */ void primitives_init_copy( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. 
*/ diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index 001ab8562..06418fba2 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -34,61 +34,43 @@ ? _mm_lddqu_si128((__m128i *) (_ptr_)) \ : _mm_load_si128((__m128i *) (_ptr_))) -/* This structure can (eventually) be used to provide hints to the - * initialization routines, e.g. whether SSE2 or NEON or IPP instructions - * or calls are available. - */ -typedef struct -{ - UINT32 x86_flags; - UINT32 arm_flags; -} primitives_hints_t; - /* Function prototypes for all the init/deinit routines. */ extern void primitives_init_copy( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_copy( primitives_t *prims); extern void primitives_init_set( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_set( primitives_t *prims); extern void primitives_init_add( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_add( primitives_t *prims); extern void primitives_init_andor( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_andor( primitives_t *prims); extern void primitives_init_shift( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_shift( primitives_t *prims); extern void primitives_init_sign( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_sign( primitives_t *prims); extern void primitives_init_alphaComp( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_alphaComp( primitives_t *prims); extern void primitives_init_colors( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_colors( primitives_t *prims); diff --git a/libfreerdp/primitives/prim_set.c b/libfreerdp/primitives/prim_set.c index 9176c8722..967df7b33 100644 --- a/libfreerdp/primitives/prim_set.c +++ 
b/libfreerdp/primitives/prim_set.c @@ -111,7 +111,6 @@ pstatus_t general_set_32u( /* ------------------------------------------------------------------------- */ void primitives_init_set( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ @@ -120,7 +119,7 @@ void primitives_init_set( prims->set_32u = general_set_32u; prims->zero = general_zero; - primitives_init_set_opt(hints, prims); + primitives_init_set_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_set.h b/libfreerdp/primitives/prim_set.h index e4504dc2c..0e2be1ea7 100644 --- a/libfreerdp/primitives/prim_set.h +++ b/libfreerdp/primitives/prim_set.h @@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len); pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len); -void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_set_opt(primitives_t *prims); #endif /* !__PRIM_SET_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_set_opt.c b/libfreerdp/primitives/prim_set_opt.c index 0523434ff..08b0f7e5f 100644 --- a/libfreerdp/primitives/prim_set_opt.c +++ b/libfreerdp/primitives/prim_set_opt.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef WITH_SSE2 # include @@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u( #endif /* ------------------------------------------------------------------------- */ -void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_set_opt(primitives_t *prims) { /* Pick tuned versions if possible. 
*/ #ifdef WITH_IPP @@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u; prims->zero = (__zero_t) ippsZero_8u; #elif defined(WITH_SSE2) - if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { prims->set_8u = sse2_set_8u; prims->set_32s = sse2_set_32s; diff --git a/libfreerdp/primitives/prim_shift.c b/libfreerdp/primitives/prim_shift.c index bd26dc0a0..e89b2e086 100644 --- a/libfreerdp/primitives/prim_shift.c +++ b/libfreerdp/primitives/prim_shift.c @@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u( /* ------------------------------------------------------------------------- */ void primitives_init_shift( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ @@ -117,7 +116,7 @@ void primitives_init_shift( prims->shiftC_16s = general_shiftC_16s; prims->shiftC_16u = general_shiftC_16u; - primitives_init_shift_opt(hints, prims); + primitives_init_shift_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_shift.h b/libfreerdp/primitives/prim_shift.h index cad054013..a26a5037c 100644 --- a/libfreerdp/primitives/prim_shift.h +++ b/libfreerdp/primitives/prim_shift.h @@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len); pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len); -void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_shift_opt(primitives_t *prims); #endif /* !__PRIM_SHIFT_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_shift_opt.c b/libfreerdp/primitives/prim_shift_opt.c index 0e57da269..9cdb33db7 100644 --- a/libfreerdp/primitives/prim_shift_opt.c +++ 
b/libfreerdp/primitives/prim_shift_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u, */ /* ------------------------------------------------------------------------- */ -void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_shift_opt(primitives_t *prims) { #if defined(WITH_IPP) prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s; @@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *pr prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u; prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->lShiftC_16s = sse2_lShiftC_16s; prims->rShiftC_16s = sse2_rShiftC_16s; diff --git a/libfreerdp/primitives/prim_sign.c b/libfreerdp/primitives/prim_sign.c index d7d2eb018..8b2bfa974 100644 --- a/libfreerdp/primitives/prim_sign.c +++ b/libfreerdp/primitives/prim_sign.c @@ -42,13 +42,12 @@ pstatus_t general_sign_16s( /* ------------------------------------------------------------------------- */ void primitives_init_sign( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. 
*/ prims->sign_16s = general_sign_16s; - primitives_init_sign_opt(hints, prims); + primitives_init_sign_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_sign.h b/libfreerdp/primitives/prim_sign.h index 3592990ec..f43eca24c 100644 --- a/libfreerdp/primitives/prim_sign.h +++ b/libfreerdp/primitives/prim_sign.h @@ -24,7 +24,7 @@ pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len); -void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_sign_opt(primitives_t *prims); #endif /* !__PRIM_SIGN_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_sign_opt.c b/libfreerdp/primitives/prim_sign_opt.c index 81842b9bd..643a75b3b 100644 --- a/libfreerdp/primitives/prim_sign_opt.c +++ b/libfreerdp/primitives/prim_sign_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s( #endif /* WITH_SSE2 */ /* ------------------------------------------------------------------------- */ -void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_sign_opt(primitives_t *prims) { /* Pick tuned versions if possible. */ /* I didn't spot an IPP version of this. 
*/ #if defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) /* keep the SSSE3 guard */ + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->sign_16s = ssse3_sign_16s; } diff --git a/libfreerdp/primitives/primitives.c b/libfreerdp/primitives/primitives.c index 245a6ce3b..2bb05d626 100644 --- a/libfreerdp/primitives/primitives.c +++ b/libfreerdp/primitives/primitives.c @@ -22,173 +22,16 @@ #include #include -#include - #include #include "prim_internal.h" -#ifdef __ANDROID__ -#include "cpu-features.h" -#endif - /* Singleton pointer used throughout the program when requested. */ static primitives_t* pPrimitives = NULL; -#define D_BIT_MMX (1<<23) -#define D_BIT_SSE (1<<25) -#define D_BIT_SSE2 (1<<26) -#define D_BIT_3DN (1<<30) -#define C_BIT_SSE3 (1<<0) -#define C_BIT_3DNP (1<<8) -#define C_BIT_SSSE3 (1<<9) -#define C_BIT_SSE41 (1<<19) -#define C_BIT_SSE42 (1<<20) -#define C_BIT_XGETBV (1<<27) -#define C_BIT_AVX (1<<28) -#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX) -#define E_BIT_XMM (1<<1) -#define E_BIT_YMM (1<<2) -#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM) -#define C_BIT_FMA (1<<11) -#define C_BIT_AVX_AES (1<<24) - -/* If x86 */ -#if defined(_M_IX86_AMD64) - -/* If GCC */ -#ifdef __GNUC__ - -#ifdef __AVX__ -#define xgetbv(_func_, _lo_, _hi_) \ - __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_)) -#endif - -static void cpuid( - unsigned info, - unsigned *eax, - unsigned *ebx, - unsigned *ecx, - unsigned *edx) -{ - *eax = *ebx = *ecx = *edx = 0; - - __asm volatile - ( - /* The EBX (or RBX register on x86_64) is used for the PIC base address - * and must not be corrupted by our inline assembly. 
- */ -#ifdef _M_IX86 - "mov %%ebx, %%esi;" - "cpuid;" - "xchg %%ebx, %%esi;" -#else - "mov %%rbx, %%rsi;" - "cpuid;" - "xchg %%rbx, %%rsi;" -#endif - : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) - : "0" (info) - ); -} - -static void set_hints(primitives_hints_t* hints) -{ - unsigned a, b, c, d; - - cpuid(1, &a, &b, &c, &d); - - if (d & D_BIT_MMX) - hints->x86_flags |= PRIM_X86_MMX_AVAILABLE; - if (d & D_BIT_SSE) - hints->x86_flags |= PRIM_X86_SSE_AVAILABLE; - if (d & D_BIT_SSE2) - hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE; - if (d & D_BIT_3DN) - hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE; - if (c & C_BIT_3DNP) - hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE; - if (c & C_BIT_SSE3) - hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE; - if (c & C_BIT_SSSE3) - hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE; - if (c & C_BIT_SSE41) - hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE; - if (c & C_BIT_SSE42) - hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE; - -#ifdef __AVX__ - if ((c & C_BITS_AVX) == C_BITS_AVX) - { - int e, f; - xgetbv(0, e, f); - - if ((e & E_BITS_AVX) == E_BITS_AVX) - { - hints->x86_flags |= PRIM_X86_AVX_AVAILABLE; - - if (c & C_BIT_FMA) - hints->x86_flags |= PRIM_X86_FMA_AVAILABLE; - if (c & C_BIT_AVX_AES) - hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE; - } - } - /* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */ -#endif -} - -#else - -static void set_hints(primitives_hints_t* hints) -{ - /* x86 non-GCC: TODO */ -} - -#endif /* __GNUC__ */ - -/* ------------------------------------------------------------------------- */ - -#elif defined(_M_ARM) - -static UINT32 getNeonSupport(void) -{ -#ifdef __ANDROID__ - if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0; - - UINT64 features = android_getCpuFeatures(); - - if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7)) - { - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - { - return PRIM_ARM_NEON_AVAILABLE; - } - } -#elif defined(__APPLE) - /* assume NEON support on iOS devices */ - return 
PRIM_ARM_NEON_AVAILABLE; -#endif - return 0; -} - -static void set_hints(primitives_hints_t* hints) -{ - /* ARM: TODO */ - hints->arm_flags |= getNeonSupport(); -} - -#else -static void set_hints( - primitives_hints_t *hints) -{ -} -#endif /* x86 else ARM else */ - /* ------------------------------------------------------------------------- */ void primitives_init(void) { - primitives_hints_t* hints; - if (pPrimitives == NULL) { pPrimitives = calloc(1, sizeof(primitives_t)); @@ -197,19 +40,15 @@ void primitives_init(void) return; } - hints = calloc(1, sizeof(primitives_hints_t)); - set_hints(hints); - pPrimitives->hints = (void *) hints; - /* Now call each section's initialization routine. */ - primitives_init_add(hints, pPrimitives); - primitives_init_andor(hints, pPrimitives); - primitives_init_alphaComp(hints, pPrimitives); - primitives_init_copy(hints, pPrimitives); - primitives_init_set(hints, pPrimitives); - primitives_init_shift(hints, pPrimitives); - primitives_init_sign(hints, pPrimitives); - primitives_init_colors(hints, pPrimitives); + primitives_init_add(pPrimitives); + primitives_init_andor(pPrimitives); + primitives_init_alphaComp(pPrimitives); + primitives_init_copy(pPrimitives); + primitives_init_set(pPrimitives); + primitives_init_shift(pPrimitives); + primitives_init_sign(pPrimitives); + primitives_init_colors(pPrimitives); } /* ------------------------------------------------------------------------- */ @@ -221,102 +60,6 @@ primitives_t* primitives_get(void) return pPrimitives; } -/* ------------------------------------------------------------------------- */ -UINT32 primitives_get_flags(const primitives_t* prims) -{ - primitives_hints_t* hints = (primitives_hints_t*) (prims->hints); - -#if defined(_M_IX86_AMD64) - return hints->x86_flags; -#elif defined(_M_ARM) - return hints->arm_flags; -#else - return 0; -#endif -} - -/* ------------------------------------------------------------------------- */ - -typedef struct -{ - UINT32 flag; - const 
char *str; -} flagpair_t; - -static const flagpair_t x86_flags[] = -{ - { PRIM_X86_MMX_AVAILABLE, "MMX" }, - { PRIM_X86_3DNOW_AVAILABLE, "3DNow" }, - { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" }, - { PRIM_X86_SSE_AVAILABLE, "SSE" }, - { PRIM_X86_SSE2_AVAILABLE, "SSE2" }, - { PRIM_X86_SSE3_AVAILABLE, "SSE3" }, - { PRIM_X86_SSSE3_AVAILABLE, "SSSE3" }, - { PRIM_X86_SSE41_AVAILABLE, "SSE4.1" }, - { PRIM_X86_SSE42_AVAILABLE, "SSE4.2" }, - { PRIM_X86_AVX_AVAILABLE, "AVX" }, - { PRIM_X86_FMA_AVAILABLE, "FMA" }, - { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" }, - { PRIM_X86_AVX2_AVAILABLE, "AVX2" }, -}; - -static const flagpair_t arm_flags[] = -{ - { PRIM_ARM_VFP1_AVAILABLE, "VFP1" }, - { PRIM_ARM_VFP2_AVAILABLE, "VFP2" }, - { PRIM_ARM_VFP3_AVAILABLE, "VFP3" }, - { PRIM_ARM_VFP4_AVAILABLE, "VFP4" }, - { PRIM_ARM_FPA_AVAILABLE, "FPA" }, - { PRIM_ARM_FPE_AVAILABLE, "FPE" }, - { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" }, - { PRIM_ARM_NEON_AVAILABLE, "NEON" }, -}; - -void primitives_flags_str(const primitives_t* prims, char* str, size_t len) -{ - int i; - primitives_hints_t* hints; - - *str = '\0'; - --len; /* for the '/0' */ - - hints = (primitives_hints_t*) (prims->hints); - - for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i) - { - if (hints->x86_flags & x86_flags[i].flag) - { - int slen = strlen(x86_flags[i].str) + 1; - - if (len < slen) - break; - - if (*str != '\0') - strcat(str, " "); - - strcat(str, x86_flags[i].str); - len -= slen; - } - } - - for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i) - { - if (hints->arm_flags & arm_flags[i].flag) - { - int slen = strlen(arm_flags[i].str) + 1; - - if (len < slen) - break; - - if (*str != '\0') - strcat(str, " "); - - strcat(str, arm_flags[i].str); - len -= slen; - } - } -} - /* ------------------------------------------------------------------------- */ void primitives_deinit(void) { @@ -333,9 +76,6 @@ void primitives_deinit(void) primitives_deinit_sign(pPrimitives); primitives_deinit_colors(pPrimitives); - 
if (pPrimitives->hints != NULL) - free((void*) (pPrimitives->hints)); - free((void*) pPrimitives); pPrimitives = NULL; } diff --git a/libfreerdp/primitives/test/CMakeLists.txt b/libfreerdp/primitives/test/CMakeLists.txt index 7030ac7f8..738329152 100644 --- a/libfreerdp/primitives/test/CMakeLists.txt +++ b/libfreerdp/primitives/test/CMakeLists.txt @@ -31,28 +31,11 @@ set(PRIMITIVE_TEST_CFILES test_set.c test_shift.c test_sign.c - ../prim_add.c - ../prim_andor.c - ../prim_alphaComp.c - ../prim_colors.c - ../prim_copy.c - ../prim_set.c - ../prim_shift.c - ../prim_sign.c - ../prim_add_opt.c - ../prim_alphaComp_opt.c - ../prim_andor_opt.c - ../prim_colors_opt.c - ../prim_set_opt.c - ../prim_shift_opt.c - ../prim_sign_opt.c - ../primitives.c ) set(PRIMITIVE_TEST_HEADERS measure.h prim_test.h - ../prim_internal.h ) set(PRIMITIVE_TEST_SRCS @@ -138,7 +121,7 @@ endif() set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS}) -target_link_libraries(prim_test rt) +target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo) if(NOT TESTING_OUTPUT_DIRECTORY) set(TESTING_OUTPUT_DIRECTORY .) 
endif() diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c index 172d6ffd4..7e88efbc9 100644 --- a/libfreerdp/primitives/test/prim_test.c +++ b/libfreerdp/primitives/test/prim_test.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #ifdef HAVE_UNISTD_H #include @@ -32,6 +34,88 @@ int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; int Quiet = 0; + +/* ------------------------------------------------------------------------- */ +/* Pairs a processor feature id with the name printed for it. */ +typedef struct +{ + UINT32 flag; + const char *str; +} flagpair_t; +/* Features tested with IsProcessorFeaturePresent(); the braces stay outside the #ifdef so every target gets a well-formed initializer. */ +static const flagpair_t flags[] = +{ +#ifdef _M_IX86_AMD64 + { PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" }, + { PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" }, + { PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" }, + { PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" }, + { PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" }, +#elif defined(_M_ARM) + { PF_ARM_VFP3, "VFP3" }, + { PF_ARM_INTEL_WMMX, "IWMMXT" }, + { PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" }, +#endif +}; +/* Extended (PF_EX_*) features; these are tested with IsProcessorFeaturePresentEx(). */ +static const flagpair_t flags_extended[] = +{ +#ifdef _M_IX86_AMD64 + { PF_EX_3DNOW_PREFETCH, "3DNow-PF" }, + { PF_EX_SSSE3, "SSSE3" }, + { PF_EX_SSE41, "SSE4.1" }, + { PF_EX_SSE42, "SSE4.2" }, + { PF_EX_AVX, "AVX" }, + { PF_EX_FMA, "FMA" }, + { PF_EX_AVX_AES, "AVX-AES" }, + { PF_EX_AVX2, "AVX2" }, +#elif defined(_M_ARM) + { PF_EX_ARM_VFP1, "VFP1"}, + { PF_EX_ARM_VFP4, "VFP4" }, +#endif +}; +/* Writes the space-separated names of all present features from both tables into str; len is the size of str in bytes, and names that no longer fit are dropped. */ +void primitives_flags_str(char* str, size_t len) +{ + int i; + if (len == 0) return; /* no room for the terminator; also keeps --len from wrapping */ + *str = '\0'; + --len; /* for the '\0' */ + + for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i) + { + if (IsProcessorFeaturePresent(flags[i].flag)) + { + int slen = strlen(flags[i].str) + 1; + + if (len < slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, flags[i].str); + len -= slen; + } + } + for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i) + { + if (IsProcessorFeaturePresentEx(flags_extended[i].flag)) + { + int slen = strlen(flags_extended[i].str) + 1; + + if (len < 
slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, flags_extended[i].str); + len -= slen; + } + } +} + /* ------------------------------------------------------------------------- */ static void get_random_data_lrand( void *buffer, @@ -198,7 +282,7 @@ static const test_t testTypeList[] = int main(int argc, char** argv) { int i; - char hints[256]; + char hints[1024]; UINT32 testSet = 0; UINT32 testTypes = 0; int results = SUCCESS; @@ -253,7 +337,7 @@ int main(int argc, char** argv) primitives_init(); - primitives_flags_str(primitives_get(), hints, sizeof(hints)); + primitives_flags_str(hints, sizeof(hints)); printf("Hints: %s\n", hints); /* COPY */ diff --git a/libfreerdp/primitives/test/prim_test.h b/libfreerdp/primitives/test/prim_test.h index 9c4d3d872..fa61025f0 100644 --- a/libfreerdp/primitives/test/prim_test.h +++ b/libfreerdp/primitives/test/prim_test.h @@ -29,6 +29,7 @@ #include #include +#include #ifdef WITH_IPP #include @@ -121,8 +122,8 @@ extern int test_or_32u_speed(void); } \ } while (0) -#if defined(i386) && defined(WITH_SSE2) -#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \ +#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON)) +#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \ do { \ for (s=0; s 0.0) _floatprint(resultNormal[s], sN); \ - if (resultSSENeon[s] > 0.0) \ + if (resultOpt[s] > 0.0) \ { \ - _floatprint(resultSSENeon[s], sSN); \ + _floatprint(resultOpt[s], sSN); \ if (resultNormal[s] > 0.0) \ { \ sprintf(sSNp, "%d%%", \ - (int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \ + (int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \ } \ } \ if (resultIPP[s] > 0.0) \ @@ -244,7 +229,7 @@ static void _name_( \ printf("%8d: %15s %15s %5s %15s %5s\n", \ size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \ } \ - free(resultNormal); free(resultSSENeon); free(resultIPP); \ + free(resultNormal); free(resultOpt); free(resultIPP); \ } #endif // !__PRIMTEST_H_INCLUDED__ diff --git 
a/libfreerdp/primitives/test/test_add.c b/libfreerdp/primitives/test/test_add.c index bcdd643b7..083c37907 100644 --- a/libfreerdp/primitives/test/test_add.c +++ b/libfreerdp/primitives/test/test_add.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" #define FUNC_TEST_SIZE 65536 @@ -35,7 +36,6 @@ int test_add16s_func(void) int failed = 0; int i; char testStr[256]; - UINT32 pflags = primitives_get_flags(primitives_get()); testStr[0] = '\0'; get_random_data(src1, sizeof(src1)); @@ -44,7 +44,7 @@ int test_add16s_func(void) memset(d2, 0, sizeof(d2)); general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -91,8 +91,7 @@ int test_add16s_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst, TRUE, general_add_16s(src1, src2, dst, size), - TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsAdd_16s(src1, src2, dst, size)); int test_add16s_speed(void) diff --git a/libfreerdp/primitives/test/test_alphaComp.c b/libfreerdp/primitives/test/test_alphaComp.c index a39157acd..2d8285f27 100644 --- a/libfreerdp/primitives/test/test_alphaComp.c +++ b/libfreerdp/primitives/test/test_alphaComp.c @@ -15,6 +15,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include #include "prim_test.h" @@ -110,7 +111,6 @@ int test_alphaComp_func(void) UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]); UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]); int error = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; UINT32 *ptr; int i, x, y; @@ -133,7 +133,7 @@ int test_alphaComp_func(void) (const BYTE *) src2, 4*SRC2_WIDTH, (BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, 
TEST_HEIGHT); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH, @@ -166,7 +166,7 @@ int test_alphaComp_func(void) error = 1; } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y); if (colordist(c0, c2) > TOLERANCE) @@ -207,8 +207,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4, TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size), TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, - size, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size)); diff --git a/libfreerdp/primitives/test/test_andor.c b/libfreerdp/primitives/test/test_andor.c index 54e1ead9f..6e8b3d8a3 100644 --- a/libfreerdp/primitives/test/test_andor.c +++ b/libfreerdp/primitives/test/test_andor.c @@ -15,6 +15,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include #include "prim_test.h" @@ -39,7 +40,6 @@ int test_and_32u_func(void) UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; testStr[0] = '\0'; @@ -56,7 +56,7 @@ int test_and_32u_func(void) } } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -92,8 +92,7 @@ int test_and_32u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst, TRUE, general_andC_32u(src1, constant, dst, size), - TRUE, sse3_andC_32u(src1, constant, dst, size), 
PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsAndC_32u(src1, constant, dst, size)) int test_and_32u_speed(void) @@ -113,7 +112,6 @@ int test_or_32u_func(void) UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; testStr[0] = '\0'; @@ -130,7 +128,7 @@ int test_or_32u_func(void) } } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -166,8 +164,7 @@ int test_or_32u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst, TRUE, general_orC_32u(src1, constant, dst, size), - TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsOrC_32u(src1, constant, dst, size)) int test_or_32u_speed(void) diff --git a/libfreerdp/primitives/test/test_colors.c b/libfreerdp/primitives/test/test_colors.c index d86d76e12..e5192c035 100644 --- a/libfreerdp/primitives/test/test_colors.c +++ b/libfreerdp/primitives/test/test_colors.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int RGB_TRIAL_ITERATIONS = 1000; @@ -38,7 +39,6 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void) UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]); int i; int failed = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; INT16 *ptrs[3]; prim_size_t roi = { 64, 64 }; @@ -62,7 +62,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void) general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, (BYTE *) out1, 64*4, &roi); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if 
(IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, @@ -90,8 +90,7 @@ STD_SPEED_TEST( (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), TRUE, sse2_RGBToRGB_16s8u_P3AC4R( (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), - PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, FALSE, dst=dst); int test_RGBToRGB_16s8u_P3AC4R_speed(void) @@ -131,7 +130,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void) INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]); int i; int failed = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; const INT16 *in[3]; INT16 *out1[3]; @@ -168,7 +166,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void) general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi); @@ -193,8 +191,7 @@ STD_SPEED_TEST( ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst, TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), - PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, FALSE, dst=dst); int test_yCbCrToRGB_16s16s_P3P3_speed(void) diff --git a/libfreerdp/primitives/test/test_copy.c b/libfreerdp/primitives/test/test_copy.c index d92af5300..5f2ed9103 100644 --- a/libfreerdp/primitives/test/test_copy.c +++ b/libfreerdp/primitives/test/test_copy.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int MEMCPY_PRETEST_ITERATIONS = 1000000; @@ -70,8 +71,7 @@ int test_copy8u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst, TRUE, memcpy(dst, 
src1, size), - FALSE, NULL, 0, - FALSE, NULL, 0, + FALSE, NULL, 0, FALSE, TRUE, ippsCopy_8u(src1, dst, size)); int test_copy8u_speed(void) diff --git a/libfreerdp/primitives/test/test_set.c b/libfreerdp/primitives/test/test_set.c index 597f76678..0343674f5 100644 --- a/libfreerdp/primitives/test/test_set.c +++ b/libfreerdp/primitives/test/test_set.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int MEMSET8_PRETEST_ITERATIONS = 100000000; @@ -40,12 +41,11 @@ int test_set8u_func(void) int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(primitives_get()); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) @@ -101,8 +101,7 @@ int test_set8u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst, TRUE, memset(dst, constant, size), - FALSE, NULL, 0, - FALSE, NULL, 0, + FALSE, NULL, 0, FALSE, TRUE, ippsSet_8u(constant, dst, size)); int test_set8u_speed(void) @@ -116,17 +115,15 @@ int test_set8u_speed(void) /* ------------------------------------------------------------------------- */ int test_set32s_func(void) { - primitives_t* prims = primitives_get(); INT32 ALIGN(dest[512]); int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(prims); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -179,17 +176,15 @@ int test_set32s_func(void) /* ------------------------------------------------------------------------- */ int test_set32u_func(void) { - primitives_t* prims = primitives_get(); UINT32 
ALIGN(dest[512]); int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(prims); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -251,8 +246,7 @@ static inline void memset32u_naive( /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst, TRUE, memset32u_naive(constant, dst, size), - TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ipp_wrapper_set_32u(constant, dst, size)); int test_set32u_speed(void) @@ -280,8 +274,7 @@ static inline void memset32s_naive( /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, TRUE, memset32s_naive(constant, dst, size), - TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsSet_32s(constant, dst, size)); int test_set32s_speed(void) diff --git a/libfreerdp/primitives/test/test_shift.c b/libfreerdp/primitives/test/test_shift.c index 1b8a5c12d..d72407e13 100644 --- a/libfreerdp/primitives/test/test_shift.c +++ b/libfreerdp/primitives/test/test_shift.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" #define FUNC_TEST_SIZE 65536 @@ -55,12 +56,11 @@ int _name_(void) \ ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \ int failed = 0; \ int i; \ - UINT32 pflags = primitives_get_flags(primitives_get()); \ char testStr[256]; \ testStr[0] = '\0'; \ get_random_data(src, sizeof(src)); \ _f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \ - if (pflags & PRIM_X86_SSE3_AVAILABLE) \ + 
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \ { \ strcat(testStr, " SSE3"); \ /* Aligned */ \ @@ -109,23 +109,19 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u, /* ========================================================================= */ STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst, TRUE, general_lShiftC_16s(src1, constant, dst, size), - TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsLShiftC_16s(src1, constant, dst, size)); STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst, TRUE, general_lShiftC_16u(src1, constant, dst, size), - TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsLShiftC_16u(src1, constant, dst, size)); STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst, TRUE, general_rShiftC_16s(src1, constant, dst, size), - TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsRShiftC_16s(src1, constant, dst, size)); STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst, TRUE, general_rShiftC_16u(src1, constant, dst, size), - TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsRShiftC_16u(src1, constant, dst, size)); /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/test/test_sign.c b/libfreerdp/primitives/test/test_sign.c index 012303544..99f5a60d4 100644 --- a/libfreerdp/primitives/test/test_sign.c +++ 
b/libfreerdp/primitives/test/test_sign.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int SIGN_PRETEST_ITERATIONS = 100000; @@ -30,7 +31,6 @@ int test_sign16s_func(void) INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; /* Test when we can reach 16-byte alignment */ @@ -38,7 +38,7 @@ int test_sign16s_func(void) get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+1, 65535); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSSE3_AVAILABLE) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)) { strcat(testStr, " SSSE3"); ssse3_sign_16s(src+1, d2+1, 65535); @@ -58,7 +58,7 @@ int test_sign16s_func(void) get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+2, 65535); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSSE3_AVAILABLE) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)) { ssse3_sign_16s(src+1, d2+2, 65535); for (i=2; i<65535; ++i) @@ -79,8 +79,7 @@ int test_sign16s_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst, TRUE, general_sign_16s(src1, dst, size), - TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE, FALSE, dst=dst); int test_sign16s_speed(void)