mirror of
https://github.com/morgan9e/FreeRDP
synced 2026-04-14 00:14:11 +09:00
[codec] log primitives used
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
#include <winpr/platform.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/codec/nsc.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#include "../nsc_types.h"
|
||||
@@ -36,5 +37,7 @@ void nsc_init_neon_int(WINPR_ATTR_UNUSED NSC_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
WLog_WARN(TAG, "TODO: Implement neon optimized version of this function");
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or NEON intrinsics not available");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -526,7 +526,7 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp)
|
||||
void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(NEON_INTRINSICS_ENABLED)
|
||||
DEBUG_RFX("Using NEON optimizations");
|
||||
WLog_VRB(PRIM_TAG, "NEON optimizations");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
|
||||
@@ -534,6 +534,7 @@ void rfx_init_neon_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
|
||||
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or NEON intrinsics not available");
|
||||
WINPR_UNUSED(context);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -393,9 +393,11 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* WINPR_RESTRICT context, const BYTE* WIN
|
||||
/*
 * Hook the SSE2 NSC encoder into the given context when SIMD support is
 * compiled in.
 *
 * With SSE_AVX_INTRINSICS_ENABLED defined: a WLog_VRB message is emitted
 * under PRIM_TAG, the prof_nsc_encode profiler entry is renamed to
 * "nsc_encode_sse2", and context->encode is set to nsc_encode_sse2.
 *
 * Without it: only a WLog_VRB message is emitted and context is marked
 * unused; no field of the context is modified.
 *
 * context: NSC context whose encode callback may be replaced.
 *          NOTE(review): context->priv appears in the PROFILER_RENAME
 *          argument with no NULL check here - presumably callers pass a
 *          fully initialised context; confirm.
 */
void nsc_init_sse2_int(NSC_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
/* No trailing ';' on the next line: presumably PROFILER_RENAME supplies its
 * own terminator or expands to nothing - confirm against the macro. */
PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2")
|
||||
/* Replace the encode callback with the SSE2 implementation. */
context->encode = nsc_encode_sse2;
|
||||
#else
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
|
||||
/* Parameter is not referenced in this build configuration. */
WINPR_UNUSED(context);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -454,6 +454,7 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE
|
||||
void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
{
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2")
|
||||
PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2")
|
||||
PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2")
|
||||
@@ -464,5 +465,6 @@ void rfx_init_sse2_int(RFX_CONTEXT* WINPR_RESTRICT context)
|
||||
context->dwt_2d_encode = rfx_dwt_2d_encode_sse2;
|
||||
#else
|
||||
WINPR_UNUSED(context);
|
||||
WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <freerdp/config.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#define PRIM_TAG FREERDP_TAG("primitives")
|
||||
|
||||
/* https://sourceforge.net/p/predef/wiki/Architectures/
|
||||
*
|
||||
|
||||
@@ -23,12 +23,8 @@
|
||||
#include <freerdp/primitives.h>
|
||||
#include <freerdp/api.h>
|
||||
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#include "../core/simd.h"
|
||||
|
||||
#define PRIM_TAG FREERDP_TAG("primitives")
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define PRIM_ALIGN_128 __attribute__((aligned(16)))
|
||||
#else
|
||||
|
||||
@@ -208,29 +208,29 @@ sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(const INT16* WINPR_RESTRICT pSrc[3],
|
||||
/* The comments below pretend these are 8-byte registers
|
||||
* rather than 16-byte, for readability.
|
||||
*/
|
||||
__m128i R0 = b1; /* R0 = 00B300B200B100B0 */
|
||||
__m128i R1 = b2; /* R1 = 00B700B600B500B4 */
|
||||
R0 = _mm_packus_epi16(R0, R1); /* R0 = B7B6B5B4B3B2B1B0 */
|
||||
R1 = g1; /* R1 = 00G300G200G100G0 */
|
||||
__m128i R2 = g2; /* R2 = 00G700G600G500G4 */
|
||||
R1 = _mm_packus_epi16(R1, R2); /* R1 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = R1; /* R2 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = _mm_unpacklo_epi8(R0, R2); /* R2 = B3G3B2G2B1G1B0G0 */
|
||||
R1 = _mm_unpackhi_epi8(R0, R1); /* R1 = B7G7B6G6B5G5B4G4 */
|
||||
R0 = r1; /* R0 = 00R300R200R100R0 */
|
||||
__m128i R3 = r2; /* R3 = 00R700R600R500R4 */
|
||||
R0 = _mm_packus_epi16(R0, R3); /* R0 = R7R6R5R4R3R2R1R0 */
|
||||
R3 = mm_set1_epu32(0xFFFFFFFFU); /* R3 = FFFFFFFFFFFFFFFF */
|
||||
__m128i R4 = R3; /* R4 = FFFFFFFFFFFFFFFF */
|
||||
R4 = _mm_unpacklo_epi8(R0, R4); /* R4 = R3FFR2FFR1FFR0FF */
|
||||
R3 = _mm_unpackhi_epi8(R0, R3); /* R3 = R7FFR6FFR5FFR4FF */
|
||||
R0 = R4; /* R0 = R4 */
|
||||
R0 = _mm_unpacklo_epi16(R2, R0); /* R0 = B1G1R1FFB0G0R0FF */
|
||||
R4 = _mm_unpackhi_epi16(R2, R4); /* R4 = B3G3R3FFB2G2R2FF */
|
||||
R2 = R3; /* R2 = R3 */
|
||||
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = B5G5R5FFB4G4R4FF */
|
||||
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = B7G7R7FFB6G6R6FF */
|
||||
STORE_SI128(d_buf, R0); /* B1G1R1FFB0G0R0FF */
|
||||
__m128i R0 = b1; /* R0 = 00B300B200B100B0 */
|
||||
__m128i R1 = b2; /* R1 = 00B700B600B500B4 */
|
||||
R0 = _mm_packus_epi16(R0, R1); /* R0 = B7B6B5B4B3B2B1B0 */
|
||||
R1 = g1; /* R1 = 00G300G200G100G0 */
|
||||
__m128i R2 = g2; /* R2 = 00G700G600G500G4 */
|
||||
R1 = _mm_packus_epi16(R1, R2); /* R1 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = R1; /* R2 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = _mm_unpacklo_epi8(R0, R2); /* R2 = B3G3B2G2B1G1B0G0 */
|
||||
R1 = _mm_unpackhi_epi8(R0, R1); /* R1 = B7G7B6G6B5G5B4G4 */
|
||||
R0 = r1; /* R0 = 00R300R200R100R0 */
|
||||
__m128i R3 = r2; /* R3 = 00R700R600R500R4 */
|
||||
R0 = _mm_packus_epi16(R0, R3); /* R0 = R7R6R5R4R3R2R1R0 */
|
||||
R3 = mm_set1_epu32(0xFFFFFFFFU); /* R3 = FFFFFFFFFFFFFFFF */
|
||||
__m128i R4 = R3; /* R4 = FFFFFFFFFFFFFFFF */
|
||||
R4 = _mm_unpacklo_epi8(R0, R4); /* R4 = R3FFR2FFR1FFR0FF */
|
||||
R3 = _mm_unpackhi_epi8(R0, R3); /* R3 = R7FFR6FFR5FFR4FF */
|
||||
R0 = R4; /* R0 = R4 */
|
||||
R0 = _mm_unpacklo_epi16(R2, R0); /* R0 = B1G1R1FFB0G0R0FF */
|
||||
R4 = _mm_unpackhi_epi16(R2, R4); /* R4 = B3G3R3FFB2G2R2FF */
|
||||
R2 = R3; /* R2 = R3 */
|
||||
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = B5G5R5FFB4G4R4FF */
|
||||
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = B7G7R7FFB6G6R6FF */
|
||||
STORE_SI128(d_buf, R0); /* B1G1R1FFB0G0R0FF */
|
||||
d_buf += sizeof(__m128i);
|
||||
STORE_SI128(d_buf, R4); /* B3G3R3FFB2G2R2FF */
|
||||
d_buf += sizeof(__m128i);
|
||||
@@ -377,29 +377,29 @@ sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(const INT16* WINPR_RESTRICT pSrc[3],
|
||||
/* The comments below pretend these are 8-byte registers
|
||||
* rather than 16-byte, for readability.
|
||||
*/
|
||||
__m128i R0 = r1; /* R0 = 00R300R200R100R0 */
|
||||
__m128i R1 = r2; /* R1 = 00R700R600R500R4 */
|
||||
R0 = _mm_packus_epi16(R0, R1); /* R0 = R7R6R5R4R3R2R1R0 */
|
||||
R1 = g1; /* R1 = 00G300G200G100G0 */
|
||||
__m128i R2 = g2; /* R2 = 00G700G600G500G4 */
|
||||
R1 = _mm_packus_epi16(R1, R2); /* R1 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = R1; /* R2 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = _mm_unpacklo_epi8(R0, R2); /* R2 = R3G3R2G2R1G1R0G0 */
|
||||
R1 = _mm_unpackhi_epi8(R0, R1); /* R1 = R7G7R6G6R5G5R4G4 */
|
||||
R0 = b1; /* R0 = 00B300B200B100B0 */
|
||||
__m128i R3 = b2; /* R3 = 00B700B600B500B4 */
|
||||
R0 = _mm_packus_epi16(R0, R3); /* R0 = B7B6B5B4B3B2B1B0 */
|
||||
R3 = mm_set1_epu32(0xFFFFFFFFU); /* R3 = FFFFFFFFFFFFFFFF */
|
||||
__m128i R4 = R3; /* R4 = FFFFFFFFFFFFFFFF */
|
||||
R4 = _mm_unpacklo_epi8(R0, R4); /* R4 = B3FFB2FFB1FFB0FF */
|
||||
R3 = _mm_unpackhi_epi8(R0, R3); /* R3 = B7FFB6FFB5FFB4FF */
|
||||
R0 = R4; /* R0 = R4 */
|
||||
R0 = _mm_unpacklo_epi16(R2, R0); /* R0 = R1G1B1FFR0G0B0FF */
|
||||
R4 = _mm_unpackhi_epi16(R2, R4); /* R4 = R3G3B3FFR2G2B2FF */
|
||||
R2 = R3; /* R2 = R3 */
|
||||
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = R5G5B5FFR4G4B4FF */
|
||||
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = R7G7B7FFR6G6B6FF */
|
||||
STORE_SI128(d_buf, R0); /* R1G1B1FFR0G0B0FF */
|
||||
__m128i R0 = r1; /* R0 = 00R300R200R100R0 */
|
||||
__m128i R1 = r2; /* R1 = 00R700R600R500R4 */
|
||||
R0 = _mm_packus_epi16(R0, R1); /* R0 = R7R6R5R4R3R2R1R0 */
|
||||
R1 = g1; /* R1 = 00G300G200G100G0 */
|
||||
__m128i R2 = g2; /* R2 = 00G700G600G500G4 */
|
||||
R1 = _mm_packus_epi16(R1, R2); /* R1 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = R1; /* R2 = G7G6G5G4G3G2G1G0 */
|
||||
R2 = _mm_unpacklo_epi8(R0, R2); /* R2 = R3G3R2G2R1G1R0G0 */
|
||||
R1 = _mm_unpackhi_epi8(R0, R1); /* R1 = R7G7R6G6R5G5R4G4 */
|
||||
R0 = b1; /* R0 = 00B300B200B100B0 */
|
||||
__m128i R3 = b2; /* R3 = 00B700B600B500B4 */
|
||||
R0 = _mm_packus_epi16(R0, R3); /* R0 = B7B6B5B4B3B2B1B0 */
|
||||
R3 = mm_set1_epu32(0xFFFFFFFFU); /* R3 = FFFFFFFFFFFFFFFF */
|
||||
__m128i R4 = R3; /* R4 = FFFFFFFFFFFFFFFF */
|
||||
R4 = _mm_unpacklo_epi8(R0, R4); /* R4 = B3FFB2FFB1FFB0FF */
|
||||
R3 = _mm_unpackhi_epi8(R0, R3); /* R3 = B7FFB6FFB5FFB4FF */
|
||||
R0 = R4; /* R0 = R4 */
|
||||
R0 = _mm_unpacklo_epi16(R2, R0); /* R0 = R1G1B1FFR0G0B0FF */
|
||||
R4 = _mm_unpackhi_epi16(R2, R4); /* R4 = R3G3B3FFR2G2B2FF */
|
||||
R2 = R3; /* R2 = R3 */
|
||||
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = R5G5B5FFR4G4B4FF */
|
||||
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = R7G7B7FFR6G6B6FF */
|
||||
STORE_SI128(d_buf, R0); /* R1G1B1FFR0G0B0FF */
|
||||
d_buf += sizeof(__m128i);
|
||||
STORE_SI128(d_buf, R4); /* R3G3B3FFR2G2B2FF */
|
||||
d_buf += sizeof(__m128i);
|
||||
@@ -441,7 +441,7 @@ sse2_yCbCrToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], UINT32 srcStep
|
||||
const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
|
||||
{
|
||||
if (((ULONG_PTR)(pSrc[0]) & 0x0f) || ((ULONG_PTR)(pSrc[1]) & 0x0f) ||
|
||||
((ULONG_PTR)(pSrc[2]) & 0x0f) || ((ULONG_PTR)(pDst)&0x0f) || (srcStep & 0x0f) ||
|
||||
((ULONG_PTR)(pSrc[2]) & 0x0f) || ((ULONG_PTR)(pDst) & 0x0f) || (srcStep & 0x0f) ||
|
||||
(dstStep & 0x0f))
|
||||
{
|
||||
/* We can't maintain 16-byte alignment. */
|
||||
@@ -1044,7 +1044,7 @@ void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
#if defined(SSE_AVX_INTRINSICS_ENABLED)
|
||||
generic = primitives_get_generic();
|
||||
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
|
||||
|
||||
@@ -223,7 +223,7 @@ void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/* Pick tuned versions if possible. */
|
||||
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
|
||||
prims->set_8u = sse2_set_8u;
|
||||
prims->set_32s = sse2_set_32s;
|
||||
prims->set_32u = sse2_set_32u;
|
||||
|
||||
Reference in New Issue
Block a user