From 14ba7fb60118a07e20ce9930be80f8daef8699ce Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 09:26:52 +0200 Subject: [PATCH 1/6] [winpr,sysinfo] update IsProcessorFeaturePresent Update instruction set detection for newer stuff (SSE4, AVC2, ...) --- winpr/include/winpr/sysinfo.h | 13 ++++++++++ winpr/libwinpr/sysinfo/sysinfo.c | 41 ++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/winpr/include/winpr/sysinfo.h b/winpr/include/winpr/sysinfo.h index 61fadc317..45893fb25 100644 --- a/winpr/include/winpr/sysinfo.h +++ b/winpr/include/winpr/sysinfo.h @@ -227,6 +227,19 @@ extern "C" #define PF_ARM_64BIT_LOADSTORE_ATOMIC 25 #define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26 #define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27 +#define PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +#define PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +#define PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +#define PF_AVX_INSTRUCTIONS_AVAILABLE 39 +#define PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +#define PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#define PF_ARM_V8_INSTRUCTIONS_AVAILABLE 29 +#define PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE 30 +#define PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE 31 +#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34 +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44 +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 #define PF_ARM_V4 0x80000001 #define PF_ARM_V5 0x80000002 diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c index bf81b8bd5..89bfb3798 100644 --- a/winpr/libwinpr/sysinfo/sysinfo.c +++ b/winpr/libwinpr/sysinfo/sysinfo.c @@ -788,6 +788,7 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) return features & ANDROID_CPU_ARM_FEATURE_NEON; default: + WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature); return FALSE; } @@ -857,8 +858,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) ret = TRUE; break; - + case PF_ARM_V8_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: default: + WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature); break; } @@ -872,7 +880,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) ret = TRUE; #endif break; + case PF_ARM_V8_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: + case PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: default: + WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature); break; } @@ -912,12 +928,29 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) break; case PF_SSE3_INSTRUCTIONS_AVAILABLE: - if (c & C_BIT_SSE3) - ret = TRUE; - + ret = __builtin_cpu_supports("sse3"); break; + case PF_SSSE3_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("ssse3"); + break; + case PF_SSE4_1_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("sse4.1"); + break; + case PF_SSE4_2_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("sse4.2"); + break; + case PF_AVX_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("avx"); + break; + case PF_AVX2_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("avx2"); + break; + case PF_AVX512F_INSTRUCTIONS_AVAILABLE: + ret = __builtin_cpu_supports("avx512f"); + break; default: + WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature); break; } From 4f6422ba86063f21f930aaaed62987d4608bd285 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 09:28:13 +0200 Subject: [PATCH 2/6] [winpr,platform] add WINPR_PRAGMA_UNROLL_LOOP add a define for compiler specific pragmas to enforce loop unrolling --- winpr/include/winpr/platform.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/winpr/include/winpr/platform.h b/winpr/include/winpr/platform.h index 00f9d224b..ca1761fa8 100644 --- a/winpr/include/winpr/platform.h +++ b/winpr/include/winpr/platform.h @@ -40,6 +40,7 @@ #define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC \ _Pragma("clang diagnostic ignored \"-Wmismatched-dealloc\"") #define WINPR_PRAGMA_DIAG_POP _Pragma("clang diagnostic pop") +#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("clang loop vectorize_width(8) interleave_count(8)") #elif defined(__GNUC__) #define WINPR_PRAGMA_DIAG_PUSH _Pragma("GCC diagnostic push") #define WINPR_PRAGMA_DIAG_IGNORED_PEDANTIC _Pragma("GCC diagnostic ignored \"-Wpedantic\"") @@ -59,6 +60,7 @@ #define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC \ _Pragma("GCC diagnostic ignored \"-Wmismatched-dealloc\"") #define WINPR_PRAGMA_DIAG_POP _Pragma("GCC diagnostic pop") +#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("GCC unroll 8") _Pragma("GCC ivdep") #else #define WINPR_PRAGMA_DIAG_PUSH #define WINPR_PRAGMA_DIAG_IGNORED_PEDANTIC @@ -70,6 +72,12 @@ #define WINPR_PRAGMA_DIAG_IGNORED_FORMAT_SECURITY #define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC #define WINPR_PRAGMA_DIAG_POP +#define WINPR_PRAGMA_UNROLL_LOOP +#endif + +#if defined(MSVC) +#undef WINPR_PRAGMA_UNROLL_LOOP +#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("loop ( ivdep )") #endif WINPR_PRAGMA_DIAG_PUSH From e8cca22d2a1f8bc895d3edaab7d1856077c91fc1 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 09:30:18 +0200 Subject: [PATCH 3/6] [cmake] refactor libfreerdp * Move codec and primitives to own CMakeLists.txt * Add freerdp_object_library_add * add freerdp_compile_options_add --- libfreerdp/CMakeLists.txt | 291 +++------------------------ libfreerdp/codec/CMakeLists.txt | 180 +++++++++++++++++ libfreerdp/primitives/CMakeLists.txt | 92 +++++++++ 3 files changed, 300 insertions(+), 263 deletions(-) create mode 100644 libfreerdp/codec/CMakeLists.txt create mode 100644 libfreerdp/primitives/CMakeLists.txt diff --git a/libfreerdp/CMakeLists.txt b/libfreerdp/CMakeLists.txt index 12668c466..53e757da0 100644 --- a/libfreerdp/CMakeLists.txt +++ b/libfreerdp/CMakeLists.txt @@ -23,9 +23,11 @@ include(FindCairo) set(LIBFREERDP_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBFREERDP_SRCS "") +set(LIBFREERDP_OBJECT_LIBS "") set(LIBFREERDP_LIBS "") set(LIBFREERDP_INCLUDES "") set(LIBFREERDP_DEFINITIONS "") +set(LIBFREERDP_COMPILE_OPTIONS "") macro (freerdp_module_add) file (RELATIVE_PATH _relPath "${LIBFREERDP_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") @@ -66,6 +68,13 @@ macro (freerdp_library_add_public) set (LIBFREERDP_PUB_LIBS ${LIBFREERDP_PUB_LIBS} PARENT_SCOPE) endmacro() +macro (freerdp_object_library_add) + foreach (_lib ${ARGN}) + list (APPEND LIBFREERDP_OBJECT_LIBS "$") + endforeach() + set (LIBFREERDP_OBJECT_LIBS ${LIBFREERDP_OBJECT_LIBS} PARENT_SCOPE) +endmacro() + macro (freerdp_library_add) foreach (_lib ${ARGN}) list (APPEND LIBFREERDP_LIBS "${_lib}") @@ -80,6 +89,13 @@ macro (freerdp_definition_add) set (LIBFREERDP_DEFINITIONS ${LIBFREERDP_DEFINITIONS} PARENT_SCOPE) endmacro() +macro (freerdp_compile_options_add) + foreach (_lib ${ARGN}) + list (APPEND LIBFREERDP_COMPILE_OPTIONS "${_lib}") + endforeach() + set (LIBFREERDP_COMPILE_OPTIONS ${LIBFREERDP_COMPILE_OPTIONS} PARENT_SCOPE) +endmacro() + if (WITH_SWSCALE) find_package(SWScale REQUIRED) endif(WITH_SWSCALE) @@ -118,134 +134,8 @@ if (NOT WITH_DSP_FFMPEG AND NOT WITH_FAAC) message(WARNING "Compiling without WITH_DSP_FFMPEG and WITH_FAAC, AAC encoder support disabled") endif () -## cmake source properties are only seen by targets in the same CMakeLists.txt -## therefore primitives and codecs need to be defined here - -# codec -set(CODEC_SRCS - codec/bulk.c - codec/bulk.h - codec/dsp.c - codec/color.c - codec/audio.c - codec/planar.c - codec/bitmap.c - codec/interleaved.c - codec/progressive.c - codec/rfx_bitstream.h - codec/rfx_constants.h - codec/rfx_decode.c - codec/rfx_decode.h - codec/rfx_differential.h - codec/rfx_dwt.c - codec/rfx_dwt.h - codec/rfx_encode.c - codec/rfx_encode.h - codec/rfx_quantization.c - codec/rfx_quantization.h - codec/rfx_rlgr.c - codec/rfx_rlgr.h - codec/rfx_types.h - codec/rfx.c - codec/region.c - codec/nsc.c - codec/nsc_encode.c - codec/nsc_encode.h - codec/nsc_types.h - codec/ncrush.c - codec/xcrush.c - codec/mppc.c - codec/zgfx.c - codec/clear.c - codec/jpeg.c - codec/h264.c - codec/yuv.c) - -set(CODEC_SSE2_SRCS - codec/rfx_sse2.c - codec/rfx_sse2.h - codec/nsc_sse2.c - codec/nsc_sse2.h) - -set(CODEC_NEON_SRCS - codec/rfx_neon.c - codec/rfx_neon.h) - -if(WITH_SSE2) - set(CODEC_SRCS ${CODEC_SRCS} ${CODEC_SSE2_SRCS}) - - if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" ) - endif() - - if(MSVC) - set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2" ) - endif() -endif() - -if (WITH_DSP_FFMPEG) - set(CODEC_SRCS - ${CODEC_SRCS} - codec/dsp_ffmpeg.c - codec/dsp_ffmpeg.h) - freerdp_include_directory_add(${FFMPEG_INCLUDE_DIRS}) - freerdp_library_add(${FFMPEG_LIBRARIES}) -endif (WITH_DSP_FFMPEG) - -if (WITH_SOXR) - freerdp_library_add(${SOXR_LIBRARIES}) - include_directories(${SOXR_INCLUDE_DIR}) -endif(WITH_SOXR) - -if(GSM_FOUND) - freerdp_library_add(${GSM_LIBRARIES}) - include_directories(${GSM_INCLUDE_DIRS}) -endif() - -if(LAME_FOUND) - freerdp_library_add(${LAME_LIBRARIES}) - include_directories(${LAME_INCLUDE_DIRS}) -endif() - -set(OPUS_DEFAULT OFF) -if (NOT WITH_DSP_FFMPEG) - find_package(Opus) - if (Opus_FOUND) - set(OPUS_DEFAULT ${OPUS_FOUND}) - else() - find_package(PkgConfig) - if (PkgConfig_FOUND) - pkg_check_modules(OPUS opus) - set(OPUS_DEFAULT ${OPUS_FOUND}) - endif() - endif() -endif() - -option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) -if (WITH_OPUS) - find_package(Opus) - if (Opus_FOUND) - freerdp_library_add(Opus::opus) - else() - find_package(PkgConfig REQUIRED) - pkg_check_modules(OPUS REQUIRED opus) - if(OPUS_FOUND) - freerdp_library_add(${OPUS_LIBRARIES}) - include_directories(${OPUS_INCLUDE_DIRS}) - link_directories(${OPUS_LIBRARY_DIRS}) - endif() - endif() -endif() - -if(FAAD2_FOUND) - freerdp_library_add(${FAAD2_LIBRARIES}) - include_directories(${FAAD2_INCLUDE_DIRS}) -endif() - -if(FAAC_FOUND) - freerdp_library_add(${FAAC_LIBRARIES}) - include_directories(${FAAC_INCLUDE_DIRS}) -endif() +add_subdirectory(codec) +add_subdirectory(primitives) if (WITH_AAD) if (NOT WITH_WINPR_JSON) @@ -253,141 +143,6 @@ if (WITH_AAD) endif() endif() -if(WITH_NEON) - check_symbol_exists("_M_AMD64" "" MSVC_ARM64) - check_symbol_exists("__aarch64__" "" ARCH_ARM64) - - if (NOT MSVC_ARM64 AND NOT ARCH_ARM64) - set_source_files_properties(${CODEC_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon" ) - endif() - - set(CODEC_SRCS ${CODEC_SRCS} ${CODEC_NEON_SRCS}) -endif() - -if(WITH_OPENH264) - set(CODEC_SRCS ${CODEC_SRCS} codec/h264_openh264.c) - freerdp_include_directory_add(${OPENH264_INCLUDE_DIR}) - if (NOT WITH_OPENH264_LOADING) - freerdp_library_add(${OPENH264_LIBRARIES}) - endif (NOT WITH_OPENH264_LOADING) -endif() - -if(WITH_VIDEO_FFMPEG) - set(CODEC_SRCS ${CODEC_SRCS} codec/h264_ffmpeg.c) - freerdp_include_directory_add(${FFMPEG_INCLUDE_DIRS}) - freerdp_library_add(${FFMPEG_LIBRARIES}) -endif() - -if(WIN32 AND WITH_MEDIA_FOUNDATION) - set(CODEC_SRCS ${CODEC_SRCS} codec/h264_mf.c) -endif() - -if(ANDROID AND WITH_MEDIACODEC) - list(APPEND CODEC_SRCS codec/h264_mediacodec.c) - - find_library(MEDIACODEC mediandk REQUIRED) - freerdp_library_add(${MEDIACODEC}) -endif() - -freerdp_module_add(${CODEC_SRCS}) - -if(BUILD_TESTING) - add_subdirectory(codec/test) -endif() - -# /codec - -# primitives - -set(PRIMITIVES_SRCS - primitives/prim_add.c - primitives/prim_andor.c - primitives/prim_alphaComp.c - primitives/prim_colors.c - primitives/prim_copy.c - primitives/prim_set.c - primitives/prim_shift.c - primitives/prim_sign.c - primitives/prim_YUV.c - primitives/prim_YCoCg.c - primitives/primitives.c - primitives/prim_internal.h) - -if (WITH_SSE2 OR WITH_NEON) - set(PRIMITIVES_SSE2_SRCS - primitives/prim_colors_opt.c - primitives/prim_set_opt.c) - - set(PRIMITIVES_SSE3_SRCS - primitives/prim_add_opt.c - primitives/prim_alphaComp_opt.c - primitives/prim_andor_opt.c - primitives/prim_shift_opt.c) - - set(PRIMITIVES_SSSE3_SRCS - primitives/prim_sign_opt.c - primitives/prim_YCoCg_opt.c) - - if (WITH_SSE2) - set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS} - primitives/prim_YUV_ssse3.c) - endif() - - if (WITH_NEON) - set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS} - primitives/prim_YUV_neon.c) - endif() -endif() - -if (WITH_OPENCL) - set(PRIMITIVES_OPENCL_SRCS primitives/prim_YUV_opencl.c) - - freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS}) - freerdp_library_add(OpenCL::OpenCL) - -endif() - -set(PRIMITIVES_OPT_SRCS - ${PRIMITIVES_SSE2_SRCS} - ${PRIMITIVES_SSE3_SRCS} - ${PRIMITIVES_SSSE3_SRCS} - ${PRIMITIVES_OPENCL_SRCS}) - -if(WITH_SSE2) - if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - set_source_files_properties(${PRIMITIVES_SSE2_SRCS} - PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -msse2") - set_source_files_properties(${PRIMITIVES_SSE3_SRCS} - PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -msse3") - set_source_files_properties(${PRIMITIVES_SSSE3_SRCS} - PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -mssse3") - endif() - - if(MSVC) - set_source_files_properties(${PRIMITIVES_OPT_SRCS} - PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} /arch:SSE2") - endif() -elseif(WITH_NEON) - if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - if (NOT MSVC_ARM64 AND NOT ARCH_ARM64) - set_source_files_properties(${PRIMITIVES_OPT_SRCS} - PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -mfpu=neon") - endif() - endif() - # TODO: Add MSVC equivalent -endif() - -set(PRIMITIVES_SRCS ${PRIMITIVES_SRCS} ${PRIMITIVES_OPT_SRCS}) - -freerdp_module_add(${PRIMITIVES_SRCS}) - -if(BUILD_TESTING AND NOT WIN32 AND NOT APPLE) - add_subdirectory(primitives/test) -endif() - - -# /primitives - list(APPEND LIBFREERDP_PUB_LIBS winpr) list(REMOVE_DUPLICATES LIBFREERDP_DEFINITIONS) @@ -398,6 +153,16 @@ AddTargetWithResourceFile(${MODULE_NAME} FALSE "${FREERDP_VERSION}" LIBFREERDP_S add_definitions(${LIBFREERDP_DEFINITIONS}) +if (LIBFREERDP_COMPILE_OPTIONS) + list(REMOVE_DUPLICATES LIBFREERDP_COMPILE_OPTIONS) + target_compile_options(${MODULE_NAME} PRIVATE ${LIBFREERDP_COMPILE_OPTIONS}) +endif() + +if (LIBFREERDP_OBJECT_LIBS) + list(REMOVE_DUPLICATES LIBFREERDP_OBJECT_LIBS) + target_link_libraries(${MODULE_NAME} PRIVATE ${LIBFREERDP_OBJECT_LIBS}) +endif() + if (WITH_FULL_CONFIG_PATH) add_definitions(-DWITH_FULL_CONFIG_PATH) endif() diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt new file mode 100644 index 000000000..8640a5d53 --- /dev/null +++ b/libfreerdp/codec/CMakeLists.txt @@ -0,0 +1,180 @@ +# codec + +set(CODEC_SRCS + bulk.c + bulk.h + dsp.c + color.c + audio.c + planar.c + bitmap.c + interleaved.c + progressive.c + rfx_bitstream.h + rfx_constants.h + rfx_decode.c + rfx_decode.h + rfx_differential.h + rfx_dwt.c + rfx_dwt.h + rfx_encode.c + rfx_encode.h + rfx_quantization.c + rfx_quantization.h + rfx_rlgr.c + rfx_rlgr.h + rfx_types.h + rfx.c + region.c + nsc.c + nsc_encode.c + nsc_encode.h + nsc_types.h + ncrush.c + xcrush.c + mppc.c + zgfx.c + clear.c + jpeg.c + h264.c + yuv.c) + +set(CODEC_SSE2_SRCS + rfx_sse2.c + rfx_sse2.h + nsc_sse2.c + nsc_sse2.h +) + +set(CODEC_NEON_SRCS + rfx_neon.c + rfx_neon.h +) + +# Append initializers +set(CODEC_LIBS "") +list(APPEND CODEC_SRCS ${CODEC_SSE2_SRCS}) +list(APPEND CODEC_SRCS ${CODEC_NEON_SRCS}) + +if(WITH_SSE2) + if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + if (CODEC_SSE2_SRCS) + set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" ) + endif() + endif() + + if(MSVC) + if (CODEC_SSE2_SRCS) + set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2" ) + endif() + endif() +endif() +if(WITH_NEON) + check_symbol_exists("_M_AMD64" "" MSVC_ARM64) + check_symbol_exists("__aarch64__" "" ARCH_ARM64) + + if (NOT MSVC_ARM64 AND NOT ARCH_ARM64) + if (CODEC_SSE2_SRCS) + set_source_files_properties(${CODEC_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon" ) + endif() + endif() +endif() + +if (WITH_DSP_FFMPEG) + set(CODEC_SRCS + ${CODEC_SRCS} + dsp_ffmpeg.c + dsp_ffmpeg.h) + include_directories(${FFMPEG_INCLUDE_DIRS}) + list(APPEND CODEC_LIBS ${FFMPEG_LIBRARIES}) +endif (WITH_DSP_FFMPEG) + +if (WITH_SOXR) + list(APPEND CODEC_LIBS ${SOXR_LIBRARIES}) + include_directories(${SOXR_INCLUDE_DIR}) +endif(WITH_SOXR) + +if(GSM_FOUND) + list(APPEND CODEC_LIBS ${GSM_LIBRARIES}) + include_directories(${GSM_INCLUDE_DIRS}) +endif() + +if(LAME_FOUND) + list(APPEND CODEC_LIBS ${LAME_LIBRARIES}) + include_directories(${LAME_INCLUDE_DIRS}) +endif() + +set(OPUS_DEFAULT OFF) +if (NOT WITH_DSP_FFMPEG) + find_package(Opus) + if (Opus_FOUND) + set(OPUS_DEFAULT ${OPUS_FOUND}) + else() + find_package(PkgConfig) + if (PkgConfig_FOUND) + pkg_check_modules(OPUS opus) + set(OPUS_DEFAULT ${OPUS_FOUND}) + endif() + endif() +endif() + +option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) +if (WITH_OPUS) + find_package(Opus) + if (Opus_FOUND) + list(APPEND CODEC_LIBS Opus::opus) + else() + find_package(PkgConfig REQUIRED) + pkg_check_modules(OPUS REQUIRED opus) + if(OPUS_FOUND) + list(APPEND CODEC_LIBS ${OPUS_LIBRARIES}) + include_directories(${OPUS_INCLUDE_DIRS}) + link_directories(${OPUS_LIBRARY_DIRS}) + endif() + endif() +endif() + +if(FAAD2_FOUND) + list(APPEND CODEC_LIBS ${FAAD2_LIBRARIES}) + include_directories(${FAAD2_INCLUDE_DIRS}) +endif() + +if(FAAC_FOUND) + list(APPEND CODEC_LIBS ${FAAC_LIBRARIES}) + include_directories(${FAAC_INCLUDE_DIRS}) +endif() + +if(WITH_OPENH264) + set(CODEC_SRCS ${CODEC_SRCS} h264_openh264.c) + include_directories(${OPENH264_INCLUDE_DIR}) + if (NOT WITH_OPENH264_LOADING) + list(APPEND CODEC_LIBS ${OPENH264_LIBRARIES}) + endif (NOT WITH_OPENH264_LOADING) +endif() + +if(WITH_VIDEO_FFMPEG) + set(CODEC_SRCS ${CODEC_SRCS} h264_ffmpeg.c) + include_directories(${FFMPEG_INCLUDE_DIRS}) + list(APPEND CODEC_LIBS ${FFMPEG_LIBRARIES}) +endif() + +if(WIN32 AND WITH_MEDIA_FOUNDATION) + set(CODEC_SRCS ${CODEC_SRCS} h264_mf.c) +endif() + +if(ANDROID AND WITH_MEDIACODEC) + list(APPEND CODEC_SRCS h264_mediacodec.c) + + find_library(MEDIACODEC mediandk REQUIRED) + list(APPEND CODEC_LIBS ${MEDIACODEC}) +endif() + +add_library(freerdp-codecs OBJECT + ${CODEC_SRCS} +) +freerdp_library_add(${CODEC_LIBS}) +freerdp_object_library_add(freerdp-codecs) + +if(BUILD_TESTING) + add_subdirectory(test) +endif() diff --git a/libfreerdp/primitives/CMakeLists.txt b/libfreerdp/primitives/CMakeLists.txt new file mode 100644 index 000000000..5408c318e --- /dev/null +++ b/libfreerdp/primitives/CMakeLists.txt @@ -0,0 +1,92 @@ +# primitives + +set(PRIMITIVES_SRCS + prim_add.c + prim_andor.c + prim_alphaComp.c + prim_colors.c + prim_copy.c + prim_set.c + prim_shift.c + prim_sign.c + prim_YUV.c + prim_YCoCg.c + primitives.c + prim_internal.h) + +if (WITH_SSE2 OR WITH_NEON) + set(PRIMITIVES_SSE2_SRCS + prim_colors_opt.c + prim_set_opt.c) + + set(PRIMITIVES_SSE3_SRCS + prim_add_opt.c + prim_alphaComp_opt.c + prim_andor_opt.c + prim_shift_opt.c) + + set(PRIMITIVES_SSSE3_SRCS + prim_sign_opt.c + prim_YCoCg_opt.c) + + if (WITH_SSE2) + set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS} + prim_YUV_ssse3.c) + endif() + + if (WITH_NEON) + set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS} + prim_YUV_neon.c) + endif() +endif() + +if (WITH_OPENCL) + set(PRIMITIVES_OPENCL_SRCS prim_YUV_opencl.c) + + freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS}) + freerdp_library_add(OpenCL::OpenCL) + +endif() + +set(PRIMITIVES_OPT_SRCS + ${PRIMITIVES_SSE2_SRCS} + ${PRIMITIVES_SSE3_SRCS} + ${PRIMITIVES_SSSE3_SRCS} + ${PRIMITIVES_OPENCL_SRCS}) + +set(PRIMITIVES_SRCS ${PRIMITIVES_SRCS} ${PRIMITIVES_OPT_SRCS}) + +add_library(freerdp-primitives OBJECT + ${PRIMITIVES_SRCS} +) + +if(WITH_SSE2) + if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + if(PRIMITIVES_SSE2_SRCS) + set_source_files_properties(${PRIMITIVES_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" ) + endif() + if (PRIMITIVES_SSE3_SRCS) + set_source_files_properties(${PRIMITIVES_SSE3_SRCS} PROPERTIES COMPILE_FLAGS "-msse3" ) + endif() + if (PRIMITIVES_SSSE3_SRCS) + set_source_files_properties(${PRIMITIVES_SSSE3_SRCS} PROPERTIES COMPILE_FLAGS "-mssse3" ) + endif() + endif() + + if(MSVC) + set_source_files_properties(${PRIMITIVES_OPT_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2") + endif() +elseif(WITH_NEON) + if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + if (NOT MSVC_ARM64 AND NOT ARCH_ARM64) + set_source_files_properties(${PRIMITIVES_OPT_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon") + endif() + endif() + # TODO: Add MSVC equivalent +endif() + +freerdp_object_library_add(freerdp-primitives) + +if(BUILD_TESTING AND NOT WIN32 AND NOT APPLE) + add_subdirectory(test) +endif() From 2ee987e665d475b0950e4c7b0e30e504f4cb0410 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 09:47:22 +0200 Subject: [PATCH 4/6] [cmake,codec] refactor codec * move sse and neon implementations to own subdirectories * add stubs for missing optimizations (with log messages) --- libfreerdp/codec/CMakeLists.txt | 16 +++++++---- libfreerdp/codec/neon/nsc_neon.c | 38 ++++++++++++++++++++++++++ libfreerdp/codec/neon/nsc_neon.h | 28 +++++++++++++++++++ libfreerdp/codec/{ => neon}/rfx_neon.c | 16 +++++++---- libfreerdp/codec/{ => neon}/rfx_neon.h | 6 ---- libfreerdp/codec/nsc.c | 6 ++-- libfreerdp/codec/rfx.c | 14 +++------- libfreerdp/codec/{ => sse}/nsc_sse2.c | 15 ++++++++-- libfreerdp/codec/{ => sse}/nsc_sse2.h | 6 ---- libfreerdp/codec/{ => sse}/rfx_sse2.c | 15 ++++++++-- libfreerdp/codec/{ => sse}/rfx_sse2.h | 6 ---- 11 files changed, 119 insertions(+), 47 deletions(-) create mode 100644 libfreerdp/codec/neon/nsc_neon.c create mode 100644 libfreerdp/codec/neon/nsc_neon.h rename libfreerdp/codec/{ => neon}/rfx_neon.c (99%) rename libfreerdp/codec/{ => neon}/rfx_neon.h (88%) rename libfreerdp/codec/{ => sse}/nsc_sse2.c (98%) rename libfreerdp/codec/{ => sse}/nsc_sse2.h (89%) rename libfreerdp/codec/{ => sse}/rfx_sse2.c (98%) rename libfreerdp/codec/{ => sse}/rfx_sse2.h (88%) diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index 8640a5d53..c271f151b 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -40,15 +40,17 @@ set(CODEC_SRCS yuv.c) set(CODEC_SSE2_SRCS - rfx_sse2.c - rfx_sse2.h - nsc_sse2.c - nsc_sse2.h + sse/rfx_sse2.c + sse/rfx_sse2.h + sse/nsc_sse2.c + sse/nsc_sse2.h ) set(CODEC_NEON_SRCS - rfx_neon.c - rfx_neon.h + neon/rfx_neon.c + neon/rfx_neon.h + neon/nsc_neon.c + neon/nsc_neon.h ) # Append initializers @@ -116,6 +118,8 @@ if (NOT WITH_DSP_FFMPEG) set(OPUS_DEFAULT ${OPUS_FOUND}) endif() endif() + + message("Using OPUS: ${OPUS_DEFAULT}") endif() option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) diff --git a/libfreerdp/codec/neon/nsc_neon.c b/libfreerdp/codec/neon/nsc_neon.c new file mode 100644 index 000000000..5d9887b9a --- /dev/null +++ b/libfreerdp/codec/neon/nsc_neon.c @@ -0,0 +1,38 @@ +/** + * FreeRDP: A Remote Desktop Protocol Implementation + * NSCodec Library - SSE2 Optimizations + * + * Copyright 2024 Armin Novak + * Copyright 2024 Thincast Technologies GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "../nsc_types.h" +#include "nsc_neon.h" + +#define TAG FREERDP_TAG("codec.nsc.neon") + +void nsc_init_neon(NSC_CONTEXT* context) +{ +#if defined(WITH_NEON) + if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + return; + + WLog_WARN(TAG, "TODO: Implement neon optimized version of this function"); +#endif +} diff --git a/libfreerdp/codec/neon/nsc_neon.h b/libfreerdp/codec/neon/nsc_neon.h new file mode 100644 index 000000000..159ab246a --- /dev/null +++ b/libfreerdp/codec/neon/nsc_neon.h @@ -0,0 +1,28 @@ +/** + * FreeRDP: A Remote Desktop Protocol Implementation + * NSCodec Library - NEON Optimizations + * + * Copyright 2012 Vic Lee + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FREERDP_LIB_CODEC_NSC_NEON_H +#define FREERDP_LIB_CODEC_NSC_NEON_H + +#include +#include + +FREERDP_LOCAL void nsc_init_neon(NSC_CONTEXT* context); + +#endif /* FREERDP_LIB_CODEC_NSC_NEON_H */ diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/neon/rfx_neon.c similarity index 99% rename from libfreerdp/codec/rfx_neon.c rename to libfreerdp/codec/neon/rfx_neon.c index f723efdbd..677b55856 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/neon/rfx_neon.c @@ -18,6 +18,12 @@ */ #include +#include + +#include "../rfx_types.h" +#include "rfx_neon.h" + +#define TAG FREERDP_TAG("codec.rfx.neon") #if defined(WITH_NEON) @@ -27,9 +33,6 @@ #include #include -#include "rfx_types.h" -#include "rfx_neon.h" - /* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -517,9 +520,11 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp) rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[3007], temp, 2); rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[0], temp, 1); } +#endif // WITH_NEON void rfx_init_neon(RFX_CONTEXT* context) { +#if defined(WITH_NEON) if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { DEBUG_RFX("Using NEON optimizations"); @@ -531,6 +536,7 @@ void rfx_init_neon(RFX_CONTEXT* context) context->dwt_2d_decode = rfx_dwt_2d_decode_NEON; context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon; } +#else + WINPR_UNUSED(context); +#endif } - -#endif // WITH_NEON diff --git a/libfreerdp/codec/rfx_neon.h b/libfreerdp/codec/neon/rfx_neon.h similarity index 88% rename from libfreerdp/codec/rfx_neon.h rename to libfreerdp/codec/neon/rfx_neon.h index ecb3ec08f..472d260bb 100644 --- a/libfreerdp/codec/rfx_neon.h +++ b/libfreerdp/codec/neon/rfx_neon.h @@ -25,10 +25,4 @@ FREERDP_LOCAL void rfx_init_neon(RFX_CONTEXT* context); -#ifndef RFX_INIT_SIMD -#if defined(WITH_NEON) -#define RFX_INIT_SIMD(_rfx_context) rfx_init_neon(_rfx_context) -#endif -#endif - #endif /* FREERDP_LIB_CODEC_RFX_NEON_H */ diff --git a/libfreerdp/codec/nsc.c b/libfreerdp/codec/nsc.c index fd3183519..c89cd9e43 100644 --- a/libfreerdp/codec/nsc.c +++ b/libfreerdp/codec/nsc.c @@ -34,7 +34,8 @@ #include "nsc_types.h" #include "nsc_encode.h" -#include "nsc_sse2.h" +#include "sse/nsc_sse2.h" +#include "neon/nsc_neon.h" #include #define TAG FREERDP_TAG("codec.nsc") @@ -368,7 +369,8 @@ NSC_CONTEXT* nsc_context_new(void) context->ColorLossLevel = 3; context->ChromaSubsamplingLevel = 1; /* init optimized methods */ - NSC_INIT_SIMD(context); + nsc_init_sse2(context); + nsc_init_neon(context); return context; error: WINPR_PRAGMA_DIAG_PUSH diff --git a/libfreerdp/codec/rfx.c b/libfreerdp/codec/rfx.c index 2feb0f76b..c5cafeda7 100644 --- a/libfreerdp/codec/rfx.c +++ b/libfreerdp/codec/rfx.c @@ -47,18 +47,11 @@ #include "rfx_dwt.h" #include "rfx_rlgr.h" -#include "rfx_sse2.h" -#include "rfx_neon.h" +#include "sse/rfx_sse2.h" +#include "neon/rfx_neon.h" #define TAG FREERDP_TAG("codec") -#ifndef RFX_INIT_SIMD -#define RFX_INIT_SIMD(_rfx_context) \ - do \ - { \ - } while (0) -#endif - #define RFX_KEY "Software\\" FREERDP_VENDOR_STRING "\\" FREERDP_PRODUCT_STRING "\\RemoteFX" /** @@ -338,7 +331,8 @@ RFX_CONTEXT* rfx_context_new_ex(BOOL encoder, UINT32 ThreadingFlags) context->dwt_2d_encode = rfx_dwt_2d_encode; context->rlgr_decode = rfx_rlgr_decode; context->rlgr_encode = rfx_rlgr_encode; - RFX_INIT_SIMD(context); + rfx_init_sse2(context); + rfx_init_neon(context); context->state = RFX_STATE_SEND_HEADERS; context->expectedDataBlockType = WBT_FRAME_BEGIN; return context; diff --git a/libfreerdp/codec/nsc_sse2.c b/libfreerdp/codec/sse/nsc_sse2.c similarity index 98% rename from libfreerdp/codec/nsc_sse2.c rename to libfreerdp/codec/sse/nsc_sse2.c index 7ef0275cf..1c88139f8 100644 --- a/libfreerdp/codec/nsc_sse2.c +++ b/libfreerdp/codec/sse/nsc_sse2.c @@ -18,7 +18,14 @@ */ #include +#include +#include "../nsc_types.h" +#include "nsc_sse2.h" + +#define TAG FREERDP_TAG("codec.nsc.sse2") + +#if defined(WITH_SSE2) #include #include #include @@ -30,9 +37,6 @@ #include #include -#include "nsc_types.h" -#include "nsc_sse2.h" - static BOOL nsc_encode_argb_to_aycocg_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanline) { UINT16 y = 0; @@ -373,12 +377,17 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl return TRUE; } +#endif void nsc_init_sse2(NSC_CONTEXT* context) { +#if defined(WITH_SSE2) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2") context->encode = nsc_encode_sse2; +#else + WINPR_UNUSED(context); +#endif } diff --git a/libfreerdp/codec/nsc_sse2.h b/libfreerdp/codec/sse/nsc_sse2.h similarity index 89% rename from libfreerdp/codec/nsc_sse2.h rename to libfreerdp/codec/sse/nsc_sse2.h index 8b795d7bd..8b74cabf8 100644 --- a/libfreerdp/codec/nsc_sse2.h +++ b/libfreerdp/codec/sse/nsc_sse2.h @@ -25,10 +25,4 @@ FREERDP_LOCAL void nsc_init_sse2(NSC_CONTEXT* context); -#ifdef WITH_SSE2 -#ifndef NSC_INIT_SIMD -#define NSC_INIT_SIMD(_context) nsc_init_sse2(_context) -#endif -#endif - #endif /* FREERDP_LIB_CODEC_NSC_SSE2_H */ diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/sse/rfx_sse2.c similarity index 98% rename from libfreerdp/codec/rfx_sse2.c rename to libfreerdp/codec/sse/rfx_sse2.c index 1b401b428..4c83a3b8a 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/sse/rfx_sse2.c @@ -19,7 +19,14 @@ */ #include +#include +#include "../rfx_types.h" +#include "rfx_sse2.h" + +#define TAG FREERDP_TAG("codec.rfx.sse2") + +#if defined(WITH_SSE2) #include #include #include @@ -28,9 +35,6 @@ #include #include -#include "rfx_types.h" -#include "rfx_sse2.h" - #ifdef _MSC_VER #define __attribute__(...) #endif @@ -477,9 +481,11 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE rfx_dwt_2d_encode_block_sse2(buffer + 3072, dwt_buffer, 16); rfx_dwt_2d_encode_block_sse2(buffer + 3840, dwt_buffer, 8); } +#endif void rfx_init_sse2(RFX_CONTEXT* context) { +#if defined(WITH_SSE2) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; @@ -491,4 +497,7 @@ void rfx_init_sse2(RFX_CONTEXT* context) context->quantization_encode = rfx_quantization_encode_sse2; context->dwt_2d_decode = rfx_dwt_2d_decode_sse2; context->dwt_2d_encode = rfx_dwt_2d_encode_sse2; +#else + WINPR_UNUSED(context); +#endif } diff --git a/libfreerdp/codec/rfx_sse2.h b/libfreerdp/codec/sse/rfx_sse2.h similarity index 88% rename from libfreerdp/codec/rfx_sse2.h rename to libfreerdp/codec/sse/rfx_sse2.h index b0d3998e3..d15c1854d 100644 --- a/libfreerdp/codec/rfx_sse2.h +++ b/libfreerdp/codec/sse/rfx_sse2.h @@ -25,10 +25,4 @@ FREERDP_LOCAL void rfx_init_sse2(RFX_CONTEXT* context); -#ifdef WITH_SSE2 -#ifndef RFX_INIT_SIMD -#define RFX_INIT_SIMD(_rfx_context) rfx_init_sse2(_rfx_context) -#endif -#endif - #endif /* FREERDP_LIB_CODEC_RFX_SSE2_H */ From 311068e60506c4b4c5a7450030d621e18e9d9ce1 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 09:51:29 +0200 Subject: [PATCH 5/6] [primitives] add image copy primitive * move freerdp_image_copy_no_overlap implementation to primitives * add SSE4.1 and AVX2 optimizations --- include/freerdp/primitives.h | 7 + libfreerdp/codec/CMakeLists.txt | 1 + libfreerdp/codec/color.c | 233 ++------------------- libfreerdp/codec/color.h | 147 +++++++++++++ libfreerdp/primitives/CMakeLists.txt | 5 + libfreerdp/primitives/prim_copy.c | 252 ++++++++++++++++++++++ libfreerdp/primitives/prim_copy.h | 42 ++++ libfreerdp/primitives/prim_copy_avx2.c | 276 +++++++++++++++++++++++++ libfreerdp/primitives/prim_copy_sse.c | 274 ++++++++++++++++++++++++ 9 files changed, 1023 insertions(+), 214 deletions(-) create mode 100644 libfreerdp/codec/color.h create mode 100644 libfreerdp/primitives/prim_copy.h create mode 100644 libfreerdp/primitives/prim_copy_avx2.c create mode 100644 libfreerdp/primitives/prim_copy_sse.c diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index f3a6b3835..88dd5e89d 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -104,6 +104,12 @@ typedef pstatus_t (*__add_16s_t)(const INT16* WINPR_RESTRICT pSrc1, UINT32 len); typedef pstatus_t (*__add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1, INT16* WINPR_RESTRICT pSrcDst2, UINT32 len); +typedef pstatus_t (*__copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, + UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, + DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, + UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + UINT32 flags); typedef pstatus_t (*__lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, UINT32 len); typedef pstatus_t (*__lShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len); typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len); @@ -222,6 +228,7 @@ typedef struct */ __add_16s_inplace_t add_16s_inplace; __lShiftC_16s_inplace_t lShiftC_16s_inplace; + __copy_no_overlap_t copy_no_overlap; } primitives_t; typedef enum diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index c271f151b..6eedc1910 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -5,6 +5,7 @@ set(CODEC_SRCS bulk.h dsp.c color.c + color.h audio.c planar.c bitmap.c diff --git a/libfreerdp/codec/color.c b/libfreerdp/codec/color.c index 4a079aca1..b6a04a8fe 100644 --- a/libfreerdp/codec/color.c +++ b/libfreerdp/codec/color.c @@ -39,17 +39,9 @@ #include #endif -#define TAG FREERDP_TAG("color") +#include "color.h" -static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format, - UINT32 color); -static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color); -static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format); -static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second) -{ - const DWORD mask = (DWORD) ~(8UL << 12UL); - return (first & mask) == (second & mask); -} +#define TAG FREERDP_TAG("color") BYTE* freerdp_glyph_convert(UINT32 width, UINT32 height, const BYTE* WINPR_RESTRICT data) { @@ -733,102 +725,6 @@ static INLINE BOOL freerdp_image_copy_no_overlap_dst_alpha( srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); } -BOOL freerdp_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, - UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, - const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, - UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, - const gdiPalette* WINPR_RESTRICT palette, UINT32 flags) -{ - const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat); - const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat); - const SSIZE_T copyDstWidth = nWidth * dstByte; - const SSIZE_T xSrcOffset = nXSrc * srcByte; - const SSIZE_T xDstOffset = nXDst * dstByte; - const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE; - SSIZE_T srcVOffset = 0; - SSIZE_T srcVMultiplier = 1; - SSIZE_T dstVOffset = 0; - SSIZE_T dstVMultiplier = 1; - - if ((nWidth == 0) || (nHeight == 0)) - return TRUE; - - if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX)) - return FALSE; - - if (!pDstData || !pSrcData) - return FALSE; - - if (nDstStep == 0) - nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat); - - if (nSrcStep == 0) - nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat); - - if (vSrcVFlip) - { - srcVOffset = (nHeight - 1ll) * nSrcStep; - srcVMultiplier = -1; - } - - if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat)) - return freerdp_image_copy_no_overlap_dst_alpha( - pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, - nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, - dstVOffset); - else if (FreeRDPAreColorFormatsEqualNoAlpha_int(SrcFormat, DstFormat)) - { - if (!vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) && (xDstOffset == 0)) - { - const void* src = &pSrcData[1ull * nYSrc * nSrcStep]; - void* dst = &pDstData[1ull * nYDst * nDstStep]; - memcpy(dst, src, 1ull * nDstStep * nHeight); - } - else - { - for (SSIZE_T y = 0; y < nHeight; y++) - { - const BYTE* WINPR_RESTRICT srcLine = - &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; - BYTE* WINPR_RESTRICT dstLine = - &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; - memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth); - } - } - } - else - { - for (SSIZE_T y = 0; y < nHeight; y++) - { - const BYTE* WINPR_RESTRICT srcLine = - &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; - BYTE* WINPR_RESTRICT dstLine = - &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; - - UINT32 color = FreeRDPReadColor_int(&srcLine[nXSrc * srcByte], SrcFormat); - UINT32 oldColor = color; - UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); - FreeRDPWriteColor_int(&dstLine[nXDst * dstByte], DstFormat, dstColor); - for (SSIZE_T x = 1; x < nWidth; x++) - { - color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); - if (color == oldColor) - { - FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); - } - else - { - oldColor = color; - dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); - FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); - } - } - } - } - - return TRUE; -} - BOOL freerdp_image_copy_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, @@ -1608,124 +1504,16 @@ BOOL FreeRDPWriteColorIgnoreAlpha(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT3 return FreeRDPWriteColorIgnoreAlpha_int(dst, format, color); } -BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color) -{ - switch (format) - { - case PIXEL_FORMAT_XBGR32: - case PIXEL_FORMAT_XRGB32: - case PIXEL_FORMAT_ABGR32: - case PIXEL_FORMAT_ARGB32: - { - const UINT32 tmp = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL); - return FreeRDPWriteColor_int(dst, format, tmp); - } - case PIXEL_FORMAT_BGRX32: - case PIXEL_FORMAT_RGBX32: - case PIXEL_FORMAT_BGRA32: - case PIXEL_FORMAT_RGBA32: - { - const UINT32 tmp = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL); - return FreeRDPWriteColor_int(dst, format, tmp); - } - default: - return FreeRDPWriteColor_int(dst, format, color); - } -} - BOOL FreeRDPWriteColor(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color) { return FreeRDPWriteColor_int(dst, format, color); } -BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color) -{ - switch (FreeRDPGetBitsPerPixel(format)) - { - case 32: - dst[0] = (BYTE)(color >> 24); - dst[1] = (BYTE)(color >> 16); - dst[2] = (BYTE)(color >> 8); - dst[3] = (BYTE)color; - break; - - case 24: - dst[0] = (BYTE)(color >> 16); - dst[1] = (BYTE)(color >> 8); - dst[2] = (BYTE)color; - break; - - case 16: - dst[1] = (BYTE)(color >> 8); - dst[0] = (BYTE)color; - break; - - case 15: - if (!FreeRDPColorHasAlpha(format)) - color = color & 0x7FFF; - - dst[1] = (BYTE)(color >> 8); - dst[0] = (BYTE)color; - break; - - case 8: - dst[0] = (BYTE)color; - break; - - default: - WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format)); - return FALSE; - } - - return TRUE; -} UINT32 FreeRDPReadColor(const BYTE* WINPR_RESTRICT src, UINT32 format) { return FreeRDPReadColor_int(src, format); } -UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format) -{ - UINT32 color = 0; - - switch (FreeRDPGetBitsPerPixel(format)) - { - case 32: - color = - ((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3]; - break; - - case 24: - color = ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2]; - break; - - case 16: - color = ((UINT32)src[1] << 8) | src[0]; - break; - - case 15: - color = ((UINT32)src[1] << 8) | src[0]; - - if (!FreeRDPColorHasAlpha(format)) - color = color & 0x7FFF; - - break; - - case 8: - case 4: - case 1: - color = *src; - break; - - default: - WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format)); - color = 0; - break; - } - - return color; -} - UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a) { UINT32 _r = r; @@ -1817,3 +1605,20 @@ UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a) return 0; } } + +BOOL freerdp_image_copy_no_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, + UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, + const BYTE* pSrcData, DWORD SrcFormat, UINT32 nSrcStep, + UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* palette, + UINT32 flags) +{ + static primitives_t* prims = NULL; + if (!prims) + prims = primitives_get(); + + WINPR_ASSERT(prims); + WINPR_ASSERT(prims->copy_no_overlap); + return prims->copy_no_overlap(pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, + pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, + flags) == PRIMITIVES_SUCCESS; +} diff --git a/libfreerdp/codec/color.h b/libfreerdp/codec/color.h new file mode 100644 index 000000000..5a4f08e78 --- /dev/null +++ b/libfreerdp/codec/color.h @@ -0,0 +1,147 @@ +/** + * FreeRDP: A Remote Desktop Protocol Implementation + * codec color + * + * Copyright 2024 Armin Novak + * Copyright 2024 Thincast Technologies GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef FREERDP_LIB_CODEC_COLOR_H +#define FREERDP_LIB_CODEC_COLOR_H + +#include +#include + +#include +#include + +#define INT_COLOR_TAG FREERDP_TAG("codec.color.h") + +static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second) +{ + const DWORD mask = (DWORD) ~(8UL << 12UL); + return (first & mask) == (second & mask); +} + +static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color) +{ + switch (FreeRDPGetBitsPerPixel(format)) + { + case 32: + dst[0] = (BYTE)(color >> 24); + dst[1] = (BYTE)(color >> 16); + dst[2] = (BYTE)(color >> 8); + dst[3] = (BYTE)color; + break; + + case 24: + dst[0] = (BYTE)(color >> 16); + dst[1] = (BYTE)(color >> 8); + dst[2] = (BYTE)color; + break; + + case 16: + dst[1] = (BYTE)(color >> 8); + dst[0] = (BYTE)color; + break; + + case 15: + if (!FreeRDPColorHasAlpha(format)) + color = color & 0x7FFF; + + dst[1] = (BYTE)(color >> 8); + dst[0] = (BYTE)color; + break; + + case 8: + dst[0] = (BYTE)color; + break; + + default: + WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format)); + return FALSE; + } + + return TRUE; +} + +static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format, + UINT32 color) +{ + switch (format) + { + case PIXEL_FORMAT_XBGR32: + case PIXEL_FORMAT_XRGB32: + case PIXEL_FORMAT_ABGR32: + case PIXEL_FORMAT_ARGB32: + { + const UINT32 tmp = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL); + return FreeRDPWriteColor_int(dst, format, tmp); + } + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_RGBX32: + case PIXEL_FORMAT_BGRA32: + case PIXEL_FORMAT_RGBA32: + { + const UINT32 tmp = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL); + return FreeRDPWriteColor_int(dst, format, tmp); + } + default: + return FreeRDPWriteColor_int(dst, format, color); + } +} + +static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format) +{ + UINT32 color = 0; + + switch (FreeRDPGetBitsPerPixel(format)) + { + case 32: + color = + ((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3]; + break; + + case 24: + color = ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2]; + break; + + case 16: + color = ((UINT32)src[1] << 8) | src[0]; + break; + + case 15: + color = ((UINT32)src[1] << 8) | src[0]; + + if (!FreeRDPColorHasAlpha(format)) + color = color & 0x7FFF; + + break; + + case 8: + case 4: + case 1: + color = *src; + break; + + default: + WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format)); + color = 0; + break; + } + + return color; +} + +#endif diff --git a/libfreerdp/primitives/CMakeLists.txt b/libfreerdp/primitives/CMakeLists.txt index 5408c318e..8f56fe936 100644 --- a/libfreerdp/primitives/CMakeLists.txt +++ b/libfreerdp/primitives/CMakeLists.txt @@ -6,6 +6,7 @@ set(PRIMITIVES_SRCS prim_alphaComp.c prim_colors.c prim_copy.c + prim_copy.h prim_set.c prim_shift.c prim_sign.c @@ -17,6 +18,8 @@ set(PRIMITIVES_SRCS if (WITH_SSE2 OR WITH_NEON) set(PRIMITIVES_SSE2_SRCS prim_colors_opt.c + prim_copy_sse.c + prim_copy_avx2.c prim_set_opt.c) set(PRIMITIVES_SSE3_SRCS @@ -71,6 +74,8 @@ if(WITH_SSE2) if (PRIMITIVES_SSSE3_SRCS) set_source_files_properties(${PRIMITIVES_SSSE3_SRCS} PROPERTIES COMPILE_FLAGS "-mssse3" ) endif() + set_source_files_properties(prim_copy_sse.c PROPERTIES COMPILE_FLAGS "-msse4.1" ) + set_source_files_properties(prim_copy_avx2.c PROPERTIES COMPILE_FLAGS "-mavx2" ) endif() if(MSVC) diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c index 14b936def..0d630248b 100644 --- a/libfreerdp/primitives/prim_copy.c +++ b/libfreerdp/primitives/prim_copy.c @@ -18,7 +18,15 @@ #include #include #include +#include + #include "prim_internal.h" +#include "prim_copy.h" +#include "../codec/color.h" + +#include + +#define TAG FREERDP_TAG("primitives.copy") static primitives_t* generic = NULL; @@ -128,6 +136,247 @@ static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDs return PRIMITIVES_SUCCESS; } +static INLINE pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, + SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 3; + const SSIZE_T dstByte = 4; + + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + for (SSIZE_T x = 0; x < nWidth; x++) + { + dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; + dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; + dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; + } + } + + return PRIMITIVES_SUCCESS; +} + +static INLINE pstatus_t generic_image_copy_bgrx32_bgrx32( + BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, + UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc, + UINT32 nYSrc, SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, + SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 4; + const SSIZE_T dstByte = 4; + + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + for (SSIZE_T x = 0; x < nWidth; x++) + { + dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; + dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; + dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; + } + } + + return PRIMITIVES_SUCCESS; +} + +pstatus_t generic_image_copy_no_overlap_convert( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat); + const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat); + + const UINT32 width = nWidth - nWidth % 8; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + WINPR_PRAGMA_UNROLL_LOOP + for (; x < width; x++) + { + const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + for (; x < nWidth; x++) + { + const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + } + return PRIMITIVES_SUCCESS; +} + +pstatus_t generic_image_copy_no_overlap_memcpy( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset, + UINT32 flags) +{ + const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE; + const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat); + const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat); + const SSIZE_T copyDstWidth = nWidth * dstByte; + const SSIZE_T xSrcOffset = nXSrc * srcByte; + const SSIZE_T xDstOffset = nXDst * dstByte; + + if (!vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) && (xDstOffset == 0)) + { + const void* src = &pSrcData[1ull * nYSrc * nSrcStep]; + void* dst = &pDstData[1ull * nYDst * nDstStep]; + memcpy(dst, src, 1ull * nDstStep * nHeight); + } + else + { + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth); + } + } + + return PRIMITIVES_SUCCESS; +} + +static INLINE pstatus_t generic_image_copy_no_overlap_dst_alpha( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + WINPR_ASSERT(pDstData); + WINPR_ASSERT(pSrcData); + + switch (SrcFormat) + { + case PIXEL_FORMAT_BGR24: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return generic_image_copy_bgr24_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return generic_image_copy_bgrx32_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + default: + break; + } + + return generic_image_copy_no_overlap_convert( + pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); +} + +static INLINE pstatus_t generic_image_copy_no_overlap_no_alpha( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset, + UINT32 flags) +{ + if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat)) + return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset, flags); + else + return generic_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset); +} + +static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + const gdiPalette* WINPR_RESTRICT palette, + UINT32 flags) +{ + const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE; + SSIZE_T srcVOffset = 0; + SSIZE_T srcVMultiplier = 1; + SSIZE_T dstVOffset = 0; + SSIZE_T dstVMultiplier = 1; + + if ((nWidth == 0) || (nHeight == 0)) + return PRIMITIVES_SUCCESS; + + if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX)) + return -1; + + if (!pDstData || !pSrcData) + return -1; + + if (nDstStep == 0) + nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat); + + if (nSrcStep == 0) + nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat); + + if (vSrcVFlip) + { + srcVOffset = (nHeight - 1ll) * nSrcStep; + srcVMultiplier = -1; + } + + if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat)) + return generic_image_copy_no_overlap_dst_alpha( + pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, + nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, + dstVOffset); + else + return generic_image_copy_no_overlap_no_alpha( + pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, + nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset, + flags); + + return PRIMITIVES_SUCCESS; +} + /* ------------------------------------------------------------------------- */ void primitives_init_copy(primitives_t* prims) { @@ -136,6 +385,7 @@ void primitives_init_copy(primitives_t* prims) prims->copy_8u_AC4r = general_copy_8u_AC4r; /* This is just an alias with void* parameters */ prims->copy = (__copy_t)(prims->copy_8u); + prims->copy_no_overlap = generic_image_copy_no_overlap; } #if defined(WITH_SSE2) || defined(WITH_NEON) @@ -153,5 +403,7 @@ void primitives_init_copy_opt(primitives_t* prims) */ /* This is just an alias with void* parameters */ prims->copy = (__copy_t)(prims->copy_8u); + primitives_init_copy_sse(prims); + primitives_init_copy_avx2(prims); } #endif diff --git a/libfreerdp/primitives/prim_copy.h b/libfreerdp/primitives/prim_copy.h new file mode 100644 index 000000000..18b927d0d --- /dev/null +++ b/libfreerdp/primitives/prim_copy.h @@ -0,0 +1,42 @@ +/** + * FreeRDP: A Remote Desktop Protocol Implementation + * Primitives copy + * + * Copyright 2024 Armin Novak + * Copyright 2024 Thincast Technologies GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FREERDP_LIB_PRIM_COPY_H +#define FREERDP_LIB_PRIM_COPY_H + +#include +#include + +pstatus_t generic_image_copy_no_overlap_convert( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset); + +pstatus_t generic_image_copy_no_overlap_memcpy( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset, + UINT32 flags); + +extern void primitives_init_copy_sse(primitives_t* prims); +extern void primitives_init_copy_avx2(primitives_t* prims); +#endif diff --git a/libfreerdp/primitives/prim_copy_avx2.c b/libfreerdp/primitives/prim_copy_avx2.c new file mode 100644 index 000000000..a054a5cd9 --- /dev/null +++ b/libfreerdp/primitives/prim_copy_avx2.c @@ -0,0 +1,276 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Copy operations. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include "prim_internal.h" +#include "prim_copy.h" +#include "../codec/color.h" + +#include + +#define TAG FREERDP_TAG("primitives.copy") + +#if defined(WITH_SSE2) +#include +#include + +static INLINE pstatus_t avx2_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, + UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, + UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, + SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 3; + const SSIZE_T dstByte = 4; + + const __m256i mask = _mm256_set_epi32(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + const SSIZE_T rem = nWidth % 8; + const SSIZE_T width = nWidth - rem; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + for (; x < width; x += 8) + { + const __m256i* src = (const __m256i*)&srcLine[(x + nXSrc) * srcByte]; + __m256i* dst = (__m256i*)&dstLine[(x + nXDst) * dstByte]; + const __m256i s0 = _mm256_loadu_si256(src); + const __m256i s1 = _mm256_loadu_si256(dst); + const __m256i s2 = _mm256_shuffle_epi8(s1, mask); + __m256i d0 = _mm256_blendv_epi8(s2, s0, mask); + _mm256_storeu_si256(dst, d0); + } + for (; x < nWidth; x++) + { + const BYTE* src = &srcLine[(x + nXSrc) * srcByte]; + BYTE* dst = &dstLine[(x + nXDst) * dstByte]; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + } + + return PRIMITIVES_SUCCESS; +} + +static INLINE pstatus_t avx2_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, + SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 4; + const SSIZE_T dstByte = 4; + + const __m256i mask = + _mm256_setr_epi8(0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00); + const SSIZE_T rem = nWidth % 8; + const SSIZE_T width = nWidth - rem; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + for (; x < width; x += 8) + { + const __m256i* src = (const __m256i*)&srcLine[(x + nXSrc) * srcByte]; + __m256i* dst = (__m256i*)&dstLine[(x + nXDst) * dstByte]; + const __m256i s0 = _mm256_loadu_si256(src); + const __m256i s1 = _mm256_loadu_si256(dst); + __m256i d0 = _mm256_blendv_epi8(s1, s0, mask); + _mm256_storeu_si256(dst, d0); + } + + for (; x < nWidth; x++) + { + const BYTE* src = &srcLine[(x + nXSrc) * srcByte]; + BYTE* dst = &dstLine[(x + nXDst) * dstByte]; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + } + + return PRIMITIVES_SUCCESS; +} + +static pstatus_t avx2_image_copy_no_overlap_dst_alpha( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + WINPR_ASSERT(pDstData); + WINPR_ASSERT(pSrcData); + + switch (SrcFormat) + { + case PIXEL_FORMAT_BGR24: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return avx2_image_copy_bgr24_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return avx2_image_copy_bgrx32_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + default: + break; + } + + WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat), + FreeRDPGetColorFormatName(DstFormat)); + return -1; +} + +static INLINE pstatus_t avx2_image_copy_no_overlap_convert( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat); + const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat); + + const UINT32 width = nWidth - nWidth % 8; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + WINPR_PRAGMA_UNROLL_LOOP + for (; x < width; x++) + { + const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + for (; x < nWidth; x++) + { + const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + } + return PRIMITIVES_SUCCESS; +} + +static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + const gdiPalette* WINPR_RESTRICT palette, UINT32 flags) +{ + const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE; + SSIZE_T srcVOffset = 0; + SSIZE_T srcVMultiplier = 1; + SSIZE_T dstVOffset = 0; + SSIZE_T dstVMultiplier = 1; + + if ((nWidth == 0) || (nHeight == 0)) + return PRIMITIVES_SUCCESS; + + if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX)) + return -1; + + if (!pDstData || !pSrcData) + return -1; + + if (nDstStep == 0) + nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat); + + if (nSrcStep == 0) + nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat); + + if (vSrcVFlip) + { + srcVOffset = (nHeight - 1ll) * nSrcStep; + srcVMultiplier = -1; + } + + if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat)) + return avx2_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset); + else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat)) + return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset, flags); + else + return avx2_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, + dstVMultiplier, dstVOffset); +} + +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_copy_avx2(primitives_t* prims) +{ +#if defined(WITH_SSE2) + if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) + { + prims->copy_no_overlap = avx2_image_copy_no_overlap; + } +#else + WINPR_UNUSED(prims); +#endif +} diff --git a/libfreerdp/primitives/prim_copy_sse.c b/libfreerdp/primitives/prim_copy_sse.c new file mode 100644 index 000000000..710b9f65e --- /dev/null +++ b/libfreerdp/primitives/prim_copy_sse.c @@ -0,0 +1,274 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Copy operations. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include "prim_internal.h" +#include "prim_copy.h" +#include "../codec/color.h" + +#include + +#define TAG FREERDP_TAG("primitives.copy") + +#if defined(WITH_SSE2) +#include +#include + +static INLINE pstatus_t sse_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, + UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, + UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, + SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 3; + const SSIZE_T dstByte = 4; + + const __m128i mask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF); + const SSIZE_T rem = nWidth % 4; + const SSIZE_T width = nWidth - rem; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + for (; x < width; x += 4) + { + const __m128i* src = (const __m128i*)&srcLine[(x + nXSrc) * srcByte]; + __m128i* dst = (__m128i*)&dstLine[(x + nXDst) * dstByte]; + const __m128i s0 = _mm_loadu_si128(src); + const __m128i s1 = _mm_loadu_si128(dst); + const __m128i s2 = _mm_shuffle_epi8(s1, mask); + __m128i d0 = _mm_blendv_epi8(s2, s0, mask); + _mm_storeu_si128(dst, d0); + } + for (; x < nWidth; x++) + { + const BYTE* src = &srcLine[(x + nXSrc) * srcByte]; + BYTE* dst = &dstLine[(x + nXDst) * dstByte]; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + } + + return PRIMITIVES_SUCCESS; +} + +static INLINE pstatus_t sse_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, + UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, + UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, + SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + + const SSIZE_T srcByte = 4; + const SSIZE_T dstByte = 4; + + const __m128i mask = _mm_setr_epi8(0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, + 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00); + const SSIZE_T rem = nWidth % 4; + const SSIZE_T width = nWidth - rem; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + for (; x < width; x += 4) + { + const __m128i* src = (const __m128i*)&srcLine[(x + nXSrc) * srcByte]; + __m128i* dst = (__m128i*)&dstLine[(x + nXDst) * dstByte]; + const __m128i s0 = _mm_loadu_si128(src); + const __m128i s1 = _mm_loadu_si128(dst); + __m128i d0 = _mm_blendv_epi8(s1, s0, mask); + _mm_storeu_si128(dst, d0); + } + + for (; x < nWidth; x++) + { + const BYTE* src = &srcLine[(x + nXSrc) * srcByte]; + BYTE* dst = &dstLine[(x + nXDst) * dstByte]; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + } + + return PRIMITIVES_SUCCESS; +} + +static pstatus_t sse_image_copy_no_overlap_dst_alpha( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + WINPR_ASSERT(pDstData); + WINPR_ASSERT(pSrcData); + + switch (SrcFormat) + { + case PIXEL_FORMAT_BGR24: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return sse_image_copy_bgr24_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return sse_image_copy_bgrx32_bgrx32( + pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, + nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); + default: + break; + } + break; + default: + break; + } + + WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat), + FreeRDPGetColorFormatName(DstFormat)); + return -1; +} + +static INLINE pstatus_t sse_image_copy_no_overlap_convert( + BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, + SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset) +{ + const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat); + const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat); + + const UINT32 width = nWidth - nWidth % 8; + for (SSIZE_T y = 0; y < nHeight; y++) + { + const BYTE* WINPR_RESTRICT srcLine = + &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; + BYTE* WINPR_RESTRICT dstLine = + &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; + + SSIZE_T x = 0; + WINPR_PRAGMA_UNROLL_LOOP + for (; x < width; x++) + { + const UINT32 color = FreeRDPReadColor(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + for (; x < nWidth; x++) + { + const UINT32 color = FreeRDPReadColor(&srcLine[(x + nXSrc) * srcByte], SrcFormat); + const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); + FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor); + } + } + return PRIMITIVES_SUCCESS; +} + +static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, + UINT32 nWidth, UINT32 nHeight, + const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, + const gdiPalette* WINPR_RESTRICT palette, UINT32 flags) +{ + const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE; + SSIZE_T srcVOffset = 0; + SSIZE_T srcVMultiplier = 1; + SSIZE_T dstVOffset = 0; + SSIZE_T dstVMultiplier = 1; + + if ((nWidth == 0) || (nHeight == 0)) + return PRIMITIVES_SUCCESS; + + if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX)) + return -1; + + if (!pDstData || !pSrcData) + return -1; + + if (nDstStep == 0) + nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat); + + if (nSrcStep == 0) + nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat); + + if (vSrcVFlip) + { + srcVOffset = (nHeight - 1ll) * nSrcStep; + srcVMultiplier = -1; + } + + if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat)) + return sse_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset); + else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat)) + return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, + srcVOffset, dstVMultiplier, dstVOffset, flags); + else + return sse_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst, + nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, + nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, + dstVMultiplier, dstVOffset); +} + +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_copy_sse(primitives_t* prims) +{ +#if defined(WITH_SSE2) + if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) + { + prims->copy_no_overlap = sse_image_copy_no_overlap; + } +#else + WINPR_UNUSED(prims); +#endif +} From 550a3e40c721ddb6633ebcb76ad0d0343836ed06 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 11 Jun 2024 11:42:24 +0200 Subject: [PATCH 6/6] [ci,ios] update defaults --- ci/cmake-preloads/config-ios.txt | 11 ++++++----- ci/cmake-preloads/config-macosx.txt | 1 + libfreerdp/codec/CMakeLists.txt | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ci/cmake-preloads/config-ios.txt b/ci/cmake-preloads/config-ios.txt index 687c2b6cf..15b55fbf1 100644 --- a/ci/cmake-preloads/config-ios.txt +++ b/ci/cmake-preloads/config-ios.txt @@ -8,8 +8,9 @@ set (WITH_SANITIZE_ADDRESS ON CACHE BOOL "build with address sanitizer") set (WITH_CLIENT OFF CACHE BOOL "disable iOS client") set (WITH_SERVER OFF CACHE BOOL "disable iOS server") set (WITH_KRB5 OFF CACHE BOOL "Kerberos support") -set (WITH_CLIENT_SDL OFF CACHE BOOL "SDL client") -set (WITH_FFMPEG OFF CACHE BOOL "SDL client") -set (WITH_SWSCALE OFF CACHE BOOL "SDL client") -set (WITH_NEON ON CACHE BOOL "SDL client") -set (BUILD_SHARED_LIBS OFF CACHE BOOL "SDL client") +set (WITH_CLIENT_SDL OFF CACHE BOOL "iOS preload") +set (WITH_FFMPEG OFF CACHE BOOL "iOS preload") +set (WITH_SWSCALE OFF CACHE BOOL "iOS preload") +set (WITH_NEON ON CACHE BOOL "iOS preload") +set (WITH_OPUS OFF CACHE BOOL "iOS preload") +set (BUILD_SHARED_LIBS OFF CACHE BOOL "iOS preload") diff --git a/ci/cmake-preloads/config-macosx.txt b/ci/cmake-preloads/config-macosx.txt index c52aabe8a..3282118cd 100644 --- a/ci/cmake-preloads/config-macosx.txt +++ b/ci/cmake-preloads/config-macosx.txt @@ -13,4 +13,5 @@ set (WITH_FREERDP_DEPRECATED_COMMANDLINE ON CACHE BOOL "Enable deprecated comman set (WITH_KRB5 OFF CACHE BOOL "Kerberos support") set (WITH_WEBVIEW OFF CACHE BOOL "ci default") set (WITH_FFMPEG OFF CACHE BOOL "ci default") +set (WITH_OPUS OFF CACHE BOOL "ci default") set (WITH_SWSCALE OFF CACHE BOOL "ci default") diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index 6eedc1910..811009525 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -127,7 +127,7 @@ option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) if (WITH_OPUS) find_package(Opus) if (Opus_FOUND) - list(APPEND CODEC_LIBS Opus::opus) + list(APPEND CODEC_LIBS ${OPUS_LIBRARIES}) else() find_package(PkgConfig REQUIRED) pkg_check_modules(OPUS REQUIRED opus)