rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 93470f7eb4aceb9cfbe831c1663f27145c13eb30
parent c1ceb73a08dd99f01b5ed50f035dac6299c7d95a
Author: vaplv <vaplv@free.fr>
Date:   Sat,  5 Mar 2016 18:16:20 +0100

Add support of the SSE4.1 blendv instructions

Diffstat:
M cmake/CMakeLists.txt | 27 ++++++++++++++++++++++++---
M src/sse/ssef.h       |  7 +++++++
M src/sse/ssei.h       | 13 ++++++++-----
3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -33,6 +33,14 @@ set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR})
 include(rcmake)
 
 ################################################################################
+# Check compiler features
+################################################################################
+if(CMAKE_COMPILER_IS_GNUCC)
+  include(CheckCCompilerFlag)
+  CHECK_C_COMPILER_FLAG("-msse4.1" SSE4_1)
+endif(CMAKE_COMPILER_IS_GNUCC)
+
+################################################################################
 # Configure and define targets
 ################################################################################
 set(VERSION_MAJOR 0)
@@ -82,11 +90,18 @@ rcmake_setup_devel(rsimd RSIMD ${VERSION} rsimd/rsimd_version.h)
 # Add tests
 ################################################################################
 if(NOT NO_TEST)
-  macro(new_test _name)
-    add_executable(${_name} ${RSIMD_SOURCE_DIR}/${_name}.c)
+  function(new_test_named _name _src)
+    add_executable(${_name} ${RSIMD_SOURCE_DIR}/${_src}.c)
     target_link_libraries(${_name} rsimd m)
     add_test(${_name} ${_name})
-  endmacro(new_test)
+    if(NOT "${ARGN}" STREQUAL "")
+      set_target_properties(${_name} PROPERTIES COMPILE_FLAGS ${ARGN})
+    endif(NOT "${ARGN}" STREQUAL "")
+  endfunction(new_test_named)
+
+  function(new_test _name)
+    new_test_named(${_name} ${_name} ${ARGN})
+  endfunction(new_test)
 
   new_test(test_v4f)
   new_test(test_v4i)
@@ -96,6 +111,12 @@ if(NOT NO_TEST)
   new_test(test_soa4f2)
   new_test(test_soa4f3)
   new_test(test_soa4f4)
+
+  if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
+    new_test_named(test_v4f_sse4_1 test_v4f "-msse4.1")
+    new_test_named(test_v4i_sse4_1 test_v4i "-msse4.1")
+  endif(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
+
 endif(NOT NO_TEST)
 
 ################################################################################
diff --git a/src/sse/ssef.h b/src/sse/ssef.h
@@ -25,6 +25,9 @@
 #include <rsys/math.h>
 #include <xmmintrin.h>
 #include <emmintrin.h>
+#ifdef SIMD_SSE4_1
+  #include <smmintrin.h>
+#endif
 
 typedef __m128 v4f_T;
 #define V4F_AT__(Vec, Id) __builtin_ia32_vec_ext_v4sf(Vec, Id)
@@ -269,7 +272,11 @@ v4f_xor(const v4f_T v0, const v4f_T v1)
 static FINLINE v4f_T
 v4f_sel(const v4f_T vfalse, const v4f_T vtrue, const v4f_T vcond)
 {
+#ifdef SIMD_SSE4_1
+  return _mm_blendv_ps(vfalse, vtrue, vcond);
+#else
   return v4f_xor(vfalse, v4f_and(vcond, v4f_xor(vfalse, vtrue)));
+#endif
 }
 
 /*******************************************************************************
diff --git a/src/sse/ssei.h b/src/sse/ssei.h
@@ -23,11 +23,10 @@
 #include "sse_swz.h"
 #include <rsys/math.h>
 
-#ifdef SIMD_SSE
-  #include <xmmintrin.h>
-#endif
-#ifdef SIMD_SSE2
-  #include <emmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#ifdef SIMD_SSE4_1
+  #include <smmintrin.h>
 #endif
 
 typedef __m128i v4i_T;
@@ -198,7 +197,11 @@ v4i_le(const v4i_T v0, const v4i_T v1)
 static FINLINE v4i_T
 v4i_sel(const v4i_T vfalse, const v4i_T vtrue, const v4i_T vcond)
 {
+#ifdef SIMD_SSE4_1
+  return _mm_blendv_epi8(vfalse, vtrue, vcond);
+#else
   return v4i_xor(vfalse, v4i_and(vcond, v4i_xor(vfalse, vtrue)));
+#endif
 }
 
 #endif /* RSIMD_SSEI_H */