commit 93470f7eb4aceb9cfbe831c1663f27145c13eb30
parent c1ceb73a08dd99f01b5ed50f035dac6299c7d95a
Author: vaplv <vaplv@free.fr>
Date: Sat, 5 Mar 2016 18:16:20 +0100
Add support for the SSE4.1 blendv instructions
Diffstat:
3 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -33,6 +33,14 @@ set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR})
include(rcmake)
################################################################################
+# Check compiler features
+################################################################################
+# Ask the C compiler whether it accepts -msse4.1; the answer is stored in the
+# SSE4_1 variable. The probe is only attempted with GCC.
+if(CMAKE_COMPILER_IS_GNUCC)
+ include(CheckCCompilerFlag)
+ check_c_compiler_flag("-msse4.1" SSE4_1)
+endif()
+
+################################################################################
# Configure and define targets
################################################################################
set(VERSION_MAJOR 0)
@@ -82,11 +90,18 @@ rcmake_setup_devel(rsimd RSIMD ${VERSION} rsimd/rsimd_version.h)
# Add tests
################################################################################
if(NOT NO_TEST)
- macro(new_test _name)
- add_executable(${_name} ${RSIMD_SOURCE_DIR}/${_name}.c)
+ # new_test_named(<_name> <_src> [<flag>...])
+ # Build the executable <_name> from ${RSIMD_SOURCE_DIR}/<_src>.c, link it
+ # against rsimd and m, and register it as a test of the same name. Any
+ # additional arguments are compiler flags applied to this target only.
+ function(new_test_named _name _src)
+ add_executable(${_name} ${RSIMD_SOURCE_DIR}/${_src}.c)
target_link_libraries(${_name} rsimd m)
add_test(${_name} ${_name})
- endmacro(new_test)
+ if(NOT "${ARGN}" STREQUAL "")
+ # COMPILE_FLAGS holds one space-separated string. Passing a multi-element
+ # ${ARGN} unquoted would make set_target_properties() read the extra
+ # elements as further property/value pairs, so join the flags first.
+ string(REPLACE ";" " " _flags "${ARGN}")
+ set_target_properties(${_name} PROPERTIES COMPILE_FLAGS "${_flags}")
+ endif(NOT "${ARGN}" STREQUAL "")
+ endfunction(new_test_named)
+
+ # new_test(<_name> [<flag>...])
+ # Shorthand for a test whose executable and source file share the name
+ # <_name>; everything is forwarded to new_test_named().
+ function(new_test _name)
+ new_test_named(${_name} ${_name} ${ARGN})
+ endfunction()
new_test(test_v4f)
new_test(test_v4i)
@@ -96,6 +111,12 @@ if(NOT NO_TEST)
new_test(test_soa4f2)
new_test(test_soa4f3)
new_test(test_soa4f4)
+
+ # Rebuild the v4f and v4i tests with -msse4.1 as extra <src>_sse4_1 targets
+ # when the compiler is GCC and it accepted the flag.
+ if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
+ foreach(_sse4_1_src test_v4f test_v4i)
+ new_test_named(${_sse4_1_src}_sse4_1 ${_sse4_1_src} "-msse4.1")
+ endforeach()
+ endif()
+
endif(NOT NO_TEST)
################################################################################
diff --git a/src/sse/ssef.h b/src/sse/ssef.h
@@ -25,6 +25,9 @@
#include <rsys/math.h>
#include <xmmintrin.h>
#include <emmintrin.h>
+#ifdef SIMD_SSE4_1
+ #include <smmintrin.h>
+#endif
typedef __m128 v4f_T;
#define V4F_AT__(Vec, Id) __builtin_ia32_vec_ext_v4sf(Vec, Id)
@@ -269,7 +272,11 @@ v4f_xor(const v4f_T v0, const v4f_T v1)
static FINLINE v4f_T
v4f_sel(const v4f_T vfalse, const v4f_T vtrue, const v4f_T vcond)
{
+#ifdef SIMD_SSE4_1 /* Select vtrue where vcond is set and vfalse elsewhere.
+  * With SSE4.1 a single blend replaces the xor/and/xor sequence below.
+  * NOTE(review): _mm_blendv_ps tests only the sign bit of each 32-bit lane of
+  * vcond, whereas the fallback selects bit by bit; both give the same result
+  * when every lane of vcond is all ones or all zeros. */
+ return _mm_blendv_ps(vfalse, vtrue, vcond);
+#else
return v4f_xor(vfalse, v4f_and(vcond, v4f_xor(vfalse, vtrue)));
+#endif
}
/*******************************************************************************
diff --git a/src/sse/ssei.h b/src/sse/ssei.h
@@ -23,11 +23,10 @@
#include "sse_swz.h"
#include <rsys/math.h>
-#ifdef SIMD_SSE
- #include <xmmintrin.h>
-#endif
-#ifdef SIMD_SSE2
- #include <emmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#ifdef SIMD_SSE4_1
+ #include <smmintrin.h>
#endif
typedef __m128i v4i_T;
@@ -198,7 +197,11 @@ v4i_le(const v4i_T v0, const v4i_T v1)
static FINLINE v4i_T
v4i_sel(const v4i_T vfalse, const v4i_T vtrue, const v4i_T vcond)
{
+#ifdef SIMD_SSE4_1 /* Select vtrue where vcond is set and vfalse elsewhere.
+  * With SSE4.1 a single blend replaces the xor/and/xor sequence below.
+  * NOTE(review): _mm_blendv_epi8 tests only the top bit of each byte of
+  * vcond, whereas the fallback selects bit by bit; both give the same result
+  * when every byte of vcond is 0x00 or 0xFF. */
+ return _mm_blendv_epi8(vfalse, vtrue, vcond);
+#else
return v4i_xor(vfalse, v4i_and(vcond, v4i_xor(vfalse, vtrue)));
+#endif
}
#endif /* RSIMD_SSEI_H */