commit dc44f07f5fca4178b6a4dd52cb81f06391660402
parent 85c665f40a596c1728a57523014a5eea61f1ba86
Author: vaplv <vaplv@free.fr>
Date: Tue, 27 Apr 2021 15:59:55 +0200
Add the math functions for the v8f_T type
Diffstat:
12 files changed, 621 insertions(+), 288 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -63,6 +63,9 @@ set(RSIMD_FILES_INC_LEGACY
aosf44.h
aosq.h
math.h
+ mathX.h
+ math4.h
+ math8.h
rsimd.h
soaXfY.h
soaXfY_begin.h
@@ -74,7 +77,9 @@ set(RSIMD_FILES_INC_LEGACY
soa4f4.h
soa8f2.h
soa8f3.h
- soa8f4.h)
+ soa8f4.h
+ vXf_begin.h
+ vXf_end.h)
set(RSIMD_FILES_INC_SSE
sse/sse.h
sse/ssef.h
@@ -88,7 +93,7 @@ set(RSIMD_FILES_SRC
aosf44.c
aosq.c)
set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md)
-set(RSIMD_FILES_CMAKE
+set(RSIMD_FILES_CMAKE
RSIMDConfig.cmake
RSIMDConfigVersion.cmake)
rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR})
@@ -131,7 +136,7 @@ if(NOT NO_TEST)
new_test(test_aosf33)
new_test(test_aosf44)
new_test(test_aosq)
- new_test(test_math)
+ new_test(test_math4)
new_test(test_soa4f2)
new_test(test_soa4f3)
new_test(test_soa4f4)
@@ -149,6 +154,7 @@ if(NOT NO_TEST)
endif()
if(AVX AND CMAKE_COMPILER_IS_GNUCC)
+ new_test(test_math8 "-mavx")
new_test(test_v8f "-mavx")
new_test(test_v8i "-mavx")
new_test(test_soa8f2 "-mavx")
@@ -174,6 +180,6 @@ install(FILES ${Sleef_DIR}/SleefConfig.cmake DESTINATION lib/cmake/Sleef/)
install(FILES ${PROJECT_SOURCE_DIR}/RSIMDConfig.cmake
DESTINATION lib/cmake/RSIMD)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake
DESTINATION lib/cmake/RSIMD)
diff --git a/src/math.h b/src/math.h
@@ -16,143 +16,14 @@
#ifndef RSIMD_MATH_H
#define RSIMD_MATH_H
-#include "rsimd.h"
+#include <rsys/rsys.h>
-#ifdef COMPILER_GCC
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#ifdef SIMD_SSE2
+ #include "math4.h"
#endif
-
-#include <sleef.h>
-
-#ifdef COMPILER_GCC
- #pragma GCC diagnostic pop
+#ifdef SIMD_AVX
+ #include "math8.h"
#endif
-static FINLINE v4f_T
-v4f_copysign(const v4f_T x, const v4f_T y)
-{
- return Sleef_copysignf4(x, y);
-}
-
-static INLINE v4f_T
-v4f_floor(const v4f_T x)
-{
- return Sleef_floorf4(x);
-}
-
-static INLINE v4f_T
-v4f_pow(const v4f_T x, const v4f_T y)
-{
- return Sleef_powf4_u10(x, y);
-}
-
-/*******************************************************************************
- * Exponentatial functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_exp2(const v4f_T x)
-{
- return Sleef_exp2f4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_exp(const v4f_T x)
-{
- return Sleef_expf4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_exp10(const v4f_T x)
-{
- return Sleef_exp10f4_u10(x);
-}
-
-/*******************************************************************************
- * Log functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_log2(const v4f_T x)
-{
- return Sleef_log2f4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_log(const v4f_T x)
-{
- return Sleef_logf4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_log10(const v4f_T x)
-{
- return Sleef_log10f4_u10(x);
-}
-
-/*******************************************************************************
- * Trigonometric functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_sin(const v4f_T v)
-{
- return Sleef_sinf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_asin(const v4f_T v)
-{
- return Sleef_asinf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_cos(const v4f_T v)
-{
- return Sleef_cosf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_acos(const v4f_T v)
-{
- return Sleef_acosf4_u10(v);
-}
-
-static INLINE void
-v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
-{
- const Sleef___m128_2 r = Sleef_sincosf4_u10(v);
- *s = r.x;
- *c = r.y;
-}
-
-static INLINE v4f_T
-v4f_tan(const v4f_T v)
-{
- return Sleef_tanf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_atan(const v4f_T v)
-{
- return Sleef_atanf4_u10(v);
-}
-
-/*******************************************************************************
- * Miscellaneous
- ******************************************************************************/
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
-v4f_xyz_to_rthetaphi(const v4f_T v)
-{
- const v4f_T zero = v4f_zero();
- const v4f_T len2 = v4f_len2(v);
- const v4f_T len3 = v4f_len3(v);
- const v4f_T theta = v4f_sel
- (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
- const v4f_T tmp_phi = v4f_sel
- (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
- const v4f_T phi = v4f_sel
- (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
-
- return v4f_xyab(v4f_xayb(len3, theta), phi);
-}
-
#endif /* RSIMD_MATH_H */
+
diff --git a/src/math4.h b/src/math4.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH4_H
+#define RSIMD_MATH4_H
+
+#define RSIMD_WIDTH__ 4
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+/*******************************************************************************
+ * Miscellaneous
+ ******************************************************************************/
+static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
+v4f_xyz_to_rthetaphi(const v4f_T v)
+{
+ const v4f_T zero = v4f_zero();
+ const v4f_T len2 = v4f_len2(v);
+ const v4f_T len3 = v4f_len3(v);
+ const v4f_T theta = v4f_sel
+ (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
+ const v4f_T tmp_phi = v4f_sel
+ (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
+ const v4f_T phi = v4f_sel
+ (v4f_sub(v4f_set1((float)PI), tmp_phi),tmp_phi, v4f_ge(v4f_xxxx(v), zero));
+ return v4f_xyab(v4f_xayb(len3, theta), phi);
+}
+#endif /* RSIMD_MATH4_H */
diff --git a/src/math8.h b/src/math8.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH8_H
+#define RSIMD_MATH8_H
+
+#define RSIMD_WIDTH__ 8
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+#endif /* RSIMD_MATH8_H */
diff --git a/src/mathX.h b/src/mathX.h
@@ -0,0 +1,137 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#include <sleef.h>
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic pop
+#endif
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_vXf__(copysign)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+ return RSIMD_Sleef__(copysignf)(x, y);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(floor)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef__(floorf)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(pow)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+ return RSIMD_Sleef_ULP__(powf, u10)(x, y);
+}
+
+/*******************************************************************************
+ * Exponential functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp2)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(exp2f, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(expf, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp10)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(exp10f, u10)(x);
+}
+
+/*******************************************************************************
+ * Log functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log2)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(log2f, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(logf, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log10)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(log10f, u10)(x);
+}
+
+/*******************************************************************************
+ * Trigonometric functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(sin)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(sinf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(asin)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(asinf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(cos)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(cosf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(acos)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(acosf, u10)(v);
+}
+
+static INLINE void
+RSIMD_vXf__(sincos)
+ (const RSIMD_vXf_T__ v, RSIMD_vXf_T__* RESTRICT s, RSIMD_vXf_T__* RESTRICT c)
+{
+ const RSIMD_Sleef_vecf__(2) r = RSIMD_Sleef_ULP__(sincosf, u10)(v);
+ *s = r.x;
+ *c = r.y;
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(tan)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(tanf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(atan)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(atanf, u10)(v);
+}
+
+
diff --git a/src/soaXfY_begin.h b/src/soaXfY_begin.h
@@ -14,6 +14,7 @@
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "rsimd.h"
+#include "vXf_begin.h"
/* This file can be included once */
#ifdef SOAXFY_BEGIN_H
@@ -32,23 +33,16 @@
#error "Unexpected RSIMD_SOA_DIMENSION__ value"
#endif
#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
- #error "Unexpected RSIMD_WIDTH__ value"
+ #error "Unexpected RSIMD_WIDTH__ value: expected 4 or 8"
#endif
/* Check that internal macros are not already defined */
-#if defined(RSIMD_vXf__) \
- || defined(RSIMD_vXf_T__) \
- || defined(RSIMD_soaXfY_PREFIX__) \
+#if defined(RSIMD_soaXfY_PREFIX__) \
|| defined(RSIMD_soaXfY__) \
|| defined(SIZEOF_RSIMD_soaXfY__)
#error "Unexpected macro definition"
#endif
-/* Macros generic to RSIMD_WIDTH__ */
-#define RSIMD_vXf__(Func) \
- CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
-#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T)
-
/* Macros genric to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */
#define RSIMD_soaXfY_PREFIX__ \
CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__)
diff --git a/src/soaXfY_end.h b/src/soaXfY_end.h
@@ -18,8 +18,6 @@
#endif
/* Undef helper macros */
-#undef RSIMD_vXf__
-#undef RSIMD_vXf_T__
#undef RSIMD_soaXfY_PREFIX__
#undef RSIMD_soaXfY__
#undef SIZEOF_RSIMD_soaXfY__
@@ -29,3 +27,5 @@
#undef RSIMD_WIDTH__
#undef SOAXFY_BEGIN_H
+
+#include "vXf_end.h"
diff --git a/src/test_math.c b/src/test_math.c
@@ -1,138 +0,0 @@
-/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#define _POSIX_C_SOURCE 200112L
-
-#include "rsimd.h"
-#include "math.h"
-
-#include <math.h>
-
-#define LOG2E 1.4426950408889634074 /* log_2 e */
-#define LN10 2.30258509299404568402 /* log_e 10 */
-
-#define CHKV4_EPS(V, Ref, Eps) { \
- CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \
- CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \
- CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \
- CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \
-} (void)0
-
-#define CHKV4_FUNC_EPS(V, Func, Eps) { \
- const v4f_T r__ = v4f_##Func(V); \
- float ref__[4]; \
- ref__[0] = (float)Func(v4f_x(V)); \
- ref__[1] = (float)Func(v4f_y(V)); \
- ref__[2] = (float)Func(v4f_z(V)); \
- ref__[3] = (float)Func(v4f_w(V)); \
- CHKV4_EPS(r__, ref__, Eps); \
-} (void)0
-
-static void
-test_trigo(void)
-{
- v4f_T i, j, k;
- float ref[4];
-
- i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
-
- CHKV4_FUNC_EPS(i, cos, 1.e-6);
- CHKV4_FUNC_EPS(i, sin, 1.e-6);
-
- v4f_sincos(i, &k, &j);
- ref[0] = (float)sin(v4f_x(i));
- ref[1] = (float)sin(v4f_y(i));
- ref[2] = (float)sin(v4f_z(i));
- ref[3] = (float)sin(v4f_w(i));
- CHKV4_EPS(k, ref, 1.e-6f);
- ref[0] = (float)cos(v4f_x(i));
- ref[1] = (float)cos(v4f_y(i));
- ref[2] = (float)cos(v4f_z(i));
- ref[3] = (float)cos(v4f_w(i));
- CHKV4_EPS(j, ref, 1.e-6f);
-
- i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
- CHKV4_FUNC_EPS(i, tan, 1.e-6);
- CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6);
- CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6);
- CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6);
-}
-
-static void
-test_exp(void)
-{
- const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
- v4f_T j;
- float ref[4];
-
- CHKV4_FUNC_EPS(i, exp, 1.e-6);
- CHKV4_FUNC_EPS(i, exp2, 1.e-6);
-
- j = v4f_exp10(i);
- ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
- ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
- ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
- ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
- CHKV4_EPS(j, ref, 1.e-6f);
-}
-
-static void
-test_log(void)
-{
- const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
-
- CHKV4_FUNC_EPS(i, log, 1.e-6);
- CHKV4_FUNC_EPS(i, log2, 1.e-6);
- CHKV4_FUNC_EPS(i, log10, 1.e-6);
-}
-
-static void
-test_misc(void)
-{
- v4f_T i, j, k;
- float ref[4];
-
- i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
- j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
- k = v4f_copysign(i, j);
- ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
- ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
- ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
- ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
- CHKV4_EPS(k, ref, 1.e-6f);
-
- CHKV4_FUNC_EPS(i, floor, 1.e-6);
-
- k = v4f_pow(v4f_abs(i), j);
- ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
- ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
- ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
- ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
- CHKV4_EPS(k, ref, 1.e-6f);
-}
-
-int
-main(int argc, char** argv)
-{
- (void)argc, (void)argv;
-
- test_trigo();
- test_exp();
- test_log();
- test_misc();
-
- return 0;
-}
-
diff --git a/src/test_math4.c b/src/test_math4.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+#define CHKV4_EPS(V, Ref, Eps) { \
+ CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \
+ CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \
+ CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \
+ CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \
+} (void)0
+
+#define CHKV4_FUNC_EPS(V, Func, Eps) { \
+ const v4f_T r__ = v4f_##Func(V); \
+ float ref__[4]; \
+ ref__[0] = (float)Func(v4f_x(V)); \
+ ref__[1] = (float)Func(v4f_y(V)); \
+ ref__[2] = (float)Func(v4f_z(V)); \
+ ref__[3] = (float)Func(v4f_w(V)); \
+ CHKV4_EPS(r__, ref__, Eps); \
+} (void)0
+
+static void
+test_trigo(void)
+{
+ v4f_T i, j, k;
+ float ref[4];
+
+ i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+
+ CHKV4_FUNC_EPS(i, cos, 1.e-6f);
+ CHKV4_FUNC_EPS(i, sin, 1.e-6f);
+
+ v4f_sincos(i, &k, &j);
+ ref[0] = (float)sin(v4f_x(i));
+ ref[1] = (float)sin(v4f_y(i));
+ ref[2] = (float)sin(v4f_z(i));
+ ref[3] = (float)sin(v4f_w(i));
+ CHKV4_EPS(k, ref, 1.e-6f);
+ ref[0] = (float)cos(v4f_x(i));
+ ref[1] = (float)cos(v4f_y(i));
+ ref[2] = (float)cos(v4f_z(i));
+ ref[3] = (float)cos(v4f_w(i));
+ CHKV4_EPS(j, ref, 1.e-6f);
+
+ i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+ CHKV4_FUNC_EPS(i, tan, 1.e-6f);
+ CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6f);
+ CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6f);
+ CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6f);
+}
+
+static void
+test_exp(void)
+{
+ const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
+ v4f_T j;
+ float ref[4];
+
+ CHKV4_FUNC_EPS(i, exp, 1.e-6f);
+ CHKV4_FUNC_EPS(i, exp2, 1.e-6f);
+
+ j = v4f_exp10(i);
+ ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
+ ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
+ ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
+ ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
+ CHKV4_EPS(j, ref, 1.e-6f);
+}
+
+static void
+test_log(void)
+{
+ const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
+
+ CHKV4_FUNC_EPS(i, log, 1.e-6f);
+ CHKV4_FUNC_EPS(i, log2, 1.e-6f);
+ CHKV4_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+static void
+test_misc(void)
+{
+ v4f_T i, j, k;
+ float ref[4];
+
+ i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
+ j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
+ k = v4f_copysign(i, j);
+ ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
+ ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
+ ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
+ ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
+ CHKV4_EPS(k, ref, 1.e-6f);
+
+ CHKV4_FUNC_EPS(i, floor, 1.e-6f);
+
+ k = v4f_pow(v4f_abs(i), j);
+ ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
+ ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
+ ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
+ ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
+ CHKV4_EPS(k, ref, 1.e-6f);
+}
+
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv;
+
+ test_trigo();
+ test_exp();
+ test_log();
+ test_misc();
+
+ return 0;
+}
+
diff --git a/src/test_math8.c b/src/test_math8.c
@@ -0,0 +1,172 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+#define CHKV8_EPS(V, Ref, Eps) { \
+ CHK(eq_eps(v4f_x(v8f_abcd(V)), Ref[0], fabsf(Ref[0]) * Eps)); \
+ CHK(eq_eps(v4f_y(v8f_abcd(V)), Ref[1], fabsf(Ref[1]) * Eps)); \
+ CHK(eq_eps(v4f_z(v8f_abcd(V)), Ref[2], fabsf(Ref[2]) * Eps)); \
+ CHK(eq_eps(v4f_w(v8f_abcd(V)), Ref[3], fabsf(Ref[3]) * Eps)); \
+ CHK(eq_eps(v4f_x(v8f_efgh(V)), Ref[4], fabsf(Ref[4]) * Eps)); \
+ CHK(eq_eps(v4f_y(v8f_efgh(V)), Ref[5], fabsf(Ref[5]) * Eps)); \
+ CHK(eq_eps(v4f_z(v8f_efgh(V)), Ref[6], fabsf(Ref[6]) * Eps)); \
+ CHK(eq_eps(v4f_w(v8f_efgh(V)), Ref[7], fabsf(Ref[7]) * Eps)); \
+} (void)0
+
+#define CHKV8_FUNC_EPS(V, Func, Eps) { \
+ const v8f_T r__ = v8f_##Func(V); \
+ float ref__[8]; \
+ ref__[0] = (float)Func(v4f_x(v8f_abcd(V))); \
+ ref__[1] = (float)Func(v4f_y(v8f_abcd(V))); \
+ ref__[2] = (float)Func(v4f_z(v8f_abcd(V))); \
+ ref__[3] = (float)Func(v4f_w(v8f_abcd(V))); \
+ ref__[4] = (float)Func(v4f_x(v8f_efgh(V))); \
+ ref__[5] = (float)Func(v4f_y(v8f_efgh(V))); \
+ ref__[6] = (float)Func(v4f_z(v8f_efgh(V))); \
+ ref__[7] = (float)Func(v4f_w(v8f_efgh(V))); \
+ CHKV8_EPS(r__, ref__, Eps); \
+} (void)0
+
+static void /* Check the v8f trigonometric functions against <math.h> */
+test_trigo(void)
+{
+ v8f_T i, j, k;
+ float ref[8];
+
+ i = v8f_set
+ ((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+ (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+ CHKV8_FUNC_EPS(i, cos, 1.e-6f);
+ CHKV8_FUNC_EPS(i, sin, 1.e-6f);
+
+ v8f_sincos(i, &k, &j); /* k receives the sines, j the cosines */
+ ref[0] = (float)sin(v4f_x(v8f_abcd(i)));
+ ref[1] = (float)sin(v4f_y(v8f_abcd(i)));
+ ref[2] = (float)sin(v4f_z(v8f_abcd(i)));
+ ref[3] = (float)sin(v4f_w(v8f_abcd(i)));
+ ref[4] = (float)sin(v4f_x(v8f_efgh(i)));
+ ref[5] = (float)sin(v4f_y(v8f_efgh(i)));
+ ref[6] = (float)sin(v4f_z(v8f_efgh(i)));
+ ref[7] = (float)sin(v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+ ref[0] = (float)cos(v4f_x(v8f_abcd(i)));
+ ref[1] = (float)cos(v4f_y(v8f_abcd(i)));
+ ref[2] = (float)cos(v4f_z(v8f_abcd(i)));
+ ref[3] = (float)cos(v4f_w(v8f_abcd(i)));
+ ref[4] = (float)cos(v4f_x(v8f_efgh(i)));
+ ref[5] = (float)cos(v4f_y(v8f_efgh(i)));
+ ref[6] = (float)cos(v4f_z(v8f_efgh(i)));
+ ref[7] = (float)cos(v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(j, ref, 1.e-6f);
+
+ i = v8f_set /* 1st lane moved off PI/2, presumably to keep tan() finite */
+ ((float)PI/2.2f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+ (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+ CHKV8_FUNC_EPS(i, tan, 1.e-6f);
+ CHKV8_FUNC_EPS(v8f_cos(i), acos, 1.e-6f);
+ CHKV8_FUNC_EPS(v8f_sin(i), asin, 1.e-6f);
+ CHKV8_FUNC_EPS(v8f_tan(i), atan, 1.e-6f);
+}
+
+static void
+test_exp(void)
+{
+ const v8f_T i = v8f_set
+ (1.f, -1.234f, 0.f, 3.14156f, 0.9187f, 7.9f, 3.333f, 2.387e-7f);
+ v8f_T j;
+ float ref[8];
+
+ CHKV8_FUNC_EPS(i, exp, 1.e-6f);
+ CHKV8_FUNC_EPS(i, exp2, 1.e-6f);
+
+ j = v8f_exp10(i);
+ ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_abcd(i)));
+ ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_abcd(i)));
+ ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_abcd(i)));
+ ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_abcd(i)));
+ ref[4] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_efgh(i)));
+ ref[5] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_efgh(i)));
+ ref[6] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_efgh(i)));
+ ref[7] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(j, ref, 1.e-6f);
+}
+
+static void
+test_log(void)
+{
+ const v8f_T i = v8f_set
+ (4.675f, 3.14f, 9.99999f, 1.234e-13f, 3.33e-3f, 0.98f, 8.f, 9.87654f);
+ CHKV8_FUNC_EPS(i, log, 1.e-6f);
+ CHKV8_FUNC_EPS(i, log2, 1.e-6f);
+ CHKV8_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+static void
+test_misc(void)
+{
+ v8f_T i, j, k;
+ float ref[8];
+
+ i = v8f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f, 9.9f, -3.1f, 0.33e-6f, 1.f);
+ j = v8f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f, 5.f, 0.1e-19f, 1.234f, -0.45f);
+ k = v8f_copysign(i, j);
+ ref[0] = (float)copysign(v4f_x(v8f_abcd(i)), v4f_x(v8f_abcd(j)));
+ ref[1] = (float)copysign(v4f_y(v8f_abcd(i)), v4f_y(v8f_abcd(j)));
+ ref[2] = (float)copysign(v4f_z(v8f_abcd(i)), v4f_z(v8f_abcd(j)));
+ ref[3] = (float)copysign(v4f_w(v8f_abcd(i)), v4f_w(v8f_abcd(j)));
+ ref[4] = (float)copysign(v4f_x(v8f_efgh(i)), v4f_x(v8f_efgh(j)));
+ ref[5] = (float)copysign(v4f_y(v8f_efgh(i)), v4f_y(v8f_efgh(j)));
+ ref[6] = (float)copysign(v4f_z(v8f_efgh(i)), v4f_z(v8f_efgh(j)));
+ ref[7] = (float)copysign(v4f_w(v8f_efgh(i)), v4f_w(v8f_efgh(j)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+
+ CHKV8_FUNC_EPS(i, floor, 1.e-6f);
+
+ k = v8f_pow(v8f_abs(i), j);
+ ref[0] = (float)pow(fabsf(v4f_x(v8f_abcd(i))), v4f_x(v8f_abcd(j)));
+ ref[1] = (float)pow(fabsf(v4f_y(v8f_abcd(i))), v4f_y(v8f_abcd(j)));
+ ref[2] = (float)pow(fabsf(v4f_z(v8f_abcd(i))), v4f_z(v8f_abcd(j)));
+ ref[3] = (float)pow(fabsf(v4f_w(v8f_abcd(i))), v4f_w(v8f_abcd(j)));
+ ref[4] = (float)pow(fabsf(v4f_x(v8f_efgh(i))), v4f_x(v8f_efgh(j)));
+ ref[5] = (float)pow(fabsf(v4f_y(v8f_efgh(i))), v4f_y(v8f_efgh(j)));
+ ref[6] = (float)pow(fabsf(v4f_z(v8f_efgh(i))), v4f_z(v8f_efgh(j)));
+ ref[7] = (float)pow(fabsf(v4f_w(v8f_efgh(i))), v4f_w(v8f_efgh(j)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+}
+
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv;
+
+ test_trigo();
+ test_exp();
+ test_log();
+ test_misc();
+
+ return 0;
+}
+
diff --git a/src/vXf_begin.h b/src/vXf_begin.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+/* This file can be included once */
+#ifdef VXF_BEGIN_H
+ #error "The vXf_begin.h header is already included"
+#endif
+#define VXF_BEGIN_H
+
+/* Check parameter */
+#if !defined(RSIMD_WIDTH__)
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8 /* #error text is not expanded */
+ #error "Unexpected RSIMD_WIDTH__ value: expected 4 or 8"
+#endif
+
+/* Check that internal macros are not already defined */
+#if defined(RSIMD_vXf__) \
+ || defined(RSIMD_vXf_T__) \
+ || defined(RSIMD_Sleef__) \
+ || defined(RSIMD_Sleef_ULP__) \
+ || defined(RSIMD_Sleef_vecf__)
+ #error "Unexpected macro definition"
+#endif
+
+/* Macros generic to RSIMD_WIDTH__ */
+#define RSIMD_vXf__(Func) \
+ CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
+#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T)
+
+/* Sleef macros */
+#define RSIMD_Sleef__(Func) CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__)
+#define RSIMD_Sleef_ULP__(Func, Suffix) \
+ CONCAT(CONCAT(CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__), _), Suffix)
+
+/* Vector types of the Sleef library */
+#if RSIMD_WIDTH__ == 4
+ #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m128_, Dim)
+#elif RSIMD_WIDTH__ == 8
+ #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m256_, Dim)
+#endif
+
diff --git a/src/vXf_end.h b/src/vXf_end.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef VXF_BEGIN_H
+ #error "The vXf_begin.h file must be included"
+#endif
+
+/* Undef helper macros */
+#undef RSIMD_vXf__
+#undef RSIMD_vXf_T__
+#undef RSIMD_Sleef__
+#undef RSIMD_Sleef_ULP__
+#undef RSIMD_Sleef_vecf__
+
+/* Undef parameters */
+#undef RSIMD_WIDTH__
+
+#undef VXF_BEGIN_H
+