commit 0e1e4c09f7844bf674cb02e2c93be421b1206346
parent 896a516c378be27ae73708bfad24732272cdee40
Author: vaplv <vaplv@free.fr>
Date: Sun, 10 Mar 2019 17:16:22 +0100
Make generic the SoA tests
Refactor the existing SoA tests to use the new generic test. Add and
test the SoA8f3 and SoA8f4 API.
Diffstat:
10 files changed, 394 insertions(+), 527 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -60,7 +60,10 @@ set(RSIMD_FILES_INC_LEGACY
soaXf3.h
soa4f2.h
soa4f3.h
- soa4f4.h)
+ soa4f4.h
+ soa8f2.h
+ soa8f3.h
+ soa8f4.h)
set(RSIMD_FILES_INC_SSE
sse/sse.h
sse/ssef.h
@@ -119,7 +122,6 @@ if(NOT NO_TEST)
new_test(test_soa4f2)
new_test(test_soa4f3)
new_test(test_soa4f4)
- new_test(test_soa8f2 "-mavx")
if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
new_test_named(test_v4f_sse4_1 test_v4f "-msse4.1")
@@ -129,6 +131,9 @@ if(NOT NO_TEST)
if(AVX AND CMAKE_COMPILER_IS_GNUCC)
new_test(test_v8f "-mavx")
new_test(test_v8i "-mavx")
+ new_test(test_soa8f2 "-mavx")
+ new_test(test_soa8f3 "-mavx")
+ new_test(test_soa8f4 "-mavx")
endif(AVX AND CMAKE_COMPILER_IS_GNUCC)
endif(NOT NO_TEST)
diff --git a/src/soa8f3.h b/src/soa8f3.h
@@ -0,0 +1,22 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA8F3_H
+#define SOA8F3_H
+
+#define RSIMD_WIDTH__ 8
+#include "soaXf3.h"
+
+#endif /* SOA8F3_H */
diff --git a/src/soa8f4.h b/src/soa8f4.h
@@ -0,0 +1,27 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA8F4_H
+#define SOA8F4_H
+
+/* Generate the common soa8f4 functions */
+#define RSIMD_WIDTH__ 8
+#define RSIMD_SOA_DIMENSION__ 4
+#include "soaXfY_begin.h"
+#include "soaXfY.h"
+#include "soaXfY_end.h"
+
+#endif /* SOA8F4_H */
+
diff --git a/src/test_soa4f2.c b/src/test_soa4f2.c
@@ -13,106 +13,16 @@
* You should have received a copy of the GNU Lesser General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f2.h"
-#include "test_soaXf_utils.h"
-
-#define CHECK_F2(V, A, B, C, D, E, F, G, H) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f2 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 2
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[2], b[2], c[2], dst[2], f;
(void)argc, (void)argv;
-
- CHK(soa4f2_set(a, soa4f2_splat(c, v4f_set1(-1.f))) == a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
-
- CHK(soa4f2(c, v4f_set(0.f, 1.f, 2.f, 3.f), v4f_set(5.f, 6.f, 7.f, 8.f)) == c);
- CHK(soa4f2_set(a, c) == a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
-
- CHK(soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f),v4f_set(5.f,-6.f,-7.f, 8.f)) == a);
- CHK(soa4f2_minus(b, a) == b);
- CHECK_F2(b, 1.f,-2.f,-3.f, 4.f, -5.f, 6.f, 7.f,-8.f);
-
- CHK(soa4f2_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F2(dst, 0.f, 4.f, 3.f, -1.f, 6.f, -4.f, -7.f, 11.f);
- CHK(soa4f2_add(dst, a, b) == dst);
- CHECK_F2(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
- CHK(soa4f2_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F2(dst, -2.f, 0.f, 3.f, -7.f, 4.f, -8.f, -7.f, 5.f);
- CHK(soa4f2_sub(dst, a, b) == dst);
- CHECK_F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f);
- CHK(soa4f2_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst);
- CHECK_F2(dst, -2.f, 6.f, 0.f, 4.f, 10.f, -18.f, 0.f, -8.f);
- CHK(soa4f2_mul(dst, a, b) == dst);
- CHECK_F2(dst, -1.f, -4.f, -9.f, -16.f, -25.f, -36.f, -49.f, -64.f);
- CHK(soa4f2_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst);
- CHECK_F2(dst, -0.5f, 4.f, 3.f, -1.f, 2.5f, -12.f, -7.f, 2.f);
- CHK(soa4f2_div(dst, a, b) == dst);
- CHECK_F2(dst, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f);
-
- soa4f2(a, v4f_set1(0.f), v4f_set1(1.f));
- soa4f2(b, v4f_set1(1.f), v4f_set1(2.f));
- CHK(soa4f2_lerp(dst, a, b, v4f_set1(0.5f)) == dst);
- CHECK_F2(dst, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, 1.5f, 1.5f);
- soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f));
- soa4f2_minus(b, a);
- CHK(soa4f2_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst);
- CHECK_F2(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f);
-
- f = soa4f2_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-4.f, 4.f, 4.f, -4.f)), V4TRUE);
- f = soa4f2_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-26.f, -40.f, -58.f, -80.f)), V4TRUE);
- f = soa4f2_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f2_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE);
- f = soa4f2_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f2_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f2_is_normalized(dst), V4TRUE);
- soa4f2_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f2_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f2_eq(a, b), V4FALSE);
- soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f));
- soa4f2(b, v4f_set(-1.f,-2.f, 5.f,-4.001f), v4f_set(5.f,-6.f, 7.f, 8.001f));
- CHECK_V4MASK__(soa4f2_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0);
-
- soa4f2(a, v4f_set(1.f, 2.f, 3.f,-1.f), v4f_set(-2.f, 0.f,-7.f, 0.f));
- soa4f2(b, v4f_set(3.f, 2.f, 1.f,-2.f), v4f_set(1.f,-6.f, 0.5f, 2.f));
- f = soa4f2_cross(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(7.f, -12.f, 8.5f, -2.f)), V4TRUE);
-
- CHK(soa4f2_min(dst, a, b) == dst);
- CHECK_F2(dst, 1.f, 2.f, 1.f, -2.f, -2.f, -6.f, -7.f, 0.f);
- CHK(soa4f2_max(dst, a, b) == dst);
- CHECK_F2(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f);
-
- soa4f2_sel(dst, b, a, v4f_mask(~0, ~0, 0, ~0));
- CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f);
-
- soa4f2(c, v4f_mask(~0, ~0, 1, ~0), v4f_mask(~0, 0, 0, 0));
- soa4f2_selv(dst, b, a, c);
- CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, -6.f, 0.5f, 2.f);
-
+ test_soa4f2();
return 0;
}
diff --git a/src/test_soa4f3.c b/src/test_soa4f3.c
@@ -13,136 +13,15 @@
* You should have received a copy of the GNU Lesser General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f3.h"
-#include "test_soaXf_utils.h"
-
-#define CHECK_F3(V, A, B, C, D, E, F, G, H, I, J, K, L) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f3 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 3
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[3], b[3], c[3], dst[3], f;
(void)argc, (void)argv;
-
- CHK(soa4f3_set(a, soa4f3_splat(c, v4f_set1(-1.f))) == a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE);
- CHK(soa4f3(c,
- v4f_set(0.f, 1.f, 2.f, 3.f),
- v4f_set(5.f, 6.f, 7.f, 8.f),
- v4f_set(9.f, 10.f, 11.f, 12.f)) == c);
- CHK(soa4f3_set(a, c) == a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
-
- CHK(soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f)) == a);
- CHK(soa4f3_minus(b, a) == b);
- CHECK_F3(b, 1.f,-2.f,-3.f, 4.f,-5.f, 6.f, 7.f,-8.f,-9.f, 10.f,-1.f, 2.f);
-
- CHK(soa4f3_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F3(dst, 0.f, 4.f, 3.f,-1.f, 6.f,-4.f,-7.f, 11.f, 10.f,-8.f, 1.f, 1.f);
- CHK(soa4f3_add(dst, a, b) == dst);
- CHECK_F3(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
- CHK(soa4f3_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F3(dst,-2.f, 0.f, 3.f,-7.f, 4.f,-8.f,-7.f, 5.f, 8.f,-12.f, 1.f,-5.f);
- CHK(soa4f3_sub(dst, a, b) == dst);
- CHECK_F3(dst,-2.f, 4.f, 6.f,-8.f, 10.f,-12.f,-14.f, 16.f, 18.f,-20.f, 2.f,-4.f);
- CHK(soa4f3_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst);
- CHECK_F3(dst,-2.f, 6.f, 0.f, 4.f, 10.f,-18.f, 0.f,-8.f, 18.f,-30.f, 0.f, 2.f);
- CHK(soa4f3_mul(dst, a, b) == dst);
- CHECK_F3(dst,-1.f,-4.f,-9.f,-16.f,-25.f,-36.f,-49.f,-64.f,-81.f,-100.f,-1.f,-4.f);
- CHK(soa4f3_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst);
- CHECK_F3(dst,-0.5f, 4.f, 3.f,-1.f, 2.5f,-12.f,-7.f, 2.f, 4.5f,-20.f, 1.f,-0.5f);
- CHK(soa4f3_div(dst, a, b) == dst);
- CHECK_F3(dst,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f);
-
- soa4f3(a, v4f_set1(0.f), v4f_set1(1.f), v4f_set1(2.f));
- soa4f3(b, v4f_set1(1.f), v4f_set1(2.f), v4f_set1(-1.f));
- CHK(soa4f3_lerp(dst, a, b, v4f_set1(0.5f)) == dst);
- CHECK_F3(dst,
- 0.5f, 0.5f, 0.5f, 0.5f,
- 1.5f, 1.5f, 1.5f, 1.5f,
- 0.5f, 0.5f, 0.5f, 0.5f);
- CHK(soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f)) == a);
- CHK(soa4f3_minus(b, a) == b);
- CHK(soa4f3_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst);
- CHECK_F3(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f, 9.f, 10.f, 0.f, 2.f);
-
- f = soa4f3_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-13.f, 14.f, 3.f, -2.f)), V4TRUE);
- f = soa4f3_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-107.f, -140.f, -59.f, -84.f)), V4TRUE);
- f = soa4f3_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f3_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
- f = soa4f3_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f3_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f3_is_normalized(dst), V4TRUE);
- soa4f3_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f3_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f3_eq(a, b), V4FALSE);
- soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f,-4.f),
- v4f_set(5.f,-6.f,-7.f, 8.f),
- v4f_set(9.f,-10.f,1.f, -2.f));
- soa4f3(b,
- v4f_set(-1.f, 2.f, 5.f,-4.001f),
- v4f_set(5.f,-6.03f,7.f, 8.0),
- v4f_set(9.f,-10.f,0.f, -2.001f));
- CHECK_V4MASK__(soa4f3_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-2f,0.f,1.e-2f)),~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-1f,0.f,1.e-2f)),~0,~0, 0,~0);
-
- soa4f3(a,
- v4f_set(1.f, 2.f, 3.f,-1.f),
- v4f_set(-2.f, 0.f,-7.f, 0.f),
- v4f_set(-1.f, 4.f, 3.f, 2.f));
- soa4f3(b,
- v4f_set(3.f, 2.f, 1.f,-2.f),
- v4f_set(1.f,-6.f, 0.5f, 2.f),
- v4f_set(0.f, 1.f, 0.f, 3.f));
- CHK(soa4f3_cross(dst, a, b) == dst);
- CHECK_F3(dst, 1.f, 24.f,-1.5f,-4.f,-3.f, 6.f, 3.f,-1.f, 7.f,-12.f, 8.5f,-2.f);
-
- CHK(soa4f3_min(dst, a, b) == dst);
- CHECK_F3(dst, 1.f, 2.f, 1.f, -2.f,-2.f,-6.f,-7.f, 0.f,-1.f, 1.f, 0.f, 2.f);
- CHK(soa4f3_max(dst, a, b) == dst);
- CHECK_F3(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f);
-
- soa4f3_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0));
- CHECK_F3(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f, -1.f, 4.f, 0.f, 2.f);
-
- soa4f3(c, v4f_mask(~0,~0, 0,~0), v4f_mask(~0, 0, 0, 0), v4f_mask(0,~0,~0, 0));
- soa4f3_selv(dst, b, a, c);
- CHECK_F3(dst, 1.f, 2.f, 1.f,-1.f,-2.f,-6.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f);
-
+ test_soa4f3();
return 0;
}
-
diff --git a/src/test_soa4f4.c b/src/test_soa4f4.c
@@ -13,206 +13,15 @@
* You should have received a copy of the GNU Lesser General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f4.h"
-#include "test_soaXf_utils.h"
-
-#define CHECK_F4(V, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[3], v4f_set((M), (N), (O), (P))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f4 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 4
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[4], b[4], c[4], dst[4], f;
(void)argc, (void)argv;
-
- CHK(soa4f4_set(a, soa4f4_splat(c, v4f_set1(-1.f))) == a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[3], v4f_set1(-1.f)), V4TRUE);
- CHK(soa4f4(c,
- v4f_set(0.f, 1.f, 2.f, 3.f),
- v4f_set(5.f, 6.f, 7.f, 8.f),
- v4f_set(9.f, 10.f, 11.f, 12.f),
- v4f_set(13.f, 14.f, 15.f, 16.f)) == c);
- CHK(soa4f4_set(a, c) == a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE);
-
- CHK(soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(5.f, -3.f, -7.f, 1.f)) == a);
- CHK(soa4f4_minus(b, a) == b);
- CHECK_F4(b,
- 1.f, -2.f, -3.f, 4.f,
- -5.f, 6.f, 7.f, -8.f,
- -9.f, 10.f, -1.f, 2.f,
- -5.f, 3.f, 7.f, -1.f);
-
- CHK(soa4f4_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F4(dst,
- 0.f, 4.f, 3.f, -1.f,
- 6.f, -4.f, -7.f, 11.f,
- 10.f, -8.f, 1.f, 1.f,
- 6.f, -1.f, -7.f, 4.f);
- CHK(soa4f4_add(dst, a, b) == dst);
- CHECK_F4(dst,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f);
-
- CHK(soa4f4_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst);
- CHECK_F4(dst,
- -2.f, 0.f, 3.f, -7.f,
- 4.f, -8.f, -7.f, 5.f,
- 8.f,-12.f, 1.f,-5.f,
- 4.f, -5.f, -7.f, -2.f);
- CHK(soa4f4_sub(dst, a, b) == dst);
- CHECK_F4(dst,
- -2.f, 4.f, 6.f, -8.f,
- 10.f, -12.f, -14.f, 16.f,
- 18.f, -20.f, 2.f, -4.f,
- 10.f, -6.f, -14.f, 2.f);
-
- CHK(soa4f4_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst);
- CHECK_F4(dst,
- -2.f, 6.f, 0.f, 4.f,
- 10.f, -18.f, 0.f, -8.f,
- 18.f, -30.f, 0.f, 2.f,
- 10.f, -9.f, 0.f, -1.f);
- CHK(soa4f4_mul(dst, a, b) == dst);
- CHECK_F4(dst,
- -1.f, -4.f, -9.f, -16.f,
- -25.f, -36.f, -49.f, -64.f,
- -81.f, -100.f, -1.f, -4.f,
- -25.f, -9.f, -49.f, -1.f);
-
- CHK(soa4f4_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst);
- CHECK_F4(dst,
- -0.5f, 4.f, 3.f, -1.f,
- 2.5f, -12.f, -7.f, 2.f,
- 4.5f, -20.f, 1.f, -0.5f,
- 2.5f, -6.f, -7.f, 0.25f);
- CHK(soa4f4_div(dst, a, b) == dst);
- CHECK_F4(dst,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f);
-
- CHK(soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(5.f, -3.f, -7.f, 1.f)) == a);
- CHK(soa4f4_minus(b, a) == b);
- CHK(soa4f4_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst);
- CHECK_F4(dst,
- -1.f, -2.f, 0.f, 4.f,
- 5.f, 6.f, 0.f, -8.f,
- 9.f, 10.f, 0.f, 2.f,
- 5.f, 3.f, 0.f, -1.f);
-
- f = soa4f4_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-18.f, 17.f, 10.f, -3.f)), V4TRUE);
- f = soa4f4_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-132.f, -149.f, -108.f, -85.f)), V4TRUE);
- f = soa4f4_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f4_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE);
- f = soa4f4_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f4_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f4_is_normalized(dst), V4TRUE);
- soa4f4_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[3], b[3], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f4_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f4_eq(a, b), V4FALSE);
- soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(1.f, -1.f, 1.f, -2.f));
- soa4f4(b,
- v4f_set(-1.f, 2.f, 3.f,-4.001f),
- v4f_set(5.f,-6.03f,-7.f, 8.0),
- v4f_set(9.f,-10.f,1.f, -2.001f),
- v4f_set(1.f, -1.f, 1.0005f, -2.f));
- CHECK_V4MASK__(soa4f4_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-6f)),
- ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-2f)),
- ~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-2f, 0.f, 1.e-2f)),
- ~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 0.f, 1.e-2f)),
- ~0,~0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 1.e-3f, 1.e-2f)),
- ~0,~0,~0,~0);
-
- soa4f4(a,
- v4f_set(1.f, 2.f, 3.f, -1.f),
- v4f_set(-2.f, 0.f, -7.f, 0.f),
- v4f_set(-1.f, 4.f, 3.f, 2.f),
- v4f_set(-5.f, 7.f, 0.5f, -1.f));
- soa4f4(b,
- v4f_set(3.f, 2.f, 1.f,-2.f),
- v4f_set(1.f,-6.f, 0.5f, 2.f),
- v4f_set(0.f, 1.f, 0.f, 3.f),
- v4f_set(1.f,-1.f, 0.f, 0.f));
- CHK(soa4f4_min(dst, a, b) == dst);
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -2.f,
- -2.f, -6.f, -7.f, 0.f,
- -1.f, 1.f, 0.f, 2.f,
- -5.f, -1.f, 0.f, -1.f);
- CHK(soa4f4_max(dst, a, b) == dst);
- CHECK_F4(dst,
- 3.f, 2.f, 3.f, -1.f,
- 1.f, 0.f, 0.5f, 2.f,
- 0.f, 4.f, 3.f, 3.f,
- 1.f, 7.f, 0.5f, 0.f);
-
- soa4f4_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0));
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -1.f,
- -2.f, 0.f, 0.5f, 0.f,
- -1.f, 4.f, 0.f, 2.f,
- -5.f, 7.f, 0.f, -1.f);
-
- soa4f4(c,
- v4f_mask(~0,~0, 0,~0),
- v4f_mask(~0, 0, 0, 0),
- v4f_mask( 0,~0,~0, 0),
- v4f_mask(~0,~0, 0, 0));
- soa4f4_selv(dst, b, a, c);
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -1.f,
- -2.f, -6.f, 0.5f, 2.f,
- 0.f, 4.f, 3.f, 3.f,
- -5.f, 7.f, 0.f, 0.f);
-
+ test_soa4f4();
return 0;
}
diff --git a/src/test_soa8f2.c b/src/test_soa8f2.c
@@ -13,119 +13,16 @@
* You should have received a copy of the GNU Lesser General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa8f2.h"
-#include "test_soaXf_utils.h"
+/* Generate the test_soa8f2 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 2
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v8f_T a[2], b[2], c[2];
- v8f_T v0, v1, v2, v3;
(void)argc, (void)argv;
-
- CHK(soa8f2_set(a, soa8f2_splat(c, v8f_set1(-1))) == a);
- CHECK_V8MASK(v8f_eq(a[0], v8f_set1(-1.f)), V8TRUE);
- CHECK_V8MASK(v8f_eq(a[1], v8f_set1(-1.f)), V8TRUE);
-
- v0 = v8f_set(.5f, 1.f, 2.f, 3.f, 4.f, 5.f , 6.f , 7.f);
- v1 = v8f_set(8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f);
- CHK(soa8f2(c, v0, v1) == c);
- CHK(soa8f2_set(a, c) == a);
- CHECK_V8MASK(v8f_eq(c[0], v0), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v1), V8TRUE);
- CHECK_V8MASK(v8f_eq(a[0], v0), V8TRUE);
- CHECK_V8MASK(v8f_eq(a[1], v1), V8TRUE);
-
- v0 = v8f_set(.5f, -1.f, -2.f, 3.f, -4.f, 5.f , 6.f , -7.f);
- v1 = v8f_set(-8.f, 9.f, -10.f, 11.f, 12.f, -13.f, -14.f, -15.f);
- CHK(soa8f2(a, v0, v1) == a);
- CHK(soa8f2_minus(b, a) == b);
- CHECK_V8MASK(v8f_eq(b[0], v8f_minus(v0)), V8TRUE);
- CHECK_V8MASK(v8f_eq(b[1], v8f_minus(v1)), V8TRUE);
-
- v2 = v8f_set(1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f);
- CHK(soa8f2_addf(c, a, v2) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_add(v0, v2)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_add(v1, v2)), V8TRUE);
- CHK(soa8f2_add(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_zero()), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_zero()), V8TRUE);
- CHK(soa8f2_subf(c, a, v2) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_sub(v0, v2)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_sub(v1, v2)), V8TRUE);
- CHK(soa8f2_sub(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_sub(a[0], b[0])), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_sub(a[1], b[1])), V8TRUE);
- CHK(soa8f2_mulf(c, a, v2) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_mul(v0, v2)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_mul(v1, v2)), V8TRUE);
- CHK(soa8f2_mul(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_mul(a[0], b[0])), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_mul(a[1], b[1])), V8TRUE);
- CHK(soa8f2_divf(c, a, v2) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_div(v0, v2)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_div(v1, v2)), V8TRUE);
- CHK(soa8f2_div(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_set1(-1.f)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_set1(-1.f)), V8TRUE);
-
- v3 = v8f_set(1.f, 0.5f, 0.25f, 0.125f, 0.0625f, 0.03125f, 2.f, 4.f);
- CHK(soa8f2_lerp(c, a, b, v3));
- CHECK_V8MASK(v8f_eq(c[0], v8f_lerp(a[0], b[0], v3)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_lerp(a[1], b[1], v3)), V8TRUE);
-
- v3 = soa8f2_sum(b);
- CHECK_V8MASK(v8f_eq(v3, v8f_add(b[0], b[1])), V8TRUE);
- v0 = v8f_mul(a[0], b[0]);
- v1 = v8f_mul(a[1], b[1]);
- v2 = v8f_add(v0, v1);
- v3 = soa8f2_dot(a, b);
- CHECK_V8MASK(v8f_eq(v3, v2), V8TRUE);
- v2 = v8f_sqrt(soa8f2_dot(a, a));
- v3 = soa8f2_len(a);
- CHECK_V8MASK(v8f_eq(v3, v2), V8TRUE);
-
- CHECK_V8MASK(soa8f2_is_normalized(a), V8FALSE);
- v2 = soa8f2_normalize(b, a);
- CHECK_V8MASK(v8f_eq_eps(v3, v2, v8f_set1(1.e-4f)), V8TRUE);
- CHECK_V8MASK(soa8f2_is_normalized(b), V8TRUE);
- v2 = soa8f2_len(b);
- CHECK_V8MASK(v8f_eq_eps(v2, v8f_set1(1), v8f_set1(1.e-4f)), V8TRUE);
- soa8f2_divf(c, a, v3);
- CHECK_V8MASK(v8f_eq_eps(b[0], c[0], v8f_set1(1.e-4f)), V8TRUE);
- CHECK_V8MASK(v8f_eq_eps(b[1], c[1], v8f_set1(1.e-4f)), V8TRUE);
-
- CHECK_V8MASK(soa8f2_eq(a, a), V8TRUE);
- CHECK_V8MASK(soa8f2_eq(a, b), V8FALSE);
- soa8f2_addf(b, a, v8f_set1(1.e-4f));
- CHECK_V8MASK(soa8f2_eq(a, b), V8FALSE);
- CHECK_V8MASK(soa8f2_eq_eps(a, b, v8f_set1(1.e-3f)), V8TRUE);
- v2 = v8f_set(0, 0, 1.e-3f, 0, 0, 0, 1.e-3f, 1.e-3f);
- CHECK_V8MASK__(soa8f2_eq_eps(a, b, v2), 0, 0, ~0, 0, 0, 0, ~0, ~0);
-
- CHK(soa8f2_min(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_min(a[0], b[0])), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_min(a[1], b[1])), V8TRUE);
- CHK(soa8f2_max(c, a, b) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_max(a[0], b[0])), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_max(a[1], b[1])), V8TRUE);
-
- v0 = v8f_mask(0,0,~0,~0,0,~0,~0,0);
- v1 = v8f_mask(0,~0,~0,0,0,0,0,~0);
- CHK(soa8f2_sel(c, b, a, v0) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_sel(b[0], a[0], v0)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_sel(b[1], a[1], v0)), V8TRUE);
- soa8f2(c, v0, v1);
- CHK(soa8f2_selv(c, b, a, c) == c);
- CHECK_V8MASK(v8f_eq(c[0], v8f_sel(b[0], a[0], v0)), V8TRUE);
- CHECK_V8MASK(v8f_eq(c[1], v8f_sel(b[1], a[1], v1)), V8TRUE);
-
- v0 = v8f_mul(a[0], b[1]);
- v1 = v8f_mul(a[1], b[0]);
- v2 = v8f_sub(v0, v1);
- v3 = soa8f2_cross(a, b);
- CHECK_V8MASK(v8f_eq_eps(v3, v2, v8f_set1(1.e-6f)), V8TRUE);
-
+ test_soa8f2();
return 0;
}
diff --git a/src/test_soa8f3.c b/src/test_soa8f3.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generate the test_soa8f3 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 3
+#include "test_soaXfY.h"
+
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv; /* The command-line arguments are not used */
+ test_soa8f3(); /* Run the test generated above by including test_soaXfY.h */
+ return 0;
+}
+
diff --git a/src/test_soa8f4.c b/src/test_soa8f4.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generate the test_soa8f4 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 4
+#include "test_soaXfY.h"
+
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv; /* The command-line arguments are not used */
+ test_soa8f4(); /* Run the test generated above by including test_soaXfY.h */
+ return 0;
+}
+
diff --git a/src/test_soaXfY.h b/src/test_soaXfY.h
@@ -0,0 +1,262 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+#include <rsys/rsys.h>
+
+/* Validate the generic parameters that the including file must provide: the
+ * SIMD width has to be 4 or 8 and the dimension has to lie in [2, 4] */
+#if !defined(SOA_SIMD_WIDTH)
+  #error "Missing the SOA_SIMD_WIDTH macro"
+#endif
+#if !(SOA_SIMD_WIDTH == 4 || SOA_SIMD_WIDTH == 8)
+  #error "Invalid value for the SOA_SIMD_WIDTH macro"
+#endif
+#if !defined(SOA_DIMENSION)
+  #error "Missing the SOA_DIMENSION macro"
+#endif
+#if !(SOA_DIMENSION >= 2 && SOA_DIMENSION <= 4)
+  #error "Invalid value for the SOA_DIMENSION macro"
+#endif
+
+/* Define the macros generic to the SOA_SIMD_WIDTH parameter:
+ *  - soaX: prefix of the SoA identifiers for the current SIMD width
+ *  - vXf(Func): identifier of the function `Func' of the v4f/v8f API
+ *  - vXf_T: vector type for the current SIMD width
+ *  - VEC, MASK: build a vector/mask from 8 arguments; only the 4 first
+ *    arguments are used when SOA_SIMD_WIDTH is 4
+ *  - CHKVX(V0, V1): check that V0 and V1 yield the same v4f_mask_<lane> value
+ *    for each of their lanes. For a SIMD width of 8, the comparison is
+ *    performed on the 2 v4f vectors returned by v8f_abcd and v8f_efgh. Both
+ *    arguments are evaluated exactly once. The trailing `(void)0' makes the
+ *    ';' mandatory after the macro invocation */
+#if SOA_SIMD_WIDTH == 4
+  #define soaX soa4
+  #define vXf(Func) CONCAT(v4f_, Func)
+  #define vXf_T v4f_T
+  #define VEC(A, B, C, D, E, F, G, H) v4f_set(A, B, C, D)
+  #define MASK(A, B, C, D, E, F, G, H) v4f_mask(A, B, C, D)
+  #define CHKVX(V0, V1) {                                                      \
+    const v4f_T v0__ = (V0);                                                   \
+    const v4f_T v1__ = (V1);                                                   \
+    CHK(v4f_mask_x(v0__) == v4f_mask_x(v1__));                                 \
+    CHK(v4f_mask_y(v0__) == v4f_mask_y(v1__));                                 \
+    CHK(v4f_mask_z(v0__) == v4f_mask_z(v1__));                                 \
+    CHK(v4f_mask_w(v0__) == v4f_mask_w(v1__));                                 \
+  } (void)0
+#elif SOA_SIMD_WIDTH == 8
+  #define soaX soa8
+  #define vXf(Func) CONCAT(v8f_, Func)
+  #define vXf_T v8f_T
+  #define VEC(A, B, C, D, E, F, G, H) v8f_set(A, B, C, D, E, F, G, H)
+  #define MASK(A, B, C, D, E, F, G, H) v8f_mask(A, B, C, D, E, F, G, H)
+  #define CHKVX(V0, V1) {                                                      \
+    const v8f_T v0__ = (V0);                                                   \
+    const v8f_T v1__ = (V1);                                                   \
+    CHK(v4f_mask_x(v8f_abcd(v0__)) == v4f_mask_x(v8f_abcd(v1__)));             \
+    CHK(v4f_mask_y(v8f_abcd(v0__)) == v4f_mask_y(v8f_abcd(v1__)));             \
+    CHK(v4f_mask_z(v8f_abcd(v0__)) == v4f_mask_z(v8f_abcd(v1__)));             \
+    CHK(v4f_mask_w(v8f_abcd(v0__)) == v4f_mask_w(v8f_abcd(v1__)));             \
+    CHK(v4f_mask_x(v8f_efgh(v0__)) == v4f_mask_x(v8f_efgh(v1__)));             \
+    CHK(v4f_mask_y(v8f_efgh(v0__)) == v4f_mask_y(v8f_efgh(v1__)));             \
+    CHK(v4f_mask_z(v8f_efgh(v0__)) == v4f_mask_z(v8f_efgh(v1__)));             \
+    CHK(v4f_mask_w(v8f_efgh(v0__)) == v4f_mask_w(v8f_efgh(v1__)));             \
+  } (void)0
+#endif
+
+/* Define the macros generic to the SOA_DIMENSION parameter:
+ *  - soaXfY(Func): identifier built by pasting soaX, the "f<dimension>_"
+ *    infix and `Func'
+ *  - SOA_VEC(Dst, X, Y, Z, W): call the soaX "f<dimension>" function with Dst
+ *    and as many of the X, Y, Z, W arguments as the dimension; the remaining
+ *    arguments are dropped */
+#if SOA_DIMENSION == 2
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f2_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f2)(Dst, X, Y)
+#elif SOA_DIMENSION == 3
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f3_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f3)(Dst, X, Y, Z)
+#elif SOA_DIMENSION == 4
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f4_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f4)(Dst, X, Y, Z, W)
+#endif
+
+/* Include the header of the SoA API selected by the generic parameters. Once
+ * the 3 first branches are discarded the SIMD width is not 4 and the soa8
+ * headers are used */
+#if SOA_SIMD_WIDTH == 4 && SOA_DIMENSION == 2
+  #include "soa4f2.h"
+#elif SOA_SIMD_WIDTH == 4 && SOA_DIMENSION == 3
+  #include "soa4f3.h"
+#elif SOA_SIMD_WIDTH == 4 && SOA_DIMENSION == 4
+  #include "soa4f4.h"
+#elif SOA_DIMENSION == 2
+  #include "soa8f2.h"
+#elif SOA_DIMENSION == 3
+  #include "soa8f3.h"
+#elif SOA_DIMENSION == 4
+  #include "soa8f4.h"
+#endif
+
+/* Define the constant masks used as expected results: VXTRUE is built with ~0
+ * for every lane and VXFALSE with 0 for every lane */
+#define VXTRUE MASK(~0,~0,~0,~0,~0,~0,~0,~0)
+#define VXFALSE MASK(0,0,0,0,0,0,0,0)
+
+/* Test the SoA API selected by the SOA_SIMD_WIDTH and SOA_DIMENSION macros.
+ * Each SoA function is invoked on arrays of SOA_DIMENSION vectors and its
+ * result is compared, component by component, to the same operation performed
+ * with the vXf vector API. The function is named
+ * test_soa<SOA_SIMD_WIDTH>f<SOA_DIMENSION>. */
+static void
+CONCAT(CONCAT(CONCAT(test_, soaX), f), SOA_DIMENSION)(void)
+{
+  vXf_T a[SOA_DIMENSION], b[SOA_DIMENSION], c[SOA_DIMENSION];
+  vXf_T v[4], f, tmp, mask;
+  int i;
+
+  /* Reference data. Only the 4 first values of each VEC are used when the SIMD
+   * width is 4.
+   * NOTE(review): v[3] is a copy of v[2]; with SOA_DIMENSION == 4 the tests
+   * cannot detect a swap of the 2 last components. Confirm whether distinct
+   * values are wanted. */
+  v[0] = VEC(.5f, -1.f, -2.f, 3.f, -4.f, 5.f, 6.f, -7.f);
+  v[1] = VEC(-8.f, 9.f, -10.f, 11.f, 12.f, -13.f, -14.f, -15.f);
+  v[2] = VEC(16.f, -17.f, 18.f, -19.f, 20.f, 21.f, 22.f, -23.f);
+  v[3] = VEC(16.f, -17.f, 18.f, -19.f, 20.f, 21.f, 22.f, -23.f);
+  f = VEC(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
+
+  /* Setters */
+  soaXfY(splat)(a, vXf(set1)(-1.f));
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], vXf(set1)(-1.f)), VXTRUE);
+  }
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(b[i], a[i]), VXTRUE);
+  }
+  CHK(SOA_VEC(a, v[0], v[1], v[2], v[3]) == a);
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE);
+    CHKVX(vXf(eq)(b[i], v[i]), VXTRUE);
+  }
+
+  /* Unary operator. From here `b' is the opposite of `a' */
+  CHK(soaXfY(minus)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE); /* The source must be left unchanged */
+    CHKVX(vXf(eq)(b[i], vXf(minus)(v[i])), VXTRUE);
+  }
+
+  /* Regular binary operators: SoA vector with a single vXf vector */
+  CHK(soaXfY(addf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(add)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(subf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(mulf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(divf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(div)(a[i], f)), VXTRUE);
+  }
+
+  /* Regular binary operators: SoA vector with another SoA vector */
+  CHK(soaXfY(add)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(zero)()), VXTRUE); /* b == -a */
+  }
+  CHK(soaXfY(sub)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(mul)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(div)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(div)(a[i], b[i])), VXTRUE);
+  }
+
+  /* Linear interpolation. As for the other operators, the returned pointer
+   * is checked to be the destination */
+  CHK(soaXfY(lerp)(c, a, b, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(lerp)(a[i], b[i], f)), VXTRUE);
+  }
+
+  /* Sum operator */
+  f = soaXfY(sum)(a);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, a[i]);
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Dot operator */
+  f = soaXfY(dot)(a, b);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, vXf(mul)(a[i], b[i]));
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Vector normalization functions. `tmp' stores the reference length of
+   * `a', i.e. sqrt(dot(a, a)), against which are checked both the `len'
+   * function and the value returned by `normalize' */
+  CHKVX(soaXfY(is_normalized)(a), VXFALSE);
+  f = soaXfY(normalize)(c, a);
+  tmp = vXf(sqrt)(soaXfY(dot)(a, a));
+  CHKVX(vXf(eq)(soaXfY(len)(a), tmp), VXTRUE);
+  CHKVX(vXf(eq_eps)(f, tmp, vXf(set1)(1.e-4f)), VXTRUE);
+  CHKVX(soaXfY(is_normalized)(c), VXTRUE);
+  CHKVX(vXf(eq_eps)(soaXfY(len)(c), vXf(set1)(1.f), vXf(set1)(1.e-4f)), VXTRUE);
+  soaXfY(divf)(b, a, f);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(b[i], c[i], vXf(set1)(1.e-4f)), VXTRUE);
+  }
+
+  /* Comparators. `b' is set to `a' shifted by 1.e-4: it is strictly different
+   * from `a' but equal with an epsilon of 1.e-3. The last check uses a per
+   * lane epsilon: only the lanes whose epsilon is 1.e-3 must be equal */
+  CHKVX(soaXfY(eq)(a, a), VXTRUE);
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  soaXfY(addf)(b, a, vXf(set1)(1.e-4f));
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  CHKVX(soaXfY(eq_eps)(a, b, vXf(set1)(1.e-3f)), VXTRUE);
+  tmp = VEC(0, 0, 1.e-3f, 0, 0, 0, 1.e-3f, 1.e-3f);
+  mask = MASK(0, 0, ~0, 0, 0, 0, ~0, ~0);
+  CHKVX(soaXfY(eq_eps)(a, b, tmp), mask);
+
+  /* Min/Max */
+  CHK(soaXfY(min)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(min)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(max)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(max)(a[i], b[i])), VXTRUE);
+  }
+
+  /* Select. `v' is reused to store the selection masks: `sel' applies the
+   * same mask to all the components while `selv' uses one mask per
+   * component */
+  v[0] = MASK( 0,  0, ~0, ~0,  0, ~0, ~0,  0);
+  v[1] = MASK( 0, ~0, ~0,  0,  0,  0,  0, ~0);
+  v[2] = MASK( 0,  0,  0,  0, ~0, ~0,  0,  0);
+  v[3] = MASK(~0, ~0, ~0,  0, ~0,  0,  0, ~0);
+  CHK(soaXfY(sel)(c, b, a, v[0]) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[0])), VXTRUE);
+  }
+  CHK(soaXfY(selv)(c, b, a, v) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[i])), VXTRUE);
+  }
+
+  /* Cross product. It is tested for the dimensions 2 and 3 only; in 2D the
+   * function returns a single vXf vector while in 3D it fills a SoA vector */
+#if SOA_DIMENSION == 2
+  v[0] = vXf(mul)(a[0], b[1]);
+  v[1] = vXf(mul)(a[1], b[0]);
+  tmp = vXf(sub)(v[0], v[1]);
+  f = soaXfY(cross)(a, b);
+  CHKVX(vXf(eq_eps)(f, tmp, vXf(set1)(1.e-6f)), VXTRUE);
+#elif SOA_DIMENSION == 3
+  v[0] = vXf(sub)(vXf(mul)(a[1], b[2]), vXf(mul)(a[2], b[1]));
+  v[1] = vXf(sub)(vXf(mul)(a[2], b[0]), vXf(mul)(a[0], b[2]));
+  v[2] = vXf(sub)(vXf(mul)(a[0], b[1]), vXf(mul)(a[1], b[0]));
+  CHK(soaXfY(cross)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(c[i], v[i], vXf(set1)(1.e-6f)), VXTRUE);
+  }
+#endif
+}
+
+/* Clean up all the macros defined for the test generation, presumably to
+ * allow another inclusion of this file with different parameters */
+
+/* Constants */
+#undef VXFALSE
+#undef VXTRUE
+
+/* Macros generic to the SOA_DIMENSION parameter */
+#undef SOA_VEC
+#undef soaXfY
+
+/* Macros generic to the SOA_SIMD_WIDTH parameter */
+#undef CHKVX
+#undef MASK
+#undef VEC
+#undef vXf_T
+#undef vXf
+#undef soaX
+
+/* Generic parameters */
+#undef SOA_DIMENSION
+#undef SOA_SIMD_WIDTH