commit 101c28de8a2975f80e1b664039d0759051fda495
parent a2774b681f32b51926c00d0f8b1bf774d3c3a497
Author: vaplv <vaplv@free.fr>
Date: Thu, 23 Oct 2014 15:42:22 +0200
Implement and test the SIMD SoA Float3 functions
Diffstat:
6 files changed, 230 insertions(+), 42 deletions(-)
diff --git a/README.md b/README.md
@@ -2,9 +2,9 @@
This C89 library defines an interface that encapsulates and make easier the
manipulation of SIMD instruction sets. It also provides a SIMD implementation
-of linear algebrae operations for 3x3 and 4x4 matrices as well as quaternions
-arranged in an `Array of Structures` SIMD layout. Linear algebrae functions on
-two dimensionnal `Structure of Arrays` vectors are also implemented.
+of linear algebra operations for 3x3 and 4x4 matrices as well as quaternions
+arranged in an `Array of Structures` SIMD layout. Linear algebra functions on
+two- and three-dimensional `Structure of Arrays` vectors are also implemented.
Note that currently only the SSE2 instruction set is supported.
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -43,13 +43,15 @@ set(RSIMD_FILES_INC_LEGACY
aosf33.h
aosf44.h
aosq.h
- rsimd.h)
+ rsimd.h
+ soa4f2.h
+ soa4f3.h)
set(RSIMD_FILES_INC_SSE
sse/sse.h
sse/ssef.h
sse/ssei.h
sse/sse_swz.h)
-set(RSIMD_FILES_SRC
+set(RSIMD_FILES_SRC
aosf44.c
aosq.c
sse/ssef.c)
@@ -86,6 +88,7 @@ new_test(test_aosf33)
new_test(test_aosf44)
new_test(test_aosq)
new_test(test_soa4f2)
+new_test(test_soa4f3)
################################################################################
# Install directives
diff --git a/src/soa4f3.h b/src/soa4f3.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA4F3_H
+#define SOA4F3_H
+
+/* Generate the common soa4fX functions */
+#define SOA4FX_DIMENSION__ 3
+#include "soa4fX.h"
+
+/* Cross product of the SoA float3 `a' and `b', i.e. dst = a x b. Each of the
+ * 3 components is evaluated with v4f operations into a local array that is
+ * then handed to soa4f3_set__ to fill `dst'; both operands are thus entirely
+ * read before `dst' is written. Return the result of soa4f3_set__. */
+static FINLINE v4f_T*
+soa4f3_cross(v4f_T dst[3], const v4f_T a[3], const v4f_T b[3])
+{
+ v4f_T ay_bz, az_by, az_bx, ax_bz, ax_by, ay_bx;
+ v4f_T cross[3];
+ ASSERT(dst && a && b);
+
+ /* Pairwise products of the operand components */
+ ay_bz = v4f_mul(a[1], b[2]);
+ az_by = v4f_mul(a[2], b[1]);
+ az_bx = v4f_mul(a[2], b[0]);
+ ax_bz = v4f_mul(a[0], b[2]);
+ ax_by = v4f_mul(a[0], b[1]);
+ ay_bx = v4f_mul(a[1], b[0]);
+
+ /* Components of the cross product */
+ cross[0] = v4f_sub(ay_bz, az_by);
+ cross[1] = v4f_sub(az_bx, ax_bz);
+ cross[2] = v4f_sub(ax_by, ay_bx);
+
+ return soa4f3_set__(dst, cross);
+}
+
+#endif /* SOA4F3_H */
diff --git a/src/test_soa4f2.c b/src/test_soa4f2.c
@@ -14,20 +14,9 @@
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "soa4f2.h"
+#include "test_soa4f_utils.h"
-#define V4TRUE ~0, ~0, ~0, ~0
-#define V4FALSE 0, 0, 0, 0
-#define CHECK_V4MASK__(Mask, A, B, C, D) \
- { \
- const v4f_T mask__ = (Mask); \
- CHECK(v4f_mask_x(mask__), (A)); \
- CHECK(v4f_mask_y(mask__), (B)); \
- CHECK(v4f_mask_z(mask__), (C)); \
- CHECK(v4f_mask_w(mask__), (D)); \
- } (void)0
-#define CHECK_V4MASK(Mask, Vec) CHECK_V4MASK__(Mask, Vec)
-
-#define CHECK_SOA4F2(V, A, B, C, D, E, F, G, H) \
+#define CHECK_F2(V, A, B, C, D, E, F, G, H) \
{ \
const v4f_T* v__ = (V); \
CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
@@ -53,44 +42,33 @@ main(int argc, char** argv)
CHECK(soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f),v4f_set(5.f,-6.f,-7.f, 8.f)), a);
CHECK(soa4f2_minus(b, a), b);
- CHECK_SOA4F2(b, 1.f,-2.f,-3.f, 4.f, -5.f, 6.f, 7.f,-8.f);
+ CHECK_F2(b, 1.f,-2.f,-3.f, 4.f, -5.f, 6.f, 7.f,-8.f);
- CHECK(soa4f2_addf(dst, a, v4f_set1(1.f)), dst);
- CHECK_SOA4F2(dst, 0.f, 3.f, 4.f, -3.f, 6.f, -5.f, -6.f, 9.f);
CHECK(soa4f2_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_SOA4F2(dst, 0.f, 4.f, 3.f, -1.f, 6.f, -4.f, -7.f, 11.f);
+ CHECK_F2(dst, 0.f, 4.f, 3.f, -1.f, 6.f, -4.f, -7.f, 11.f);
CHECK(soa4f2_add(dst, a, b), dst);
- CHECK_SOA4F2(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
-
- CHECK(soa4f2_subf(dst, a, v4f_set1(1.f)), dst);
- CHECK_SOA4F2(dst, -2.f, 1.f, 2.f, -5.f, 4.f, -7.f, -8.f, 7.f);
+ CHECK_F2(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
CHECK(soa4f2_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_SOA4F2(dst, -2.f, 0.f, 3.f, -7.f, 4.f, -8.f, -7.f, 5.f);
+ CHECK_F2(dst, -2.f, 0.f, 3.f, -7.f, 4.f, -8.f, -7.f, 5.f);
CHECK(soa4f2_sub(dst, a, b), dst);
- CHECK_SOA4F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f);
-
- CHECK(soa4f2_mulf(dst, a, v4f_set1(2.f)), dst);
- CHECK_SOA4F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f);
+ CHECK_F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f);
CHECK(soa4f2_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst);
- CHECK_SOA4F2(dst, -2.f, 6.f, 0.f, 4.f, 10.f, -18.f, 0.f, -8.f);
+ CHECK_F2(dst, -2.f, 6.f, 0.f, 4.f, 10.f, -18.f, 0.f, -8.f);
CHECK(soa4f2_mul(dst, a, b), dst);
- CHECK_SOA4F2(dst, -1.f, -4.f, -9.f, -16.f, -25.f, -36.f, -49.f, -64.f);
-
- CHECK(soa4f2_divf(dst, a, v4f_set1(2.f)), dst);
- CHECK_SOA4F2(dst, -0.5f, 1.f, 1.5f, -2.f, 2.5f, -3.f, -3.5f, 4.f);
+ CHECK_F2(dst, -1.f, -4.f, -9.f, -16.f, -25.f, -36.f, -49.f, -64.f);
CHECK(soa4f2_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst);
- CHECK_SOA4F2(dst, -0.5f, 4.f, 3.f, -1.f, 2.5f, -12.f, -7.f, 2.f);
+ CHECK_F2(dst, -0.5f, 4.f, 3.f, -1.f, 2.5f, -12.f, -7.f, 2.f);
CHECK(soa4f2_div(dst, a, b), dst);
- CHECK_SOA4F2(dst, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f);
+ CHECK_F2(dst, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f);
soa4f2(a, v4f_set1(0.f), v4f_set1(1.f));
soa4f2(b, v4f_set1(1.f), v4f_set1(2.f));
CHECK(soa4f2_lerp(dst, a, b, v4f_set1(0.5f)), dst);
- CHECK_SOA4F2(dst, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, 1.5f, 1.5f);
+ CHECK_F2(dst, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, 1.5f, 1.5f);
soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f));
soa4f2_minus(b, a);
CHECK(soa4f2_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst);
- CHECK_SOA4F2(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f);
+ CHECK_F2(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f);
f = soa4f2_sum(b);
CHECK_V4MASK(v4f_eq(f, v4f_set(-4.f, 4.f, 4.f, -4.f)), V4TRUE);
@@ -124,9 +102,10 @@ main(int argc, char** argv)
CHECK_V4MASK(v4f_eq(f, v4f_set(7.f, -12.f, 8.5f, -2.f)), V4TRUE);
CHECK(soa4f2_min(dst, a, b), dst);
- CHECK_SOA4F2(dst, 1.f, 2.f, 1.f, -2.f, -2.f, -6.f, -7.f, 0.f);
+ CHECK_F2(dst, 1.f, 2.f, 1.f, -2.f, -2.f, -6.f, -7.f, 0.f);
CHECK(soa4f2_max(dst, a, b), dst);
- CHECK_SOA4F2(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f);
+ CHECK_F2(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f);
return 0;
}
+
diff --git a/src/test_soa4f3.c b/src/test_soa4f3.c
@@ -0,0 +1,140 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "soa4f3.h"
+#include "test_soa4f_utils.h"
+
+#define CHECK_F3(V, A, B, C, D, E, F, G, H, I, J, K, L) \
+ { \
+ const v4f_T* v__ = (V); \
+ CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
+ CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
+ CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \
+ } (void)0
+
+/* Test of the soa4f3 functions. Each CHECK compares the value returned by the
+ * tested function to the expected one; the CHECK_F3 and CHECK_V4MASK macros
+ * then check the computed components. */
+int
+main(int argc, char** argv)
+{
+ v4f_T a[3], b[3], c[3], dst[3], f;
+ (void)argc, (void)argv;
+
+ /* Setters: splat, per component constructor and copy */
+ CHECK(soa4f3_set(a, soa4f3_splat(c, v4f_set1(-1.f))), a);
+ CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE);
+ CHECK(soa4f3(c,
+ v4f_set(0.f, 1.f, 2.f, 3.f),
+ v4f_set(5.f, 6.f, 7.f, 8.f),
+ v4f_set(9.f, 10.f, 11.f, 12.f)), c);
+ CHECK(soa4f3_set(a, c), a);
+ CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
+
+ /* Negation: b = -a */
+ CHECK(soa4f3(a,
+ v4f_set(-1.f, 2.f, 3.f, -4.f),
+ v4f_set(5.f, -6.f, -7.f, 8.f),
+ v4f_set(9.f, -10.f, 1.f, -2.f)), a);
+ CHECK(soa4f3_minus(b, a), b);
+ CHECK_F3(b, 1.f,-2.f,-3.f, 4.f,-5.f, 6.f, 7.f,-8.f,-9.f, 10.f,-1.f, 2.f);
+
+ /* Arithmetic against a v4f_T (*f variants) and against a SoA float3 */
+ CHECK(soa4f3_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
+ CHECK_F3(dst, 0.f, 4.f, 3.f,-1.f, 6.f,-4.f,-7.f, 11.f, 10.f,-8.f, 1.f, 1.f);
+ CHECK(soa4f3_add(dst, a, b), dst);
+ CHECK_F3(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
+ CHECK(soa4f3_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
+ CHECK_F3(dst,-2.f, 0.f, 3.f,-7.f, 4.f,-8.f,-7.f, 5.f, 8.f,-12.f, 1.f,-5.f);
+ CHECK(soa4f3_sub(dst, a, b), dst);
+ CHECK_F3(dst,-2.f, 4.f, 6.f,-8.f, 10.f,-12.f,-14.f, 16.f, 18.f,-20.f, 2.f,-4.f);
+ CHECK(soa4f3_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst);
+ CHECK_F3(dst,-2.f, 6.f, 0.f, 4.f, 10.f,-18.f, 0.f,-8.f, 18.f,-30.f, 0.f, 2.f);
+ CHECK(soa4f3_mul(dst, a, b), dst);
+ CHECK_F3(dst,-1.f,-4.f,-9.f,-16.f,-25.f,-36.f,-49.f,-64.f,-81.f,-100.f,-1.f,-4.f);
+ CHECK(soa4f3_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst);
+ CHECK_F3(dst,-0.5f, 4.f, 3.f,-1.f, 2.5f,-12.f,-7.f, 2.f, 4.5f,-20.f, 1.f,-0.5f);
+ CHECK(soa4f3_div(dst, a, b), dst);
+ CHECK_F3(dst,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f);
+
+ /* Linear interpolation with a uniform factor */
+ soa4f3(a, v4f_set1(0.f), v4f_set1(1.f), v4f_set1(2.f));
+ soa4f3(b, v4f_set1(1.f), v4f_set1(2.f), v4f_set1(-1.f));
+ CHECK(soa4f3_lerp(dst, a, b, v4f_set1(0.5f)), dst);
+ CHECK_F3(dst,
+ 0.5f, 0.5f, 0.5f, 0.5f,
+ 1.5f, 1.5f, 1.5f, 1.5f,
+ 0.5f, 0.5f, 0.5f, 0.5f);
+
+ /* Linear interpolation between a and -a with distinct factors, some of them
+ * being outside [0, 1]: the expected values correspond to a factor clamped
+ * to [0, 1] */
+ CHECK(soa4f3(a,
+ v4f_set(-1.f, 2.f, 3.f, -4.f),
+ v4f_set(5.f, -6.f, -7.f, 8.f),
+ v4f_set(9.f, -10.f, 1.f, -2.f)), a);
+ CHECK(soa4f3_minus(b, a), b);
+ CHECK(soa4f3_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst);
+ CHECK_F3(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f, 9.f, 10.f, 0.f, 2.f);
+
+ /* Reductions: sum of the components, dot product and length */
+ f = soa4f3_sum(b);
+ CHECK_V4MASK(v4f_eq(f, v4f_set(-13.f, 14.f, 3.f, -2.f)), V4TRUE);
+ f = soa4f3_dot(a, b);
+ CHECK_V4MASK(v4f_eq(f, v4f_set(-107.f, -140.f, -59.f, -84.f)), V4TRUE);
+ f = soa4f3_len(a);
+ CHECK_V4MASK
+ (v4f_eq_eps(f, v4f_sqrt(soa4f3_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
+
+ /* Normalization: the returned value is compared to the length of the input
+ * vector, which must be left unchanged */
+ CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
+ f = soa4f3_normalize(dst, b);
+ CHECK_V4MASK(v4f_eq_eps(f, soa4f3_len(b), v4f_set1(1.e-6f)), V4TRUE);
+ CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
+ CHECK_V4MASK(soa4f3_is_normalized(dst), V4TRUE);
+ soa4f3_divf(b, b, f);
+ CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
+ CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE);
+
+ /* Strict equality and equality with respect to an epsilon */
+ CHECK_V4MASK(soa4f3_eq(a, a), V4TRUE);
+ CHECK_V4MASK(soa4f3_eq(a, b), V4FALSE);
+ soa4f3(a,
+ v4f_set(-1.f, 2.f, 3.f,-4.f),
+ v4f_set(5.f,-6.f,-7.f, 8.f),
+ v4f_set(9.f,-10.f,1.f, -2.f));
+ soa4f3(b,
+ v4f_set(-1.f, 2.f, 5.f,-4.001f),
+ v4f_set(5.f,-6.03f,7.f, 8.f),
+ v4f_set(9.f,-10.f,0.f, -2.001f));
+ CHECK_V4MASK__(soa4f3_eq(a, b), ~0, 0, 0, 0);
+ CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
+ CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0);
+ CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0);
+ CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-2f,0.f,1.e-2f)),~0, 0, 0,~0);
+ CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-1f,0.f,1.e-2f)),~0,~0, 0,~0);
+
+ /* Cross product */
+ soa4f3(a,
+ v4f_set(1.f, 2.f, 3.f,-1.f),
+ v4f_set(-2.f, 0.f,-7.f, 0.f),
+ v4f_set(-1.f, 4.f, 3.f, 2.f));
+ soa4f3(b,
+ v4f_set(3.f, 2.f, 1.f,-2.f),
+ v4f_set(1.f,-6.f, 0.5f, 2.f),
+ v4f_set(0.f, 1.f, 0.f, 3.f));
+ CHECK(soa4f3_cross(dst, a, b), dst);
+ CHECK_F3(dst, 1.f, 24.f,-1.5f,-4.f,-3.f, 6.f, 3.f,-1.f, 7.f,-12.f, 8.5f,-2.f);
+
+ /* Cross product whose destination is also its first operand */
+ soa4f3_set(dst, a);
+ CHECK(soa4f3_cross(dst, dst, b), dst);
+ CHECK_F3(dst, 1.f, 24.f,-1.5f,-4.f,-3.f, 6.f, 3.f,-1.f, 7.f,-12.f, 8.5f,-2.f);
+
+ /* Per component minimum and maximum */
+ CHECK(soa4f3_min(dst, a, b), dst);
+ CHECK_F3(dst, 1.f, 2.f, 1.f, -2.f,-2.f,-6.f,-7.f, 0.f,-1.f, 1.f, 0.f, 2.f);
+ CHECK(soa4f3_max(dst, a, b), dst);
+ CHECK_F3(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f);
+ return 0;
+}
+
diff --git a/src/test_soa4f_utils.h b/src/test_soa4f_utils.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef TEST_SOA4F_UTILS_H
+#define TEST_SOA4F_UTILS_H
+
+#define V4TRUE ~0, ~0, ~0, ~0
+#define V4FALSE 0, 0, 0, 0
+#define CHECK_V4MASK__(Mask, A, B, C, D) \
+ { \
+ const v4f_T mask__ = (Mask); \
+ CHECK(v4f_mask_x(mask__), (A)); \
+ CHECK(v4f_mask_y(mask__), (B)); \
+ CHECK(v4f_mask_z(mask__), (C)); \
+ CHECK(v4f_mask_w(mask__), (D)); \
+ } (void)0
+#define CHECK_V4MASK(Mask, Vec) CHECK_V4MASK__(Mask, Vec)
+
+#endif /* TEST_SOA4F_UTILS_H */
+