commit 292240923e34d35b08b0e11ea6ca2b82542a46e8
parent ee22862cb041351d56c14327a801b09519e71674
Author: vaplv <vaplv@free.fr>
Date: Fri, 17 Oct 2014 16:17:01 +0200
Add and test the AoS quaternion SIMD functions
Diffstat:
| M | cmake/CMakeLists.txt | | | 3 | +++ |
| A | src/aosq.c | | | 78 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/aosq.h | | | 115 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/test_aosq.c | | | 145 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 341 insertions(+), 0 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -42,6 +42,7 @@ set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
set(RSIMD_FILES_INC
aosf33.h
aosf44.h
+ aosq.h
rsimd.h
sse/sse.h
sse/ssef.h
@@ -49,6 +50,7 @@ set(RSIMD_FILES_INC
sse/sse_swz.h)
set(RSIMD_FILES_SRC
aosf44.c
+ aosq.c
sse/ssef.c)
rcmake_prepend_path(RSIMD_FILES_INC ${RSIMD_SOURCE_DIR})
rcmake_prepend_path(RSIMD_FILES_SRC ${RSIMD_SOURCE_DIR})
@@ -79,6 +81,7 @@ new_test(test_v4f)
new_test(test_v4i)
new_test(test_aosf33)
new_test(test_aosf44)
+new_test(test_aosq)
################################################################################
# Install directives
diff --git a/src/aosq.c b/src/aosq.c
@@ -0,0 +1,78 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "aosq.h"
+
+v4f_T
+aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T vvvv)
+{
+ v4f_T tmp_cos_omega, cos_omega, omega, rcp_sin_omega;
+ v4f_T one_sub_v;
+ v4f_T mask;
+ v4f_T tmp0, tmp1, tmp2;
+ v4f_T scale0, scale1;
+ float f;
+
+ f = v4f_x(vvvv);
+ if(f == 0.f)
+ return from;
+ else if(f == 1.f)
+ return to;
+
+ tmp_cos_omega = v4f_dot(from, to);
+
+ mask = v4f_lt(tmp_cos_omega, v4f_zero());
+ tmp0 = v4f_sel(to, v4f_minus(to), mask);
+ cos_omega = v4f_sel(tmp_cos_omega, v4f_minus(tmp_cos_omega), mask);
+
+ omega = v4f_acos(cos_omega);
+ rcp_sin_omega = v4f_rcp(v4f_sin(omega));
+ one_sub_v = v4f_sub(v4f_set1(1.f), vvvv);
+ tmp1 = v4f_mul(v4f_sin(v4f_mul(one_sub_v, omega)), rcp_sin_omega);
+ tmp2 = v4f_mul(v4f_sin(v4f_mul(omega, vvvv)), rcp_sin_omega);
+
+ mask = v4f_gt(v4f_sub(v4f_set1(1.f), cos_omega), v4f_set1(1.e-6f));
+ scale0 = v4f_sel(one_sub_v, tmp1, mask);
+ scale1 = v4f_sel(vvvv, tmp2, mask);
+
+ return v4f_madd(from, scale0, v4f_mul(tmp0, scale1));
+}
+
+void
+aosq_to_aosf33(const v4f_T q, v4f_T out[3])
+{
+ const v4f_T i2j2k2_ = v4f_add(q, q);
+
+ const v4f_T r0 = /* { jj2 + kk2, ij2 + ak2, ik2 - aj2 } */
+ v4f_madd(v4f_mul(v4f_zzyy(i2j2k2_), v4f_zwwz(q)),
+ v4f_set(1.f, 1.f, -1.f, 0.f),
+ v4f_mul(v4f_yyzz(i2j2k2_), v4f_yxxy(q)));
+ const v4f_T r1 = /* { ij2 - ak2, ii2 + kk2, jk2 + ai2 } */
+ v4f_madd(v4f_mul(v4f_zzxx(i2j2k2_), v4f_wzwz(q)),
+ v4f_set(-1.f, 1.f, 1.f, 0.f),
+ v4f_mul(v4f_yxzw(i2j2k2_), v4f_xxyy(q)));
+ const v4f_T r2 = /* { ik2 + aj2, jk2 - ai2, ii2 + jj2 } */
+ v4f_madd(v4f_mul(v4f_yxyx(i2j2k2_), v4f_wwyy(q)),
+ v4f_set(1.f, -1.f, 1.f, 0.f),
+ v4f_mul(v4f_zzxx(i2j2k2_), v4f_xyxy(q)));
+
+ out[0] = /* { 1 - (jj2 + kk2), ij2 + ak2, ik2 - aj2 } */
+ v4f_madd(r0, v4f_set(-1.f, 1.f, 1.f, 0.f), v4f_set(1.f, 0.f, 0.f, 0.f));
+ out[1] = /* { ij2 - ak2, 1 - (ii2 + kk2), jk2 + ai2 } */
+ v4f_madd(r1, v4f_set(1.f, -1.f, 1.f, 0.f), v4f_set(0.f, 1.f, 0.f, 0.f));
+ out[2] = /* { ik2 + aj2, jk2 - ai2, 1 - (ii2 + jj2) } */
+ v4f_madd(r2, v4f_set(1.f, 1.f, -1.f, 0.f), v4f_set(0.f, 0.f, 1.f, 0.f));
+}
+
diff --git a/src/aosq.h b/src/aosq.h
@@ -0,0 +1,115 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef AOSQ_H
+#define AOSQ_H
+
+#include "rsimd.h"
+
+/*
+ * Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a }
+ */
+
+/*******************************************************************************
+ * Set operations
+ ******************************************************************************/
+static FINLINE v4f_T
+aosq_identity(void)
+{
+ return v4f_set(0.f, 0.f, 0.f, 1.f);
+}
+
+static FINLINE v4f_T
+aosq_set_axis_angle(const v4f_T xyz_, const v4f_T aaaa)
+{
+ const v4f_T half_angle = v4f_mul(aaaa, v4f_set1(0.5f));
+ v4f_T s, c;
+ v4f_T axis1;
+ v4f_T sssc;
+
+ v4f_sincos(half_angle, &s, &c);
+
+ axis1 = v4f_xyzd(xyz_, v4f_set1(1.f));
+ sssc = v4f_xyzd(s, c);
+
+ /* { x*sin(a/2), y*sin(a/2), z*sin(a/2), cos(a/2) } */
+ return v4f_mul(axis1, sssc);
+}
+
+/*******************************************************************************
+ * Comparison operations
+ ******************************************************************************/
+static FINLINE v4f_T
+aosq_eq(const v4f_T q0, const v4f_T q1)
+{
+ const v4f_T r0 = v4f_eq(q0, q1);
+ const v4f_T r1 = v4f_and(v4f_xxyy(r0), v4f_zzww(r0));
+ return v4f_and(v4f_xxyy(r1), v4f_zzww(r1));
+}
+
+static FINLINE v4f_T
+aosq_eq_eps(const v4f_T q0, const v4f_T q1, const v4f_T eps)
+{
+ const v4f_T r0 = v4f_eq_eps(q0, q1, eps);
+ const v4f_T r1 = v4f_and(v4f_xxyy(r0), v4f_zzww(r0));
+ return v4f_and(v4f_xxyy(r1), v4f_zzww(r1));
+}
+
+/*******************************************************************************
+ * Arithmetic operations
+ ******************************************************************************/
+#define SBIT__ (int32_t)0x80000000
+static FINLINE v4f_T
+aosq_mul(const v4f_T q0, const v4f_T q1)
+{
+ const v4f_T a = v4f_mul(v4f_xor(v4f_mask(0, 0, SBIT__, 0), q0), v4f_wzyx(q1));
+ const v4f_T b = v4f_mul(v4f_xor(v4f_mask(SBIT__, 0, 0, 0), q0), v4f_zwxy(q1));
+ const v4f_T c = v4f_mul(v4f_xor(v4f_mask(0, SBIT__, 0, 0), q0), v4f_yxwz(q1));
+ const v4f_T d = v4f_mul(v4f_xor(v4f_mask(SBIT__, SBIT__, SBIT__, 0), q0), q1);
+ const v4f_T ijij = v4f_xayb(v4f_sum(a), v4f_sum(b));
+ const v4f_T kaka = v4f_xayb(v4f_sum(c), v4f_sum(d));
+ return v4f_xyab(ijij, kaka);
+}
+
+static FINLINE v4f_T /* { -ix, -jy, -jz, a } */
+aosq_conj(const v4f_T q)
+{
+ return v4f_xor(q, v4f_mask(SBIT__, SBIT__, SBIT__, 0));
+}
+#undef SBIT__
+
+static FINLINE v4f_T
+aosq_calca(const v4f_T ijk_)
+{
+ const v4f_T ijk_square_len = v4f_dot3(ijk_, ijk_);
+ return v4f_sqrt(v4f_abs(v4f_sub(v4f_set1(1.f), ijk_square_len)));
+}
+
+static FINLINE v4f_T
+aosq_nlerp(const v4f_T from, const v4f_T to, const v4f_T aaaa)
+{
+ return v4f_normalize(v4f_lerp(from, to, aaaa));
+}
+
+RSIMD_API v4f_T aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T aaaa);
+
+/*******************************************************************************
+ * Conversion
+ ******************************************************************************/
+RSIMD_API void aosq_to_aosf33(const v4f_T q, v4f_T out[3]);
+
+#endif /* AOSQ_H */
+
+
diff --git a/src/test_aosq.c b/src/test_aosq.c
@@ -0,0 +1,145 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "aosq.h"
+#include "aosf33.h"
+#include <rsys/float33.h>
+
+#define AOSF33_EQ_EPS(M, A, B, C, D, E, F, G, H, I, Eps) \
+ { \
+ float a[9], b[9]; \
+ b[0] = (A); b[1] = (B); b[2] = (C); \
+ b[3] = (D); b[4] = (E); b[5] = (F); \
+ b[6] = (G); b[7] = (H); b[8] = (I); \
+ CHECK(f33_eq_eps(aosf33_store(a, (M)), b, Eps), 1); \
+ } (void)0
+
+int
+main(int argc, char** argv)
+{
+ union { int32_t i; float f; } cast;
+ v4f_T q0, q1, q2, t;
+ v4f_T m[3];
+ (void)argc, (void)argv;
+
+ q0 = aosq_identity();
+ CHECK(v4f_x(q0), 0.f);
+ CHECK(v4f_y(q0), 0.f);
+ CHECK(v4f_z(q0), 0.f);
+ CHECK(v4f_w(q0), 1.f);
+
+ q0 = aosq_set_axis_angle(v4f_set(2.f, 5.f, 1.f, 0.f), v4f_set1((float)PI*0.3f));
+ CHECK(eq_eps(v4f_x(q0), 0.907981f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_y(q0), 2.269953f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_z(q0), 0.453991f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_w(q0), 0.891007f, 1.e-6f), 1);
+
+ q0 = v4f_set(1.f, 2.f, 3.f, -3.f);
+ q1 = v4f_set(1.f, 2.f, 3.f, -3.f);
+ t = aosq_eq(q0, q1);
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+
+ q1 = v4f_set(0.f, 2.f, 3.f, -3.f);
+ t = aosq_eq(q0, q1);
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+ q1 = v4f_set(1.f, 0.f, 3.f, -3.f);
+ t = aosq_eq(q0, q1);
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+ q1 = v4f_set(1.f, 2.f, 0.f, -3.f);
+ t = aosq_eq(q0, q1);
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+ q1 = v4f_set(1.f, 2.f, 3.f, 0.f);
+ t = aosq_eq(q0, q1);
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+ q1 = v4f_set(1.01f, 2.f, 3.02f, -3.f);
+ t = aosq_eq_eps(q0, q1, v4f_set1(0.01f));
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ t = aosq_eq_eps(q0, q1, v4f_set1(0.02f));
+ cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+
+ q0 = v4f_set(1.f, 2.f, 3.f, 4.f);
+ q1 = v4f_set(5.f, 6.f, 7.f, 8.f);
+ q2 = aosq_mul(q0, q1);
+ CHECK(v4f_x(q2), 24.f);
+ CHECK(v4f_y(q2), 48.f);
+ CHECK(v4f_z(q2), 48.f);
+ CHECK(v4f_w(q2), -6.f);
+
+ q2 = aosq_conj(q0);
+ CHECK(v4f_x(q2), -1.f);
+ CHECK(v4f_y(q2), -2.f);
+ CHECK(v4f_z(q2), -3.f);
+ CHECK(v4f_w(q2), 4.f);
+
+ q0 = v4f_normalize(v4f_set(1.f, 2.f, 5.f, 0.5f));
+ q1 = v4f_xyzz(q0);
+ q1 = v4f_xyzd(q1, aosq_calca(q1));
+ CHECK(v4f_x(q0), v4f_x(q1));
+ CHECK(v4f_y(q0), v4f_y(q1));
+ CHECK(v4f_z(q0), v4f_z(q1));
+ CHECK(eq_eps(v4f_w(q0), v4f_w(q1), 1.e-6f), 1);
+
+ q0 = v4f_set(1.f, 2.f, 3.f, 5.f);
+ q1 = v4f_set(2.f, 6.f, 7.f, 6.f);
+ q2 = aosq_slerp(q0, q1, v4f_set1(0.3f));
+ CHECK(eq_eps(v4f_x(q2), 1.3f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_y(q2), 3.2f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_z(q2), 4.2f, 1.e-6f), 1);
+ CHECK(eq_eps(v4f_w(q2), 5.3f, 1.e-6f), 1);
+
+ q0 = v4f_set(2.f, 5.f, 17.f, 9.f);
+ aosq_to_aosf33(q0, m);
+ AOSF33_EQ_EPS(m,
+ -627.f, 326.f, -22.f,
+ -286.f, -585.f, 206.f,
+ 158.f, 134.f, -57.f,
+ 1.e-6f);
+
+ q0 = v4f_normalize(q0);
+ aosq_to_aosf33(q0, m);
+ AOSF33_EQ_EPS(m,
+ -0.573935f, 0.817043f, -0.055138f,
+ -0.716792f, -0.468672f, 0.516291f,
+ 0.395990f, 0.335840f, 0.854637f,
+ 1.e-6f);
+
+ return 0;
+}
+