Add and test the AoS quaternion SIMD functions - rsimd - Make SIMD instruction sets easier to use

commit 292240923e34d35b08b0e11ea6ca2b82542a46e8
parent ee22862cb041351d56c14327a801b09519e71674
Author: vaplv <vaplv@free.fr>
Date:   Fri, 17 Oct 2014 16:17:01 +0200

Add and test the AoS quaternion SIMD functions

Diffstat:
M cmake/CMakeLists.txt  | 3 +++
A src/aosq.c  | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/aosq.h  | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/test_aosq.c  | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

4 files changed, 341 insertions(+), 0 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -42,6 +42,7 @@ set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
 set(RSIMD_FILES_INC
   aosf33.h
   aosf44.h
+  aosq.h
   rsimd.h
   sse/sse.h
   sse/ssef.h
@@ -49,6 +50,7 @@ set(RSIMD_FILES_INC
   sse/sse_swz.h)
 set(RSIMD_FILES_SRC 
   aosf44.c
+  aosq.c
   sse/ssef.c)
 rcmake_prepend_path(RSIMD_FILES_INC ${RSIMD_SOURCE_DIR})
 rcmake_prepend_path(RSIMD_FILES_SRC ${RSIMD_SOURCE_DIR})
@@ -79,6 +81,7 @@ new_test(test_v4f)
 new_test(test_v4i)
 new_test(test_aosf33)
 new_test(test_aosf44)
+new_test(test_aosq)
 
 ################################################################################
 # Install directives
diff --git a/src/aosq.c b/src/aosq.c
@@ -0,0 +1,78 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "aosq.h"
+
+/* Interpolate between the quaternions `from' and `to' with the factor `vvvv'.
+ * Only the x lane of `vvvv' drives the 2 early exits; the factor is presumably
+ * replicated in the 4 lanes (TODO confirm against the callers). */
+v4f_T
+aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T vvvv)
+{
+  const float t = v4f_x(vvvv);
+  v4f_T one, one_sub_t;
+  v4f_T dot, is_neg, dst, cos_angle, angle, inv_sin;
+  v4f_T sph_w0, sph_w1, use_sph, w0, w1;
+
+  /* Nothing to interpolate at the bounds of the range */
+  if(t == 0.f) return from;
+  if(t == 1.f) return to;
+
+  one = v4f_set1(1.f);
+  one_sub_t = v4f_sub(one, vvvv);
+
+  /* Where the dot product is negative, negate both `to' and the cosine */
+  dot = v4f_dot(from, to);
+  is_neg = v4f_lt(dot, v4f_zero());
+  dst = v4f_sel(to, v4f_minus(to), is_neg);
+  cos_angle = v4f_sel(dot, v4f_minus(dot), is_neg);
+
+  /* Spherical weights:
+   *   sin((1-t)*angle) / sin(angle) and sin(t*angle) / sin(angle) */
+  angle = v4f_acos(cos_angle);
+  inv_sin = v4f_rcp(v4f_sin(angle));
+  sph_w0 = v4f_mul(v4f_sin(v4f_mul(one_sub_t, angle)), inv_sin);
+  sph_w1 = v4f_mul(v4f_sin(v4f_mul(angle, vvvv)), inv_sin);
+
+  /* Keep the spherical weights only where 1 - cos(angle) > 1.e-6; elsewhere
+   * fall back to the linear weights 1-t and t */
+  use_sph = v4f_gt(v4f_sub(one, cos_angle), v4f_set1(1.e-6f));
+  w0 = v4f_sel(one_sub_t, sph_w0, use_sph);
+  w1 = v4f_sel(vvvv, sph_w1, use_sph);
+
+  return v4f_madd(from, w0, v4f_mul(dst, w1));
+}
+
+void /* Convert the quaternion `q' = { i, j, k, a } into a 3x3 matrix stored
+      * in the x, y, z lanes of the 3 vectors of `out'. In the comments below
+      * the suffix 2 denotes a product scaled by 2, e.g. ij2 = 2*i*j. */
+aosq_to_aosf33(const v4f_T q, v4f_T out[3])
+{
+  const v4f_T i2j2k2_ = v4f_add(q, q); /* { 2i, 2j, 2k, 2a } */
+
+  const v4f_T r0 = /* { jj2 + kk2, ij2 + ak2, ik2 - aj2 } */
+    v4f_madd(v4f_mul(v4f_zzyy(i2j2k2_), v4f_zwwz(q)),
+             v4f_set(1.f, 1.f, -1.f, 0.f), /* Signs of the product above */
+             v4f_mul(v4f_yyzz(i2j2k2_), v4f_yxxy(q)));
+  const v4f_T r1 = /* { ij2 - ak2, ii2 + kk2, jk2 + ai2 } */
+    v4f_madd(v4f_mul(v4f_zzxx(i2j2k2_), v4f_wzwz(q)),
+             v4f_set(-1.f, 1.f, 1.f, 0.f), /* Signs of the product above */
+             v4f_mul(v4f_yxzw(i2j2k2_), v4f_xxyy(q)));
+  const v4f_T r2 = /* { ik2 + aj2, jk2 - ai2, ii2 + jj2 } */
+    v4f_madd(v4f_mul(v4f_yxyx(i2j2k2_), v4f_wwyy(q)),
+             v4f_set(1.f, -1.f, 1.f, 0.f), /* Signs of the product above */
+             v4f_mul(v4f_zzxx(i2j2k2_), v4f_xyxy(q)));
+
+  /* Negate the diagonal term of each vector and add 1 to it */
+  out[0] = /* { 1 - (jj2 + kk2), ij2 + ak2, ik2 - aj2 } */
+    v4f_madd(r0, v4f_set(-1.f, 1.f, 1.f, 0.f), v4f_set(1.f, 0.f, 0.f, 0.f));
+  out[1] = /* { ij2 - ak2, 1 - (ii2 + kk2), jk2 + ai2 } */
+    v4f_madd(r1, v4f_set(1.f, -1.f, 1.f, 0.f), v4f_set(0.f, 1.f, 0.f, 0.f));
+  out[2] = /* { ik2 + aj2, jk2 - ai2, 1 - (ii2 + jj2) } */
+    v4f_madd(r2, v4f_set(1.f, 1.f, -1.f, 0.f), v4f_set(0.f, 0.f, 1.f, 0.f));
+}
+
diff --git a/src/aosq.h b/src/aosq.h
@@ -0,0 +1,115 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef AOSQ_H
+#define AOSQ_H
+
+#include "rsimd.h"
+
+/*
+ * Functions on AoS quaternions. A quaternion is encoded into a v4f_T as
+ * { i, j, k, a }, i.e. its 3 imaginary components followed by its real one.
+ */
+
+/*******************************************************************************
+ * Set operations
+ ******************************************************************************/
+/* Return the identity quaternion, i.e. i = j = k = 0 and a = 1 */
+static FINLINE v4f_T
+aosq_identity(void)
+{
+  const v4f_T identity = v4f_set(0.f, 0.f, 0.f, 1.f);
+  return identity;
+}
+
+/* Build the quaternion
+ *   { x*sin(a/2), y*sin(a/2), z*sin(a/2), cos(a/2) }
+ * from the axis stored in the x, y, z lanes of `xyz_' and the angle `aaaa'.
+ * The axis is used as is: it is not normalized by this function. */
+static FINLINE v4f_T
+aosq_set_axis_angle(const v4f_T xyz_, const v4f_T aaaa)
+{
+  v4f_T sin_half, cos_half;
+  v4f_T xyz1, sin3_cos;
+
+  v4f_sincos(v4f_mul(aaaa, v4f_set1(0.5f)), &sin_half, &cos_half);
+
+  xyz1 = v4f_xyzd(xyz_, v4f_set1(1.f)); /* { x, y, z, 1 } */
+  sin3_cos = v4f_xyzd(sin_half, cos_half); /* { sin, sin, sin, cos } */
+  return v4f_mul(xyz1, sin3_cos);
+}
+
+/*******************************************************************************
+ * Comparison operations
+ ******************************************************************************/
+/* Compare the 4 components of `q0' and `q1'. The per lane masks returned by
+ * v4f_eq are AND-ed together so that every lane of the result holds the same
+ * mask, which is set only if the 4 components match. */
+static FINLINE v4f_T
+aosq_eq(const v4f_T q0, const v4f_T q1)
+{
+  const v4f_T lanes = v4f_eq(q0, q1);
+  /* { x&z, x&z, y&w, y&w } */
+  const v4f_T pairs = v4f_and(v4f_xxyy(lanes), v4f_zzww(lanes));
+  /* x&z&y&w in the 4 lanes */
+  return v4f_and(v4f_xxyy(pairs), v4f_zzww(pairs));
+}
+
+/* Compare the 4 components of `q0' and `q1' with the tolerance `eps'. The per
+ * lane masks returned by v4f_eq_eps are AND-ed together so that every lane of
+ * the result holds the same mask, set only if the 4 components match. */
+static FINLINE v4f_T
+aosq_eq_eps(const v4f_T q0, const v4f_T q1, const v4f_T eps)
+{
+  const v4f_T lanes = v4f_eq_eps(q0, q1, eps);
+  /* { x&z, x&z, y&w, y&w } */
+  const v4f_T pairs = v4f_and(v4f_xxyy(lanes), v4f_zzww(lanes));
+  /* x&z&y&w in the 4 lanes */
+  return v4f_and(v4f_xxyy(pairs), v4f_zzww(pairs));
+}
+
+/*******************************************************************************
+ * Arithmetic operations
+ ******************************************************************************/
+#define SBIT__ (int32_t)0x80000000
+/* Product q0 * q1 of 2 quaternions laid out as { i, j, k, a }. Each output
+ * component is the horizontal sum of q0, with some of its signs flipped,
+ * times a permutation of q1. */
+static FINLINE v4f_T
+aosq_mul(const v4f_T q0, const v4f_T q1)
+{
+  /* Copies of q0 whose listed lanes have their sign bit toggled */
+  const v4f_T q0_nz = v4f_xor(v4f_mask(0, 0, SBIT__, 0), q0);
+  const v4f_T q0_nx = v4f_xor(v4f_mask(SBIT__, 0, 0, 0), q0);
+  const v4f_T q0_ny = v4f_xor(v4f_mask(0, SBIT__, 0, 0), q0);
+  const v4f_T q0_nxyz = v4f_xor(v4f_mask(SBIT__, SBIT__, SBIT__, 0), q0);
+
+  /* One output component per sum; v4f_sum presumably replicates the sum in
+   * all the lanes (only the x and y lanes are read below) */
+  const v4f_T sum_i = v4f_sum(v4f_mul(q0_nz, v4f_wzyx(q1)));
+  const v4f_T sum_j = v4f_sum(v4f_mul(q0_nx, v4f_zwxy(q1)));
+  const v4f_T sum_k = v4f_sum(v4f_mul(q0_ny, v4f_yxwz(q1)));
+  const v4f_T sum_a = v4f_sum(v4f_mul(q0_nxyz, q1));
+
+  /* Interleave then merge the sums into { i, j, k, a } */
+  const v4f_T ijij = v4f_xayb(sum_i, sum_j);
+  const v4f_T kaka = v4f_xayb(sum_k, sum_a);
+  return v4f_xyab(ijij, kaka);
+}
+
+/* Conjugate of `q': { -i, -j, -k, a } */
+static FINLINE v4f_T
+aosq_conj(const v4f_T q)
+{
+  /* Toggle the sign bit of the 3 imaginary lanes only */
+  const v4f_T conj = v4f_xor(q, v4f_mask(SBIT__, SBIT__, SBIT__, 0));
+  return conj;
+}
+#undef SBIT__
+
+/* Compute the real part `a' of a quaternion, assumed to be of unit length,
+ * from its imaginary part stored in the x, y, z lanes of `ijk_':
+ *   a = sqrt(|1 - (i*i + j*j + k*k)|)
+ * The absolute value is taken before the square root, so the sign of `a'
+ * is not recovered. */
+static FINLINE v4f_T
+aosq_calca(const v4f_T ijk_)
+{
+  const v4f_T sqr_len = v4f_dot3(ijk_, ijk_);
+  const v4f_T sqr_a = v4f_abs(v4f_sub(v4f_set1(1.f), sqr_len));
+  return v4f_sqrt(sqr_a);
+}
+
+/* Linearly interpolate between `from' and `to' with the factor `aaaa', then
+ * normalize the interpolated vector */
+static FINLINE v4f_T
+aosq_nlerp(const v4f_T from, const v4f_T to, const v4f_T aaaa)
+{
+  const v4f_T lerp = v4f_lerp(from, to, aaaa);
+  return v4f_normalize(lerp);
+}
+
+RSIMD_API v4f_T aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T aaaa);
+
+/*******************************************************************************
+ * Conversion
+ ******************************************************************************/
+RSIMD_API void aosq_to_aosf33(const v4f_T q, v4f_T out[3]);
+
+#endif /* AOSQ_H */
+
+
diff --git a/src/test_aosq.c b/src/test_aosq.c
@@ -0,0 +1,145 @@
+/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "aosq.h"
+#include "aosf33.h"
+#include <rsys/float33.h>
+
+#define AOSF33_EQ_EPS(M, A, B, C, D, E, F, G, H, I, Eps)                       \
+  {                                                                            \
+    float a[9], b[9];                                                          \
+    b[0] = (A); b[1] = (B); b[2] = (C);                                        \
+    b[3] = (D); b[4] = (E); b[5] = (F);                                        \
+    b[6] = (G); b[7] = (H); b[8] = (I);                                        \
+    CHECK(f33_eq_eps(aosf33_store(a, (M)), b, Eps), 1);                        \
+  } (void)0
+
+int /* Test program of the aosq functions; each expectation goes through CHECK */
+main(int argc, char** argv)
+{
+  union { int32_t i; float f; } cast; /* Read a float lane as raw bits */
+  v4f_T q0, q1, q2, t;
+  v4f_T m[3];
+  (void)argc, (void)argv;
+
+  q0 = aosq_identity(); /* Expect { 0, 0, 0, 1 } */
+  CHECK(v4f_x(q0), 0.f);
+  CHECK(v4f_y(q0), 0.f);
+  CHECK(v4f_z(q0), 0.f);
+  CHECK(v4f_w(q0), 1.f);
+
+  q0 = aosq_set_axis_angle(v4f_set(2.f, 5.f, 1.f, 0.f), v4f_set1((float)PI*0.3f));
+  CHECK(eq_eps(v4f_x(q0), 0.907981f, 1.e-6f), 1); /* 2 * sin(0.15*PI) */
+  CHECK(eq_eps(v4f_y(q0), 2.269953f, 1.e-6f), 1); /* 5 * sin(0.15*PI) */
+  CHECK(eq_eps(v4f_z(q0), 0.453991f, 1.e-6f), 1); /* 1 * sin(0.15*PI) */
+  CHECK(eq_eps(v4f_w(q0), 0.891007f, 1.e-6f), 1); /* cos(0.15*PI) */
+
+  q0 = v4f_set(1.f, 2.f, 3.f, -3.f); /* aosq_eq on identical operands */
+  q1 = v4f_set(1.f, 2.f, 3.f, -3.f);
+  t = aosq_eq(q0, q1);
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+
+  q1 = v4f_set(0.f, 2.f, 3.f, -3.f); /* Only the x lane differs */
+  t = aosq_eq(q0, q1);
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+  q1 = v4f_set(1.f, 0.f, 3.f, -3.f); /* Only the y lane differs */
+  t = aosq_eq(q0, q1);
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+  q1 = v4f_set(1.f, 2.f, 0.f, -3.f); /* Only the z lane differs */
+  t = aosq_eq(q0, q1);
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+  q1 = v4f_set(1.f, 2.f, 3.f, 0.f); /* Only the w lane differs */
+  t = aosq_eq(q0, q1);
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+
+  q1 = v4f_set(1.01f, 2.f, 3.02f, -3.f); /* Largest lane gap is about 0.02 */
+  t = aosq_eq_eps(q0, q1, v4f_set1(0.01f)); /* Too tight: mask cleared */
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+  t = aosq_eq_eps(q0, q1, v4f_set1(0.02f)); /* Wide enough: mask set */
+  cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+  cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+
+  q0 = v4f_set(1.f, 2.f, 3.f, 4.f); /* aosq_mul, operands as { i, j, k, a } */
+  q1 = v4f_set(5.f, 6.f, 7.f, 8.f);
+  q2 = aosq_mul(q0, q1);
+  CHECK(v4f_x(q2), 24.f);
+  CHECK(v4f_y(q2), 48.f);
+  CHECK(v4f_z(q2), 48.f);
+  CHECK(v4f_w(q2), -6.f);
+
+  q2 = aosq_conj(q0); /* Only x, y and z are negated */
+  CHECK(v4f_x(q2), -1.f);
+  CHECK(v4f_y(q2), -2.f);
+  CHECK(v4f_z(q2), -3.f);
+  CHECK(v4f_w(q2), 4.f);
+
+  q0 = v4f_normalize(v4f_set(1.f, 2.f, 5.f, 0.5f)); /* aosq_calca */
+  q1 = v4f_xyzz(q0);
+  q1 = v4f_xyzd(q1, aosq_calca(q1)); /* Rebuild w from x, y and z */
+  CHECK(v4f_x(q0), v4f_x(q1));
+  CHECK(v4f_y(q0), v4f_y(q1));
+  CHECK(v4f_z(q0), v4f_z(q1));
+  CHECK(eq_eps(v4f_w(q0), v4f_w(q1), 1.e-6f), 1);
+
+  q0 = v4f_set(1.f, 2.f, 3.f, 5.f); /* aosq_slerp on non unit operands */
+  q1 = v4f_set(2.f, 6.f, 7.f, 6.f);
+  q2 = aosq_slerp(q0, q1, v4f_set1(0.3f)); /* Expect q0 + 0.3*(q1 - q0) */
+  CHECK(eq_eps(v4f_x(q2), 1.3f, 1.e-6f), 1);
+  CHECK(eq_eps(v4f_y(q2), 3.2f, 1.e-6f), 1);
+  CHECK(eq_eps(v4f_z(q2), 4.2f, 1.e-6f), 1);
+  CHECK(eq_eps(v4f_w(q2), 5.3f, 1.e-6f), 1);
+
+  q0 = v4f_set(2.f, 5.f, 17.f, 9.f); /* aosq_to_aosf33 on a non unit q */
+  aosq_to_aosf33(q0, m);
+  AOSF33_EQ_EPS(m,
+    -627.f, 326.f, -22.f,
+    -286.f, -585.f, 206.f,
+    158.f, 134.f, -57.f,
+    1.e-6f);
+
+  q0 = v4f_normalize(q0); /* Same conversion once q is normalized */
+  aosq_to_aosf33(q0, m);
+  AOSF33_EQ_EPS(m,
+    -0.573935f, 0.817043f, -0.055138f,
+    -0.716792f, -0.468672f, 0.516291f,
+    0.395990f, 0.335840f, 0.854637f,
+    1.e-6f);
+
+  return 0;
+}
+

	rsimd Make SIMD instruction sets easier to use
	git clone git://git.meso-star.fr/rsimd.git
	Log \| Files \| Refs \| README \| LICENSE

M	cmake/CMakeLists.txt	\|	3	+++
A	src/aosq.c	\|	78	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/aosq.h	\|	115	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/test_aosq.c	\|	145	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++