aosq.h - rsimd - Make SIMD instruction sets easier to use

aosq.h (3709B)
      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #ifndef AOSQ_H
     17 #define AOSQ_H
     18 
     19 #include "rsimd.h"
     20 #include "math.h"
     21 
     22 /*
     23  * Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a }
     24  */
     25 
     26 /*******************************************************************************
     27  * Set operations
     28  ******************************************************************************/
     29 static FINLINE v4f_T
     30 aosq_identity(void)
     31 {
     32   return v4f_set(0.f, 0.f, 0.f, 1.f);
     33 }
     34 
     35 static FINLINE v4f_T
     36 aosq_set_axis_angle(const v4f_T xyz_, const v4f_T aaaa)
     37 {
     38   const v4f_T half_angle = v4f_mul(aaaa, v4f_set1(0.5f));
     39   v4f_T s, c;
     40   v4f_T axis1;
     41   v4f_T sssc;
     42 
     43   v4f_sincos(half_angle, &s, &c);
     44 
     45   axis1 = v4f_xyzd(xyz_, v4f_set1(1.f));
     46   sssc = v4f_xyzd(s, c);
     47 
     48   /* { x*sin(a/2), y*sin(a/2), z*sin(a/2), cos(a/2) } */
     49   return v4f_mul(axis1, sssc);
     50 }
     51 
     52 /*******************************************************************************
     53  * Comparison operations
     54  ******************************************************************************/
     55 static FINLINE v4f_T
     56 aosq_eq(const v4f_T q0, const v4f_T q1)
     57 {
     58   const v4f_T r0 = v4f_eq(q0, q1);
     59   const v4f_T r1 = v4f_and(v4f_xxyy(r0), v4f_zzww(r0));
     60   return v4f_and(v4f_xxyy(r1), v4f_zzww(r1));
     61 }
     62 
     63 static FINLINE v4f_T
     64 aosq_eq_eps(const v4f_T q0, const v4f_T q1, const v4f_T eps)
     65 {
     66   const v4f_T r0 = v4f_eq_eps(q0, q1, eps);
     67   const v4f_T r1 = v4f_and(v4f_xxyy(r0), v4f_zzww(r0));
     68   return v4f_and(v4f_xxyy(r1), v4f_zzww(r1));
     69 }
     70 
     71 /*******************************************************************************
     72  * Arithmetic operations
     73  ******************************************************************************/
     74 #define SBIT__ (int32_t)0x80000000
     75 static FINLINE v4f_T
     76 aosq_mul(const v4f_T q0, const v4f_T q1)
     77 {
     78   const v4f_T a = v4f_mul(v4f_xor(v4f_mask(0, 0, SBIT__, 0), q0), v4f_wzyx(q1));
     79   const v4f_T b = v4f_mul(v4f_xor(v4f_mask(SBIT__, 0, 0, 0), q0), v4f_zwxy(q1));
     80   const v4f_T c = v4f_mul(v4f_xor(v4f_mask(0, SBIT__, 0, 0), q0), v4f_yxwz(q1));
     81   const v4f_T d = v4f_mul(v4f_xor(v4f_mask(SBIT__, SBIT__, SBIT__, 0), q0), q1);
     82   const v4f_T ijij = v4f_xayb(v4f_sum(a), v4f_sum(b));
     83   const v4f_T kaka = v4f_xayb(v4f_sum(c), v4f_sum(d));
     84   return v4f_xyab(ijij, kaka);
     85 }
     86 
     87 static FINLINE v4f_T /* { -ix, -jy, -jz, a } */
     88 aosq_conj(const v4f_T q)
     89 {
     90   return v4f_xor(q, v4f_mask(SBIT__, SBIT__, SBIT__, 0));
     91 }
     92 #undef SBIT__
     93 
     94 static FINLINE v4f_T
     95 aosq_calca(const v4f_T ijk_)
     96 {
     97   const v4f_T ijk_square_len = v4f_dot3(ijk_, ijk_);
     98   return v4f_sqrt(v4f_abs(v4f_sub(v4f_set1(1.f), ijk_square_len)));
     99 }
    100 
    101 static FINLINE v4f_T
    102 aosq_nlerp(const v4f_T from, const v4f_T to, const v4f_T aaaa)
    103 {
    104   return v4f_normalize(v4f_lerp(from, to, aaaa));
    105 }
    106 
    107 RSIMD_API v4f_T aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T aaaa);
    108 
    109 /*******************************************************************************
    110  * Conversion
    111  ******************************************************************************/
    112 RSIMD_API void aosq_to_aosf33(const v4f_T q, v4f_T out[3]);
    113 
    114 #endif /* AOSQ_H */
    115 
    116
	rsimd Make SIMD instruction sets easier to use
	git clone git://git.meso-star.fr/rsimd.git
	Log \| Files \| Refs \| README \| LICENSE