rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

aosq.c (2871B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "aosq.h"
     17 
     18 v4f_T
     19 aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T vvvv)
     20 {
     21   v4f_T tmp_cos_omega, cos_omega, omega, rcp_sin_omega;
     22   v4f_T one_sub_v;
     23   v4f_T mask;
     24   v4f_T tmp0, tmp1, tmp2;
     25   v4f_T scale0, scale1;
     26   float f;
     27 
     28   f = v4f_x(vvvv);
     29   if(f == 0.f)
     30     return from;
     31   else if(f == 1.f)
     32     return to;
     33 
     34   tmp_cos_omega = v4f_dot(from, to);
     35 
     36   mask = v4f_lt(tmp_cos_omega, v4f_zero());
     37   tmp0 = v4f_sel(to, v4f_minus(to), mask);
     38   cos_omega = v4f_sel(tmp_cos_omega, v4f_minus(tmp_cos_omega), mask);
     39 
     40   omega = v4f_acos(cos_omega);
     41   rcp_sin_omega = v4f_rcp(v4f_sin(omega));
     42   one_sub_v = v4f_sub(v4f_set1(1.f), vvvv);
     43   tmp1 = v4f_mul(v4f_sin(v4f_mul(one_sub_v, omega)), rcp_sin_omega);
     44   tmp2 = v4f_mul(v4f_sin(v4f_mul(omega, vvvv)), rcp_sin_omega);
     45 
     46   mask = v4f_gt(v4f_sub(v4f_set1(1.f), cos_omega), v4f_set1(1.e-6f));
     47   scale0 = v4f_sel(one_sub_v, tmp1, mask);
     48   scale1 = v4f_sel(vvvv, tmp2, mask);
     49 
     50   return v4f_madd(from, scale0, v4f_mul(tmp0, scale1));
     51 }
     52 
     53 void
     54 aosq_to_aosf33(const v4f_T q, v4f_T out[3])
     55 {
     56   const v4f_T i2j2k2_ = v4f_add(q, q);
     57 
     58   const v4f_T r0 = /* { jj2 + kk2, ij2 + ak2, ik2 - aj2 } */
     59     v4f_madd(v4f_mul(v4f_zzyy(i2j2k2_), v4f_zwwz(q)),
     60              v4f_set(1.f, 1.f, -1.f, 0.f),
     61              v4f_mul(v4f_yyzz(i2j2k2_), v4f_yxxy(q)));
     62   const v4f_T r1 = /* { ij2 - ak2, ii2 + kk2, jk2 + ai2 } */
     63     v4f_madd(v4f_mul(v4f_zzxx(i2j2k2_), v4f_wzwz(q)),
     64              v4f_set(-1.f, 1.f, 1.f, 0.f),
     65              v4f_mul(v4f_yxzw(i2j2k2_), v4f_xxyy(q)));
     66   const v4f_T r2 = /* { ik2 + aj2, jk2 - ai2, ii2 + jj2 } */
     67     v4f_madd(v4f_mul(v4f_yxyx(i2j2k2_), v4f_wwyy(q)),
     68              v4f_set(1.f, -1.f, 1.f, 0.f),
     69              v4f_mul(v4f_zzxx(i2j2k2_), v4f_xyxy(q)));
     70 
     71   out[0] = /* { 1 - (jj2 + kk2), ij2 + ak2, ik2 - aj2 } */
     72     v4f_madd(r0, v4f_set(-1.f, 1.f, 1.f, 0.f), v4f_set(1.f, 0.f, 0.f, 0.f));
     73   out[1] = /* { ij2 - ak2, 1 - (ii2 + kk2), jk2 + ai2 } */
     74     v4f_madd(r1, v4f_set(1.f, -1.f, 1.f, 0.f), v4f_set(0.f, 1.f, 0.f, 0.f));
     75   out[2] = /* { ik2 + aj2, jk2 - ai2, 1 - (ii2 + jj2) } */
     76     v4f_madd(r2, v4f_set(1.f, 1.f, -1.f, 0.f), v4f_set(0.f, 0.f, 1.f, 0.f));
     77 }
     78