rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit c5b97ca8fc48acbd9fd8cb487c165cc732cf6532
parent 7087e6b6dd782bd4034d8a8394926a8bb3e8a4ed
Author: vaplv <vaplv@free.fr>
Date:   Sun, 24 Feb 2019 16:32:32 +0100

Make the soa vector functions generic to the SIMD width

Diffstat:
M cmake/CMakeLists.txt | 2 +-
M src/soa4f2.h | 7 ++++---
M src/soa4f3.h | 7 ++++---
M src/soa4f4.h | 7 ++++---
D src/soa4fX.h | 352 -------------------------------------------------------------------------------
A src/soaXfY.h | 389 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 402 insertions(+), 362 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -53,7 +53,7 @@ set(RSIMD_FILES_INC_LEGACY aosf44.h aosq.h rsimd.h - soa4fX.h + soaXfY.h soa4f2.h soa4f3.h soa4f4.h) diff --git a/src/soa4f2.h b/src/soa4f2.h @@ -16,9 +16,10 @@ #ifndef SOA4F2_H #define SOA4F2_H -/* Generate the common soa4fX funcs */ -#define SOA4FX_DIMENSION__ 2 -#include "soa4fX.h" +/* Generate the common soa4f2 funcs */ +#define RSIMD_WIDTH__ 4 +#define RSIMD_SOA_DIMENSION__ 2 +#include "soaXfY.h" static FINLINE v4f_T soa4f2_cross(const v4f_T a[2], const v4f_T b[2]) diff --git a/src/soa4f3.h b/src/soa4f3.h @@ -16,9 +16,10 @@ #ifndef SOA4F3_H #define SOA4F3_H -/* Generate the common soa4fX functions */ -#define SOA4FX_DIMENSION__ 3 -#include "soa4fX.h" +/* Generate the common soa4f3 functions */ +#define RSIMD_WIDTH__ 4 +#define RSIMD_SOA_DIMENSION__ 3 +#include "soaXfY.h" static FINLINE v4f_T* soa4f3_cross(v4f_T dst[3], const v4f_T a[3], const v4f_T b[3]) diff --git a/src/soa4f4.h b/src/soa4f4.h @@ -16,9 +16,10 @@ #ifndef SOA4F4_H #define SOA4F4_H -/* Generate the common soa4fX functions */ -#define SOA4FX_DIMENSION__ 4 -#include "soa4fX.h" +/* Generate the common soa4f4 functions */ +#define RSIMD_WIDTH__ 4 +#define RSIMD_SOA_DIMENSION__ 4 +#include "soaXfY.h" #endif /* SOA4F4_H */ diff --git a/src/soa4fX.h b/src/soa4fX.h @@ -1,352 +0,0 @@ -/* Copyright (C) 2014-2018 Vincent Forest (vaplv@free.fr) - * - * The RSIMD library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * The RSIMD library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ - -/* - * Header used to generate funcs on SoA SIMD float vectors of X dimensions - */ -#if !defined(SOA4FX_DIMENSION__) - #error Missing arguments -#endif - -#if defined(SOA4FX_FUNC__) - #error Unexpected SOA4FX_FUNC__ macro defintion -#endif - -#include "rsimd.h" - -#ifdef COMPILER_GCC - #pragma GCC push_options - #pragma GCC optimize("unroll-loops") -#endif - -STATIC_ASSERT(SOA4FX_DIMENSION__ > 1, Unexpected_value); - -#define SOA4FX_FUNC__(Func) \ - CONCAT(CONCAT(CONCAT(soa4f, SOA4FX_DIMENSION__), _), Func) - -/* Helper macro */ -#define SIZEOF_SOA4FX__ sizeof(v4f_T[SOA4FX_DIMENSION__]) - -#if SOA4FX_DIMENSION__ <= 4 -static FINLINE v4f_T* -CONCAT(soa4f, SOA4FX_DIMENSION__) - (v4f_T* dst - ,const v4f_T x - ,const v4f_T y -#if SOA4FX_DIMENSION__ > 2 - ,const v4f_T z -#endif -#if SOA4FX_DIMENSION__ > 3 - ,const v4f_T w -#endif - ) -{ - ASSERT(dst); - dst[0] = x; - dst[1] = y; -#if SOA4FX_DIMENSION__ > 2 - dst[2] = z; -#endif -#if SOA4FX_DIMENSION__ > 3 - dst[3] = w; -#endif - return dst; -} -#endif - -static FINLINE v4f_T* -SOA4FX_FUNC__(splat)(v4f_T* dst, const v4f_T val) -{ - int i; - ASSERT(dst); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - dst[i] = val; - return dst; -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(set__)(v4f_T* dst, const v4f_T* src) -{ - int i; - ASSERT(dst && src); - ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__)); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - dst[i] = src[i]; - return dst; -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(set)(v4f_T* dst, const v4f_T* src) -{ - ASSERT(dst && src); - if(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__)) { - return SOA4FX_FUNC__(set__)(dst, src); - } else { - v4f_T tmp[SOA4FX_DIMENSION__]; - return SOA4FX_FUNC__(set__)(dst, SOA4FX_FUNC__(set__)(tmp, src)); - } -} - -static FINLINE v4f_T -SOA4FX_FUNC__(dot)(const v4f_T* a, 
const v4f_T* b) -{ - v4f_T dot; - int i; - ASSERT(a && b); - dot = v4f_mul(a[0], b[0]); - FOR_EACH(i, 1, SOA4FX_DIMENSION__) { - dot = v4f_add(dot, v4f_mul(a[i], b[i])); - } - return dot; -} - -static FINLINE v4f_T -SOA4FX_FUNC__(len)(const v4f_T* a) -{ - ASSERT(a); - return v4f_sqrt(SOA4FX_FUNC__(dot)(a, a)); -} - -static FINLINE v4f_T -SOA4FX_FUNC__(normalize)(v4f_T* dst, const v4f_T* a) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - v4f_T sqr_len, rcp_len; - v4f_T mask; - int i; - ASSERT(dst && a); - - sqr_len = SOA4FX_FUNC__(dot)(a, a); - mask = v4f_neq(sqr_len, v4f_zero()); - rcp_len = v4f_rsqrt(sqr_len); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_and(mask, v4f_mul(a[i], rcp_len)); - SOA4FX_FUNC__(set__)(dst, tmp); - return v4f_mul(sqr_len, rcp_len); -} - -static FINLINE v4f_T -SOA4FX_FUNC__(is_normalized)(const v4f_T* a) -{ - return v4f_eq_eps(SOA4FX_FUNC__(len)(a), v4f_set1(1.f), v4f_set1(1.e-6f)); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(add)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_add(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(addf)(v4f_T* dst, const v4f_T* a, const v4f_T f) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_add(a[i], f); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(sub)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_sub(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(subf)(v4f_T* dst, const v4f_T* a, const v4f_T f) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_sub(a[i], f); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - 
-static FINLINE v4f_T* -SOA4FX_FUNC__(mul)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_mul(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(mulf)(v4f_T* dst, const v4f_T* a, const v4f_T f) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_mul(a[i], f); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(div)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_div(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(divf)(v4f_T* dst, const v4f_T* a, const v4f_T f) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_div(a[i], f); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(minus)(v4f_T* dst, const v4f_T* a) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_minus(a[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T -SOA4FX_FUNC__(sum)(const v4f_T* a) -{ - v4f_T f; - int i = 0; - ASSERT(a); - f = a[i]; - FOR_EACH(i, 1, SOA4FX_DIMENSION__) - f = v4f_add(f, a[i]); - return f; -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(lerp) - (v4f_T* dst, - const v4f_T* from, - const v4f_T* to, - const v4f_T t) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - v4f_T t_adjusted; - int i; - ASSERT(dst && from && to); - t_adjusted = v4f_min(v4f_max(t, v4f_zero()), v4f_set1(1.f)); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_add(from[i], v4f_mul(t_adjusted, v4f_sub(to[i], from[i]))); - SOA4FX_FUNC__(set__)(dst, tmp); - return dst; -} - -static FINLINE v4f_T -SOA4FX_FUNC__(eq)(const 
v4f_T* a, const v4f_T* b) -{ - v4f_T is_eq; - int i = 0; - ASSERT(a && b); - is_eq = v4f_eq(a[0], b[0]); - FOR_EACH(i, 1, SOA4FX_DIMENSION__) - is_eq = v4f_and(is_eq, v4f_eq(a[i], b[i])); - return is_eq; -} - -static FINLINE v4f_T -SOA4FX_FUNC__(eq_eps)(const v4f_T* a, const v4f_T* b, const v4f_T eps) -{ - v4f_T is_eq; - int i = 0; - ASSERT(a && b); - is_eq = v4f_eq_eps(a[0], b[0], eps); - FOR_EACH(i, 1, SOA4FX_DIMENSION__) - is_eq = v4f_and(is_eq, v4f_eq_eps(a[i], b[i], eps)); - return is_eq; -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(max)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_max(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(min)(v4f_T* dst, const v4f_T* a, const v4f_T* b) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && a && b); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_min(a[i], b[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(sel) - (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T cond) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && vfalse && vtrue); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_sel(vfalse[i], vtrue[i], cond); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -static FINLINE v4f_T* -SOA4FX_FUNC__(selv) - (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T* vcond) -{ - v4f_T tmp[SOA4FX_DIMENSION__]; - int i; - ASSERT(dst && vfalse && vtrue); - FOR_EACH(i, 0, SOA4FX_DIMENSION__) - tmp[i] = v4f_sel(vfalse[i], vtrue[i], vcond[i]); - return SOA4FX_FUNC__(set__)(dst, tmp); -} - -#undef SIZEOF_SOA4FX__ -#undef SOA4FX_DIMENSION__ -#undef SOA4FX_FUNC__ - -#ifdef COMPILER_GCC - #pragma GCC pop_options -#endif - diff --git a/src/soaXfY.h b/src/soaXfY.h @@ -0,0 +1,389 @@ +/* Copyright (C) 2014-2018 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free 
software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +/* + * Header used to generate funcs on SoA SIMD float vectors of X dimensions + */ +#include "rsimd.h" + +#if !defined(RSIMD_SOA_DIMENSION__) + #error "Undefined RSIMD_SOA_DIMENSION__ macro" +#endif +#if !defined(RSIMD_WIDTH__) + #error "Undefined RSIMD_WIDTH__ macro" +#endif +#if RSIMD_SOA_DIMENSION__ < 1 || RSIMD_SOA_DIMENSION__ > 4 + #error "Unexpected RSIMD_SOA_DIMENSION__ value" +#endif +#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8 + #error "Unexpected RSIMD_WIDTH__ value" +#endif + +/* Macros generic to RSIMD_WIDTH__ */ +#define RSIMD_vXf__(Func) \ + CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func) +#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T) +#define RSIMD_soaXfY_PREFIX__ \ + CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__) +#define RSIMD_soaXfY__(Func) CONCAT(CONCAT(RSIMD_soaXfY_PREFIX__, _), Func) +#define SIZEOF_RSIMD_soaXfY__ sizeof(RSIMD_vXf_T__[RSIMD_SOA_DIMENSION__]) + +/* Force GCC to unroll the loops */ +#ifdef COMPILER_GCC + #pragma GCC push_options + #pragma GCC optimize("unroll-loops") +#endif + +#if RSIMD_SOA_DIMENSION__ <= 4 +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY_PREFIX__ + (RSIMD_vXf_T__* dst + ,const RSIMD_vXf_T__ x + ,const RSIMD_vXf_T__ y +#if RSIMD_SOA_DIMENSION__ > 2 + ,const RSIMD_vXf_T__ z +#endif +#if RSIMD_SOA_DIMENSION__ > 3 
+ ,const RSIMD_vXf_T__ w +#endif + ) +{ + ASSERT(dst); + dst[0] = x; + dst[1] = y; +#if RSIMD_SOA_DIMENSION__ > 2 + dst[2] = z; +#endif +#if RSIMD_SOA_DIMENSION__ > 3 + dst[3] = w; +#endif + return dst; +} +#endif + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(splat)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__ val) +{ + int i; + ASSERT(dst); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + dst[i] = val; + return dst; +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(set__)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src) +{ + int i; + ASSERT(dst && src); + ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + dst[i] = src[i]; + return dst; +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(set)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src) +{ + ASSERT(dst && src); + if(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)) { + return RSIMD_soaXfY__(set__)(dst, src); + } else { + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + return RSIMD_soaXfY__(set__)(dst, RSIMD_soaXfY__(set__)(tmp, src)); + } +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(dot)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ dot; + int i; + ASSERT(a && b); + dot = RSIMD_vXf__(mul)(a[0], b[0]); + FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) { + dot = RSIMD_vXf__(madd)(a[i], b[i], dot); + } + return dot; +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(len)(const RSIMD_vXf_T__* a) +{ + ASSERT(a); + return RSIMD_vXf__(sqrt)(RSIMD_soaXfY__(dot)(a, a)); +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(normalize)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + RSIMD_vXf_T__ sqr_len, rcp_len; + RSIMD_vXf_T__ mask; + int i; + ASSERT(dst && a); + + sqr_len = RSIMD_soaXfY__(dot)(a, a); + mask = RSIMD_vXf__(neq)(sqr_len, RSIMD_vXf__(zero)()); + rcp_len = RSIMD_vXf__(rsqrt)(sqr_len); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(and)(mask, 
RSIMD_vXf__(mul)(a[i], rcp_len)); + RSIMD_soaXfY__(set__)(dst, tmp); + return RSIMD_vXf__(mul)(sqr_len, rcp_len); +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(is_normalized)(const RSIMD_vXf_T__* a) +{ + return RSIMD_vXf__(eq_eps) + (RSIMD_soaXfY__(len)(a), + RSIMD_vXf__(set1)(1.f), + RSIMD_vXf__(set1)(1.e-6f)); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(add) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(add)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(addf) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(add)(a[i], f); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(sub) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(sub)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(subf) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(sub)(a[i], f); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(mul) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(mul)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* 
+RSIMD_soaXfY__(mulf) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(mul)(a[i], f); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(div) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(div)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(divf) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(div)(a[i], f); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(minus)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(minus)(a[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(sum)(const RSIMD_vXf_T__* a) +{ + RSIMD_vXf_T__ f; + int i = 0; + ASSERT(a); + f = a[i]; + FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) + f = RSIMD_vXf__(add)(f, a[i]); + return f; +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(lerp) + (RSIMD_vXf_T__* dst, + const RSIMD_vXf_T__* from, + const RSIMD_vXf_T__* to, + const RSIMD_vXf_T__ t) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + RSIMD_vXf_T__ t_adjusted; + int i; + ASSERT(dst && from && to); + t_adjusted = RSIMD_vXf__(min) + (RSIMD_vXf__(max)(t, RSIMD_vXf__(zero)()), RSIMD_vXf__(set1)(1.f)); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(madd) + (t_adjusted, RSIMD_vXf__(sub)(to[i], from[i]), from[i]); + 
RSIMD_soaXfY__(set__)(dst, tmp); + return dst; +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(eq)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ is_eq; + int i = 0; + ASSERT(a && b); + is_eq = RSIMD_vXf__(eq)(a[0], b[0]); + FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) + is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq)(a[i], b[i])); + return is_eq; +} + +static FINLINE RSIMD_vXf_T__ +RSIMD_soaXfY__(eq_eps) + (const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b, const RSIMD_vXf_T__ eps) +{ + RSIMD_vXf_T__ is_eq; + int i = 0; + ASSERT(a && b); + is_eq = RSIMD_vXf__(eq_eps)(a[0], b[0], eps); + FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) + is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq_eps)(a[i], b[i], eps)); + return is_eq; +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(max) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(max)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(min) + (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && a && b); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(min)(a[i], b[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(sel) + (RSIMD_vXf_T__* dst, + const RSIMD_vXf_T__* vfalse, + const RSIMD_vXf_T__* vtrue, + const RSIMD_vXf_T__ cond) +{ + RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && vfalse && vtrue); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], cond); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +static FINLINE RSIMD_vXf_T__* +RSIMD_soaXfY__(selv) + (RSIMD_vXf_T__* dst, + const RSIMD_vXf_T__* vfalse, + const RSIMD_vXf_T__* vtrue, + const RSIMD_vXf_T__* vcond) +{ + RSIMD_vXf_T__ 
tmp[RSIMD_SOA_DIMENSION__]; + int i; + ASSERT(dst && vfalse && vtrue); + FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) + tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], vcond[i]); + return RSIMD_soaXfY__(set__)(dst, tmp); +} + +/* Restore compilation parameters */ +#ifdef COMPILER_GCC + #pragma GCC pop_options +#endif + +/* Undef helper macros */ +#undef RSIMD_vXf__ +#undef RSIMD_vXf_T__ +#undef RSIMD_soaXfY_PREFIX__ +#undef RSIMD_soaXfY__ +#undef SIZEOF_RSIMD_soaXfY__ + +/* Undef parameters */ +#undef RSIMD_SOA_DIMENSION__ +#undef RSIMD_WIDTH__ +