soaXfY.h (9318B)
/* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
 *
 * The RSIMD library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * The RSIMD library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */

/*
 * Header used to generate funcs on SoA SIMD float vectors of Y dimensions
 *
 * Template header: it must be included after soaXfY_begin.h, which defines
 * the expansion macros used below — RSIMD_vXf_T__ (the SIMD register type),
 * RSIMD_SOA_DIMENSION__ (the vector dimension Y), and the name-mangling
 * macros RSIMD_soaXfY__()/RSIMD_vXf__(). An SoA vector is an array of
 * RSIMD_SOA_DIMENSION__ SIMD registers: entry i holds coordinate i of a
 * whole packet of vectors.
 */

#ifndef SOAXFY_BEGIN_H
#error "The soaXfY_begin.h header must be included first"
#endif

/* Force GCC to unroll the loops */
#ifdef COMPILER_GCC
#pragma GCC push_options
#pragma GCC optimize("unroll-loops")
#endif

/* Constructor: gather the per-coordinate registers x, y (and z, w when the
 * dimension requires them) into the SoA array `dst`. Only generated for
 * dimensions up to 4. Returns dst. */
#if RSIMD_SOA_DIMENSION__ <= 4
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY_PREFIX__
  (RSIMD_vXf_T__* dst
  ,const RSIMD_vXf_T__ x
  ,const RSIMD_vXf_T__ y
#if RSIMD_SOA_DIMENSION__ > 2
  ,const RSIMD_vXf_T__ z
#endif
#if RSIMD_SOA_DIMENSION__ > 3
  ,const RSIMD_vXf_T__ w
#endif
  )
{
  ASSERT(dst);
  dst[0] = x;
  dst[1] = y;
#if RSIMD_SOA_DIMENSION__ > 2
  dst[2] = z;
#endif
#if RSIMD_SOA_DIMENSION__ > 3
  dst[3] = w;
#endif
  return dst;
}
#endif

/* Broadcast: set every coordinate of dst to `val`. Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(splat)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__ val)
{
  int i;
  ASSERT(dst);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    dst[i] = val;
  return dst;
}

/* Internal copy helper: dst and src must NOT overlap (asserted).
 * Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(set__)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
{
  int i;
  ASSERT(dst && src);
  ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__));
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    dst[i] = src[i];
  return dst;
}

/* Public copy: tolerates overlapping dst/src by staging through a local
 * temporary when an overlap is detected. Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(set)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
{
  ASSERT(dst && src);
  if(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)) {
    return RSIMD_soaXfY__(set__)(dst, src);
  } else {
    RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    return RSIMD_soaXfY__(set__)(dst, RSIMD_soaXfY__(set__)(tmp, src));
  }
}

/* Dot product of a and b: one mul for coordinate 0, then a madd
 * accumulation over the remaining coordinates. */
static FINLINE RSIMD_vXf_T__
RSIMD_soaXfY__(dot)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
{
  RSIMD_vXf_T__ dot;
  int i;
  ASSERT(a && b);
  dot = RSIMD_vXf__(mul)(a[0], b[0]);
  FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) {
    dot = RSIMD_vXf__(madd)(a[i], b[i], dot);
  }
  return dot;
}

/* Euclidean length of a, i.e. sqrt(dot(a, a)). */
static FINLINE RSIMD_vXf_T__
RSIMD_soaXfY__(len)(const RSIMD_vXf_T__* a)
{
  ASSERT(a);
  return RSIMD_vXf__(sqrt)(RSIMD_soaXfY__(dot)(a, a));
}

/* Normalize a into dst and return sqr_len * rsqrt(sqr_len), i.e. the
 * length. Lanes whose squared length compares equal to zero are bitwise
 * masked out in dst (the `mask`/`and` pair), which keeps dst finite where
 * rsqrt of a zero lane would not be.
 * NOTE(review): the RETURN value is not masked, so it is presumably
 * not meaningful for zero-length lanes — confirm against callers. */
static FINLINE RSIMD_vXf_T__
RSIMD_soaXfY__(normalize)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  RSIMD_vXf_T__ sqr_len, rcp_len;
  RSIMD_vXf_T__ mask;
  int i;
  ASSERT(dst && a);

  sqr_len = RSIMD_soaXfY__(dot)(a, a);
  mask = RSIMD_vXf__(neq)(sqr_len, RSIMD_vXf__(zero)());
  rcp_len = RSIMD_vXf__(rsqrt)(sqr_len);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(and)(mask, RSIMD_vXf__(mul)(a[i], rcp_len));
  RSIMD_soaXfY__(set__)(dst, tmp);
  return RSIMD_vXf__(mul)(sqr_len, rcp_len);
}

/* Per-lane test that len(a) equals 1 within a 1e-6 tolerance. */
static FINLINE RSIMD_vXf_T__
RSIMD_soaXfY__(is_normalized)(const RSIMD_vXf_T__* a)
{
  return RSIMD_vXf__(eq_eps)
    (RSIMD_soaXfY__(len)(a),
     RSIMD_vXf__(set1)(1.f),
     RSIMD_vXf__(set1)(1.e-6f));
}

/* dst = a + b component-wise. Results are staged in a local temporary and
 * copied with set__, so dst may alias a or b. Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(add)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a && b);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(add)(a[i], b[i]);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

/* dst[i] = a[i] + f for every coordinate (f is a SIMD register, added to
 * each coordinate). Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(addf)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(add)(a[i], f);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

/* dst = a - b component-wise. Aliasing-safe (staged in tmp). Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(sub)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a && b);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(sub)(a[i], b[i]);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

/* dst[i] = a[i] - f for every coordinate. Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(subf)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(sub)(a[i], f);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

/* dst = a * b component-wise (Hadamard product). Aliasing-safe.
 * Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(mul)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a && b);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(mul)(a[i], b[i]);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

/* dst[i] = a[i] * f for every coordinate (uniform scale). Returns dst. */
static FINLINE RSIMD_vXf_T__*
RSIMD_soaXfY__(mulf)
  (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
{
  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
  int i;
  ASSERT(dst && a);
  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    tmp[i] = RSIMD_vXf__(mul)(a[i], f);
  return RSIMD_soaXfY__(set__)(dst, tmp);
}

static FINLINE RSIMD_vXf_T__* 210 RSIMD_soaXfY__(div) 211 (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) 212 { 213 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 214 int i; 215 ASSERT(dst && a && b); 216 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 217 tmp[i] = RSIMD_vXf__(div)(a[i], b[i]); 218 return RSIMD_soaXfY__(set__)(dst, tmp); 219 } 220 221 static FINLINE RSIMD_vXf_T__* 222 RSIMD_soaXfY__(divf) 223 (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f) 224 { 225 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 226 int i; 227 ASSERT(dst && a); 228 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 229 tmp[i] = RSIMD_vXf__(div)(a[i], f); 230 return RSIMD_soaXfY__(set__)(dst, tmp); 231 } 232 233 static FINLINE RSIMD_vXf_T__* 234 RSIMD_soaXfY__(minus)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a) 235 { 236 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 237 int i; 238 ASSERT(dst && a); 239 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 240 tmp[i] = RSIMD_vXf__(minus)(a[i]); 241 return RSIMD_soaXfY__(set__)(dst, tmp); 242 } 243 244 static FINLINE RSIMD_vXf_T__ 245 RSIMD_soaXfY__(sum)(const RSIMD_vXf_T__* a) 246 { 247 RSIMD_vXf_T__ f; 248 int i = 0; 249 ASSERT(a); 250 f = a[i]; 251 FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) 252 f = RSIMD_vXf__(add)(f, a[i]); 253 return f; 254 } 255 256 static FINLINE RSIMD_vXf_T__* 257 RSIMD_soaXfY__(lerp) 258 (RSIMD_vXf_T__* dst, 259 const RSIMD_vXf_T__* from, 260 const RSIMD_vXf_T__* to, 261 const RSIMD_vXf_T__ t) 262 { 263 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 264 int i; 265 ASSERT(dst && from && to); 266 267 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 268 tmp[i] = RSIMD_vXf__(lerp)(from[i], to[i], t); 269 RSIMD_soaXfY__(set__)(dst, tmp); 270 return dst; 271 } 272 273 static FINLINE RSIMD_vXf_T__ 274 RSIMD_soaXfY__(eq)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) 275 { 276 RSIMD_vXf_T__ is_eq; 277 int i = 0; 278 ASSERT(a && b); 279 is_eq = RSIMD_vXf__(eq)(a[0], b[0]); 280 FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) 281 is_eq = RSIMD_vXf__(and)(is_eq, 
RSIMD_vXf__(eq)(a[i], b[i])); 282 return is_eq; 283 } 284 285 static FINLINE RSIMD_vXf_T__ 286 RSIMD_soaXfY__(eq_eps) 287 (const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b, const RSIMD_vXf_T__ eps) 288 { 289 RSIMD_vXf_T__ is_eq; 290 int i = 0; 291 ASSERT(a && b); 292 is_eq = RSIMD_vXf__(eq_eps)(a[0], b[0], eps); 293 FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) 294 is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq_eps)(a[i], b[i], eps)); 295 return is_eq; 296 } 297 298 static FINLINE RSIMD_vXf_T__* 299 RSIMD_soaXfY__(max) 300 (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) 301 { 302 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 303 int i; 304 ASSERT(dst && a && b); 305 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 306 tmp[i] = RSIMD_vXf__(max)(a[i], b[i]); 307 return RSIMD_soaXfY__(set__)(dst, tmp); 308 } 309 310 static FINLINE RSIMD_vXf_T__* 311 RSIMD_soaXfY__(min) 312 (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b) 313 { 314 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 315 int i; 316 ASSERT(dst && a && b); 317 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 318 tmp[i] = RSIMD_vXf__(min)(a[i], b[i]); 319 return RSIMD_soaXfY__(set__)(dst, tmp); 320 } 321 322 static FINLINE RSIMD_vXf_T__* 323 RSIMD_soaXfY__(sel) 324 (RSIMD_vXf_T__* dst, 325 const RSIMD_vXf_T__* vfalse, 326 const RSIMD_vXf_T__* vtrue, 327 const RSIMD_vXf_T__ cond) 328 { 329 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 330 int i; 331 ASSERT(dst && vfalse && vtrue); 332 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 333 tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], cond); 334 return RSIMD_soaXfY__(set__)(dst, tmp); 335 } 336 337 static FINLINE RSIMD_vXf_T__* 338 RSIMD_soaXfY__(selv) 339 (RSIMD_vXf_T__* dst, 340 const RSIMD_vXf_T__* vfalse, 341 const RSIMD_vXf_T__* vtrue, 342 const RSIMD_vXf_T__* vcond) 343 { 344 RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__]; 345 int i; 346 ASSERT(dst && vfalse && vtrue); 347 FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__) 348 tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], vcond[i]); 349 
return RSIMD_soaXfY__(set__)(dst, tmp); 350 } 351 352 /* Restore compilation parameters */ 353 #ifdef COMPILER_GCC 354 #pragma GCC pop_options 355 #endif 356