aosf33.h (8252B)
1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr) 2 * 3 * The RSIMD library is free software: you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published 5 * by the Free Software Foundation, either version 3 of the License, or 6 * (at your option) any later version. 7 * 8 * The RSIMD library is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ 15 16 #ifndef AOSF33_H 17 #define AOSF33_H 18 19 #include "rsimd.h" 20 #include <math.h> 21 22 /* 23 * Functions on column major AoS float33 matrices. A 3x3 matrix is a set of 3 24 * 4-wide SIMD float vectors, each representing a matrix column. Actually the 25 * fourth component of each vector is ignored and its value is thus undefined. 26 */ 27 28 /******************************************************************************* 29 * Set operations 30 ******************************************************************************/ 31 static FINLINE float* 32 aosf33_store(float res[9]/* Column major */, const v4f_T m[3]) 33 { 34 ALIGN(16) float tmp[4]; 35 int i; 36 ASSERT(res && m); 37 FOR_EACH(i, 0, 3) { 38 v4f_store(tmp, m[i]); 39 res[i*3 + 0] = tmp[0]; 40 res[i*3 + 1] = tmp[1]; 41 res[i*3 + 2] = tmp[2]; 42 } 43 return res; 44 } 45 46 static FINLINE v4f_T* 47 aosf33_load(v4f_T res[3], const float m[9]/* Column major */) 48 { 49 int i; 50 ASSERT(res && m); 51 FOR_EACH(i, 0, 3) 52 res[i] = v4f_set(m[i*3+0], m[i*3+1], m[i*3+2], 0.f); 53 return res; 54 } 55 56 static FINLINE v4f_T* 57 aosf33_set(v4f_T m[3], const v4f_T c0, const v4f_T c1, const v4f_T c2) 58 { 59 ASSERT(m); 60 m[0] = c0; 61 m[1] = c1; 62 m[2] = c2; 63 return m; 64 } 65 66 static FINLINE v4f_T* 67 aosf33_identity(v4f_T m[3]) 68 { 69 ASSERT(m); 70 m[0] = v4f_set(1.f, 0.f, 0.f, 0.f); 71 m[1] = v4f_set(0.f, 1.f, 0.f, 0.f); 72 m[2] = v4f_set(0.f, 0.f, 1.f, 0.f); 73 return m; 74 } 75 76 static FINLINE v4f_T* 77 aosf33_zero(v4f_T m[3]) 78 { 79 ASSERT(m); 80 m[0] = v4f_zero(); 81 m[1] = v4f_zero(); 82 m[2] = v4f_zero(); 83 return m; 84 } 85 86 static FINLINE v4f_T* 87 aosf33_set_row0(v4f_T m[3], const v4f_T v) 88 { 89 ASSERT(m); 90 m[0] = v4f_ayzw(m[0], v); 91 m[1] = v4f_ayzw(m[1], v4f_yyww(v)); 92 m[2] = v4f_ayzw(m[2], v4f_zwzw(v)); 93 return m; 94 } 95 96 static FINLINE v4f_T* 97 aosf33_set_row1(v4f_T m[3], const v4f_T v) 98 { 99 ASSERT(m); 100 m[0] = v4f_xbzw(m[0], v4f_xxyy(v)); 101 m[1] = v4f_xbzw(m[1], v); 102 m[2] = v4f_xbzw(m[2], v4f_zzww(v)); 103 return m; 104 } 105 106 static FINLINE v4f_T* 107 aosf33_set_row2(v4f_T m[3], const v4f_T v) 108 { 109 ASSERT(m); 110 m[0] = v4f_xyab(m[0], v4f_xyxy(v)); 111 m[1] = v4f_xyab(m[1], v4f_yyzz(v)); 112 m[2] = v4f_xyab(m[2], v4f_zzww(v)); 113 return m; 114 } 115 116 static FINLINE v4f_T* 117 aosf33_set_row(v4f_T m[3], const v4f_T v, const int id) 118 { 119 const v4f_T mask = v4f_mask(-(id==0), -(id==1), -(id==2), 0); 120 ASSERT(m && id >= 0 && id <= 2); 121 m[0] = v4f_sel(m[0], v4f_xxxx(v), mask); 122 m[1] = v4f_sel(m[1], v4f_yyyy(v), mask); 123 m[2] = v4f_sel(m[2], v4f_zzzz(v), mask); 124 return m; 125 } 126 127 static FINLINE v4f_T* 128 aosf33_set_col(v4f_T m[3], const v4f_T v, const int id) 129 { 130 ASSERT(m && id >= 0 && id <= 2); 131 m[id] = v; 132 return m; 133 } 134 135 /******************************************************************************* 136 * Arithmetic operations 137 ******************************************************************************/ 138 static FINLINE v4f_T* 139 aosf33_add(v4f_T res[3], const v4f_T m0[3], const v4f_T m1[3]) 140 { 141 ASSERT(res && m0 && m1); 142 res[0] = v4f_add(m0[0], m1[0]); 143 res[1] = v4f_add(m0[1], m1[1]); 144 res[2] = v4f_add(m0[2], m1[2]); 145 return res; 146 } 147 148 static FINLINE v4f_T* 149 aosf33_sub(v4f_T res[3], const v4f_T m0[3], const v4f_T m1[3]) 150 { 151 ASSERT(res && m0 && m1); 152 res[0] = v4f_sub(m0[0], m1[0]); 153 res[1] = v4f_sub(m0[1], m1[1]); 154 res[2] = v4f_sub(m0[2], m1[2]); 155 return res; 156 } 157 158 static FINLINE v4f_T* 159 aosf33_minus(v4f_T res[3], const v4f_T m[3]) 160 { 161 ASSERT(res && m); 162 res[0] = v4f_minus(m[0]); 163 res[1] = v4f_minus(m[1]); 164 res[2] = v4f_minus(m[2]); 165 return res; 166 } 167 168 static FINLINE v4f_T* 169 aosf33_abs(v4f_T res[3], const v4f_T m[3]) 170 { 171 ASSERT(res && m); 172 res[0] = v4f_abs(m[0]); 173 res[1] = v4f_abs(m[1]); 174 res[2] = v4f_abs(m[2]); 175 return res; 176 } 177 178 static FINLINE v4f_T* 179 aosf33_mul(v4f_T res[3], const v4f_T m[3], const v4f_T v) 180 { 181 ASSERT(res && m); 182 res[0] = v4f_mul(m[0], v); 183 res[1] = v4f_mul(m[1], v); 184 res[2] = v4f_mul(m[2], v); 185 return res; 186 } 187 188 static FINLINE v4f_T 189 aosf33_mulf3(const v4f_T m[3], const v4f_T v) 190 { 191 v4f_T r0, r1; 192 ASSERT(m); 193 r0 = v4f_mul(m[0], v4f_xxxx(v)); 194 r1 = v4f_madd(m[1], v4f_yyyy(v), r0); 195 return v4f_madd(m[2], v4f_zzzz(v), r1); 196 } 197 198 static FINLINE v4f_T 199 aosf3_mulf33(const v4f_T v, const v4f_T m[3]) 200 { 201 v4f_T xxxx, yyyy, zzzz, yyzz; 202 ASSERT(m); 203 xxxx = v4f_dot3(v, m[0]); 204 yyyy = v4f_dot3(v, m[1]); 205 zzzz = v4f_dot3(v, m[2]); 206 yyzz = v4f_xyab(yyyy, zzzz); 207 return v4f_ayzw(yyzz, xxxx); 208 } 209 210 static FINLINE v4f_T* 211 aosf33_mulf33(v4f_T res[3], const v4f_T a[3], const v4f_T b[3]) 212 { 213 v4f_T c0, c1, c2; 214 ASSERT(res && a && b); 215 c0 = aosf33_mulf3(a, b[0]); 216 c1 = aosf33_mulf3(a, b[1]); 217 c2 = aosf33_mulf3(a, b[2]); 218 res[0] = c0; 219 res[1] = c1; 220 res[2] = c2; 221 return res; 222 } 223 224 static FINLINE v4f_T* 225 aosf33_transpose(v4f_T res[3], const v4f_T m[3]) 226 { 227 v4f_T c0, c1, c2; 228 v4f_T x0x2y0y2, z0z2w0w2, z1z1y1y1; 229 ASSERT(res && m); 230 c0 = m[0]; 231 c1 = m[1]; 232 c2 = m[2]; 233 x0x2y0y2 = v4f_xayb(c0, c2); 234 z0z2w0w2 = v4f_zcwd(c0, c2); 235 z1z1y1y1 = v4f_zzyy(c1); 236 res[0] = v4f_xayb(x0x2y0y2, c1); 237 res[1] = v4f_zcwd(x0x2y0y2, z1z1y1y1); 238 res[2] = v4f_xayb(z0z2w0w2, z1z1y1y1); 239 return res; 240 } 241 242 static FINLINE v4f_T 243 aosf33_det(const v4f_T m[3]) 244 { 245 ASSERT(m); 246 return v4f_dot3(m[2], v4f_cross3(m[0], m[1])); 247 } 248 249 static FINLINE v4f_T /* Return the determinant */ 250 aosf33_invtrans(v4f_T res[3], const v4f_T m[3]) 251 { 252 v4f_T t[3], det, invdet; 253 ASSERT(res && m); 254 t[0] = v4f_cross3(m[1], m[2]); 255 t[1] = v4f_cross3(m[2], m[0]); 256 t[2] = v4f_cross3(m[0], m[1]); 257 det = v4f_dot3(t[2], m[2]); 258 invdet = v4f_rcp(det); 259 aosf33_mul(res, t, invdet); 260 return det; 261 } 262 263 static FINLINE v4f_T 264 aosf33_inverse(v4f_T res[3], const v4f_T m[3]) 265 { 266 v4f_T det; 267 ASSERT(res && m); 268 det = aosf33_invtrans(res, m); 269 aosf33_transpose(res, res); 270 return det; 271 } 272 273 /******************************************************************************* 274 * Get operations 275 ******************************************************************************/ 276 static FINLINE v4f_T 277 aosf33_row0(const v4f_T m[3]) 278 { 279 ASSERT(m); 280 return v4f_ayzw(v4f_xyab(v4f_xxzz(m[1]), v4f_xxzz(m[2])), m[0]); 281 } 282 283 static FINLINE v4f_T 284 aosf33_row1(const v4f_T m[3]) 285 { 286 ASSERT(m); 287 return v4f_ayzw(v4f_xyab(v4f_yyww(m[1]), v4f_yyww(m[2])), v4f_yyww(m[0])); 288 } 289 290 static FINLINE v4f_T 291 aosf33_row2(const v4f_T m[3]) 292 { 293 ASSERT(m); 294 return v4f_ayzw(v4f_xyab(v4f_zzww(m[1]), v4f_zzww(m[2])), v4f_zzww(m[0])); 295 } 296 297 static FINLINE v4f_T 298 aosf33_row(const v4f_T m[3], int id) 299 { 300 v4f_T t[3]; 301 ASSERT(m && id >= 0 && id <= 2); 302 aosf33_transpose(t, m); 303 return t[id]; 304 } 305 306 static FINLINE v4f_T 307 aosf33_col(const v4f_T m[3], int id) 308 { 309 ASSERT(m && id >= 0 && id <= 2); 310 return m[id]; 311 } 312 313 /******************************************************************************* 314 * Build functions 315 ******************************************************************************/ 316 static FINLINE v4f_T* /* XYZ norm */ 317 aosf33_rotation(v4f_T res[3], float pitch, float yaw, float roll) 318 { 319 float c1, c2, c3, s1, s2, s3; 320 ASSERT(res); 321 c1 = (float)cos(pitch); 322 c2 = (float)cos(yaw); 323 c3 = (float)cos(roll); 324 s1 = (float)sin(pitch); 325 s2 = (float)sin(yaw); 326 s3 = (float)sin(roll); 327 res[0] = v4f_set(c2*c3, c1*s3 + c3*s1*s2, s1*s3 - c1*c3*s2, 0.f); 328 res[1] = v4f_set(-c2*s3, c1*c3 - s1*s2*s3, c1*s2*s3 + c3*s1, 0.f); 329 res[2] = v4f_set(s2, -c2*s1, c1*c2, 0.f); 330 return res; 331 } 332 333 static FINLINE v4f_T* /* rotation around the Y axis */ 334 aosf33_yaw_rotation(v4f_T res[3], float yaw) 335 { 336 float c, s; 337 ASSERT(res); 338 c = (float)cos(yaw); 339 s = (float)sin(yaw); 340 res[0] = v4f_set(c, 0.f, -s, 0.f); 341 res[1] = v4f_set(0.f, 1.f, 0.f, 0.f); 342 res[2] = v4f_set(s, 0.f, c, 0.f); 343 return res; 344 } 345 346 #endif /* AOSF33_H */ 347