rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

test_aosf44.c (11325B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "aosf44.h"
     17 #include <rsys/float44.h>
     18 /* Compare Mat, stored as 16 floats, against expected values via f44_eq_eps. */
     19 #define AOSF44_EQ_EPS(Mat, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Eps)\
     20   {                                                                            \
     21     ALIGN(16) float a[16], b[16]; /* aligned like tmp in main() */             \
     22     b[0] = (A); b[1] = (B); b[2] = (C); b[3] = (D);                            \
     23     b[4] = (E); b[5] = (F); b[6] = (G); b[7] = (H);                            \
     24     b[8] = (I); b[9] = (J); b[10]= (K); b[11]= (L);                            \
     25     b[12]= (M); b[13]= (N); b[14]= (O); b[15]= (P);                            \
     26     CHK(f44_eq_eps(aosf44_store(a, (Mat)), b, Eps) == 1);                      \
     27   } (void)0
     28 #define AOSF44_EQ(Mat, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P)         \
     29   AOSF44_EQ_EPS(Mat, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, 0.f)
     30 /* Test driver for the aosf44 API: every expectation goes through CHK. */
     31 int
     32 main(int argc, char** argv)
     33 {
     34   v4f_T m[4], n[4], o[4], v;
     35   ALIGN(16) float tmp[16];
     36   (void)argc, (void)argv;
     37   /* aosf44_set returns its destination; each vector is stored as 4 consecutive floats */
     38   CHK(aosf44_set(m,
     39     v4f_set(0.f, 1.f, 2.f, 3.f),
     40     v4f_set(4.f, 5.f, 6.f, 7.f),
     41     v4f_set(8.f, 9.f, 10.f, 11.f),
     42     v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
     43   AOSF44_EQ(m,
     44     0.f, 1.f, 2.f, 3.f,
     45     4.f, 5.f, 6.f, 7.f,
     46     8.f, 9.f, 10.f, 11.f,
     47     12.f, 13.f, 14.f, 15.f);
     48   /* aosf44_store returns the destination buffer and fills its 16 floats in order */
     49   CHK(aosf44_store(tmp, m) == tmp);
     50   CHK(tmp[0] == 0.f);
     51   CHK(tmp[1] == 1.f);
     52   CHK(tmp[2] == 2.f);
     53   CHK(tmp[3] == 3.f);
     54   CHK(tmp[4] == 4.f);
     55   CHK(tmp[5] == 5.f);
     56   CHK(tmp[6] == 6.f);
     57   CHK(tmp[7] == 7.f);
     58   CHK(tmp[8] == 8.f);
     59   CHK(tmp[9] == 9.f);
     60   CHK(tmp[10] == 10.f);
     61   CHK(tmp[11] == 11.f);
     62   CHK(tmp[12] == 12.f);
     63   CHK(tmp[13] == 13.f);
     64   CHK(tmp[14] == 14.f);
     65   CHK(tmp[15] == 15.f);
     66   /* aosf44_load reads the 16 floats back into the matrix and returns it */
     67   tmp[0] = 0.f; tmp[1] = 2.f; tmp[2] = 4.f; tmp[3] = 6.f;
     68   tmp[4] = 8.f; tmp[5] = 10.f; tmp[6] = 12.f; tmp[7] = 14.f;
     69   tmp[8] = 16.f; tmp[9] = 18.f; tmp[10] = 20.f; tmp[11] = 22.f;
     70   tmp[12] = 24.f; tmp[13] = 26.f; tmp[14] = 28.f; tmp[15] = 30.f;
     71   CHK(aosf44_load(m, tmp) == m);
     72   AOSF44_EQ(m,
     73     0.f, 2.f, 4.f, 6.f,
     74     8.f, 10.f, 12.f, 14.f,
     75     16.f, 18.f, 20.f, 22.f,
     76     24.f, 26.f, 28.f, 30.f);
     77   /* aosf44_identity */
     78   CHK(aosf44_identity(m) == m);
     79   AOSF44_EQ(m,
     80     1.f, 0.f, 0.f, 0.f,
     81     0.f, 1.f, 0.f, 0.f,
     82     0.f, 0.f, 1.f, 0.f,
     83     0.f, 0.f, 0.f, 1.f);
     84   /* aosf44_zero */
     85   CHK(aosf44_zero(m) == m);
     86   AOSF44_EQ(m,
     87     0.f, 0.f, 0.f, 0.f,
     88     0.f, 0.f, 0.f, 0.f,
     89     0.f, 0.f, 0.f, 0.f,
     90     0.f, 0.f, 0.f, 0.f);
     91   /* aosf44_set_row<N>: writes component N of each stored group of 4, keeps the rest */
     92   CHK(aosf44_set_row0(m, v4f_set(0.f, 1.f, 2.f, 3.f)) == m);
     93   AOSF44_EQ(m,
     94     0.f, 0.f, 0.f, 0.f,
     95     1.f, 0.f, 0.f, 0.f,
     96     2.f, 0.f, 0.f, 0.f,
     97     3.f, 0.f, 0.f, 0.f);
     98   CHK(aosf44_set_row1(m, v4f_set(4.f, 5.f, 6.f, 7.f)) == m);
     99   AOSF44_EQ(m,
    100     0.f, 4.f, 0.f, 0.f,
    101     1.f, 5.f, 0.f, 0.f,
    102     2.f, 6.f, 0.f, 0.f,
    103     3.f, 7.f, 0.f, 0.f);
    104   CHK(aosf44_set_row2(m, v4f_set(8.f, 9.f, 10.f, 11.f)) == m);
    105   AOSF44_EQ(m,
    106     0.f, 4.f, 8.f, 0.f,
    107     1.f, 5.f, 9.f, 0.f,
    108     2.f, 6.f, 10.f, 0.f,
    109     3.f, 7.f, 11.f, 0.f);
    110   CHK(aosf44_set_row3(m, v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
    111   AOSF44_EQ(m,
    112     0.f, 4.f, 8.f, 12.f,
    113     1.f, 5.f, 9.f, 13.f,
    114     2.f, 6.f, 10.f, 14.f,
    115     3.f, 7.f, 11.f, 15.f);
    116   /* Same sequence through the indexed setter aosf44_set_row(m, v, id) */
    117   CHK(aosf44_zero(m) == m);
    118   CHK(aosf44_set_row(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0) == m);
    119   AOSF44_EQ(m,
    120     0.f, 0.f, 0.f, 0.f,
    121     1.f, 0.f, 0.f, 0.f,
    122     2.f, 0.f, 0.f, 0.f,
    123     3.f, 0.f, 0.f, 0.f);
    124   CHK(aosf44_set_row(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1) == m);
    125   AOSF44_EQ(m,
    126     0.f, 4.f, 0.f, 0.f,
    127     1.f, 5.f, 0.f, 0.f,
    128     2.f, 6.f, 0.f, 0.f,
    129     3.f, 7.f, 0.f, 0.f);
    130   CHK(aosf44_set_row(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2) == m);
    131   AOSF44_EQ(m,
    132     0.f, 4.f, 8.f, 0.f,
    133     1.f, 5.f, 9.f, 0.f,
    134     2.f, 6.f, 10.f, 0.f,
    135     3.f, 7.f, 11.f, 0.f);
    136   CHK(aosf44_set_row(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3) == m);
    137   AOSF44_EQ(m,
    138     0.f, 4.f, 8.f, 12.f,
    139     1.f, 5.f, 9.f, 13.f,
    140     2.f, 6.f, 10.f, 14.f,
    141     3.f, 7.f, 11.f, 15.f);
    142   /* aosf44_set_col(m, v, id): overwrites one stored group of 4 floats */
    143   CHK(aosf44_zero(m) == m);
    144   CHK(aosf44_set_col(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0) == m);
    145   AOSF44_EQ(m,
    146     0.f, 1.f, 2.f, 3.f,
    147     0.f, 0.f, 0.f, 0.f,
    148     0.f, 0.f, 0.f, 0.f,
    149     0.f, 0.f, 0.f, 0.f);
    150   CHK(aosf44_set_col(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1) == m);
    151   AOSF44_EQ(m,
    152     0.f, 1.f, 2.f, 3.f,
    153     4.f, 5.f, 6.f, 7.f,
    154     0.f, 0.f, 0.f, 0.f,
    155     0.f, 0.f, 0.f, 0.f);
    156   CHK(aosf44_set_col(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2) == m);
    157   AOSF44_EQ(m,
    158     0.f, 1.f, 2.f, 3.f,
    159     4.f, 5.f, 6.f, 7.f,
    160     8.f, 9.f, 10.f, 11.f,
    161     0.f, 0.f, 0.f, 0.f);
    162   CHK(aosf44_set_col(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3) == m);
    163   AOSF44_EQ(m,
    164     0.f, 1.f, 2.f, 3.f,
    165     4.f, 5.f, 6.f, 7.f,
    166     8.f, 9.f, 10.f, 11.f,
    167     12.f, 13.f, 14.f, 15.f);
    168   /* Row getters, read from the matrix built by the aosf44_set_col calls above */
    169   v = aosf44_row0(m);
    170   CHK(v4f_x(v) == 0.f);
    171   CHK(v4f_y(v) == 4.f);
    172   CHK(v4f_z(v) == 8.f);
    173   CHK(v4f_w(v) == 12.f);
    174 
    175   v = aosf44_row1(m);
    176   CHK(v4f_x(v) == 1.f);
    177   CHK(v4f_y(v) == 5.f);
    178   CHK(v4f_z(v) == 9.f);
    179   CHK(v4f_w(v) == 13.f);
    180 
    181   v = aosf44_row2(m);
    182   CHK(v4f_x(v) == 2.f);
    183   CHK(v4f_y(v) == 6.f);
    184   CHK(v4f_z(v) == 10.f);
    185   CHK(v4f_w(v) == 14.f);
    186 
    187   v = aosf44_row3(m);
    188   CHK(v4f_x(v) == 3.f);
    189   CHK(v4f_y(v) == 7.f);
    190   CHK(v4f_z(v) == 11.f);
    191   CHK(v4f_w(v) == 15.f);
    192   /* Indexed row getter: same expectations as aosf44_row0..3 */
    193   v = aosf44_row(m, 0);
    194   CHK(v4f_x(v) == 0.f);
    195   CHK(v4f_y(v) == 4.f);
    196   CHK(v4f_z(v) == 8.f);
    197   CHK(v4f_w(v) == 12.f);
    198 
    199   v = aosf44_row(m, 1);
    200   CHK(v4f_x(v) == 1.f);
    201   CHK(v4f_y(v) == 5.f);
    202   CHK(v4f_z(v) == 9.f);
    203   CHK(v4f_w(v) == 13.f);
    204 
    205   v = aosf44_row(m, 2);
    206   CHK(v4f_x(v) == 2.f);
    207   CHK(v4f_y(v) == 6.f);
    208   CHK(v4f_z(v) == 10.f);
    209   CHK(v4f_w(v) == 14.f);
    210 
    211   v = aosf44_row(m, 3);
    212   CHK(v4f_x(v) == 3.f);
    213   CHK(v4f_y(v) == 7.f);
    214   CHK(v4f_z(v) == 11.f);
    215   CHK(v4f_w(v) == 15.f);
    216   /* Indexed column getter: column i is the i-th stored group of 4 floats */
    217   v = aosf44_col(m, 0);
    218   CHK(v4f_x(v) == 0.f);
    219   CHK(v4f_y(v) == 1.f);
    220   CHK(v4f_z(v) == 2.f);
    221   CHK(v4f_w(v) == 3.f);
    222 
    223   v = aosf44_col(m, 1);
    224   CHK(v4f_x(v) == 4.f);
    225   CHK(v4f_y(v) == 5.f);
    226   CHK(v4f_z(v) == 6.f);
    227   CHK(v4f_w(v) == 7.f);
    228 
    229   v = aosf44_col(m, 2);
    230   CHK(v4f_x(v) == 8.f);
    231   CHK(v4f_y(v) == 9.f);
    232   CHK(v4f_z(v) == 10.f);
    233   CHK(v4f_w(v) == 11.f);
    234 
    235   v = aosf44_col(m, 3);
    236   CHK(v4f_x(v) == 12.f);
    237   CHK(v4f_y(v) == 13.f);
    238   CHK(v4f_z(v) == 14.f);
    239   CHK(v4f_w(v) == 15.f);
    240   /* Component-wise operators on m and n: add, sub, minus, abs */
    241   CHK(aosf44_set(m,
    242     v4f_set(0.f, 1.f, 2.f, 3.f),
    243     v4f_set(4.f, 5.f, 6.f, 7.f),
    244     v4f_set(8.f, 9.f, 10.f, 11.f),
    245     v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
    246   CHK(aosf44_set(n,
    247     v4f_set(0.f, 2.f, 1.f, 3.f),
    248     v4f_set(1.f, -2.f, -1.f, -3.f),
    249     v4f_set(1.f, 0.f, 0.f, 2.f),
    250     v4f_set(3.f, 2.f, 1.f, 0.f)) == n);
    251   CHK(aosf44_add(o, m, n) == o);
    252   AOSF44_EQ(o,
    253     0.f, 3.f, 3.f, 6.f,
    254     5.f, 3.f, 5.f, 4.f,
    255     9.f, 9.f, 10.f, 13.f,
    256     15.f, 15.f, 15.f, 15.f);
    257 
    258   CHK(aosf44_sub(o, m, n) == o);
    259   AOSF44_EQ(o,
    260     0.f, -1.f, 1.f, 0.f,
    261     3.f, 7.f, 7.f, 10.f,
    262     7.f, 9.f, 10.f, 9.f,
    263     9.f, 11.f, 13.f, 15.f);
    264 
    265   CHK(aosf44_minus(o, n) == o);
    266   AOSF44_EQ(o,
    267     0.f, -2.f, -1.f, -3.f,
    268     -1.f, 2.f, 1.f, 3.f,
    269     -1.f, 0.f, 0.f, -2.f,
    270     -3.f, -2.f, -1.f, 0.f);
    271   /* aosf44_abs is called in place: source and destination are both o */
    272   CHK(aosf44_abs(o, o) == o);
    273   AOSF44_EQ(o,
    274     0.f, 2.f, 1.f, 3.f,
    275     1.f, 2.f, 1.f, 3.f,
    276     1.f, 0.f, 0.f, 2.f,
    277     3.f, 2.f, 1.f, 0.f);
    278   /* aosf44_mul: each column of n is multiplied component-wise by the vector */
    279   CHK(aosf44_mul(o, n, v4f_set(1.f, 2.f, 3.f, 2.f)) == o);
    280   AOSF44_EQ(o,
    281     0.f, 4.f, 3.f, 6.f,
    282     1.f, -4.f, -3.f, -6.f,
    283     1.f, 0.f, 0.f, 4.f,
    284     3.f, 4.f, 3.f, 0.f);
    285   /* Matrix * vector: expected result is 1*col0 + 2*col1 + 3*col2 + 1*col3 of m */
    286   aosf44_set(m,
    287     v4f_set(0.f, 1.f, 2.f, 3.f),
    288     v4f_set(4.f, 5.f, 6.f, 7.f),
    289     v4f_set(8.f, 9.f, 10.f, 11.f),
    290     v4f_set(12.f, 13.f, 14.f, 15.f));
    291   v = aosf44_mulf4(m, v4f_set(1.f, 2.f, 3.f, 1.f));
    292   CHK(v4f_x(v) == 44.f);
    293   CHK(v4f_y(v) == 51.f);
    294   CHK(v4f_z(v) == 58.f);
    295   CHK(v4f_w(v) == 65.f);
    296   /* Vector * matrix: expected component i is the dot product of the vector and column i */
    297   v = aosf4_mulf44(v4f_set(1.f, 2.f, 3.f, 1.f), m);
    298   CHK(v4f_x(v) == 11.f);
    299   CHK(v4f_y(v) == 39.f);
    300   CHK(v4f_z(v) == 67.f);
    301   CHK(v4f_w(v) == 95.f);
    302   /* Matrix * matrix: column i of o is m multiplied by column i of n */
    303   aosf44_set(m,
    304     v4f_set(1.f, 2.f, 3.f, 4.f),
    305     v4f_set(4.f, 5.f, 6.f, 7.f),
    306     v4f_set(7.f, 8.f, 9.f, 10.f),
    307     v4f_set(10.f, 11.f, 12.f, 13.f));
    308   aosf44_set(n,
    309     v4f_set(2.f, 9.f, 8.f, 1.f),
    310     v4f_set(1.f, -2.f, 2.f, 1.f),
    311     v4f_set(1.f, -8.f, -4.f, 2.f),
    312     v4f_set(1.f, 3.f, 4.f, 2.f));
    313   CHK(aosf44_mulf44(o, m, n) == o);
    314   AOSF44_EQ(o,
    315     104.f, 124.f, 144.f, 164.f,
    316     17.f, 19.f, 21.f, 23.f,
    317     -39.f, -48.f, -57.f, -66.f,
    318     61.f, 71.f, 81.f, 91.f);
    319   /* aosf44_transpose of n */
    320   CHK(aosf44_transpose(o, n) == o);
    321   AOSF44_EQ(o,
    322     2.f, 1.f, 1.f, 1.f,
    323     9.f, -2.f, -8.f, 3.f,
    324     8.f, 2.f, -4.f, 4.f,
    325     1.f, 1.f, 2.f, 2.f);
    326   /* aosf44_det: the value 78 is expected in all four lanes of the result */
    327   v = aosf44_det(n);
    328   CHK(v4f_x(v) == 78.f);
    329   CHK(v4f_y(v) == 78.f);
    330   CHK(v4f_z(v) == 78.f);
    331   CHK(v4f_w(v) == 78.f);
    332   /* aosf44_inverse(m, n) also returns 78 in every lane; m * n must be identity within 1e-6 */
    333   v = aosf44_inverse(m, n);
    334   CHK(v4f_x(v) == 78.f);
    335   CHK(v4f_y(v) == 78.f);
    336   CHK(v4f_z(v) == 78.f);
    337   CHK(v4f_w(v) == 78.f);
    338   CHK(aosf44_mulf44(o, m, n) == o);
    339   AOSF44_EQ_EPS(o,
    340     1.f, 0.f, 0.f, 0.f,
    341     0.f, 1.f, 0.f, 0.f,
    342     0.f, 0.f, 1.f, 0.f,
    343     0.f, 0.f, 0.f, 1.f,
    344     1.e-6f);
    345   /* aosf44_invtrans: o must exactly equal the transpose of m (the inverse computed above) */
    346   v = aosf44_invtrans(o, n);
    347   CHK(v4f_x(v) == 78.f);
    348   CHK(v4f_y(v) == 78.f);
    349   CHK(v4f_z(v) == 78.f);
    350   CHK(v4f_w(v) == 78.f);
    351   AOSF44_EQ(o,
    352     v4f_x(m[0]), v4f_x(m[1]), v4f_x(m[2]), v4f_x(m[3]),
    353     v4f_y(m[0]), v4f_y(m[1]), v4f_y(m[2]), v4f_y(m[3]),
    354     v4f_z(m[0]), v4f_z(m[1]), v4f_z(m[2]), v4f_z(m[3]),
    355     v4f_w(m[0]), v4f_w(m[1]), v4f_w(m[2]), v4f_w(m[3]));
    356   /* aosf44_eq: identical matrices must yield a mask of ~0 in all four lanes */
    357   aosf44_set(m,
    358     v4f_set(0.f, 1.f, 2.f, 3.f),
    359     v4f_set(5.f, 5.f, 6.f, 7.f),
    360     v4f_set(8.f, 9.f, 10.f, 11.f),
    361     v4f_set(12.f, 13.f, 14.f, 15.f));
    362   aosf44_set(n,
    363      v4f_set(0.f, 1.f, 2.f, 3.f),
    364      v4f_set(5.f, 5.f, 6.f, 7.f),
    365      v4f_set(8.f, 9.f, 10.f, 11.f),
    366      v4f_set(12.f, 13.f, 14.f, 15.f));
    367 
    368   v = aosf44_eq(m, n);
    369   CHK(v4f_mask_x(v) == ~0);
    370   CHK(v4f_mask_y(v) == ~0);
    371   CHK(v4f_mask_z(v) == ~0);
    372   CHK(v4f_mask_w(v) == ~0);
    373   /* One differing component in one column at a time: the mask must be 0 in every lane */
    374   n[0] = v4f_set(0.f, 1.0f, 2.f, 4.f);
    375   v = aosf44_eq(m, n);
    376   CHK(v4f_mask_x(v) == 0);
    377   CHK(v4f_mask_y(v) == 0);
    378   CHK(v4f_mask_z(v) == 0);
    379   CHK(v4f_mask_w(v) == 0);
    380   n[0] = v4f_set(0.f, 1.0f, 2.f, 3.f);
    381 
    382   n[1] = v4f_set(4.f, 5.0f, 6.f, 7.f);
    383   v = aosf44_eq(m, n);
    384   CHK(v4f_mask_x(v) == 0);
    385   CHK(v4f_mask_y(v) == 0);
    386   CHK(v4f_mask_z(v) == 0);
    387   CHK(v4f_mask_w(v) == 0);
    388   n[1] = v4f_set(5.f, 5.0f, 6.f, 7.f);
    389 
    390   m[2] = v4f_set(8.f, -9.0f, 10.f, 11.f);
    391   v = aosf44_eq(m, n);
    392   CHK(v4f_mask_x(v) == 0);
    393   CHK(v4f_mask_y(v) == 0);
    394   CHK(v4f_mask_z(v) == 0);
    395   CHK(v4f_mask_w(v) == 0);
    396   m[2] = v4f_set(8.f, 9.0f, 10.f, 11.f);
    397 
    398   n[3] = v4f_set(12.f, 13.1f, 14.f, 15.f);
    399   v = aosf44_eq(m, n);
    400   CHK(v4f_mask_x(v) == 0);
    401   CHK(v4f_mask_y(v) == 0);
    402   CHK(v4f_mask_z(v) == 0);
    403   CHK(v4f_mask_w(v) == 0);
    404   /* A matrix compared with itself is equal, while n[3] still differs from m[3] */
    405   v = aosf44_eq(m, m);
    406   CHK(v4f_mask_x(v) == ~0);
    407   CHK(v4f_mask_y(v) == ~0);
    408   CHK(v4f_mask_z(v) == ~0);
    409   CHK(v4f_mask_w(v) == ~0);
    410   n[3] = v4f_set(12.f, 13.0f, 14.f, 15.f);
    411   /* With n[3] restored, m and n compare equal again */
    412   v = aosf44_eq(m, n);
    413   CHK(v4f_mask_x(v) == ~0);
    414   CHK(v4f_mask_y(v) == ~0);
    415   CHK(v4f_mask_z(v) == ~0);
    416   CHK(v4f_mask_w(v) == ~0);
    417   return 0;
    418 }
    419 
    420