rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

test_v8f.c (18332B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "rsimd.h"
     17 
     18 int
     19 main(int argc, char** argv)
     20 {
     21   v8f_T i, j, k;
     22   ALIGN(32) union { int32_t i[8]; float f[8]; } cast;
     23   ALIGN(32) float tmp[9] = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f};
     24   (void)argc, (void)argv;
     25 
     26   i = v8f_loadu(tmp+1);
     27   CHK(v4f_x(v8f_abcd(i)) == 1.f);
     28   CHK(v4f_y(v8f_abcd(i)) == 2.f);
     29   CHK(v4f_z(v8f_abcd(i)) == 3.f);
     30   CHK(v4f_w(v8f_abcd(i)) == 4.f);
     31   CHK(v4f_x(v8f_efgh(i)) == 5.f);
     32   CHK(v4f_y(v8f_efgh(i)) == 6.f);
     33   CHK(v4f_z(v8f_efgh(i)) == 7.f);
     34   CHK(v4f_w(v8f_efgh(i)) == 8.f);
     35 
     36   i = v8f_load(tmp);
     37   CHK(v4f_x(v8f_abcd(i)) == 0.f);
     38   CHK(v4f_y(v8f_abcd(i)) == 1.f);
     39   CHK(v4f_z(v8f_abcd(i)) == 2.f);
     40   CHK(v4f_w(v8f_abcd(i)) == 3.f);
     41   CHK(v4f_x(v8f_efgh(i)) == 4.f);
     42   CHK(v4f_y(v8f_efgh(i)) == 5.f);
     43   CHK(v4f_z(v8f_efgh(i)) == 6.f);
     44   CHK(v4f_w(v8f_efgh(i)) == 7.f);
     45 
     46   tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0.f;
     47   tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0.f;
     48   CHK(v8f_store(tmp, i) == tmp);
     49   CHK(tmp[0] == 0.f);
     50   CHK(tmp[1] == 1.f);
     51   CHK(tmp[2] == 2.f);
     52   CHK(tmp[3] == 3.f);
     53   CHK(tmp[4] == 4.f);
     54   CHK(tmp[5] == 5.f);
     55   CHK(tmp[6] == 6.f);
     56   CHK(tmp[7] == 7.f);
     57   CHK(tmp[8] == 8.f);
     58 
     59   i = v8f_set1(-2.f);
     60   CHK(v4f_x(v8f_abcd(i)) == -2.f);
     61   CHK(v4f_y(v8f_abcd(i)) == -2.f);
     62   CHK(v4f_z(v8f_abcd(i)) == -2.f);
     63   CHK(v4f_w(v8f_abcd(i)) == -2.f);
     64   CHK(v4f_x(v8f_efgh(i)) == -2.f);
     65   CHK(v4f_y(v8f_efgh(i)) == -2.f);
     66   CHK(v4f_z(v8f_efgh(i)) == -2.f);
     67   CHK(v4f_w(v8f_efgh(i)) == -2.f);
     68 
     69   i = v8f_set(0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f);
     70   CHK(v4f_x(v8f_abcd(i)) == 0.f);
     71   CHK(v4f_y(v8f_abcd(i)) == 1.f);
     72   CHK(v4f_z(v8f_abcd(i)) == 2.f);
     73   CHK(v4f_w(v8f_abcd(i)) == 3.f);
     74   CHK(v4f_x(v8f_efgh(i)) == 4.f);
     75   CHK(v4f_y(v8f_efgh(i)) == 5.f);
     76   CHK(v4f_z(v8f_efgh(i)) == 6.f);
     77   CHK(v4f_w(v8f_efgh(i)) == 7.f);
     78 
     79   i = v8f_zero();
     80   CHK(v4f_x(v8f_abcd(i)) == 0.f);
     81   CHK(v4f_y(v8f_abcd(i)) == 0.f);
     82   CHK(v4f_z(v8f_abcd(i)) == 0.f);
     83   CHK(v4f_w(v8f_abcd(i)) == 0.f);
     84   CHK(v4f_x(v8f_efgh(i)) == 0.f);
     85   CHK(v4f_y(v8f_efgh(i)) == 0.f);
     86   CHK(v4f_z(v8f_efgh(i)) == 0.f);
     87   CHK(v4f_w(v8f_efgh(i)) == 0.f);
     88 
     89   i = v8f_mask(~0,~0,0,0,0,~0,~0,0);
     90   cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
     91   cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
     92   cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0x00000000);
     93   cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0x00000000);
     94   cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0x00000000);
     95   cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
     96   cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
     97   cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0x00000000);
     98 
     99   i = v8f_mask1(~0);
    100   cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
    101   cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
    102   cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
    103   cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
    104   cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0xFFFFFFFF);
    105   cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
    106   cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
    107   cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
    108 
    109   i = v8f_true();
    110   cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
    111   cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
    112   cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
    113   cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
    114   cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0xFFFFFFFF);
    115   cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
    116   cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
    117   cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
    118 
    119   i = v8f_false();
    120   cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0x00000000);
    121   cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0x00000000);
    122   cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0x00000000);
    123   cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0x00000000);
    124   cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0x00000000);
    125   cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0x00000000);
    126   cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0x00000000);
    127   cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0x00000000);
    128 
    129   i = v8f_mask(~0,~0,0,0,0,~0,~0,0);
    130   j = v8f_mask(~0,0,~0,0,0,~0,0,~0);
    131   k = v8f_or(i, j);
    132   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
    133   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
    134   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
    135   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
    136   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
    137   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
    138   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
    139   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
    140 
    141   k = v8f_and(i, j);
    142   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
    143   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0x00000000);
    144   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0x00000000);
    145   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
    146   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
    147   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
    148   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0x00000000);
    149   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0x00000000);
    150 
    151   k = v8f_andnot(i, j);
    152   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0x00000000);
    153   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0x00000000);
    154   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
    155   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
    156   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
    157   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0x00000000);
    158   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0x00000000);
    159   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
    160 
    161   k = v8f_xor(i, j);
    162   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0x00000000);
    163   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
    164   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
    165   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
    166   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
    167   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0x00000000);
    168   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
    169   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
    170 
    171   CHK(v8f_movemask(k) == 0xC6);
    172   i = v8f_mask
    173     ((int32_t)0x01020401, (int32_t)0x80605040, (int32_t)0x7F1F2F3F, (int32_t)0,
    174      (int32_t)0xF0000000, (int32_t)0xFFFFFFFF, (int32_t)0x7FFFFFFF, (int32_t)~0);
    175   CHK(v8f_movemask(i) == 0xB2);
    176 
    177   i = v8f_set(0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f);
    178   j = v8f_set(8.f,9.f,10.f,11.f,12.f,13.f,14.f,15.f);
    179   k = v8f_sel(i, j, v8f_mask(~0,~0,0,0,0,~0,~0,0));
    180   CHK(v4f_x(v8f_abcd(k)) == 8.f);
    181   CHK(v4f_y(v8f_abcd(k)) == 9.f);
    182   CHK(v4f_z(v8f_abcd(k)) == 2.f);
    183   CHK(v4f_w(v8f_abcd(k)) == 3.f);
    184   CHK(v4f_x(v8f_efgh(k)) == 4.f);
    185   CHK(v4f_y(v8f_efgh(k)) == 13.f);
    186   CHK(v4f_z(v8f_efgh(k)) == 14.f);
    187   CHK(v4f_w(v8f_efgh(k)) == 7.f);
    188 
    189   k = v8f_minus(i);
    190   CHK(v4f_x(v8f_abcd(k)) == -0.f);
    191   CHK(v4f_y(v8f_abcd(k)) == -1.f);
    192   CHK(v4f_z(v8f_abcd(k)) == -2.f);
    193   CHK(v4f_w(v8f_abcd(k)) == -3.f);
    194   CHK(v4f_x(v8f_efgh(k)) == -4.f);
    195   CHK(v4f_y(v8f_efgh(k)) == -5.f);
    196   CHK(v4f_z(v8f_efgh(k)) == -6.f);
    197   CHK(v4f_w(v8f_efgh(k)) == -7.f);
    198 
    199   k = v8f_add(i, j);
    200   CHK(v4f_x(v8f_abcd(k)) == 8.f);
    201   CHK(v4f_y(v8f_abcd(k)) == 10.f);
    202   CHK(v4f_z(v8f_abcd(k)) == 12.f);
    203   CHK(v4f_w(v8f_abcd(k)) == 14.f);
    204   CHK(v4f_x(v8f_efgh(k)) == 16.f);
    205   CHK(v4f_y(v8f_efgh(k)) == 18.f);
    206   CHK(v4f_z(v8f_efgh(k)) == 20.f);
    207   CHK(v4f_w(v8f_efgh(k)) == 22.f);
    208 
    209   k = v8f_sub(i, j);
    210   CHK(v4f_x(v8f_abcd(k)) == -8.f);
    211   CHK(v4f_y(v8f_abcd(k)) == -8.f);
    212   CHK(v4f_z(v8f_abcd(k)) == -8.f);
    213   CHK(v4f_w(v8f_abcd(k)) == -8.f);
    214   CHK(v4f_x(v8f_efgh(k)) == -8.f);
    215   CHK(v4f_y(v8f_efgh(k)) == -8.f);
    216   CHK(v4f_z(v8f_efgh(k)) == -8.f);
    217   CHK(v4f_w(v8f_efgh(k)) == -8.f);
    218 
    219   k = v8f_mul(i, j);
    220   CHK(v4f_x(v8f_abcd(k)) == 0.f);
    221   CHK(v4f_y(v8f_abcd(k)) == 9.f);
    222   CHK(v4f_z(v8f_abcd(k)) == 20.f);
    223   CHK(v4f_w(v8f_abcd(k)) == 33.f);
    224   CHK(v4f_x(v8f_efgh(k)) == 48.f);
    225   CHK(v4f_y(v8f_efgh(k)) == 65.f);
    226   CHK(v4f_z(v8f_efgh(k)) == 84.f);
    227   CHK(v4f_w(v8f_efgh(k)) == 105.f);
    228 
    229   k = v8f_div(i, j);
    230   CHK(v4f_x(v8f_abcd(k)) == 0.f);
    231   CHK(v4f_y(v8f_abcd(k)) == 1.f/9.f);
    232   CHK(v4f_z(v8f_abcd(k)) == 0.2f);
    233   CHK(v4f_w(v8f_abcd(k)) == 3.f/11.f);
    234   CHK(v4f_x(v8f_efgh(k)) == 1.f/3.f);
    235   CHK(v4f_y(v8f_efgh(k)) == 5.f/13.f);
    236   CHK(v4f_z(v8f_efgh(k)) == 3.f/7.f);
    237   CHK(v4f_w(v8f_efgh(k)) == 7.f/15.f);
    238 
    239   k = v8f_set(0.1f,0.2f,0.3f,0.4f,0.5f,0.6f,0.7f,0.8f);
    240   k = v8f_madd(i, j, k);
    241   CHK(v4f_x(v8f_abcd(k)) == 0.1f);
    242   CHK(v4f_y(v8f_abcd(k)) == 9.2f);
    243   CHK(v4f_z(v8f_abcd(k)) == 20.3f);
    244   CHK(v4f_w(v8f_abcd(k)) == 33.4f);
    245   CHK(v4f_x(v8f_efgh(k)) == 48.5f);
    246   CHK(v4f_y(v8f_efgh(k)) == 65.6f);
    247   CHK(v4f_z(v8f_efgh(k)) == 84.7f);
    248   CHK(v4f_w(v8f_efgh(k)) == 105.8f);
    249 
    250   k = v8f_abs(v8f_minus(i));
    251   CHK(v4f_x(v8f_abcd(k)) == 0.f);
    252   CHK(v4f_y(v8f_abcd(k)) == 1.f);
    253   CHK(v4f_z(v8f_abcd(k)) == 2.f);
    254   CHK(v4f_w(v8f_abcd(k)) == 3.f);
    255   CHK(v4f_x(v8f_efgh(k)) == 4.f);
    256   CHK(v4f_y(v8f_efgh(k)) == 5.f);
    257   CHK(v4f_z(v8f_efgh(k)) == 6.f);
    258   CHK(v4f_w(v8f_efgh(k)) == 7.f);
    259 
    260   i = v8f_set(1.f, 4.f, 9.f, 16.f, 25.f, 36.f, 49.f, 64.f);
    261   k = v8f_sqrt(i);
    262   CHK(v4f_x(v8f_abcd(k)) == 1.f);
    263   CHK(v4f_y(v8f_abcd(k)) == 2.f);
    264   CHK(v4f_z(v8f_abcd(k)) == 3.f);
    265   CHK(v4f_w(v8f_abcd(k)) == 4.f);
    266   CHK(v4f_x(v8f_efgh(k)) == 5.f);
    267   CHK(v4f_y(v8f_efgh(k)) == 6.f);
    268   CHK(v4f_z(v8f_efgh(k)) == 7.f);
    269   CHK(v4f_w(v8f_efgh(k)) == 8.f);
    270 
    271   k = v8f_rsqrte(i);
    272   CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-3f));
    273   CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-3f));
    274   CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-3f));
    275   CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-3f));
    276   CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-3f));
    277   CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-3f));
    278   CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-3f));
    279   CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-3f));
    280 
    281   k = v8f_rsqrt(i);
    282   CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-6f));
    283   CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-6f));
    284   CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-6f));
    285   CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-6f));
    286   CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-6f));
    287   CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-6f));
    288   CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-6f));
    289   CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-6f));
    290 
    291   i = v8f_set(1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f);
    292   k = v8f_rcpe(i);
    293   CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-3f));
    294   CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-3f));
    295   CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-3f));
    296   CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-3f));
    297   CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-3f));
    298   CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-3f));
    299   CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-3f));
    300   CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-3f));
    301 
    302   k = v8f_rcp(i);
    303   CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-6f));
    304   CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-6f));
    305   CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-6f));
    306   CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-6f));
    307   CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-6f));
    308   CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-6f));
    309   CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-6f));
    310   CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-6f));
    311 
    312   j = v8f_set(2.f,3.f,4.f,5.f,6.f,7.f,8.f,9.f);
    313   k = v8f_lerp(i, j, v8f_set1(0.5f));
    314   CHK(v4f_x(v8f_abcd(k)) == 1.5f);
    315   CHK(v4f_y(v8f_abcd(k)) == 2.5f);
    316   CHK(v4f_z(v8f_abcd(k)) == 3.5f);
    317   CHK(v4f_w(v8f_abcd(k)) == 4.5f);
    318   CHK(v4f_x(v8f_efgh(k)) == 5.5f);
    319   CHK(v4f_y(v8f_efgh(k)) == 6.5f);
    320   CHK(v4f_z(v8f_efgh(k)) == 7.5f);
    321   CHK(v4f_w(v8f_efgh(k)) == 8.5f);
    322 
    323   i = v8f_set(0.f, 1.f,2.f,3.f, 4.f,5.f,6.f,7.f);
    324   j = v8f_set(0.f,-1.f,4.f,4.f,-2.f,6.f,6.f,8.f);
    325 
    326   k = v8f_eq(i, j);
    327   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
    328   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
    329   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
    330   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
    331   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
    332   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
    333   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
    334   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
    335 
    336   k = v8f_neq(i, j);
    337   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
    338   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
    339   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
    340   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
    341   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
    342   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
    343   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
    344   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
    345 
    346   k = v8f_ge(i, j);
    347   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
    348   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
    349   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
    350   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
    351   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
    352   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
    353   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
    354   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
    355 
    356   k = v8f_le(i, j);
    357   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
    358   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
    359   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
    360   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
    361   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
    362   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
    363   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
    364   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
    365 
    366   k = v8f_gt(i, j);
    367   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
    368   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
    369   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
    370   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
    371   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
    372   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
    373   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
    374   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
    375 
    376   k = v8f_lt(i, j);
    377   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
    378   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
    379   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
    380   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
    381   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
    382   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
    383   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
    384   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
    385 
    386   j = v8f_set(0.0001f, 0.99999f, 2.f, 3.1f, 4.001f, 5.0002f, 6.f, 6.999999f);
    387   k = v8f_eq_eps(i, j, v8f_set1(1.e-4f));
    388   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
    389   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
    390   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
    391   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
    392   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
    393   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
    394   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
    395   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
    396 
    397   k = v8f_eq_eps(i, j, v8f_set(1.e-4f, 1.e-4f, 0.f, 0.1f, 1.e-3f, 2.e-4f, 0.f, 1.e-5f));
    398   cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
    399   cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
    400   cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
    401   cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
    402   cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
    403   cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
    404   cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
    405   cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
    406 
    407   i = v8f_set(0.f, 1.f,2.f,3.f, 4.f,5.f,6.f,7.f);
    408   j = v8f_set(0.f,-1.f,4.f,4.f,-2.f,6.f,6.f,8.f);
    409 
    410   k = v8f_min(i, j);
    411   CHK(v4f_x(v8f_abcd(k)) == 0.f);
    412   CHK(v4f_y(v8f_abcd(k)) ==-1.f);
    413   CHK(v4f_z(v8f_abcd(k)) == 2.f);
    414   CHK(v4f_w(v8f_abcd(k)) == 3.f);
    415   CHK(v4f_x(v8f_efgh(k)) ==-2.f);
    416   CHK(v4f_y(v8f_efgh(k)) == 5.f);
    417   CHK(v4f_z(v8f_efgh(k)) == 6.f);
    418   CHK(v4f_w(v8f_efgh(k)) == 7.f);
    419 
    420   k = v8f_max(i, j);
    421   CHK(v4f_x(v8f_abcd(k)) == 0.f);
    422   CHK(v4f_y(v8f_abcd(k)) == 1.f);
    423   CHK(v4f_z(v8f_abcd(k)) == 4.f);
    424   CHK(v4f_w(v8f_abcd(k)) == 4.f);
    425   CHK(v4f_x(v8f_efgh(k)) == 4.f);
    426   CHK(v4f_y(v8f_efgh(k)) == 6.f);
    427   CHK(v4f_z(v8f_efgh(k)) == 6.f);
    428   CHK(v4f_w(v8f_efgh(k)) == 8.f);
    429 
    430   CHK(v8f_reduce_min(i) == 0.f);
    431   CHK(v8f_reduce_min(j) ==-2.f);
    432   CHK(v8f_reduce_max(i) == 7.f);
    433   CHK(v8f_reduce_max(j) == 8.f);
    434 
    435   k = v8f_clamp(i,
    436     v8f_set(1.f, 1.f, 3.1f, 5.f, 4.f, 0.f, 0.f, -1.f),
    437     v8f_set(1.f, 1.f, 4.f, 6.f, 4.f, 1.f, 6.f, 5.f));
    438 
    439   CHK(v4f_x(v8f_abcd(k)) == 1.f);
    440   CHK(v4f_y(v8f_abcd(k)) == 1.f);
    441   CHK(v4f_z(v8f_abcd(k)) == 3.1f);
    442   CHK(v4f_w(v8f_abcd(k)) == 5.f);
    443   CHK(v4f_x(v8f_efgh(k)) == 4.f);
    444   CHK(v4f_y(v8f_efgh(k)) == 1.f);
    445   CHK(v4f_z(v8f_efgh(k)) == 6.f);
    446   CHK(v4f_w(v8f_efgh(k)) == 5.f);
    447 
    448   return 0;
    449 }
    450