star-sf

Set of surface and volume scattering functions
git clone git://git.meso-star.fr/star-sf.git
Log | Files | Refs | README | LICENSE

ssf_phase_rdgfa_simdX.h (5855B)


      1 /* Copyright (C) 2016-2018, 2021-2025 |Méso|Star> (contact@meso-star.com)
      2  *
      3  * This program is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published by
      5  * the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * This program is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #if !defined(SIMD_WIDTH__)
     17   #error "Undefined SIMD_WIDTH__ macro"
     18 #endif
     19 #if SIMD_WIDTH__ != 4 && SIMD_WIDTH__ != 8
     20   #error "Unexpected SIMD_WIDTH__ value of "STR(RSIMD_WIDTH__)
     21 #endif
     22 
     23 /* Check that internal macros are not already defined */
     24 #if defined(vXf__)                                                             \
     25  || defined(vXf_T__)                                                           \
     26  || defined(SIMD_FUNC__)
     27  #error "Unexpected macro definition"
     28 #endif
     29 
     30 /* Macros generic to the SIMD_WIDTH__ */
     31 #define vXf_T CONCAT(CONCAT(v, SIMD_WIDTH__), f_T)
     32 #define vXf(Func) CONCAT(CONCAT(CONCAT(v, SIMD_WIDTH__), f_), Func)
     33 #define SIMD_FUNC(Func) CONCAT(CONCAT(Func, _simd), SIMD_WIDTH__)
     34 
     35 static INLINE vXf_T
     36 SIMD_FUNC(eval_f)(struct rdgfa* rdgfa, const vXf_T theta)
     37 {
     38   /* Input arguments */
     39   const vXf_T Df = vXf(set1)((float)rdgfa->fractal_dimension);
     40 
     41   /* Precompute constants */
     42   const vXf_T Rg2 = vXf(set1)((float)rdgfa->Rg2);
     43   const vXf_T half_theta = vXf(mul)(theta, vXf(set1)(0.5f));
     44 
     45   /* Precompute values */
     46   const vXf_T sin_half_theta = vXf(sin)(half_theta);
     47   const vXf_T q = vXf(mul)(vXf(set1)((float)rdgfa->cst_4pi_div_lambda), sin_half_theta);
     48   const vXf_T q2Rg2 = vXf(mul)(vXf(mul)(q, q), Rg2);
     49 
     50   /* Evaluate f(theta) when q2Rg2 < 1.5*Df */
     51   const vXf_T val0 = vXf(exp)(vXf(mul)(vXf(set1)(-1.f/3.f), q2Rg2));
     52 
     53   /* Evaluate f(theta) when q2Rg2 >= 1.5*Df */
     54   const vXf_T tmp0 = vXf(div)(vXf(set1)((float)rdgfa->cst_3Df_div_2E), q2Rg2);
     55   const vXf_T half_Df = vXf(mul)(Df, vXf(set1)(0.5f));
     56   const vXf_T val1 = vXf(pow)(tmp0, half_Df);
     57 
     58   /* Setup f */
     59   const vXf_T mask = vXf(lt)(q2Rg2, vXf(mul)(Df, vXf(set1)(1.5f)));
     60   const vXf_T f = vXf(sel)(val1, val0, mask);
     61   return f;
     62 }
     63 
     64 static INLINE vXf_T
     65 SIMD_FUNC(eval2)
     66   (struct rdgfa* rdgfa,
     67    const vXf_T theta,
     68    const vXf_T cos_theta)
     69 {
     70   const vXf_T f = SIMD_FUNC(eval_f)(rdgfa, theta);
     71   const vXf_T g = vXf(set1)((float)rdgfa->g);
     72   const vXf_T cos2_theta = vXf(mul)(cos_theta, cos_theta);
     73   const vXf_T cst0 = vXf(set1)(3.f/(16.f*(float)PI));
     74   const vXf_T tmp0 = vXf(div)(f, g);
     75   const vXf_T tmp1 = vXf(add)(vXf(set1)(1), cos2_theta);
     76   const vXf_T phase = vXf(mul)(vXf(mul)(cst0, tmp0), tmp1);
     77   return phase;
     78 }
     79 
     80 static INLINE vXf_T
     81 SIMD_FUNC(eval)(struct rdgfa* rdgfa, const vXf_T theta)
     82 {
     83   return SIMD_FUNC(eval2)(rdgfa, theta, vXf(cos)(theta));
     84 }
     85 
     86 static INLINE res_T
     87 SIMD_FUNC(compute_cumulative)(struct rdgfa* rdgfa)
     88 {
     89   vXf_T dtheta;
     90   vXf_T theta1;
     91   vXf_T step;
     92   vXf_T two_PI;
     93   float* f_list = NULL;
     94   float* d_omega_list = NULL;
     95   double* cdf = NULL;
     96   size_t nangles;
     97   size_t i;
     98   res_T res = RES_OK;
     99   ASSERT(rdgfa);
    100 
    101   /* Force the number of angles to be a multiple of the SIMD width */
    102   nangles = rdgfa->nintervals + 1;
    103   nangles = (nangles + SIMD_WIDTH__-1)/ SIMD_WIDTH__ * SIMD_WIDTH__;
    104 
    105   /* Allocate the cumulative array */
    106   res = darray_double_resize(&rdgfa->cdf, rdgfa->nintervals);
    107   if(res != RES_OK) goto error;
    108 
    109   /* Allocate temporaries arrays */
    110   res = darray_simdf_resize(&rdgfa->f_list, nangles);
    111   if(res != RES_OK) goto error;
    112   res = darray_simdf_resize(&rdgfa->d_omega_list, nangles);
    113   if(res != RES_OK) goto error;
    114 
    115   /* Fetch data */
    116   cdf = darray_double_data_get(&rdgfa->cdf);
    117   f_list = darray_simdf_data_get(&rdgfa->f_list);
    118   d_omega_list = darray_simdf_data_get(&rdgfa->d_omega_list);
    119 
    120   /* Compute the angular step for the angular domain */
    121   rdgfa->dtheta = PI / (double)rdgfa->nintervals;
    122 
    123   step = vXf(set1)((float)rdgfa->dtheta*(float)SIMD_WIDTH__);
    124   dtheta = vXf(set1)((float)rdgfa->dtheta);
    125   two_PI = vXf(set1)((float)(2*PI));
    126 #if SIMD_WIDTH__ == 4
    127   theta1 = vXf(mul)(dtheta, vXf(set)(0, 1, 2, 3));
    128 #elif SIMD_WIDTH__ == 8
    129   theta1 = vXf(mul)(dtheta, vXf(set)(0, 1, 2, 3, 4, 5, 6, 7));
    130 #endif
    131 
    132   /* Compute f and d_omaga */
    133   FOR_EACH(i, 0, nangles/SIMD_WIDTH__) {
    134     /* Compute f */
    135     const vXf_T f = SIMD_FUNC(eval)(rdgfa, theta1);
    136 
    137     /* Compute d_omega */
    138     const vXf_T theta2 = vXf(add)(theta1, dtheta);
    139     const vXf_T tmp0 = vXf(mul)(vXf(add)(theta1, theta2), vXf(set1)(0.5f));
    140     const vXf_T d_omega = vXf(mul)(vXf(mul)(two_PI, vXf(sin)(tmp0)), dtheta);
    141 
    142     /* Store the result */
    143     vXf(store)(&f_list[i*SIMD_WIDTH__], f);
    144     vXf(store)(&d_omega_list[i*SIMD_WIDTH__], d_omega);
    145 
    146     /* Go to the next angles */
    147     theta1 = vXf(add)(theta1, step);
    148   }
    149 
    150   /* Compute the (unormalized) cumulative */
    151   FOR_EACH(i, 0, rdgfa->nintervals) {
    152     const double f1 = f_list[i+0];
    153     const double f2 = f_list[i+1];
    154     const double d_omega = d_omega_list[i];
    155     const double tmp = (f1 + f2) * 0.5 * d_omega;
    156     cdf[i] = (i == 0 ? tmp : tmp + cdf[i-1]);
    157   }
    158 
    159   /* Save the normalization factor */
    160   rdgfa->rcp_normalize_factor = 1.0 / cdf[rdgfa->nintervals-1];
    161 
    162   /* Finally normalize the CDF */
    163   FOR_EACH(i, 0, rdgfa->nintervals) {
    164     cdf[i] *= rdgfa->rcp_normalize_factor;
    165   }
    166 
    167 exit:
    168   return res;
    169 error:
    170   darray_double_clear(&rdgfa->cdf);
    171   darray_simdf_clear(&rdgfa->f_list);
    172   darray_simdf_clear(&rdgfa->d_omega_list);
    173   goto exit;
    174 }
    175 
    176 /* Undef generic macros */
    177 #undef vXf_T
    178 #undef vXf
    179 #undef SIMD_FUNC
    180 
    181 /* Undef parameter */
    182 #undef SIMD_WIDTH__
    183