ssf_phase_rdgfa_simdX.h (5855B)
1 /* Copyright (C) 2016-2018, 2021-2025 |Méso|Star> (contact@meso-star.com) 2 * 3 * This program is free software: you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation, either version 3 of the License, or 6 * (at your option) any later version. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 15 16 #if !defined(SIMD_WIDTH__) 17 #error "Undefined SIMD_WIDTH__ macro" 18 #endif 19 #if SIMD_WIDTH__ != 4 && SIMD_WIDTH__ != 8 20 #error "Unexpected SIMD_WIDTH__ value of "STR(RSIMD_WIDTH__) 21 #endif 22 23 /* Check that internal macros are not already defined */ 24 #if defined(vXf__) \ 25 || defined(vXf_T__) \ 26 || defined(SIMD_FUNC__) 27 #error "Unexpected macro definition" 28 #endif 29 30 /* Macros generic to the SIMD_WIDTH__ */ 31 #define vXf_T CONCAT(CONCAT(v, SIMD_WIDTH__), f_T) 32 #define vXf(Func) CONCAT(CONCAT(CONCAT(v, SIMD_WIDTH__), f_), Func) 33 #define SIMD_FUNC(Func) CONCAT(CONCAT(Func, _simd), SIMD_WIDTH__) 34 35 static INLINE vXf_T 36 SIMD_FUNC(eval_f)(struct rdgfa* rdgfa, const vXf_T theta) 37 { 38 /* Input arguments */ 39 const vXf_T Df = vXf(set1)((float)rdgfa->fractal_dimension); 40 41 /* Precompute constants */ 42 const vXf_T Rg2 = vXf(set1)((float)rdgfa->Rg2); 43 const vXf_T half_theta = vXf(mul)(theta, vXf(set1)(0.5f)); 44 45 /* Precompute values */ 46 const vXf_T sin_half_theta = vXf(sin)(half_theta); 47 const vXf_T q = vXf(mul)(vXf(set1)((float)rdgfa->cst_4pi_div_lambda), sin_half_theta); 48 const vXf_T q2Rg2 = vXf(mul)(vXf(mul)(q, q), Rg2); 49 50 /* Evaluate f(theta) when q2Rg2 < 1.5*Df */ 51 const vXf_T val0 = vXf(exp)(vXf(mul)(vXf(set1)(-1.f/3.f), q2Rg2)); 52 53 /* Evaluate f(theta) when q2Rg2 >= 1.5*Df */ 54 const vXf_T tmp0 = vXf(div)(vXf(set1)((float)rdgfa->cst_3Df_div_2E), q2Rg2); 55 const vXf_T half_Df = vXf(mul)(Df, vXf(set1)(0.5f)); 56 const vXf_T val1 = vXf(pow)(tmp0, half_Df); 57 58 /* Setup f */ 59 const vXf_T mask = vXf(lt)(q2Rg2, vXf(mul)(Df, vXf(set1)(1.5f))); 60 const vXf_T f = vXf(sel)(val1, val0, mask); 61 return f; 62 } 63 64 static INLINE vXf_T 65 SIMD_FUNC(eval2) 66 (struct rdgfa* rdgfa, 67 const vXf_T theta, 68 const vXf_T cos_theta) 69 { 70 const vXf_T f = SIMD_FUNC(eval_f)(rdgfa, theta); 71 const vXf_T g = vXf(set1)((float)rdgfa->g); 72 const vXf_T cos2_theta = vXf(mul)(cos_theta, cos_theta); 73 const vXf_T cst0 = vXf(set1)(3.f/(16.f*(float)PI)); 74 const vXf_T tmp0 = vXf(div)(f, g); 75 const vXf_T tmp1 = vXf(add)(vXf(set1)(1), cos2_theta); 76 const vXf_T phase = vXf(mul)(vXf(mul)(cst0, tmp0), tmp1); 77 return phase; 78 } 79 80 static INLINE vXf_T 81 SIMD_FUNC(eval)(struct rdgfa* rdgfa, const vXf_T theta) 82 { 83 return SIMD_FUNC(eval2)(rdgfa, theta, vXf(cos)(theta)); 84 } 85 86 static INLINE res_T 87 SIMD_FUNC(compute_cumulative)(struct rdgfa* rdgfa) 88 { 89 vXf_T dtheta; 90 vXf_T theta1; 91 vXf_T step; 92 vXf_T two_PI; 93 float* f_list = NULL; 94 float* d_omega_list = NULL; 95 double* cdf = NULL; 96 size_t nangles; 97 size_t i; 98 res_T res = RES_OK; 99 ASSERT(rdgfa); 100 101 /* Force the number of angles to be a multiple of the SIMD width */ 102 nangles = rdgfa->nintervals + 1; 103 nangles = (nangles + SIMD_WIDTH__-1)/ SIMD_WIDTH__ * SIMD_WIDTH__; 104 105 /* Allocate the cumulative array */ 106 res = darray_double_resize(&rdgfa->cdf, rdgfa->nintervals); 107 if(res != RES_OK) goto error; 108 109 /* Allocate temporaries arrays */ 110 res = darray_simdf_resize(&rdgfa->f_list, nangles); 111 if(res != RES_OK) goto error; 112 res = darray_simdf_resize(&rdgfa->d_omega_list, nangles); 113 if(res != RES_OK) goto error; 114 115 /* Fetch data */ 116 cdf = darray_double_data_get(&rdgfa->cdf); 117 f_list = darray_simdf_data_get(&rdgfa->f_list); 118 d_omega_list = darray_simdf_data_get(&rdgfa->d_omega_list); 119 120 /* Compute the angular step for the angular domain */ 121 rdgfa->dtheta = PI / (double)rdgfa->nintervals; 122 123 step = vXf(set1)((float)rdgfa->dtheta*(float)SIMD_WIDTH__); 124 dtheta = vXf(set1)((float)rdgfa->dtheta); 125 two_PI = vXf(set1)((float)(2*PI)); 126 #if SIMD_WIDTH__ == 4 127 theta1 = vXf(mul)(dtheta, vXf(set)(0, 1, 2, 3)); 128 #elif SIMD_WIDTH__ == 8 129 theta1 = vXf(mul)(dtheta, vXf(set)(0, 1, 2, 3, 4, 5, 6, 7)); 130 #endif 131 132 /* Compute f and d_omaga */ 133 FOR_EACH(i, 0, nangles/SIMD_WIDTH__) { 134 /* Compute f */ 135 const vXf_T f = SIMD_FUNC(eval)(rdgfa, theta1); 136 137 /* Compute d_omega */ 138 const vXf_T theta2 = vXf(add)(theta1, dtheta); 139 const vXf_T tmp0 = vXf(mul)(vXf(add)(theta1, theta2), vXf(set1)(0.5f)); 140 const vXf_T d_omega = vXf(mul)(vXf(mul)(two_PI, vXf(sin)(tmp0)), dtheta); 141 142 /* Store the result */ 143 vXf(store)(&f_list[i*SIMD_WIDTH__], f); 144 vXf(store)(&d_omega_list[i*SIMD_WIDTH__], d_omega); 145 146 /* Go to the next angles */ 147 theta1 = vXf(add)(theta1, step); 148 } 149 150 /* Compute the (unormalized) cumulative */ 151 FOR_EACH(i, 0, rdgfa->nintervals) { 152 const double f1 = f_list[i+0]; 153 const double f2 = f_list[i+1]; 154 const double d_omega = d_omega_list[i]; 155 const double tmp = (f1 + f2) * 0.5 * d_omega; 156 cdf[i] = (i == 0 ? tmp : tmp + cdf[i-1]); 157 } 158 159 /* Save the normalization factor */ 160 rdgfa->rcp_normalize_factor = 1.0 / cdf[rdgfa->nintervals-1]; 161 162 /* Finally normalize the CDF */ 163 FOR_EACH(i, 0, rdgfa->nintervals) { 164 cdf[i] *= rdgfa->rcp_normalize_factor; 165 } 166 167 exit: 168 return res; 169 error: 170 darray_double_clear(&rdgfa->cdf); 171 darray_simdf_clear(&rdgfa->f_list); 172 darray_simdf_clear(&rdgfa->d_omega_list); 173 goto exit; 174 } 175 176 /* Undef generic macros */ 177 #undef vXf_T 178 #undef vXf 179 #undef SIMD_FUNC 180 181 /* Undef parameter */ 182 #undef SIMD_WIDTH__ 183