Merge branch 'feature_simd' - atrstm - Load and structure a combustion gas mixture

commit 16f9712ee6acaa269a691992046fdf3c13a5e41a
parent 477fb701f096b706c222a18958146814be6d5a8b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue,  2 Feb 2021 09:02:33 +0100

Merge branch 'feature_simd'

Diffstat:
M cmake/CMakeLists.txt  | 35 ++++++++++++++++++++++++++++++++---
A src/atrstm_radcoefs_simd4.c  | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/atrstm_radcoefs_simd4.h  | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/atrstm_setup_octrees.c  | 9 +++++++++
M src/test_atrstm_radcoefs.c  | 17 ++++++++++++++++-
A src/test_atrstm_radcoefs_simd.c  | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

6 files changed, 493 insertions(+), 4 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 CNRS
+# Copyright (C) 2020, 2021 CNRS
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.1)
 project(atrstm C)
 enable_testing()
 
@@ -31,6 +31,16 @@ find_package(RSys 0.12 REQUIRED)
 find_package(StarTetraHedra 0.0 REQUIRED)
 find_package(StarUVM 0.0 REQUIRED)
 find_package(StarVX 0.2 REQUIRED)
+find_package(RSIMD 0.3)
+if(RSIMD_FOUND)
+  option(USE_SIMD "Use SIMD instruction sets" ON)
+endif()
+
+if(USE_SIMD)
+  message(STATUS "Use SIMD instruction sets")
+else()
+  message(STATUS "Do not use SIMD instruction sets")
+endif()
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR})
 include(rcmake)
@@ -77,15 +87,31 @@ set(ATRSTM_FILES_INC_API
 
 set(ATRSTM_FILES_DOC COPYING README.md)
 
+# SIMD files
+if(USE_SIMD)
+  set(ATRSTM_FILES_SRC ${ATRSTM_FILES_SRC} atrstm_radcoefs_simd4.c)
+  set(ATRSTM_FILES_INC ${ATRSTM_FILES_INC} atrstm_radcoefs_simd4.h)
+endif()
+
 # Prepend each file in the `ATRSTM_FILES_<SRC|INC>' list by `ATRSTM_SOURCE_DIR'
 rcmake_prepend_path(ATRSTM_FILES_SRC ${ATRSTM_SOURCE_DIR})
 rcmake_prepend_path(ATRSTM_FILES_INC ${ATRSTM_SOURCE_DIR})
 rcmake_prepend_path(ATRSTM_FILES_INC_API ${ATRSTM_SOURCE_DIR})
 rcmake_prepend_path(ATRSTM_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
 
-add_library(atrstm SHARED ${ATRSTM_FILES_SRC} ${ATRSTM_FILES_INC} ${ATRSTM_FILES_INC_API})
+add_library(atrstm SHARED
+  ${ATRSTM_FILES_SRC}
+  ${ATRSTM_FILES_INC}
+  ${ATRSTM_FILES_INC_API})
 target_link_libraries(atrstm AtrRI AtrTP RSys StarTetraHedra StarUVM StarVX m)
 
+# Setup SIMD support on the target
+if(USE_SIMD)
+  target_link_libraries(atrstm RSIMD)
+  set_target_properties(atrstm PROPERTIES
+    COMPILE_DEFINITIONS ATRSTM_USE_SIMD)
+endif()
+
 if(CMAKE_COMPILER_IS_GNUCC)
   set_target_properties(atrstm PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS}")
 endif()
@@ -115,6 +141,9 @@ if(NOT NO_TEST)
 
   build_test(test_atrstm)
   new_test(test_atrstm_radcoefs)
+  if(USE_SIMD)
+    new_test(test_atrstm_radcoefs_simd)
+  endif()
 endif()
 
 ################################################################################
diff --git a/src/atrstm_radcoefs_simd4.c b/src/atrstm_radcoefs_simd4.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_c.h"
+#include "atrstm_radcoefs_simd4.h"
+
+#include <astoria/atrtp.h>
+#include <star/suvm.h>
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+primitive_compute_radcoefs_simd4
+  (const struct atrstm* atrstm,
+   const struct atrri_refractive_index* refract_id,
+   const struct suvm_primitive* prim,
+   const int radcoefs_mask, /* Combination of atrstm_radcoef_flag */
+   struct radcoefs_simd4* radcoefs)
+{
+  struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+  struct atrtp_desc desc = ATRTP_DESC_NULL;
+  const double* node[4];
+  res_T res = RES_OK;
+  ASSERT(atrstm && prim && prim->nvertices == 4 && refract_id && radcoefs);
+
+  res = atrtp_get_desc(atrstm->atrtp, &desc);
+  if(res != RES_OK) goto error;
+
+  /* Setup the constant input arguments of the optical properties computation
+   * routine */
+  args.lambda = v4f_set1((float)refract_id->wavelength);
+  args.n = v4f_set1((float)refract_id->n);
+  args.kappa = v4f_set1((float)refract_id->kappa);
+  args.gyration_radius_prefactor = v4f_set1((float)atrstm->gyration_radius_prefactor);
+  args.fractal_dimension = v4f_set1((float)atrstm->fractal_dimension);
+  args.radcoefs_mask = radcoefs_mask;
+
+  /* Fetch the thermodynamic properties of the 4 primitive nodes */
+  node[0] = atrtp_desc_get_node_properties(&desc, prim->indices[0]);
+  node[1] = atrtp_desc_get_node_properties(&desc, prim->indices[1]);
+  node[2] = atrtp_desc_get_node_properties(&desc, prim->indices[2]);
+  node[3] = atrtp_desc_get_node_properties(&desc, prim->indices[3]);
+
+  /* Scatter Soot properties */
+  args.soot_volumic_fraction = v4f_set
+    ((float)node[0][ATRTP_SOOT_VOLFRAC],
+     (float)node[1][ATRTP_SOOT_VOLFRAC],
+     (float)node[2][ATRTP_SOOT_VOLFRAC],
+     (float)node[3][ATRTP_SOOT_VOLFRAC]);
+  args.soot_primary_particles_count = v4f_set
+    ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+     (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+     (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+     (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT]);
+  args.soot_primary_particles_diameter = v4f_set
+    ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+     (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+     (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+     (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER]);
+
+  /* Compute the per primitive node optical properties */
+  radcoefs_compute_simd4(radcoefs, &args);
+
+exit:
+  return res;
+error:
+  goto exit;
+}
diff --git a/src/atrstm_radcoefs_simd4.h b/src/atrstm_radcoefs_simd4.h
@@ -0,0 +1,204 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef ATRSTM_RADCOEFS_SIMD4_H
+#define ATRSTM_RADCOEFS_SIMD4_H
+
+#include "atrstm.h"
+#include "atrstm_radcoefs.h"
+
+#include <rsimd/math.h>
+#include <rsimd/rsimd.h>
+
+#include <rsys/math.h>
+#include <rsys/rsys.h>
+
+/* Radiative coefficients in m^-1 */
+struct radcoefs_simd4 {
+  v4f_T ka; /* Absorption coefficient */
+  v4f_T ks; /* Scattering coefficient */
+  v4f_T kext; /* Extinction coefficient */
+};
+static const struct radcoefs_simd4 RADCOEFS_SIMD4_NULL;
+
+struct radcoefs_compute_simd4_args {
+  v4f_T lambda; /* wavelength [nm] */
+  v4f_T n; /* Refractive index (real component) */
+  v4f_T kappa; /* Refractive index (imaginary component) */
+  v4f_T gyration_radius_prefactor;
+  v4f_T fractal_dimension;
+  v4f_T soot_volumic_fraction; /* In m^3(soot) / m^3 */
+  v4f_T soot_primary_particles_count;
+  v4f_T soot_primary_particles_diameter; /* In nm */
+  int radcoefs_mask; /* Combination of atrstm_radcoef_flag */
+};
+static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+
+/* Forward declarations */
+struct atrstm;
+struct atrri_refractive_index;
+struct suvm_primitive;
+
+extern LOCAL_SYM res_T
+primitive_compute_radcoefs_simd4
+  (const struct atrstm* atrstm,
+   const struct atrri_refractive_index* refract_id,
+   const struct suvm_primitive* prim,
+   const int radcoefs_mask,
+   struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */
+
+static INLINE res_T
+primitive_compute_radcoefs_range_simd4
+  (const struct atrstm* atrstm,
+   const struct atrri_refractive_index* refract_id,
+   const struct suvm_primitive* prim,
+   struct radcoefs* radcoefs_min,
+   struct radcoefs* radcoefs_max)
+{
+  struct radcoefs_simd4 radcoefs;
+  v4f_T ka[2];
+  v4f_T ks[2];
+  v4f_T kext[2];
+  res_T res = RES_OK;
+  ASSERT(radcoefs_min && radcoefs_max);
+
+  res = primitive_compute_radcoefs_simd4
+    (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs);
+  if(res != RES_OK) return res;
+
+  ka[0] = v4f_reduce_min(radcoefs.ka);
+  ka[1] = v4f_reduce_max(radcoefs.ka);
+  ks[0] = v4f_reduce_min(radcoefs.ks);
+  ks[1] = v4f_reduce_max(radcoefs.ks);
+  kext[0] = v4f_reduce_min(radcoefs.kext);
+  kext[1] = v4f_reduce_max(radcoefs.kext);
+
+  radcoefs_min->ka = v4f_x(ka[0]);
+  radcoefs_max->ka = v4f_x(ka[1]);
+  radcoefs_min->ks = v4f_x(ks[0]);
+  radcoefs_max->ks = v4f_x(ks[1]);
+  radcoefs_min->kext = v4f_x(kext[0]);
+  radcoefs_max->kext = v4f_x(kext[1]);
+
+  return RES_OK;
+}
+
+/* SIMD version of the radcoefs_compute function. Refer to its scalar version
+ * for informations of what it does */
+static INLINE void
+radcoefs_compute_simd4
+  (struct radcoefs_simd4* radcoefs,
+   const struct radcoefs_compute_simd4_args* args)
+{
+  /* Constant */
+  const v4f_T pi = v4f_set1((float)PI);
+
+  /* Input variables */
+  const v4f_T lambda = args->lambda; /* [nm] */
+  const v4f_T n = args->n;
+  const v4f_T kappa = args->kappa;
+  const v4f_T Fv = args->soot_volumic_fraction; /* [nm^3(soot)/nm] */
+  const v4f_T Np = args->soot_primary_particles_count;
+  const v4f_T Dp = args->soot_primary_particles_diameter; /* [nm] */
+  const v4f_T kf = args->gyration_radius_prefactor;
+  const v4f_T Df = args->fractal_dimension;
+  const int ka = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ka) != 0;
+  const int ks = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ks) != 0;
+  const int kext = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Kext) != 0;
+
+  /* SIMD mask */
+  const v4f_T is_Fv_not_null = v4f_neq(Fv, v4f_zero());
+  const v4f_T is_Np_not_null = v4f_neq(Np, v4f_zero());
+  const v4f_T query_radcoefs =
+    v4f_mask1(args->radcoefs_mask & ATRSTM_RADCOEFS_MASK_ALL ? ~0 : 0);
+  const v4f_T mask =
+    v4f_and(v4f_and(is_Fv_not_null, is_Np_not_null), query_radcoefs);
+
+  /* Clean up radiative coefficients */
+  radcoefs->ka = v4f_zero();
+  radcoefs->ks = v4f_zero();
+  radcoefs->kext = v4f_zero();
+
+  if(v4f_movemask(mask) != 0) {
+    /* Precomputed values */
+    const v4f_T n2 = v4f_mul(n, n);
+    const v4f_T kappa2 = v4f_mul(kappa, kappa);
+    const v4f_T four_n2_kappa2 = v4f_mul(v4f_set1(4), v4f_mul(n2, kappa2));
+    const v4f_T xp = v4f_div(v4f_mul(pi, Dp), lambda);
+    const v4f_T xp3 = v4f_mul(v4f_mul(xp, xp), xp);
+    const v4f_T k = v4f_div(v4f_mul(v4f_set1(2), pi), lambda);
+    const v4f_T k2 = v4f_mul(k, k);
+
+    const v4f_T Dp3 = v4f_mul(v4f_mul(Dp, Dp), Dp);
+    const v4f_T Np_pi_Dp3 = v4f_mul(v4f_mul(Np, pi), Dp3);
+    const v4f_T Va = v4f_div(Np_pi_Dp3, v4f_set1(6)); /* [nm^3] */
+    const v4f_T rho = v4f_div(Fv, Va); /* [#aggregates / nm^3] */
+    const v4f_T tmp0 = v4f_mul(v4f_set1(1e9f), rho);
+
+    /* E(m), m = n + i*kappa */
+    const v4f_T tmp1 = v4f_add(v4f_sub(n2, kappa2), v4f_set1(2));
+    const v4f_T denom = v4f_madd(tmp1, tmp1, four_n2_kappa2);
+    const v4f_T six_n_kappa = v4f_mul(v4f_mul(v4f_set1(6), n), kappa);
+    const v4f_T Em = v4f_div(six_n_kappa, denom);
+
+    if(ka || kext) {
+      /* Absorption cross section, [nm^3/aggrefate] */
+      const v4f_T four_pi_xp3 = v4f_mul(v4f_mul(v4f_set1(4), pi), xp3);
+      const v4f_T Cabs = v4f_mul(v4f_mul(Np, v4f_div(four_pi_xp3, k2)), Em);
+
+      /* Absorption coefficient, [m^-1] */
+      radcoefs->ka = v4f_and(v4f_mul(tmp0, Cabs), mask);
+    }
+
+    if(ks || kext) {
+      /* F(m), m = n + i*kappa */
+      const v4f_T n2_sub_kappa2 = v4f_sub(n2, kappa2);
+      const v4f_T n2_sub_kappa2_sub_1 = v4f_sub(n2_sub_kappa2, v4f_set1(1));
+      const v4f_T n2_sub_kappa2_add_2 = v4f_add(n2_sub_kappa2, v4f_set1(2));
+      const v4f_T tmp2 = v4f_mul(n2_sub_kappa2_sub_1, n2_sub_kappa2_add_2);
+      const v4f_T tmp3 = v4f_div(v4f_add(tmp2, four_n2_kappa2), denom);
+      const v4f_T Fm = v4f_madd(tmp3, tmp3, v4f_mul(Em, Em));
+
+      /* Gyration radius */
+      const v4f_T tmp4 = v4f_pow(v4f_div(Np, kf), v4f_rcp(Df));
+      const v4f_T Rg = v4f_mul(v4f_mul(v4f_set1(0.5f), Dp), tmp4); /* [nm] */
+      const v4f_T Rg2 = v4f_mul(Rg, Rg);
+      const v4f_T four_k2_Rg2 = v4f_mul(v4f_mul(v4f_set1(4), k2), Rg2);
+      const v4f_T tmp5 = v4f_div(four_k2_Rg2, v4f_mul(v4f_set1(3), Df));
+      const v4f_T tmp6 = v4f_add(v4f_set1(1), tmp5);
+      const v4f_T g = v4f_pow(tmp6, v4f_mul(Df, v4f_set1(-0.5f)));
+
+      /* Scattering cross section, [nm^3/aggrefate] */
+      const v4f_T Np2 = v4f_mul(Np, Np);
+      const v4f_T xp6 = v4f_mul(xp3, xp3);
+      const v4f_T eight_pi_xp6 = v4f_mul(v4f_mul(v4f_set1(8), pi), xp6);
+      const v4f_T tmp7 = v4f_div(eight_pi_xp6, v4f_mul(v4f_set1(3), k2));
+      const v4f_T Csca = v4f_mul(v4f_mul(v4f_mul(Np2, tmp7), Fm), g);
+
+      /* Scattering coefficient, [m^-1] */
+      radcoefs->ks = v4f_and(v4f_mul(tmp0, Csca), mask);
+    }
+
+    if(kext) {
+      radcoefs->kext = v4f_add(radcoefs->ka, radcoefs->ks);
+    }
+
+    ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(denom, v4f_zero())));
+    ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Df, v4f_zero())));
+    ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Dp, v4f_zero())));
+  }
+}
+
+#endif /* ATRSTM_RADCOEFS_SIMD4_H */
diff --git a/src/atrstm_setup_octrees.c b/src/atrstm_setup_octrees.c
@@ -23,6 +23,10 @@
 #include "atrstm_setup_octrees.h"
 #include "atrstm_svx.h"
 
+#ifdef ATRSTM_USE_SIMD
+  #include "atrstm_radcoefs_simd4.h"
+#endif
+
 #include <astoria/atrri.h>
 
 #include <star/suvm.h>
@@ -234,8 +238,13 @@ voxelize_partition
             voxel_commit_radcoefs_range
               (vox, ATRSTM_CPNT_GAS, &radcoefs_min, &radcoefs_max);
 
+#ifdef ATRSTM_USE_SIMD
+            res = primitive_compute_radcoefs_range_simd4
+              (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#else 
             res = primitive_compute_radcoefs_range
               (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#endif
             if(UNLIKELY(res != RES_OK)) goto error;
             voxel_commit_radcoefs_range
               (vox, ATRSTM_CPNT_SOOT, &radcoefs_min, &radcoefs_max);
diff --git a/src/test_atrstm_radcoefs.c b/src/test_atrstm_radcoefs.c
@@ -21,7 +21,7 @@ main(int argc, char** argv)
   const double ka_ref = 5.7382401729092799E-1;
   const double ks_ref = 7.2169062018378995E-6;
 
-  struct radcoefs radcoefs;
+  struct radcoefs radcoefs = RADCOEFS_NULL;
   struct radcoefs_compute_args args = RADCOEFS_COMPUTE_ARGS_NULL;
   (void)argc, (void)argv;
 
@@ -60,5 +60,20 @@ main(int argc, char** argv)
   radcoefs_compute(&radcoefs, &args);
   CHK(eq_eps(radcoefs.kext, ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
 
+  args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+
+  args.soot_primary_particles_count = 0;
+  radcoefs_compute(&radcoefs, &args);
+  CHK(radcoefs.ka == 0);
+  CHK(radcoefs.ks == 0);
+  CHK(radcoefs.kext == 0);
+
+  args.soot_primary_particles_count = 100;
+  args.soot_volumic_fraction = 0;
+  radcoefs_compute(&radcoefs, &args);
+  CHK(radcoefs.ka == 0);
+  CHK(radcoefs.ks == 0);
+  CHK(radcoefs.kext == 0);
+
   return 0;
 }
diff --git a/src/test_atrstm_radcoefs_simd.c b/src/test_atrstm_radcoefs_simd.c
@@ -0,0 +1,151 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_radcoefs_simd4.c"
+
+int
+main(int argc, char** argv)
+{
+  const double ka_ref = 5.7382401729092799E-1;
+  const double ks_ref = 7.2169062018378995E-6;
+
+  const double ka_ref2[4] = {
+    0.52178067472799794,
+    0.52178067472799794,
+    1.0435613494559959,
+    0.52178067472799794
+  };
+  const double ks_ref2[4] = {
+    9.6010140939975883e-002,
+    3.3272961678492224e-002,
+    0.19202028187995177,
+    9.9964602374815484e-002
+  };
+  struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL;
+  struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+  float ALIGN(16) ka[4], ks[4], kext[4];
+  (void)argc, (void)argv;
+
+  args.lambda = v4f_set1(633.f);
+  args.n = v4f_set1(1.90f);
+  args.kappa = v4f_set1(0.55f);
+  args.gyration_radius_prefactor = v4f_set1(1.70f);
+  args.fractal_dimension = v4f_set1(1.75f);
+  args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f);
+  args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f);
+  args.soot_primary_particles_diameter = v4f_set1(1.f);
+  args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+
+  radcoefs_compute_simd4(&radcoefs, &args);
+
+  v4f_store(ka, radcoefs.ka);
+  v4f_store(ks, radcoefs.ks);
+  v4f_store(kext, radcoefs.kext);
+  printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+    SPLIT4(ka), SPLIT4(ks));
+
+  CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+  CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+  CHK(ka[1] == 0);
+  CHK(ka[2] == 0);
+
+  CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+  CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+  CHK(ks[1] == 0);
+  CHK(ks[2] == 0);
+
+  CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+  CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+  CHK(kext[1] == 0);
+  CHK(kext[2] == 0);
+
+  args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka;
+  radcoefs_compute_simd4(&radcoefs, &args);
+  v4f_store(ka, radcoefs.ka);
+  v4f_store(ks, radcoefs.ks);
+  v4f_store(kext, radcoefs.kext);
+  CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+  CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+  CHK(ka[1] == 0);
+  CHK(ka[2] == 0);
+  CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0);
+  CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+  args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks;
+  radcoefs_compute_simd4(&radcoefs, &args);
+  v4f_store(ka, radcoefs.ka);
+  v4f_store(ks, radcoefs.ks);
+  v4f_store(kext, radcoefs.kext);
+  CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+  CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+  CHK(ks[1] == 0);
+  CHK(ks[2] == 0);
+  CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0);
+  CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+  /* Note that actually even though Ka and Ks are note required they are
+   * internally computed to evaluate kext and are returned to the caller. Their
+   * value are thus not null */
+  args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext;
+  radcoefs_compute_simd4(&radcoefs, &args);
+  v4f_store(kext, radcoefs.kext);
+  CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+  CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+  CHK(kext[1] == 0);
+  CHK(kext[2] == 0);
+
+  args.lambda = v4f_set1(633.f);
+  args.n = v4f_set1(1.75f);
+  args.kappa = v4f_set1(0.435f);
+  args.gyration_radius_prefactor = v4f_set1(1.70f);
+  args.fractal_dimension = v4f_set1(1.75f);
+  args.soot_volumic_fraction = v4f_set
+    (9.9999999999999995e-008f,
+     9.9999999999999995e-008f,
+     1.9999999999999999e-007f,
+     9.9999999999999995e-008f);
+  args.soot_primary_particles_count = v4f_set
+    (400.f,
+     400.f,
+     400.f,
+     800.f);
+  args.soot_primary_particles_diameter = v4f_set
+    (34.000000006413003f,
+     17.000000003206502f,
+     34.000000006413003f,
+     34.000000006413003f);
+  args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+  radcoefs_compute_simd4(&radcoefs, &args);
+  v4f_store(ka, radcoefs.ka);
+  v4f_store(ks, radcoefs.ks);
+  v4f_store(kext, radcoefs.kext);
+  printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+    SPLIT4(ka), SPLIT4(ks));
+  CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6));
+  CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6));
+  CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6));
+  CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6));
+  CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6));
+  CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6));
+  CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6));
+  CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6));
+  CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6));
+  CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6));
+  CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6));
+  CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6));
+
+  return 0;
+}
+

	atrstm Load and structure a combustion gas mixture
	git clone git://git.meso-star.fr/atrstm.git
	Log \| Files \| Refs \| README \| LICENSE

M	cmake/CMakeLists.txt	\|	35	++++++++++++++++++++++++++++++++---
A	src/atrstm_radcoefs_simd4.c	\|	81	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/atrstm_radcoefs_simd4.h	\|	204	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/atrstm_setup_octrees.c	\|	9	+++++++++
M	src/test_atrstm_radcoefs.c	\|	17	++++++++++++++++-
A	src/test_atrstm_radcoefs_simd.c	\|	151	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++