commit 16f9712ee6acaa269a691992046fdf3c13a5e41a
parent 477fb701f096b706c222a18958146814be6d5a8b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Tue, 2 Feb 2021 09:02:33 +0100
Merge branch 'feature_simd'
Diffstat:
6 files changed, 493 insertions(+), 4 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 CNRS
+# Copyright (C) 2020, 2021 CNRS
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.1)
project(atrstm C)
enable_testing()
@@ -31,6 +31,16 @@ find_package(RSys 0.12 REQUIRED)
find_package(StarTetraHedra 0.0 REQUIRED)
find_package(StarUVM 0.0 REQUIRED)
find_package(StarVX 0.2 REQUIRED)
+find_package(RSIMD 0.3)
+if(RSIMD_FOUND)
+ option(USE_SIMD "Use SIMD instruction sets" ON)
+endif()
+
+if(USE_SIMD)
+ message(STATUS "Use SIMD instruction sets")
+else()
+ message(STATUS "Do not use SIMD instruction sets")
+endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR})
include(rcmake)
@@ -77,15 +87,31 @@ set(ATRSTM_FILES_INC_API
set(ATRSTM_FILES_DOC COPYING README.md)
+# SIMD files
+if(USE_SIMD)
+ set(ATRSTM_FILES_SRC ${ATRSTM_FILES_SRC} atrstm_radcoefs_simd4.c)
+ set(ATRSTM_FILES_INC ${ATRSTM_FILES_INC} atrstm_radcoefs_simd4.h)
+endif()
+
# Prepend each file in the `ATRSTM_FILES_<SRC|INC>' list by `ATRSTM_SOURCE_DIR'
rcmake_prepend_path(ATRSTM_FILES_SRC ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_INC ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_INC_API ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
-add_library(atrstm SHARED ${ATRSTM_FILES_SRC} ${ATRSTM_FILES_INC} ${ATRSTM_FILES_INC_API})
+add_library(atrstm SHARED
+ ${ATRSTM_FILES_SRC}
+ ${ATRSTM_FILES_INC}
+ ${ATRSTM_FILES_INC_API})
target_link_libraries(atrstm AtrRI AtrTP RSys StarTetraHedra StarUVM StarVX m)
+# Setup SIMD support on the target
+if(USE_SIMD)
+ target_link_libraries(atrstm RSIMD)
+ set_target_properties(atrstm PROPERTIES
+ COMPILE_DEFINITIONS ATRSTM_USE_SIMD)
+endif()
+
if(CMAKE_COMPILER_IS_GNUCC)
set_target_properties(atrstm PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS}")
endif()
@@ -115,6 +141,9 @@ if(NOT NO_TEST)
build_test(test_atrstm)
new_test(test_atrstm_radcoefs)
+ if(USE_SIMD)
+ new_test(test_atrstm_radcoefs_simd)
+ endif()
endif()
################################################################################
diff --git a/src/atrstm_radcoefs_simd4.c b/src/atrstm_radcoefs_simd4.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_c.h"
+#include "atrstm_radcoefs_simd4.h"
+
+#include <astoria/atrtp.h>
+#include <star/suvm.h>
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+primitive_compute_radcoefs_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ const int radcoefs_mask, /* Combination of atrstm_radcoef_flag */
+ struct radcoefs_simd4* radcoefs)
+{
+ struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+ struct atrtp_desc desc = ATRTP_DESC_NULL;
+ const double* node[4];
+ res_T res = RES_OK;
+ ASSERT(atrstm && prim && prim->nvertices == 4 && refract_id && radcoefs);
+
+ res = atrtp_get_desc(atrstm->atrtp, &desc);
+ if(res != RES_OK) goto error;
+
+ /* Setup the constant input arguments of the optical properties computation
+ * routine */
+ args.lambda = v4f_set1((float)refract_id->wavelength);
+ args.n = v4f_set1((float)refract_id->n);
+ args.kappa = v4f_set1((float)refract_id->kappa);
+ args.gyration_radius_prefactor = v4f_set1((float)atrstm->gyration_radius_prefactor);
+ args.fractal_dimension = v4f_set1((float)atrstm->fractal_dimension);
+ args.radcoefs_mask = radcoefs_mask;
+
+ /* Fetch the thermodynamic properties of the 4 primitive nodes */
+ node[0] = atrtp_desc_get_node_properties(&desc, prim->indices[0]);
+ node[1] = atrtp_desc_get_node_properties(&desc, prim->indices[1]);
+ node[2] = atrtp_desc_get_node_properties(&desc, prim->indices[2]);
+ node[3] = atrtp_desc_get_node_properties(&desc, prim->indices[3]);
+
+ /* Scatter Soot properties */
+ args.soot_volumic_fraction = v4f_set
+ ((float)node[0][ATRTP_SOOT_VOLFRAC],
+ (float)node[1][ATRTP_SOOT_VOLFRAC],
+ (float)node[2][ATRTP_SOOT_VOLFRAC],
+ (float)node[3][ATRTP_SOOT_VOLFRAC]);
+ args.soot_primary_particles_count = v4f_set
+ ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT]);
+ args.soot_primary_particles_diameter = v4f_set
+ ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER]);
+
+ /* Compute the per primitive node optical properties */
+ radcoefs_compute_simd4(radcoefs, &args);
+
+exit:
+ return res;
+error:
+ goto exit;
+}
diff --git a/src/atrstm_radcoefs_simd4.h b/src/atrstm_radcoefs_simd4.h
@@ -0,0 +1,204 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef ATRSTM_RADCOEFS_SIMD4_H
+#define ATRSTM_RADCOEFS_SIMD4_H
+
+#include "atrstm.h"
+#include "atrstm_radcoefs.h"
+
+#include <rsimd/math.h>
+#include <rsimd/rsimd.h>
+
+#include <rsys/math.h>
+#include <rsys/rsys.h>
+
+/* Radiative coefficients in m^-1 */
+struct radcoefs_simd4 {
+ v4f_T ka; /* Absorption coefficient */
+ v4f_T ks; /* Scattering coefficient */
+ v4f_T kext; /* Extinction coefficient */
+};
+static const struct radcoefs_simd4 RADCOEFS_SIMD4_NULL;
+
+struct radcoefs_compute_simd4_args {
+ v4f_T lambda; /* wavelength [nm] */
+ v4f_T n; /* Refractive index (real component) */
+ v4f_T kappa; /* Refractive index (imaginary component) */
+ v4f_T gyration_radius_prefactor;
+ v4f_T fractal_dimension;
+ v4f_T soot_volumic_fraction; /* In m^3(soot) / m^3 */
+ v4f_T soot_primary_particles_count;
+ v4f_T soot_primary_particles_diameter; /* In nm */
+ int radcoefs_mask; /* Combination of atrstm_radcoef_flag */
+};
+static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+
+/* Forward declarations */
+struct atrstm;
+struct atrri_refractive_index;
+struct suvm_primitive;
+
+extern LOCAL_SYM res_T
+primitive_compute_radcoefs_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ const int radcoefs_mask,
+ struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */
+
+static INLINE res_T
+primitive_compute_radcoefs_range_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ struct radcoefs* radcoefs_min,
+ struct radcoefs* radcoefs_max)
+{
+ struct radcoefs_simd4 radcoefs;
+ v4f_T ka[2];
+ v4f_T ks[2];
+ v4f_T kext[2];
+ res_T res = RES_OK;
+ ASSERT(radcoefs_min && radcoefs_max);
+
+ res = primitive_compute_radcoefs_simd4
+ (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs);
+ if(res != RES_OK) return res;
+
+ ka[0] = v4f_reduce_min(radcoefs.ka);
+ ka[1] = v4f_reduce_max(radcoefs.ka);
+ ks[0] = v4f_reduce_min(radcoefs.ks);
+ ks[1] = v4f_reduce_max(radcoefs.ks);
+ kext[0] = v4f_reduce_min(radcoefs.kext);
+ kext[1] = v4f_reduce_max(radcoefs.kext);
+
+ radcoefs_min->ka = v4f_x(ka[0]);
+ radcoefs_max->ka = v4f_x(ka[1]);
+ radcoefs_min->ks = v4f_x(ks[0]);
+ radcoefs_max->ks = v4f_x(ks[1]);
+ radcoefs_min->kext = v4f_x(kext[0]);
+ radcoefs_max->kext = v4f_x(kext[1]);
+
+ return RES_OK;
+}
+
+/* SIMD version of the radcoefs_compute function. Refer to its scalar version
+ * for informations of what it does */
+static INLINE void
+radcoefs_compute_simd4
+ (struct radcoefs_simd4* radcoefs,
+ const struct radcoefs_compute_simd4_args* args)
+{
+ /* Constant */
+ const v4f_T pi = v4f_set1((float)PI);
+
+ /* Input variables */
+ const v4f_T lambda = args->lambda; /* [nm] */
+ const v4f_T n = args->n;
+ const v4f_T kappa = args->kappa;
+ const v4f_T Fv = args->soot_volumic_fraction; /* [nm^3(soot)/nm] */
+ const v4f_T Np = args->soot_primary_particles_count;
+ const v4f_T Dp = args->soot_primary_particles_diameter; /* [nm] */
+ const v4f_T kf = args->gyration_radius_prefactor;
+ const v4f_T Df = args->fractal_dimension;
+ const int ka = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ka) != 0;
+ const int ks = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ks) != 0;
+ const int kext = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Kext) != 0;
+
+ /* SIMD mask */
+ const v4f_T is_Fv_not_null = v4f_neq(Fv, v4f_zero());
+ const v4f_T is_Np_not_null = v4f_neq(Np, v4f_zero());
+ const v4f_T query_radcoefs =
+ v4f_mask1(args->radcoefs_mask & ATRSTM_RADCOEFS_MASK_ALL ? ~0 : 0);
+ const v4f_T mask =
+ v4f_and(v4f_and(is_Fv_not_null, is_Np_not_null), query_radcoefs);
+
+ /* Clean up radiative coefficients */
+ radcoefs->ka = v4f_zero();
+ radcoefs->ks = v4f_zero();
+ radcoefs->kext = v4f_zero();
+
+ if(v4f_movemask(mask) != 0) {
+ /* Precomputed values */
+ const v4f_T n2 = v4f_mul(n, n);
+ const v4f_T kappa2 = v4f_mul(kappa, kappa);
+ const v4f_T four_n2_kappa2 = v4f_mul(v4f_set1(4), v4f_mul(n2, kappa2));
+ const v4f_T xp = v4f_div(v4f_mul(pi, Dp), lambda);
+ const v4f_T xp3 = v4f_mul(v4f_mul(xp, xp), xp);
+ const v4f_T k = v4f_div(v4f_mul(v4f_set1(2), pi), lambda);
+ const v4f_T k2 = v4f_mul(k, k);
+
+ const v4f_T Dp3 = v4f_mul(v4f_mul(Dp, Dp), Dp);
+ const v4f_T Np_pi_Dp3 = v4f_mul(v4f_mul(Np, pi), Dp3);
+ const v4f_T Va = v4f_div(Np_pi_Dp3, v4f_set1(6)); /* [nm^3] */
+ const v4f_T rho = v4f_div(Fv, Va); /* [#aggregates / nm^3] */
+ const v4f_T tmp0 = v4f_mul(v4f_set1(1e9f), rho);
+
+ /* E(m), m = n + i*kappa */
+ const v4f_T tmp1 = v4f_add(v4f_sub(n2, kappa2), v4f_set1(2));
+ const v4f_T denom = v4f_madd(tmp1, tmp1, four_n2_kappa2);
+ const v4f_T six_n_kappa = v4f_mul(v4f_mul(v4f_set1(6), n), kappa);
+ const v4f_T Em = v4f_div(six_n_kappa, denom);
+
+ if(ka || kext) {
+ /* Absorption cross section, [nm^3/aggrefate] */
+ const v4f_T four_pi_xp3 = v4f_mul(v4f_mul(v4f_set1(4), pi), xp3);
+ const v4f_T Cabs = v4f_mul(v4f_mul(Np, v4f_div(four_pi_xp3, k2)), Em);
+
+ /* Absorption coefficient, [m^-1] */
+ radcoefs->ka = v4f_and(v4f_mul(tmp0, Cabs), mask);
+ }
+
+ if(ks || kext) {
+ /* F(m), m = n + i*kappa */
+ const v4f_T n2_sub_kappa2 = v4f_sub(n2, kappa2);
+ const v4f_T n2_sub_kappa2_sub_1 = v4f_sub(n2_sub_kappa2, v4f_set1(1));
+ const v4f_T n2_sub_kappa2_add_2 = v4f_add(n2_sub_kappa2, v4f_set1(2));
+ const v4f_T tmp2 = v4f_mul(n2_sub_kappa2_sub_1, n2_sub_kappa2_add_2);
+ const v4f_T tmp3 = v4f_div(v4f_add(tmp2, four_n2_kappa2), denom);
+ const v4f_T Fm = v4f_madd(tmp3, tmp3, v4f_mul(Em, Em));
+
+ /* Gyration radius */
+ const v4f_T tmp4 = v4f_pow(v4f_div(Np, kf), v4f_rcp(Df));
+ const v4f_T Rg = v4f_mul(v4f_mul(v4f_set1(0.5f), Dp), tmp4); /* [nm] */
+ const v4f_T Rg2 = v4f_mul(Rg, Rg);
+ const v4f_T four_k2_Rg2 = v4f_mul(v4f_mul(v4f_set1(4), k2), Rg2);
+ const v4f_T tmp5 = v4f_div(four_k2_Rg2, v4f_mul(v4f_set1(3), Df));
+ const v4f_T tmp6 = v4f_add(v4f_set1(1), tmp5);
+ const v4f_T g = v4f_pow(tmp6, v4f_mul(Df, v4f_set1(-0.5f)));
+
+ /* Scattering cross section, [nm^3/aggrefate] */
+ const v4f_T Np2 = v4f_mul(Np, Np);
+ const v4f_T xp6 = v4f_mul(xp3, xp3);
+ const v4f_T eight_pi_xp6 = v4f_mul(v4f_mul(v4f_set1(8), pi), xp6);
+ const v4f_T tmp7 = v4f_div(eight_pi_xp6, v4f_mul(v4f_set1(3), k2));
+ const v4f_T Csca = v4f_mul(v4f_mul(v4f_mul(Np2, tmp7), Fm), g);
+
+ /* Scattering coefficient, [m^-1] */
+ radcoefs->ks = v4f_and(v4f_mul(tmp0, Csca), mask);
+ }
+
+ if(kext) {
+ radcoefs->kext = v4f_add(radcoefs->ka, radcoefs->ks);
+ }
+
+ ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(denom, v4f_zero())));
+ ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Df, v4f_zero())));
+ ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Dp, v4f_zero())));
+ }
+}
+
+#endif /* ATRSTM_RADCOEFS_SIMD4_H */
diff --git a/src/atrstm_setup_octrees.c b/src/atrstm_setup_octrees.c
@@ -23,6 +23,10 @@
#include "atrstm_setup_octrees.h"
#include "atrstm_svx.h"
+#ifdef ATRSTM_USE_SIMD
+ #include "atrstm_radcoefs_simd4.h"
+#endif
+
#include <astoria/atrri.h>
#include <star/suvm.h>
@@ -234,8 +238,13 @@ voxelize_partition
voxel_commit_radcoefs_range
(vox, ATRSTM_CPNT_GAS, &radcoefs_min, &radcoefs_max);
+#ifdef ATRSTM_USE_SIMD
+ res = primitive_compute_radcoefs_range_simd4
+ (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#else
res = primitive_compute_radcoefs_range
(atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#endif
if(UNLIKELY(res != RES_OK)) goto error;
voxel_commit_radcoefs_range
(vox, ATRSTM_CPNT_SOOT, &radcoefs_min, &radcoefs_max);
diff --git a/src/test_atrstm_radcoefs.c b/src/test_atrstm_radcoefs.c
@@ -21,7 +21,7 @@ main(int argc, char** argv)
const double ka_ref = 5.7382401729092799E-1;
const double ks_ref = 7.2169062018378995E-6;
- struct radcoefs radcoefs;
+ struct radcoefs radcoefs = RADCOEFS_NULL;
struct radcoefs_compute_args args = RADCOEFS_COMPUTE_ARGS_NULL;
(void)argc, (void)argv;
@@ -60,5 +60,20 @@ main(int argc, char** argv)
radcoefs_compute(&radcoefs, &args);
CHK(eq_eps(radcoefs.kext, ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+
+ args.soot_primary_particles_count = 0;
+ radcoefs_compute(&radcoefs, &args);
+ CHK(radcoefs.ka == 0);
+ CHK(radcoefs.ks == 0);
+ CHK(radcoefs.kext == 0);
+
+ args.soot_primary_particles_count = 100;
+ args.soot_volumic_fraction = 0;
+ radcoefs_compute(&radcoefs, &args);
+ CHK(radcoefs.ka == 0);
+ CHK(radcoefs.ks == 0);
+ CHK(radcoefs.kext == 0);
+
return 0;
}
diff --git a/src/test_atrstm_radcoefs_simd.c b/src/test_atrstm_radcoefs_simd.c
@@ -0,0 +1,151 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_radcoefs_simd4.c"
+
+int
+main(int argc, char** argv)
+{
+ const double ka_ref = 5.7382401729092799E-1;
+ const double ks_ref = 7.2169062018378995E-6;
+
+ const double ka_ref2[4] = {
+ 0.52178067472799794,
+ 0.52178067472799794,
+ 1.0435613494559959,
+ 0.52178067472799794
+ };
+ const double ks_ref2[4] = {
+ 9.6010140939975883e-002,
+ 3.3272961678492224e-002,
+ 0.19202028187995177,
+ 9.9964602374815484e-002
+ };
+ struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL;
+ struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+ float ALIGN(16) ka[4], ks[4], kext[4];
+ (void)argc, (void)argv;
+
+ args.lambda = v4f_set1(633.f);
+ args.n = v4f_set1(1.90f);
+ args.kappa = v4f_set1(0.55f);
+ args.gyration_radius_prefactor = v4f_set1(1.70f);
+ args.fractal_dimension = v4f_set1(1.75f);
+ args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f);
+ args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f);
+ args.soot_primary_particles_diameter = v4f_set1(1.f);
+ args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+
+ radcoefs_compute_simd4(&radcoefs, &args);
+
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+ SPLIT4(ka), SPLIT4(ks));
+
+ CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+ CHK(ka[1] == 0);
+ CHK(ka[2] == 0);
+
+ CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+ CHK(ks[1] == 0);
+ CHK(ks[2] == 0);
+
+ CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(kext[1] == 0);
+ CHK(kext[2] == 0);
+
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+ CHK(ka[1] == 0);
+ CHK(ka[2] == 0);
+ CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0);
+ CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+ CHK(ks[1] == 0);
+ CHK(ks[2] == 0);
+ CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0);
+ CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+ /* Note that actually even though Ka and Ks are note required they are
+ * internally computed to evaluate kext and are returned to the caller. Their
+ * value are thus not null */
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(kext[1] == 0);
+ CHK(kext[2] == 0);
+
+ args.lambda = v4f_set1(633.f);
+ args.n = v4f_set1(1.75f);
+ args.kappa = v4f_set1(0.435f);
+ args.gyration_radius_prefactor = v4f_set1(1.70f);
+ args.fractal_dimension = v4f_set1(1.75f);
+ args.soot_volumic_fraction = v4f_set
+ (9.9999999999999995e-008f,
+ 9.9999999999999995e-008f,
+ 1.9999999999999999e-007f,
+ 9.9999999999999995e-008f);
+ args.soot_primary_particles_count = v4f_set
+ (400.f,
+ 400.f,
+ 400.f,
+ 800.f);
+ args.soot_primary_particles_diameter = v4f_set
+ (34.000000006413003f,
+ 17.000000003206502f,
+ 34.000000006413003f,
+ 34.000000006413003f);
+ args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+ SPLIT4(ka), SPLIT4(ks));
+ CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6));
+ CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6));
+ CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6));
+ CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6));
+ CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6));
+ CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6));
+ CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6));
+ CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6));
+ CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6));
+ CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6));
+
+ return 0;
+}
+