commit dabafcbc621834b06af3f74677405aef2315ef33
parent 83e34981cd05b07d2b156c24cfa911f442822251
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Mon, 1 Feb 2021 14:48:57 +0100
Add the primitive_compute_radcoefs_[range]_simd4 functions
Setup the CMakeFile to optionally support SIMD instruction sets.
Diffstat:
6 files changed, 328 insertions(+), 151 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 CNRS
+# Copyright (C) 2020, 2021 CNRS
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -28,10 +28,19 @@ find_package(AtrTP 0.0 REQUIRED)
find_package(OpenMP 1.2 REQUIRED)
find_package(RCMake 0.4 REQUIRED)
find_package(RSys 0.12 REQUIRED)
-find_package(RSIMD 0.3 REQUIRED)
find_package(StarTetraHedra 0.0 REQUIRED)
find_package(StarUVM 0.0 REQUIRED)
find_package(StarVX 0.2 REQUIRED)
+find_package(RSIMD 0.3)
+if(RSIMD_FOUND)
+ option(USE_SIMD "Use SIMD instruction sets" ON)
+endif()
+
+if(USE_SIMD)
+ message(STATUS "Use SIMD instruction sets")
+else()
+ message(STATUS "Do not use SIMD instruction sets")
+endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR})
include(rcmake)
@@ -70,7 +79,6 @@ set(ATRSTM_FILES_INC
atrstm_log.h
atrstm_partition.h
atrstm_radcoefs.h
- atrstm_radcoefs_simd4.h
atrstm_setup_octrees.h
atrstm_svx.h)
@@ -79,14 +87,30 @@ set(ATRSTM_FILES_INC_API
set(ATRSTM_FILES_DOC COPYING README.md)
+# SIMD files
+if(USE_SIMD)
+ set(ATRSTM_FILES_SRC ${ATRSTM_FILES_SRC} atrstm_radcoefs_simd4.c)
+ set(ATRSTM_FILES_INC ${ATRSTM_FILES_INC} atrstm_radcoefs_simd4.h)
+endif()
+
# Prepend each file in the `ATRSTM_FILES_<SRC|INC>' list by `ATRSTM_SOURCE_DIR'
rcmake_prepend_path(ATRSTM_FILES_SRC ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_INC ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_INC_API ${ATRSTM_SOURCE_DIR})
rcmake_prepend_path(ATRSTM_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
-add_library(atrstm SHARED ${ATRSTM_FILES_SRC} ${ATRSTM_FILES_INC} ${ATRSTM_FILES_INC_API})
-target_link_libraries(atrstm AtrRI AtrTP RSys RSIMD StarTetraHedra StarUVM StarVX m)
+add_library(atrstm SHARED
+ ${ATRSTM_FILES_SRC}
+ ${ATRSTM_FILES_INC}
+ ${ATRSTM_FILES_INC_API})
+target_link_libraries(atrstm AtrRI AtrTP RSys StarTetraHedra StarUVM StarVX m)
+
+# Setup SIMD support on the target
+if(USE_SIMD)
+ target_link_libraries(atrstm RSIMD)
+ set_target_properties(atrstm PROPERTIES
+ COMPILE_DEFINITIONS ATRSTM_USE_SIMD)
+endif()
if(CMAKE_COMPILER_IS_GNUCC)
set_target_properties(atrstm PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS}")
@@ -117,6 +141,9 @@ if(NOT NO_TEST)
build_test(test_atrstm)
new_test(test_atrstm_radcoefs)
+ if(USE_SIMD)
+ new_test(test_atrstm_radcoefs_simd)
+ endif()
endif()
################################################################################
diff --git a/src/atrstm_radcoefs_simd4.c b/src/atrstm_radcoefs_simd4.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_c.h"
+#include "atrstm_radcoefs_simd4.h"
+
+#include <astoria/atrtp.h>
+#include <star/suvm.h>
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+primitive_compute_radcoefs_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ const int radcoefs_mask, /* Combination of atrstm_radcoef_flag */
+ struct radcoefs_simd4* radcoefs)
+{
+ struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+ struct atrtp_desc desc = ATRTP_DESC_NULL;
+ const double* node[4];
+ res_T res = RES_OK;
+ ASSERT(atrstm && prim && prim->nvertices == 4 && refract_id && radcoefs);
+
+ res = atrtp_get_desc(atrstm->atrtp, &desc);
+ if(res != RES_OK) goto error;
+
+ /* Setup the constant input arguments of the optical properties computation
+ * routine */
+ args.lambda = v4f_set1((float)refract_id->wavelength);
+ args.n = v4f_set1((float)refract_id->n);
+ args.kappa = v4f_set1((float)refract_id->kappa);
+ args.gyration_radius_prefactor = v4f_set1((float)atrstm->gyration_radius_prefactor);
+ args.fractal_dimension = v4f_set1((float)atrstm->fractal_dimension);
+ args.radcoefs_mask = radcoefs_mask;
+
+ /* Fetch the thermodynamic properties of the 4 primitive nodes */
+ node[0] = atrtp_desc_get_node_properties(&desc, prim->indices[0]);
+ node[1] = atrtp_desc_get_node_properties(&desc, prim->indices[1]);
+ node[2] = atrtp_desc_get_node_properties(&desc, prim->indices[2]);
+ node[3] = atrtp_desc_get_node_properties(&desc, prim->indices[3]);
+
+ /* Scatter Soot properties */
+ args.soot_volumic_fraction = v4f_set
+ ((float)node[0][ATRTP_SOOT_VOLFRAC],
+ (float)node[1][ATRTP_SOOT_VOLFRAC],
+ (float)node[2][ATRTP_SOOT_VOLFRAC],
+ (float)node[3][ATRTP_SOOT_VOLFRAC]);
+ args.soot_primary_particles_count = v4f_set
+ ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT],
+ (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT]);
+ args.soot_primary_particles_diameter = v4f_set
+ ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER],
+ (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER]);
+
+ /* Compute the per primitive node optical properties */
+ radcoefs_compute_simd4(radcoefs, &args);
+
+exit:
+ return res;
+error:
+ goto exit;
+}
diff --git a/src/atrstm_radcoefs_simd4.h b/src/atrstm_radcoefs_simd4.h
@@ -17,6 +17,7 @@
#define ATRSTM_RADCOEFS_SIMD4_H
#include "atrstm.h"
+#include "atrstm_radcoefs.h"
#include <rsimd/math.h>
#include <rsimd/rsimd.h>
@@ -45,8 +46,57 @@ struct radcoefs_compute_simd4_args {
};
static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
-/* SIMD version of the radcoefs_compute function. Please refer to its scalar
- * version for informations of what it does */
+/* Forward declarations */
+struct atrstm;
+struct atrri_refractive_index;
+struct suvm_primitive;
+
+extern LOCAL_SYM res_T
+primitive_compute_radcoefs_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ const int radcoefs_mask,
+ struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */
+
+static INLINE res_T
+primitive_compute_radcoefs_range_simd4
+ (const struct atrstm* atrstm,
+ const struct atrri_refractive_index* refract_id,
+ const struct suvm_primitive* prim,
+ struct radcoefs* radcoefs_min,
+ struct radcoefs* radcoefs_max)
+{
+ struct radcoefs_simd4 radcoefs;
+ v4f_T ka[2];
+ v4f_T ks[2];
+ v4f_T kext[2];
+ res_T res = RES_OK;
+ ASSERT(radcoefs_min && radcoefs_max);
+
+ res = primitive_compute_radcoefs_simd4
+ (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs);
+ if(res != RES_OK) return res;
+
+ ka[0] = v4f_reduce_min(radcoefs.ka);
+ ka[1] = v4f_reduce_max(radcoefs.ka);
+ ks[0] = v4f_reduce_min(radcoefs.ks);
+ ks[1] = v4f_reduce_max(radcoefs.ks);
+ kext[0] = v4f_reduce_min(radcoefs.kext);
+ kext[1] = v4f_reduce_max(radcoefs.kext);
+
+ radcoefs_min->ka = v4f_x(ka[0]);
+ radcoefs_max->ka = v4f_x(ka[1]);
+ radcoefs_min->ks = v4f_x(ks[0]);
+ radcoefs_max->ks = v4f_x(ks[1]);
+ radcoefs_min->kext = v4f_x(kext[0]);
+ radcoefs_max->kext = v4f_x(kext[1]);
+
+ return RES_OK;
+}
+
+/* SIMD version of the radcoefs_compute function. Refer to its scalar version
+ * for informations of what it does */
static INLINE void
radcoefs_compute_simd4
(struct radcoefs_simd4* radcoefs,
diff --git a/src/atrstm_setup_octrees.c b/src/atrstm_setup_octrees.c
@@ -23,6 +23,10 @@
#include "atrstm_setup_octrees.h"
#include "atrstm_svx.h"
+#ifdef ATRSTM_USE_SIMD
+ #include "atrstm_radcoefs_simd4.h"
+#endif
+
#include <astoria/atrri.h>
#include <star/suvm.h>
@@ -234,8 +238,13 @@ voxelize_partition
voxel_commit_radcoefs_range
(vox, ATRSTM_CPNT_GAS, &radcoefs_min, &radcoefs_max);
+#ifdef ATRSTM_USE_SIMD
+ res = primitive_compute_radcoefs_range_simd4
+ (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#else
res = primitive_compute_radcoefs_range
(atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max);
+#endif
if(UNLIKELY(res != RES_OK)) goto error;
voxel_commit_radcoefs_range
(vox, ATRSTM_CPNT_SOOT, &radcoefs_min, &radcoefs_max);
diff --git a/src/test_atrstm_radcoefs.c b/src/test_atrstm_radcoefs.c
@@ -14,16 +14,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "atrstm_radcoefs.h"
-#include "atrstm_radcoefs_simd4.h"
-static void
-test_scalar(void)
+int
+main(int argc, char** argv)
{
const double ka_ref = 5.7382401729092799E-1;
const double ks_ref = 7.2169062018378995E-6;
struct radcoefs radcoefs = RADCOEFS_NULL;
struct radcoefs_compute_args args = RADCOEFS_COMPUTE_ARGS_NULL;
+ (void)argc, (void)argv;
args.lambda = 633;
args.n = 1.90;
@@ -74,147 +74,6 @@ test_scalar(void)
CHK(radcoefs.ka == 0);
CHK(radcoefs.ks == 0);
CHK(radcoefs.kext == 0);
-}
-
-static void
-test_simd(void)
-{
- const double ka_ref = 5.7382401729092799E-1;
- const double ks_ref = 7.2169062018378995E-6;
-
- const double ka_ref2[4] = {
- 0.52178067472799794,
- 0.52178067472799794,
- 1.0435613494559959,
- 0.52178067472799794
- };
- const double ks_ref2[4] = {
- 9.6010140939975883e-002,
- 3.3272961678492224e-002,
- 0.19202028187995177,
- 9.9964602374815484e-002
- };
- struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL;
- struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
- float ALIGN(16) ka[4], ks[4], kext[4];
-
- args.lambda = v4f_set1(633.f);
- args.n = v4f_set1(1.90f);
- args.kappa = v4f_set1(0.55f);
- args.gyration_radius_prefactor = v4f_set1(1.70f);
- args.fractal_dimension = v4f_set1(1.75f);
- args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f);
- args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f);
- args.soot_primary_particles_diameter = v4f_set1(1.f);
- args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
-
- radcoefs_compute_simd4(&radcoefs, &args);
-
- v4f_store(ka, radcoefs.ka);
- v4f_store(ks, radcoefs.ks);
- v4f_store(kext, radcoefs.kext);
- printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
- SPLIT4(ka), SPLIT4(ks));
-
- CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
- CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
- CHK(ka[1] == 0);
- CHK(ka[2] == 0);
-
- CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
- CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
- CHK(ks[1] == 0);
- CHK(ks[2] == 0);
-
- CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
- CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
- CHK(kext[1] == 0);
- CHK(kext[2] == 0);
-
- args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka;
- radcoefs_compute_simd4(&radcoefs, &args);
- v4f_store(ka, radcoefs.ka);
- v4f_store(ks, radcoefs.ks);
- v4f_store(kext, radcoefs.kext);
- CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
- CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
- CHK(ka[1] == 0);
- CHK(ka[2] == 0);
- CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0);
- CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
-
- args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks;
- radcoefs_compute_simd4(&radcoefs, &args);
- v4f_store(ka, radcoefs.ka);
- v4f_store(ks, radcoefs.ks);
- v4f_store(kext, radcoefs.kext);
- CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
- CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
- CHK(ks[1] == 0);
- CHK(ks[2] == 0);
- CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0);
- CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
-
- /* Note that actually even though Ka and Ks are note required they are
- * internally computed to evaluate kext and are returned to the caller. Their
- * value are thus not null */
- args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext;
- radcoefs_compute_simd4(&radcoefs, &args);
- v4f_store(kext, radcoefs.kext);
- CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
- CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
- CHK(kext[1] == 0);
- CHK(kext[2] == 0);
-
- args.lambda = v4f_set1(633.f);
- args.n = v4f_set1(1.75f);
- args.kappa = v4f_set1(0.435f);
- args.gyration_radius_prefactor = v4f_set1(1.70f);
- args.fractal_dimension = v4f_set1(1.75f);
- args.soot_volumic_fraction = v4f_set
- (9.9999999999999995e-008f,
- 9.9999999999999995e-008f,
- 1.9999999999999999e-007f,
- 9.9999999999999995e-008f);
- args.soot_primary_particles_count = v4f_set
- (400.f,
- 400.f,
- 400.f,
- 800.f);
- args.soot_primary_particles_diameter = v4f_set
- (34.000000006413003f,
- 17.000000003206502f,
- 34.000000006413003f,
- 34.000000006413003f);
- args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
- radcoefs_compute_simd4(&radcoefs, &args);
- v4f_store(ka, radcoefs.ka);
- v4f_store(ks, radcoefs.ks);
- v4f_store(kext, radcoefs.kext);
- printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
- SPLIT4(ka), SPLIT4(ks));
- CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6));
- CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6));
- CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6));
- CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6));
- CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6));
- CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6));
- CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6));
- CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6));
- CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6));
- CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6));
- CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6));
- CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6));
-}
-
-
-int
-main(int argc, char** argv)
-{
- (void)argc, (void)argv;
-
- test_scalar();
- test_simd();
return 0;
}
diff --git a/src/test_atrstm_radcoefs_simd.c b/src/test_atrstm_radcoefs_simd.c
@@ -0,0 +1,151 @@
+/* Copyright (C) 2020, 2021 CNRS
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "atrstm_radcoefs_simd4.c"
+
+int
+main(int argc, char** argv)
+{
+ const double ka_ref = 5.7382401729092799E-1;
+ const double ks_ref = 7.2169062018378995E-6;
+
+ const double ka_ref2[4] = {
+ 0.52178067472799794,
+ 0.52178067472799794,
+ 1.0435613494559959,
+ 0.52178067472799794
+ };
+ const double ks_ref2[4] = {
+ 9.6010140939975883e-002,
+ 3.3272961678492224e-002,
+ 0.19202028187995177,
+ 9.9964602374815484e-002
+ };
+ struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL;
+ struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
+ float ALIGN(16) ka[4], ks[4], kext[4];
+ (void)argc, (void)argv;
+
+ args.lambda = v4f_set1(633.f);
+ args.n = v4f_set1(1.90f);
+ args.kappa = v4f_set1(0.55f);
+ args.gyration_radius_prefactor = v4f_set1(1.70f);
+ args.fractal_dimension = v4f_set1(1.75f);
+ args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f);
+ args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f);
+ args.soot_primary_particles_diameter = v4f_set1(1.f);
+ args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+
+ radcoefs_compute_simd4(&radcoefs, &args);
+
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+ SPLIT4(ka), SPLIT4(ks));
+
+ CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+ CHK(ka[1] == 0);
+ CHK(ka[2] == 0);
+
+ CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+ CHK(ks[1] == 0);
+ CHK(ks[2] == 0);
+
+ CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(kext[1] == 0);
+ CHK(kext[2] == 0);
+
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6));
+ CHK(ka[1] == 0);
+ CHK(ka[2] == 0);
+ CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0);
+ CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6));
+ CHK(ks[1] == 0);
+ CHK(ks[2] == 0);
+ CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0);
+ CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0);
+
+ /* Note that actually even though Ka and Ks are note required they are
+ * internally computed to evaluate kext and are returned to the caller. Their
+ * value are thus not null */
+ args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(kext, radcoefs.kext);
+ CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6));
+ CHK(kext[1] == 0);
+ CHK(kext[2] == 0);
+
+ args.lambda = v4f_set1(633.f);
+ args.n = v4f_set1(1.75f);
+ args.kappa = v4f_set1(0.435f);
+ args.gyration_radius_prefactor = v4f_set1(1.70f);
+ args.fractal_dimension = v4f_set1(1.75f);
+ args.soot_volumic_fraction = v4f_set
+ (9.9999999999999995e-008f,
+ 9.9999999999999995e-008f,
+ 1.9999999999999999e-007f,
+ 9.9999999999999995e-008f);
+ args.soot_primary_particles_count = v4f_set
+ (400.f,
+ 400.f,
+ 400.f,
+ 800.f);
+ args.soot_primary_particles_diameter = v4f_set
+ (34.000000006413003f,
+ 17.000000003206502f,
+ 34.000000006413003f,
+ 34.000000006413003f);
+ args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL;
+ radcoefs_compute_simd4(&radcoefs, &args);
+ v4f_store(ka, radcoefs.ka);
+ v4f_store(ks, radcoefs.ks);
+ v4f_store(kext, radcoefs.kext);
+ printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n",
+ SPLIT4(ka), SPLIT4(ks));
+ CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6));
+ CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6));
+ CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6));
+ CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6));
+ CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6));
+ CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6));
+ CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6));
+ CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6));
+ CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6));
+ CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6));
+ CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6));
+ CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6));
+
+ return 0;
+}
+