atrstm

Load and structure a combustion gas mixture
git clone git://git.meso-star.fr/atrstm.git
Log | Files | Refs | README | LICENSE

commit dabafcbc621834b06af3f74677405aef2315ef33
parent 83e34981cd05b07d2b156c24cfa911f442822251
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Mon,  1 Feb 2021 14:48:57 +0100

Add the primitive_compute_radcoefs_[range]_simd4 functions

Setup the CMakeFile to optionally support SIMD instruction sets.

Diffstat:
Mcmake/CMakeLists.txt | 37++++++++++++++++++++++++++++++++-----
Asrc/atrstm_radcoefs_simd4.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/atrstm_radcoefs_simd4.h | 54++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc/atrstm_setup_octrees.c | 9+++++++++
Msrc/test_atrstm_radcoefs.c | 147++-----------------------------------------------------------------------------
Asrc/test_atrstm_radcoefs_simd.c | 151++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 328 insertions(+), 151 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2020 CNRS +# Copyright (C) 2020, 2021 CNRS # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,10 +28,19 @@ find_package(AtrTP 0.0 REQUIRED) find_package(OpenMP 1.2 REQUIRED) find_package(RCMake 0.4 REQUIRED) find_package(RSys 0.12 REQUIRED) -find_package(RSIMD 0.3 REQUIRED) find_package(StarTetraHedra 0.0 REQUIRED) find_package(StarUVM 0.0 REQUIRED) find_package(StarVX 0.2 REQUIRED) +find_package(RSIMD 0.3) +if(RSIMD_FOUND) + option(USE_SIMD "Use SIMD instruction sets" ON) +endif() + +if(USE_SIMD) + message(STATUS "Use SIMD instruction sets") +else() + message(STATUS "Do not use SIMD instruction sets") +endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR}) include(rcmake) @@ -70,7 +79,6 @@ set(ATRSTM_FILES_INC atrstm_log.h atrstm_partition.h atrstm_radcoefs.h - atrstm_radcoefs_simd4.h atrstm_setup_octrees.h atrstm_svx.h) @@ -79,14 +87,30 @@ set(ATRSTM_FILES_INC_API set(ATRSTM_FILES_DOC COPYING README.md) +# SIMD files +if(USE_SIMD) + set(ATRSTM_FILES_SRC ${ATRSTM_FILES_SRC} atrstm_radcoefs_simd4.c) + set(ATRSTM_FILES_INC ${ATRSTM_FILES_INC} atrstm_radcoefs_simd4.h) +endif() + # Prepend each file in the `ATRSTM_FILES_<SRC|INC>' list by `ATRSTM_SOURCE_DIR' rcmake_prepend_path(ATRSTM_FILES_SRC ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_INC ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_INC_API ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_DOC ${PROJECT_SOURCE_DIR}/../) -add_library(atrstm SHARED ${ATRSTM_FILES_SRC} ${ATRSTM_FILES_INC} ${ATRSTM_FILES_INC_API}) -target_link_libraries(atrstm AtrRI AtrTP RSys RSIMD StarTetraHedra StarUVM StarVX m) +add_library(atrstm SHARED + ${ATRSTM_FILES_SRC} + ${ATRSTM_FILES_INC} + ${ATRSTM_FILES_INC_API}) +target_link_libraries(atrstm AtrRI AtrTP RSys StarTetraHedra StarUVM StarVX m) + +# Setup SIMD support on the target +if(USE_SIMD) + target_link_libraries(atrstm RSIMD) + set_target_properties(atrstm PROPERTIES + COMPILE_DEFINITIONS ATRSTM_USE_SIMD) +endif() if(CMAKE_COMPILER_IS_GNUCC) set_target_properties(atrstm PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS}") @@ -117,6 +141,9 @@ if(NOT NO_TEST) build_test(test_atrstm) new_test(test_atrstm_radcoefs) + if(USE_SIMD) + new_test(test_atrstm_radcoefs_simd) + endif() endif() ################################################################################ diff --git a/src/atrstm_radcoefs_simd4.c b/src/atrstm_radcoefs_simd4.c @@ -0,0 +1,81 @@ +/* Copyright (C) 2020, 2021 CNRS + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "atrstm_c.h" +#include "atrstm_radcoefs_simd4.h" + +#include <astoria/atrtp.h> +#include <star/suvm.h> + +/******************************************************************************* + * Local functions + ******************************************************************************/ +res_T +primitive_compute_radcoefs_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + const int radcoefs_mask, /* Combination of atrstm_radcoef_flag */ + struct radcoefs_simd4* radcoefs) +{ + struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; + struct atrtp_desc desc = ATRTP_DESC_NULL; + const double* node[4]; + res_T res = RES_OK; + ASSERT(atrstm && prim && prim->nvertices == 4 && refract_id && radcoefs); + + res = atrtp_get_desc(atrstm->atrtp, &desc); + if(res != RES_OK) goto error; + + /* Setup the constant input arguments of the optical properties computation + * routine */ + args.lambda = v4f_set1((float)refract_id->wavelength); + args.n = v4f_set1((float)refract_id->n); + args.kappa = v4f_set1((float)refract_id->kappa); + args.gyration_radius_prefactor = v4f_set1((float)atrstm->gyration_radius_prefactor); + args.fractal_dimension = v4f_set1((float)atrstm->fractal_dimension); + args.radcoefs_mask = radcoefs_mask; + + /* Fetch the thermodynamic properties of the 4 primitive nodes */ + node[0] = atrtp_desc_get_node_properties(&desc, prim->indices[0]); + node[1] = atrtp_desc_get_node_properties(&desc, prim->indices[1]); + node[2] = atrtp_desc_get_node_properties(&desc, prim->indices[2]); + node[3] = atrtp_desc_get_node_properties(&desc, prim->indices[3]); + + /* Scatter Soot properties */ + args.soot_volumic_fraction = v4f_set + ((float)node[0][ATRTP_SOOT_VOLFRAC], + (float)node[1][ATRTP_SOOT_VOLFRAC], + (float)node[2][ATRTP_SOOT_VOLFRAC], + (float)node[3][ATRTP_SOOT_VOLFRAC]); + args.soot_primary_particles_count = v4f_set + ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT]); + args.soot_primary_particles_diameter = v4f_set + ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER]); + + /* Compute the per primitive node optical properties */ + radcoefs_compute_simd4(radcoefs, &args); + +exit: + return res; +error: + goto exit; +} diff --git a/src/atrstm_radcoefs_simd4.h b/src/atrstm_radcoefs_simd4.h @@ -17,6 +17,7 @@ #define ATRSTM_RADCOEFS_SIMD4_H #include "atrstm.h" +#include "atrstm_radcoefs.h" #include <rsimd/math.h> #include <rsimd/rsimd.h> @@ -45,8 +46,57 @@ struct radcoefs_compute_simd4_args { }; static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; -/* SIMD version of the radcoefs_compute function. Please refer to its scalar - * version for informations of what it does */ +/* Forward declarations */ +struct atrstm; +struct atrri_refractive_index; +struct suvm_primitive; + +extern LOCAL_SYM res_T +primitive_compute_radcoefs_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + const int radcoefs_mask, + struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */ + +static INLINE res_T +primitive_compute_radcoefs_range_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + struct radcoefs* radcoefs_min, + struct radcoefs* radcoefs_max) +{ + struct radcoefs_simd4 radcoefs; + v4f_T ka[2]; + v4f_T ks[2]; + v4f_T kext[2]; + res_T res = RES_OK; + ASSERT(radcoefs_min && radcoefs_max); + + res = primitive_compute_radcoefs_simd4 + (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs); + if(res != RES_OK) return res; + + ka[0] = v4f_reduce_min(radcoefs.ka); + ka[1] = v4f_reduce_max(radcoefs.ka); + ks[0] = v4f_reduce_min(radcoefs.ks); + ks[1] = v4f_reduce_max(radcoefs.ks); + kext[0] = v4f_reduce_min(radcoefs.kext); + kext[1] = v4f_reduce_max(radcoefs.kext); + + radcoefs_min->ka = v4f_x(ka[0]); + radcoefs_max->ka = v4f_x(ka[1]); + radcoefs_min->ks = v4f_x(ks[0]); + radcoefs_max->ks = v4f_x(ks[1]); + radcoefs_min->kext = v4f_x(kext[0]); + radcoefs_max->kext = v4f_x(kext[1]); + + return RES_OK; +} + +/* SIMD version of the radcoefs_compute function. Refer to its scalar version + * for informations of what it does */ static INLINE void radcoefs_compute_simd4 (struct radcoefs_simd4* radcoefs, diff --git a/src/atrstm_setup_octrees.c b/src/atrstm_setup_octrees.c @@ -23,6 +23,10 @@ #include "atrstm_setup_octrees.h" #include "atrstm_svx.h" +#ifdef ATRSTM_USE_SIMD + #include "atrstm_radcoefs_simd4.h" +#endif + #include <astoria/atrri.h> #include <star/suvm.h> @@ -234,8 +238,13 @@ voxelize_partition voxel_commit_radcoefs_range (vox, ATRSTM_CPNT_GAS, &radcoefs_min, &radcoefs_max); +#ifdef ATRSTM_USE_SIMD + res = primitive_compute_radcoefs_range_simd4 + (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max); +#else res = primitive_compute_radcoefs_range (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max); +#endif if(UNLIKELY(res != RES_OK)) goto error; voxel_commit_radcoefs_range (vox, ATRSTM_CPNT_SOOT, &radcoefs_min, &radcoefs_max); diff --git a/src/test_atrstm_radcoefs.c b/src/test_atrstm_radcoefs.c @@ -14,16 +14,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include "atrstm_radcoefs.h" -#include "atrstm_radcoefs_simd4.h" -static void -test_scalar(void) +int +main(int argc, char** argv) { const double ka_ref = 5.7382401729092799E-1; const double ks_ref = 7.2169062018378995E-6; struct radcoefs radcoefs = RADCOEFS_NULL; struct radcoefs_compute_args args = RADCOEFS_COMPUTE_ARGS_NULL; + (void)argc, (void)argv; args.lambda = 633; args.n = 1.90; @@ -74,147 +74,6 @@ test_scalar(void) CHK(radcoefs.ka == 0); CHK(radcoefs.ks == 0); CHK(radcoefs.kext == 0); -} - -static void -test_simd(void) -{ - const double ka_ref = 5.7382401729092799E-1; - const double ks_ref = 7.2169062018378995E-6; - - const double ka_ref2[4] = { - 0.52178067472799794, - 0.52178067472799794, - 1.0435613494559959, - 0.52178067472799794 - }; - const double ks_ref2[4] = { - 9.6010140939975883e-002, - 3.3272961678492224e-002, - 0.19202028187995177, - 9.9964602374815484e-002 - }; - struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL; - struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; - float ALIGN(16) ka[4], ks[4], kext[4]; - - args.lambda = v4f_set1(633.f); - args.n = v4f_set1(1.90f); - args.kappa = v4f_set1(0.55f); - args.gyration_radius_prefactor = v4f_set1(1.70f); - args.fractal_dimension = v4f_set1(1.75f); - args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f); - args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f); - args.soot_primary_particles_diameter = v4f_set1(1.f); - args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; - - radcoefs_compute_simd4(&radcoefs, &args); - - v4f_store(ka, radcoefs.ka); - v4f_store(ks, radcoefs.ks); - v4f_store(kext, radcoefs.kext); - printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", - SPLIT4(ka), SPLIT4(ks)); - - CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); - CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); - CHK(ka[1] == 0); - CHK(ka[2] == 0); - - CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); - CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); - CHK(ks[1] == 0); - CHK(ks[2] == 0); - - CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); - CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); - CHK(kext[1] == 0); - CHK(kext[2] == 0); - - args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka; - radcoefs_compute_simd4(&radcoefs, &args); - v4f_store(ka, radcoefs.ka); - v4f_store(ks, radcoefs.ks); - v4f_store(kext, radcoefs.kext); - CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); - CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); - CHK(ka[1] == 0); - CHK(ka[2] == 0); - CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0); - CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); - - args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks; - radcoefs_compute_simd4(&radcoefs, &args); - v4f_store(ka, radcoefs.ka); - v4f_store(ks, radcoefs.ks); - v4f_store(kext, radcoefs.kext); - CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); - CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); - CHK(ks[1] == 0); - CHK(ks[2] == 0); - CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0); - CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); - - /* Note that actually even though Ka and Ks are note required they are - * internally computed to evaluate kext and are returned to the caller. Their - * value are thus not null */ - args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext; - radcoefs_compute_simd4(&radcoefs, &args); - v4f_store(kext, radcoefs.kext); - CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); - CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); - CHK(kext[1] == 0); - CHK(kext[2] == 0); - - args.lambda = v4f_set1(633.f); - args.n = v4f_set1(1.75f); - args.kappa = v4f_set1(0.435f); - args.gyration_radius_prefactor = v4f_set1(1.70f); - args.fractal_dimension = v4f_set1(1.75f); - args.soot_volumic_fraction = v4f_set - (9.9999999999999995e-008f, - 9.9999999999999995e-008f, - 1.9999999999999999e-007f, - 9.9999999999999995e-008f); - args.soot_primary_particles_count = v4f_set - (400.f, - 400.f, - 400.f, - 800.f); - args.soot_primary_particles_diameter = v4f_set - (34.000000006413003f, - 17.000000003206502f, - 34.000000006413003f, - 34.000000006413003f); - args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; - radcoefs_compute_simd4(&radcoefs, &args); - v4f_store(ka, radcoefs.ka); - v4f_store(ks, radcoefs.ks); - v4f_store(kext, radcoefs.kext); - printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", - SPLIT4(ka), SPLIT4(ks)); - CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6)); - CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6)); - CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6)); - CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6)); - CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6)); - CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6)); - CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6)); - CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6)); - CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6)); - CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6)); - CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6)); - CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6)); -} - - -int -main(int argc, char** argv) -{ - (void)argc, (void)argv; - - test_scalar(); - test_simd(); return 0; } diff --git a/src/test_atrstm_radcoefs_simd.c b/src/test_atrstm_radcoefs_simd.c @@ -0,0 +1,151 @@ +/* Copyright (C) 2020, 2021 CNRS + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "atrstm_radcoefs_simd4.c" + +int +main(int argc, char** argv) +{ + const double ka_ref = 5.7382401729092799E-1; + const double ks_ref = 7.2169062018378995E-6; + + const double ka_ref2[4] = { + 0.52178067472799794, + 0.52178067472799794, + 1.0435613494559959, + 0.52178067472799794 + }; + const double ks_ref2[4] = { + 9.6010140939975883e-002, + 3.3272961678492224e-002, + 0.19202028187995177, + 9.9964602374815484e-002 + }; + struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL; + struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; + float ALIGN(16) ka[4], ks[4], kext[4]; + (void)argc, (void)argv; + + args.lambda = v4f_set1(633.f); + args.n = v4f_set1(1.90f); + args.kappa = v4f_set1(0.55f); + args.gyration_radius_prefactor = v4f_set1(1.70f); + args.fractal_dimension = v4f_set1(1.75f); + args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f); + args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f); + args.soot_primary_particles_diameter = v4f_set1(1.f); + args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; + + radcoefs_compute_simd4(&radcoefs, &args); + + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", + SPLIT4(ka), SPLIT4(ks)); + + CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); + CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); + CHK(ka[1] == 0); + CHK(ka[2] == 0); + + CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); + CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); + CHK(ks[1] == 0); + CHK(ks[2] == 0); + + CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(kext[1] == 0); + CHK(kext[2] == 0); + + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); + CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); + CHK(ka[1] == 0); + CHK(ka[2] == 0); + CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0); + CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); + + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); + CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); + CHK(ks[1] == 0); + CHK(ks[2] == 0); + CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0); + CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); + + /* Note that actually even though Ka and Ks are note required they are + * internally computed to evaluate kext and are returned to the caller. Their + * value are thus not null */ + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(kext[1] == 0); + CHK(kext[2] == 0); + + args.lambda = v4f_set1(633.f); + args.n = v4f_set1(1.75f); + args.kappa = v4f_set1(0.435f); + args.gyration_radius_prefactor = v4f_set1(1.70f); + args.fractal_dimension = v4f_set1(1.75f); + args.soot_volumic_fraction = v4f_set + (9.9999999999999995e-008f, + 9.9999999999999995e-008f, + 1.9999999999999999e-007f, + 9.9999999999999995e-008f); + args.soot_primary_particles_count = v4f_set + (400.f, + 400.f, + 400.f, + 800.f); + args.soot_primary_particles_diameter = v4f_set + (34.000000006413003f, + 17.000000003206502f, + 34.000000006413003f, + 34.000000006413003f); + args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", + SPLIT4(ka), SPLIT4(ks)); + CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6)); + CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6)); + CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6)); + CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6)); + CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6)); + CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6)); + CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6)); + CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6)); + CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6)); + CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6)); + CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6)); + CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6)); + + return 0; +} +