star-4v_s

An invariant property of diffuse random walks
git clone git://git.meso-star.fr/star-4v_s.git
Log | Files | Refs | README | LICENSE

commit a3536fc67d27b7ee3b4a1b7c9267d2642ea414ce
parent 64be232e3a407e2d5a38b3a521b387e093b2ebd8
Author: Christophe Coustet <christophe.coustet@meso-star.com>
Date:   Thu, 10 Mar 2016 16:58:15 +0100

OpenMP parallelization

Diffstat:
M cmake/CMakeLists.txt | 9 +++++++++
M src/s4vs.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
2 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -34,12 +34,17 @@ set(S4VS_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src/) ################################################################################ # Check dependencies ################################################################################ +find_package(OpenMP) find_package(RCMake 0.2 REQUIRED) find_package(RSys 0.2.1 REQUIRED) find_package(Star3D 0.3 REQUIRED) find_package(Star3DAW 0.1.2 REQUIRED) find_package(StarSP 0.2 REQUIRED) +if(NOT OPENMP_FOUND) + message(STATUS "No OpenMP support: muti-threading is disabled") +endif() + include_directories( ${RSys_INCLUDE_DIR} ${Star3D_INCLUDE_DIR} @@ -65,6 +70,10 @@ set(S4VS_FILES_DOC COPYING.fr COPYING.en README.md) rcmake_prepend_path(S4VS_FILES_SRC ${S4VS_SOURCE_DIR}) rcmake_prepend_path(S4VS_FILES_DOC ${PROJECT_SOURCE_DIR}/../) +if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +endif() + if(CMAKE_COMPILER_IS_GNUCC) set(MATH_LIB m) endif() diff --git a/src/s4vs.c b/src/s4vs.c @@ -34,6 +34,8 @@ #include <star/s3daw.h> #include <star/ssp.h> +#include <omp.h> + /* Maximum number of failures before an error occurs */ #define MAX_FAILURES 10 @@ -46,7 +48,7 @@ struct context { double ks; double sum; double sum2; - ATOMIC nfailures; + int nfailures; char exit_failure; }; @@ -55,7 +57,7 @@ realization(struct ssp_rng* rng, struct context* ctx) { struct s3d_attrib attrib; struct s3d_primitive prim; - double lambda; + double lambda, sum = 0, sum2 = 0; float normal[3], direction[4], origin[3], range[2], st[2]; struct s3d_hit hit; int nfailures = 0; @@ -104,12 +106,12 @@ realization(struct ssp_rng* rng, struct context* ctx) for(;;) { lambda = ssp_ran_exp(rng, ctx->ks); if(lambda >= hit.distance) { - ctx->sum += hit.distance; - ctx->sum2 += hit.distance * hit.distance; + sum += hit.distance; + sum2 += hit.distance * hit.distance; break; } else { - ctx->sum += lambda; - ctx->sum2 += lambda * lambda; + sum += lambda; + sum2 += lambda * 
lambda; f3_add(origin, origin, f3_mulf(direction, direction, (float)lambda)); do { @@ -128,7 +130,12 @@ realization(struct ssp_rng* rng, struct context* ctx) } while(S3D_HIT_NONE(&hit)); } } - ATOMIC_ADD(&ctx->nfailures, nfailures); +#pragma omp atomic update + ctx->sum += sum; +#pragma omp atomic update + ctx->sum2 += sum2; +#pragma omp atomic update + ctx->nfailures += nfailures; } static res_T @@ -136,13 +143,15 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks) { char buf[512]; struct time begin, end, elapsed; - struct s3d_shape* shape = NULL; + struct s3d_shape* shape; struct context ctx; double sig, length, tmp; float S, V; res_T res = RES_OK; - struct ssp_rng* rng = NULL; + struct ssp_rng* rng; + struct ssp_rng_proxy* proxy; unsigned i; + size_t nb_t, me; ASSERT(max_steps && ks > 0.0); @@ -178,11 +187,29 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks) ctx.sum = 0; ctx.sum2 = 0; - CHECK(ssp_rng_create(NULL, &ssp_rng_threefry, &rng), RES_OK); + /* be prepared to OpenMP absence */ + nb_t = 1; + me = 0; + +#pragma omp parallel private(me, rng) + { +#ifdef _OPENMP + me = (size_t)omp_get_thread_num(); +#endif +#pragma omp single + { + nb_t = (size_t)omp_get_num_threads(); + CHECK(ssp_rng_proxy_create(NULL, &ssp_rng_threefry, nb_t, &proxy), RES_OK); + } + +#pragma omp barrier + CHECK(ssp_rng_proxy_create_rng(proxy, me, &rng), RES_OK); - time_current(&begin); - for (i = 0; i < max_steps; i++) { - realization(rng, &ctx); + time_current(&begin); +#pragma omp for + for (i = 0; i < max_steps; i++) { + realization(rng, &ctx); + } } length = ctx.sum / (double)max_steps; tmp = ctx.sum2 / (double)max_steps - length * length; @@ -201,10 +228,13 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks) } logger_print(LOGGER_DEFAULT, LOG_OUTPUT, - "\n4V/S = %g ~ %g +/- %g - # failures: %lu/%lu\nElapsed time: %s\n", - 4.0*V/S, length, sig, - (unsigned long)(ATOMIC_GET(&ctx.nfailures)), 
max_steps, - buf); + "\n4V/S = %g ~ %g +/- %g\n# failures: %d/%lu\nElapsed time: %s\n", + 4.0*V/S, length, sig, ctx.nfailures, max_steps, buf); + +#ifdef _OPENMP + logger_print(LOGGER_DEFAULT, LOG_OUTPUT, + "On %d threads\n", omp_get_num_threads()); +#endif exit: if(scene) {