commit a3536fc67d27b7ee3b4a1b7c9267d2642ea414ce
parent 64be232e3a407e2d5a38b3a521b387e093b2ebd8
Author: Christophe Coustet <christophe.coustet@meso-star.com>
Date: Thu, 10 Mar 2016 16:58:15 +0100
OpenMP parallelization
Diffstat:
2 files changed, 56 insertions(+), 17 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -34,12 +34,17 @@ set(S4VS_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src/)
################################################################################
# Check dependencies
################################################################################
+find_package(OpenMP)
find_package(RCMake 0.2 REQUIRED)
find_package(RSys 0.2.1 REQUIRED)
find_package(Star3D 0.3 REQUIRED)
find_package(Star3DAW 0.1.2 REQUIRED)
find_package(StarSP 0.2 REQUIRED)
+if(NOT OPENMP_FOUND) # OpenMP is looked up without REQUIRED: only report its absence
+ message(STATUS "No OpenMP support: multi-threading is disabled")
+endif()
+
include_directories(
${RSys_INCLUDE_DIR}
${Star3D_INCLUDE_DIR}
@@ -65,6 +70,10 @@ set(S4VS_FILES_DOC COPYING.fr COPYING.en README.md)
rcmake_prepend_path(S4VS_FILES_SRC ${S4VS_SOURCE_DIR})
rcmake_prepend_path(S4VS_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
+if(OPENMP_FOUND)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+endif()
+
if(CMAKE_COMPILER_IS_GNUCC)
set(MATH_LIB m)
endif()
diff --git a/src/s4vs.c b/src/s4vs.c
@@ -34,6 +34,8 @@
#include <star/s3daw.h>
#include <star/ssp.h>
+#include <omp.h>
+
/* Maximum number of failures before an error occurs */
#define MAX_FAILURES 10
@@ -46,7 +48,7 @@ struct context {
double ks;
double sum;
double sum2;
- ATOMIC nfailures;
+ int nfailures;
char exit_failure;
};
@@ -55,7 +57,7 @@ realization(struct ssp_rng* rng, struct context* ctx)
{
struct s3d_attrib attrib;
struct s3d_primitive prim;
- double lambda;
+ double lambda, sum = 0, sum2 = 0;
float normal[3], direction[4], origin[3], range[2], st[2];
struct s3d_hit hit;
int nfailures = 0;
@@ -104,12 +106,12 @@ realization(struct ssp_rng* rng, struct context* ctx)
for(;;) {
lambda = ssp_ran_exp(rng, ctx->ks);
if(lambda >= hit.distance) {
- ctx->sum += hit.distance;
- ctx->sum2 += hit.distance * hit.distance;
+ sum += hit.distance;
+ sum2 += hit.distance * hit.distance;
break;
} else {
- ctx->sum += lambda;
- ctx->sum2 += lambda * lambda;
+ sum += lambda;
+ sum2 += lambda * lambda;
f3_add(origin, origin, f3_mulf(direction, direction, (float)lambda));
do {
@@ -128,7 +130,12 @@ realization(struct ssp_rng* rng, struct context* ctx)
} while(S3D_HIT_NONE(&hit));
}
}
- ATOMIC_ADD(&ctx->nfailures, nfailures);
+#pragma omp atomic update
+ ctx->sum += sum;
+#pragma omp atomic update
+ ctx->sum2 += sum2;
+#pragma omp atomic update
+ ctx->nfailures += nfailures;
}
static res_T
@@ -136,13 +143,15 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks)
{
char buf[512];
struct time begin, end, elapsed;
- struct s3d_shape* shape = NULL;
+ struct s3d_shape* shape;
struct context ctx;
double sig, length, tmp;
float S, V;
res_T res = RES_OK;
- struct ssp_rng* rng = NULL;
+ struct ssp_rng* rng;
+ struct ssp_rng_proxy* proxy;
unsigned i;
+ size_t nb_t, me;
ASSERT(max_steps && ks > 0.0);
@@ -178,11 +187,29 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks)
ctx.sum = 0;
ctx.sum2 = 0;
- CHECK(ssp_rng_create(NULL, &ssp_rng_threefry, &rng), RES_OK);
+ /* Default values in case OpenMP is not available */
+ nb_t = 1;
+ me = 0;
+
+#pragma omp parallel private(me, rng)
+ {
+#ifdef _OPENMP
+ me = (size_t)omp_get_thread_num();
+#endif
+#pragma omp single
+ {
+ nb_t = (size_t)omp_get_num_threads();
+ CHECK(ssp_rng_proxy_create(NULL, &ssp_rng_threefry, nb_t, &proxy), RES_OK);
+ }
+
+#pragma omp barrier
+ CHECK(ssp_rng_proxy_create_rng(proxy, me, &rng), RES_OK);
- time_current(&begin);
- for (i = 0; i < max_steps; i++) {
- realization(rng, &ctx);
+ time_current(&begin);
+#pragma omp for
+ for (i = 0; i < max_steps; i++) {
+ realization(rng, &ctx);
+ }
}
length = ctx.sum / (double)max_steps;
tmp = ctx.sum2 / (double)max_steps - length * length;
@@ -201,10 +228,13 @@ compute_4v_s(struct s3d_scene* scene, const size_t max_steps, const double ks)
}
logger_print(LOGGER_DEFAULT, LOG_OUTPUT,
- "\n4V/S = %g ~ %g +/- %g - # failures: %lu/%lu\nElapsed time: %s\n",
- 4.0*V/S, length, sig,
- (unsigned long)(ATOMIC_GET(&ctx.nfailures)), max_steps,
- buf);
+ "\n4V/S = %g ~ %g +/- %g\n# failures: %d/%lu\nElapsed time: %s\n",
+ 4.0*V/S, length, sig, ctx.nfailures, max_steps, buf);
+
+#ifdef _OPENMP
+ /* omp_get_num_threads() returns 1 outside a parallel region; use the saved team size */
+ logger_print(LOGGER_DEFAULT, LOG_OUTPUT, "On %lu threads\n", (unsigned long)nb_t);
+#endif
exit:
if(scene) {