commit e15fba476cf5869bfb47e10a3d03ea497de1a6b9
parent 922814aeb8a1977f27ee6897a5160d2c957e71da
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Wed, 7 Feb 2024 16:50:29 +0100
Add the sdis_solve_probe_boundary_list function
Unlike its single-probe counterpart, this function will parallelize the
list of boundary probes, rather than parallelizing Monte Carlo
realizations. In fact, it provides the same behavior for boundary
probes as the sdis_solve_probe_list function on probes.
To share the maximum of source code between these 2 functions, we wrote
a generic creation function that allocates and configures an estimator
buffer from a list of observables. The observable type is the generic
parameter of the function.
Diffstat:
8 files changed, 503 insertions(+), 98 deletions(-)
diff --git a/src/sdis.c b/src/sdis.c
@@ -476,10 +476,10 @@ compute_process_index_range
per_process_indices = nindices / (size_t)dev->mpi_nprocs;
range[0] = per_process_indices * (size_t)dev->mpi_rank;
- range[1] = range[0] + per_process_indices; /* Upper bound is _exclusive */
+ range[1] = range[0] + per_process_indices; /* Upper bound is _exclusive_ */
ASSERT(range[0] <= range[1]);
- /* Set the remaining number of indexes that are not managed by one process */
+ /* Set the remaining number of indices that are not managed by one process */
remaining_indices =
nindices - per_process_indices * (size_t)dev->mpi_nprocs;
diff --git a/src/sdis.h b/src/sdis.h
@@ -553,6 +553,27 @@ static const struct sdis_solve_probe_boundary_args
SDIS_SOLVE_PROBE_BOUNDARY_ARGS_DEFAULT =
SDIS_SOLVE_PROBE_BOUNDARY_ARGS_DEFAULT__;
+/* Input arguments of the solve function that distributes the calculations of
+ * several boundary probes rather than the realizations of a probe */
+struct sdis_solve_probe_boundary_list_args {
+ struct sdis_solve_probe_boundary_args* probes; /* List of probes to compute */
+ size_t nprobes; /* Total number of probes */
+
+ /* State/type of the RNG to use for the list of probes to calculate.
+ * The state/type defines per probe is ignored */
+ struct ssp_rng* rng_state; /* Initial RNG state. May be NULL */
+ enum ssp_rng_type rng_type; /* RNG type to use if `rng_state' is NULL */
+};
+#define SDIS_SOLVE_PROBE_BOUNDARY_LIST_ARGS_DEFAULT__ { \
+ NULL, /* List of probes */ \
+ 0, /* #probes */ \
+ NULL, /* RNG state */ \
+ SSP_RNG_THREEFRY /* RNG type */ \
+}
+static const struct sdis_solve_probe_boundary_list_args
+SDIS_SOLVE_PROBE_BOUNDARY_LIST_ARGS_DEFAULT =
+ SDIS_SOLVE_PROBE_BOUNDARY_LIST_ARGS_DEFAULT__;
+
struct sdis_solve_boundary_args {
size_t nrealisations; /* #realisations */
const size_t* primitives; /* List of boundary primitives to handle */
@@ -1424,18 +1445,6 @@ sdis_solve_probe
const struct sdis_solve_probe_args* args,
struct sdis_estimator** estimator);
-/* Calculate temperature for a list of probe points. Unlike its
- * single-probe counterpart, this function parallelizes the list of
- * probes, rather than calculating a single probe. Calling this function
- * is therefore more advantageous in terms of load distribution when the
- * number of probe points to be evaluated is large compared to the cost
- * of calculating a single probe point. */
-SDIS_API res_T
-sdis_solve_probe_list
- (struct sdis_scene* scn,
- const struct sdis_solve_probe_list_args* args,
- struct sdis_estimator_buffer** buf);
-
SDIS_API res_T
sdis_solve_probe_boundary
(struct sdis_scene* scn,
@@ -1486,6 +1495,27 @@ sdis_compute_power
struct sdis_estimator** estimator);
/*******************************************************************************
+ * Solvers of a list of probes
+ *
+ * Unlike their single-probe counterpart, this function parallelizes the list of
+ * probes, rather than calculating a single probe. Calling these functions is
+ * therefore more advantageous in terms of load distribution when the number of
+ * probes to be evaluated is large compared to the cost of calculating a single
+ * probe.
+ ******************************************************************************/
+SDIS_API res_T
+sdis_solve_probe_list
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_list_args* args,
+ struct sdis_estimator_buffer** buf);
+
+SDIS_API res_T
+sdis_solve_probe_boundary_list
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_boundary_list_args* args,
+ struct sdis_estimator_buffer** buf);
+
+/*******************************************************************************
* Green solvers.
*
* Note that only the interfaces/media with flux/volumic power defined during
diff --git a/src/sdis_estimator_buffer.c b/src/sdis_estimator_buffer.c
@@ -273,3 +273,8 @@ estimator_buffer_save_rng_state
return create_rng_from_rng_proxy(buf->dev, proxy, &buf->rng);
}
+/* Define the functions generic to the observable type */
+#define SDIS_X_OBS probe
+#include "sdis_estimator_buffer_X_obs.h"
+#define SDIS_X_OBS probe_boundary
+#include "sdis_estimator_buffer_X_obs.h"
diff --git a/src/sdis_estimator_buffer_X_obs.h b/src/sdis_estimator_buffer_X_obs.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2016-2023 |Méso|Star> (contact@meso-star.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Define functions on estimator buffer generic to the observable type. */
+
+#ifndef SDIS_ESTIMATOR_BUFFER_X_OBS_H
+#define SDIS_ESTIMATOR_BUFFER_X_OBS_H
+
+#include "sdis.h"
+#include "sdis_estimator_c.h"
+#include "sdis_estimator_buffer_c.h"
+#include "sdis_misc.h"
+
+#endif /* SDIS_ESTIMATOR_BUFFER_X_OBS_H */
+
+/* Check the generic observable parameter */
+#ifndef SDIS_X_OBS
+ #error "The SDIS_X_OBS macro must be defined."
+#endif
+
+#define X_OBS(Name) CONCAT(CONCAT(Name, _), SDIS_X_OBS)
+#define SDIS_SOLVE_X_OBS_ARGS CONCAT(CONCAT(sdis_solve_, SDIS_X_OBS),_args)
+
+res_T
+X_OBS(estimator_buffer_create_from_observable_list)
+ (struct sdis_device* dev,
+ struct ssp_rng_proxy* rng_proxy,
+ const struct SDIS_SOLVE_X_OBS_ARGS obs_list_args[],
+ const struct accum* per_obs_acc_temp,
+ const struct accum* per_obs_acc_time,
+ const size_t nobs, /* #observables */
+ struct sdis_estimator_buffer** out_estim_buffer)
+{
+ /* Accumulators throughout the buffer */
+ struct accum acc_temp = ACCUM_NULL;
+ struct accum acc_time = ACCUM_NULL;
+ size_t nrealisations = 0;
+
+ struct sdis_estimator_buffer* estim_buf = NULL;
+ size_t iobs = 0;
+ res_T res = RES_OK;
+
+ ASSERT(dev && rng_proxy);
+ ASSERT(obs_list_args || !nobs);
+ ASSERT(per_obs_acc_time && per_obs_acc_time && out_estim_buffer);
+
+ res = estimator_buffer_create(dev, nobs, 1, &estim_buf);
+ if(res != RES_OK) {
+ log_err(dev, "Unable to allocate the estimator buffer.\n");
+ goto error;
+ }
+
+ FOR_EACH(iobs, 0, nobs) {
+ const struct SDIS_SOLVE_X_OBS_ARGS* obs_args = NULL;
+ const struct accum* obs_acc_temp = NULL;
+ const struct accum* obs_acc_time = NULL;
+ struct sdis_estimator* estim = NULL;
+
+ /* Get observable data */
+ obs_args = obs_list_args + iobs;
+ obs_acc_temp = per_obs_acc_temp + iobs;
+ obs_acc_time = per_obs_acc_time + iobs;
+ ASSERT(obs_acc_temp->count == obs_acc_time->count);
+
+ /* Setup the estimator of the current observable */
+ estim = estimator_buffer_grab(estim_buf, iobs, 0);
+ estimator_setup_realisations_count
+ (estim, obs_args->nrealisations, obs_acc_temp->count);
+ estimator_setup_temperature
+ (estim, obs_acc_temp->sum, obs_acc_temp->sum2);
+ estimator_setup_realisation_time
+ (estim, obs_acc_time->sum, obs_acc_time->sum2);
+
+ /* Update global accumulators */
+ acc_temp.sum += obs_acc_temp->sum;
+ acc_temp.sum2 += obs_acc_temp->sum2;
+ acc_temp.count += obs_acc_temp->count;
+ acc_time.sum += obs_acc_time->sum;
+ acc_time.sum2 += obs_acc_time->sum2;
+ acc_time.count += obs_acc_time->count;
+ nrealisations += obs_args->nrealisations;
+ }
+
+ ASSERT(acc_temp.count == acc_time.count);
+
+ /* Setup global estimator */
+ estimator_buffer_setup_realisations_count
+ (estim_buf, nrealisations, acc_temp.count);
+ estimator_buffer_setup_temperature
+ (estim_buf, acc_temp.sum, acc_temp.sum2);
+ estimator_buffer_setup_realisation_time
+ (estim_buf, acc_time.sum, acc_time.sum2);
+
+ res = estimator_buffer_save_rng_state(estim_buf, rng_proxy);
+ if(res != RES_OK) goto error;
+
+exit:
+ *out_estim_buffer = estim_buf;
+ return res;
+error:
+ goto exit;
+}
+
+#undef X_OBS
+#undef SDIS_SOLVE_X_OBS_ARGS
+#undef SDIS_X_OBS
diff --git a/src/sdis_estimator_buffer_c.h b/src/sdis_estimator_buffer_c.h
@@ -59,5 +59,25 @@ estimator_buffer_save_rng_state
(struct sdis_estimator_buffer* buf,
const struct ssp_rng_proxy* proxy);
+extern LOCAL_SYM res_T
+estimator_buffer_create_from_observable_list_probe
+ (struct sdis_device* dev,
+ struct ssp_rng_proxy* rng_proxy,
+ const struct sdis_solve_probe_args obs_list_args[],
+ const struct accum* per_obs_acc_temp,
+ const struct accum* per_obs_acc_time,
+ const size_t nobs, /* #observables */
+ struct sdis_estimator_buffer** out_estim_buffer);
+
+extern LOCAL_SYM res_T
+estimator_buffer_create_from_observable_list_probe_boundary
+ (struct sdis_device* dev,
+ struct ssp_rng_proxy* rng_proxy,
+ const struct sdis_solve_probe_boundary_args obs_list_args[],
+ const struct accum* per_obs_acc_temp,
+ const struct accum* per_obs_acc_time,
+ const size_t nobs, /* #observables */
+ struct sdis_estimator_buffer** out_estim_buffer);
+
#endif /* SDIS_ESTIMATOR_BUFFER_C_H */
diff --git a/src/sdis_solve.c b/src/sdis_solve.c
@@ -59,7 +59,7 @@ sdis_solve_probe
res_T
sdis_solve_probe_list
(struct sdis_scene* scn,
- const struct sdis_solve_probe_list_args args[],
+ const struct sdis_solve_probe_list_args* args,
struct sdis_estimator_buffer** out_buf)
{
if(!scn) return RES_BAD_ARG;
@@ -99,6 +99,20 @@ sdis_solve_probe_boundary
}
res_T
+sdis_solve_probe_boundary_list
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_boundary_list_args* args,
+ struct sdis_estimator_buffer** out_buf)
+{
+ if(!scn) return RES_BAD_ARG;
+ if(scene_is_2d(scn)) {
+ return solve_probe_boundary_list_2d(scn, args, out_buf);
+ } else {
+ return solve_probe_boundary_list_3d(scn, args, out_buf);
+ }
+}
+
+res_T
sdis_solve_probe_boundary_green_function
(struct sdis_scene* scn,
const struct sdis_solve_probe_boundary_args* args,
diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h
@@ -101,84 +101,6 @@ check_solve_probe_list_args
return RES_OK;
}
-static res_T
-setup_estimator_buffer
- (struct sdis_device* dev,
- struct ssp_rng_proxy* rng_proxy,
- const struct sdis_solve_probe_list_args* solve_args,
- const struct accum* per_probe_acc_temp,
- const struct accum* per_probe_acc_time,
- struct sdis_estimator_buffer** out_estim_buffer)
-{
- /* Accumulators throughout the buffer */
- struct accum acc_temp = ACCUM_NULL;
- struct accum acc_time = ACCUM_NULL;
- size_t nrealisations = 0;
-
- struct sdis_estimator_buffer* estim_buf = NULL;
- size_t iprobe = 0;
- res_T res = RES_OK;
-
- ASSERT(dev && rng_proxy && solve_args);
- ASSERT(per_probe_acc_time && per_probe_acc_time && out_estim_buffer);
-
- res = estimator_buffer_create(dev, solve_args->nprobes, 1, &estim_buf);
- if(res != RES_OK) {
- log_err(dev, "Unable to allocate the estimator buffer.\n");
- goto error;
- }
-
- FOR_EACH(iprobe, 0, solve_args->nprobes) {
- const struct sdis_solve_probe_args* probe = NULL;
- const struct accum* probe_acc_temp = NULL;
- const struct accum* probe_acc_time = NULL;
- struct sdis_estimator* estim = NULL;
-
- /* Get probe data */
- probe = solve_args->probes + iprobe;
- probe_acc_temp = per_probe_acc_temp + iprobe;
- probe_acc_time = per_probe_acc_time + iprobe;
- ASSERT(probe_acc_temp->count == probe_acc_time->count);
-
- /* Setup probe estimator */
- estim = estimator_buffer_grab(estim_buf, iprobe, 0);
- estimator_setup_realisations_count
- (estim, probe->nrealisations, probe_acc_temp->count);
- estimator_setup_temperature
- (estim, probe_acc_temp->sum, probe_acc_temp->sum2);
- estimator_setup_realisation_time
- (estim, probe_acc_time->sum, probe_acc_time->sum2);
-
- /* Update global accumulators */
- acc_temp.sum += probe_acc_temp->sum;
- acc_temp.sum2 += probe_acc_temp->sum2;
- acc_temp.count += probe_acc_temp->count;
- acc_time.sum += probe_acc_time->sum;
- acc_time.sum2 += probe_acc_time->sum2;
- acc_time.count += probe_acc_time->count;
- nrealisations += probe->nrealisations;
- }
-
- ASSERT(acc_temp.count == acc_time.count);
-
- /* Setup global estimator */
- estimator_buffer_setup_realisations_count
- (estim_buf, nrealisations, acc_temp.count);
- estimator_buffer_setup_temperature
- (estim_buf, acc_temp.sum, acc_temp.sum2);
- estimator_buffer_setup_realisation_time
- (estim_buf, acc_time.sum, acc_time.sum2);
-
- res = estimator_buffer_save_rng_state(estim_buf, rng_proxy);
- if(res != RES_OK) goto error;
-
-exit:
- *out_estim_buffer = estim_buf;
- return res;
-error:
- goto exit;
-}
-
#endif /* SDIS_SOLVE_PROBE_XD_H */
static res_T
@@ -626,9 +548,6 @@ XD(solve_probe_list)
goto post_sync;
}
- /* Begin time registration of the computation */
- time_current(&time0);
-
/* Allocate the list of accumulators per probe. On the master process,
* allocate a complete list in which the accumulators of all processes will be
* stored. */
@@ -641,6 +560,9 @@ XD(solve_probe_list)
goto error;
}
+ /* Begin time registration of the computation */
+ time_current(&time0);
+
/* Here we go! Calculation of probe list */
omp_set_num_threads((int)scn->dev->nthreads);
#pragma omp parallel for schedule(static)
@@ -726,8 +648,9 @@ post_sync:
log_info(scn->dev, "Probes accumulator gathered in %s.\n", buf);
if(is_master_process) {
- res = setup_estimator_buffer(scn->dev, rng_proxy, args, per_probe_acc_temp,
- per_probe_acc_time, &estim_buf);
+ res = estimator_buffer_create_from_observable_list_probe
+ (scn->dev, rng_proxy, args->probes, per_probe_acc_temp,
+ per_probe_acc_time, args->nprobes, &estim_buf);
if(res != RES_OK) goto error;
}
diff --git a/src/sdis_solve_probe_boundary_Xd.h b/src/sdis_solve_probe_boundary_Xd.h
@@ -75,6 +75,40 @@ check_solve_probe_boundary_args
}
static INLINE res_T
+check_solve_probe_boundary_list_args
+ (struct sdis_device* dev,
+ const struct sdis_solve_probe_boundary_list_args* args)
+{
+ size_t iprobe = 0;
+
+ if(!args) return RES_BAD_ARG;
+
+ /* Check the list of probes */
+ if(!args->probes || !args->nprobes) {
+ return RES_BAD_ARG;
+ }
+
+ /* Check the RNG type */
+ if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) {
+ return RES_BAD_ARG;
+ }
+
+ FOR_EACH(iprobe, 0, args->nprobes) {
+ const res_T res = check_solve_probe_boundary_args(args->probes+iprobe);
+ if(res != RES_OK) return res;
+
+ if(args->probes[iprobe].register_paths != SDIS_HEAT_PATH_NONE) {
+ log_warn(dev,
+ "Unable to save paths for probe boundary %lu. "
+ "Saving path is not supported when solving multiple probes\n",
+ (unsigned long)iprobe);
+ }
+ }
+
+ return RES_OK;
+}
+
+static INLINE res_T
check_solve_probe_boundary_flux_args
(const struct sdis_solve_probe_boundary_flux_args* args)
{
@@ -109,6 +143,66 @@ check_solve_probe_boundary_flux_args
#endif /* SDIS_SOLVE_PROBE_BOUNDARY_XD_H */
+static res_T
+XD(solve_one_probe_boundary)
+ (struct sdis_scene* scn,
+ struct ssp_rng* rng,
+ const struct sdis_solve_probe_boundary_args* args,
+ struct accum* acc_temp,
+ struct accum* acc_time)
+{
+ size_t irealisation = 0;
+ res_T res = RES_OK;
+ ASSERT(scn && rng && check_solve_probe_boundary_args(args) == RES_OK);
+
+ *acc_temp = ACCUM_NULL;
+ *acc_time = ACCUM_NULL;
+
+ FOR_EACH(irealisation, 0, args->nrealisations) {
+ struct boundary_realisation_args realis_args = BOUNDARY_REALISATION_ARGS_NULL;
+ double w = NaN; /* MC weight */
+ double usec = 0; /* Time of a realisation */
+ double time = 0; /* Sampled observation time */
+ struct time t0, t1; /* Register the time spent solving a realisation */
+
+ /* Begin time registration of the realisation */
+ time_current(&t0);
+
+ /* Sample observation time */
+ time = sample_time(rng, args->time_range);
+
+ /* Run a realisation */
+ realis_args.rng = rng;
+ realis_args.iprim = args->iprim;
+ realis_args.time = time;
+ realis_args.picard_order = args->picard_order;
+ realis_args.side = args->side;
+ realis_args.irealisation = irealisation;
+ realis_args.uv[0] = args->uv[0];
+#if SDIS_XD_DIMENSION == 3
+ realis_args.uv[1] = args->uv[1];
+#endif
+ res = XD(boundary_realisation)(scn, &realis_args, &w);
+ if(res != RES_OK) goto error;
+
+ /* Stop time registration */
+ time_sub(&t0, time_current(&t1), &t0);
+ usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
+
+ /* Update MC weights */
+ acc_temp->sum += w;
+ acc_temp->sum2 += w*w;
+ acc_temp->count += 1;
+ acc_time->sum += usec;
+ acc_time->sum2 += usec*usec;
+ acc_time->count += 1;
+ }
+exit:
+ return res;
+error:
+ goto exit;
+}
+
/*******************************************************************************
* Local functions
******************************************************************************/
@@ -411,6 +505,207 @@ error:
}
static res_T
+XD(solve_probe_boundary_list)
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_boundary_list_args* args,
+ struct sdis_estimator_buffer** out_estim_buf)
+{
+ /* Time registration */
+ struct time time0, time1;
+ char buf[128]; /* Temporary buffer used to store formated time */
+
+ /* Device variable */
+ struct mem_allocator* allocator = NULL;
+
+ /* Stardis variables */
+ struct sdis_estimator_buffer* estim_buf = NULL;
+
+ /* Random Number generator */
+ struct ssp_rng_proxy* rng_proxy = NULL;
+ struct ssp_rng** per_thread_rng = NULL;
+
+ /* Probe variables */
+ size_t process_probes[2] = {0, 0}; /* Probes range managed by the process */
+ size_t process_nprobes = 0; /* Number of probes managed by the process */
+ size_t nprobes = 0;
+ struct accum* per_probe_acc_temp = NULL;
+ struct accum* per_probe_acc_time = NULL;
+
+ /* Miscellaneous */
+ int32_t* progress = NULL; /* Per process progress bar */
+ int is_master_process = 0;
+ int pcent_progress = 1; /* Percentage requiring progress update */
+ int64_t i = 0;
+ ATOMIC nsolved_probes = 0;
+ ATOMIC res = RES_OK;
+
+ if(!scn || !out_estim_buf) { res = RES_BAD_ARG; goto error; }
+ res = check_solve_probe_boundary_list_args(scn->dev, args);
+ if(res != RES_OK) goto error;
+ res = XD(scene_check_dimensionality)(scn);
+ if(res != RES_OK) goto error;
+
+#ifdef SDIS_ENABLE_MPI
+ is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0;
+#endif
+
+ allocator = scn->dev->allocator;
+
+ /* Update the progress bar every percent if escape sequences are allowed in
+ * log messages or only every 10 percent when only plain text is allowed.
+ * This reduces the number of lines of plain text printed */
+ pcent_progress = scn->dev->no_escape_sequence ? 10 : 1;
+
+ /* Create the per threads RNGs */
+ res = create_per_thread_rng
+ (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng);
+ if(res != RES_OK) goto error;
+
+ /* Allocate the per process progress status */
+ res = alloc_process_progress(scn->dev, &progress);
+ if(res != RES_OK) goto error;
+
+ /* Synchronise the processes */
+ process_barrier(scn->dev);
+
+ /* Define the range of probes to manage in this process */
+ process_nprobes = compute_process_index_range
+ (scn->dev, args->nprobes, process_probes);
+
+ #define PROGRESS_MSG "Solving surface probes: "
+ print_progress(scn->dev, progress, PROGRESS_MSG);
+
+ /* If there is no work to be done on this process (i.e. no probe to
+ * calculate), simply print its completion and go straight to the
+ * synchronization barrier.*/
+ if(process_nprobes == 0) {
+ progress[0] = 100;
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
+ goto post_sync;
+ }
+
+ /* Allocate the list of accumulators per probe. On the master process,
+ * allocate a complete list in which the accumulators of all processes will be
+ * stored. */
+ nprobes = is_master_process ? args->nprobes : process_nprobes;
+ per_probe_acc_temp = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_temp));
+ per_probe_acc_time = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_time));
+ if(!per_probe_acc_temp || !per_probe_acc_time) {
+ log_err(scn->dev, "Unable to allocate the list of accumulators per probe.\n");
+ res = RES_MEM_ERR;
+ goto error;
+ }
+
+ /* Begin time registration of the computation */
+ time_current(&time0);
+
+ /* Calculation of probe list */
+ #pragma omp parallel for schedule(static)
+ for(i = 0; i < (int64_t)process_nprobes; ++i) {
+ /* Thread */
+ const int ithread = omp_get_thread_num(); /* Thread ID */
+ struct ssp_rng* rng = per_thread_rng[ithread]; /* RNG of the thread */
+
+ /* Probe */
+ struct accum* probe_acc_temp = NULL;
+ struct accum* probe_acc_time = NULL;
+ const struct sdis_solve_probe_boundary_args* probe_args = NULL;
+ const size_t iprobe = process_probes[0] + (size_t)i; /* Probe ID */
+
+ /* Miscellaneous */
+ size_t n = 0; /* Number of solved probes */
+ int pcent = 0; /* Current progress */
+ res_T res_local = RES_OK;
+
+ if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */
+
+ /* Retrieve the probe arguments */
+ probe_args = &args->probes[iprobe];
+
+ /* Retrieve the probe accumulators */
+ probe_acc_temp = &per_probe_acc_temp[i];
+ probe_acc_time = &per_probe_acc_time[i];
+
+ res_local = XD(solve_one_probe_boundary)
+ (scn, rng, probe_args, probe_acc_temp, probe_acc_time);
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ continue;
+ }
+
+ /* Update progress */
+ n = (size_t)ATOMIC_INCR(&nsolved_probes);
+ pcent = (int)((double)n * 100.0 / (double)process_nprobes + 0.5/*round*/);
+
+ #pragma omp critical
+ if(pcent/pcent_progress > progress[0]/pcent_progress) {
+ progress[0] = pcent;
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
+ }
+ }
+
+post_sync:
+ /* Synchronise processes */
+ process_barrier(scn->dev);
+
+ res = gather_res_T(scn->dev, (res_T)res);
+ if(res != RES_OK) goto error;
+
+ print_progress_completion(scn->dev, progress, PROGRESS_MSG);
+ #undef PROGRESS_MSG
+
+ /* Report computatio time */
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "%lu surface probes solved in %s.\n",
+ (unsigned long)args->nprobes, buf);
+
+ /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of
+ * the master process is greater than the RNG proxy state of all other
+ * processes */
+ res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy);
+ if(res != RES_OK) goto error;
+
+ time_current(&time0);
+
+ /* Gather the list of accumulators */
+ #define GATHER_ACCUMS_LIST(Msg, Acc) { \
+ res = gather_accumulators_list \
+ (scn->dev, Msg, args->nprobes, process_probes, per_probe_acc_##Acc); \
+ if(res != RES_OK) goto error; \
+ } (void)0
+ GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TEMP, temp);
+ GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TIME, time);
+ #undef GATHER_ACCUMS_LIST
+
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "Probes accumulator gathered in %s.\n", buf);
+
+ if(is_master_process) {
+ res = estimator_buffer_create_from_observable_list_probe_boundary
+ (scn->dev, rng_proxy, args->probes, per_probe_acc_temp,
+ per_probe_acc_time, args->nprobes, &estim_buf);
+ if(res != RES_OK) goto error;
+ }
+
+exit:
+ if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng);
+ if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
+ if(per_probe_acc_temp) MEM_RM(allocator, per_probe_acc_temp);
+ if(per_probe_acc_time) MEM_RM(allocator, per_probe_acc_time);
+ if(progress) free_process_progress(scn->dev, progress);
+ if(out_estim_buf) *out_estim_buf = estim_buf;
+ return (res_T)res;
+error:
+ if(estim_buf) {
+ SDIS(estimator_buffer_ref_put(estim_buf));
+ estim_buf = NULL;
+ }
+ goto exit;
+}
+
+static res_T
XD(solve_probe_boundary_flux)
(struct sdis_scene* scn,
const struct sdis_solve_probe_boundary_flux_args* args,