commit 0c3169117baa442269a8855036894c3884a53a9c
parent dc29b53107ce603be7562ded7c873b71a1b20206
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Thu, 14 Dec 2023 15:01:53 +0100
Merge branch 'feature_solve_probe_list' into develop
Diffstat:
13 files changed, 998 insertions(+), 60 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -9,5 +9,6 @@ test*
.config
.config_test
.test
+rng_state
tags
*.pc
diff --git a/Makefile b/Makefile
@@ -255,6 +255,7 @@ test_all: test
clean_test:
@$(SHELL) make.sh clean_test $(TEST_SRC) $(TEST_SRC_MPI) $(TEST_SRC_LONG)
+ rm -f rng_state
################################################################################
# Regular tests
diff --git a/src/sdis.c b/src/sdis.c
@@ -453,36 +453,54 @@ free_process_progress(struct sdis_device* dev, int32_t progress[])
}
size_t
-compute_process_realisations_count
+compute_process_index_range
(const struct sdis_device* dev,
- const size_t nrealisations)
+ const size_t nindices,
+ size_t range[2])
{
#ifndef SDIS_ENABLE_MPI
- (void)dev, (void)nrealisations;
- return nrealisations;
+ (void)dev;
+ range[0] = 0;
+ range[1] = nindices; /* Upper bound is _exclusive_ */
#else
- size_t per_process_nrealisations = 0;
- size_t remaining_nrealisations = 0;
ASSERT(dev);
- if(!dev->use_mpi) return nrealisations;
-
- /* Compute minimum the number of realisations on each process */
- per_process_nrealisations = nrealisations / (size_t)dev->mpi_nprocs;
-
- /* Define the remaining number of realisations that are not handle by one
- * process */
- remaining_nrealisations =
- nrealisations
- - per_process_nrealisations * (size_t)dev->mpi_nprocs;
-
- /* Distribute the remaining realisations onto the processes */
- if((size_t)dev->mpi_rank >= remaining_nrealisations) {
- return per_process_nrealisations;
+ if(!dev->use_mpi) {
+ range[0] = 0;
+ range[1] = nindices;
} else {
- return per_process_nrealisations + 1;
+ size_t per_process_indices = 0;
+ size_t remaining_indices = 0;
+
+ /* Compute the minimum number of indices on each process */
+ per_process_indices = nindices / (size_t)dev->mpi_nprocs;
+
+ range[0] = per_process_indices * (size_t)dev->mpi_rank;
+ range[1] = range[0] + per_process_indices; /* Upper bound is _exclusive */
+ ASSERT(range[0] <= range[1]);
+
+ /* Set the remaining number of indexes that are not managed by one process */
+ remaining_indices =
+ nindices - per_process_indices * (size_t)dev->mpi_nprocs;
+
+ /* Distribute the remaining indices among the processes. Each process whose
+ * rank is lower than the number of remaining indices takes an additional
+ * index. To ensure continuity of indices per process, subsequent processes
+ * shift their initial rank accordingly, i.e. process 1 shifts its indices
+ * by 1, process 2 shifts them by 2 and so on until there are no more
+ * indices to distribute. From then on, subsequent processes simply shift
+ * their index range by the number of remaining indices that have been
+ * distributed. */
+ if((size_t)dev->mpi_rank < remaining_indices) {
+ range[0] += (size_t)dev->mpi_rank;
+ range[1] += (size_t)dev->mpi_rank + 1/* Take one more index */;
+ } else {
+ range[0] += remaining_indices;
+ range[1] += remaining_indices;
+ }
}
#endif
+ return range[1] - range[0];
}
#ifndef SDIS_ENABLE_MPI
@@ -568,6 +586,119 @@ error:
#ifndef SDIS_ENABLE_MPI
res_T
+gather_accumulators_list
+ (struct sdis_device* dev,
+ const enum mpi_sdis_message msg,
+ const size_t nprobes, /* Total number of probes */
+ const size_t process_probes[2], /* Ids of the probes managed by the process */
+ struct accum* per_probe_acc) /* List of per probe accumulators */
+{
+ (void)dev, (void)msg, (void) nprobes;
+ (void)process_probes, (void)per_probe_acc;
+ return RES_OK;
+}
+#else
+res_T
+gather_accumulators_list
+ (struct sdis_device* dev,
+ const enum mpi_sdis_message msg,
+ const size_t nprobes, /* Total number of probes */
+ const size_t process_probes[2], /* Range of probes managed by the process */
+ struct accum* per_probe_acc) /* List of per probe accumulators */
+{
+ struct accum_list {
+ size_t size;
+ /* Simulate a C99 flexible array */
+ ALIGN(16) struct accum accums[1/*Dummy element*/];
+ }* accum_list = NULL;
+ size_t max_per_process_nprobes = 0; /* Maximum #probes per process */
+ size_t process_nprobes = 0; /* Number of process probes */
+ size_t msg_sz = 0; /* Size in bytes of the message to send */
+ res_T res = RES_OK;
+
+ /* Check pre-conditions */
+ ASSERT(dev);
+ ASSERT(process_nprobes == 0 || (process_probes && per_probe_acc));
+
+ /* Without MPI, do nothing since per_probe_acc already has all the
+ * accumulators */
+ if(!dev->use_mpi) goto exit;
+
+ /* Defines the maximum number of probes managed by a process. In fact, it's
+ * the number of probes divided by the number of processes, plus one to manage
+ * the remainder of the entire division: the remaining probes are distributed
+ * between the processes */
+ max_per_process_nprobes = nprobes/(size_t)dev->mpi_nprocs + 1;
+
+ /* Number of probes */
+ process_nprobes = process_probes[1] - process_probes[0];
+
+ /* Allocate the array into which the data to be collected is copied */
+ msg_sz =
+ sizeof(struct accum_list)
+ + sizeof(struct accum)*max_per_process_nprobes
+ - 1/*Dummy element */;
+ if(msg_sz > INT_MAX) {
+ log_err(dev, "%s: invalid MPI message size %lu.\n",
+ FUNC_NAME, (unsigned long)msg_sz);
+ res = RES_BAD_ARG;
+ goto error;
+ }
+
+ accum_list = MEM_CALLOC(dev->allocator, 1, msg_sz);
+ if(!accum_list) {
+ log_err(dev,
+ "%s: unable to allocate the temporary list of accumulators.\n",
+ FUNC_NAME);
+ res = RES_MEM_ERR;
+ goto error;
+ }
+
+ /* Non master process */
+ if(dev->mpi_rank != 0) {
+
+ /* Setup the message to be sent */
+ accum_list->size = process_nprobes;
+ memcpy(accum_list->accums, per_probe_acc,
+ sizeof(struct accum)*process_nprobes);
+
+ mutex_lock(dev->mpi_mutex);
+ MPI(Send(accum_list, (int)msg_sz, MPI_CHAR, 0/*Dst*/, msg, MPI_COMM_WORLD));
+ mutex_unlock(dev->mpi_mutex);
+
+ /* Master process */
+ } else {
+ size_t gathered_nprobes = process_nprobes;
+ int iproc;
+
+ FOR_EACH(iproc, 1, dev->mpi_nprocs) {
+ MPI_Request req;
+
+ /* Asynchronously receive the accumulator of `iproc' */
+ mutex_lock(dev->mpi_mutex);
+ MPI(Irecv
+ (accum_list, (int)msg_sz, MPI_CHAR, iproc, msg, MPI_COMM_WORLD, &req));
+ mutex_unlock(dev->mpi_mutex);
+
+ mpi_waiting_for_request(dev, &req);
+
+ memcpy(per_probe_acc+gathered_nprobes, accum_list->accums,
+ sizeof(struct accum)*accum_list->size);
+
+ gathered_nprobes += accum_list->size;
+ }
+ }
+
+exit:
+ if(accum_list) MEM_RM(dev->allocator, accum_list);
+ return res;
+error:
+ goto exit;
+}
+#endif
+
+#ifndef SDIS_ENABLE_MPI
+res_T
gather_green_functions
(struct sdis_scene* scn,
struct ssp_rng_proxy* rng_proxy,
diff --git a/src/sdis.h b/src/sdis.h
@@ -466,6 +466,24 @@ struct sdis_solve_probe_args {
static const struct sdis_solve_probe_args SDIS_SOLVE_PROBE_ARGS_DEFAULT =
SDIS_SOLVE_PROBE_ARGS_DEFAULT__;
+struct sdis_solve_probe_list_args {
+ struct sdis_solve_probe_args* probes; /* List of probes to compute */
+ size_t nprobes; /* Total number of probes */
+
+ /* State/type of the RNG to use for the list of probes to calculate.
+ * The state/type defines per probe is ignored */
+ struct ssp_rng* rng_state; /* Initial RNG state. May be NULL */
+ enum ssp_rng_type rng_type; /* RNG type to use if `rng_state' is NULL */
+};
+#define SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT__ { \
+ NULL, /* List of probes */ \
+ 0, /* #probes */ \
+ NULL, /* RNG state */ \
+ SSP_RNG_THREEFRY /* RNG type */ \
+}
+static const struct sdis_solve_probe_list_args
+SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT = SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT__;
+
/* Arguments of a probe simulation */
struct sdis_solve_probe_boundary_args {
size_t nrealisations; /* #realisations */
@@ -712,6 +730,11 @@ sdis_device_ref_put
(struct sdis_device* dev);
SDIS_API res_T
+sdis_device_is_mpi_used
+ (struct sdis_device* dev,
+ int* is_mpi_used);
+
+SDIS_API res_T
sdis_device_get_mpi_rank
(struct sdis_device* dev,
int* rank);
@@ -1089,6 +1112,11 @@ sdis_scene_get_medium_spread
const struct sdis_medium* mdm,
double* spread);
+SDIS_API res_T
+sdis_scene_get_device
+ (struct sdis_scene* scn,
+ struct sdis_device** device);
+
/*******************************************************************************
* An estimator stores the state of a simulation
******************************************************************************/
@@ -1330,6 +1358,18 @@ sdis_solve_probe
const struct sdis_solve_probe_args* args,
struct sdis_estimator** estimator);
+/* Calculate temperature for a list of probe points. Unlike its
+ * single-probe counterpart, this function parallelizes the list of
+ * probes, rather than calculating a single probe. Calling this function
+ * is therefore more advantageous in terms of load distribution when the
+ * number of probe points to be evaluated is large compared to the cost
+ * of calculating a single probe point. */
+SDIS_API res_T
+sdis_solve_probe_list
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_list_args* args,
+ struct sdis_estimator_buffer** buf);
+
SDIS_API res_T
sdis_solve_probe_boundary
(struct sdis_scene* scn,
diff --git a/src/sdis_c.h b/src/sdis_c.h
@@ -85,11 +85,25 @@ free_process_progress
(struct sdis_device* dev,
int32_t progress[]);
-/* Compute the number of realisations for the current process */
+/* Calculate the index range of the current process. It returns the size of the
+ * range. The overall_count is the number of calculations to parallelize between
+ * processes. For example, it may be the number of realisations of one
+ * calculation, or the total number of probe calculations. */
extern LOCAL_SYM size_t
+compute_process_index_range
+ (const struct sdis_device* dev,
+ const size_t overall_count,
+ size_t range[2]); /* [lower, upper[ */
+
+/* Return the number of realisations for the current process */
+static INLINE size_t
compute_process_realisations_count
(const struct sdis_device* dev,
- const size_t overall_realisations_count);
+ const size_t overall_realisations_count)
+{
+ size_t range[2];
+ return compute_process_index_range(dev, overall_realisations_count, range);
+}
/* Gather the accumulators and sum them in acc. With MPI, non master processes
* store in acc the gathering of their per thread accumulators that are sent to
@@ -102,6 +116,21 @@ gather_accumulators
const struct accum* per_thread_acc,
struct accum* acc);
+/* Collect accumulators evaluated over multiple processes, with each accumulator
+ * storing a complete Monte Carlo calculation. Without MPI, nothing happens
+ * since the per_probe_acc variable already stores the entire list of
+ * accumulators. With MPI, non-master processes send their list of accumulators
+ * to the master process which saves them in the per_probe_acc, after its
+ * accumulators that it has managed, sorted against the identifiers of the
+ * probes listed in process_probes. */
+extern LOCAL_SYM res_T
+gather_accumulators_list
+ (struct sdis_device* dev,
+ const enum mpi_sdis_message msg,
+ const size_t nprobes, /* Total number of probes */
+ const size_t process_probes[2], /* Ids of the probes managed by the process */
+ struct accum* per_probe_acc); /* List of per probe accumulators */
+
/* Gather the green functions. With MPI, non master processes store in green
* the gathering of their per thread green functions and sent the result to the
* master process. The master process gathers both per thread green functions
diff --git a/src/sdis_device.c b/src/sdis_device.c
@@ -373,6 +373,18 @@ sdis_device_ref_put(struct sdis_device* dev)
}
res_T
+sdis_device_is_mpi_used(struct sdis_device* dev, int* is_mpi_used)
+{
+ if(!dev || !is_mpi_used) return RES_BAD_ARG;
+#ifndef SDIS_ENABLE_MPI
+ *is_mpi_used = 0;
+#else
+ *is_mpi_used = dev->use_mpi;
+#endif
+ return RES_OK;
+}
+
+res_T
sdis_device_get_mpi_rank(struct sdis_device* dev, int* rank)
{
#ifndef SDIS_ENABLE_MPI
diff --git a/src/sdis_scene.c b/src/sdis_scene.c
@@ -410,6 +410,14 @@ error:
goto exit;
}
+res_T
+sdis_scene_get_device(struct sdis_scene* scn, struct sdis_device** device)
+{
+ if(!scn || !device) return RES_BAD_ARG;
+ *device = scn->dev;
+ return RES_OK;
+}
+
/*******************************************************************************
* Local miscellaneous function
******************************************************************************/
@@ -556,4 +564,3 @@ exit:
error:
goto exit;
}
-
diff --git a/src/sdis_solve.c b/src/sdis_solve.c
@@ -57,6 +57,20 @@ sdis_solve_probe
}
res_T
+sdis_solve_probe_list
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_list_args args[],
+ struct sdis_estimator_buffer** out_buf)
+{
+ if(!scn) return RES_BAD_ARG;
+ if(scene_is_2d(scn)) {
+ return solve_probe_list_2d(scn, args, out_buf);
+ } else {
+ return solve_probe_list_3d(scn, args, out_buf);
+ }
+}
+
+res_T
sdis_solve_probe_green_function
(struct sdis_scene* scn,
const struct sdis_solve_probe_args* args,
diff --git a/src/sdis_solve_camera.c b/src/sdis_solve_camera.c
@@ -495,7 +495,7 @@ sdis_solve_camera
char buffer[128]; /* Temporary buffer used to store formated time */
/* Stardis variables */
- struct sdis_estimator_buffer* buf= NULL;
+ struct sdis_estimator_buffer* buf = NULL;
struct sdis_medium* medium = NULL;
/* Random number generators */
diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h
@@ -16,6 +16,7 @@
#include "sdis_c.h"
#include "sdis_device_c.h"
#include "sdis_estimator_c.h"
+#include "sdis_estimator_buffer_c.h"
#include "sdis_log.h"
#include "sdis_green.h"
#include "sdis_misc.h"
@@ -66,8 +67,183 @@ check_solve_probe_args(const struct sdis_solve_probe_args* args)
return RES_OK;
}
+static INLINE res_T
+check_solve_probe_list_args
+ (struct sdis_device* dev,
+ const struct sdis_solve_probe_list_args* args)
+{
+ size_t iprobe = 0;
+
+ if(!args) return RES_BAD_ARG;
+
+ /* Check the list of probes */
+ if(!args->probes || !args->nprobes) {
+ return RES_BAD_ARG;
+ }
+
+ /* Check the RNG type */
+ if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) {
+ return RES_BAD_ARG;
+ }
+
+ FOR_EACH(iprobe, 0, args->nprobes) {
+ const res_T res = check_solve_probe_args(args->probes+iprobe);
+ if(res != RES_OK) return res;
+
+ if(args->probes[iprobe].register_paths != SDIS_HEAT_PATH_NONE) {
+ log_warn(dev,
+ "Unable to save paths for probe %lu. "
+ "Saving path is not supported when solving multiple probes\n",
+ (unsigned long)iprobe);
+ }
+ }
+
+ return RES_OK;
+}
+
+static res_T
+setup_estimator_buffer
+ (struct sdis_device* dev,
+ struct ssp_rng_proxy* rng_proxy,
+ const struct sdis_solve_probe_list_args* solve_args,
+ const struct accum* per_probe_acc_temp,
+ const struct accum* per_probe_acc_time,
+ struct sdis_estimator_buffer** out_estim_buffer)
+{
+ /* Accumulators throughout the buffer */
+ struct accum acc_temp = ACCUM_NULL;
+ struct accum acc_time = ACCUM_NULL;
+ size_t nrealisations = 0;
+
+ struct sdis_estimator_buffer* estim_buf = NULL;
+ size_t iprobe = 0;
+ res_T res = RES_OK;
+
+ ASSERT(dev && rng_proxy && solve_args);
+ ASSERT(per_probe_acc_time && per_probe_acc_time && out_estim_buffer);
+
+ res = estimator_buffer_create(dev, solve_args->nprobes, 1, &estim_buf);
+ if(res != RES_OK) {
+ log_err(dev, "Unable to allocate the estimator buffer.\n");
+ goto error;
+ }
+
+ FOR_EACH(iprobe, 0, solve_args->nprobes) {
+ const struct sdis_solve_probe_args* probe = NULL;
+ const struct accum* probe_acc_temp = NULL;
+ const struct accum* probe_acc_time = NULL;
+ struct sdis_estimator* estim = NULL;
+
+ /* Get probe data */
+ probe = solve_args->probes + iprobe;
+ probe_acc_temp = per_probe_acc_temp + iprobe;
+ probe_acc_time = per_probe_acc_time + iprobe;
+ ASSERT(probe_acc_temp->count == probe_acc_time->count);
+
+ /* Setup probe estimator */
+ estim = estimator_buffer_grab(estim_buf, iprobe, 0);
+ estimator_setup_realisations_count
+ (estim, probe->nrealisations, probe_acc_temp->count);
+ estimator_setup_temperature
+ (estim, probe_acc_temp->sum, probe_acc_temp->sum2);
+ estimator_setup_realisation_time
+ (estim, probe_acc_time->sum, probe_acc_time->sum2);
+
+ /* Update global accumulators */
+ acc_temp.sum += probe_acc_temp->sum;
+ acc_temp.sum2 += probe_acc_temp->sum2;
+ acc_temp.count += probe_acc_temp->count;
+ acc_time.sum += probe_acc_time->sum;
+ acc_time.sum2 += probe_acc_time->sum2;
+ acc_time.count += probe_acc_time->count;
+ nrealisations += probe->nrealisations;
+ }
+
+ ASSERT(acc_temp.count == acc_time.count);
+
+ /* Setup global estimator */
+ estimator_buffer_setup_realisations_count
+ (estim_buf, nrealisations, acc_temp.count);
+ estimator_buffer_setup_temperature
+ (estim_buf, acc_temp.sum, acc_temp.sum2);
+ estimator_buffer_setup_realisation_time
+ (estim_buf, acc_time.sum, acc_time.sum2);
+
+ res = estimator_buffer_save_rng_state(estim_buf, rng_proxy);
+ if(res != RES_OK) goto error;
+
+exit:
+ *out_estim_buffer = estim_buf;
+ return res;
+error:
+ goto exit;
+}
+
#endif /* SDIS_SOLVE_PROBE_XD_H */
+static res_T
+XD(solve_one_probe)
+ (struct sdis_scene* scn,
+ struct ssp_rng* rng,
+ const struct sdis_solve_probe_args* args,
+ struct accum* acc_temp,
+ struct accum* acc_time)
+{
+ struct sdis_medium* medium = NULL; /* Medium in which the probe lies */
+ size_t irealisation = 0;
+ res_T res = RES_OK;
+ ASSERT(scn && rng && check_solve_probe_args(args) == RES_OK);
+ ASSERT(acc_temp && acc_time);
+
+ *acc_temp = ACCUM_NULL;
+ *acc_time = ACCUM_NULL;
+
+ /* Retrieve the medium in which the submitted position lies */
+ res = scene_get_medium(scn, args->position, NULL, &medium);
+ if(res != RES_OK) goto error;
+
+ FOR_EACH(irealisation, 0, args->nrealisations) {
+ struct probe_realisation_args realis_args = PROBE_REALISATION_ARGS_NULL;
+ double w = NaN; /* MC weight */
+ double usec = 0; /* Time of a realisation */
+ double time = 0; /* Sampled observation time */
+ struct time t0, t1; /* Register the time spent solving a realisation */
+
+ /* Begin time registration of the realisation */
+ time_current(&t0);
+
+ /* Sample observation time */
+ time = sample_time(rng, args->time_range);
+
+ /* Run a realisation */
+ realis_args.rng = rng;
+ realis_args.medium = medium;
+ realis_args.time = time;
+ realis_args.picard_order = args->picard_order;
+ realis_args.irealisation = irealisation;
+ dX(set)(realis_args.position, args->position);
+ res = XD(probe_realisation)(scn, &realis_args, &w);
+ if(res != RES_OK) goto error;
+
+ /* Stop time registration */
+ time_sub(&t0, time_current(&t1), &t0);
+ usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
+
+ /* Update MC weights */
+ acc_temp->sum += w;
+ acc_temp->sum2 += w*w;
+ acc_temp->count += 1;
+ acc_time->sum += usec;
+ acc_time->sum2 += usec*usec;
+ acc_time->count += 1;
+ }
+
+exit:
+ return res;
+error:
+ goto exit;
+}
+
/*******************************************************************************
* Generic solve function
******************************************************************************/
@@ -316,7 +492,7 @@ XD(solve_probe)
if(res != RES_OK) goto error; \
} (void)0
GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_temp);
- GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_time);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TIME, acc_time);
#undef GATHER_ACCUMS
time_sub(&time0, time_current(&time1), &time0);
@@ -369,5 +545,206 @@ error:
goto exit;
}
-#include "sdis_Xd_end.h"
+static res_T
+XD(solve_probe_list)
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_list_args* args,
+ struct sdis_estimator_buffer** out_estim_buf)
+{
+ /* Time registration */
+ struct time time0, time1;
+ char buf[128]; /* Temporary buffer used to store formated time */
+
+ /* Device variables */
+ struct mem_allocator* allocator = NULL;
+
+ /* Stardis variables */
+ struct sdis_estimator_buffer* estim_buf = NULL;
+
+ /* Random Number generator */
+ struct ssp_rng_proxy* rng_proxy = NULL;
+ struct ssp_rng** per_thread_rng = NULL;
+
+ /* Probe variables */
+ size_t process_probes[2] = {0, 0}; /* Probes range managed by the process */
+ size_t process_nprobes = 0; /* Number of probes managed by the process */
+ size_t nprobes = 0;
+ struct accum* per_probe_acc_temp = NULL;
+ struct accum* per_probe_acc_time = NULL;
+
+ /* Miscellaneous */
+ int32_t* progress = NULL; /* Per process progress bar */
+ int pcent_progress = 1; /* Percentage requiring progress update */
+ int is_master_process = 0;
+ int64_t i = 0;
+ ATOMIC nsolved_probes = 0;
+ ATOMIC res = RES_OK;
+ /* Check input arguments */
+ if(!scn || !out_estim_buf) { res = RES_BAD_ARG; goto error; }
+ res = check_solve_probe_list_args(scn->dev, args);
+ if(res != RES_OK) goto error;
+ res = XD(scene_check_dimensionality)(scn);
+ if(res != RES_OK) goto error;
+
+#ifdef SDIS_ENABLE_MPI
+ is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0;
+#endif
+
+ allocator = scn->dev->allocator;
+
+ /* Update the progress bar every percent if escape sequences are allowed in
+ * log messages or only every 10 percent when only plain text is allowed.
+ * This reduces the number of lines of plain text printed */
+ pcent_progress = scn->dev->no_escape_sequence ? 10 : 1;
+
+ /* Create the per thread RNGs */
+ res = create_per_thread_rng
+ (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng);
+ if(res != RES_OK) goto error;
+
+ /* Allocate the per process progress status */
+ res = alloc_process_progress(scn->dev, &progress);
+ if(res != RES_OK) goto error;
+
+ /* Synchronise the processes */
+ process_barrier(scn->dev);
+
+ /* Define the range of probes to manage in this process */
+ process_nprobes = compute_process_index_range
+ (scn->dev, args->nprobes, process_probes);
+
+ #define PROGRESS_MSG "Solving probes: "
+ print_progress(scn->dev, progress, PROGRESS_MSG);
+
+ /* If there is no work to be done on this process (i.e. no probe to
+ * calculate), simply print its completion and go straight to the
+ * synchronization barrier.*/
+ if(process_nprobes == 0) {
+ progress[0] = 100;
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
+ goto post_sync;
+ }
+
+ /* Begin time registration of the computation */
+ time_current(&time0);
+
+ /* Allocate the list of accumulators per probe. On the master process,
+ * allocate a complete list in which the accumulators of all processes will be
+ * stored. */
+ nprobes = is_master_process ? args->nprobes : process_nprobes;
+ per_probe_acc_temp = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_temp));
+ per_probe_acc_time = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_time));
+ if(!per_probe_acc_temp || !per_probe_acc_time) {
+ log_err(scn->dev, "Unable to allocate the list of accumulators per probe.\n");
+ res = RES_MEM_ERR;
+ goto error;
+ }
+
+ /* Here we go! Calculation of probe list */
+ omp_set_num_threads((int)scn->dev->nthreads);
+ #pragma omp parallel for schedule(static)
+ for(i = 0; i < (int64_t)process_nprobes; ++i) {
+ /* Thread */
+ const int ithread = omp_get_thread_num(); /* Thread ID */
+ struct ssp_rng* rng = per_thread_rng[ithread]; /* RNG of the thread */
+
+ /* Probe */
+ struct accum* probe_acc_temp = NULL;
+ struct accum* probe_acc_time = NULL;
+ const struct sdis_solve_probe_args* probe_args = NULL; /* Solve args */
+ const size_t iprobe = process_probes[0] + (size_t)i; /* Probe ID */
+
+ /* Misc */
+ size_t n = 0; /* Number of solved probes */
+ int pcent = 0; /* Current progress */
+ res_T res_local = RES_OK;
+
+ if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occured */
+
+ /* Retrieve the probe arguments */
+ probe_args = &args->probes[iprobe];
+
+ /* Retrieve the probe accumulators */
+ probe_acc_temp = &per_probe_acc_temp[i];
+ probe_acc_time = &per_probe_acc_time[i];
+
+ res_local = XD(solve_one_probe)
+ (scn, rng, probe_args, probe_acc_temp, probe_acc_time);
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ continue;
+ }
+
+ /* Update progress */
+ n = (size_t)ATOMIC_INCR(&nsolved_probes);
+ pcent = (int)((double)n * 100.0 / (double)process_nprobes + 0.5/*round*/);
+
+ #pragma omp critical
+ if(pcent/pcent_progress > progress[0]/pcent_progress) {
+ progress[0] = pcent;
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
+ }
+ }
+
+post_sync:
+ /* Synchronise processes */
+ process_barrier(scn->dev);
+
+ res = gather_res_T(scn->dev, (res_T)res);
+ if(res != RES_OK) goto error;
+
+ print_progress_completion(scn->dev, progress, PROGRESS_MSG);
+ #undef PROGRESS_MSG
+
+ /* Report computation time */
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "%lu probes solved in %s.\n",
+ (unsigned long)args->nprobes, buf);
+
+ /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of
+ * the master process is greater than the RNG proxy state of all other
+ * processes */
+ res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy);
+ if(res != RES_OK) goto error;
+
+ time_current(&time0);
+
+ /* Gather the list of accumulators */
+ #define GATHER_ACCUMS_LIST(Msg, Acc) { \
+ res = gather_accumulators_list \
+ (scn->dev, Msg, args->nprobes, process_probes, per_probe_acc_##Acc); \
+ if(res != RES_OK) goto error; \
+ } (void)0
+ GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TEMP, temp);
+ GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TIME, time);
+ #undef GATHER_ACCUMS_LIST
+
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "Probes accumulator gathered in %s.\n", buf);
+
+ if(is_master_process) {
+ res = setup_estimator_buffer(scn->dev, rng_proxy, args, per_probe_acc_temp,
+ per_probe_acc_time, &estim_buf);
+ if(res != RES_OK) goto error;
+ }
+
+exit:
+ if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng);
+ if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
+ if(per_probe_acc_temp) MEM_RM(allocator, per_probe_acc_temp);
+ if(per_probe_acc_time) MEM_RM(allocator, per_probe_acc_time);
+ if(progress) free_process_progress(scn->dev, progress);
+ if(out_estim_buf) *out_estim_buf = estim_buf;
+ return (res_T)res;
+error:
+ if(estim_buf) {
+ SDIS(estimator_buffer_ref_put(estim_buf));
+ estim_buf = NULL;
+ }
+ goto exit;
+}
+
+#include "sdis_Xd_end.h"
diff --git a/src/test_sdis_device.c b/src/test_sdis_device.c
@@ -37,6 +37,7 @@ main(int argc, char** argv)
struct logger logger;
struct mem_allocator allocator;
struct sdis_device* dev;
+ int is_mpi_used;
#ifdef SDIS_ENABLE_MPI
int provided;
#endif
@@ -84,6 +85,9 @@ main(int argc, char** argv)
args.nthreads_hint = SDIS_NTHREADS_DEFAULT;
OK(sdis_device_create(&args, &dev));
+ BA(sdis_device_is_mpi_used(NULL, &is_mpi_used));
+ BA(sdis_device_is_mpi_used(dev, NULL));
+
OK(sdis_device_ref_put(dev));
args.use_mpi = 1;
@@ -91,10 +95,14 @@ main(int argc, char** argv)
#ifndef SDIS_ENABLE_MPI
OK(sdis_device_create(&args, &dev));
+ OK(sdis_device_is_mpi_used(dev, &is_mpi_used));
+ CHK(!is_mpi_used);
OK(sdis_device_ref_put(dev));
#else
CHK(MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided) == MPI_SUCCESS);
OK(sdis_device_create(&args, &dev));
+ OK(sdis_device_is_mpi_used(dev, &is_mpi_used));
+ CHK(is_mpi_used);
CHK(MPI_Finalize() == MPI_SUCCESS);
OK(sdis_device_ref_put(dev));
#endif
diff --git a/src/test_sdis_scene.c b/src/test_sdis_scene.c
@@ -99,6 +99,7 @@ test_scene_3d(struct sdis_device* dev, struct sdis_interface* interf)
struct senc3d_scene* scn3d;
struct sdis_scene_create_args scn_args = SDIS_SCENE_CREATE_ARGS_DEFAULT;
struct sdis_scene* scn = NULL;
+ struct sdis_device* dev2 = NULL;
size_t ntris, npos;
size_t iprim;
size_t i;
@@ -163,6 +164,11 @@ test_scene_3d(struct sdis_device* dev, struct sdis_interface* interf)
OK(sdis_scene_get_dimension(scn, &dim));
CHK(dim == SDIS_SCENE_3D);
+ BA(sdis_scene_get_device(NULL, &dev2));
+ BA(sdis_scene_get_device(scn, NULL));
+ OK(sdis_scene_get_device(scn, &dev2));
+ CHK(dev == dev2);
+
BA(sdis_scene_get_aabb(NULL, lower, upper));
BA(sdis_scene_get_aabb(scn, NULL, upper));
BA(sdis_scene_get_aabb(scn, lower, NULL));
diff --git a/src/test_sdis_solve_probe_list.c b/src/test_sdis_solve_probe_list.c
@@ -16,8 +16,15 @@
#include "sdis.h"
#include "test_sdis_utils.h"
+#include <rsys/float3.h>
+
#include <star/s3d.h>
#include <star/s3dut.h>
+#include <star/ssp.h>
+
+#ifndef SDIS_ENABLE_MPI
+ #include <mpi.h>
+#endif
/*
* The system is a trilinear profile of steady state temperature, i.e. at each
@@ -44,8 +51,8 @@ static double
trilinear_profile(const double pos[3])
{
/* Range in X, Y and Z in which the trilinear profile is defined */
- const double lower = -10;
- const double upper = +10;
+ const double lower = -3;
+ const double upper = +3;
/* Upper temperature limit in X, Y and Z [K]. Lower temperature limit is
* implicitly 0 */
@@ -67,17 +74,28 @@ trilinear_profile(const double pos[3])
return a*x + b*y + c*z;
}
-static double
-compute_delta(struct s3d_scene_view* view)
+static INLINE float
+rand_canonic(void)
{
- float S = 0; /* Surface */
- float V = 0; /* Volume */
-
- OK(s3d_scene_view_compute_area(view, &S));
- OK(s3d_scene_view_compute_volume(view, &V));
- CHK(S > 0 && V > 0);
+ return (float)rand() / (float)((unsigned)RAND_MAX+1);
+}
- return (4.0*V/S)/20.0;
+static INLINE void
+check_intersection
+ (const double val0,
+ const double eps0,
+ const double val1,
+ const double eps1)
+{
+ double interval0[2], interval1[2];
+ double intersection[2];
+ interval0[0] = val0 - eps0;
+ interval0[1] = val0 + eps0;
+ interval1[0] = val1 - eps1;
+ interval1[1] = val1 + eps1;
+ intersection[0] = MMAX(interval0[0], interval1[0]);
+ intersection[1] = MMIN(interval0[1], interval1[1]);
+ CHK(intersection[0] <= intersection[1]);
}
/*******************************************************************************
@@ -156,6 +174,66 @@ create_view(struct s3dut_mesh* mesh)
return view;
}
+static double
+view_compute_delta(struct s3d_scene_view* view)
+{
+ float S = 0; /* Surface */
+ float V = 0; /* Volume */
+
+ OK(s3d_scene_view_compute_area(view, &S));
+ OK(s3d_scene_view_compute_volume(view, &V));
+ CHK(S > 0 && V > 0);
+
+ return (4.0*V/S)/30.0;
+}
+
+static void
+view_sample_position
+ (struct s3d_scene_view* view,
+ const double delta,
+ double pos[3])
+{
+ /* Ray variables */
+ const float dir[3] = {0, 1, 0};
+ const float range[2] = {0, FLT_MAX};
+ float org[3];
+
+ /* View variables */
+ float low[3];
+ float upp[3];
+
+ OK(s3d_scene_view_get_aabb(view, low, upp));
+
+ /* Sample a position in the supershape by uniformly sampling a position within
+ * its bounding box and rejecting it if it is not in the supershape or if it
+ * is too close to its boundaries. */
+ for(;;) {
+ struct s3d_hit hit = S3D_HIT_NULL;
+ float N[3] = {0, 0, 0}; /* Normal */
+
+ pos[0] = low[0] + rand_canonic()*(upp[0] - low[0]);
+ pos[1] = low[1] + rand_canonic()*(upp[1] - low[1]);
+ pos[2] = low[2] + rand_canonic()*(upp[2] - low[2]);
+
+ org[0] = (float)pos[0];
+ org[1] = (float)pos[1];
+ org[2] = (float)pos[2];
+ OK(s3d_scene_view_trace_ray(view, org, dir, range, NULL, &hit));
+
+ if(S3D_HIT_NONE(&hit)) continue;
+
+ f3_normalize(N, hit.normal);
+ if(f3_dot(N, dir) > 0) continue;
+
+ OK(s3d_scene_view_closest_point(view, org, (float)INF, NULL, &hit));
+ CHK(!S3D_HIT_NONE(&hit));
+
+ /* Sample position is in the super shape and is not too close to its
+ * boundaries */
+ if(hit.distance > delta) break;
+ }
+}
+
/*******************************************************************************
* Scene, i.e. the system to simulate
******************************************************************************/
@@ -251,11 +329,11 @@ create_solid(struct sdis_device* sdis, struct s3d_scene_view* view)
struct sdis_solid_shader shader = SDIS_SOLID_SHADER_NULL;
struct sdis_medium* solid = NULL;
struct sdis_data* data = NULL;
- double* delta = NULL;
+ double* pdelta = NULL;
OK(sdis_data_create(sdis, sizeof(double), ALIGNOF(double), NULL, &data));
- delta = sdis_data_get(data);
- *delta = compute_delta(view);
+ pdelta = sdis_data_get(data);
+ *pdelta = view_compute_delta(view);
shader.calorific_capacity = solid_get_calorific_capacity;
shader.thermal_conductivity = solid_get_thermal_conductivity;
@@ -263,6 +341,8 @@ create_solid(struct sdis_device* sdis, struct s3d_scene_view* view)
shader.delta = solid_get_delta;
shader.temperature = solid_get_temperature;
OK(sdis_solid_create(sdis, &shader, data, &solid));
+
+ OK(sdis_data_ref_put(data));
return solid;
}
@@ -315,26 +395,254 @@ create_interface
* Validations
******************************************************************************/
static void
-check_probe(struct sdis_scene* scn)
+check_probe_list_api
+ (struct sdis_scene* scn,
+ const struct sdis_solve_probe_list_args* in_args)
+{
+ struct sdis_solve_probe_list_args args = *in_args;
+ struct sdis_estimator_buffer* estim_buf = NULL;
+
+ /* Check API */
+ BA(sdis_solve_probe_list(NULL, &args, &estim_buf));
+ BA(sdis_solve_probe_list(scn, NULL, &estim_buf));
+ BA(sdis_solve_probe_list(scn, &args, NULL));
+ args.nprobes = 0;
+ BA(sdis_solve_probe_list(scn, &args, &estim_buf));
+ args.nprobes = in_args->nprobes;
+ args.probes = NULL;
+ BA(sdis_solve_probe_list(scn, &args, &estim_buf));
+}
+
+/* Check the estimators against the analytical solution */
+static void
+check_estimator_buffer
+ (const struct sdis_estimator_buffer* estim_buf,
+ const struct sdis_solve_probe_list_args* args)
{
- struct sdis_solve_probe_args args = SDIS_SOLVE_PROBE_ARGS_DEFAULT;
struct sdis_mc T = SDIS_MC_NULL;
- struct sdis_estimator* estimator = NULL;
- double ref = 0; /* Analytical reference */
-
- args.nrealisations = 100000;
- args.position[0] = 0;
- args.position[1] = 0;
- args.position[2] = 0;
- OK(sdis_solve_probe(scn, &args, &estimator));
- OK(sdis_estimator_get_temperature(estimator, &T));
-
- ref = trilinear_profile(args.position);
- printf("T(%g, %g, %g) = %g ~ %g +/- %g\n",
- SPLIT3(args.position), ref, T.E, T.SE);
- CHK(eq_eps(ref, T.E, 3*T.SE));
-
- OK(sdis_estimator_ref_put(estimator));
+ size_t iprobe = 0;
+
+ /* Variables used to check the global estimation results */
+ size_t total_nrealisations = 0;
+ size_t total_nrealisations_ref = 0;
+ size_t total_nfailures = 0;
+ size_t total_nfailures_ref = 0;
+ double sum = 0; /* Global sum of weights */
+ double sum2 = 0; /* Global sum of squared weights */
+ double N = 0; /* Number of (successful) realisations */
+ double E = 0; /* Expected value */
+ double V = 0; /* Variance */
+ double SE = 0; /* Standard Error */
+
+ /* Check the results */
+ FOR_EACH(iprobe, 0, args->nprobes) {
+ const struct sdis_estimator* estimator = NULL;
+ size_t probe_nrealisations = 0;
+ size_t probe_nfailures = 0;
+ double probe_sum = 0;
+ double probe_sum2 = 0;
+ double ref = 0;
+
+ /* Fetch result */
+ OK(sdis_estimator_buffer_at(estim_buf, iprobe, 0, &estimator));
+ OK(sdis_estimator_get_temperature(estimator, &T));
+ OK(sdis_estimator_get_realisation_count(estimator, &probe_nrealisations));
+ OK(sdis_estimator_get_failure_count(estimator, &probe_nfailures));
+
+ /* Check probe estimation */
+ ref = trilinear_profile(args->probes[iprobe].position);
+ printf("T(%g, %g, %g) = %g ~ %g +/- %g\n",
+ SPLIT3(args->probes[iprobe].position), ref, T.E, T.SE);
+ CHK(eq_eps(ref, T.E, 3*T.SE));
+
+ /* Check miscellaneous results */
+ CHK(probe_nrealisations == args->probes[iprobe].nrealisations);
+
+ /* Compute the sum of weights and sum of squared weights of the probe */
+ probe_sum = T.E * (double)probe_nrealisations;
+ probe_sum2 = (T.V + T.E*T.E) * (double)probe_nrealisations;
+
+ /* Update the global estimation */
+ total_nrealisations_ref += probe_nrealisations;
+ total_nfailures_ref += probe_nfailures;
+ sum += probe_sum;
+ sum2 += probe_sum2;
+ }
+
+ /* Check the overall estimate of the estimate buffer. This estimate is not
+ * really a result expected by the caller since the probes are all
+ * independent. But to be consistent with the estimate buffer API, we need to
+ * provide it. And so we check its validity */
+ OK(sdis_estimator_buffer_get_temperature(estim_buf, &T));
+ OK(sdis_estimator_buffer_get_realisation_count(estim_buf, &total_nrealisations));
+ OK(sdis_estimator_buffer_get_failure_count(estim_buf, &total_nfailures));
+
+ CHK(total_nrealisations == total_nrealisations_ref);
+ CHK(total_nfailures == total_nfailures_ref);
+
+ N = (double)total_nrealisations_ref;
+ E = sum / N;
+ V = sum2 / N - E*E;
+ SE = sqrt(V/N);
+ check_intersection(E, SE, T.E, T.SE);
+}
+
+/* Check that the buffers store statistically independent estimates */
+static void
+check_estimator_buffer_combination
+ (const struct sdis_estimator_buffer* estim_buf1,
+ const struct sdis_estimator_buffer* estim_buf2,
+ const struct sdis_solve_probe_list_args* args)
+{
+ size_t iprobe;
+
+ /* Check that the 2 series of estimates are compatible but not identical */
+ FOR_EACH(iprobe, 0, args->nprobes) {
+ const struct sdis_estimator* estimator1 = NULL;
+ const struct sdis_estimator* estimator2 = NULL;
+ size_t nrealisations1 = 0;
+ size_t nrealisations2 = 0;
+ struct sdis_mc T1 = SDIS_MC_NULL;
+ struct sdis_mc T2 = SDIS_MC_NULL;
+ double sum = 0; /* Sum of weights */
+ double sum2 = 0; /* Sum of squared weights */
+ double E = 0; /* Expected value */
+ double V = 0; /* Variance */
+ double SE = 0; /* Standard Error */
+ double N = 0; /* Number of realisations */
+ double ref = 0; /* Analytical solution */
+
+ /* Retrieve the estimation results */
+ OK(sdis_estimator_buffer_at(estim_buf1, iprobe, 0, &estimator1));
+ OK(sdis_estimator_buffer_at(estim_buf2, iprobe, 0, &estimator2));
+ OK(sdis_estimator_get_temperature(estimator1, &T1));
+ OK(sdis_estimator_get_temperature(estimator2, &T2));
+
+ /* Estimates are not identical... */
+ CHK(T1.E != T2.E);
+ CHK(T1.SE != T2.SE);
+
+ /* ... but are compatible ... */
+ check_intersection(T1.E, 3*T1.SE, T2.E, 3*T2.SE);
+
+ /* We can combine the 2 estimates since they must be numerically
+ * independent. We can therefore verify that their combination is valid with
+ * respect to the analytical solution */
+ OK(sdis_estimator_get_realisation_count(estimator1, &nrealisations1));
+ OK(sdis_estimator_get_realisation_count(estimator2, &nrealisations2));
+
+ sum =
+ T1.E*(double)nrealisations1
+ + T2.E*(double)nrealisations2;
+ sum2 =
+ (T1.V + T1.E*T1.E)*(double)nrealisations1
+ + (T2.V + T2.E*T2.E)*(double)nrealisations2;
+ N = (double)(nrealisations1 + nrealisations2);
+ E = sum / N;
+ V = sum2 / N - E*E;
+ SE = sqrt(V/N);
+
+ ref = trilinear_profile(args->probes[iprobe].position);
+ printf("T(%g, %g, %g) = %g ~ %g +/- %g\n",
+ SPLIT3(args->probes[iprobe].position), ref, E, SE);
+ CHK(eq_eps(ref, E, 3*SE));
+ }
+}
+
+static void
+check_probe_list
+ (struct sdis_scene* scn,
+ struct s3d_scene_view* view,
+ const int is_master_process)
+{
+ #define NPROBES 10
+
+ /* RNG state */
+ const char* rng_state_filename = "rng_state";
+ struct ssp_rng* rng = NULL;
+ enum ssp_rng_type rng_type = SSP_RNG_TYPE_NULL;
+
+ /* Probe variables */
+ struct sdis_solve_probe_args probes[NPROBES];
+ struct sdis_solve_probe_list_args args = SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT;
+ size_t iprobe = 0;
+
+ /* Estimations */
+ struct sdis_estimator_buffer* estim_buf = NULL;
+ struct sdis_estimator_buffer* estim_buf2 = NULL;
+
+ /* Misc */
+ double delta = 0;
+ FILE* fp = NULL;
+
+ delta = view_compute_delta(view);
+
+ /* Setup the list of probes to calculate */
+ args.probes = probes;
+ args.nprobes = NPROBES;
+ FOR_EACH(iprobe, 0, NPROBES) {
+ probes[iprobe] = SDIS_SOLVE_PROBE_ARGS_DEFAULT;
+ probes[iprobe].nrealisations = 10000;
+ view_sample_position(view, delta, probes[iprobe].position);
+ }
+
+ check_probe_list_api(scn, &args);
+
+ /* Solve the probes */
+ OK(sdis_solve_probe_list(scn, &args, &estim_buf));
+
+ if(!is_master_process) {
+ CHK(estim_buf == NULL);
+ } else {
+ check_estimator_buffer(estim_buf, &args);
+
+ /* Check the use of a non-default rng_state. Run the calculations using the
+ * final RNG state from the previous estimate. Thus, the estimates are
+ * statically independent and can be combined. To do this, write the RNG
+ * state to a file so that it is read by all processes for the next test. */
+ OK(sdis_estimator_buffer_get_rng_state(estim_buf, &rng));
+ OK(ssp_rng_get_type(rng, &rng_type));
+ CHK(fp = fopen(rng_state_filename, "w"));
+ CHK(fwrite(&rng_type, sizeof(rng_type), 1, fp) == 1);
+ OK(ssp_rng_write(rng, fp));
+ CHK(fclose(fp) == 0);
+ }
+
+#ifdef SDIS_ENABLE_MPI
+ /* Synchronize processes. Wait for the master process to write RNG state to
+ * be used */
+ {
+ struct sdis_device* sdis = NULL;
+ int is_mpi_used = 0;
+ OK(sdis_scene_get_device(scn, &sdis));
+ OK(sdis_device_is_mpi_used(sdis, &is_mpi_used));
+ if(is_mpi_used) {
+ CHK(MPI_Barrier(MPI_COMM_WORLD) == MPI_SUCCESS);
+ }
+ }
+#endif
+
+ /* Read the RNG state */
+ CHK(fp = fopen(rng_state_filename, "r"));
+ CHK(fread(&rng_type, sizeof(rng_type), 1, fp) == 1);
+ OK(ssp_rng_create(NULL, rng_type, &rng));
+ OK(ssp_rng_read(rng, fp));
+ CHK(fclose(fp) == 0);
+
+ /* Run a new calculation using the read RNG state */
+ args.rng_state = rng;
+ OK(sdis_solve_probe_list(scn, &args, &estim_buf2));
+ OK(ssp_rng_ref_put(rng));
+
+ if(!is_master_process) {
+ CHK(estim_buf2 == NULL);
+ } else {
+ check_estimator_buffer_combination(estim_buf, estim_buf2, &args);
+ OK(sdis_estimator_buffer_ref_put(estim_buf));
+ OK(sdis_estimator_buffer_ref_put(estim_buf2));
+ }
+
+ #undef NPROBES
}
/*******************************************************************************
@@ -367,14 +675,18 @@ main(int argc, char** argv)
interf = create_interface(sdis, solid, dummy);
scn = create_scene(sdis, super_shape, interf);
- check_probe(scn);
+ check_probe_list(scn, view, is_master_process);
OK(s3dut_mesh_ref_put(super_shape));
+ OK(s3d_scene_view_ref_put(view));
OK(sdis_medium_ref_put(solid));
OK(sdis_medium_ref_put(dummy));
OK(sdis_interface_ref_put(interf));
OK(sdis_scene_ref_put(scn));
free_default_device(sdis);
+
+ CHK(mem_allocated_size() == 0);
+
return 0;
}