stardis-solver

Solve coupled heat transfers
git clone git://git.meso-star.fr/stardis-solver.git
Log | Files | Refs | README | LICENSE

commit a28a544b6e37143f42110276fd6669accd6de583
parent a00aafaca0f141d902d368962e7c5309c10849f4
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed,  8 Dec 2021 16:25:49 +0100

In MPI, carefully set up the returned RNG state

Set the RNG state of the green estimator/function to a state that is
above the state of the RNG proxies of all processes. This ensures that
any random numbers that can be generated from this state were not
generated when calculating the green estimator/function. As a result,
this state could be used, for example, to run a new set of
realisations to make the previous calculation more accurate.

Diffstat:
Mcmake/CMakeLists.txt | 2+-
Msrc/sdis.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Msrc/sdis_c.h | 11+++++++++++
Msrc/sdis_solve_probe_Xd.h | 6++++++
4 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -34,7 +34,7 @@ CMAKE_DEPENDENT_OPTION(ALL_TESTS find_package(RCMake 0.4 REQUIRED) find_package(Star2D 0.5 REQUIRED) find_package(Star3D 0.8 REQUIRED) -find_package(StarSP 0.12 REQUIRED) +find_package(StarSP 0.13 REQUIRED) find_package(StarEnc2D 0.5 REQUIRED) find_package(StarEnc3D 0.5 REQUIRED) find_package(RSys 0.12 REQUIRED) diff --git a/src/sdis.c b/src/sdis.c @@ -557,21 +557,9 @@ setup_estimator estimator_setup_temperature(estimator, acc_temp->sum, acc_temp->sum2); estimator_setup_realisation_time(estimator, acc_time->sum, acc_time->sum2); - /* TODO correctly handle RNG state with MPI. Currently, we only store the RNG - * proxy state of the master process, but non-master processes can rely on - * much more advanced seeds. Therefore, rerun the simulation with the saved - * RNG state can lead to non-master processes generating random numbers that - * were already generated at the previous run. */ res = estimator_save_rng_state(estimator, proxy); if(res != RES_OK) goto error; -#ifdef SDIS_ENABLE_MPI - if(estimator->dev->use_mpi) { - log_warn(estimator->dev, - "The estimator RNG state is not well defined when MPI is used.\n"); - } -#endif - exit: return res; error: @@ -620,7 +608,7 @@ gather_green_functions if(res != RES_OK) goto error; /* Gather the accumulators. 
The master process gathers all accumulators and - * non master process gather their per thread accumulators only that is is + * non master process gather their per thread accumulators only that is * sent to the master process */ res = gather_accumulators (scn->dev, MPI_SDIS_MSG_ACCUM_TIME, per_thread_acc_time, &acc_time); @@ -700,6 +688,82 @@ error: #endif +#ifndef SDIS_ENABLE_MPI +res_T +gather_rng_proxy_sequence_id + (struct sdis_device* dev, + struct ssp_rng_proxy* proxy) +{ + ASSERT(dev && proxy); + (void)dev, (void)proxy; + return RES_OK; +} +#else + +res_T +gather_rng_proxy_sequence_id + (struct sdis_device* dev, + struct ssp_rng_proxy* proxy) +{ + unsigned long proc_seq_id = 0; + size_t seq_id = SSP_SEQUENCE_ID_NONE; + res_T res = RES_OK; + ASSERT(dev && proxy); + + if(!dev->use_mpi) goto exit; + + /* Retrieve the sequence id of the process */ + SSP(rng_proxy_get_sequence_id(proxy, &seq_id)); + CHK(seq_id <= ULONG_MAX); + proc_seq_id = (unsigned long)seq_id; + + /* Non master process */ + if(dev->mpi_rank != 0) { + + /* Send the sequence id to the master process */ + mutex_lock(dev->mpi_mutex); + MPI(Send(&proc_seq_id, 1, MPI_UNSIGNED_LONG, 0/*Dst*/, + MPI_SDIS_MSG_RNG_PROXY_SEQUENCE_ID, MPI_COMM_WORLD)); + mutex_unlock(dev->mpi_mutex); + + /* Master process */ + } else { + size_t nseqs_to_flush = 0; + unsigned long max_seq_id = 0; + int iproc; + + max_seq_id = proc_seq_id; + + /* Gather per process sequence id and defined the maximum sequence id */ + FOR_EACH(iproc, 1, dev->mpi_nprocs) { + MPI_Request req; + unsigned long tmp_seq_id; + + /* Asynchronously receive the sequence id of `iproc' */ + mutex_lock(dev->mpi_mutex); + MPI(Irecv(&tmp_seq_id, 1, MPI_UNSIGNED_LONG, iproc, + MPI_SDIS_MSG_RNG_PROXY_SEQUENCE_ID, MPI_COMM_WORLD, &req)); + mutex_unlock(dev->mpi_mutex); + mpi_waiting_for_request(dev, &req); + + /* Define the maximum sequence id between all processes */ + max_seq_id = MMAX(max_seq_id, tmp_seq_id); + } + + /* Flush the current sequence that is 
already consumed in addition to the + * sequences queried by the other processes */ + nseqs_to_flush = 1/*Current sequence*/ + max_seq_id - proc_seq_id; + res = ssp_rng_proxy_flush_sequences(proxy, nseqs_to_flush); + if(res != RES_OK) goto error; + } + +exit: + return res; +error: + goto exit; +} +#endif + void print_progress (struct sdis_device* dev, diff --git a/src/sdis_c.h b/src/sdis_c.h @@ -24,6 +24,7 @@ enum mpi_sdis_message { MPI_SDIS_MSG_ACCUM_TIME, /* Time accumulator */ MPI_SDIS_MSG_GREEN_FUNCTION, /* Serialized green function */ MPI_SDIS_MSG_PROGRESS, /* Progress status */ + MPI_SDIS_MSG_RNG_PROXY_SEQUENCE_ID, /* Index of the current RNG sequence */ MPI_SDIS_MSG_COUNT__ }; @@ -110,6 +111,16 @@ gather_green_functions const struct accum* acc_time, struct sdis_green_function** green); +/* Gather the sequence IDs of the proxy RNGs. Without MPI, nothing happens. + * With MPI, non-master processes send the sequence ID of their proxy RNG to + * the master process. The master process updates its proxy RNG to ensure that + * its state is greater than the state of all other proxies, that is, its + * sequence ID is greater than the sequence IDs received. */ +extern LOCAL_SYM res_T +gather_rng_proxy_sequence_id + (struct sdis_device* dev, + struct ssp_rng_proxy* proxy); + /* Print the progress status. With MPI, the master process print the progress * of all processes stored in the progress list. 
Non master processes do not * print anything */ diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h @@ -293,6 +293,12 @@ XD(solve_probe) time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); log_info(scn->dev, "Probe temperature solved in %s.\n", buf); + /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of + * the master process is greater than the RNG proxy state of all other + * processes */ + res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy); + if(res != RES_OK) goto error; + /* Setup the estimated values */ if(out_estimator) { struct accum acc_temp, acc_time;