Start implementing the sdis_solve_probe_list function - stardis-solver

commit 59c0f9ae68a619d1f30b53719e6bdd91274413ad
parent a0d5330c85c9de8522d73dc0250be490eea19b89
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed,  6 Dec 2023 17:58:32 +0100

Start implementing the sdis_solve_probe_list function

Unlike its single-probe counterpart, this function will parallelize the
list of probes, rather than parallelizing Monte Carlo realizations.
Currently only the very beginning of the function is written as the
calculation of the probe list by MPI process and... not much more.

Diffstat:
M src/sdis.c  | 45 ++++++++++++++++++++++++++-------------------
M src/sdis.h  | 13 +++++++++++++
M src/sdis_c.h  | 18 ++++++++++++++++--
M src/sdis_solve.c  | 15 +++++++++++++++
M src/sdis_solve_camera.c  | 2 +-
M src/sdis_solve_probe_Xd.h  | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

6 files changed, 155 insertions(+), 22 deletions(-)
diff --git a/src/sdis.c b/src/sdis.c
@@ -453,38 +453,45 @@ free_process_progress(struct sdis_device* dev, int32_t progress[])
 }
 
 size_t
-compute_process_realisations_count
+compute_process_index_range
   (const struct sdis_device* dev,
-   const size_t nrealisations)
+   const size_t nindices,
+   size_t range[2])
 {
 #ifndef SDIS_ENABLE_MPI
-  (void)dev, (void)nrealisations;
-  return nrealisations;
+  (void)dev;
+  range[0] = 0;
+  range[1] = nindices; /* Upper bound is _exclusive_ */
 #else
-  size_t per_process_nrealisations = 0;
-  size_t remaining_nrealisations = 0;
   ASSERT(dev);
 
-  if(!dev->use_mpi) return nrealisations;
+  if(!dev->use_mpi) {
+    range[0] = 0;
+    range[1] = nindices;
+  } else {
+    size_t per_process_indices = 0;
+    size_t remaining_indices = 0;
 
-  /* Compute minimum the number of realisations on each process */
-  per_process_nrealisations = nrealisations / (size_t)dev->mpi_nprocs;
+    /* Compute minimum the number of indices on each process */
+    per_process_indices = nindices / (size_t)dev->mpi_nprocs;
 
-  /* Define the remaining number of realisations that are not handle by one
-   * process */
-  remaining_nrealisations =
-    nrealisations
-  - per_process_nrealisations * (size_t)dev->mpi_nprocs;
+    range[0] = per_process_indices * (size_t)dev->mpi_rank;
+    range[1] = per_process_indices; /* Upper bound is _exclusive */
 
-  /* Distribute the remaining realisations onto the processes */
-  if((size_t)dev->mpi_rank >= remaining_nrealisations) {
-    return per_process_nrealisations;
-  } else {
-    return per_process_nrealisations + 1;
+    /* Set the remaining number of indexes that are not managed by 1 process */
+    remaining_indices =
+      nindices - per_process_indices * (size_t)dev->mpi_nprocs;
+
+    /* Distribute the remaining indices onto the processes */
+    if((size_t)dev->mpi_rank < remaining_indices) {
+      range[1] += 1;
+    }
   }
 #endif
+  return range[1] - range[0];
 }
 
+
 #ifndef SDIS_ENABLE_MPI
 res_T
 gather_accumulators
diff --git a/src/sdis.h b/src/sdis.h
@@ -1330,6 +1330,19 @@ sdis_solve_probe
    const struct sdis_solve_probe_args* args,
    struct sdis_estimator** estimator);
 
+/* Calculate temperature for a list of probe points. Unlike its
+ * single-probe counterpart, this function parallelizes the list of
+ * probes, rather than calculating a single probe. Calling this function
+ * is therefore more advantageous in terms of load distribution when the
+ * number of probe points to be evaluated is large compared to the cost
+ * of calculating a single probe point. */
+SDIS_API res_T
+sdis_solve_probe_list
+  (struct sdis_scene* scn,
+   const struct sdis_solve_probe_args args[],
+   const size_t nprobes,
+   struct sdis_estimator_buffer** buf);
+
 SDIS_API res_T
 sdis_solve_probe_boundary
   (struct sdis_scene* scn,
diff --git a/src/sdis_c.h b/src/sdis_c.h
@@ -85,11 +85,25 @@ free_process_progress
   (struct sdis_device* dev,
    int32_t progress[]);
 
-/* Compute the number of realisations for the current process */
+/* Calculate the index range of the current process. It returns the size of the
+ * range. The overall_count is the number of calculations to parallelize between
+ * processes. For example, it may be the number of realisations of one
+ * calculation, or the total number of probe calculations. */
 extern LOCAL_SYM size_t
+compute_process_index_range
+  (const struct sdis_device* dev,
+   const size_t overall_count,
+   size_t range[2]); /* [lower, upper[ */
+
+/* Return the number of realisations for the current process */
+static INLINE size_t
 compute_process_realisations_count
   (const struct sdis_device* dev,
-   const size_t overall_realisations_count);
+   const size_t overall_realisations_count)
+{
+  size_t range[2];
+  return compute_process_index_range(dev, overall_realisations_count, range);
+}
 
 /* Gather the accumulators and sum them in acc. With MPI, non master processes
  * store in acc the gathering of their per thread accumulators that are sent to
diff --git a/src/sdis_solve.c b/src/sdis_solve.c
@@ -57,6 +57,21 @@ sdis_solve_probe
 }
 
 res_T
+sdis_solve_probe_list
+  (struct sdis_scene* scn,
+   const struct sdis_solve_probe_args args[],
+   const size_t nprobes,
+   struct sdis_estimator_buffer** out_buf)
+{
+  if(!scn) return RES_BAD_ARG;
+  if(scene_is_2d(scn)) {
+    return solve_probe_list_2d(scn, args, nprobes, out_buf);
+  } else {
+    return solve_probe_list_3d(scn, args, nprobes, out_buf);
+  }
+}
+
+res_T
 sdis_solve_probe_green_function
   (struct sdis_scene* scn,
    const struct sdis_solve_probe_args* args,
diff --git a/src/sdis_solve_camera.c b/src/sdis_solve_camera.c
@@ -495,7 +495,7 @@ sdis_solve_camera
   char buffer[128]; /* Temporary buffer used to store formated time */
 
   /* Stardis variables */
-  struct sdis_estimator_buffer* buf= NULL;
+  struct sdis_estimator_buffer* buf = NULL;
   struct sdis_medium* medium = NULL;
 
   /* Random number generators */
diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h
@@ -369,5 +369,89 @@ error:
   goto exit;
 }
 
+static res_T
+XD(solve_probe_list)
+  (struct sdis_scene* scn,
+   const struct sdis_solve_probe_args args[],
+   const size_t nprobes,
+   struct sdis_estimator_buffer** out_buf)
+{
+  /* Time registration */
+  struct time time0, time1;
+
+  /* Device variables */
+  struct mem_allocator* allocator = NULL;
+  size_t nthreads = 0;
+
+  /* Stardis variables */
+  struct sdis_estimator_buffer* buf = NULL;
+
+  /* Probe variables */
+  size_t per_process_probes[2];
+  size_t per_process_nprobes;
+  int64_t iprobe = 0;
+
+  /* Miscellaneous */
+  int32_t* progress = NULL; /* Per process progress bar */
+  int pcent_progress = 1; /* Percentage requiring progress update */
+  int is_master_process = 0;
+  res_T res = RES_OK;
+
+  /* Check input arguments */
+  if(!scn || !args || !out_buf) { res = RES_BAD_ARG; goto error; }
+  FOR_EACH(iprobe, 0, nprobes) {
+    res = check_solve_probe_args(&args[iprobe]);
+    if(res != RES_OK) goto error;
+  }
+  res = XD(scene_check_dimensionality)(scn);
+  if(res != RES_OK) goto error;
+
+#ifdef SDIS_ENABLE_MPI
+  is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0;
+#endif
+
+  nthreads = scn->dev->nthreads;
+  allocator = scn->dev->allocator;
+
+  /* Update the progress bar every percent if escape sequences are allowed in
+   * log messages or only every 10 percent when only plain text is allowed.
+   * This reduces the number of lines of plain text printed */
+  pcent_progress = scn->dev->no_escape_sequence ? 10 : 1;
+
+  /* Allocate the per process progress status */
+  res = alloc_process_progress(scn->dev, &progress);
+  if(res != RES_OK) goto error;
+
+  /* Synchronise the processes */
+  process_barrier(scn->dev);
+
+  /* Begin time registration of the computation */
+  time_current(&time0);
+
+  /* Here we go! Calclation of probe list */
+  per_process_nprobes = compute_process_index_range
+    (scn->dev, nprobes, per_process_probes);
+
+  /* Create the global estimator on the master process only */
+  if(is_master_process) {
+    res = estimator_buffer_create(scn->dev, nprobes, 1, &buf);
+    if(res != RES_OK) goto error;
+  }
+
+  omp_set_num_threads((int)scn->dev->nthreads);
+  #pragma omp parallel for schedule(static)
+  for(iprobe = 0; iprobe < (int64_t)per_process_nprobes; ++iprobe) {
+    /* TODO */
+  }
+
+error:
+  if(progress) free_process_progress(scn->dev, progress);
+  if(out_buf) *out_buf = buf;
+  return res;
+exit:
+  if(buf) { SDIS(estimator_buffer_ref_put(buf)); buf = NULL; }
+  goto exit;
+}
+
 #include "sdis_Xd_end.h"

	stardis-solver Solve coupled heat transfers
	git clone git://git.meso-star.fr/stardis-solver.git
	Log \| Files \| Refs \| README \| LICENSE

M	src/sdis.c	\|	45	++++++++++++++++++++++++++-------------------
M	src/sdis.h	\|	13	+++++++++++++
M	src/sdis_c.h	\|	18	++++++++++++++++--
M	src/sdis_solve.c	\|	15	+++++++++++++++
M	src/sdis_solve_camera.c	\|	2	+-
M	src/sdis_solve_probe_Xd.h	\|	84	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++