Upd the work scheduling policy - htrdr - Solving radiative transfer in heterogeneous media

commit bdb57b9e281af7b6b846ba8495e285e277fed313
parent a945a02670bbb791ba61430b8aec6043efa8eddf
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue, 30 Oct 2018 16:19:39 +0100

Upd the work scheduling policy

The #tiles to steal are now stolen to the same process.

Diffstat:
M src/htrdr_draw_radiance_sw.c  | 129 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------

1 file changed, 75 insertions(+), 54 deletions(-)
diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c
@@ -21,7 +21,7 @@
 #include "htrdr_solve.h"
 
 #include <rsys/cstr.h>
-#include <rsys/dynamic_array_size_t.h>
+#include <rsys/dynamic_array_u32.h>
 #include <rsys/math.h>
 #include <star/ssp.h>
 
@@ -37,11 +37,11 @@ STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
 
 /* Overall work of a process */
 struct proc_work {
-  struct darray_size_t tiles; /* #tiles to render */
+  struct darray_u32 tiles; /* #tiles to render */
   size_t itile; /* Next tile to render in the above list of tiles */
 };
 
-#define TILE_MCODE_NULL INT64_MAX
+#define TILE_MCODE_NULL UINT32_MAX
 
 /*******************************************************************************
  * Helper functions
@@ -57,18 +57,29 @@ morton2D_decode(const uint32_t u32)
   return (uint16_t)x;
 }
 
+static FINLINE uint32_t
+morton2D_encode(const uint16_t u16)
+{
+  uint32_t u32 = u16;
+  u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
+  u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F;
+  u32 = (u32 | (u32 << 2)) & 0x33333333;
+  u32 = (u32 | (u32 << 1)) & 0x55555555;
+  return u32;
+}
+
 static INLINE void
 proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
 {
   ASSERT(work);
-  darray_size_t_init(allocator, &work->tiles);
+  darray_u32_init(allocator, &work->tiles);
   work->itile = 0;
 }
 
 static INLINE void
 proc_work_release(struct proc_work* work)
 {
-  darray_size_t_release(&work->tiles);
+  darray_u32_release(&work->tiles);
 }
 
 static INLINE void
@@ -77,36 +88,41 @@ proc_work_reset(struct proc_work* work)
   ASSERT(work);
   #pragma omp critical
   {
-    darray_size_t_clear(&work->tiles);
+    darray_u32_clear(&work->tiles);
     work->itile = 0;
   }
 }
 
 static INLINE void
-proc_work_add_tile(struct proc_work* work, const int64_t mcode)
+proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
 {
-  size_t st = (size_t)mcode;
-  ASSERT(mcode >= 0);
   #pragma omp critical
-  CHK(darray_size_t_push_back(&work->tiles, &st) == RES_OK);
+  CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
 }
 
-static INLINE int64_t
+static INLINE uint32_t
 proc_work_get_tile(struct proc_work* work)
 {
-  int64_t mcode;
+  uint32_t mcode;
   #pragma omp critical
   {
-    if(work->itile >= darray_size_t_size_get(&work->tiles)) {
+    if(work->itile >= darray_u32_size_get(&work->tiles)) {
       mcode = TILE_MCODE_NULL;
     } else {
-      mcode = (int64_t)darray_size_t_cdata_get(&work->tiles)[work->itile];
+      mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
       ++work->itile;
     }
   }
   return mcode;
 }
 
+static INLINE size_t
+proc_work_get_ntiles(struct proc_work* work)
+{
+  ASSERT(work);
+  return darray_u32_size_get(&work->tiles);
+}
+
 static res_T
 draw_tile
   (struct htrdr* htrdr,
@@ -225,6 +241,7 @@ mpi_probe_thieves
    struct proc_work* work,
    ATOMIC* probe_thieves)
 {
+  uint32_t tiles[UINT8_MAX];
   struct timespec t;
   ASSERT(htrdr && work && probe_thieves);
 
@@ -242,8 +259,8 @@ mpi_probe_thieves
 
   while(ATOMIC_GET(probe_thieves)) {
     MPI_Status status;
+    size_t itile;
     int msg;
-    int64_t tile;
 
     /* Probe if a steal request was submitted by any processes */
     P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
@@ -251,18 +268,20 @@ mpi_probe_thieves
 
     if(msg) { /* A steal request was posted */
       MPI_Request req;
-      int8_t dummy;
+      uint8_t ntiles_to_steal;
 
       /* Asynchronously receive the steal request */
-      P_MPI(Irecv(&dummy, 1, MPI_INT8_T, status.MPI_SOURCE,
+      P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
         HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
 
       /* Wait for the completion of the steal request */
       mpi_wait_for_request(htrdr, &req);
 
-      /* Thief a tile */
-      tile = proc_work_get_tile(work);
-      P_MPI(Send(&tile, 1, MPI_INT64_T, status.MPI_SOURCE,
+      /* Thief some tiles */
+      FOR_EACH(itile, 0, ntiles_to_steal) {
+        tiles[itile] = proc_work_get_tile(work);
+      }
+      P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
         HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
     }
     t.tv_nsec = 500000000; /* 500ms */
@@ -301,9 +320,13 @@ mpi_steal_work
    struct ssp_rng* rng,
    struct proc_work* work)
 {
-  size_t ntiles_to_steal = (size_t)htrdr->nthreads;
+  MPI_Request req;
+  size_t itile;
   size_t nthieves = 0;
-  ASSERT(htrdr && rng && work);
+  uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
+  int proc_to_steal; /* Process to steal */
+  uint8_t ntiles_to_steal = (uint8_t)(htrdr->nthreads*2);
+  ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
 
   /* Protect MPI calls of multiple invocations from concurrent threads */
   #define P_MPI(Func) {                                                        \
@@ -312,33 +335,31 @@ mpi_steal_work
     mutex_unlock(htrdr->mpi_mutex);                                            \
   } (void)0
 
-  while(nthieves < ntiles_to_steal && htrdr->mpi_nworking_procs) {
-    MPI_Request req;
-    const int8_t dummy = 1;
-    int64_t tile; /* Morton code of the stolen tile */
-    int proc_to_steal; /* Process to steal */
+  /* No more working process => nohting to steal */
+  if(!htrdr->mpi_nworking_procs) return 0;
 
-    /* Sample a process to steal */
-    proc_to_steal = mpi_sample_working_process(htrdr, rng);
+  /* Sample a process to steal */
+  proc_to_steal = mpi_sample_working_process(htrdr, rng);
 
-    /* Send a steal request to the sampled process and wait for a response */
-    P_MPI(Send(&dummy, 1, MPI_INT8_T, proc_to_steal, HTRDR_MPI_STEAL_REQUEST,
-      MPI_COMM_WORLD));
+  /* Send a steal request to the sampled process and wait for a response */
+  P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
+    HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
 
-    /* Receive the stolen tile from the sampled process */
-    P_MPI(Irecv(&tile, 1, MPI_INT64_T, proc_to_steal,
-      HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
+  /* Receive the stolen tile from the sampled process */
+  P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
+    HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
 
-    mpi_wait_for_request(htrdr, &req);
+  mpi_wait_for_request(htrdr, &req);
 
-    if(tile != TILE_MCODE_NULL) {
-      proc_work_add_tile(work, tile);
-      ++nthieves;
-    } else {
+  FOR_EACH(itile, 0, ntiles_to_steal) {
+    if(tiles[itile] == TILE_MCODE_NULL) {
       ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
       htrdr->mpi_working_procs[proc_to_steal] = 0;
       htrdr->mpi_nworking_procs--;
+      break;
     }
+    proc_work_add_tile(work, tiles[itile]);
+    ++nthieves;
   }
   #undef P_MPI
   return nthieves;
@@ -428,9 +449,9 @@ htrdr_draw_radiance_sw
    struct htrdr_buffer* buf)
 {
   struct ssp_rng* rng_main = NULL;
-  size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
+  size_t ntiles_x, ntiles_y, ntiles_adjusted;
+  size_t itile;
   struct proc_work work;
-  int64_t itile;
   struct htrdr_buffer_layout layout;
   double pix_sz[2]; /* Pixel size in the normalized image plane */
   size_t proc_ntiles;
@@ -465,7 +486,6 @@ htrdr_draw_radiance_sw
   /* Compute the overall number of tiles */
   ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
   ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
-  ntiles = ntiles_x * ntiles_y;
 
   /* Adjust the #tiles for the morton-encoding procedure */
   ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
@@ -476,27 +496,27 @@ htrdr_draw_radiance_sw
   pix_sz[1] = 1.0 / (double)layout.height;
 
   /* Define the initial number of tiles of the current process */
-  proc_ntiles = ntiles / (size_t)htrdr->mpi_nprocs;
   proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
-  if(htrdr->mpi_rank == 0) {/* Affect the remaining tiles to the master proc */
-    proc_ntiles += ntiles - proc_ntiles*(size_t)htrdr->mpi_nprocs;
+  if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */
     proc_ntiles_adjusted +=
-      ntiles_adjusted - proc_ntiles*(size_t)htrdr->mpi_nprocs;
+      ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
   }
 
   /* Define the initial list of tiles of the process */
   FOR_EACH(itile, 0, proc_ntiles_adjusted) {
-    size_t tile_org[2];
-    int64_t mcode = htrdr->mpi_rank + (itile*htrdr->mpi_nprocs);
+    uint32_t mcode;
+    uint16_t tile_org[2];
 
-    /* Decode the morton code to retrieve the tile index  */
-    tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
-    if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */
-    tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
-    if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */
+    mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
+          + (uint32_t)htrdr->mpi_rank;
 
+    tile_org[0] = morton2D_decode(mcode>>0);
+    if(tile_org[0] >= ntiles_x) continue;
+    tile_org[1] = morton2D_decode(mcode>>1);
+    if(tile_org[1] >= ntiles_y) continue;
     proc_work_add_tile(&work, mcode);
   }
+  proc_ntiles = proc_work_get_ntiles(&work);
 
   if(htrdr->mpi_rank == 0) {
     fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
@@ -588,6 +608,7 @@ htrdr_draw_radiance_sw
 
         proc_work_reset(&work);
         nthieves = mpi_steal_work(htrdr, rng_main, &work);
+        nthreads = MMIN(nthieves, htrdr->nthreads);
       } while(nthieves);
 
       /* Synchronize the process */

	htrdr Solving radiative transfer in heterogeneous media
	git clone git://git.meso-star.fr/htrdr.git
	Log \| Files \| Refs \| README \| LICENSE