commit 185312a43b7666353e43fb14f8e13c49b3c38122
parent 6930b5c8dd2215a5b32eae85fe1a6c0457431df0
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Wed, 24 Aug 2022 10:35:20 +0200
Rewrite the build_octrees parallelisation
We need to synchronize construction threads when a batch of voxels has
been fully consumed. In the previous implementation, it was impossible
to add a barrier in the parallel loop. In this commit, we rewrite the
build_octrees function to use a parallel block instead of a parallel
loop.
Diffstat:
| M | src/rnatm_octree.c | | | 100 | ++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- |
1 file changed, 58 insertions(+), 42 deletions(-)
diff --git a/src/rnatm_octree.c b/src/rnatm_octree.c
@@ -804,15 +804,18 @@ voxelize_atmosphere
batch_args.accel_structs = accel_structs + item_range[0];
batch_args.batch_size = item_range[1] - item_range[0];
- /* Ensure that the build thread has finished to consume the previous batch */
+ /* Wait for the building thread to finish consuming the previous batch */
mutex_lock(sync->mutex);
if(sync->ibatch != ibatch) {
ASSERT(sync->ibatch == ibatch - 1);
cond_wait(sync->cond, sync->mutex);
- ASSERT(sync->ibatch == ibatch);
+ /* An error occurred in the building thread */
+ if(sync->ibatch != ibatch) res = RES_BAD_ARG;
}
mutex_unlock(sync->mutex);
+ if(res != RES_OK) goto error;
+ /* Generate the voxels of the current batch */
res = voxelize_batch(atm, &batch_args);
if(res != RES_OK) goto error;
}
@@ -946,12 +949,12 @@ build_octrees
struct build_sync* sync)
{
struct svx_device* svx = NULL;
+ struct accel_struct* accel_structs = NULL;
double low[3], upp[3];
size_t def[3];
size_t istruct;
size_t naccel_structs;
size_t voxel_width;
- ATOMIC nbuilt_structs = 0;
ATOMIC res = RES_OK;
ASSERT(atm && args && pool);
@@ -969,56 +972,69 @@ build_octrees
def[1] = (size_t)atm->grid_definition[1];
def[2] = (size_t)atm->grid_definition[2];
+ accel_structs = darray_accel_struct_data_get(&atm->accel_structs);
naccel_structs = darray_accel_struct_size_get(&atm->accel_structs);
voxel_width = pool_get_voxel_width(pool);
- /* Build the octrees. Each thread consumes an element of a partition. So, we
- * set the number of threads to the voxel width */
- omp_set_num_threads((int)voxel_width);
- #pragma omp parallel for schedule(static, 1/*chunk size*/)
- for(istruct = 0; istruct < naccel_structs; ++istruct) {
- struct build_octree_context ctx = BUILD_OCTREE_CONTEXT_NULL;
- struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL;
- struct svx_tree* octree = NULL;
- res_T res_local = RES_OK;
-
- if(ATOMIC_GET(&res) != RES_OK) continue;
-
- /* Setup the build context */
- ctx.pool = pool;
- ctx.part = NULL;
- ctx.iitem = istruct % voxel_width;
- ctx.tau_threshold = args->optical_thickness;
-
- /* Setup the voxel descriptor */
- vx_desc.get = vx_get;
- vx_desc.merge = vx_merge;
- vx_desc.challenge_merge = vx_challenge_merge;
- vx_desc.context = &ctx;
- vx_desc.size = NFLOATS_PER_VOXEL * sizeof(float);
-
- res_local = svx_octree_create(svx, low, upp, def, &vx_desc, &octree);
- if(ctx.part) partition_free(ctx.part);
- if(res_local != RES_OK) { ATOMIC_SET(&res, res_local); continue; };
-
- if((size_t)ATOMIC_INCR(&nbuilt_structs) % voxel_width == 0) {
- /* Notify the thread voxelizing the atmospheric meshes that we are
- * consuming for the next batch */
- mutex_lock(sync->mutex);
- sync->ibatch += 1;
- mutex_unlock(sync->mutex);
- cond_signal(sync->cond);
+ /* Build the octrees. Each thread consumes an element of the voxels generated
+ * by the voxelization thread, each element corresponding to the voxel of an
+ * octree to be constructed. By fixing the number of threads to the width of
+ * the voxel, we therefore build `voxel_width' octrees in parallel from a
+ * single voxelization of the atmospheric meshes */
+ for(istruct = 0; istruct < naccel_structs; istruct += voxel_width) {
+ const size_t nthreads = MMIN(voxel_width, naccel_structs - istruct);
+ omp_set_num_threads((int)nthreads);
+
+ /* Note that we are using a parallel block rather than a parallel loop in
+ * order to add an implicit barrier after a batch has been fully consumed.
+ * This is necessary to prevent a thread from consuming voxels from the
+ * previous batch */
+ #pragma omp parallel
+ {
+ struct build_octree_context ctx = BUILD_OCTREE_CONTEXT_NULL;
+ struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL;
+ struct svx_tree* octree = NULL;
+ const int ithread = omp_get_thread_num();
+ const size_t istruct_curr = (size_t)ithread + istruct;
+ res_T res_local = RES_OK;
+
+ /* Setup the build context */
+ ctx.pool = pool;
+ ctx.part = NULL;
+ ctx.iitem = (size_t)ithread;
+ ctx.tau_threshold = args->optical_thickness;
+
+ /* Setup the voxel descriptor */
+ vx_desc.get = vx_get;
+ vx_desc.merge = vx_merge;
+ vx_desc.challenge_merge = vx_challenge_merge;
+ vx_desc.context = &ctx;
+ vx_desc.size = NFLOATS_PER_VOXEL * sizeof(float);
+
+ res_local = svx_octree_create(svx, low, upp, def, &vx_desc, &octree);
+ if(ctx.part) partition_free(ctx.part);
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ } else { /* Register the built octree */
+ accel_structs[istruct_curr].octree = octree;
+ }
}
+ if(res != RES_OK) goto error;
- /* Register the built octree */
- darray_accel_struct_data_get(&atm->accel_structs)[istruct].octree = octree;
+ /* Signal the voxelization thread to generate the next batch */
+ mutex_lock(sync->mutex);
+ sync->ibatch += 1;
+ mutex_unlock(sync->mutex);
+ cond_signal(sync->cond);
}
- if(res != RES_OK) goto error;
exit:
if(svx) SVX(device_ref_put(svx));
return (res_T)res;
error:
+ /* Signal to the voxelization thread that there is no need to wait for the
+ * build thread */
+ cond_signal(sync->cond);
darray_accel_struct_clear(&atm->accel_structs);
goto exit;
}