commit de9190697bfec46def5015569d0a2e9b29154ae1
parent 10fe56bef42a403625762f1ff1045cdbbd36b2a6
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Wed, 31 Oct 2018 10:50:32 +0100
Improve work stealing strategy
Previously, the draw function was waiting for the completion of the
rendering threads and then submitted a steal request to the concurrent
processes. Some rendering threads were thus waiting for the completion
of the other rendering threads without performing any computation.
In this commit a thread with no more work does not wait anymore and
directly try to steal tiles to concurrent processes.
Diffstat:
1 file changed, 76 insertions(+), 68 deletions(-)
diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c
@@ -242,7 +242,7 @@ mpi_steal_work
size_t nthieves = 0;
uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
int proc_to_steal; /* Process to steal */
- uint8_t ntiles_to_steal = (uint8_t)(htrdr->nthreads*2);
+ uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
/* Protect MPI calls of multiple invocations from concurrent threads */
@@ -294,6 +294,11 @@ mpi_gather_buffer
res_T res = RES_OK;
ASSERT(htrdr && buf);
+ /* The process is alone. There is nothing to gather since all work was done
+ * by it */
+ if(htrdr->mpi_nprocs == 1)
+ goto exit;
+
/* Fetch the memory layout of the submitted buffer */
htrdr_buffer_get_layout(buf, &layout);
ASSERT(layout.elmt_size == sizeof(struct htrdr_accum) * 3/*#channels*/);
@@ -474,86 +479,89 @@ draw_image
pix_sz[0] = 1.0 / (double)layout.width;
pix_sz[1] = 1.0 / (double)layout.height;
- nthreads = htrdr->nthreads;
proc_ntiles = proc_work_get_ntiles(work);
+ nthreads = MMIN(htrdr->nthreads, proc_ntiles);
/* The process is not considered as a working process for himself */
htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
--htrdr->mpi_nworking_procs;
- do {
- omp_set_num_threads((int)nthreads);
-
- #pragma omp parallel
- for(;;) {
- const int ithread = omp_get_thread_num();
- struct ssp_rng_proxy* rng_proxy = NULL;
- struct ssp_rng* rng;
- uint32_t mcode;
- size_t tile_org[2];
- size_t tile_sz[2];
- size_t n;
- res_T res_local = RES_OK;
- int32_t pcent;
-
- if(ATOMIC_GET(&res) != RES_OK) continue;
-
+ omp_set_num_threads((int)nthreads);
+ #pragma omp parallel
+ for(;;) {
+ const int ithread = omp_get_thread_num();
+ struct ssp_rng_proxy* rng_proxy = NULL;
+ struct ssp_rng* rng;
+ uint32_t mcode;
+ size_t tile_org[2];
+ size_t tile_sz[2];
+ size_t n;
+ res_T res_local = RES_OK;
+ int32_t pcent;
+
+ /* Get a tile to draw */
+ #pragma omp critical
+ {
mcode = proc_work_get_tile(work);
- if(mcode == TILE_MCODE_NULL) break;
-
- /* Decode the morton code to retrieve the tile index */
- tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
- tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
- ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
-
- /* Define the tile origin in pixel space */
- tile_org[0] *= TILE_SIZE;
- tile_org[1] *= TILE_SIZE;
-
- /* Compute the size of the tile clamped by the borders of the buffer */
- tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]);
- tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]);
-
- SSP(rng_proxy_create2
- (&htrdr->lifo_allocators[ithread],
- &ssp_rng_threefry,
- RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
- RNG_SEQUENCE_SIZE, /* Size */
- RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
- 1, &rng_proxy));
- SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
-
- res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
- pix_sz, cam, spp, rng, buf);
-
- SSP(rng_proxy_ref_put(rng_proxy));
- SSP(rng_ref_put(rng));
-
- if(res_local != RES_OK) {
- ATOMIC_SET(&res, res_local);
- continue;
+ if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
+ /* Try to steal works to concurrent processes */
+ proc_work_reset(work);
+ nthieves = mpi_steal_work(htrdr, rng_main, work);
+ if(nthieves != 0) {
+ mcode = proc_work_get_tile(work);
+ }
}
+ }
+ if(mcode == TILE_MCODE_NULL) break; /* No more work */
+
+ /* Decode the morton code to retrieve the tile index */
+ tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
+ tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
+ ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
+
+ /* Define the tile origin in pixel space */
+ tile_org[0] *= TILE_SIZE;
+ tile_org[1] *= TILE_SIZE;
+
+ /* Compute the size of the tile clamped by the borders of the buffer */
+ tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]);
+ tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]);
+
+ SSP(rng_proxy_create2
+ (&htrdr->lifo_allocators[ithread],
+ &ssp_rng_threefry,
+ RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
+ RNG_SEQUENCE_SIZE, /* Size */
+ RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
+ 1, &rng_proxy));
+ SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
+
+ res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
+ pix_sz, cam, spp, rng, buf);
+
+ SSP(rng_proxy_ref_put(rng_proxy));
+ SSP(rng_ref_put(rng));
+
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ break;
+ }
- n = (size_t)ATOMIC_INCR(&nsolved_tiles);
- pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
+ n = (size_t)ATOMIC_INCR(&nsolved_tiles);
+ pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
- #pragma omp critical
- if(pcent > htrdr->mpi_progress_render[0]) {
- htrdr->mpi_progress_render[0] = pcent;
- if(htrdr->mpi_rank == 0) {
- update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
- } else { /* Send the progress percentage to the master process */
- send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
- }
+ #pragma omp critical
+ if(pcent > htrdr->mpi_progress_render[0]) {
+ htrdr->mpi_progress_render[0] = pcent;
+ if(htrdr->mpi_rank == 0) {
+ update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ } else { /* Send the progress percentage to the master process */
+ send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
}
}
+ }
- if(ATOMIC_GET(&res) != RES_OK) goto error;
-
- proc_work_reset(work);
- nthieves = mpi_steal_work(htrdr, rng_main, work);
- nthreads = MMIN(nthieves, htrdr->nthreads);
- } while(nthieves);
+ if(ATOMIC_GET(&res) != RES_OK) goto error;
/* Synchronize the process */
mutex_lock(htrdr->mpi_mutex);