htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git
Log | Files | Refs | README | LICENSE

commit de9190697bfec46def5015569d0a2e9b29154ae1
parent 10fe56bef42a403625762f1ff1045cdbbd36b2a6
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed, 31 Oct 2018 10:50:32 +0100

Improve work stealing strategy

Previously, the draw function was waiting for the completion of the
rendering threads and then submitted a steal request to the concurrent
processes. Some rendering threads were thus waiting for the completion
of the other rendering threads without performing any computation.

In this commit a thread with no more work does not wait anymore and
directly try to steal tiles to concurrent processes.

Diffstat:
Msrc/htrdr_draw_radiance_sw.c | 144++++++++++++++++++++++++++++++++++++++++++-------------------------------------
1 file changed, 76 insertions(+), 68 deletions(-)

diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -242,7 +242,7 @@ mpi_steal_work size_t nthieves = 0; uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */ int proc_to_steal; /* Process to steal */ - uint8_t ntiles_to_steal = (uint8_t)(htrdr->nthreads*2); + uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16); ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX); /* Protect MPI calls of multiple invocations from concurrent threads */ @@ -294,6 +294,11 @@ mpi_gather_buffer res_T res = RES_OK; ASSERT(htrdr && buf); + /* The process is alone. There is nothing to gather since all work was done + * by it */ + if(htrdr->mpi_nprocs == 1) + goto exit; + /* Fetch the memory layout of the submitted buffer */ htrdr_buffer_get_layout(buf, &layout); ASSERT(layout.elmt_size == sizeof(struct htrdr_accum) * 3/*#channels*/); @@ -474,86 +479,89 @@ draw_image pix_sz[0] = 1.0 / (double)layout.width; pix_sz[1] = 1.0 / (double)layout.height; - nthreads = htrdr->nthreads; proc_ntiles = proc_work_get_ntiles(work); + nthreads = MMIN(htrdr->nthreads, proc_ntiles); /* The process is not considered as a working process for himself */ htrdr->mpi_working_procs[htrdr->mpi_rank] = 0; --htrdr->mpi_nworking_procs; - do { - omp_set_num_threads((int)nthreads); - - #pragma omp parallel - for(;;) { - const int ithread = omp_get_thread_num(); - struct ssp_rng_proxy* rng_proxy = NULL; - struct ssp_rng* rng; - uint32_t mcode; - size_t tile_org[2]; - size_t tile_sz[2]; - size_t n; - res_T res_local = RES_OK; - int32_t pcent; - - if(ATOMIC_GET(&res) != RES_OK) continue; - + omp_set_num_threads((int)nthreads); + #pragma omp parallel + for(;;) { + const int ithread = omp_get_thread_num(); + struct ssp_rng_proxy* rng_proxy = NULL; + struct ssp_rng* rng; + uint32_t mcode; + size_t tile_org[2]; + size_t tile_sz[2]; + size_t n; + res_T res_local = RES_OK; + int32_t pcent; + + /* Get a tile to draw */ + #pragma omp critical + { mcode = proc_work_get_tile(work); - if(mcode == TILE_MCODE_NULL) break; - - /* Decode the morton code to retrieve the tile index */ - tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); - tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); - ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); - - /* Define the tile origin in pixel space */ - tile_org[0] *= TILE_SIZE; - tile_org[1] *= TILE_SIZE; - - /* Compute the size of the tile clamped by the borders of the buffer */ - tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]); - tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]); - - SSP(rng_proxy_create2 - (&htrdr->lifo_allocators[ithread], - &ssp_rng_threefry, - RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */ - RNG_SEQUENCE_SIZE, /* Size */ - RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */ - 1, &rng_proxy)); - SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); - - res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, - pix_sz, cam, spp, rng, buf); - - SSP(rng_proxy_ref_put(rng_proxy)); - SSP(rng_ref_put(rng)); - - if(res_local != RES_OK) { - ATOMIC_SET(&res, res_local); - continue; + if(mcode == TILE_MCODE_NULL) { /* No more work on this process */ + /* Try to steal works to concurrent processes */ + proc_work_reset(work); + nthieves = mpi_steal_work(htrdr, rng_main, work); + if(nthieves != 0) { + mcode = proc_work_get_tile(work); + } } + } + if(mcode == TILE_MCODE_NULL) break; /* No more work */ + + /* Decode the morton code to retrieve the tile index */ + tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); + tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); + ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); + + /* Define the tile origin in pixel space */ + tile_org[0] *= TILE_SIZE; + tile_org[1] *= TILE_SIZE; + + /* Compute the size of the tile clamped by the borders of the buffer */ + tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]); + tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]); + + SSP(rng_proxy_create2 + (&htrdr->lifo_allocators[ithread], + &ssp_rng_threefry, + RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */ + RNG_SEQUENCE_SIZE, /* Size */ + RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */ + 1, &rng_proxy)); + SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); + + res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, + pix_sz, cam, spp, rng, buf); + + SSP(rng_proxy_ref_put(rng_proxy)); + SSP(rng_ref_put(rng)); + + if(res_local != RES_OK) { + ATOMIC_SET(&res, res_local); + break; + } - n = (size_t)ATOMIC_INCR(&nsolved_tiles); - pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); + n = (size_t)ATOMIC_INCR(&nsolved_tiles); + pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); - #pragma omp critical - if(pcent > htrdr->mpi_progress_render[0]) { - htrdr->mpi_progress_render[0] = pcent; - if(htrdr->mpi_rank == 0) { - update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - } else { /* Send the progress percentage to the master process */ - send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent); - } + #pragma omp critical + if(pcent > htrdr->mpi_progress_render[0]) { + htrdr->mpi_progress_render[0] = pcent; + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } else { /* Send the progress percentage to the master process */ + send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent); } } + } - if(ATOMIC_GET(&res) != RES_OK) goto error; - - proc_work_reset(work); - nthieves = mpi_steal_work(htrdr, rng_main, work); - nthreads = MMIN(nthieves, htrdr->nthreads); - } while(nthieves); + if(ATOMIC_GET(&res) != RES_OK) goto error; /* Synchronize the process */ mutex_lock(htrdr->mpi_mutex);