htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git
Log | Files | Refs | README | LICENSE

commit bdb57b9e281af7b6b846ba8495e285e277fed313
parent a945a02670bbb791ba61430b8aec6043efa8eddf
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue, 30 Oct 2018 16:19:39 +0100

Upd the work scheduling policy

The #tiles to steal are now stolen to the same process.

Diffstat:
Msrc/htrdr_draw_radiance_sw.c | 129++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
1 file changed, 75 insertions(+), 54 deletions(-)

diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -21,7 +21,7 @@ #include "htrdr_solve.h" #include <rsys/cstr.h> -#include <rsys/dynamic_array_size_t.h> +#include <rsys/dynamic_array_u32.h> #include <rsys/math.h> #include <star/ssp.h> @@ -37,11 +37,11 @@ STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); /* Overall work of a process */ struct proc_work { - struct darray_size_t tiles; /* #tiles to render */ + struct darray_u32 tiles; /* #tiles to render */ size_t itile; /* Next tile to render in the above list of tiles */ }; -#define TILE_MCODE_NULL INT64_MAX +#define TILE_MCODE_NULL UINT32_MAX /******************************************************************************* * Helper functions @@ -57,18 +57,29 @@ morton2D_decode(const uint32_t u32) return (uint16_t)x; } +static FINLINE uint32_t +morton2D_encode(const uint16_t u16) +{ + uint32_t u32 = u16; + u32 = (u32 | (u32 << 8)) & 0x00FF00FF; + u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F; + u32 = (u32 | (u32 << 2)) & 0x33333333; + u32 = (u32 | (u32 << 1)) & 0x55555555; + return u32; +} + static INLINE void proc_work_init(struct mem_allocator* allocator, struct proc_work* work) { ASSERT(work); - darray_size_t_init(allocator, &work->tiles); + darray_u32_init(allocator, &work->tiles); work->itile = 0; } static INLINE void proc_work_release(struct proc_work* work) { - darray_size_t_release(&work->tiles); + darray_u32_release(&work->tiles); } static INLINE void @@ -77,36 +88,41 @@ proc_work_reset(struct proc_work* work) ASSERT(work); #pragma omp critical { - darray_size_t_clear(&work->tiles); + darray_u32_clear(&work->tiles); work->itile = 0; } } static INLINE void -proc_work_add_tile(struct proc_work* work, const int64_t mcode) +proc_work_add_tile(struct proc_work* work, const uint32_t mcode) { - size_t st = (size_t)mcode; - ASSERT(mcode >= 0); #pragma omp critical - CHK(darray_size_t_push_back(&work->tiles, &st) == RES_OK); + CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK); } -static INLINE int64_t +static INLINE uint32_t proc_work_get_tile(struct proc_work* work) { - int64_t mcode; + uint32_t mcode; #pragma omp critical { - if(work->itile >= darray_size_t_size_get(&work->tiles)) { + if(work->itile >= darray_u32_size_get(&work->tiles)) { mcode = TILE_MCODE_NULL; } else { - mcode = (int64_t)darray_size_t_cdata_get(&work->tiles)[work->itile]; + mcode = darray_u32_cdata_get(&work->tiles)[work->itile]; ++work->itile; } } return mcode; } +static INLINE size_t +proc_work_get_ntiles(struct proc_work* work) +{ + ASSERT(work); + return darray_u32_size_get(&work->tiles); +} + static res_T draw_tile (struct htrdr* htrdr, @@ -225,6 +241,7 @@ mpi_probe_thieves struct proc_work* work, ATOMIC* probe_thieves) { + uint32_t tiles[UINT8_MAX]; struct timespec t; ASSERT(htrdr && work && probe_thieves); @@ -242,8 +259,8 @@ mpi_probe_thieves while(ATOMIC_GET(probe_thieves)) { MPI_Status status; + size_t itile; int msg; - int64_t tile; /* Probe if a steal request was submitted by any processes */ P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg, @@ -251,18 +268,20 @@ mpi_probe_thieves if(msg) { /* A steal request was posted */ MPI_Request req; - int8_t dummy; + uint8_t ntiles_to_steal; /* Asynchronously receive the steal request */ - P_MPI(Irecv(&dummy, 1, MPI_INT8_T, status.MPI_SOURCE, + P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req)); /* Wait for the completion of the steal request */ mpi_wait_for_request(htrdr, &req); - /* Thief a tile */ - tile = proc_work_get_tile(work); - P_MPI(Send(&tile, 1, MPI_INT64_T, status.MPI_SOURCE, + /* Thief some tiles */ + FOR_EACH(itile, 0, ntiles_to_steal) { + tiles[itile] = proc_work_get_tile(work); + } + P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE, HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD)); } t.tv_nsec = 500000000; /* 500ms */ @@ -301,9 +320,13 @@ mpi_steal_work struct ssp_rng* rng, struct proc_work* work) { - size_t ntiles_to_steal = (size_t)htrdr->nthreads; + MPI_Request req; + size_t itile; size_t nthieves = 0; - ASSERT(htrdr && rng && work); + uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */ + int proc_to_steal; /* Process to steal */ + uint8_t ntiles_to_steal = (uint8_t)(htrdr->nthreads*2); + ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX); /* Protect MPI calls of multiple invocations from concurrent threads */ #define P_MPI(Func) { \ @@ -312,33 +335,31 @@ mpi_steal_work mutex_unlock(htrdr->mpi_mutex); \ } (void)0 - while(nthieves < ntiles_to_steal && htrdr->mpi_nworking_procs) { - MPI_Request req; - const int8_t dummy = 1; - int64_t tile; /* Morton code of the stolen tile */ - int proc_to_steal; /* Process to steal */ + /* No more working process => nohting to steal */ + if(!htrdr->mpi_nworking_procs) return 0; - /* Sample a process to steal */ - proc_to_steal = mpi_sample_working_process(htrdr, rng); + /* Sample a process to steal */ + proc_to_steal = mpi_sample_working_process(htrdr, rng); - /* Send a steal request to the sampled process and wait for a response */ - P_MPI(Send(&dummy, 1, MPI_INT8_T, proc_to_steal, HTRDR_MPI_STEAL_REQUEST, - MPI_COMM_WORLD)); + /* Send a steal request to the sampled process and wait for a response */ + P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal, + HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD)); - /* Receive the stolen tile from the sampled process */ - P_MPI(Irecv(&tile, 1, MPI_INT64_T, proc_to_steal, - HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req)); + /* Receive the stolen tile from the sampled process */ + P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal, + HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req)); - mpi_wait_for_request(htrdr, &req); + mpi_wait_for_request(htrdr, &req); - if(tile != TILE_MCODE_NULL) { - proc_work_add_tile(work, tile); - ++nthieves; - } else { + FOR_EACH(itile, 0, ntiles_to_steal) { + if(tiles[itile] == TILE_MCODE_NULL) { ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0); htrdr->mpi_working_procs[proc_to_steal] = 0; htrdr->mpi_nworking_procs--; + break; } + proc_work_add_tile(work, tiles[itile]); + ++nthieves; } #undef P_MPI return nthieves; @@ -428,9 +449,9 @@ htrdr_draw_radiance_sw struct htrdr_buffer* buf) { struct ssp_rng* rng_main = NULL; - size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; + size_t ntiles_x, ntiles_y, ntiles_adjusted; + size_t itile; struct proc_work work; - int64_t itile; struct htrdr_buffer_layout layout; double pix_sz[2]; /* Pixel size in the normalized image plane */ size_t proc_ntiles; @@ -465,7 +486,6 @@ htrdr_draw_radiance_sw /* Compute the overall number of tiles */ ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles = ntiles_x * ntiles_y; /* Adjust the #tiles for the morton-encoding procedure */ ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); @@ -476,27 +496,27 @@ htrdr_draw_radiance_sw pix_sz[1] = 1.0 / (double)layout.height; /* Define the initial number of tiles of the current process */ - proc_ntiles = ntiles / (size_t)htrdr->mpi_nprocs; proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs; - if(htrdr->mpi_rank == 0) {/* Affect the remaining tiles to the master proc */ - proc_ntiles += ntiles - proc_ntiles*(size_t)htrdr->mpi_nprocs; + if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */ proc_ntiles_adjusted += - ntiles_adjusted - proc_ntiles*(size_t)htrdr->mpi_nprocs; + ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs; } /* Define the initial list of tiles of the process */ FOR_EACH(itile, 0, proc_ntiles_adjusted) { - size_t tile_org[2]; - int64_t mcode = htrdr->mpi_rank + (itile*htrdr->mpi_nprocs); + uint32_t mcode; + uint16_t tile_org[2]; - /* Decode the morton code to retrieve the tile index */ - tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); - if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */ - tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); - if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */ + mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs + + (uint32_t)htrdr->mpi_rank; + tile_org[0] = morton2D_decode(mcode>>0); + if(tile_org[0] >= ntiles_x) continue; + tile_org[1] = morton2D_decode(mcode>>1); + if(tile_org[1] >= ntiles_y) continue; proc_work_add_tile(&work, mcode); } + proc_ntiles = proc_work_get_ntiles(&work); if(htrdr->mpi_rank == 0) { fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); @@ -588,6 +608,7 @@ htrdr_draw_radiance_sw proc_work_reset(&work); nthieves = mpi_steal_work(htrdr, rng_main, &work); + nthreads = MMIN(nthieves, htrdr->nthreads); } while(nthieves); /* Synchronize the process */