rnatm

Load and structure data describing an atmosphere
git clone git://git.meso-star.fr/rnatm.git
Log | Files | Refs | README | LICENSE

commit 004fa7d27a5892e18bf3e2d17430929535ad2365
parent 28355a13fb86d4b1a3df478519c552cf2a12d2c1
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri, 29 Jul 2022 17:43:43 +0200

Preparing to build multiple octrees in one voxelization

Diffstat:
Msrc/rnatm_octree.c | 639+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 480 insertions(+), 159 deletions(-)

diff --git a/src/rnatm_octree.c b/src/rnatm_octree.c @@ -30,6 +30,7 @@ #include <rsys/clock_time.h> #include <rsys/cstr.h> +#include <rsys/double3.h> #include <rsys/math.h> #include <rsys/morton.h> #include <rsys/rsys.h> @@ -53,10 +54,157 @@ struct radcoefs { float ks_min, ks_max; float kext_min, kext_max; }; +#define RADCOEFS_NULL__ { \ + FLT_MAX, -FLT_MAX, \ + FLT_MAX, -FLT_MAX, \ + FLT_MAX, -FLT_MAX \ +} + +struct spectral_item { + size_t iband; + size_t iquad_pt; +}; +#define SPECTRAL_ITEM_NULL__ {0, 0} + +/* Generate the dynamic array of radiative coefficient lists */ +#define DARRAY_NAME radcoef_list +#define DARRAY_DATA struct radcoefs* +#include <rsys/dynamic_array.h> + +struct voxelize_batch_args { + /* Working data structure */ + struct darray_size_t_list* per_thread_tetra_list; + struct darray_radcoef_list* per_thread_radcoef_list; + struct pool* pool; + + size_t part_def; /* Partition definition */ + size_t nparts[3]; /* #partitions required to cover the entire grid */ + size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */ + + /* AABB of the atmosphere */ + double atm_low[3]; + double atm_upp[3]; + + double vxsz[3]; /* Size of a voxel */ + + struct spectral_item* items; + size_t batch_size; /* #spectral items */ +}; +#define VOXELIZE_BATCH_ARGS_NULL__ { \ + NULL, /* Per thread tetra list */ \ + NULL, /* Per thread radiative coefs */ \ + NULL, /* Partition pool */ \ + \ + 0, /* Partition definition */ \ + {0,0,0}, /* #partitions to cover the entire grid */ \ + 0, /* #partitions adjusted wrt morton indexing */ \ + \ + /* AABB of the atmosphere */ \ + { DBL_MAX, DBL_MAX, DBL_MAX}, \ + {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \ + \ + {0,0,0}, /* Size of a voxel */ \ + \ + NULL, /* Spectral items */ \ + 0 /* Batch size */ \ +} +static const struct voxelize_batch_args VOXELIZE_BATCH_ARGS_NULL = + VOXELIZE_BATCH_ARGS_NULL__; + +struct voxelize_args { + struct darray_size_t* tetra_ids; + struct radcoefs* per_item_radcoefs; + + struct partition* part; + size_t part_def; /* Partition definition */ + + /* AABB of the partition */ + double part_low[3]; + double part_upp[3]; + + double vxsz[3]; /* Size of a voxel */ + + const struct spectral_item* items; + size_t batch_size; /* #spectral items */ +}; +#define VOXELIZE_ARGS_NULL__ { \ + NULL, /* Tetra ids */ \ + NULL, /* Radiative coefs */ \ + \ + NULL, /* Partition */ \ + 0, /* Partition definition */ \ + \ + /* AABB of the partition */ \ + { DBL_MAX, DBL_MAX, DBL_MAX}, \ + {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \ + \ + {0,0,0}, /* Size of a voxel */ \ + \ + NULL, /* Spectral items */ \ + 0 /* Batch size */ \ +} +static const struct voxelize_args VOXELIZE_ARGS_NULL = VOXELIZE_ARGS_NULL__; /******************************************************************************* * Helper functions ******************************************************************************/ +static INLINE res_T +check_voxelize_batch_args + (struct rnatm* atm, + const struct voxelize_batch_args* args) +{ + size_t i; + if(!args + || !args->per_thread_tetra_list + || !args->per_thread_radcoef_list + || !args->pool + || !IS_POW2(args->part_def) + || !args->nparts[0] + || !args->nparts[1] + || !args->nparts[2] + || !IS_POW2(args->nparts_adjusted) + || args->nparts_adjusted < args->nparts[0]*args->nparts[1]*args->nparts[2] + || args->atm_low[0] >= args->atm_upp[0] + || args->atm_low[1] >= args->atm_upp[1] + || args->atm_low[2] >= args->atm_upp[2] + || !args->vxsz[0] + || !args->vxsz[1] + || !args->vxsz[2] + || !args->items + || !args->batch_size) { + return RES_BAD_ARG; + } + if(atm->nthreads != darray_size_t_list_size_get(args->per_thread_tetra_list) + || atm->nthreads != darray_radcoef_list_size_get(args->per_thread_radcoef_list)) { + return RES_BAD_ARG; + } + FOR_EACH(i, 0, atm->nthreads) { + if(!darray_radcoef_list_cdata_get(args->per_thread_radcoef_list)[i]) { + return RES_BAD_ARG; + } + } + return RES_OK; +} + +static INLINE res_T +check_voxelize_args(const struct voxelize_args* args) +{ + if(!args->tetra_ids + || !args->per_item_radcoefs + || args->part_low[0] >= args->part_upp[0] + || args->part_low[1] >= args->part_upp[1] + || args->part_low[2] >= args->part_upp[2] + || !IS_POW2(args->part_def) + || !args->vxsz[0] + || !args->vxsz[1] + || !args->vxsz[2] + || !args->items + || !args->batch_size) { + return RES_BAD_ARG; + } + return RES_OK; +} + static FINLINE unsigned round_pow2(const unsigned val) { @@ -68,6 +216,76 @@ round_pow2(const unsigned val) } } +/* Return the number of quadrature points for the submitted range of bands. + * boundaries are inclusives */ +static INLINE size_t +compute_nquad_pts(const struct rnatm* atm, const size_t bands[2]) +{ + size_t nquad_pts = 0; + size_t nbands = 0; + size_t i = 0; + ASSERT(atm && bands && bands[0] <= bands[1]); + + nbands = bands[1] - bands[0] + 1; + FOR_EACH(i, 0, nbands) { + struct sck_band band; + SCK(get_band(atm->gas.ck, bands[i], &band)); + nquad_pts += band.quad_pts_count; + } + return nquad_pts; +} + +static res_T +setup_batches + (struct rnatm* atm, + const size_t bands[2], /* Boundaries are inclusives */ + struct spectral_item** out_items, + size_t* out_nitems) +{ + struct spectral_item* items = NULL; + size_t iitem; + size_t nitems; + size_t nbands; + size_t i; + res_T res = RES_OK; + ASSERT(atm && out_items && out_nitems && bands && bands[0] <= bands[1]); + + nitems = compute_nquad_pts(atm, bands); + items = MEM_CALLOC(atm->allocator, nitems, sizeof(*items)); + if(items == NULL) { + log_err(atm, "%s: failed to allocate memory\n", FUNC_NAME); + res = RES_MEM_ERR; + goto error; + } + + iitem = 0; + nbands = bands[1] - bands[0] + 1; + FOR_EACH(i, 0, nbands) { + struct sck_band band; + const size_t iband = bands[i]; + size_t iquad_pt; + + SCK(get_band(atm->gas.ck, iband, &band)); + FOR_EACH(iquad_pt, 0, band.quad_pts_count) { + ASSERT(iitem < nitems); + items[iitem].iband = iband; + items[iitem].iquad_pt = iquad_pt; + } + } + +exit: + *out_items = items; + *out_nitems = nitems; + return res; +error: + if(items) { + MEM_RM(atm->allocator, items); + items = NULL; + nitems = 0; + } + goto exit; +} + static INLINE void register_tetra (const struct suvm_primitive* prim, @@ -220,26 +438,18 @@ update_voxel } static res_T -voxelize_gas - (struct rnatm* atm, - const double part_low[3], - const double part_upp[3], - const double vxsz[3], - const struct darray_size_t* tetra_ids, - struct partition* part) +voxelize_gas(struct rnatm* atm, const struct voxelize_args* args) { size_t i; - ASSERT(atm && part_low && part_upp && tetra_ids && part); - ASSERT(vxsz[0] > 0 && vxsz[1] > 0 && vxsz[2] > 0); - ASSERT(part_low[0] < part_upp[0]); - ASSERT(part_low[1] < part_upp[1]); - ASSERT(part_low[2] < part_upp[2]); + ASSERT(atm && check_voxelize_args(args) == RES_OK); - partition_clear_voxels(part); + /* FIXME one has to support a batch_size > 1 */ + ASSERT(args->batch_size == 1); - FOR_EACH(i, 0, darray_size_t_size_get(tetra_ids)) { + partition_clear_voxels(args->part); + + FOR_EACH(i, 0, darray_size_t_size_get(args->tetra_ids)) { struct suvm_primitive tetra; - struct radcoefs radcoefs; struct suvm_polyhedron poly; double poly_low[3]; double poly_upp[3]; @@ -249,59 +459,82 @@ voxelize_gas uint32_t ivx_upp[3]; uint32_t ivx[3]; uint64_t mcode[3]; /* Cache of 3D morton code */ - const size_t itetra = darray_size_t_cdata_get(tetra_ids)[i]; + const size_t itetra = darray_size_t_cdata_get(args->tetra_ids)[i]; enum suvm_intersection_type intersect; /* Recover the tetrahedron and setup its polyhedron */ SUVM(volume_get_primitive(atm->gas.volume, itetra, &tetra)); SUVM(primitive_setup_polyhedron(&tetra, &poly)); - ASSERT(poly.lower[0] <= part_upp[0] && poly.upper[0] >= part_low[0]); - ASSERT(poly.lower[1] <= part_upp[1] && poly.upper[1] >= part_low[1]); - ASSERT(poly.lower[2] <= part_upp[2] && poly.upper[2] >= part_low[2]); + ASSERT(poly.lower[0] <= args->part_upp[0]); + ASSERT(poly.lower[1] <= args->part_upp[1]); + ASSERT(poly.lower[2] <= args->part_upp[2]); + ASSERT(poly.upper[0] >= args->part_low[0]); + ASSERT(poly.upper[1] >= args->part_low[1]); + ASSERT(poly.upper[2] >= args->part_low[2]); /* Clamp the AABB of the polyhedra to the partition bounds */ - poly_low[0] = MMAX(poly.lower[0], part_low[0]); - poly_low[1] = MMAX(poly.lower[1], part_low[1]); - poly_low[2] = MMAX(poly.lower[2], part_low[2]); - poly_upp[0] = MMIN(poly.upper[0], part_upp[0]); - poly_upp[1] = MMIN(poly.upper[1], part_upp[1]); - poly_upp[2] = MMIN(poly.upper[2], part_upp[2]); + poly_low[0] = MMAX(poly.lower[0], args->part_low[0]); + poly_low[1] = MMAX(poly.lower[1], args->part_low[1]); + poly_low[2] = MMAX(poly.lower[2], args->part_low[2]); + poly_upp[0] = MMIN(poly.upper[0], args->part_upp[0]); + poly_upp[1] = MMIN(poly.upper[1], args->part_upp[1]); + poly_upp[2] = MMIN(poly.upper[2], args->part_upp[2]); /* Transform the AABB of the polyhedron in voxel space of the partition */ - ivx_low[0] = (uint32_t)((poly_low[0] - part_low[0]) / vxsz[0]); - ivx_low[1] = (uint32_t)((poly_low[1] - part_low[1]) / vxsz[1]); - ivx_low[2] = (uint32_t)((poly_low[2] - part_low[2]) / vxsz[2]); - ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - part_low[0]) / vxsz[0]); - ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - part_low[1]) / vxsz[1]); - ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - part_low[2]) / vxsz[2]); - ASSERT(ivx_upp[0] <= partition_get_definition(part)); - ASSERT(ivx_upp[1] <= partition_get_definition(part)); - ASSERT(ivx_upp[2] <= partition_get_definition(part)); - - /* Compute the range of the tetrahedron radiative coefficients. - * TODO setup the band and quadrature point */ - setup_tetra_radcoefs(atm, &tetra, 0/*iband*/, 0/*iquad*/, &radcoefs); + ivx_low[0] = (uint32_t)((poly_low[0] - args->part_low[0]) / args->vxsz[0]); + ivx_low[1] = (uint32_t)((poly_low[1] - args->part_low[1]) / args->vxsz[1]); + ivx_low[2] = (uint32_t)((poly_low[2] - args->part_low[2]) / args->vxsz[2]); + ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - args->part_low[0]) / args->vxsz[0]); + ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - args->part_low[1]) / args->vxsz[1]); + ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - args->part_low[2]) / args->vxsz[2]); + ASSERT(ivx_upp[0] <= args->part_def); + ASSERT(ivx_upp[1] <= args->part_def); + ASSERT(ivx_upp[2] <= args->part_def); + + /* Compute the range of the tetrahedron radiative coefficients + * FIXME handle batch_size > 1 */ + #if 1 + setup_tetra_radcoefs(atm, &tetra, + args->items[0].iband, + args->items[0].iquad_pt, + &args->per_item_radcoefs[0]); + #else + FOR_EACH(item, 0, args->batch_size) { + const size_t iband = args->items[item].iband; + const size_t iquad_pt = args->items[item].iquad_pt; + struct radcoefs* radcoefs = args->per_item_radcoefs + item; + setup_tetra_radcoefs(atm, &tetra, iband, iquad_pt, radcoefs); + } + #endif /* Iterate voxels intersected by the AABB of the polyedron */ FOR_EACH(ivx[2], ivx_low[2], ivx_upp[2]) { - vx_low[2] = (float)((double)ivx[2]*vxsz[2] + part_low[2]); - vx_upp[2] = vx_low[2] + (float)vxsz[2]; + vx_low[2] = (float)((double)ivx[2]*args->vxsz[2] + args->part_low[2]); + vx_upp[2] = vx_low[2] + (float)args->vxsz[2]; mcode[2] = morton3D_encode_u21(ivx[2]); FOR_EACH(ivx[1], ivx_low[1], ivx_upp[1]) { - vx_low[1] = (float)((double)ivx[1]*vxsz[1] + part_low[1]); - vx_upp[1] = vx_low[1] + (float)vxsz[1]; + vx_low[1] = (float)((double)ivx[1]*args->vxsz[1] + args->part_low[1]); + vx_upp[1] = vx_low[1] + (float)args->vxsz[1]; mcode[1] = (morton3D_encode_u21(ivx[1]) << 1) | mcode[2]; FOR_EACH(ivx[0], ivx_low[0], ivx_upp[0]) { - vx_low[0] = (float)((double)ivx[0]*vxsz[0] + part_low[0]); - vx_upp[0] = vx_low[0] + (float)vxsz[0]; + vx_low[0] = (float)((double)ivx[0]*args->vxsz[0] + args->part_low[0]); + vx_upp[0] = vx_low[0] + (float)args->vxsz[0]; mcode[0] = (morton3D_encode_u21(ivx[0]) << 2) | mcode[1]; intersect = suvm_polyhedron_intersect_aabb(&poly, vx_low, vx_upp); if(intersect == SUVM_INTERSECT_NONE) continue; - update_voxel(atm, &radcoefs, part, mcode[0]); + /* Update the intersected voxel FIXME handle batch_size > 1*/ + #if 1 + update_voxel(atm, &args->per_item_radcoefs[0], args->part, mcode[0]); + #else + FOR_EACH(item, 0, args->batch_size) { + struct radcoefs* radcoefs = args->per_item_radcoefs + item; + update_voxel(atm, radcoefs, part, mcode[0]); + } + #endif } } } @@ -311,33 +544,28 @@ voxelize_gas } static res_T -voxelize_partition - (struct rnatm* atm, - const double part_low[3], - const double part_upp[3], - const double vxsz[3], - struct darray_size_t* tetra_ids, - struct partition* part) +voxelize_partition(struct rnatm* atm, const struct voxelize_args* args) { res_T res = RES_OK; - ASSERT(atm && part_low && part_upp && tetra_ids && part); - ASSERT(part_low[0] < part_upp[0]); - ASSERT(part_low[1] < part_upp[1]); - ASSERT(part_low[2] < part_upp[2]); + ASSERT(atm && check_voxelize_args(args) == RES_OK); /* Find the list of gas tetrahedra that overlap the partition */ - darray_size_t_clear(tetra_ids); + darray_size_t_clear(args->tetra_ids); res = suvm_volume_intersect_aabb - (atm->gas.volume, part_low, part_upp, register_tetra, tetra_ids); + (atm->gas.volume, + args->part_low, + args->part_upp, + register_tetra, + args->tetra_ids); if(res != RES_OK) goto error; /* The partition is not covered by any tetrahedron */ - if(darray_size_t_size_get(tetra_ids) == 0) { - partition_empty(part); + if(darray_size_t_size_get(args->tetra_ids) == 0) { + partition_empty(args->part); goto exit; } - res = voxelize_gas(atm, part_low, part_upp, vxsz, tetra_ids, part); + res = voxelize_gas(atm, args); if(res != RES_OK) goto error; /* TODO voxelise aerosols */ @@ -349,45 +577,16 @@ error: } static res_T -voxelize_atmosphere(struct rnatm* atm, struct pool* pools[]) +voxelize_batch(struct rnatm* atm, const struct voxelize_batch_args* args) { - struct darray_size_t_list per_thread_tetra_list; - double atm_low[3]; - double atm_upp[3]; - double vxsz[3]; - size_t nparts[3]; /* #partitions along the 3 axis */ - size_t nparts_adjusted; - size_t part_def; /* Definition of a partition */ int64_t i; int progress = 0; ATOMIC nparts_voxelized = 0; ATOMIC res = RES_OK; - ASSERT(atm); + ASSERT(atm && check_voxelize_batch_args(atm, args) == RES_OK); - /* Allocate the per thread list of tetra list */ - darray_size_t_list_init(atm->allocator, &per_thread_tetra_list); - res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads); - if(res != RES_OK) goto error; - - /* Recover the AABB atmosphere and compute the size of a voxel */ - SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp)); - vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0]; - vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1]; - vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2]; - - /* Number of partitions required to cover the entire atmosphere grid */ - part_def = pool_get_partition_definition(pools[0]); - nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def; - nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def; - nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def; - - /* Adjust the #partitions allowing their indexing by their morton code */ - nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2])); - nparts_adjusted = round_up_pow2(nparts_adjusted); - nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted; - - /* Print the size of a voxel */ - log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz)); + /* FIXME one has to support a batch_size > 1 */ + ASSERT(args->batch_size == 1); /* Print status message */ #define LOG_MSG "voxelize atmosphere: %3d%%\r" @@ -397,9 +596,11 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[]) * and voxelizes the tetrahedrons that overlap them */ omp_set_num_threads((int)atm->nthreads); #pragma omp parallel for schedule(static, 1/*chunk size*/) - for(i = 0; i < (int64_t)nparts_adjusted; ++i) { + for(i = 0; i < (int64_t)args->nparts_adjusted; ++i) { + struct voxelize_args voxelize_args = VOXELIZE_ARGS_NULL; struct darray_size_t* tetra_ids = NULL; struct partition* part = NULL; + struct radcoefs* per_item_radcoefs = NULL; double part_low[3]; double part_upp[3]; uint32_t part_ids[3]; @@ -410,34 +611,52 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[]) if(ATOMIC_GET(&res) != RES_OK) continue; - /* Recover current partition */ - part = pool_next_partition(pools[0]); + /* Recover the batch partitions. + * FIXME add support of multiple spectral item + * per partition to allow several voxels the _same_ partition id */ + part = pool_next_partition(args->pool); if(!part) { ATOMIC_SET(&res, RES_UNKNOWN_ERR); continue; }; - /* Is the partition out of bounds of the atmosphere grid*/ + /* Is the current partition out of bounds of the atmosphere grid */ morton_xyz_decode_u21((uint64_t)partition_get_id(part), part_ids); - if(part_ids[0] >= nparts[0] - || part_ids[1] >= nparts[1] - || part_ids[2] >= nparts[2]) { + if(part_ids[0] >= args->nparts[0] + || part_ids[1] >= args->nparts[1] + || part_ids[2] >= args->nparts[2]) { partition_free(part); continue; } /* Compute the AABB of the partition */ - part_low[0] = (double)(part_ids[0] * part_def) * vxsz[0] + atm_low[0]; - part_low[1] = (double)(part_ids[1] * part_def) * vxsz[1] + atm_low[1]; - part_low[2] = (double)(part_ids[2] * part_def) * vxsz[2] + atm_low[2]; - part_upp[0] = part_low[0] + (double)part_def * vxsz[0]; - part_upp[1] = part_low[1] + (double)part_def * vxsz[1]; - part_upp[2] = part_low[2] + (double)part_def * vxsz[2]; + part_low[0] = (double)(part_ids[0] * args->part_def) * args->vxsz[0]; + part_low[1] = (double)(part_ids[1] * args->part_def) * args->vxsz[1]; + part_low[2] = (double)(part_ids[2] * args->part_def) * args->vxsz[2]; + part_low[0] = part_low[0] + args->atm_low[0]; + part_low[1] = part_low[1] + args->atm_low[1]; + part_low[2] = part_low[2] + args->atm_low[2]; + part_upp[0] = part_low[0] + (double)args->part_def * args->vxsz[0]; + part_upp[1] = part_low[1] + (double)args->part_def * args->vxsz[1]; + part_upp[2] = part_low[2] + (double)args->part_def * args->vxsz[2]; /* Retrieves the array where to store the indices of tetrahedra that * overlap the partition */ - tetra_ids = darray_size_t_list_data_get(&per_thread_tetra_list) + ithread; + tetra_ids = darray_size_t_list_data_get(args->per_thread_tetra_list) + ithread; + + /* Retrieve the spectral item data structure used to store radiative + * coeficients */ + per_item_radcoefs = darray_radcoef_list_data_get + (args->per_thread_radcoef_list)[ithread]; /* Voxelizes the partition and once done, commits */ - res_local = voxelize_partition - (atm, part_low, part_upp, vxsz, tetra_ids, part); + voxelize_args.tetra_ids = tetra_ids; + voxelize_args.part_def = args->part_def; + d3_set(voxelize_args.part_low, part_low); + d3_set(voxelize_args.part_upp, part_upp); + d3_set(voxelize_args.vxsz, args->vxsz); + voxelize_args.part = part; + voxelize_args.per_item_radcoefs = per_item_radcoefs; + voxelize_args.items = args->items; + voxelize_args.batch_size = args->batch_size; + res_local = voxelize_partition(atm, &voxelize_args); if(res_local == RES_OK) { partition_commit(part); } else { @@ -448,7 +667,7 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[]) /* Update progress bar */ n = (size_t)ATOMIC_INCR(&nparts_voxelized); - pcent = (int)((n * 100) / (nparts[0]*nparts[1]*nparts[2])); + pcent = (int)((n * 100) / (args->nparts[0]*args->nparts[1]*args->nparts[2])); #pragma omp critical if(pcent > progress) { progress = pcent; @@ -462,12 +681,139 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[]) #undef LOG_MSG exit: - darray_size_t_list_release(&per_thread_tetra_list); return (res_T)res; error: goto exit; } +static res_T +voxelize_atmosphere + (struct rnatm* atm, + /* Range of spectral bands to handle. Limits are inclusives */ + const size_t bands[2], + struct pool* pool, + const size_t batch_size) +{ + /* Batch of spectral items to process in a single voxelization */ + struct voxelize_batch_args batch_args = VOXELIZE_BATCH_ARGS_NULL; + size_t nbatches; + size_t ibatch; + + /* Working data structures */ + struct darray_size_t_list per_thread_tetra_list; + struct darray_radcoef_list per_thread_radcoef_list; + + /* List of spectral items to handle */ + struct spectral_item* items = NULL; + size_t nitems = 0; + + /* Voxelization parameters */ + double atm_low[3]; + double atm_upp[3]; + double vxsz[3]; + size_t nparts[3]; /* #partitions along the 3 axis */ + size_t nparts_adjusted; + size_t part_def; /* Definition of a partition */ + + /* Miscellaneous variables */ + size_t i; + res_T res = RES_OK; + + ASSERT(atm && bands && pool); + ASSERT(bands[0] <= bands[1]); + ASSERT(bands[1] < sck_get_bands_count(atm->gas.ck)); + + /* FIXME handle batch_size > 1 */ + ASSERT(batch_size == 1); + + darray_size_t_list_init(atm->allocator, &per_thread_tetra_list); + darray_radcoef_list_init(atm->allocator, &per_thread_radcoef_list); + + /* Allocate the per thread lists */ + res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads); + if(res != RES_OK) goto error; + res = darray_radcoef_list_resize(&per_thread_radcoef_list, atm->nthreads); + if(res != RES_OK) goto error; + + /* Setup the per thread and per item working data structures */ + FOR_EACH(i, 0, atm->nthreads) { + struct radcoefs* per_item_k = NULL; + per_item_k = MEM_CALLOC(atm->allocator, batch_size, sizeof(*per_item_k)); + if(!per_item_k) { res = RES_MEM_ERR; goto error; } + darray_radcoef_list_data_get(&per_thread_radcoef_list)[i] = per_item_k; + } + + res = setup_batches(atm, bands, &items, &nitems); + if(res != RES_OK) goto error; + + /* Recover the AABB atmosphere and compute the size of a voxel */ + SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp)); + vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0]; + vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1]; + vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2]; + + /* Number of partitions required to cover the entire atmosphere grid */ + part_def = pool_get_partition_definition(pool); + nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def; + nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def; + nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def; + + /* Adjust the #partitions allowing their indexing by their morton code */ + nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2])); + nparts_adjusted = round_up_pow2(nparts_adjusted); + nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted; + + /* Calculate the number of batches required to build the accelerating + * structures for the spectral bands submitted; currently we are building one + * octree per quadrature point of each band (i.e. per spectral item). And + * each octree requires a complete voxelization of the atmospheric meshes + * which takes time. To amortize this cost without prohitive increase in + * memory space, one fills up to N octrees from a single voxelization of the + * mesh. The number of batches corresponds to the total number of + * voxelization required */ + nbatches = (nitems + (batch_size - 1)/*ceil*/) / batch_size; + + /* Print the size of a voxel */ + log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz)); + + /* Setup regular batch arguments */ + batch_args.per_thread_tetra_list = &per_thread_tetra_list; + batch_args.per_thread_radcoef_list = &per_thread_radcoef_list; + batch_args.pool = pool; + batch_args.part_def = part_def; + batch_args.nparts[0] = nparts[0]; + batch_args.nparts[1] = nparts[1]; + batch_args.nparts[2] = nparts[2]; + batch_args.nparts_adjusted = nparts_adjusted; + d3_set(batch_args.atm_low, atm_low); + d3_set(batch_args.atm_upp, atm_upp); + d3_set(batch_args.vxsz, vxsz); + + /* Voxelize the batches */ + FOR_EACH(ibatch, 0, nbatches) { + size_t item_range[2]; + item_range[0] = ibatch * batch_size; + item_range[1] = MMIN(item_range[0] + batch_size, nitems); + + batch_args.items = items + item_range[0]; + batch_args.batch_size = item_range[1] - item_range[0]; + res = voxelize_batch(atm, &batch_args); + if(res != RES_OK) goto error; + } + +exit: + FOR_EACH(i, 0, darray_radcoef_list_size_get(&per_thread_radcoef_list)) { + MEM_RM(atm->allocator, + darray_radcoef_list_data_get(&per_thread_radcoef_list)[i]); + } + MEM_RM(atm->allocator, items); + darray_size_t_list_release(&per_thread_tetra_list); + darray_radcoef_list_release(&per_thread_radcoef_list); + return res; +error: + goto exit; +} + static void vx_get(const size_t xyz[3], const uint64_t mcode, void* dst, void* context) { @@ -581,7 +927,7 @@ static res_T build_octrees (struct rnatm* atm, const struct rnatm_create_args* args, - struct pool* pools[]) + struct pool* pool) { struct build_octrees_context ctx = BUILD_OCTREES_CONTEXT_NULL; struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL; @@ -589,13 +935,13 @@ build_octrees double low[3], upp[3]; size_t def[3]; res_T res = RES_OK; - ASSERT(atm && args && pools); + ASSERT(atm && args && pool); res = svx_device_create(atm->logger, &atm->svx_allocator, atm->verbose, &svx); if(res != RES_OK) goto error; /* Setup the build context */ - ctx.pool = pools[0]; + ctx.pool = pool; ctx.tau_threshold = args->optical_thickness; ctx.part = NULL; @@ -632,27 +978,8 @@ error: goto exit; } -static void -delete_per_thread_pools(struct rnatm* atm, struct pool* pools[]) -{ - size_t ipool; - ASSERT(atm && pools); - FOR_EACH(ipool, 0, atm->nthreads) { - if(pools[ipool]) pool_ref_put(pools[ipool]); - } - MEM_RM(atm->allocator, pools); -} - -static INLINE void -invalidate_per_thread_pools(struct rnatm* atm, struct pool* pools[]) -{ - size_t ipool; - ASSERT(atm && pools); - FOR_EACH(ipool, 0, atm->nthreads) pool_invalidate(pools[ipool]); -} - static res_T -create_per_thread_pools(struct rnatm* atm, struct pool** out_pools[]) +create_pool(struct rnatm* atm, struct pool** out_pool) { /* Empirical constant that defines the number of non empty partitions to * pre-allocate per thread to avoid contension between the thread building the @@ -670,46 +997,38 @@ create_per_thread_pools(struct rnatm* atm, struct pool** out_pools[]) const size_t PARTITION_DEFINITION = 8; struct pool_create_args pool_args = POOL_CREATE_ARGS_DEFAULT; - struct pool** pools = NULL; - size_t ipool = 0; + struct pool* pool = NULL; res_T res = RES_OK; - ASSERT(atm && out_pools); + ASSERT(atm && out_pool); - pools = MEM_CALLOC(atm->allocator, atm->nthreads, sizeof(*pools)); - if(!pools) { res = RES_MEM_ERR; goto error; } - - /* Create the per thread partition pools */ + /* Create the partition pool + * TODO add support of multiple spectral item per pool */ pool_args.npartitions = atm->nthreads * OVERALL_NPARTITIONS_PER_THREAD; pool_args.npreallocated_partitions = atm->nthreads * NPARTITIONS_PER_THREAD; pool_args.partition_definition = PARTITION_DEFINITION; pool_args.allocator = atm->allocator; - FOR_EACH(ipool, 0, atm->nthreads) { - res = pool_create(&pool_args, pools+ipool); - if(res != RES_OK) goto error; - } + res = pool_create(&pool_args, &pool); + if(res != RES_OK) goto error; exit: - *out_pools = pools; + *out_pool = pool; return res; error: - log_err(atm, "failed to allocate the per thread partition pool -- %s\n", + log_err(atm, "failed to create the partition pool -- %s\n", res_to_cstr(res)); - if(pools) { - delete_per_thread_pools(atm, pools); - pools = NULL; - } + if(pool) { pool_ref_put(pool); pool = NULL; } goto exit; } static res_T create_octrees(struct rnatm* atm, const struct rnatm_create_args* args) { - struct pool** pools = NULL; + struct pool* pool = NULL; ATOMIC res = RES_OK; ASSERT(atm); - res = create_per_thread_pools(atm, &pools); + res = create_pool(atm, &pool); if(res != RES_OK) goto error; log_info(atm, "partitionning of radiative properties (grid definition: %ux%ux%u)\n", @@ -723,21 +1042,23 @@ create_octrees(struct rnatm* atm, const struct rnatm_create_args* args) { #pragma omp section { - const res_T res_local = voxelize_atmosphere(atm, pools); + const size_t bands[2] = {0,0}; /* TODO handle multiple bands */ + const res_T res_local = voxelize_atmosphere + (atm, bands, pool, 1/*FIXME handle batch_size > 1*/); if(res_local != RES_OK) { log_err(atm, "atmosphere voxelization error -- %s\n", res_to_cstr((res_T)res)); - invalidate_per_thread_pools(atm, pools); + pool_invalidate(pool); ATOMIC_SET(&res, res_local); } } #pragma omp section { - const res_T res_local = build_octrees(atm, args, pools); + const res_T res_local = build_octrees(atm, args, pool); if(res_local != RES_OK) { log_err(atm, "error building octrees -- %s\n", res_to_cstr((res_T)res)); - invalidate_per_thread_pools(atm, pools); + pool_invalidate(pool); ATOMIC_SET(&res, res_local); } } @@ -745,7 +1066,7 @@ create_octrees(struct rnatm* atm, const struct rnatm_create_args* args) if(res != RES_OK) goto error; exit: - if(pools) delete_per_thread_pools(atm, pools); + if(pool) pool_ref_put(pool); return (res_T)res; error: goto exit;