commit 004fa7d27a5892e18bf3e2d17430929535ad2365
parent 28355a13fb86d4b1a3df478519c552cf2a12d2c1
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Fri, 29 Jul 2022 17:43:43 +0200
Preparing to build multiple octrees in one voxelization
Diffstat:
| M | src/rnatm_octree.c | | | 639 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- |
1 file changed, 480 insertions(+), 159 deletions(-)
diff --git a/src/rnatm_octree.c b/src/rnatm_octree.c
@@ -30,6 +30,7 @@
#include <rsys/clock_time.h>
#include <rsys/cstr.h>
+#include <rsys/double3.h>
#include <rsys/math.h>
#include <rsys/morton.h>
#include <rsys/rsys.h>
@@ -53,10 +54,157 @@ struct radcoefs {
float ks_min, ks_max;
float kext_min, kext_max;
};
+#define RADCOEFS_NULL__ { \
+ FLT_MAX, -FLT_MAX, \
+ FLT_MAX, -FLT_MAX, \
+ FLT_MAX, -FLT_MAX \
+}
+
+struct spectral_item {
+ size_t iband;
+ size_t iquad_pt;
+};
+#define SPECTRAL_ITEM_NULL__ {0, 0}
+
+/* Generate the dynamic array of radiative coefficient lists */
+#define DARRAY_NAME radcoef_list
+#define DARRAY_DATA struct radcoefs*
+#include <rsys/dynamic_array.h>
+
+struct voxelize_batch_args {
+ /* Working data structure */
+ struct darray_size_t_list* per_thread_tetra_list;
+ struct darray_radcoef_list* per_thread_radcoef_list;
+ struct pool* pool;
+
+ size_t part_def; /* Partition definition */
+ size_t nparts[3]; /* #partitions required to cover the entire grid */
+ size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */
+
+ /* AABB of the atmosphere */
+ double atm_low[3];
+ double atm_upp[3];
+
+ double vxsz[3]; /* Size of a voxel */
+
+ struct spectral_item* items;
+ size_t batch_size; /* #spectral items */
+};
+#define VOXELIZE_BATCH_ARGS_NULL__ { \
+ NULL, /* Per thread tetra list */ \
+ NULL, /* Per thread radiative coefs */ \
+ NULL, /* Partition pool */ \
+ \
+ 0, /* Partition definition */ \
+ {0,0,0}, /* #partitions to cover the entire grid */ \
+ 0, /* #partitions adjusted wrt morton indexing */ \
+ \
+ /* AABB of the atmosphere */ \
+ { DBL_MAX, DBL_MAX, DBL_MAX}, \
+ {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
+ \
+ {0,0,0}, /* Size of a voxel */ \
+ \
+ NULL, /* Spectral items */ \
+ 0 /* Batch size */ \
+}
+static const struct voxelize_batch_args VOXELIZE_BATCH_ARGS_NULL =
+ VOXELIZE_BATCH_ARGS_NULL__;
+
+struct voxelize_args {
+ struct darray_size_t* tetra_ids;
+ struct radcoefs* per_item_radcoefs;
+
+ struct partition* part;
+ size_t part_def; /* Partition definition */
+
+ /* AABB of the partition */
+ double part_low[3];
+ double part_upp[3];
+
+ double vxsz[3]; /* Size of a voxel */
+
+ const struct spectral_item* items;
+ size_t batch_size; /* #spectral items */
+};
+#define VOXELIZE_ARGS_NULL__ { \
+ NULL, /* Tetra ids */ \
+ NULL, /* Radiative coefs */ \
+ \
+ NULL, /* Partition */ \
+ 0, /* Partition definition */ \
+ \
+ /* AABB of the partition */ \
+ { DBL_MAX, DBL_MAX, DBL_MAX}, \
+ {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
+ \
+ {0,0,0}, /* Size of a voxel */ \
+ \
+ NULL, /* Spectral items */ \
+ 0 /* Batch size */ \
+}
+static const struct voxelize_args VOXELIZE_ARGS_NULL = VOXELIZE_ARGS_NULL__;
/*******************************************************************************
* Helper functions
******************************************************************************/
+static INLINE res_T
+check_voxelize_batch_args
+ (struct rnatm* atm,
+ const struct voxelize_batch_args* args)
+{
+ size_t i;
+ if(!args
+ || !args->per_thread_tetra_list
+ || !args->per_thread_radcoef_list
+ || !args->pool
+ || !IS_POW2(args->part_def)
+ || !args->nparts[0]
+ || !args->nparts[1]
+ || !args->nparts[2]
+ || !IS_POW2(args->nparts_adjusted)
+ || args->nparts_adjusted < args->nparts[0]*args->nparts[1]*args->nparts[2]
+ || args->atm_low[0] >= args->atm_upp[0]
+ || args->atm_low[1] >= args->atm_upp[1]
+ || args->atm_low[2] >= args->atm_upp[2]
+ || !args->vxsz[0]
+ || !args->vxsz[1]
+ || !args->vxsz[2]
+ || !args->items
+ || !args->batch_size) {
+ return RES_BAD_ARG;
+ }
+ if(atm->nthreads != darray_size_t_list_size_get(args->per_thread_tetra_list)
+ || atm->nthreads != darray_radcoef_list_size_get(args->per_thread_radcoef_list)) {
+ return RES_BAD_ARG;
+ }
+ FOR_EACH(i, 0, atm->nthreads) {
+ if(!darray_radcoef_list_cdata_get(args->per_thread_radcoef_list)[i]) {
+ return RES_BAD_ARG;
+ }
+ }
+ return RES_OK;
+}
+
+static INLINE res_T
+check_voxelize_args(const struct voxelize_args* args)
+{
+ if(!args->tetra_ids
+ || !args->per_item_radcoefs
+ || args->part_low[0] >= args->part_upp[0]
+ || args->part_low[1] >= args->part_upp[1]
+ || args->part_low[2] >= args->part_upp[2]
+ || !IS_POW2(args->part_def)
+ || !args->vxsz[0]
+ || !args->vxsz[1]
+ || !args->vxsz[2]
+ || !args->items
+ || !args->batch_size) {
+ return RES_BAD_ARG;
+ }
+ return RES_OK;
+}
+
static FINLINE unsigned
round_pow2(const unsigned val)
{
@@ -68,6 +216,76 @@ round_pow2(const unsigned val)
}
}
+/* Return the number of quadrature points for the submitted range of bands.
+ * boundaries are inclusives */
+static INLINE size_t
+compute_nquad_pts(const struct rnatm* atm, const size_t bands[2])
+{
+ size_t nquad_pts = 0;
+ size_t nbands = 0;
+ size_t i = 0;
+ ASSERT(atm && bands && bands[0] <= bands[1]);
+
+ nbands = bands[1] - bands[0] + 1;
+ FOR_EACH(i, 0, nbands) {
+ struct sck_band band;
+ SCK(get_band(atm->gas.ck, bands[i], &band));
+ nquad_pts += band.quad_pts_count;
+ }
+ return nquad_pts;
+}
+
+static res_T
+setup_batches
+ (struct rnatm* atm,
+ const size_t bands[2], /* Boundaries are inclusives */
+ struct spectral_item** out_items,
+ size_t* out_nitems)
+{
+ struct spectral_item* items = NULL;
+ size_t iitem;
+ size_t nitems;
+ size_t nbands;
+ size_t i;
+ res_T res = RES_OK;
+ ASSERT(atm && out_items && out_nitems && bands && bands[0] <= bands[1]);
+
+ nitems = compute_nquad_pts(atm, bands);
+ items = MEM_CALLOC(atm->allocator, nitems, sizeof(*items));
+ if(items == NULL) {
+ log_err(atm, "%s: failed to allocate memory\n", FUNC_NAME);
+ res = RES_MEM_ERR;
+ goto error;
+ }
+
+ iitem = 0;
+ nbands = bands[1] - bands[0] + 1;
+ FOR_EACH(i, 0, nbands) {
+ struct sck_band band;
+ const size_t iband = bands[i];
+ size_t iquad_pt;
+
+ SCK(get_band(atm->gas.ck, iband, &band));
+ FOR_EACH(iquad_pt, 0, band.quad_pts_count) {
+ ASSERT(iitem < nitems);
+ items[iitem].iband = iband;
+ items[iitem].iquad_pt = iquad_pt;
+ }
+ }
+
+exit:
+ *out_items = items;
+ *out_nitems = nitems;
+ return res;
+error:
+ if(items) {
+ MEM_RM(atm->allocator, items);
+ items = NULL;
+ nitems = 0;
+ }
+ goto exit;
+}
+
static INLINE void
register_tetra
(const struct suvm_primitive* prim,
@@ -220,26 +438,18 @@ update_voxel
}
static res_T
-voxelize_gas
- (struct rnatm* atm,
- const double part_low[3],
- const double part_upp[3],
- const double vxsz[3],
- const struct darray_size_t* tetra_ids,
- struct partition* part)
+voxelize_gas(struct rnatm* atm, const struct voxelize_args* args)
{
size_t i;
- ASSERT(atm && part_low && part_upp && tetra_ids && part);
- ASSERT(vxsz[0] > 0 && vxsz[1] > 0 && vxsz[2] > 0);
- ASSERT(part_low[0] < part_upp[0]);
- ASSERT(part_low[1] < part_upp[1]);
- ASSERT(part_low[2] < part_upp[2]);
+ ASSERT(atm && check_voxelize_args(args) == RES_OK);
- partition_clear_voxels(part);
+ /* FIXME one has to support a batch_size > 1 */
+ ASSERT(args->batch_size == 1);
- FOR_EACH(i, 0, darray_size_t_size_get(tetra_ids)) {
+ partition_clear_voxels(args->part);
+
+ FOR_EACH(i, 0, darray_size_t_size_get(args->tetra_ids)) {
struct suvm_primitive tetra;
- struct radcoefs radcoefs;
struct suvm_polyhedron poly;
double poly_low[3];
double poly_upp[3];
@@ -249,59 +459,82 @@ voxelize_gas
uint32_t ivx_upp[3];
uint32_t ivx[3];
uint64_t mcode[3]; /* Cache of 3D morton code */
- const size_t itetra = darray_size_t_cdata_get(tetra_ids)[i];
+ const size_t itetra = darray_size_t_cdata_get(args->tetra_ids)[i];
enum suvm_intersection_type intersect;
/* Recover the tetrahedron and setup its polyhedron */
SUVM(volume_get_primitive(atm->gas.volume, itetra, &tetra));
SUVM(primitive_setup_polyhedron(&tetra, &poly));
- ASSERT(poly.lower[0] <= part_upp[0] && poly.upper[0] >= part_low[0]);
- ASSERT(poly.lower[1] <= part_upp[1] && poly.upper[1] >= part_low[1]);
- ASSERT(poly.lower[2] <= part_upp[2] && poly.upper[2] >= part_low[2]);
+ ASSERT(poly.lower[0] <= args->part_upp[0]);
+ ASSERT(poly.lower[1] <= args->part_upp[1]);
+ ASSERT(poly.lower[2] <= args->part_upp[2]);
+ ASSERT(poly.upper[0] >= args->part_low[0]);
+ ASSERT(poly.upper[1] >= args->part_low[1]);
+ ASSERT(poly.upper[2] >= args->part_low[2]);
/* Clamp the AABB of the polyhedra to the partition bounds */
- poly_low[0] = MMAX(poly.lower[0], part_low[0]);
- poly_low[1] = MMAX(poly.lower[1], part_low[1]);
- poly_low[2] = MMAX(poly.lower[2], part_low[2]);
- poly_upp[0] = MMIN(poly.upper[0], part_upp[0]);
- poly_upp[1] = MMIN(poly.upper[1], part_upp[1]);
- poly_upp[2] = MMIN(poly.upper[2], part_upp[2]);
+ poly_low[0] = MMAX(poly.lower[0], args->part_low[0]);
+ poly_low[1] = MMAX(poly.lower[1], args->part_low[1]);
+ poly_low[2] = MMAX(poly.lower[2], args->part_low[2]);
+ poly_upp[0] = MMIN(poly.upper[0], args->part_upp[0]);
+ poly_upp[1] = MMIN(poly.upper[1], args->part_upp[1]);
+ poly_upp[2] = MMIN(poly.upper[2], args->part_upp[2]);
/* Transform the AABB of the polyhedron in voxel space of the partition */
- ivx_low[0] = (uint32_t)((poly_low[0] - part_low[0]) / vxsz[0]);
- ivx_low[1] = (uint32_t)((poly_low[1] - part_low[1]) / vxsz[1]);
- ivx_low[2] = (uint32_t)((poly_low[2] - part_low[2]) / vxsz[2]);
- ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - part_low[0]) / vxsz[0]);
- ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - part_low[1]) / vxsz[1]);
- ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - part_low[2]) / vxsz[2]);
- ASSERT(ivx_upp[0] <= partition_get_definition(part));
- ASSERT(ivx_upp[1] <= partition_get_definition(part));
- ASSERT(ivx_upp[2] <= partition_get_definition(part));
-
- /* Compute the range of the tetrahedron radiative coefficients.
- * TODO setup the band and quadrature point */
- setup_tetra_radcoefs(atm, &tetra, 0/*iband*/, 0/*iquad*/, &radcoefs);
+ ivx_low[0] = (uint32_t)((poly_low[0] - args->part_low[0]) / args->vxsz[0]);
+ ivx_low[1] = (uint32_t)((poly_low[1] - args->part_low[1]) / args->vxsz[1]);
+ ivx_low[2] = (uint32_t)((poly_low[2] - args->part_low[2]) / args->vxsz[2]);
+ ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - args->part_low[0]) / args->vxsz[0]);
+ ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - args->part_low[1]) / args->vxsz[1]);
+ ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - args->part_low[2]) / args->vxsz[2]);
+ ASSERT(ivx_upp[0] <= args->part_def);
+ ASSERT(ivx_upp[1] <= args->part_def);
+ ASSERT(ivx_upp[2] <= args->part_def);
+
+ /* Compute the range of the tetrahedron radiative coefficients
+ * FIXME handle batch_size > 1 */
+ #if 1
+ setup_tetra_radcoefs(atm, &tetra,
+ args->items[0].iband,
+ args->items[0].iquad_pt,
+ &args->per_item_radcoefs[0]);
+ #else
+ FOR_EACH(item, 0, args->batch_size) {
+ const size_t iband = args->items[item].iband;
+ const size_t iquad_pt = args->items[item].iquad_pt;
+ struct radcoefs* radcoefs = args->per_item_radcoefs + item;
+ setup_tetra_radcoefs(atm, &tetra, iband, iquad_pt, radcoefs);
+ }
+ #endif
/* Iterate voxels intersected by the AABB of the polyedron */
FOR_EACH(ivx[2], ivx_low[2], ivx_upp[2]) {
- vx_low[2] = (float)((double)ivx[2]*vxsz[2] + part_low[2]);
- vx_upp[2] = vx_low[2] + (float)vxsz[2];
+ vx_low[2] = (float)((double)ivx[2]*args->vxsz[2] + args->part_low[2]);
+ vx_upp[2] = vx_low[2] + (float)args->vxsz[2];
mcode[2] = morton3D_encode_u21(ivx[2]);
FOR_EACH(ivx[1], ivx_low[1], ivx_upp[1]) {
- vx_low[1] = (float)((double)ivx[1]*vxsz[1] + part_low[1]);
- vx_upp[1] = vx_low[1] + (float)vxsz[1];
+ vx_low[1] = (float)((double)ivx[1]*args->vxsz[1] + args->part_low[1]);
+ vx_upp[1] = vx_low[1] + (float)args->vxsz[1];
mcode[1] = (morton3D_encode_u21(ivx[1]) << 1) | mcode[2];
FOR_EACH(ivx[0], ivx_low[0], ivx_upp[0]) {
- vx_low[0] = (float)((double)ivx[0]*vxsz[0] + part_low[0]);
- vx_upp[0] = vx_low[0] + (float)vxsz[0];
+ vx_low[0] = (float)((double)ivx[0]*args->vxsz[0] + args->part_low[0]);
+ vx_upp[0] = vx_low[0] + (float)args->vxsz[0];
mcode[0] = (morton3D_encode_u21(ivx[0]) << 2) | mcode[1];
intersect = suvm_polyhedron_intersect_aabb(&poly, vx_low, vx_upp);
if(intersect == SUVM_INTERSECT_NONE) continue;
- update_voxel(atm, &radcoefs, part, mcode[0]);
+ /* Update the intersected voxel FIXME handle batch_size > 1*/
+ #if 1
+ update_voxel(atm, &args->per_item_radcoefs[0], args->part, mcode[0]);
+ #else
+ FOR_EACH(item, 0, args->batch_size) {
+ struct radcoefs* radcoefs = args->per_item_radcoefs + item;
+ update_voxel(atm, radcoefs, part, mcode[0]);
+ }
+ #endif
}
}
}
@@ -311,33 +544,28 @@ voxelize_gas
}
static res_T
-voxelize_partition
- (struct rnatm* atm,
- const double part_low[3],
- const double part_upp[3],
- const double vxsz[3],
- struct darray_size_t* tetra_ids,
- struct partition* part)
+voxelize_partition(struct rnatm* atm, const struct voxelize_args* args)
{
res_T res = RES_OK;
- ASSERT(atm && part_low && part_upp && tetra_ids && part);
- ASSERT(part_low[0] < part_upp[0]);
- ASSERT(part_low[1] < part_upp[1]);
- ASSERT(part_low[2] < part_upp[2]);
+ ASSERT(atm && check_voxelize_args(args) == RES_OK);
/* Find the list of gas tetrahedra that overlap the partition */
- darray_size_t_clear(tetra_ids);
+ darray_size_t_clear(args->tetra_ids);
res = suvm_volume_intersect_aabb
- (atm->gas.volume, part_low, part_upp, register_tetra, tetra_ids);
+ (atm->gas.volume,
+ args->part_low,
+ args->part_upp,
+ register_tetra,
+ args->tetra_ids);
if(res != RES_OK) goto error;
/* The partition is not covered by any tetrahedron */
- if(darray_size_t_size_get(tetra_ids) == 0) {
- partition_empty(part);
+ if(darray_size_t_size_get(args->tetra_ids) == 0) {
+ partition_empty(args->part);
goto exit;
}
- res = voxelize_gas(atm, part_low, part_upp, vxsz, tetra_ids, part);
+ res = voxelize_gas(atm, args);
if(res != RES_OK) goto error;
/* TODO voxelise aerosols */
@@ -349,45 +577,16 @@ error:
}
static res_T
-voxelize_atmosphere(struct rnatm* atm, struct pool* pools[])
+voxelize_batch(struct rnatm* atm, const struct voxelize_batch_args* args)
{
- struct darray_size_t_list per_thread_tetra_list;
- double atm_low[3];
- double atm_upp[3];
- double vxsz[3];
- size_t nparts[3]; /* #partitions along the 3 axis */
- size_t nparts_adjusted;
- size_t part_def; /* Definition of a partition */
int64_t i;
int progress = 0;
ATOMIC nparts_voxelized = 0;
ATOMIC res = RES_OK;
- ASSERT(atm);
+ ASSERT(atm && check_voxelize_batch_args(atm, args) == RES_OK);
- /* Allocate the per thread list of tetra list */
- darray_size_t_list_init(atm->allocator, &per_thread_tetra_list);
- res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads);
- if(res != RES_OK) goto error;
-
- /* Recover the AABB atmosphere and compute the size of a voxel */
- SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp));
- vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0];
- vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1];
- vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2];
-
- /* Number of partitions required to cover the entire atmosphere grid */
- part_def = pool_get_partition_definition(pools[0]);
- nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def;
- nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def;
- nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def;
-
- /* Adjust the #partitions allowing their indexing by their morton code */
- nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2]));
- nparts_adjusted = round_up_pow2(nparts_adjusted);
- nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted;
-
- /* Print the size of a voxel */
- log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz));
+ /* FIXME one has to support a batch_size > 1 */
+ ASSERT(args->batch_size == 1);
/* Print status message */
#define LOG_MSG "voxelize atmosphere: %3d%%\r"
@@ -397,9 +596,11 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[])
* and voxelizes the tetrahedrons that overlap them */
omp_set_num_threads((int)atm->nthreads);
#pragma omp parallel for schedule(static, 1/*chunk size*/)
- for(i = 0; i < (int64_t)nparts_adjusted; ++i) {
+ for(i = 0; i < (int64_t)args->nparts_adjusted; ++i) {
+ struct voxelize_args voxelize_args = VOXELIZE_ARGS_NULL;
struct darray_size_t* tetra_ids = NULL;
struct partition* part = NULL;
+ struct radcoefs* per_item_radcoefs = NULL;
double part_low[3];
double part_upp[3];
uint32_t part_ids[3];
@@ -410,34 +611,52 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[])
if(ATOMIC_GET(&res) != RES_OK) continue;
- /* Recover current partition */
- part = pool_next_partition(pools[0]);
+ /* Recover the batch partitions.
+ * FIXME add support of multiple spectral item
+ * per partition to allow several voxels the _same_ partition id */
+ part = pool_next_partition(args->pool);
if(!part) { ATOMIC_SET(&res, RES_UNKNOWN_ERR); continue; };
- /* Is the partition out of bounds of the atmosphere grid*/
+ /* Is the current partition out of bounds of the atmosphere grid */
morton_xyz_decode_u21((uint64_t)partition_get_id(part), part_ids);
- if(part_ids[0] >= nparts[0]
- || part_ids[1] >= nparts[1]
- || part_ids[2] >= nparts[2]) {
+ if(part_ids[0] >= args->nparts[0]
+ || part_ids[1] >= args->nparts[1]
+ || part_ids[2] >= args->nparts[2]) {
partition_free(part);
continue;
}
/* Compute the AABB of the partition */
- part_low[0] = (double)(part_ids[0] * part_def) * vxsz[0] + atm_low[0];
- part_low[1] = (double)(part_ids[1] * part_def) * vxsz[1] + atm_low[1];
- part_low[2] = (double)(part_ids[2] * part_def) * vxsz[2] + atm_low[2];
- part_upp[0] = part_low[0] + (double)part_def * vxsz[0];
- part_upp[1] = part_low[1] + (double)part_def * vxsz[1];
- part_upp[2] = part_low[2] + (double)part_def * vxsz[2];
+ part_low[0] = (double)(part_ids[0] * args->part_def) * args->vxsz[0];
+ part_low[1] = (double)(part_ids[1] * args->part_def) * args->vxsz[1];
+ part_low[2] = (double)(part_ids[2] * args->part_def) * args->vxsz[2];
+ part_low[0] = part_low[0] + args->atm_low[0];
+ part_low[1] = part_low[1] + args->atm_low[1];
+ part_low[2] = part_low[2] + args->atm_low[2];
+ part_upp[0] = part_low[0] + (double)args->part_def * args->vxsz[0];
+ part_upp[1] = part_low[1] + (double)args->part_def * args->vxsz[1];
+ part_upp[2] = part_low[2] + (double)args->part_def * args->vxsz[2];
/* Retrieves the array where to store the indices of tetrahedra that
* overlap the partition */
- tetra_ids = darray_size_t_list_data_get(&per_thread_tetra_list) + ithread;
+ tetra_ids = darray_size_t_list_data_get(args->per_thread_tetra_list) + ithread;
+
+ /* Retrieve the spectral item data structure used to store radiative
+ * coeficients */
+ per_item_radcoefs = darray_radcoef_list_data_get
+ (args->per_thread_radcoef_list)[ithread];
/* Voxelizes the partition and once done, commits */
- res_local = voxelize_partition
- (atm, part_low, part_upp, vxsz, tetra_ids, part);
+ voxelize_args.tetra_ids = tetra_ids;
+ voxelize_args.part_def = args->part_def;
+ d3_set(voxelize_args.part_low, part_low);
+ d3_set(voxelize_args.part_upp, part_upp);
+ d3_set(voxelize_args.vxsz, args->vxsz);
+ voxelize_args.part = part;
+ voxelize_args.per_item_radcoefs = per_item_radcoefs;
+ voxelize_args.items = args->items;
+ voxelize_args.batch_size = args->batch_size;
+ res_local = voxelize_partition(atm, &voxelize_args);
if(res_local == RES_OK) {
partition_commit(part);
} else {
@@ -448,7 +667,7 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[])
/* Update progress bar */
n = (size_t)ATOMIC_INCR(&nparts_voxelized);
- pcent = (int)((n * 100) / (nparts[0]*nparts[1]*nparts[2]));
+ pcent = (int)((n * 100) / (args->nparts[0]*args->nparts[1]*args->nparts[2]));
#pragma omp critical
if(pcent > progress) {
progress = pcent;
@@ -462,12 +681,139 @@ voxelize_atmosphere(struct rnatm* atm, struct pool* pools[])
#undef LOG_MSG
exit:
- darray_size_t_list_release(&per_thread_tetra_list);
return (res_T)res;
error:
goto exit;
}
+static res_T
+voxelize_atmosphere
+ (struct rnatm* atm,
+ /* Range of spectral bands to handle. Limits are inclusives */
+ const size_t bands[2],
+ struct pool* pool,
+ const size_t batch_size)
+{
+ /* Batch of spectral items to process in a single voxelization */
+ struct voxelize_batch_args batch_args = VOXELIZE_BATCH_ARGS_NULL;
+ size_t nbatches;
+ size_t ibatch;
+
+ /* Working data structures */
+ struct darray_size_t_list per_thread_tetra_list;
+ struct darray_radcoef_list per_thread_radcoef_list;
+
+ /* List of spectral items to handle */
+ struct spectral_item* items = NULL;
+ size_t nitems = 0;
+
+ /* Voxelization parameters */
+ double atm_low[3];
+ double atm_upp[3];
+ double vxsz[3];
+ size_t nparts[3]; /* #partitions along the 3 axis */
+ size_t nparts_adjusted;
+ size_t part_def; /* Definition of a partition */
+
+ /* Miscellaneous variables */
+ size_t i;
+ res_T res = RES_OK;
+
+ ASSERT(atm && bands && pool);
+ ASSERT(bands[0] <= bands[1]);
+ ASSERT(bands[1] < sck_get_bands_count(atm->gas.ck));
+
+ /* FIXME handle batch_size > 1 */
+ ASSERT(batch_size == 1);
+
+ darray_size_t_list_init(atm->allocator, &per_thread_tetra_list);
+ darray_radcoef_list_init(atm->allocator, &per_thread_radcoef_list);
+
+ /* Allocate the per thread lists */
+ res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads);
+ if(res != RES_OK) goto error;
+ res = darray_radcoef_list_resize(&per_thread_radcoef_list, atm->nthreads);
+ if(res != RES_OK) goto error;
+
+ /* Setup the per thread and per item working data structures */
+ FOR_EACH(i, 0, atm->nthreads) {
+ struct radcoefs* per_item_k = NULL;
+ per_item_k = MEM_CALLOC(atm->allocator, batch_size, sizeof(*per_item_k));
+ if(!per_item_k) { res = RES_MEM_ERR; goto error; }
+ darray_radcoef_list_data_get(&per_thread_radcoef_list)[i] = per_item_k;
+ }
+
+ res = setup_batches(atm, bands, &items, &nitems);
+ if(res != RES_OK) goto error;
+
+ /* Recover the AABB atmosphere and compute the size of a voxel */
+ SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp));
+ vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0];
+ vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1];
+ vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2];
+
+ /* Number of partitions required to cover the entire atmosphere grid */
+ part_def = pool_get_partition_definition(pool);
+ nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def;
+ nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def;
+ nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def;
+
+ /* Adjust the #partitions allowing their indexing by their morton code */
+ nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2]));
+ nparts_adjusted = round_up_pow2(nparts_adjusted);
+ nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted;
+
+ /* Calculate the number of batches required to build the accelerating
+ * structures for the spectral bands submitted; currently we are building one
+ * octree per quadrature point of each band (i.e. per spectral item). And
+ * each octree requires a complete voxelization of the atmospheric meshes
+ * which takes time. To amortize this cost without prohitive increase in
+ * memory space, one fills up to N octrees from a single voxelization of the
+ * mesh. The number of batches corresponds to the total number of
+ * voxelization required */
+ nbatches = (nitems + (batch_size - 1)/*ceil*/) / batch_size;
+
+ /* Print the size of a voxel */
+ log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz));
+
+ /* Setup regular batch arguments */
+ batch_args.per_thread_tetra_list = &per_thread_tetra_list;
+ batch_args.per_thread_radcoef_list = &per_thread_radcoef_list;
+ batch_args.pool = pool;
+ batch_args.part_def = part_def;
+ batch_args.nparts[0] = nparts[0];
+ batch_args.nparts[1] = nparts[1];
+ batch_args.nparts[2] = nparts[2];
+ batch_args.nparts_adjusted = nparts_adjusted;
+ d3_set(batch_args.atm_low, atm_low);
+ d3_set(batch_args.atm_upp, atm_upp);
+ d3_set(batch_args.vxsz, vxsz);
+
+ /* Voxelize the batches */
+ FOR_EACH(ibatch, 0, nbatches) {
+ size_t item_range[2];
+ item_range[0] = ibatch * batch_size;
+ item_range[1] = MMIN(item_range[0] + batch_size, nitems);
+
+ batch_args.items = items + item_range[0];
+ batch_args.batch_size = item_range[1] - item_range[0];
+ res = voxelize_batch(atm, &batch_args);
+ if(res != RES_OK) goto error;
+ }
+
+exit:
+ FOR_EACH(i, 0, darray_radcoef_list_size_get(&per_thread_radcoef_list)) {
+ MEM_RM(atm->allocator,
+ darray_radcoef_list_data_get(&per_thread_radcoef_list)[i]);
+ }
+ MEM_RM(atm->allocator, items);
+ darray_size_t_list_release(&per_thread_tetra_list);
+ darray_radcoef_list_release(&per_thread_radcoef_list);
+ return res;
+error:
+ goto exit;
+}
+
static void
vx_get(const size_t xyz[3], const uint64_t mcode, void* dst, void* context)
{
@@ -581,7 +927,7 @@ static res_T
build_octrees
(struct rnatm* atm,
const struct rnatm_create_args* args,
- struct pool* pools[])
+ struct pool* pool)
{
struct build_octrees_context ctx = BUILD_OCTREES_CONTEXT_NULL;
struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL;
@@ -589,13 +935,13 @@ build_octrees
double low[3], upp[3];
size_t def[3];
res_T res = RES_OK;
- ASSERT(atm && args && pools);
+ ASSERT(atm && args && pool);
res = svx_device_create(atm->logger, &atm->svx_allocator, atm->verbose, &svx);
if(res != RES_OK) goto error;
/* Setup the build context */
- ctx.pool = pools[0];
+ ctx.pool = pool;
ctx.tau_threshold = args->optical_thickness;
ctx.part = NULL;
@@ -632,27 +978,8 @@ error:
goto exit;
}
-static void
-delete_per_thread_pools(struct rnatm* atm, struct pool* pools[])
-{
- size_t ipool;
- ASSERT(atm && pools);
- FOR_EACH(ipool, 0, atm->nthreads) {
- if(pools[ipool]) pool_ref_put(pools[ipool]);
- }
- MEM_RM(atm->allocator, pools);
-}
-
-static INLINE void
-invalidate_per_thread_pools(struct rnatm* atm, struct pool* pools[])
-{
- size_t ipool;
- ASSERT(atm && pools);
- FOR_EACH(ipool, 0, atm->nthreads) pool_invalidate(pools[ipool]);
-}
-
static res_T
-create_per_thread_pools(struct rnatm* atm, struct pool** out_pools[])
+create_pool(struct rnatm* atm, struct pool** out_pool)
{
/* Empirical constant that defines the number of non empty partitions to
* pre-allocate per thread to avoid contension between the thread building the
@@ -670,46 +997,38 @@ create_per_thread_pools(struct rnatm* atm, struct pool** out_pools[])
const size_t PARTITION_DEFINITION = 8;
struct pool_create_args pool_args = POOL_CREATE_ARGS_DEFAULT;
- struct pool** pools = NULL;
- size_t ipool = 0;
+ struct pool* pool = NULL;
res_T res = RES_OK;
- ASSERT(atm && out_pools);
+ ASSERT(atm && out_pool);
- pools = MEM_CALLOC(atm->allocator, atm->nthreads, sizeof(*pools));
- if(!pools) { res = RES_MEM_ERR; goto error; }
-
- /* Create the per thread partition pools */
+ /* Create the partition pool
+ * TODO add support of multiple spectral item per pool */
pool_args.npartitions = atm->nthreads * OVERALL_NPARTITIONS_PER_THREAD;
pool_args.npreallocated_partitions = atm->nthreads * NPARTITIONS_PER_THREAD;
pool_args.partition_definition = PARTITION_DEFINITION;
pool_args.allocator = atm->allocator;
- FOR_EACH(ipool, 0, atm->nthreads) {
- res = pool_create(&pool_args, pools+ipool);
- if(res != RES_OK) goto error;
- }
+ res = pool_create(&pool_args, &pool);
+ if(res != RES_OK) goto error;
exit:
- *out_pools = pools;
+ *out_pool = pool;
return res;
error:
- log_err(atm, "failed to allocate the per thread partition pool -- %s\n",
+ log_err(atm, "failed to create the partition pool -- %s\n",
res_to_cstr(res));
- if(pools) {
- delete_per_thread_pools(atm, pools);
- pools = NULL;
- }
+ if(pool) { pool_ref_put(pool); pool = NULL; }
goto exit;
}
static res_T
create_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
- struct pool** pools = NULL;
+ struct pool* pool = NULL;
ATOMIC res = RES_OK;
ASSERT(atm);
- res = create_per_thread_pools(atm, &pools);
+ res = create_pool(atm, &pool);
if(res != RES_OK) goto error;
log_info(atm, "partitionning of radiative properties (grid definition: %ux%ux%u)\n",
@@ -723,21 +1042,23 @@ create_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
#pragma omp section
{
- const res_T res_local = voxelize_atmosphere(atm, pools);
+ const size_t bands[2] = {0,0}; /* TODO handle multiple bands */
+ const res_T res_local = voxelize_atmosphere
+ (atm, bands, pool, 1/*FIXME handle batch_size > 1*/);
if(res_local != RES_OK) {
log_err(atm, "atmosphere voxelization error -- %s\n",
res_to_cstr((res_T)res));
- invalidate_per_thread_pools(atm, pools);
+ pool_invalidate(pool);
ATOMIC_SET(&res, res_local);
}
}
#pragma omp section
{
- const res_T res_local = build_octrees(atm, args, pools);
+ const res_T res_local = build_octrees(atm, args, pool);
if(res_local != RES_OK) {
log_err(atm, "error building octrees -- %s\n", res_to_cstr((res_T)res));
- invalidate_per_thread_pools(atm, pools);
+ pool_invalidate(pool);
ATOMIC_SET(&res, res_local);
}
}
@@ -745,7 +1066,7 @@ create_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
if(res != RES_OK) goto error;
exit:
- if(pools) delete_per_thread_pools(atm, pools);
+ if(pool) pool_ref_put(pool);
return (res_T)res;
error:
goto exit;