commit 15717a07e44b95260b92114d87753a399ace4ee2
parent 25db0cef4cc231bbb80f8288ee97b4e9904f40d5
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Fri, 23 Jan 2026 12:42:24 +0100
No longer compress lines
The compression ratio was actually quite low (less than 2:1), while
access performance was significantly degraded (between 1 and 2 orders of
magnitude), even for linear access, where the naive implementation of
the cache worked well.
Diffstat:
8 files changed, 61 insertions(+), 616 deletions(-)
diff --git a/Makefile b/Makefile
@@ -33,7 +33,6 @@ all: library tests utils
################################################################################
SRC = \
src/shtr.c \
- src/shtr_cache.c \
src/shtr_isotope_metadata.c \
src/shtr_line_list.c \
src/shtr_param.c
@@ -66,7 +65,6 @@ libshtr.o: $(OBJ)
.config: config.mk
$(PKG_CONFIG) --atleast-version $(RSYS_VERSION) rsys
- $(PKG_CONFIG) --atleast-version $(ZLIB_VERSION) zlib
echo "config done" > $@
.SUFFIXES: .c .d .o
diff --git a/config.mk b/config.mk
@@ -29,10 +29,9 @@ PCFLAGS_STATIC = --static
PCFLAGS = $(PCFLAGS_$(LIB_TYPE))
RSYS_VERSION = 0.14
-ZLIB_VERSION = 1
-INCS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags rsys zlib)
-LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs rsys zlib)
+INCS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags rsys)
+LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs rsys)
################################################################################
# Compilation options
diff --git a/shtr.pc.in b/shtr.pc.in
@@ -3,10 +3,8 @@ includedir=${prefix}/include
libdir=${prefix}/lib
Requires: rsys >= @RSYS_VERSION@
-Requieres.private: zlib >= @ZLIB_VERSION@
-Name: Star-STL
+Name: Star-HITRAN
Description: Star HITRAN library
Version: @VERSION@
Libs: -L${libdir} -lshtr
-Libs.private: -lm
CFlags: -I${includedir}
diff --git a/src/shtr.h b/src/shtr.h
@@ -44,7 +44,6 @@
#define SHTR_MAX_MOLECULES_COUNT 100
#define SHTR_MAX_ISOTOPES_COUNT 10
-#define SHTR_DEFAULT_COMPRESSION INT_MAX
struct shtr_isotope {
double abundance; /* in ]0, 1] */
@@ -163,9 +162,8 @@ static const struct shtr_create_args SHTR_CREATE_ARGS_DEFAULT =
struct shtr_line_list_load_args {
const char* filename; /* Name of the file to load or of the provided stream */
FILE* file; /* Stream from where data are loaded. NULL <=> load from file */
- int compression_level;
};
-#define SHTR_LINE_LIST_LOAD_ARGS_NULL__ {NULL, NULL, SHTR_DEFAULT_COMPRESSION}
+#define SHTR_LINE_LIST_LOAD_ARGS_NULL__ {NULL, NULL}
static const struct shtr_line_list_load_args SHTR_LINE_LIST_LOAD_ARGS_NULL =
SHTR_LINE_LIST_LOAD_ARGS_NULL__;
diff --git a/src/shtr_cache.c b/src/shtr_cache.c
@@ -1,143 +0,0 @@
-/* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com)
- * Copyright (C) 2025, 2026 Université de Lorraine
- * Copyright (C) 2022 Centre National de la Recherche Scientifique
- * Copyright (C) 2022 Université Paul Sabatier
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include "shtr_c.h"
-#include "shtr_cache.h"
-
-#include <rsys/cstr.h>
-#include <rsys/mutex.h>
-#include <rsys/ref_count.h>
-
-#include <string.h> /* memcpy */
-
-#define CHUNK_ID_NONE SIZE_MAX
-
-/* Simple, dumb cache structure storing uncompressed lines from the last chunk
- * accessed. It should improve linear access performance, but not random access
- * performance, which will be disastrous because most accesses will require
- * decompressing an entire block of lines, only one of which will be accessed
- * before the block is discarded.
- *
- * TODO: implement a more general LRU cache */
-struct cache {
- size_t chunk_id;
- struct line lines[NLINES_PER_CHUNK];
-
- struct mutex* mutex;
- struct shtr* shtr;
- ref_T ref;
-};
-
-/*******************************************************************************
- * Helper functions
- ******************************************************************************/
-static void
-release_cache(ref_T* ref)
-{
- struct cache* cache = CONTAINER_OF(ref, struct cache, ref);
- struct shtr* shtr = NULL;
- ASSERT(ref);
- shtr = cache->shtr;
- if(cache->mutex) mutex_destroy(cache->mutex);
- MEM_RM(shtr->allocator, cache);
- SHTR(ref_put(shtr));
-}
-
-/*******************************************************************************
- * Local functions
- ******************************************************************************/
-res_T
-cache_create(struct shtr* shtr, struct cache** out_cache)
-{
- struct cache* cache = NULL;
- res_T res = RES_OK;
-
- ASSERT(shtr && out_cache); /* Pre-conditions */
-
- cache = MEM_CALLOC(shtr->allocator, 1, sizeof(*cache));
- if(!cache) { res = RES_MEM_ERR; goto error; }
- ref_init(&cache->ref);
- SHTR(ref_get(shtr));
- cache->shtr = shtr;
- cache->chunk_id = CHUNK_ID_NONE;
-
- cache->mutex = mutex_create();
- if(!cache->mutex) { res = RES_MEM_ERR; goto error; }
-
-exit:
- *out_cache = cache;
- return res;
-error:
- ERROR(shtr, "Error creating line cache -- %s\n", res_to_cstr(res));
- if(cache) { cache_ref_put(cache); cache = NULL; }
- goto exit;
-}
-
-void
-cache_ref_get(struct cache* cache)
-{
- ASSERT(cache);
- ref_get(&cache->ref);
-}
-
-void
-cache_ref_put(struct cache* cache)
-{
- ASSERT(cache);
- ref_put(&cache->ref, release_cache);
-}
-
-res_T
-cache_get_line
- (struct cache* cache,
- const size_t line_id,
- struct line* line)
-{
- const size_t chunk_id = line_id / NLINES_PER_CHUNK;
- const size_t chunk_line_id = line_id % NLINES_PER_CHUNK;
- res_T res = RES_OK;
-
- ASSERT(cache && line);
- ASSERT(chunk_id != CHUNK_ID_NONE && chunk_line_id < NLINES_PER_CHUNK);
-
- mutex_lock(cache->mutex);
- if(cache->chunk_id != chunk_id) {
- res = RES_BAD_ARG;
- } else {
- *line = cache->lines[chunk_line_id];
- }
- mutex_unlock(cache->mutex);
-
- return res;
-}
-
-void
-cache_put_chunk
- (struct cache* cache,
- const size_t chunk_id,
- const struct line lines[NLINES_PER_CHUNK])
-{
- ASSERT(cache && chunk_id != CHUNK_ID_NONE && lines);
-
- mutex_lock(cache->mutex);
- if(cache->chunk_id != chunk_id) {
- cache->chunk_id = chunk_id;
- memcpy(cache->lines, lines, sizeof(struct line)*NLINES_PER_CHUNK);
- }
- mutex_unlock(cache->mutex);
-}
diff --git a/src/shtr_cache.h b/src/shtr_cache.h
@@ -1,54 +0,0 @@
-/* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com)
- * Copyright (C) 2025, 2026 Université de Lorraine
- * Copyright (C) 2022 Centre National de la Recherche Scientifique
- * Copyright (C) 2022 Université Paul Sabatier
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifndef SHTR_CACHE_H
-#define SHTR_CACHE_H
-
-#include "shtr_line_list_c.h"
-
-/* Forward declarations */
-struct shtr;
-struct cache;
-
-extern LOCAL_SYM res_T
-cache_create
- (struct shtr* shtr,
- struct cache** cache);
-
-extern LOCAL_SYM void
-cache_ref_get
- (struct cache* cache);
-
-extern LOCAL_SYM void
-cache_ref_put
- (struct cache* cache);
-
-/* Returns RES_BAD_ARG if the line is not in the cache */
-extern LOCAL_SYM res_T
-cache_get_line
- (struct cache* cache,
- const size_t line_id,
- struct line* line);
-
-extern LOCAL_SYM void
-cache_put_chunk
- (struct cache* cache,
- const size_t chunk_id,
- const struct line lines[NLINES_PER_CHUNK]);
-
-#endif /* SHTR_CACHE_H */
diff --git a/src/shtr_line_list.c b/src/shtr_line_list.c
@@ -17,198 +17,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "shtr_c.h"
-#include "shtr_cache.h"
#include "shtr_line_list_c.h"
#include "shtr_param.h"
#include <rsys/cstr.h>
#include <rsys/text_reader.h>
-/* Maximum size of a compressed block, which in the worst case could correspond
- * to the initial block size plus an overhead of 6 bytes, in addition to 5 bytes
- * per 16 KB of uncompressed data (see https://www.zlib.net/zlib_tech.html) */
-#define ZCHUNK_MAX_SIZE (CHUNK_SIZE + 6 + (5*(CHUNK_SIZE+16383/*ceil*/)/16384))
-
-/*******************************************************************************
- * Compression API
- ******************************************************************************/
-struct zctx {
- struct line* lines; /* Uncompressed Lines */
- size_t nlines; /* Number of uncompressed lines */
-
- struct line last_line; /* Last line added. Used to check the order of lines */
-
- char* zlines; /* Compressed lines */
-
- z_stream stream; /* zlib */
- int zlib_is_init;
-
- struct shtr* shtr;
-};
-static const struct zctx ZCTX_NULL = {0};
-
-static voidpf
-zalloc_func(voidpf opaque, uInt items, uInt size)
-{
- ASSERT(opaque);
- return MEM_CALLOC((struct mem_allocator*)opaque, items, size);
-}
-
-static void
-zfree_func(voidpf opaque, voidpf address)
-{
- ASSERT(opaque);
- MEM_RM((struct mem_allocator*)opaque, address);
-}
-
-static void
-zctx_release(struct zctx* zctx)
-{
- ASSERT(zctx);
- if(zctx->lines) MEM_RM(zctx->shtr->allocator, zctx->lines);
- if(zctx->zlines) MEM_RM(zctx->shtr->allocator, zctx->zlines);
- if(zctx->zlib_is_init) deflateEnd(&zctx->stream);
- SHTR(ref_put(zctx->shtr));
-}
-
-static res_T
-zctx_init(struct zctx* zctx, struct shtr* shtr, const int level)
-{
- int ret = Z_OK;
- int z_level = 0;
- res_T res = RES_OK;
- ASSERT(zctx && shtr);
-
- *zctx = ZCTX_NULL;
-
- SHTR(ref_get(shtr));
- zctx->shtr = shtr;
- zctx->nlines = 0;
-
- /* Allocate memory of uncompressed data */
- zctx->lines = MEM_CALLOC
- (zctx->shtr->allocator, NLINES_PER_CHUNK, sizeof(*zctx->lines));
- if(!zctx->lines) { res = RES_MEM_ERR; goto error; }
-
-
- /* Define the zlib compression level */
- if(level == SHTR_DEFAULT_COMPRESSION) {
- z_level = Z_DEFAULT_COMPRESSION;
- } else {
- z_level = CLAMP(level, 0, 9); /* zlib compression level in [0,9] */
- }
-
- if(z_level != 0) {
- /* Allocate memory of compressed data */
- zctx->zlines = MEM_ALLOC(zctx->shtr->allocator, ZCHUNK_MAX_SIZE);
- if(!zctx->zlines) { res = RES_MEM_ERR; goto error; }
-
- /* Initialize zlib */
- zctx->stream.zalloc = zalloc_func;
- zctx->stream.zfree = zfree_func;
- zctx->stream.opaque = zctx->shtr->allocator;
- ret = deflateInit(&zctx->stream, z_level);
- if(ret != Z_OK) { res = RES_UNKNOWN_ERR; goto error; }
- zctx->zlib_is_init = 1;
- }
-
-exit:
- return res;
-error:
- zctx_release(zctx);
- goto exit;
-}
-
-static res_T
-zctx_deflate(struct zctx* zctx, struct shtr_line_list* list)
-{
- struct zchunk zchunk = ZCHUNK_NULL__;
- char* block = NULL;
- size_t sz_total = 0;
- size_t nblocks = 0;
- size_t n = 0;
- int ret = 0;
- res_T res = RES_OK;
-
- ASSERT(zctx && list);
-
- if(!zctx->nlines) goto exit; /* Nothing to do */
-
- if(!zctx->zlib_is_init) { /* Compression is disabled */
- zchunk.size = (uint32_t)(zctx->nlines * sizeof(*zctx->lines));
-
- } else {
- /* Setup input/output for zlib */
- zctx->stream.next_in = (unsigned char*)zctx->lines;
- zctx->stream.avail_in = (uInt)(zctx->nlines * sizeof(*zctx->lines));
- zctx->stream.next_out = (unsigned char*)zctx->zlines;
- zctx->stream.avail_out = ZCHUNK_MAX_SIZE;
-
- /* Compress */
- ret = deflate(&zctx->stream, Z_FINISH);
- if(ret != Z_STREAM_END) { res = RES_UNKNOWN_ERR; goto error; }
-
- CHK(deflateReset(&zctx->stream) == Z_OK);
-
- /* Calculate the size after compression */
- zchunk.size = ZCHUNK_MAX_SIZE - zctx->stream.avail_out;
- }
-
- /* Calculate the total size already allocated for compressed lines */
- nblocks = darray_charp_size_get(&list->blocks);
- sz_total = nblocks * BLOCK_SIZE;
-
- /* Check that the last memory block has enough space to store the compressed
- * chunk */
- n = darray_zchunk_size_get(&list->zchunks);
- if(n) { /* Is there a block? */
- struct zchunk* prev_chunk = &darray_zchunk_data_get(&list->zchunks)[n-1];
- size_t sz_in_use = prev_chunk->offset + prev_chunk->size;
- size_t sz_remain = sz_total - sz_in_use;
-
- if(sz_remain > zchunk.size) {
- zchunk.offset = sz_in_use;
- block = darray_charp_data_get(&list->blocks)[nblocks-1];
- }
- }
-
- /* No memory available. Allocate a new block */
- if(!block) {
- block = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE);
- if(!block) { res = RES_MEM_ERR; goto error; }
-
- res = darray_charp_push_back(&list->blocks, &block);
- if(res != RES_OK) goto error;
-
- zchunk.offset = sz_total;
- }
-
- /* Register the chunk */
- res = darray_zchunk_push_back(&list->zchunks, &zchunk);
- if(res != RES_OK) goto error;
-
- if(zctx->zlib_is_init) {
- /* Save compressed chunk data */
- memcpy(block + zchunk.offset % BLOCK_SIZE, zctx->zlines, zchunk.size);
- } else {
- /* Save un-compressed chunk data */
- memcpy(block + zchunk.offset % BLOCK_SIZE, zctx->lines, zchunk.size);
- }
-
- /* Update the number of fully recorded lines,
- * i.e., compressed and stored in the list */
- list->nlines += zctx->nlines;
-
- /* No lines waiting for compression. */
- zctx->nlines = 0;
-
-exit:
- return res;
-error:
- ERROR(list->shtr, "Error while compressing lines -- %s\n",
- zctx->stream.msg ? zctx->stream.msg : res_to_cstr(res));
- goto exit;
-}
+/* Maximum number of lines that can be stored in a memory block */
+#define NLINES_PER_BLOCK (BLOCK_SIZE/sizeof(struct line))
/*******************************************************************************
* Helper functions
@@ -242,13 +58,9 @@ create_line_list
ref_init(&list->ref);
SHTR(ref_get(shtr));
list->shtr = shtr;
- darray_zchunk_init(shtr->allocator, &list->zchunks);
darray_charp_init(shtr->allocator, &list->blocks);
list->info = SHTR_LINE_LIST_INFO_NULL;
- res = cache_create(shtr, &list->cache);
- if(res != RES_OK) goto error;
-
exit:
*out_list = list;
return res;
@@ -260,27 +72,15 @@ error:
goto exit;
}
-static res_T
-setup_zlib(struct shtr_line_list* list)
+static INLINE const struct line*
+get_line(const struct shtr_line_list* list, const size_t i)
{
- int ret = Z_OK; /* zlib */
- res_T res = RES_OK;
- ASSERT(list);
-
- list->z_stream.zalloc = zalloc_func;
- list->z_stream.zfree = zfree_func;
- list->z_stream.opaque = list->shtr->allocator;
- ret = inflateInit(&list->z_stream);
- if(ret != Z_OK) { res = RES_UNKNOWN_ERR; goto error; }
+ const size_t iblock = i / NLINES_PER_BLOCK;
+ const size_t iline = i % NLINES_PER_BLOCK;
- list->zlib_is_init = 1;
-
-exit:
- return res;
-error:
- ERROR(list->shtr,
- "Error intializing line decompressor -- %s\n", res_to_cstr(res));
- goto exit;
+ ASSERT(list && i < list->nlines);
+ ASSERT(iblock < darray_charp_size_get(&list->blocks));
+ return (struct line*)(darray_charp_cdata_get(&list->blocks))[iblock] + iline;
}
static void
@@ -359,22 +159,24 @@ static res_T
register_line
(struct shtr_line_list* list,
const struct txtrdr* txtrdr,
- const struct shtr_line* line,
- struct zctx* zctx)
+ const struct shtr_line* line)
{
struct shtr_line ln = SHTR_LINE_NULL;
+ struct line* lines = NULL;
struct line ln_encoded = LINE_NULL;
+ size_t iblock = 0; /* Index of the block in which the line is stored */
+ size_t iline = 0; /* Index of the line in the block */
res_T res = RES_OK;
/* Pre-conditions */
ASSERT(list && txtrdr && line);
- ASSERT(zctx && zctx->nlines < NLINES_PER_CHUNK);
line_encode(line, &ln_encoded);
/* Check if a line has been saved. If so, ensure that the lines are sorted */
- if(darray_zchunk_size_get(&list->zchunks) || zctx->nlines) {
- if(zctx->last_line.wavenumber > ln_encoded.wavenumber) {
+ if(list->nlines) {
+ const struct line* ln_encoded_prev = get_line(list, list->nlines-1);
+ if(ln_encoded_prev->wavenumber > ln_encoded.wavenumber) {
ERROR(list->shtr,
"%s:%lu: lines are not sorted in ascending order wrt their wavenumber.\n",
txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr));
@@ -383,16 +185,24 @@ register_line
}
}
- zctx->last_line = ln_encoded;
- zctx->lines[zctx->nlines] = ln_encoded;
- zctx->nlines += 1;
+ iblock = list->nlines / NLINES_PER_BLOCK;
+ iline = list->nlines % NLINES_PER_BLOCK;
- /* The chunk is full. Compress it */
- if(zctx->nlines == NLINES_PER_CHUNK) {
- res = zctx_deflate(zctx, list);
+ /* Ensure there is sufficient space to store the line */
+ if(iline == 0) {
+ /* There is no more space in the last allocated block. Allocate a new one. */
+ char* block = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE);
+ if(!block) { res = RES_MEM_ERR; goto error; }
+
+ res = darray_charp_push_back(&list->blocks, &block);
if(res != RES_OK) goto error;
}
+ /* Store the encoded line */
+ lines = (struct line*)darray_charp_data_get(&list->blocks)[iblock];
+ lines[iline] = ln_encoded;
+ ++list->nlines;
+
line_decode(&ln_encoded, &ln);
ASSERT(ln.molecule_id == line->molecule_id);
ASSERT(ln.isotope_id_local == line->isotope_id_local);
@@ -528,7 +338,6 @@ load_stream
const struct shtr_line_list_load_args* args,
struct shtr_line_list** out_lines)
{
- struct zctx zctx = ZCTX_NULL;
struct shtr_line_list* list = NULL;
struct txtrdr* txtrdr = NULL;
const char* name = NULL;
@@ -545,14 +354,6 @@ load_stream
res = create_line_list(shtr, &list);
if(res != RES_OK) goto error;
- if(args->compression_level > 0) {
- res = setup_zlib(list);
- if(res != RES_OK) goto error;
- }
-
- res = zctx_init(&zctx, shtr, args->compression_level);
- if(res != RES_OK) goto error;
-
res = txtrdr_stream(list->shtr->allocator, stream, name,
0/*No comment char*/, &txtrdr);
if(res != RES_OK) {
@@ -576,17 +377,12 @@ load_stream
res = parse_line(list, txtrdr, &ln);
if(res != RES_OK) goto error;
- res = register_line(list, txtrdr, &ln, &zctx);
+ res = register_line(list, txtrdr, &ln);
if(res != RES_OK) goto error;
}
- /* Ensure that remaining lines are compressed and stored */
- res = zctx_deflate(&zctx, list);
- if(res != RES_OK) goto error;
-
exit:
if(txtrdr) txtrdr_ref_put(txtrdr);
- zctx_release(&zctx);
*out_lines = list;
return res;
error:
@@ -597,54 +393,6 @@ error:
goto exit;
}
-static res_T
-decompress_zchunk
- (struct shtr_line_list* list,
- const size_t chunk_id,
- struct line lines[NLINES_PER_CHUNK])
-{
- const struct zchunk* zchunk = NULL;
- char* block = NULL;
- size_t block_id = 0;
- size_t block_offset = 0;
- int ret = Z_OK; /* zlib */
- res_T res = RES_OK;
-
- ASSERT(list && lines && chunk_id < darray_zchunk_size_get(&list->zchunks));
-
- zchunk = darray_zchunk_cdata_get(&list->zchunks) + chunk_id;
- block_id = zchunk->offset / BLOCK_SIZE;
- block_offset = zchunk->offset % BLOCK_SIZE;
-
- block = darray_charp_cdata_get(&list->blocks)[block_id];
-
- if(!list->zlib_is_init) {
- /* Data are not compressed */
- memcpy(lines, block+block_offset, zchunk->size);
-
- } else {
- list->z_stream.next_in = (unsigned char*)(block + block_offset);
- list->z_stream.avail_in = (uInt)zchunk->size;
- list->z_stream.next_out = (unsigned char*)lines;
- list->z_stream.avail_out = (uInt)(sizeof(struct line)*NLINES_PER_CHUNK);
- ret = inflate(&list->z_stream, Z_FINISH);
- if(ret != Z_STREAM_END) {
- ASSERT(list->z_stream.msg);
- ERROR(list->shtr, "Error decompressing the chunk of lines -- %s\n",
- list->z_stream.msg);
- res = RES_UNKNOWN_ERR;
- goto error;
- }
-
- CHK(inflateReset(&list->z_stream) == Z_OK);
- }
-
-exit:
- return res;
-error:
- goto exit;
-}
-
static void
release_lines(ref_T * ref)
{
@@ -657,14 +405,10 @@ release_lines(ref_T * ref)
shtr = list->shtr;
- if(list->cache) cache_ref_put(list->cache);
- if(list->zlib_is_init) inflateEnd(&list->z_stream);
-
n = darray_charp_size_get(&list->blocks);
blocks = darray_charp_data_get(&list->blocks);
FOR_EACH(i, 0, n) { if(blocks[i]) MEM_RM(shtr->allocator, blocks[i]); }
- darray_zchunk_release(&list->zchunks);
darray_charp_release(&list->blocks);
MEM_RM(shtr->allocator, list);
SHTR(ref_put(shtr));
@@ -715,9 +459,9 @@ shtr_line_list_create_from_stream
struct shtr_line_list** out_list)
{
struct shtr_line_list* list = NULL;
+ size_t nblocks = 0;
char** blocks = NULL;
- size_t i=0, n=0;
- int is_compression_enabled = 0;
+ size_t i = 0;
int version = 0;
res_T res = RES_OK;
@@ -755,20 +499,18 @@ shtr_line_list_create_from_stream
}
READ(&list->nlines, 1);
- READ(&is_compression_enabled, 1);
-
- /* Memory descriptor of compressed chunks */
- READ(&n, 1);
- if((res = darray_zchunk_resize(&list->zchunks, n)) != RES_OK) goto error;
- READ(darray_zchunk_data_get(&list->zchunks), n);
+ nblocks = (list->nlines + (NLINES_PER_BLOCK-1)/*ceil*/) / NLINES_PER_BLOCK;
- /* Compressed data stored in memory blocks */
- READ(&n, 1);
- if((res = darray_charp_resize(&list->blocks, n)) != RES_OK) goto error;
+ /* Lines stored in memory blocks */
+ if((res = darray_charp_resize(&list->blocks, nblocks)) != RES_OK) goto error;
blocks = darray_charp_data_get(&list->blocks);
- FOR_EACH(i, 0, n) {
+ FOR_EACH(i, 0, nblocks) {
blocks[i] = MEM_ALLOC(list->shtr->allocator, BLOCK_SIZE);
- if(!blocks[i]) { res = RES_MEM_ERR; goto error; }
+ if(!blocks[i]) {
+ ERROR(shtr, "%s: error allocating memory block\n", FUNC_NAME);
+ res = RES_MEM_ERR;
+ goto error;
+ }
READ(blocks[i], BLOCK_SIZE);
}
@@ -777,11 +519,6 @@ shtr_line_list_create_from_stream
#undef READ
- if(is_compression_enabled) {
- res = setup_zlib(list);
- if(res != RES_OK) goto error;
- }
-
exit:
if(out_list) *out_list = list;
return res;
@@ -822,30 +559,12 @@ shtr_line_list_at
const size_t i,
struct shtr_line* line)
{
- struct line ln_encoded = LINE_NULL;
- res_T res = RES_OK;
+ const struct line* ln_encoded = NULL;
if(!list || !line || i >= list->nlines) return RES_BAD_ARG;
-
- res = cache_get_line(list->cache, i, &ln_encoded);
-
- if(res != RES_OK) { /* Cache miss */
- const size_t chunk_id = i / NLINES_PER_CHUNK;
- const size_t line_id = i % NLINES_PER_CHUNK;
- struct line lines[NLINES_PER_CHUNK];
-
- if((res = decompress_zchunk(list, chunk_id, lines)) != RES_OK) goto error;
- cache_put_chunk(list->cache, chunk_id, lines);
-
- ln_encoded = lines[line_id];
- }
-
- line_decode(&ln_encoded, line);
-
-exit:
- return res;
-error:
- goto exit;
+ ln_encoded = get_line(list, i);
+ line_decode(ln_encoded, line);
+ return RES_OK;
}
res_T
@@ -874,18 +593,9 @@ shtr_line_list_write
/* Number of lines in the list */
WRITE(&list->nlines, 1);
- /* Is decompression enabled */
- WRITE(&list->zlib_is_init, 1);
-
- /* Memory descriptor of compressed chunks */
- n = darray_zchunk_size_get(&list->zchunks);
- WRITE(&n, 1);
- WRITE(darray_zchunk_cdata_get(&list->zchunks), n);
-
- /* Compressed data stored in memory blocks */
+ /* Lines stored in memory blocks. */
blocks = darray_charp_cdata_get(&list->blocks);
n = darray_charp_size_get(&list->blocks);
- WRITE(&n, 1);
FOR_EACH(i, 0, n) { WRITE(blocks[i], BLOCK_SIZE); }
/* Informations on line parameters */
diff --git a/src/shtr_line_list_c.h b/src/shtr_line_list_c.h
@@ -24,8 +24,6 @@
#include <rsys/dynamic_array.h>
#include <rsys/ref_count.h>
-#include <zlib.h>
-
/*
* Brief summary of the design
*
@@ -33,65 +31,18 @@
* reduce the memory footprint. Several line parameters are therefore encoded
* with reduced precision (see “struct line”).
*
- * In addition, lines are first stored in a chunk of CHUNK_SIZE bytes which,
- * once filled, is compressed using zlib. The compressed data is then stored in
- * a fixed-size memory block. Each chunk can be accessed and decompressed
- * separately. The memory descriptors for the compressed chunks, i.e., the data
- * defining the location where the compressed data is stored in the memory
- * blocks (see “struct zchunk”), are stored in a dynamic array in the order in
- * which the lines are loaded. Similarly, the memory blocks that contain the
- * compressed data are also listed in a dynamic array in the order in which the
- * lines are read. Therefore, the index of a line is sufficient to index the
- * chunk in which the line is stored, and finally the memory block in which its
- * parameters are compressed.
- *
- * The use of dynamic arrays allows this access by simple indexing, but at the
- * cost of memory overhead due to the dynamic array allocation policy (up to
- * twice the required size in the worst case). However, the memory space in
- * question here is not a major problem, since the zchunk structure and the
- * pointer to a memory block take up very little space. And while linked lists
- * could have been used instead, they would not only have complicated data
- * access, with pointers replacing indexing, but they would also have
- * complicated data [de]serialization, precisely because of the use of pointers
- * instead of indexes.
- *
- * Note that the use of memory blocks stored in dynamic arrays rather than a
- * simple dynamic array of contiguous bytes is motivated by the issue of
- * additional memory overhead associated with the use of dynamic arrays. As said
- * above, on the worst case, the memory overhead here is equal to twice the
- * number of blocks multiplied by the size of a pointer, compared to twice the
- * size required to store all the lines.
- *
- * A cache is ultimately used to speed up access to lines, which must now be
- * decompressed. This cache stores the decompressed blocks in which the most
- * recently accessed lines are stored. The implementation of this cache is
- * independent of the line storage. However, it must be thread-safe to allow
- * simultaneous access.
+ * Lines are saved in memory blocks of BLOCK_SIZE, stored in a dynamic array.
+ * Using a dynamic array of memory blocks rather than a simple dynamic array of
+ * contiguous bytes is motivated by the issue of additional memory overhead
+ * associated with the use of dynamic arrays for which the overall number of
+ * entries is unknown. In the worst case, the memory overhead here is equal to
+ * twice the number of blocks multiplied by the size of a pointer, compared to
+ * twice the size required to store all the lines.
*/
/* Size in bytes of a memory block in which compressed data is stored */
#define BLOCK_SIZE (1024*1024)
-/* Size in bytes of an uncompressed chunk */
-#define CHUNK_SIZE (64*1024)
-
-/* Number of lines in a chunk */
-#define NLINES_PER_CHUNK (CHUNK_SIZE/sizeof(struct line))
-
-/* Memory descriptor of a compressed chunk */
-struct zchunk {
- /* Offset to chunk data. The offset is indicated as if the compressed data
- * were stored sequentially. However, the data is stored in memory blocks of
- * fixed size. The offset therefore defines both the block index
- * (offset/BLOCK_SIZE) and the offset within the block (offset%BLOCK_SIZE) */
- size_t offset;
-
- /* Size in bytes of the compressed chunk */
- uint32_t size;
-};
-#define ZCHUNK_NULL__ {0}
-static const struct zchunk ZCHUNK_NULL = ZCHUNK_NULL__;
-
struct line {
double wavenumber; /* Central wavenumber in vacuum [cm^-1] */
double intensity; /* Reference intensity [cm^-1/(molec.cm^2)] */
@@ -122,12 +73,7 @@ static const struct line LINE_NULL = LINE_NULL__;
STATIC_ASSERT(sizeof(struct line)==32, Unexpected_sizeof_struct_line);
-/* Generate the dynamic array of zchunk */
-#define DARRAY_NAME zchunk
-#define DARRAY_DATA struct zchunk
-#include <rsys/dynamic_array.h>
-
-/* Generate he dynamic array of char* */
+/* Generate the dynamic array of char*, the dynamic array of memory blocks */
#define DARRAY_NAME charp
#define DARRAY_DATA char*
#include <rsys/dynamic_array.h>
@@ -140,20 +86,13 @@ static const int SHTR_LINE_LIST_VERSION = 2;
struct cache;
struct shtr_line_list {
- /* Compressed lines sorted in ascending order wrt their wavenumber */
- struct darray_zchunk zchunks; /* Accessor to compressed lines */
+ /* Lines sorted in ascending order wrt their wavenumber */
struct darray_charp blocks; /* Memory where compressed lines are stored */
- size_t nlines; /* Number of lines */
+ size_t nlines; /* Overall number of lines */
/* Informations on line parameters */
struct shtr_line_list_info info;
- struct cache* cache;
-
- /* zlib */
- z_stream z_stream;
- int zlib_is_init;
-
struct shtr* shtr;
ref_T ref;
};