star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

shtr_line_list.c (18856B)


      1 /* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com)
      2  * Copyright (C) 2025, 2026 Université de Lorraine
      3  * Copyright (C) 2022 Centre National de la Recherche Scientifique
      4  * Copyright (C) 2022 Université Paul Sabatier
      5  *
      6  * This program is free software: you can redistribute it and/or modify
      7  * it under the terms of the GNU General Public License as published by
      8  * the Free Software Foundation, either version 3 of the License, or
      9  * (at your option) any later version.
     10  *
     11  * This program is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     14  * GNU General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU General Public License
     17  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     18 
     19 #include "shtr_c.h"
     20 #include "shtr_line_list_c.h"
     21 #include "shtr_param.h"
     22 
     23 #include <rsys/cstr.h>
     24 #include <rsys/text_reader.h>
     25 
     26 /* Maximum number of lines that can be stored in a memory block */
     27 #define NLINES_PER_BLOCK (BLOCK_SIZE/sizeof(struct line))
     28 
     29 /*******************************************************************************
     30  * Helper functions
     31  ******************************************************************************/
     32 static res_T
     33 check_shtr_line_list_load_args(const struct shtr_line_list_load_args* args)
     34 {
     35   if(!args) return RES_BAD_ARG;
     36 
     37   /* Source is missing */
     38   if(!args->file && !args->filename) return RES_BAD_ARG;
     39 
     40   return RES_OK;
     41 }
     42 
     43 static res_T
     44 create_line_list
     45   (struct shtr* shtr,
     46    struct shtr_line_list** out_list)
     47 {
     48   struct shtr_line_list* list = NULL;
     49   res_T res = RES_OK;
     50   ASSERT(shtr && out_list);
     51 
     52   list = MEM_CALLOC(shtr->allocator, 1, sizeof(*list));
     53   if(!list) {
     54     ERROR(shtr, "Could not allocate the list of lines.\n");
     55     res = RES_MEM_ERR;
     56     goto error;
     57   }
     58   ref_init(&list->ref);
     59   SHTR(ref_get(shtr));
     60   list->shtr = shtr;
     61   darray_charp_init(shtr->allocator, &list->blocks);
     62   list->info = SHTR_LINE_LIST_INFO_NULL;
     63 
     64 exit:
     65   *out_list = list;
     66   return res;
     67 error:
     68   if(list) {
     69     SHTR(line_list_ref_put(list));
     70     list = NULL;
     71   }
     72   goto exit;
     73 }
     74 
     75 static INLINE const struct line*
     76 get_line(const struct shtr_line_list* list, const size_t i)
     77 {
     78   const size_t iblock = i / NLINES_PER_BLOCK;
     79   const size_t iline  = i % NLINES_PER_BLOCK;
     80 
     81   ASSERT(list && i < list->nlines);
     82   ASSERT(iblock < darray_charp_size_get(&list->blocks));
     83   return (struct line*)(darray_charp_cdata_get(&list->blocks))[iblock] + iline;
     84 }
     85 
     86 static void
     87 line_encode(const struct shtr_line* line, struct line* line_encoded)
     88 {
     89   union { float flt; int32_t i32; } ucast;
     90   ASSERT(line && line_encoded);
     91 
     92   /* Keep the wavenumber and the intensity as it */
     93   line_encoded->wavenumber = line->wavenumber;
     94   line_encoded->intensity = line->intensity;
     95 
     96   /* Encode the lower state energy and delta air in single precision  */
     97   line_encoded->lower_state_energy = (float)line->lower_state_energy;
     98   line_encoded->delta_air = (float)line->delta_air;
     99 
    100   /* Store gamma_air as an unsigned fixed-point number (0:14), i.e., 0 bit for
    101    * the integer part and 14 bits for the fractional part */
    102   ASSERT((int64_t)line->gamma_air == 0 && line->gamma_air >= 0);
    103   line_encoded->gair14_gself14_isoid4 =
    104     ((int32_t)float2fix(line->gamma_air, 0, 14) & (BIT_I32(14)-1)) << 18;
    105 
    106   /* Store gamma_self as an unsigned fixed-point number (0:14), i.e., 0 bit for
    107    * the integer part and 14 bits for the fractional part */
    108   ASSERT((int64_t)line->gamma_self == 0 && line->gamma_self >= 0);
    109   line_encoded->gair14_gself14_isoid4 |=
    110     ((int32_t)float2fix(line->gamma_self, 0, 14) & (BIT_I32(14)-1)) << 4;
    111 
    112   /* Store the isotope id on 4 bits */
    113   ASSERT(line->isotope_id_local < 16);
    114   line_encoded->gair14_gself14_isoid4 |= line->isotope_id_local & (BIT_I32(4)-1);
    115 
    116   /* Encode n_air in single precision with its last 7 bits of matissa disabled
    117    * in order to store the molecule identifier */
    118   ucast.flt = (float)line->n_air;
    119   ucast.i32 &= ~(BIT_I32(7)-1);
    120 
    121   /* Store the molecule id on 7 bits */
    122   ASSERT(line->molecule_id < 128);
    123   ucast.i32 |= (int32_t)line->molecule_id & (BIT_I32(7)-1);
    124   line_encoded->nair25_molid7 = ucast.i32;
    125 }
    126 
    127 static void
    128 line_decode(const struct line* line_encoded, struct shtr_line* line)
    129 {
    130   union { float flt; int32_t i32; } ucast;
    131   int64_t i64 = 0;
    132   ASSERT(line && line_encoded);
    133 
    134   line->wavenumber = line_encoded->wavenumber;
    135   line->intensity = line_encoded->intensity;
    136   line->lower_state_energy = line_encoded->lower_state_energy;
    137   line->delta_air = line_encoded->delta_air;
    138 
    139   /* Convert gamma_air and gamma_self from fixed-point numbers (0:14) to
    140    * double-precision floating-point numbers */
    141   i64 = (line_encoded->gair14_gself14_isoid4 >> 18) & (BIT_I32(14)-1);
    142   line->gamma_air = fix2float(i64, 0, 14);
    143   i64 = (line_encoded->gair14_gself14_isoid4 >> 4) & (BIT_I32(14)-1);
    144   line->gamma_self = fix2float(i64, 0, 14);
    145 
    146   /* Unpack the isotope ID */
    147   line->isotope_id_local = line_encoded->gair14_gself14_isoid4 & (BIT_I32(4)-1);
    148 
    149   /* Unpack the molecule ID */
    150   ucast.i32 = line_encoded->nair25_molid7;
    151   line->molecule_id = ucast.i32 & (BIT_I32(7)-1);
    152   ucast.i32 &= ~(BIT_I32(7)-1);
    153 
    154   /* Convert n_air from single precision to double precision */
    155   line->n_air = ucast.flt;
    156 }
    157 
    158 static res_T
    159 register_line
    160   (struct shtr_line_list* list,
    161    const struct txtrdr* txtrdr,
    162    const struct shtr_line* line)
    163 {
    164   struct shtr_line ln = SHTR_LINE_NULL;
    165   struct line* lines = NULL;
    166   struct line ln_encoded = LINE_NULL;
    167   size_t iblock = 0; /* Index of the block in which the line is stored */
    168   size_t iline  = 0; /* Index of the line in the block */
    169   res_T res = RES_OK;
    170 
    171   /* Pre-conditions */
    172   ASSERT(list && txtrdr && line);
    173 
    174   line_encode(line, &ln_encoded);
    175 
    176   /* Check if a line has been saved. If so, ensure that the lines are sorted */
    177   if(list->nlines) {
    178     const struct line* ln_encoded_prev = get_line(list, list->nlines-1);
    179     if(ln_encoded_prev->wavenumber > ln_encoded.wavenumber) {
    180       ERROR(list->shtr,
    181         "%s:%lu: lines are not sorted in ascending order wrt their wavenumber.\n",
    182         txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr));
    183       res = RES_BAD_ARG;
    184       goto error;
    185     }
    186   }
    187 
    188   iblock = list->nlines / NLINES_PER_BLOCK;
    189   iline  = list->nlines % NLINES_PER_BLOCK;
    190 
    191   /* Ensure there is sufficient space to store the line */
    192   if(iline == 0) {
    193     /* There is no more space in the last allocated block. Allocate a new one. */
    194     char* block = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE);
    195     if(!block) { res = RES_MEM_ERR; goto error; }
    196 
    197     res = darray_charp_push_back(&list->blocks, &block);
    198     if(res != RES_OK) goto error;
    199   }
    200 
    201   /* Store the encoded line */
    202   lines = (struct line*)darray_charp_data_get(&list->blocks)[iblock];
    203   lines[iline] = ln_encoded;
    204   ++list->nlines;
    205 
    206   line_decode(&ln_encoded, &ln);
    207   ASSERT(ln.molecule_id == line->molecule_id);
    208   ASSERT(ln.isotope_id_local == line->isotope_id_local);
    209 
    210   #define UPDATE_INFO(Name) { \
    211     const double err = fabs(line->Name - ln.Name); \
    212     list->info.Name.range[0] = MMIN(list->info.Name.range[0], ln.Name); \
    213     list->info.Name.range[1] = MMAX(list->info.Name.range[1], ln.Name); \
    214     list->info.Name.err = MMAX(list->info.Name.err, err); \
    215   } (void) 0
    216   UPDATE_INFO(wavenumber);
    217   UPDATE_INFO(intensity);
    218   UPDATE_INFO(gamma_air);
    219   UPDATE_INFO(gamma_self);
    220   UPDATE_INFO(lower_state_energy);
    221   UPDATE_INFO(n_air);
    222   UPDATE_INFO(delta_air);
    223   #undef UPDATE_INFO
    224 
    225 exit:
    226   return res;
    227 error:
    228   goto exit;
    229 }
    230 
    231 static res_T
    232 parse_line
    233   (struct shtr_line_list* list,
    234    struct txtrdr* txtrdr,
    235    struct shtr_line* ln)
    236 {
    237   struct param_desc param = PARAM_DESC_NULL;
    238   struct shtr* shtr = NULL;
    239   char* line = NULL;
    240   char* str = NULL;
    241   char* end = NULL;
    242   char backup;
    243   int molecule_id;
    244   int isotope_id_local;
    245   res_T res = RES_OK;
    246 
    247   ASSERT(list && txtrdr && ln);
    248 
    249   line = txtrdr_get_line(txtrdr);
    250   ASSERT(line);
    251 
    252   shtr = list->shtr;
    253   param.path = txtrdr_get_name(txtrdr);
    254   param.line = txtrdr_get_line_num(txtrdr);
    255 
    256   str = end = line;
    257   backup = str[0];
    258   #define NEXT(Size) {                                                         \
    259     *end = backup;                                                             \
    260     str = end;                                                                 \
    261     end = str+(Size);                                                          \
    262     backup = *end;                                                             \
    263     *end = '\0';                                                               \
    264   } (void)0
    265   #define PARSE(Var, Size, Type, Name, Low, Upp, LowIncl, UppIncl) {           \
    266     NEXT(Size);                                                                \
    267     param.name = (Name);                                                       \
    268     param.low = (Low);                                                         \
    269     param.upp = (Upp);                                                         \
    270     param.is_low_incl = (LowIncl);                                             \
    271     param.is_upp_incl = (UppIncl);                                             \
    272     res = parse_param_##Type(shtr, str, &param, Var);                          \
    273     if(res != RES_OK) goto error;                                              \
    274   } (void)0
    275 
    276   PARSE(&molecule_id, 2, int, "molecule identifier", 0,99,1,1);
    277   ln->molecule_id = (int32_t)molecule_id;
    278 
    279   PARSE(&isotope_id_local, 1, int, "isotope local identifier", 0,9,1,1);
    280   ln->isotope_id_local = (int32_t)
    281     (isotope_id_local == 0 ? 9 : (isotope_id_local - 1));
    282 
    283   PARSE(&ln->wavenumber, 12, double, "central wavenumber", 0,INF,0,1);
    284   PARSE(&ln->intensity, 10, double, "reference intensity", 0,INF,0,1);
    285 
    286   NEXT(10); /* Skip the Enstein coef */
    287 
    288   PARSE(&ln->gamma_air, 5, double, "air broadening half-width", 0,INF,1,1);
    289   PARSE(&ln->gamma_self, 5, double, "self broadening half-width", 0,INF,1,1);
    290 
    291   /* Handle unavailable lower state energy */
    292   PARSE(&ln->lower_state_energy, 10, double, "lower state energy",-INF,INF,1,1);
    293   if(ln->lower_state_energy == -1) {
    294     WARN(shtr,
    295       "%s:%lu: the lower state energy is unavailable for this line, so it is "
    296       "ignored.\n", txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr));
    297     goto exit; /* Skip the line */
    298   }
    299   /* Check the domain validity */
    300   if(ln->lower_state_energy < 0) {
    301     ERROR(shtr,
    302       "%s:%lu: invalid lower state energy %g. It must be in [0, INF].\n",
    303       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr),
    304       ln->lower_state_energy);
    305     res = RES_BAD_ARG;
    306     goto error;
    307   }
    308 
    309   PARSE(&ln->n_air, 4, double, "temperature-dependent exponent",-INF,INF,1,1);
    310   PARSE(&ln->delta_air, 8, double, "air-pressure wavenumber shift", -INF,INF,1,1);
    311 
    312   /* Skip the remaining values */
    313 
    314   #undef NEXT
    315   #undef PARSE
    316 
    317   /* Check the size of the remaining data to ensure that there is at least the
    318    * expected number of bytes wrt the HITRAN fileformat */
    319   *end = backup;
    320   str = end;
    321   if(strlen(str) != 93) {
    322     ERROR(list->shtr, "%s:%lu: missing data after delta air.\n",
    323       param.path, (unsigned long)param.line);
    324     res = RES_BAD_ARG;
    325     goto error;
    326   }
    327 
    328 exit:
    329   return res;
    330 error:
    331   goto exit;
    332 }
    333 
    334 static res_T
    335 load_stream
    336   (struct shtr* shtr,
    337    FILE* stream,
    338    const struct shtr_line_list_load_args* args,
    339    struct shtr_line_list** out_lines)
    340 {
    341   struct shtr_line_list* list = NULL;
    342   struct txtrdr* txtrdr = NULL;
    343   const char* name = NULL;
    344   res_T res = RES_OK;
    345 
    346   ASSERT(shtr && stream && args && out_lines);
    347 
    348   if(args->file) { /* Load from stream */
    349     name = args->filename ? args->filename : "<stream>";
    350   } else {
    351     name = args->filename;
    352   }
    353 
    354   res = create_line_list(shtr,  &list);
    355   if(res != RES_OK) goto error;
    356 
    357   res = txtrdr_stream(list->shtr->allocator, stream, name,
    358     0/*No comment char*/, &txtrdr);
    359   if(res != RES_OK) {
    360     ERROR(shtr, "%s: error creating the text reader -- %s.\n",
    361       name, res_to_cstr(res));
    362     goto error;
    363   }
    364 
    365   for(;;) {
    366     struct shtr_line ln = SHTR_LINE_NULL;
    367 
    368     res = txtrdr_read_line(txtrdr);
    369     if(res != RES_OK) {
    370       ERROR(shtr, "%s: error reading the line `%lu' -- %s.\n",
    371         name, (unsigned long)txtrdr_get_line_num(txtrdr), res_to_cstr(res));
    372       goto error;
    373     }
    374 
    375     if(!txtrdr_get_cline(txtrdr)) break; /* No more parsed line */
    376 
    377     res = parse_line(list, txtrdr, &ln);
    378     if(res != RES_OK) goto error;
    379 
    380     res = register_line(list, txtrdr, &ln);
    381     if(res != RES_OK) goto error;
    382   }
    383 
    384 exit:
    385   if(txtrdr) txtrdr_ref_put(txtrdr);
    386   *out_lines = list;
    387   return res;
    388 error:
    389   if(list) {
    390     SHTR(line_list_ref_put(list));
    391     list = NULL;
    392   }
    393   goto exit;
    394 }
    395 
    396 static void
    397 release_lines(ref_T * ref)
    398 {
    399   struct shtr* shtr = NULL;
    400   struct shtr_line_list* list = CONTAINER_OF(ref, struct shtr_line_list, ref);
    401   char** blocks = NULL;
    402   size_t i=0, n=0;
    403 
    404   ASSERT(ref);
    405 
    406   shtr = list->shtr;
    407 
    408   n = darray_charp_size_get(&list->blocks);
    409   blocks = darray_charp_data_get(&list->blocks);
    410   FOR_EACH(i, 0, n) { if(blocks[i]) MEM_RM(shtr->allocator, blocks[i]); }
    411 
    412   darray_charp_release(&list->blocks);
    413   MEM_RM(shtr->allocator, list);
    414   SHTR(ref_put(shtr));
    415 }
    416 
    417 /*******************************************************************************
    418  * Exported functions
    419  ******************************************************************************/
    420 res_T
    421 shtr_line_list_load
    422   (struct shtr* shtr,
    423    const struct shtr_line_list_load_args* args,
    424    struct shtr_line_list** list)
    425 {
    426   FILE* file = NULL;
    427   res_T res = RES_OK;
    428 
    429   if(!shtr || !list) { res = RES_BAD_ARG; goto error; }
    430   res = check_shtr_line_list_load_args(args);
    431   if(res != RES_OK) goto error;
    432 
    433   if(args->file) { /* Load from stream */
    434     file = args->file;
    435 
    436   } else { /* Load from file */
    437     file = fopen(args->filename, "r");
    438     if(!file) {
    439       ERROR(shtr, "%s: error opening file `%s'.\n", FUNC_NAME, args->filename);
    440       res = RES_IO_ERR;
    441       goto error;
    442     }
    443   }
    444 
    445   res = load_stream(shtr, file, args, list);
    446   if(res != RES_OK) goto error;
    447 
    448 exit:
    449   if(file && file != args->file) fclose(file);
    450   return res;
    451 error:
    452   goto exit;
    453 }
    454 
    455 res_T
    456 shtr_line_list_create_from_stream
    457   (struct shtr* shtr,
    458    FILE* stream,
    459    struct shtr_line_list** out_list)
    460 {
    461   struct shtr_line_list* list = NULL;
    462   size_t nblocks = 0;
    463   char** blocks = NULL;
    464   size_t i = 0;
    465   int version = 0;
    466   res_T res = RES_OK;
    467 
    468   if(!shtr || !out_list || !stream) {
    469     res = RES_BAD_ARG;
    470     goto error;
    471   }
    472 
    473   res = create_line_list(shtr, &list);
    474   if(res != RES_OK) goto error;
    475 
    476   #define READ(Var, Nb) {                                                      \
    477     if(fread((Var), sizeof(*(Var)), (Nb), stream) != (Nb)) {                   \
    478       if(feof(stream)) {                                                       \
    479         res = RES_BAD_ARG;                                                     \
    480       } else if(ferror(stream)) {                                              \
    481         res = RES_IO_ERR;                                                      \
    482       } else {                                                                 \
    483         res = RES_UNKNOWN_ERR;                                                 \
    484       }                                                                        \
    485       ERROR(shtr,                                                              \
    486         "%s: error reading line list -- %s.\n", FUNC_NAME, res_to_cstr(res));  \
    487       goto error;                                                              \
    488     }                                                                          \
    489   } (void)0
    490 
    491   READ(&version, 1);
    492   if(version != SHTR_LINE_LIST_VERSION) {
    493     ERROR(shtr,
    494       "%s: unexpected line list version %d. "
    495       "Expecting a line list in version %d.\n",
    496       FUNC_NAME, version, SHTR_LINE_LIST_VERSION);
    497     res = RES_BAD_ARG;
    498     goto error;
    499   }
    500 
    501   READ(&list->nlines, 1);
    502   nblocks = (list->nlines + (NLINES_PER_BLOCK-1)/*ceil*/) / NLINES_PER_BLOCK;
    503 
    504   /* Line stored in memory blocks */
    505   if((res = darray_charp_resize(&list->blocks, nblocks)) != RES_OK) goto error;
    506   blocks = darray_charp_data_get(&list->blocks);
    507   FOR_EACH(i, 0, nblocks) {
    508     blocks[i] = MEM_ALLOC(list->shtr->allocator, BLOCK_SIZE);
    509     if(!blocks[i]) {
    510       ERROR(shtr, "%s: error allocating memory block\n", FUNC_NAME);
    511       res = RES_MEM_ERR;
    512       goto error;
    513     }
    514     READ(blocks[i], BLOCK_SIZE);
    515   }
    516 
    517   /* Informations on line parameters */
    518   READ(&list->info, 1);
    519 
    520   #undef READ
    521 
    522 exit:
    523   if(out_list) *out_list = list;
    524   return res;
    525 error:
    526   if(list) { SHTR(line_list_ref_put(list)); list = NULL; }
    527   goto exit;
    528 }
    529 
    530 res_T
    531 shtr_line_list_ref_get(struct shtr_line_list* list)
    532 {
    533   if(!list) return RES_BAD_ARG;
    534   ref_get(&list->ref);
    535   return RES_OK;
    536 }
    537 
    538 res_T
    539 shtr_line_list_ref_put(struct shtr_line_list* list)
    540 {
    541   if(!list) return RES_BAD_ARG;
    542   ref_put(&list->ref, release_lines);
    543   return RES_OK;
    544 }
    545 
    546 res_T
    547 shtr_line_list_get_size
    548   (const struct shtr_line_list* list,
    549    size_t* nlines)
    550 {
    551   if(!list || !nlines) return RES_BAD_ARG;
    552   *nlines = list->nlines;
    553   return RES_OK;
    554 }
    555 
    556 res_T
    557 shtr_line_list_at
    558   (struct shtr_line_list* list,
    559    const size_t i,
    560    struct shtr_line* line)
    561 {
    562   const struct line* ln_encoded = NULL;
    563 
    564   if(!list || !line || i >= list->nlines) return RES_BAD_ARG;
    565   ln_encoded = get_line(list, i);
    566   line_decode(ln_encoded, line);
    567   return RES_OK;
    568 }
    569 
    570 res_T
    571 shtr_line_list_write
    572   (const struct shtr_line_list* list,
    573    FILE* stream)
    574 {
    575   char* const* blocks = NULL;
    576   size_t i=0, n=0;
    577   res_T res = RES_OK;
    578 
    579   if(!list || !stream) { res = RES_BAD_ARG; goto error; }
    580 
    581   #define WRITE(Var, Nb) {                                                     \
    582     if(fwrite((Var), sizeof(*(Var)), (Nb), stream) != (Nb)) {                  \
    583       res = RES_IO_ERR;                                                        \
    584       ERROR(list->shtr,                                                        \
    585         "%s: error writing line list -- %s\n", FUNC_NAME, res_to_cstr(res));   \
    586       goto error;                                                              \
    587     }                                                                          \
    588   } (void)0
    589 
    590   /* Version management */
    591   WRITE(&SHTR_LINE_LIST_VERSION, 1);
    592 
    593   /* Number of lines in the list */
    594   WRITE(&list->nlines, 1);
    595 
    596   /* Lines stored in memory blocks. */
    597   blocks = darray_charp_cdata_get(&list->blocks);
    598   n = darray_charp_size_get(&list->blocks);
    599   FOR_EACH(i, 0, n) { WRITE(blocks[i], BLOCK_SIZE); }
    600 
    601   /* Informations on line parameters */
    602   WRITE(&list->info, 1);
    603 
    604   #undef WRITE
    605 
    606 exit:
    607   return res;
    608 error:
    609   goto exit;
    610 }
    611 
    612 res_T
    613 shtr_line_list_get_info
    614   (const struct shtr_line_list* list,
    615    struct shtr_line_list_info* info)
    616 {
    617   if(!list || !info) return RES_BAD_ARG;
    618   *info = list->info;
    619   return RES_OK;
    620 }