star-meteo

Time varying meteorological data
git clone git://git.meso-star.fr/star-meteo.git
Log | Files | Refs | README | LICENSE

smeteo_load.c (14752B)


      1 /* Copyright (C) 2025 |Méso|Star> (contact@meso-star.com)
      2  *
      3  * This program is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published by
      5  * the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * This program is dismshbuted in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #define _XOPEN_SOURCE /* strptime support */
     17 
     18 #include "smeteo.h"
     19 #include "smeteo_c.h"
     20 
     21 #include <rsys/cstr.h>
     22 #include <rsys/text_reader.h>
     23 
     24 #include <errno.h>
     25 #include <string.h>
     26 #include <time.h>
     27 
     28 /* Error message when reading a line */
     29 #define ERROR_READ_LINE(SMeteo, TxtRdr, Res) \
     30   ERROR((SMeteo), "%s: error reading line -- %s\n", \
     31     txtrdr_get_name(TxtRdr), res_to_cstr(Res))
     32 
     33 /*******************************************************************************
     34  * Helper functions
     35  ******************************************************************************/
     36 static INLINE void
     37 reset(struct smeteo* smeteo)
     38 {
     39   ASSERT(smeteo);
     40   str_clear(&smeteo->filename);
     41   darray_entry_clear(&smeteo->entries);
     42 }
     43 
     44 static res_T
     45 parse_time
     46   (struct smeteo* smeteo,
     47    struct txtrdr* txtrdr,
     48    const char* date,
     49    const char* hour,
     50    struct tm* time)
     51 {
     52   char buf[64] = {0};
     53   size_t ldate=0, lhour=0; /* Length of corresponding string */
     54   res_T res = RES_OK;
     55 
     56   ASSERT(smeteo && time);
     57 
     58   if(!date || !time) { res = RES_BAD_ARG; goto error; }
     59 
     60   /* Check memory space to build a string concatenating date and time */
     61   ldate = strlen(date);
     62   lhour = strlen(hour);
     63   if(ldate + lhour > sizeof(buf)+1/*NUL byte*/+1/*separator*/+1/*end char*/) {
     64     res = RES_MEM_ERR;
     65     goto error;
     66   }
     67 
     68   /* Concatenate the strings representing the date and time. End the string with
     69    * a semicolon (;) to force sptrtime to parse all the digits of the seconds
     70    * and not just the part that suits it, so that it returns an error if the
     71    * number of seconds is actually invalid */
     72   strcpy(buf, date);
     73   buf[ldate] = 'T'; /* Separator between date and time */
     74   strcpy(buf+ldate+1, hour);
     75   buf[ldate+1/*separator*/+lhour] = ';'; /* end char */
     76   buf[ldate+1/*separator*/+lhour+1] = '\0'; /* NUL char */
     77 
     78   /* Note that strptime will not return an error with a number of seconds equal
     79    * to 60 (and even 61 in some implementations of the C library). This allows
     80    * for leap seconds, which occur occasionally to adjust the Earth's position.
     81    * Don't try to outsmart strptime; trust it to check the validity of the time
     82    * without adding any additional tests */
     83   if(!strptime(buf, "%d-%b-%YT%H:%M:%S;", time)) {
     84     res = RES_BAD_ARG;
     85     goto error;
     86   }
     87 
     88 exit:
     89   return res;
     90 error:
     91   ERROR(smeteo, "%s:%zu: error parsing time '%s' '%s' -- %s\n",
     92     txtrdr_get_name(txtrdr),
     93     txtrdr_get_line_num(txtrdr),
     94     date ? date : "(null)",
     95     hour ? hour : "(null)",
     96     res_to_cstr(res));
     97   goto exit;
     98 }
     99 
    100 static res_T
    101 parse_double
    102   (struct smeteo* smeteo,
    103    struct txtrdr* txtrdr,
    104    const char* name,
    105    const char* str,
    106    const double min, /* Inclusive boundary */
    107    const double max, /* Inclusive boundary */
    108    double* out_dbl)
    109 {
    110   double dbl = 0;
    111   res_T res = RES_OK;
    112   ASSERT(smeteo && txtrdr && name && min <= max && out_dbl);
    113 
    114   if(!str) { res = RES_BAD_ARG; goto error; }
    115 
    116   res = cstr_to_double(str, &dbl);
    117   if(res == RES_OK && (dbl < min || max < dbl)) res = RES_BAD_ARG;
    118   if(res == RES_OK && IS_NaN(dbl)) res = RES_BAD_ARG;
    119   if(res != RES_OK) goto error;
    120 
    121   *out_dbl = dbl;
    122 
    123 exit:
    124   return res;
    125 error:
    126   ERROR(smeteo, "%s:%zu: error parsing %s '%s' -- %s\n",
    127     txtrdr_get_name(txtrdr),
    128     txtrdr_get_line_num(txtrdr),
    129     name,
    130     str ? str : "(null)",
    131     res_to_cstr(res));
    132   goto exit;
    133 }
    134 
    135 static res_T
    136 parse_line(struct smeteo* smeteo, struct txtrdr* txtrdr)
    137 {
    138   struct smeteo_entry entry = SMETEO_ENTRY_NULL;
    139 
    140   /* Shortwave downward flux parsed but not stored internally because it is
    141    * equal to the sum of the direct and diffuse components, both of which are
    142    * analyzed and stored */
    143   double SWdn = 0;
    144 
    145   /* Shortwave downward shortwave flux calculated from the sum of its
    146    * components, and the epsilon used to compare it with the one read */
    147   double SWdn_sum = 0;
    148   double SWdn_eps = 0;
    149 
    150   char* line = NULL;
    151   char* date = NULL;
    152   char* hour = NULL;
    153   char* tk_ctx = NULL;
    154   res_T res = RES_OK;
    155 
    156   ASSERT(smeteo && txtrdr && txtrdr_get_line(txtrdr));
    157 
    158   line = txtrdr_get_line(txtrdr);
    159 
    160   date = strtok_r(line, " \t", &tk_ctx);
    161   hour = strtok_r(NULL, " \t", &tk_ctx);
    162   res = parse_time(smeteo, txtrdr, date, hour, &entry.time);
    163   if(res != RES_OK) goto error;
    164 
    165   if(entry.time.tm_year + 1900 < 1850) { /* tm_year: year minus 1900 */
    166     ERROR(smeteo,
    167       "%s:%zu: unexpected date `%s'. "
    168       "The year should be greater than or equal to 1850.\n",
    169       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr), date);
    170     res = RES_BAD_ARG;
    171     goto error;
    172   }
    173 
    174   #define PARSE_DOUBLE(Name, Var, Min, Max) { \
    175     char* str = strtok_r(NULL, " \t", &tk_ctx); \
    176     res = parse_double(smeteo, txtrdr, Name, str, Min, Max, &Var); \
    177     if(res != RES_OK) goto error; \
    178   } (void)0
    179 
    180   #define PARSE_ATTRIB(Name, Var, Min, Max) \
    181     PARSE_DOUBLE(Name, entry.Var, Min, Max)
    182 
    183   PARSE_ATTRIB("surface temperature [K]", Tsrf, 0, INF);
    184   PARSE_ATTRIB("atmosphere temperature [K]", Tatm, 0, INF);
    185   PARSE_ATTRIB("air humidity [g(water)/kg(air)]", Ahum, 0, INF);
    186   PARSE_ATTRIB("relative humidity", Rhum, 0, 100);
    187   PARSE_DOUBLE("shortwave downward flux [W/m^2]", SWdn, 0, INF);
    188   PARSE_ATTRIB("direct shortwave downward flux [W/m^2]", SWdn_direct, 0, INF);
    189   PARSE_ATTRIB("diffuse shortwave downward flux [W/m^2]", SWdn_diffuse, 0, INF);
    190   PARSE_ATTRIB("shortwave upward flux [W/m^2]", SWup, 0, INF);
    191   PARSE_ATTRIB("radiative temperature [K]", Trad, 0, INF);
    192   PARSE_ATTRIB("convection coefficient [W/K/m^2]", H, -INF, INF);
    193   PARSE_ATTRIB("latent flux [W/m^2]", LE, -INF, INF);
    194   PARSE_ATTRIB("number of days since 1850", day_1850, 0, INF);
    195 
    196   #undef PARSE_DOUBLE
    197   #undef PARSE_ATTRIB
    198 
    199   SWdn_sum = entry.SWdn_direct + entry.SWdn_diffuse;
    200   SWdn_eps = SWdn*1e-6;
    201   if(!eq_eps(SWdn_sum, SWdn, SWdn*1e-6)) {
    202     WARN(smeteo,
    203       "%s:%zu: suspicious downward flux in shortwave. It does not appear to be "
    204       "(approximately) equal to the sum of its components (direct + diffuse): "
    205       "%g != %g +/- %g [W/m^2]\n",
    206       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr),
    207       SWdn, SWdn_sum, SWdn_eps);
    208   }
    209 
    210   res = darray_entry_push_back(&smeteo->entries, &entry);
    211   if(res != RES_OK) {
    212     ERROR(smeteo, "%s:%zu: error registering data -- %s\n",
    213       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr), res_to_cstr(res));
    214     goto error;
    215   }
    216 
    217 exit:
    218   return res;
    219 error:
    220   goto exit;
    221 }
    222 
    223 static res_T
    224 parse_header_var
    225   (struct smeteo* smeteo,
    226    struct txtrdr* txtrdr,
    227    const char* name,
    228    const double min, /* Inclusive boundary */
    229    const double max, /* Inclusive boundary */
    230    double* var)
    231 {
    232   char* line = NULL;
    233   char* tk = NULL;
    234   char* tk_ctx = NULL;
    235 
    236   res_T res = RES_OK;
    237   ASSERT(smeteo && txtrdr && name && min <= max);
    238 
    239   if((res = txtrdr_read_line(txtrdr)) != RES_OK) {
    240     ERROR_READ_LINE(smeteo, txtrdr, res);
    241     goto error;
    242   }
    243 
    244   if((line = txtrdr_get_line(txtrdr)) == NULL) {
    245     ERROR(smeteo, "%s:%zu: missing %s\n",
    246       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr), name);
    247     res = RES_BAD_ARG;
    248     goto error;
    249   }
    250 
    251   tk = strtok_r(line, " \t", &tk_ctx);
    252   res = parse_double(smeteo, txtrdr, name, tk, min, max, var);
    253   if(res != RES_OK) goto error;
    254 
    255 exit:
    256   return res;
    257 error:
    258   goto exit;
    259 }
    260 
    261 static res_T
    262 parse_header(struct smeteo* smeteo, struct txtrdr* txtrdr)
    263 {
    264   double ndate = 0;
    265   res_T res = RES_OK;
    266   ASSERT(smeteo && txtrdr);
    267 
    268   #define PARSE_VAR(Name, Min, Max, Var) { \
    269     res = parse_header_var(smeteo, txtrdr, Name, Min, Max, Var); \
    270     if(res != RES_OK) goto error; \
    271   } (void)0
    272 
    273   PARSE_VAR("albedo", 0, 1, &smeteo->albedo);
    274   PARSE_VAR("longitude", -180, 180, &smeteo->longitude);
    275   PARSE_VAR("latitude", -90, 90, &smeteo->latitude);
    276 
    277   /* To simplify, parse the number of intervals in double precision and ensure
    278    * that it can encode an integer. The representation of doubles allows all
    279    * integers up to 2^48 to be represented. This is therefore the upper limit of
    280    * the analyzed value, since even if a larger integer could be analyzed, there
    281    * would be no guarantee that it would actually correspond to the integer
    282    * entered */
    283   PARSE_VAR("date count", 0, (double)(1llu<<48), &ndate);
    284 
    285   #undef PARSE_VAR
    286 
    287   /* Ensure that the number of dates is an integer
    288    * (for simplicity, it has been analyzed as a double) */
    289   if(ndate != (size_t)ndate) {
    290     ERROR(smeteo, "%s:%zu: invalid date count `%g'\n",
    291       txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr), ndate);
    292     res = RES_OK;
    293     goto error;
    294   }
    295 
    296   /* Pre-allocate the list of time intervals */
    297   res = darray_entry_reserve(&smeteo->entries, (size_t)ndate);
    298   if(res != RES_OK) {
    299     ERROR(smeteo,
    300       "%s: error allocating the data list by time interval -- %s\n",
    301       txtrdr_get_name(txtrdr), res_to_cstr(res));
    302     goto error;
    303   }
    304 
    305 exit:
    306   return res;
    307 error:
    308   goto exit;
    309 }
    310 
    311 static res_T
    312 check_interval_duration
    313   (const struct smeteo* smeteo,
    314    const struct txtrdr* txtrdr,
    315    const size_t id, /* Index of the interval to check */
    316    const double expected_duration) /* [day/1850] */
    317 {
    318   const struct smeteo_entry* entries = NULL;
    319   const double eps = expected_duration*1e-6;
    320   double duration = 0;
    321   res_T res = RES_OK;
    322 
    323   ASSERT(smeteo && txtrdr && expected_duration > 0);
    324   ASSERT(id < darray_entry_size_get(&smeteo->entries));
    325 
    326   entries = darray_entry_cdata_get(&smeteo->entries);
    327 
    328   if(id == 0) {
    329     /* The dates of the intervals are relative to midnight on January 1, 1850.
    330      * In addition, their time is defined at the center of the interval. Thus,
    331      * for the first interval, calculate its duration by multiplying its date
    332      * relative to January 1, 1850 by 2 */
    333     duration = entries[0].day_1850*2;
    334   } else {
    335     duration = entries[id-0].day_1850 - entries[id-1].day_1850;
    336   }
    337 
    338   if(!eq_eps(duration, expected_duration, eps)) {
    339      ERROR(smeteo,
    340        "%s:%lu: invalid time interval. "
    341        "It is calculated from the \"day_1850\" variables between consecutive "
    342        "intervals. The interval duration must be constant, which is not the "
    343        "case here. "
    344        "Expected duration: %g [day/1850]; current duration: %g [day/1850] -- "
    345        "%g != %g +/- %g [day/1850]\n",
    346        txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr),
    347        expected_duration, duration, expected_duration, duration, eps);
    348      res = RES_BAD_ARG;
    349      goto error;
    350   }
    351 
    352 exit:
    353   return res;
    354 error:
    355   goto exit;
    356 }
    357 
    358 static res_T
    359 load_stream(struct smeteo* smeteo, FILE* fp, const char* name)
    360 {
    361   struct txtrdr* txtrdr = NULL;
    362   size_t i = 0;
    363   size_t nintervals = 0;
    364   double duration_ref = -1; /* Reference duration of an interval */
    365   res_T res = RES_OK;
    366   ASSERT(smeteo && name);
    367 
    368   reset(smeteo);
    369 
    370   res = txtrdr_stream(smeteo->allocator, fp, name, '#', &txtrdr);
    371   if(res != RES_OK) goto error;
    372 
    373   if((res = str_set(&smeteo->filename, name)) != RES_OK) {
    374     ERROR(smeteo, "Error copying file name '%s' -- %s\n", name, res_to_cstr(res));
    375     goto error;
    376   }
    377 
    378   if((res = parse_header(smeteo, txtrdr)) != RES_OK) goto error;
    379 
    380   nintervals = darray_entry_capacity(&smeteo->entries);
    381 
    382   FOR_EACH(i, 0, nintervals) {
    383     if((res = txtrdr_read_line(txtrdr)) != RES_OK) {
    384       ERROR_READ_LINE(smeteo, txtrdr, res);
    385       goto error;
    386     }
    387 
    388     if(!txtrdr_get_cline(txtrdr)) {
    389       WARN(smeteo,
    390         "%s: missing data. %lu meteorological data sets varying over time are "
    391         "expected, but only %lu are provided.\n",
    392         txtrdr_get_name(txtrdr), nintervals, i);
    393       break;
    394     }
    395 
    396     if((res = parse_line(smeteo, txtrdr)) != RES_OK) goto error;
    397 
    398     if(i == 0) {
    399       /* Calculate the reference of the interval duration.
    400        *
    401        * The dates of the intervals are relative to midnight on January 1, 1850
    402        * UTC +00:00.  In addition, their time is defined at the center of the
    403        * interval. Thus, for the first interval, calculate its duration by
    404        * multiplying its date relative to January 1, 1850 by 2. This will be the
    405        * reference duration*/
    406        duration_ref = darray_entry_cdata_get(&smeteo->entries)[i].day_1850 * 2;
    407 
    408     } else {
    409       res = check_interval_duration(smeteo, txtrdr, i, duration_ref);
    410       if(res != RES_OK) goto error;
    411 
    412       /* Note that although this is a constraint of the smeteo file format, it
    413        * is not actually necessary to check that the intervals are sorted in
    414        * ascending order. Checking the duration of the interval will also reveal
    415        * an invalid order, as the duration would be negative when calculated
    416        * from the temporal center of two consecutive intervals. Unless the
    417        * day_1850 variables are invalid in relation to the date of the
    418        * intervals. But in this situation, checking the order would also give a
    419        * false positive. Not to mention that the correspondence between the date
    420        * and the value day_1850 should then be verified. However, this would be
    421        * difficult to achieve, as converting a date to the "day_1850" format
    422        * seems complicated given all the special cases related to time
    423        * conversion (leap years, leap seconds). It would be interesting to study
    424        * how this is done by the user who generates the smeteo files. */
    425     }
    426   }
    427 
    428 exit:
    429   if(txtrdr) txtrdr_ref_put(txtrdr);
    430   return res;
    431 error:
    432   goto exit;
    433 }
    434 
    435 /*******************************************************************************
    436  * Exported functions
    437  ******************************************************************************/
    438 SMETEO_API res_T
    439 smeteo_load(struct smeteo* smeteo, const char* filename)
    440 {
    441   FILE* fp = NULL;
    442   res_T res = RES_OK;
    443 
    444   if(!smeteo || !filename) { res = RES_BAD_ARG; goto error; }
    445 
    446   if(!(fp = fopen(filename, "r"))) {
    447     ERROR(smeteo, "Error opening file '%s' -- %s\n", filename, strerror(errno));
    448     res = RES_IO_ERR;
    449     goto error;
    450   }
    451 
    452   if((res = load_stream(smeteo, fp, filename)) != RES_OK) goto error;
    453 
    454 exit:
    455   if(fp) CHK(fclose(fp) == 0);
    456   return res;
    457 error:
    458   goto exit;
    459 }
    460 
    461 SMETEO_API res_T
    462 smeteo_load_stream(struct smeteo* smeteo, FILE* fp, const char* name)
    463 {
    464   if(!smeteo || !fp || !name) return RES_BAD_ARG;
    465   return load_stream(smeteo, fp, name);
    466 }