# NOTE: the imports below are inferred from the code in this module; `ras` (a
# raise-on-false assert helper) and `get_last_thursday_in_month` are assumed
# to be provided elsewhere in this package.
import datetime
import logging
import math

import numpy as np
import netCDF4 as nc4
import pytz
from tqdm import tqdm


def find_index_first_greater_or_equal(np_array, value, tol=1e-6):
    """Find the index of the first value of np_array that is
    greater or equal than value.
    Input:
        - np_array: numpy array in which to look, should
            be monotonic and 1D.
        - value: the value to use as a reference for
            comparison.
        - tol: a tolerance for performing the comparison.
    Output:
        - the index of the first entry that is greater
            or equal to value.
    If no valid value, raise an error.
    """

    ras(isinstance(np_array, np.ndarray))
    np_array = np_array.squeeze()
    ras(len(np_array.shape) == 1)

    assert_strict_monotonic(np_array)

    # check against the array maximum, with the same tolerance as the
    # comparison below, so the check holds whether the array is
    # increasing or decreasing
    if np.max(np_array) < value - tol:
        raise ValueError("no entry greater than or equal to the requested value")

    first_index = np.argmax(np_array >= value - tol)

    return first_index
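
# A minimal usage sketch (not part of the original module): on a strictly
# increasing array, the first index whose entry reaches the value (within
# tol) is returned.
# >>> find_index_first_greater_or_equal(np.array([0.0, 1.5, 3.0]), 1.5)
# 1
# >>> find_index_first_greater_or_equal(np.array([0.0, 1.5, 3.0]), 5.0)
# ValueError: no entry greater than or equal to the requested value
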
def assert_is_utc_datetime(date_in):
    """Assert that date_in is a UTC datetime."""
    ras(isinstance(date_in, datetime.datetime))

    if not (date_in.tzinfo == pytz.utc or
            date_in.tzinfo == datetime.timezone.utc):
        raise ValueError("not UTC: tzinfo is {}".format(date_in.tzinfo))

    if date_in.tzinfo == pytz.utc:
        logging.warning("prefer using datetime.timezone.utc to pytz.utc")
    def check_time_segment_within_bounds(self, segment_start, segment_end,
                                         bound_start, bound_end):
        """Check whether the segment [segment_start; segment_end] lies
        strictly inside the bounds [bound_start; bound_end] (endpoints
        excluded)."""
        assert_is_utc_datetime(segment_start)
        assert_is_utc_datetime(segment_end)
        assert_is_utc_datetime(bound_start)
        assert_is_utc_datetime(bound_end)

        ras(segment_start < segment_end)
        ras(bound_start < bound_end)

        return (segment_start > bound_start) and (segment_end < bound_end)
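
# Usage sketch (hedged; the enclosing class is not shown in this excerpt, so
# `obj` is a hypothetical instance): a segment strictly inside the bounds
# returns True.
# >>> utc = datetime.timezone.utc
# >>> obj.check_time_segment_within_bounds(
# ...     datetime.datetime(2020, 1, 2, tzinfo=utc),
# ...     datetime.datetime(2020, 1, 3, tzinfo=utc),
# ...     datetime.datetime(2020, 1, 1, tzinfo=utc),
# ...     datetime.datetime(2020, 1, 4, tzinfo=utc))
# True
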
def assert_10min_multiple(date_in):
    """Assert that date_in is a datetime that is a
    multiple of 10 minutes.
    """
    ras(isinstance(date_in, datetime.datetime))
    ras(date_in.second == 0)
    ras((date_in.minute % 10) == 0)
    ras(date_in.microsecond == 0)
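
# Usage sketch (not in the original): 10:20:00.000000 is a 10-minute
# multiple, 10:25:00 is not.
# >>> assert_10min_multiple(datetime.datetime(2020, 1, 1, 10, 20))  # passes
# >>> assert_10min_multiple(datetime.datetime(2020, 1, 1, 10, 25))  # raises
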
def warn_on_maintenance():
    """Warn if an API request is performed on the last Thursday of the
    month; this is when server maintenance takes place."""

    behavior = "warn"

    crrt_datetime = datetime.datetime.now(datetime.timezone.utc)
    last_thursday_of_month = get_last_thursday_in_month(crrt_datetime)

    ras(last_thursday_of_month.year == crrt_datetime.year)
    ras(last_thursday_of_month.month == crrt_datetime.month)

    if last_thursday_of_month.day == crrt_datetime.day:
        if behavior == "warn":
            logging.warning("maintenance of the API server may be happening!")
def assert_strict_monotonic(array, list_dimensions=None):
    """Check that an array is strictly monotonic. Raise a
    ValueError if not.
    Input:
        - array: a numpy array of any dimension.
        - list_dimensions: the list of dimensions on which to do
            the check. Check all dimensions if None (default).
    Output: None.
    Can raise:
        a ValueError indicating the first non monotonic dimension.
    """

    if list_dimensions is None:
        n_dim = len(np.shape(array))
        list_dimensions = range(n_dim)
    else:
        ras(isinstance(list_dimensions, list))

    for dim in list_dimensions:
        dim_diff = np.diff(array, axis=dim)
        if not (np.all(dim_diff < 0) or np.all(dim_diff > 0)):
            raise ValueError(
                "Array non stricly monotonic on dim {}".format(dim))
def datetime_range(datetime_start, datetime_end, step_timedelta):
    """Yield a datetime range, in the range [datetime_start; datetime_end[,
    with step step_timedelta."""
    assert_is_utc_datetime(datetime_start)
    assert_is_utc_datetime(datetime_end)
    ras(isinstance(step_timedelta, datetime.timedelta))
    ras(datetime_start < datetime_end)
    ras(step_timedelta > datetime.timedelta(0))

    crrt_time = datetime_start
    while crrt_time < datetime_end:
        yield crrt_time
        crrt_time += step_timedelta
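
# Usage sketch (not in the original): the end bound is exclusive, so hourly
# steps over [00:00; 03:00[ yield 00:00, 01:00 and 02:00.
# >>> utc = datetime.timezone.utc
# >>> list(datetime_range(datetime.datetime(2020, 1, 1, tzinfo=utc),
# ...                     datetime.datetime(2020, 1, 1, 3, tzinfo=utc),
# ...                     datetime.timedelta(hours=1)))
# [2020-01-01 00:00, 2020-01-01 01:00, 2020-01-01 02:00, all UTC]
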
def datetime_segments(datetime_start, datetime_end, step_timedelta):
    """Generate a succession of segments, that cover [datetime_start; datetime_end].
    The segments will have length step_timedelta, except possibly the last segment
    that may be shorter."""
    assert_is_utc_datetime(datetime_start)
    assert_is_utc_datetime(datetime_end)
    ras(isinstance(step_timedelta, datetime.timedelta))
    ras(datetime_start < datetime_end)
    ras(step_timedelta > datetime.timedelta(0))

    crrt_segment_start = datetime_start
    crrt_segment_end = crrt_segment_start + step_timedelta

    while True:
        if crrt_segment_end >= datetime_end:
            yield (crrt_segment_start, datetime_end)
            break
        else:
            yield (crrt_segment_start, crrt_segment_end)
            crrt_segment_start += step_timedelta
            crrt_segment_end += step_timedelta
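
# Usage sketch (not in the original): a 2.5-hour window cut into 1-hour steps
# yields two full segments plus a final half-hour segment.
# >>> utc = datetime.timezone.utc
# >>> list(datetime_segments(datetime.datetime(2020, 1, 1, tzinfo=utc),
# ...                        datetime.datetime(2020, 1, 1, 2, 30, tzinfo=utc),
# ...                        datetime.timedelta(hours=1)))
# [(00:00, 01:00), (01:00, 02:00), (02:00, 02:30)]
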
    def generate_netCDF4_dataset(self,
                                 datetime_start,
                                 datetime_end,
                                 list_station_ids=None,
                                 nc4_path="./data_kartverket_storm_surge.nc4"):
        """Generate a netCDF4 dataset of the storm surge data in the range
        [datetime_start; datetime_end].
        Input:
            - datetime_start, datetime_end: the UTC datetime bounds of the
                dataset.
            - list_station_ids: the list of station IDs to include; use all
                available stations if None (default).
            - nc4_path: the path of the netCDF4 file to write.
        """
        assert_is_utc_datetime(datetime_start)
        assert_is_utc_datetime(datetime_end)

        dict_station_data = self.get_stations_information()
        list_available_station_ids = sorted(list(dict_station_data.keys()))

        if list_station_ids is None:
            list_station_ids = list_available_station_ids
        else:
            for crrt_station in list_station_ids:
                ras(crrt_station in list_available_station_ids)

        ras(isinstance(list_station_ids, list))

        timestamps_vector = [
            crrt_datetime.timestamp() for crrt_datetime in datetime_range(
                datetime_start, datetime_end, self.resolution_timedelta)
        ]

        number_of_time_entries = len(timestamps_vector)

        with nc4.Dataset(nc4_path, "w", format="NETCDF4") as nc4_fh:
            nc4_fh.set_auto_mask(False)

            description_string = "Storm surge dataset from the Norwegian coast, " +\
                                 "built from the data obtained from kartverket web API, " +\
                                 "using the code at: " +\
                                 "MachineOcean-WP12/storm_surge/learn_error/prepare_data/prepare_data.py " +\
                                 "generated on {} ".format(datetime.datetime.now().isoformat()[:10]) +\
                                 "in all the following, except stated otherwise, CD (chart datum) ref level " +\
                                 "is used, units are cm, and all timestamps are UTC. "

            nc4_fh.Conventions = "CF-X.X"
            nc4_fh.title = "storm surge from kartverket API"
            nc4_fh.description = description_string
            nc4_fh.institution = "IT department, Norwegian Meteorological Institute, using data from Kartverket"
            nc4_fh.Contact = "*****@*****.**"

            _ = nc4_fh.createDimension('station', len(list_station_ids))
            _ = nc4_fh.createDimension('time', number_of_time_entries)

            stationid = nc4_fh.createVariable("stationid", str, ('station'))
            latitude = nc4_fh.createVariable('latitude', 'f4', ('station'))
            longitude = nc4_fh.createVariable('longitude', 'f4', ('station'))
            timestamps = nc4_fh.createVariable('timestamps', 'i8', ('time'))
            observation = nc4_fh.createVariable('observation', 'f4',
                                                ('station', 'time'))
            prediction = nc4_fh.createVariable('prediction', 'f4',
                                               ('station', 'time'))
            timestamp_start = nc4_fh.createVariable('timestamp_start', 'i8',
                                                    ('station'))
            timestamp_end = nc4_fh.createVariable('timestamp_end', 'i8',
                                                  ('station'))

            stationid.description = "unique ID string of each station"
            stationid.units = "none, 3 capital letters"

            latitude.description = "latitude of each station"
            latitude.units = "degree North"

            longitude.description = "longitude of each station"
            longitude.units = "degree East"

            timestamps.description = "common time base for all data"
            timestamps.units = "POSIX timestamp"

            observation.description = "water level observation at each station over the time base, "\
                "CD (Chart Datum) reference level"
            observation.units = "cm, fill value: 1.0e37"
            observation.standard_name = "observed_sea_surface_height_at_chartdatum"

            prediction.description = "water level prediction by Kartverket, " +\
                "using only astronomic tide effects, at each station over the time base, "\
                "CD (Chart Datum) reference level"
            prediction.units = "cm, fill value: 1.0e37"
            prediction.standard_name = "sea_surface_height_amplitude_due_to_earth_tide"

            timestamp_start.description = "first timestamp for which data are available, " +\
                "for each station; there may be holes though"
            timestamp_start.units = "POSIX timestamp"

            timestamp_end.description = "last timestamp for which data are available, " +\
                "for each station; there may be holes though"
            timestamp_end.units = "POSIX timestamp"

            timestamps[:] = np.array(timestamps_vector)

            for ind, crrt_station_id in tqdm(enumerate(list_station_ids),
                                             desc="station",
                                             total=len(list_station_ids)):
                stationid[ind] = crrt_station_id
                latitude[ind] = dict_station_data[crrt_station_id]["latitude"]
                longitude[ind] = dict_station_data[crrt_station_id][
                    "longitude"]

                dict_crrt_timebounds = self.get_individual_station_time_bounds(
                    crrt_station_id)
                timestamp_start[ind] = dict_crrt_timebounds["first"].timestamp(
                )
                timestamp_end[ind] = dict_crrt_timebounds["last"].timestamp()

                # initialize to -fill_value so that entries never filled in
                # below remain clearly marked as invalid
                np_observations = -self.fill_value * np.ones(
                    (number_of_time_entries, ))
                np_predictions = -self.fill_value * np.ones(
                    (number_of_time_entries, ))

                crrt_filling_index = 0
                approx_nbr_segments = math.ceil(
                    (datetime_end - datetime_start) / self.segment_duration)

                for crrt_segment in tqdm(datetime_segments(
                        datetime_start, datetime_end, self.segment_duration),
                                         desc="segment",
                                         total=approx_nbr_segments):

                    dict_crrt_segment =\
                        self.get_individual_station_data_between_datetimes(crrt_station_id,
                                                                           crrt_segment[0],
                                                                           crrt_segment[1])

                    # the segment entries follow the common time base in
                    # chronological order (enforced by the ras check below),
                    # so they can be filled sequentially
                    for crrt_time in dict_crrt_segment:
                        ras(timestamps[crrt_filling_index] ==
                            crrt_time.timestamp())
                        np_observations[crrt_filling_index] = \
                            dict_crrt_segment[crrt_time]["observation_cm_CD"]
                        np_predictions[crrt_filling_index] = \
                            dict_crrt_segment[crrt_time]["prediction_cm_CD"]
                        crrt_filling_index += 1

                observation[ind, :] = np_observations
                prediction[ind, :] = np_predictions

        # TODO: populate any remaining metadata
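
# Usage sketch (hedged; the enclosing class and its attributes such as
# resolution_timedelta, segment_duration and fill_value are not shown in this
# excerpt, so `accessor` is a hypothetical instance and "OSL" a hypothetical
# 3-letter station ID):
# >>> utc = datetime.timezone.utc
# >>> accessor.generate_netCDF4_dataset(
# ...     datetime.datetime(2020, 1, 1, tzinfo=utc),
# ...     datetime.datetime(2020, 2, 1, tzinfo=utc),
# ...     list_station_ids=["OSL"],
# ...     nc4_path="./storm_surge_jan_2020.nc4")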