Example #1
    def get_forecast(self,
                     input_source_types,
                     utc_period,
                     t_c,
                     geo_location_criteria=None):
        """Get shyft source vectors of time series for input_source_types

        Parameters
        ----------
        input_source_types: list
            List of source types to retrieve (precipitation, temperature, ...)
        utc_period: api.UtcPeriod
            The utc time period that should (as a minimum) be covered.
        t_c: long
            Forecast specification; return newest forecast older than t_c.
        geo_location_criteria: object, optional
            Some type (to be decided), extent (bbox + coord.ref).

        Returns
        -------
        geo_loc_ts: dictionary
            dictionary keyed by time series name, where values are api vectors of geo
            located timeseries.
        """
        # a raw string keeps the regex free of invalid escape sequences
        filename = self._get_files(t_c, r"(\d{8})([T_])(\d{2})(.*\.nc)")
        with Dataset(filename) as dataset:
            if utc_period is None:
                time = dataset.variables.get("time", None)
                conv_time = convert_netcdf_time(time.units, time)
                start_t = conv_time[0]
                last_t = conv_time[-1]
                utc_period = api.UtcPeriod(int(start_t), int(last_t))

            return self._get_data_from_dataset(dataset, input_source_types,
                                               utc_period,
                                               geo_location_criteria)
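
A hypothetical usage sketch for this method; repo is assumed to be an already-constructed instance of the repository class, and a matching forecast file is assumed to exist on disk:

from shyft import api

utc = api.Calendar()
t_c = utc.time(2018, 1, 1, 6, 0, 0)  # want the newest forecast older than this time
period = api.UtcPeriod(t_c, t_c + api.deltahours(48))
sources = repo.get_forecast(["temperature", "precipitation"],
                            utc_period=period, t_c=t_c)  # repo is hypothetical
temperature_vec = sources["temperature"]  # api vector of geo-located time series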
Example #2
    def get_forecast_ensemble(self, input_source_types, utc_period, t_c, geo_location_criteria=None):
        """
        Parameters
        ----------
        input_source_types: list
            List of source types to retrieve (precipitation, temperature, ...)
        utc_period: api.UtcPeriod
            The utc time period that should (as a minimum) be covered.
        t_c: long
            Forecast specification; return newest forecast older than t_c.
        geo_location_criteria: object
            Some type (to be decided), extent (bbox + coord.ref).

        Returns
        -------
        ensemble: list of geo_loc_ts dictionaries
            Dictionaries are keyed by time series type, with values
            being api vectors of geo located timeseries.
        """

        # a raw string keeps the regex free of invalid escape sequences
        filename = self._get_files(t_c, r"_(\d{8})([T_])(\d{2})(Z)?\.nc$")
        with Dataset(filename) as dataset:
            if utc_period is None:
                time = dataset.variables.get("time", None)
                conv_time = convert_netcdf_time(time.units, time)
                start_t = conv_time[0]
                last_t = conv_time[-1]
                utc_period = api.UtcPeriod(int(start_t), int(last_t))
            return self._get_data_from_dataset(dataset, input_source_types, utc_period,
                                               geo_location_criteria,
                                               ensemble_member='all')
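
The ensemble variant returns a list with one geo_loc_ts dictionary per ensemble member, so a caller indexes the member first. A hypothetical usage sketch, reusing the assumed repo and t_c from the sketch above:

ensembles = repo.get_forecast_ensemble(["temperature"], utc_period=None, t_c=t_c)
member0_temperature = ensembles[0]["temperature"]  # member 0, api vector of geo-located time series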
Example #3
    def remove_tp_data(self, period: UtcPeriod):
        """
        delete data given within the time period

        :param period:
        :return:
        """
        time_series_cropped = None

        with Dataset(self.file_path, 'a') as ds:
            # 1. load the data
            time_variable = 'time'
            time = ds.variables.get(time_variable, None)

            if time is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. time not found.')
            var = ds.variables.get(self.ts_meta_info.variable_name, None)

            if var is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. variable {0} not found.'
                    .format(self.ts_meta_info.variable_name))

            if len(time):
                # 2. get indices of the data to delete
                time_utc = convert_netcdf_time(time.units, time)

                idx_min = np.searchsorted(time_utc, period.start, side='left')
                idx_max = np.searchsorted(time_utc, period.end, side='right')

                # check if there is data outside the range
                if idx_max - idx_min != len(time):
                    # 3. crop the data array
                    if idx_max < len(time):
                        time_cropped = np.append(time[0:idx_min], time[idx_max:])
                        var_cropped = np.append(var[0:idx_min], var[idx_max:])
                    else:
                        time_cropped = np.append(time[0:idx_min], [])
                        var_cropped = np.append(var[0:idx_min], [])
                    # extrapolate the end of the last interval by repeating the
                    # previous step (assumes at least two points remain)
                    last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                    ta = TimeAxis(
                        UtcTimeVector.from_numpy(time_cropped.astype(np.int64)),
                        int(last_time_point))
                    time_series_cropped = TimeSeries(
                        ta, dv.from_numpy(var_cropped),
                        point_fx.POINT_INSTANT_VALUE
                    )  # TODO: is this the correct point policy?

        # 4. save the cropped data
        self.create_new_file()
        if time_series_cropped:
            self.append_ts_data(time_series_cropped)
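
The index arithmetic in step 2 uses np.searchsorted with different side arguments to find the half-open index range [idx_min, idx_max) covered by the period. A minimal, self-contained sketch of that behaviour:

import numpy as np

time_utc = np.arange(0, 10) * 3600  # hourly time points, seconds since epoch
period_start, period_end = 2 * 3600, 5 * 3600

idx_min = np.searchsorted(time_utc, period_start, side='left')   # first index inside
idx_max = np.searchsorted(time_utc, period_end, side='right')    # one past the last
assert (idx_min, idx_max) == (2, 6)
# removing time_utc[idx_min:idx_max] deletes exactly the points inside the period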
Example #4
    def test_unit_conversion(self):
        utc = api.Calendar()
        # use hours both before and after the epoch to ensure the sign is ok
        t_num = np.arange(-24, 24, 1, dtype=np.float64)
        t_converted = convert_netcdf_time('hours since 1970-01-01 00:00:00',
                                          t_num)
        t_axis = api.TimeAxisFixedDeltaT(utc.time(1969, 12, 31, 0, 0, 0),
                                         api.deltahours(1), 2 * 24)
        for i in range(t_axis.size()):
            self.assertEqual(t_converted[i], t_axis(i).start)
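
The unit string follows the CF convention '<unit> since <timestamp>'. Below is a minimal sketch of the conversion this test exercises; it is not the library implementation, which also handles calendars and other unit strings:

import numpy as np
from datetime import datetime, timezone

def convert_netcdf_time_sketch(units, values):
    # parse e.g. 'hours since 1970-01-01 00:00:00' into seconds since the epoch
    unit, _, ref = units.partition(' since ')
    scale = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400}[unit]
    t0 = datetime.fromisoformat(ref).replace(tzinfo=timezone.utc).timestamp()
    return (np.asarray(values, dtype=np.float64) * scale + t0).astype(np.int64)

# -24 hours maps to 1969-12-31T00:00:00Z, i.e. -86400 s, matching the test above
assert convert_netcdf_time_sketch('hours since 1970-01-01 00:00:00', [-24])[0] == -86400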
Example #5
    def _get_data_from_dataset(self,
                               dataset,
                               input_source_types,
                               utc_period,
                               geo_location_criteria,
                               ensemble_member=None):

        if geo_location_criteria is not None:
            self._bounding_box = geo_location_criteria

        if "wind_speed" in input_source_types:
            input_source_types = list(
                input_source_types)  # We change input list, so take a copy
            input_source_types.remove("wind_speed")
            input_source_types.append("x_wind")
            input_source_types.append("y_wind")
        no_temp = False
        if "temperature" not in input_source_types: no_temp = True
        if "relative_humidity" in input_source_types:
            if not isinstance(input_source_types, list):
                input_source_types = list(
                    input_source_types)  # We change input list, so take a copy
            input_source_types.remove("relative_humidity")
            input_source_types.extend(
                ["surface_air_pressure", "dew_point_temperature_2m"])
            if no_temp: input_source_types.extend(["temperature"])

        unit_ok = {
            k: dataset.variables[k].units in self.var_units[k]
            for k in dataset.variables.keys()
            if self._arome_shyft_map.get(k, None) in input_source_types
        }
        if not all(unit_ok.values()):
            raise EcDataRepositoryError(
                "The following variables have the wrong unit: {}.".format(
                    ', '.join([k for k, v in unit_ok.items() if not v])))

        raw_data = {}
        x = dataset.variables.get("longitude", None)
        y = dataset.variables.get("latitude", None)
        time = dataset.variables.get("time", None)
        if x is None or y is None or time is None:
            raise EcDataRepositoryError("Something is wrong with the dataset."
                                        " x/y coords or time not found.")
        if not (all(var.units in ['km', 'm'] for var in [x, y])
                and x.units == y.units):
            raise EcDataRepositoryError(
                "The units for the x and y coordinates should both be either m or km.")
        coord_conv = 1.
        if x.units == 'km':
            coord_conv = 1000.
        time = convert_netcdf_time(time.units, time)
        data_cs = dataset.variables.get("projection_regular_ll", None)
        if data_cs is None:
            raise EcDataRepositoryError(
                "No coordinate system information in dataset.")

        idx_min = np.searchsorted(time, utc_period.start, side='left')
        idx_max = np.searchsorted(time, utc_period.end, side='right')
        issubset = idx_max < len(time) - 1
        time_slice = slice(idx_min, idx_max)
        x, y, (m_x, m_y), _ = self._limit(x[:] * coord_conv, y[:] * coord_conv,
                                          data_cs.proj4, self.shyft_cs)
        for k in dataset.variables.keys():
            if self._arome_shyft_map.get(k, None) in input_source_types:
                if k in self._shift_fields and issubset:  # Add one to time slice
                    data_time_slice = slice(time_slice.start,
                                            time_slice.stop + 1)
                else:
                    data_time_slice = time_slice
                data = dataset.variables[k]
                dims = data.dimensions
                data_slice = len(data.dimensions) * [slice(None)]
                if isinstance(ensemble_member, int):
                    data_slice[dims.index("ensemble_member")] = ensemble_member
                elif ensemble_member == 'all':
                    data_slice[dims.index("ensemble_member")] = slice(
                        0, dataset.dimensions['ensemble_member'].size, None)
                data_slice[dims.index("longitude")] = m_x
                data_slice[dims.index("latitude")] = m_y
                data_slice[dims.index("time")] = data_time_slice
                # index with a tuple; indexing with a list is deprecated
                pure_arr = data[tuple(data_slice)]
                if isinstance(pure_arr, np.ma.core.MaskedArray):
                    pure_arr = pure_arr.filled(np.nan)
                raw_data[self._arome_shyft_map[k]] = pure_arr, k

        if self.elevation_file is not None:
            _x, _y, z = self._read_elevation_file(self.elevation_file)
            # x/y coordinates should match the dataset grid
            assert np.linalg.norm(x - _x) < 1.0e-10
            assert np.linalg.norm(y - _y) < 1.0e-10
        elif any(nm in dataset.variables.keys()
                 for nm in ['altitude', 'surface_geopotential']):
            var_nm = ('altitude' if 'altitude' in dataset.variables.keys()
                      else 'surface_geopotential')
            data = dataset.variables[var_nm]
            dims = data.dimensions
            data_slice = len(data.dimensions) * [slice(None)]
            data_slice[dims.index("longitude")] = m_x
            data_slice[dims.index("latitude")] = m_y
            z = data[tuple(data_slice)]
            shp = z.shape
            z = z.reshape(shp[-2], shp[-1])
            if var_nm == 'surface_geopotential':
                z /= self._G
        else:
            raise EcDataRepositoryError("No elevations found in dataset"
                                        ", and no elevation file given.")

        pts = np.dstack((x, y, z)).reshape(*(x.shape + (3, )))

        # Make sure requested fields are valid, and that dataset contains the requested data.
        if not self.allow_subset and not (set(
                raw_data.keys()).issuperset(input_source_types)):
            raise EcDataRepositoryError("Could not find all data fields")

        if set(("x_wind", "y_wind")).issubset(raw_data):
            x_wind, _ = raw_data.pop("x_wind")
            y_wind, _ = raw_data.pop("y_wind")
            raw_data["wind_speed"] = np.sqrt(
                np.square(x_wind) + np.square(y_wind)), "wind_speed"
        if set(("surface_air_pressure",
                "dew_point_temperature_2m")).issubset(raw_data):
            sfc_p, _ = raw_data.pop("surface_air_pressure")
            dpt_t, _ = raw_data.pop("dew_point_temperature_2m")
            if no_temp:
                sfc_t, _ = raw_data.pop("temperature")
            else:
                sfc_t, _ = raw_data["temperature"]
            raw_data["relative_humidity"] = self.calc_RH(
                sfc_t, dpt_t, sfc_p), "relative_humidity"
        if ensemble_member == 'all':
            returndata = []
            # iterate over all ensemble members present in the dataset
            n_members = dataset.dimensions['ensemble_member'].size
            for i in range(n_members):
                ensemble_raw = {
                    k: (raw_data[k][0][:, 0, i, :, :], raw_data[k][1])
                    for k in raw_data.keys()
                }
                extracted_data = self._transform_raw(ensemble_raw,
                                                     time[time_slice],
                                                     issubset=issubset)
                returndata.append(
                    self._geo_ts_to_vec(
                        self._convert_to_timeseries(extracted_data), pts))
        else:
            extracted_data = self._transform_raw(raw_data,
                                                 time[time_slice],
                                                 issubset=issubset)
            returndata = self._geo_ts_to_vec(
                self._convert_to_timeseries(extracted_data), pts)
        return returndata
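
Two of the derived fields above are simple element-wise computations: wind speed is the magnitude of the (x_wind, y_wind) components, and relative humidity comes from the repository's calc_RH given temperature, dew point and pressure. The wind-speed part as a standalone, runnable sketch:

import numpy as np

x_wind = np.array([[3.0, 0.0]])
y_wind = np.array([[4.0, 2.0]])
wind_speed = np.sqrt(np.square(x_wind) + np.square(y_wind))  # same formula as above
assert np.allclose(wind_speed, [[5.0, 2.0]])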
Example #6
    def test_unit_conversion(self):
        utc = api.Calendar()
        # use hours both before and after the epoch to ensure the sign is ok
        t_num = np.arange(-24, 24, 1, dtype=np.float64)
        t_converted = convert_netcdf_time('hours since 1970-01-01 00:00:00', t_num)
        t_axis = api.Timeaxis(utc.time(api.YMDhms(1969, 12, 31, 0, 0, 0)), api.deltahours(1), 2 * 24)
        for i in range(t_axis.size()):
            self.assertEqual(t_converted[i], t_axis(i).start)
Example #7
    def append_ts_data(self, time_series: TimeSeries):
        """
        ensure that the data-file content
        are equal to time_series for the time_series.time_axis.total_period().
        If needed, create and update the file meta-data.
        :param time_series:
        :return:
        """
        period = time_series.total_period()
        n_new_val = time_series.size()
        crop_data = False
        time_series_cropped = None

        with Dataset(self.file_path, 'a') as ds:
            # read time, from ts.time_axis.start()
            #  or last value of time
            # then consider if we should fill in complete time-axis ?
            #
            # figure out the start-index,
            # then
            # ds.time[startindex:] = ts.time_axis.numpy values
            # ds.temperature[startindex:] = ts.values.to_numpy()
            #
            # or if more advanced algorithm,
            #  first read
            #  diff
            #   result -> delete range, replace range, insert range..
            time_variable = 'time'
            time = ds.variables.get(time_variable, None)

            if time is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. time not found.')
            var = ds.variables.get(self.ts_meta_info.variable_name, None)

            if var is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. variable {0} not found.'
                    .format(self.ts_meta_info.variable_name))

            if len(time):
                time_utc = convert_netcdf_time(time.units, time)

                idx_min = np.searchsorted(time_utc, period.start, side='left')
                idx_max = np.searchsorted(
                    time_utc, period.end, side='left'
                )  # use 'left' since period.end = time_point(last_value)+dt
                idx_data_end = idx_min + n_new_val
                # move existing data if the new data overlaps it, or if the new
                # data's time lies before the saved time:
                if idx_min < len(time_utc) and idx_max < len(
                        time_utc) and idx_max - idx_min != n_new_val:
                    idx_last = len(time_utc)
                    time[idx_data_end:] = time[idx_max:idx_last]
                    var[idx_data_end:, 0] = var[idx_max:idx_last, 0]
                # insert new data
                time[idx_min:idx_data_end] = time_series.time_axis.time_points[:-1]
                var[idx_min:idx_data_end, 0] = time_series.values.to_numpy()
                # crop all data which should not be there
                if idx_max - idx_min - n_new_val > 0:
                    # there is leftover data at the end that must be deleted
                    idx_del_start = len(time) - idx_max + idx_min + n_new_val
                    crop_data = True
                    time_cropped = time[0:idx_del_start]
                    var_cropped = var[0:idx_del_start, 0]
                    # extrapolate the end of the last interval by repeating the
                    # previous step (assumes at least two points remain)
                    last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                    ta = TimeAxis(
                        UtcTimeVector.from_numpy(time_cropped.astype(np.int64)),
                        int(last_time_point))
                    time_series_cropped = TimeSeries(
                        ta, dv.from_numpy(var_cropped),
                        point_fx.POINT_INSTANT_VALUE)  # TODO: is this the right policy?

            else:
                time[:] = time_series.time_axis.time_points[:-1]
                var[:, 0] = time_series.values.to_numpy()

            ds.sync()

        if crop_data and time_series_cropped:
            self.create_new_file()
            self.append_ts_data(time_series_cropped)
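
Both this method and remove_tp_data extrapolate the end of the last interval with 2*t[-1] - t[-2], i.e. they assume the final step repeats once so the TimeAxis can be closed. A minimal sketch of that idiom (it requires at least two remaining points):

import numpy as np

time_points = np.array([0, 3600, 7200], dtype=np.int64)  # interval start points
last_time_point = 2 * time_points[-1] - time_points[-2]  # end of the final interval
assert last_time_point == 10800  # the 3600 s step is repeated once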