def test_filter_by_date_monthly():
    """
    Check monthly-resolution date filtering on a single-record frame.

    The record spans 2019-12-28 to 2020-01-28. The interval
    ``2019-12/2020-01`` is expected to keep it, whereas ``2020/2022`` and
    the single year ``2020`` are expected to filter it out.
    """
    record = {
        "STATION_ID": 1048,
        "PARAMETER": "climate_summary",
        "ELEMENT": "temperature_air_max_200",
        "FROM_DATE": parse_datetime("2019-12-28T00:00:00.000"),
        "TO_DATE": parse_datetime("2020-01-28T00:00:00.000"),
        "VALUE": 1.3,
        "QUALITY": None,
    }
    frame = pd.DataFrame.from_dict([record])

    # Interval that encloses the record.
    matching = frame.dwd.filter_by_date(
        "2019-12/2020-01", DWDObservationResolution.MONTHLY
    )
    assert not matching.empty

    # Interval and single date that are expected to yield nothing.
    for date_spec in ("2020/2022", "2020"):
        filtered = frame.dwd.filter_by_date(
            date_spec, DWDObservationResolution.MONTHLY
        )
        assert filtered.empty
def test_filter_by_date_annual():
    """
    Check annual-resolution date filtering on a single-record frame.

    The record spans 2019-01-01 to 2019-12-31. The interval
    ``2019-05/2019-09`` is expected to keep it, whereas ``2020/2022`` and
    the single year ``2020`` are expected to filter it out.
    """
    record = {
        "STATION_ID": 1048,
        "PARAMETER": "climate_summary",
        "ELEMENT": "temperature_air_max_200",
        "FROM_DATE": parse_datetime("2019-01-01T00:00:00.000"),
        "TO_DATE": parse_datetime("2019-12-31T00:00:00.000"),
        "VALUE": 1.3,
        "QUALITY": None,
    }
    frame = pd.DataFrame.from_dict([record])

    # Interval inside the year covered by the record.
    matching = frame.dwd.filter_by_date("2019-05/2019-09", TimeResolution.ANNUAL)
    assert not matching.empty

    # Interval and single date that are expected to yield nothing.
    for date_spec in ("2020/2022", "2020"):
        filtered = frame.dwd.filter_by_date(date_spec, TimeResolution.ANNUAL)
        assert filtered.empty
def __init__(
    self,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
) -> None:
    """
    Normalise and store the optional date range on the instance.

    Truthy values that are not already ``datetime`` instances are run
    through ``parse_datetime``; the resulting datetimes then get
    ``self.tz`` attached via ``replace(tzinfo=...)``. Falsy values are
    stored as ``None``.

    :param start_date: start date for filtering stations for their
        available data
    :param end_date: end date for filtering stations for their
        available data
    :raises StartDateEndDateError: if both dates are set and
        ``start_date`` is later than ``end_date``
    """
    # TODO: make datetimes timezone sensible
    if start_date and not isinstance(start_date, datetime):
        start_date = parse_datetime(start_date)
    if end_date and not isinstance(end_date, datetime):
        end_date = parse_datetime(end_date)

    # NOTE(review): replace() only swaps tzinfo, it does not convert the
    # wall-clock time; confirm this is intended for the type of self.tz.
    if start_date:
        start_date = start_date.replace(tzinfo=self.tz)
    else:
        start_date = None

    if end_date:
        end_date = end_date.replace(tzinfo=self.tz)
    else:
        end_date = None

    # Equal dates are accepted; only a reversed range is rejected.
    if start_date and end_date and start_date > end_date:
        raise StartDateEndDateError("'start_date' has to be before 'end_date'")

    self.start_date = start_date
    self.end_date = end_date
def test_parse_datetime():
    """
    Check that ``parse_datetime`` yields UTC-tagged datetimes.

    Covers a plain date, a full timestamp, and a timestamp given with
    hour precision only.
    """
    cases = (
        ("2020-05-01", (2020, 5, 1, 0, 0)),
        ("2020-05-01T13:14:15", (2020, 5, 1, 13, 14, 15)),
        ("2020-05-01T13", (2020, 5, 1, 13, 0)),
    )

    for text, fields in cases:
        parsed = parse_datetime(text)
        expected = datetime(*fields).replace(tzinfo=timezone("UTC"))
        assert parsed == expected
def filter_by_date(self, date: str, time_resolution: TimeResolution) -> pd.DataFrame:
    """
    Select the rows of ``self.df`` that match a date or a date interval.

    A ``date`` containing ``/`` is read as an interval ``from/to``;
    anything else is read as one point in time. Accepted notations
    include:

    - 2020-05-01
    - 2020-06-15T12
    - 2020-05
    - 2019
    - 2020-05-01/2020-05-05
    - 2017-01/2019-12
    - 2010/2020

    For annual and monthly resolutions the parsed date(s) are passed
    through ``mktimerange`` and rows are kept when their FROM_DATE is not
    before the range start and their TO_DATE is not after the range end.
    For every other resolution the DATE column is compared instead:
    inclusively against an interval, or for equality against a single
    date.

    :param date: date or ``from/to`` interval as string
    :param time_resolution: resolution deciding which columns are compared
    :return: Filtered DataFrame
    """
    if "/" in date:
        # Date interval.
        raw_start, raw_end = date.split("/")
        lower = parse_datetime(raw_start)
        upper = parse_datetime(raw_end)

        if time_resolution not in (
            TimeResolution.ANNUAL,
            TimeResolution.MONTHLY,
        ):
            mask = (lower <= self.df[DWDMetaColumns.DATE.value]) & (
                self.df[DWDMetaColumns.DATE.value] <= upper
            )
            return self.df[mask]

        lower, upper = mktimerange(time_resolution, lower, upper)
        mask = (lower <= self.df[DWDMetaColumns.FROM_DATE.value]) & (
            self.df[DWDMetaColumns.TO_DATE.value] <= upper
        )
        return self.df[mask]

    # Specific date.
    moment = parse_datetime(date)

    if time_resolution not in (
        TimeResolution.ANNUAL,
        TimeResolution.MONTHLY,
    ):
        mask = moment == self.df[DWDMetaColumns.DATE.value]
        return self.df[mask]

    lower, upper = mktimerange(time_resolution, moment)
    mask = (lower <= self.df[DWDMetaColumns.FROM_DATE.value]) & (
        self.df[DWDMetaColumns.TO_DATE.value] <= upper
    )
    return self.df[mask]
def __init__(
    self,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
) -> None:
    """
    Normalise and store the optional date range on the instance.

    Truthy values that are not already ``datetime`` instances are run
    through ``parse_datetime``; everything else is stored unchanged.

    :param start_date: lower bound of the date range, or None
    :param end_date: upper bound of the date range, or None
    :raises StartDateEndDateError: if both dates are set and
        ``start_date`` is later than ``end_date``
    """
    if start_date and not isinstance(start_date, datetime):
        start_date = parse_datetime(start_date)

    if end_date and not isinstance(end_date, datetime):
        end_date = parse_datetime(end_date)

    # Equal dates are accepted; only a reversed range is rejected.
    if start_date and end_date and start_date > end_date:
        raise StartDateEndDateError("'start_date' has to be before 'end_date'")

    self.start_date = start_date
    self.end_date = end_date
def test_filter_by_date_annual():
    """
    Check annual-resolution date filtering on a single-record frame.

    The record spans 2019-01-01 to 2019-12-31. The interval
    ``2019-05/2019-09`` is expected to keep it, whereas ``2020/2022`` and
    the single year ``2020`` are expected to filter it out.
    """
    columns = {
        "STATION_ID": ["01048"],
        "PARAMETER_SET": ["climate_summary"],
        "PARAMETER": ["temperature_air_max_200"],
        "FROM_DATE": [parse_datetime("2019-01-01T00:00:00.000")],
        "TO_DATE": [parse_datetime("2019-12-31T00:00:00.000")],
        "VALUE": [1.3],
        "QUALITY": [None],
    }
    frame = pd.DataFrame.from_dict(columns)

    # Interval inside the year covered by the record.
    matching = frame.dwd.filter_by_date(
        "2019-05/2019-09", DWDObservationResolution.ANNUAL
    )
    assert not matching.empty

    # Interval and single date that are expected to yield nothing.
    for date_spec in ("2020/2022", "2020"):
        filtered = frame.dwd.filter_by_date(
            date_spec, DWDObservationResolution.ANNUAL
        )
        assert filtered.empty
import pytest
from surrogate import surrogate

from wetterdienst.dwd.observations import (
    DWDObservationData,
    DWDObservationParameterSet,
    DWDObservationResolution,
    DWDObservationPeriod,
)
from wetterdienst.dwd.util import parse_datetime

# Module-level station metadata frame holding a single record (station 19087,
# "Freyung vorm Wald"). HAS_FILE is False for this record.
# NOTE(review): presumably used as a canned stand-in for a real metadata
# lookup in the tests of this module - confirm against the test bodies.
df_station = pd.DataFrame.from_dict([{
    "STATION_ID": 19087,
    "FROM_DATE": parse_datetime("1957-05-01T00:00:00.000Z"),
    "TO_DATE": parse_datetime("1995-11-30T00:00:00.000Z"),
    "STATION_HEIGHT": 645.0,
    "LAT": 48.8049,
    "LON": 13.5528,
    "STATION_NAME": "Freyung vorm Wald",
    "STATE": "Bayern",
    "HAS_FILE": False,
}])
def get_nearby_stations_by_number(
    latitude: float,
    longitude: float,
    num_stations_nearby: int,
    parameter: Union[Parameter, str],
    time_resolution: Union[TimeResolution, str],
    period_type: Union[PeriodType, str],
    minimal_available_date: Optional[Union[datetime, str]] = None,
    maximal_available_date: Optional[Union[datetime, str]] = None,
) -> pd.DataFrame:
    """
    Provides the metadata of the weather stations nearest to a location.

    :param latitude: Latitude of location to search for nearest
        weather station
    :param longitude: Longitude of location to search for nearest
        weather station
    :param num_stations_nearby: Number of stations that should be nearby
    :param parameter: Observation measure
    :param time_resolution: Frequency/granularity of measurement interval
    :param period_type: Recent or historical files
    :param minimal_available_date: Start date of timespan where
        measurements should be available
    :param maximal_available_date: End date of timespan where
        measurements should be available

    :raises ValueError: if ``num_stations_nearby`` is not positive, or if
        ``minimal_available_date`` is later than ``maximal_available_date``
    :raises InvalidParameterCombination: if ``check_parameters`` rejects
        the combination of parameter, time resolution and period type

    :return: DataFrame with the metadata rows of the nearest stations that
        have a file and satisfy the date constraints, extended by a
        distance-to-location column. Empty if no station qualifies.
    """
    if num_stations_nearby <= 0:
        raise ValueError("'num_stations_nearby' has to be at least 1.")

    parameter = parse_enumeration_from_template(parameter, Parameter)
    time_resolution = parse_enumeration_from_template(time_resolution, TimeResolution)
    period_type = parse_enumeration_from_template(period_type, PeriodType)

    if not check_parameters(parameter, time_resolution, period_type):
        raise InvalidParameterCombination(
            f"The combination of {parameter.value}, {time_resolution.value}, "
            f"{period_type.value} is invalid.")

    # Parse string dates; leave unset values and datetime instances alone.
    if minimal_available_date and not isinstance(minimal_available_date, datetime):
        minimal_available_date = parse_datetime(minimal_available_date)

    # Each bound is checked on its own. The previous code tested
    # 'minimal_available_date' here, so a call with only a minimal date
    # tried to parse None and a call with only a maximal date string left
    # that string unparsed.
    if maximal_available_date and not isinstance(maximal_available_date, datetime):
        maximal_available_date = parse_datetime(maximal_available_date)

    if minimal_available_date and maximal_available_date:
        if minimal_available_date > maximal_available_date:
            raise ValueError("'minimal_available_date' has to be before "
                             "'maximal_available_date'")

    coords = Coordinates(np.array(latitude), np.array(longitude))

    metadata = metadata_for_climate_observations(parameter, time_resolution,
                                                 period_type)

    # Filter only for stations that have a file
    metadata = metadata[metadata[DWDMetaColumns.HAS_FILE.value].values]

    # A station qualifies only if its record starts no later than the
    # requested start and ends no earlier than the requested end.
    if minimal_available_date:
        metadata = metadata[
            metadata[DWDMetaColumns.FROM_DATE.value] <= minimal_available_date]

    if maximal_available_date:
        metadata = metadata[
            metadata[DWDMetaColumns.TO_DATE.value] >= maximal_available_date]

    # Positional indices returned by the neighbour search must line up with
    # the filtered frame, hence the fresh index.
    metadata = metadata.reset_index(drop=True)

    distances, indices_nearest_neighbours = _derive_nearest_neighbours(
        metadata.LAT.values, metadata.LON.values, coords, num_stations_nearby)

    distances = pd.Series(distances)
    indices_nearest_neighbours = pd.Series(indices_nearest_neighbours)

    # If num_stations_nearby is higher then the actual amount of stations
    # further indices and distances are added which have to be filtered out
    num_available = min(metadata.shape[0], num_stations_nearby)
    distances = distances[:num_available]
    indices_nearest_neighbours = indices_nearest_neighbours[:num_available]

    # NOTE(review): scaling by KM_EARTH_RADIUS suggests the raw distances are
    # angular (radians) - confirm against _derive_nearest_neighbours.
    distances_km = np.array(distances * KM_EARTH_RADIUS)

    metadata_location = metadata.iloc[
        indices_nearest_neighbours, :].reset_index(drop=True)
    metadata_location[DWDMetaColumns.DISTANCE_TO_LOCATION.value] = distances_km

    if metadata_location.empty:
        logger.warning(f"No weather stations were found for coordinate "
                       f"{latitude}°N and {longitude}°E ")

    return metadata_location
def test_parse_datetime():
    """
    Check that ``parse_datetime`` yields the expected datetimes.

    Covers a plain date, a full timestamp, and a timestamp given with
    hour precision only.
    """
    cases = (
        ("2020-05-01", datetime(2020, 5, 1, 0, 0)),
        ("2020-05-01T13:14:15", datetime(2020, 5, 1, 13, 14, 15)),
        ("2020-05-01T13", datetime(2020, 5, 1, 13, 0)),
    )

    for text, expected in cases:
        parsed = parse_datetime(text)
        assert parsed == expected
import mock import pandas as pd import pytest from surrogate import surrogate from wetterdienst.dwd.observations import ( DWDObservationData, DWDObservationParameterSet, DWDObservationResolution, DWDObservationPeriod, ) from wetterdienst.dwd.util import parse_datetime df_station = pd.DataFrame.from_dict({ "STATION_ID": ["19087"], "FROM_DATE": [parse_datetime("1957-05-01T00:00:00.000Z")], "TO_DATE": [parse_datetime("1995-11-30T00:00:00.000Z")], "STATION_HEIGHT": [645.0], "LAT": [48.8049], "LON": [13.5528], "STATION_NAME": ["Freyung vorm Wald"], "STATE": ["Bayern"], "HAS_FILE": [False], }) df_data = pd.DataFrame.from_dict({ "STATION_ID": ["01048"], "PARAMETER_SET": ["CLIMATE_SUMMARY"], "PARAMETER": ["TEMPERATURE_AIR_MAX_200"], "DATE": [parse_datetime("2019-12-28T00:00:00.000Z")], "VALUE": [1.3],
def filter_by_date(
    self, date: str, resolution: DWDObservationResolution
) -> pd.DataFrame:
    """
    Select the rows of ``self.df`` that match a date or a date interval.

    A ``date`` containing ``/`` is read as an interval ``from/to``;
    anything else is read as one point in time. Accepted notations
    include:

    - 2020-05-01
    - 2020-06-15T12
    - 2020-05
    - 2019
    - 2020-05-01/2020-05-05
    - 2017-01/2019-12
    - 2010/2020

    For annual and monthly resolutions the parsed date(s) are passed
    through ``mktimerange`` and rows are kept when their FROM_DATE is not
    before the range start and their TO_DATE is not after the range end.
    For every other resolution the DATE column is compared instead:
    inclusively against an interval, or for equality against a single
    date.

    :param date: date or ``from/to`` interval as string
    :param resolution: resolution deciding which columns are compared
    :return: Filtered DataFrame
    """
    # TODO: datetimes should be aware of tz
    # TODO: resolution is not necessarily available and ideally filtering does not
    #  depend on it

    def _within_period(lower, upper):
        # Row mask for records whose FROM_DATE/TO_DATE lie inside the range.
        return (lower <= self.df[DWDMetaColumns.FROM_DATE.value]) & (
            self.df[DWDMetaColumns.TO_DATE.value] <= upper
        )

    if "/" in date:
        # Date interval.
        raw_start, raw_end = date.split("/")
        range_start = parse_datetime(raw_start)
        range_end = parse_datetime(raw_end)

        if resolution in (
            DWDObservationResolution.ANNUAL,
            DWDObservationResolution.MONTHLY,
        ):
            range_start, range_end = mktimerange(resolution, range_start, range_end)
            mask = _within_period(range_start, range_end)
        else:
            mask = (range_start <= self.df[DWDMetaColumns.DATE.value]) & (
                self.df[DWDMetaColumns.DATE.value] <= range_end
            )
        return self.df[mask]

    # Specific date.
    # TODO: make datetime tz aware
    moment = parse_datetime(date)

    if resolution in (
        DWDObservationResolution.ANNUAL,
        DWDObservationResolution.MONTHLY,
    ):
        range_start, range_end = mktimerange(resolution, moment)
        mask = _within_period(range_start, range_end)
    else:
        mask = moment == self.df[DWDMetaColumns.DATE.value]
    return self.df[mask]