def __init__(
    self,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
) -> None:
    """
    Store an optional date range, parsed and stamped with ``self.tz``.

    :param start_date: start date for filtering stations for their
        available data
    :param end_date: end date for filtering stations for their available
        data
    :raises StartDateEndDateError: if both dates are given and start_date
        is later than end_date
    """
    # TODO: make datetimes timezone sensible

    # Anything truthy that is not already a datetime goes through
    # parse_datetime; falsy values and datetimes are left untouched here.
    if start_date and not isinstance(start_date, datetime):
        start_date = parse_datetime(start_date)
    if end_date and not isinstance(end_date, datetime):
        end_date = parse_datetime(end_date)

    # Overwrite tzinfo with self.tz on given dates; every falsy value is
    # normalised to None.
    if start_date:
        start_date = start_date.replace(tzinfo=self.tz)
    else:
        start_date = None
    if end_date:
        end_date = end_date.replace(tzinfo=self.tz)
    else:
        end_date = None

    # Equal dates are accepted; only a strictly later start is rejected.
    if start_date and end_date and start_date > end_date:
        raise StartDateEndDateError("'start_date' has to be before 'end_date'")

    self.start_date = start_date
    self.end_date = end_date
def __init__(
    self,
    station_ids: Tuple[str],
    parameters: Tuple[Union[str, Enum]],
    start_date: Optional[Union[str, Timestamp, datetime]] = None,
    end_date: Optional[Union[str, Timestamp, datetime]] = None,
    humanize_parameters: bool = False,
) -> None:
    """
    Set up a values request for the given stations and parameters.

    :param station_ids: station ids for which data is requested, stored as
        a list of strings
    :param parameters: parameters either as strings or enumerations for
        which data is requested, parsed by ``self._parse_parameters``
    :param start_date: start date of the resulting data; when omitted it
        takes the value of end_date
    :param end_date: end date of the resulting data; when omitted it takes
        the value of start_date
    :param humanize_parameters: bool if parameters should be renamed to
        meaningful names
    :raises NoParametersFound: if no parameter could be parsed
    :raises StartDateEndDateError: if the check start_date <= end_date
        fails after parsing
    """
    # Make sure we receive a list of ids
    self.station_ids = pd.Series(station_ids).astype(str).tolist()
    self.parameters = self._parse_parameters(parameters)

    # TODO: replace this with a response + logging
    # TODO: move this to self.collect_data
    if not self.parameters:
        raise NoParametersFound(f"No parameters could be parsed from {parameters}")

    if start_date or end_date:
        # A single given date serves as both ends of the range.
        start_date = start_date or end_date
        end_date = end_date or start_date

        # TODO: use dynamic parsing that accepts entered timestamps with given
        #  timezone
        start_date, end_date = [
            Timestamp(dateparser.parse(str(raw_date)), tz=pytz.UTC)
            for raw_date in (start_date, end_date)
        ]

        # TODO: replace this with a response + logging
        if not start_date <= end_date:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )

    self.start_date = start_date
    self.end_date = end_date
    self.humanize_parameters = humanize_parameters
def _to_aware_datetime(value):
    """
    Normalise one date input: falsy -> None, str -> parsed ISO 8601
    datetime, and naive datetimes are marked as UTC.
    """
    if not value:
        return None

    if isinstance(value, str):
        value = dateutil.parser.isoparse(value)

    # Only naive values get UTC attached; an existing timezone is kept.
    if not value.tzinfo:
        value = value.replace(tzinfo=pytz.UTC)

    return value


def convert_timestamps(
    start_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
    end_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
) -> Union[Tuple[None, None], Tuple[pd.Timestamp, pd.Timestamp]]:
    """
    Sort out start_date vs. end_date, parse strings to datetime objects
    and finally convert both to pd.Timestamp types.

    Strings are parsed as ISO 8601; values without timezone information
    are treated as UTC. If only one of the two dates is given, it is used
    for both ends of the range.

    :param start_date: Start date for filtering stations for their
        available data
    :param end_date: End date for filtering stations for their available
        data
    :return: pd.Timestamp objects tuple of (start_date, end_date), or
        (None, None) if neither date is given (None or empty)
    :raises StartDateEndDateError: if the check start_date <= end_date
        fails
    """
    start_date = _to_aware_datetime(start_date)
    end_date = _to_aware_datetime(end_date)

    # Checked after normalisation so that empty inputs such as "" are
    # treated like None instead of failing on a None <= None comparison.
    if start_date is None and end_date is None:
        return None, None

    # If only one date given, set the other one to equal.
    if start_date is None:
        start_date = end_date
    if end_date is None:
        end_date = start_date

    # TODO: replace this with a response + logging
    if not start_date <= end_date:
        raise StartDateEndDateError(
            "Error: 'start_date' must be smaller or equal to 'end_date'."
        )

    return pd.Timestamp(start_date), pd.Timestamp(end_date)
def __init__(
    self,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
) -> None:
    """
    Store an optional date range after parsing non-datetime inputs.

    :param start_date: lower bound of the range; truthy values that are
        not datetime instances are passed through ``parse_datetime``
    :param end_date: upper bound of the range, handled like start_date
    :raises StartDateEndDateError: if both dates are given and start_date
        is later than end_date
    """
    # Falsy values and datetime instances are kept exactly as passed in.
    if start_date and not isinstance(start_date, datetime):
        start_date = parse_datetime(start_date)

    if end_date and not isinstance(end_date, datetime):
        end_date = parse_datetime(end_date)

    # Equal dates are accepted; only a strictly later start is rejected.
    if start_date and end_date and start_date > end_date:
        raise StartDateEndDateError("'start_date' has to be before 'end_date'")

    self.start_date = start_date
    self.end_date = end_date
def __init__(
    self,
    station_ids: Union[str, int, List[Union[int, str]]],
    parameter: Union[str, Parameter, List[Union[str, Parameter]]],
    time_resolution: Union[str, TimeResolution],
    period_type: Union[
        Union[None, str, PeriodType], List[Union[str, PeriodType]]
    ] = None,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
    prefer_local: bool = False,
    write_file: bool = False,
    folder: Union[str, Path] = DWD_FOLDER_MAIN,
    tidy_data: bool = True,
    humanize_column_names: bool = False,
) -> None:
    """
    Class with mostly flexible arguments to define a request regarding DWD
    data. Special handling for period type. If start_date/end_date are given
    all period types are considered and merged together and the data is
    filtered for the given dates afterwards.

    :param station_ids: definition of stations by str, int or list of
        str/int, will be parsed to list of int
    :param parameter: Observation measure
    :param time_resolution: Frequency/granularity of measurement interval
    :param period_type: Recent or historical files (optional), if None and
        start_date and end_date None, all period types are used
    :param start_date: Replacement for period type to define exact time of
        requested data, if used, period type will be set to all period
        types. If only one of start_date/end_date is given, the other one
        is set equal to it.
    :param end_date: Replacement for period type to define exact time of
        requested data, if used, period type will be set to all period
        types. If only one of start_date/end_date is given, the other one
        is set equal to it.
    :param prefer_local: Definition if data should rather be taken from a
        local source
    :param write_file: Should data be written to a local file
    :param folder: Place where file lists (and station data) are stored
    :param tidy_data: Reshape DataFrame to a more tidy and row-based
        version of data; forced on when more than one parameter is
        requested
    :param humanize_column_names: Replace column names by more meaningful
        ones
    :raises ValueError: if the station ids can not be parsed to integers
    :raises StartDateEndDateError: if the check start_date <= end_date
        fails after parsing
    """
    try:
        self.station_ids = pd.Series(station_ids).astype(int).tolist()
    except ValueError as err:
        raise ValueError(
            "List of station id's can not be parsed to integers."
        ) from err

    self.parameter = (
        pd.Series(parameter)
        .apply(parse_enumeration_from_template, args=(Parameter,))
        .tolist()
    )

    self.time_resolution = parse_enumeration_from_template(
        time_resolution, TimeResolution
    )

    # If any date is given, use all period types and filter, else if no
    # period type is given use all period types
    if start_date or end_date or not period_type:
        self.period_type = [*PeriodType]
    # Otherwise period types will be parsed
    else:
        # For the case that a period_type is given, parse the period type(s)
        self.period_type = (
            pd.Series(period_type)
            .apply(parse_enumeration_from_template, args=(PeriodType,))
            .sort_values()
            .tolist()
        )

    if start_date or end_date:
        # If only one date given, make the other one equal
        if not start_date:
            start_date = end_date

        if not end_date:
            end_date = start_date

        # dateparser works on strings; str() lets the Timestamp inputs
        # permitted by the signature through as well.
        self.start_date = Timestamp(dateparser.parse(str(start_date)))
        self.end_date = Timestamp(dateparser.parse(str(end_date)))

        if not self.start_date <= self.end_date:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )
    else:
        self.start_date = start_date
        self.end_date = end_date

    self.prefer_local = prefer_local
    self.write_file = write_file
    self.folder = folder
    # If more than one parameter requested, automatically tidy data
    self.tidy_data = len(self.parameter) > 1 or tidy_data
    self.humanize_column_names = humanize_column_names
def __init__(
    self,
    station_ids: List[Union[int, str]],
    parameters: List[
        Union[str, DWDObservationParameter, DWDObservationParameterSet]
    ],
    resolution: Union[str, DWDObservationResolution],
    periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
    start_date: Union[None, str, Timestamp, datetime] = None,
    end_date: Union[None, str, Timestamp, datetime] = None,
    storage: StorageAdapter = None,
    tidy_data: bool = True,
    humanize_column_names: bool = False,
) -> None:
    """
    Define a request for DWD observation data.

    Periods get special treatment: as soon as start_date and/or end_date
    is given, every period is selected and the date range is stored for
    later filtering.

    :param station_ids: stations as list of str/int, parsed to a list of
        int
    :param parameters: observation measures; entries that raise
        InvalidParameter are logged and dropped
    :param resolution: frequency/granularity of the measurement interval
    :param periods: recent or historical files (optional); only used when
        neither start_date nor end_date is given
    :param start_date: start of the requested time range; defaults to
        end_date if only that one is given
    :param end_date: end of the requested time range; defaults to
        start_date if only that one is given
    :param storage: storage adapter
    :param tidy_data: reshape DataFrame to a more tidy and row-based
        version of data
    :param humanize_column_names: replace column names by more meaningful
        ones
    :raises ValueError: if the station ids can not be parsed to integers
    :raises NoParametersFound: if none of the parameters could be parsed
    :raises StartDateEndDateError: if the check start_date <= end_date
        fails after parsing
    """
    try:
        self.station_ids = pd.Series(station_ids).astype(int).tolist()
    except ValueError:
        raise ValueError("List of station id's can not be parsed to integers.")

    self.resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution
    )

    # Resolve each requested parameter to a (parameter, parameter set)
    # pair; invalid ones are only logged.
    self.parameters = []
    for parameter in pd.Series(parameters):
        try:
            combination = create_parameter_to_parameter_set_combination(
                parameter, self.resolution
            )
        except InvalidParameter as e:
            log.info(str(e))
        else:
            parameter, parameter_set = combination
            self.parameters.append((parameter, parameter_set))

    if not self.parameters:
        raise NoParametersFound(f"No parameters could be parsed from {parameters}")

    # Explicit periods only count when no date is given; in every other
    # case all periods are used.
    if not (start_date or end_date) and periods:
        self.periods = (
            pd.Series(periods)
            .apply(parse_enumeration_from_template, args=(DWDObservationPeriod,))
            .sort_values()
            .tolist()
        )
    else:
        self.periods = [*DWDObservationPeriod]

    if start_date or end_date:
        # A single given date serves as both ends of the range.
        start_date = start_date or end_date
        end_date = end_date or start_date

        self.start_date = Timestamp(dateparser.parse(str(start_date)))
        self.end_date = Timestamp(dateparser.parse(str(end_date)))

        if not self.start_date <= self.end_date:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )
    else:
        self.start_date = start_date
        self.end_date = end_date

    self.storage = storage

    # Tidy automatically when more than one parameter is requested or when
    # any requested entry is not a whole parameter set.
    requests_non_set_entry = any(
        not isinstance(requested, DWDObservationParameterSet)
        for requested, _ in self.parameters
    )
    self.tidy_data = (
        len(self.parameters) > 1 or requests_non_set_entry or tidy_data
    )
    self.humanize_column_names = humanize_column_names
def __init__(
    self,
    mosmix_type: DWDMosmixType,
    station_ids: List[str],
    parameters: Optional[List[Union[str, DWDForecastParameter]]] = None,
    start_date: Optional[
        Union[str, datetime, DWDForecastDate]
    ] = DWDForecastDate.LATEST,
    end_date: Optional[Union[str, datetime, timedelta]] = None,
    tidy_data: bool = True,
    humanize_column_names: bool = False,
) -> None:
    """
    Define a request for MOSMIX forecast data.

    Args:
        mosmix_type: type of forecast, either small (MOSMIX-S) or large
            (MOSMIX-L); must be a member of DWDMosmixType
        station_ids: station ids which are being queried from the MOSMIX
            forecast, stored as strings
        parameters: optional parameters for which the forecasts are
            filtered
        start_date: start date of the MOSMIX forecast, can be used in
            combination with end date to query multiple MOSMIX forecasts,
            or instead used with enumeration to only query LATEST MOSMIX
            forecast
        end_date: end date of MOSMIX forecast, can be used to query
            multiple MOSMIX forecasts available on the server
        tidy_data: boolean if pandas.DataFrame shall be tidied and values
            put in rows
        humanize_column_names: boolean if parameters shall be renamed to
            human readable names

    Raises:
        ValueError: if mosmix_type is not a DWDMosmixType member
        StartDateEndDateError: if the check start_date <= end_date fails
            after parsing
    """
    # NOTE(review): depending on the Python version, `in` on an Enum class
    # may raise TypeError for non-member values such as plain strings
    # instead of reaching this ValueError - confirm.
    if mosmix_type not in DWDMosmixType:
        raise ValueError(
            "mosmix_type should be one of DWDMosmixType.SMALL or "
            "DWDMosmixType.LARGE"
        )

    if station_ids:
        station_ids = pd.Series(station_ids).astype(str).tolist()

    if parameters:
        parameters = (
            pd.Series(parameters)
            .apply(parse_enumeration_from_template, args=(DWDForecastParameter,))
            .tolist()
        )

    # Without any date fall back to the latest forecast; with a single date
    # use it for both ends of the range.
    # NOTE(review): with the default start_date (LATEST) and only end_date
    # given, end_date is stored unparsed - confirm this is intended.
    if not start_date and not end_date:
        start_date = DWDForecastDate.LATEST
    elif not end_date:
        end_date = start_date
    elif not start_date:
        start_date = end_date

    if start_date is not DWDForecastDate.LATEST:
        # Forecasts are addressed by full hours, so both dates are floored.
        start_date = pd.to_datetime(
            start_date, infer_datetime_format=True
        ).floor("1H")
        end_date = pd.to_datetime(end_date, infer_datetime_format=True).floor("1H")

        if not start_date <= end_date:
            raise StartDateEndDateError(
                "end_date should be same or later then start_date"
            )

        # Shift dates to 3, 9, 15, 21 hour format
        if mosmix_type == DWDMosmixType.LARGE:
            start_date = self.adjust_datetime(start_date)
            end_date = self.adjust_datetime(end_date)

    self.forecast_type = mosmix_type
    self.station_ids = station_ids
    self.parameters = parameters
    self.start_date = start_date
    self.end_date = end_date
    self.tidy_data = tidy_data
    self.humanize_column_names = humanize_column_names

    if mosmix_type == DWDMosmixType.SMALL:
        self.freq = "1H"  # short forecasts released every hour
    else:
        self.freq = "6H"

    self.kml = KMLReader(station_ids=self.station_ids, parameters=self.parameters)
def __init__(
    self,
    station_ids: Union[str, int, List[Union[int, str]]],
    parameter: Union[str, Parameter, List[Union[str, Parameter]]],
    time_resolution: Union[str, TimeResolution],
    period_type: Union[
        Union[None, str, PeriodType], List[Union[None, str, PeriodType]]
    ] = None,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
    prefer_local: bool = False,
    write_file: bool = False,
    folder: Union[str, Path] = DWD_FOLDER_MAIN,
    tidy_data: bool = True,
    humanize_column_names: bool = False,
    create_new_file_index: bool = False,
) -> None:
    """
    Class with mostly flexible arguments to define a request regarding DWD
    data. Special handling for period type. If no period type is given,
    all period types are considered and the given start_date/end_date are
    stored for filtering the data afterwards.

    Args:
        station_ids: definition of stations by str, int or list of
            str/int, will be parsed to list of int
        parameter: str or parameter enumeration defining the requested
            parameter
        time_resolution: str or time resolution enumeration defining the
            requested time resolution
        period_type: str or period type enumeration defining the requested
            period type; when given, start_date and end_date are not
            evaluated
        start_date: replacement for period type to define exact time of
            requested data; defaults to end_date if only that is given
        end_date: replacement for period type to define exact time of
            requested data; defaults to start_date if only that is given
        prefer_local: definition if data should rather be taken from a
            local source
        write_file: should data be written to a local file
        folder: place where file lists (and station data) are stored
        tidy_data: reshape DataFrame to a more tidy, row based version of
            data; forced on when more than one parameter is requested
        humanize_column_names: replace column names by more meaningful ones
        create_new_file_index: definition if the file index should be
            recreated

    Raises:
        ValueError: if neither period_type nor a date is given, or if the
            station ids can not be parsed to integers
        StartDateEndDateError: if the check start_date <= end_date fails
            after parsing
    """
    if not (period_type or start_date or end_date):
        raise ValueError(
            "Define either a 'period_type' or one of or both 'start_date' and "
            "'end_date' and leave 'period_type' empty!"
        )

    try:
        self.station_ids = [
            int(station_id) for station_id in cast_to_list(station_ids)
        ]
    except ValueError as err:
        raise ValueError(
            "List of station id's can not be parsed to integers."
        ) from err

    self.parameter = [
        parse_enumeration_from_template(p, Parameter)
        for p in cast_to_list(parameter)
    ]

    self.time_resolution = parse_enumeration_from_template(
        time_resolution, TimeResolution
    )

    # start date and end date required for collect_data in any case
    self.start_date = None
    self.end_date = None

    if period_type:
        # For the case that a period_type is given, parse the period type(s)
        parsed_period_types = [
            None if pt is None else parse_enumeration_from_template(pt, PeriodType)
            for pt in cast_to_list(period_type)
        ]

        # Additional sorting required for self.period_type to ensure that for
        # multiple periods the data is first sourced from historical
        self.period_type = sorted(parsed_period_types)
    else:
        # working with ranges of data means expecting data to be laying between
        # periods, thus including all periods
        self.period_type = [
            PeriodType.HISTORICAL,
            PeriodType.RECENT,
            PeriodType.NOW,
        ]

        # If only one date given, make the other one equal
        if not start_date:
            start_date = end_date

        if not end_date:
            end_date = start_date

        # dateparser works on strings; str() lets the Timestamp inputs
        # permitted by the signature through as well.
        self.start_date = Timestamp(dateparser.parse(str(start_date)))
        self.end_date = Timestamp(dateparser.parse(str(end_date)))

        if not self.start_date <= self.end_date:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )

    self.prefer_local = prefer_local
    self.write_file = write_file
    self.folder = folder
    # If more than one parameter requested, automatically tidy data
    self.tidy_data = len(self.parameter) > 1 or tidy_data
    self.humanize_column_names = humanize_column_names
    self.create_new_file_index = create_new_file_index