# Exemplo n.º 1
def create_file_list_for_dwd_server(statid: List[int],
                                    parameter: Parameter,
                                    time_resolution: TimeResolution,
                                    period_type: PeriodType,
                                    folder: str = MAIN_FOLDER,
                                    create_new_filelist: bool = False) -> List[str]:
    """
    Select datafiles (links to archives) for the given station ids,
    parameter, time_resolution and period_type, based on a locally cached
    list of files that are available online.

    Args:
        statid: ids of the weather stations to ask for data
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local folder holding the cached filelist (defaults to MAIN_FOLDER)
        create_new_filelist: if True, rebuild the filelist even if one exists

    Returns:
        List of paths to files matching the requested stations

    """
    # Check type of function parameters
    # NOTE(review): assert-based validation is stripped when Python runs
    # with -O; consider explicit raises as in other modules of this project.
    assert isinstance(statid, list)
    assert isinstance(parameter, Parameter)
    assert isinstance(time_resolution, TimeResolution)
    assert isinstance(period_type, PeriodType)
    assert isinstance(folder, str)
    assert isinstance(create_new_filelist, bool)

    # Check for the combination of requested parameters
    check_parameters(parameter=parameter,
                     time_resolution=time_resolution,
                     period_type=period_type)

    folder = correct_folder_path(folder)

    # Create name of fileslistfile, e.g. "<FILELIST_NAME>_<param>_<res>_<period>"
    filelist_local = f'{FILELIST_NAME}_{parameter.value}_' \
                     f'{time_resolution.value}_{period_type.value}'

    # Create filepath to filelist in folder
    filelist_local_path = Path(folder, SUB_FOLDER_METADATA, filelist_local)

    # Append the data format suffix (Path -> str on purpose here)
    filelist_local_path = f"{filelist_local_path}{DATA_FORMAT}"

    if create_new_filelist or not Path(filelist_local_path).is_file():
        # If there was an error with reading in the fileslist get a new
        # fileslist
        create_fileindex(parameter=parameter,
                         time_resolution=time_resolution,
                         period_type=period_type,
                         folder=folder)

    filelist = pd.read_csv(filelist_local_path)

    # Filter for the requested station ids and return the file names only
    return filelist.loc[filelist[STATION_ID_NAME].isin(statid),
                        FILENAME_NAME].tolist()
# Exemplo n.º 2
def get_nearest_station(latitudes: Union[List[float], np.array],
                        longitudes: Union[List[float], np.array],
                        parameter: Union[Parameter, str],
                        time_resolution: Union[TimeResolution, str],
                        period_type: Union[PeriodType, str],
                        num_stations_nearby: int = 1) -> \
        Tuple[List[int], List[float]]:
    """
    Provides a list of weather station ids for the requested data.

    Args:
        latitudes: latitudes of locations to search for nearest
            weather station
        longitudes: longitudes of locations to search for nearest
            weather station
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        num_stations_nearby: Number of stations that should be nearby

    Returns:
        list of stations ids for the given locations/coordinate pairs and
        a list of distances in kilometer to the weather station

    """
    parameter = Parameter(parameter)
    time_resolution = TimeResolution(time_resolution)
    period_type = PeriodType(period_type)

    if not isinstance(latitudes, list):
        latitudes = np.array(latitudes)
    if not isinstance(longitudes, list):
        # BUG FIX: this branch previously assigned to 'latitudes'
        # (latitudes = np.array(longitudes)), corrupting the coordinates.
        longitudes = np.array(longitudes)

    check_parameters(parameter, time_resolution, period_type)

    coords = Coordinates(latitudes, longitudes)

    metadata = metadata_for_dwd_data(parameter, time_resolution, period_type)

    distances, indices_nearest_neighbours = _derive_nearest_neighbours(
        metadata.LAT.values, metadata.LON.values, coords, num_stations_nearby)
    # NOTE(review): presumably unwraps a nested index array returned by the
    # neighbour search for multi-dimensional results — confirm against
    # _derive_nearest_neighbours.
    if np.max(indices_nearest_neighbours.shape) > 1:
        indices_nearest_neighbours = indices_nearest_neighbours[0]
    return metadata.loc[indices_nearest_neighbours, 'STATION_ID'].tolist(),\
        (distances * KM_EARTH_RADIUS).tolist()
# Exemplo n.º 3
def get_nearest_station(latitudes: Union[List[float], np.array],
                        longitudes: Union[List[float], np.array],
                        parameter: Parameter,
                        time_resolution: TimeResolution,
                        period_type: PeriodType) -> \
        Tuple[List[int], List[float]]:
    """
    Provides a list of weather station ids for the requested data.

    Args:
        latitudes: latitudes of locations to search for nearest
            weather station
        longitudes: longitudes of locations to search for nearest
            weather station
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files

    Returns:
        list of stations ids for the given locations/coordinate pairs and
        a list of distances in kilometer to the weather station

    """
    if not isinstance(latitudes, list):
        latitudes = np.array(latitudes)
    if not isinstance(longitudes, list):
        # BUG FIX: this branch previously assigned to 'latitudes'
        # (latitudes = np.array(longitudes)), corrupting the coordinates.
        longitudes = np.array(longitudes)

    check_parameters(parameter, time_resolution, period_type)

    coords = Coordinates(latitudes, longitudes)

    metadata = metadata_for_dwd_data(parameter,
                                     time_resolution,
                                     period_type)

    distances, indices_nearest_neighbours = derive_nearest_neighbours(
        metadata.LAT.values,
        metadata.LON.values,
        coords)

    # Distances come back as fractions of the earth radius; scale to km.
    return metadata.loc[indices_nearest_neighbours, 'STATION_ID'].tolist(),\
        (distances * KM_EARTH_RADIUS).tolist()
# Exemplo n.º 4
    def __init__(self,
                 station_id: Union[str, int, List[Union[int, str]]],
                 parameter: Union[str, Parameter],
                 time_resolution: Union[str, TimeResolution],
                 period_type: Union[None, str, list, PeriodType] = None,
                 start_date: Union[None, str, Timestamp] = None,
                 end_date: Union[None, str, Timestamp] = None) -> None:
        """
        Build a validated request for DWD station data.

        Args:
            station_id: one id or a list of ids of weather stations
            parameter: observation measure (enum value or free-text word)
            time_resolution: frequency/granularity of measurement interval
            period_type: recent or historical files; may be left empty when
                an explicit start_date/end_date range is given instead
            start_date: begin of an explicit date range (requires end_date)
            end_date: end of an explicit date range (requires start_date)

        Raises:
            ValueError: for unparsable station ids or when no valid
                parameter/time_resolution/period_type combination remains
            StartDateEndDateError: when start_date lies after end_date
        """
        if not (period_type or (start_date and end_date)):
            # BUG FIX: the message previously said 'time_resolution' although
            # the check is about 'period_type' vs. an explicit date range.
            raise ValueError("Define either a 'period_type' or both the 'start_date' and 'end_date' and "
                             "leave the other one empty!")

        # BUG FIX: cast first, then validate. The previous all-ints check
        # crashed on a scalar id (not iterable) and rejected string ids such
        # as "1048" that the subsequent int() cast accepts anyway.
        try:
            self.station_id = [int(s) for s in cast_to_list(station_id)]
        except (ValueError, TypeError):
            raise ValueError("List of station id's can not be parsed to integers.")

        self.parameter = parameter if isinstance(parameter, Parameter) \
            else _parse_parameter_from_value(parameter, PARAMETER_WORDLIST_MAPPING)

        self.time_resolution = time_resolution if isinstance(time_resolution, TimeResolution) \
            else _parse_parameter_from_value(time_resolution, TIMERESOLUTION_WORDLIST_MAPPING)

        self.period_type = cast_to_list(period_type) if isinstance(period_type, (PeriodType, type(None))) \
            else [_parse_parameter_from_value(period_type, PERIODTYPE_WORDLIST_MAPPING)
                  for period_type in cast_to_list(period_type)]

        self.start_date = parse_date(start_date)
        self.end_date = parse_date(end_date)

        if self.start_date:
            # working with ranges of data means expecting data to be laying between periods, thus including all
            self.period_type = [PeriodType.HISTORICAL, PeriodType.RECENT, PeriodType.NOW]

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError

        # Iterate over a copy because invalid combinations are removed in place.
        for period_type in self.period_type.copy():
            if not check_parameters(parameter=self.parameter,
                                    time_resolution=self.time_resolution,
                                    period_type=period_type):
                print(f"Combination of: parameter {self.parameter.value}, "
                      f"time_resolution {self.time_resolution.value}, "
                      f"period_type {period_type} not available and removed.")
                self.period_type.remove(period_type)

        # Use the clean up of self.period_type to identify if there's any data with those parameters
        if not self.period_type:
            raise ValueError("Error: no combination for parameter, time_resolution and period_type could be found.")
# Exemplo n.º 5
def metadata_for_dwd_data(parameter: Parameter,
                          time_resolution: TimeResolution,
                          period_type: PeriodType,
                          folder: str = MAIN_FOLDER,
                          write_file: bool = True,
                          create_new_filelist: bool = False):
    """
    A main function to retrieve metadata for a set of parameters that creates a
        corresponding csv.

    STATE information is added to metadata for cases where there's no such named
    column (e.g. STATE) in the dataframe.
    For this purpose we use daily precipitation data. That has two reasons:
     - daily precipitation data has a STATE information combined with a city
     - daily precipitation data is the most common data served by the DWD

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local file system folder where files should be stored
        write_file: writes the meta data file to the local file system
        create_new_filelist: if true: a new file_list for metadata will
         be created

    Returns:
        pandas DataFrame with metadata for the selected parameters

    Raises:
        TypeError: if any argument has the wrong type
    """
    # Explicit type checks instead of asserts: asserts are stripped when
    # Python runs with -O, which would silently disable the validation.
    # (Consistent with the other metadata_for_dwd_data implementation.)
    if not isinstance(parameter, Parameter):
        raise TypeError("Error: 'parameter' is not of type Parameter(Enum).")
    if not isinstance(time_resolution, TimeResolution):
        raise TypeError(
            "Error: 'time_resolution' is not of type TimeResolution(Enum).")
    if not isinstance(period_type, PeriodType):
        raise TypeError(
            "Error: 'period_type' is not of type PeriodType(Enum).")
    if not isinstance(folder, str):
        raise TypeError("Error: 'folder' is not a string.")
    if not isinstance(write_file, bool):
        raise TypeError("Error: 'write_file' is not a bool.")
    if not isinstance(create_new_filelist, bool):
        raise TypeError("Error: 'create_new_filelist' is not a bool.")

    check_parameters(parameter=parameter,
                     time_resolution=time_resolution,
                     period_type=period_type)

    file_path = create_metainfo_fpath(folder, parameter, period_type,
                                      time_resolution)

    # Reuse a previously written metadata file unless a rebuild is requested.
    if check_file_exist(file_path) and not create_new_filelist:
        metainfo = pd.read_csv(filepath_or_buffer=file_path)
        return metainfo

    if time_resolution != TimeResolution.MINUTE_1:
        metainfo = create_metaindex(parameter=parameter,
                                    time_resolution=time_resolution,
                                    period_type=period_type)

    else:
        # 1-minute data has no single meta index and needs special handling.
        metainfo = metaindex_for_1minute_data(parameter=parameter,
                                              time_resolution=time_resolution,
                                              folder=folder)

    if STATE_NAME not in metainfo.columns:
        # Daily historical precipitation metadata carries the STATE column;
        # merge it in via the station name.
        mdp = metadata_for_dwd_data(Parameter.PRECIPITATION_MORE,
                                    TimeResolution.DAILY,
                                    PeriodType.HISTORICAL,
                                    folder=folder,
                                    write_file=False,
                                    create_new_filelist=False)

        metainfo = metainfo.merge(mdp.loc[:, [STATIONNAME_NAME, STATE_NAME]],
                                  on=STATIONNAME_NAME).reset_index(drop=True)

    metainfo = add_filepresence(metainfo=metainfo,
                                parameter=parameter,
                                time_resolution=time_resolution,
                                period_type=period_type,
                                folder=folder,
                                create_new_filelist=create_new_filelist)

    if write_file and not check_file_exist(file_path) and not \
            create_new_filelist:
        remove_old_file(file_type=METADATA_NAME,
                        file_postfix=DATA_FORMAT,
                        parameter=parameter,
                        time_resolution=time_resolution,
                        period_type=period_type,
                        folder=folder,
                        subfolder=SUB_FOLDER_METADATA)

        metainfo.to_csv(path_or_buf=file_path, header=True, index=False)

    return metainfo
# Exemplo n.º 6
def test_check_parameters():
    # A valid parameter combination passes silently (returns None).
    result = check_parameters(
        Parameter.PRECIPITATION, TimeResolution.MINUTE_10, PeriodType.HISTORICAL)
    assert result is None
# Exemplo n.º 7
def create_file_list_for_dwd_server(station_ids: List[int],
                                    parameter: Parameter,
                                    time_resolution: TimeResolution,
                                    period_type: PeriodType,
                                    folder: str = DWD_FOLDER_MAIN,
                                    create_new_filelist: bool = False) -> pd.DataFrame:
    """
    Select datafiles (links to archives) for the given station ids,
    parameter, time_resolution and period_type, based on a locally cached
    list of files that are available online.

    Args:
        station_ids: id(s) for the weather station to ask for data
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local folder holding the cached filelist
        create_new_filelist: boolean for checking existing file list or not

    Returns:
        pandas DataFrame with the filelist rows of the requested stations

    Raises:
        TypeError: if any argument has the wrong type
    """
    # Explicit type checks instead of asserts: asserts are stripped when
    # Python runs with -O, which would silently disable the validation.
    if not isinstance(station_ids, list):
        raise TypeError("Error: 'station_ids' is not a list.")
    station_ids = [int(statid) for statid in station_ids]
    if not isinstance(parameter, Parameter):
        raise TypeError("Error: 'parameter' is not of type Parameter(Enum).")
    if not isinstance(time_resolution, TimeResolution):
        raise TypeError(
            "Error: 'time_resolution' is not of type TimeResolution(Enum).")
    if not isinstance(period_type, PeriodType):
        raise TypeError(
            "Error: 'period_type' is not of type PeriodType(Enum).")
    if not isinstance(folder, str):
        raise TypeError("Error: 'folder' is not a string.")
    if not isinstance(create_new_filelist, bool):
        raise TypeError("Error: 'create_new_filelist' is not a bool.")

    # Check for the combination of requested parameters
    check_parameters(parameter=parameter,
                     time_resolution=time_resolution,
                     period_type=period_type)

    folder = correct_folder_path(folder)

    # Create name of fileslistfile
    filelist_local = f'{FILELIST_NAME}_{parameter.value}_' \
                     f'{time_resolution.value}_{period_type.value}'

    # Create filepath to filelist in folder
    filelist_local_path = Path(folder,
                               DWD_FOLDER_METADATA,
                               filelist_local)

    filelist_local_path = f"{filelist_local_path}{DATA_FORMAT}"

    # Rebuild the file index when requested or when no cached list exists.
    if create_new_filelist or not Path(filelist_local_path).is_file():
        create_fileindex(parameter=parameter,
                         time_resolution=time_resolution,
                         period_type=period_type,
                         folder=folder)

    filelist = pd.read_csv(filepath_or_buffer=filelist_local_path,
                           sep=",",
                           dtype={DWDColumns.FILEID.value: int,
                                  DWDColumns.STATION_ID.value: int,
                                  DWDColumns.FILENAME.value: str})

    return filelist.loc[filelist[DWDColumns.STATION_ID.value].isin(station_ids), :]
# Exemplo n.º 8
    def __init__(self,
                 station_ids: Union[str, int, List[Union[int, str]]],
                 parameter: Union[str, Parameter],
                 time_resolution: Union[str, TimeResolution],
                 period_type: Union[None, str, list, PeriodType] = None,
                 start_date: Union[None, str, Timestamp] = None,
                 end_date: Union[None, str, Timestamp] = None,
                 humanize_column_names: bool = False) -> None:
        """
        Build a validated request for DWD station data.

        Args:
            station_ids: one id or a list of ids of weather stations
            parameter: observation measure (enum value or free-text word)
            time_resolution: frequency/granularity of measurement interval
            period_type: recent or historical files; may be left empty when
                an explicit start_date/end_date range is given instead
            start_date: begin of an explicit date range (requires end_date)
            end_date: end of an explicit date range (requires start_date)
            humanize_column_names: if True, rename columns to readable names

        Raises:
            ValueError: for unparsable station ids or when no valid
                parameter/time_resolution/period_type combination remains
            StartDateEndDateError: when start_date lies after end_date
        """
        if not (period_type or (start_date and end_date)):
            # BUG FIX: the message previously said 'time_resolution' although
            # the check is about 'period_type' vs. an explicit date range.
            raise ValueError("Define either a 'period_type' or both the 'start_date' and 'end_date' and "
                             "leave the other one empty!")

        try:
            self.station_ids = [int(station_id) for station_id in cast_to_list(station_ids)]
        except ValueError:
            raise ValueError("List of station id's can not be parsed to integers.")

        try:
            self.parameter = Parameter(parameter)
        except ValueError:
            self.parameter = _parse_parameter_from_value(
                parameter, PARAMETER_WORDLIST_MAPPING)

        try:
            self.time_resolution = TimeResolution(time_resolution)
        except ValueError:
            self.time_resolution = _parse_parameter_from_value(
                time_resolution, TIMERESOLUTION_WORDLIST_MAPPING)

        self.period_type = []
        for pt in cast_to_list(period_type):
            if pt is None:
                self.period_type.append(None)
                continue

            try:
                self.period_type.append(PeriodType(pt))
            except ValueError:
                # BUG FIX: previously parsed 'period_type' (the whole original
                # argument) instead of the current loop element 'pt'.
                self.period_type.append(
                    _parse_parameter_from_value(pt, PERIODTYPE_WORDLIST_MAPPING))

        # Additional sorting required for self.period_type to ensure that for multiple
        # periods the data is first sourced from historical
        self.period_type = sorted(self.period_type)

        self.start_date = parse_date(start_date)
        self.end_date = parse_date(end_date)

        if self.start_date:
            # working with ranges of data means expecting data to be laying between periods, thus including all
            self.period_type = [PeriodType.HISTORICAL, PeriodType.RECENT, PeriodType.NOW]

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError("Error: 'start_date' must be smaller or equal to 'end_date'.")

        # Iterate over a copy because invalid combinations are removed in place.
        for period_type in self.period_type.copy():
            if not check_parameters(parameter=self.parameter,
                                    time_resolution=self.time_resolution,
                                    period_type=period_type):
                log.info(f"Combination of: parameter {self.parameter.value}, "
                         f"time_resolution {self.time_resolution.value}, "
                         f"period_type {period_type} not available and removed.")
                self.period_type.remove(period_type)

        # Use the clean up of self.period_type to identify if there's any data with those parameters
        if not self.period_type:
            raise ValueError("No combination for parameter, time_resolution "
                             "and period_type could be found.")

        self.humanize_column_names = humanize_column_names
# Exemplo n.º 9
def metadata_for_dwd_data(parameter: Union[Parameter, str],
                          time_resolution: Union[TimeResolution, str],
                          period_type: Union[PeriodType, str],
                          folder: str = DWD_FOLDER_MAIN,
                          write_file: bool = True,
                          create_new_file_index: bool = False) -> pd.DataFrame:
    """
    A main function to retrieve metadata for a set of parameters that creates a
        corresponding csv.
    STATE information is added to metadata for cases where there's no such named
    column (e.g. STATE) in the pandas.DataFrame.
    For this purpose we use daily precipitation data. That has two reasons:
     - daily precipitation data has a STATE information combined with a city
     - daily precipitation data is the most common data served by the DWD
    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local file system folder where files should be stored
        write_file: writes the meta data file to the local file system
        create_new_file_index: if true: a new file_list for metadata will
         be created
    Returns:
        pandas.DataFrame with metadata for selected parameters
    """
    if create_new_file_index:
        reset_file_index_cache()

    # Accept plain strings as well as the enum members.
    parameter = Parameter(parameter)
    time_resolution = TimeResolution(time_resolution)
    period_type = PeriodType(period_type)

    check_parameters(parameter=parameter,
                     time_resolution=time_resolution,
                     period_type=period_type)

    file_path = create_metainfo_fpath(folder,
                                      parameter,
                                      period_type,
                                      time_resolution)

    if time_resolution == TimeResolution.MINUTE_1:
        # 1-minute data has no single meta index and needs special handling.
        metainfo = metaindex_for_1minute_data(parameter=parameter,
                                              time_resolution=time_resolution)
    else:
        metainfo = create_metaindex(parameter=parameter,
                                    time_resolution=time_resolution,
                                    period_type=period_type)

    if all(pd.isnull(metainfo[DWDMetaColumns.STATE.value])):
        # @todo avoid calling function in function -> we have to build a function around to manage missing data
        # Daily historical precipitation metadata carries the STATE column.
        mdp = metadata_for_dwd_data(Parameter.PRECIPITATION_MORE,
                                    TimeResolution.DAILY,
                                    PeriodType.HISTORICAL,
                                    create_new_file_index=False)

        # BUG FIX: the left merge key previously used the enum member itself
        # (DWDMetaColumns.STATION_ID) instead of its '.value' string, unlike
        # every other column access in this function.
        stateinfo = pd.merge(metainfo[DWDMetaColumns.STATION_ID.value],
                             mdp.loc[:, [DWDMetaColumns.STATION_ID.value, DWDMetaColumns.STATE.value]],
                             how="left")

        metainfo[DWDMetaColumns.STATE.value] = stateinfo[DWDMetaColumns.STATE.value]

    metainfo = add_filepresence(metainfo=metainfo,
                                parameter=parameter,
                                time_resolution=time_resolution,
                                period_type=period_type)

    # NOTE(review): writing only when 'create_new_file_index' is set differs
    # from the sibling implementations (which write when a rebuild was NOT
    # requested) — confirm this is intentional.
    if write_file and not file_path.is_file() and create_new_file_index:
        remove_old_file(file_type=METADATA_NAME,
                        file_postfix=DATA_FORMAT,
                        parameter=parameter,
                        time_resolution=time_resolution,
                        period_type=period_type,
                        folder=folder,
                        subfolder=DWD_FOLDER_METADATA)

        metainfo.to_csv(path_or_buf=file_path,
                        header=True,
                        index=False)

    return metainfo
# Exemplo n.º 10
def metadata_for_dwd_data(parameter: Parameter,
                          time_resolution: TimeResolution,
                          period_type: PeriodType,
                          folder: str = DWD_FOLDER_MAIN,
                          write_file: bool = True,
                          create_new_filelist: bool = False) -> pd.DataFrame:
    """
    A main function to retrieve metadata for a set of parameters that creates a
        corresponding csv.

    STATE information is added to metadata for cases where there's no such named
    column (e.g. STATE) in the dataframe.
    For this purpose we use daily precipitation data. That has two reasons:
     - daily precipitation data has a STATE information combined with a city
     - daily precipitation data is the most common data served by the DWD


    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local file system folder where files should be stored
        write_file: writes the meta data file to the local file system
        create_new_filelist: if true: a new file_list for metadata will
         be created

    Returns:
        pandas DataFrame with metadata for the selected parameters

    Raises:
        TypeError: if any argument has the wrong type
    """

    if not isinstance(parameter, Parameter):
        raise TypeError("Error: 'parameter' is not of type Parameter(Enum).")
    if not isinstance(time_resolution, TimeResolution):
        raise TypeError(
            "Error: 'time_resolution' is not of type TimeResolution(Enum).")
    if not isinstance(period_type, PeriodType):
        raise TypeError(
            "Error: 'period_type' is not of type PeriodType(Enum).")
    if not isinstance(folder, str):
        raise TypeError("Error: 'folder' is not a string.")
    if not isinstance(write_file, bool):
        raise TypeError("Error: 'write_file' is not a bool.")
    if not isinstance(create_new_filelist, bool):
        raise TypeError("Error: 'create_new_filelist' is not a bool.")

    check_parameters(parameter=parameter,
                     time_resolution=time_resolution,
                     period_type=period_type)

    file_path = create_metainfo_fpath(folder, parameter, period_type,
                                      time_resolution)

    # Reuse a previously written metadata file unless a rebuild is requested.
    if check_file_exist(file_path) and not create_new_filelist:
        metainfo = pd.read_csv(filepath_or_buffer=file_path)
        return metainfo

    if time_resolution == TimeResolution.MINUTE_1:
        # 1-minute data has no single meta index and needs special handling.
        metainfo = metaindex_for_1minute_data(parameter=parameter,
                                              time_resolution=time_resolution)
    else:
        metainfo = create_metaindex(parameter=parameter,
                                    time_resolution=time_resolution,
                                    period_type=period_type)

    if all(pd.isnull(metainfo[DWDColumns.STATE.value])):
        # @todo avoid calling function in function -> we have to build a function around to manage missing data
        # Daily historical precipitation metadata carries the STATE column.
        mdp = metadata_for_dwd_data(Parameter.PRECIPITATION_MORE,
                                    TimeResolution.DAILY,
                                    PeriodType.HISTORICAL,
                                    folder=folder,
                                    write_file=False,
                                    create_new_filelist=False)

        # BUG FIX: the left merge key previously used the enum member itself
        # (DWDColumns.STATION_ID) instead of its '.value' string, unlike
        # every other column access in this function.
        stateinfo = pd.merge(
            metainfo[DWDColumns.STATION_ID.value],
            mdp.loc[:, [DWDColumns.STATION_ID.value, DWDColumns.STATE.value]],
            how="left")

        metainfo[DWDColumns.STATE.value] = stateinfo[DWDColumns.STATE.value]

    metainfo = add_filepresence(metainfo=metainfo,
                                parameter=parameter,
                                time_resolution=time_resolution,
                                period_type=period_type,
                                folder=folder,
                                create_new_filelist=create_new_filelist)

    if write_file and not check_file_exist(file_path) and not \
            create_new_filelist:
        remove_old_file(file_type=METADATA_NAME,
                        file_postfix=DATA_FORMAT,
                        parameter=parameter,
                        time_resolution=time_resolution,
                        period_type=period_type,
                        folder=folder,
                        subfolder=DWD_FOLDER_METADATA)

        metainfo.to_csv(path_or_buf=file_path, header=True, index=False)

    return metainfo
# Exemplo n.º 11
def parse_dwd_data(local_files: List[Path],
                   keep_zip: bool = False) -> pd.DataFrame:
    """
    This function is used to read the stationdata for which the local zip link is
    provided by the 'download_dwd' function. It checks the zipfile from the link
    for its parameters, opens every zipfile in the list of files and reads in the
    containing product file, and if there's an error or it's wanted the zipfile is
    removed afterwards.

    Args:
        local_files: list of local stored files that should be read
        keep_zip: If true: The raw zip file will not be deleted, Default is: False.

    Returns:
        DataFrame with requested data (empty DataFrame if nothing was read)

    """
    # Test for types of input parameters
    assert isinstance(local_files, list)
    assert isinstance(keep_zip, bool)

    # Check for files and if empty return empty DataFrame
    if not local_files:
        return pd.DataFrame()

    # All files of one request share their parameters; derive them from the
    # first file name.
    first_filename = str(local_files[0]).split("/")[-1]

    parameter, time_resolution, period_type = determine_parameters(
        first_filename)
    check_parameters(parameter, time_resolution, period_type)

    data = []

    for file in local_files:
        # Track removal so the finally-block never unlinks the same file
        # twice (the previous version removed broken files in 'except' AND
        # again in 'finally', raising FileNotFoundError).
        file_removed = False
        try:
            with ZipFile(file) as zip_file:
                # Names of all entries inside the archive
                zip_file_files = [
                    zip_file_file.filename
                    for zip_file_file in zip_file.infolist()
                ]

                # Filter file with 'produkt' in filename
                file_data = [
                    zip_file_file for zip_file_file in zip_file_files if all([
                        matchstring in zip_file_file.lower()
                        for matchstring in STATIONDATA_MATCHSTRINGS
                    ])
                ]

                # List to filename
                file_data = file_data.pop(0)

                with zip_file.open(file_data) as file_opened:
                    # Read data into a dataframe
                    data_file = pd.read_csv(filepath_or_buffer=file_opened,
                                            sep=";",
                                            na_values="-999")

            # Append dataframe to list of all data read
            data.append(data_file)

        except Exception:
            # In case something goes wrong there's a print
            print(f'''The zipfile
                  {file}
                  couldn't be opened/read and will be removed.''')
            # Data will be removed
            Path(file).unlink()
            file_removed = True

        finally:
            # If file shouldn't be kept remove it (unless already removed)
            if not keep_zip and not file_removed:
                Path(file).unlink()

    # All files may have failed to read; pd.concat raises on an empty list.
    if not data:
        return pd.DataFrame()

    # Put together list of files to a DataFrame
    data = pd.concat(data)

    # Extract column names
    column_names = data.columns

    # Strip empty chars from before and after column names
    column_names = [
        column_name.upper().strip() for column_name in column_names
    ]

    # Replace certain names by conform names
    column_names = [
        GERMAN_TO_ENGLISH_COLUMNS_MAPPING.get(column_name, column_name)
        for column_name in column_names
    ]

    # Reassign column names to DataFrame
    data.columns = column_names

    # String to date
    data[DATE_NAME] = data[DATE_NAME].apply(
        lambda date: dt.strptime(str(date), "%Y%m%d"))

    return data