Exemplo n.º 1
0
    def adapt(df, config):
        """
        Reshape line-listing case data into the dataframe layout used downstream.

        The "CasesData" section of ``config`` selects the conversion:

        * ``input == "processed"``: ``df`` is returned untouched.
        * ``format`` equal to "phe" (case-insensitive): ``CasesData.adapt_phe``.
        * ``input == "url"`` and ``format == "json"`` (case-sensitive):
          ``CasesData.adapt_gov_api``.

        Any other combination returns ``df`` unchanged.
        """
        # Pull everything we need out of the yaml config up front
        date_low, date_high = get_date_low_high(config)
        cases_cfg = config["CasesData"]
        pillars = cases_cfg["pillars"]
        measure = cases_cfg["measure"].casefold()

        # "lad19cds" may be absent from the config file.  The result of
        # AreaCodeData.process is discarded here; the lookup on the next line
        # presumably relies on that call populating config["lad19cds"]
        # -- TODO confirm against AreaCodeData.
        if "lad19cds" not in config:
            AreaCodeData.process(config)
        areacodes = config["lad19cds"]

        source = cases_cfg["input"]
        if source == "processed":
            # Already in the target layout: nothing to adapt
            return df

        if cases_cfg["format"].lower() == "phe":
            return CasesData.adapt_phe(
                df, date_low, date_high, pillars, measure, areacodes
            )

        if source == "url" and cases_cfg["format"] == "json":
            return CasesData.adapt_gov_api(
                df, date_low, date_high, pillars, measure, areacodes
            )

        # No recognised input/format combination: hand the data back as-is
        return df
Exemplo n.º 2
0
    def check(df, config):
        """
        Check that data format seems correct

        With ``days`` the number of dates in the half-open range
        [date_low, date_high) and ``nareas`` the length of
        ``config["lad19cds"]``, the dataframe must have either

        * at least 3 columns and ``days * nareas`` rows, or
        * exactly ``days`` columns and ``nareas`` rows.

        The dates must also be locatable, either as a "date" column or as a
        column axis named "date".

        Returns True when every check passes.  Raises ValueError when the
        dimensions are wrong or the date axis cannot be determined; the
        ``check_*`` helpers may raise errors of their own.
        """
        # (rows, columns) of the frame under test.  This binding was missing,
        # so the dimension test below used to fail with a NameError.
        dims = df.shape
        nareas = len(config["lad19cds"])
        date_low, date_high = get_date_low_high(config)
        # NOTE(review): `closed` was deprecated in pandas 1.4 and removed in
        # 2.0 in favour of `inclusive="left"`; switch once the minimum
        # supported pandas version allows it.
        dates = pd.date_range(start=date_low, end=date_high, closed="left")
        days = len(dates)
        entries = days * nareas

        matches_row_layout = dims[1] >= 3 and dims[0] == entries
        matches_grid_layout = dims[1] == days and dims[0] == nareas
        if not (matches_row_layout or matches_grid_layout):
            print(df)
            raise ValueError("Incorrect CasesData dimensions")

        # The dates must live either in a "date" column or on the column axis
        if "date" not in df and df.columns.name != "date":
            raise ValueError("Cannot determine date axis")

        check_date_bounds(df, date_low, date_high)
        check_date_format(df)
        check_lad19cd_format(df)
        return True
Exemplo n.º 3
0
 def process_lancs(config):
     """
     Read tier restriction data through the LancsData reader selected by
     config["TierData"]["format"] (compared case-insensitively).

     * "lancs_raw":  LancsData.read_tier_restriction_data
     * "lancs_tidy": LancsData.read_challen_tier_restriction
     * "api" or anything else: raises NotImplementedError
     """
     # Not used below; the lookup is kept so that a config without a
     # "Global" section still fails here exactly as before.
     _ = config["Global"]
     tier_cfg = config["TierData"]

     # The result of AreaCodeData.process is discarded; the lookup that
     # follows presumably relies on it populating config["lad19cds"]
     # -- TODO confirm against AreaCodeData.
     if "lad19cds" not in config:
         AreaCodeData.process(config)
     areacodes = config["lad19cds"]

     date_low, date_high = get_date_low_high(config)

     fmt = tier_cfg["format"].lower()
     if fmt == "lancs_raw":
         # Note the argument order: area codes come before the dates here
         return LancsData.read_tier_restriction_data(
             tier_cfg["address"], areacodes, date_low, date_high
         )
     if fmt == "lancs_tidy":
         # ...and after the dates here
         return LancsData.read_challen_tier_restriction(
             tier_cfg["address"], date_low, date_high, areacodes
         )
     if fmt == "api":
         raise NotImplementedError("Tier data api not implemented")

     raise NotImplementedError(
         f'Format type {tier_cfg["format"]} not implemented'
     )
Exemplo n.º 4
0
    def adapt(df, config):
        """
        Adapt the dataframe to the desired format.

        The converter is chosen from config["TierData"]["format"]
        (compared case-insensitively):

        * "tidy": TierData.adapt_xarray
        * "api":  TierData.adapt_api_xarray

        Returns whatever the selected converter returns.  Raises
        NotImplementedError for any other format; previously this case fell
        through to ``return xarray`` with the name unbound and surfaced as an
        UnboundLocalError.
        """
        settings = config["TierData"]

        # TODO this key might not be stored in the config file
        # if it's not, we need to grab it using AreaCodeData
        if "lad19cds" not in config:
            areacodes = AreaCodeData.process(config)["lad19cd"]
        else:
            areacodes = config["lad19cds"]

        # Below is assuming inference_period dates
        date_low, date_high = get_date_low_high(config)

        fmt = settings["format"].lower()
        if fmt == "tidy":
            return TierData.adapt_xarray(df, date_low, date_high, areacodes,
                                         settings)
        if fmt == "api":
            return TierData.adapt_api_xarray(df, date_low, date_high,
                                             areacodes, settings)

        # Fail loudly and descriptively on an unrecognised format
        raise NotImplementedError(
            f'Format type {settings["format"]} not implemented')