Пример #1
0
def vaccinated_to_df(vaccinated: List[Vaccinated]) -> pd.DataFrame:
    """Flatten ``Vaccinated`` records into a DataFrame with scalar columns.

    The nested ``source`` and ``slice`` columns are replaced by top-level
    ``data_date``, ``real_date``, ``dose``, ``group`` and ``location``
    columns; the last three hold the ``csv_str()`` form of each value. The
    ``vaccinated`` column is cast to ``int``.
    """
    frame = pd.DataFrame(vaccinated)
    sources = frame["source"]
    slices = frame["slice"]

    # Promote the two source dates to top-level columns.
    for date_field in ("data_date", "real_date"):
        frame[date_field] = sources.apply(
            lambda entry, key=date_field: entry[key])

    # Promote the slice components, serialised via their csv_str() form.
    # Group and Location are rebuilt from their keyword mappings first.
    frame["dose"] = slices.apply(lambda entry: entry["dose"].csv_str())
    frame["group"] = slices.apply(
        lambda entry: Group(**entry["group"]).csv_str())
    frame["location"] = slices.apply(
        lambda entry: Location(**entry["location"]).csv_str())

    # The nested columns are no longer needed once flattened.
    frame = frame.drop(["source", "slice"], axis=1)
    frame["vaccinated"] = frame["vaccinated"].astype(int)
    return frame
Пример #2
0
def __parse_df_weekly(source: Source,
                      df: pd.DataFrame) -> Iterable[Vaccinated]:
    """Parse one weekly vaccination sheet into ``Vaccinated`` records.

    The table is located inside ``df`` by its header cell ("Region of
    residence" / "NHS region of residence", case-insensitive) and by the
    "Data quality notes:" cell that follows it. Rows and columns that are
    entirely NaN are removed. In the resulting table, row 0 holds the dose
    labels (a blank cell inherits the label to its left), row 1 holds the
    group labels, column 0 holds the locations and the remaining cells hold
    the counts. Counts that map to the same slice are summed.

    Args:
        source: Attached unchanged to every yielded record and named in
            error messages.
        df: Raw sheet contents.

    Yields:
        One ``Vaccinated(source, count, slice)`` per distinct slice, in
        first-seen order (row by row, left to right).

    Raises:
        ValueError: If the start or end marker cell does not occur exactly
            once in ``df``.
        AssertionError: If a dose header is not a recognised label.
    """

    # isinstance (rather than an exact type comparison) also accepts str and
    # float subclasses, e.g. numpy scalar NaNs.
    def is_start(cell) -> bool:
        return isinstance(cell, str) and cell.lower() in (
            "region of residence", "nhs region of residence")

    def is_end(cell) -> bool:
        return isinstance(cell, str) and cell.lower() == "data quality notes:"

    def is_nan(cell) -> bool:
        return isinstance(cell, float) and math.isnan(cell)

    a = df.to_numpy()

    # Trim to the table. The single-element unpacking deliberately fails
    # (ValueError) unless each marker is found exactly once.
    (start_y, ), (start_x, ) = np.where(np.vectorize(is_start)(a))
    (end_y, ), (_, ) = np.where(np.vectorize(is_end)(a))
    a = a[start_y:end_y, start_x:]

    # Remove rows and columns that contain nothing but NaN.
    is_nans = np.vectorize(is_nan)(a)
    a = a[:, ~np.all(is_nans, axis=0)]
    a = a[~np.all(is_nans, axis=1), :]

    # Fill in dose row: a blank header cell takes the last non-blank label
    # to its left.
    filled_in_doses = []
    current_dose = None
    for header in a[0, 1:]:
        if not is_nan(header):
            current_dose = header
        filled_in_doses.append(current_dose)
    a[0, 1:] = filled_in_doses

    # Columns whose (lower-cased) dose label contains any of these are
    # skipped.
    ignored_dose_markers = (
        # Ignore population estimates.
        "population estimates",
        # Ignore precalculated %
        "% who have had at least 1 dose",
        "% who have had both doses",
        # Ignore dose summaries.
        "total 1st doses",
        "total 2nd doses",
    )

    vaccinated_by_slice: DefaultDict[Slice, int] = defaultdict(int)

    for y in range(2, a.shape[0]):
        for x in range(1, a.shape[1]):
            dose_label = a[0, x]
            group_label = a[1, x]
            location_label = a[y, 0]
            vaccinated = a[y, x]

            # A non-string header (e.g. None when the first header cell was
            # blank) is reported like any other unexpected dose instead of
            # failing on .lower().
            if not isinstance(dose_label, str):
                raise AssertionError(
                    f"Unexpected dose {dose_label} in source {source}")
            dose_lower = dose_label.lower()

            if any(marker in dose_lower for marker in ignored_dose_markers):
                continue
            if (isinstance(group_label, str)
                    and "percent of all" in group_label.lower()):
                # Ignore percentage reports.
                continue

            # The cumulative-total column stands for all doses and all ages.
            is_dose_and_group_all = (
                "cumulative total doses to date" in dose_lower)

            if dose_label in ("1st dose", "1st dose5"):
                dose = Dose.DOSE_1
            elif dose_label in ("2nd dose", "2nd dose5"):
                dose = Dose.DOSE_2
            elif is_dose_and_group_all:
                dose = Dose.ALL
            else:
                raise AssertionError(
                    f"Unexpected dose {dose_label} in source {source}")

            if is_dose_and_group_all:
                group = ALL_AGES
            else:
                group = Group.from_csv_str(group_label)

            # "Total", optionally followed by one digit, is the
            # all-locations row.
            if re.match(r"^Total\d?$", location_label):
                location = ALL_LOCATIONS
            else:
                location = Location(location_label)

            vaccinated_by_slice[Slice(dose, group, location)] += vaccinated

    for slice_, vaccinated in vaccinated_by_slice.items():
        yield Vaccinated(source, vaccinated, slice_)
Пример #3
0
def __get_population_by_group() -> Dict[str, int]:
    """Return population counts keyed by each age group's ``csv_str()``.

    The result covers the disjoint age bands listed below plus, for every
    band except the youngest, a cumulative group ``Group(0, <band upper>)``
    whose value is the sum of all bands up to and including that band.

    Returns:
        Mapping from ``Group.csv_str()`` to population count.
    """
    disjoint_ages = {
        Group(0, 15): 10_816_679,
        Group(16, 29): 10_910_865,
        Group(30, 34): 4_709_736,
        Group(35, 39): 4_483_905,
        Group(40, 44): 3_414_297,
        Group(45, 49): 3_715_812,
        Group(50, 54): 3_907_461,
        Group(55, 59): 3_670_651,
        Group(60, 64): 3_111_835,
        Group(65, 69): 2_796_740,
        Group(70, 74): 2_779_326,
        Group(75, 79): 1_940_686,
        Group(80, None): 2_836_964,
    }

    cumulative_ages = {}
    cumulative_sum = 0
    for group in sorted(disjoint_ages, key=lambda g: g.age_lower):
        cumulative_sum += disjoint_ages[group]
        if group.age_lower == 0:
            # The youngest band already starts at 0, so its cumulative group
            # would merely repeat it.
            continue
        cumulative_ages[Group(0, group.age_upper)] = cumulative_sum

    return {
        group.csv_str(): population
        for group, population in {**disjoint_ages, **cumulative_ages}.items()
    }