Example No. 1
def load_mcmc_tables(calib_dirpath: str):
    mcmc_tables = []
    for db_path in _find_db_paths(calib_dirpath):
        db = Database(db_path)
        mcmc_tables.append(db.query("mcmc_run"))

    return mcmc_tables
Example No. 2
def load_derived_output_tables(calib_dirpath: str):
    derived_output_tables = []
    for db_path in _find_db_paths(calib_dirpath):
        db = Database(db_path)
        derived_output_tables.append(db.query("derived_outputs"))

    return derived_output_tables
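
Both loaders follow the same pattern: one DataFrame per chain database. A sketch of typical follow-up usage, assuming pandas and a hypothetical run directory:

import pandas as pd

mcmc_tables = load_mcmc_tables("data/outputs/calibrate/sharks/main/run-0")
all_runs = pd.concat(mcmc_tables)  # one row per MCMC iteration, across all chains
accepted = all_runs[all_runs["accept"] == 1]  # keep only the accepted proposals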
Example No. 3
def plot_timeseries_with_uncertainty_for_powerbi(
    region_name: str, powerbi_db_path: str, output_dir: str
):
    """
    works on powerbi version
    Assumes a COVID model.
    TODO: Unify PowerBI and local version
    """
    os.makedirs(output_dir, exist_ok=True)
    plot_config = load_plot_config(region_name)
    db = Database(powerbi_db_path)
    uncertainty_df = db.query("uncertainty")
    outputs = uncertainty_df["type"].unique().tolist()
    quantile_vals = uncertainty_df["quantile"].unique().tolist()
    for output_name in outputs:
        this_output_dir = os.path.join(output_dir, output_name)
        os.makedirs(this_output_dir, exist_ok=True)
        plotter = FilePlotter(this_output_dir, plot_config["translations"])
        mask = uncertainty_df["type"] == output_name
        output_df = uncertainty_df[mask]
        scenarios = output_df.Scenario.unique().tolist()
        for scenario in scenarios:
            mask = output_df["Scenario"] == scenario
            scenario_df = output_df[mask]
            quantiles = {}
            for q in quantile_vals:
                mask = scenario_df["quantile"] == q
                quantiles[q] = scenario_df[mask]["value"].tolist()

            times = scenario_df.time.unique()
            logger.info("Plotting uncertainty for output %s, scenario %s", output_name, scenario)
            plots.plot_timeseries_with_uncertainty_for_powerbi(
                plotter, output_name, scenario, quantiles, times, plot_config
            )
Example No. 4
def test_calibrate_autumn_mcmc(temp_data_dir):
    # Import autumn stuff inside function so we can mock out the database.
    priors = [{
        "param_name": "ice_cream_sales",
        "distribution": "uniform",
        "distri_params": [1, 5],
    }]
    target_outputs = [{
        "output_key": "shark_attacks",
        "years": [2000, 2001, 2002, 2003, 2004],
        "values": [3, 6, 9, 12, 15],
        "loglikelihood_distri": "poisson",
    }]
    multipliers = {}
    params = {
        "default": {
            "start_time": 2000
        },
        "scenario_start_time": 2000,
        "scenarios": {},
    }
    calib = Calibration(
        "sharks",
        _build_mock_model,
        params,
        priors,
        target_outputs,
        multipliers,
        1,
        1,
    )
    calib.run_fitting_algorithm(
        run_mode=CalibrationMode.AUTUMN_MCMC,
        n_iterations=50,
        n_burned=10,
        n_chains=1,
        available_time=1e6,
    )
    app_dir = os.path.join(temp_data_dir, "outputs", "calibrate", "sharks",
                           "main")
    run_dir = os.path.join(app_dir, os.listdir(app_dir)[0])
    db_fname = [
        fname for fname in os.listdir(run_dir) if fname.endswith(".db")
    ][0]
    out_db_path = os.path.join(run_dir, db_fname)
    assert os.path.exists(out_db_path)

    out_db = Database(out_db_path)
    assert set(out_db.engine.table_names()) == {
        "outputs",
        "derived_outputs",
        "mcmc_run",
    }
    mcmc_runs = out_db.query("mcmc_run")
    max_idx = mcmc_runs.loglikelihood.idxmax()
    best_run = mcmc_runs.iloc[max_idx]
    ice_cream_sales_mle = best_run.ice_cream_sales
    # This value is deterministic because the random seed is fixed.
    assert 2.9 < ice_cream_sales_mle < 3.1
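
The test references _build_mock_model, which is defined elsewhere in the test module. A minimal sketch of the idea, assuming the calibration only reads times and derived_outputs from the model, and noting that the target shark_attacks values (3, 6, ..., 15) equal 3 * (year - 1999):

from unittest.mock import MagicMock

def _build_mock_model(params: dict, update_params={}):
    # Hypothetical stand-in: the derived output scales linearly with the
    # calibrated parameter, so the MLE for ice_cream_sales lands near 3.
    params = {**params, **update_params}
    sales = params["ice_cream_sales"]
    times = [2000, 2001, 2002, 2003, 2004]
    mock_model = MagicMock()
    mock_model.times = times
    mock_model.derived_outputs = {"shark_attacks": [sales * (t - 1999) for t in times]}
    return mock_model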
Example No. 5
def load_output_tables(calib_dirpath: str):
    output_tables = []
    for db_path in find_db_paths(calib_dirpath):
        db = Database(db_path)
        df = db.query("outputs")
        output_tables.append(df)

    return output_tables
Example No. 6
def preprocess_demography(input_db: Database):
    loc_df = read_location_df()
    pop_df = read_population_df(loc_df)
    birth_df = read_crude_birth_df(loc_df)
    death_df = read_death_df(loc_df)
    expect_df = read_life_expectancy_df(loc_df)
    input_db.dump_df("countries", loc_df)
    input_db.dump_df("population", pop_df)
    input_db.dump_df("birth_rates", birth_df)
    input_db.dump_df("deaths", death_df)
    input_db.dump_df("life_expectancy", expect_df)
    return loc_df
Example No. 7
def preprocess_mobility(input_db: Database, country_df):
    """
    Read Google Mobility data from CSV into input database
    """
    mob_df = pd.read_csv(MOBILITY_CSV_PATH)

    dhhs_cluster_mobility = reshape_to_clusters(mob_df)

    # Drop all sub-region 2 data, too detailed.
    major_region_mask = mob_df["sub_region_2"].isnull() & mob_df["metro_area"].isnull()
    davao_mask = mob_df.metro_area == "Davao City Metropolitan Area"
    mob_df = mob_df[major_region_mask | davao_mask].copy()

    # These two regions are the same
    mob_df.loc[(mob_df.sub_region_1 == "National Capital Region"), "sub_region_1"] = "Metro Manila"
    mob_df.loc[(mob_df.metro_area == "Davao City Metropolitan Area"), "sub_region_1"] = "Davao City"
    mob_df.loc[
        (mob_df.sub_region_1 == "Federal Territory of Kuala Lumpur"), "sub_region_1"
    ] = "Kuala Lumpur"

    mob_df = mob_df.append(dhhs_cluster_mobility)

    # Drop all rows that have NA values in 1 or more mobility columns.
    mob_cols = [c for c in mob_df.columns if c.endswith(MOBILITY_SUFFIX)]
    mask = False
    for c in mob_cols:
        mask = mask | mob_df[c].isnull()

    mob_df = mob_df[~mask].copy()
    for c in mob_cols:
        # Convert percent values to decimal: 1.0 being no change.
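        # e.g. a Google value of -20 (a 20% drop from baseline) becomes 1 + (-20 / 100) = 0.8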
        mob_df[c] = mob_df[c].apply(lambda x: 1 + x / 100)

    # Drop unused columns, rename kept columns
    cols_to_keep = [*mob_cols, "country_region", "sub_region_1", "date"]
    cols_to_drop = [c for c in mob_df.columns if c not in cols_to_keep]
    mob_df = mob_df.drop(columns=cols_to_drop)
    mob_col_rename = {c: c.replace(MOBILITY_SUFFIX, "") for c in mob_cols}
    mob_df.rename(columns={**mob_col_rename, "sub_region_1": "region"}, inplace=True)

    # Convert countries to ISO3
    countries = mob_df["country_region"].unique().tolist()
    iso3s = {c: get_iso3(c, country_df) for c in countries}
    iso3_series = mob_df["country_region"].apply(lambda c: iso3s[c])
    mob_df.insert(0, "iso3", iso3_series)
    mob_df = mob_df.drop(columns=["country_region"])

    mob_df = mob_df.sort_values(["iso3", "region", "date"])
    input_db.dump_df("mobility", mob_df)
Example No. 8
def run_full_models_for_mcmc(burn_in: int, src_db_path: str, dest_db_path: str,
                             build_model, params: dict):
    """
    Run the full baseline model and all scenarios for all accepted MCMC runs in src db.
    """
    src_db = Database(src_db_path)
    dest_db = Database(dest_db_path)

    logger.info("Copying mcmc_run table to %s", dest_db_path)
    mcmc_run_df = src_db.query("mcmc_run")

    # Apply burn in and save to destination
    burned_runs_str = ", ".join(mcmc_run_df[:burn_in].idx)
    logger.info("Burned MCMC runs %s", burned_runs_str)
    mcmc_run_df = mcmc_run_df[burn_in:]
    dest_db.dump_df("mcmc_run", mcmc_run_df)

    mcmc_runs = list(mcmc_run_df.T.to_dict().values())
    for mcmc_run in mcmc_runs:
        meta = {k: v for k, v in mcmc_run.items() if k in META_COLS}
        if not meta["accept"]:
            logger.info("Ignoring non-accepted MCMC run %s", meta["idx"])
            continue

        logger.info("Running full model for MCMC run %s", meta["idx"])
        param_updates = {
            k: v
            for k, v in mcmc_run.items() if k not in META_COLS
        }

        run_idx = meta["idx"].split("_")[-1]

        def update_func(ps: dict):
            return update_params(ps, param_updates)

        with Timer("Running model scenarios"):
            num_scenarios = 1 + len(params["scenarios"].keys())
            scenarios = []
            for scenario_idx in range(num_scenarios):
                scenario = Scenario(build_model, scenario_idx, params)
                scenarios.append(scenario)

            # Run the baseline scenario.
            baseline_scenario = scenarios[0]
            baseline_scenario.run(update_func=update_func)
            baseline_model = baseline_scenario.model

            # Run all the other scenarios
            for scenario in scenarios[1:]:
                scenario.run(base_model=baseline_model,
                             update_func=update_func)

        with Timer("Saving model outputs to the database"):
            models = [s.model for s in scenarios]
            store_run_models(models, dest_db_path, run_idx=run_idx)

    logger.info("Finished running full models for all accepted MCMC runs.")
Example No. 9
def preprocess_social_mixing(input_db: Database, country_df):
    for location in LOCATIONS:
        for sheet_number, header_arg in SHEET_NUMBERS:
            sheet_name = f"MUestimates_{location}_{sheet_number}.xlsx"
            sheet_path = os.path.join(MIXING_DIRPATH, sheet_name)
            xl = pd.ExcelFile(sheet_path)
            sheet_names = xl.sheet_names
            iso3s = [get_iso3(n, country_df) for n in sheet_names]
            for idx, sheet_name in enumerate(sheet_names):
                iso3 = iso3s[idx]
                mix_df = pd.read_excel(xl,
                                       header=header_arg,
                                       sheet_name=sheet_name)
                if sheet_number == "2":
                    renames = {n - 1: f"X{n}" for n in range(1, 17)}
                    mix_df.rename(columns=renames, inplace=True)

                mix_df.insert(0, "location",
                              [location for _ in range(len(mix_df))])
                mix_df.insert(0, "iso3", [iso3 for _ in range(len(mix_df))])
                input_db.dump_df("social_mixing", mix_df)
Example No. 10
def collect_map_estimate(calib_dirpath: str):
    """
    Read all MCMC outputs found in mcmc_db_folder and print the map parameter values.
    :return: dict of parameters
    """
    mcmc_tables = []
    db_paths = [
        os.path.join(calib_dirpath, f) for f in os.listdir(calib_dirpath)
        if f.endswith(".db") and not f.startswith("mcmc_percentiles")
    ]
    for db_path in db_paths:
        db = Database(db_path)
        mcmc_tables.append(
            db.query("mcmc_run").sort_values(by="loglikelihood",
                                             ascending=False))

    print("Maximum loglikelihood for each chain:")
    print([
        mcmc_tables[i]["loglikelihood"].iloc[0]
        for i in range(len(mcmc_tables))
    ])
    print()

    print("Chains' lengths:")
    print([
        len(mcmc_tables[i]["loglikelihood"]) for i in range(len(mcmc_tables))
    ])
    print()

    best_chain_index = np.argmax([
        mcmc_tables[i]["loglikelihood"].iloc[0]
        for i in range(len(mcmc_tables))
    ])
    non_param_cols = ["idx", "Scenario", "loglikelihood", "accept"]
    param_list = [c for c in mcmc_tables[0].columns if c not in non_param_cols]
    map_estimates = {}
    for param in param_list:
        map_estimates[param] = mcmc_tables[best_chain_index][param].iloc[0]
    return map_estimates, best_chain_index
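
A sketch of how this might be called, assuming a hypothetical calibration directory:

map_estimates, best_chain_index = collect_map_estimate("data/outputs/calibrate/sharks/main/run-0")
print(f"Best chain: {best_chain_index}")
for param, value in map_estimates.items():
    print(f"{param}: {value}")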
Example No. 11
def run_mcmc_plots():
    app_dirname, app_dirpath = selectors.app()
    calib_dirname, calib_dirpath = selectors.calibration_run(app_dirpath)
    if not calib_dirname:
        st.write("No calibration folder found")
        return

    # Load MCMC tables
    mcmc_tables = []
    db_paths = [
        os.path.join(calib_dirpath, f) for f in os.listdir(calib_dirpath)
        if f.endswith(".db")
    ]
    for db_path in db_paths:
        db = Database(db_path)
        mcmc_tables.append(db.query("mcmc_run"))

    plotter = StreamlitPlotter({})
    plot_type = st.sidebar.selectbox("Select plot type",
                                     list(PLOT_FUNCS.keys()))
    plot_func = PLOT_FUNCS[plot_type]
    plot_func(plotter, mcmc_tables)
Example No. 12
def build_input_database(force: bool = False, rebuild: bool = False):
    """
    Builds the input database from scratch.
    If force is True, build the database from scratch and ignore any previous hashes.
    If force is False, do not build if it already exists,
    and crash if the built database hash does not match.

    If rebuild is True, then we force rebuild the database, but we don't write a new hash.

    Returns a Database, representing the input database.
    """
    if os.path.exists(input_db_path) and not (force or rebuild):
        input_db = Database(input_db_path)
    else:
        logger.info("Building a new database.")
        input_db = Database(input_db_path)
        with Timer("Deleting all existing data."):
            input_db.delete_everything()

        with Timer("Ingesting COVID AU data."):
            preprocess_covid_au(input_db)

        with Timer("Ingesting COVID PHL data."):
            preprocess_covid_phl(input_db)

        with Timer("Ingesting Our World in Data data."):
            preprocess_our_world_in_data(input_db)

        with Timer("Ingesting demography data."):
            country_df = preprocess_demography(input_db)

        with Timer("Ingesting social mixing data."):
            preprocess_social_mixing(input_db, country_df)

        with Timer("Ingesting mobility data."):
            preprocess_mobility(input_db, country_df)

    current_db_hash = input_db.get_hash()
    if force:
        # Write the file hash
        write_file_hash(current_db_hash, input_db_hash_path)
    else:
        # Read the file hash and compare
        saved_db_hash = read_file_hash(input_db_hash_path)
        is_hash_mismatch = current_db_hash != saved_db_hash
        if rebuild and is_hash_mismatch:
            msg = "Input database does not match canonical version."
            raise ValueError(msg)
        elif is_hash_mismatch:
            logger.info("Hash mismatch, try rebuilding database...")
            build_input_database(rebuild=True)

    return input_db
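
write_file_hash and read_file_hash are not shown here; assuming the canonical hash is stored as plain text beside the database, they could be as simple as:

def write_file_hash(file_hash: str, hash_path: str):
    # Persist the canonical hash next to the database file.
    with open(hash_path, "w") as f:
        f.write(file_hash)

def read_file_hash(hash_path: str) -> str:
    # Load the previously saved canonical hash for comparison.
    with open(hash_path) as f:
        return f.read().strip()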
Example No. 13
def plot_uncertainty(targets: dict, powerbi_db_path: str, output_dir: str):
    """
    works on powerbi version
    Assumes a COVID model.
    """
    os.makedirs(output_dir, exist_ok=True)
    db = Database(powerbi_db_path)
    uncertainty_df = db.query("uncertainty")
    outputs = uncertainty_df["type"].unique().tolist()
    for output_name in outputs:
        this_output_dir = os.path.join(output_dir, output_name)
        os.makedirs(this_output_dir, exist_ok=True)
        plotter = FilePlotter(this_output_dir, targets)
        scenario_idxs = uncertainty_df["scenario"].unique().tolist()
        for scenario_idx in scenario_idxs:
            logger.info("Plotting uncertainty for output %s, scenario %s",
                        output_name, scenario_idx)
            if scenario_idx == 0:
                # Just plot the baseline scenario for the full time period.
                scenario_idxs = [0]
                x_low = 0
            else:
                # Plot the baseline compared to the scenario, but only for the time period
                # where the scenario is active.
                scenario_idxs = [0, scenario_idx]
                mask = uncertainty_df["scenario"] == scenario_idx
                x_low = uncertainty_df[mask]["time"].min()

            plots.plot_timeseries_with_uncertainty(
                plotter,
                uncertainty_df,
                output_name,
                scenario_idxs,
                targets,
                x_low=x_low,
            )
Example No. 14
def test_plot_uncertainty(tmp_path):
    """
    Ensure uncertainty plotting code works.
    """
    output_dir = tmp_path
    powerbi_db_path = os.path.join(tmp_path, "powerbi.db")
    targets = {
        "incidence": {
            "output_key": "incidence",
            "title": "incidence",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
        "foo": {
            "output_key": "foo",
            "title": "foo",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
    }
    funcs = [
        lambda t: 2 * t + random.random(), lambda t: t**3 + random.random()
    ]
    # Build data for plotting
    do_df, mcmc_df, _ = build_synthetic_calibration(targets,
                                                    funcs,
                                                    chains=2,
                                                    runs=20,
                                                    times=20)
    unc_df = calculate_mcmc_uncertainty(mcmc_df, do_df, targets)
    # Create database for plotting
    db = Database(powerbi_db_path)
    db.dump_df("mcmc_run", mcmc_df)
    db.dump_df("derived_outputs", do_df)
    db.dump_df("uncertainty", unc_df)
    # Create plots
    plot_uncertainty(targets, powerbi_db_path, output_dir)
    # Check plots
    expected_foo_path = os.path.join(tmp_path, "foo", "uncertainty-foo-0.png")
    expected_incidence_path = os.path.join(tmp_path, "incidence",
                                           "uncertainty-incidence-0.png")
    assert os.path.exists(expected_foo_path)
    assert os.path.exists(expected_incidence_path)
Example No. 15
import os
from autumn import constants

from autumn.demography.social_mixing import get_all_prem_countries
from autumn.db import Database
from apps.covid_19.john_hopkins import (
    get_all_jh_countries,
    read_john_hopkins_data_from_csv,
    plot_jh_data,
)

INPUT_DB_PATH = os.path.join(constants.DATA_PATH, "inputs.db")

input_database = Database(database_name=INPUT_DB_PATH)

prem_country_list = get_all_prem_countries()  # N=152
jh_country_list = get_all_jh_countries()  # N=180
intercept_country_list = list(set(prem_country_list)
                              & set(jh_country_list))  # N=126

all_data = {}
for i, country in enumerate(intercept_country_list):
    all_data[country] = read_john_hopkins_data_from_csv(country=country)
# plot_jh_data(all_data)

# print list of countries with at least 1000 cases
countries_1000 = []
for country, n_cases in all_data.items():
    if sum(n_cases) >= 1000:
        countries_1000.append(country)
print(countries_1000)
Example No. 16
def preprocess_social_mixing(input_db: Database, country_df):
    for location in LOCATIONS:
        for sheet_number, header_arg in SHEET_NUMBERS:
            sheet_name = f"MUestimates_{location}_{sheet_number}.xlsx"
            sheet_path = os.path.join(MIXING_DIRPATH, sheet_name)
            xl = pd.ExcelFile(sheet_path)
            sheet_names = xl.sheet_names
            iso3s = [get_iso3(n, country_df) for n in sheet_names]
            for idx, sheet_name in enumerate(sheet_names):
                iso3 = iso3s[idx]
                mix_df = pd.read_excel(xl,
                                       header=header_arg,
                                       sheet_name=sheet_name)
                if sheet_number == "2":
                    renames = {n - 1: f"X{n}" for n in range(1, 17)}
                    mix_df.rename(columns=renames, inplace=True)

                mix_df.insert(0, "location",
                              [location for _ in range(len(mix_df))])
                mix_df.insert(0, "iso3", [iso3 for _ in range(len(mix_df))])
                input_db.dump_df("social_mixing", mix_df)

    # Next gen social mixing
    original_mm = input_db.query("social_mixing")

    df = pd.read_csv(
        os.path.join(MIXING_DIRPATH, "synthetic_contacts_2020.csv"))
    df = df[df.setting == "overall"]
    df.drop(columns="setting", inplace=True)
    df.replace(
        {
            "0 to 4": "00 to 04",
            "5 to 9": "05 to 09",
            "all": "all_locations",
            "others": "other_locations",
        },
        inplace=True,
    )

    # The contactor is in j (columns) and the contactee is in i (rows)
    df = df.pivot_table(
        index=["iso3c", "location_contact", "age_cotactee"],
        columns="age_contactor",
        values="mean_number_of_contacts",
    )
    df = df.reset_index()
    df.drop(columns="age_cotactee", inplace=True)

    cols = list(df.columns[2:])
    new_col = ["X" + str(x) for x in range(1, len(cols) + 1)]
    replace_col = dict(zip(cols, new_col))
    df.rename(columns=replace_col, inplace=True)
    df.rename(columns={
        "iso3c": "iso3",
        "location_contact": "location"
    },
              inplace=True)

    iso3_diff = set(original_mm.iso3).difference(df.iso3)
    iso3_mask = original_mm.iso3.isin(iso3_diff)
    df = df.append(original_mm[iso3_mask], ignore_index=True)

    input_db.dump_df("social_mixing_2020", df)
Example No. 17
def preprocess_covid_au(input_db: Database):
    df = pd.read_csv(COVID_AU_CSV_PATH)
    input_db.dump_df("covid_au", df)
    df = pd.read_csv(COVID_LGA_CSV_PATH)
    df = reshape_to_clusters(df)
    input_db.dump_df("covid_dhhs_test", df)
Example No. 18
def preprocess_our_world_in_data(input_db: Database):
    df = pd.read_csv(OUR_WORLD_IN_DATA_CSV_PATH)

    # Replace the one strange value for test numbers in Malaysia
    df.loc[(df.iso_code == "MYS") & (df.new_tests > 1e5), "new_tests"] = np.nan
    input_db.dump_df("owid", df)
Example No. 19
def build_model(params, update_params={}):
    external_params = deepcopy(params)
    external_params.update(update_params)
    model_parameters = {
        "contact_rate": external_params["contact_rate"],
        "contact_rate_recovered": external_params["contact_rate"]
        * external_params["rr_transmission_recovered"],
        "contact_rate_late_latent": external_params["contact_rate"]
        * external_params["rr_transmission_late_latent"],
        "recovery": external_params["self_recovery_rate"],
        "infect_death": external_params["tb_mortality_rate"],
        **external_params,
    }
    stratify_by = external_params["stratify_by"]
    derived_output_types = external_params["derived_outputs"]

    input_database = Database(database_name=INPUT_DB_PATH)
    n_iter = (
        int(
            round(
                (external_params["end_time"] - external_params["start_time"])
                / external_params["time_step"]
            )
        )
        + 1
    )
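    # e.g. start_time=2000, end_time=2020, time_step=0.5
    # gives n_iter = int(round(20 / 0.5)) + 1 = 41 evenly spaced time points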
    integration_times = numpy.linspace(
        external_params["start_time"], external_params["end_time"], n_iter
    ).tolist()

    model_parameters.update(change_parameter_unit(provide_aggregated_latency_parameters(), 365.251))

    # sequentially add groups of flows
    flows = add_standard_infection_flows([])
    flows = add_standard_latency_flows(flows)
    flows = add_standard_natural_history_flows(flows)

    # compartments
    compartments = ["susceptible", "early_latent", "late_latent", "infectious", "recovered"]

    # define model (birth_approach options: replace_deaths, add_crude_birth_rate)
    init_pop = {"infectious": 1000, "late_latent": 1000000}

    tb_model = StratifiedModel(
        integration_times,
        compartments,
        init_pop,
        model_parameters,
        flows,
        birth_approach="replace_deaths",
        starting_population=external_params["start_population"],
        output_connections={},
        derived_output_functions={},
        death_output_categories=((), ("age_0",)),
    )

    # add crude birth rate from UN estimates
    tb_model = add_birth_rate_functions(tb_model, input_database, "MNG")

    # add case detection process to basic model
    tb_model.add_transition_flow(
        {
            "type": "standard_flows",
            "parameter": "case_detection",
            "origin": "infectious",
            "to": "recovered",
        }
    )

    # Add IPT as a customised flow
    def ipt_flow_func(model, n_flow, _time, _compartment_values):
        """
        Work out the number of detected individuals from the relevant active TB compartments (with regard to the origin
        latent compartment of n_flow) multiplied with the proportion of the relevant infected contacts that is from this
        latent compartment.
        """
        dict_flows = model.transition_flows_dict
        origin_comp_name = dict_flows["origin"][n_flow]
        components_latent_comp = find_name_components(origin_comp_name)

        # find compulsory tags to be found in relevant infectious compartments
        tags = []
        for component in components_latent_comp:
            if "location_" in component or "strain_" in component:
                tags.append(component)

        # loop through all relevant infectious compartments
        total_tb_detected = 0.0
        for comp_ind in model.infectious_indices["all_strains"]:
            active_components = find_name_components(model.compartment_names[comp_ind])
            if all(elem in active_components for elem in tags):
                infectious_pop = _compartment_values[comp_ind]
                detection_indices = [
                    index
                    for index, val in dict_flows["parameter"].items()
                    if "case_detection" in val
                ]
                flow_index = [
                    index
                    for index in detection_indices
                    if dict_flows["origin"][index] == model.compartment_names[comp_ind]
                ][0]
                param_name = dict_flows["parameter"][flow_index]
                detection_tx_rate = model.get_parameter_value(param_name, _time)
                tsr = mongolia_tsr(_time) + external_params["reduction_negative_tx_outcome"] * (
                    1.0 - mongolia_tsr(_time)
                )
                if "strain_mdr" in model.compartment_names[comp_ind]:
                    tsr = external_params["mdr_tsr"] * external_params["prop_mdr_detected_as_mdr"]
                if tsr > 0.0:
                    total_tb_detected += infectious_pop * detection_tx_rate / tsr

        # list all latent compartments relevant to the relevant infectious population
        relevant_latent_compartments_indices = [
            i
            for i, comp_name in enumerate(model.compartment_names)
            if find_stem(comp_name) == "early_latent" and all(elem in comp_name for elem in tags)
        ]

        total_relevant_latent_size = sum(
            _compartment_values[i] for i in relevant_latent_compartments_indices
        )
        current_latent_size = _compartment_values[model.compartment_names.index(origin_comp_name)]
        prop_of_relevant_latent = (
            current_latent_size / total_relevant_latent_size
            if total_relevant_latent_size > 0.0
            else 0.0
        )

        return total_tb_detected * prop_of_relevant_latent

    tb_model.add_transition_flow(
        {
            "type": "customised_flows",
            "parameter": "ipt_rate",
            "origin": "early_latent",
            "to": "recovered",
            "function": ipt_flow_func,
        }
    )

    # add ACF flow
    tb_model.add_transition_flow(
        {
            "type": "standard_flows",
            "parameter": "acf_rate",
            "origin": "infectious",
            "to": "recovered",
        }
    )

    # load time-variant case detection rate
    cdr_scaleup_overall = build_mongolia_timevariant_cdr(external_params["cdr_multiplier"])

    # targeted TB prevalence proportions by organ
    prop_smearpos = 0.25
    prop_smearneg = 0.40
    prop_extrapul = 0.35

    # disease duration by organ
    overall_duration = prop_smearpos * 1.6 + 5.3 * (1 - prop_smearpos)
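    # e.g. with prop_smearpos = 0.25: 0.25 * 1.6 + 0.75 * 5.3 = 4.375 years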
    disease_duration = {
        "smearpos": 1.6,
        "smearneg": 5.3,
        "extrapul": 5.3,
        "overall": overall_duration,
    }

    # work out the CDR for smear-positive TB
    def cdr_smearpos(time):
        # external_params['diagnostic_sensitivity_smearneg'] is replaced with its hard-coded value 0.7 to prevent
        # cdr_smearpos from being affected when diagnostic_sensitivity_smearneg is increased in interventions (e.g. Xpert)

        # return (cdr_scaleup_overall(time) /
        #         (prop_smearpos + prop_smearneg * external_params['diagnostic_sensitivity_smearneg'] +
        #          prop_extrapul * external_params['diagnostic_sensitivity_extrapul']))
        return cdr_scaleup_overall(time) / (
            prop_smearpos
            + prop_smearneg * 0.7
            + prop_extrapul * external_params["diagnostic_sensitivity_extrapul"]
        )

    def cdr_smearneg(time):
        return cdr_smearpos(time) * external_params["diagnostic_sensitivity_smearneg"]

    def cdr_extrapul(time):
        return cdr_smearpos(time) * external_params["diagnostic_sensitivity_extrapul"]

    cdr_by_organ = {
        "smearpos": cdr_smearpos,
        "smearneg": cdr_smearneg,
        "extrapul": cdr_extrapul,
        "overall": cdr_scaleup_overall,
    }
    detect_rate_by_organ = {}
    for organ in ["smearpos", "smearneg", "extrapul", "overall"]:
        prop_to_rate = convert_competing_proportion_to_rate(1.0 / disease_duration[organ])
        detect_rate_by_organ[organ] = return_function_of_function(cdr_by_organ[organ], prop_to_rate)

    # load time-variant treatment success rate
    mongolia_tsr = build_mongolia_timevariant_tsr()

    # create a treatment success rate function adjusted for the treatment support intervention
    tsr_function = lambda t: mongolia_tsr(t) + external_params["reduction_negative_tx_outcome"] * (
        1.0 - mongolia_tsr(t)
    )

    # TB control recovery rate (detection and treatment): overall rate if not stratified by organ, smear-positive rate otherwise
    if "organ" not in stratify_by:
        tb_control_recovery_rate = lambda t: tsr_function(t) * detect_rate_by_organ["overall"](t)
    else:
        tb_control_recovery_rate = lambda t: tsr_function(t) * detect_rate_by_organ["smearpos"](t)

    # initialise ipt_rate function assuming coverage of 1.0 before age stratification
    ipt_rate_function = (
        lambda t: 1.0
        * external_params["yield_contact_ct_tstpos_per_detected_tb"]
        * external_params["ipt_efficacy"]
    )

    # initialise acf_rate function
    acf_rate_function = (
        lambda t: external_params["acf_coverage"]
        * external_params["acf_sensitivity"]
        * (
            mongolia_tsr(t)
            + external_params["reduction_negative_tx_outcome"] * (1.0 - mongolia_tsr(t))
        )
    )

    # assign newly created functions to model parameters
    tb_model.adaptation_functions["case_detection"] = tb_control_recovery_rate
    tb_model.parameters["case_detection"] = "case_detection"

    tb_model.adaptation_functions["ipt_rate"] = ipt_rate_function
    tb_model.parameters["ipt_rate"] = "ipt_rate"

    tb_model.adaptation_functions["acf_rate"] = acf_rate_function
    tb_model.parameters["acf_rate"] = "acf_rate"

    if "strain" in stratify_by:
        mdr_adjustment = (
            external_params["prop_mdr_detected_as_mdr"] * external_params["mdr_tsr"] / 0.9
        )  # /.9 for last DS TSR

        tb_model.stratify(
            "strain",
            ["ds", "mdr"],
            ["early_latent", "late_latent", "infectious"],
            verbose=False,
            requested_proportions={"mdr": 0.0},
            adjustment_requests={
                "contact_rate": {"ds": 1.0, "mdr": 1.0},
                "case_detection": {"mdr": mdr_adjustment},
                "ipt_rate": {
                    "ds": 1.0,  # external_params['ds_ipt_switch'],
                    "mdr": external_params["mdr_ipt_switch"],
                },
            },
            infectiousness_adjustments={
                "ds": 1.0,
                "mdr": external_params["mdr_infectiousness_multiplier"],
            },
        )

        tb_model.add_transition_flow(
            {
                "type": "standard_flows",
                "parameter": "dr_amplification",
                "origin": "infectiousXstrain_ds",
                "to": "infectiousXstrain_mdr",
                "implement": len(tb_model.all_stratifications),
            }
        )

        dr_amplification_rate = (
            lambda t: detect_rate_by_organ["overall"](t)
            * (1.0 - mongolia_tsr(t))
            * (1.0 - external_params["reduction_negative_tx_outcome"])
            * external_params["dr_amplification_prop_among_nonsuccess"]
        )

        tb_model.adaptation_functions["dr_amplification"] = dr_amplification_rate
        tb_model.parameters["dr_amplification"] = "dr_amplification"

    if "age" in stratify_by:
        age_breakpoints = [0, 5, 15, 60]
        age_infectiousness = get_parameter_dict_from_function(
            logistic_scaling_function(10.0), age_breakpoints
        )
        age_params = get_adapted_age_parameters(age_breakpoints)
        age_params.update(split_age_parameter(age_breakpoints, "contact_rate"))

        # adjustment of latency parameters
        for param in ["early_progression", "late_progression"]:
            for age_break in age_breakpoints:
                if age_break > 5:
                    age_params[param][str(age_break) + "W"] *= external_params[
                        "adult_latency_adjustment"
                    ]

        pop_morts = get_pop_mortality_functions(
            input_database, age_breakpoints, country_iso_code="MNG"
        )
        age_params["universal_death_rate"] = {}
        for age_break in age_breakpoints:
            tb_model.time_variants["universal_death_rateXage_" + str(age_break)] = pop_morts[
                age_break
            ]
            tb_model.parameters[
                "universal_death_rateXage_" + str(age_break)
            ] = "universal_death_rateXage_" + str(age_break)

            age_params["universal_death_rate"][
                str(age_break) + "W"
            ] = "universal_death_rateXage_" + str(age_break)
        tb_model.parameters["universal_death_rateX"] = 0.0

        # age-specific IPT
        ipt_by_age = {"ipt_rate": {}}
        for age_break in age_breakpoints:
            ipt_by_age["ipt_rate"][str(age_break)] = external_params[
                "ipt_age_" + str(age_break) + "_ct_coverage"
            ]
        age_params.update(ipt_by_age)

        # add BCG effect without stratification assuming constant 100% coverage
        bcg_wane = create_sloping_step_function(15.0, 0.3, 30.0, 1.0)
        age_bcg_efficacy_dict = get_parameter_dict_from_function(
            lambda value: bcg_wane(value), age_breakpoints
        )
        age_params.update({"contact_rate": age_bcg_efficacy_dict})

        tb_model.stratify(
            "age",
            deepcopy(age_breakpoints),
            [],
            {},
            adjustment_requests=age_params,
            infectiousness_adjustments=age_infectiousness,
            verbose=False,
        )

        # patch for IPT: overwrite parameters when ds_ipt has been turned off but some coverage is still needed at baseline
        if external_params["ds_ipt_switch"] == 0.0 and external_params["mdr_ipt_switch"] == 1.0:
            tb_model.parameters["ipt_rateXstrain_dsXage_0"] = 0.17
            for age_break in [5, 15, 60]:
                tb_model.parameters["ipt_rateXstrain_dsXage_" + str(age_break)] = 0.0

    if "organ" in stratify_by:
        props_smear = {
            "smearpos": external_params["prop_smearpos"],
            "smearneg": 1.0 - (external_params["prop_smearpos"] + 0.20),
            "extrapul": 0.20,
        }
        mortality_adjustments = {"smearpos": 1.0, "smearneg": 0.064, "extrapul": 0.064}
        recovery_adjustments = {"smearpos": 1.0, "smearneg": 0.56, "extrapul": 0.56}

        # work out the detection rate adjustment by organ status
        adjustment_smearneg = (
            detect_rate_by_organ["smearneg"](2015.0) / detect_rate_by_organ["smearpos"](2015.0)
            if detect_rate_by_organ["smearpos"](2015.0) > 0.0
            else 1.0
        )
        adjustment_extrapul = (
            detect_rate_by_organ["extrapul"](2015.0) / detect_rate_by_organ["smearpos"](2015.0)
            if detect_rate_by_organ["smearpos"](2015.0) > 0.0
            else 1.0
        )

        tb_model.stratify(
            "organ",
            ["smearpos", "smearneg", "extrapul"],
            ["infectious"],
            infectiousness_adjustments={"smearpos": 1.0, "smearneg": 0.25, "extrapul": 0.0},
            verbose=False,
            requested_proportions=props_smear,
            adjustment_requests={
                "recovery": recovery_adjustments,
                "infect_death": mortality_adjustments,
                "case_detection": {
                    "smearpos": 1.0,
                    "smearneg": adjustment_smearneg,
                    "extrapul": adjustment_extrapul,
                },
                "early_progression": props_smear,
                "late_progression": props_smear,
            },
        )

    if "location" in stratify_by:
        props_location = {
            "rural_province": 0.48,
            "urban_nonger": 0.368,
            "urban_ger": 0.15,
            "prison": 0.002,
        }
        raw_relative_risks_loc = {"rural_province": 1.0}
        for stratum in ["urban_nonger", "urban_ger", "prison"]:
            raw_relative_risks_loc[stratum] = external_params["rr_transmission_" + stratum]
        scaled_relative_risks_loc = scale_relative_risks_for_equivalence(
            props_location, raw_relative_risks_loc
        )

        # dummy matrix for mixing by location
        location_mixing = numpy.array(
            [
                0.899,
                0.05,
                0.05,
                0.001,
                0.049,
                0.7,
                0.25,
                0.001,
                0.049,
                0.25,
                0.7,
                0.001,
                0.1,
                0.1,
                0.1,
                0.7,
            ]
        ).reshape((4, 4))
        location_mixing *= 3.0  # adjusted such that heterogeneous mixing yields similar overall burden as homogeneous

        location_adjustments = {}
        for beta_type in ["", "_late_latent", "_recovered"]:
            location_adjustments["contact_rate" + beta_type] = scaled_relative_risks_loc

        location_adjustments["acf_rate"] = {}
        for stratum in ["rural_province", "urban_nonger", "urban_ger", "prison"]:
            location_adjustments["acf_rate"][stratum] = external_params[
                "acf_" + stratum + "_switch"
            ]

        tb_model.stratify(
            "location",
            ["rural_province", "urban_nonger", "urban_ger", "prison"],
            [],
            requested_proportions=props_location,
            verbose=False,
            entry_proportions=props_location,
            adjustment_requests=location_adjustments,
            mixing_matrix=location_mixing,
        )

    # tb_model.transition_flows.to_csv("transitions.csv")
    # tb_model.death_flows.to_csv("deaths.csv")

    # create some customised derived_outputs

    if "notifications" in derived_output_types:

        def notification_function_builder(stratum):
            """
                example of stratum: "Xage_0Xstrain_mdr"
            """

            def calculate_notifications(model, time):

                total_notifications = 0.0
                dict_flows = model.transition_flows_dict

                comp_ind = model.compartment_names.index("infectious" + stratum)
                infectious_pop = model.compartment_values[comp_ind]
                detection_indices = [
                    index
                    for index, val in dict_flows["parameter"].items()
                    if "case_detection" in val
                ]
                flow_index = [
                    index
                    for index in detection_indices
                    if dict_flows["origin"][index] == model.compartment_names[comp_ind]
                ][0]
                param_name = dict_flows["parameter"][flow_index]
                detection_tx_rate = model.get_parameter_value(param_name, time)
                tsr = mongolia_tsr(time) + external_params["reduction_negative_tx_outcome"] * (
                    1.0 - mongolia_tsr(time)
                )
                if "strain_mdr" in model.compartment_names[comp_ind]:
                    tsr = external_params["mdr_tsr"] * external_params["prop_mdr_detected_as_mdr"]
                if tsr > 0.0:
                    total_notifications += infectious_pop * detection_tx_rate / tsr

                return total_notifications

            return calculate_notifications

        for compartment in tb_model.compartment_names:
            if "infectious" in compartment:
                stratum = compartment.split("infectious")[1]
                tb_model.derived_output_functions[
                    "notifications" + stratum
                ] = notification_function_builder(stratum)
                # tb_model.derived_output_functions['popsize_treatment_support' + stratum] = notification_function_builder(stratum)

    if "incidence" in derived_output_types:
        # add output_connections for all stratum-specific incidence outputs
        incidence_output_conns = create_output_connections_for_incidence_by_stratum(
            tb_model.compartment_names
        )
        tb_model.output_connections.update(incidence_output_conns)
        # Create a 'combined incidence' derived output
        early_names = [k for k in incidence_output_conns.keys() if k.startswith("incidence_early")]
        for early_name in early_names:
            rootname = early_name[15:]
            late_name = f"incidence_late{rootname}"
            combined_name = f"incidence{rootname}"

            def add_combined_incidence(model, time, e=early_name, l=late_name):
                time_idx = model.times.index(time)
                early_incidence = model.derived_outputs[e][time_idx]
                late_incidence = model.derived_outputs[l][time_idx]
                return early_incidence + late_incidence

            tb_model.derived_output_functions[combined_name] = add_combined_incidence

    if "mortality" in derived_output_types:
        # prepare death outputs for all strata
        tb_model.death_output_categories = list_all_strata_for_mortality(tb_model.compartment_names)

    ############################################
    #       population sizes for costing
    ############################################
    if "popsizes" in derived_output_types:
        # nb of detected individuals by strain:
        def detected_popsize_function_builder(tag):
            """
                example of tag: "starin_mdr" or "organ_smearpos"
            """

            def calculate_nb_detected(model, time):
                nb_treated = 0.0
                for key, value in model.derived_outputs.items():
                    if "notifications" in key and tag in key:
                        this_time_index = model.times.index(time)
                        nb_treated += value[this_time_index]
                return nb_treated

            return calculate_nb_detected

        for tag in [
            "strain_mdr",
            "strain_ds",
            "organ_smearpos",
            "organ_smearneg",
            "organ_extrapul",
        ]:
            tb_model.derived_output_functions[
                "popsizeXnb_detectedX" + tag
            ] = detected_popsize_function_builder(tag)

        # ACF popsize: number of people screened
        def popsize_acf(model, time):
            if external_params["acf_coverage"] == 0.0:
                return 0.0
            pop_urban_ger = sum(
                [
                    model.compartment_values[i]
                    for i, c_name in enumerate(model.compartment_names)
                    if "location_urban_ger" in c_name
                ]
            )
            return external_params["acf_coverage"] * pop_urban_ger

        tb_model.derived_output_functions["popsizeXnb_screened_acf"] = popsize_acf

    return tb_model
Example No. 20
def build_model(params: dict, update_params={}):
    """
    Build the master function to run the TB model for the Republic of the Marshall Islands

    :param update_params: dict
        Any parameters that need to be updated for the current run
    :return: StratifiedModel
        The final model with all parameters and stratifications
    """
    input_database = Database(database_name=INPUT_DB_PATH)

    # Define compartments and initial conditions.
    compartments = [
        Compartment.SUSCEPTIBLE,
        Compartment.EARLY_LATENT,
        Compartment.LATE_LATENT,
        Compartment.EARLY_INFECTIOUS,
        Compartment.ON_TREATMENT,
        Compartment.RECOVERED,
        # Compartment.LTBI_TREATED,
    ]
    init_pop = {Compartment.EARLY_INFECTIOUS: 10, Compartment.LATE_LATENT: 100}

    model_parameters = params
    model_parameters.update(update_params)

    # Update partial immunity/susceptibility parameters
    model_parameters = update_transmission_parameters(
        model_parameters, [Compartment.RECOVERED, Compartment.LATE_LATENT, Compartment.LTBI_TREATED]
    )

    # Set integration times
    integration_times = get_model_times_from_inputs(
        model_parameters["start_time"], model_parameters["end_time"], model_parameters["time_step"]
    )

    # Sequentially add groups of flows to flows list
    flows = add_standard_infection_flows([])
    flows = add_standard_latency_flows(flows)
    flows = add_standard_natural_history_flows(flows)
    # flows = add_latency_progression(flows)
    flows = add_case_detection(flows, compartments)
    flows = add_treatment_flows(flows)
    # flows = add_acf(flows, compartments)
    # flows = add_acf_ltbi(flows)

    # Make sure incidence and notifications are tracked during integration
    out_connections = {}
    out_connections.update(
        create_request_stratified_incidence(
            model_parameters["incidence_stratification"], model_parameters["all_stratifications"]
        )
    )
    out_connections.update(
        create_request_stratified_notifications(
            model_parameters["notification_stratifications"],
            model_parameters["all_stratifications"],
        )
    )

    # Define model
    tb_model = StratifiedModel(
        integration_times,
        compartments,
        init_pop,
        model_parameters,
        flows,
        birth_approach="add_crude_birth_rate",
        starting_population=model_parameters["start_population"],
        output_connections=out_connections,
        death_output_categories=list_all_strata_for_mortality(compartments),
    )

    # Add crude birth rate from UN estimates (using the Federated States of Micronesia as a proxy, since there is no data for RMI)
    tb_model = add_birth_rate_functions(tb_model, input_database, "FSM")

    # Find raw case detection rate with multiplier, which is 1 by default, and adjust for differences by organ status
    cdr_scaleup_raw = build_scale_up_function(
        model_parameters["cdr"], model_parameters["cdr_multiplier"]
    )
    detect_rate_by_organ = find_organ_specific_cdr(
        cdr_scaleup_raw,
        model_parameters,
        model_parameters["all_stratifications"]["organ"],
        target_organ_props=model_parameters["target_organ_props"],
    )

    # Find base case detection rate and time-variant treatment completion function
    base_detection_rate = detect_rate_by_organ[
        "smearpos" if "organ" in model_parameters["stratify_by"] else "overall"
    ]
    treatment_success_rate = (
        lambda time: build_scale_up_function(model_parameters["tsr"])(time)
        / model_parameters["treatment_duration"]
    )
    treatment_nonsuccess_rate = (
        lambda time: (1.0 - build_scale_up_function(model_parameters["tsr"])(time))
        / model_parameters["treatment_duration"]
    )

    # Set acf screening rate using proportion of population reached and duration of intervention
    # acf_screening_rate = -numpy.log(1 - 0.9) / 0.5
    # acf_rate_over_time = progressive_step_function_maker(
    #     2018.2, 2018.7, acf_screening_rate, scaling_time_fraction=0.3
    # )

    # Initialise acf_rate function
    # acf_rate_function = (
    #     lambda t: model_parameters["acf_coverage"]
    #               * (acf_rate_over_time(t))
    #               * model_parameters["acf_sensitivity"]
    # )
    # acf_ltbi_rate_function = (
    #     lambda t: model_parameters["acf_coverage"]
    #               * (acf_rate_over_time(t))
    #               * model_parameters["acf_ltbi_sensitivity"]
    #               * model_parameters["acf_ltbi_efficacy"]
    # )

    # Assign newly created functions to model parameters
    add_time_variant_parameter_to_model(
        tb_model, "case_detection", base_detection_rate, len(model_parameters["stratify_by"])
    )
    add_time_variant_parameter_to_model(
        tb_model, "treatment_success", treatment_success_rate, len(model_parameters["stratify_by"])
    )
    add_time_variant_parameter_to_model(
        tb_model,
        "treatment_nonsuccess",
        treatment_nonsuccess_rate,
        len(model_parameters["stratify_by"]),
    )
    # add_time_variant_parameter_to_model(
    #     tb_model, 'acf_rate', acf_rate_function, len(model_parameters['stratify_by']))
    # add_time_variant_parameter_to_model(
    #     tb_model, 'acf_ltbi_rate', acf_ltbi_rate_function, len(model_parameters['stratify_by']))

    # Stratification processes
    if "age" in model_parameters["stratify_by"]:
        age_specific_latency_parameters = manually_create_age_specific_latency_parameters(
            model_parameters
        )
        tb_model = stratify_by_age(
            tb_model,
            age_specific_latency_parameters,
            input_database,
            model_parameters["all_stratifications"]["age"],
        )
    if "diabetes" in model_parameters["stratify_by"]:
        tb_model = stratify_by_diabetes(
            tb_model,
            model_parameters,
            model_parameters["all_stratifications"]["diabetes"],
            model_parameters["diabetes_target_props"],
            age_specific_prevalence=False,
        )
    if "organ" in model_parameters["stratify_by"]:
        tb_model = stratify_by_organ(
            tb_model,
            model_parameters,
            detect_rate_by_organ,
            model_parameters["all_stratifications"]["organ"],
        )
    if "location" in model_parameters["stratify_by"]:
        tb_model = stratify_by_location(
            tb_model, model_parameters, model_parameters["all_stratifications"]["location"]
        )

    # Capture reported prevalence in Majuro assuming over-reporting (needed for calibration)
    def calculate_reported_majuro_prevalence(model, time):
        true_prev = 0.0
        pop_majuro = 0.0
        for i, compartment in enumerate(model.compartment_names):
            if "majuro" in compartment:
                pop_majuro += model.compartment_values[i]
                if "infectious" in compartment:
                    true_prev += model.compartment_values[i]
        return (
            1.0e5
            * true_prev
            / pop_majuro
            * (1.0 + model_parameters["over_reporting_prevalence_proportion"])
        )

    tb_model.derived_output_functions.update(
        {"reported_majuro_prevalence": calculate_reported_majuro_prevalence}
    )

    return tb_model
Example No. 21
def preprocess_covid_phl(input_db: Database):

    df = pd.read_csv(COVID_PHL_CSV_PATH)
    df = create_region_aggregates(df)
    input_db.dump_df("covid_phl", df)
Example No. 22
def test_unpivot_outputs(tmp_path):
    """
    Verify that unpivot_outputs works. 
    """
    out_db_path = os.path.join(tmp_path, "out.db")
    mock_model = get_mock_model(
        times=[2000, 2001, 2002, 2003, 2004, 2005],
        outputs=[
            [300.0, 300.0, 300.0, 33.0, 33.0, 33.0, 93.0, 39.0],
            [271.0, 300.0, 271.0, 62.0, 33.0, 62.0, 93.0, 69.0],
            [246.0, 300.0, 246.0, 88.0, 33.0, 88.0, 93.0, 89.0],
            [222.0, 300.0, 222.0, 111.0, 33.0, 111.0, 39.0, 119.0],
            [201.0, 300.0, 201.0, 132.0, 33.0, 132.0, 39.0, 139.0],
            [182.0, 300.0, 182.0, 151.0, 33.0, 151.0, 39.0, 159.0],
        ],
    )
    store_run_models([mock_model], out_db_path)
    out_db = Database(out_db_path)
    outputs_df = out_db.query("outputs")
    unpivoted_df = unpivot_outputs(outputs_df)
    expected_columns = [
        "idx",
        "Scenario",
        "times",
        "value",
        "age",
        "compartment",
        "mood",
    ]
    expected_data = [
        ["run_0", "S_0", 2000, 300.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2001, 271.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2002, 246.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2003, 222.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2004, 201.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2005, 182.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2000, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2001, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2002, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2003, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2004, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2005, 300.0, "age_old", "susceptible", "mood_sad"],
        [
            "run_0", "S_0", 2000, 300.0, "age_young", "susceptible",
            "mood_happy"
        ],
        [
            "run_0", "S_0", 2001, 271.0, "age_young", "susceptible",
            "mood_happy"
        ],
        [
            "run_0", "S_0", 2002, 246.0, "age_young", "susceptible",
            "mood_happy"
        ],
        [
            "run_0", "S_0", 2003, 222.0, "age_young", "susceptible",
            "mood_happy"
        ],
        [
            "run_0", "S_0", 2004, 201.0, "age_young", "susceptible",
            "mood_happy"
        ],
        [
            "run_0", "S_0", 2005, 182.0, "age_young", "susceptible",
            "mood_happy"
        ],
        ["run_0", "S_0", 2000, 33.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2001, 62.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2002, 88.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2003, 111.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2004, 132.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2005, 151.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2000, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2001, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2002, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2003, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2004, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2005, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2000, 33.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2001, 62.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2002, 88.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2003, 111.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2004, 132.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2005, 151.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2000, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2001, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2002, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2003, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2004, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2005, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2000, 39.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2001, 69.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2002, 89.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2003, 119.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2004, 139.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2005, 159.0, "age_young", "infectious", "mood_sad"],
    ]
    expected_df = pd.DataFrame(expected_data, columns=expected_columns)
    assert_frame_equal(expected_df, unpivoted_df)
Example No. 23
def test_plot_post_calibration(tmp_path):
    plot_dir = tmp_path
    mcmc_dir_path = os.path.join(tmp_path, "mcmc")
    os.makedirs(mcmc_dir_path)
    targets = {
        "incidence": {
            "output_key": "incidence",
            "title": "incidence",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
        "foo": {
            "output_key": "foo",
            "title": "foo",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
    }

    # A dummy prior to pass posterior checks
    priors = [{
        "param_name": "contact_rate",
        "distribution": "uniform",
        "distri_params": [0.01, 0.03]
    }]

    funcs = [
        lambda t: 2 * t + random.random(), lambda t: t**3 + random.random()
    ]
    # Build data for plotting
    do_df, mcmc_df, params_df = build_synthetic_calibration(targets,
                                                            funcs,
                                                            chains=2,
                                                            runs=20,
                                                            times=20)
    chains = set(mcmc_df["chain"].tolist())
    # Create databases for plotting
    for chain in chains:
        db_path = os.path.join(mcmc_dir_path, f"chain-{chain}.db")
        db = Database(db_path)
        db.dump_df("mcmc_run", mcmc_df[mcmc_df["chain"] == chain])
        db.dump_df("mcmc_params", params_df[params_df["chain"] == chain])
        db.dump_df("derived_outputs", do_df[do_df["chain"] == chain])

    # Create plots
    plot_post_calibration(targets, mcmc_dir_path, plot_dir, priors)

    # Check plots: a basic check that the expected files and directories exist
    expected_files = [
        "burn-in.png",
        "loglikelihood-traces.png",
        "acceptance_ratio.png",
        "params-traces",
        "calibration-fit",
        "params-vs-loglikelihood",
        "posteriors",
    ]
    for fname in expected_files:
        p = os.path.join(plot_dir, fname)
        assert os.path.exists(p)
        if os.path.isdir(p):
            assert len(os.listdir(p)) > 0
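
build_synthetic_calibration is handed one function per target and asked for two chains of twenty runs each. Its generator is not shown here, so the sketch below only illustrates the shape of the mcmc_run table the per-chain databases above receive; of the column names, only "chain" is certain from the code above, the rest are assumptions:

import numpy as np
import pandas as pd


def fake_mcmc_run(chain: int, runs: int) -> pd.DataFrame:
    # One row per run: a drifting loglikelihood trace plus accept flags is
    # all the burn-in and trace plots need to have something to draw.
    rng = np.random.default_rng(chain)
    return pd.DataFrame({
        "chain": chain,
        "run": np.arange(runs),
        "loglikelihood": -np.abs(rng.normal(size=runs)).cumsum(),
        "accept": rng.integers(0, 2, size=runs),
    })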
Example No. 24
def test_collate_outputs(tmp_path):
    """
    Test the collation of multiple calibration output databases into a single file.
    """
    # Setup database tables
    mcmc_run_cols = [
        "idx", "Scenario", "ice_cream_sales", "loglikelihood", "accept"
    ]
    mcmc_run_1 = [
        ["run_0", "S_0", 1, -1, 1],
        ["run_1", "S_0", 2, -2, 1],
        ["run_2", "S_0", 3, -3, 0],
        ["run_3", "S_0", 4, -4, 1],
    ]
    mcmc_run_2 = [
        ["run_0", "S_0", 11, -11, 1],
        ["run_1", "S_0", 12, -12, 0],
        ["run_2", "S_0", 13, -13, 1],
        ["run_3", "S_0", 14, -14, 1],
    ]
    derived_outputs_cols = ["idx", "Scenario", "times", "shark_attacks"]
    derived_outputs_1 = [
        ["run_0", "S_0", 2000, 3],
        ["run_0", "S_0", 2001, 6],
        ["run_0", "S_0", 2002, 10],
        ["run_1", "S_0", 2000, 4],
        ["run_1", "S_0", 2001, 7],
        ["run_1", "S_0", 2002, 11],
        ["run_2", "S_0", 2000, 2],
        ["run_2", "S_0", 2001, 5],
        ["run_2", "S_0", 2002, 9],
        ["run_3", "S_0", 2000, 1],
        ["run_3", "S_0", 2001, 2],
        ["run_3", "S_0", 2002, 3],
    ]
    derived_outputs_2 = [
        ["run_0", "S_0", 2000, 3.1],
        ["run_0", "S_0", 2001, 6.1],
        ["run_0", "S_0", 2002, 10.1],
        ["run_1", "S_0", 2000, 4.1],
        ["run_1", "S_0", 2001, 7.1],
        ["run_1", "S_0", 2002, 11.1],
        ["run_2", "S_0", 2000, 2.1],
        ["run_2", "S_0", 2001, 5.1],
        ["run_2", "S_0", 2002, 9.1],
        ["run_3", "S_0", 2000, 1.1],
        ["run_3", "S_0", 2001, 2.1],
        ["run_3", "S_0", 2002, 3.1],
    ]
    outputs_cols = ["idx", "Scenario", "times", "happy", "sad"]
    outputs_1 = [
        ["run_0", "S_0", 2000, 11, 11],
        ["run_0", "S_0", 2001, 12, 21],
        ["run_0", "S_0", 2002, 13, 31],
        ["run_1", "S_0", 2000, 21, 12],
        ["run_1", "S_0", 2001, 22, 22],
        ["run_1", "S_0", 2002, 23, 32],
        ["run_2", "S_0", 2000, 31, 13],
        ["run_2", "S_0", 2001, 32, 23],
        ["run_2", "S_0", 2002, 33, 33],
        ["run_3", "S_0", 2000, 41, 14],
        ["run_3", "S_0", 2001, 42, 24],
        ["run_3", "S_0", 2002, 43, 34],
    ]
    outputs_2 = [
        ["run_0", "S_0", 2000, 111, 211],
        ["run_0", "S_0", 2001, 112, 221],
        ["run_0", "S_0", 2002, 113, 231],
        ["run_1", "S_0", 2000, 121, 212],
        ["run_1", "S_0", 2001, 122, 222],
        ["run_1", "S_0", 2002, 123, 232],
        ["run_2", "S_0", 2000, 131, 213],
        ["run_2", "S_0", 2001, 132, 223],
        ["run_2", "S_0", 2002, 133, 233],
        ["run_3", "S_0", 2000, 141, 214],
        ["run_3", "S_0", 2001, 142, 224],
        ["run_3", "S_0", 2002, 143, 234],
    ]
    # Create dataframes to save to db
    mcmc_run_1_df = pd.DataFrame(mcmc_run_1, columns=mcmc_run_cols)
    mcmc_run_2_df = pd.DataFrame(mcmc_run_2, columns=mcmc_run_cols)
    derived_outputs_1_df = pd.DataFrame(derived_outputs_1,
                                        columns=derived_outputs_cols)
    derived_outputs_2_df = pd.DataFrame(derived_outputs_2,
                                        columns=derived_outputs_cols)
    outputs_1_df = pd.DataFrame(outputs_1, columns=outputs_cols)
    outputs_2_df = pd.DataFrame(outputs_2, columns=outputs_cols)

    # Connect to test databases
    target_db_path = os.path.join(tmp_path, "target.db")
    db_1_path = os.path.join(tmp_path, "src-1.db")
    db_2_path = os.path.join(tmp_path, "src-2.db")
    src_db_paths = [db_1_path, db_2_path]
    target_db = Database(target_db_path)
    src_1_db = Database(db_1_path)
    src_2_db = Database(db_2_path)

    # Save test data to databases
    mcmc_run_1_df.to_sql("mcmc_run", con=src_1_db.engine, index=False)
    mcmc_run_2_df.to_sql("mcmc_run", con=src_2_db.engine, index=False)
    derived_outputs_1_df.to_sql("derived_outputs",
                                con=src_1_db.engine,
                                index=False)
    derived_outputs_2_df.to_sql("derived_outputs",
                                con=src_2_db.engine,
                                index=False)
    outputs_1_df.to_sql("outputs", con=src_1_db.engine, index=False)
    outputs_2_df.to_sql("outputs", con=src_2_db.engine, index=False)

    collate_outputs(src_db_paths, target_db_path, num_runs=2)

    expected_mcmc_runs = [
        ["run_0", "S_0", 2, -2, 1],
        ["run_1", "S_0", 4, -4, 1],
        ["run_2", "S_0", 13, -13, 1],
        ["run_3", "S_0", 14, -14, 1],
    ]
    expected_derived_outputs = [
        ["run_0", "S_0", 2000, 4],
        ["run_0", "S_0", 2001, 7],
        ["run_0", "S_0", 2002, 11],
        ["run_1", "S_0", 2000, 1],
        ["run_1", "S_0", 2001, 2],
        ["run_1", "S_0", 2002, 3],
        ["run_2", "S_0", 2000, 2.1],
        ["run_2", "S_0", 2001, 5.1],
        ["run_2", "S_0", 2002, 9.1],
        ["run_3", "S_0", 2000, 1.1],
        ["run_3", "S_0", 2001, 2.1],
        ["run_3", "S_0", 2002, 3.1],
    ]
    expected_outputs = [
        ["run_0", "S_0", 2000, 21, 12],
        ["run_0", "S_0", 2001, 22, 22],
        ["run_0", "S_0", 2002, 23, 32],
        ["run_1", "S_0", 2000, 41, 14],
        ["run_1", "S_0", 2001, 42, 24],
        ["run_1", "S_0", 2002, 43, 34],
        ["run_2", "S_0", 2000, 131, 213],
        ["run_2", "S_0", 2001, 132, 223],
        ["run_2", "S_0", 2002, 133, 233],
        ["run_3", "S_0", 2000, 141, 214],
        ["run_3", "S_0", 2001, 142, 224],
        ["run_3", "S_0", 2002, 143, 234],
    ]
    expected_mcmc_run_df = pd.DataFrame(expected_mcmc_runs,
                                        columns=mcmc_run_cols)
    expected_derived_outputs_df = pd.DataFrame(expected_derived_outputs,
                                               columns=derived_outputs_cols)
    expected_outputs_df = pd.DataFrame(expected_outputs, columns=outputs_cols)

    # Extract the outputs
    mcmc_df = target_db.query("mcmc_run")
    derived_outputs_df = target_db.query("derived_outputs")
    outputs_df = target_db.query("outputs")

    # Check that the outputs are correct
    assert_frame_equal(expected_mcmc_run_df, mcmc_df)
    assert_frame_equal(expected_derived_outputs_df, derived_outputs_df)
    assert_frame_equal(expected_outputs_df, outputs_df)
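
Reading the expected tables against the inputs pins down the selection rule: from each source database, collate_outputs keeps the last num_runs accepted runs (run_1/run_3 from source 1, run_2/run_3 from source 2), renumbers the survivors from run_0, and carries the matching outputs and derived_outputs rows across. A hedged sketch of just that selection step, assuming this reading is right:

import pandas as pd


def select_final_accepted(mcmc_df: pd.DataFrame, num_runs: int) -> list:
    # Drop rejected runs, then keep the tail of the chain so that burn-in
    # is discarded; the caller renumbers the survivors from run_0.
    accepted = mcmc_df[mcmc_df["accept"] == 1]
    return accepted["idx"].tail(num_runs).tolist()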
Example No. 25
def test_create_power_bi_outputs(tmp_path):
    """
    Ensure that PowerBI outputs are correctly created from a model output database.
    """
    # Prepare models
    models = [
        get_mock_model(
            times=[2000, 2001, 2002, 2003, 2004, 2005],
            outputs=[
                [1, 2, 3, 4, 5, 6, 7, 8],
                [11, 12, 13, 14, 15, 16, 17, 18],
                [21, 22, 23, 24, 25, 26, 27, 28],
                [31, 32, 33, 34, 35, 36, 37, 38],
                [41, 42, 43, 44, 45, 46, 47, 48],
                [5, 4, 3, 2, 1, 0, -1, -2],
            ],
            derived_outputs={
                "times": [2000, 2001, 2002, 2003, 2004, 2005],
                "snacks": [1, 2, 3, 4, 5, 6],
            },
        ),
        get_mock_model(
            times=[2000, 2001, 2002, 2003, 2004, 2005],
            outputs=[
                [51, 52, 53, 54, 55, 56, 57, 58],
                [61, 62, 63, 64, 65, 66, 67, 68],
                [71, 72, 73, 74, 75, 76, 77, 78],
                [81, 82, 83, 94, 95, 96, 97, 98],
                [91, 92, 93, 84, 85, 86, 87, 88],
                [5, 4, 3, 2, 1, 0, -1, -2],
            ],
            derived_outputs={
                "times": [2000, 2001, 2002, 2003, 2004, 2005],
                "snacks": [7, 8, 9, 10, 11, 12],
            },
        ),
    ]
    mcmc_run_df = pd.DataFrame.from_dict({
        "contact_rate": [5, 10, 6, 4],
        "loglikelihood": [-1, -3, -2, -0.5],
        "accept": [1, 0, 0, 1],
    })
    db_path = os.path.join(tmp_path, "out.db")
    powerbi_db_path = os.path.join(tmp_path, "pbi.db")
    # Store the models
    store_run_models(models, db_path)
    store_database(mcmc_run_df, db_path, "mcmc_run", scenario=0, run_idx=1)
    src_db = Database(db_path)
    mcmc_run_src = src_db.query("mcmc_run")
    derived_outputs_src = src_db.query("derived_outputs")

    # Create Power BI outputs
    create_power_bi_outputs(db_path, powerbi_db_path)
    # Query Power BI outputs
    pbi_db = Database(powerbi_db_path)
    table_0 = pbi_db.query("pbi_scenario_0")
    table_1 = pbi_db.query("pbi_scenario_1")
    mcmc_run_dest = pbi_db.query("mcmc_run")
    derived_outputs_dest = pbi_db.query("derived_outputs")

    # Validate derived_outputs copied over
    assert_frame_equal(derived_outputs_src, derived_outputs_dest)

    # Validate MCMC run copied over
    assert_frame_equal(mcmc_run_src, mcmc_run_dest)

    def get_expected_df(model, scenario):
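        # Rebuild the wide outputs table for one model run and unpivot it,
        # mirroring what the PowerBI conversion should have produced.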
        outputs_df = pd.DataFrame(model.outputs,
                                  columns=model.compartment_names)
        outputs_df.insert(0, "times", model.times)
        outputs_df.insert(0, "Scenario", scenario)
        outputs_df.insert(0, "idx", "run_0")
        return unpivot_outputs(outputs_df)

    # Validate Power BI outputs transformed correctly
    expected_df = get_expected_df(models[0], "S_0")
    assert_frame_equal(expected_df, table_0)

    expected_df = get_expected_df(models[1], "S_1")
    assert_frame_equal(expected_df, table_1)
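
Taken together, the assertions above constrain create_power_bi_outputs to copy mcmc_run and derived_outputs across unchanged and to write each scenario's wide outputs table in unpivoted form as pbi_scenario_<N>. A minimal sketch under that reading, reusing Database and unpivot_outputs from the examples above; the scenario-splitting details are assumed:

def create_power_bi_outputs_sketch(src_db_path: str, dest_db_path: str):
    src_db = Database(src_db_path)
    dest_db = Database(dest_db_path)
    # Carry the MCMC metadata and derived outputs across verbatim.
    dest_db.dump_df("mcmc_run", src_db.query("mcmc_run"))
    dest_db.dump_df("derived_outputs", src_db.query("derived_outputs"))
    # One long-format table per scenario for Power BI to consume.
    outputs_df = src_db.query("outputs")
    for idx, scenario in enumerate(outputs_df["Scenario"].unique()):
        scenario_df = outputs_df[outputs_df["Scenario"] == scenario]
        dest_db.dump_df(f"pbi_scenario_{idx}", unpivot_outputs(scenario_df))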