Пример #1
0
def load_infd_fit(fit, mi: MaudInput) -> az.InferenceData:
    """Build an arviz InferenceData from out-of-sample CmdStan csvs.

    :param fit: CmdStan output, passed unchanged as the first argument of
        ``az.from_cmdstan``
    :param mi: a MaudInput object whose ``stan_coords`` supply the coordinates
    :return: the result of ``az.from_cmdstan`` called with ``save_warmup=True``
    """
    stan_coords = mi.stan_coords

    # Start from every attribute of stan_coords, then add (or overwrite) the
    # coordinates that are built by joining two id lists.
    coords = dict(stan_coords.__dict__)
    coords.update(
        {
            "reactions": stan_coords.reactions,
            "kms": join_list_of_strings(
                stan_coords.km_enzs, stan_coords.km_mics
            ),
            "kis": join_list_of_strings(
                stan_coords.ci_enzs, stan_coords.ci_mics
            ),
            "diss_ts": join_list_of_strings(
                stan_coords.ai_enzs, stan_coords.ai_mics
            ),
            "diss_rs": join_list_of_strings(
                stan_coords.aa_enzs, stan_coords.aa_mics
            ),
        }
    )

    # Every variable listed here is indexed by experiment first.
    by_experiment_and_edge = ["experiments", "edges"]
    dims = {
        "flux": ["experiments", "reactions"],
        "conc": ["experiments", "mics"],
        "conc_enzyme": ["experiments", "enzymes"],
        "conc_unbalanced": ["experiments", "unbalanced_mics"],
        "conc_phos": ["experiments", "phos_enzs"],
        "saturation": list(by_experiment_and_edge),
        "allostery": list(by_experiment_and_edge),
        "phosphorylation": list(by_experiment_and_edge),
        "reversibility": list(by_experiment_and_edge),
    }
    return az.from_cmdstan(fit, coords=coords, dims=dims, save_warmup=True)
def sample(
    stan_file: str,
    input_json: str,
    coords: dict,
    dims: dict,
    sample_kwargs: dict,
    cpp_options: Optional[dict],
    stanc_options: Optional[dict],
) -> InferenceData:
    """Run cmdstanpy.CmdStanModel.sample and return an InferenceData.

    :param stan_file: path to the Stan program
    :param input_json: path to the JSON data file; it must contain
        ``ix_train`` and ``ix_test`` entries
    :param coords: coordinates for the InferenceData; the caller's dict is
        left unmodified
    :param dims: dimension names for the InferenceData variables
    :param sample_kwargs: keyword arguments forwarded to ``model.sample``
    :param cpp_options: forwarded to ``CmdStanModel``
    :param stanc_options: forwarded to ``CmdStanModel``
    :return: InferenceData with ``llik`` as log likelihood and ``yrep`` as
        posterior predictive
    """
    model = CmdStanModel(
        stan_file=stan_file,
        cpp_options=cpp_options,
        stanc_options=stanc_options,
    )
    with open(input_json, "r", encoding="utf-8") as f:
        stan_input = json.load(f)
    # Build a new dict rather than writing into the caller's ``coords``, so
    # that repeated calls with the same dict do not leak state between runs.
    # The indexes are shifted down by one, presumably to convert Stan's
    # 1-based indexing to 0-based.
    coords = {
        **coords,
        "ix_train": [i - 1 for i in stan_input["ix_train"]],
        "ix_test": [i - 1 for i in stan_input["ix_test"]],
    }
    mcmc = model.sample(data=input_json, **sample_kwargs)
    return az.from_cmdstan(
        posterior=mcmc.runset.csv_files,
        log_likelihood="llik",
        posterior_predictive="yrep",
        observed_data=input_json,
        coords=coords,
        dims=dims,
    )
Пример #3
0
def load_infd(csvs: List[str], mi: MaudInput) -> az.InferenceData:
    """Build an arviz InferenceData object from Maud csvs.

    :param csvs: csv paths passed as the first argument of
        ``az.from_cmdstan``
    :param mi: a MaudInput object whose ``stan_coords`` supply the coordinates
    :return: the result of ``az.from_cmdstan`` called with ``save_warmup=True``
    """
    Numba.disable_numba()
    stan_coords = mi.stan_coords

    # Start from every attribute of stan_coords, then add (or overwrite) the
    # coordinates that are built by joining two id lists.
    coords = dict(stan_coords.__dict__)
    coords.update(
        {
            "reactions": stan_coords.reactions,
            "kms": join_list_of_strings(
                stan_coords.km_enzs, stan_coords.km_mics
            ),
            "kis": join_list_of_strings(
                stan_coords.ci_enzs, stan_coords.ci_mics
            ),
            "diss_ts": join_list_of_strings(
                stan_coords.ai_enzs, stan_coords.ai_mics
            ),
            "diss_rs": join_list_of_strings(
                stan_coords.aa_enzs, stan_coords.aa_mics
            ),
            "yconcs": join_list_of_strings(
                stan_coords.yconc_exps, stan_coords.yconc_mics
            ),
            "yfluxs": join_list_of_strings(
                stan_coords.yflux_exps, stan_coords.yflux_rxns
            ),
            "yenzs": join_list_of_strings(
                stan_coords.yenz_exps, stan_coords.yenz_enzs
            ),
        }
    )

    dims = {
        # Quantities indexed by experiment.
        "flux": ["experiments", "reactions"],
        "conc": ["experiments", "mics"],
        "conc_enzyme": ["experiments", "enzymes"],
        "conc_unbalanced": ["experiments", "unbalanced_mics"],
        "conc_phos": ["experiments", "phos_enzs"],
        "drain": ["experiments", "drains"],
        # One-dimensional parameters.
        "diss_t": ["diss_ts"],
        "diss_r": ["diss_rs"],
        "transfer_constant": ["allosteric_enzymes"],
        "dgf": ["metabolites"],
        "dgrs": ["edges"],
        "keq": ["edges"],
        "kcat": ["enzymes"],
        "kcat_phos": ["phos_enzs"],
        "km": ["kms"],
        "ki": ["kis"],
        # Simulated measurements and their log likelihoods.
        "yconc_sim": ["yconcs"],
        "yflux_sim": ["yfluxs"],
        "yenz_sim": ["yenzs"],
        "log_lik_conc": ["yconcs"],
        "log_lik_flux": ["yfluxs"],
        "log_lik_enz": ["yenzs"],
        # Per-experiment, per-edge quantities.
        "saturation": ["experiments", "edges"],
        "allostery": ["experiments", "edges"],
        "phosphorylation": ["experiments", "edges"],
        "reversibility": ["experiments", "edges"],
    }
    return az.from_cmdstan(csvs, coords=coords, dims=dims, save_warmup=True)
Пример #4
0
def get_infd(csvs, stan_codes, tecrdb):
    """Build an arviz InferenceData from csvs, labelled by compound and measurement.

    :param csvs: passed unchanged as the first argument of ``az.from_cmdstan``
    :param stan_codes: mapping whose ``"compound_id"`` entry provides the
        compound labels through its ``keys()``
    :param tecrdb: object whose ``index`` provides the measurement labels
    :return: InferenceData with ``log_lik_dgr`` as the log likelihood
    """
    compound_labels = list(stan_codes["compound_id"].keys())
    measurement_labels = list(tecrdb.index)
    coords = {
        "compound": compound_labels,
        "measurement": measurement_labels,
    }
    dims = {
        "dgf": ["compound"],
        "dgr_prime": ["measurement"],
        "log_lik_dgr": ["measurement"],
        "kpr_rep": ["measurement"],
    }
    return az.from_cmdstan(
        csvs,
        coords=coords,
        dims=dims,
        log_likelihood="log_lik_dgr",
    )
Пример #5
0
def get_experiment_table_from_sim(sim_dir: str) -> pd.DataFrame:
    """Get a table of simulated measurements.

    The output should be compatible with maud, so that it is possible to
    overwrite the experiments file.

    :param sim_dir: directory containing a ``user_input`` folder and a
        ``samples`` folder; the latter must hold at least one ``.csv`` file
        and an ``input_data.json`` file
    :return: simulated concentration rows, then simulated flux rows, then the
        ``enz`` rows of the original experiments file, with a fresh index
    """
    ui_path = os.path.join(sim_dir, "user_input")
    sim_csv_path = os.path.join(sim_dir, "samples")
    # Use the first csv file that os.listdir happens to return.
    csv_file = os.path.join(
        sim_csv_path,
        next(f for f in os.listdir(sim_csv_path) if f.endswith(".csv")),
    )
    with open(os.path.join(sim_csv_path, "input_data.json"), "r") as f:
        stan_input = json.load(f)
    mi = load_maud_input_from_toml(ui_path)
    infd = az.from_cmdstan(csv_file)
    # Invert the id -> code mappings so that the integer codes found in the
    # Stan input can be translated back to ids.
    code_to_exp = {v: k for k, v in mi.stan_codes.experiment_codes.items()}
    code_to_mic = {v: k for k, v in mi.stan_codes.mic_codes.items()}
    code_to_rxn = {v: k for k, v in mi.stan_codes.reaction_codes.items()}
    conc_sim = pd.DataFrame({
        "measurement_type": "mic",
        "target_id": [
            code_to_mic.get(code) for code in stan_input["mic_ix_yconc"]
        ],
        "experiment_id": [
            code_to_exp.get(code) for code in stan_input["experiment_yconc"]
        ],
        "measurement": infd.posterior["yconc_sim"].to_series().values,
        "error_scale": stan_input["sigma_conc"],
    })
    flux_sim = pd.DataFrame({
        # Flux rows must be labelled "flux"; labelling them "mic" would make
        # the table describe reaction ids as concentration measurements.
        "measurement_type": "flux",
        "target_id": [
            code_to_rxn.get(code) for code in stan_input["reaction_yflux"]
        ],
        "experiment_id": [
            code_to_exp.get(code) for code in stan_input["experiment_yflux"]
        ],
        "measurement": infd.posterior["yflux_sim"].to_series().values,
        "error_scale": stan_input["sigma_flux"],
    })
    # Enzyme measurements are not simulated: they are copied unchanged from
    # the original experiments file.
    enz_og = pd.read_csv(mi.config.experiments_file
                         ).loc[lambda df: df["measurement_type"] == "enz"]
    return pd.concat([conc_sim, flux_sim, enz_og], ignore_index=True)
Пример #6
0
def load_infd(csvs: List[str], mi: MaudInput) -> az.InferenceData:
    """Build an arviz InferenceData object from Maud csvs.

    :param csvs: csv paths passed as the first argument of
        ``az.from_cmdstan``
    :param mi: a MaudInput object providing ``stan_codes`` and ``priors``
    :return: InferenceData labelled with the ids found in ``mi``
    """
    codes = mi.stan_codes
    coords = {
        "enzyme_name": list(codes.enzyme_codes.keys()),
        "mic_name": list(codes.mic_codes.keys()),
        "reaction": list(codes.reaction_codes.keys()),
        "metabolite": list(codes.metabolite_codes.keys()),
        "experiment": list(codes.experiment_codes.keys()),
        # Each km prior id is used without its first three characters.
        "km_id": [prior.id[3:] for prior in mi.priors.km_priors],
    }
    dims = {
        "enzyme": ["experiment", "enzyme_name"],
        "conc": ["experiment", "mic_name"],
        "flux": ["experiment", "reaction"],
        "formation_energy": ["metabolite"],
        "kcat": ["enzyme_name"],
        "km": ["km_id"],
    }
    return az.from_cmdstan(csvs, coords=coords, dims=dims)
Пример #7
0
# Daily confirmed-case totals: sum ``newcountconfirmed`` over all rows that
# share a date, then parse the date column.
ca_cases = pd.read_csv('../data/CA_covid_data/statewide_cases.csv')
daily_cases = (
    ca_cases.groupby('date')['newcountconfirmed']
    .sum()
    .to_frame()
    .reset_index()
)
daily_cases['date'] = pd.to_datetime(daily_cases['date'])

# Initial state vector of 30 entries, filled from four totals:
#   entry 0        <- init_prop[0]
#   entries 1-4    <- init_prop[1] split evenly in four
#   entries 5-16   <- 20% of init_prop[2] split evenly in twelve
#   entries 17-28  <- 80% of init_prop[2] split evenly in twelve
#   entry 29       <- init_prop[3]
init_prop = [36e6, 4000, 12000, 20000]
init = np.zeros(30)
init[0] = init_prop[0]
init[1:5] = init_prop[1] / 4
init[5:17] = 0.2 * init_prop[2] / 12
init[17:29] = 0.8 * init_prop[2] / 12
init[29] = init_prop[3]

t_eval = np.arange(180)

inference_data = az.from_cmdstan('../results/outputs/*.csv')
# NOTE(review): 18 and 20000 are hard-coded and presumably match the number
# of chains and draws in the csv files - confirm against the outputs.
chains = list(range(18))
samples = list(range(20000))
incidence = []
for i in range(5000):
    chain = np.random.choice(chains)
    sample = np.random.choice(samples)
    beta_start = inference_data.posterior.data_vars['beta_start'][chain,
                                                                  sample].data
    beta_end = inference_data.posterior.data_vars['beta_end'][chain,
                                                              sample].data
    k = inference_data.posterior.data_vars['k'][chain, sample].data
    seir = TimeVaryingSLAPIR(t_eval=t_eval,
                             beta_start=beta_start,
                             beta_end=beta_end,
                             k=k,
Пример #8
0
 def get_inference_data(self, output, **kwargs):
     """Return the result of ``from_cmdstan`` for the given output.

     ``output`` is passed by keyword and any extra keyword arguments are
     forwarded unchanged.
     """
     inference_data = from_cmdstan(output=output, **kwargs)
     return inference_data
Пример #9
0
import sys

import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats as ss
import arviz as az

from subprocess import Popen, PIPE

import glob

# Collect the files whose names start with "trace-" followed by a digit, in
# the directory given as the first command-line argument.
posterior_glob = glob.glob(sys.argv[1] + '/trace-[0-9]*')

# Load every matched file into a single arviz object as the posterior.
cmdstan_data = az.from_cmdstan(posterior=posterior_glob)

# Percentile functions keyed by label: the 2.5th, 25th, 50th (median), 75th
# and 97.5th percentiles of their input.
func_dict = {
    "q2.5": lambda x: np.percentile(x, 2.5),
    "q25": lambda x: np.percentile(x, 25),
    "median": lambda x: np.percentile(x, 50),
    "q75": lambda x: np.percentile(x, 75),
    "q97.5": lambda x: np.percentile(x, 97.5)
}

# include mean and hpd
stats = az.summary(
    cmdstan_data, credible_interval=0.95
).loc[:, ['mean', 'hpd_2.5%', 'hpd_97.5%', 'ess_bulk', 'ess_tail', 'r_hat'
          ]].reset_index().rename(columns={
              'index': 'var',
Пример #10
0
 def get_inference_data(self, posterior, **kwargs):
     """Return the result of ``from_cmdstan`` for the given posterior.

     ``posterior`` is passed by keyword and any extra keyword arguments are
     forwarded unchanged.
     """
     inference_data = from_cmdstan(posterior=posterior, **kwargs)
     return inference_data
Пример #11
0
def predict(
    mi: MaudInput,
    output_dir: str,
    idata_train: az.InferenceData,
) -> az.InferenceData:
    """Call CmdStanModel.sample for out of sample predictions.

    The model is run once for every (chain, draw) pair found in the
    ``sample_stats`` group of ``idata_train``, using that draw's kinetic
    parameter values as inits. The per-draw results are concatenated along
    "draw" within each chain and then along "chain".

    :param mi: a MaudInput object
    :param output_dir: directory where output will be saved
    :param idata_train: InferenceData object with posterior draws
    :return: InferenceData holding one prediction per training draw, labelled
        with the chain and draw it was generated from
    :raises ValueError: if ``idata_train`` has no chains or no draws
    """
    model = cmdstanpy.CmdStanModel(
        stan_file=os.path.join(HERE, STAN_PROGRAM_RELATIVE_PATH_PREDICT),
        cpp_options=mi.config.cpp_options,
        stanc_options=mi.config.stanc_options,
    )
    set_up_output_dir(output_dir, mi)
    kinetic_parameters = [
        "keq",
        "km",
        "kcat",
        "dissociation_constant",
        "transfer_constant",
        "kcat_phos",
        "ki",
    ]
    posterior = idata_train.get("posterior")
    sample_stats = idata_train.get("sample_stats")
    assert posterior is not None
    assert sample_stats is not None
    chains = sample_stats["chain"]
    draws = sample_stats["draw"]
    if len(chains) == 0 or len(draws) == 0:
        raise ValueError(
            "idata_train must contain at least one chain and one draw."
        )
    dims = {
        "conc": ["experiment", "mic"],
        "conc_enzyme": ["experiment", "enzyme"],
        "flux": ["experiment", "reaction"],
    }
    # Everything below up to the loop is the same for every draw, so it is
    # built once instead of once per model run.
    coords = {
        "experiment": [e.id for e in mi.measurements.experiments if e.is_test],
        "mic": [m.id for m in mi.kinetic_model.mics],
        "enzyme": [e.id for e in mi.kinetic_model.enzymes],
        "reaction": [r.id for r in mi.kinetic_model.reactions],
    }
    base_sample_args: dict = {
        "data": os.path.join(output_dir, "input_data_test.json"),
        "output_dir": output_dir,
        "iter_warmup": 0,
        "iter_sampling": 1,
        "fixed_param": True,
        "show_progress": False,
    }
    user_sample_args: dict = (
        mi.config.cmdstanpy_config_predict
        if mi.config.cmdstanpy_config_predict is not None
        else {}
    )

    # ``None`` marks "nothing accumulated yet". Checking the sentinel rather
    # than ``draw == 0`` / ``chain == 0`` keeps this working when the chain or
    # draw labels do not start at zero (e.g. a thinned or sliced idata).
    out = None
    for chain in chains:
        idata_chain = None
        for draw in draws:
            inits = {
                par: (
                    posterior[par]
                    .sel(chain=chain, draw=draw)
                    .to_series()
                    .values
                )
                for par in kinetic_parameters
                if par in posterior.keys()
            }
            # User-supplied settings come last so that they can override any
            # default, including the inits.
            sample_args = {
                **base_sample_args,
                "inits": inits,
                **user_sample_args,
            }
            mcmc_draw = model.sample(**sample_args)
            idata_draw = az.from_cmdstan(
                mcmc_draw.runset.csv_files,
                coords=coords,
                dims=dims,
            ).assign_coords(
                coords={"chain": [chain], "draw": [draw]},
                groups="posterior_groups",
            )
            if idata_chain is None:
                idata_chain = idata_draw.copy()
            else:
                idata_chain = az.concat(
                    [idata_chain, idata_draw], dim="draw", reset_dim=False
                )
        if out is None:
            out = idata_chain.copy()
        else:
            out = az.concat([out, idata_chain], dim="chain", reset_dim=False)
    return out
Пример #12
0
def get_idata(csvs: List[str], mi: MaudInput, mode: str) -> az.InferenceData:
    """Get an arviz InferenceData object from Maud csvs.

    :param csvs: csv paths passed as the first argument of
        ``az.from_cmdstan``
    :param mi: a MaudInput object providing the kinetic model, measurements
        and Stan variable ids
    :param mode: ``"train"`` selects the experiments flagged ``is_train``;
        any other value selects those flagged ``is_test``
    :return: the result of ``az.from_cmdstan`` called with ``save_warmup=True``
    """

    Numba.disable_numba()
    kinetic_model = mi.kinetic_model
    if mode == "train":
        experiments = [e.id for e in mi.measurements.experiments if e.is_train]
    else:
        experiments = [e.id for e in mi.measurements.experiments if e.is_test]

    def ids_or_empty(items):
        """Return the ids of ``items``, or an empty list if ``items`` is None."""
        return [item.id for item in items] if items is not None else []

    def measurement_ids(table):
        """Return joined experiment/target ids for the selected experiments."""
        return join_str_cols(
            table[["experiment_id", "target_id"]].loc[
                lambda df: df["experiment_id"].isin(experiments)
            ],
            sep=ID_SEPARATOR,
        ).to_list()

    coords = {
        "enzymes": [e.id for e in kinetic_model.enzymes],
        "experiments": experiments,
        "reactions": [r.id for r in kinetic_model.reactions],
        "drains": [r.id for r in kinetic_model.drains],
        "metabolites": [m.id for m in kinetic_model.metabolites],
        "mics": [m.id for m in kinetic_model.mics],
        "edges": [e.id for e in kinetic_model.edges],
        "unbalanced_mics": [
            m.id for m in kinetic_model.mics if not m.balanced
        ],
        # These model components may be None; they then get no labels.
        "phosphorylations": ids_or_empty(kinetic_model.phosphorylations),
        "phosphorylation_modifying_enzymes": ids_or_empty(
            kinetic_model.phosphorylation_modifying_enzymes
        ),
        "allosteries": ids_or_empty(kinetic_model.allosteries),
        "allosteric_enzymes": ids_or_empty(kinetic_model.allosteric_enzymes),
        "competitive_inhibitions": ids_or_empty(
            kinetic_model.competitive_inhibitions
        ),
        "kms": mi.stan_variable_set.km.ids[0],
        "kis": mi.stan_variable_set.ki.ids[0],
        "dissociation_constants": (
            mi.stan_variable_set.dissociation_constant.ids[0]
        ),
        "yconcs": measurement_ids(mi.measurements.yconc),
        "yfluxs": measurement_ids(mi.measurements.yflux),
        # The key must be "yenzs" to match the dimension name used by
        # "yenz_sim" and "log_lik_enz" below; under the name "yenz" the
        # labels would not be attached to any variable.
        "yenzs": measurement_ids(mi.measurements.yenz),
    }
    return az.from_cmdstan(
        csvs,
        coords=coords,
        dims={
            "flux": ["experiments", "reactions"],
            "conc": ["experiments", "mics"],
            "conc_enzyme": ["experiments", "enzymes"],
            "conc_unbalanced": ["experiments", "unbalanced_mics"],
            "conc_pme": ["experiments", "phosphorylation_modifying_enzymes"],
            "drain": ["experiments", "drains"],
            "dissociation_constant": ["allosteries"],
            "transfer_constant": ["allosteric_enzymes"],
            "dgf": ["metabolites"],
            "dgrs": ["experiments", "edges"],
            "keq": ["experiments", "edges"],
            "kcat": ["enzymes"],
            "kcat_pme": ["phosphorylation_modifying_enzymes"],
            "km": ["kms"],
            "ki": ["kis"],
            "psi": ["experiments"],
            "yconc_sim": ["yconcs"],
            "yflux_sim": ["yfluxs"],
            "yenz_sim": ["yenzs"],
            "log_lik_conc": ["yconcs"],
            "log_lik_flux": ["yfluxs"],
            "log_lik_enz": ["yenzs"],
            "saturation": ["experiments", "edges"],
            "allostery": ["experiments", "edges"],
            "phosphorylation": ["experiments", "edges"],
            "reversibility": ["experiments", "edges"],
        },
        save_warmup=True,
    )