Example #1
def main():
    """
    Takes a dismod database that has had predict run on it and converts the predictions
    into the format needed for the IHME Epi Databases. Also uploads inputs to tier 3, which
    allows us to view those inputs in EpiViz.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=args.model_version_id,
            conn_def=context.model_connection)

    LOG.info("Extracting results from DisMod SQLite Database.")
    dismod_file = context.db_file(location_id=args.parent_location_id,
                                  sex_id=args.sex_id,
                                  make=False)
    da = DismodExtractor(path=dismod_file)
    predictions = da.format_predictions_for_ihme()

    LOG.info("Saving the results.")
    rh = ResultsHandler(model_version_id=args.model_version_id)
    rh.save_draw_files(df=predictions, directory=context.draw_dir)
    rh.upload_summaries(directory=context.draw_dir,
                        conn_def=context.model_connection)
Example #2
def main():
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on.

    If you're doing a drill, only get input data for the locations
    that will be used in the drill (the drill parent and its children).
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    LOG.info(
        f"Configuring inputs for model version ID {args.model_version_id}.")
    LOG.debug(f"Arguments: {args}.")

    context = Context(model_version_id=args.model_version_id,
                      make=args.make,
                      configure_application=args.configure)
    parameter_json = settings_json_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection)
    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings)

    context.write_inputs(inputs=inputs, settings=parameter_json)
Example #3
def sample(model_version_id: int,
           parent_location_id: int,
           sex_id: int,
           n_sim: int,
           n_pool: int,
           fit_type: str,
           asymptotic: bool = False) -> None:
    """
    Simulates from a dismod database that has already had a fit run on it. Does so
    optionally in parallel.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location ID specifying location of database
    sex_id
        The sex ID specifying location of database
    n_sim
        The number of simulations to do
    n_pool
        The number of multiprocessing pools to create. If 1, the simulations
        are all run together in one dmdismod command rather than in a pool.
    fit_type
        The type of fit that was performed on this database, one of fixed or both.
    asymptotic
        Whether to sample asymptotically rather than with fit-refit (simulate)
        sampling; falls back to simulation if the asymptotic statistics fail.
    """

    context = Context(model_version_id=model_version_id)
    main_db = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    index_file_pattern = context.db_index_file_pattern(
        location_id=parent_location_id, sex_id=sex_id)

    if asymptotic:
        result = sample_asymptotic(path=main_db,
                                   n_sim=n_sim,
                                   fit_type=fit_type)
        try:
            check_sample_asymptotic(
                result[f'sample asymptotic {fit_type} {n_sim}'].stderr)
        except SampleAsymptoticError:
            asymptotic = False
            LOG.info(
                "Jumping to sample simulate because sample asymptotic failed.")
            LOG.warning("Please review the warning from sample asymptotic.")
    if not asymptotic:
        simulate(path=main_db, n_sim=n_sim)
        if n_pool > 1:
            sample_simulate_pool(main_db=main_db,
                                 index_file_pattern=index_file_pattern,
                                 fit_type=fit_type,
                                 n_pool=n_pool,
                                 n_sim=n_sim)
        else:
            sample_simulate_sequence(path=main_db,
                                     n_sim=n_sim,
                                     fit_type=fit_type)
Example #4
def configure_inputs(model_version_id: int,
                     make: bool,
                     configure: bool,
                     midpoint: bool = False,
                     test_dir: Optional[str] = None,
                     json_file: Optional[str] = None) -> None:
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on. Also uploads CSMR to the database attached to the model version,
    if applicable.

    Optionally use a json file for settings instead of a model version ID's json file.

    Parameters
    ----------
    model_version_id
        The model version ID to configure inputs for
    make
        Whether or not to make the directory structure for the model version ID
    configure
        Configure the application for the IHME cluster; otherwise, use the
        test_dir for the directory tree instead.
    midpoint
        Whether to midpoint the input data ages and times when configuring
        the inputs for dismod (passed through to configure_inputs_for_dismod).
    test_dir
        A test directory to use rather than the directory specified by the
        model version context in the IHME file system.
    json_file
        An optional filepath pointing to a different json than is attached to the
        model_version_id. Will use this instead for settings.
    """
    LOG.info(f"Configuring inputs for model version ID {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=configure,
                      root_directory=test_dir)
    if json_file:
        LOG.info(f"Reading settings from file: {json_file}")
        with open(json_file, 'r') as f:
            parameter_json = json.load(f)
    else:
        parameter_json = settings_json_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings, midpoint=midpoint)

    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3 table.")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    context.write_inputs(inputs=inputs, settings=parameter_json)
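A minimal usage sketch of configure_inputs for a local test, with a hypothetical model version ID and test directory:

configure_inputs(model_version_id=472885,        # hypothetical
                 make=True,                      # build the directory tree
                 configure=False,                # don't target the IHME cluster
                 midpoint=False,
                 test_dir='/tmp/cascade_test')   # hypothetical root directory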
Example #5
def mulcov_statistics(model_version_id: int, locations: List[int], sexes: List[int],
                      outfile_name: str, sample: bool = True,
                      mean: bool = True, std: bool = True,
                      quantile: Optional[List[float]] = None) -> None:
    """
    Compute statistics for the covariate multipliers.

    Parameters
    ----------
    model_version_id
        The model version ID
    locations
        A list of locations that, when used in combination with sexes, point to the databases
        to pull covariate multiplier estimates from
    sexes
        A list of sexes that, when used in combination with locations, point to the databases
        to pull covariate multiplier estimates from
    outfile_name
        A filepath specifying where to save the covariate multiplier statistics.
    sample
        Whether to pull the results from the sample table (True) or from the
        fit_var table (False).
    mean
        Whether or not to compute the mean
    std
        Whether or not to compute the standard deviation
    quantile
        An optional list of quantiles to compute
    """

    context = Context(model_version_id=model_version_id)
    db_files = [DismodIO(context.db_file(location_id=loc, sex_id=sex))
                for loc in locations for sex in sexes]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(f"The common covariates in the passed databases are {common_covariates}.")

    if sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'

    LOG.info(f"Will pull from the {table_name} table from each database.")
    mulcov_estimates = get_mulcovs(
        dbs=db_files, covs=common_covariates, table=table_name
    )
    stats = compute_statistics(
        df=mulcov_estimates, mean=mean, std=std, quantile=quantile
    )
    LOG.info('Write to output file.')
    stats.to_csv(context.outputs_dir / f'{outfile_name}.csv', index=False)
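A usage sketch with hypothetical IDs, computing the mean, standard deviation, and a 95% interval of the covariate multipliers across the four location-sex databases:

mulcov_statistics(model_version_id=472885,   # hypothetical
                  locations=[101, 102],      # hypothetical location IDs
                  sexes=[1, 2],
                  outfile_name='mulcov_stats',
                  sample=True,               # pull from the sample table
                  mean=True,
                  std=True,
                  quantile=[0.025, 0.975])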
Example #6
def main():
    """
    Takes dismod databases that have already had a fit run on them and simulates new datasets, refitting
    on all of them, then combining the results back into one database.
    Returns:

    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id,
                              sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters
    run_dismod_commands(dm_file=main_db,
                        commands=[
                            'set start_var fit_var',
                            'set truth_var fit_var', 'set scale_var fit_var',
                            f'simulate {args.n_sim}'
                        ])

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations (uses the __call__ method)
        fit_sample = FitSample(context=context,
                               location_id=args.parent_location_id,
                               sex_id=args.sex_id,
                               fit_type=args.fit_type)
        p = Pool(args.n_pool)
        fits = list(p.map(fit_sample, range(args.n_sim)))
        p.close()

        # Reconstruct the sample table with all n_sim fits
        sample = pd.concat(fits).reset_index(drop=True)
        sample = sample.rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value'
        })
        d.sample = sample
    else:
        # If we only have one pool that means we aren't going to run in parallel
        run_dismod_commands(dm_file=main_db,
                            commands=[f'sample simulate {args.n_sim}'])
Example #7
def main():
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])
    LOG.info(f"Starting model for {args.model_version_id}.")

    context = Context(model_version_id=args.model_version_id,
                      make=True,
                      configure_application=True)
    context.update_status(status='Submitted')

    settings = settings_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection)

    if settings.model.drill == 'drill':
        cascade_command = CASCADE_COMMANDS['drill'](
            model_version_id=args.model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex)
    elif settings.model.drill == 'cascade':
        raise NotImplementedError(
            "Cascade is not implemented yet for Cascade-AT.")
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    if args.jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(cc=cascade_command,
                                                  context=context)
        error = wf.run()
        if error:
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}.")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error"
                                   f"{process.stderr.decode()}")

    context.update_status(status='Complete')
Example #8
def main():
    """
    Creates a dismod database using the saved inputs and the file
    structure specified in the context.
    
    Then runs an optional set of commands on the database passed
    in the --commands argument.
    
    Also passes an optional argument --options as a dictionary to
    the dismod database to fill/modify the options table.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)

    inputs, alchemy, settings = context.read_inputs()

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if args.prior_parent or args.prior_sex:
        if not (args.prior_parent and args.prior_sex):
            raise RuntimeError(
                "Need to pass both prior parent and sex or neither.")
        child_prior = DismodExtractor(path=context.db_file(
            location_id=args.prior_parent,
            sex_id=args.prior_sex)).gather_draws_for_prior_grid(
                location_id=args.parent_location_id,
                sex_id=args.sex_id,
                rates=[r.rate for r in settings.rate])
    else:
        child_prior = None

    df = DismodFiller(path=context.db_file(location_id=args.parent_location_id,
                                           sex_id=args.sex_id),
                      settings_configuration=settings,
                      measurement_inputs=inputs,
                      grid_alchemy=alchemy,
                      parent_location_id=args.parent_location_id,
                      sex_id=args.sex_id,
                      child_prior=child_prior)
    df.fill_for_parent_child(**args.options)

    run_dismod_commands(dm_file=df.path.absolute(), commands=args.commands)
Example #9
def format_upload(model_version_id: int,
                  final: bool = False,
                  fit: bool = False,
                  prior: bool = False) -> None:
    """
    Uploads formatted results for a model version ID: optionally the final
    results, the fit, and/or the prior.
    """
    context = Context(model_version_id=model_version_id)
    rh = ResultsHandler()

    if final:
        upload_final(context=context, rh=rh)
    if fit:
        upload_fit(context=context, rh=rh)
    if prior:
        upload_prior(context=context, rh=rh)
Example #10
def main():
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    source_db_path = context.db_file(
        location_id=args.source_location, sex_id=args.source_sex, make=False)
    source_db = DismodIO(path=source_db_path)

    rates = [r.rate for r in settings.rate]
    posterior_grid = get_prior_avgint_grid(settings=settings,
                                           integrands=rates,
                                           sexes=args.target_sexes,
                                           locations=args.target_locations,
                                           midpoint=False)
    posterior_grid = inputs.add_covariates_to_data(df=posterior_grid)
    posterior_grid = prep_data_avgint(df=posterior_grid,
                                      node_df=source_db.node,
                                      covariate_df=source_db.covariate)
    posterior_grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    source_db.avgint = posterior_grid
    run_dismod_commands(dm_file=source_db_path, commands=['predict sample'])
Example #11
def main():
    """

    Returns:

    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    db_files = [
        DismodIO(context.db_file(location_id=loc, sex_id=sex))
        for loc in args.locations for sex in args.sexes
    ]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(
        f"The common covariates in the passed databases are {common_covariates}."
    )

    if args.sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'

    LOG.info(f"Will pull from the {table_name} table from each database.")
    mulcov_estimates = get_mulcovs(dbs=db_files,
                                   covs=common_covariates,
                                   table=table_name)
    mulcov_statistics = compute_statistics(df=mulcov_estimates,
                                           mean=args.mean,
                                           std=args.std,
                                           quantile=args.quantile)
    LOG.info('Writing to the output file.')
    mulcov_statistics.to_csv(context.outputs_dir / f'{args.outfile_name}.csv',
                             index=False)
Example #12
def main():
    """
    Cleans up all dismod databases (.db files) associated with the model version ID.
    :return:
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])
    context = Context(model_version_id=args.model_version_id)

    for root, dirs, files in os.walk(context.database_dir):
        for f in files:
            if f.endswith(".db"):
                file = os.path.join(root, f)
                LOG.info(f"Deleting {file}.")
                os.remove(file)
Example #13
def cleanup(model_version_id: int) -> None:
    """
    Delete all databases (.db) files attached to a model version.

    Parameters
    ----------
    model_version_id
        The model version ID to delete databases for
    """
    context = Context(model_version_id=model_version_id)

    for root, dirs, files in os.walk(context.database_dir):
        for f in files:
            if f.endswith(".db"):
                file = os.path.join(root, f)
                LOG.info(f"Deleting {file}.")
                os.remove(file)
Example #14
def get_mulcov_priors(model_version_id: int):
    convert_type = {
        'rate_value': 'alpha',
        'meas_value': 'beta',
        'meas_std': 'gamma'
    }
    mulcov_prior = {}
    ctx = Context(model_version_id=model_version_id)
    path = os.path.join(ctx.outputs_dir, 'mulcov_stats.csv')
    mulcov_stats_df = pd.read_csv(path)

    for _, row in mulcov_stats_df.iterrows():
        # pd.read_csv yields NaN for missing values, not None
        if pd.notna(row['rate_name']):
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['rate_name'])] = Gaussian(
                              mean=row['mean'], standard_deviation=row['std'])
        if pd.notna(row['integrand_name']):
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['integrand_name'])] = Gaussian(
                              mean=row['mean'], standard_deviation=row['std'])
    return mulcov_prior
Example #15
def get_mulcov_priors(
        model_version_id: int) -> Dict[Tuple[str, str, str], _Prior]:
    """
    Reads in covariate multiplier statistics for a specific model version ID
    and returns a dictionary mapping each (covariate multiplier type,
    covariate name, rate or integrand) tuple to a prior object.

    Parameters
    ----------
    model_version_id
        The model version ID to pull covariate multiplier statistics from
    """
    convert_type = {
        'rate_value': 'alpha',
        'meas_value': 'beta',
        'meas_noise': 'gamma'
    }
    mulcov_prior = {}
    ctx = Context(model_version_id=model_version_id)
    path = os.path.join(ctx.outputs_dir, 'mulcov_stats.csv')
    if not os.path.exists(path):
        return {}
    mulcov_stats_df = pd.read_csv(path)
    if mulcov_stats_df.empty:
        return {}
    for _, row in mulcov_stats_df.iterrows():
        if row['rate_name'] != 'none':
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['rate_name'])] = Gaussian(
                              mean=row['mean'], standard_deviation=row['std'])
        if row['integrand_name'] != 'none':
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['integrand_name'])] = Gaussian(
                              mean=row['mean'], standard_deviation=row['std'])
    return mulcov_prior
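The returned dictionary is keyed by tuples of (multiplier type mapped to 'alpha'/'beta'/'gamma', covariate name, rate or integrand name). A hypothetical lookup:

priors = get_mulcov_priors(model_version_id=472885)    # hypothetical ID
# 'alpha' = rate_value multiplier; covariate and rate names are hypothetical
ldi_on_iota = priors.get(('alpha', 'c_ldi', 'iota'))   # a Gaussian, or None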
Example #16
def dismod_db(model_version_id: int,
              parent_location_id: int,
              sex_id: int,
              dm_commands: List[str],
              dm_options: Dict[str, Union[int, str, float]],
              prior_samples: bool = False,
              prior_parent: Optional[int] = None,
              prior_sex: Optional[int] = None,
              prior_mulcov_model_version_id: Optional[int] = None,
              test_dir: Optional[str] = None,
              fill: bool = False,
              save_fit: bool = True,
              save_prior: bool = True) -> None:
    """
    Creates a dismod database using the saved inputs and the file
    structure specified in the context. Alternatively it will
    skip the filling stage and move straight to the command
    stage if you don't pass --fill.

    Then runs an optional set of commands on the database passed
    in the --commands argument.

    Also passes an optional argument --options as a dictionary to
    the dismod database to fill/modify the options table.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location for the database
    sex_id
        The parent sex for the database
    dm_commands
        A list of commands to pass to the run_dismod_commands function, executed
        directly on the dismod database
    dm_options
        A dictionary of options to pass to the dismod option table
    prior_samples
        Whether to pull the prior information from the samples of a previous
        fit rather than from the fit itself (passed to get_prior).
    prior_parent
        An optional parent location ID that specifies where to pull the prior
        information from.
    prior_sex
        An optional parent sex ID that specifies where to pull the prior
        information from.
    prior_mulcov_model_version_id
        An optional model version ID whose covariate multiplier statistics are
        used as priors on the covariate multipliers (see get_mulcov_priors).
    test_dir
        A test directory to create the database in rather than the database
        specified by the IHME file system context.
    fill
        Whether or not to fill the database with new inputs based on the model_version_id,
        parent_location_id, and sex_id. If not filling, this script can be used
        to just execute commands on the database instead.
    save_fit
        Whether or not to save the fit from this database as the parent fit.
    save_prior
        Whether or not to save the prior for the children as the prior fit.
    """
    if test_dir is not None:
        context = Context(model_version_id=model_version_id,
                          configure_application=False,
                          root_directory=test_dir)
    else:
        context = Context(model_version_id=model_version_id)

    db_path = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    inputs, alchemy, settings = context.read_inputs()

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if prior_parent or prior_sex:
        if not (prior_parent and prior_sex):
            raise DismodDBError(
                "Need to pass both prior parent and sex or neither.")
        prior_db = context.db_file(location_id=prior_parent, sex_id=prior_sex)
        child_prior = get_prior(path=prior_db,
                                location_id=parent_location_id,
                                sex_id=sex_id,
                                rates=[r.rate for r in settings.rate],
                                samples=prior_samples)
        if save_prior:
            save_predictions(db_file=prior_db,
                             locations=[parent_location_id],
                             sexes=[sex_id],
                             model_version_id=model_version_id,
                             gbd_round_id=settings.gbd_round_id,
                             out_dir=context.prior_dir)
    else:
        child_prior = None
        if save_prior:
            raise DismodDBError(
                "Cannot save the prior because there was no argument"
                "passed in for the prior_parent or prior_sex.")

    if prior_mulcov_model_version_id is not None:
        LOG.info(
            f'Passing mulcov prior from model version id = {prior_mulcov_model_version_id}'
        )
        mulcov_priors = get_mulcov_priors(prior_mulcov_model_version_id)
    else:
        mulcov_priors = None

    if fill:
        fill_database(
            path=db_path,
            inputs=inputs,
            alchemy=alchemy,
            settings=settings,
            parent_location_id=parent_location_id,
            sex_id=sex_id,
            child_prior=child_prior,
            options=dm_options,
            mulcov_prior=mulcov_priors,
        )

    if dm_commands:
        run_dismod_commands(dm_file=str(db_path), commands=dm_commands)

    if save_fit:
        save_predictions(db_file=context.db_file(
            location_id=parent_location_id, sex_id=sex_id),
                         model_version_id=model_version_id,
                         gbd_round_id=settings.gbd_round_id,
                         out_dir=context.fit_dir)
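A sketch of a fill-and-fit invocation with hypothetical IDs; 'init' and 'fit fixed' are standard dmdismod commands, and the option shown is illustrative:

dismod_db(model_version_id=472885,                 # hypothetical
          parent_location_id=1,
          sex_id=2,
          dm_commands=['init', 'fit fixed'],
          dm_options={'max_num_iter_fixed': 200},  # illustrative option
          fill=True,
          save_fit=False,        # skip saving predictions in this sketch
          save_prior=False)      # no prior_parent/prior_sex passed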
Example #17
@pytest.fixture  # assuming this is a pytest fixture (it takes tmp_path)
def context(tmp_path):
    c = Context(model_version_id=0,
                make=True,
                configure_application=False,
                root_directory=tmp_path)
    return c
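A hypothetical test consuming the fixture:

def test_context(context):
    # hypothetical: assumes Context exposes the model_version_id it was given
    assert context.model_version_id == 0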
Example #18
def predict_sample(model_version_id: int,
                   parent_location_id: int,
                   sex_id: int,
                   child_locations: List[int],
                   child_sexes: List[int],
                   prior_grid: bool = True,
                   save_fit: bool = False,
                   save_final: bool = False,
                   sample: bool = False,
                   n_sim: int = 1,
                   n_pool: int = 1) -> None:
    """
    Takes a database that has already had a fit and sample simulate run on it,
    fills the avgint table for the child_locations and child_sexes you want to
    make predictions for, and then predicts on that grid. Predictions are made
    only for the primary rates in the model, on the grid specified for those rates.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location ID that specifies where the database is stored
    sex_id
        The sex ID that specifies where the database is stored
    child_locations
        The child locations to make predictions for on the rate grid
    child_sexes
        The child sexes to make predictions for on the rate grid
    prior_grid
        Whether or not to replace the default gbd-avgint grid with
        a prior grid for the rates.
    save_fit
        Whether or not to save the fit for upload later.
    save_final
        Whether or not to save the final for upload later.
    sample
        Whether to predict from the sample table or the fit_var table
    n_sim
        The number of simulations to predict for
    n_pool
        The number of multiprocessing pools to create. If 1, the simulations
        are all run together in one dmdismod command rather than in a pool.

    """
    predictions = None

    context = Context(model_version_id=model_version_id)
    inputs, alchemy, settings = context.read_inputs()
    main_db = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    index_file_pattern = context.db_index_file_pattern(
        location_id=parent_location_id, sex_id=sex_id)

    if sample:
        table = 'sample'
    else:
        table = 'fit_var'

    if prior_grid:
        fill_avgint_with_priors_grid(inputs=inputs,
                                     alchemy=alchemy,
                                     settings=settings,
                                     source_db_path=main_db,
                                     child_locations=child_locations,
                                     child_sexes=child_sexes)

    if sample and (n_pool > 1):
        predictions = predict_sample_pool(
            main_db=main_db,
            index_file_pattern=index_file_pattern,
            n_sim=n_sim,
            n_pool=n_pool)
    else:
        predict_sample_sequence(path=main_db, table=table)

    if save_fit or save_final:
        if len(child_locations) == 0:
            locations = inputs.location_dag.parent_children(parent_location_id)
        else:
            locations = child_locations
        if len(child_sexes) == 0:
            sexes = [sex_id]
        else:
            sexes = child_sexes
        out_dirs = []
        if save_fit:
            out_dirs.append(context.fit_dir)
        if save_final:
            out_dirs.append(context.draw_dir)
        for folder in out_dirs:
            save_predictions(db_file=main_db,
                             locations=locations,
                             sexes=sexes,
                             model_version_id=model_version_id,
                             gbd_round_id=settings.gbd_round_id,
                             out_dir=folder,
                             sample=sample,
                             predictions=predictions)
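A usage sketch with hypothetical IDs, predicting from the sample table on the priors grid for two child locations and saving the fit for later upload:

predict_sample(model_version_id=472885,      # hypothetical
               parent_location_id=1,
               sex_id=2,
               child_locations=[101, 102],   # hypothetical children
               child_sexes=[1, 2],
               prior_grid=True,
               save_fit=True,
               sample=True,
               n_sim=100,
               n_pool=10)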
Example #19
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        n_pool: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False,
        json_file: Optional[str] = None,
        test_dir: Optional[str] = None,
        execute_dag: bool = True) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is specified
    in the model version settings).

    Creates a cascade command and a bunch of cascade operations based
    on the model version settings. More information on this structure
    is in :ref:`executor`.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    n_pool
        Number of multiprocessing pools to use for the simulations
        (passed through to the cascade command)
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the application and inputs (sets
        configure_application=False and is passed through to the cascade command)
    json_file
        An optional json settings file to use instead of the settings attached
        to the model version ID; it is saved to the context for use throughout
        the cascade
    test_dir
        A test directory to use as the root directory rather than the IHME
        file system
    execute_dag
        Whether to execute the cascade dag after writing its commands to a file
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=not skip_configure,
                      root_directory=test_dir)
    context.update_status(status='Submitted')

    if json_file:
        with open(json_file) as fn:
            LOG.info(f"Reading settings from {json_file}")
            parameter_json = json.loads(fn.read())
        settings = load_settings(parameter_json)
        # Save the json file as it is used throughout the cascade
        LOG.info(f"Replacing {context.settings_file}")
        context.write_inputs(settings=parameter_json)
    else:
        settings = settings_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex,
            n_sim=n_sim,
            n_pool=n_pool,
            skip_configure=skip_configure,
        )
    elif settings.model.drill == 'cascade':

        location_start = None
        sex = None

        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            n_pool=n_pool,
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure,
        )
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    dag_cmds_path = (context.inputs_dir / 'dag_commands.txt')
    LOG.info(f"Writing cascade dag commands to {dag_cmds_path}.")
    dag_cmds_path.write_text('\n'.join(cascade_command.get_commands()))

    if not execute_dag:
        return

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        wf_run = wf.run(seconds_until_timeout=60 * 60 * 24 * 3, resume=True)
        if wf_run.status != 'D':
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error"
                                   f"{process.stderr.decode()}")
            if process.stderr:
                print(process.stderr.decode())
            if process.stdout:
                print(process.stdout.decode())

    context.update_status(status='Complete')
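A sketch of a dry run with hypothetical paths: the settings come from a local json file and the dag commands are written out without being executed.

run(model_version_id=472885,                # hypothetical
    jobmon=False,                           # would run commands in-session
    make=True,
    skip_configure=True,                    # inputs already configured
    json_file='/tmp/settings.json',         # hypothetical settings file
    test_dir='/tmp/cascade_test',           # hypothetical root directory
    execute_dag=False)                      # only write dag_commands.txt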
Example #20
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False) -> None:
    """
    Runs the whole cascade or drill for a model version (which one is specified
    in the model version settings).

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the inputs (passed through to the cascade command)
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=True)
    context.update_status(status='Submitted')

    settings = settings_from_model_version_id(
        model_version_id=model_version_id, conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex)
    elif settings.model.drill == 'cascade':

        location_start = None
        sex = None

        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure)
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        error = wf.run()
        if error:
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}.")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error"
                                   f"{process.stderr.decode()}")

    context.update_status(status='Complete')