def main():
    """
    Takes a dismod database that has had predict run on it and converts the
    predictions into the format needed for the IHME Epi Databases. Also
    uploads inputs to tier 3, which allows us to view those inputs in EpiViz.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=args.model_version_id,
            conn_def=context.model_connection)

    LOG.info("Extracting results from DisMod SQLite Database.")
    dismod_file = context.db_file(location_id=args.parent_location_id,
                                  sex_id=args.sex_id, make=False)
    da = DismodExtractor(path=dismod_file)
    predictions = da.format_predictions_for_ihme()

    LOG.info("Saving the results.")
    rh = ResultsHandler(model_version_id=args.model_version_id)
    rh.save_draw_files(df=predictions, directory=context.draw_dir)
    rh.upload_summaries(directory=context.draw_dir,
                        conn_def=context.model_connection)
def main():
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on.

    If you're doing a drill, then input data is only pulled for the locations
    that will be used in the drill (the drill parent and its children).
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    LOG.info(f"Configuring inputs for model version ID {args.model_version_id}.")
    LOG.debug(f"Arguments: {args}.")

    context = Context(model_version_id=args.model_version_id,
                      make=args.make,
                      configure_application=args.configure)

    parameter_json = settings_json_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection)
    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings)

    context.write_inputs(inputs=inputs, settings=parameter_json)
def sample(model_version_id: int, parent_location_id: int, sex_id: int,
           n_sim: int, n_pool: int, fit_type: str,
           asymptotic: bool = False) -> None:
    """
    Simulates from a dismod database that has already had a fit run on it.
    Does so optionally in parallel.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location ID specifying location of the database
    sex_id
        The sex ID specifying location of the database
    n_sim
        The number of simulations to do
    n_pool
        The number of multiprocessing pools to create. If 1, then will not
        run with pools but just run all simulations together in one
        dmdismod command.
    fit_type
        The type of fit that was performed on this database, one of
        "fixed" or "both"
    asymptotic
        Whether to take asymptotic samples; if False (or if asymptotic
        sampling fails), do a fit-refit simulation instead
    """
    context = Context(model_version_id=model_version_id)
    main_db = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    index_file_pattern = context.db_index_file_pattern(
        location_id=parent_location_id, sex_id=sex_id)

    if asymptotic:
        result = sample_asymptotic(path=main_db, n_sim=n_sim,
                                   fit_type=fit_type)
        try:
            check_sample_asymptotic(
                result[f'sample asymptotic {fit_type} {n_sim}'].stderr)
        except SampleAsymptoticError:
            asymptotic = False
            LOG.info("Jumping to sample simulate because sample asymptotic "
                     "failed.")
            LOG.warning("Please review the warning from sample asymptotic.")
    if not asymptotic:
        simulate(path=main_db, n_sim=n_sim)
        if n_pool > 1:
            sample_simulate_pool(main_db=main_db,
                                 index_file_pattern=index_file_pattern,
                                 fit_type=fit_type,
                                 n_pool=n_pool, n_sim=n_sim)
        else:
            sample_simulate_sequence(path=main_db, n_sim=n_sim,
                                     fit_type=fit_type)
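# A usage sketch (hypothetical IDs; assumes `sample` is imported from its
# executor module as the CLI wrapper does). With asymptotic=True this tries
# `sample asymptotic` first and falls back to simulate/fit-refit on failure:
#
#     sample(model_version_id=265844, parent_location_id=1, sex_id=2,
#            n_sim=100, n_pool=10, fit_type='both', asymptotic=True)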
def configure_inputs(model_version_id: int, make: bool, configure: bool,
                     midpoint: bool = False,
                     test_dir: Optional[str] = None,
                     json_file: Optional[str] = None) -> None:
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on. Also uploads CSMR to the database attached to the
    model version, if applicable. Optionally uses a json file for settings
    instead of the model version ID's json file.

    Parameters
    ----------
    model_version_id
        The model version ID to configure inputs for
    make
        Whether or not to make the directory structure for the model
        version ID
    configure
        Configure the application for the IHME cluster; otherwise use the
        test_dir for the directory tree instead.
    midpoint
        Passed through to ``inputs.configure_inputs_for_dismod``; whether to
        midpoint the input data over its age and time spans.
    test_dir
        A test directory to use rather than the directory specified by the
        model version context in the IHME file system.
    json_file
        An optional filepath pointing to a different json than is attached
        to the model_version_id. Will use this instead for settings.
    """
    LOG.info(f"Configuring inputs for model version ID {model_version_id}.")

    context = Context(model_version_id=model_version_id, make=make,
                      configure_application=configure,
                      root_directory=test_dir)
    if json_file:
        LOG.info(f"Reading settings from file: {json_file}")
        with open(json_file, 'r') as f:
            parameter_json = json.load(f)
    else:
        parameter_json = settings_json_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)
    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings, midpoint=midpoint)

    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3 table.")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    context.write_inputs(inputs=inputs, settings=parameter_json)
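# A usage sketch (hypothetical model version ID). This pulls settings from
# the epi database, builds the inputs object, and writes the pickled inputs
# plus the settings json into the model version's directory tree for the
# downstream dismod_db and predict steps:
#
#     configure_inputs(model_version_id=265844, make=True, configure=True)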
def mulcov_statistics(model_version_id: int, locations: List[int],
                      sexes: List[int], outfile_name: str,
                      sample: bool = True,
                      mean: bool = True, std: bool = True,
                      quantile: Optional[List[float]] = None) -> None:
    """
    Compute statistics for the covariate multipliers.

    Parameters
    ----------
    model_version_id
        The model version ID
    locations
        A list of locations that, when used in combination with sexes,
        point to the databases to pull covariate multiplier estimates from
    sexes
        A list of sexes that, when used in combination with locations,
        point to the databases to pull covariate multiplier estimates from
    outfile_name
        A filepath specifying where to save the covariate multiplier
        statistics.
    sample
        Whether or not the results are stored in the sample table or the
        fit_var table.
    mean
        Whether or not to compute the mean
    std
        Whether or not to compute the standard deviation
    quantile
        An optional list of quantiles to compute
    """
    context = Context(model_version_id=model_version_id)
    db_files = [DismodIO(context.db_file(location_id=loc, sex_id=sex))
                for loc in locations for sex in sexes]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(f"The common covariates in the passed databases are "
             f"{common_covariates}.")

    if sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'
    LOG.info(f"Will pull from the {table_name} table from each database.")

    mulcov_estimates = get_mulcovs(
        dbs=db_files, covs=common_covariates, table=table_name)
    stats = compute_statistics(
        df=mulcov_estimates, mean=mean, std=std, quantile=quantile)

    LOG.info('Write to output file.')
    stats.to_csv(context.outputs_dir / f'{outfile_name}.csv', index=False)
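# A usage sketch (hypothetical IDs). Note that `outfile_name='mulcov_stats'`
# produces the mulcov_stats.csv that get_mulcov_priors reads back in:
#
#     mulcov_statistics(model_version_id=265844, locations=[1], sexes=[1, 2],
#                       outfile_name='mulcov_stats', sample=True,
#                       mean=True, std=True, quantile=[0.025, 0.975])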
def main():
    """
    Takes dismod databases that have already had a fit run on them and
    simulates new datasets, refitting on all of them, then combining the
    results back into one database.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id,
                              sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters
    run_dismod_commands(dm_file=main_db, commands=[
        'set start_var fit_var',
        'set truth_var fit_var',
        'set scale_var fit_var',
        f'simulate {args.n_sim}'
    ])

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations
        # (uses the FitSample __call__ method)
        fit_sample = FitSample(context=context,
                               location_id=args.parent_location_id,
                               sex_id=args.sex_id,
                               fit_type=args.fit_type)
        p = Pool(args.n_pool)
        fits = list(p.map(fit_sample, range(args.n_sim)))
        p.close()

        # Reconstruct the sample table with all n_sim fits
        sample = pd.concat(fits).reset_index(drop=True)
        sample.rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value'
        }, inplace=True)
        d.sample = sample
    else:
        # With only one pool, we aren't going to run in parallel
        run_dismod_commands(dm_file=main_db,
                            commands=[f'sample simulate {args.n_sim}'])
def main():
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])
    LOG.info(f"Starting model for {args.model_version_id}.")

    context = Context(model_version_id=args.model_version_id,
                      make=True, configure_application=True)
    context.update_status(status='Submitted')

    settings = settings_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection)

    if settings.model.drill == 'drill':
        cascade_command = CASCADE_COMMANDS['drill'](
            model_version_id=args.model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex)
    elif settings.model.drill == 'cascade':
        raise NotImplementedError(
            "Cascade is not implemented yet for Cascade-AT.")
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} "
            f"is not implemented.")

    if args.jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(cc=cascade_command,
                                                  context=context)
        error = wf.run()
        if error:
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}.")
            process = subprocess.run(c, shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error "
                                   f"{process.stderr.decode()}")

    context.update_status(status='Complete')
def main():
    """
    Creates a dismod database using the saved inputs and the file structure
    specified in the context. Then runs an optional set of commands on the
    database passed in the --commands argument.

    Also passes an optional argument --options as a dictionary to the dismod
    database to fill/modify the options table.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if args.prior_parent or args.prior_sex:
        if not (args.prior_parent and args.prior_sex):
            raise RuntimeError(
                "Need to pass both prior parent and sex or neither.")
        child_prior = DismodExtractor(path=context.db_file(
            location_id=args.prior_parent,
            sex_id=args.prior_sex
        )).gather_draws_for_prior_grid(
            location_id=args.parent_location_id,
            sex_id=args.sex_id,
            rates=[r.rate for r in settings.rate])
    else:
        child_prior = None

    df = DismodFiller(
        path=context.db_file(location_id=args.parent_location_id,
                             sex_id=args.sex_id),
        settings_configuration=settings,
        measurement_inputs=inputs,
        grid_alchemy=alchemy,
        parent_location_id=args.parent_location_id,
        sex_id=args.sex_id,
        child_prior=child_prior)
    df.fill_for_parent_child(**args.options)

    run_dismod_commands(dm_file=df.path.absolute(), commands=args.commands)
def format_upload(model_version_id: int, final: bool = False,
                  fit: bool = False, prior: bool = False) -> None:
    """
    Uploads the final, fit, and/or prior results for a model version.
    """
    context = Context(model_version_id=model_version_id)
    rh = ResultsHandler()

    if final:
        upload_final(context=context, rh=rh)
    if fit:
        upload_fit(context=context, rh=rh)
    if prior:
        upload_prior(context=context, rh=rh)
def main():
    """
    Fills the avgint table of a source database with a posterior rate grid
    for the target locations and sexes, then predicts from the sample table.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    sourceDB = DismodIO(path=context.db_file(
        location_id=args.source_location,
        sex_id=args.source_sex, make=False))
    rates = [r.rate for r in settings.rate]

    posterior_grid = get_prior_avgint_grid(settings=settings,
                                           integrands=rates,
                                           sexes=args.target_sexes,
                                           locations=args.target_locations,
                                           midpoint=False)
    posterior_grid = inputs.add_covariates_to_data(df=posterior_grid)
    posterior_grid = prep_data_avgint(df=posterior_grid,
                                      node_df=sourceDB.node,
                                      covariate_df=sourceDB.covariate)
    posterior_grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)

    sourceDB.avgint = posterior_grid
    run_dismod_commands(dm_file=sourceDB, commands=['predict sample'])
def main():
    """
    Computes statistics for the covariate multipliers across the databases
    for the given locations and sexes, and writes them to a csv.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    db_files = [
        DismodIO(context.db_file(location_id=loc, sex_id=sex))
        for loc in args.locations for sex in args.sexes
    ]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(f"The common covariates in the passed databases are "
             f"{common_covariates}.")

    if args.sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'
    LOG.info(f"Will pull from the {table_name} table from each database.")

    mulcov_estimates = get_mulcovs(dbs=db_files,
                                   covs=common_covariates,
                                   table=table_name)
    mulcov_statistics = compute_statistics(df=mulcov_estimates,
                                           mean=args.mean,
                                           std=args.std,
                                           quantile=args.quantile)

    LOG.info("Writing to output file.")
    mulcov_statistics.to_csv(context.outputs_dir / f'{args.outfile_name}.csv',
                             index=False)
def main():
    """
    Cleans up all dismod databases (.db files) associated with the
    model version ID.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    for root, dirs, files in os.walk(context.database_dir):
        for f in files:
            if f.endswith(".db"):
                file = context.database_dir / root / f
                LOG.info(f"Deleting {file}.")
                os.remove(file)
def cleanup(model_version_id: int) -> None:
    """
    Delete all database (.db) files attached to a model version.

    Parameters
    ----------
    model_version_id
        The model version ID to delete databases for
    """
    context = Context(model_version_id=model_version_id)
    for root, dirs, files in os.walk(context.database_dir):
        for f in files:
            if f.endswith(".db"):
                file = context.database_dir / root / f
                LOG.info(f"Deleting {file}.")
                os.remove(file)
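# A usage sketch (hypothetical model version ID). The .db files can be large,
# so this would typically be run only after draws and summaries have been
# saved or uploaded:
#
#     cleanup(model_version_id=265844)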
def get_mulcov_priors(model_version_id: int):
    convert_type = {
        'rate_value': 'alpha',
        'meas_value': 'beta',
        'meas_std': 'gamma'
    }
    mulcov_prior = {}
    ctx = Context(model_version_id=model_version_id)
    path = os.path.join(ctx.outputs_dir, 'mulcov_stats.csv')
    mulcov_stats_df = pd.read_csv(path)
    for _, row in mulcov_stats_df.iterrows():
        if row['rate_name'] is not None:
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['rate_name'])] = Gaussian(
                mean=row['mean'], standard_deviation=row['std'])
        if row['integrand_name'] is not None:
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['integrand_name'])] = Gaussian(
                mean=row['mean'], standard_deviation=row['std'])
    return mulcov_prior
def get_mulcov_priors(
        model_version_id: int) -> Dict[Tuple[str, str, str], _Prior]:
    """
    Reads in covariate multiplier statistics from a specific model version
    ID and returns a dictionary with a prior object for each covariate
    multiplier type, covariate name, and rate or integrand.

    Parameters
    ----------
    model_version_id
        The model version ID to pull covariate multiplier statistics from
    """
    convert_type = {
        'rate_value': 'alpha',
        'meas_value': 'beta',
        'meas_noise': 'gamma'
    }
    mulcov_prior = {}
    ctx = Context(model_version_id=model_version_id)
    path = os.path.join(ctx.outputs_dir, 'mulcov_stats.csv')
    if not os.path.exists(path):
        return {}
    mulcov_stats_df = pd.read_csv(path)
    if mulcov_stats_df.empty:
        return {}
    for _, row in mulcov_stats_df.iterrows():
        if row['rate_name'] != 'none':
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['rate_name'])] = Gaussian(
                mean=row['mean'], standard_deviation=row['std'])
        if row['integrand_name'] != 'none':
            mulcov_prior[(convert_type[row['mulcov_type']],
                          row['c_covariate_name'],
                          row['integrand_name'])] = Gaussian(
                mean=row['mean'], standard_deviation=row['std'])
    return mulcov_prior
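# A usage sketch (hypothetical model version ID). The returned dictionary is
# keyed by (mulcov type, covariate name, rate or integrand name):
#
#     priors = get_mulcov_priors(model_version_id=265844)
#     # e.g. priors[('alpha', 'c_covariate', 'iota')] is a Gaussian prior
#     # with mean and standard deviation read from mulcov_stats.csv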
def dismod_db(model_version_id: int, parent_location_id: int, sex_id: int,
              dm_commands: List[str],
              dm_options: Dict[str, Union[int, str, float]],
              prior_samples: bool = False,
              prior_parent: Optional[int] = None,
              prior_sex: Optional[int] = None,
              prior_mulcov_model_version_id: Optional[int] = None,
              test_dir: Optional[str] = None,
              fill: bool = False,
              save_fit: bool = True,
              save_prior: bool = True) -> None:
    """
    Creates a dismod database using the saved inputs and the file structure
    specified in the context. Alternatively, if you don't pass --fill, it
    skips the filling stage and moves straight to the command stage. Then
    runs an optional set of commands on the database passed in the
    --commands argument. Also passes an optional argument --options as a
    dictionary to the dismod database to fill/modify the options table.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location for the database
    sex_id
        The parent sex for the database
    dm_commands
        A list of commands to pass to the run_dismod_commands function,
        executed directly on the dismod database
    dm_options
        A dictionary of options to pass to the dismod option table
    prior_samples
        Whether to construct the child prior from the samples of the prior
        database rather than from its fit.
    prior_parent
        An optional parent location ID that specifies where to pull the
        prior information from.
    prior_sex
        An optional parent sex ID that specifies where to pull the prior
        information from.
    prior_mulcov_model_version_id
        An optional model version ID from which to pull covariate multiplier
        statistics to use as priors.
    test_dir
        A test directory to create the database in rather than the database
        specified by the IHME file system context.
    fill
        Whether or not to fill the database with new inputs based on the
        model_version_id, parent_location_id, and sex_id. If not filling,
        this script can be used to just execute commands on the database
        instead.
    save_fit
        Whether or not to save the fit from this database as the parent fit.
    save_prior
        Whether or not to save the prior for the children as the prior fit.
    """
    if test_dir is not None:
        context = Context(model_version_id=model_version_id,
                          configure_application=False,
                          root_directory=test_dir)
    else:
        context = Context(model_version_id=model_version_id)

    db_path = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    inputs, alchemy, settings = context.read_inputs()

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if prior_parent or prior_sex:
        if not (prior_parent and prior_sex):
            raise DismodDBError(
                "Need to pass both prior parent and sex or neither.")
        prior_db = context.db_file(location_id=prior_parent,
                                   sex_id=prior_sex)
        child_prior = get_prior(path=prior_db,
                                location_id=parent_location_id,
                                sex_id=sex_id,
                                rates=[r.rate for r in settings.rate],
                                samples=prior_samples)
        if save_prior:
            save_predictions(db_file=prior_db,
                             locations=[parent_location_id],
                             sexes=[sex_id],
                             model_version_id=model_version_id,
                             gbd_round_id=settings.gbd_round_id,
                             out_dir=context.prior_dir)
    else:
        child_prior = None
        if save_prior:
            raise DismodDBError(
                "Cannot save the prior because there was no argument "
                "passed in for the prior_parent or prior_sex.")

    if prior_mulcov_model_version_id is not None:
        LOG.info(f"Passing mulcov prior from model version id = "
                 f"{prior_mulcov_model_version_id}")
        mulcov_priors = get_mulcov_priors(prior_mulcov_model_version_id)
    else:
        mulcov_priors = None

    if fill:
        fill_database(
            path=db_path, inputs=inputs, alchemy=alchemy, settings=settings,
            parent_location_id=parent_location_id, sex_id=sex_id,
            child_prior=child_prior, options=dm_options,
            mulcov_prior=mulcov_priors,
        )

    if dm_commands:
        run_dismod_commands(dm_file=str(db_path), commands=dm_commands)

    if save_fit:
        save_predictions(
            db_file=context.db_file(location_id=parent_location_id,
                                    sex_id=sex_id),
            model_version_id=model_version_id,
            gbd_round_id=settings.gbd_round_id,
            out_dir=context.fit_dir)
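# A usage sketch (hypothetical IDs; 'max_num_iter_fixed' is a standard
# dismod_at option). Fills a database for the parent/sex pair, runs an init
# and fit, and saves the fit predictions for upload:
#
#     dismod_db(model_version_id=265844, parent_location_id=1, sex_id=2,
#               dm_commands=['init', 'fit both'],
#               dm_options={'max_num_iter_fixed': 200},
#               fill=True, save_fit=True, save_prior=False)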
def context(tmp_path):
    c = Context(model_version_id=0, make=True,
                configure_application=False,
                root_directory=tmp_path)
    return c
def predict_sample(model_version_id: int, parent_location_id: int,
                   sex_id: int, child_locations: List[int],
                   child_sexes: List[int],
                   prior_grid: bool = True,
                   save_fit: bool = False,
                   save_final: bool = False,
                   sample: bool = False,
                   n_sim: int = 1, n_pool: int = 1) -> None:
    """
    Takes a database that has already had a fit and simulate sample run on
    it, fills the avgint table for the child_locations and child_sexes you
    want to make predictions for, and then predicts on that grid. Makes
    predictions on the grid that is specified for the primary rates in the
    model, for the primary rates only.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location ID that specifies where the database is stored
    sex_id
        The sex ID that specifies where the database is stored
    child_locations
        The child locations to make predictions for on the rate grid
    child_sexes
        The child sexes to make predictions for on the rate grid
    prior_grid
        Whether or not to replace the default gbd-avgint grid with a prior
        grid for the rates.
    save_fit
        Whether or not to save the fit for upload later.
    save_final
        Whether or not to save the final for upload later.
    sample
        Whether to predict from the sample table or the fit_var table
    n_sim
        The number of simulations to predict for
    n_pool
        The number of multiprocessing pools to create. If 1, then will not
        run with pools but just run all simulations together in one
        dmdismod command.
    """
    predictions = None
    context = Context(model_version_id=model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    main_db = context.db_file(location_id=parent_location_id, sex_id=sex_id)
    index_file_pattern = context.db_index_file_pattern(
        location_id=parent_location_id, sex_id=sex_id)

    if sample:
        table = 'sample'
    else:
        table = 'fit_var'

    if prior_grid:
        fill_avgint_with_priors_grid(
            inputs=inputs, alchemy=alchemy, settings=settings,
            source_db_path=main_db,
            child_locations=child_locations, child_sexes=child_sexes)

    if sample and (n_pool > 1):
        predictions = predict_sample_pool(
            main_db=main_db,
            index_file_pattern=index_file_pattern,
            n_sim=n_sim, n_pool=n_pool)
    else:
        predict_sample_sequence(path=main_db, table=table)

    if save_fit or save_final:
        if len(child_locations) == 0:
            locations = inputs.location_dag.parent_children(
                parent_location_id)
        else:
            locations = child_locations
        if len(child_sexes) == 0:
            sexes = [sex_id]
        else:
            sexes = child_sexes
        out_dirs = []
        if save_fit:
            out_dirs.append(context.fit_dir)
        if save_final:
            out_dirs.append(context.draw_dir)
        for folder in out_dirs:
            save_predictions(db_file=main_db,
                             locations=locations, sexes=sexes,
                             model_version_id=model_version_id,
                             gbd_round_id=settings.gbd_round_id,
                             out_dir=folder,
                             sample=sample,
                             predictions=predictions)
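# A usage sketch (hypothetical IDs): predict from the sample table on the
# prior grid for two child locations, in parallel, saving the fit for upload:
#
#     predict_sample(model_version_id=265844, parent_location_id=1, sex_id=2,
#                    child_locations=[101, 102], child_sexes=[1, 2],
#                    prior_grid=True, save_fit=True, sample=True,
#                    n_sim=100, n_pool=10)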
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        n_pool: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False,
        json_file: Optional[str] = None,
        test_dir: Optional[str] = None,
        execute_dag: bool = True) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is
    specified in the model version settings).

    Creates a cascade command and a bunch of cascade operations based
    on the model version settings. More information on this structure is
    in :ref:`executor`.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases,
        inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    n_pool
        Number of multiprocessing pools to use for the simulation fits
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the inputs (e.g., when they have already been
        configured)
    json_file
        An optional json settings file to use in place of the settings
        attached to the model version ID
    test_dir
        A test directory to use rather than the IHME file system
    execute_dag
        Whether to execute the cascade dag or just write its commands
        to a file
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=not skip_configure,
                      root_directory=test_dir)
    context.update_status(status='Submitted')

    if json_file:
        with open(json_file) as fn:
            LOG.info(f"Reading settings from {json_file}")
            parameter_json = json.loads(fn.read())
        settings = load_settings(parameter_json)
        # Save the json file as it is used throughout the cascade
        LOG.info(f"Replacing {context.settings_file}")
        context.write_inputs(settings=parameter_json)
    else:
        settings = settings_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    dag = LocationDAG(
        location_set_version_id=settings.location_set_version_id,
        gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex,
            n_sim=n_sim,
            n_pool=n_pool,
            skip_configure=skip_configure,
        )
    elif settings.model.drill == 'cascade':
        location_start = None
        sex = None
        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex
        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            n_pool=n_pool,
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure,
        )
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} "
            f"is not implemented.")

    dag_cmds_path = (context.inputs_dir / 'dag_commands.txt')
    LOG.info(f"Writing cascade dag commands to {dag_cmds_path}.")
    dag_cmds_path.write_text('\n'.join(cascade_command.get_commands()))
    if not execute_dag:
        return

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command, context=context,
            addl_workflow_args=addl_workflow_args)
        wf_run = wf.run(seconds_until_timeout=60 * 60 * 24 * 3, resume=True)
        if wf_run.status != 'D':
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}")
            process = subprocess.run(c, shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error "
                                   f"{process.stderr.decode()}")
            if process.stderr:
                print(process.stderr.decode())
            if process.stdout:
                print(process.stdout.decode())

    context.update_status(status='Complete')
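# A usage sketch (hypothetical model version ID): run the drill or cascade
# defined by the model version's settings without jobmon, executing each
# cascade command sequentially in this session:
#
#     run(model_version_id=265844, jobmon=False, make=True,
#         n_sim=10, n_pool=10)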
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is
    specified in the model version settings).

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases,
        inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the inputs (e.g., when they have already been
        configured)
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make, configure_application=True)
    context.update_status(status='Submitted')

    settings = settings_from_model_version_id(
        model_version_id=model_version_id,
        conn_def=context.model_connection)
    dag = LocationDAG(
        location_set_version_id=settings.location_set_version_id,
        gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex)
    elif settings.model.drill == 'cascade':
        location_start = None
        sex = None
        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex
        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure)
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} "
            f"is not implemented.")

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command, context=context,
            addl_workflow_args=addl_workflow_args)
        error = wf.run()
        if error:
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}.")
            process = subprocess.run(c, shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error "
                                   f"{process.stderr.decode()}")

    context.update_status(status='Complete')