def finished_experiment(shared_db_engine, shared_project_storage):
    """A successfully-run experiment.

    Its database schemas and project storage can be queried.

    Returns: (triage.experiments.SingleThreadedExperiment)
    """
    populate_source_data(shared_db_engine)
    base_config = sample_config()
    experiment = SingleThreadedExperiment(
        base_config,
        db_engine=shared_db_engine,
        project_path=shared_project_storage.project_path
    )
    experiment.run()
    return experiment
def test_experiment_tracker_exception(db_engine, project_path):
    experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=db_engine,
        project_path=project_path,
    )
    # no source data means this should blow up
    with pytest.raises(Exception):
        experiment.run()

    with scoped_session(db_engine) as session:
        experiment_run = session.query(ExperimentRun).get(experiment.run_id)
        assert experiment_run.current_status == ExperimentRunStatus.failed
        assert isinstance(experiment_run.last_updated_time, datetime.datetime)
        assert experiment_run.stacktrace
def finished_experiment(shared_db_engine, shared_project_storage):
    """A successfully-run experiment.

    Its database schemas and project storage can be queried.

    Returns: (triage.experiments.SingleThreadedExperiment)
    """
    populate_source_data(shared_db_engine)
    base_config = sample_config()
    with mock.patch("triage.util.conf.open", side_effect=open_side_effect) as mock_file:
        experiment = SingleThreadedExperiment(
            base_config,
            db_engine=shared_db_engine,
            project_path=shared_project_storage.project_path,
        )
        experiment.run()
    return experiment
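This fixture, like several snippets below, patches triage.util.conf.open with an open_side_effect helper that is not shown here. A minimal sketch of what such a helper could look like, assuming it only needs to delegate to the real built-in open so the patch can observe config-file reads without changing behavior (the project's actual helper may do more):

import builtins

def open_side_effect(*args, **kwargs):
    # Hypothetical sketch (not the project's actual helper): pass every call
    # straight through to the real built-in open, so mock.patch can record
    # config-file reads without altering what the experiment sees.
    return builtins.open(*args, **kwargs)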
def triage(ctx, config_file, triage_db, replace):
    config_file = os.path.join(os.sep, "triage", "experiment_config", config_file)
    click.echo(f"Using the config file {config_file}")

    with open(config_file) as f:
        experiment_config = yaml.load(f)

    click.echo(
        f"The output (matrices and models) of this experiment will be stored in triage/output"
    )
    click.echo(
        f"The experiment will utilize any preexisting matrix or model: {not replace}"
    )
    click.echo(f"Creating experiment object")

    experiment = SingleThreadedExperiment(
        config=experiment_config,
        db_engine=sqlalchemy.create_engine(triage_db),
        model_storage_class=FSModelStorageEngine,
        project_path='/triage/output',
        replace=replace)

    ctx.obj = experiment
    click.echo("Experiment loaded")
def experiment(self):
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    config = self._load_config()
    db_engine = create_engine(db_url)

    common_kwargs = {
        "db_engine": db_engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "materialize_subquery_fromobjs": self.args.materialize_fromobjs,
        "features_ignore_cohort": self.args.features_ignore_cohort,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
        "profile": self.args.profile,
        "save_predictions": self.args.save_predictions,
        "skip_validation": not self.args.validate,
    }

    if self.args.n_db_processes > 1 or self.args.n_processes > 1:
        experiment = MultiCoreExperiment(
            n_db_processes=self.args.n_db_processes,
            n_processes=self.args.n_processes,
            **common_kwargs,
        )
    else:
        experiment = SingleThreadedExperiment(**common_kwargs)

    return experiment
def triage(ctx, config_file, triage_db, replace, debug):
    config_file = os.path.join(os.sep, "triage", "experiment_config", config_file)
    click.echo(f"Using the config file {config_file}")

    with open(config_file) as f:
        experiment_config = yaml.load(f)

    click.echo(
        f"The output (matrices and models) of this experiment will be stored in triage/output"
    )
    click.echo(f"Using data stored in {triage_db}")
    click.echo(
        f"The experiment will utilize any preexisting matrix or model: {not replace}"
    )
    click.echo(f"Creating experiment object")

    experiment = SingleThreadedExperiment(
        config=experiment_config,
        db_engine=sqlalchemy.create_engine(triage_db),
        model_storage_class=FSModelStorageEngine,
        project_path='/triage/output',
        replace=replace)

    ctx.obj = experiment

    if debug:
        logging.basicConfig(level=logging.DEBUG)
        click.echo("Debug enabled (Expect A LOT of output at the screen!!!)")

    click.echo("Experiment loaded")
def experiment(self):
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    config = yaml.load(self.args.config)
    db_engine = create_engine(db_url)

    common_kwargs = {
        "db_engine": db_engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
    }

    if self.args.n_db_processes > 1 or self.args.n_processes > 1:
        experiment = MultiCoreExperiment(
            n_db_processes=self.args.n_db_processes,
            n_processes=self.args.n_processes,
            **common_kwargs,
        )
    else:
        experiment = SingleThreadedExperiment(**common_kwargs)

    return experiment
def run_exp(config_file, plot_timechops=True, run_exp=True, n_jobs=1):
    if plot_timechops:
        visualize_timechop(config_file)

    config, sql_engine, proj_folder = setup_experiment(config_file)

    if run_exp:
        if n_jobs > 1:
            experiment = MultiCoreExperiment(
                config=config,
                db_engine=sql_engine,
                n_processes=n_jobs,
                n_db_processes=n_jobs,
                project_path=proj_folder,
                replace=False,
                cleanup=True
            )
        else:
            experiment = SingleThreadedExperiment(
                config=config,
                db_engine=sql_engine,
                project_path=proj_folder,
                cleanup=True
            )

        st = time.time()
        experiment.run()
        en = time.time()
        print('Took {} seconds to run the experiment'.format(en - st))
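run_exp above depends on a setup_experiment helper that is not shown. A rough sketch under stated assumptions: it parses the YAML config, builds a SQLAlchemy engine from a DB_URL environment variable, and derives an output folder next to the config file (the variable name and folder layout are assumptions for illustration, not taken from the original):

import os
import yaml
from sqlalchemy import create_engine

def setup_experiment(config_file):
    # Hypothetical helper, sketched for illustration only; the real one may differ.
    with open(config_file) as f:
        config = yaml.safe_load(f)  # parse the experiment config
    sql_engine = create_engine(os.environ["DB_URL"])  # assumed env var for the DB URL
    proj_folder = os.path.join(os.path.dirname(config_file), "triage_output")  # assumed layout
    return config, sql_engine, proj_folder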
def test_experiment_tracker_exception(db_engine, project_path):
    with mock.patch("triage.util.conf.open", side_effect=open_side_effect) as mock_file:
        experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
        )

    # no source data means this should blow up
    with pytest.raises(Exception):
        experiment.run()

    with scoped_session(db_engine) as session:
        experiment_run = session.query(TriageRun).get(experiment.run_id)
        assert experiment_run.current_status == TriageRunStatus.failed
        assert isinstance(experiment_run.last_updated_time, datetime.datetime)
        assert experiment_run.stacktrace
def test_profiling(db_engine):
    populate_source_data(db_engine)
    with TemporaryDirectory() as temp_dir:
        project_path = os.path.join(temp_dir, "inspections")
        SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
            profile=True,
        ).run()
        assert len(os.listdir(os.path.join(project_path, "profiling_stats"))) == 1
def test_noload_if_wrong_version(self):
    experiment_config = sample_config()
    experiment_config["config_version"] = "v0"
    with TemporaryDirectory() as temp_dir:
        with self.assertRaises(ValueError):
            SingleThreadedExperiment(
                config=experiment_config,
                db_engine=None,
                project_path=os.path.join(temp_dir, "inspections"),
            )
def prepare_experiment(config):
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        populate_source_data(db_engine)
        with TemporaryDirectory() as temp_dir:
            experiment = SingleThreadedExperiment(
                config=config,
                db_engine=db_engine,
                project_path=os.path.join(temp_dir, 'inspections'),
                cleanup=False,
            )
            yield experiment
def test_noload_if_wrong_version(self):
    experiment_config = sample_config()
    experiment_config['config_version'] = 'v0'
    with TemporaryDirectory() as temp_dir:
        with self.assertRaises(ValueError):
            SingleThreadedExperiment(
                config=experiment_config,
                db_engine=None,
                model_storage_class=FSModelStorageEngine,
                project_path=os.path.join(temp_dir, 'inspections'),
            )
def test_noload_if_wrong_version(self):
    experiment_config = sample_config()
    experiment_config["config_version"] = "v0"
    with TemporaryDirectory() as temp_dir, mock.patch(
        "triage.util.conf.open", side_effect=open_side_effect
    ) as mock_file:
        with self.assertRaises(ValueError):
            SingleThreadedExperiment(
                config=experiment_config,
                db_engine=None,
                project_path=os.path.join(temp_dir, "inspections"),
            )
def test_load_if_right_version(self):
    experiment_config = sample_config()
    experiment_config["config_version"] = CONFIG_VERSION
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        with TemporaryDirectory() as temp_dir:
            experiment = SingleThreadedExperiment(
                config=experiment_config,
                db_engine=db_engine,
                project_path=os.path.join(temp_dir, "inspections"),
            )
            assert isinstance(experiment, SingleThreadedExperiment)
def experiment(self):
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    config = self._load_config()
    db_engine = create_engine(db_url)

    common_kwargs = {
        "db_engine": db_engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "materialize_subquery_fromobjs": self.args.materialize_fromobjs,
        "features_ignore_cohort": self.args.features_ignore_cohort,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
        "profile": self.args.profile,
        "save_predictions": self.args.save_predictions,
        "skip_validation": not self.args.validate,
    }

    logger.info(f"Setting up the experiment")
    logger.info(f"Configuration file: {self.args.config}")
    logger.info(f"Results will be stored in DB: {self.root.db_url}")
    logger.info(f"Artifacts will be saved in {self.args.project_path}")

    try:
        if self.args.n_db_processes > 1 or self.args.n_processes > 1:
            experiment = MultiCoreExperiment(
                n_db_processes=self.args.n_db_processes,
                n_processes=self.args.n_processes,
                **common_kwargs,
            )
            logger.info(
                f"Experiment will run in multi core mode using {self.args.n_processes} processes and {self.args.n_db_processes} db processes"
            )
        else:
            experiment = SingleThreadedExperiment(**common_kwargs)
            logger.info("Experiment will run in serial fashion")
        return experiment
    except Exception:
        logger.exception("Error occurred while creating the experiment!")
        logger.info(
            f"Experiment [config file: {self.args.config}] failed at creation"
        )
def test_profiling(db_engine):
    populate_source_data(db_engine)
    with TemporaryDirectory() as temp_dir, mock.patch(
        "triage.util.conf.open", side_effect=open_side_effect
    ) as mock_file:
        project_path = os.path.join(temp_dir, "inspections")
        SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
            profile=True,
        ).run()
        assert len(os.listdir(os.path.join(project_path, "profiling_stats"))) == 1
def test_load_if_right_version(self):
    experiment_config = sample_config()
    experiment_config["config_version"] = CONFIG_VERSION
    with testing.postgresql.Postgresql() as postgresql, TemporaryDirectory() as temp_dir, mock.patch(
        "triage.util.conf.open", side_effect=open_side_effect
    ) as mock_file:
        db_engine = create_engine(postgresql.url())
        experiment = SingleThreadedExperiment(
            config=experiment_config,
            db_engine=db_engine,
            project_path=os.path.join(temp_dir, "inspections"),
        )
        assert isinstance(experiment, SingleThreadedExperiment)
def test_load_if_right_version(self):
    experiment_config = sample_config()
    experiment_config['config_version'] = CONFIG_VERSION
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        with TemporaryDirectory() as temp_dir:
            experiment = SingleThreadedExperiment(
                config=experiment_config,
                db_engine=db_engine,
                model_storage_class=FSModelStorageEngine,
                project_path=os.path.join(temp_dir, 'inspections'),
            )
            assert isinstance(experiment, SingleThreadedExperiment)
def prepare_experiment(config):
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        populate_source_data(db_engine)
        with TemporaryDirectory() as temp_dir:
            with mock.patch(
                "triage.util.conf.open", side_effect=open_side_effect
            ) as mock_file:
                experiment = SingleThreadedExperiment(
                    config=config,
                    db_engine=db_engine,
                    project_path=os.path.join(temp_dir, "inspections"),
                    cleanup=False,
                    partial_run=True,
                )
                yield experiment
def test_experiment_tracker_in_parts(test_engine, project_path):
    experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
    )
    experiment.generate_matrices()
    experiment.train_and_test_models()
    with scoped_session(test_engine) as session:
        experiment_run = session.query(ExperimentRun).get(experiment.run_id)
        assert experiment_run.start_method == "generate_matrices"
def test_experiment_tracker_in_parts(test_engine, project_path):
    with mock.patch("triage.util.conf.open", side_effect=open_side_effect) as mock_file:
        experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=test_engine,
            project_path=project_path,
        )

    experiment.generate_matrices()
    experiment.train_and_test_models()
    with scoped_session(test_engine) as session:
        experiment_run = session.query(TriageRun).get(experiment.run_id)
        assert experiment_run.start_method == "generate_matrices"
def experiment(self):
    db_url = self.root.db_url
    config = yaml.load(self.args.config)
    db_engine = create_engine(db_url)

    common_kwargs = {
        'db_engine': db_engine,
        'project_path': self.args.project_path,
        'config': config,
        'replace': self.args.replace,
    }

    if self.args.n_db_processes > 1 or self.args.n_processes > 1:
        experiment = MultiCoreExperiment(
            n_db_processes=self.args.n_db_processes,
            n_processes=self.args.n_processes,
            **common_kwargs
        )
    else:
        experiment = SingleThreadedExperiment(**common_kwargs)

    return experiment
def model_evaluator(shared_db_engine, shared_project_storage):
    """Returns an instantiated ModelEvaluator available at module scope"""
    populate_source_data(shared_db_engine)
    base_config = sample_config()
    # We need to have an ensemble model to test ModelEvaluator correctly,
    # so we can't use the finished_experiment fixture
    base_config['grid_config'] = {
        'sklearn.ensemble.ExtraTreesClassifier': {
            'n_estimators': [10],
            'criterion': ['gini'],
            'max_depth': [1],
            'max_features': ['sqrt'],
            'min_samples_split': [2],
        }
    }

    SingleThreadedExperiment(
        base_config,
        db_engine=shared_db_engine,
        project_path=shared_project_storage.project_path
    ).run()

    return ModelEvaluator(1, 1, shared_db_engine)
def main():
    args = parse_args()
    dburl = os.environ['DBURL']
    hiv_engine = create_engine(dburl, pool_pre_ping=True)

    with open(args.config_path) as f:
        experiment_config = yaml.load(f)

    experiment = SingleThreadedExperiment(
        config=experiment_config,
        db_engine=hiv_engine,
        project_path=args.project_path,
        replace=False,
    )

    experiment.validate()
    experiment.run()
def model_evaluator(shared_db_engine, shared_project_storage):
    """Returns an instantiated ModelEvaluator available at module scope"""
    populate_source_data(shared_db_engine)
    base_config = sample_config()
    # We need to have an ensemble model to test ModelEvaluator correctly,
    # so we can't use the finished_experiment fixture
    base_config["grid_config"] = {
        "sklearn.ensemble.ExtraTreesClassifier": {
            "n_estimators": [10],
            "criterion": ["gini"],
            "max_depth": [1],
            "max_features": ["sqrt"],
            "min_samples_split": [2],
        }
    }

    with mock.patch("triage.util.conf.open", side_effect=open_side_effect) as mock_file:
        SingleThreadedExperiment(
            base_config,
            db_engine=shared_db_engine,
            project_path=shared_project_storage.project_path,
        ).run()

    return ModelEvaluator(1, 1, shared_db_engine)
import os

import sqlalchemy
import yaml

from catwalk.storage import FSModelStorageEngine
from triage.experiments import SingleThreadedExperiment

food_db = os.environ.get('FOOD_DB_URL')
print(food_db)

with open('inspections-training.yaml') as f:
    experiment_config = yaml.load(f)

experiment = SingleThreadedExperiment(
    config=experiment_config,
    db_engine=sqlalchemy.create_engine(food_db),
    model_storage_class=FSModelStorageEngine,
    project_path='./triage-generated')

experiment.run()