def test_baseline_exception_handling(sample_matrix_store):
    grid_config = {
        'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature': {
            'feature': ['feature_one', 'feature_three']
        }
    }
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        project_path = 'econ-dev/inspections'
        model_storage_engine = S3ModelStorageEngine(project_path)
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                db_engine=db_engine,
                model_grouper=ModelGrouper(),
            )
            train_tasks = trainer.generate_train_tasks(
                grid_config, dict(), sample_matrix_store)

            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            model_ids = []
            for train_task in train_tasks:
                model_ids.append(trainer.process_train_task(**train_task))
            assert model_ids == [1, None]
def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash
    run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=1234,
        experiment_kwargs={"key": "value"},
        db_engine=db_engine_with_results_schema,
    )
    assert run_id
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run = session.query(TriageRun).get(run_id)
        assert experiment_run.run_hash == experiment_hash
        assert experiment_run.experiment_class_path == "mymodule.MyClassName"
        assert experiment_run.random_seed == 1234
        assert experiment_run.experiment_kwargs == {"key": "value"}
    new_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=5432,
        experiment_kwargs={"key": "value"},
        db_engine=db_engine_with_results_schema,
    )
    assert new_run_id > run_id
def fake_trained_model(db_engine, train_matrix_uuid="efgh",
                       train_end_time=datetime.datetime(2016, 1, 1)):
    """Creates and stores a trivial trained model and training matrix

    Args:
        db_engine (sqlalchemy.engine)

    Returns:
        (tuple) the trained mock model and its model id for database retrieval
    """
    session = sessionmaker(db_engine)()
    session.merge(Matrix(matrix_uuid=train_matrix_uuid))

    # Create the fake trained model and store in db
    trained_model = MockTrainedModel()
    db_model = Model(
        model_hash="abcd",
        train_matrix_uuid=train_matrix_uuid,
        train_end_time=train_end_time,
    )
    session.add(db_model)
    session.commit()
    model_id = db_model.model_id
    session.close()
    return trained_model, model_id
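
# A minimal usage sketch for fake_trained_model above (illustrative, not part
# of the original suite; the helper name is hypothetical). It exercises the
# round trip: seed a throwaway database, store the mock model, get its id back.
def _example_fake_trained_model_usage():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        trained_model, model_id = fake_trained_model(
            db_engine,
            train_matrix_uuid="wxyz",
            train_end_time=datetime.datetime(2017, 1, 1),
        )
        # the mock model behaves like a classifier for downstream tests,
        # and the id can be used to look the stored model back up
        assert model_id is not None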
def test_custom_groups(sample_matrix_store, grid_config):
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        init_engine(engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            MatrixFactory(matrix_uuid="1234")
            session.commit()
            # create training set
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(['class_path']),
                db_engine=engine,
            )
            model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            # expect only one model group now
            records = [
                row[0] for row in engine.execute(
                    'select distinct model_group_id from model_metadata.models'
                )
            ]
            assert len(records) == 1
            assert records[0] == model_ids[0]
def get_matrix_store(project_storage, matrix=None, metadata=None, write_to_db=True):
    """Return a matrix store associated with the given project storage.

    Also adds an entry in the matrices table if it doesn't exist already.

    Args:
        project_storage (triage.component.catwalk.storage.ProjectStorage) A project's storage
        matrix (dataframe, optional): A matrix to store. Defaults to the output of matrix_creator()
        metadata (dict, optional): matrix metadata. Defaults to the output of matrix_metadata_creator()
        write_to_db (bool, optional): whether to ensure a matrices-table entry
            exists for the stored matrix. Defaults to True
    """
    if matrix is None:
        matrix = matrix_creator()
    if not metadata:
        metadata = matrix_metadata_creator()
    matrix["as_of_date"] = matrix["as_of_date"].apply(pd.Timestamp)
    matrix.set_index(MatrixStore.indices, inplace=True)
    matrix_store = project_storage.matrix_storage_engine().get_store(
        filename_friendly_hash(metadata))
    matrix_store.metadata = metadata
    new_matrix = matrix.copy()
    labels = new_matrix.pop(matrix_store.label_column_name)
    matrix_store.matrix_label_tuple = new_matrix, labels
    matrix_store.save()
    matrix_store.clear_cache()
    if write_to_db:
        if (session.query(Matrix).filter(
                Matrix.matrix_uuid == matrix_store.uuid).count() == 0):
            MatrixFactory(matrix_uuid=matrix_store.uuid)
            session.commit()
    return matrix_store
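
# Usage sketch for get_matrix_store above (illustrative; `project_storage` is
# assumed to be a triage.component.catwalk.storage.ProjectStorage fixture, as
# the docstring describes, and the helper name is hypothetical).
def _example_get_matrix_store_usage(project_storage):
    # with no arguments beyond storage, the defaults from matrix_creator()
    # and matrix_metadata_creator() are used and a matrices-table row is written
    matrix_store = get_matrix_store(project_storage)
    assert matrix_store.uuid
    # skip the database write when only the on-disk store is needed
    cache_only_store = get_matrix_store(project_storage, write_to_db=False)
    assert cache_only_store.metadata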
def test_uniform_distribution_entity_id_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model, feature='feature_{}'.format(i))
            for i in range(0, 10)
        ]
        data_dict = {'entity_id': [1, 2]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        test_store = InMemoryMatrixStore(
            matrix=pandas.DataFrame.from_dict(data_dict),
            metadata=sample_metadata())
        session.commit()
        results = uniform_distribution(db_engine,
                                       model_id=model.model_id,
                                       as_of_date='2016-01-01',
                                       test_matrix_store=test_store,
                                       n_ranks=5)

        assert len(results) == 10  # 5 features x 2 entities
        for result in results:
            assert 'entity_id' in result
            assert 'feature_name' in result
            assert 'score' in result
            assert 'feature_value' in result
            assert result['feature_value'] == 0.5
            assert 0 <= result['score'] <= 1
            assert isinstance(result['feature_name'], str)
            assert result['entity_id'] in [1, 2]
def filter_same_train_end_times(self, engine):
    ensure_db(engine)
    init_engine(engine)
    mg1 = ModelGroupFactory(model_group_id=1, model_type='modelType1')
    mg2 = ModelGroupFactory(model_group_id=2, model_type='modelType2')
    mg3 = ModelGroupFactory(model_group_id=3, model_type='modelType3')
    mg4 = ModelGroupFactory(model_group_id=4, model_type='modelType4')
    # model group 1
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
    # model group 2 only has one timestamp, should not pass
    ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
    # model group 3
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
    # model group 4 only has two timestamps, should not pass
    ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))
    session.commit()
    train_end_times = ['2014-01-01', '2015-01-01', '2016-01-01', '2017-01-01']
    model_groups = [1, 2, 3, 4]
    model_group_ids = model_groups_filter(
        train_end_times=train_end_times,
        initial_model_group_ids=model_groups,
        models_table='models',
        db_engine=engine)
    return model_group_ids
def replace_db(arg):
    self.new_server = testing.postgresql.Postgresql(port=port)
    db_engine = create_engine(self.new_server.url())
    ensure_db(db_engine)
    init_engine(db_engine)
    # Creates a matrix entry in the matrices table with uuid from train_metadata
    MatrixFactory(matrix_uuid="1234")
    session.commit()
def test_increment_field(db_engine_with_results_schema):
    experiment_run = ExperimentRunFactory()
    factory_session.commit()
    increment_field('matrices_made', experiment_run.run_id, db_engine_with_results_schema)
    increment_field('matrices_made', experiment_run.run_id, db_engine_with_results_schema)
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run_from_db = session.query(ExperimentRun).get(experiment_run.run_id)
        assert experiment_run_from_db.matrices_made == 2
def update_ranks_test(predictor, entities_scores_labels, rank_col,
                      expected_result, model_random_seed=12345,
                      need_seed_data=True):
    """Not a test in itself but rather a utility called by many of the ranking tests"""
    ensure_db(predictor.db_engine)
    init_engine(predictor.db_engine)
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    as_of_date = datetime.datetime(2012, 1, 1)
    if need_seed_data:
        matrix = MatrixFactory(matrix_uuid=matrix_uuid)
        model = ModelFactory(model_id=model_id, random_seed=model_random_seed)
        for entity_id, score, label in entities_scores_labels:
            PredictionFactory(model_rel=model,
                              matrix_rel=matrix,
                              as_of_date=as_of_date,
                              entity_id=entity_id,
                              score=score,
                              label_value=int(label))
        factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''select entity_id, {rank_col}::float
        from {matrix_type}_results.predictions
        where as_of_date = %s and model_id = %s and matrix_uuid = %s
        order by {rank_col} asc''',
        (as_of_date, model_id, matrix_uuid)))
    assert ranks == expected_result

    # Test that the predictions metadata table is populated
    metadata_records = [
        row for row in predictor.db_engine.execute(
            f"""select tiebreaker_ordering, prediction_metadata.random_seed, models.random_seed
            from {matrix_type}_results.prediction_metadata
            join triage_metadata.models using (model_id)
            join triage_metadata.matrices using (matrix_uuid)
            """)
    ]
    assert len(metadata_records) == 1
    tiebreaker_ordering, random_seed, received_model_random_seed = metadata_records[0]
    if tiebreaker_ordering == 'random':
        assert random_seed == model_random_seed
    else:
        assert not random_seed
    assert tiebreaker_ordering == predictor.rank_order
    assert received_model_random_seed == model_random_seed
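
# Illustrative call of the update_ranks_test utility above (hypothetical
# values and helper name; `predictor` is assumed to come from a fixture like
# the one used in test_prediction_ranks_multiple_dates below). With no score
# ties, rank_abs_no_ties should simply follow descending score order.
def _example_update_ranks_usage(predictor):
    update_ranks_test(
        predictor,
        # (entity_id, score, label) triples, deliberately tie-free
        entities_scores_labels=((1, 0.9, True), (2, 0.5, False)),
        rank_col="rank_abs_no_ties",
        expected_result=((1, 1.0), (2, 2.0)),
    )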
def test_get_run_for_update(db_engine_with_results_schema):
    experiment_run = TriageRunFactory()
    factory_session.commit()
    with get_run_for_update(db_engine=db_engine_with_results_schema,
                            run_id=experiment_run.run_id) as run_obj:
        run_obj.stacktrace = "My stacktrace"
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run_from_db = session.query(TriageRun).get(experiment_run.run_id)
        assert experiment_run_from_db.stacktrace == "My stacktrace"
def test_Audition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type) for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model, metric=metric, parameter=parameter)
        session.commit()

        with tempfile.TemporaryDirectory() as td:
            with mock.patch('os.getcwd') as mock_getcwd:
                mock_getcwd.return_value = td
                AuditionRunner(config_dict=config, db_engine=db_engine, directory=td).run()
                assert len(os.listdir(os.getcwd())) == 6
def test_ModelEvaluator_needs_evaluation_with_bias_audit(db_engine_with_results_schema):
    # test that if a bias audit config is passed, and there are no matching bias audits
    # in the database, needs_evaluation returns true.
    # this all assumes that evaluations are populated; those tests are in the
    # 'no_bias_audit' test
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        bias_config={'thresholds': {'top_n': [2]}},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()
    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'as_of_times': [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test", "1234", 5, db_engine_with_results_schema,
        metadata_overrides=metadata_overrides)
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
def test_n_jobs_not_new_model(sample_matrix_store):
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10, 100, 1000]
        },
        'sklearn.ensemble.RandomForestClassifier': {
            'n_estimators': [10, 100],
            'max_features': ['sqrt', 'log2'],
            'max_depth': [5, 10, 15, 20],
            'criterion': ['gini', 'entropy'],
            'n_jobs': [12, 24],
        }
    }
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=S3ModelStorageEngine('econ-dev/inspections'),
                db_engine=db_engine,
                model_grouper=ModelGrouper())
            train_tasks = trainer.generate_train_tasks(
                grid_config,
                dict(),
                sample_matrix_store,
            )

            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            # 32+3; would be (32*2)+3 if we didn't remove tasks that differ only in n_jobs
            assert len(train_tasks) == 35
            assert len([
                task for task in train_tasks if 'n_jobs' in task['parameters']
            ]) == 32

            for train_task in train_tasks:
                trainer.process_train_task(**train_task)

            for row in db_engine.execute(
                    'select model_parameters from model_metadata.model_groups'):
                assert 'n_jobs' not in row[0]
def __init__(
    self,
    matrix_type,
    matrix_uuid,
    label_count,
    db_engine,
    init_labels=None,
    metadata_overrides=None,
    matrix=None,
    init_as_of_dates=None,
):
    base_metadata = {
        "feature_start_time": datetime.date(2014, 1, 1),
        "end_time": datetime.date(2015, 1, 1),
        "as_of_date_frequency": "1y",
        "matrix_id": "some_matrix",
        "label_name": "label",
        "label_timespan": "3month",
        "indices": MatrixStore.indices,
        "matrix_type": matrix_type,
        "as_of_times": [datetime.date(2014, 10, 1), datetime.date(2014, 7, 1)],
    }
    metadata_overrides = metadata_overrides or {}
    base_metadata.update(metadata_overrides)
    if matrix is None:
        matrix = pd.DataFrame.from_dict({
            "entity_id": [1, 2],
            "as_of_date": [pd.Timestamp(2014, 10, 1), pd.Timestamp(2014, 7, 1)],
            "feature_one": [3, 4],
            "feature_two": [5, 6],
            "label": [7, 8],
        }).set_index(MatrixStore.indices)
    if init_labels is None:
        init_labels = []
    labels = matrix.pop("label")
    self.matrix_label_tuple = matrix, labels
    self.metadata = base_metadata
    self.label_count = label_count
    self.init_labels = pd.Series(init_labels, dtype="float64")
    self.matrix_uuid = matrix_uuid
    self.init_as_of_dates = init_as_of_dates or []

    session = sessionmaker(db_engine)()
    session.add(Matrix(matrix_uuid=matrix_uuid))
    session.commit()
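
# Illustrative construction of the mock store whose __init__ is defined above
# (hypothetical engine argument and helper name; mirrors how the evaluator
# tests below instantiate MockMatrixStore).
def _example_mock_matrix_store(db_engine):
    mock_store = MockMatrixStore(
        matrix_type="test",
        matrix_uuid="abcd",
        label_count=2,
        db_engine=db_engine,
        # any base_metadata key can be overridden per-test
        metadata_overrides={"as_of_date_frequency": "1month"},
    )
    assert mock_store.metadata["matrix_type"] == "test"
    assert mock_store.metadata["as_of_date_frequency"] == "1month"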
def test_prediction_ranks_multiple_dates(project_storage, db_engine):
    """make sure that multiple as-of-dates in a single matrix are handled correctly.

    keep the other variables simple by making no within-date ties that would
    end up testing the tiebreaker logic: just data for two dates that could
    theoretically confound a bad ranking method:
    - a different order for entities in the two dates
    - each date has some entities not in the other
    """
    ensure_db(db_engine)
    init_engine(db_engine)
    predictor = Predictor(project_storage.model_storage_engine(), db_engine, 'worst')
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    entities_dates_and_scores = (
        (23, datetime.datetime(2012, 1, 1), 0.95),
        (34, datetime.datetime(2012, 1, 1), 0.94),
        (45, datetime.datetime(2013, 1, 1), 0.92),
        (23, datetime.datetime(2013, 1, 1), 0.45),
    )
    expected_result = (
        (23, datetime.datetime(2012, 1, 1), 1),
        (34, datetime.datetime(2012, 1, 1), 2),
        (45, datetime.datetime(2013, 1, 1), 3),
        (23, datetime.datetime(2013, 1, 1), 4),
    )
    matrix = MatrixFactory(matrix_uuid=matrix_uuid)
    model = ModelFactory(model_id=model_id)
    for entity_id, as_of_date, score in entities_dates_and_scores:
        PredictionFactory(model_rel=model,
                          matrix_rel=matrix,
                          as_of_date=as_of_date,
                          entity_id=entity_id,
                          score=score)
    factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''select entity_id, as_of_date, rank_abs_no_ties
        from {matrix_type}_results.predictions
        where model_id = %s and matrix_uuid = %s
        order by rank_abs_no_ties''',
        (model_id, matrix_uuid)))
    assert ranks == expected_result
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)
            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid,
            )
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            # The train_store uuid is stored in fake_trained_model.
            # Storing the test_store uuid here
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Runs the same test for training and testing predictions
            for store, mat_type in zip((train_store, test_store), ("train", "test")):
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns())
                # assert
                # 1. that we calculated predictions
                assert len(predict_proba) > 0
                # 2. that the predictions table entries are present and
                # can be linked to the original models
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                        from {}_results.{}_predictions
                        join model_metadata.models using (model_id)'''.format(
                            mat_type, mat_type))
                ]
                assert len(records) > 0
def fake_trained_model(db_engine, train_matrix_uuid='efgh'):
    """Creates and stores a trivial trained model and training matrix

    Args:
        db_engine (sqlalchemy.engine)

    Returns:
        (tuple) the trained mock model and its model id for database retrieval
    """
    session = sessionmaker(db_engine)()
    session.merge(Matrix(matrix_uuid=train_matrix_uuid))

    # Create the fake trained model and store in db
    trained_model = MockTrainedModel()
    db_model = Model(model_hash='abcd', train_matrix_uuid=train_matrix_uuid)
    session.add(db_model)
    session.commit()
    return trained_model, db_model.model_id
def get_matrix_store(project_storage, matrix=None, metadata=None):
    """Return a matrix store associated with the given project storage.

    Also adds an entry in the matrices table if it doesn't exist already.

    Args:
        project_storage (triage.component.catwalk.storage.ProjectStorage) A project's storage
        matrix (dataframe, optional): A matrix to store. Defaults to the output of matrix_creator()
        metadata (dict, optional): matrix metadata. Defaults to the output of matrix_metadata_creator()
    """
    if matrix is None:
        matrix = matrix_creator()
    if not metadata:
        metadata = matrix_metadata_creator()
    matrix_store = project_storage.matrix_storage_engine().get_store(metadata['metta-uuid'])
    matrix_store.matrix = matrix
    matrix_store.metadata = metadata
    matrix_store.save()
    if session.query(Matrix).filter(Matrix.matrix_uuid == matrix_store.uuid).count() == 0:
        MatrixFactory(matrix_uuid=matrix_store.uuid)
        session.commit()
    return matrix_store
def filter_train_end_times(self, engine, train_end_times):
    ensure_db(engine)
    init_engine(engine)
    mg1 = ModelGroupFactory(model_group_id=1, model_type="modelType1")
    mg2 = ModelGroupFactory(model_group_id=2, model_type="modelType2")
    mg3 = ModelGroupFactory(model_group_id=3, model_type="modelType3")
    mg4 = ModelGroupFactory(model_group_id=4, model_type="modelType4")
    mg5 = ModelGroupFactory(model_group_id=5, model_type="modelType5")
    # model group 1
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
    ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
    # model group 2 only has one timestamp
    ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
    # model group 3
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
    ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
    # model group 4 only has two timestamps
    ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))
    # model group 5 only has three timestamps
    ModelFactory(model_group_rel=mg5, train_end_time=datetime(2014, 1, 1))
    ModelFactory(model_group_rel=mg5, train_end_time=datetime(2015, 1, 1))
    ModelFactory(model_group_rel=mg5, train_end_time=datetime(2016, 1, 1))
    session.commit()
    model_groups = [1, 2, 3, 4, 5]
    model_group_ids = model_groups_filter(
        train_end_times=train_end_times,
        initial_model_group_ids=model_groups,
        models_table="models",
        db_engine=engine,
    )
    return model_group_ids
def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash
    run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=db_engine_with_results_schema,
    )
    assert run_id
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run = session.query(ExperimentRun).get(run_id)
        assert experiment_run.experiment_hash == experiment_hash
        assert experiment_run.experiment_class_path == 'mymodule.MyClassName'
        assert experiment_run.experiment_kwargs == {'key': 'value'}
    new_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=db_engine_with_results_schema,
    )
    assert new_run_id > run_id
def test_model_trainer(sample_matrix_store, grid_config):
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')

            # Creates a matrix entry in the matrices table with uuid from metadata above
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(),
                db_engine=db_engine,
            )
            model_ids = trainer.train_models(grid_config=grid_config,
                                             misc_db_parameters=dict(),
                                             matrix_store=sample_matrix_store)

            # assert
            # 1. that the models and feature importances table entries are present
            records = [
                row for row in db_engine.execute(
                    'select * from train_results.feature_importances')
            ]
            assert len(records) == 4 * 2  # maybe exclude entity_id? yes

            records = [
                row for row in db_engine.execute(
                    'select model_hash from model_metadata.models')
            ]
            assert len(records) == 4
            hashes = [row[0] for row in records]

            # 2. that the model groups are distinct
            records = [
                row for row in db_engine.execute(
                    'select distinct model_group_id from model_metadata.models')
            ]
            assert len(records) == 4

            # 3. that the model sizes are saved in the table and all are < 1 kB
            records = [
                row for row in db_engine.execute(
                    'select model_size from model_metadata.models')
            ]
            assert len(records) == 4
            for i in records:
                size = i[0]
                assert size < 1

            # 4. that all four models are cached
            model_pickles = [
                model_storage_engine.get_store(model_hash).load()
                for model_hash in hashes
            ]
            assert len(model_pickles) == 4
            assert len([x for x in model_pickles if x is not None]) == 4

            # 5. that the stored models can make predictions on new data
            test_matrix = pandas.DataFrame.from_dict({
                'entity_id': [3, 4],
                'feature_one': [4, 4],
                'feature_two': [6, 5],
            })
            test_matrix = InMemoryMatrixStore(
                matrix=test_matrix, metadata=sample_metadata()).matrix
            for model_pickle in model_pickles:
                predictions = model_pickle.predict(test_matrix)
                assert len(predictions) == 2

            # 6. when run again, same models are returned
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert len([
                row for row in db_engine.execute(
                    'select model_hash from model_metadata.models')
            ]) == 4
            assert model_ids == new_model_ids

            # 7. if replace is set, update non-unique attributes and feature importances
            max_batch_run_time = [
                row[0] for row in db_engine.execute(
                    'select max(batch_run_time) from model_metadata.models')
            ][0]
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(
                    model_group_keys=['label_name', 'label_timespan']),
                db_engine=db_engine,
                replace=True)
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store,
            )
            assert model_ids == new_model_ids
            assert [
                row['model_id'] for row in db_engine.execute(
                    'select model_id from model_metadata.models order by 1 asc')
            ] == model_ids
            new_max_batch_run_time = [
                row[0] for row in db_engine.execute(
                    'select max(batch_run_time) from model_metadata.models')
            ][0]
            assert new_max_batch_run_time > max_batch_run_time

            records = [
                row for row in db_engine.execute(
                    'select * from train_results.feature_importances')
            ]
            assert len(records) == 4 * 2  # maybe exclude entity_id? yes

            # 8. if the cache is missing but the metadata is still there, reuse the metadata
            for row in db_engine.execute(
                    'select model_hash from model_metadata.models'):
                model_storage_engine.get_store(row[0]).delete()
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert model_ids == sorted(new_model_ids)

            # 9. that the generator interface works the same way
            new_model_ids = trainer.generate_trained_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert model_ids == sorted([model_id for model_id in new_model_ids])
def test_ModelEvaluator_needs_evaluation_no_bias_audit(db_engine_with_results_schema):
    # TEST SETUP:
    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs,
    # both overall and for each subset
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""] + [filename_friendly_hash(subset) for subset in SUBSETS]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="100_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "as_of_times": [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    train_matrix_store = MockMatrixStore(
        "train",
        "2345",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@", "recall@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)
        assert not ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_without_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=train_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )
    session.close()
    session.remove()
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like
        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [
            {"label_definition": "label_1"} if i % 2 == 0
            else {"label_definition": "label_2"}
            for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 7, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time
            )

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(
                    model_rel=model, metric=metric, parameter=parameter
                )
        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")['model_groups']) == sum(
            [x["label_definition"] == "label_1" for x in model_configs]
        )

        # Expect no baseline model groups
        assert len(pre_aud.get_model_groups_from_label("label_1")['baseline_model_groups']) == 0

        # Expect the number of model groups with a certain experiment_hash
        experiment_hash = list(
            pd.read_sql(
                """SELECT experiment_hash
                FROM triage_metadata.models
                JOIN triage_metadata.experiment_models USING (model_hash)
                LIMIT 1""",
                con=db_engine,
            )["experiment_hash"]
        )[0]
        assert len(pre_aud.get_model_groups_from_experiment(experiment_hash)['model_groups']) == 1

        # Expect the number of model groups for a custom SQL query
        query = """
            SELECT DISTINCT(model_group_id)
            FROM triage_metadata.models
            JOIN triage_metadata.experiment_models USING (model_hash)
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(experiment_hash)
        assert len(pre_aud.get_model_groups(query)) == 1

        # Expect the number of train_end_times after 2014-01-01
        assert len(pre_aud.get_train_end_times(after="2014-01-01")) == 6
        query = """
            SELECT DISTINCT train_end_time
            FROM triage_metadata.models
            WHERE model_group_id IN ({})
            AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
        """.format(", ".join(map(str, pre_aud.model_groups)))
        assert len(pre_aud.get_train_end_times(query=query)) == 6
def create_sample_distance_table(engine):
    ensure_db(engine)
    init_engine(engine)
    model_groups = {
        'stable': ModelGroupFactory(model_type='myStableClassifier'),
        'spiky': ModelGroupFactory(model_type='mySpikeClassifier'),
    }

    class StableModelFactory(ModelFactory):
        model_group_rel = model_groups['stable']

    class SpikyModelFactory(ModelFactory):
        model_group_rel = model_groups['spiky']

    models = {
        'stable_3y_ago': StableModelFactory(train_end_time='2014-01-01'),
        'stable_2y_ago': StableModelFactory(train_end_time='2015-01-01'),
        'stable_1y_ago': StableModelFactory(train_end_time='2016-01-01'),
        'spiky_3y_ago': SpikyModelFactory(train_end_time='2014-01-01'),
        'spiky_2y_ago': SpikyModelFactory(train_end_time='2015-01-01'),
        'spiky_1y_ago': SpikyModelFactory(train_end_time='2016-01-01'),
    }
    session.commit()
    distance_table = DistanceFromBestTable(db_engine=engine,
                                           models_table='models',
                                           distance_table='dist_table')
    distance_table._create()

    stable_grp = model_groups['stable'].model_group_id
    spiky_grp = model_groups['spiky'].model_group_id
    stable_3y_id = models['stable_3y_ago'].model_id
    stable_3y_end = models['stable_3y_ago'].train_end_time
    stable_2y_id = models['stable_2y_ago'].model_id
    stable_2y_end = models['stable_2y_ago'].train_end_time
    stable_1y_id = models['stable_1y_ago'].model_id
    stable_1y_end = models['stable_1y_ago'].train_end_time
    spiky_3y_id = models['spiky_3y_ago'].model_id
    spiky_3y_end = models['spiky_3y_ago'].train_end_time
    spiky_2y_id = models['spiky_2y_ago'].model_id
    spiky_2y_end = models['spiky_2y_ago'].train_end_time
    spiky_1y_id = models['spiky_1y_ago'].model_id
    spiky_1y_end = models['spiky_1y_ago'].train_end_time
    distance_rows = [
        (stable_grp, stable_3y_id, stable_3y_end, 'precision@', '100_abs', 0.5, 0.6, 0.1, 0.5, 0.15),
        (stable_grp, stable_2y_id, stable_2y_end, 'precision@', '100_abs', 0.5, 0.84, 0.34, 0.5, 0.18),
        (stable_grp, stable_1y_id, stable_1y_end, 'precision@', '100_abs', 0.46, 0.67, 0.21, 0.5, 0.11),
        (spiky_grp, spiky_3y_id, spiky_3y_end, 'precision@', '100_abs', 0.45, 0.6, 0.15, 0.5, 0.19),
        (spiky_grp, spiky_2y_id, spiky_2y_end, 'precision@', '100_abs', 0.84, 0.84, 0.0, 0.5, 0.3),
        (spiky_grp, spiky_1y_id, spiky_1y_end, 'precision@', '100_abs', 0.45, 0.67, 0.22, 0.5, 0.12),
        (stable_grp, stable_3y_id, stable_3y_end, 'recall@', '100_abs', 0.4, 0.4, 0.0, 0.4, 0.0),
        (stable_grp, stable_2y_id, stable_2y_end, 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.5, 0.0),
        (stable_grp, stable_1y_id, stable_1y_end, 'recall@', '100_abs', 0.6, 0.6, 0.0, 0.6, 0.0),
        (spiky_grp, spiky_3y_id, spiky_3y_end, 'recall@', '100_abs', 0.65, 0.65, 0.0, 0.65, 0.0),
        (spiky_grp, spiky_2y_id, spiky_2y_end, 'recall@', '100_abs', 0.55, 0.55, 0.0, 0.55, 0.0),
        (spiky_grp, spiky_1y_id, spiky_1y_end, 'recall@', '100_abs', 0.45, 0.45, 0.0, 0.45, 0.0),
    ]
    for dist_row in distance_rows:
        engine.execute(
            'insert into dist_table values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
            dist_row)
    return distance_table, model_groups
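
# Illustrative use of the fixture-builder above (hypothetical helper name,
# not part of the original suite): the returned table can be queried directly
# and the model groups expose their generated ids.
def _example_distance_table_usage(engine):
    distance_table, model_groups = create_sample_distance_table(engine)
    stable_group_id = model_groups['stable'].model_group_id
    assert stable_group_id is not None
    # 12 rows were seeded: 2 groups x 3 models x 2 metrics
    row_count = [row[0] for row in engine.execute('select count(*) from dist_table')][0]
    assert row_count == 12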
def test_DistanceFromBestTable():
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        init_engine(engine)
        model_groups = {
            "stable": ModelGroupFactory(model_type="myStableClassifier"),
            "bad": ModelGroupFactory(model_type="myBadClassifier"),
            "spiky": ModelGroupFactory(model_type="mySpikeClassifier"),
        }

        class StableModelFactory(ModelFactory):
            model_group_rel = model_groups["stable"]

        class BadModelFactory(ModelFactory):
            model_group_rel = model_groups["bad"]

        class SpikyModelFactory(ModelFactory):
            model_group_rel = model_groups["spiky"]

        models = {
            "stable_3y_ago": StableModelFactory(train_end_time="2014-01-01"),
            "stable_2y_ago": StableModelFactory(train_end_time="2015-01-01"),
            "stable_1y_ago": StableModelFactory(train_end_time="2016-01-01"),
            "bad_3y_ago": BadModelFactory(train_end_time="2014-01-01"),
            "bad_2y_ago": BadModelFactory(train_end_time="2015-01-01"),
            "bad_1y_ago": BadModelFactory(train_end_time="2016-01-01"),
            "spiky_3y_ago": SpikyModelFactory(train_end_time="2014-01-01"),
            "spiky_2y_ago": SpikyModelFactory(train_end_time="2015-01-01"),
            "spiky_1y_ago": SpikyModelFactory(train_end_time="2016-01-01"),
        }

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 1))

        class MonthOutEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 31))
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 32))

        class Precision100Factory(ImmediateEvalFactory):
            metric = "precision@"
            parameter = "100_abs"

        class Precision100FactoryMonthOut(MonthOutEvalFactory):
            metric = "precision@"
            parameter = "100_abs"

        class Recall100Factory(ImmediateEvalFactory):
            metric = "recall@"
            parameter = "100_abs"

        class Recall100FactoryMonthOut(MonthOutEvalFactory):
            metric = "recall@"
            parameter = "100_abs"

        for (add_val, PrecFac, RecFac) in (
            (0, Precision100Factory, Recall100Factory),
            (-0.15, Precision100FactoryMonthOut, Recall100FactoryMonthOut),
        ):
            PrecFac(model_rel=models["stable_3y_ago"], value=0.6 + add_val)
            PrecFac(model_rel=models["stable_2y_ago"], value=0.57 + add_val)
            PrecFac(model_rel=models["stable_1y_ago"], value=0.59 + add_val)
            PrecFac(model_rel=models["bad_3y_ago"], value=0.4 + add_val)
            PrecFac(model_rel=models["bad_2y_ago"], value=0.39 + add_val)
            PrecFac(model_rel=models["bad_1y_ago"], value=0.43 + add_val)
            PrecFac(model_rel=models["spiky_3y_ago"], value=0.8 + add_val)
            PrecFac(model_rel=models["spiky_2y_ago"], value=0.4 + add_val)
            PrecFac(model_rel=models["spiky_1y_ago"], value=0.4 + add_val)
            RecFac(model_rel=models["stable_3y_ago"], value=0.55 + add_val)
            RecFac(model_rel=models["stable_2y_ago"], value=0.56 + add_val)
            RecFac(model_rel=models["stable_1y_ago"], value=0.55 + add_val)
            RecFac(model_rel=models["bad_3y_ago"], value=0.35 + add_val)
            RecFac(model_rel=models["bad_2y_ago"], value=0.34 + add_val)
            RecFac(model_rel=models["bad_1y_ago"], value=0.36 + add_val)
            RecFac(model_rel=models["spiky_3y_ago"], value=0.35 + add_val)
            RecFac(model_rel=models["spiky_2y_ago"], value=0.8 + add_val)
            RecFac(model_rel=models["spiky_1y_ago"], value=0.36 + add_val)
        session.commit()

        distance_table = DistanceFromBestTable(db_engine=engine,
                                               models_table="models",
                                               distance_table="dist_table")
        metrics = [
            {"metric": "precision@", "parameter": "100_abs"},
            {"metric": "recall@", "parameter": "100_abs"},
        ]
        model_group_ids = [mg.model_group_id for mg in model_groups.values()]
        distance_table.create_and_populate(
            model_group_ids, ["2014-01-01", "2015-01-01", "2016-01-01"], metrics)

        # get an ordered list of the models/groups for a particular metric/time
        query = """
            select model_id, raw_value, dist_from_best_case, dist_from_best_case_next_time
            from dist_table
            where metric = %s and parameter = %s and train_end_time = %s
            order by dist_from_best_case
        """
        prec_3y_ago = engine.execute(query, ("precision@", "100_abs", "2014-01-01"))
        assert [row for row in prec_3y_ago] == [
            (models["spiky_3y_ago"].model_id, 0.8, 0, 0.17),
            (models["stable_3y_ago"].model_id, 0.6, 0.2, 0),
            (models["bad_3y_ago"].model_id, 0.4, 0.4, 0.18),
        ]
        recall_2y_ago = engine.execute(query, ("recall@", "100_abs", "2015-01-01"))
        assert [row for row in recall_2y_ago] == [
            (models["spiky_2y_ago"].model_id, 0.8, 0, 0.19),
            (models["stable_2y_ago"].model_id, 0.56, 0.24, 0),
            (models["bad_2y_ago"].model_id, 0.34, 0.46, 0.19),
        ]
        assert distance_table.observed_bounds == {
            ("precision@", "100_abs"): (0.39, 0.8),
            ("recall@", "100_abs"): (0.34, 0.8),
        }
def test_Auditioner():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like
        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type) for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model, metric=metric, parameter=parameter)
        session.commit()

        # define a very loose filtering that should admit all model groups
        no_filtering = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
        ]
        model_group_ids = [mg.model_group_id for mg in model_groups]
        auditioner = Auditioner(db_engine, model_group_ids, train_end_times, no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups
        auditioner.plot_model_groups()

        # here, we pick thresholding rules that should definitely remove
        # all model groups from contention because they are too strict.
        remove_all = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
        ]
        auditioner.update_metric_filters(new_filters=remove_all)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # pass the argument instead and remove all model groups
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=0.0,
            threshold_value=1.1,
        )
        assert len(auditioner.thresholded_model_group_ids) == 0

        # one potential place for bugs would be when we pull back the rules
        # for being too restrictive. we want to make sure that the original list
        # is always used for thresholding, or else such a move would be impossible
        auditioner.update_metric_filters(new_filters=no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # pass the argument instead and let all model groups pass
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=1.0,
            threshold_value=0.0,
        )
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # now, we want to take this partially thresholded list and run it through
        # a grid of selection rules, meant to pick winners by a variety of
        # user-defined criteria
        rule_grid = [
            {
                "shared_parameters": [
                    {"metric": "precision@", "parameter": "100_abs"},
                    {"metric": "recall@", "parameter": "100_abs"},
                ],
                "selection_rules": [
                    {
                        "name": "most_frequent_best_dist",
                        "dist_from_best_case": [0.1, 0.2, 0.3],
                        "n": 1,
                    },
                    {"name": "best_current_value", "n": 1},
                ],
            },
            {
                "shared_parameters": [
                    {"metric1": "precision@", "parameter1": "100_abs"}
                ],
                "selection_rules": [
                    {
                        "name": "best_average_two_metrics",
                        "metric2": ["recall@"],
                        "parameter2": ["100_abs"],
                        "metric1_weight": [0.4, 0.5, 0.6],
                        "n": 1,
                    }
                ],
            },
        ]
        auditioner.register_selection_rule_grid(rule_grid, plot=False)
        final_model_group_ids = auditioner.selection_rule_model_group_ids

        # we expect the result to be a mapping of selection rule name to model group id
        assert isinstance(final_model_group_ids, dict)
        # we expect that there is one winner for each selection rule
        assert sorted(final_model_group_ids.keys()) == sorted(
            [rule.descriptive_name for rule in auditioner.selection_rules])
def test_integration():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'

            # create train and test matrices
            train_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            train_metadata = {
                'feature_start_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_timespan': '1y',
                'feature_names': ['ft1', 'ft2'],
                'metta-uuid': '1234',
                'indices': ['entity_id'],
                'matrix_type': 'train'
            }
            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            train_store = InMemoryMatrixStore(train_matrix, sample_metadata())

            as_of_dates = [
                datetime.date(2016, 12, 21),
                datetime.date(2017, 1, 21)
            ]
            test_stores = [
                InMemoryMatrixStore(
                    pandas.DataFrame.from_dict({
                        'entity_id': [3],
                        'feature_one': [8],
                        'feature_two': [5],
                        'label': [5]
                    }),
                    {
                        'label_name': 'label',
                        'label_timespan': '1y',
                        'end_time': as_of_date,
                        'metta-uuid': '1234',
                        'indices': ['entity_id'],
                        'matrix_type': 'test',
                        'as_of_date_frequency': '1month'
                    })
                for as_of_date in as_of_dates
            ]

            model_storage_engine = S3ModelStorageEngine(project_path)
            experiment_hash = save_experiment_and_get_hash({}, db_engine)

            # instantiate pipeline objects
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=experiment_hash,
                model_storage_engine=model_storage_engine,
                db_engine=db_engine,
            )
            predictor = Predictor(project_path, model_storage_engine, db_engine)
            model_evaluator = ModelEvaluator(
                [{'metrics': ['precision@'], 'thresholds': {'top_n': [5]}}],
                [{}],
                db_engine)

            # run the pipeline
            grid_config = {
                'sklearn.linear_model.LogisticRegression': {
                    'C': [0.00001, 0.0001],
                    'penalty': ['l1', 'l2'],
                    'random_state': [2193]
                }
            }
            model_ids = trainer.train_models(grid_config=grid_config,
                                             misc_db_parameters=dict(),
                                             matrix_store=train_store)
            for model_id in model_ids:
                for as_of_date, test_store in zip(as_of_dates, test_stores):
                    predictions_proba = predictor.predict(
                        model_id,
                        test_store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=['feature_one', 'feature_two'])
                    model_evaluator.evaluate(
                        predictions_proba,
                        test_store,
                        model_id,
                    )

            # assert
            # 1. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, model_id, as_of_date
                    from test_results.test_predictions
                    join model_metadata.models using (model_id)
                    order by 3, 2''')
            ]
            assert records == [
                (3, 1, datetime.datetime(2016, 12, 21)),
                (3, 2, datetime.datetime(2016, 12, 21)),
                (3, 3, datetime.datetime(2016, 12, 21)),
                (3, 4, datetime.datetime(2016, 12, 21)),
                (3, 1, datetime.datetime(2017, 1, 21)),
                (3, 2, datetime.datetime(2017, 1, 21)),
                (3, 3, datetime.datetime(2017, 1, 21)),
                (3, 4, datetime.datetime(2017, 1, 21)),
            ]

            # that evaluations are there
            records = [
                row for row in db_engine.execute('''
                    select model_id, evaluation_start_time, metric, parameter
                    from test_results.test_evaluations order by 2, 1''')
            ]
            assert records == [
                (1, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (1, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            ]
def test_evaluating_early_warning(db_engine_with_results_schema):
    num_entities = 10
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

    # Set up testing configuration parameters
    testing_metric_groups = [
        {
            "metrics": [
                "precision@",
                "recall@",
                "true positives@",
                "true negatives@",
                "false positives@",
                "false negatives@",
            ],
            "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
        },
        {
            "metrics": [
                "f1",
                "mediocre",
                "accuracy",
                "roc_auc",
                "average precision score",
            ]
        },
        {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
    ]
    training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]
    custom_metrics = {"mediocre": always_half}

    # Acquire fake data and objects to be used in the tests
    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
        custom_metrics=custom_metrics,
    )
    fake_test_matrix_store = MockMatrixStore(
        matrix_type="test",
        matrix_uuid="efgh",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    fake_train_matrix_store = MockMatrixStore(
        matrix_type="train",
        matrix_uuid="1234",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from test_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "efgh" for matrix_uuid in matrix_uuids)

    # Evaluate the training metrics and test
    model_evaluator.evaluate(
        trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
    )
    records = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            """select distinct(metric || parameter)
            from train_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        )
    ]
    assert records == ["accuracy", "roc_auc"]

    # Run tests for overall and subset evaluations
    for subset in SUBSETS:
        if subset is None:
            where_hash = ""
        else:
            populate_subset_data(
                db_engine_with_results_schema, subset, list(range(num_entities))
            )
            SubsetFactory(subset_hash=filename_friendly_hash(subset))
            session.commit()
            where_hash = f"and subset_hash = '{filename_friendly_hash(subset)}'"

        # Evaluate the testing metrics and test for all of them
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
            subset=subset,
        )
        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                {where_hash}
                order by 1""",
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false negatives@10.0_pct",
            "false negatives@10_abs",
            "false negatives@5.0_pct",
            "false negatives@5_abs",
            "false positives@10.0_pct",
            "false positives@10_abs",
            "false positives@5.0_pct",
            "false positives@5_abs",
            "fbeta@0.75_beta",
            "fbeta@1.25_beta",
            "mediocre",
            "precision@10.0_pct",
            "precision@10_abs",
            "precision@5.0_pct",
            "precision@5_abs",
            "recall@10.0_pct",
            "recall@10_abs",
            "recall@5.0_pct",
            "recall@5_abs",
            "roc_auc",
            "true negatives@10.0_pct",
            "true negatives@10_abs",
            "true negatives@5.0_pct",
            "true negatives@5_abs",
            "true positives@10.0_pct",
            "true positives@10_abs",
            "true positives@5.0_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
            subset=subset,
        )
        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                {where_hash}
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from train_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "1234" for matrix_uuid in matrix_uuids)
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        # set up data, randomly generated by the factories but conforming
        # generally to what we expect results schema data to look like
        num_model_groups = 10
        model_types = [
            'classifier type {}'.format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [
            {'label_definition': 'label_1'} if i % 2 == 0
            else {'label_definition': 'label_2'}
            for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 7, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ('precision@', '100_abs'),
            ('recall@', '100_abs'),
            ('precision@', '50_abs'),
            ('recall@', '50_abs'),
            ('fpr@', '10_pct'),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model, metric=metric, parameter=parameter)
        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")) == \
            sum([x['label_definition'] == 'label_1' for x in model_configs])

        # Expect the number of model groups with a certain experiment_hash
        experiment_hash = list(
            pd.read_sql("SELECT experiment_hash FROM results.models limit 1",
                        con=db_engine)['experiment_hash'])[0]
        assert len(pre_aud.get_model_groups_from_experiment(experiment_hash)) == 1

        # Expect the number of model groups for a custom SQL query
        query = """
            SELECT DISTINCT(model_group_id)
            FROM results.models
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(experiment_hash)
        assert len(pre_aud.get_model_groups(query)) == 1

        # Expect the number of train_end_times after 2014-01-01
        assert len(pre_aud.get_train_end_times(after='2014-01-01')) == 6
        query = """
            SELECT DISTINCT train_end_time
            FROM results.models
            WHERE model_group_id IN ({})
            AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
        """.format(', '.join(map(str, pre_aud.model_groups)))
        assert len(pre_aud.get_train_end_times(query=query)) == 6