def test_ModelEvaluator_needs_evaluation_with_bias_audit(db_engine_with_results_schema):
    # Test that if a bias audit config is passed and there are no matching bias audits
    # in the database, needs_evaluations returns True.
    # This all assumes that evaluations are populated; those tests are in the 'no_bias_audit' test.
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        bias_config={'thresholds': {'top_n': [2]}},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'as_of_times': [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
class ModelTester(object):
    """Runs predictions, individual importances, and evaluations for trained models
    against test (and train) matrices."""

    def __init__(
        self,
        db_engine,
        model_storage_engine,
        matrix_storage_engine,
        replace,
        evaluator_config,
        individual_importance_config,
    ):
        self.matrix_storage_engine = matrix_storage_engine
        self.predictor = Predictor(
            db_engine=db_engine,
            model_storage_engine=model_storage_engine,
            replace=replace,
        )
        self.individual_importance_calculator = IndividualImportanceCalculator(
            db_engine=db_engine,
            n_ranks=individual_importance_config.get("n_ranks", 5),
            methods=individual_importance_config.get("methods", ["uniform"]),
            replace=replace,
        )
        self.evaluator = ModelEvaluator(
            db_engine=db_engine,
            sort_seed=evaluator_config.get("sort_seed", None),
            testing_metric_groups=evaluator_config.get("testing_metric_groups", []),
            training_metric_groups=evaluator_config.get("training_metric_groups", []),
        )

    def generate_model_test_tasks(self, split, train_store, model_ids):
        """Build one task per non-empty test matrix in the split."""
        test_tasks = []
        for test_matrix_def, test_uuid in zip(
            split["test_matrices"], split["test_uuids"]
        ):
            test_store = self.matrix_storage_engine.get_store(test_uuid)
            if test_store.empty:
                logging.warning(
                    "Test matrix for uuid %s was empty, no point in generating "
                    "predictions. Not creating test task.",
                    test_uuid,
                )
                continue
            test_tasks.append({
                "test_store": test_store,
                "train_store": train_store,
                "model_ids": [model_id for model_id in model_ids if model_id],
            })
        return test_tasks

    def process_model_test_task(self, test_store, train_store, model_ids):
        """Predict and evaluate each model on the test and train matrices as needed."""
        as_of_times = test_store.metadata["as_of_times"]
        logging.info(
            "Testing and scoring all model ids with test matrix %s. "
            "as_of_times min: %s max: %s num: %s",
            test_store.uuid,
            min(as_of_times),
            max(as_of_times),
            len(as_of_times),
        )
        for model_id in model_ids:
            logging.info("Testing model id %s", model_id)
            self.individual_importance_calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store
            )

            # Generate predictions for the testing data, then the training data
            for store in (test_store, train_store):
                if self.evaluator.needs_evaluations(store, model_id):
                    logging.info(
                        "The evaluations needed for matrix %s-%s and model %s "
                        "are not all present in db, so predicting and evaluating",
                        store.uuid,
                        store.matrix_type,
                        model_id,
                    )
                    predictions_proba = self.predictor.predict(
                        model_id,
                        store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=train_store.columns(),
                    )
                    self.evaluator.evaluate(
                        predictions_proba=predictions_proba,
                        matrix_store=store,
                        model_id=model_id,
                    )
                else:
                    logging.info(
                        "The evaluations needed for matrix %s-%s and model %s are all present "
                        "in db from a previous run (or none needed at all), so skipping!",
                        store.uuid,
                        store.matrix_type,
                        model_id,
                    )
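# A minimal sketch (not part of the original module) of how the two methods above are
# intended to be chained: tasks produced by generate_model_test_tasks are unpacked
# directly into process_model_test_task. The engines, the `split` dict (with its
# "test_matrices" and "test_uuids" keys), `train_store`, and `model_ids` are assumed
# to come from the surrounding pipeline; `run_testing_for_split` is a hypothetical
# helper name used only for illustration, and the configs mirror the defaults read
# in __init__.
def run_testing_for_split(db_engine, model_storage_engine, matrix_storage_engine,
                          split, train_store, model_ids):
    tester = ModelTester(
        db_engine=db_engine,
        model_storage_engine=model_storage_engine,
        matrix_storage_engine=matrix_storage_engine,
        replace=False,
        evaluator_config={
            "testing_metric_groups": [
                {"metrics": ["precision@"], "thresholds": {"top_n": [3]}},
            ],
            "training_metric_groups": [],
        },
        individual_importance_config={"n_ranks": 5, "methods": ["uniform"]},
    )
    # Each task dict carries test_store, train_store, and model_ids, so it can be
    # unpacked straight into process_model_test_task.
    for task in tester.generate_model_test_tasks(split, train_store, model_ids):
        tester.process_model_test_task(**task)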