Example #1
def test_ModelEvaluator_needs_evaluation_with_bias_audit(
        db_engine_with_results_schema):
    # test that if a bias audit config is passed and there are no matching bias
    # audits in the database, needs_evaluations returns True
    # this all assumes that the evaluations themselves are populated; those
    # cases are covered in the 'no_bias_audit' test
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {
                    "top_n": [3]
                },
            },
        ],
        training_metric_groups=[],
        bias_config={'thresholds': {'top_n': [2]}},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
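    # an empty-string subset_hash denotes an evaluation over the full cohort
    # rather than a subset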
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'as_of_times': [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",  # matrix type
        "1234",  # matrix uuid
        5,  # label count
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
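For context, a minimal sketch of what the db_engine_with_results_schema fixture could look like. The implementation below is an assumption, shown only to make the example self-contained: the real fixture lives in the project's conftest.py and also installs the triage results schema before yielding the engine.

import pytest
import testing.postgresql
from sqlalchemy import create_engine

@pytest.fixture
def db_engine_with_results_schema():
    # Hypothetical sketch: spin up a throwaway Postgres and hand the test an
    # engine; the real fixture would also create the results schema here.
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        yield engine
        engine.dispose()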
Example #2
class ModelTester(object):
    def __init__(
        self,
        db_engine,
        model_storage_engine,
        matrix_storage_engine,
        replace,
        evaluator_config,
        individual_importance_config,
    ):
        self.matrix_storage_engine = matrix_storage_engine
        self.predictor = Predictor(
            db_engine=db_engine,
            model_storage_engine=model_storage_engine,
            replace=replace,
        )

        self.individual_importance_calculator = IndividualImportanceCalculator(
            db_engine=db_engine,
            n_ranks=individual_importance_config.get("n_ranks", 5),
            methods=individual_importance_config.get("methods", ["uniform"]),
            replace=replace,
        )

        self.evaluator = ModelEvaluator(
            db_engine=db_engine,
            sort_seed=evaluator_config.get("sort_seed", None),
            testing_metric_groups=evaluator_config.get(
                "testing_metric_groups", []),
            training_metric_groups=evaluator_config.get(
                "training_metric_groups", []),
        )

    def generate_model_test_tasks(self, split, train_store, model_ids):
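        # Build one task per non-empty test matrix in the split, pairing it
        # with the shared train matrix and the model ids to evaluate.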
        test_tasks = []
        for test_matrix_def, test_uuid in zip(split["test_matrices"],
                                              split["test_uuids"]):
            test_store = self.matrix_storage_engine.get_store(test_uuid)

            if test_store.empty:
                logging.warning(
                    "Test matrix for uuid %s was empty, no point in generating "
                    "predictions. Not creating test task.",
                    test_uuid,
                )
                continue
            test_tasks.append({
                "test_store": test_store,
                "train_store": train_store,
                "model_ids": [model_id for model_id in model_ids if model_id],
            })
        return test_tasks

    def process_model_test_task(self, test_store, train_store, model_ids):
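        # For each model id: compute and save individual importances on the
        # test matrix, then predict and evaluate on both the test and train
        # matrices unless their evaluations are already in the database.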
        as_of_times = test_store.metadata["as_of_times"]
        logging.info(
            "Testing and scoring all model ids with test matrix %s. "
            "as_of_times min: %s max: %s num: %s",
            test_store.uuid,
            min(as_of_times),
            max(as_of_times),
            len(as_of_times),
        )

        for model_id in model_ids:
            logging.info("Testing model id %s", model_id)

            self.individual_importance_calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)

            # Generate predictions for the testing data then training data
            for store in (test_store, train_store):
                if self.evaluator.needs_evaluations(store, model_id):
                    logging.info(
                        "The evaluations needed for matrix %s-%s and model %s "
                        "are not all present in db, so predicting and evaluating",
                        store.uuid, store.matrix_type, model_id)
                    predictions_proba = self.predictor.predict(
                        model_id,
                        store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=train_store.columns(),
                    )

                    self.evaluator.evaluate(
                        predictions_proba=predictions_proba,
                        matrix_store=store,
                        model_id=model_id,
                    )
                else:
                    logging.info(
                        "The evaluations needed for matrix %s-%s and model %s are all present "
                        "in db from a previous run (or none needed at all), so skipping!",
                        store.uuid, store.matrix_type, model_id)
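For reference, a minimal sketch of how ModelTester might be driven end to end. Every name here that is not defined in the example (db_engine, model_storage_engine, matrix_storage_engine, split, train_store, model_ids) is a placeholder assumed to be built elsewhere in the pipeline; the config values mirror the .get() defaults above and the metric group from Example #1.

# Hypothetical wiring sketch, not the project's canonical entry point.
tester = ModelTester(
    db_engine=db_engine,
    model_storage_engine=model_storage_engine,
    matrix_storage_engine=matrix_storage_engine,
    replace=False,
    evaluator_config={
        "testing_metric_groups": [
            {"metrics": ["precision@"], "thresholds": {"top_n": [3]}},
        ],
        "training_metric_groups": [],
    },
    individual_importance_config={},  # falls back to n_ranks=5, methods=["uniform"]
)

for task in tester.generate_model_test_tasks(split, train_store, model_ids):
    tester.process_model_test_task(**task)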