def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    """Initializing tracking persists a TriageRun row and yields increasing run ids."""
    engine = db_engine_with_results_schema
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash

    first_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=1234,
        experiment_kwargs={"key": "value"},
        db_engine=engine,
    )
    assert first_run_id

    # The stored row should echo back everything that was passed in.
    with scoped_session(engine) as session:
        stored_run = session.query(TriageRun).get(first_run_id)
        assert stored_run.run_hash == experiment_hash
        assert stored_run.experiment_class_path == "mymodule.MyClassName"
        assert stored_run.random_seed == 1234
        assert stored_run.experiment_kwargs == {"key": "value"}

    # A second initialization for the same experiment creates a new, later row.
    second_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=5432,
        experiment_kwargs={"key": "value"},
        db_engine=engine,
    )
    assert second_run_id > first_run_id
def _needs_ranks(self, model_id, matrix_uuid, matrix_type):
    """Decide whether prediction ranks have to be (re)computed and stored.

    Ranks are needed when any of the following holds:
    - ``self.replace`` is set
    - no prediction metadata row exists for this model/matrix whose
      ``tiebreaker_ordering`` equals ``self.rank_order``
    - at least one prediction row for this model/matrix has a null in any of
      the four rank columns

    Args:
        model_id (int) the id of the model whose predictions are checked
        matrix_uuid (str) the uuid of the prediction matrix
        matrix_type: object exposing the ``prediction_metadata_obj`` and
            ``prediction_obj`` ORM classes to query

    Returns: (bool) True if ranks should be computed and stored
    """
    if self.replace:
        logger.info("Replace flag set, will compute and store ranks regardless")
        return True

    with scoped_session(self.db_engine) as session:
        # if the metadata is different (e.g. they changed the rank order)
        # or there are any null ranks we need to rank
        matching_metadata = session.query(matrix_type.prediction_metadata_obj).filter_by(
            model_id=model_id,
            matrix_uuid=matrix_uuid,
            tiebreaker_ordering=self.rank_order,
        )
        metadata_matches = session.query(matching_metadata.exists()).scalar()
        if not metadata_matches:
            logger.debug(
                "Prediction metadata does not match what is in configuration"
                ", will compute and store ranks"
            )
            return True

        prediction_obj = matrix_type.prediction_obj
        unranked_predictions = session.query(prediction_obj).filter(
            prediction_obj.model_id == model_id,
            prediction_obj.matrix_uuid == matrix_uuid,
            or_(
                prediction_obj.rank_abs_no_ties.is_(None),
                prediction_obj.rank_abs_with_ties.is_(None),
                prediction_obj.rank_pct_no_ties.is_(None),
                prediction_obj.rank_pct_with_ties.is_(None),
            ),
        )
        any_nulls_in_ranks = session.query(unranked_predictions.exists()).scalar()
        if any_nulls_in_ranks:
            # One message string: a second positional argument would be treated
            # as a %-format argument and break formatting of the log record.
            logger.debug(
                "At least one null in rankings in predictions table"
                ", will compute and store ranks"
            )
            return True

    logger.debug("No need to recompute prediction ranks")
    return False
def test_increment_field(db_engine_with_results_schema):
    """Two increments of 'matrices_made' leave the stored counter at 2."""
    engine = db_engine_with_results_schema
    tracked_run = ExperimentRunFactory()
    factory_session.commit()

    for _ in range(2):
        increment_field('matrices_made', tracked_run.run_id, engine)

    with scoped_session(engine) as session:
        stored_run = session.query(ExperimentRun).get(tracked_run.run_id)
        assert stored_run.matrices_made == 2
def test_get_run_for_update(db_engine_with_results_schema):
    """Attributes set inside get_run_for_update are persisted to the run row."""
    engine = db_engine_with_results_schema
    tracked_run = TriageRunFactory()
    factory_session.commit()

    # Mutate the run through the context manager under test ...
    with get_run_for_update(db_engine=engine, run_id=tracked_run.run_id) as run_obj:
        run_obj.stacktrace = "My stacktrace"

    # ... then read it back through an independent session.
    with scoped_session(engine) as session:
        stored_run = session.query(TriageRun).get(tracked_run.run_id)
        assert stored_run.stacktrace == "My stacktrace"
def test_experiment_tracker_in_parts(test_engine, project_path):
    """Running an experiment piecewise records the first entry point as start_method."""
    piecewise_experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
    )

    # Drive the experiment through its individual stages instead of run().
    piecewise_experiment.generate_matrices()
    piecewise_experiment.train_and_test_models()

    with scoped_session(test_engine) as session:
        tracked_run = session.query(ExperimentRun).get(piecewise_experiment.run_id)
        assert tracked_run.start_method == "generate_matrices"
def test_experiment_tracker_in_parts(test_engine, project_path):
    """Running an experiment piecewise records the first entry point as start_method."""
    # NOTE(review): the extent of the patch block is assumed to cover only
    # the constructor call -- confirm against the original layout.
    open_patch = mock.patch("triage.util.conf.open", side_effect=open_side_effect)
    with open_patch as mock_file:
        piecewise_experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=test_engine,
            project_path=project_path,
        )

    # Drive the experiment through its individual stages instead of run().
    piecewise_experiment.generate_matrices()
    piecewise_experiment.train_and_test_models()

    with scoped_session(test_engine) as session:
        tracked_run = session.query(TriageRun).get(piecewise_experiment.run_id)
        assert tracked_run.start_method == "generate_matrices"
def test_experiment_tracker_exception(db_engine, project_path):
    """A run that raises is tracked as failed, with a timestamp and a stacktrace."""
    failing_experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=db_engine,
        project_path=project_path,
    )

    # no source data means this should blow up
    with pytest.raises(Exception):
        failing_experiment.run()

    with scoped_session(db_engine) as session:
        tracked_run = session.query(ExperimentRun).get(failing_experiment.run_id)
        assert tracked_run.current_status == ExperimentRunStatus.failed
        assert isinstance(tracked_run.last_updated_time, datetime.datetime)
        assert tracked_run.stacktrace
def _write_to_db(
    self,
    model_id,
    subset_hash,
    evaluation_start_time,
    evaluation_end_time,
    as_of_date_frequency,
    matrix_uuid,
    evaluations,
    evaluation_table_obj,
):
    """Write evaluation objects to the database

    Deletes any existing rows of evaluation_table_obj matching the given model,
    evaluation period, as_of_date frequency and subset, then binds the
    identifying values to the given ORM objects and adds them to the session.

    Args:
        model_id (int) primary key of the model
        subset_hash (str) the hash of the subset, if any, that the
            evaluation is made on
        evaluation_start_time (pandas._libs.tslibs.timestamps.Timestamp)
            first as_of_date included in the evaluation period
        evaluation_end_time (pandas._libs.tslibs.timestamps.Timestamp) last
            as_of_date included in the evaluation period
        as_of_date_frequency (str) the frequency with which as_of_dates
            occur between the evaluation_start_time and evaluation_end_time
        matrix_uuid (str) the uuid of the matrix the evaluations were computed on
        evaluations (list) results_schema.TestEvaluation or TrainEvaluation
            objects
        evaluation_table_obj (schema.TestEvaluation or TrainEvaluation)
            specifies to which table to add the evaluations
    """
    with scoped_session(self.db_engine) as session:
        # Clear out previously stored evaluations for the same key first.
        # Note that matrix_uuid is not part of this delete filter.
        session.query(evaluation_table_obj).filter_by(
            model_id=model_id,
            evaluation_start_time=evaluation_start_time,
            evaluation_end_time=evaluation_end_time,
            as_of_date_frequency=as_of_date_frequency,
            subset_hash=subset_hash,
        ).delete()

        for evaluation in evaluations:
            evaluation.model_id = model_id
            evaluation.subset_hash = subset_hash
            evaluation.evaluation_start_time = evaluation_start_time
            evaluation.evaluation_end_time = evaluation_end_time
            evaluation.as_of_date_frequency = as_of_date_frequency
            evaluation.matrix_uuid = matrix_uuid
            session.add(evaluation)
def test_experiment_tracker_exception(db_engine, project_path):
    """A run that raises is tracked as failed, with a timestamp and a stacktrace."""
    # NOTE(review): the extent of the patch block is assumed to cover only
    # the constructor call -- confirm against the original layout.
    open_patch = mock.patch("triage.util.conf.open", side_effect=open_side_effect)
    with open_patch as mock_file:
        failing_experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
        )

    # no source data means this should blow up
    with pytest.raises(Exception):
        failing_experiment.run()

    with scoped_session(db_engine) as session:
        tracked_run = session.query(TriageRun).get(failing_experiment.run_id)
        assert tracked_run.current_status == TriageRunStatus.failed
        assert isinstance(tracked_run.last_updated_time, datetime.datetime)
        assert tracked_run.stacktrace
def initialize_tracking_and_get_run_id(
    experiment_hash,
    experiment_class_path,
    random_seed,
    experiment_kwargs,
    db_engine,
):
    """Insert a new TriageRun row describing this experiment run and return its id

    Args:
        experiment_hash (str) An experiment hash that exists in the experiments table
        experiment_class_path (str) The name of the experiment subclass used
        random_seed (int) Random seed used to run the experiment
        experiment_kwargs (dict) Any runtime Experiment keyword arguments that should be saved
        db_engine (sqlalchemy.engine)

    Returns: the run_id of the newly saved row

    Raises: ValueError if no run_id is available after the row is committed
    """
    # Values that are classes (e.g. MatrixStorageClass) cannot be serialized,
    # so they are stored by class path instead.
    cleaned_experiment_kwargs = {}
    for name, value in experiment_kwargs.items():
        if isinstance(value, type):
            cleaned_experiment_kwargs[name] = classpath(value)
        else:
            cleaned_experiment_kwargs[name] = value

    run = TriageRun(
        start_time=datetime.datetime.now(),
        git_hash=infer_git_hash(),
        triage_version=infer_triage_version(),
        python_version=infer_python_version(),
        run_type="experiment",
        run_hash=experiment_hash,
        last_updated_time=datetime.datetime.now(),
        current_status=TriageRunStatus.started,
        installed_libraries=infer_installed_libraries(),
        platform=platform.platform(),
        os_user=getpass.getuser(),
        working_directory=os.getcwd(),
        ec2_instance_type=infer_ec2_instance_type(),
        log_location=infer_log_location(),
        experiment_class_path=experiment_class_path,
        random_seed=random_seed,
        experiment_kwargs=cleaned_experiment_kwargs,
    )

    run_id = None
    with scoped_session(db_engine) as session:
        session.add(run)
        session.commit()
        # Read the id while the session is still open.
        run_id = run.run_id

    if run_id:
        return run_id
    raise ValueError("Failed to retrieve run_id from saved row")
def increment_field(field, run_id, db_engine):
    """Add one to a named integer field of an ExperimentRun.

    The run's last_updated_time timestamp is refreshed in the same statement.

    Args:
        field (str) The name of the field; expected to be an integer column
        run_id (int) The identifier/primary key of the run
        db_engine (sqlalchemy.engine)
    """
    with scoped_session(db_engine) as session:
        target_rows = session.query(ExperimentRun).filter_by(run_id=run_id)
        # Expressing the increment as an UPDATE (rather than loading the row,
        # changing it and merging it back) keeps it to one atomic query and
        # protects against race conditions.
        new_values = {
            field: getattr(ExperimentRun, field) + 1,
            'last_updated_time': datetime.datetime.now(),
        }
        target_rows.update(new_values)
def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    """Initializing tracking persists an ExperimentRun row and yields increasing run ids."""
    engine = db_engine_with_results_schema
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash

    first_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=engine,
    )
    assert first_run_id

    # The stored row should echo back everything that was passed in.
    with scoped_session(engine) as session:
        stored_run = session.query(ExperimentRun).get(first_run_id)
        assert stored_run.experiment_hash == experiment_hash
        assert stored_run.experiment_class_path == 'mymodule.MyClassName'
        assert stored_run.experiment_kwargs == {'key': 'value'}

    # A second initialization for the same experiment creates a new, later row.
    second_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=engine,
    )
    assert second_run_id > first_run_id
def _write_audit_to_db(
    self,
    model_id,
    protected_df,
    predictions_proba,
    labels,
    tie_breaker,
    subset_hash,
    matrix_type,
    evaluation_start_time,
    evaluation_end_time,
    matrix_uuid,
):
    """Runs the bias audit and saves the result in the bias table.

    Does nothing when protected_df is empty. Otherwise any existing rows
    matching a computed result row are deleted before all result rows are
    bulk-inserted.

    Args:
        model_id (int) primary key of the model
        protected_df (pandas.DataFrame) A dataframe with protected group attributes
        predictions_proba (np.array) List of prediction probabilities
        labels (pandas.Series): List of labels
        tie_breaker: 'best' or 'worst' case tiebreaking rule that the predictions
            and labels were sorted by
        subset_hash (str) the hash of the subset, if any, that the
            evaluation is made on
        matrix_type (triage.component.catwalk.storage.MatrixType) The type of matrix used
        evaluation_start_time (pandas._libs.tslibs.timestamps.Timestamp)
            first as_of_date included in the evaluation period
        evaluation_end_time (pandas._libs.tslibs.timestamps.Timestamp) last
            as_of_date included in the evaluation period
        matrix_uuid: the uuid of the matrix

    Returns: None

    Raises: ValueError if the audit produces an empty dataframe
    """
    if protected_df.empty:
        return

    # to preprocess aequitas requires the following columns:
    # score, label value, model_id, protected attributes
    # fill out a copy of protected_df, which just has protected attributes at this point
    protected_df = protected_df.copy()
    protected_df['model_id'] = model_id
    protected_df['score'] = predictions_proba
    protected_df['label_value'] = labels
    aequitas_df, attr_cols_input = preprocess_input_df(protected_df)

    # create group crosstabs
    g = Group()
    thresholds = self.bias_config['thresholds']
    score_thresholds = {
        'rank_abs': thresholds.get('top_n', []),
        # convert 0-100 percentile to 0-1 that Aequitas expects
        'rank_pct': [value / 100.0 for value in thresholds.get('percentiles', [])],
    }
    groups_model, _ = g.get_crosstabs(
        aequitas_df,
        score_thresholds=score_thresholds,
        attr_cols=attr_cols_input,
    )

    # analyze bias from reference groups
    bias = Bias()
    ref_groups_method = self.bias_config.get('ref_groups_method')
    # .get() so that 'predefined' without any ref_groups configured falls
    # through to the default method instead of raising KeyError
    ref_groups = self.bias_config.get('ref_groups')
    if ref_groups_method == 'predefined' and ref_groups:
        bias_df = bias.get_disparity_predefined_groups(groups_model, aequitas_df, ref_groups)
    elif ref_groups_method == 'majority':
        bias_df = bias.get_disparity_major_group(groups_model, aequitas_df)
    else:
        bias_df = bias.get_disparity_min_metric(groups_model, aequitas_df)

    # analyze fairness for each group
    f = Fairness(tau=0.8)  # the default fairness threshold is 0.8
    group_value_df = f.get_group_value_fairness(bias_df)
    group_value_df['subset_hash'] = subset_hash
    group_value_df['tie_breaker'] = tie_breaker
    group_value_df['evaluation_start_time'] = evaluation_start_time
    group_value_df['evaluation_end_time'] = evaluation_end_time
    group_value_df['matrix_uuid'] = matrix_uuid
    group_value_df = group_value_df.rename(
        index=str, columns={"score_threshold": "parameter"})
    if group_value_df.empty:
        raise ValueError(f""" Bias audit: aequitas_audit() failed. Returned empty dataframe for model_id = {model_id}, and subset_hash = {subset_hash} and matrix_type = {matrix_type}""")

    with scoped_session(self.db_engine) as session:
        # remove any previously stored row for each result before inserting
        for _, row in group_value_df.iterrows():
            session.query(matrix_type.aequitas_obj).filter_by(
                model_id=row['model_id'],
                evaluation_start_time=row['evaluation_start_time'],
                evaluation_end_time=row['evaluation_end_time'],
                subset_hash=row['subset_hash'],
                parameter=row['parameter'],
                tie_breaker=row['tie_breaker'],
                matrix_uuid=row['matrix_uuid'],
                attribute_name=row['attribute_name'],
                attribute_value=row['attribute_value'],
            ).delete()
        session.bulk_insert_mappings(
            matrix_type.aequitas_obj,
            group_value_df.to_dict(orient="records"),
        )
def update_db_with_ranks(self, model_id, matrix_uuid, matrix_type):
    """Update predictions table with rankings, both absolute and percentile.

    The stored predictions are loaded, sorted by score with ties ordered
    according to self.rank_order, and four rank columns are computed:
    - rank_abs_no_ties: descending score rank, ties broken by the sorted
      order (pandas 'first' method), so every row gets a distinct rank
    - rank_abs_with_ties: descending score rank, tied scores share the
      lowest rank of their group (pandas 'min' method)
    - rank_pct_no_ties: percentile derived from rank_abs_no_ties (as opposed
      to raw scores), so it inherits the tie-breaking from it
    - rank_pct_with_ties: percentile rank of the score with the 'min' method

    The ranks are copied into a scratch table, joined back onto the
    predictions table, and the prediction metadata is written afterwards.

    Args:
        model_id (int) the id of the model associated with the given predictions
        matrix_uuid (string) the uuid of the prediction matrix
        matrix_type: object whose ``string_name`` prefixes the results schema
            holding the predictions table
    """
    if not self.save_predictions:
        logging.info("save_predictions is set to False so there are no predictions to rank")
        return

    logging.info(
        'Beginning ranking of new Predictions for model %s, matrix %s',
        model_id,
        matrix_uuid
    )

    # retrieve a dataframe with only the data we need to rank
    ranking_df = pandas.DataFrame.pg_copy_from(
        f"""select entity_id, score, as_of_date, label_value
        from {matrix_type.string_name}_results.predictions
        where model_id = {model_id} and matrix_uuid = '{matrix_uuid}'
        """,
        connectable=self.db_engine)

    # A seed is only resolved for random tie-breaking; otherwise it stays None.
    sort_seed = None
    if self.rank_order == 'random':
        with scoped_session(self.db_engine) as session:
            sort_seed = session.query(Model).get(model_id).random_seed
        if not sort_seed:
            sort_seed = generate_python_random_seed()

    sorted_predictions, sorted_labels, sorted_arrays = sort_predictions_and_labels(
        predictions_proba=ranking_df['score'],
        labels=ranking_df['label_value'],
        tiebreaker=self.rank_order,
        sort_seed=sort_seed,
        parallel_arrays=(ranking_df['entity_id'], ranking_df['as_of_date']),
    )
    # .values drops the index so the sorted data is written positionally
    ranking_df['score'] = sorted_predictions.values
    ranking_df['as_of_date'] = pandas.to_datetime(sorted_arrays[1].values)
    ranking_df['label_value'] = sorted_labels.values
    ranking_df['entity_id'] = sorted_arrays[0].values

    # at this point, we have the same dataframe that we loaded from postgres,
    # but sorted based on score and the self.rank_order.
    # Now we can generate ranks using pandas and only using the 'score' column because
    # our secondary ordering is baked in, enabling the 'first' method to break ties.
    ranking_df['rank_abs_no_ties'] = ranking_df['score'].rank(ascending=False, method='first')
    ranking_df['rank_abs_with_ties'] = ranking_df['score'].rank(ascending=False, method='min')
    ranking_df['rank_pct_no_ties'] = 1 - (ranking_df['rank_abs_no_ties'] - 1) / len(ranking_df)
    ranking_df['rank_pct_with_ties'] = ranking_df['score'].rank(method='min', pct=True)

    # with our rankings computed, update these ranks into the existing rows
    # in the predictions table
    temp_table_name = f"ranks_mod{model_id}_mat{matrix_uuid}"
    ranking_df.pg_copy_to(temp_table_name, self.db_engine)
    try:
        self.db_engine.execute(f"""update {matrix_type.string_name}_results.predictions as p
            set rank_abs_no_ties = tt.rank_abs_no_ties,
            rank_abs_with_ties = tt.rank_abs_with_ties,
            rank_pct_no_ties = tt.rank_pct_no_ties,
            rank_pct_with_ties = tt.rank_pct_with_ties
            from {temp_table_name} as tt
            where tt.entity_id = p.entity_id
            and p.matrix_uuid = '{matrix_uuid}'
            and p.model_id = {model_id}
            and p.as_of_date = tt.as_of_date
            """)
    finally:
        # always remove the scratch table, even when the update fails, so a
        # later attempt for the same model/matrix is not blocked by leftovers
        self.db_engine.execute(f"drop table {temp_table_name}")

    self._write_metadata_to_db(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=matrix_type,
        random_seed=sort_seed,
    )
    logging.info(
        'Completed ranking of new Predictions for model %s, matrix %s',
        model_id,
        matrix_uuid
    )
def retrain(self, prediction_date):
    """Retrain a model by going back one split from prediction_date, so the
    as_of_date for training would be (prediction_date - training_label_timespan)

    The steps performed, in order: save the retrain config and obtain its
    hash, chop time to get the single train matrix definition, record a
    TriageRun row, generate labels, cohort, feature aggregations and
    imputations, build the train matrix, and train the model with the random
    seed derived from the last split.

    Sets self.retrain_hash, self.retrain_model_hash, self.retrain_matrix_uuid
    and self.retrain_model_id, and points self.model_trainer at the new run.

    Args:
        prediction_date(str)

    Returns: (dict) with keys 'retrain_model_comment' and 'retrain_model_id'

    Raises:
        AssertionError if Timechop does not produce exactly one chop
        ValueError if no run_id is available after saving the run row
    """
    # Retrain config and hash
    retrain_config = {
        "model_group_id": self.model_group_id,
        "prediction_date": prediction_date,
        "test_label_timespan": self.test_label_timespan,
        "test_duration": self.test_duration,
    }
    self.retrain_hash = save_retrain_and_get_hash(retrain_config, self.db_engine)

    # Store the prediction date (still in its original string form) on the Retrain row
    with get_for_update(self.db_engine, Retrain, self.retrain_hash) as retrain:
        retrain.prediction_date = prediction_date

    # Timechop
    # From here on prediction_date is the value returned by dt_from_str, not the input string
    prediction_date = dt_from_str(prediction_date)
    temporal_config = self.get_temporal_config_for_retrain(prediction_date)
    timechopper = Timechop(**temporal_config)
    chops = timechopper.chop_time()
    # Exactly one split is expected for a retrain
    assert len(chops) == 1
    chops_train_matrix = chops[0]['train_matrix']
    # The training as_of_date is the last as_of_time of that train matrix, as YYYY-MM-DD
    as_of_date = datetime.strftime(chops_train_matrix['last_as_of_time'], "%Y-%m-%d")
    retrain_definition = {
        'first_as_of_time': chops_train_matrix['first_as_of_time'],
        'last_as_of_time': chops_train_matrix['last_as_of_time'],
        'matrix_info_end_time': chops_train_matrix['matrix_info_end_time'],
        'as_of_times': [as_of_date],
        'training_label_timespan': chops_train_matrix['training_label_timespan'],
        'max_training_history': chops_train_matrix['max_training_history'],
        'training_as_of_date_frequency': chops_train_matrix['training_as_of_date_frequency'],
    }

    # Record a TriageRun row of type "retrain" for this run
    run = TriageRun(
        start_time=datetime.now(),
        git_hash=infer_git_hash(),
        triage_version=infer_triage_version(),
        python_version=infer_python_version(),
        run_type="retrain",
        run_hash=self.retrain_hash,
        last_updated_time=datetime.now(),
        current_status=TriageRunStatus.started,
        installed_libraries=infer_installed_libraries(),
        platform=platform.platform(),
        os_user=getpass.getuser(),
        working_directory=os.getcwd(),
        ec2_instance_type=infer_ec2_instance_type(),
        log_location=infer_log_location(),
        experiment_class_path=classpath(self.__class__),
        random_seed=retrieve_experiment_seed_from_run_id(
            self.db_engine, self.triage_run_id),
    )
    run_id = None
    with scoped_session(self.db_engine) as session:
        session.add(run)
        session.commit()
        # Read the id while the session is still open
        run_id = run.run_id
    if not run_id:
        raise ValueError("Failed to retrieve run_id from saved row")

    # set ModelTrainer's run_id and experiment_hash for Retrain run
    self.model_trainer.run_id = run_id
    self.model_trainer.experiment_hash = self.retrain_hash

    # 1. Generate all labels
    self.generate_all_labels(as_of_date)
    record_labels_table_name(run_id, self.db_engine, self.labels_table_name)

    # 2. Generate cohort
    cohort_table_name = f"triage_production.cohort_{self.experiment_config['cohort_config']['name']}_retrain"
    self.generate_entity_date_table(as_of_date, cohort_table_name)
    record_cohort_table_name(run_id, self.db_engine, cohort_table_name)

    # 3. Generate feature aggregations
    collate_aggregations = self.get_collate_aggregations(
        as_of_date, cohort_table_name)
    feature_aggregation_table_tasks = self.feature_generator.generate_all_table_tasks(
        collate_aggregations, task_type='aggregation')
    self.feature_generator.process_table_tasks(
        feature_aggregation_table_tasks)

    # 4. Reconstruct feature dictionary from feature_names and generate imputation
    reconstructed_feature_dict, imputation_table_tasks = self.get_feature_dict_and_imputation_task(
        collate_aggregations,
        self.model_group_info['model_id_last_split'],
    )
    feature_group_creator = FeatureGroupCreator(
        self.experiment_config['feature_group_definition'])
    feature_group_mixer = FeatureGroupMixer(["all"])
    # Only the first generated feature group combination is used
    feature_group_dict = feature_group_mixer.generate(
        feature_group_creator.subsets(reconstructed_feature_dict))[0]
    self.feature_generator.process_table_tasks(imputation_table_tasks)

    # 5. Build new matrix
    db_config = {
        "features_schema_name": "triage_production",
        "labels_schema_name": "public",
        "cohort_table_name": cohort_table_name,
        "labels_table_name": self.labels_table_name,
    }
    record_matrix_building_started(run_id, self.db_engine)
    matrix_builder = MatrixBuilder(
        db_config=db_config,
        matrix_storage_engine=self.matrix_storage_engine,
        engine=self.db_engine,
        experiment_hash=None,
        replace=True,
    )
    new_matrix_metadata = Planner.make_metadata(
        matrix_definition=retrain_definition,
        feature_dictionary=feature_group_dict,
        label_name=self.label_name,
        label_type='binary',
        cohort_name=self.cohort_name,
        matrix_type='train',
        feature_start_time=dt_from_str(self.feature_start_time),
        user_metadata=self.user_metadata,
    )
    new_matrix_metadata['matrix_id'] = "_".join([
        self.label_name,
        'binary',
        str(as_of_date),
        'retrain',
    ])
    # The matrix uuid is computed from the metadata, including the matrix_id set above
    matrix_uuid = filename_friendly_hash(new_matrix_metadata)
    matrix_builder.build_matrix(
        as_of_times=[as_of_date],
        label_name=self.label_name,
        label_type='binary',
        feature_dictionary=feature_group_dict,
        matrix_metadata=new_matrix_metadata,
        matrix_uuid=matrix_uuid,
        matrix_type="train",
    )

    # The model comment embeds the current time
    retrain_model_comment = 'retrain_' + str(datetime.now())
    misc_db_parameters = {
        'train_end_time': dt_from_str(as_of_date),
        'test': False,
        'train_matrix_uuid': matrix_uuid,
        'training_label_timespan': self.training_label_timespan,
        'model_comment': retrain_model_comment,
    }

    # get the random seed from the last split
    last_split_train_matrix_uuid, last_split_matrix_metadata = train_matrix_info_from_model_id(
        self.db_engine, model_id=self.model_group_info['model_id_last_split'])
    random_seed = self.model_trainer.get_or_generate_random_seed(
        model_group_id=self.model_group_id,
        matrix_metadata=last_split_matrix_metadata,
        train_matrix_uuid=last_split_train_matrix_uuid)

    # create retrain model hash
    retrain_model_hash = self.model_trainer._model_hash(
        self.matrix_storage_engine.get_store(matrix_uuid).metadata,
        class_path=self.model_group_info['model_type'],
        parameters=self.model_group_info['hyperparameters'],
        random_seed=random_seed,
    )
    associate_models_with_retrain(self.retrain_hash, (retrain_model_hash, ), self.db_engine)

    # Train the model on the newly built matrix
    record_model_building_started(run_id, self.db_engine)
    retrain_model_id = self.model_trainer.process_train_task(
        matrix_store=self.matrix_storage_engine.get_store(matrix_uuid),
        class_path=self.model_group_info['model_type'],
        parameters=self.model_group_info['hyperparameters'],
        model_hash=retrain_model_hash,
        misc_db_parameters=misc_db_parameters,
        random_seed=random_seed,
        retrain=True,
        model_group_id=self.model_group_id)

    # Keep the outcome on the instance
    self.retrain_model_hash = retrieve_model_hash_from_id(
        self.db_engine, retrain_model_id)
    self.retrain_matrix_uuid = matrix_uuid
    self.retrain_model_id = retrain_model_id
    return {
        'retrain_model_comment': retrain_model_comment,
        'retrain_model_id': retrain_model_id
    }