def __init__(self, output_path=".", execution_path=None, config_filepath=None):
    report_config = PrescConfig(global_config)
    if config_filepath:
        report_config.update_from_file(config_filepath)
    self.config = report_config

    # Path where the report output is written.
    # Outputs are nested in a subdir.
    self.output_path = Path(output_path) / REPORT_OUTPUT_DIR
    self.output_path.mkdir(parents=True, exist_ok=True)

    # Path where the report is built from.
    # The report source files are copied here, and the model inputs are
    # written to a data store.
    # If missing, a temp dir will be used on execution.
    self.execution_path = None
    if execution_path is not None:
        self.execution_path = Path(execution_path) / REPORT_EXECUTION_DIR

    # Build artifacts:
    # The main entry page for the report.
    self.report_main_page = self.output_path / SPHINX_INDEX_PAGE
    # Log files for jupyter-book execution.
    self.jb_clean_log = self.output_path / JB_CLEAN_LOG
    self.jb_build_log = self.output_path / JB_BUILD_LOG
    # The main page will be linked to the top-level output dir, if possible.
    self._linked_main_page = self.output_path / REPORT_MAIN_PAGE
    # Cache the process results from running jupyter-book commands for
    # debugging.
    self._jb_clean_result = None
    self._jb_build_result = None
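
# Construction sketch for the report runner whose `__init__` appears above
# (assumed to be the PRESC ReportRunner class). The paths and config filename
# are hypothetical placeholders.
runner = ReportRunner(
    output_path="./results",              # outputs nest under ./results/<REPORT_OUTPUT_DIR>
    execution_path="./report_build",      # optional: omit to build from a temp dir
    config_filepath="presc_config.yaml",  # optional: file-based overrides layered onto global_config
)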

def compute(self, metric, **kwargs):
    """Compute the evaluation across train-test split ratios.

    Parameters
    ----------
    metric : str
        The evaluation metric to compute for each split. This should be
        the name of a `sklearn.metrics` scorer function.
    kwargs :
        On-the-fly overrides to the config option values for the computation.

    Returns
    -------
    TrainTestSplitsResult
    """
    eval_config = PrescConfig(self._config)
    eval_config = eval_config["evaluations"]["train_test_splits"]["computation"]
    if kwargs:
        eval_config.set(kwargs)

    return compute_train_test_splits(
        dataset=self._train_dataset,
        classifier=self._model.classifier,
        metric=metric,
        split_size_increment=eval_config["split_size_increment"].get(float),
        num_replicates=eval_config["num_replicates"].get(int),
        random_state=eval_config["random_state"].get(int),
    )

def __init__(self, model, train_dataset, settings=None, config=None):
    source_config = config or global_config
    self._config = PrescConfig(source_config)
    if settings:
        self._config.set({"evaluations": {"train_test_splits": settings}})
    self._model = model
    self._train_dataset = train_dataset

def __init__(self, model, test_dataset, settings=None, config=None):
    source_config = config or global_config
    self._config = PrescConfig(source_config)
    if settings:
        self._config.set({"evaluations": {"conditional_distribution": settings}})
    self._model = model
    self._test_dataset = test_dataset
    self._test_pred = self._model.predict_labels(test_dataset).rename("predicted")

def compute(self, **kwargs):
    """Compute the evaluation for the given datasets.

    Parameters
    ----------
    kwargs :
        On-the-fly overrides to the config option values for the computation.

    Returns
    -------
    SpatialDistributionResult
    """
    eval_config = PrescConfig(self._config)
    eval_config = eval_config["evaluations"]["spatial_distribution"]
    if kwargs:
        eval_config.set(kwargs)

    # Feature columns to include in the distance computation.
    feats_incl = eval_config["features_include"].get()
    feats_excl = eval_config["features_exclude"].get()
    feats = include_exclude_list(
        self._test_dataset.feature_names, included=feats_incl, excluded=feats_excl
    )
    num_feats = []
    categ_feats = []
    for col in feats:
        if is_discrete(self._test_dataset.features[col]):
            categ_feats.append(col)
        else:
            num_feats.append(col)
    # Figure out the metric to use for each feature.
    dist_metrics_num, dist_metrics_categ = _get_distance_metrics_by_column(
        num_feats, categ_feats, eval_config
    )

    return compute_spatial_distribution(
        test_features=self._test_dataset.features,
        test_labs_true=self._test_dataset.labels,
        test_labs_pred=self._test_pred,
        base_features=self._train_dataset.features,
        base_labs=self._train_dataset.labels,
        numerical_dist_metric=dist_metrics_num,
        categorical_dist_metric=dist_metrics_categ,
        summary=eval_config["summary_agg"].get(),
    )

def compute_for_column(self, colname, metric, **kwargs):
    """Compute the evaluation for the given dataset column.

    The metric is computed within unique values of the specified column
    (if categorical) or within bins partitioning its range (if continuous).

    Parameters
    ----------
    colname : str
        A column in the dataset to partition on.
    metric : function
        The evaluation metric to compute across the partitions. This should
        be a function f(y_true, y_pred) which accepts Series of true and
        predicted labels.
    kwargs :
        On-the-fly overrides to the config option values for the computation.

    Returns
    -------
    ConditionalMetricResult
    """
    comp_config = PrescConfig(self._config)
    comp_config = comp_config["evaluations"]["conditional_metric"]["computation"]
    col_overrides = comp_config["columns"][colname]
    try:
        col_overrides = col_overrides.get()
    except ConfigError:
        col_overrides = None
    if col_overrides:
        comp_config.set(col_overrides)
    if kwargs:
        comp_config.set(kwargs)

    return compute_conditional_metric(
        grouping_col=self._test_dataset.df[colname],
        true_labs=self._test_dataset.labels,
        pred_labs=self._test_pred,
        metric=metric,
        as_categorical=comp_config["as_categorical"].get(bool),
        num_bins=comp_config["num_bins"].get(int),
        quantile=comp_config["quantile"].get(bool),
    )
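
# Override-precedence sketch for `compute_for_column` above (assumed to be a
# method of ConditionalMetric, defined further down): per-column settings under
# `computation.columns.<colname>` are layered over the general computation
# options, and keyword arguments override both. `model` and `test_dataset` are
# assumed to be a prepared ClassificationModel and Dataset; the column name
# "age" is hypothetical.
from sklearn.metrics import accuracy_score

cme = ConditionalMetric(
    model,
    test_dataset,
    settings={"computation.num_bins": 10, "computation.columns.age.num_bins": 5},
)
# Uses the column-level override (5 bins) for "age"...
cmr = cme.compute_for_column("age", metric=accuracy_score)
# ...unless a kwarg overrides it on the fly for this call only.
cmr_q = cme.compute_for_column("age", metric=accuracy_score, num_bins=4, quantile=True)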

def compute_for_column(self, colname, **kwargs):
    """Compute the evaluation for the given dataset column.

    Parameters
    ----------
    colname : str
        A column in the dataset to compute distributions for.
    kwargs :
        On-the-fly overrides to the config option values for the computation.

    Returns
    -------
    ConditionalDistributionResult
    """
    comp_config = PrescConfig(self._config)
    comp_config = comp_config["evaluations"]["conditional_distribution"]["computation"]
    col_overrides = comp_config["columns"][colname]
    try:
        col_overrides = col_overrides.get()
    except ConfigError:
        col_overrides = None
    if col_overrides:
        comp_config.set(col_overrides)
    if kwargs:
        comp_config.set(kwargs)

    return compute_conditional_distribution(
        data_col=self._test_dataset.df[colname],
        true_labs=self._test_dataset.labels,
        pred_labs=self._test_pred,
        as_categorical=comp_config["as_categorical"].get(bool),
        binning=comp_config["binning"].get(),
        common_bins=comp_config["common_bins"].get(bool),
    )

class SpatialDistribution:
    """Computation of distributions of data in feature space.

    Attributes
    ----------
    model : presc.model.ClassificationModel
        The ClassificationModel to run the evaluation for.
    test_dataset : presc.dataset.Dataset
        A Dataset to use for evaluation.
    train_dataset : presc.dataset.Dataset
        A Dataset to use as the baseline for distance measures (eg. the training data).
    settings : dict
        An optional dict specifying option values under
        `evaluations.spatial_distribution`, eg. `{"summary_agg": "median"}`.
        These are restricted to the class instance and do not change the global config.
    config : presc.configuration.PrescConfig
        An optional PrescConfig instance to read options from.
        This will be overridden by `settings` values.
    """

    def __init__(self, model, test_dataset, train_dataset, settings=None, config=None):
        source_config = config or global_config
        self._config = PrescConfig(source_config)
        if settings:
            self._config.set({"evaluations": {"spatial_distribution": settings}})
        self._model = model
        self._test_dataset = test_dataset
        self._test_pred = self._model.predict_labels(test_dataset).rename("predicted")
        self._train_dataset = train_dataset

    def compute(self, **kwargs):
        """Compute the evaluation for the given datasets.

        Parameters
        ----------
        kwargs :
            On-the-fly overrides to the config option values for the computation.

        Returns
        -------
        SpatialDistributionResult
        """
        eval_config = PrescConfig(self._config)
        eval_config = eval_config["evaluations"]["spatial_distribution"]
        if kwargs:
            eval_config.set(kwargs)

        # Feature columns to include in the distance computation.
        feats_incl = eval_config["features_include"].get()
        feats_excl = eval_config["features_exclude"].get()
        feats = include_exclude_list(
            self._test_dataset.feature_names, included=feats_incl, excluded=feats_excl
        )
        num_feats = []
        categ_feats = []
        for col in feats:
            if is_discrete(self._test_dataset.features[col]):
                categ_feats.append(col)
            else:
                num_feats.append(col)
        # Figure out the metric to use for each feature.
        dist_metrics_num, dist_metrics_categ = _get_distance_metrics_by_column(
            num_feats, categ_feats, eval_config
        )

        return compute_spatial_distribution(
            test_features=self._test_dataset.features,
            test_labs_true=self._test_dataset.labels,
            test_labs_pred=self._test_pred,
            base_features=self._train_dataset.features,
            base_labs=self._train_dataset.labels,
            numerical_dist_metric=dist_metrics_num,
            categorical_dist_metric=dist_metrics_categ,
            summary=eval_config["summary_agg"].get(),
        )

    def display(self):
        """Computes and displays the spatial distribution results."""
        eval_result = self.compute()
        eval_result.display_result()
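
# Usage sketch for SpatialDistribution as defined above. `model`, `test_dataset`
# and `train_dataset` are assumed to be a prepared ClassificationModel and
# Dataset instances; the excluded feature name is hypothetical.
sde = SpatialDistribution(
    model,
    test_dataset,
    train_dataset,
    settings={"summary_agg": "median", "features_exclude": ["id"]},
)
sdr = sde.compute()   # SpatialDistributionResult
sde.display()         # compute and plot in one call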

def test_get_metrics_for_column():
    config = PrescConfig(global_config)

    # Get default metrics
    eval_config = config["evaluations"]["conditional_metric"]
    result = _get_metrics_for_column("b", eval_config)
    assert len(result) == 1
    assert result[0].get("display_name") == "Accuracy"
    assert result[0].get("function") == accuracy_score

    # Get multiple metrics, one with unspecified display name
    config.reset_defaults()
    config.set(
        {
            "evaluations.conditional_metric.metrics": [
                {"function": "accuracy_score", "display_name": "Accuracy"},
                {"function": "jaccard_score"},
                {"function": "f1_score", "display_name": "F1 Score"},
            ]
        }
    )
    eval_config = config["evaluations"]["conditional_metric"]
    result = _get_metrics_for_column("a", eval_config)
    assert len(result) == 3
    assert result[0].get("display_name") == "Accuracy"
    assert result[0].get("function") == accuracy_score
    assert result[1].get("display_name") == "jaccard_score"
    assert result[1].get("function") == jaccard_score
    assert result[2].get("display_name") == "F1 Score"
    assert result[2].get("function") == f1_score

    # Get multiple metrics, one invalid function, one with unspecified display name
    config.reset_defaults()
    config.set(
        {
            "evaluations.conditional_metric.metrics": [
                {"function": "wrong_accuracy_score", "display_name": "Wrong Accuracy"},
                {"function": "jaccard_score"},
                {"function": "f1_score", "display_name": "F1 Score"},
            ]
        }
    )
    eval_config = config["evaluations"]["conditional_metric"]
    result = _get_metrics_for_column("a", eval_config)
    assert len(result) == 2
    assert result[0].get("display_name") == "jaccard_score"
    assert result[0].get("function") == jaccard_score
    assert result[1].get("display_name") == "F1 Score"
    assert result[1].get("function") == f1_score

    # Add an invalid metric for column a
    config.set(
        {
            "evaluations.conditional_metric.computation.columns.a.metrics": [
                {"function": "col_a_accuracy", "display_name": "Col A Acc"}
            ]
        }
    )
    eval_config = config["evaluations"]["conditional_metric"]
    result = _get_metrics_for_column("a", eval_config)
    assert len(result) == 0

    # All defaults still valid for column b
    result = _get_metrics_for_column("b", eval_config)
    assert len(result) == 2
    assert result[0].get("display_name") == "jaccard_score"
    assert result[0].get("function") == jaccard_score
    assert result[1].get("display_name") == "F1 Score"
    assert result[1].get("function") == f1_score

def test_eval_compute_for_column(test_dataset, classification_model, config_col_override):
    # Get the default metric function from the config
    config = PrescConfig(global_config)
    eval_config = config["evaluations"]["conditional_metric"]
    metric_function = _get_metrics_for_column("b", eval_config)[0].get("function")

    # Defaults
    cme = ConditionalMetric(classification_model, test_dataset)
    cmr = cme.compute_for_column("a", metric=metric_function)
    assert len(cmr.vals) == 10
    assert cmr.num_bins == 10

    # Global override
    global_config.set({"evaluations.conditional_metric.computation.num_bins": 6})
    cme = ConditionalMetric(classification_model, test_dataset)
    cmr = cme.compute_for_column("a", metric=metric_function)
    assert len(cmr.vals) == 6
    assert cmr.num_bins == 6

    # Evaluation-level override:
    # explicit settings
    global_config.reset_defaults()
    conf_default = global_config.dump()
    cme = ConditionalMetric(
        classification_model, test_dataset, settings={"computation.num_bins": 7}
    )
    cmr = cme.compute_for_column("a", metric=metric_function)
    assert len(cmr.vals) == 7
    assert cmr.num_bins == 7
    assert global_config.dump() == conf_default

    # config object + settings
    new_config = PrescConfig(global_config)
    new_config.set({"evaluations.conditional_metric.computation.num_bins": 4})
    cme = ConditionalMetric(
        classification_model,
        test_dataset,
        settings={"computation.quantile": True},
        config=new_config,
    )
    cmr = cme.compute_for_column("a", metric=metric_function)
    assert len(cmr.vals) == 4
    assert cmr.num_bins == 4
    assert cmr.quantile is True
    assert global_config.dump() == conf_default

    # Column-specific override
    cme = ConditionalMetric(
        classification_model, test_dataset, settings=config_col_override
    )
    cmr_a = cme.compute_for_column("a", metric=metric_function)
    assert len(cmr_a.vals) == 5
    assert cmr_a.num_bins == 5
    cmr_b = cme.compute_for_column("b", metric=metric_function)
    assert len(cmr_b.vals) == 10
    assert cmr_b.num_bins == 10
    assert global_config.dump() == conf_default

    # kwarg override
    conf_cme = cme._config.dump()
    cmr_a = cme.compute_for_column("a", metric=metric_function, num_bins=4, quantile=True)
    assert len(cmr_a.vals) == 4
    assert cmr_a.num_bins == 4
    assert cmr_a.quantile is True
    cmr_b = cme.compute_for_column("b", metric=metric_function, num_bins=3)
    assert len(cmr_b.vals) == 3
    assert cmr_b.num_bins == 3
    assert global_config.dump() == conf_default
    assert cme._config.dump() == conf_cme

class ConditionalDistribution:
    """Computation of data distributions conditional on prediction results.

    Attributes
    ----------
    model : presc.model.ClassificationModel
        The ClassificationModel to run the evaluation for.
    test_dataset : presc.dataset.Dataset
        A Dataset to use for evaluation.
    settings : dict
        An optional dict specifying option values under
        `evaluations.conditional_distribution`, eg. `{"computation.binning": 5}`.
        These are restricted to the class instance and do not change the global config.
    config : presc.configuration.PrescConfig
        An optional PrescConfig instance to read options from.
        This will be overridden by `settings` values.
    """

    def __init__(self, model, test_dataset, settings=None, config=None):
        source_config = config or global_config
        self._config = PrescConfig(source_config)
        if settings:
            self._config.set({"evaluations": {"conditional_distribution": settings}})
        self._model = model
        self._test_dataset = test_dataset
        self._test_pred = self._model.predict_labels(test_dataset).rename("predicted")

    def compute_for_column(self, colname, **kwargs):
        """Compute the evaluation for the given dataset column.

        Parameters
        ----------
        colname : str
            A column in the dataset to compute distributions for.
        kwargs :
            On-the-fly overrides to the config option values for the computation.

        Returns
        -------
        ConditionalDistributionResult
        """
        comp_config = PrescConfig(self._config)
        comp_config = comp_config["evaluations"]["conditional_distribution"]["computation"]
        col_overrides = comp_config["columns"][colname]
        try:
            col_overrides = col_overrides.get()
        except ConfigError:
            col_overrides = None
        if col_overrides:
            comp_config.set(col_overrides)
        if kwargs:
            comp_config.set(kwargs)

        return compute_conditional_distribution(
            data_col=self._test_dataset.df[colname],
            true_labs=self._test_dataset.labels,
            pred_labs=self._test_pred,
            as_categorical=comp_config["as_categorical"].get(bool),
            binning=comp_config["binning"].get(),
            common_bins=comp_config["common_bins"].get(bool),
        )

    def display(self, colnames=None):
        """Computes and displays the conditional distribution result for each specified column.

        Parameters
        ----------
        colnames : list of str
            A list of column names to run the evaluation over, creating a plot
            for each. If not supplied, defaults to the columns specified in the config.
        """
        if colnames:
            incl = colnames
            excl = None
        else:
            eval_config = self._config["evaluations"]["conditional_distribution"]
            incl = eval_config["columns_include"].get()
            excl = eval_config["columns_exclude"].get()
        cols = include_exclude_list(
            self._test_dataset.column_names, included=incl, excluded=excl
        )
        for colname in cols:
            eval_result = self.compute_for_column(colname)
            eval_result.display_result(xlab=colname)
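
# Usage sketch for ConditionalDistribution as defined above. `model` and
# `test_dataset` are assumed to be prepared instances; the column name
# "balance" is hypothetical. Settings are scoped to this instance and leave
# global_config untouched.
cde = ConditionalDistribution(
    model,
    test_dataset,
    settings={"computation.binning": 5, "computation.common_bins": False},
)
cdr = cde.compute_for_column("balance")   # ConditionalDistributionResult
cde.display(colnames=["balance"])         # plot only the selected columns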

def config_report():
    conf = PrescConfig(global_config)
    extra = yaml.load(REPORT_CONFIG_YAML, Loader=yaml.FullLoader)
    conf.set(extra)
    return conf
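
# Layering sketch mirroring `config_report` above: a PrescConfig wrapping the
# global config can be specialized from a YAML string without mutating
# global_config. The YAML content here is hypothetical; only `report.title`
# is known from the report settings documented elsewhere in this code.
import yaml

EXTRA_YAML = """
report:
  title: Test Report
"""

conf = PrescConfig(global_config)
conf.set(yaml.load(EXTRA_YAML, Loader=yaml.FullLoader))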

def run(self, model, test_dataset, train_dataset=None, settings=None, clean=True):
    """Runs the PRESC report for the given modeling inputs.

    The report is written to `<output_path>/presc_report`. If this dir
    already exists, it will be overwritten.

    Parameters
    ----------
    model : presc.model.ClassificationModel
        A pre-trained ClassificationModel instance to evaluate.
    test_dataset : presc.dataset.Dataset
        A test Dataset instance used to evaluate model performance.
    train_dataset : presc.dataset.Dataset
        The Dataset instance used to train the model. This is not required
        for every evaluation.
    settings : dict
        A dict specifying option values to override report settings,
        eg. `{"report.title": "My Report"}`.
    clean : bool
        Should previous outputs be cleaned? Default: True
    """
    if settings:
        run_config = PrescConfig(self.config)
        run_config.set(settings)
    else:
        run_config = self.config

    if clean:
        self.clean()

    tmpdir = None
    exec_path = None
    if self.execution_path:
        # If using a user-defined execution path, we need to make sure it
        # doesn't exist for `shutil.copytree` to work.
        # Note that this will only remove the nested subdir, not the actual
        # user-specified dir.
        if self.execution_path.exists():
            shutil.rmtree(self.execution_path)
        exec_path = self.execution_path
    else:
        # Create a temp dir to run the build from.
        # We set up the temp dir here rather than using jupyter-book's
        # `run_in_temp` option so that we have access to the temp path.
        tmpdir = TemporaryDirectory()
        exec_path = Path(tmpdir.name) / REPORT_EXECUTION_DIR

    # Copy the report source files to the execution dir and execute from
    # there. The data store for the inputs is saved to the same dir.
    # That way, since the notebooks' working dir on execution is set to
    # where they are located by jupyter-book, they can find the data store
    # without needing to know the calling path.
    try:
        shutil.copytree(REPORT_SOURCE_PATH, exec_path)
    except shutil.Error as e:
        msg = f"Failed to copy report source to execution dir {exec_path}"
        raise PrescError(msg) from e

    # Update the default JB config files based on the PRESC config options.
    with open(exec_path / JB_CONFIG_FILENAME, "w") as f:
        f.write(_updated_jb_config(run_config["report"]))

    # Write the inputs to the data store.
    ctx = Context(store_dir=exec_path)
    ctx.store_inputs(
        model=model,
        test_dataset=test_dataset,
        train_dataset=train_dataset,
        config=run_config,
    )

    # Build the report.
    self._run_jb_build(exec_path)
    if tmpdir:
        tmpdir.cleanup()

    # The build should have created index.html at the `report_main_page` path.
    if self.report_main_page.exists():
        # Symlink the main page to the top level for convenience.
        try:
            main_page_target = self.report_main_page.relative_to(
                self._linked_main_page.parent
            )
            self._linked_main_page.symlink_to(main_page_target)
        except OSError:
            pass
    else:
        msg = f"The expected report main page {self.report_main_page} does not appear to exist."
        msg += " There may have been an error generating the report."
        msg += f" Output is written to {self.jb_build_log}"
        warnings.warn(msg)
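
# End-to-end sketch for `run` above (assuming the enclosing ReportRunner class).
# `model`, `test_dataset` and `train_dataset` are assumed to be prepared
# ClassificationModel / Dataset instances; the title override is hypothetical.
runner = ReportRunner(output_path="./results")
runner.run(
    model,
    test_dataset,
    train_dataset=train_dataset,
    settings={"report.title": "My Report"},   # applies to this run only
)
print(runner.report_main_page)   # index.html under the report output dir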

class TrainTestSplits:
    """Simulation of performance across different train-test split ratios.

    Attributes
    ----------
    model : presc.model.ClassificationModel
        The ClassificationModel to run the evaluation for.
    train_dataset : presc.dataset.Dataset
        A Dataset to use for evaluation.
    settings : dict
        An optional dict specifying option values under
        `evaluations.train_test_splits`, eg. `{"computation.num_replicates": 5}`.
        These are restricted to the class instance and do not change the global config.
    config : presc.configuration.PrescConfig
        An optional PrescConfig instance to read options from.
        This will be overridden by `settings` values.
    """

    def __init__(self, model, train_dataset, settings=None, config=None):
        source_config = config or global_config
        self._config = PrescConfig(source_config)
        if settings:
            self._config.set({"evaluations": {"train_test_splits": settings}})
        self._model = model
        self._train_dataset = train_dataset

    def compute(self, metric, **kwargs):
        """Compute the evaluation across train-test split ratios.

        Parameters
        ----------
        metric : str
            The evaluation metric to compute for each split. This should be
            the name of a `sklearn.metrics` scorer function.
        kwargs :
            On-the-fly overrides to the config option values for the computation.

        Returns
        -------
        TrainTestSplitsResult
        """
        eval_config = PrescConfig(self._config)
        eval_config = eval_config["evaluations"]["train_test_splits"]["computation"]
        if kwargs:
            eval_config.set(kwargs)

        return compute_train_test_splits(
            dataset=self._train_dataset,
            classifier=self._model.classifier,
            metric=metric,
            split_size_increment=eval_config["split_size_increment"].get(float),
            num_replicates=eval_config["num_replicates"].get(int),
            random_state=eval_config["random_state"].get(int),
        )

    def display(self):
        """Computes and displays the train-test splits result for each specified metric."""
        for metric in self._config["evaluations"]["train_test_splits"]["metrics"].get():
            metric_func = metric["function"]
            eval_result = self.compute(metric_func)
            eval_result.display_result(
                metric_name=metric.get("display_name", metric_func)
            )
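
# Usage sketch for TrainTestSplits as defined above. `model` and `train_dataset`
# are assumed to be prepared instances. Per the `compute` docstring the metric
# is named as a sklearn scorer; "accuracy" is an assumed example of such a name.
tts = TrainTestSplits(
    model,
    train_dataset,
    settings={"computation.num_replicates": 5, "computation.random_state": 42},
)
ttsr = tts.compute("accuracy")   # TrainTestSplitsResult
tts.display()                    # one plot per metric listed in the config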

class ConditionalMetric:
    """Computation of confusion-based metrics across subsets of a test dataset.

    Attributes
    ----------
    model : presc.model.ClassificationModel
        The ClassificationModel to run the evaluation for.
    test_dataset : presc.dataset.Dataset
        A Dataset to use for evaluation.
    settings : dict
        An optional dict specifying option values under
        `evaluations.conditional_metric`, eg. `{"computation.num_bins": 5}`.
        These are restricted to the class instance and do not change the global config.
    config : presc.configuration.PrescConfig
        An optional PrescConfig instance to read options from.
        This will be overridden by `settings` values.
    """

    def __init__(self, model, test_dataset, settings=None, config=None):
        source_config = config or global_config
        self._config = PrescConfig(source_config)
        if settings:
            self._config.set({"evaluations": {"conditional_metric": settings}})
        self._model = model
        self._test_dataset = test_dataset
        self._test_pred = self._model.predict_labels(test_dataset)

    def compute_for_column(self, colname, metric, **kwargs):
        """Compute the evaluation for the given dataset column.

        The metric is computed within unique values of the specified column
        (if categorical) or within bins partitioning its range (if continuous).

        Parameters
        ----------
        colname : str
            A column in the dataset to partition on.
        metric : function
            The evaluation metric to compute across the partitions. This should
            be a function f(y_true, y_pred) which accepts Series of true and
            predicted labels.
        kwargs :
            On-the-fly overrides to the config option values for the computation.

        Returns
        -------
        ConditionalMetricResult
        """
        comp_config = PrescConfig(self._config)
        comp_config = comp_config["evaluations"]["conditional_metric"]["computation"]
        col_overrides = comp_config["columns"][colname]
        try:
            col_overrides = col_overrides.get()
        except ConfigError:
            col_overrides = None
        if col_overrides:
            comp_config.set(col_overrides)
        if kwargs:
            comp_config.set(kwargs)

        return compute_conditional_metric(
            grouping_col=self._test_dataset.df[colname],
            true_labs=self._test_dataset.labels,
            pred_labs=self._test_pred,
            metric=metric,
            as_categorical=comp_config["as_categorical"].get(bool),
            num_bins=comp_config["num_bins"].get(int),
            quantile=comp_config["quantile"].get(bool),
        )

    def display(self, colnames=None):
        """Computes and displays the conditional metric result for each specified column.

        Parameters
        ----------
        colnames : list of str
            A list of column names to run the evaluation over, creating a plot
            for each. If not supplied, defaults to the columns specified in the config.
        """
        eval_config = self._config["evaluations"]["conditional_metric"]
        if colnames:
            incl = colnames
            excl = None
        else:
            incl = eval_config["columns_include"].get()
            excl = eval_config["columns_exclude"].get()
        cols = include_exclude_list(
            self._test_dataset.column_names, included=incl, excluded=excl
        )
        for colname in cols:
            metrics = _get_metrics_for_column(colname=colname, eval_config=eval_config)
            for metric in metrics:
                function = metric.get("function")
                display_name = metric.get("display_name")
                eval_result = self.compute_for_column(colname, metric=function)
                eval_result.display_result(xlab=colname, ylab=display_name)
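
# Usage sketch for ConditionalMetric as defined above. `model` and `test_dataset`
# are assumed to be prepared instances; the column names are hypothetical.
# `display()` reads its metrics and columns from the config, while
# `compute_for_column` takes an explicit metric function.
from sklearn.metrics import accuracy_score

cme = ConditionalMetric(model, test_dataset, settings={"computation.num_bins": 5})
cmr = cme.compute_for_column("age", metric=accuracy_score)   # ConditionalMetricResult
cme.display(colnames=["age", "job"])                         # one plot per column and metric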

def test_eval_compute_for_column(test_dataset, classification_model, config_col_override):
    # Defaults
    cde = ConditionalDistribution(classification_model, test_dataset)
    cdr = cde.compute_for_column("a")
    # Same number of bins in each group
    assert_array_equal(cdr.vals.groupby(["label", "predicted"]).size().unique(), 3)
    assert cdr.binning == "fd"
    assert cdr.common_bins is True

    # Global override
    global_config.set({"evaluations.conditional_distribution.computation.binning": 2})
    cde = ConditionalDistribution(classification_model, test_dataset)
    cdr = cde.compute_for_column("a")
    # Same number of bins in each group
    assert_array_equal(cdr.vals.groupby(["label", "predicted"]).size().unique(), 2)
    assert cdr.binning == 2
    assert cdr.common_bins is True

    # Evaluation-level override:
    # explicit settings
    global_config.reset_defaults()
    conf_default = global_config.dump()
    cde = ConditionalDistribution(
        classification_model, test_dataset, settings={"computation.binning": 4}
    )
    cdr = cde.compute_for_column("a")
    # Same number of bins in each group
    assert_array_equal(cdr.vals.groupby(["label", "predicted"]).size().unique(), 4)
    assert cdr.binning == 4
    assert cdr.common_bins is True
    assert global_config.dump() == conf_default

    # config object + settings
    new_config = PrescConfig(global_config)
    new_config.set({"evaluations.conditional_distribution.computation.binning": 5})
    cde = ConditionalDistribution(
        classification_model,
        test_dataset,
        settings={"computation.common_bins": False},
        config=new_config,
    )
    cdr = cde.compute_for_column("a")
    assert_array_equal(cdr.vals.groupby(["label", "predicted"]).size().unique(), 5)
    # Bins are not all the same
    assert isinstance(cdr.bins.index, MultiIndex)
    assert cdr.binning == 5
    assert cdr.common_bins is False
    assert global_config.dump() == conf_default

    # Column-specific override
    cde = ConditionalDistribution(
        classification_model, test_dataset, settings=config_col_override
    )
    cdr_a = cde.compute_for_column("a")
    assert cdr_a.vals.groupby(["label", "predicted"]).size().nunique() > 1
    assert cdr_a.common_bins is False
    cdr_b = cde.compute_for_column("b")
    assert cdr_b.vals.groupby(["label", "predicted"]).size().nunique() == 1
    assert cdr_b.common_bins is True
    assert global_config.dump() == conf_default

    # kwarg override
    conf_cde = cde._config.dump()
    cdr_a = cde.compute_for_column("a", binning=3, common_bins=False)
    assert_array_equal(cdr_a.vals.groupby(["label", "predicted"]).size().unique(), 3)
    # Bins are not all the same
    assert isinstance(cdr_a.bins.index, MultiIndex)
    assert cdr_a.binning == 3
    assert cdr_a.common_bins is False
    cdr_b = cde.compute_for_column("b", binning=3)
    assert_array_equal(cdr_b.vals.groupby(["label", "predicted"]).size().unique(), 3)
    assert not isinstance(cdr_b.bins.index, MultiIndex)
    assert cdr_b.binning == 3
    assert cdr_b.common_bins is True
    assert global_config.dump() == conf_default
    assert cde._config.dump() == conf_cde