Example #1
    def test_sensitivity_functions(self, is_classification=False):
        sensitivity = SensitivityAnalysis()
        model = self.get_model(is_classification)

        train = self.get_data(is_classification)
        model.fit(train.values, train.target.ravel())

        test = self.get_data(is_classification, seed=1)

        shuffled = sensitivity.sensitivity_analysis(
            model=model,
            metric=Metrics.mae.name,
            dmd_test=test,
            method=SensitivityTypes.shuffled,
            raw_scores=False)

        missing = sensitivity.sensitivity_analysis(
            model=model,
            metric=Metrics.mae.name,
            dmd_test=test,
            method=SensitivityTypes.missing,
            raw_scores=False)

        stats = sensitivity._sensitivity_stats_report(shuffled.sensitivities)
        n_features = stats.n_features
        n_zero = stats.n_zero
        n_very_low = stats.n_very_low
        n_low = stats.n_low

        leakage_score = sensitivity._leakage(n_features=n_features,
                                             n_very_low=n_very_low,
                                             n_zero=n_zero)

        self.assertGreater(leakage_score, 0)
        self.assertLessEqual(leakage_score, 1)

        overfit_score = sensitivity._too_many_features(n_features=n_features,
                                                       n_very_low=n_very_low,
                                                       n_low=n_low,
                                                       n_zero=n_zero)

        self.assertGreaterEqual(overfit_score, 0)
        self.assertLessEqual(overfit_score, 1)

        imputation_score = sensitivity._imputation_score(shuffled=shuffled,
                                                         missing=missing)
        self.assertGreaterEqual(imputation_score, 0)
        self.assertLessEqual(imputation_score, 1)

        report = sensitivity._vulnerability_report(
            shuffled_sensitivity=shuffled, missing_sensitivity=missing)
        self.assertTrue(0 <= report.imputation <= 1)
        self.assertTrue(0 <= report.leakage <= 1)
        self.assertTrue(0 <= report.too_many_features <= 1)
Example #2
    def test_sensitivity_impact_regression(self, is_classification=False):
        sensitivity = SensitivityAnalysis()
        model = self.get_model(is_classification)

        train = self.get_data(is_classification)
        model.fit(train.values, train.target.ravel())

        test = self.get_data(is_classification, seed=1)

        scores = sensitivity.sensitivity_analysis(
            model=model,
            metric=Metrics.mae.name,
            dmd_test=test,
            method=SensitivityTypes.missing,
            raw_scores=False)

        scores = scores.sensitivities
        print(scores)
        self.assertTrue(isinstance(scores, dict))
        self.assertEqual(len(scores), test.n_features)
        self.assertGreaterEqual(
            numpy.round(sum(scores.values()), 6), 1 - 1e-5)
        self.assertEqual(scores['f_1'], 0)
        self.assertGreaterEqual(scores['f_0'], 2 / len(scores))

        o_scores = [v for k, v in scores.items() if k not in ['f_0', 'f_1']]
        self.assertLessEqual(numpy.std(o_scores), 0.05)
        self.assertGreaterEqual(scores['f_0'], numpy.mean(o_scores))
Example #3
    def test_sensitivity_raw_shuffled_classification(self,
                                                     is_classification=True):
        sensitivity = SensitivityAnalysis()
        model = self.get_model(is_classification)

        train = self.get_data(is_classification)
        model.fit(train.values, train.target.ravel())

        test = self.get_data(is_classification, seed=1)

        raw_scores = sensitivity.sensitivity_analysis(
            model=model,
            metric=Metrics.recall.name,
            dmd_test=test,
            method=SensitivityTypes.shuffled,
            raw_scores=True)

        raw_scores = raw_scores.sensitivities
        self.assertTrue(isinstance(raw_scores, dict))
        self.assertEqual(len(raw_scores), test.n_features)
        self.assertLessEqual(raw_scores['f_0'], 0.5)
        self.assertEqual(raw_scores['f_1'], 1.0)
        self.assertLessEqual(max(raw_scores.values()), 1.0)

        scores = [v for k, v in raw_scores.items() if k not in ['f_0', 'f_1']]
        self.assertLessEqual(numpy.std(scores), 0.05)
Example #4
class CovarianceShift:
    def __init__(self):
        self._covariance_shift = None
        self._cov_train = None
        self._cov_test = None
        self._classifier = None
        self._sensitivity = None

        self._dmd_train = None
        self._dmd_test = None

    def calc_covariance_shift(self, dmd_train: DMD, dmd_test: DMD):
        # save data for later report
        self._dmd_train = dmd_train
        self._dmd_test = dmd_test

        # split data into new train / test sets
        self._cov_train, self._cov_test = (
            CovarianceShiftCalculator.prepare_dataset_for_score_quality(
                dmd_train=dmd_train, dmd_test=dmd_test))
        self._classifier = CovarianceShiftCalculator.prepare_estimator(
            train=self._cov_train)
        self._covariance_shift = CovarianceShiftCalculator.calc_convriance_shift_auc(
            classifier=self._classifier, test=self._cov_test)

    def covariance_shift_report(self):
        medium_lvl = 0.7
        high_lvl = 0.95
        if self.covariance_shift > medium_lvl:
            sensitivity_report = self.calc_sensitivity_report()
        else:
            sensitivity_report = None

        return CovarianceShiftReport(covariance_shift=self.covariance_shift,
                                     sensitivity_report=sensitivity_report,
                                     medium_lvl=medium_lvl, high_lvl=high_lvl,
                                     train=self._dmd_train,
                                     test=self._dmd_test)

    def calc_sensitivity_report(self):
        try:
            from pytolemaic.analysis_logic.model_analysis.sensitivity.sensitivity import SensitivityAnalysis
        except ImportError:
            logging.exception("Failed to import SensitivityAnalysis")
            return None

        self._sensitivity = SensitivityAnalysis()
        sensitivity_report = self._sensitivity.sensitivity_analysis(
            model=self.classifier,
            dmd_train=self.train,
            dmd_test=self.test,
            metric=Metrics.auc.name)
        return sensitivity_report

    @property
    def sensitivity(self):
        return self._sensitivity

    @property
    def separation_quality(self):
        return 1 - self._covariance_shift

    @property
    def covariance_shift(self):
        return self._covariance_shift

    @property
    def train(self):
        return self._cov_train

    @property
    def test(self):
        return self._cov_test

    @property
    def classifier(self):
        return self._classifier
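
A minimal usage sketch of the class above. It assumes two pre-built DMD datasets (dmd_train, dmd_test) are already available and only exercises the methods and properties shown in Example #4; it is illustrative, not part of the library's test suite.

# Hypothetical usage of CovarianceShift; dmd_train / dmd_test are assumed
# to be existing DMD instances (not constructed here).
cov = CovarianceShift()
cov.calc_covariance_shift(dmd_train=dmd_train, dmd_test=dmd_test)

print(cov.covariance_shift)     # AUC of the train-vs-test classifier
print(cov.separation_quality)   # 1 - covariance_shift

# Builds a CovarianceShiftReport; a sensitivity report is attached only
# when covariance_shift exceeds the medium level (0.7).
report = cov.covariance_shift_report()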