Пример #1
0
    def testNumeric(self):
        """Check that transformed numeric columns contain only fitted bin values.

        A ``lastochka`` -> logistic-regression pipeline is fitted on a
        synthetic binary problem.  For every column, the set of values
        returned by ``transform`` (train split and test split) must equal
        the set of ``bin_stats["woe_value"]`` of that column's optimizer.
        """
        N_SAMPLES = 200

        X, y = make_classification(n_samples=N_SAMPLES, n_features=10, n_informative=2, random_state=42)
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)
        column_names = ['X%i' % i for i in range(10)]

        D_train = pd.DataFrame(X_train, columns=column_names)
        D_test = pd.DataFrame(X_test, columns=column_names)

        lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
        log = LogisticRegression()

        pipe = Pipeline(steps=[
                ('lastochka', lastochka),
                ('log', log)])

        # Fitting the pipeline also fits the ``lastochka`` step in place,
        # so it can be used directly for the transforms below.
        pipe.fit(D_train, y_train)
        X_w = lastochka.transform(D_train)
        X_wt = lastochka.transform(D_test)

        for variable in column_names:
            vt = lastochka.get_transformer(variable)
            expected_values = set(vt.optimizer_instance.bin_stats["woe_value"])
            # assertSetEqual lists the missing/unexpected elements on failure,
            # which assertTrue(set_a == set_b) cannot do.
            self.assertSetEqual(
                expected_values, set(X_w[variable].unique()),
                msg="train split, column %s" % variable)
            self.assertSetEqual(
                expected_values, set(X_wt[variable].unique()),
                msg="test split, column %s" % variable)
Пример #2
0
 def testInputArray(self):
     """Check that fitting on a NumPy array makes ``transform`` return a NumPy array."""
     # A local, seeded generator keeps the fixture reproducible from run to
     # run without touching the global NumPy random state.
     rng = np.random.RandomState(42)
     X = rng.normal(0, 1, size=(200, 3))
     y = rng.randint(0, 2, size=200)
     lastochka = LastochkaTransformer()
     lastochka.fit(X, y)
     X_w = lastochka.transform(X)
     self.assertIsInstance(X_w, np.ndarray)
Пример #3
0
    def testVerbose(self):
        """Smoke test: fitting with ``verbose=False`` completes without raising.

        The frame mixes five numeric columns with one string column
        (``"CAT"``).  There is no assertion; the test passes if ``fit``
        does not raise.
        """
        N_SAMPLES = 100
        X, y = make_classification(n_samples=N_SAMPLES, n_features=5, n_informative=2, random_state=42)
        column_names = ['X%i' % i for i in range(5)]
        X_df = pd.DataFrame(X, columns=column_names)
        # Seeded local generator so the categorical column is identical on
        # every run (the numeric part is already pinned by random_state=42).
        rng = np.random.RandomState(42)
        X_df["CAT"] = rng.choice(list("ABCDF"), N_SAMPLES)

        lastochka = LastochkaTransformer(verbose=False, n_final=3, n_initial=10)

        lastochka.fit(X_df, y)
Пример #4
0
    def testIncrease(self):
        """Check that the lastochka pipeline beats plain logistic regression on ROC AUC.

        The Boston target is binarised at its median, the data is split
        70/30 (stratified), and both models are scored on the held-out
        part.  The pipeline's AUC must be strictly greater.
        """
        # NOTE(review): load_boston appears to have been removed from recent
        # scikit-learn releases -- confirm against the pinned version.
        boston = load_boston()
        features = pd.DataFrame(boston["data"], columns=boston["feature_names"])
        prices = boston["target"]
        labels = (prices >= np.median(prices)).astype(int)

        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, stratify=labels, test_size=0.3, random_state=42)

        woe_pipeline = Pipeline(steps=[
            ('lastochka', LastochkaTransformer(verbose=True, n_final=3, n_initial=10)),
            ('log', LogisticRegression()),
        ])
        baseline = LogisticRegression()

        woe_pipeline.fit(X_train, y_train)
        baseline.fit(X_train, y_train)

        # Score both models on the positive-class probability column.
        pipe_auc, baseline_auc = (
            roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
            for model in (woe_pipeline, baseline)
        )
        print("Pipe AUC: %0.5f, Log AUC: %0.5f" % (pipe_auc, baseline_auc))
        self.assertGreater(pipe_auc, baseline_auc)
Пример #5
0
 def testSpecial(self):
     """Check that the special values passed for ``PTRATIO`` are recorded after fitting.

     The keys of the fitted ``PTRATIO`` transformer's ``specials_stats``
     must equal, in order, the list supplied through ``specials``.
     """
     # NOTE(review): load_boston appears to have been removed from recent
     # scikit-learn releases -- confirm against the pinned version.
     _boston = load_boston()
     X = pd.DataFrame(_boston["data"], columns=_boston["feature_names"])
     y = (_boston["target"] >= np.median(_boston["target"])).astype(int)
     specs = {"PTRATIO": [20.2, 14.7]}
     lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10, specials=specs)
     lastochka.fit(X, y)
     lastochka.transform(X)
     recorded_specials = list(lastochka.get_transformer("PTRATIO").specials_stats.keys())
     # assertListEqual shows the first differing element on failure, whereas
     # assertTrue(a == b) would only report "False is not true".
     self.assertListEqual(recorded_specials, specs["PTRATIO"])
Пример #6
0
 def testMissing(self):
     """Check that a column containing NaNs gets a ``missing_woe_value`` after fitting."""
     # NOTE(review): load_boston appears to have been removed from recent
     # scikit-learn releases -- confirm against the pinned version.
     _boston = load_boston()
     X = pd.DataFrame(_boston["data"], columns=_boston["feature_names"])
     np.random.seed(2)
     # choice() samples with replacement by default, so at most 200 distinct
     # rows of "ZN" are blanked out.
     indexes = np.random.choice(X.index.tolist(), 200)
     X.loc[indexes, "ZN"] = np.nan
     y = (_boston["target"] >= np.median(_boston["target"])).astype(int)
     lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
     lastochka.fit(X, y)
     lastochka.transform(X)
     # Dedicated assertion gives a clearer failure message than
     # assertTrue(value is not None).
     self.assertIsNotNone(lastochka.get_transformer("ZN").missing_woe_value)
Пример #7
0
 def testCategory(self):
     """Check that a string-typed column is handled by a ``CategoryOptimizer``."""
     # NOTE(review): load_boston appears to have been removed from recent
     # scikit-learn releases -- confirm against the pinned version.
     dataset = load_boston()
     frame = pd.DataFrame(dataset["data"], columns=dataset["feature_names"])
     # Re-encode RAD as strings under a new name, then drop the numeric original.
     frame["RAD_CAT"] = frame["RAD"].astype(str)
     frame = frame.drop("RAD", axis=1)
     target = dataset["target"]
     labels = (target >= np.median(target)).astype(int)
     transformer = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
     transformer.fit(frame, labels)
     transformer.transform(frame)
     self.assertIsInstance(
         transformer.get_transformer("RAD_CAT").optimizer_instance,
         CategoryOptimizer,
     )
Пример #8
0
 def testEmpty(self):
     """Check that fitting on an empty data set raises ``ValueError``."""
     # Two distinct, empty columns: the names are separate list items, not a
     # single "X1,X2" string (which would declare one oddly named column).
     _X = pd.DataFrame(columns=["X1", "X2"])
     _y = np.array([])
     lastochka = LastochkaTransformer()
     with self.assertRaises(ValueError):
         lastochka.fit(X=_X, y=_y)
Пример #9
0
 def testInputList(self):
     """Check that plain Python lists are rejected by ``fit`` with ``TypeError``."""
     rows = [[1, 2, 3], [4, 5, 6]]
     labels = [0, 1]
     transformer = LastochkaTransformer()
     with self.assertRaises(TypeError):
         transformer.fit(X=rows, y=labels)
Пример #10
0
 def testNonBinary(self):
     """Check that a target with more than two classes makes ``fit`` raise ``ValueError``."""
     # A local, seeded generator keeps the fixture reproducible from run to
     # run without touching the global NumPy random state.
     rng = np.random.RandomState(42)
     X = rng.normal(0, 1, size=(200, 3))
     # randint's upper bound is exclusive, so labels are drawn from {0, 1, 2}.
     y = rng.randint(0, 3, size=200)
     lastochka = LastochkaTransformer()
     with self.assertRaises(ValueError):
         lastochka.fit(X=X, y=y)