Exemplo n.º 1
0
    def test_cross_val_stack(self):
        """Run chaos feature importance on a train/shadow stack of the digits data.

        The training split is stacked with a duplicate of itself (the
        "shadow"). Rows tagged ``source == 0`` (the original training rows)
        form the selector passed to ``ChaosGeneration.chaos_feature_importance``
        together with ``y_train``. The test expects the call to fill ``dic``
        with more entries than the stacked frames have columns.
        """
        x, y = DataGenerator.get_digits_data()

        # In order to obtain some categorical columns
        x['i63'] = x['i63'].map(str)
        x['i62'] = x['i62'].map(str)
        # Only the training part of the split is used by this test.
        x_train, _, y_train, _ = train_test_split(x,
                                                  y,
                                                  test_size=0.33,
                                                  random_state=42)
        dic = {}

        # ``assign`` returns new, tagged frames instead of writing into the
        # object returned by ``train_test_split`` (avoids pandas'
        # chained-assignment warning). The shadow is built first so that both
        # frames end up with the same columns, 'source' last.
        x_shadow = x_train.assign(source=1)
        x_train = x_train.assign(source=0)
        x_all = pd.concat([x_train, x_shadow])
        shadow_selector = x_all['source'] == 0
        ChaosGeneration.chaos_feature_importance(x_all,
                                                 y_train,
                                                 shadow_selector,
                                                 feat_dic=dic,
                                                 feat_iter=10,
                                                 nb_features=20,
                                                 chaos_gen_iter=30)
        self.assertGreater(len(dic), len(x_train.columns))
        self.assertGreater(len(dic), len(x_shadow.columns))
Exemplo n.º 2
0
def chaosize(x_feat, x_train1, x_valid, y_feat, **chaos_args):
    """Apply chaos feature generation to three frames at once and split the result.

    Each input frame is tagged with a temporary ``source`` column (0 for
    ``x_feat``, 1 for ``x_train1``, 2 for ``x_valid``), the tagged frames are
    stacked, and the stack is handed to ``Cg.chaos_feature_importance`` with
    the ``x_feat`` rows as selector and ``y_feat`` as target.

    Args:
        x_feat: DataFrame whose rows are selected (``source == 0``) in the
            call to ``Cg.chaos_feature_importance``.
        x_train1: DataFrame returned as the first element of the result.
        x_valid: DataFrame returned as the second element of the result.
        y_feat: Target passed alongside the ``x_feat`` selector.
        **chaos_args: Forwarded unchanged to ``Cg.chaos_feature_importance``.

    Returns:
        A ``(x_train_res, x_valid_res)`` tuple: the rows of the stacked frame
        tagged 1 and 2 respectively, with the ``source`` column removed.
    """
    # Tag copies rather than the caller's objects: the inputs are not left
    # with a stray 'source' column, and passing the same DataFrame for two
    # parameters no longer lets a later tag overwrite an earlier one.
    tagged_frames = [
        frame.assign(source=tag)
        for tag, frame in enumerate((x_feat, x_train1, x_valid))
    ]
    x_all = pd.concat(tagged_frames)
    # NOTE(review): the results are read back from x_all below, so this call
    # presumably modifies x_all in place -- confirm against Cg.
    Cg.chaos_feature_importance(x_all, y_feat, x_all['source'] == 0, **chaos_args)
    x_train_res = x_all[x_all['source'] == 1].drop(['source'], axis=1)
    x_valid_res = x_all[x_all['source'] == 2].drop(['source'], axis=1)
    return x_train_res, x_valid_res
Exemplo n.º 3
0
def chaosize(x_feat, x_mirror, x_valid, y_feat, **chaos_args):
    """Apply chaos feature generation to three frames at once and split the result.

    Each input frame is tagged with a temporary ``source`` column (0 for
    ``x_feat``, 1 for ``x_mirror``, 2 for ``x_valid``), the tagged frames are
    stacked, and the stack is handed to ``Cg.chaos_feature_importance`` with
    the ``x_feat`` rows as selector and ``y_feat`` as target.

    Args:
        x_feat: DataFrame whose rows are selected (``source == 0``) in the
            call to ``Cg.chaos_feature_importance``.
        x_mirror: DataFrame returned as the first element of the result.
        x_valid: DataFrame returned as the second element of the result.
        y_feat: Target passed alongside the ``x_feat`` selector.
        **chaos_args: Forwarded unchanged to ``Cg.chaos_feature_importance``.

    Returns:
        A ``(x_train_res, x_valid_res)`` tuple: the rows of the stacked frame
        tagged 1 and 2 respectively, with the ``source`` column removed.
    """
    # Tag copies rather than the caller's objects: the inputs are not left
    # with a stray 'source' column, and passing the same DataFrame for two
    # parameters no longer lets a later tag overwrite an earlier one.
    tagged_frames = [
        frame.assign(source=tag)
        for tag, frame in enumerate((x_feat, x_mirror, x_valid))
    ]
    x_all = pd.concat(tagged_frames)
    # NOTE(review): the results are read back from x_all below, so this call
    # presumably modifies x_all in place -- confirm against Cg.
    Cg.chaos_feature_importance(x_all, y_feat, x_all['source'] == 0,
                                **chaos_args)
    x_train_res = x_all[x_all['source'] == 1].drop(['source'], axis=1)
    x_valid_res = x_all[x_all['source'] == 2].drop(['source'], axis=1)
    return x_train_res, x_valid_res
    def test_cross_val_stack_none(self):
        """Run chaos feature importance on the training split alone.

        No second frame is stacked: every row of ``x_train`` is tagged
        ``source == 0``, so the selector passed to
        ``ChaosGeneration.chaos_feature_importance`` covers the whole frame.
        The test expects the call to fill ``dic`` with more entries than
        ``x_train`` has columns.
        """
        x, y = DataGenerator.get_digits_data()

        # In order to obtain some categorical columns
        x["i63"] = x["i63"].map(str)
        x["i62"] = x["i62"].map(str)
        # Only the training part of the split is used by this test.
        x_train, _, y_train, _ = train_test_split(x, y, test_size=0.33, random_state=42)
        dic = {}

        # ``assign`` returns a new, tagged frame instead of writing into the
        # object returned by ``train_test_split`` (avoids pandas'
        # chained-assignment warning).
        x_train = x_train.assign(source=0)
        shadow_selector = x_train["source"] == 0
        ChaosGeneration.chaos_feature_importance(
            x_train, y_train, shadow_selector, feat_dic=dic, feat_iter=10, nb_features=20, chaos_gen_iter=30
        )
        self.assertGreater(len(dic), len(x_train.columns))
Exemplo n.º 5
0
    def test_handle_nocategoric_nonreg(self):
        """Non-regression check for data without string-converted columns.

        The digits data is used exactly as ``DataGenerator.get_digits_data``
        returns it. Every training row is tagged ``source == 0`` and the whole
        frame is passed to ``ChaosGeneration.chaos_feature_importance``; the
        test expects the supplied dict to end up with more entries than the
        training frame has columns.
        """
        features, target = DataGenerator.get_digits_data()

        # Hold out a third of the rows; only the training part is used below.
        train_features, _held_out_x, train_target, _held_out_y = train_test_split(
            features, target, test_size=0.33, random_state=42)

        importance_by_feature = {}
        train_features.loc[:, 'source'] = 0
        all_rows = train_features['source'] == 0

        chaos_options = dict(feat_dic=importance_by_feature,
                             feat_iter=10,
                             nb_features=20,
                             chaos_gen_iter=30)
        ChaosGeneration.chaos_feature_importance(train_features,
                                                 train_target,
                                                 all_rows,
                                                 **chaos_options)

        column_count = len(train_features.columns)
        self.assertGreater(len(importance_by_feature), column_count)