Exemplo n.º 1
0
    def transform_regression(self, c_config, d_config):
        """Replay the stored feature transforms on ``self.test_df``.

        The continuous transforms (``self.c_transform_methods``), the bagging
        callables (``self.bagging_methods``) and the discrete transforms
        (``self.d_transform_methods``) are applied, in that order, to the
        continuous / discrete column subsets of ``self.test_df``.

        Args:
            c_config: Passed through to ``AutoGenerator`` for the continuous
                columns.
            d_config: Passed through to ``AutoGenerator`` for the discrete
                columns.

        Returns:
            The result of ``remove_same`` applied to the concatenated frame
            after ``get_dummy``.

        Side effects:
            Writes the intermediate frame to ``middle2.csv`` in the current
            working directory.
        """
        # step1: continuous columns.
        # Take explicit copies: columns are added to both frames below, and
        # assigning into a frame produced by column selection raises pandas'
        # SettingWithCopyWarning.
        test_continuous_df = self.test_df[self.continuous_df_col].copy()
        test_discrete_df = self.test_df[self.discrete_df_col].copy()

        ag = AutoGenerator(test_continuous_df, None, c_config, None,
                           self.clf_obj, self.metric)
        # Compute every new column before attaching any of them, so each
        # restore_ind call receives the frame as it was at the start of the
        # step.
        new_add = {
            col: ag.restore_ind(transform_method, test_continuous_df)
            for col, transform_method in self.c_transform_methods.items()
        }
        for col, new_add_value in new_add.items():
            test_continuous_df[col] = new_add_value

        # step2: bagging columns derived from the continuous frame, then the
        # discrete transforms.
        for col, bagging_method in self.bagging_methods.items():
            test_discrete_df["bagging{}".format(col)] = bagging_method(
                test_continuous_df[col].values)

        ag = AutoGenerator(test_discrete_df, None, d_config, None,
                           self.clf_obj, self.metric)
        new_add = {
            col: ag.restore_ind(transform_method, test_discrete_df)
            for col, transform_method in self.d_transform_methods.items()
        }
        for col, new_add_value in new_add.items():
            test_discrete_df[col] = new_add_value

        test_df = pd.concat([test_discrete_df, test_continuous_df], axis=1)

        test_df = get_dummy(test_df, self.dummy_candidate_col)
        test_df = remove_same(test_df)
        # Intermediate dump of the transformed frame.
        test_df.to_csv("middle2.csv", index=False)

        # step3
        # NOTE(review): remove_same was already applied above; this second
        # call looks redundant -- confirm against remove_same before removing.
        new_test_df = remove_same(test_df)
        return new_test_df
Exemplo n.º 2
0
    def __transform_classification(self, c_config, d_config, df):
        """Replay the stored feature transforms on ``df``.

        If ``df`` has a column named ``self.target_label`` it is set aside
        before the transforms run and concatenated back onto the result.
        The continuous transforms (``self.c_transform_methods``), the bagging
        callables (``self.bagging_methods``) and the discrete transforms
        (``self.d_transform_methods``) are then applied in that order.

        Args:
            c_config: Passed through to ``AutoGenerator`` for the continuous
                columns.
            d_config: Passed through to ``AutoGenerator`` for the discrete
                columns.
            df: Frame to transform; must contain the columns listed in
                ``self.continuous_df_col`` and ``self.discrete_df_col``.

        Returns:
            The result of ``remove_same`` on the transformed frame, with the
            target column appended when one was present and non-empty.

        Side effects:
            Writes the intermediate frame to ``middle2.csv`` in the current
            working directory.
        """
        target = pd.DataFrame()
        if self.target_label in df.columns:
            target = pd.DataFrame(df[self.target_label])
            df = df.drop(self.target_label, axis=1)

        # step1: continuous columns.
        # Take explicit copies: columns are added to both frames below, and
        # assigning into a frame produced by column selection raises pandas'
        # SettingWithCopyWarning.
        test_continuous_df = df[self.continuous_df_col].copy()
        test_discrete_df = df[self.discrete_df_col].copy()

        ag = AutoGenerator(test_continuous_df, None, c_config, None,
                           self.clf_obj, self.metric)
        # Compute every new column before attaching any of them, so each
        # restore_ind call receives the frame as it was at the start of the
        # step.
        new_add = {
            col: ag.restore_ind(transform_method, test_continuous_df)
            for col, transform_method in self.c_transform_methods.items()
        }
        for col, new_add_value in new_add.items():
            test_continuous_df[col] = new_add_value

        # step2: bagging columns derived from the continuous frame, then the
        # discrete transforms.
        for col, bagging_method in self.bagging_methods.items():
            test_discrete_df["bagging{}".format(col)] = bagging_method(
                test_continuous_df[col].values)

        ag = AutoGenerator(test_discrete_df, None, d_config, None,
                           self.clf_obj, self.metric)
        new_add = {
            col: ag.restore_ind(transform_method, test_discrete_df)
            for col, transform_method in self.d_transform_methods.items()
        }
        for col, new_add_value in new_add.items():
            test_discrete_df[col] = new_add_value

        test_df = pd.concat([test_discrete_df, test_continuous_df], axis=1)

        test_df = get_dummy(test_df, self.dummy_candidate_col)
        test_df = remove_same(test_df)
        # Intermediate dump of the transformed frame.
        test_df.to_csv("middle2.csv", index=False)

        # step3
        # NOTE(review): remove_same was already applied above; this second
        # call looks redundant -- confirm against remove_same before removing.
        new_test_df = remove_same(test_df)

        # Re-attach the target only when one was actually split off above
        # (the placeholder frame has length 0).
        if len(target) != 0:
            new_test_df = pd.concat([new_test_df, target], axis=1)
        return new_test_df
Exemplo n.º 3
0
    def pre_select2(self, train_df, test_df, target_label):
        """Run feature selection on the training frame and mirror it on test.

        Args:
            train_df: Training frame handed to ``FeatureSelection.run``.
            test_df: Test frame from which the selected columns are taken.
            target_label: Passed to ``FeatureSelection.run`` together with the
                fixed value 150.

        Returns:
            A tuple ``(train_df, new_test_df)``: the frame returned by
            ``FeatureSelection.run`` and the result of ``remove_same`` on a
            frame holding the selected columns that exist in ``test_df``.
        """
        fs = FeatureSelection()
        train_df, select_col = fs.run(train_df, target_label, 150)

        # Build the frame in one constructor call instead of inserting up to
        # 150 columns one by one, which fragments the DataFrame and makes
        # pandas emit a PerformanceWarning. Raw ``.values`` are used, as
        # before, so the result gets a fresh default index.
        test_columns = test_df.columns
        new_test_df = pd.DataFrame({
            col: test_df[col].values
            for col in select_col
            if col in test_columns
        })
        new_test_df = remove_same(new_test_df)
        return train_df, new_test_df
Exemplo n.º 4
0
    def fit_classification(self, c_config, d_config):
        """Generate features from ``self.train_df`` and record how to redo it.

        The feature columns are split by ``ColType`` into a discrete and a
        continuous frame. ``AutoGenerator`` is run on each (continuous first),
        with ``LoopBagging`` on the continuous frame in between. Along the way
        the method stores:

        * ``self.continuous_df_col`` / ``self.discrete_df_col`` -- the column
          indexes of the two frames as returned by ``ColType``;
        * ``self.c_transform_methods`` / ``self.d_transform_methods`` -- the
          transform method for every generated ``new<i>`` / ``newd<i>``
          column;
        * ``self.bagging_methods`` -- element 0 of each ``LoopBagging``
          result entry, keyed by source column.

        Args:
            c_config: Passed through to ``AutoGenerator`` for the continuous
                frame.
            d_config: Passed through to ``AutoGenerator`` for the discrete
                frame.

        Returns:
            The combined frame after ``get_dummy`` and ``remove_same``.

        Side effects:
            Prints progress information and writes the intermediate frame to
            ``/tmp/middle.csv``.
        """
        labels = self.train_df[self.target_label]
        features = self.train_df.drop(self.target_label, axis=1)

        def run_generator(generator):
            # Both passes use the same settings for AutoGenerator.run.
            return generator.run(popsize=50,
                                 matepb=0.7,
                                 mutpb=0.2,
                                 gensize=20,
                                 selectsize=100,
                                 kbest=50,
                                 direction=self.direction)

        # step1: split the features and remember which columns went where.
        disc_df, cont_df = ColType(features).run()
        print("cshape")
        print(cont_df.shape)
        print(disc_df.shape)
        self.continuous_df_col = cont_df.columns
        self.discrete_df_col = disc_df.columns

        # step2: generate columns from the continuous frame (target attached).
        print("begin generate")
        cont_df = pd.concat([cont_df, labels], axis=1)
        generator = AutoGenerator(cont_df, self.target_label, c_config,
                                  features, self.clf_obj, self.metric)
        generated, methods = run_generator(generator)
        for idx in range(len(generated)):
            name = "new{}".format(idx)
            cont_df[name] = generated[idx]
            self.c_transform_methods[name] = methods[idx]

        # step3: bagging columns go into the discrete frame, which then gets
        # its own generation pass.
        bagging_result = LoopBagging(cont_df, self.target_label).run()
        print("new add baggung")
        print(bagging_result)
        for source_col, entry in bagging_result.items():
            disc_df["bagging{}".format(source_col)] = entry[2]
            self.bagging_methods[source_col] = entry[0]

        disc_df = pd.concat([disc_df, labels], axis=1)
        generator = AutoGenerator(disc_df, self.target_label, d_config,
                                  features, self.clf_obj, self.metric)
        generated, methods = run_generator(generator)
        for idx in range(len(generated)):
            name = "newd{}".format(idx)
            disc_df[name] = generated[idx]
            self.d_transform_methods[name] = methods[idx]

        # step4: combine both frames, encode, and dump the intermediate
        # result.
        combined = pd.concat([disc_df, cont_df], axis=1)
        combined = get_dummy(combined, self.dummy_candidate_col)

        combined = remove_same(combined)
        combined.to_csv("/tmp/middle.csv", index=False)

        # remove_same is applied a second time on the way out, as before.
        return remove_same(combined)