Exemplo n.º 1
0
def execute_feature_selection_a_run(run, methods):
    """Apply every requested feature-selection method to one run.

    Args:
        run: Indexable sequence; element 0 is used as the training feature
            matrix and element 2 as the training labels.
        methods: Iterable of specs shaped ``'<selector>_<n_features>'``. The
            text before the first underscore names the selector and the
            second underscore-separated field is parsed as an integer.

    Returns:
        list: One ``fselect.run_feature_selection`` result per entry of
        ``methods``, in the same order.
    """
    X_train, y_train = run[0], run[2]

    def _select(spec):
        # Split once and pick the two leading fields of the spec.
        fields = spec.split('_')
        selector = fields[0]
        n_features = int(fields[1])
        return fselect.run_feature_selection(selector, X_train, y_train,
                                             n_features)

    return [_select(spec) for spec in methods]
Exemplo n.º 2
0
def normal_run(data,
               n_seed=2,
               splits=5,
               methods=None,
               estimators=None):
    """Run repeated stratified k-fold feature selection and classification.

    For each seed in ``range(n_seed)`` the data is split with a shuffled
    ``StratifiedKFold``. Within every fold both partitions are imputed with
    ``dp.impute``, each feature-selection method is run on the training
    partition, and every estimator is trained/evaluated through
    ``e.classify`` on the selected columns. Each per-fold result is written
    to ``<data_path><estimator><method>_<split>_<seed>.csv``; per-seed and
    final aggregation is delegated to ``create_interim_csv``,
    ``delete_interim_csv`` and ``create_final_csv``.

    Args:
        data: Table exposing ``drop('HPYLORI', axis=1)`` and an ``HPYLORI``
            attribute (presumably a pandas DataFrame -- confirm with the
            caller). ``HPYLORI`` is the target; all other columns are
            features.
        n_seed (int): Number of repetitions; seeds ``0 .. n_seed - 1`` are
            used as ``random_state`` for the splitter.
        splits (int): Number of folds per repetition.
        methods (list[str] | None): Feature-selection specs shaped
            ``'<selector>_<n_features>'``. Defaults to ``['infogain_10']``.
        estimators (list[str] | None): Estimator names passed to
            ``e.classify``. Defaults to ``['rdforest']``.

    Returns:
        Whatever ``create_final_csv(outer_names, n_seed)`` returns (bound to
        ``file_list`` here).

    Side effects:
        Creates the ``data`` and ``results`` directories in the current
        working directory when missing, writes CSV files, and prints
        ``outer_names``.
    """
    # Resolve defaults here instead of in the signature so that no list
    # object is shared between calls.
    if methods is None:
        methods = ['infogain_10']
    if estimators is None:
        estimators = ['rdforest']

    X = np.array(data.drop('HPYLORI', axis=1))
    y = np.array(data.HPYLORI)

    for directory in ('data', 'results'):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Per-column tally of how often each original feature was selected.
    # NOTE(review): the tally is accumulated but never returned or saved.
    features = np.zeros(X.shape[1])
    outer_names = create_empty_dic(estimators, methods)

    for seed in range(n_seed):
        skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
        names = create_empty_dic(estimators, methods)

        # Folds are numbered from 1 in the generated file names.
        for split_num, (train, test) in enumerate(skf.split(X, y), start=1):
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            X_train, X_test = dp.impute(X_train), dp.impute(X_test)

            for s in methods:
                spec = s.split('_')
                selector = spec[0]
                n_features = int(spec[1])
                selection = fselect.run_feature_selection(
                    selector, X_train, y_train, n_features)
                for i in selection:
                    features[i] += 1

                # Keep the full fold matrices intact: rebinding
                # X_train/X_test here would make every later method select
                # from the already-reduced columns and return indices that
                # no longer refer to the original features.
                X_train_sel = X_train[:, selection]
                X_test_sel = X_test[:, selection]

                for estimator in estimators:
                    result = e.classify(estimator, X_train_sel, X_test_sel,
                                        y_train, y_test)
                    filename = data_path + estimator + s + '_' + str(
                        split_num) + '_' + str(seed)
                    names[estimator][s].append(filename)
                    result.to_csv(filename + '.csv')

        create_interim_csv(names, outer_names, seed, splits)
        delete_interim_csv(names)
    print(outer_names)
    file_list = create_final_csv(outer_names, n_seed)

    return file_list
Exemplo n.º 3
0
def execute_feature_selection_a_run(run, methods):
    """Execute the feature selections of one run for the given methods.

    Args:
        run (list): ``X_train, X_test, y_train, y_test``; only the training
            features (index 0) and training labels (index 2) are read.
        methods (list): Feature-selection specs shaped
            ``'<selector>_<n_features>'``.

    Returns:
        list: A list of the same size as ``methods`` where each item is the
        list of features selected by that method for this run.
    """
    X_train = run[0]
    y_train = run[2]

    selected_per_method = []
    for spec in methods:
        # Split once; field 0 names the selector, field 1 is the count.
        parts = spec.split('_')
        selector = parts[0]
        n_features = int(parts[1])
        chosen = fselect.run_feature_selection(selector, X_train, y_train,
                                               n_features)
        selected_per_method.append(chosen)
    return selected_per_method