示例#1
0
def main():
    """Train and evaluate a two-layer network once per stratified fold.

    Loads the cancer dataset (or higgs, if the alternative line is
    uncommented), splits it into four stratified folds, and for each fold:

    1. builds a ``Layer`` of ``ExtraTreeClassifier`` (``max_depth=1``) on the
       fold's training rows, passing a different value from
       ``(5, 10, 30, 50)`` as the layer's final argument
       (presumably the number of classifiers in the layer -- confirm
       against ``Layer``),
    2. feeds that layer's predictions on the training rows into a second
       ``Layer`` wrapping a single ``LogisticRegression``,
    3. wraps both layers in a ``Network`` and hands it to ``evaluate_test``
       together with the held-out final data.

    Only the training indices of each fold are used; the fold's test
    indices are ignored because evaluation is done on ``X_final``/``y_final``.
    """
    # Directory handed to DatasetReader; adjust to where the data lives.
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")

    X, y, X_final, y_final, dataset = dataset_reader.load_cancer()

    # uncomment for higgs
    # X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    skf = StratifiedKFold(y, n_folds=4, shuffle=True, random_state=42)

    # One value per fold; hoisted out of the loop because it never changes.
    nums = [5, 10, 30, 50]

    # Pair each fold with its value instead of tracking a manual counter.
    for (train, _test), num in zip(skf, nums):
        x_train = X[train]
        y_train = y[train]

        layer = Layer(ExtraTreeClassifier, {"max_depth": 1}, x_train, y_train,
                      num)
        # The second layer is trained on the first layer's outputs.
        predictions = layer.predictAll(x_train)
        lr = Layer(LogisticRegression, {
            "n_jobs": -1,
            "max_iter": 1000
        }, predictions, y_train, 1)
        network = Network([layer, lr])

        evaluate_test(network, X_final, y_final, num, dataset)
示例#2
0
def main():
    """Pick the best decision-tree depth across folds and evaluate it.

    Loads the cancer dataset (or higgs, if the alternative line is
    uncommented), splits it into four stratified folds, and pairs each fold
    with one ``max_depth`` candidate from ``[1, 5, 30, 50]``. A
    ``DecisionTreeClassifier`` is fitted on the fold's training rows and
    scored by plain accuracy on the fold's test rows. The classifier with the
    highest accuracy (first one wins on ties) is passed to ``evaluate_test``
    along with the held-out final data and its depth.
    """
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")

    X, y, X_final, y_final, dataset = dataset_reader.load_cancer()

    # uncomment for higgs
    # X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    skf = StratifiedKFold(y, n_folds=4, shuffle=True, random_state=42)

    dtree_params = [1, 5, 30, 50]

    # Initialised up front so the final evaluate_test call can never hit an
    # unbound name, even if every fold scores 0 accuracy.
    best_classifier = None
    best_depth = None
    best_acc = 0.0

    # Pair each fold with its depth instead of tracking a manual counter.
    for (train, test), depth in zip(skf, dtree_params):
        x_train, y_train = X[train], y[train]
        x_test, y_test = X[test], y[test]

        clf = DecisionTreeClassifier(max_depth=depth,
                                     max_features=1.0,
                                     random_state=42)
        clf.fit(x_train, y_train)

        predictions = clf.predict(x_test)
        correct = sum(1 for predicted, actual in zip(predictions, y_test)
                      if predicted == actual)
        # float() keeps this a true division on Python 2 as well.
        acc = float(correct) / len(predictions)

        # Always accept the first fold; afterwards require a strict
        # improvement so earlier folds win ties.
        if best_classifier is None or acc > best_acc:
            best_classifier = clf
            best_depth = depth
            best_acc = acc

    evaluate_test(best_classifier, X_final, y_final, best_depth, dataset)