def main():
    """Build and evaluate a two-layer stacked model on each cross-validation fold.

    For every fold of a 4-fold stratified split, a ``Layer`` of
    ``ExtraTreeClassifier`` stumps (``max_depth=1``) is built on the fold's
    training rows, its ``predictAll`` output on those same rows is fed into a
    single ``LogisticRegression`` ``Layer``, and the two are combined into a
    ``Network`` that is passed to ``evaluate_test`` together with the
    ``X_final`` / ``y_final`` data returned by the dataset reader.

    Fold ``i`` uses ``nums[i]`` as the size argument of the first layer
    (presumably the number of estimators in that layer -- confirm against
    ``Layer``).
    """
    # Enter path to higgs dataset
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")
    X, y, X_final, y_final, dataset = dataset_reader.load_cancer()
    # uncomment for higgs
    # X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    skf = StratifiedKFold(y, n_folds=4, shuffle=True, random_state=42)

    # One first-layer size per fold; constant, so defined once outside the loop.
    nums = [5, 10, 30, 50]

    # Only the training indices of each fold are used here; evaluation is
    # delegated to evaluate_test on X_final / y_final.
    for ii, (train, _) in enumerate(skf):
        num = nums[ii]
        x_train = X[train]
        y_train = y[train]

        layer = Layer(ExtraTreeClassifier, {"max_depth": 1},
                      x_train, y_train, num)

        # The second layer takes the first layer's outputs as its features.
        predictions = layer.predictAll(x_train)
        lr = Layer(LogisticRegression, {
            "n_jobs": -1,
            "max_iter": 1000
        }, predictions, y_train, 1)

        network = Network([layer, lr])
        evaluate_test(network, X_final, y_final, num, dataset)
def main():
    """Pick the best decision tree across CV folds and evaluate it.

    Runs a 4-fold stratified split; fold ``i`` fits a
    ``DecisionTreeClassifier`` with ``max_depth=dtree_params[i]`` on the
    fold's training rows and scores it by plain accuracy on the fold's test
    rows. The classifier with the highest fold accuracy (the earliest one on
    ties) is then passed to ``evaluate_test`` together with its depth and the
    ``X_final`` / ``y_final`` data returned by the dataset reader.

    NOTE(review): each depth is scored on a different fold, so the comparison
    mixes the effect of depth with the effect of the split.
    """
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")
    X, y, X_final, y_final, dataset = dataset_reader.load_cancer()
    # uncomment for higgs
    # X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    skf = StratifiedKFold(y, n_folds=4, shuffle=True, random_state=42)
    dtree_params = [1, 5, 30, 50]

    # Start below any attainable accuracy so the first fold is always
    # recorded; starting at 0 left best_classifier / best_depth unbound
    # whenever every fold scored exactly 0.0.
    best_acc = -1.0

    for ii, (train, test) in enumerate(skf):
        depth = dtree_params[ii]
        x_train, y_train = X[train], y[train]
        x_test, y_test = X[test], y[test]

        clf = DecisionTreeClassifier(max_depth=depth, max_features=1.0,
                                     random_state=42)
        clf.fit(x_train, y_train)
        predictions = clf.predict(x_test)

        # Fraction of test rows predicted correctly; float() keeps true
        # division regardless of interpreter version.
        correct = sum(1 for predicted, actual in zip(predictions, y_test)
                      if predicted == actual)
        acc = float(correct) / len(predictions)

        # Strict comparison keeps the earliest fold on ties.
        if acc > best_acc:
            best_classifier = clf
            best_depth = depth
            best_acc = acc

    evaluate_test(best_classifier, X_final, y_final, best_depth, dataset)