def main():
    """Train and evaluate a two-layer stacked model, one configuration per fold.

    Loads the HIGGS dataset through ``DatasetReader``, splits the training
    portion into stratified folds, and for each fold pairs the fold's training
    indices with one candidate ``n_estimators`` value.  For every pairing it:

    1. builds a ``Layer`` of ``RandomForestClassifier`` models (``max_depth=1``)
       on the fold's training rows,
    2. feeds that layer's ``predictAll`` output on the same rows into a second
       ``Layer`` wrapping ``LogisticRegression``,
    3. wraps both layers in a ``Network`` and passes it to ``evaluate_test``
       together with the held-out final set.

    The fold's validation indices are not used here; each configuration is
    scored directly by ``evaluate_test`` on ``X_final`` / ``y_final``.
    """
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")
    # uncomment for cancer
    # X, y, X_final, y_final, dataset = dataset_reader.load_cancer()
    X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    # Candidate forest sizes; one fold is consumed per candidate, so the fold
    # count is derived from this list instead of being a separate literal.
    nums = [5, 10, 30, 50]
    skf = StratifiedKFold(y, n_folds=len(nums), shuffle=True, random_state=42)

    for (train, _test), n_estimators in zip(skf, nums):
        x_train = X[train]
        y_train = y[train]

        # NOTE(review): the trailing 10 / 1 arguments are presumably the number
        # of models held by each Layer -- confirm against Layer's definition.
        layer = Layer(
            RandomForestClassifier,
            {"max_depth": 1, "n_estimators": n_estimators},
            x_train,
            y_train,
            10,
        )
        # The second layer is trained on the first layer's predictions for the
        # same training rows.
        predictions = layer.predictAll(x_train)
        lr = Layer(
            LogisticRegression,
            {"n_jobs": -1, "max_iter": 1000},
            predictions,
            y_train,
            1,
        )

        network = Network([layer, lr])
        evaluate_test(network, X_final, y_final, n_estimators, dataset)
def main():
    """Sweep hidden-layer sizes for a small Keras classifier and test the best.

    Loads the HIGGS dataset through ``DatasetReader`` and splits the training
    portion into stratified folds, pairing each fold with one candidate hidden
    layer width.  For every pairing a ``Sequential`` network (sigmoid hidden
    layer, light dropout, 2-way softmax output) is fitted on the fold's
    training rows and scored on the fold's validation rows.  The network with
    the highest validation accuracy is passed to ``evaluate_test`` along with
    the held-out final set.

    Raises:
        RuntimeError: if the fold iterator yields nothing, so no network was
            ever trained.
    """
    dataset_reader = DatasetReader("/scratch/cpillsb1/cs66/data/")
    # uncomment for cancer
    # X, y, X_final, y_final, dataset = dataset_reader.load_cancer()
    X, y, X_final, y_final, dataset = dataset_reader.load_higgs()

    input_s = (30,)
    batch_size = 25
    classes = 2
    # Candidate hidden-layer widths; one fold is consumed per candidate, so the
    # fold count is derived from this list instead of being a separate literal.
    num_nodes = [5, 10, 30, 50]

    skf = StratifiedKFold(y, n_folds=len(num_nodes), shuffle=True, random_state=42)

    # Start with "nothing selected" so the first fold is always accepted, even
    # when its accuracy is 0.0.  Previously the winners were only bound when
    # acc > 0, leaving them undefined if every fold scored zero.
    best_classifier = None
    best_num_nodes = None
    best_acc = 0

    for (train, test), hidden_units in zip(skf, num_nodes):
        x_train = X[train]
        x_test = X[test]
        # One-hot encode the labels to match the 2-unit softmax output.
        y_train = to_categorical(y[train], classes)
        y_test = to_categorical(y[test], classes)

        neural_net = Sequential()
        neural_net.add(Dense(hidden_units,
                             activation='sigmoid',
                             input_shape=input_s,
                             kernel_initializer="TruncatedNormal"))
        neural_net.add(Dropout(.01))
        neural_net.add(Dense(2, activation='softmax'))
        neural_net.compile(optimizer="RMSProp",
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
        neural_net.fit(x_train, y_train,
                       batch_size=batch_size,
                       epochs=100,
                       verbose=0,
                       validation_data=(x_test, y_test))

        # Column 1 of each output row is rounded to a hard 0/1 prediction and
        # compared with column 1 of the one-hot validation labels.
        predicted = [round(row[1]) for row in neural_net.predict(x_test)]
        actual = [row[1] for row in y_test]
        correct = sum(1 for guess, truth in zip(predicted, actual) if guess == truth)
        # float() keeps this a true division on Python 2 as well.
        acc = float(correct) / len(predicted)

        if best_classifier is None or acc > best_acc:
            best_classifier = neural_net
            best_num_nodes = hidden_units
            best_acc = acc

    if best_classifier is None:
        raise RuntimeError("no cross-validation folds were produced; nothing to evaluate")

    evaluate_test(best_classifier, X_final, y_final, best_num_nodes, dataset)