else:
    L, perm = build_laplacian(k)

x, y = get_data(k)
kf = KFold(n_splits=5, shuffle=True, random_state=0)  # random_state requires shuffle=True
final_accuracy = 0
train_accuracy = 0
all_loss, all_acc = 0, 0

for train_index, test_index in kf.split(x):
    x_train, y_train = x[train_index], y[train_index]
    x_test, y_test = x[test_index], y[test_index]

    # Scale features with statistics from the training fold only.
    scaler = MinMaxScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    # Reorder vertices to match the coarsened graph hierarchy.
    x_train = coarsening.perm_data(x_train, perm)
    x_test = coarsening.perm_data(x_test, perm)

    model = models.cgcnn(L, **build_params())
    accuracy, loss, t_step = model.fit(x_train, y_train, x_test, y_test)
    # all_acc += accuracy[-1]
    # all_loss += loss[-1]
    final_accuracy += accuracy[-1]
    train_accuracy += model.evaluate(x_train, y_train)[1]

final_accuracy /= 5
train_accuracy /= 5
all_loss /= 5
all_acc /= 5
print("final accuracy:", final_accuracy)
print("training accuracy:", train_accuracy)

fig, ax1 = plt.subplots(figsize=(15, 5))
ax1.plot(accuracy, 'b.-')
ax1.set_ylabel('validation accuracy', color='b')
ax2 = ax1.twinx()
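# The loop above relies on a build_params() helper that is not defined in this
# fragment. Below is a hypothetical sketch of what it could return, using the
# hyper-parameter keys models.cgcnn expects elsewhere in this repository; the
# values are illustrative placeholders, not the script's actual settings
# (C and n_train are assumed to be in scope).
def build_params():
    params = dict()
    params['dir_name'] = 'cv_demo'
    params['num_epochs'] = 20
    params['batch_size'] = 32
    params['eval_frequency'] = 50
    params['filter'] = 'chebyshev5'
    params['brelu'] = 'b1relu'
    params['pool'] = 'apool1'
    params['F'] = [32, 64]   # graph convolutional filters per layer
    params['K'] = [20, 20]   # Chebyshev polynomial orders
    params['p'] = [4, 2]     # pooling sizes
    params['M'] = [512, C]   # fully connected layers, C output classes
    params['regularization'] = 5e-4
    params['dropout'] = 1
    params['learning_rate'] = 1e-3
    params['decay_rate'] = 0.95
    params['momentum'] = 0.9
    params['decay_steps'] = n_train / params['batch_size']
    return params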
params['F'] = [32, 64]  # Number of graph convolutional filters.
params['K'] = [20, 20]  # Polynomial orders.
params['p'] = [4, 2]    # Pooling sizes.
params['M'] = [512, C]  # Output dimensionality of fully connected layers.

# Optimization.
params['regularization'] = 5e-4
params['dropout'] = 1
params['learning_rate'] = 1e-3
params['decay_rate'] = 0.95
params['momentum'] = 0.9
params['decay_steps'] = n_train / params['batch_size']


# In[ ]:


model = models.cgcnn(L, **params)
accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)


# # 4 Evaluation
#
# We often want to monitor:
# 1. The convergence, i.e. the training loss and the classification accuracy on the validation set.
# 2. The performance, i.e. the classification accuracy on the testing set (to be compared with the training set accuracy to spot overfitting).
#
# The `model_perf` class in [utils.py](utils.py) can be used to compactly evaluate multiple models.

# In[ ]:


fig, ax1 = plt.subplots(figsize=(15, 5))
ax1.plot(accuracy, 'b.-')
ax1.set_ylabel('validation accuracy', color='b')
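# The plotting cell is cut off above. A minimal sketch of the usual companion
# axis for the training loss, assuming `loss` is the second list returned by
# model.fit above:
ax2 = ax1.twinx()
ax2.plot(loss, 'g.-')
ax2.set_ylabel('training loss', color='g')
plt.show()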
common['momentum'] = 0
common['F'] = [2, 2]
common['K'] = [1, 1]
common['p'] = [1, 1]
common['M'] = [512, C]

# Architecture of TF MNIST conv model (LeNet-5-like).
# Changes: regularization, dropout, decaying learning rate, momentum optimizer, stopping condition, size of biases.
# Differences: training data randomization, init conv1 biases at 0.

if True:
    name = 'fgconv_fgconv_fc_softmax'  # 'Non-Param'
    params = common.copy()
    params['dir_name'] += name
    params['filter'] = 'fourier'
    params['K'] = [L[0].shape[0], L[2].shape[0]]
    model_perf.test(models.cgcnn(L, **params), name, params,
                    train_data, train_labels, val_data, val_labels,
                    test_data, test_labels)

if True:
    name = 'sgconv_sgconv_fc_softmax'  # 'Spline'
    params = common.copy()
    params['dir_name'] += name
    params['filter'] = 'spline'
    model_perf.test(models.cgcnn(L, **params), name, params,
                    train_data, train_labels, val_data, val_labels,
                    test_data, test_labels)

'''
if True:
    name = 'cgconv_cgconv_fc_softmax'  # 'Chebyshev'
    params = common.copy()
    params['dir_name'] += name
    params['filter'] = 'chebyshev5'
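# Aside: a minimal sketch of what the non-parametric 'fourier' filter above
# computes (assumed from the spectral graph convolution formulation, not code
# from this repository). One weight is learned per Laplacian eigenvalue,
# which is why K is set to the graph size at each level.
import numpy as np

def fourier_filter(L_dense, x, w):
    """Filter a signal x with spectral weights w: y = U diag(w) U^T x."""
    lam, U = np.linalg.eigh(L_dense)  # full eigendecomposition, O(n^3)
    return U @ (w * (U.T @ x))        # transform, scale, transform back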
common['dropout'] = 1
common['learning_rate'] = .005
common['decay_rate'] = 0.95
common['momentum'] = 0
common['F'] = [1]
common['K'] = [1]
common['p'] = [2]
common['M'] = [1024, C]

if True:
    name = 'Run1'
    params = common.copy()
    params['dir_name'] += name
    # params['filter'] = 'chebyshev5'
    params['filter'] = 'chebyshev2'
    params['brelu'] = 'b1relu'
    model_perf.test(models.cgcnn(L, **params), name, params,
                    Train_Data, Train_Label, Val_Data, Val_Label,
                    Test_Data, Test_Label)

model_perf.show()

if False:
    grid_params = {}
    data = (train_data, train_labels, val_data, val_labels, test_data, test_labels)
    utils.grid_search(params, grid_params, *data,
                      model=lambda x: models.cgcnn(L, **x))
common['dropout'] = 0.9
common['is_training'] = True  # batch normalization
common['learning_rate'] = [0.02, 0.002]
common['boundaries'] = [int(2 / 3 * step_num)]
print('boundaries is {}'.format(common['boundaries']))
common['momentum'] = 0.9
common['F'] = [32, 64]
common['K'] = [25, 25]
common['p'] = [4, 4]
common['M'] = [512, C]

out_path = 'lr{}{}_bou{}_ep{}_bat{}_reg{}_drp{}_BN_mnist_TTT_logk10_K25'.format(
    common['learning_rate'][0], common['learning_rate'][1],
    common['boundaries'][0], common['num_epochs'], common['batch_size'],
    common['regularization'], common['dropout'])
# file_out = open(out_path, 'a')
file_out = None

if True:
    name = out_path
    params = common.copy()
    params['dir_name'] += name
    params['filter'] = 'chebyshev5'
    model_perf.test(
        models.cgcnn(L, graphs_adjacency, index, z_xyz[-1], file_out, **params),
        name, params, train_data, train_labels, val_data, val_labels,
        test_data, test_labels, file_out)

if file_out is not None:  # file_out stays None unless the open() above is re-enabled
    file_out.close()
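# The two-element learning_rate plus `boundaries` above suggests a
# piecewise-constant schedule inside the model. A sketch of how such a
# schedule is typically built in TensorFlow 1.x; this is an assumption about
# the model's internals, not code from this repository:
import tensorflow as tf

global_step = tf.Variable(0, trainable=False, name='global_step')
# 0.02 for the first two thirds of training steps, then 0.002.
learning_rate = tf.train.piecewise_constant(
    global_step, boundaries=common['boundaries'],
    values=common['learning_rate'])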
def main():
    createFolder('Result')
    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.safe_load(f)  # yaml.load without a Loader is deprecated

    PPI_data = config["PPI_data"]
    Response_data = config["Response_data"]
    Gene_data = config["Gene_data"]
    n_fold = config["n_fold"]
    test_size = config["test_size"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    brelu = config["brelu"]
    pool = config["pool"]
    regularization = config["regularization"]
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    decay_rate = config["decay_rate"]
    momentum = config["momentum"]
    Name = config["Name"]
    F = config["F"]
    K = config["K"]
    p = config["p"]
    M = config["M"]

    # Load the PPI adjacency, drug response (IC50), and gene expression tables.
    data_PPI = pd.read_csv(PPI_data)
    data_PPI.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_IC50 = pd.read_csv(Response_data)
    data_IC50.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = pd.read_csv(Gene_data)
    data_Gene.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = np.array(data_Gene)

    # Coarsen the PPI graph and build one normalized Laplacian per level.
    df = np.array(data_PPI)
    A = coo_matrix(df, dtype=np.float32)
    print(A.nnz)
    graphs, perm = coarsening.coarsen(A, levels=6, self_connections=False)
    L = [graph.laplacian(g, normalized=True) for g in graphs]
    graph.plot_spectrum(L)

    PCC = []
    SPC = []
    RMSE = []
    X_train, X_test, Y_train, Y_test = train_test_split(
        data_Gene, data_IC50, test_size=test_size, shuffle=True, random_state=20)

    for cv in range(n_fold):
        Y_pred = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        Y_test = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        j = 0
        for i in range(Y_test.shape[1]):  # one model per drug (column of the IC50 matrix)
            data1 = data_IC50.iloc[:, i]
            data1 = np.array(data1)
            # Min-max normalize the responses, ignoring missing (NaN) entries.
            data_minmax = data1[~np.isnan(data1)]
            y_min = data_minmax.min()
            y_max = data_minmax.max()
            data1 = (data1 - y_min) / (y_max - y_min)

            train_data_split, test_data_split, train_labels_split, test_labels_split = \
                train_test_split(data_Gene, data1, test_size=test_size,
                                 shuffle=True, random_state=20)

            # Keep only training samples with an observed response.
            train_data = np.array(
                train_data_split[~np.isnan(train_labels_split)]).astype(np.float32)
            list_train, list_val = Validation(n_fold, train_data, train_labels_split)
            train_data_V = train_data[list_train[cv]]
            val_data = train_data[list_val[cv]]
            test_data = np.array(test_data_split[:]).astype(np.float32)
            train_labels = np.array(
                train_labels_split[~np.isnan(train_labels_split)]).astype(np.float32)
            train_labels_V = train_labels[list_train[cv]]
            val_labels = train_labels[list_val[cv]]
            test_labels = np.array(test_labels_split[:]).astype(np.float32)

            # Reorder vertices to match the coarsened graph hierarchy.
            train_data_V = coarsening.perm_data(train_data_V, perm)
            val_data = coarsening.perm_data(val_data, perm)
            test_data = coarsening.perm_data(test_data, perm)

            common = {}
            common['num_epochs'] = num_epochs
            common['batch_size'] = batch_size
            common['decay_steps'] = train_data.shape[0] / common['batch_size']
            common['eval_frequency'] = 10 * common['num_epochs']
            common['brelu'] = brelu
            common['pool'] = pool
            common['regularization'] = regularization
            common['dropout'] = dropout
            common['learning_rate'] = learning_rate
            common['decay_rate'] = decay_rate
            common['momentum'] = momentum
            common['F'] = F
            common['K'] = K
            common['p'] = p
            common['M'] = M

            if True:
                name = Name
                params = common.copy()
                model = models.cgcnn(L, **params)
                loss, t_step = model.fit(train_data_V, train_labels_V,
                                         val_data, val_labels)
                Y_pred[:, j] = model.predict(test_data)
                Y_test[:, j] = test_labels
                j = j + 1

        np.savez('Result/GraphCNN_CV_{}'.format(cv), Y_true=Y_test, Y_pred=Y_pred)
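# Neither createFolder nor Validation is defined in this fragment. Minimal
# sketches of compatible implementations, inferred from how their outputs are
# used above (assumptions, not the authors' actual helpers):
import os
from sklearn.model_selection import KFold

def createFolder(path):
    os.makedirs(path, exist_ok=True)

def Validation(n_fold, train_data, train_labels):
    """Return per-fold train/validation index lists for train_data."""
    kf = KFold(n_splits=n_fold, shuffle=True, random_state=0)
    list_train, list_val = [], []
    for train_idx, val_idx in kf.split(train_data):
        list_train.append(train_idx)
        list_val.append(val_idx)
    return list_train, list_val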
model_perf = utils.model_perf()

if True:
    name = 'softmax'
    params = common.copy()
    params['dir_name'] += name
    params['regularization'] = 0
    params['dropout'] = 1
    params['learning_rate'] = 1e3
    params['decay_rate'] = 0.95
    params['momentum'] = 0.9
    params['F'] = []
    params['K'] = []
    params['p'] = []
    params['M'] = [C]
    model_perf.test(models.cgcnn(L, **params), name, params,
                    train_data, train_labels, val_data, val_labels,
                    test_data, test_labels)

if True:
    name = 'fc_softmax'
    params = common.copy()
    params['dir_name'] += name
    params['regularization'] = 0
    params['dropout'] = 1
    params['learning_rate'] = 0.1
    params['decay_rate'] = 0.95
    params['momentum'] = 0.9
    params['F'] = []
    params['K'] = []
    params['p'] = []
    params['M'] = [2500, C]
common['decay_steps'] = 17.7  # * common['num_epochs']; not used here, as with momentum
common['eval_frequency'] = 10 * common['num_epochs']
common['brelu'] = 'b1relu'
common['pool'] = 'apool1'
common['regularization'] = 0
common['dropout'] = 1
common['learning_rate'] = .005
common['decay_rate'] = 0.95
common['momentum'] = 0
common['F'] = [1]
common['K'] = [1]
common['p'] = [2]
common['M'] = [1024, C]

name = 'Run1'
## Copy common into params, add the filter parameter, and override brelu and dir_name.
params = common.copy()
params['dir_name'] += name
# params['filter'] = 'chebyshev5'
params['filter'] = 'chebyshev2'
params['brelu'] = 'b1relu'
model_perf.test(models.cgcnn(L, **params), name, params,
                Train_Data, Train_Label, Val_Data, Val_Label,
                Test_Data, Test_Label)
model_perf.show()

## Grid search
if False:
    grid_params = {}
    data = (train_data, train_labels, val_data, val_labels, test_data, test_labels)
    utils.grid_search(params, grid_params, *data,
                      model=lambda x: models.cgcnn(L, **x))

print(sys.argv[1])
name = 'sgconv_softmax'
params = common.copy()
params['dir_name'] += name
params['filter'] = 'spline'
# model_perf.test(models.cgcnn(L, **params), name, params,
#                 train_data, train_labels, val_data, val_labels,
#                 test_data, test_labels)

# With 'chebyshev2' and 'b2relu', it corresponds to cgcnn2_2(L[0], F=10, K=20).
if True:
    name = 'cgconv_softmax'
    params = common.copy()
    params['dir_name'] += name
    params['filter'] = 'chebyshev5'
    # params['filter'] = 'chebyshev2'
    # params['brelu'] = 'b2relu'
    model_perf.test(models.cgcnn(L, **params), name, params,
                    train_data, train_labels, val_data, val_labels,
                    test_data, test_labels)

# Common hyper-parameters for LeNet5-like networks.
common['regularization'] = 5e-4
common['dropout'] = 0.5
common['learning_rate'] = 0.02  # 0.03 in the paper, but sgconv_sgconv_fc_softmax has difficulty converging
common['decay_rate'] = 0.95
common['momentum'] = 0.9
common['F'] = [32, 64]
common['K'] = [25, 25]
common['p'] = [4, 4]
common['M'] = [512, C]

# Architecture of TF MNIST conv model (LeNet-5-like).
# Changes: regularization, dropout, decaying learning rate, momentum optimizer, stopping condition, size of biases.
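# For reference, a minimal sketch of the Chebyshev filtering that the
# 'chebyshev5' option implements, assumed from the ChebNet formulation
# (y = sum_k theta_k T_k(L_scaled) x, with T_0 = I, T_1 = L_scaled,
# T_k = 2 L_scaled T_{k-1} - T_{k-2}, and L_scaled = 2L/lmax - I);
# not code from this repository:
import numpy as np
import scipy.sparse as sp

def chebyshev_filter(L, x, theta, lmax=2.0):
    """Apply a Chebyshev spectral filter of order len(theta) >= 2 to signal x."""
    n = L.shape[0]
    L_scaled = (2.0 / lmax) * L - sp.identity(n, format='csr')
    t_prev, t_curr = x, L_scaled @ x  # T_0 x and T_1 x
    y = theta[0] * t_prev + theta[1] * t_curr
    for k in range(2, len(theta)):
        t_prev, t_curr = t_curr, 2 * (L_scaled @ t_curr) - t_prev
        y = y + theta[k] * t_curr
    return y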
def cross_validate_convNN(X, y, adjacency, name_param, value_param, k, num_levels=5):
    split_index = split_test_train_for_cv(X.shape[0], k_fold=k)
    graphs, perm = coarsening.coarsen(
        sp.csr_matrix(adjacency.astype(np.float32)),
        levels=num_levels, self_connections=False)
    accuracy = []
    loss = []
    for param_val in value_param:
        accuracy_param = []
        loss_param = []
        for k_ in range(k):
            # Fold k_ is the test split; the remaining folds form the training split.
            test_samples = split_index[k_]
            train_samples = split_index[
                ~(np.arange(split_index.shape[0]) == k_)].flatten()
            X_train = X[train_samples]
            X_test = X[test_samples]
            y_train = y[train_samples]
            y_test = y[test_samples]
            X_train = coarsening.perm_data(X_train, perm)
            X_test = coarsening.perm_data(X_test, perm)
            n_train = X_train.shape[0]
            L = [graph.laplacian(A, normalized=True) for A in graphs]

            # Conv NN parameters.
            params = dict()
            params['dir_name'] = 'demo'
            params['num_epochs'] = 30
            params['batch_size'] = 30
            params['eval_frequency'] = 30
            # Building blocks.
            params['filter'] = 'chebyshev5'
            params['brelu'] = 'b1relu'
            params['pool'] = 'apool1'
            # Number of classes.
            C = y.max() + 1
            assert C == np.unique(y).size
            # Architecture.
            params['F'] = [4, 8]    # Number of graph convolutional filters.
            params['K'] = [3, 3]    # Polynomial orders.
            params['p'] = [2, 8]    # Pooling sizes.
            params['M'] = [256, C]  # Output dimensionality of fully connected layers.
            # Optimization.
            params['regularization'] = 4e-5
            params['dropout'] = 1
            params['learning_rate'] = 3e-3
            params['decay_rate'] = 0.9
            params['momentum'] = 0.8
            params['decay_steps'] = n_train / params['batch_size']
            # Overwrite the hyper-parameter under study.
            params[name_param] = param_val

            model = models.cgcnn(L, **params)
            test_acc, train_loss, t_step = model.fit(X_train, y_train, X_test, y_test)
            accuracy_param.append([max(test_acc), np.mean(test_acc)])
            loss_param.append([max(train_loss), np.mean(train_loss)])

        print(np.array(accuracy_param))
        pm = np.mean(np.array(accuracy_param), axis=0)
        pl = np.mean(np.array(loss_param), axis=0)
        print("IIIII Accuracy: %0.2f (max) %0.2f (mean) Loss: %0.2f (max) %0.2f (mean)"
              % (pm[0], pm[1], pl[0], pl[1]))
        accuracy.append(pm)
        loss.append(pl)
    return accuracy, loss
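# A hypothetical usage sketch for the helper above: sweep the Chebyshev order
# K across the folds. X, y, and adjacency stand for a feature matrix, label
# vector, and dense adjacency matrix; the values are placeholders.
accuracy, loss = cross_validate_convNN(
    X, y, adjacency,
    name_param='K', value_param=[[3, 3], [5, 5], [10, 10]],
    k=5, num_levels=5)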