def gene_graph(self, A):
    # Coarsen the graph and reindex the data so pooling works on binary trees.
    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)
    train_data = perm_data(copy.copy(self.train_data), perm)
    test_data = perm_data(copy.copy(self.test_data), perm)
    self.layer1 = L[0].shape
    self.D = self.layer1[0]  # node count of the finest (permuted) graph
    print(self.D)
    # Largest eigenvalue of each Laplacian, needed to rescale for Chebyshev filters.
    lmax = []
    for i in range(coarsening_levels + 1):
        lmax.append(lmax_L(L[i]))
    return train_data, test_data, self.train_label, self.test_label, L, lmax
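# A minimal sketch (assuming the same `coarsen`/`perm_data` helpers imported
# above) of why the feature matrices must be permuted: coarsening appends fake
# nodes so that every pooling step halves the node count, so the permuted data
# has at least as many columns as the original graph has nodes. All names below
# (W, x, L_demo, perm_demo) are illustrative, not part of the original code.
import numpy as np
import scipy.sparse

rng = np.random.RandomState(0)
Wd = np.abs(rng.randn(10, 10)).astype(np.float32)
Wd = (Wd + Wd.T) / 2      # symmetric weights
np.fill_diagonal(Wd, 0)   # no self-loops
W = scipy.sparse.csr_matrix(Wd)

L_demo, perm_demo = coarsen(W, 2)        # Laplacians per level + node permutation
x = rng.randn(4, 10).astype(np.float32)  # 4 samples on 10 original nodes
x_perm = perm_data(x, perm_demo)
print(x.shape, '->', x_perm.shape)       # second dim >= 10 (fake nodes zero-padded)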
def __init__(self, split):
    super().__init__()
    # No separate validation split; fall back to the test split.
    if split == 'val':
        split = 'test'
    data_dir = 'data/20news'
    data_path = os.path.join(data_dir, '{}_data.npz'.format(split))
    labels_path = os.path.join(data_dir, '{}_labels.npy'.format(split))
    class_names_path = os.path.join(data_dir, 'class_names.txt')
    self.labels = np.load(labels_path)
    self.class_names = [c.strip() for c in open(class_names_path)]
    self.classes_num = len(self.class_names)
    # perm_data works on dense arrays, so densify, permute, then re-sparsify.
    data = sparse.load_npz(data_path).astype(np.float32)
    self.data = sparse.csr_matrix(
        coarsening.perm_data(data.toarray(), NewsDataset.perm))
def __init__(self, split):
    super().__init__()
    self.data_dir = './data'
    self.width, self.height = 28, 28
    self.node_num = self.width * self.height
    # Both 'train' and 'val' come from the MNIST training set.
    is_train_or_val = split == 'train' or split == 'val'
    data = MNIST(root=self.data_dir, train=is_train_or_val, download=True)
    self.classes_num = 10
    img_data = data.data.numpy().reshape(-1, self.node_num).astype(np.float32)
    img_labels = data.targets.numpy()
    # Hold out the last 5000 training images as the validation split.
    start = len(img_data) - 5000 if split == 'val' else 0
    end = len(img_data) - 5000 if split == 'train' else len(img_data)
    img_data = img_data[start:end]
    img_labels = img_labels[start:end]
    self.img_data = coarsening.perm_data(img_data, MNISTDataset.perm)
    self.img_labels = img_labels
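# A tiny standalone check (toy numbers, not part of the dataset class) of the
# split arithmetic above: with MNIST's 60000 training images, 'train' covers
# indices [0, 55000) and 'val' covers [55000, 60000), so the two are disjoint.
n = 60000
assert set(range(0, n - 5000)).isdisjoint(range(n - 5000, n))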
if k in [200, 600, 2000]:
    L, perm = get_graph_parameters(k)
else:
    L, perm = build_laplacian(k)
x, y = get_data(k)
kf = KFold(n_splits=5, shuffle=True, random_state=0)  # random_state requires shuffle=True
final_accuracy = 0
train_accuracy = 0
all_loss, all_acc = 0, 0
for train_index, test_index in kf.split(x):
    x_train, y_train = x[train_index], y[train_index]
    x_test, y_test = x[test_index], y[test_index]
    # Fit the scaler on the training fold only to avoid leakage.
    scaler = MinMaxScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    x_train = coarsening.perm_data(x_train, perm)
    x_test = coarsening.perm_data(x_test, perm)
    model = models.cgcnn(L, **build_params())
    accuracy, loss, t_step = model.fit(x_train, y_train, x_test, y_test)
    # all_acc += accuracy[-1]
    # all_loss += loss[-1]
    final_accuracy += accuracy[-1]
    train_accuracy += model.evaluate(x_train, y_train)[1]
final_accuracy /= 5
train_accuracy /= 5
all_loss /= 5
all_acc /= 5
print("final accuracy:", final_accuracy)
print("training accuracy:", train_accuracy)
fig, ax1 = plt.subplots(figsize=(15, 5))
ax1.plot(accuracy, 'b.-')  # accuracy curve of the last fold
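# build_params() is a local helper not shown in this excerpt. A hypothetical
# reconstruction for reference only, with field names taken from the
# models.cgcnn parameter dictionaries used elsewhere in this section and
# placeholder values:
def build_params(n_classes=10, n_train=1000, batch_size=30):
    return {
        'dir_name': 'demo',
        'num_epochs': 30,
        'batch_size': batch_size,
        'eval_frequency': 30,
        'filter': 'chebyshev5',   # building blocks
        'brelu': 'b1relu',
        'pool': 'apool1',
        'F': [4, 8],              # graph convolutional filters per layer
        'K': [3, 3],              # Chebyshev polynomial orders
        'p': [2, 8],              # pooling sizes
        'M': [256, n_classes],    # fully connected layer sizes
        'regularization': 4e-5,   # optimization
        'dropout': 1,
        'learning_rate': 3e-3,
        'decay_rate': 0.9,
        'momentum': 0.8,
        'decay_steps': n_train / batch_size,
    }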
X_val = X[n_train:, ...]
y_train = y[:n_train, ...]
y_val = y[n_train:, ...]

d = X.shape[1]  # number of graph nodes = feature dimensionality
A = np.load('/Neutron9/joyneel.misra/npys/meanFC_d' + str(d) + '.npy')
A = A - np.min(A)  # shift weights so they are non-negative
A = scipy.sparse.csr_matrix(A)
assert A.shape == (d, d)
print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))

graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)
X_train = coarsening.perm_data(X_train, perm)
X_val = coarsening.perm_data(X_val, perm)
L = [graph.laplacian(W, normalized=True) for W in graphs]  # one Laplacian per level
L = [elem.astype(np.float32) for elem in L]

params = dict()
params['dir_name'] = 'demo'
params['num_epochs'] = 10
params['batch_size'] = 40
params['eval_frequency'] = 100
# Building blocks.
params['filter'] = 'chebyshev5'
params['brelu'] = 'b1relu'
params['pool'] = 'apool1'
print("DONE") del A from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) train_data = mnist.train.images.astype(np.float32) val_data = mnist.validation.images.astype(np.float32) test_data = mnist.test.images.astype(np.float32) train_labels = mnist.train.labels val_labels = mnist.validation.labels test_labels = mnist.test.labels print(val_labels) t_start = time.process_time() train_data = coarsening.perm_data(train_data, perm) val_data = coarsening.perm_data(val_data, perm) test_data = coarsening.perm_data(test_data, perm) print('Execution time: {:.2f}s'.format(time.process_time() - t_start)) del perm #model = fc1() #model = fc2(nhiddens=100) #model = cnn2(K=5, F=10) # K=28 is equivalent to filtering with fgcnn. #model = fcnn2(F=10) #model = fgcnn2(L[0], F=10) #model = lgcnn2_2(L[0], F=10, K=10) #model = cgcnn2_3(L[0], F=10, K=5) #model = cgcnn2_4(L[0], F=10, K=5) #model = cgcnn2_5(L[0], F=10, K=5)
f_adjacency = h5py.File(Adjfilename, 'r')
W = sparse.csr_matrix(
    (f_adjacency["W"]["data"], f_adjacency["W"]["ir"], f_adjacency["W"]["jc"]))
W = W.astype(np.float32)
print('Size of W: ', W.shape, '\n')

# ----- Graph coarsening ------------------------------------------------------
print('Graph coarsening ...')
print('Original: |V| = {} nodes, |E| = {} edges'.format(
    W.shape[0], int(W.nnz / 3)))
graphs, perm = coarsening.coarsen(W, levels=coarsen_level,
                                  self_connections=False)
# Exchange node ids so that binary unions form the clustering tree.
data_train = coarsening.perm_data(data_train, perm)
data_valid = coarsening.perm_data(data_valid, perm)
data_test = coarsening.perm_data(data_test, perm)

# ----- Update LB-operator / graph Laplacian for each coarsened level ---------
L = [
    graph.laplacian(W2, normalized=params['normalized']).transpose()
    for W2 in graphs
]

# ----- Training and validation ------------------------------------------------
n_train = data_train.shape[0]  # Number of training samples.
params['decay_steps'] = (np.multiply(params['decay_steps'],
                                     n_train / params['batch_size'])).astype(int)
model = models.cgcnn(L, 1, **params)
L2 = Data2['L'].astype(np.float32)
L3 = Data3['L'].astype(np.float32)
L4 = Data4['L'].astype(np.float32)
L5 = Data5['L'].astype(np.float32)

# Adjust for k-fold cross-validation.
Train_Data = np.transpose(np.hstack((D1, D2, D3, D4)))
Val_Data = np.transpose(D5)
Test_Data = np.transpose(D5)
Train_Label = np.vstack((L1, L2, L3, L4))
Val_Label = L5
Test_Label = L5
Test_Label = Test_Label.ravel()
Train_Label = Train_Label.ravel()
Val_Label = Val_Label.ravel()
Train_Data = coarsening.perm_data(Train_Data, perm)
Val_Data = coarsening.perm_data(Val_Data, perm)
Test_Data = coarsening.perm_data(Test_Data, perm)

C = 34  # Number of classes.
common = {}
common['dir_name'] = 'PPI/'
common['num_epochs'] = 20
common['batch_size'] = 200
common['decay_steps'] = 17.7  # * common['num_epochs'] since not used; use as in momentum
common['eval_frequency'] = 10 * common['num_epochs']
common['brelu'] = 'b1relu'
common['pool'] = 'apool1'
def main():
    createFolder('Result')
    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)  # PyYAML >= 5.1 requires an explicit Loader
    PPI_data = config["PPI_data"]
    Response_data = config["Response_data"]
    Gene_data = config["Gene_data"]
    n_fold = config["n_fold"]
    test_size = config["test_size"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    brelu = config["brelu"]
    pool = config["pool"]
    regularization = config["regularization"]
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    decay_rate = config["decay_rate"]
    momentum = config["momentum"]
    Name = config["Name"]
    F = config["F"]
    K = config["K"]
    p = config["p"]
    M = config["M"]

    data_PPI = pd.read_csv(PPI_data)
    data_PPI.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_IC50 = pd.read_csv(Response_data)
    data_IC50.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = pd.read_csv(Gene_data)
    data_Gene.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = np.array(data_Gene)

    # Build the PPI graph and coarsen it once; the same permutation is reused
    # for every drug-response task below.
    df = np.array(data_PPI)
    A = coo_matrix(df, dtype=np.float32)
    print(A.nnz)
    graphs, perm = coarsening.coarsen(A, levels=6, self_connections=False)
    L = [graph.laplacian(W, normalized=True) for W in graphs]
    graph.plot_spectrum(L)

    PCC = []
    SPC = []
    RMSE = []
    X_train, X_test, Y_train, Y_test = train_test_split(data_Gene,
                                                        data_IC50,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        random_state=20)
    for cv in range(n_fold):
        Y_pred = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        Y_test = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        j = 0
        for i in range(Y_test.shape[1]):
            # Min-max normalize each drug's IC50 column, ignoring NaNs.
            data1 = data_IC50.iloc[:, i]
            data1 = np.array(data1)
            data_minmax = data1[~np.isnan(data1)]
            vmin = data_minmax.min()
            vmax = data_minmax.max()
            data1 = (data1 - vmin) / (vmax - vmin)
            train_data_split, test_data_split, train_labels_split, test_labels_split = \
                train_test_split(data_Gene, data1, test_size=test_size,
                                 shuffle=True, random_state=20)
            # Drop samples whose label is NaN.
            train_data = np.array(
                train_data_split[~np.isnan(train_labels_split)]).astype(np.float32)
            list_train, list_val = Validation(n_fold, train_data,
                                              train_labels_split)
            train_data_V = train_data[list_train[cv]]
            val_data = train_data[list_val[cv]]
            test_data = np.array(test_data_split[:]).astype(np.float32)
            train_labels = np.array(
                train_labels_split[~np.isnan(train_labels_split)]).astype(np.float32)
            train_labels_V = train_labels[list_train[cv]]
            val_labels = train_labels[list_val[cv]]
            test_labels = np.array(test_labels_split[:]).astype(np.float32)
            train_data_V = coarsening.perm_data(train_data_V, perm)
            val_data = coarsening.perm_data(val_data, perm)
            test_data = coarsening.perm_data(test_data, perm)

            common = {}
            common['num_epochs'] = num_epochs
            common['batch_size'] = batch_size
            common['decay_steps'] = train_data.shape[0] / common['batch_size']
            common['eval_frequency'] = 10 * common['num_epochs']
            common['brelu'] = brelu
            common['pool'] = pool
            common['regularization'] = regularization
            common['dropout'] = dropout
            common['learning_rate'] = learning_rate
            common['decay_rate'] = decay_rate
            common['momentum'] = momentum
            common['F'] = F
            common['K'] = K
            common['p'] = p
            common['M'] = M

            if True:
                name = Name
                params = common.copy()
                model = models.cgcnn(L, **params)
                loss, t_step = model.fit(train_data_V, train_labels_V,
                                         val_data, val_labels)
                Y_pred[:, j] = model.predict(test_data)
                Y_test[:, j] = test_labels
                j = j + 1
        np.savez(('Result/GraphCNN_CV_{}'.format(cv)),
                 Y_true=Y_test, Y_pred=Y_pred)
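# An equivalent NaN-aware normalization using numpy's nan-reductions; a small
# sketch whose behavior matches the masked min/max computed per drug above.
import numpy as np

def minmax_normalize(col):
    vmin, vmax = np.nanmin(col), np.nanmax(col)
    return (col - vmin) / (vmax - vmin)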
def prepare(dataset):
    # MNIST
    if dataset == 'mnist':
        mnist = input_data.read_data_sets(
            'datasets', one_hot=False)  # load data into folder datasets/
        train_data = mnist.train.images.astype(np.float32)
        val_data = mnist.validation.images.astype(np.float32)
        test_data = mnist.test.images.astype(np.float32)
        train_labels = mnist.train.labels
        val_labels = mnist.validation.labels
        test_labels = mnist.test.labels
        print(train_data.shape)
        print(train_labels.shape)
        print(val_data.shape)
        print(val_labels.shape)
        print(test_data.shape)
        print(test_labels.shape)
        # Construct graph.
        t_start = time.time()
        grid_side = 280
        number_edges = 8
        metric = 'euclidean'
        A = grid_graph(grid_side, number_edges, metric)  # graph of the Euclidean grid
        print(A.shape)
    elif dataset == 'adni':
        t_start = time.time()
        train_data, test_data, train_labels, test_labels, A = load_data(
            train_rate=train_rate, thresh=thresh, binary=binary, num=num,
            state=state)  # 0.35
        #A = np.load(saved_path+"_graph_A_2.npy")
        #A = np.load(saved_path+"_init_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_800_found_graph_left_candidate_465.npy")
        #A = scipy.sparse.coo_matrix(A)
        #np.save(saved_path+"_init_graph.npy", A.toarray())
        # print("baseline: ", sum(test_labels)/test_labels.shape[0])
        if verbose:
            fig = plt.figure()
            # Use a 1x1 grid and plot in the first position.
            ax = fig.add_subplot(111)
            # Axis tick labels could be set here:
            # ax.set_yticks(range(len(yLabel)))
            # ax.set_yticklabels(yLabel, fontproperties=font)
            # ax.set_xticks(range(len(xLabel)))
            # ax.set_xticklabels(xLabel)
            # Plot the adjacency matrix as a heatmap with the 'hot' colormap.
            im = ax.imshow(A.toarray(), cmap=plt.cm.hot_r)
            plt.colorbar(im)  # color scale on the right
            plt.title("This is the original graph")
            plt.show()
        # grid_side = 180  # 102
        # number_edges = 101
        # metric = 'euclidean'

    # Compute coarsened graphs.
    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)
    global layer1
    layer1 = L[0].shape

    # Compute the max eigenvalue of each graph Laplacian.
    lmax = []
    for i in range(coarsening_levels):
        lmax.append(lmax_L(L[i]))
    #print('lmax: ' + str([lmax[i] for i in range(coarsening_levels)]))

    # Reindex nodes to satisfy a binary tree structure.
    train_data = perm_data(train_data, perm)
    # val_data = perm_data(val_data, perm)
    test_data = perm_data(test_data, perm)

    '''
    test part for update graph
    '''
    #a, b = update_graph(A)
    #print(a[0].shape == (144, 144))
    #exit()

    #print('Execution time: {:.2f}s'.format(time.time() - t_start))
    del perm
    return train_data, train_labels, test_data, test_labels, L, lmax, A
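# A sketch (assuming scipy) of what an `lmax_L`-style helper computes: the
# largest eigenvalue of a sparse Laplacian, used to rescale L before applying
# Chebyshev polynomial filters. `lmax_sketch` is an illustrative name, not the
# library function.
import scipy.sparse.linalg

def lmax_sketch(L):
    # 'LM' = largest magnitude; for a PSD Laplacian this is the top eigenvalue.
    return scipy.sparse.linalg.eigsh(L, k=1, which='LM',
                                     return_eigenvectors=False)[0]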
def cross_validate_convNN(X, y, adjacency, name_param, value_param, k,
                          num_levels=5):
    split_index = split_test_train_for_cv(X.shape[0], k_fold=k)
    # Coarsen the graph once; the node permutation is shared by all folds.
    graphs, perm = coarsening.coarsen(
        sp.csr_matrix(adjacency.astype(np.float32)),
        levels=num_levels, self_connections=False)
    accuracy = []
    loss = []
    for param_val in value_param:
        accuracy_param = []
        loss_param = []
        for k_ in range(k):
            test_samples = split_index[k_]
            train_samples = split_index[~(
                np.arange(split_index.shape[0]) == k_)].flatten()
            X_train = X[train_samples]
            X_test = X[test_samples]
            y_train = y[train_samples]
            y_test = y[test_samples]
            X_train = coarsening.perm_data(X_train, perm)
            X_test = coarsening.perm_data(X_test, perm)
            n_train = X_train.shape[0]
            L = [graph.laplacian(A, normalized=True) for A in graphs]

            # Conv NN parameters.
            params = dict()
            params['dir_name'] = 'demo'
            params['num_epochs'] = 30
            params['batch_size'] = 30
            params['eval_frequency'] = 30
            # Building blocks.
            params['filter'] = 'chebyshev5'
            params['brelu'] = 'b1relu'
            params['pool'] = 'apool1'
            # Number of classes.
            C = y.max() + 1
            assert C == np.unique(y).size
            # Architecture.
            params['F'] = [4, 8]    # Number of graph convolutional filters.
            params['K'] = [3, 3]    # Polynomial orders.
            params['p'] = [2, 8]    # Pooling sizes.
            params['M'] = [256, C]  # Output dimensionality of fully connected layers.
            # Optimization.
            params['regularization'] = 4e-5
            params['dropout'] = 1
            params['learning_rate'] = 3e-3
            params['decay_rate'] = 0.9
            params['momentum'] = 0.8
            params['decay_steps'] = n_train / params['batch_size']
            params[name_param] = param_val

            model = models.cgcnn(L, **params)
            test_acc, train_loss, t_step = model.fit(X_train, y_train,
                                                     X_test, y_test)
            accuracy_param.append([max(test_acc), np.mean(test_acc)])
            loss_param.append([max(train_loss), np.mean(train_loss)])
            print(np.array(accuracy_param))
        pm = np.mean(np.array(accuracy_param), axis=0)
        pl = np.mean(np.array(loss_param), axis=0)
        print("IIIII Accuracy: %0.2f (max) %0.2f (mean) "
              "Loss: %0.2f (max) %0.2f (mean)" % (pm[0], pm[1], pl[0], pl[1]))
        accuracy.append(pm)
        loss.append(pl)
    return accuracy, loss
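# A self-contained check (toy values) of the fold-selection idiom used above:
# boolean masking with ~(arange == k_) keeps every row of split_index except
# fold k_, and .flatten() merges the remaining folds into one index array.
import numpy as np

split_index = np.arange(12).reshape(4, 3)  # 4 folds, 3 sample ids each
k_ = 1
test_samples = split_index[k_]
train_samples = split_index[~(np.arange(split_index.shape[0]) == k_)].flatten()
print(test_samples)   # [3 4 5]
print(train_samples)  # [ 0  1  2  6  7  8  9 10 11]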
A = graph.adjacency(dist, idx)
print("{} > {} edges".format(A.nnz // 2,
                             FLAGS.number_edges * graph_data.shape[0] // 2))
A = graph.replace_random_edges(A, 0)
graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels,
                                  self_connections=False)
L = [graph.laplacian(W, normalized=True) for W in graphs]
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
#graph.plot_spectrum(L)
#del graph_data, A, dist, idx

#%%
t_start = time.process_time()
train_data = scipy.sparse.csr_matrix(
    coarsening.perm_data(train_data.toarray(), perm))
test_data = scipy.sparse.csr_matrix(
    coarsening.perm_data(test_data.toarray(), perm))
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
del perm

#%% [markdown]
# # Classification

#%%
# Training set is shuffled already.
#perm = np.random.permutation(train_data.shape[0])
#train_data = train_data[perm,:]
#train_labels = train_labels[perm]

# Validation set.
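# A hedged alternative to the dense round-trip above: assuming perm_data's
# behavior of copying real columns and zero-padding fake nodes (as in
# cnn_graph's coarsening module), the same reindexing can be expressed as a
# sparse matrix product, avoiding .toarray() on large datasets.
# `perm_data_sparse` is an illustrative helper, not part of the library.
import numpy as np
import scipy.sparse

def perm_data_sparse(x_csr, perm):
    n_old, n_new = x_csr.shape[1], len(perm)
    rows = [j for j in perm if j < n_old]                # old column ids
    cols = [i for i, j in enumerate(perm) if j < n_old]  # new column ids
    P = scipy.sparse.csr_matrix(
        (np.ones(len(rows), dtype=x_csr.dtype), (rows, cols)),
        shape=(n_old, n_new))
    return x_csr @ P  # fake-node columns remain all-zero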