Example #1
    def gene_graph(self, A):
        coarsening_levels = 4

        # Coarsen the graph and reorder the data to match the new node ids.
        L, perm = coarsen(A, coarsening_levels)
        train_data = perm_data(copy.copy(self.train_data), perm)
        test_data = perm_data(copy.copy(self.test_data), perm)
        self.layer1 = L[0].shape
        self.D = self.layer1[0]
        print(self.D)
        # Largest eigenvalue of each coarsened Laplacian.
        lmax = []
        for i in range(coarsening_levels + 1):
            lmax.append(lmax_L(L[i]))
        return train_data, test_data, self.train_label, self.test_label, L, lmax
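
Every example on this page feeds data through perm_data before training. A minimal sketch of its contract, assuming the mdeff/cnn_graph implementation (the function name below is illustrative): sample columns are reordered to the coarsened graph's node ids, and the fake vertices introduced by coarsening are zero-filled so pooling can ignore them.

import numpy as np

def perm_data_sketch(x, indices):
    # x: (N samples, M nodes); indices: node order over Mnew >= M vertices.
    if indices is None:
        return x
    N, M = x.shape
    xnew = np.zeros((N, len(indices)), dtype=x.dtype)
    for i, j in enumerate(indices):
        if j < M:
            xnew[:, i] = x[:, j]  # real vertex: copy its column
        # j >= M is a fake vertex added by coarsening; it stays zero
    return xnew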
Example #2
    def __init__(self, split):
        super().__init__()
        if split == 'val':
            split = 'test'
        data_dir = 'data/20news'
        data_path = os.path.join(data_dir, '{}_data.npz'.format(split))
        labels_path = os.path.join(data_dir, '{}_labels.npy'.format(split))
        class_names_path = os.path.join(data_dir, 'class_names.txt')
        self.labels = np.load(labels_path)
        with open(class_names_path) as f:
            self.class_names = [c.strip() for c in f]
        self.classes_num = len(self.class_names)

        # Reorder feature columns to match the coarsened graph node order;
        # NewsDataset.perm is assumed to be computed elsewhere.
        data = sparse.load_npz(data_path).astype(np.float32)
        self.data = sparse.csr_matrix(
            coarsening.perm_data(data.toarray(), NewsDataset.perm))
Example #3
    def __init__(self, split):
        super().__init__()
        self.data_dir = './data'
        self.width, self.height = 28, 28
        self.node_num = self.width * self.height

        is_train_or_val = split == 'train' or split == 'val'
        data = MNIST(root=self.data_dir, train=is_train_or_val, download=True)
        self.classes_num = 10
        img_data = data.data.numpy().reshape(-1, self.node_num).astype(np.float32)
        img_labels = data.targets.numpy()

        # Hold out the last 5,000 training images as the validation split.
        start = len(img_data) - 5000 if split == 'val' else 0
        end = len(img_data) - 5000 if split == 'train' else len(img_data)
        img_data = img_data[start:end]
        img_labels = img_labels[start:end]
        
        self.img_data = coarsening.perm_data(img_data, MNISTDataset.perm)
        self.img_labels = img_labels
Example #4
 if k in [200, 600, 2000]:
     L, perm = get_graph_parameters(k)
 else:
     L, perm = build_laplacian(k)
 x, y = get_data(k)
 kf = KFold(n_splits=5, shuffle=True, random_state=0)  # random_state requires shuffle=True
 final_accuracy = 0
 train_accuracy = 0
 all_loss, all_acc = 0, 0
 for train_index, test_index in kf.split(x):
     x_train, y_train, x_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
     scaler = MinMaxScaler()
     scaler.fit(x_train)
     x_train = scaler.transform(x_train)
     x_test = scaler.transform(x_test)
     x_train = coarsening.perm_data(x_train, perm)
     x_test = coarsening.perm_data(x_test, perm)
     model = models.cgcnn(L, **build_params())
     accuracy, loss, t_step = model.fit(x_train, y_train, x_test, y_test)
     all_acc += accuracy[-1]
     all_loss += loss[-1]
     final_accuracy += accuracy[-1]
     train_accuracy += model.evaluate(x_train, y_train)[1]
 final_accuracy /= 5
 train_accuracy /= 5
 all_loss /= 5
 all_acc /= 5
 print("final accuracy:", final_accuracy)
 print("training accuracy:", train_accuracy)
 fig, ax1 = plt.subplots(figsize=(15, 5))
 ax1.plot(accuracy, 'b.-')  # accuracy curve from the last fold
Example #5
    X_val   = X[n_train:, ...]

    y_train = y[:n_train, ...]
    y_val   = y[n_train:, ...]

    A = np.load('/Neutron9/joyneel.misra/npys/meanFC_d' + str(d) + '.npy')
    A = A - np.min(A)
    A = scipy.sparse.csr_matrix(A)
    d = X.shape[1]

    assert A.shape == (d, d)
    print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))

    graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)

    X_train = coarsening.perm_data(X_train, perm)
    X_val = coarsening.perm_data(X_val, perm)

    L = [graph.laplacian(A, normalized=True) for A in graphs]
    L = [elem.astype(np.float32) for elem in L]

    params = dict()
    params['dir_name']       = 'demo'
    params['num_epochs']     = 10
    params['batch_size']     = 40
    params['eval_frequency'] = 100

    # Building blocks.
    params['filter']         = 'chebyshev5'
    params['brelu']          = 'b1relu'
    params['pool']           = 'apool1'
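
The excerpt ends at the building blocks. In the cnn_graph demos the remaining architecture and optimization parameters, plus the training call, typically look like the sketch below; the values are illustrative, and C (number of classes) and n_train (training-set size) are assumed to be defined.

    # Architecture (illustrative values).
    params['F'] = [32, 64]   # graph convolutional filters per layer
    params['K'] = [20, 20]   # Chebyshev polynomial orders
    params['p'] = [4, 2]     # pooling sizes
    params['M'] = [512, C]   # fully connected layer sizes

    # Optimization (illustrative values).
    params['regularization'] = 5e-4
    params['dropout'] = 1
    params['learning_rate'] = 1e-3
    params['decay_rate'] = 0.95
    params['momentum'] = 0.9
    params['decay_steps'] = n_train / params['batch_size']

    model = models.cgcnn(L, **params)
    accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)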
Example #6
print("DONE")

del A
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels

print(val_labels)
t_start = time.process_time()
# Reorder pixel columns to match the coarsened graph node ordering.
train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
del perm

#model = fc1()
#model = fc2(nhiddens=100)
#model = cnn2(K=5, F=10)  # K=28 is equivalent to filtering with fgcnn.
#model = fcnn2(F=10)
#model = fgcnn2(L[0], F=10)
#model = lgcnn2_2(L[0], F=10, K=10)
#model = cgcnn2_3(L[0], F=10, K=5)
#model = cgcnn2_4(L[0], F=10, K=5)
#model = cgcnn2_5(L[0], F=10, K=5)
Example #7
f_adjacency = h5py.File(Adjfilename, 'r')
W = sparse.csr_matrix(
    (f_adjacency["W"]["data"], f_adjacency["W"]["ir"], f_adjacency["W"]["jc"]))
W = W.astype(np.float32)
print('Size of W: ', W.shape, '\n')

# ----- Graph coarsening ------------------------------------------------------
print('Graph coarsening  ...')
print('Original: |V| = {} nodes, |E| = {} edges'.format(
    W.shape[0], int(W.nnz / 3)))
graphs, perm = coarsening.coarsen(W,
                                  levels=coarsen_level,
                                  self_connections=False)

# Exchange node ids so that binary unions form the clustering tree.
data_train = coarsening.perm_data(data_train, perm)
data_valid = coarsening.perm_data(data_valid, perm)
data_test = coarsening.perm_data(data_test, perm)

# ----- Update LB-operator or graph Laplacian for each coarsened level --------
L = [
    graph.laplacian(W2, normalized=params['normalized']).transpose()
    for W2 in graphs
]

# ----- Training and validation -----------------------------------------------
n_train = data_train.shape[0]  # Number of train samples.
params['decay_steps'] = (np.multiply(params['decay_steps'], n_train /
                                     params['batch_size'])).astype(int)
model = models.cgcnn(L, 1, **params)
Example #8
L2 = Data2['L'].astype(np.float32)
L3 = Data3['L'].astype(np.float32)
L4 = Data4['L'].astype(np.float32)
L5 = Data5['L'].astype(np.float32)
# adjust for K-Fold cross validation
Train_Data = np.transpose(np.hstack((D1, D2, D3, D4)))
Val_Data = np.transpose(D5)
Test_Data = np.transpose(D5)
Train_Label = np.vstack((L1, L2, L3, L4))
Val_Label = L5
Test_Label = L5
Test_Label = Test_Label.ravel()
Train_Label = Train_Label.ravel()
Val_Label = Val_Label.ravel()

Train_Data = coarsening.perm_data(Train_Data, perm)
Val_Data = coarsening.perm_data(Val_Data, perm)
Test_Data = coarsening.perm_data(Test_Data, perm)

C = 34  # number of classes

common = {}
common['dir_name'] = 'PPI/'
common['num_epochs'] = 20
common['batch_size'] = 200
common['decay_steps'] = 17.7  # * common['num_epochs']; not used here, used as in momentum
common['eval_frequency'] = 10 * common['num_epochs']
common['brelu'] = 'b1relu'
common['pool'] = 'apool1'
Example #9
def main():
    createFolder('Result')
    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    PPI_data = config["PPI_data"]
    Response_data = config["Response_data"]
    Gene_data = config["Gene_data"]
    n_fold = config["n_fold"]
    test_size = config["test_size"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    brelu = config["brelu"]
    pool = config["pool"]
    regularization = config["regularization"]
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    decay_rate = config["decay_rate"]
    momentum = config["momentum"]
    Name = config["Name"]
    F = config["F"]
    K = config["K"]
    p = config["p"]
    M = config["M"]

    data_PPI = pd.read_csv(PPI_data)
    data_PPI.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_IC50 = pd.read_csv(Response_data)
    data_IC50.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = pd.read_csv(Gene_data)
    data_Gene.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = np.array(data_Gene)

    df = np.array(data_PPI)
    A = coo_matrix(df, dtype=np.float32)
    print(A.nnz)
    graphs, perm = coarsening.coarsen(A, levels=6, self_connections=False)
    L = [graph.laplacian(A, normalized=True) for A in graphs]
    graph.plot_spectrum(L)

    PCC = []
    SPC = []
    RMSE = []

    X_train, X_test, Y_train, Y_test = train_test_split(data_Gene,
                                                        data_IC50,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        random_state=20)

    for cv in range(n_fold):
        Y_pred = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        Y_test = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        j = 0
        for i in range(Y_test.shape[1]):
            data1 = data_IC50.iloc[:, i]
            data1 = np.array(data1)
            data_minmax = data1[~np.isnan(data1)]
            min_val = data_minmax.min()
            max_val = data_minmax.max()
            data1 = (data1 - min_val) / (max_val - min_val)

            train_data_split, test_data_split, train_labels_split, test_labels_split = train_test_split(
                data_Gene,
                data1,
                test_size=test_size,
                shuffle=True,
                random_state=20)
            train_data = np.array(
                train_data_split[~np.isnan(train_labels_split)]).astype(
                    np.float32)

            list_train, list_val = Validation(n_fold, train_data,
                                              train_labels_split)

            train_data_V = train_data[list_train[cv]]
            val_data = train_data[list_val[cv]]
            test_data = np.array(test_data_split[:]).astype(np.float32)
            train_labels = np.array(
                train_labels_split[~np.isnan(train_labels_split)]).astype(
                    np.float32)
            train_labels_V = train_labels[list_train[cv]]
            val_labels = train_labels[list_val[cv]]
            test_labels = np.array(test_labels_split[:]).astype(np.float32)
            train_data_V = coarsening.perm_data(train_data_V, perm)
            val_data = coarsening.perm_data(val_data, perm)
            test_data = coarsening.perm_data(test_data, perm)

            common = {}
            common['num_epochs'] = num_epochs
            common['batch_size'] = batch_size
            common['decay_steps'] = train_data.shape[0] / common['batch_size']
            common['eval_frequency'] = 10 * common['num_epochs']
            common['brelu'] = brelu
            common['pool'] = pool

            common['regularization'] = regularization
            common['dropout'] = dropout
            common['learning_rate'] = learning_rate
            common['decay_rate'] = decay_rate
            common['momentum'] = momentum
            common['F'] = F
            common['K'] = K
            common['p'] = p
            common['M'] = M

            name = Name
            params = common.copy()

            model = models.cgcnn(L, **params)
            loss, t_step = model.fit(train_data_V, train_labels_V, val_data,
                                     val_labels)

            Y_pred[:, j] = model.predict(test_data)
            Y_test[:, j] = test_labels
            j = j + 1

        np.savez(('Result/GraphCNN_CV_{}'.format(cv)),
                 Y_true=Y_test,
                 Y_pred=Y_pred)
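
The PCC, SPC, and RMSE lists above are declared but never filled in this excerpt. A hedged sketch of how they could be computed per drug from the predicted and true arrays (scipy.stats assumed; NaN targets masked out):

from scipy import stats

for j in range(Y_pred.shape[1]):
    mask = ~np.isnan(Y_test[:, j])
    PCC.append(stats.pearsonr(Y_test[mask, j], Y_pred[mask, j])[0])
    SPC.append(stats.spearmanr(Y_test[mask, j], Y_pred[mask, j])[0])
    RMSE.append(np.sqrt(np.mean((Y_test[mask, j] - Y_pred[mask, j]) ** 2)))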
Example #10
def prepare(dataset):
    # # MNIST
    if dataset == 'mnist':
        mnist = input_data.read_data_sets(
            'datasets', one_hot=False)  # load data in folder datasets/
        train_data = mnist.train.images.astype(np.float32)
        val_data = mnist.validation.images.astype(np.float32)
        test_data = mnist.test.images.astype(np.float32)
        train_labels = mnist.train.labels
        val_labels = mnist.validation.labels
        test_labels = mnist.test.labels
        print(train_data.shape)

        print(train_labels.shape)
        print(val_data.shape)
        print(val_labels.shape)
        print(test_data.shape)
        print(test_labels.shape)

        # Construct graph
        t_start = time.time()
        grid_side = 28  # MNIST images are 28x28
        number_edges = 8
        metric = 'euclidean'
        A = grid_graph(grid_side, number_edges,
                       metric)  # create graph of Euclidean grid
        print(A.shape)

    elif dataset == 'adni':
        t_start = time.time()
        train_data, test_data, train_labels, test_labels, A = load_data(
            train_rate=train_rate,
            thresh=thresh,
            binary=binary,
            num=num,
            state=state)  # 0.35
        #A = np.load(saved_path+"_graph_A_2.npy")
        #A = np.load(saved_path+"_init_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_800_found_graph_left_candidate_465.npy")
        #A = scipy.sparse.coo_matrix(A)
        # print(A)
        # print( scipy.sparse.coo_matrix(A))
        #np.save(saved_path+"_init_graph.npy",A.toarray())

        # print("data shape ====")
        # print(train_data.shape)
        # print(train_data[0][0])
        # print(train_labels.shape)
        # print(test_data.shape)
        # print(test_labels.shape)
        # print("data shape end")
        if verbose:
            fig = plt.figure()
            # Create a 1x1 subplot grid and draw in the first position.
            ax = fig.add_subplot(111)
            # Define the axis tick marks.
            # ax.set_yticks(range(len(yLabel)))
            # ax.set_yticklabels(yLabel, fontproperties=font)
            # ax.set_xticks(range(len(xLabel)))
            # ax.set_xticklabels(xLabel)
            # Plot and choose the heatmap color scheme; 'hot' is used here.
            #print("A===")
            #print(A)
            im = ax.imshow(A.toarray(), cmap=plt.cm.hot_r)
            # Add a colorbar on the right.
            plt.colorbar(im)
            # Add a title.
            plt.title("This is the original graph")
            # Show the figure.
            plt.show()

        # print(train_data.shape)
        # print(test_data.shape)
        # print("baseline: ", sum(test_labels)/test_labels.shape[0])

        # grid_side = 180 # 102
        # number_edges = 101
        # metric = 'euclidean'
        # print(A)

    # Compute coarsened graphs

    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)

    #print(L)

    global layer1
    layer1 = (L[0].shape)
    #print(perm)
    #print(set(perm))
    # Compute max eigenvalue of graph Laplacians
    lmax = []
    for i in range(coarsening_levels + 1):
        lmax.append(lmax_L(L[i]))
    #print('lmax: ' + str([lmax[i] for i in range(coarsening_levels)]))

    # # Reindex nodes to satisfy a binary tree structure
    train_data = perm_data(train_data, perm)
    # val_data = perm_data(val_data, perm)
    test_data = perm_data(test_data, perm)
    #
    #print(train_data.shape)
    #print(val_data.shape)
    #print(test_data.shape)
    '''
    test part for update graph
    '''
    #a,b = update_graph(A)
    #print(a[0].shape == (144,144))
    #exit()
    '''
    test part for update graph
    '''
    #print('Execution time: {:.2f}s'.format(time.time() - t_start))
    del perm
    return train_data, train_labels, test_data, test_labels, L, lmax, A
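
A hypothetical call, for illustration only (the 'adni' branch additionally expects train_rate, thresh, binary, num, state, and verbose to be defined at module scope):

train_data, train_labels, test_data, test_labels, L, lmax, A = prepare('mnist')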
Example #11
def cross_validate_convNN(X,
                          y,
                          adjacency,
                          name_param,
                          value_param,
                          k,
                          num_levels=5):

    split_index = split_test_train_for_cv(X.shape[0], k_fold=k)
    graphs, perm = coarsening.coarsen(sp.csr_matrix(
        adjacency.astype(np.float32)),
                                      levels=num_levels,
                                      self_connections=False)

    accuracy = []
    loss = []
    for param_val in value_param:
        accuracy_param = []
        loss_param = []
        for k_ in range(k):
            test_samples = split_index[k_]
            train_samples = split_index[~(
                np.arange(split_index.shape[0]) == k_)].flatten()

            X_train = X[train_samples]
            X_test = X[test_samples]
            y_train = y[train_samples]
            y_test = y[test_samples]

            X_train = coarsening.perm_data(X_train, perm)
            X_test = coarsening.perm_data(X_test, perm)
            n_train = X_train.shape[0]

            L = [graph.laplacian(A, normalized=True) for A in graphs]

            # Conv NN parameters
            params = dict()
            params['dir_name'] = 'demo'
            params['num_epochs'] = 30
            params['batch_size'] = 30
            params['eval_frequency'] = 30

            # Building blocks.
            params['filter'] = 'chebyshev5'
            params['brelu'] = 'b1relu'
            params['pool'] = 'apool1'

            # Number of classes.
            C = y.max() + 1
            assert C == np.unique(y).size

            # Architecture.
            params['F'] = [4, 8]  # Number of graph convolutional filters.
            params['K'] = [3, 3]  # Polynomial orders.
            params['p'] = [2, 8]  # Pooling sizes.
            params['M'] = [
                256, C
            ]  # Output dimensionality of fully connected layers.

            # Optimization.
            params['regularization'] = 4e-5
            params['dropout'] = 1
            params['learning_rate'] = 3e-3
            params['decay_rate'] = 0.9
            params['momentum'] = 0.8
            params['decay_steps'] = n_train / params['batch_size']
            params[name_param] = param_val

            model = models.cgcnn(L, **params)
            test_acc, train_loss, t_step = model.fit(X_train, y_train, X_test,
                                                     y_test)
            accuracy_param.append([max(test_acc), np.mean(test_acc)])
            loss_param.append([max(train_loss), np.mean(train_loss)])
        print(np.array(accuracy_param))
        pm = np.mean(np.array(accuracy_param), axis=0)
        pl = np.mean(np.array(loss_param), axis=0)
        print(
            "IIIII Accuracy: %0.2f (max) %0.2f (mean) Loss: %0.2f (max) %0.2f (mean)"
            % (pm[0], pm[1], pl[0], pl[1]))
        accuracy.append(pm)
        loss.append(pl)
    return accuracy, loss
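
A hypothetical invocation sweeping the dropout keep-probability over a 5-fold split (array names illustrative):

accuracy, loss = cross_validate_convNN(X, y, adjacency,
                                       name_param='dropout',
                                       value_param=[0.5, 0.75, 1.0],
                                       k=5)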
Example #12
A = graph.adjacency(dist, idx)
print("{} > {} edges".format(A.nnz // 2,
                             FLAGS.number_edges * graph_data.shape[0] // 2))
A = graph.replace_random_edges(A, 0)
graphs, perm = coarsening.coarsen(A,
                                  levels=FLAGS.coarsening_levels,
                                  self_connections=False)
L = [graph.laplacian(A, normalized=True) for A in graphs]
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
#graph.plot_spectrum(L)
#del graph_data, A, dist, idx

#%%
t_start = time.process_time()
train_data = scipy.sparse.csr_matrix(
    coarsening.perm_data(train_data.toarray(), perm))
test_data = scipy.sparse.csr_matrix(
    coarsening.perm_data(test_data.toarray(), perm))
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
del perm

#%% [markdown]
# # Classification

#%%
# Training set is shuffled already.
#perm = np.random.permutation(train_data.shape[0])
#train_data = train_data[perm,:]
#train_labels = train_labels[perm]

# Validation set.