Example #1
def _preprocess_data(self):
    # compute support
    if self.params["support"] > 0:
        self.support = utils.chebyshev_polynomials(
            self.network, self.params["support"], sparse=False)
    else:
        self.support = [np.eye(self.network.shape[0])]
    # get predicted probabilities for all genes and CV folds
    self.predicted_probs = []
    with open(self.predictions_file, "rt") as f:
        next(f)  # skip the header line
        for line in f:
            # strip the trailing newline before splitting the TSV row
            self.predicted_probs.append(line.rstrip("\n").split("\t"))
    # one row per gene; the second column (presumably the gene id) must be unique
    assert len(self.predicted_probs) == self.features.shape[0]
    assert len(self.predicted_probs) == len(
        set(x[1] for x in self.predicted_probs))
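Every snippet on this page calls a chebyshev_polynomials helper. For orientation, here is a minimal sketch of the typical implementation (following the pattern of Kipf & Welling's GCN reference code): symmetrically normalize the adjacency matrix, rescale the graph Laplacian to [-1, 1], then apply the Chebyshev recurrence. Signatures differ slightly across the examples (some take a sparse flag or return coordinate tuples), so treat this as a sketch rather than the exact helper used above.

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh

def normalize_adj(adj):
    # symmetric normalization: D^{-1/2} A D^{-1/2}
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()

def chebyshev_polynomials(adj, k):
    # Chebyshev polynomials T_0 ... T_k of the rescaled graph Laplacian (k >= 1)
    n = adj.shape[0]
    laplacian = sp.eye(n) - normalize_adj(adj)
    largest_eigval = eigsh(laplacian, 1, which='LM',
                           return_eigenvectors=False)[0]
    scaled_laplacian = (2. / largest_eigval) * laplacian - sp.eye(n)

    t_k = [sp.eye(n), scaled_laplacian]
    for _ in range(2, k + 1):
        # recurrence: T_k(x) = 2 x T_{k-1}(x) - T_{k-2}(x)
        t_k.append(2 * scaled_laplacian.dot(t_k[-1]) - t_k[-2])
    return t_k  # k + 1 sparse support matrices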
Example #2
            # reconstructed: the opening of this skip guard is truncated in the source
            print("skipping fold {} ({} folds already stored)"
                  .format(fold_i, len(store_data_dicts)))
            continue

        data["train_ind"] = train_ind
        data["val_ind"] = val_ind
        data["fold_i"] = fold_i

        # Apply feature dimensionality reduction per fold and store the result as input_features.
        data['input_features'] = reduce_dim_ridge_smart(
            args, data, data['atlas_features_vec'])

        if args.adj_type == "fixed":
            # Create the adjacency matrix. Because of the normalisation step, it depends on input_features.
            data['adj_raw'] = get_adjacency_matrix_fixed(args, data)
        elif args.adj_type == "vae":
            # Create the adjacency matrix from a VAE embedding.
            data['vae'] = get_data_raw_amc_vae(data['id'])
            data['adj_raw'] = get_adjacency_matrix_vae(args, data['vae'], data)
        elif args.adj_type == "correlation_only":
            data['adj_raw'] = correlation_matrix(data['input_features'],
                                                 "correlation")

        data['adj_support'] = chebyshev_polynomials(data['adj_raw'],
                                                    args.polynomial_degree)

        store_data_dict = train_single_fold(args, data)
        store_data_dicts.append(store_data_dict)

        # checkpoint the accumulated results after every fold
        save_datadicts(args, store_data_dicts)

save_results(args, store_data_dicts)
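The correlation_only branch above relies on a correlation_matrix helper that is not shown. A minimal sketch under the assumption that it builds a dense adjacency from pairwise Pearson correlations of the per-fold input features (the function name comes from the snippet; the body here is hypothetical):

import numpy as np

def correlation_matrix(features, method="correlation"):
    # hypothetical implementation: features is (n_nodes, n_features)
    assert method == "correlation"
    corr = np.corrcoef(features)   # (n_nodes, n_nodes) Pearson correlations
    np.fill_diagonal(corr, 0.0)    # drop self-loops
    return np.abs(corr)            # nonnegative edge weights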
Example #3
def build_model(adj, features, n_classes, subgraphs):
    perturbation = None
    placeholders = {
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask':
        tf.placeholder(tf.int32),
        'noise':
        tf.placeholder(tf.float32, shape=()),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
    }

    if FLAGS.model == 'gcn':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN

    elif FLAGS.model == 'gcnR':
        support = [sparse_to_tuple(adj)]
        model_func = GCN

    elif FLAGS.model == 'gcnT':
        support = [
            sparse_to_tuple(
                preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))
        ]
        model_func = GCN

    elif FLAGS.model == 'fishergcn' or FLAGS.model == 'fishergcnT':

        if FLAGS.model == 'fishergcn':
            A = preprocess_adj(adj)
        else:
            A = preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold)

        N = adj.shape[0]
        L = sp.eye(N) - A

        if FLAGS.fisher_freq == 0:
            #nsubgraphs = subgraphs.shape[1]
            #V = block_krylov( A, FLAGS.fisher_rank+nsubgraphs )
            #V = V[:,:FLAGS.fisher_rank]

            V = block_krylov(A, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 1:
            # if the graph contains one large component and small isolated components
            # only perturb the largest connected component
            subgraph_sizes = subgraphs.sum(0)
            largest_idx = np.argmax(subgraph_sizes)
            isolated = np.nonzero(1 - subgraphs[:, largest_idx])[0]
            L = L.tolil()
            L[:, isolated] = 0
            L[isolated, :] = 0
            L = L.tocsr()

            V = block_krylov(L, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 2:
            V, _ = np.linalg.qr(np.random.randn(N, FLAGS.fisher_rank))
            w = np.ones(FLAGS.fisher_rank)

        else:
            # unrecognized fisher_freq; exit with a non-zero status to signal the error
            print('unknown frequency:', FLAGS.fisher_freq)
            sys.exit(1)

        perturbation = make_perturbation(V, w, placeholders['noise'],
                                         FLAGS.fisher_adversary)
        support = [sparse_to_tuple(A)]
        model_func = GCN

    elif FLAGS.model == 'chebynet':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        model_func = GCN

    elif FLAGS.model == 'mlp':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = MLP

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    try:
        _, _values, _shape = support[0]
        print("sparsity: {0:.2f}%".format(100 * (_values > 0).sum() /
                                          (_shape[0] * _shape[1])))
    except Exception:
        # support[0] may not be a (coords, values, shape) tuple; skip the report
        pass
    placeholders['support'] = [
        tf.sparse_placeholder(tf.float32) for _ in support
    ]

    model = model_func(placeholders,
                       perturbation=perturbation,
                       subgraphs=subgraphs)
    return model, support, placeholders
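block_krylov is not defined in this snippet. A plausible reading, given how V and w are used, is randomized block (power) iteration returning an orthonormal basis for the dominant eigenspace; the weights w computed above are then the Rayleigh quotients diag(Vᵀ L V). A minimal sketch under that assumption:

import numpy as np

def block_krylov(A, rank, n_iter=10):
    # hypothetical: randomized subspace iteration on a (sparse) matrix A,
    # returning an orthonormal N x rank basis of its dominant eigenspace
    V = np.random.randn(A.shape[0], rank)
    for _ in range(n_iter):
        V, _ = np.linalg.qr(A @ V)  # power step + re-orthonormalization
    return V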
Example #4
tf.random.set_seed(seed)

early_stopping = 100

num_supports = 1  # support count for the plain normalized-adjacency kernel; the Chebyshev call below yields 5 supports
weight_decay = 5e-4  # Weight for L2 loss on embedding matrix

epochs = 1000  # Number of epochs to train

learning_rate = 0.002  # Initial learning rate

adj, data, y_train, y_test, train_mask, test_mask, labels = input_sz()
print('load_data success')

# use Chebyshev polynomials as the convolution kernels
support = chebyshev_polynomials(adj, 4)

# alternatively, use the symmetrically normalized adjacency matrix as the kernel
# support = [preprocess_adj(adj)]

data = tf.constant([data])
y_train = tf.constant(y_train)
y_test = tf.constant(y_test)
train_mask = tf.constant(train_mask)
test_mask = tf.constant(test_mask)
labels = tf.constant(labels)
supports = [[
    tf.sparse.SparseTensor(tf.cast(item[0], dtype=tf.int64), item[1], item[2])
    for item in support
]]

print('support success')
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
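The snippet stops right after creating the optimizer. Assuming model is a Keras GCN that maps (features, supports) to logits, one training step with the masked cross-entropy used in GCN reference implementations might look like this (model and the masking scheme are assumptions, not part of the snippet):

def masked_softmax_cross_entropy(logits, labels, mask):
    # average the cross-entropy over the masked (training) nodes only
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    return tf.reduce_mean(loss * mask)

def train_step(model, x, supports, y, mask):
    with tf.GradientTape() as tape:
        logits = model((x, supports), training=True)
        loss = masked_softmax_cross_entropy(logits, y, mask)
        # L2 regularization scaled by weight_decay, as defined above
        loss += weight_decay * tf.add_n(
            [tf.nn.l2_loss(w) for w in model.trainable_weights])
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss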
Example #5
flags.DEFINE_string('method', args.method, 'Adversarial attack method')

os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    FLAGS.dataset_dir, FLAGS.dataset)

# Some preprocessing
features_dense, features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

# Define placeholders
placeholders = {
    'support':
    [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.placeholder(tf.float32, shape=features[2]),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    # remaining entries assumed, following the standard GCN placeholder pattern
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
}
Example #6
dataset = CoraData().data
x = dataset.x / dataset.x.sum(1, keepdims=True)  # normalize so each row sums to 1
tensor_x = torch.from_numpy(x).to(device)
tensor_y = torch.from_numpy(dataset.y).to(device)
tensor_train_mask = torch.from_numpy(dataset.train_mask).to(device)
tensor_val_mask = torch.from_numpy(dataset.val_mask).to(device)
tensor_test_mask = torch.from_numpy(dataset.test_mask).to(device)
normalize_adjacency = CoraData.normalization(dataset.adjacency)  # normalize the adjacency matrix
indices = torch.from_numpy(
    np.asarray([normalize_adjacency.row,
                normalize_adjacency.col]).astype('int64')).long()
values = torch.from_numpy(normalize_adjacency.data.astype(np.float32))
# torch.sparse.FloatTensor is deprecated; build the sparse tensor directly
tensor_adjacency = torch.sparse_coo_tensor(indices, values,
                                           (2708, 2708)).to(device)

heatkernel = chebyshev_polynomials(dataset.adjacency, 3)  # Chebyshev supports up to order 3
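# chebyshev_polynomials returns scipy sparse matrices, but the forward pass
# below feeds heatkernel straight into a torch model; a hedged conversion,
# mirroring the adjacency conversion above (assumes COO-format supports):
heatkernel = [
    torch.sparse_coo_tensor(
        torch.from_numpy(np.asarray([t.row, t.col]).astype('int64')),
        torch.from_numpy(t.data.astype(np.float32)),
        t.shape).to(device)
    for t in heatkernel
]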


# main training loop
def train():
    loss_history = []
    val_acc_history = []
    model.train()
    train_y = tensor_y[tensor_train_mask]
    for epoch in range(epochs):
        logits = model(tensor_x, heatkernel)  # forward pass
        train_mask_logits = logits[tensor_train_mask]  # supervise only the training nodes
        loss = criterion(train_mask_logits, train_y)  # compute the loss
        optimizer.zero_grad()
        loss.backward()  # backpropagate to compute parameter gradients