Example #1
 def get_degree_feature_list(self, origin_base_path, start_idx, duration, sep='\t', init_type='gaussian', std=1e-4):
     assert init_type in ['gaussian', 'adj', 'combine', 'one-hot']
     x_list = []
     max_degree = 0
     adj_list = []
     degree_list = []
     date_dir_list = sorted(os.listdir(origin_base_path))
     # find the maximal degree for a list of graphs
     for i in range(start_idx, min(start_idx + duration, self.max_time_num)):
         original_graph_path = os.path.join(origin_base_path, date_dir_list[i])
         adj = get_sp_adj_mat(original_graph_path, self.full_node_list, sep=sep)
         adj_list.append(adj)
         degrees = adj.sum(axis=1).astype(int)  # np.int was removed in NumPy 1.24; use the builtin int
         max_degree = max(max_degree, degrees.max())
         degree_list.append(degrees)
     # generate degree_based features
     input_dim = 0
     for i, degrees in enumerate(degree_list):
         # other structural feature initialization techniques can also be tried to improve performance
         if init_type == 'gaussian':
             fea_list = []
             for degree in degrees:
                 fea_list.append(np.random.normal(degree, std, max_degree + 1))
             fea_arr = np.array(fea_list).astype(np.float32)
             input_dim = fea_arr.shape[1]
             fea_tensor = torch.from_numpy(fea_arr).float()
             x_list.append(fea_tensor.cuda() if self.has_cuda else fea_tensor)
         elif init_type == 'adj':
             input_dim = self.node_num
             feat_tensor = sparse_mx_to_torch_sparse_tensor(adj_list[i])
             x_list.append(feat_tensor.cuda() if self.has_cuda else feat_tensor)
         elif init_type == 'combine':
             fea_list = []
             for degree in degrees:
                 fea_list.append(np.random.normal(degree, std, max_degree + 1))
             sp_feat = sp.coo_matrix(np.array(fea_list))
             sp_feat = sp.hstack((sp_feat, adj_list[i])).astype(np.float32)
             input_dim = sp_feat.shape[1]
             feat_tensor = sparse_mx_to_torch_sparse_tensor(sp_feat)
             x_list.append(feat_tensor.cuda() if self.has_cuda else feat_tensor)
         else:  # one-hot degree feature
             data = np.ones(degrees.shape[0], dtype=int)
             row = np.arange(degrees.shape[0])
             col = degrees.flatten().A[0]
             spmat = sp.csr_matrix((data, (row, col)), shape=(degrees.shape[0], max_degree + 1))
             sptensor = sparse_mx_to_torch_sparse_tensor(spmat)
             x_list.append(sptensor.cuda() if self.has_cuda else sptensor)
             # print('max degree: ', max_degree + 1)
             input_dim = max_degree + 1
     return x_list, input_dim
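
All of these snippets assume a `sparse_mx_to_torch_sparse_tensor` helper defined elsewhere in each project. A minimal sketch of the common pygcn-style definition (individual projects vary, e.g. Example #17 passes a cuda= flag):

import numpy as np
import scipy.sparse as sp
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)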
Example #2
 def _preprocess_adj(self, normalization, adj, cuda):
     adj_normalizer = fetch_normalization(normalization)
     r_adj = adj_normalizer(adj)
     r_adj = sparse_mx_to_torch_sparse_tensor(r_adj).float()
     if cuda:
         r_adj = r_adj.cuda()
     return r_adj
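
`fetch_normalization` returns a normalization routine selected by name. For reference, a minimal sketch of the usual symmetric GCN normalizer D^-1/2 A D^-1/2 (illustrative, assuming scipy sparse input; the project's own normalizers may differ):

def sym_normalize_adj(adj):
    """Symmetrically normalize a scipy sparse adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0  # isolated nodes get zero weight
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()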
Example #3
    def build_cluster_adj(self, clean=False):
        """
        Build an adjacency matrix that only records which kinds of fake labels each node links to.
        """
        adj = np.zeros((self.n_nodes, self.n_clusters), dtype=np.float64)

        for dst, src in self.edges.tolist():
            adj[src, self.fake_labels[dst]] += 1
            adj[dst, self.fake_labels[src]] += 1

        if self.mode in ('clusteradj',) and not clean:  # tuple, not a bare string, so 'in' is a membership test
            adj += get_noise(self.args.noise_type,
                             self.n_nodes,
                             self.n_clusters,
                             self.args.noise_seed,
                             eps=self.args.epsilon,
                             delta=self.args.delta)

            adj = np.clip(adj, a_min=0, a_max=None)
            adj = normalize(adj)
            return torch.FloatTensor(adj)

        adj = sp.coo_matrix(adj)
        adj = normalize(adj)
        return sparse_mx_to_torch_sparse_tensor(adj)
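
`get_noise` comes from the DP-GCN project and is not shown. A hypothetical reconstruction of what such a differential-privacy noise generator typically looks like (the calibration and names below are assumptions, not the project's actual code):

def get_noise(noise_type, n, m, seed, eps=1.0, delta=1e-5):
    # Hypothetical sketch: draw an (n, m) noise matrix, assuming sensitivity 1.
    rng = np.random.default_rng(seed)
    if noise_type == 'laplace':
        return rng.laplace(0.0, 1.0 / eps, size=(n, m))
    if noise_type == 'gaussian':
        # standard Gaussian-mechanism calibration for (eps, delta)-DP
        sigma = np.sqrt(2.0 * np.log(1.25 / delta)) / eps
        return rng.normal(0.0, sigma, size=(n, m))
    raise ValueError('unknown noise type: {}'.format(noise_type))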
Example #4
def evaluate_test(model, g, inputs, labels, test_mask, lp_dict, coeffs, meta):
    model.eval()
    with torch.no_grad():
        pred = model(g, inputs).squeeze()

    output = pred.cuda()
    labels = labels.cuda()
    idx_test = lp_dict['idx_test']
    idx_train = lp_dict['idx_train']
    adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj']))
    #print(adj.to_dense()[np.arange(100), np.arange(100)+1])

    labels, output, adj = labels.cpu(), output.cpu(), adj.cpu()
    loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze())
    r2_test = compute_r2(output[idx_test], labels[idx_test])
    lp_output = lp_refine(idx_test, idx_train, labels, output, adj,
                          torch.tanh(coeffs[0]).item(),
                          torch.exp(coeffs[1]).item())
    lp_r2_test = compute_r2(lp_output, labels[idx_test])
    lp_output_raw_cov = lp_refine(idx_test, idx_train, labels, output, adj)
    lp_r2_test_raw_cov = compute_r2(lp_output_raw_cov, labels[idx_test])

    print("------------")
    print("election year {}".format(meta))
    print("loss:", loss.item())
    print("raw_r2:", r2_test)
    print("refined_r2:", lp_r2_test)
    print("refined_r2_raw_cov:", lp_r2_test_raw_cov)
    print("------------")
Example #5
def evaluate_test(model, g, inputs, labels, test_mask, batch_size, device, lp_dict, meta):
    model.eval()
    with th.no_grad():
        pred = model.inference(g, inputs, batch_size, device).view(-1)

    output = pred.to(device)
    labels = labels.to(device)
    idx_test = lp_dict['idx_test']
    idx_train = lp_dict['idx_train']
    adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj']))

    labels, output, adj = labels.cpu(), output.cpu(), adj.cpu()
    loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze())
    r2_test = compute_r2(output[test_mask], labels[test_mask])
    lp_output = lp_refine(idx_test, idx_train, labels, output, adj)
    lp_r2_test = compute_r2(lp_output, labels[idx_test])

    print("------------")
    print("election year {}".format(meta))
    print("loss:", loss.item())
    print("raw_r2:", r2_test)
    print("refined_r2:", lp_r2_test)
    print("------------")

    model.train()
Example #6
    def get_core_adj_list(self, core_base_path, start_idx, duration, max_core=-1):
        date_dir_list = sorted(os.listdir(core_base_path))
        time_stamp_num = len(date_dir_list)
        assert start_idx < time_stamp_num
        core_adj_list = []
        for i in range(start_idx, min(start_idx + duration, self.max_time_num)):
            date_dir_path = os.path.join(core_base_path, date_dir_list[i])
            f_list = sorted(os.listdir(date_dir_path))
            core_file_num = len(f_list)
            tmp_adj_list = []
            if max_core == -1:
                max_core = core_file_num
            f_list = f_list[:max_core]  # select 1 core to max core
            f_list = f_list[::-1]  # reverse order, max core, (max - 1) core, ..., 1 core

            # get k-core adjacent matrices at the i-th timestamp
            spmat_list = []
            for j, f_name in enumerate(f_list):
                spmat = sp.load_npz(os.path.join(date_dir_path, f_name))
                spmat_list.append(spmat)
                if j == 0:
                    spmat = spmat + sp.eye(spmat.shape[0])
                else:
                    delta = spmat - spmat_list[j - 1]  # only the delta w.r.t. the previous (larger) core is new
                    if delta.sum() == 0:  # this core adds nothing new: skip it to save computation and memory
                        continue
                # Normalization would shrink the self-loop weight and hurt performance, so it is omitted here.
                sptensor = sparse_mx_to_torch_sparse_tensor(spmat)
                tmp_adj_list.append(sptensor.cuda() if self.has_cuda else sptensor)
            # print('time: ', i, 'core len: ', len(tmp_adj_list))
            core_adj_list.append(tmp_adj_list)
        return core_adj_list
Example #7
def test(model, dataset, cfg, logger):
    if cfg.load_from:
        logger.info('load from {}'.format(cfg.load_from))
        load_checkpoint(model, cfg.load_from, strict=True, logger=logger)

    features = torch.FloatTensor(dataset.features)
    adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
    if not dataset.ignore_label:
        labels = torch.FloatTensor(dataset.labels)

    if cfg.cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        if not dataset.ignore_label:  # labels is only defined when labels are used
            labels = labels.cuda()

    model.eval()
    output, gcn_feat = model((features, adj), output_feat=True)
    if not dataset.ignore_label:
        loss = F.mse_loss(output, labels)
        loss_test = float(loss)
        logger.info('[Test] loss = {:.4f}'.format(loss_test))

    pred_confs = output.detach().cpu().numpy()
    gcn_feat = gcn_feat.detach().cpu().numpy()
    return pred_confs, gcn_feat
Example #8
 def get_feature_list(self, feature_base_path, start_idx, duration, sep='\t', shuffle=False):
     if feature_base_path is None:
         x_list = []
         for i in range(start_idx, min(start_idx + duration, self.max_time_num)):
             if shuffle:
                 node_indices = np.random.permutation(self.node_num)
                 spmat = sp.coo_matrix((np.ones(self.node_num), (np.arange(self.node_num), node_indices)), shape=(self.node_num, self.node_num))
             else:
                 spmat = sp.eye(self.node_num)
             sptensor = sparse_mx_to_torch_sparse_tensor(spmat)
             x_list.append(sptensor.cuda() if self.has_cuda else sptensor)
         input_dim = self.node_num
     else:
         feature_file_list = sorted(os.listdir(feature_base_path))
         x_list = []
         feature_arr_list = []
         max_feature_dim = 0
         # calculate max feature dimension
         for i in range(start_idx, min(start_idx + duration, self.max_time_num)):
             feature_file_path = os.path.join(feature_base_path, feature_file_list[i])
             df_feature = pd.read_csv(feature_file_path, sep=sep, header=0)
             max_feature_dim = max(max_feature_dim, df_feature.shape[1])
             feature_arr = df_feature.values
             feature_arr_list.append(feature_arr)
         # expand feature matrix into the same dimension
         for feature_arr in feature_arr_list:
             batch_dim, feature_dim = feature_arr.shape
             expand_feature_arr = np.hstack((feature_arr, np.zeros((batch_dim, max_feature_dim - feature_dim)))).astype(np.float32)
             fea_tensor = torch.from_numpy(expand_feature_arr).float()
             x_list.append(fea_tensor.cuda() if self.has_cuda else fea_tensor)
         input_dim = max_feature_dim
     return x_list, input_dim
Example #9
def _single_train(model, dataset, cfg, logger):
    if cfg.gpus > 1:
        raise NotImplementedError

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    logger)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    features = torch.FloatTensor(dataset.features)
    adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
    labels = torch.FloatTensor(dataset.labels)

    if cfg.cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()

    train_data = [[features, adj, labels]]
    runner.run(train_data, cfg.workflow, cfg.total_epochs)
Example #10
def evaluate_test(model, features, labels, test_mask, lp_dict, coeffs, meta="2012"):
    model.eval()
    with torch.no_grad():
        output = model(features).squeeze()

    output = output.cuda()
    labels = labels.cuda()
    idx_test = lp_dict['idx_test']
    idx_train = lp_dict['idx_train']
    adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj']))

    labels, output, adj = labels.cpu(), output.cpu(), adj.cpu()
    loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze())
    r2_test = compute_r2(output[idx_test], labels[idx_test])
    lp_output = lp_refine(idx_test, idx_train, labels, output, adj, torch.tanh(coeffs[0]).item(), torch.exp(coeffs[1]).item())
    lp_r2_test = compute_r2(lp_output, labels[idx_test])
    lp_output_raw_conv = lp_refine(idx_test, idx_train, labels, output, adj)
    lp_r2_test_raw_conv = compute_r2(lp_output_raw_conv, labels[idx_test])

    print("------------")
    print("election year {}".format(meta))
    print("loss:", loss.item())
    print("raw_r2:", r2_test)
    print("refined_r2:", lp_r2_test)
    print("refined_r2_raw_conv:", lp_r2_test_raw_conv)
    print("------------")
Example #11
 def prepare_for_pytorch(self):
     self.edge_index_lists = [0] * len(self.graphs)
     for i in range(len(self.graphs)):
         self.edge_index_lists[i] = adj2edgeindex(self.graphs[i])
     for i in self.layer2pooling_matrices:
         self.layer2pooling_matrices[i] = [
             sparse_mx_to_torch_sparse_tensor(spmat).t()
             for spmat in self.layer2pooling_matrices[i]
         ]
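
`adj2edgeindex` is not shown; a plausible reading, assuming it converts a scipy sparse adjacency into a 2 x E edge-index tensor in the PyTorch Geometric convention (a guess from the name, not the project's code):

def adj2edgeindex(adj):
    adj = sp.coo_matrix(adj)
    return torch.from_numpy(np.vstack((adj.row, adj.col))).long()  # shape (2, E)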
Example #12
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()
    early_stopping = 10

    adj_raw = load_adj_raw(args['dataset']).tocsr()
    idx_mask = list(range(node_num))
    adj_mask = adj_raw.tolil()  # LIL format allows cheap diagonal assignment (and avoids aliasing adj_raw)
    adj_mask[idx_mask, idx_mask] = 0
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()

    reduced_dim = args['reduced_dimension']
    ss_labels, _, _ = features.svd()
    ss_labels = ss_labels[:, :reduced_dim].cuda()

    net_gcn = net.net_gcn_multitask(embedding_dim=args['embedding_dim'], ss_dim=args['reduced_dimension'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    best_val = 0
    best_val_test = 0
    for epoch in range(500):

        optimizer.zero_grad()
        output, _ = net_gcn(features, adj)
        _, output_ss = net_gcn(features, adj_mask)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func_ss(output_ss, ss_labels) * 1e2
        loss = loss_target + loss_ss * args['loss_weight']
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, adj, val_test=True)
            # loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

            acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
            if acc_val > best_val:
                best_val = acc_val
                best_val_test = acc_test

    return best_val, best_val_test
Example #13
 def forward(self, input, adj):
     # adj is extracted from the graph structure
     support = torch.mm(input, self.weight)
     I_n = sp.eye(adj.shape[0])
     I_n = sparse_mx_to_torch_sparse_tensor(I_n).cuda()
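     # propagation operator (I + smooth * A) / (1 + smooth): adjacency smoothing with a weighted self-loop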
     output = torch.spmm((I_n + self.smooth * adj) / (1 + self.smooth),
                         support)
     if self.bias is not None:
         return output + self.bias
     else:
         return output
Example #14
    def build_adj_mat(self, mode='vanilla-clean'):
        if mode == 'vanilla-clean':
            adj = self.build_adj_original()

        elif mode == 'vanilla':
            adj = self.build_adj_vanilla()

        else:
            raise NotImplementedError('mode = {} not implemented!'.format(mode))

        adj = normalize(adj + sp.eye(adj.shape[0]))
        adj = sparse_mx_to_torch_sparse_tensor(adj) if mode == 'vanilla-clean' else torch.FloatTensor(adj)
        return adj
Example #15
 def __init__(self, adj_mat, train_nodes, valid_nodes, test_nodes, device):
     self.adj_mat = adj_mat
     self.train_nodes = train_nodes
     self.valid_nodes = valid_nodes
     self.test_nodes = test_nodes
     self.device = device
     self.num_nodes = adj_mat.shape[0]
     self.num_train_nodes = len(self.train_nodes)
     self.lap_matrix = self.sym_normalize(adj_mat)
     self.lap_tensor = sparse_mx_to_torch_sparse_tensor(self.lap_matrix)
     self.lap_tensor = torch.sparse.FloatTensor(
         self.lap_tensor[0], self.lap_tensor[1],
         self.lap_tensor[2]).to(device)
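
Note: this example (and Example #18 below) indexes the helper's result as `lap_tensor[0]`, `lap_tensor[1]`, `lap_tensor[2]`, which implies a variant that returns the raw (indices, values, shape) triple instead of a built tensor. A sketch of such a variant, inferred from the call sites rather than taken from the project:

def sparse_mx_to_torch_sparse_parts(sparse_mx):
    # hypothetical tuple-returning variant matching the lap_tensor[i] indexing above
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return indices, values, shape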
Example #16
File: DNDC.py Project: jma712/DNDC
def load_data(path, name='BlogCatalog', exp_id='0'):
    data = sio.loadmat(path + name + exp_id + '.mat')

    C_list = data['T']
    Y1_true_list = data['Y1']
    Y0_true_list = data['Y0']
    idx_trn = data['trn_idx'][0]
    idx_val = data['val_idx'][0]
    idx_tst = data['tst_idx'][0]

    # load
    X = data['X'][0]

    Z_init = torch.zeros(X[0].shape[0], args.h_dim)

    X_list = []
    for t in range(len(X)):
        X_list.append(torch.tensor(X[t].todense(), dtype=torch.float32))

    # A
    sparse_A_list = []
    dense_A_list = []

    A = data['A'][0]
    for t in range(len(A)):
        dense_A_list.append(torch.tensor(A[t].todense()))
        A[t] = sp.csr_matrix(A[t])
        A[t] = utils.sparse_mx_to_torch_sparse_tensor(A[t])
        sparse_A_list.append(A[t])

    C_list = [torch.FloatTensor(C) for C in C_list]
    Y1_true_list = [torch.FloatTensor(y1) for y1 in Y1_true_list]
    Y0_true_list = [torch.FloatTensor(y0) for y0 in Y0_true_list]
    idx_trn = torch.LongTensor(idx_trn)
    idx_val = torch.LongTensor(idx_val)
    idx_tst = torch.LongTensor(idx_tst)

    idx_trn_list = []
    idx_val_list = []
    idx_tst_list = []
    for t in range(len(A)):
        idx_trn_list.append(idx_trn)
        idx_val_list.append(idx_val)
        idx_tst_list.append(idx_tst)

    Z_init = torch.FloatTensor(Z_init)

    return X_list, sparse_A_list, dense_A_list, C_list, Y1_true_list, Y0_true_list, idx_trn_list, idx_val_list, idx_tst_list, Z_init
Example #17
def prepare(i_exp):

    # Load data and init models
    X, A, T, Y1, Y0 = utils.load_data(args.path,
                                      name=args.dataset,
                                      original_X=False,
                                      exp_id=str(i_exp),
                                      extra_str=args.extrastr)

    n = X.shape[0]
    n_train = int(n * args.tr)
    n_test = int(n * 0.2)
    # n_valid = n_test

    idx = np.random.permutation(n)
    idx_train = idx[:n_train]
    idx_test = idx[n_train:n_train + n_test]
    idx_val = idx[n_train + n_test:]

    X = utils.normalize(X)  #row-normalize
    # A = utils.normalize(A+sp.eye(n))

    X = X.todense()
    X = Tensor(X)

    Y1 = Tensor(np.squeeze(Y1))
    Y0 = Tensor(np.squeeze(Y0))
    T = LongTensor(np.squeeze(T))

    A = utils.sparse_mx_to_torch_sparse_tensor(A, cuda=args.cuda)

    # print(X.shape, Y1.shape, A.shape)

    idx_train = LongTensor(idx_train)
    idx_val = LongTensor(idx_val)
    idx_test = LongTensor(idx_test)

    # Model and optimizer
    model = GCN_DECONF(nfeat=X.shape[1],
                       nhid=args.hidden,
                       dropout=args.dropout,
                       n_out=args.nout,
                       n_in=args.nin,
                       cuda=args.cuda)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    return X, A, T, Y1, Y0, idx_train, idx_val, idx_test, model, optimizer
Example #18
 def get_mini_batch_dropedge(self, percent=0.8):
     nnz = self.adj_mat.nnz
     perm = np.random.permutation(nnz)
     preserve_nnz = int(nnz * percent)
     perm = np.sort(perm[:preserve_nnz])
     # print(preserve_nnz, perm)
     adj_mat = self.adj_mat.tocoo()
     adj_mat = sp.coo_matrix(
         (adj_mat.data[perm], (adj_mat.row[perm], adj_mat.col[perm])),
         shape=adj_mat.shape)
     lap_matrix = self.sym_normalize(adj_mat)
     lap_tensor = sparse_mx_to_torch_sparse_tensor(lap_matrix)
     lap_tensor = torch.sparse.FloatTensor(lap_tensor[0], lap_tensor[1],
                                           lap_tensor[2]).to(self.device)
     return [self.train_nodes], lap_tensor
Example #19
def test(model, test_adj, test_feats, test_labels, batch_size, epoch):
    t = time.time()
    # change data type to tensor
    test_adj = [
        sparse_mx_to_torch_sparse_tensor(cur_adj) for cur_adj in test_adj
    ]
    test_feats = [torch.FloatTensor(cur_feats) for cur_feats in test_feats]
    test_labels = torch.LongTensor(test_labels).max(1)[1]

    model.eval()
    outputs = model(test_feats, test_adj)
    loss_test = F.nll_loss(outputs, test_labels)
    acc_test = accuracy(outputs, test_labels)

    return loss_test.item(), acc_test.item(), time.time() - t
Example #20
File: worker.py Project: AI-secure/DP-GCN
    def sgc_precompute(self, adj, features, mode='sgc-clean'):
        # # if mode == 'sgc-clean':
        # adj = self.build_adj_original()
        # # else:
        # #     adj = self.build_adj_vanilla()

        normalizer = fetch_normalization(self.args.norm)
        adj = sparse_mx_to_torch_sparse_tensor(normalizer(adj)).float().cuda()

        # adj_normalizer = fetch_normalization(self.args.normalization)
        # adj = adj_normalizer(adj)
        # adj = sparse_mx_to_torch_sparse_tensor(adj).float().cuda()

        # for _ in range(self.args.degree):
        features = torch.spmm(adj, features)
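        # with the degree loop above commented out, only a single propagation hop is applied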

        return features
Example #21
def get_label_weights(opt, test_predictions, test_targets):
    adj = pickle.load(
        open(
            '/bigtemp/jjl5sw/ChromeGCN/data/' + args.cell_type + '/hic/' +
            'test' + '_graphs_min1000_samples' + args.hicsize + '_' +
            args.hicnorm + 'norm.pkl', "rb"))

    # if not os.path.exists('/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/hic/test.pt'):
    # if True:
    #     data_dict = torch.load('/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/train_valid_test.pt')
    #     torch.save(data_dict['test'],'/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/test.pt')
    #     torch.save(data_dict['dict'],'/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/src_tgt_dict.pt')
    # else:
    #     test_data = torch.load('/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/test.pt')
    #     data_dict = torch.load('/bigtemp/jjl5sw/ChromeGCN/data/'+args.cell_type+'/src_tgt_dict.pt')
    test_data = torch.load('/bigtemp/jjl5sw/ChromeGCN/data/' + args.cell_type +
                           '/test.pt')

    chrom_index_dict = {}
    for idx, sample in enumerate(test_data['loc']):
        chrom = sample[0]
        if chrom not in chrom_index_dict:
            chrom_index_dict[chrom] = []
        chrom_index_dict[chrom].append(idx)

    test_labels = torch.Tensor(test_data['tgt'])
    label_neighbor_count = torch.zeros(len(test_data['tgt'][0]))
    label_count = torch.zeros(len(test_data['tgt'][0]))

    for chrom in chrom_index_dict:
        chrom_indices = torch.Tensor(chrom_index_dict[chrom]).long()
        chrom_labels = torch.index_select(test_labels, 0, chrom_indices)

        chrom_adj = utils.sparse_mx_to_torch_sparse_tensor(adj[chrom].tocoo())
        chrom_adj_d = chrom_adj.to_dense()
        chrom_adj_d[chrom_adj_d > 1] = 1

        for idx, sample_labels in enumerate(chrom_labels):
            sample_labels_nz = sample_labels.nonzero()
            sample_neighbors = chrom_adj_d[idx].sum()
            label_neighbor_count[sample_labels_nz] += sample_neighbors
            label_count[sample_labels_nz] += 1

    normalized_label_weights = label_neighbor_count.div(label_count)

    return normalized_label_weights
Example #22
def perform_val(model, HEAD1, HEAD_test1, cfg, feature_dim, pair_a, pair_b):

    test_lb2idxs, test_idx2lb = read_meta(cfg.test_data['label_path'])
    test_inst_num = len(test_idx2lb)

    model.eval()
    HEAD1.eval()
    HEAD_test1.eval()

    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    features = torch.FloatTensor(dataset.features)
    adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
    labels = torch.LongTensor(dataset.gt_labels)

    if cfg.cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        HEAD_test1 = HEAD_test1.cuda()

    test_data = [features, adj, labels]

    HEAD_test1.load_state_dict(HEAD1.state_dict(), False)

    with torch.no_grad():
        output_feature = model(test_data)
        sum_acc = 0
        patch_num = 10
        patch_size = int(test_inst_num / patch_num)
        for i in range(patch_num):
            score = HEAD_test1(output_feature[pair_a[i * patch_size:(i + 1) * patch_size]],
                               output_feature[pair_b[i * patch_size:(i + 1) * patch_size]], no_list=True)
            #print(score)
            pre_labels = (score > 0.5).long()
            #print(pre_labels)
            gt_labels = (labels[pair_a[i * patch_size:(i + 1) * patch_size]] == labels[pair_b[i * patch_size:(i + 1) * patch_size]]).long()

            acc = (pre_labels == gt_labels).long().sum()
            sum_acc += acc
        avg_acc = float(sum_acc) / test_inst_num
        return avg_acc
Example #23
def full_citation(dataset_str="cora"):
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i, name in enumerate(names):
        with open("dataset/ind.{}.{}".format(dataset_str, name), 'rb') as f:
            objects.append(pkl.load(f, encoding="latin1"))    

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("dataset/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == "citeseer":
        # For this dataset, there are some isolated nodes in the graph
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)  # +1 so the last test index is included
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    labels = np.vstack((ally, ty))

    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    features = normalize(features)
    # porting to pytorch
    features = torch.FloatTensor(np.array(features.todense())).float()
    labels = torch.LongTensor(labels)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    adj = sys_normalized_adjacency(adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    return adj, features, labels, idx_train, idx_val, idx_test
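
`parse_index_file` is the standard companion helper in Planetoid-style loaders; a minimal sketch:

def parse_index_file(filename):
    """Read one integer node index per line."""
    return [int(line.strip()) for line in open(filename)]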
Example #24
def load_ABIDE(graph_type):

    atlas = "ho"
    connectivity = "correlation"

    # Get class labels
    subject_IDs = get_ids()
    labels = get_subject_score(subject_IDs, score="DX_GROUP")
    labels = np.array(list(map(int, list(labels.values())))) - 1
    num_nodes = len(subject_IDs)

    # Compute feature vectors (vectorised connectivity networks)
    features = get_networks(subject_IDs, kind=connectivity, atlas_name=atlas)

    # Compute population graph using phenotypic features
    if graph_type == "original":
        final_graph = create_weighted_adjacency()
    if graph_type == "graph_no_features":
        final_graph = create_weighted_adjacency()
        features = np.identity(num_nodes)
    if graph_type == "graph_random":
        ones = get_num_edges() / (len(labels) * len(labels))
        final_graph = np.random.choice([0, 1],
                                       size=(len(labels), len(labels)),
                                       p=[1 - ones, ones])
        final_graph = (final_graph + final_graph.T) / 2
    if graph_type == "graph_identity":
        final_graph = np.zeros((num_nodes, num_nodes))

    final_graph = normalize(final_graph)

    adj = sp.coo_matrix(final_graph)
    adj = adj + sp.eye(adj.shape[0])
    adj = normalize(adj)

    features = sp.csr_matrix(features)
    features = normalize(features)

    # Convert to tensors
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    features = torch.FloatTensor(np.array(features.todense())).float()
    labels = torch.LongTensor(labels)

    return adj, features, labels
Example #25
def load_trained_vector(epoch, number, n2i_f, file_homes):
    global node2index
    node2index = cPickle.load(n2i_f)
    node_count = len(node2index)
    node_dim = 128
    n_repr = 128
    gcn = GCN(node_count, node_dim, n_repr)
    gcn.load_state_dict(
        torch.load(file_homes + '/networks/GCN_%d_%d.pth' % (number, epoch),
                   map_location='cpu'))
    f = open(file_homes + '/networks/adj_matrix_%d_full' % (number), 'rb')
    full_adj_matrix = cPickle.load(f)
    full_adj_matrix = sparse_mx_to_torch_sparse_tensor(full_adj_matrix)
    init_input = torch.LongTensor([j for j in range(0, node_count)])
    gcn.eval()

    rp_matrix = gcn(init_input, full_adj_matrix)
    #gcn.to(device)
    return rp_matrix.double()
Example #26
 def get_date_adj_list(self, origin_base_path, start_idx, duration, sep='\t', normalize=False, row_norm=False, add_eye=False, data_type='tensor'):
     assert data_type in ['tensor', 'matrix']
     date_dir_list = sorted(os.listdir(origin_base_path))
     # print('adj list: ', date_dir_list)
     date_adj_list = []
     for i in range(start_idx, min(start_idx + duration, self.max_time_num)):
         original_graph_path = os.path.join(origin_base_path, date_dir_list[i])
         spmat = get_sp_adj_mat(original_graph_path, self.full_node_list, sep=sep)
         # spmat = sp.coo_matrix((np.exp(alpha * spmat.data), (spmat.row, spmat.col)), shape=(self.node_num, self.node_num))
         if add_eye:
             spmat = spmat + sp.eye(spmat.shape[0])
         if normalize:
             spmat = get_normalized_adj(spmat, row_norm=row_norm)
         # data type
         if data_type == 'tensor':
             sptensor = sparse_mx_to_torch_sparse_tensor(spmat)
             date_adj_list.append(sptensor.cuda() if self.has_cuda else sptensor)
         else:  # data_type == matrix
             date_adj_list.append(spmat)
     # print(len(date_adj_list))
     return date_adj_list
Example #27
File: worker.py Project: AI-secure/DP-GCN
    def build_adj_mat(self, edges, mode='vanilla-clean'):
        if mode in ( 'vanilla-clean', 'degcn-clean' ):
            adj = self.build_adj_original(edges)

        elif mode in ( 'vanilla', 'degcn' ):
            adj = self.build_adj_vanilla()
            if mode == 'degcn':
                # temp = np.zeros((self.n_nodes, self.n_nodes))
                # temp[adj > 0.5] = 1
                # adj = temp

                # print(len(self.edges))
                self.edges = []
                for u, v in zip(*np.where(adj)):
                    if u > v: continue
                    self.edges.append((u, v))

                print(len(self.edges))

        adj = normalize(adj + sp.eye(adj.shape[0]))
        adj = sparse_mx_to_torch_sparse_tensor(adj) if mode in ( 'vanilla-clean', 'degcn-clean' ) else torch.FloatTensor(adj)
        return adj
Example #28
def load_reg_data(args):
    path = './data/county/election/2012'
    adj = np.load(path + "/A.npy")
    labels = np.load(path + "/labels.npy")
    features = np.load(path + "/feats.npy")
    idx_train = np.load(path + "/train_idx.npy") - 1
    idx_val = np.load(path + "/val_idx.npy") - 1
    idx_test = np.load(path + "/test_idx.npy") - 1
    n = len(adj)
    train_mask = np.zeros(n).astype(bool)
    train_mask[idx_train] = True
    val_mask = np.zeros(n).astype(bool)
    val_mask[idx_val] = True
    test_mask = np.zeros(n).astype(bool)
    test_mask[idx_test] = True
    n_classes = 1
    sp_adj = sp.coo_matrix(adj)
    g = dgl.graph((torch.LongTensor(sp_adj.row), torch.LongTensor(sp_adj.col)))
    lp_dict = {
        'idx_test': torch.LongTensor(idx_test),
        'idx_train': torch.LongTensor(idx_train),
        'sp_adj': sp_adj.astype(float),
        'adj': sparse_mx_to_torch_sparse_tensor(normalize(sp_adj.astype(float)))
    }

    features = torch.FloatTensor(features)
    labels = torch.FloatTensor(labels)
    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    path = './data/county/election/2016'
    ind_features = torch.FloatTensor(np.load(path + "/feats.npy"))
    ind_labels = torch.FloatTensor(np.load(path + "/labels.npy"))

    return g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels
Example #29
File: worker.py Project: AI-secure/DP-GCN
    def prepare_data(self):
        if self.mode in ( 'sgc-clean', 'sgc' ):
            if self.dataset in ( 'reddit', 'flickr', 'ppi', 'ppi-large', 'cora', 'citeseer', 'pubmed' ):
                self.features_train = self.sgc_precompute(self.adj_train, self.features_train, mode=self.mode)
                self.features = self.sgc_precompute(self.adj_full, self.features, mode=self.mode)
                self.adj = self.adj_train = None

            elif self.transfer:
                self.features_1 = self.sgc_precompute(self.adj_1, self.features_1, mode=self.mode)
                self.features_2 = self.sgc_precompute(self.adj_2, self.features_2, mode=self.mode)
                self.adj_1 = self.adj_2 = None

            else:
                raise NotImplementedError(f'dataset = {self.dataset} not implemented!')

            print('SGC Precomputing done!')

        elif self.mode in ( 'clusteradj', 'clusteradj-clean' ):
            self.generate_fake_labels()
            if self.args.break_down:
                self.break_down()

            self.prj = self.build_cluster_prj()
            self.adj = self.build_cluster_adj(fnormalize=self.args.fnormalize)

        elif self.mode in ( 'vanilla', 'vanilla-clean', 'cs' ):
            if self.dataset in ( 'reddit', 'flickr', 'ppi', 'ppi-large', 'cora', 'citeseer', 'pubmed' ) \
                or self.dataset.startswith('twitch-train'):
                if self.mode == 'vanilla':
                    self.adj_full = self.perturb_adj(self.adj_full, self.args.perturb_type)
                    self.adj_train = self.perturb_adj(self.adj_train, self.args.perturb_type)
                    print('perturbing done!')

                # normalize adjacency matrix
                if self.dataset not in ( 'cora', 'citeseer', 'pubmed' ):
                    normalizer = fetch_normalization(self.args.norm)
                    self.adj_train = normalizer(self.adj_train)
                    self.adj_full = normalizer(self.adj_full)

                self.adj_train = sparse_mx_to_torch_sparse_tensor(self.adj_train)
                self.adj_full = sparse_mx_to_torch_sparse_tensor(self.adj_full)

            elif self.transfer:
                if self.mode == 'vanilla':
                    self.adj_1 = self.perturb_adj(self.adj_1, self.args.perturb_type)
                    self.adj_2 = self.perturb_adj(self.adj_2, self.args.perturb_type)
                    print('perturbing done!')

                elif self.mode == 'cs':
                    self.adj_1 = compressive_sensing(self.args, self.adj_1)
                    self.adj_2 = compressive_sensing(self.args, self.adj_2)
                    print('compressive sensing done!')

                # normalize adjacency matrix
                normalizer = fetch_normalization(self.args.norm)
                self.adj_1 = sparse_mx_to_torch_sparse_tensor(normalizer(self.adj_1))
                self.adj_2 = sparse_mx_to_torch_sparse_tensor(normalizer(self.adj_2))

            else:
                # self.adj = self.build_adj_mat(self.edges, mode=self.mode)
                raise NotImplementedError(f'dataset = {self.dataset} not implemented!')

            print('Normalizing Adj done!')

        elif self.mode in ( 'degree_mlp', 'basic_mlp' ):
            self.adj = None

        elif self.mode in ( 'degcn', 'degcn-clean' ):
            self.adj = self.build_adj_mat(self.edges, mode=self.mode)
            self.decompose_graph()

        else:
            raise NotImplementedError('mode = {} not implemented!'.format(self.mode))

        # self.calculate_connectivity()

        if torch.cuda.is_available():
            if hasattr(self, 'adj') and self.adj is not None:
                self.adj = self.adj.cuda()
            if hasattr(self, 'adj_train') and self.adj_train is not None:
                self.adj_train = self.adj_train.cuda()
                self.adj_full = self.adj_full.cuda()
            if hasattr(self, 'adj_1') and self.adj_1 is not None:
                self.adj_1 = self.adj_1.cuda()
                self.adj_2 = self.adj_2.cuda()
            if hasattr(self, 'prj'):
                self.prj = self.prj.cuda()
            if hasattr(self, 'sub_adj'):
                for i in range(len(self.sub_adj)):
                    self.sub_adj[i] = self.sub_adj[i].cuda()
Example #30
def full_load_data(dataset_name, splits_file_path=None):
    if dataset_name in {'cora', 'citeseer', 'pubmed'}:
        adj, features, labels, _, _, _ = full_load_citation(dataset_name)
        labels = np.argmax(labels, axis=-1)
        features = features.todense()
        G = nx.DiGraph(adj)
    else:
        graph_adjacency_list_file_path = os.path.join('new_data', dataset_name, 'out1_graph_edges.txt')
        graph_node_features_and_labels_file_path = os.path.join('new_data', dataset_name,
                                                                'out1_node_feature_label.txt')

        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}


        with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
            graph_node_features_and_labels_file.readline()
            for line in graph_node_features_and_labels_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 3)
                assert (int(line[0]) not in graph_node_features_dict and int(line[0]) not in graph_labels_dict)
                graph_node_features_dict[int(line[0])] = np.array(line[1].split(','), dtype=np.uint8)
                graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                if int(line[0]) not in G:
                    G.add_node(int(line[0]), features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]), features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array(
            [features for _, features in sorted(G.nodes(data='features'), key=lambda x: x[0])])
        labels = np.array(
            [label for _, label in sorted(G.nodes(data='label'), key=lambda x: x[0])])
    features = preprocess_features(features)

    g = adj
  
    with np.load(splits_file_path) as splits_file:
        train_mask = splits_file['train_mask']
        val_mask = splits_file['val_mask']
        test_mask = splits_file['test_mask']
    
    num_features = features.shape[1]
    num_labels = len(np.unique(labels))
    assert (np.array_equal(np.unique(labels), np.arange(len(np.unique(labels)))))

    features = th.FloatTensor(features)
    labels = th.LongTensor(labels)
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    g = sys_normalized_adjacency(g)
    g = sparse_mx_to_torch_sparse_tensor(g)

    return g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels
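
A minimal usage sketch for the returned normalized graph tensor (the split-file path is illustrative):

g, features, labels, train_mask, val_mask, test_mask, n_feat, n_cls = \
    full_load_data('cora', splits_file_path='splits/cora_split_0.npz')
h = th.spmm(g, features)  # one propagation step with the normalized adjacency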