예제 #1
0
    adj, feats = load_mat_data(dataset)
    if dataset == 'protein':
        negatives = feats < 0.0
        r, c, values = sp.find(negatives)
        feats[r, c] = 0.0
    else:
        feats = feats.toarray()
        feats = MinMaxScaler().fit_transform(feats)
        feats = sp.csr_matrix(feats)
    print('\nPreparing test split...\n')
    test_inds = split_train_test(dataset, adj, fold=0)
    train = adj.copy()
    if dataset != 'conflict':
        train.setdiag(1.0)
elif dataset in ['cora', 'citeseer', 'pubmed']:
    adj, feats, _, _, _, _, _, _ = load_citation_data(dataset)
    feats = MaxAbsScaler().fit_transform(feats).tolil()
    print('\nPreparing test split...\n')
    test_inds = split_citation_data(adj)
    test_inds = np.vstack({tuple(row) for row in test_inds})
    train = adj.copy()
    if dataset != 'pubmed':
        train.setdiag(1.0)
    else:
        train.setdiag(0.0)
else:
    raise Exception(
        'Supported strings: {protein, metabolic, conflict, cora, citeseer, pubmed}'
    )

test_r = test_inds[:, 0]
예제 #2
0
import numpy as np
import scipy.sparse as sp
from keras import backend as K
from sklearn.metrics import roc_auc_score as auc_score
from sklearn.metrics import average_precision_score as ap_score
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler
from utils import generate_data, batch_data, compute_masked_accuracy
from utils_gcn import load_citation_data, split_citation_data
from ae_LPNC import autoencoder_multitask

# Use the 'citeseer' dataset as the example for walking through the
# multitask procedure.
dataset = 'citeseer'
print('\nLoading dataset {:s}...\n'.format(dataset))
adj, feats, y_train, y_val, y_test, mask_train, mask_val, mask_test = load_citation_data(
    dataset)
feats = MaxAbsScaler().fit_transform(feats).tolil()
train = adj.copy()

test_inds = split_citation_data(adj)
# Drop duplicate index rows. A set must not be handed to np.vstack: NumPy
# expects a sequence there (non-sequence iterables were deprecated and are
# rejected with TypeError by newer releases). np.unique with axis=0
# deduplicates whole rows and returns them in sorted order.
test_inds = np.unique(test_inds, axis=0)
test_r = test_inds[:, 0]
test_c = test_inds[:, 1]
# Collect the adjacency values at the test index pairs in both orientations:
# first (r, c), then (c, r).
labels = []
labels.extend(np.squeeze(adj[test_r, test_c].toarray()))
labels.extend(np.squeeze(adj[test_c, test_r].toarray()))
multitask = True
if multitask:
    # If multitask, simultaneously perform link prediction and
    # semi-supervised node classification on incomplete graph with
    # 10% held-out positive links and same number of negative links.