Example #1
from torch_geometric.datasets import Amazon, CitationFull, Coauthor, Planetoid


def import_dataset(name='CORA'):
    root = f'BENCHMARK/{name.upper()}/'
    if name.upper() == 'CORA':
        dataset = Planetoid(root=root, name='CORA')
    elif name.upper() == 'CORA-F':
        dataset = CitationFull(root=root, name='cora')
    elif name.upper() == 'CITESEER':
        dataset = Planetoid(root=root, name='citeseer')
    elif name.upper() == 'PUBMED':
        dataset = Planetoid(root=root, name='PubMed')
    elif name.upper() == 'COAUTHOR-P':
        dataset = Coauthor(root=root, name='Physics')
    elif name.upper() == 'COAUTHOR-C':
        dataset = Coauthor(root=root, name='CS')
    elif name.upper() == 'AMAZON-C':
        dataset = Amazon(root=root, name='Computers')
    elif name.upper() == 'AMAZON-P':
        dataset = Amazon(root=root, name='Photo')

    # 'all' pre-downloads every benchmark dataset, then exits
    elif name.lower() == 'all':
        Planetoid(root=root, name='CORA')
        Planetoid(root=root, name='citeseer')
        CitationFull(root=root, name='cora')
        Planetoid(root=root, name='PubMed')
        Coauthor(root=root, name='Physics')
        Coauthor(root=root, name='CS')
        Amazon(root=root, name='Computers')
        Amazon(root=root, name='Photo')
        exit()
    else:
        raise ValueError(f'Unknown dataset: {name}')
    return dataset
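A minimal usage sketch (any key handled above works; 'CORA' is just one example):

dataset = import_dataset('CORA')  # downloads into BENCHMARK/CORA/ on first use
data = dataset[0]                 # the single graph: x, edge_index, y, ...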
Example #2
import os.path as osp

import torch
from torch_geometric.data import Data
from torch_geometric.datasets import CitationFull


def load_citation(dataset):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'Datasets',
                    'NodeData', 'Citation')
    # transforms = T.Compose([T.NormalizeFeatures()])
    if dataset == 'PubMedFull':
        dataset = 'PubMed'
    dataset = CitationFull(path, dataset)

    # class-balanced split: 20 training nodes per class, the rest for testing
    num_per_class = 20
    train_index = []
    test_index = []
    for i in range(dataset.num_classes):
        index = (dataset[0].y.long() == i).nonzero().view(-1)
        index = index[torch.randperm(index.size(0))]
        # skip classes too small to contribute to both splits
        if len(index) > num_per_class + 30:
            train_index.append(index[:num_per_class])
            test_index.append(index[num_per_class:])
    train_index = torch.cat(train_index)
    test_index = torch.cat(test_index)

    train_mask = index_to_mask(train_index, size=dataset[0].num_nodes)
    test_mask = index_to_mask(test_index, size=dataset[0].num_nodes)

    data = Data(x=dataset[0].x,
                edge_index=dataset[0].edge_index,
                train_mask=train_mask,
                test_mask=test_mask,
                y=dataset[0].y)
    return dataset, data
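load_citation assumes an index_to_mask helper; recent torch_geometric versions ship one as torch_geometric.utils.index_to_mask, and a minimal stand-in looks like this:

def index_to_mask(index, size):
    # boolean mask with True at the given node indices
    mask = torch.zeros(size, dtype=torch.bool)
    mask[index] = True
    return mask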
Example #3
import os.path as osp
from typing import Union

import torch_geometric.transforms as T
from torch_geometric.data import Data
from torch_geometric.datasets import CitationFull, Planetoid


def load_non_overlapping_dataset(
    dataset_name: Union[PlanetoidDataset, CitationFullDataset],
    transform=T.NormalizeFeatures()
) -> Data:
    path = osp.join(DATASETS_DIR, dataset_name.value)

    if isinstance(dataset_name, PlanetoidDataset):
        data = Planetoid(path, dataset_name.value, transform=transform)[0]
    elif isinstance(dataset_name, CitationFullDataset):
        data = CitationFull(path, dataset_name.value, transform=transform)[0]
    else:
        raise ValueError("Unknown dataset name")
    return data
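PlanetoidDataset and CitationFullDataset are enums defined elsewhere in the source project, as is DATASETS_DIR; a plausible minimal sketch (the names and values below are assumptions, not the project's actual definitions):

from enum import Enum

DATASETS_DIR = './data'  # hypothetical location

class PlanetoidDataset(Enum):
    CORA = 'Cora'
    CITESEER = 'CiteSeer'
    PUBMED = 'PubMed'

class CitationFullDataset(Enum):
    CORA_ML = 'Cora_ML'
    DBLP = 'DBLP'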
Example #4
def run(file, data_name, model_name, lr):
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64*1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')

    parser.add_argument('--dataset', type=str, default='Citeseer')

    args = parser.parse_args()
    if data_name is not None and model_name is not None and lr is not None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data', args.dataset),
                           name=args.dataset, transform=T.ToSparseTensor())
    # note: CitationFull holds a single graph, so these split counts go unused below
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - (num_training + num_val)

    data = dataset[0]
    adj_t = data.adj_t.to(device)
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    num_nodes = data.x.shape[0]
    num_edges = data.edge_index.shape[1]
    print(data)
    # nx_data = to_networkx(data, to_undirected=True)
    # print('graph density='+str(2*num_edges/(num_nodes*(num_nodes-1))))
    # print('clustering coefficient='+str(nx.average_clustering(nx_data)))


    # a trailing '-nd' selects the neural-decoder variant of the model
    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd':
        model_name = args.model[:-3]
    
    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    else:
        raise ValueError(f'Unknown model: {model_name}')

    if decoder_enable == '-nd':
        model.decoder = NeuralDecoder(args.hidden_channels,
            args.hidden_channels, 1, args.num_layers, args.dropout)
    
    # reuse the OGB link-prediction evaluator, which reports Hits@K
    evaluator = Evaluator(name='ogbl-ddi')

    model = model.to(device)

    loggers = {}
    K_list = ['20', '50', '100']
    for k in K_list:
        loggers['Hits@' + k] = Logger(args.runs, args)

    for run in range(args.runs):
        torch.manual_seed(run)
        split_edge = utils.train_test_split_edges(data)
        # print(split_edge.train_pos_edge_index.shape)
        # print(split_edge.val_pos_edge_index.shape)


        # exit()
        split_edge.edge_index = edge_index

        # emb.weight.data = features
        model.reset_parameters()

        if args.model in ['arga', 'arga-nd', 'arvga', 'arvga-nd']:
            args.lr = 0.005
        optimizer = torch.optim.Adam(
                list(model.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge,
                         optimizer, args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(model, data.x, adj_t, split_edge,
                               evaluator, args.batch_size)
                for key, result in results.items():
                    loggers[key].add_result(run, result)

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits, test_auc, test_ap, val_auc, val_ap = result
                        print(key)
                        print(f'Run: {run + 1:02d}, '
                              f'Epoch: {epoch:02d}, '
                              f'Loss: {loss:.4f}, '
                              f'auc: {100 * test_auc:.2f}%, '
                              f'ap: {100 * test_ap:.2f}%, '
                              f'Train: {100 * train_hits:.2f}%, '
                              f'Valid: {100 * valid_hits:.2f}%, '
                              f'Test: {100 * test_hits:.2f}%', )
                    print('---')

        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()

        file.write(f'{args.lr} {key} {args.model} {toWrite}\n')
        file.flush()
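A hedged sketch of how run might be invoked (the file name and hyperparameter values are hypothetical, and run still parses sys.argv for the remaining flags, so the launching script must not pass conflicting ones):

with open('lp_results.txt', 'a') as f:
    for lr in (0.01, 0.001):
        run(f, 'Cora', 'gae-nd', lr)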
Example #5
def run(file, data_name, model_name, lr):
    parser = argparse.ArgumentParser(description='OGBL-DDI (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=64*1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--use_nd', action='store_true')
    parser.add_argument('--use_lgae', action='store_true')
    parser.add_argument('--use_vgae', action='store_true')
    parser.add_argument('--model', type=str, default='')

    parser.add_argument('--dataset', type=str, default='Citeseer')

    args = parser.parse_args()
    if data_name is not None and model_name is not None and lr is not None:
        args.dataset = data_name
        args.model = model_name
        args.lr = lr
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    device = torch.device(device)

    dataset = CitationFull(os.path.join('citation_data', args.dataset),
                           name=args.dataset, transform=T.ToSparseTensor())
    # note: CitationFull holds a single graph, so these split counts go unused below
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - (num_training + num_val)

    data = dataset[0]
    print('data:', vars(data))
    adj_t = data.adj_t.to(device)
    edge_index, edge_type = utils.dense_to_sparse(adj_t.to_dense())
    data.edge_index = edge_index
    data.x = data.x.to(device)
    split_edge = utils.train_test_split_edges(data)
    split_edge.edge_index = edge_index

    print(data)
    print(edge_index.shape)

    decoder_enable = args.model[-3:]
    if args.model[-3:] == '-nd':
        model_name = args.model[:-3]
    

    if model_name == 'lgae':
        model = LGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'vgae':
        model = DeepVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'gae':
        model = GraphAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arga':
        model = AdversarialGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)

    elif model_name == 'arvga':
        model = AdversarialVGAE(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'lrga':
        model = LRGA(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    elif model_name == 'sage':
        model = SAGEAutoEncoder(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout)
    else:
        raise ValueError(f'Unknown model: {model_name}')

    if decoder_enable == '-nd':
        model.decoder = NeuralDecoder(args.hidden_channels,
            args.hidden_channels, 1, args.num_layers, args.dropout)

    evaluator = Evaluator(name='ogbl-ddi')

    model = model.to(device)

    loggers = {
        'metrics': Logger(args.runs, args)
    }

    for run in range(args.runs):
        torch.manual_seed(run)
        model.reset_parameters()

        if args.model in ['arga', 'arga-nd', 'arvga', 'arvga-nd']:
            args.lr = 0.005
        optimizer = torch.optim.Adam(
                list(model.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data.x, adj_t, split_edge,
                         optimizer, args.batch_size)

        result = test(model, data.x, data, split_edge, evaluator, args.batch_size)
        loggers['metrics'].add_result(run, result)

    for key in loggers.keys():
        print(key)
        toWrite = loggers[key].print_statistics()
        file.write(args.model + '\t' + '\t'.join(toWrite) + '\n')
        file.flush()
        os.fsync(file.fileno())
Example #6
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--use_gdc', action='store_true')
    parser.add_argument("--dataset", type=str, default="CiteSeer")
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = args.dataset

    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    lrate = 0.01
    if dataset == "F":
        dataset = Flickr(path, transform=T.NormalizeFeatures())
        print(len(dataset))
        lrate = 0.1
    elif dataset == "C":
        dataset = CitationFull(path, "DBLP", transform=T.NormalizeFeatures())
        print(len(dataset))
    else:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
    print(len(dataset))
    data = dataset[0]    
    print(data)
    model, data = Net(dataset, data, args), data.to(device)
    model = model.to(device)
    optimizer = torch.optim.Adam([
        dict(params=model.conv1.parameters(), weight_decay=5e-4),
        dict(params=model.conv2.parameters(), weight_decay=0)
    ], lr=lrate)  # Only perform weight-decay on first convolution.
    best_val_acc = test_acc = 0
    for epoch in range(1, args.epochs + 1):
        train(model, data)
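The train helper is not shown in this snippet; a minimal sketch of what it presumably does, assuming the model returns log-probabilities, `F` is torch.nn.functional, the data object carries a train_mask, and the forward signature takes (x, edge_index):

def train(model, data):
    model.train()
    optimizer.zero_grad()  # uses the script-level optimizer defined above
    out = model(data.x, data.edge_index)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()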
Example #7
File: old.py  Project: Richard-He/AGN
parser.add_argument('--layers', type=int, default=30)
parser.add_argument('--epochs', type=int, default=800)
parser.add_argument('--early', type=int, default=80)


args = parser.parse_args()
gnn = args.gnn
gnndict = {'GAT': GAT, 'SAGE': SAGE, 'GCN': GCN, 'GEN': AdaGNN_v, 'MLP': MLP}
reset = args.reset
ratio = args.ratio
dataset_n = args.dataset
t_layers = args.layers
log_name = f'./result/Greedy_SRM_GNN_{gnn}_reset_{reset}_dataset_{dataset_n}'
path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_n)
if dataset_n == 'dblp':
    dataset = CitationFull(path, dataset_n)
else:
    dataset = Planetoid(path, dataset_n)
data = dataset[0]
train_split = pickle.load(open(f'./datasetsplit/{dataset_n.lower()}_train', 'rb'))
test_split = pickle.load(open(f'./datasetsplit/{dataset_n.lower()}_test', 'rb'))
rand = torch.cat([train_split, test_split])  # pool the saved splits, then re-cut by ratio
thold = int(data.num_nodes * ratio)
train_split = rand[:thold]
test_split = rand[thold:]
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train_split] = True
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[test_split] = True
criteria = CrossEntropyLoss()
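Because both index sets come from saved files and are re-cut by ratio, a cheap disjointness check is worth adding (a sketch, assuming the saved splits contain unique indices):

assert not (data.train_mask & data.test_mask).any(), 'train/test masks overlap'
assert int(data.train_mask.sum()) == thold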
Example #8
import torch
import torch.nn as nn
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

import torch_geometric.transforms as T

from tensorboardX import SummaryWriter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from torch_geometric.datasets import CitationFull
from tqdm import tqdm
import pdb

data_cora = CitationFull('./CitationFull', 'cora')
data_cora_ml = CitationFull('./CitationFull', 'cora_ml')
data_citeseer = CitationFull('./CitationFull', 'citeseer')
data_dblp = CitationFull('./CitationFull', 'dblp')
data_pubmed = CitationFull('./CitationFull', 'pubmed')


class GNNStack(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, task='node'):
        super(GNNStack, self).__init__()
        self.task = task
        self.convs = nn.ModuleList()
        self.convs.append(self.build_conv_model(input_dim, hidden_dim))
        self.lns = nn.ModuleList()
        self.lns.append(nn.LayerNorm(hidden_dim))
        self.lns.append(nn.LayerNorm(hidden_dim))
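The class is truncated on the source page; in the tutorial-style GNNStack this is based on, the remaining layers and the conv factory look roughly like this (a sketch, not the author's exact code, with pyg_nn assumed to be `import torch_geometric.nn as pyg_nn`):

        for _ in range(2):
            self.convs.append(self.build_conv_model(hidden_dim, hidden_dim))
        self.post_mp = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim), nn.Dropout(0.25),
            nn.Linear(hidden_dim, output_dim))

    def build_conv_model(self, input_dim, hidden_dim):
        # GCN for node-level tasks, GIN for graph-level tasks
        if self.task == 'node':
            return pyg_nn.GCNConv(input_dim, hidden_dim)
        return pyg_nn.GINConv(nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim)))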
Example #9
import os.path as osp

import scipy.sparse as sp
import torch
import torch_geometric.transforms as T
from torch.optim.lr_scheduler import MultiStepLR, StepLR
from torch_geometric.datasets import CitationFull
from torch_geometric.utils import to_scipy_sparse_matrix

from DBLP_utils import SCAT_Red
from layers import GC_withres, GraphConvolution
from utils import (normalize_adjacency_matrix, normalizemx,
                   sparse_mx_to_torch_sparse_tensor)
# from torch_geometric.nn import GATConv

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'DBLP')

#dataset = TUDataset(root= path,name='REDDIT-BINARY')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset = CitationFull(path, name='dblp', transform=T.TargetIndegree())
data = dataset[0]
# DBLP has 1639 input features
adj = to_scipy_sparse_matrix(edge_index=data.edge_index)
# symmetrize: for every pair keep max(A[i, j], A[j, i])
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
A_tilde = sparse_mx_to_torch_sparse_tensor(
    normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))).to(device)
adj = sparse_mx_to_torch_sparse_tensor(adj).to(device)
#print(dataset)
#print(data.x.shape)
#print(data.y.shape)

#tp = SCAT_Red(in_features=1639,med_f0=10,med_f1=10,med_f2=10,med_f3=10,med_f4=10).to(device)
#tp2 = SCAT_Red(in_features=40,med_f0=30,med_f1=10,med_f2=10,med_f3=10,med_f4=10).to(device)
# the four blocks sum to DBLP's 17716 nodes; the first 10000 form the training set
train_mask = torch.cat((torch.ones(10000), torch.zeros(2000),
                        torch.zeros(2000), torch.zeros(3716)), 0) > 0
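Validation and test masks for the remaining nodes would presumably be built the same way (a sketch; the block sizes mirror the training mask above, and which blocks go to validation versus test is an assumption):

val_mask = torch.cat((torch.zeros(10000), torch.ones(2000),
                      torch.zeros(2000), torch.zeros(3716)), 0) > 0
test_mask = torch.cat((torch.zeros(10000), torch.zeros(2000),
                       torch.ones(2000), torch.ones(3716)), 0) > 0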