示例#1
0
@File    : main.py
"""
import torch
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GAE, VGAE
from torch_geometric.utils import train_test_split_edges

import args
from model import Encoder, VEncoder, get_edge_acc

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Resolve the requested Planetoid citation dataset (case-insensitive match).
dataset = None
if args.dataset.lower() == 'cora':
    dataset = Planetoid(root='tmp', name='Cora')
    print("use dataset: Cora")
elif args.dataset.lower() == 'citeseer':
    dataset = Planetoid(root='tmp', name='CiteSeer')
    print("use dataset: CiteSeer")
elif args.dataset.lower() == 'pubmed':
    dataset = Planetoid(root='tmp', name='PubMed')
    print("use dataset: PubMed")
if dataset is None:
    # Fix: previously an unknown name fell through to `dataset[0]` and died
    # with an opaque "'NoneType' object is not subscriptable".
    raise ValueError("unsupported dataset: {}".format(args.dataset))
data = dataset[0]

# Split edges into train/val/test positive (and sampled negative) sets.
enhanced_data = train_test_split_edges(data.clone(),
                                       val_ratio=0.1,
                                       test_ratio=0.2)

# Training graph: node features plus only the training positive edges.
train_data = Data(x=enhanced_data.x,
                  edge_index=enhanced_data['train_pos_edge_index']).to(DEVICE)
示例#2
0
import os.path as osp

import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCN2Conv
from torch_geometric.nn.conv.gcn_conv import gcn_norm
import onnxruntime

# Load Cora with normalized features; ToSparseTensor replaces edge_index
# with a SparseTensor `adj_t` that GCN2Conv consumes directly.
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
transform = T.Compose([T.NormalizeFeatures(), T.ToSparseTensor()])
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0]
data.adj_t = gcn_norm(data.adj_t)  # Pre-process GCN normalization.


def export_to_onnx_pt(model, data, use_dynamic=True):
    input_names = ['input_1', 'input_2']
    inputs = {
        'input_1': data.x,
        'input_2': data.adj_t
    }
    output_names = ["output1"]
    batch = torch.arange(data.num_nodes)
    if use_dynamic:
        torch_out = torch.onnx.export(model,  # model being run
                                      args=tuple(inputs.values()),  # model input (or a tuple for multiple inputs)
                                      f="models/graphml/gcn2.onnx",
示例#3
0
import os.path as osp

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GNNExplainer

from BayesianExplainer import BayesianExplainer

from tqdm import tqdm

dataset = 'Cora'
path = osp.join('data', 'Planetoid')
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
data = dataset[0]


class Net(torch.nn.Module):
    """Two-layer GCN encoder followed by a linear head.

    NOTE(review): the head maps back to ``dataset.num_features`` rather than
    a class count — presumably a feature-reconstruction objective; confirm
    against callers.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 32)
        self.conv2 = GCNConv(32, 16)
        self.linear = torch.nn.Linear(16, dataset.num_features)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = self.linear(x)
        # Fix: the original fell off the end of forward and returned None,
        # making the module unusable for training or inference.
        return x
def test_neighbor_sampler_on_cora():
    """Check that mini-batched NeighborSampler inference reproduces the
    full-graph forward pass exactly, for both a SAGE and a GAT model."""
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]

    batch = torch.arange(10)
    # sizes=[-1, -1, -1]: sample *all* neighbors at each of the three hops,
    # so batched and full-graph outputs must match exactly.
    loader = NeighborSampler(data.edge_index,
                             sizes=[-1, -1, -1],
                             node_idx=batch,
                             batch_size=10)

    class SAGE(torch.nn.Module):
        def __init__(self, in_channels, out_channels):
            super().__init__()

            self.convs = torch.nn.ModuleList()
            self.convs.append(SAGEConv(in_channels, 16))
            self.convs.append(SAGEConv(16, 16))
            self.convs.append(SAGEConv(16, out_channels))

        def batch(self, x, adjs):
            # One bipartite convolution per sampled hop.
            for i, (edge_index, _, size) in enumerate(adjs):
                x_target = x[:size[1]]  # Target nodes are always placed first.
                x = self.convs[i]((x, x_target), edge_index)
            return x

        def full(self, x, edge_index):
            for conv in self.convs:
                x = conv(x, edge_index)
            return x

    model = SAGE(dataset.num_features, dataset.num_classes)

    _, n_id, adjs = next(iter(loader))
    out1 = model.batch(data.x[n_id], adjs)
    out2 = model.full(data.x, data.edge_index)[batch]
    assert torch.allclose(out1, out2)

    class GAT(torch.nn.Module):
        def __init__(self, in_channels, out_channels):
            super().__init__()

            self.convs = torch.nn.ModuleList()
            self.convs.append(GATConv(in_channels, 16, heads=2))
            self.convs.append(GATConv(32, 16, heads=2))
            self.convs.append(GATConv(32, out_channels, heads=2, concat=False))

        def batch(self, x, adjs):
            for i, (edge_index, _, size) in enumerate(adjs):
                x_target = x[:size[1]]  # Target nodes are always placed first.
                x = self.convs[i]((x, x_target), edge_index)
            return x

        def full(self, x, edge_index):
            for conv in self.convs:
                x = conv(x, edge_index)
            return x

    # Fix: the original never instantiated GAT, so the assertions below
    # silently re-tested the SAGE model instead of the GAT path.
    model = GAT(dataset.num_features, dataset.num_classes)

    _, n_id, adjs = next(iter(loader))
    out1 = model.batch(data.x[n_id], adjs)
    out2 = model.full(data.x, data.edge_index)[batch]
    assert torch.allclose(out1, out2)

    shutil.rmtree(root)
示例#5
0
import sys
import inspect
import torch
import torch.nn.functional as F
import pdb

from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.utils import scatter_
from torch_geometric.utils import add_remaining_self_loops
from torch_geometric.nn.inits import uniform, glorot, zeros, ones, reset

from torch_geometric.datasets import Planetoid
# Active dataset: Pubmed. Swap the comment markers to run Cora or Citeseer.
#dataset = Planetoid(root='/tmp/Cora', name='Cora')
dataset = Planetoid(root='/tmp/Pubmed', name='Pubmed')
#dataset = Planetoid(root='/tmp/Citeseer', name='Citeseer')


class GCNConv(torch.nn.Module):
    """Hand-rolled GCN convolution layer (shadows the PyG class of the same
    name).

    NOTE(review): only the start of ``__init__`` is visible in this chunk —
    the remaining setup (weights, bias, forward) is defined past this view.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True,
                 **kwargs):
        super(GCNConv, self).__init__()

        # Record the layer configuration; `improved`/`cached` presumably
        # mirror PyG's GCNConv options — confirm in the rest of the class.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
示例#6
0
from torch_geometric.datasets import Planetoid
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.nn import GATConv
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import JumpingKnowledge

# Cora with the standard public split; alternative datasets/splits are kept
# commented out below for quick switching.
dataset = Planetoid(root='./cora/', name='Cora')
# dataset = Planetoid(root='./cora/', name='Cora', split='random',
#                          num_train_per_class=232, num_val=542, num_test=542)
# dataset = Planetoid(root='./citeseer',name='Citeseer')
# dataset = Planetoid(root='./pubmed/', name='Pubmed')
print(dataset)


# baseline: GCN model (2 layers)
class GCNNet(nn.Module):
    """Baseline two-layer GCN: num_node_features -> 16 -> num_classes."""

    def __init__(self, dataset):
        super(GCNNet, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        # NOTE(review): no return statement is visible here — as written this
        # forward returns None; the tail of the method may be truncated.
示例#7
0
        type=str,
        default='results',
        help='filename to store results and the model (default: results)')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Training on CPU/GPU device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # load dataset
    dataname = args.dataset
    rootname = osp.join(osp.abspath(''), 'data', dataname)
    dataset = Planetoid(root=rootname, name=dataname)

    num_nodes = dataset[0].x.shape[0]
    L = get_laplacian(dataset[0].edge_index,
                      num_nodes=num_nodes,
                      normalization='sym')
    L = sparse.coo_matrix(
        (L[1].numpy(), (L[0][0, :].numpy(), L[0][1, :].numpy())),
        shape=(num_nodes, num_nodes))

    lobpcg_init = np.random.rand(num_nodes, 1)
    lambda_max, _ = lobpcg(L, lobpcg_init)
    lambda_max = lambda_max[0]

    # extract decomposition/reconstruction Masks
    FrameType = args.FrameType
示例#8
0
def train():
    """Train a DeepGraphInfomax encoder on the dataset named by the command
    line, early-stopping on the training loss, then fit a downstream node
    classifier on the learned embeddings.

    NOTE(review): get_args, Planetoid, DBLP, CoraML, Coauthor, Amazon, T,
    DeepGraphInfomax, Encoder, corruption, Adam, node_classification, copy,
    time and pdb are module-level names defined outside this view.
    """
    # get the parameters
    args = get_args()
    print(args.domain)

    # decide the device
    # NOTE(review): GPU index 2 is hard-coded — verify on machines with
    # fewer devices.
    device = torch.device('cuda:2' if torch.cuda.is_available() and args.cuda else 'cpu')

    # load dataset
    if args.domain == 'Cora':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/Cora', name='Cora', transform=T.NormalizeFeatures())
    elif args.domain == 'CiteSeer':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/CiteSeer', name='CiteSeer', transform=T.NormalizeFeatures())
    elif args.domain == 'PubMed':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/PubMed', name='PubMed', transform=T.NormalizeFeatures())
    elif args.domain == 'DBLP':
        dataset = DBLP(root='/home/amax/xsx/data/gnn_datas/DBLP', name='DBLP')
    elif args.domain == 'Cora-ML':
        dataset = CoraML(root='/home/amax/xsx/data/gnn_datas/Cora_ML', name='Cora_ML')
    elif args.domain == 'CS':
        dataset = Coauthor(root='/home/amax/xsx/data/gnn_datas/Coauthor/CS', name='CS')
    elif args.domain == 'Physics':
        dataset = Coauthor(root='/home/amax/xsx/data/gnn_datas/Coauthor/Physics', name='Physics')
    elif args.domain == 'Computers':
        dataset = Amazon(root='/home/amax/xsx/data/gnn_datas/Amazon/Computers', name='Computers')
    elif args.domain == 'Photo':
        dataset = Amazon(root='/home/amax/xsx/data/gnn_datas/Amazon/Photo', name='Photo')
    else:
        dataset = None
    if dataset is None:
        # Unknown domain: drops into the debugger instead of raising.
        pdb.set_trace()
    data = dataset[0].to(device)

    # create the model and optimizer
    # summary = mean of node embeddings (graph-level readout).
    model = DeepGraphInfomax(hidden_channels=args.hidden_dim, encoder=Encoder(dataset.num_features, args.hidden_dim),
                             summary=lambda z, *args, **kwargs: z.mean(dim=0), corruption=corruption).to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)

    # the information which needs to be recorded
    start_time = time.time()
    bad_counter = 0
    best_epoch = 0
    least_loss = float("inf")
    best_model = None

    # begin training
    for epoch in range(args.epochs):
        # the steps of training
        model.train()
        optimizer.zero_grad()

        pos_z, neg_z, summary = model(data.x, data.edge_index)
        loss = model.loss(pos_z, neg_z, summary)
        current_loss = loss.item()
        loss.backward()
        optimizer.step()

        # snapshot the model whenever the loss reaches a new minimum
        if current_loss < least_loss:
            least_loss = current_loss
            best_epoch = epoch + 1
            best_model = copy.deepcopy(model)
            bad_counter = 0
        else:
            bad_counter += 1

        # early stop
        if bad_counter >= args.patience:
            break

    print("Optimization Finished!")
    used_time = time.time() - start_time
    # NOTE(review): '+ 100' looks like a hard-coded patience value; the epochs
    # actually run are best_epoch + bad_counter — confirm intent.
    print("Total epochs: {:2d}".format(best_epoch + 100))
    print("Best epochs: {:2d}".format(best_epoch))
    # train a classification model
    node_classification(best_model, data, args, device, int(dataset.num_classes))
    print("Total time elapsed: {:.2f}s".format(used_time))
示例#9
0
def GCN(dataset, params, Epochs, MonteSize, width, lr, savepath):
    """Run `MonteSize` Monte-Carlo training trials for the model matching
    `dataset`, checkpointing on best test loss and saving convergence curves
    under `savepath`.

    NOTE(review): resume, return_output, ResumeModel, Net, topk_pool_Net,
    SPlineNet, CIFAR10_resnet, DataListLoader, Planetoid, TUDataset,
    MNISTSuperpixels, DataParallel, T, logging, train, test,
    save_recurrencePlots and print_nvidia_useage are module-level names
    defined outside this view.
    """
    Batch_size = int(params[0])

    for Monte_iter in range(MonteSize):

        # Data
        best_loss = float('inf')  # best test loss
        start_epoch = 0  # start from epoch 0 or last checkpoint epoch
        TrainConvergence = []
        TestConvergence = []

        # model
        root = '/data/GraphData/' + dataset
        if dataset == 'Cora':
            model_name = "GCN3"
            datasetroot = Planetoid(root=root, name=dataset).shuffle()
            trainloader = DataListLoader(datasetroot,
                                         batch_size=Batch_size,
                                         shuffle=True)
            testloader = DataListLoader(datasetroot,
                                        batch_size=100,
                                        shuffle=False)
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)
            # Resume a half-finished trial; skip it entirely if already done.
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence,
                 start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue

            else:
                net = Net(datasetroot, width)

        elif dataset == 'ENZYMES' or dataset == 'MUTAG':
            model_name = "topk_pool_Net"
            # NOTE(review): missing '/' — this yields '/data/GraphDataENZYMES',
            # unlike the '/data/GraphData/...' roots used elsewhere; verify.
            root = '/data/GraphData' + dataset
            datasetroot = TUDataset(root, name=dataset)
            trainloader = DataListLoader(datasetroot,
                                         batch_size=Batch_size,
                                         shuffle=True)
            testloader = DataListLoader(datasetroot,
                                        batch_size=100,
                                        shuffle=False)
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence,
                 start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue

            else:
                net = topk_pool_Net(datasetroot, width)

        elif dataset == 'MNIST':
            datasetroot = MNISTSuperpixels(root='/data/GraphData/' + dataset,
                                           transform=T.Cartesian()).shuffle()
            trainloader = DataListLoader(datasetroot,
                                         batch_size=Batch_size,
                                         shuffle=True)
            testloader = DataListLoader(datasetroot,
                                        batch_size=100,
                                        shuffle=False)
            model_name = 'SPlineNet'
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)

            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence,
                 start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue

            else:
                #net=Net(datasetroot,width)
                net = SPlineNet(datasetroot, width)

        elif dataset == 'CIFAR10':
            # NOTE(review): model_to_save/model_name/trainloader/testloader are
            # never assigned in this branch — the os.path.exists check and the
            # training loop below will hit a NameError on a fresh run; verify.
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence,
                 start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue
            else:
                net = getattr(CIFAR10_resnet, 'Resnet20_CIFAR10')(params[1])
        else:
            raise Exception(
                "The dataset is:{}, it isn't existed.".format(dataset))

        print('Let\'s use', torch.cuda.device_count(), 'GPUs!')
        torch.cuda.is_available()
        net = DataParallel(net)
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net = net.to(device)

        #cudnn.benchmark = True

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(),
                              lr=lr,
                              momentum=0.9,
                              weight_decay=5e-4)
        for epoch in range(start_epoch, start_epoch + Epochs):
            if epoch < Epochs:
                logging(
                    'Batch size: {},ConCoeff: {},MonteSize:{},epoch:{}'.format(
                        params[0], params[1], Monte_iter, epoch))
                TrainLoss = train(trainloader, net, optimizer, criterion)
                TrainConvergence.append(statistics.mean(TrainLoss))
                TestConvergence.append(
                    statistics.mean(test(testloader, net, criterion)))
            else:
                break
            # Checkpoint whenever the epoch's test loss improves on the best.
            if TestConvergence[epoch] < best_loss:
                logging('Saving..')
                state = {
                    'net': net.module,
                    'TrainConvergence': TrainConvergence,
                    'TestConvergence': TestConvergence,
                    'epoch': epoch,
                }
                if not os.path.isdir('checkpoint'):
                    os.mkdir('checkpoint')
                torch.save(state, model_to_save)
                best_loss = TestConvergence[epoch]
                if not os.path.exists('./%s' % model_name):
                    os.makedirs('./%s' % model_name)
                torch.save(
                    net.module.state_dict(),
                    './%s/%s_%s_%s_%s_%s_pretrain.pth' %
                    (model_name, dataset, model_name, params[0], params[1],
                     Epochs))
            else:
                pass
            ## save recurrence plots
            if epoch % 20 == 0:
                save_recurrencePlots_file = "../Results/RecurrencePlots/RecurrencePlots_{}_{}_BatchSize{}_ConCoeffi{}_epoch{}.png".format(
                    dataset, model_name, params[0], params[1], epoch)

                save_recurrencePlots(net, save_recurrencePlots_file)

        FileName = "{}-{}-param_{}_{}-monte_{}".format(dataset, model_name,
                                                       params[0], params[1],
                                                       Monte_iter)
        np.save(savepath + 'TrainConvergence-' + FileName, TrainConvergence)
        np.save(savepath + 'TestConvergence-' + FileName, TestConvergence)
        torch.cuda.empty_cache()
        print_nvidia_useage()

    # NOTE(review): implicitly returns None unless return_output is truthy.
    if return_output == True:
        return TestConvergence[-1], net.module.fc.weight
    else:
        pass
示例#10
0
# Persist the exact command line for reproducibility (cmd_input.txt + log).
log_file = os.path.join(args.res_dir, 'log.txt')
# Save command line input.
cmd_input = 'python ' + ' '.join(sys.argv) + '\n'
with open(os.path.join(args.res_dir, 'cmd_input.txt'), 'a') as f:
    f.write(cmd_input)
print('Command line input: ' + cmd_input + ' is saved.')
with open(log_file, 'a') as f:
    f.write('\n' + cmd_input)

# OGB link-prediction datasets ship their own split; Planetoid datasets are
# split here via do_edge_split (defined elsewhere in this file).
if args.dataset.startswith('ogbl'):
    dataset = PygLinkPropPredDataset(name=args.dataset)
    split_edge = dataset.get_edge_split()
    data = dataset[0]
else:
    path = osp.join('dataset', args.dataset)
    dataset = Planetoid(path, args.dataset)
    split_edge = do_edge_split(dataset)
    data = dataset[0]
    data.edge_index = split_edge['train']['edge'].t()

if args.use_valedges_as_input:
    # Fold the validation edges (made undirected) into the observed graph.
    val_edge_index = split_edge['valid']['edge'].t()
    val_edge_index = to_undirected(val_edge_index)
    data.edge_index = torch.cat([data.edge_index, val_edge_index], dim=-1)
    # NOTE(review): dtype=int produces integer weights while edge weights are
    # typically float — verify what downstream consumers expect.
    val_edge_weight = torch.ones([val_edge_index.size(1), 1], dtype=int)
    data.edge_weight = torch.cat([data.edge_weight, val_edge_weight], 0)

# NOTE(review): current OGB names this dataset 'ogbl-citation2' — confirm
# which version this code targets.
if args.dataset == 'ogbl-citation':
    args.eval_metric = 'mrr'
elif args.dataset.startswith('ogbl'):
    args.eval_metric = 'hits'
示例#11
0
from torch_geometric.datasets import Planetoid, CoraFull

# Print basic statistics (classes, nodes, undirected edges, features) for
# three citation benchmarks.
for dataset_name in ['Cora', 'PubMed', 'CoraFull']:
    print(dataset_name)

    if dataset_name == 'CoraFull':
        dataset = CoraFull(root='/tmp/CoraFull')
    else:
        # Cora and PubMed both come from the Planetoid loader, and the cache
        # root simply mirrors the dataset name.
        dataset = Planetoid(root='/tmp/' + dataset_name, name=dataset_name)

    print("num classes=", dataset.num_classes)

    data = dataset[0]
    print("num nodes=", data.num_nodes)

    # edge_index stores both directions; halve for the undirected count.
    print("num edges=", data.num_edges / 2)

    print("num features=", dataset.num_node_features)
示例#12
0
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

from torch_geometric.nn import SplineConv
from torchdyn.models import NeuralDE

# Load Cora; TargetIndegree writes each edge's target in-degree into
# edge_attr (used as 1-D spline pseudo-coordinates).
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=T.TargetIndegree())
data = dataset[0]

# Replace the public split: first num_nodes-1000 nodes train, last 500 test,
# no validation set. (The 1s broadcast to True in the bool masks.)
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[:data.num_nodes - 1000] = 1
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[data.num_nodes - 500:] = 1


class GCNLayer(torch.nn.Module):
    """SplineConv layer restricted to equal input/output widths.

    NOTE(review): `device` is not defined in this view — presumably a
    module-level global; confirm. The class body may continue past this
    chunk.
    """

    def __init__(self, input_size, output_size):
        super(GCNLayer, self).__init__()

        # Presumably required because this layer is used inside a NeuralDE,
        # where the state dimension must be preserved — verify.
        if input_size != output_size:
            raise AttributeError('input size must equal output size')

        self.conv1 = SplineConv(input_size, output_size, dim=1,
                                kernel_size=2).to(device)
示例#13
0
# Command-line flags: -model selects the model variant, -net the dataset.
parser = argparse.ArgumentParser(description="Model Name")

parser.add_argument("-model",
                    action="store",
                    dest="model",
                    type=int,
                    default=1)
parser.add_argument("-net", action="store", dest="net", type=int, default=1)
pr = parser.parse_args()

# label -> list-of-ids buckets; presumably filled further down (not visible
# in this chunk).
label_ids = defaultdict(list)

# NOTE(review): there is no else branch — `_data` stays undefined for -net
# values outside 1..6, causing a NameError later.
if pr.net == 1:
    print("Data Cora")
    _data = Planetoid(root="./pcora", name="Cora")
elif pr.net == 2:
    print("Data CiteSeer")
    _data = Planetoid(root="./pciteseer", name="Citeseer")
elif pr.net == 3:
    print("Data Pubmed")
    _data = Planetoid(root="./ppubmed", name="Pubmed")
elif pr.net == 4:
    print("Data CoraFull")
    _data = CoraFull("./Corafull")
elif pr.net == 5:
    print("Data Coauthor CS")
    _data = Coauthor("./CS", "CS")
elif pr.net == 6:
    print("Data Coauthor Physics")
    _data = Coauthor("./Physics", "Physics")
import networkx as nx
import torch
import numpy as np
import pandas as pd
from torch_geometric.datasets import Planetoid
from torch_geometric.utils.convert import to_networkx

# Load Cora and render its citation graph, colouring nodes by class label.
dataset1 = Planetoid(root='/content/cora', name='Cora')

cora = dataset1[0]

coragraph = to_networkx(cora)

# Class label per node, ordered to match the networkx node sequence.
node_labels = cora.y[list(coragraph.nodes)].numpy()

import matplotlib.pyplot as plt

plt.figure(1, figsize=(14, 12))
nx.draw(coragraph,
        cmap=plt.get_cmap('Set1'),
        node_color=node_labels,
        node_size=75,
        linewidths=6)
plt.show()
示例#15
0
import sys
import networkx as nx
import pdb

from deepsnap.dataset import GraphDataset
from deepsnap.batch import Batch
from torch.utils.data import DataLoader

from torch.nn.parallel import DistributedDataParallel

# NOTE(review): Planetoid and T (torch_geometric.transforms) are not imported
# in this view — presumably imported elsewhere in the file.
n_gpus = 2
name = 'Cora'
model_name = 'GCN'
fixed_split = True
pyg_dataset = Planetoid(
    './cora', name,
    transform=T.TargetIndegree())  # load some format of graph data

if not fixed_split:
    # Re-split the converted graphs 80/10/10 transductively.
    graphs = GraphDataset.pyg_to_graphs(
        pyg_dataset, verbose=True,
        fixed_split=fixed_split)  # transform to our format

    dataset = GraphDataset(graphs, task='node')  # node, edge, link_pred, graph
    dataset_train, dataset_val, dataset_test = dataset.split(
        transductive=True,
        split_ratio=[0.8, 0.1, 0.1])  # transductive split, inductive split

else:
    # Keep the dataset's canonical fixed split.
    graphs_train, graphs_val, graphs_test = \
        GraphDataset.pyg_to_graphs(pyg_dataset, verbose=True, fixed_split=fixed_split)  # transform to our format
def main():
    """CLI entry point: train a GAT on a Planetoid dataset for several runs,
    optionally evaluating each epoch and reporting aggregate statistics.

    NOTE(review): GAT, Logger, evaluate, SparseTensor, T, Planetoid, osp,
    np, nn and time are module-level names defined outside this view.
    """
    parser = argparse.ArgumentParser(description='GAT')
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--num-layers",
                        type=int,
                        default=3,
                        help="number of hidden layers")
    parser.add_argument("--lr",
                        type=float,
                        default=0.005,
                        help="learning rate")
    parser.add_argument('--weight-decay',
                        type=float,
                        default=5e-4,
                        help="weight decay")
    parser.add_argument("--num-hidden",
                        type=int,
                        default=8,
                        help="number of hidden units")
    parser.add_argument("--dropout",
                        type=float,
                        default=.6,
                        help="Dropout to use")
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument("--eval",
                        action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', args.dataset)
    dataset = Planetoid(path, args.dataset, transform=T.NormalizeFeatures())
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    # Adjacency as a SparseTensor for the model's message passing.
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GAT(num_layers=args.num_layers,
                in_feats=features.size(-1),
                num_hidden=args.num_hidden,
                num_classes=dataset.num_classes,
                heads=[8, 8, 1],
                dropout=args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        # Fresh parameters and optimizer per run.
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            model.train()
            # Skip the first two epochs when timing to avoid warm-up noise.
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, adj)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            # Per-epoch evaluation only when --eval is set.
            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(model, features, adj,
                                                    labels, train_mask,
                                                    val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))

            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}"
                .format(run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
示例#17
0
    def test_split(self):
        """Exercise Graph.split for node, edge and link_pred tasks on Cora,
        checking each split's label-index size against the expected ratio
        arithmetic (default ratios [0.8, 0.1, 0.1]).

        NOTE(review): Graph (deepsnap) is imported outside this view; the
        '+ 1' / '- 3' terms presumably mirror deepsnap's guarantee of at
        least one element per split — confirm against the library version.
        """
        pyg_dataset = Planetoid("./cora", "Cora")
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        # --- node task (default): 80/10/10 over nodes ----------------------
        dg_node = dg.split()
        dg_num_nodes_reduced = dg.num_nodes - 3
        self.assertEqual(
            dg_node[0].node_label_index.shape[0],
            1 + int(dg_num_nodes_reduced * 0.8),
        )
        self.assertEqual(
            dg_node[1].node_label_index.shape[0],
            1 + int(dg_num_nodes_reduced * 0.1),
        )
        # The remainder of the nodes lands in the test split.
        self.assertEqual(
            dg_node[2].node_label_index.shape[0],
            dg.num_nodes
            - 2
            - int(dg_num_nodes_reduced * 0.8)
            - int(dg_num_nodes_reduced * 0.1),
        )

        # --- edge task: counts doubled (presumably both edge directions) ---
        dg_edge = dg.split(task="edge")
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
        edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(dg_edge[0].edge_label_index.shape[1], edge_0)
        self.assertEqual(dg_edge[1].edge_label_index.shape[1], edge_1)
        self.assertEqual(dg_edge[2].edge_label_index.shape[1], edge_2)

        # --- link_pred task: same size arithmetic as the edge task ---------
        dg_link = dg.split(task="link_pred")
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
        edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
        self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
        self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)

        # --- disjoint resampling at several message ratios -----------------
        for message_ratio in [0.1, 0.2, 0.4, 0.8]:
            dg_link_resample = (
                dg_link[0].clone().resample_disjoint(
                    message_ratio=message_ratio,
                )
            )
            # Half the label edges are positives (the rest are negatives).
            positive_edge_num = (
                int(0.5 * dg_link[0].clone().edge_label_index.shape[1])
            )
            self.assertEqual(
                dg_link_resample.edge_label_index.shape[1],
                2 * (
                    positive_edge_num
                    - 1
                    - int(message_ratio * (positive_edge_num - 2))
                )
            )

        # --- custom split ratios for link_pred -----------------------------
        for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
            dg_link_custom = (
                dg.split(task='link_pred', split_ratio=split_ratio)
            )
            dg_num_edges_reduced = dg.num_edges - 3
            edge_0 = 2 * (1 + int(dg_num_edges_reduced * split_ratio[0]))
            self.assertEqual(
                dg_link_custom[0].edge_label_index.shape[1],
                edge_0,
            )
            edge_1 = (
                2 * (
                    1
                    + int(split_ratio[0] * dg_num_edges_reduced)
                    + 1
                    + int(split_ratio[1] * dg_num_edges_reduced)
                )
                - edge_0
            )
            self.assertEqual(
                dg_link_custom[1].edge_label_index.shape[1],
                edge_1,
            )
            edge_2 = dg.num_edges * 2 - edge_0 - edge_1
            self.assertEqual(
                dg_link_custom[2].edge_label_index.shape[1],
                edge_2,
            )
示例#18
0
        dist = dist2src + dist2dst
        dist_over_2, dist_mod_2 = dist // 2, dist % 2

        z = 1 + torch.min(dist2src, dist2dst)
        z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1)
        z[src] = 1.
        z[dst] = 1.
        z[torch.isnan(z)] = 0.

        self.__max_z__ = max(int(z.max()), self.__max_z__)

        return z.to(torch.long)


# Cache the Planetoid download under ../data/Planetoid relative to this file.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, 'Cora')

# One SEALDataset (defined elsewhere in this file) per link-prediction split,
# each built from 2-hop enclosing subgraphs.
train_dataset = SEALDataset(dataset, num_hops=2, split='train')
val_dataset = SEALDataset(dataset, num_hops=2, split='val')
test_dataset = SEALDataset(dataset, num_hops=2, split='test')

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


class DGCNN(torch.nn.Module):
    def __init__(self, hidden_channels, num_layers, GNN=GCNConv, k=0.6):
        super(DGCNN, self).__init__()

        if k < 1:  # Transform percentile to number.
示例#19
0
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
# Fix: T.TargetIndegree() was passed positionally, landing in Planetoid's
# third parameter `split` instead of `transform` — the transform was never
# applied and `split` received a garbage value. Pass it by keyword, matching
# the other Planetoid call sites in this file.
dataset = Planetoid(path, dataset, transform=T.TargetIndegree())
data = dataset[0]

# Replace the public split: first num_nodes-1000 nodes train, last 500 test,
# no validation set.
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[:data.num_nodes - 1000] = 1
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[data.num_nodes - 500:] = 1


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
示例#20
0
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import ClusterData, ClusterLoader
from torch_geometric.nn import GCNConv

### Load data: PubMed with row-normalized node features.
dataset = Planetoid(root='data/Planetoid',
                    name='PubMed',
                    transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('==================')
# Dataset-level summary (output identical to printing each line inline).
for label, value in (('Number of graphs', len(dataset)),
                     ('Number of features', dataset.num_features),
                     ('Number of classes', dataset.num_classes)):
    print(f'{label}: {value}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print(
    '==============================================================================================================='
)

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
示例#21
0
def load_dataset(dataset, transform=None):
    """Load a node-classification benchmark by (case-insensitive) name.

    Args:
        dataset: Dataset name. One of "cora", "citeseer", "pubmed"
            (Planetoid), "cs", "physics" (Coauthor), or "computers",
            "photo" (Amazon).
        transform: Optional transform forwarded to the dataset constructor.

    Returns:
        The instantiated dataset object.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    name = dataset.lower()
    if name in ["cora", "citeseer", "pubmed"]:
        # NOTE(review): directory is spelled "Plantoid" (sic); kept as-is so
        # existing downloads in that folder stay valid.
        path = os.path.join(".datasets", "Plantoid")
        dataset = Planetoid(path, name, transform=transform)
    elif name in ["cs", "physics"]:
        path = os.path.join(".datasets", "Coauthor", name)
        dataset = Coauthor(path, name, transform=transform)
    elif name in ["computers", "photo"]:
        path = os.path.join(".datasets", "Amazon", name)
        dataset = Amazon(path, name, transform=transform)
    else:
        # The original `print` + `assert False` is stripped under `python -O`,
        # silently returning None; fail loudly instead.
        raise ValueError("Dataset not supported: %s" % dataset)
    return dataset
示例#22
0
import os.path as osp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.nn.inits import uniform

hidden_dim = 512  # width of the embedding produced by the encoder below

dataset = 'Cora'
here = osp.dirname(osp.realpath(__file__))
path = osp.join(here, '..', 'data', dataset)
data = Planetoid(path, dataset)[0]


class Encoder(nn.Module):
    """Single GCN layer + PReLU encoder.

    With ``corrupt=True`` the node features are row-shuffled before encoding,
    producing negative samples (the module-level ``data`` supplies the sizes).
    """

    def __init__(self, hidden_dim):
        super(Encoder, self).__init__()
        self.conv = GCNConv(data.num_features, hidden_dim)
        self.prelu = nn.PReLU(hidden_dim)

    def forward(self, x, edge_index, corrupt=False):
        if corrupt:
            # Randomly permute rows so features no longer match the graph.
            x = x[torch.randperm(data.num_nodes)]
        out = self.conv(x, edge_index)
        return self.prelu(out)
import torch.nn.functional as F
import time
import matplotlib as mpl

from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv

mpl.use('agg')  # headless backend: plots go to files, never to a display

# Loading Dataset (public Planetoid split, row-normalized features).
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path,
                    dataset,
                    split='public',
                    transform=T.NormalizeFeatures())
graph_data = dataset[0]

# Split sizes (count of True entries in each mask).
num_train = len(graph_data.y[graph_data.train_mask])
num_test = len(graph_data.y[graph_data.test_mask])

# Initialise and parse command-line inputs.
parser = argparse.ArgumentParser(description='PT MCMC CNN')
parser.add_argument('-s', '--samples', dest='samples', type=int,
                    default=80, help='Number of samples')
示例#24
0
def planetoid_dataset(name: str) -> Callable:
    """Return a factory that builds the named Planetoid dataset from a root dir."""
    def make(root):
        return Planetoid(root, name)

    return make
示例#25
0
文件: pytg.py 项目: tekdogan/gcn
#!/usr/bin/env python3
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import sys
import torch.cuda.profiler as profiler
from torch_geometric.datasets import Planetoid
import pyprof
import torch

# Register pyprof's hooks before any profiled torch ops run.
pyprof.init()

# Cora downloaded/cached under /tmp.
dataset = Planetoid(root='/tmp/Cora', name='Cora')

with torch.autograd.profiler.emit_nvtx():

    profiler.start()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = GCNConv(dataset.num_node_features, 16)
            #self.conv2 = GCNConv(16, dataset.num_classes)

        def forward(self, data):
            x, edge_index = data.x, data.edge_index

            x = self.conv1(x, edge_index)
            #x = F.relu(x)
            #x = F.dropout(x, training=self.training)
            #x = self.conv2(x, edge_index)
示例#26
0
文件: pyg.py 项目: Frozenmad/AutoGL
 def __init__(self, path):
     """Materialise the Cora Planetoid data at *path*, then init the parent dataset."""
     name = "Cora"
     Planetoid(path, name)  # triggers download/processing if not cached
     super(CoraDataset, self).__init__(path, name)
示例#27
0
#imports
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch.nn.functional as F
from tqdm import tqdm_notebook as tqdm

# Reproducibility: pin both RNGs and force deterministic cuDNN kernels.
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

dataset = 'CiteSeer'
path = osp.join('..', 'data', dataset)
dataset = Planetoid(path, dataset, T.NormalizeFeatures())
data = dataset[0]


class Net(torch.nn.Module):
    """Two-layer GCN classifier (cached propagation) emitting log-probabilities."""

    def __init__(self, in_features, num_classes):
        super(Net, self).__init__()
        self.conv1 = GCNConv(in_features, 16, cached=True)
        self.conv2 = GCNConv(16, num_classes, cached=True)

    def forward(self, x, edge_index):
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, training=self.training)
        logits = self.conv2(hidden, edge_index)
        return F.log_softmax(logits, dim=1)
示例#28
0
def load_data(dataset="cora",
              num_labels_per_class=20,
              missing_edge=False,
              verbose=0):
    """Load a Planetoid dataset and optionally rebuild its train/val/test masks.

    Args:
        dataset: One of "cora", "pubmed", "citeseer".
        num_labels_per_class: Training nodes kept per class. 20 keeps the
            original Planetoid split untouched; any other value triggers a
            deterministic re-split (N per class train, next 500 val, rest test).
        missing_edge: If True, drop every edge incident to a test node
            (only allowed with the original 20-per-class split).
        verbose: If nonzero, print progress information.

    Returns:
        A `Data` object with `num_classes` attached and, when re-split,
        ByteTensor train/val/test masks.
    """
    # Load data.
    path = os.path.join("data", dataset)
    if verbose:
        print("loading data from %s. %d labels per class." %
              (path, num_labels_per_class))
    assert dataset in ["cora", "pubmed", "citeseer"]
    dataset = Planetoid(root=path,
                        name=dataset,
                        transform=T.NormalizeFeatures())

    data = dataset[0]
    data.num_classes = dataset.num_classes

    if missing_edge:
        # Zero the adjacency rows AND columns of test nodes, i.e. remove
        # every edge that touches the test set.
        assert num_labels_per_class == 20
        test_idx = data.test_mask.nonzero().squeeze().numpy()
        edge_index = data.edge_index.numpy()
        num_nodes = data.y.size(0)
        adj = sps.csc_matrix(
            (np.ones(edge_index.shape[1]), (edge_index[0], edge_index[1])),
            shape=(num_nodes, num_nodes))
        adj_mask = np.ones(num_nodes)
        adj_mask[test_idx] = 0
        adj_mask = sps.diags(adj_mask, format="csr")
        adj = adj_mask.dot(adj).dot(adj_mask.tocsc()).tocoo()
        edge_index = np.concatenate(
            [adj.row.reshape(1, -1),
             adj.col.reshape(1, -1)], axis=0)
        data.edge_index = torch.LongTensor(edge_index)

    # Original Planetoid setting: keep the canonical split.
    if num_labels_per_class == 20:
        return data

    # Get one-hot labels (vectorized fancy-index assignment instead of a loop).
    temp = data.y.numpy()
    labels = np.zeros((len(temp), temp.max() + 1))
    labels[np.arange(len(temp)), temp] = 1

    all_idx = list(range(len(labels)))

    # Select a fixed number of training data per class, scanning nodes in order.
    idx_train = []
    class_cnt = np.zeros(
        labels.shape[1])  # number of nodes selected for each class
    for i in all_idx:
        if (class_cnt >= num_labels_per_class).all():
            break
        if ((class_cnt + labels[i]) > num_labels_per_class).any():
            continue
        class_cnt += labels[i]
        idx_train.append(i)
    if verbose:
        print("number of training data: ", len(idx_train))

    train_mask = np.zeros((len(labels), ), dtype=int)
    val_mask = np.zeros((len(labels), ), dtype=int)
    test_mask = np.zeros((len(labels), ), dtype=int)
    # Set membership + running counter: the original `i in idx_train` list
    # scan and per-iteration `sum(val_mask)` made this loop O(n^2).
    train_set = set(idx_train)
    num_val = 0
    for i in all_idx:
        if i in train_set:
            train_mask[i] = 1
        elif num_val < 500:  # select 500 validation data
            val_mask[i] = 1
            num_val += 1
        else:
            test_mask[i] = 1
    # NOTE(review): ByteTensor masks are deprecated in modern PyTorch in
    # favor of bool masks; kept for compatibility with existing callers.
    data.train_mask = torch.ByteTensor(train_mask)
    data.val_mask = torch.ByteTensor(val_mask)
    data.test_mask = torch.ByteTensor(test_mask)

    return data
示例#29
0
import os.path as osp

import torch
import torch.nn as nn
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, DeepGraphInfomax

# Load Cora from ../data/Cora relative to this file; `dataset` is rebound
# from the name string to the dataset object.
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset)


class Encoder(nn.Module):
    """One cached GCN layer followed by a learnable PReLU — the DGI encoder."""

    def __init__(self, in_channels, hidden_channels):
        super(Encoder, self).__init__()
        self.conv = GCNConv(in_channels, hidden_channels, cached=True)
        self.prelu = nn.PReLU(hidden_channels)

    def forward(self, x, edge_index):
        return self.prelu(self.conv(x, edge_index))


def corruption(x, edge_index):
    """DGI corruption: shuffle node features across rows, keep edges unchanged."""
    perm = torch.randperm(x.size(0))
    return x[perm], edge_index


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DeepGraphInfomax(hidden_channels=512,
                         encoder=Encoder(dataset.num_features, 512),
示例#30
0
        dist = dist2src + dist2dst
        dist_over_2, dist_mod_2 = dist // 2, dist % 2

        z = 1 + torch.min(dist2src, dist2dst)
        z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1)
        z[src] = 1.
        z[dst] = 1.
        z[torch.isnan(z)] = 0.

        self._max_z = max(int(z.max()), self._max_z)

        return z.to(torch.long)


# Load Cora from ../data/Planetoid relative to this file.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, name='Cora')

# Build the three SEAL link-prediction splits (2-hop enclosing subgraphs).
splits = {
    split: SEALDataset(dataset, num_hops=2, split=split)
    for split in ('train', 'val', 'test')
}
train_dataset = splits['train']
val_dataset = splits['val']
test_dataset = splits['test']

# Shuffle only during training.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


class DGCNN(torch.nn.Module):
    def __init__(self, hidden_channels, num_layers, GNN=GCNConv, k=0.6):
        super().__init__()

        if k < 1:  # Transform percentile to number.