Example #1
def graph_multiclass_classification(in_feats: int = 1,
                                    hid_feats: int = 4,
                                    num_heads: int = 2) -> None:
    # GAT_Graph_Classifier, batch_graphs, train_graph_classifier and logger
    # are defined elsewhere in the surrounding project.
    import torch
    import torch.optim as optim
    from torch.utils.data import DataLoader
    from dgl.data import MiniGCDataset

    # Create training and test sets.
    trainset = MiniGCDataset(320, 10, 20)
    testset = MiniGCDataset(80, 10, 20)

    # Use PyTorch's DataLoader with the project's `batch_graphs` collate function.
    data_loader = DataLoader(trainset,
                             batch_size=8,
                             shuffle=True,
                             collate_fn=batch_graphs)

    # Create model
    model = GAT_Graph_Classifier(in_feats,
                                 hid_feats,
                                 num_heads=num_heads,
                                 out_dim=trainset.num_classes)
    logger.info(model)

    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epoch_losses, epoch_predictions_dict = train_graph_classifier(
        model, data_loader, loss_func=loss_func, optimizer=optimizer, epochs=5)
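The `batch_graphs` collate function referenced above is project-specific and not shown. A minimal sketch of what such a collate needs to do, assuming it only merges DGL graphs and stacks labels:

import dgl
import torch

def batch_graphs(samples):
    # `samples` is a list of (graph, label) pairs from MiniGCDataset.
    graphs, labels = map(list, zip(*samples))
    # Merge the graphs into one large, disconnected graph.
    return dgl.batch(graphs), torch.tensor(labels)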
Example #2
def get_dgl_minigc(aggregation_type="sum"):
    import dgl
    from dgl.data import MiniGCDataset
    from torch.utils.data import DataLoader

    # `collate` and `nx_to_format` are defined elsewhere in the project.
    tr_set = MiniGCDataset(80, 10, 20)
    test_set = MiniGCDataset(20, 10, 20)
    data_loader = DataLoader(tr_set, batch_size=80, shuffle=True,
                             collate_fn=collate)
    dataiter = iter(data_loader)
    graphs, labels = next(dataiter)  # one batch holding the whole training set
    G = graphs.to_networkx()

    e = len(G.edges)
    n = len(G.nodes)

    edges, agg_matrix = nx_to_format(G, aggregation_type)

    print("ciao")
Example #3
 def setup(self, stage):
     # Define the datasets. This hook runs on every GPU; `stage` marks
     # which phase (fit/test) is being set up.
     if stage == 'fit' or stage is None:
         # trainset = MiniGCDataset(20000, 10, 20)
         train_dataset, val_dataset = random_split(self.trainset,
                                                   [14000, 6000])
         self.train_dataset = train_dataset
         self.val_dataset = val_dataset
     if stage == 'test' or stage is None:
         self.test_dataset = MiniGCDataset(10000, 10, 20)
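The matching dataloader hooks are not shown. A minimal sketch, assuming a DGL-style `collate` function and an illustrative batch size of 32:

 def train_dataloader(self):
     return DataLoader(self.train_dataset, batch_size=32,
                       shuffle=True, collate_fn=collate)

 def val_dataloader(self):
     return DataLoader(self.val_dataset, batch_size=32,
                       shuffle=False, collate_fn=collate)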
Example #4
# --------------------------------
# In this tutorial, you learn how to perform batched graph classification
# with DGL. The example task is to classify the eight graph topologies shown here.
#
# .. image:: https://s3.us-east-2.amazonaws.com/dgl.ai/tutorial/batch/dataset_overview.png
#     :align: center
#
# DGL provides a synthetic dataset, :class:`data.MiniGCDataset`, with eight
# different types of graphs; each class has the same number of graph samples.

from dgl.data import MiniGCDataset
import matplotlib.pyplot as plt
import networkx as nx
# A dataset with 80 samples, where each graph
# has 10 to 20 nodes.
dataset = MiniGCDataset(80, 10, 20)
graph, label = dataset[0]
fig, ax = plt.subplots()
nx.draw(graph.to_networkx(), ax=ax)
ax.set_title('Class: {:d}'.format(int(label)))  # label may be a 0-dim tensor
plt.show()

###############################################################################
# Form a graph mini-batch
# -----------------------
# To train neural networks efficiently, a common practice is to batch
# multiple samples together to form a mini-batch. Batching fixed-shaped tensor
# inputs is common. For example, batching two images of size 28 x 28
# gives a tensor of shape 2 x 28 x 28. By contrast, batching graph inputs
# has two challenges:
#
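# DGL's answer to both is :func:`dgl.batch`, which merges a list of graphs
# into one larger, disconnected graph. A quick illustrative check (a sketch,
# not part of the original tutorial):

import dgl

g1, _ = dataset[0]
g2, _ = dataset[1]
bg = dgl.batch([g1, g2])
print(bg.number_of_nodes(), bg.number_of_edges())  # sums over the parts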
Example #5
    test_loss = 0  # running loss over the test set
    correct = 0
    total = 0
    for batch_idx, (inputs, labels) in enumerate(testloader):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        print(epoch, test_loss / (1 + batch_idx), correct, total,
              1. * correct / total)


# Dataset
trainset = MiniGCDataset(1000, 10, 11)
testset = MiniGCDataset(100, 10, 11)
trainloader = DataLoader(trainset,
                         batch_size=100,
                         shuffle=True,
                         collate_fn=collate)
testloader = DataLoader(testset,
                        batch_size=100,
                        shuffle=False,
                        collate_fn=collate)

# Model
model = GraphModel(2, 128, trainset.num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
# optimizer = optim.SGD(model.parameters(), lr=0.001,
Example #6
import dgl
import dgl.function as fn
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from dgl.data import MiniGCDataset
from torch.utils.data import DataLoader


dataset = MiniGCDataset(80, 10, 20)  # 80 graphs with 10 ~ 20 nodes

graph, label = dataset[0]

fig, ax = plt.subplots()  # axis
nx.draw(graph.to_networkx(), ax=ax)
ax.set_title('Class: {:d}'.format(int(label)))  # label may be a 0-dim tensor
plt.show()


# form a mini-batch: batch multiple samples together
def collate(samples):
    # `samples` is a list of (graph, label) pairs.
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)  # merge into one disconnected graph
    return batched_graph, torch.tensor(labels)
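

# For reference, this collate plugs straight into a standard DataLoader
# (the batch size below is just an illustrative choice):
data_loader = DataLoader(dataset, batch_size=8, shuffle=True,
                         collate_fn=collate)
batched_graph, labels = next(iter(data_loader))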


# graph convolution
Example #7
import torch.nn as nn
import torch.optim as optim
from dgl.data import MiniGCDataset
from torch.utils.data import DataLoader

from nets.classifier import Classifier
from utils.graph import collate

# Create training and test sets.
train_set = MiniGCDataset(320, 10, 20)
valid_set = MiniGCDataset(80, 10, 20)

# Use PyTorch's DataLoader and the collate function
# defined before.
data_loader = DataLoader(train_set,
                         batch_size=32,
                         shuffle=True,
                         collate_fn=collate)

# Create model
model = Classifier(1, 256, train_set.num_classes)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()

epoch_losses = []
for epoch in range(50):
    epoch_loss = 0
    for iteration, (graph, label) in enumerate(data_loader):
        prediction = model(graph)
        loss = loss_func(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
Example #8
# %%


# Create an artificial feature for each node: its in-degree.
def create_artificial_features(dataset):
    for (graph, _) in dataset:
        graph.ndata['feat'] = graph.in_degrees().view(-1, 1).float()
        graph.edata['feat'] = torch.ones(graph.number_of_edges(), 1)
    return dataset


# %%

# Generate an artificial graph dataset with DGL
trainset = MiniGCDataset(350, 10, 20)
testset = MiniGCDataset(100, 10, 20)

trainset = create_artificial_features(trainset)
testset = create_artificial_features(testset)

print(trainset[0])

# %%


class ExcitationGCN_layer(nn.Module):
    def __init__(self, input_dim, output_dim, reduction=2):
        super().__init__()
        self.hidden_exc = output_dim // reduction
        self.A = nn.Linear(input_dim, output_dim)
Example #9
            J = loss(batch_scores, batch_labels.long())

            epoch_test_loss += J.detach().item()
            # epoch_test_acc += accuracy(batch_scores, batch_labels)
            nb_data += batch_labels.size(0)

        epoch_test_loss /= (iter + 1)
        # epoch_test_acc /= nb_data

    # return epoch_test_loss, epoch_test_acc
    return epoch_test_loss


#%%
# Generate an artificial graph dataset with DGL
data = MiniGCDataset(500, 10, 20)
test_ratio = 0.2
n = len(data)
train_size = int(n * (1 - test_ratio))
test_size = n - train_size
testset, trainset = random_split(data, (test_size, train_size),
                                 generator=torch.Generator().manual_seed(42))

#%%
next(iter(trainset))  # peek at one training sample

#%%


def get_model(name, input_dim, hidden_dim, output_dim, L):
    if name == "normal":
Example #10
#     print(param_tensor, "\t", net.state_dict()[param_tensor].size())

from torch.utils.tensorboard import SummaryWriter

# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/fashion_mnist_experiment_1')

import torch as th
import numpy as np
import scipy.sparse as spp

import dgl

# Create a star graph from a pair of arrays (using ``numpy.array`` works too).
u = th.tensor([0, 0, 0, 0, 0])
v = th.tensor([1, 2, 3, 4, 5])
star1 = dgl.graph((u, v))  # dgl.DGLGraph((u, v)) in older DGL releases

# Create the same graph from a scipy sparse matrix
# (using ``scipy.sparse.csr_matrix`` works too).
adj = spp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))
star3 = dgl.from_scipy(adj)  # dgl.DGLGraph(adj) in older DGL releases

from dgl.data import MiniGCDataset
import matplotlib.pyplot as plt
import networkx as nx
# A dataset with 80 samples, each graph is
# of size [10, 20]
dataset = MiniGCDataset(80, 10, 20)
graph, label = dataset[0]
fig, ax = plt.subplots()
nx.draw(graph.to_networkx(), ax=ax)
ax.set_title('Class: {:d}'.format(int(label)))  # label may be a 0-dim tensor
plt.show()
Example #11
#%%
import networkx as nx
import numpy as np
import h5py
from dgl.data import MiniGCDataset

#%%
# Generate a dataset with 1200 samples.  Each graph is of size [10, 20]
dataset = MiniGCDataset(1200, 10, 20)

#%%
def converter(data):
    """
    Read data from dataset, convert to numpy adjacency matrix, and return matrix and label
    """
    graph, label = data
    graph = graph.to_networkx()
    adj = nx.to_numpy_array(graph)  # nx.to_numpy_matrix was removed in NetworkX 3.0
    lab = int(label.numpy())
    return adj, lab

#%%
# Write dataset to hdf5 file

output_file = 'GDataset.h5'
h5file = h5py.File(output_file, 'w')

for i in range(1200):
    print(i)

    mat, lab = converter(dataset[i])
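    # A possible continuation (an assumption; the original write step is cut
    # off above): store each adjacency matrix and label under its index.
    grp = h5file.create_group(str(i))
    grp.create_dataset('adjacency', data=mat)
    grp.attrs['label'] = lab

h5file.close()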
Example #12
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import dgl
import dgl.function as fn
from dgl.data import MiniGCDataset
from bijou.data import DGLDataLoader, DataBunch
from bijou.callbacks import DGLInterpreter
from bijou.metrics import accuracy
from bijou.learner import Learner
import matplotlib.pyplot as plt

# 1. dataset
train_ds = MiniGCDataset(320, 10, 20)
val_ds = MiniGCDataset(100, 10, 20)
test_ds = MiniGCDataset(80, 10, 20)

train_dl = DGLDataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DGLDataLoader(val_ds, batch_size=32, shuffle=False)
test_dl = DGLDataLoader(test_ds, batch_size=32, shuffle=False)

data = DataBunch(train_dl, val_dl)

# 2. model and optimizer

msg = fn.copy_u(u='h', out='m')  # sends node feature 'h' as message 'm'; fn.copy_src in older DGL


def reduce(nodes):
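    # A typical reduce for this setup (an assumption; the original body is
    # cut off): average incoming messages into the new node feature 'h'.
    return {'h': torch.mean(nodes.mailbox['m'], 1)}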