import os.path as osp

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid


def load_planetoid(dataset):
    """Load a Planetoid citation dataset with self-loops added to every node."""
    data_name = ['Cora', 'CiteSeer', 'PubMed']
    assert dataset in data_name
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'Datasets',
                    'NodeData')
    transforms = T.Compose([T.AddSelfLoops()])
    dataset = Planetoid(path, dataset, transform=transforms)
    return dataset, dataset[0]
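A hypothetical usage sketch for the helper above; the printed attributes are standard Planetoid/Data fields, but nothing in this snippet is from the original file:

# Sketch only: illustrative calls, not part of the source module.
dataset, data = load_planetoid('Cora')
print(dataset.num_features, dataset.num_classes)  # 1433 features, 7 classes
print(data.num_edges)                 # includes the added self-loops
print(data.train_mask.sum().item())   # size of the public training split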
import torch

import torch_geometric.transforms as T
from torch_geometric.data import Data


def test_compose():
    transform = T.Compose([T.Center(), T.AddSelfLoops()])
    assert str(transform) == ('Compose([\n'
                              '  Center(),\n'
                              '  AddSelfLoops()\n'
                              '])')

    pos = torch.Tensor([[0, 0], [2, 0], [4, 0]])
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    data = Data(edge_index=edge_index, pos=pos)

    data = transform(data)
    assert len(data) == 2
    assert data.pos.tolist() == [[-2, 0], [0, 0], [2, 0]]
    assert data.edge_index.size() == (2, 7)  # 4 original edges + 3 self-loops
# A second version of the same test, apparently from a different (older)
# torch_geometric release; same imports as above.
def test_compose():
    transform = T.Compose([T.Center(), T.AddSelfLoops()])
    assert transform.__repr__() == ('Compose([\n'
                                    '    Center(),\n'
                                    '    AddSelfLoops(),\n'
                                    '])')

    pos = torch.tensor([[0, 0], [2, 0], [4, 0]], dtype=torch.float)
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    data = Data(edge_index=edge_index, pos=pos)

    out = transform(data)
    assert out.pos.tolist() == [[-2, 0], [0, 0], [2, 0]]
    assert out.edge_index.tolist() == [[0, 0, 1, 1, 1, 2, 2],
                                       [0, 1, 0, 1, 2, 1, 2]]
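For reference, a standalone sketch of the behavior both tests depend on: AddSelfLoops appends one (i, i) edge per node, so 4 input edges on 3 nodes become 7. The exact edge ordering differs between releases, which is presumably why the two tests above assert it differently.

# Sketch only, using the imports from the tests above.
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
data = Data(edge_index=edge_index, pos=torch.rand(3, 2))
data = T.AddSelfLoops()(data)
assert data.edge_index.size(1) == 7  # 4 original edges + 3 self-loops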
import os.path as osp
import random
import shutil
import sys

import torch
from torch.nn.functional import relu

import torch_geometric.transforms as T
from torch_geometric.data import NeighborSampler  # old data-flow API (PyG 1.x)
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SAGEConv


def test_cora():
    class Net(torch.nn.Module):
        def __init__(self, in_channels, out_channels):
            super(Net, self).__init__()
            self.conv1 = SAGEConv(in_channels, 16)
            self.conv2 = SAGEConv(16, 16)
            self.conv3 = SAGEConv(16, out_channels)

        def forward_data_flow(self, x, edge_weight, data_flow):
            # Edges added by the sampler itself carry e_id == -1; give them
            # unit weight instead of an (arbitrary) gathered one.
            block = data_flow[0]
            weight = edge_weight[block.e_id]
            weight[block.e_id == -1] = 1
            x = relu(self.conv1(x, block.edge_index, weight, block.size))
            block = data_flow[1]
            weight = edge_weight[block.e_id]
            weight[block.e_id == -1] = 1
            x = relu(self.conv2(x, block.edge_index, weight, block.size))
            block = data_flow[2]
            weight = edge_weight[block.e_id]
            weight[block.e_id == -1] = 1
            x = self.conv3(x, block.edge_index, weight, block.size)
            return x

        def forward(self, x, edge_index, edge_weight):
            x = relu(self.conv1(x, edge_index, edge_weight))
            x = relu(self.conv2(x, edge_index, edge_weight))
            return self.conv3(x, edge_index, edge_weight)

    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    model = Net(dataset.num_features, dataset.num_classes)

    # Three variants: no self-loops, self-loops on all nodes, and
    # self-loops on a subset of nodes only.
    data1 = dataset[0]
    data1.edge_weight = torch.rand(data1.num_edges)

    data2 = T.AddSelfLoops()(dataset[0])
    data2.edge_weight = torch.rand(data2.num_edges)

    data3 = dataset[0]
    loop = torch.stack([torch.arange(100, 200), torch.arange(100, 200)], dim=0)
    data3.edge_index = torch.cat([data3.edge_index, loop], dim=1)
    data3.edge_weight = torch.rand(data3.num_edges)

    for data in [data1, data2, data3]:
        out_all = model(data.x, data.edge_index, data.edge_weight)

        loader = NeighborSampler(data, size=1.0, num_hops=3, batch_size=64,
                                 shuffle=False, drop_last=False,
                                 bipartite=True, add_self_loops=True)

        for data_flow in loader(data.train_mask):
            out = model.forward_data_flow(data.x[data_flow[0].n_id],
                                          data.edge_weight, data_flow)
            assert torch.allclose(out_all[data_flow.n_id], out)

        loader = NeighborSampler(data, size=1.0, num_hops=3, batch_size=64,
                                 shuffle=False, drop_last=False,
                                 bipartite=False)

        for subdata in loader(data.train_mask):
            out = model(data.x[subdata.n_id], subdata.edge_index,
                        data.edge_weight[subdata.e_id])
            out = out[subdata.sub_b_id]
            assert torch.allclose(out_all[subdata.b_id], out)

    shutil.rmtree(root)
# dataset.py
import torch
import torch_geometric
import torch_geometric.data as gdata
import torch_geometric.datasets as gdatasets
import torch_geometric.transforms as gtransforms

from config import batch_size

transform = gtransforms.AddSelfLoops()

# test if transform works
# cora = gdatasets.KarateClub(transform=transform)
# cora_loader = gdata.DataLoader(cora, batch_size=1, shuffle=True)

cora = gdatasets.Planetoid(root='./Planetoid/Cora', name='Cora',
                           transform=transform)
cora_data = cora[0]
cora_data.train_mask = torch.zeros(cora_data.num_nodes, dtype=torch.uint8)
cora_data.train_mask[:cora_data.num_nodes - 1000] = 1
cora_data.val_mask = None
cora_data.test_mask = torch.zeros(cora_data.num_nodes, dtype=torch.uint8)
cora_data.test_mask[cora_data.num_nodes - 500:] = 1

# We only need the train part of the graph to train.
num_features = cora.num_features
num_classes = cora.num_classes
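A sketch of how this module might be consumed downstream; the GCNConv model and the .bool() casts are illustrative assumptions, not part of the source. Note add_self_loops=False, since the transform above already added them:

import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# Hypothetical consumer of dataset.py; the GCNConv choice is an assumption.
conv = GCNConv(num_features, num_classes, add_self_loops=False)
out = conv(cora_data.x, cora_data.edge_index)
loss = F.cross_entropy(out[cora_data.train_mask.bool()],
                       cora_data.y[cora_data.train_mask.bool()])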
# Fragment from an event-processing loop (i runs over the N events):
x = torch.tensor(tmp_event[['charge_log10', 'time', 'dom_x', 'dom_y',
                            'dom_z']].values, dtype=torch.float)  # Features
pos = torch.tensor(tmp_event[['dom_x', 'dom_y', 'dom_z']].values,
                   dtype=torch.float)  # Position

query = ("SELECT energy_log10, time, position_x, position_y, position_z, "
         "direction_x, direction_y, direction_z, azimuth, zenith "
         "FROM truth WHERE event_no = {}".format(event_no))
y = pd.read_sql(query, con)
y = torch.tensor(y.values, dtype=torch.float)  # Target

dat = Data(x=x, edge_index=None, edge_attr=None, y=y, pos=pos)

# Defining edges by k-NN with k=6. The transforms below modify `dat` in
# place. !!! Make sure .pos is not scaled !!! i.e. x, y, z -!-> ax, by, cz
# T.KNNGraph(loop=True)(dat)
T.KNNGraph(k=6, loop=False, force_undirected=False)(dat)
dat.adj_t = None
T.ToUndirected()(dat)
T.AddSelfLoops()(dat)

# Swap the source/target rows of edge_index to flip the edge direction.
(row, col) = dat.edge_index
dat.edge_index = torch.stack([col, row], dim=0)

data_list.append(dat)

if (i + 1) % subdivides == 0:
    data, slices = InMemoryDataset.collate(data_list)
    torch.save((data, slices),
               destination + '/{}k_{}{}.pt'.format(subdivides // 1000,
                                                   save_filename, subset))
    subset += 1
    data_list = []  # Does this free up the memory?

if i % 500 == 0:
    print("{}: Completed {}/{}".format(datetime.now(), i, N))

if data_list != []:
        # (tail of the model's forward pass) Build an inverted-dropout mask
        # over the node features, reused by every conv call below:
        mask = None
        if self.training and self.dropout > 0:
            mask = torch.zeros_like(x).bernoulli_(1 - self.dropout)
            mask = mask.requires_grad_(False)
            mask = mask / (1 - self.dropout)

        for conv in self.convs:
            x = conv(x, edge_index, mask)
            x = self.norm(x).relu()
            x = F.dropout(x, p=self.dropout, training=self.training)

        return self.lin2(x)


from ogb.nodeproppred import Evaluator, PygNodePropPredDataset  # noqa

transform = T.AddSelfLoops()
root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'products')
dataset = PygNodePropPredDataset('ogbn-products', root, transform=transform)
evaluator = Evaluator(name='ogbn-products')

data = dataset[0]
split_idx = dataset.get_idx_split()
for split in ['train', 'valid', 'test']:
    data[f'{split}_mask'] = index_to_mask(split_idx[split], data.y.shape[0])

train_loader = RandomNodeSampler(data, num_parts=10, shuffle=True,
                                 num_workers=5)
# Increase the num_parts of the test loader if you cannot fit
# the full batch graph into your GPU:
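The script above calls index_to_mask without showing where it comes from; recent torch_geometric releases export it from torch_geometric.utils, and a minimal equivalent looks like this (a sketch):

import torch

def index_to_mask(index: torch.Tensor, size: int) -> torch.Tensor:
    # Turn a tensor of node indices into a boolean mask of length `size`.
    mask = torch.zeros(size, dtype=torch.bool, device=index.device)
    mask[index] = True
    return mask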