def test_karate():
    """Check the basic statistics reported by the ``KarateClub`` dataset."""
    dataset = KarateClub()

    # Dataset-level properties.
    assert len(dataset) == 1
    assert dataset.num_features == 34
    assert dataset.num_classes == 2
    assert repr(dataset) == 'KarateClub()'

    # Properties of the single stored graph.
    graph = dataset[0]
    assert len(graph) == 3
    assert graph.edge_index.size() == (2, 156)
    assert graph.x.size() == (34, 34)
    assert graph.y.size() == (34, )
    assert graph.y.sum().item() == 17
def test_influence():
    """Check that ``influence`` returns a row-normalised node-by-node matrix.

    Random 8-dimensional features are pushed through ``Net`` on the
    KarateClub graph; the result must be square in the number of nodes and
    every row must sum to one.
    """
    graph = KarateClub()[0]
    num_nodes = graph.num_nodes

    features = torch.randn(num_nodes, 8)
    model = Net(features.size(1), 16)
    scores = influence(model, features, graph.edge_index)

    assert scores.size() == (num_nodes, num_nodes)
    row_totals = scores.sum(dim=-1)
    assert torch.allclose(row_totals, torch.ones(num_nodes))
def load_dataset(root: str, name: str, *args, **kwargs) -> Dataset:
    r"""Returns a variety of datasets according to :obj:`name`.

    The dataset class is imported lazily inside the matching branch, so only
    the class that is actually requested gets imported.

    Args:
        root: Base directory under which per-family sub-directories
            (``Planetoid/<name>``, ``TUDataset``, ``SNAPDataset``, ``DBLP``,
            ``SuiteSparseMatrixCollection``) are placed. It is not used for
            ``KarateClub`` and ``BAShapes``.
        name: Dataset identifier. Karate, Planetoid, BAShapes and DBLP names
            are matched case-insensitively; TUDataset, SNAPDataset and
            SuiteSparseMatrixCollection names are matched case-sensitively.
        *args: Extra positional arguments forwarded to the dataset class.
        **kwargs: Extra keyword arguments forwarded to the dataset class.

    Returns:
        The constructed dataset instance.

    Raises:
        NotImplementedError: If :obj:`name` matches none of the known
            datasets. The message names the rejected identifier.
    """
    lowered = name.lower()

    if 'karate' in lowered:
        from torch_geometric.datasets import KarateClub
        return KarateClub(*args, **kwargs)

    if lowered in ('cora', 'citeseer', 'pubmed'):
        from torch_geometric.datasets import Planetoid
        path = osp.join(root, 'Planetoid', name)
        return Planetoid(path, name, *args, **kwargs)

    # Matched against the exact spelling passed by the caller.
    if name in ('BZR', 'ENZYMES', 'IMDB-BINARY', 'MUTAG'):
        from torch_geometric.datasets import TUDataset
        path = osp.join(root, 'TUDataset')
        return TUDataset(path, name, *args, **kwargs)

    if name in ('ego-facebook', 'soc-Slashdot0811', 'wiki-vote'):
        from torch_geometric.datasets import SNAPDataset
        path = osp.join(root, 'SNAPDataset')
        return SNAPDataset(path, name, *args, **kwargs)

    if lowered == 'bashapes':
        from torch_geometric.datasets import BAShapes
        return BAShapes(*args, **kwargs)

    if lowered == 'dblp':
        from torch_geometric.datasets import DBLP
        path = osp.join(root, 'DBLP')
        return DBLP(path, *args, **kwargs)

    if name in ('citationCiteseer', 'illc1850'):
        from torch_geometric.datasets import SuiteSparseMatrixCollection
        path = osp.join(root, 'SuiteSparseMatrixCollection')
        # ``name`` is passed by keyword so that positional ``*args`` fill the
        # parameters that come between the path and the name.
        return SuiteSparseMatrixCollection(path, *args, name=name, **kwargs)

    raise NotImplementedError(
        f"Dataset '{name}' is not supported by load_dataset")
def test_visualize_KarateClub(self): """ export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:/usr/local/cudnn-10.0-v7.6.5.32 proxychains python -c "from template_lib.examples.DGL.geometric.test_pytorch_geometric import TestingGeometric;\ TestingGeometric().test_learning_methods_on_graphs()" """ if 'CUDA_VISIBLE_DEVICES' not in os.environ: os.environ['CUDA_VISIBLE_DEVICES'] = '0' if 'PORT' not in os.environ: os.environ['PORT'] = '6006' if 'TIME_STR' not in os.environ: os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1' # func name assert sys._getframe().f_code.co_name.startswith('test_') command = sys._getframe().f_code.co_name[5:] class_name = self.__class__.__name__[7:] \ if self.__class__.__name__.startswith('Testing') \ else self.__class__.__name__ outdir = f'results/{class_name}/{command}' from datetime import datetime TIME_STR = bool(int(os.getenv('TIME_STR', 0))) time_str = datetime.now().strftime("%Y%m%d-%H_%M_%S_%f")[:-3] outdir = outdir if not TIME_STR else (outdir + '_' + time_str) print(outdir) import collections, shutil shutil.rmtree(outdir, ignore_errors=True) os.makedirs(outdir, exist_ok=True) from torch_geometric.datasets import KarateClub dataset = KarateClub() for i in dataset[0]: print(i) # this torch.geometric.datasets object comprises of edge(edge information for each node), x(nodes) and y(labels for each node) edge, x, y = dataset[0] numpyx = x[1].numpy() numpyy = y[1].numpy() numpyedge = edge[1].numpy() import networkx as nx g = nx.Graph(numpyx) name, edgeinfo = edge src = edgeinfo[0].numpy() dst = edgeinfo[1].numpy() edgelist = zip(src, dst) for i, j in edgelist: g.add_edge(i, j) nx.draw_networkx(g) pass
def test_GCN(self):
    """Train ``GCN`` on the KarateClub graph and print per-epoch metrics.

    Nodes selected by ``train_mask`` are used for optimisation; the
    complement of that mask is used for the accuracy reported as
    ``val_accuracy``. Runs on CUDA when available, otherwise on CPU.
    """
    g = KarateClub().data
    gcn = GCN(g.x.shape[1], len(np.unique(g.y)), n_hidden_gcn=64)
    epochs = 100
    criterion = th.nn.CrossEntropyLoss(reduction='mean')
    optimizer = th.optim.Adam(gcn.parameters(), lr=0.02)
    device = th.device('cuda' if th.cuda.is_available() else 'cpu')
    gcn = gcn.to(device).float()
    g = g.to(device)
    length = len(str(epochs))  # width used to right-align the epoch counter
    print("#### TRAINING START ####")

    train_mask = g.train_mask
    test_mask = th.logical_not(train_mask)

    # Select the labels while labels and masks still share a device, then
    # move the result to the CPU. Indexing ``g.y.cpu()`` with a mask that
    # lives on the GPU fails when training on CUDA.
    y_train = g.y[train_mask].cpu().numpy()
    y_test = g.y[test_mask].cpu().numpy()

    for epoch in range(epochs):
        gcn.train()
        outputs = gcn(g)[train_mask]
        loss = criterion(outputs, g.y[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        gcn.eval()
        with th.no_grad():
            # One forward pass serves both the train and the held-out split.
            logits = gcn(g)
            predictions = np.argmax(logits[test_mask].cpu().numpy(), axis=1)
            pred_train = np.argmax(logits[train_mask].cpu().numpy(), axis=1)
        acc = accuracy_score(y_test, predictions)
        acc_train = accuracy_score(y_train, pred_train)
        print(
            f"[{epoch + 1:{length}}] loss: {loss.item(): .3f}, "
            f"training accuracy: {acc_train: .3f}, val_accuracy: {acc: .3f}"
        )
def train_net():
    """Train a ``SAGENet`` on the KarateClub graph with neighbour sampling.

    The hyper-parameters ``lr``, ``epoch``, ``grad_clip`` and ``print_freq``
    as well as ``device`` are read from names defined outside this function.

    Returns:
        The running average of the batch losses (``losses.avg``).
    """
    club = KarateClub()
    data = club.data
    # NOTE(review): ``num_nodes`` is indexed here, so it is presumably stored
    # as a sequence on this dataset version - confirm.
    data.num_nodes = data.num_nodes[0]
    print(data)

    data_loader = NeighborSampler(data, size=[20, 10], num_hops=2,
                                  batch_size=8, shuffle=True,
                                  add_self_loops=True)
    net = SAGENet(34, 2)
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    losses = AverageMeter()

    for i in range(epoch):
        # (label, output) pairs of this epoch, handed to ``validation``.
        result = []
        for data_flow in data_loader():
            label = data.y[data_flow.n_id]

            # Gradients are cleared exactly once per step, before the
            # forward/backward pass.
            optimizer.zero_grad()
            out = net(data.x, data_flow.to(device))
            result.append((label, out))

            # Calculate loss
            loss = criterion(out, label)

            # Back prop.
            loss.backward()

            # Clip gradients
            clip_gradient(optimizer, grad_clip)

            # Update weights
            optimizer.step()
            losses.update(loss.item())

        # Print status
        print(validation(result))
        if i % print_freq == 0:
            print('Epoch: {0} Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                i, loss=losses))

    return losses.avg
def test_karate():
    """Run transductive GraphSAGE on KarateClub with label-count features.

    Nodes are split 40/60 into train/test (stratified by label, fixed seed).
    Each node's feature vector is replaced by the per-class count of its
    neighbours that belong to the training set, and the resulting graph is
    fed to ``transductive_sage`` through a ``NeighborSampler``.
    """
    from torch_geometric.datasets import KarateClub

    karate = KarateClub('../data/KarateClub')
    config = Config('../data/KarateClub/', True, multilabel=False)
    data = prepare_dataset(karate, config, 2)

    node_ids = np.arange(data.y.shape[0])
    train_ids, test_ids = train_test_split(
        node_ids, stratify=data.y, test_size=0.6, random_state=0)

    def _flag(attr, members):
        # Store an all-zero uint8 mask on ``data`` and switch on ``members``.
        setattr(data, attr,
                torch.zeros(node_ids.shape[0], dtype=torch.uint8))
        getattr(data, attr)[members] = 1

    _flag('train_mask', train_ids)
    _flag('test_mask', test_ids)

    import networkx as nx
    graph = nx.from_edgelist(data.edge_index.numpy().T)
    train_nodes = set(train_ids)

    # One histogram per node: how many training neighbours carry each label.
    label_counts = []
    for node in sorted(graph.nodes):
        labelled_nbrs = list(train_nodes.intersection(graph.neighbors(node)))
        classes, freq = torch.unique(data.y[labelled_nbrs],
                                     return_counts=True)
        hist = torch.zeros(data.num_classes, dtype=torch.int64)
        hist[classes] = freq
        label_counts.append(hist)
    data.x = torch.stack(label_counts).to(torch.float)

    in_degree = degree(data.edge_index[1].to(torch.long), data.num_nodes,
                       dtype=torch.int)
    loader = NeighborSampler(data, size=[5, 5], deg=in_degree, batch_size=5,
                             shuffle=True)
    transductive_sage(loader, config, steps=200, test_every=20)
def load_pyg(name, dataset_dir):
    """
    Load PyG dataset objects. (More PyG datasets will be supported)

    The dataset is stored under ``<dataset_dir>/<name>``, where ``<name>`` is
    the identifier exactly as passed in.

    Args:
        name (string): dataset name
        dataset_dir (string): data directory

    Returns: PyG dataset object

    Raises:
        ValueError: if ``name`` matches none of the supported datasets.
    """
    dataset_dir = f'{dataset_dir}/{name}'

    if name in ['Cora', 'CiteSeer', 'PubMed']:
        return Planetoid(dataset_dir, name)

    if name.startswith('TU_'):
        tu_name = name[3:]
        if tu_name == 'IMDB':
            # TU_IMDB doesn't have node features, so a constant feature is
            # attached via the transform.
            return TUDataset(dataset_dir, 'IMDB-MULTI',
                             transform=T.Constant())
        return TUDataset(dataset_dir, tu_name)

    if name == 'Karate':
        return KarateClub()

    if 'Coauthor' in name:
        return Coauthor(dataset_dir, name='CS' if 'CS' in name else 'Physics')

    if 'Amazon' in name:
        variant = 'Computers' if 'Computers' in name else 'Photo'
        return Amazon(dataset_dir, name=variant)

    if name == 'MNIST':
        return MNISTSuperpixels(dataset_dir)

    if name == 'PPI':
        return PPI(dataset_dir)

    if name == 'QM7b':
        return QM7b(dataset_dir)

    raise ValueError('{} not support'.format(name))
def load_pyg_dataset(dataset_name, root='dataset/'):
    """Load a node-property dataset together with its split and evaluator.

    ``dataset_name`` has the form ``<source>-<name>`` where ``<source>`` is
    one of ``ogbn``, ``pyg`` or ``custom``:

    * ``ogbn``: the OGB dataset of that full name with its official split.
    * ``pyg``: ``karate`` or ``cora`` from PyG, with a random 80/10/10
      node split drawn from NumPy's global RNG; the ``ogbn-arxiv``
      evaluator is reused for these.
    * ``custom``: a dataset built from ``dataset.registry``; its first
      graph provides ``idx_train`` / ``idx_val`` / ``idx_test``.

    Args:
        dataset_name: ``<source>-<name>`` identifier.
        root: Directory handed to the OGB and ``CoraFull`` datasets.

    Returns:
        A ``(dataset, split_idx, evaluator)`` tuple, where ``split_idx`` maps
        ``'train'``, ``'valid'`` and ``'test'`` to node indices.

    Raises:
        Exception: If the dataset is not recognized.
    """
    from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

    source, name = dataset_name.split('-', maxsplit=1)
    assert source in ['ogbn', 'pyg', 'custom']

    if source == 'ogbn':
        dataset = PygNodePropPredDataset(name=dataset_name, root=root)
        return dataset, dataset.get_idx_split(), Evaluator(dataset_name)

    if source == 'pyg':
        from torch_geometric.datasets import KarateClub, CoraFull

        if name == "karate":
            dataset = KarateClub()
        elif name == "cora":
            dataset = CoraFull(root)
        else:
            raise Exception("Dataset not recognized")

        num_nodes = dataset[0].x.shape[0]
        train_end = int(num_nodes * 0.8)
        valid_end = train_end + int(num_nodes * 0.1)

        # Shuffle node ids in place, then cut the permutation into thirds.
        perm = np.arange(num_nodes, dtype=int)
        np.random.shuffle(perm)
        split_idx = {
            'train': perm[:train_end],
            'valid': perm[train_end:valid_end],
            'test': perm[valid_end:],
        }
        return dataset, split_idx, Evaluator('ogbn-arxiv')

    if source == "custom":
        from dataset import registry

        dataset = registry[name]()
        split_idx = {
            split: getattr(dataset[0], attr)
            for split, attr in (('train', 'idx_train'),
                                ('valid', 'idx_val'),
                                ('test', 'idx_test'))
        }
        return dataset, split_idx, CustomEvaluator()

    raise Exception("Dataset not recognized")
if torch.is_tensor(h): h = h.detach().cpu().numpy() plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2") if epoch is not None and loss is not None: plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16) else: nx.draw_networkx(h, pos=nx.spring_layout(h, seed=42), with_labels=False, node_color=color, cmap="Set2") plt.show() ## Getting some data dataset = KarateClub() graph = dataset[0] G_nx = to_networkx(graph, to_undirected=True) # Uncomment for initial vis. # visualize(G_nx, color=graph.y) ## Defining a GCN Model # The meat!!! class GCN(torch.nn.Module): def __init__(self): super(GCN, self).__init__() torch.manual_seed(12345)
# GCN进行节点分类 import torch import torch.nn.functional as F from torch_geometric.nn import GCNConv from torch_geometric.data import Data from torch_geometric.datasets import KarateClub from torch_geometric.utils import to_networkx import networkx as nx from matplotlib import pyplot as plt import numpy as np dataset = KarateClub() print("图数量:", len(dataset)) print("类数量:", dataset.num_classes) data = dataset[0] # 查看创建出来的图 def check_graph(data): print("图结构:", data) print("图中的键:", data.keys) print("图中节点数:", data.num_nodes) print("图中边数:", data.num_edges) print("图中节点属性特征数:", data.num_node_features) print("图中是否存在孤立节点", data.contains_isolated_nodes())
full_description = '' with open('./EGONETCONFIG.py', 'r') as f: full_description = f.read() wandb.init(name=current_dataset['name'] + " - " + job_id, project="ego-net", notes=full_description) # --------------------------------------------------------------- print("Done 1") wandb.log({'action': 'Done 1'}) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') real_data = None if DATASET == "Karate Club": real_data = KarateClub() elif DATASET == "Cora" or DATASET == "Citeseer" or DATASET == "Pubmed": real_data = Planetoid(root=input_path, name=DATASET, split="public") elif DATASET == "Reddit": real_data = Reddit(root=input_path) elif DATASET == "Amazon Computers": real_data = Amazon(root=input_path, name="Computers") elif DATASET == "Amazon Photos": real_data = Amazon(root=input_path, name="Photo") elif DATASET == "CLUSTER": real_data = GNNBenchmarkDataset(root=input_path, name="CLUSTER", split="test") elif DATASET == "PATTERN": real_data = GNNBenchmarkDataset(root=input_path, name="PATTERN",
avg_path_length = path_length_sum / (obj_size-1) return avg_path_length # parameters dataset = 'Cora' sample_label_idxes = [0, 2, 7] # label7 means augmented vertices color_list = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#000000', '#a65628', '#ffff33'] step = 3300 edge_BA = 1 edge_TF = 5 # data loading device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') if(dataset == 'KarateClub'): data = KarateClub(transform=GraphAugmenter(step, edge_BA, edge_TF)) else: data = Planetoid(root='./data/experiment/', name=dataset, transform=GraphAugmenter(step, edge_BA, edge_TF)) data_augmented = data[0].to(device) print(data_augmented) # make networkx object from torch tensor object G = graph_data_obj_to_nx(data_augmented) # make subgraph of G v_idxes_of_subgraph = [] for v_id, v_label_id in enumerate(data_augmented.y): if(v_label_id in sample_label_idxes): v_idxes_of_subgraph.append(v_id) G_sub = nx.subgraph(G, nbunch=v_idxes_of_subgraph)
import torch
from torch import Tensor
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.datasets import KarateClub

# Load the dataset and print its headline statistics.
dataset: torch.utils.data.Dataset = KarateClub()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Gather some statistics about the graph.
data = dataset[0]

# The current KarateClub has no train_mask, so one is added here by hand.
import numpy as np

# randint samples integers from the half-open interval [0, 2), i.e. 0 or 1;
# 34 values are drawn (NOTE(review): presumably one per node - confirm).
np_array = np.random.randint(0, 2, 34)
# Turn the 0/1 draws into a boolean array.
mask = np_array >= 1
# print(mask)
train_mask = torch.from_numpy(mask)
# Attach the randomly generated mask to the graph.
data['train_mask'] = train_mask

print(data)
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
def test_MessagePassing(self): """ export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:/usr/local/cudnn-10.0-v7.6.5.32 proxychains python -c "from template_lib.examples.DGL.geometric.test_pytorch_geometric import TestingGeometric;\ TestingGeometric().test_learning_methods_on_graphs()" """ if 'CUDA_VISIBLE_DEVICES' not in os.environ: os.environ['CUDA_VISIBLE_DEVICES'] = '0' if 'PORT' not in os.environ: os.environ['PORT'] = '6006' if 'TIME_STR' not in os.environ: os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1' # func name assert sys._getframe().f_code.co_name.startswith('test_') command = sys._getframe().f_code.co_name[5:] class_name = self.__class__.__name__[7:] \ if self.__class__.__name__.startswith('Testing') \ else self.__class__.__name__ outdir = f'results/{class_name}/{command}' from datetime import datetime TIME_STR = bool(int(os.getenv('TIME_STR', 0))) time_str = datetime.now().strftime("%Y%m%d-%H_%M_%S_%f")[:-3] outdir = outdir if not TIME_STR else (outdir + '_' + time_str) print(outdir) import collections, shutil shutil.rmtree(outdir, ignore_errors=True) os.makedirs(outdir, exist_ok=True) import torch from torch_geometric.nn import MessagePassing from torch_geometric.utils import add_self_loops, degree class GCNConv(MessagePassing): def __init__(self, in_channels, out_channels): super(GCNConv, self).__init__(aggr='add') # "Add" aggregation. self.lin = torch.nn.Linear(in_channels, out_channels) def forward(self, x, edge_index): # x has shape [N, in_channels] # edge_index has shape [2, E] # Step 1: Add self-loops to the adjacency matrix. edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) # Step 2: Linearly transform node feature matrix. x = self.lin(x) # Step 3: Compute normalization row, col = edge_index deg = degree(row, x.size(0), dtype=x.dtype) deg_inv_sqrt = deg.pow(-0.5) norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] # Step 4-6: Start propagating messages. 
return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, norm=norm) def message(self, x_j, norm): # x_j has shape [E, out_channels] # Step 4: Normalize node features. return norm.view(-1, 1) * x_j def update(self, aggr_out): # aggr_out has shape [N, out_channels] # Step 6: Return new node embeddings. return aggr_out from torch_geometric.datasets import KarateClub dataset = KarateClub() x = dataset[0].x edge_index = dataset[0].edge_index conv = GCNConv(34, 64) x = conv(x, edge_index) pass