def __init__(self): dataset = "PubMed" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) if not osp.exists(path): Planetoid(path, dataset, T.TargetIndegree()) super(PubMedDataset, self).__init__(path, dataset, T.TargetIndegree())
def __init__(self): dataset = "Cora" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) if not osp.exists(path): Planetoid(path, dataset, T.NormalizeFeatures()) Planetoid(path, dataset, T.TargetIndegree()) super(CoraDataset, self).__init__(path, dataset, T.TargetIndegree())
def transform_setup(graph_u=False, graph_gcn=False, rotation=(180, 1), samplePoints=1024,
                    mesh=False, node_translation=0.01):
    # rotation is expected as a (degrees, axis) pair, since it is unpacked below.
    transform, pretransform = None, None
    if not graph_u and not graph_gcn:
        # Default transformation: scale normalization, centering, point sampling and rotation.
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        transform = T.Compose([
            T.SamplePoints(samplePoints),
            T.RandomRotate(rotation[0], rotation[1])
        ])
        print("pointnet rotation {}".format(rotation))
    elif graph_u:
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        transform = T.Compose([
            T.NormalizeScale(),
            T.Center(),
            T.SamplePoints(samplePoints, True, True),
            T.RandomRotate(rotation[0], rotation[1]),
            T.KNNGraph(k=graph_u)
        ])
    elif graph_gcn:
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        if mesh:
            # Both mesh branches currently build the same pipeline.
            if mesh == "extraFeatures":
                transform = T.Compose([
                    T.RandomRotate(rotation[0], rotation[1]),
                    T.GenerateMeshNormals(),
                    T.FaceToEdge(True),
                    T.Distance(norm=True),
                    T.TargetIndegree(cat=True)
                ])
            else:
                transform = T.Compose([
                    T.RandomRotate(rotation[0], rotation[1]),
                    T.GenerateMeshNormals(),
                    T.FaceToEdge(True),
                    T.Distance(norm=True),
                    T.TargetIndegree(cat=True)
                ])
        else:
            transform = T.Compose([
                T.SamplePoints(samplePoints, True, True),
                T.KNNGraph(k=graph_gcn),
                T.Distance(norm=True)
            ])
            print("no mesh")
        print("Rotation {}".format(rotation))
        print("Meshing {}".format(mesh))
    else:
        print("no transform")
    return transform, pretransform
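A usage sketch for the helper above (a hypothetical call, not from the original source): it assumes a ModelNet-style mesh dataset and passes rotation as the (degrees, axis) pair that transform_setup unpacks.

from torch_geometric.datasets import ModelNet

# graph_gcn=6 selects the kNN-graph branch (k=6); mesh=False samples points instead of keeping faces.
transform, pretransform = transform_setup(graph_gcn=6, rotation=(180, 1), mesh=False)
dataset = ModelNet("data/ModelNet10", name="10", train=True,
                   transform=transform, pre_transform=pretransform)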
def __init__(self): dataset = "CiteSeer" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) if not osp.exists(path): Planetoid(path, dataset, transform=T.TargetIndegree()) super(CiteSeerDataset, self).__init__(path, dataset, transform=T.TargetIndegree())
def __init__(self, args=None): dataset = "PubMed" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) if not osp.exists(path): Planetoid(path, dataset, transform=T.TargetIndegree()) super(PubMedDataset, self).__init__(path, dataset, transform=T.TargetIndegree()) self.data = normalize_feature(self.data)
def __init__(self, args=None):
    self.url = "https://data.dgl.ai/dataset/reddit.zip"
    dataset = "Reddit"
    path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
    if not osp.exists(path):
        Reddit(path)
    super(RedditDataset, self).__init__(path, transform=T.TargetIndegree())
def load_data(dataset_name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_name)
    dataset = Planetoid(path, dataset_name, transform=T.TargetIndegree())
    train_loader = DataLoader(dataset, batch_size=1)
    return dataset, train_loader
def __init__(self): dataset = "Reddit" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) if not osp.exists(path): Reddit(path) super(RedditDataset, self).__init__(path, transform=T.TargetIndegree())
def networkx_to_torch2(self, networkx_graph):
    from torch_geometric.utils import convert
    import torch_geometric.transforms as T

    graph = convert.from_networkx(networkx_graph)
    transform = T.Compose([T.TargetIndegree()])
    graph = transform(graph)
    return graph.to(self.device)
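The method above relies on self.device from its enclosing class; a self-contained sketch of the same conversion (using an assumed example graph, not from the original source) looks like this.

import networkx as nx
import torch_geometric.transforms as T
from torch_geometric.utils import from_networkx

nx_graph = nx.karate_club_graph()
graph = from_networkx(nx_graph)
graph = T.TargetIndegree()(graph)  # writes the normalized target-node in-degree into edge_attr
print(graph.edge_attr.shape)       # [num_edges, 1]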
def get_data2(folder="node_classify/cora", data_name="cora"):
    dataset = Planetoid(
        root=folder,
        name=data_name,
        # pre_transform=T.KNNGraph(k=6),
        # transform=T.NormalizeFeatures(),
        transform=T.TargetIndegree())
    return dataset
def __init__(self): dataset = "PubMed" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) super(PubMedStrucDataset, self).__init__(path, dataset, T.TargetIndegree()) struc_feat = np.load("saved/pubmed.npy") self.data.x = torch.cat( [self.data.x, torch.from_numpy(struc_feat)], dim=1)
def main():
    args = arg_parse()

    # the input that we assume users have
    pyg_dataset = Planetoid('./cora', 'Cora', transform=T.TargetIndegree())
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, tensor_backend=True)
    if args.multigraph:
        graphs = [copy.deepcopy(graphs[0]) for _ in range(10)]

    dataset = GraphDataset(graphs,
                           task='link_pred',
                           edge_message_ratio=args.edge_message_ratio,
                           edge_train_mode=edge_train_mode)
    print('Initial dataset: {}'.format(dataset))

    # split dataset
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=not args.multigraph, split_ratio=[0.85, 0.05, 0.1])
    print('after split')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        datasets['train'][0].num_nodes, datasets['train'][0].num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        datasets['val'][0].num_nodes, datasets['val'][0].num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        datasets['test'][0].num_nodes, datasets['test'][0].num_edges))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    # link prediction needs 2 classes (0, 1)
    num_classes = datasets['train'].num_edge_labels

    model = Net(input_dim, num_classes, args).to(args.device)
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)

    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {split: DataLoader(
        ds, collate_fn=Batch.collate(follow_batch),
        batch_size=args.batch_size, shuffle=(split == 'train'))
        for split, ds in datasets.items()}
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args, scheduler=scheduler)
def get_data(model_name, dataset_dir):
    dataset_path = path.join(dataset_dir, 'Cora')
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, 'Cora', transform=transform)
    return dataset
def get_planetoid(self, dataset='cora'):
    path = osp.join(
        '/home/cai.507/Documents/DeepLearning/sparsifier/sparsenet', 'data', dataset)
    dataset = Planetoid(path, dataset, transform=T.TargetIndegree())
    n_edge = dataset.data.edge_index.size(1)
    g = Data(edge_index=dataset.data.edge_index, edge_weight=torch.ones(n_edge))
    assert g.is_directed() == False
    # g = g.coalesce()
    return g
def get_data(dataset_name, model_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, dataset_name, transform=transform)
    return dataset
def test_compose():
    transform = T.Compose([T.Cartesian(), T.TargetIndegree()])
    assert transform.__repr__() == ('Compose([\n'
                                    ' Cartesian(cat=True),\n'
                                    ' TargetIndegree(cat=True),\n'
                                    '])')

    pos = torch.tensor([[-1, 0], [0, 0], [2, 0]], dtype=torch.float)
    edge_index = torch.tensor([[0, 1], [1, 2]])
    data = Data(edge_index=edge_index, pos=pos)

    out = transform(data).edge_attr.tolist()
    assert out == [[0.75, 0.5, 1], [1, 0.5, 1]]
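A worked reading of the asserted edge_attr values (the normalization details are inferred from the expected output, so treat them as an assumption about the Cartesian implementation): Cartesian stores pos[target] - pos[source], i.e. (1, 0) and (2, 0); with norm=True each component is rescaled as value / (2 * max_abs) + 0.5 with max_abs = 2, giving (0.75, 0.5) and (1.0, 0.5). TargetIndegree then appends the normalized in-degree of each target node, which is 1.0 for both nodes 1 and 2, so the concatenated attributes are [[0.75, 0.5, 1], [1, 0.5, 1]].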
def __init__(self, root, name):
    self.name = name
    edge_list_path = os.path.join(root, name + ".edgelist")
    node_label_path = os.path.join(root, name + ".nodelabel")
    node_features_path = os.path.join(root, name + ".nodefeatures")
    edge_index, y, self.node2id = self._preprocess(edge_list_path, node_label_path)
    self.num_classes = y.shape[1]
    x = self._preprocess_feats(node_features_path, self.node2id)
    assert x.shape[0] == y.shape[0] and x.shape[0] == len(self.node2id)
    y = y.argmax(dim=1)
    self.data = Data(x=x, edge_index=edge_index, y=y)

    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)
    self.data.train_mask = torch.from_numpy(indices < int(0.2 * x.shape[0]))
    self.data.val_mask = torch.from_numpy(
        (int(0.2 * x.shape[0]) <= indices) & (indices < int(0.3 * x.shape[0])))
    self.data.test_mask = torch.from_numpy(int(0.3 * x.shape[0]) <= indices)

    import torch_geometric.transforms as T
    self.transform = T.TargetIndegree()
def load_data(dataset_name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_name)
    dataset = Planetoid(path, dataset_name, transform=T.TargetIndegree())
    num_features = dataset.num_features

    data = GAE.split_edges(GAE, dataset[0])
    data.train_pos_edge_index = gutils.to_undirected(data.train_pos_edge_index)
    data.val_pos_edge_index = gutils.to_undirected(data.val_pos_edge_index)
    data.test_pos_edge_index = gutils.to_undirected(data.test_pos_edge_index)
    data.edge_index = torch.cat([
        data.train_pos_edge_index, data.val_pos_edge_index, data.test_pos_edge_index
    ], dim=1)
    data.edge_train_mask = torch.cat([
        torch.ones(data.train_pos_edge_index.size(-1)),
        torch.zeros(data.val_pos_edge_index.size(-1)),
        torch.zeros(data.test_pos_edge_index.size(-1))
    ], dim=0).byte()
    data.edge_val_mask = torch.cat([
        torch.zeros(data.train_pos_edge_index.size(-1)),
        torch.ones(data.val_pos_edge_index.size(-1)),
        torch.zeros(data.test_pos_edge_index.size(-1))
    ], dim=0).byte()
    data.edge_test_mask = torch.cat([
        torch.zeros(data.train_pos_edge_index.size(-1)),
        torch.zeros(data.val_pos_edge_index.size(-1)),
        torch.ones(data.test_pos_edge_index.size(-1))
    ], dim=0).byte()
    data.edge_type = torch.zeros((data.edge_index.size(-1),)).long()
    data.batch = torch.zeros((1, data.num_nodes), dtype=torch.int64).view(-1)
    data.num_graphs = 1
    return data, num_features
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv

dataset = 'Cora'
transform = T.Compose([
    T.RandomNodeSplit(num_val=500, num_test=500),
    T.TargetIndegree(),
])
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0]


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)
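The script above only defines the model; a minimal training-loop sketch for this kind of transductive example (an assumption, not part of the original snippet: Adam with standard hyperparameters and NLL loss over train_mask) would be:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(1, 201):
    model.train()
    optimizer.zero_grad()
    out = model()  # the model reads x, edge_index, edge_attr from the global `data`
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()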
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=T.TargetIndegree())
data = dataset[0]
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[:data.num_nodes - 1000] = 1
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[data.num_nodes - 500:] = 1


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)
                    action='store_true', help='Use GDC preprocessing.')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    print('Training on CUDA, yeah!')
    torch.cuda.manual_seed(args.seed)

# from torch_geometric.nn import GATConv
from torch.optim.lr_scheduler import MultiStepLR, StepLR

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset = WikiCS(path, transform=T.TargetIndegree())
data = dataset[0]  # Num of feat: 1639
adj = to_scipy_sparse_matrix(edge_index=data.edge_index)
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
# A_tilde = sparse_mx_to_torch_sparse_tensor(normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))).to(device)
A_tilde = normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))
adj_p = normalizemx(adj)
# adj = sparse_mx_to_torch_sparse_tensor(adj).to(device)
features = data.x
features = torch.FloatTensor(np.array(features))
labels = data.y
labels = torch.LongTensor(np.array(labels))
adj_sct1 = scattering1st(adj_p, 1)
adj_sct2 = scattering1st(adj_p, 2)
adj_sct4 = scattering1st(adj_p, 4)
import os.path as osp

import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import ENZYMES
from torch_geometric.data import DataLoader

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ENZYMES2')
split = torch.randperm(ENZYMES.num_graphs)
dataset = ENZYMES(path, split, transform=T.TargetIndegree())
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in dataloader:
    print(data.x.size(), data.edge_attr.size(), data.y.size())
from torch_geometric.nn import SplineConv
from torch_geometric.nn import GCNConv
import sys
import networkx as nx
import pdb

from deepsnap.dataset import GraphDataset
from deepsnap.batch import Batch
from torch.utils.data import DataLoader

name = 'Cora'
model_name = 'GCN'
fixed_split = True
# load some format of graph data
pyg_dataset = Planetoid('./cora', name, transform=T.TargetIndegree())

if not fixed_split:
    # transform to our format
    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, verbose=True, fixed_split=fixed_split)
    # node, edge, link_pred, graph
    dataset = GraphDataset(graphs, task='node')
    # transductive split, inductive split
    dataset_train, dataset_val, dataset_test = dataset.split(
        transductive=True, split_ratio=[0.8, 0.1, 0.1])
else:
    # transform to our format
    graphs_train, graphs_val, graphs_test = \
        GraphDataset.pyg_to_graphs(pyg_dataset, verbose=True, fixed_split=fixed_split)
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=T.TargetIndegree())
data = dataset[0]
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[:data.num_nodes - 1000] = 1
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[data.num_nodes - 500:] = 1


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)
def __init__(self): dataset = "PubMed" path = osp.join(osp.dirname(osp.realpath(__file__)), "..", "data", dataset) super(PubMedDataset, self).__init__(path, dataset, T.TargetIndegree())
def __init__(self): dataset = "Reddit" path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) super(RedditDataset, self).__init__(path, T.TargetIndegree())
from torch_geometric.datasets import CitationFull
from torch_geometric.utils import to_scipy_sparse_matrix
import torch_geometric.transforms as T

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'DBLP')

from utils import normalize_adjacency_matrix, normalizemx, sparse_mx_to_torch_sparse_tensor
from DBLP_utils import SCAT_Red
from layers import GC_withres, GraphConvolution
# from torch_geometric.nn import GATConv
from torch.optim.lr_scheduler import MultiStepLR, StepLR

# dataset = TUDataset(root=path, name='REDDIT-BINARY')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset = CitationFull(path, name='dblp', transform=T.TargetIndegree())
data = dataset[0]  # Num of feat: 1639
adj = to_scipy_sparse_matrix(edge_index=data.edge_index)
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
A_tilde = sparse_mx_to_torch_sparse_tensor(
    normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))).to(device)
adj = sparse_mx_to_torch_sparse_tensor(adj).to(device)
# print(dataset)
# print(data.x.shape)
# print(data.y.shape)
# tp = SCAT_Red(in_features=1639, med_f0=10, med_f1=10, med_f2=10, med_f3=10, med_f4=10).to(device)
# tp2 = SCAT_Red(in_features=40, med_f0=30, med_f1=10, med_f2=10, med_f3=10, med_f4=10).to(device)
train_mask = torch.cat((torch.ones(10000), torch.zeros(2000), torch.zeros(2000),
                        torch.zeros(3716)), 0) > 0