def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load; one of
        ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset; attributes:
        [graph, features, labels, mask]. ``mask`` is built from the
        dataset's ``train_mask``.
    :raises ValueError: if ``graph_name`` is not one of the supported names
    """
    # retrieve the dataset
    if graph_name == 'cora':
        dataset = citation_graph.load_cora()
    elif graph_name == 'citeseer':
        dataset = citation_graph.load_citeseer()
    elif graph_name == 'pubmed':
        dataset = citation_graph.load_pubmed()
    else:
        # "{}" rather than "{:s}": the "s" format spec is rejected by
        # non-string values (e.g. None raises TypeError), which would hide
        # the ValueError this branch is meant to raise.
        raise ValueError(
            "Unknown citation graph name <{}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # return the datasets' components
    dataset_tuple = namedtuple("citation_graph",
                               ["graph", "features", "labels", "mask"])
    return dataset_tuple(DGLGraph(dataset.graph),
                         torch.FloatTensor(dataset.features),
                         torch.LongTensor(dataset.labels),
                         torch.BoolTensor(dataset.train_mask))
def load_pubmed_data():
    """Load the PubMed citation dataset.

    Returns:
        A 5-tuple ``(g, features, labels, train_mask, test_mask)``: the
        DGLGraph built from the dataset's graph, float features, long
        labels, and boolean train/test masks.
    """
    pubmed = citegrh.load_pubmed()
    node_features = torch.FloatTensor(pubmed.features)
    node_labels = torch.LongTensor(pubmed.labels)
    masks = (torch.BoolTensor(pubmed.train_mask),
             torch.BoolTensor(pubmed.test_mask))
    graph = DGLGraph(pubmed.graph)
    return (graph, node_features, node_labels) + masks
def load_data(dataset):
    """Load a node-classification dataset and split it sequentially.

    Args:
        dataset: Dataset name. 'cora', 'pubmed' and 'citeseer' are loaded
            through ``citegrh``; 'amazon-computers', 'amazon-photo' and
            'coauthor-cs' through ``gnn_benckmark``. Any other value falls
            back to the Coauthor 'physics' dataset.

    Returns:
        A 7-tuple ``(g, features, labels, num_labels, train_mask, val_mask,
        test_mask)``. The masks are boolean tensors covering, in node
        order, the first 70%, the next 20% and the final 10% of the nodes.
    """
    citation_loaders = {
        'cora': citegrh.load_cora,
        'pubmed': citegrh.load_pubmed,
        'citeseer': citegrh.load_citeseer,
    }

    if dataset in citation_loaders:
        data = citation_loaders[dataset]()
        features = th.FloatTensor(data.features)
        labels = th.LongTensor(data.labels)
        num_labels = data.num_labels
        g = DGLGraph(data.graph)
    else:
        if dataset == 'amazon-computers':
            benchmark = gnn_benckmark.AmazonCoBuy('computers')
        elif dataset == 'amazon-photo':
            benchmark = gnn_benckmark.AmazonCoBuy('photo')
        elif dataset == 'coauthor-cs':
            benchmark = gnn_benckmark.Coauthor('cs')
        else:
            # Unrecognised names have always fallen through to Coauthor
            # physics; kept so existing callers keep working.
            benchmark = gnn_benckmark.Coauthor('physics')
        g = benchmark[0]
        features = th.FloatTensor(g.ndata['feat'].float())
        labels = th.LongTensor(g.ndata['label'])
        # These datasets expose no class count here, so derive it from the
        # largest label id.
        num_labels = int(th.max(labels) + 1)

    # Dataset split points
    num_nodes = labels.shape[0]
    split1 = int(0.7 * num_nodes)
    split2 = int(0.9 * num_nodes)

    train_mask = th.BoolTensor(_sample_mask(range(split1), num_nodes))
    val_mask = th.BoolTensor(
        _sample_mask(range(split1, split2), num_nodes))
    # The test range runs to num_nodes (exclusive). The previous upper bound
    # of num_nodes - 1 left the last node out of every split.
    test_mask = th.BoolTensor(
        _sample_mask(range(split2, num_nodes), num_nodes))

    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))

    return g, features, labels, num_labels, train_mask, val_mask, test_mask
def load_data(dataset_name: str):
    """Load one of the citation graph datasets by name.

    Args:
        dataset_name: One of "cora", "citeseer" or "pubmed".

    Returns:
        A 5-tuple ``(g, features, labels, train_mask, test_mask)``: the
        DGLGraph built from the dataset's graph, float features, long
        labels, and boolean train/test masks.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset. The
            original fell through and failed later with an unbound local
            variable.
    """
    if dataset_name == "cora":
        data = citegrh.load_cora()
    elif dataset_name == "citeseer":
        data = citegrh.load_citeseer()
    elif dataset_name == "pubmed":
        data = citegrh.load_pubmed()
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
def load_data(dataset_name, self_loops):
    """Return the raw dataset object selected by ``dataset_name``.

    Args:
        dataset_name: 'cora', 'citeseer', 'pubmed', "PPI", or any string
            starting with 'reddit'.
        self_loops: Forwarded as ``self_loop`` to ``RedditDataset``; not
            used for the other datasets.

    Raises:
        ValueError: If ``dataset_name`` is None or matches no known dataset.
    """
    # Exact-name datasets, tried in order. Factories are wrapped in lambdas
    # so nothing is looked up or loaded until a name matches.
    exact_matches = (
        ('cora', lambda: citegrh.load_cora()),
        ('citeseer', lambda: citegrh.load_citeseer()),
        ('pubmed', lambda: citegrh.load_pubmed()),
        ("PPI", lambda: PPIDataset('test')),
    )
    for known_name, build in exact_matches:
        if dataset_name == known_name:
            return build()

    if dataset_name is None or not dataset_name.startswith('reddit'):
        raise ValueError('Unknown dataset: {}'.format(dataset_name))
    return RedditDataset(self_loop=self_loops)
def load_pubmed_data():
    """Load PubMed and rebuild its graph with one self-loop per node.

    Returns:
        A 6-tuple ``(g, features, labels, mask, val_mask, test_mask)``.
        ``mask`` is the training mask. All three masks are boolean tensors;
        the training mask used to be a uint8 ``ByteTensor``, which is
        deprecated for mask indexing and disagreed with the other two.
    """
    data = citegrh.load_pubmed()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)

    g = data.graph
    # add self loop, A^hat = A + I in the paper
    # Existing self-loops are dropped first, then one is added per node.
    # The edges are collected into a list so the graph is not mutated while
    # the self-loop iterator is still being consumed.
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    # return graph, node features, labels, and train/val/test masks
    return g, features, labels, mask, val_mask, test_mask
def load_data(dataset="cora"):
    """Load a dataset and attach tensors, graph and adjacency data to it.

    Args:
        dataset: One of "cora", "pubmed", "citeseer" or "synthetic".

    Returns:
        The loaded data object, with these attributes set or overwritten:
        ``features`` (float tensor), ``labels`` (long tensor), ``size``
        (number of labels), ``g`` (DGLGraph with one self-loop per node),
        ``adj`` (dense adjacency matrix of ``g``) and ``Prob`` (``adj``
        passed through ``normalize`` with ``p=1, dim=1``).

    Raises:
        ValueError: If ``dataset`` is not a supported name. This replaces an
            ``assert``, which is stripped under ``python -O`` and would then
            let any unknown name silently load the synthetic data.
    """
    supported = ("cora", "pubmed", "citeseer", "synthetic")
    if dataset not in supported:
        raise ValueError(
            "Unknown dataset: {}; expected one of {}".format(
                dataset, list(supported)))

    if dataset == "cora":
        data = citegrh.load_cora()
    elif dataset == "pubmed":
        data = citegrh.load_pubmed()
    elif dataset == "citeseer":
        data = citegrh.load_citeseer()
    else:
        data = synthetic_data()

    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]

    g = data.graph
    # Drop existing self-loops, then add exactly one per node. The edges
    # are collected into a list so the graph is not mutated while the
    # self-loop iterator is still being consumed.
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    data.g = g

    data.adj = g.adjacency_matrix(transpose=None).to_dense()
    # NOTE(review): `normalize` is presumably torch.nn.functional.normalize,
    # which would make each row of Prob sum to 1 -- confirm against the
    # file's imports.
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)

    print("============Successfully Load %s===============" % dataset)
    return data
x = F.log_softmax(self.layer2(g, x)) return x from dgl.data import citation_graph as citegrh import networkx as nx def load_cora_data(): data = citegrh.load_cora() features = torch.FloatTensor(data.features) labels = torch.LongTensor(data.labels) train_mask = torch.BoolTensor(data.train_mask) test_mask = torch.BoolTensor(data.test_mask) g = DGLGraph(data.graph) return g, features, labels, train_mask, test_mask data = citegrh.load_pubmed() #features = torch.FloatTensor(data.features) #g = DGLGraph(data.graph).to(device) #dataset = da.CoraGraphDataset() device = torch.device('cuda') #model = Net() model = Net().to(device) features = torch.FloatTensor(data.features).to(device) g = DGLGraph(data.graph).to(device) #data = dataset[0].to(device)
from itertools import product
import torch
from runtime.dgl.gcn import GCN
from runtime.dgl.gat import GAT
from runtime.dgl.train import train_runtime
from dgl.data import citation_graph
from dgl import DGLGraph

# Time every (dataset, model) pairing, on the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Cora = citation_graph.load_cora()
CiteSeer = citation_graph.load_citeseer()
PubMed = citation_graph.load_pubmed()

for d, Net in product([Cora, CiteSeer, PubMed], [GCN, GAT]):
    g = DGLGraph(d.graph)

    # Node features, labels and training mask as tensors on the device.
    x = torch.tensor(d.features, dtype=torch.float, device=device)
    y = torch.tensor(d.labels, dtype=torch.long, device=device)
    mask = torch.tensor(d.train_mask, dtype=torch.uint8, device=device)

    # Add an edge from every node to itself.
    g.add_edges(g.nodes(), g.nodes())

    # Per-node norm = in_degree ** -0.5; zero-degree nodes yield inf,
    # which is reset to 0.
    norm = g.in_degrees().float().pow(-0.5)
    norm.masked_fill_(torch.isinf(norm), 0)
    g.ndata['norm'] = norm.unsqueeze(1).to(device)

    model = Net(g, x.size(1), d.num_labels)
    t = train_runtime(model, x, y, mask, epochs=200, device=device)
    print('{} - {}: {:.2f}s'.format(d.name, Net.__name__, t))
path = args.p algo = args.algo if len(path) > 0: #G = mmread(path) #nxgraph = nx.Graph(G) #graph = dgl.from_networkx(nxgraph) edges = readmtxGraph(path) graph = dgl.graph(edges) #print(graph.edges()) elif graph == "simple": graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4])) elif graph == "citeseer": data = load_citeseer(".") graph = data[0] elif graph == "pubmed": data = load_pubmed(".") graph = data[0] else: data = load_cora(".") graph = data[0] N = len(graph.nodes()) print("#Nodes:", N, "#Edges:", len(graph.edges()[0])) embed = torch.rand(N, dim) #print(embed) #need to check batch processing ... print("Creating batch graphs...") if bsize == 256: bgraphs = batch_process(graph, 1024, 50) elif bsize == 1: bgraphs = [[graph, 0, N]] else:
def __init__(self, name, seed, self_loop=False, split=None):
    """Load a small graph dataset and set up its train/test masks.

    Args:
        name: 'cora', 'citeseer', 'pubmed', 'amazon' or 'karate'.
        seed: Controls the custom split. 0 takes the first nodes as
            training data; any other value seeds Python's global
            ``random`` module and samples training nodes.
        self_loop: If true, self-loops are added to the graph.
        split: Fraction of nodes (strictly between 0 and 1) used for
            training. When falsy, the dataset's own train/test masks are
            used. Required for 'amazon'.

    Raises:
        ValueError: If ``name`` is not a supported dataset. The original
            failed later with an unbound local variable.

    Sets ``graph``, ``features``, ``n_features``, ``labels``, ``n_label``,
    ``n_nodes``, ``train_mask`` and ``test_mask`` on the instance.
    """
    super(SmallGraphDataset, self).__init__()

    citation_loaders = {
        'cora': citegrh.load_cora,
        'citeseer': citegrh.load_citeseer,
        'pubmed': citegrh.load_pubmed,
    }

    if name in citation_loaders:
        data = citation_loaders[name]()
        graph = data.graph
        if self_loop:
            graph = self.add_selfloop(graph)
        graph = dgl.DGLGraph(graph)
        features = data.features
        labels = data.labels
    elif name == 'amazon':
        assert split is not None
        data = AmazonCoBuy(name='computers')
        graph = data.data[0]
        if self_loop:
            # Remove any existing self-loops before adding one per node.
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        # must create split
        features = graph.ndata['feat']
        labels = graph.ndata['label']
    elif name == 'karate':
        kG = nx.karate_club_graph()
        # Label is 0 for members of Mr. Hi's club and 1 for everyone else.
        labels = np.array(
            [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]
        ).astype(np.int64)
        graph = dgl.DGLGraph(kG)
        if self_loop:
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        features = torch.eye(n=graph.number_of_nodes())
        # Only the two club leaders are labelled for training; every other
        # node is a test node.
        self.train_mask = torch.zeros(graph.number_of_nodes(),
                                      dtype=torch.bool)
        self.train_mask[0] = True  # Mr. Hi
        self.train_mask[33] = True  # John A
        self.test_mask = ~self.train_mask
    else:
        raise ValueError('Unknown dataset: {}'.format(name))

    graph = self.compute_norm(graph)
    self.graph = graph
    self.features = torch.FloatTensor(features)
    self.n_features = self.features.size(1)
    self.labels = torch.LongTensor(labels)
    self.n_label = torch.unique(self.labels).size(0)
    self.n_nodes = graph.number_of_nodes()

    # A branch above (karate) may already have fixed the masks.
    if hasattr(self, 'train_mask'):
        return

    if split:
        print('using {} for training data.'.format(split))
        assert split > 0.0
        assert split < 1.0
        sample_size = ceil(self.n_nodes * split)
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        train_np = np.zeros(self.n_nodes, dtype=bool)
        test_np = np.zeros(self.n_nodes, dtype=bool)
        # Nodes 500..1499 are always the test set.
        # NOTE(review): this assumes the graph has at least 1500 nodes.
        test_np[range(500, 1500)] = 1
        if seed == 0:
            # use first few data points as seed
            # NOTE(review): with sample_size > 500 this overlaps the fixed
            # test range -- confirm that is intended.
            train_idx = range(sample_size)
            train_np[train_idx] = 1
        else:
            random.seed(seed)
            # Sample from the nodes outside the test range, then shift
            # indices >= 500 past it.
            train_idx = random.sample(range(self.n_nodes - 1000),
                                      sample_size)
            mapped_train_idx = [idx if idx < 500 else idx + 1000
                                for idx in train_idx]
            train_np[mapped_train_idx] = 1
        self.train_mask = torch.tensor(train_np, dtype=torch.bool)
        self.test_mask = torch.tensor(test_np, dtype=torch.bool)
    else:
        # use original split
        self.train_mask = torch.BoolTensor(data.train_mask)
        self.test_mask = torch.BoolTensor(data.test_mask)
def load_ppi_data():
    """Return ``(g, labels, features)`` for the dataset loaded below.

    NOTE(review): despite the function name, this loads the PubMed citation
    dataset via ``citegrh.load_pubmed()`` -- confirm whether PPI was
    intended.
    """
    dataset = citegrh.load_pubmed()
    node_labels = th.LongTensor(dataset.labels)
    node_features = th.FloatTensor(dataset.features)
    graph = DGLGraph(dataset.graph)
    return graph, node_labels, node_features