def main(args):
    if args.gpu < 0:
        device = "cpu"
    else:
        device = f"cuda:{args.gpu}"

    g = load_data(args).graph
    n_nodes = g.number_of_nodes()

    # build sparse adjacency matrix; (dst, src) order so spmm aggregates in-edges
    src, dst = g.all_edges()
    adj = torch.sparse.FloatTensor(torch.stack([dst, src]),
                                   torch.ones(src.shape),
                                   torch.Size([n_nodes, n_nodes]))
    adj = adj.coalesce().to(device)

    # generate random node features
    features = torch.randn(n_nodes, args.n_hidden).to(device)

    # warm up
    for _ in range(args.n_repeat):
        x = torch.spmm(adj, features)
    if device != "cpu":
        torch.cuda.synchronize()

    start = time.time()
    for _ in range(args.n_repeat):
        x = torch.spmm(adj, features)
    if device != "cpu":
        torch.cuda.synchronize()
    end = time.time()

    print("Time (ms): {:.3f}".format((end - start) * 1e3 / args.n_repeat))
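# A minimal driver sketch for the SpMM benchmark above. The flag names
# (--gpu, --n-hidden, --n-repeat) are assumptions inferred from the
# attributes main() reads, not confirmed by the source.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='SpMM benchmark')
    parser.add_argument('--dataset', type=str, default='cora')
    parser.add_argument('--gpu', type=int, default=-1, help='GPU id; -1 means CPU')
    parser.add_argument('--n-hidden', type=int, default=16)
    parser.add_argument('--n-repeat', type=int, default=100)
    main(parser.parse_args())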
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    G = data.graph
    fun_decompose_graph_central_rectangle.main_of_decompose(G, args.dataset)
    print('done')
    exit(0)
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    mask = mx.nd.array(data.train_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    # pick an MXNet context (the original mixed in torch.cuda calls, which
    # do not apply to mx.nd arrays)
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu()
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
        features = features.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        mask = mask.as_in_context(ctx)

    # create graph
    g = DGLGraph(data.graph)

    # create model
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                args.num_heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.residual)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits, labels)
        loss.backward()
        trainer.step(features.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)

        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))
def emb_dataloader(args):
    # load and preprocess dataset
    data = load_data(args)
    normal_class = get_normal_class(args)
    labels, train_mask, val_mask, test_mask = one_class_processing(data, normal_class, args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(labels)
    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    g = data.graph
    datadict = {'g': g, 'features': features, 'labels': labels,
                'train_mask': train_mask, 'val_mask': val_mask, 'test_mask': test_mask,
                'in_feats': in_feats, 'n_classes': n_classes, 'n_edges': n_edges}
    return datadict
def load_dataset(dataset="cora"):
    args = namedtuple("args", ["dataset"])
    dataset = load_data(args(dataset))

    params = {}
    params['infeats'] = dataset.features.astype('float32')  # Only support float32 as feature for now

    # Remove self-loops to avoid duplicate passing of a node's feature to itself,
    # then add exactly one self-loop per node
    g = dataset.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g.add_edges_from(zip(g.nodes, g.nodes))

    # Generate adjacency matrix in CSR form
    adjacency = nx.to_scipy_sparse_matrix(g)
    params['data'] = adjacency.data.astype('float32')
    params['indices'] = adjacency.indices.astype('int32')
    params['indptr'] = adjacency.indptr.astype('int32')

    # Normalization w.r.t. node degrees
    degs = [g.in_degree[i] for i in range(g.number_of_nodes())]
    params['norm'] = np.power(degs, -0.5).astype('float32')
    params['norm'] = params['norm'].reshape((params['norm'].shape[0], 1))

    return params
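# A usage sketch (not from the source) showing how the returned CSR arrays
# and the D^{-1/2} norm could drive one normalized propagation step,
# norm * (A @ (norm * X)), with plain scipy; propagate() is a hypothetical name.
import numpy as np
import scipy.sparse as sp

def propagate(params):
    n = params['indptr'].shape[0] - 1
    adj = sp.csr_matrix((params['data'], params['indices'], params['indptr']),
                        shape=(n, n))
    x = params['norm'] * params['infeats']  # scale features by D^{-1/2}
    x = adj @ x                             # aggregate over edges
    return params['norm'] * x               # scale the result by D^{-1/2}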
def worker(self, args): """User-defined worker function """ # Start sender namebook = { 0:args.ip } sender = dgl.contrib.sampling.SamplerSender(namebook) # load and preprocess dataset data = load_data(args) ctx = mx.cpu() if args.self_loop and not args.dataset.startswith('reddit'): data.graph.add_edges_from([(i,i) for i in range(len(data.graph))]) train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(np.int64).as_in_context(ctx) test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(np.int64).as_in_context(ctx) # create GCN model g = DGLGraph(data.graph, readonly=True) while True: idx = 0 for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size, args.num_neighbors, neighbor_type='in', shuffle=True, num_hops=args.n_layers+1, seed_nodes=train_nid): print("send train nodeflow: %d" %(idx)) sender.send(nf, 0) idx += 1 sender.signal(0)
def get_data(args):
    """Data loader. For now, just a test sample."""
    args.syn_train_ratio = 0.1
    args.syn_val_ratio = 0.1
    args.syn_test_ratio = 0.8

    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    args.in_feats = features.shape[1]
    args.classes = data.num_labels
    args.n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.n_edges, args.classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()

    # masks over the first 10% of nodes for the attacker/target split
    stop_number = int(np.round(len(labels) * 0.1))
    attacker_mask = torch.ByteTensor(sample_mask(range(stop_number), labels.shape[0]))
    target_mask = torch.ByteTensor(sample_mask(range(stop_number), labels.shape[0]))

    return features, labels, train_mask, val_mask, test_mask, data
def load_cls_data(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    # add self loop (remove any existing ones first to avoid duplicates)
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    # rebuild as a dgl.graph from the edge list
    row = g.edges()[0]
    col = g.edges()[1]
    g = dgl.graph((row, col))

    return g, features, labels, n_classes, train_mask, val_mask, test_mask
def load(args, save_file=".npy"):
    save_file = args.dataset + save_file
    if os.path.exists(save_file):
        # the cached object array needs allow_pickle=True to be restored
        return np.load(save_file, allow_pickle=True).tolist()
    else:
        datas = load_data(args)
        np.save(save_file, datas)
        return datas
def main(args):
    data = load_data(args)
    g = data.graph
    if isinstance(g, dgl.DGLGraph):
        csr = g.adjacency_matrix_scipy(transpose=True)
    else:
        csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
    graph_io.save_graph(args.out, csr)
def load_dataset(dataset="cora"):
    args = namedtuple("args", ["dataset"])
    data = load_data(args(dataset))

    # Remove self-loops to avoid duplicate passing of a node's feature to itself,
    # then add exactly one self-loop per node
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g.add_edges_from(zip(g.nodes, g.nodes))

    return g, data
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    mask = torch.ByteTensor(data.train_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        mask = mask.cuda()

    # create graph
    g = DGLGraph(data.graph)

    # create model
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                args.num_heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.residual)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        logp = F.log_softmax(logits, 1)
        loss = F.nll_loss(logp, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.item(), np.mean(dur), n_edges / np.mean(dur) / 1000))
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(np.int64).as_in_context(ctx)
    test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(np.int64).as_in_context(ctx)

    features = mx.nd.array(data.features).as_in_context(ctx)
    labels = mx.nd.array(data.labels).as_in_context(ctx)
    train_mask = mx.nd.array(data.train_mask).as_in_context(ctx)
    val_mask = mx.nd.array(data.val_mask).as_in_context(ctx)
    test_mask = mx.nd.array(data.test_mask).as_in_context(ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create read-only graph with node data attached
    g = DGLGraph(data.graph, readonly=True)
    g.ndata['features'] = features
    g.ndata['labels'] = labels

    if args.model == "gcn_ns":
        gcn_ns_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples)
    elif args.model == "gcn_cv":
        gcn_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples)
    elif args.model == "graphsage_cv":
        graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples)
    else:
        print("unknown model. Please choose from gcn_ns, gcn_cv, graphsage_cv")
def main(args):
    if args.dataset == 'segtree':
        g = build_segtree(batch_size=32, seq_len=512)
        print('#Nodes: %d #Edges: %d' % (g.number_of_nodes(), g.number_of_edges()))
        csr = g.adjacency_matrix_scipy(fmt='csr')
    else:
        data = load_data(args)
        g = data.graph
        csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
    graph_io.save_graph(args.out, csr)
def load(kwargs, save_file=".pkl"):
    # wrap the kwargs dict in a namedtuple so load_data can use attribute access
    kwarg_nt = namedtuple('kwarg', kwargs.keys())(*kwargs.values())
    save_file = 'data/' + kwarg_nt.dataset + save_file
    if os.path.exists(save_file):
        with open(save_file, "rb") as f:
            return pkl.load(f)
    else:
        datas = load_data(kwarg_nt)
        with open(save_file, "wb") as f:
            pkl.dump(datas, f)
        return datas
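# Hypothetical usage of the pickle cache above: the first call runs load_data
# and writes data/cora.pkl (assuming the data/ directory exists); later calls
# read the file back instead. Only the 'dataset' key is required by load()
# itself; load_data may expect more fields.
datas = load({'dataset': 'cora'})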
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)

    model = VGAE(in_feats, [32, 16], zdim=10, device=device)
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features
    n_epochs = 500
    losses = []
    loss = 0.0

    print('Training Start')
    t = trange(n_epochs, desc="Loss: 0.0", leave=True)
    for epoch in t:
        g.ndata['h'] = features
        t.set_description("Loss: {}".format(loss))
        t.refresh()

        # normalization constant for the reconstruction loss
        adj = g.adjacency_matrix().to_dense()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
        # weight positive entries to offset the sparsity of adj
        pos_weight = torch.Tensor([
            float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        ]).to(device)

        z, adj_logits = model.forward(g)
        loss = model.compute_loss(z, adj_logits, adj, norm, pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
def load_citation(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features).to(device)
    labels = torch.LongTensor(data.labels).to(device)
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    valid_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    # add self loop (remove any existing ones first to avoid duplicates)
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    return g, features, labels, train_mask, valid_mask, test_mask
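# A standalone sketch of the self-loop handling used above, on a toy
# networkx graph (nodes and edges here are made up for illustration):
# removing existing self loops first leaves every node with exactly one
# self loop instead of duplicates.
import networkx as nx

toy = nx.DiGraph([(0, 1), (1, 2), (2, 2)])  # node 2 already has a self loop
toy.remove_edges_from(list(nx.selfloop_edges(toy)))
toy.add_edges_from(zip(toy.nodes(), toy.nodes()))
assert all(toy.has_edge(v, v) for v in toy.nodes())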
def main(args):
    # load and preprocess dataset
    if args.graph_file != '':
        csr = mx.nd.load(args.graph_file)[0]
        n_edges = csr.shape[0]
        graph_name = os.path.basename(args.graph_file)
        data = GraphData(csr, args.num_feats, graph_name)
        csr = None
    else:
        data = load_data(args)
        n_edges = data.graph.number_of_edges()
        graph_name = args.dataset
        if args.self_loop and not args.dataset.startswith('reddit'):
            data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    mem_ctx = mx.cpu()
    features = mx.nd.array(data.features, ctx=mem_ctx)
    labels = mx.nd.array(data.labels, ctx=mem_ctx)
    train_mask = mx.nd.array(data.train_mask, ctx=mem_ctx)
    val_mask = mx.nd.array(data.val_mask, ctx=mem_ctx)
    test_mask = mx.nd.array(data.test_mask, ctx=mem_ctx)

    n_classes = data.num_labels
    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create the shared-memory graph store server
    print('graph name: ' + graph_name)
    g = dgl.contrib.graph_store.create_graph_store_server(data.graph, graph_name,
                                                          "shared_mem", args.num_workers,
                                                          False, edge_dir='in')
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    g.ndata['val_mask'] = val_mask
    g.ndata['test_mask'] = test_mask
    g.run()
def generate_data(args):
    data = load_data(args)
    labels = torch.LongTensor(data.labels)
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g).to('cuda:0')
    g.add_edges(g.nodes(), g.nodes())

    # round-trip through networkx to attach edge weights; move the dense
    # adjacency to CPU before converting to numpy
    netg = nx.from_numpy_matrix(g.adjacency_matrix().to_dense().cpu().numpy(),
                                create_using=nx.DiGraph)
    g = dgl.from_networkx(netg, edge_attrs=['weight']).to("cuda:0")
    n_edges = g.number_of_edges()

    # create model head configuration
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    print("train_mask shape:", train_mask.shape)
    return g, num_feats, n_classes, heads, cuda, features, labels, train_mask, val_mask, test_mask
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)

    model = GAE(in_feats, [32, 16])
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features
    n_epochs = 500
    losses = []

    print('Training Start')
    for epoch in tqdm(range(n_epochs)):
        g.ndata['h'] = features

        # symmetric degree normalization
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)

        # weight positive entries to offset the sparsity of adj
        adj = g.adjacency_matrix().to_dense()
        pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])

        adj_logits = model.forward(g)
        loss = loss_function(adj_logits, adj, pos_weight=pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss))

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
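# BCELoss above is called with a pos_weight argument, which matches the
# semantics of torch's binary_cross_entropy_with_logits; a plausible
# definition is sketched here as an assumption, since the source does not
# show BCELoss itself.
import torch.nn.functional as F

def BCELoss(adj_logits, adj, pos_weight):
    # up-weight the (rare) positive entries of the dense adjacency target
    return F.binary_cross_entropy_with_logits(adj_logits, adj, pos_weight=pos_weight)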
def main(args):
    data = load_data(args)
    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()
    graph_name = args.dataset
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    g = dgl.contrib.graph_store.create_graph_store_server(
        data.graph, graph_name, 'shared_mem', args.num_workers, False, edge_dir='in')

    # precompute in-degree normalization on a temporary read-only graph
    dgl_g = DGLGraph(data.graph, readonly=True)
    norm = 1. / dgl_g.in_degrees().float().unsqueeze(1)
    del dgl_g

    g.ndata['norm'] = norm
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    g.ndata['val_mask'] = val_mask
    g.ndata['test_mask'] = test_mask

    print('start running graph server on dataset: {}'.format(graph_name))
    g.run()
def main(args):
    if args.dataset == 'segtree':
        g = build_segtree(batch_size=32, seq_len=512)
        print('#Nodes: %d #Edges: %d' % (g.number_of_nodes(), g.number_of_edges()))
        csr = g.adjacency_matrix_scipy(fmt='csr')
        n, m = 32 * 512, 32 * 512
    else:
        data = load_data(args)
        g = data.graph
        if isinstance(g, dgl.DGLGraph):
            csr = g.adjacency_matrix_scipy(transpose=True)
        else:
            csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
        n, m = csr.indptr.shape[0] - 1, csr.indptr.shape[0] - 1
    graph_io.save_graph(args.out, csr, n, m)
def main():
    n_users = 100
    n_relationships = 10

    data = load_data(args)
    gl = DGLGraph(data.graph)
    features, labels = data.features, data.labels
    G = gl.to_networkx()

    # subsample the graph along a random walk and compare sizes before/after
    sampled_nodes = random_walk(G)
    print(' features:', len(features))
    print(' labels:', len(labels))
    print(' nodes:', len(gl.nodes()))

    gl, features, labels = sample_data(gl, features, labels, sampled_nodes)
    print(' features:', len(features))
    print(' labels:', len(labels))
    print(' nodes:', len(gl.nodes()))
def main(args):
    percents = [10, 20, 30, 40]
    criteria = ['random']  # alternatives: 'closeness', 'rank', 'betweenness', 'katz'
    degrees = [0, 1, 2, 3, 4, 5, 6, 7]
    folds = range(160, 162)
    epocs = [600]

    # load and preprocess dataset
    data = load_data(args)
    dgl_g = DGLGraph(data.graph)
    features, labels = data.features, data.labels
    print(np.shape(data.features))
    print(np.shape(data.labels))

    node_list = dgl_g.nodes().tolist()
    G, temp_G = get_weak_ties_network(dgl_g, node_list)

    # sweep removal percentage, removal criterion, fold, degree, and epochs
    for p in percents:
        for c in criteria:
            G_ = G.copy()
            g, remaining_nodes = na.network_preprocess(G_, temp_G, node_list,
                                                       args.dir_, args.dataset, p, c)
            for f in folds:
                features2, labels2, train_mask, val_mask, test_mask = \
                    na.get_model_parameters(remaining_nodes, features, labels)
                for d in degrees:
                    print('******************* degree %s ******************' % d)
                    for e in epocs:
                        print(args.dataset, 'criteria:', c, 'percent:', p,
                              'degree:', d, 'fold:', f, 'epoch:', e)
                        run_(data, args, g, features2, labels2,
                             train_mask, val_mask, test_mask, p, c, f, d, e)
def load_cls_data(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    # add self loop
    g = DGLGraph(data.graph)
    g.add_edges(g.nodes(), g.nodes())

    return g, features, labels, n_classes, train_mask, val_mask, test_mask
def worker(self, args):
    number_hops = 1
    if args.model == "gcn_ns":
        number_hops = args.n_layers + 1
    elif args.model == "gcn_cv":
        number_hops = args.n_layers
    else:
        print("unknown model. Please choose from gcn_ns and gcn_cv")

    # Start sender
    namebook = {0: args.ip}
    sender = dgl.contrib.sampling.SamplerSender(namebook)

    # load and preprocess dataset
    data = load_data(args)
    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])
    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    # create read-only graph for sampling
    g = DGLGraph(data.graph, readonly=True)

    # keep sampling nodeflows and sending them to the trainer
    while True:
        idx = 0
        for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=number_hops,
                                                       seed_nodes=train_nid):
            print("send train nodeflow: %d" % idx)
            sender.send(nf, 0)
            idx += 1
        sender.signal(0)
def main(args):
    # load and preprocess dataset
    train_acc_list = []
    test_acc_list = []
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
        print('>> not using GPU')
    else:
        cuda = True
        print('>> using GPU ...')
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())

    # create SGC model
    model = SGConv(in_feats, n_classes, k=2, cached=True, bias=args.bias)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # loss_fcn = FocalLoss(gamma=0)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward; compute the loss on the train set only
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_mask)
        test_acc = evaluate(model, g, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.6f} | Val accuracy {:.6f} | "
              "Test accuracy {:.6f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), acc, test_acc,
                  n_edges / np.mean(dur) / 1000))
        train_acc_list.append(acc)
        test_acc_list.append(test_acc)

    plot_curve(train_acc_list, test_acc_list, args.dataset)
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create TAGCN model
    model = TAGCN(g,
                  in_feats,
                  args.n_hidden,
                  n_classes,
                  args.n_layers,
                  F.relu,
                  args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    # load and preprocess dataset
    args.dataset = "reddit-self-loop"
    data = load_data(args)
    g = data.graph
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # graph preprocess and calculate normalization factor
    n_edges = g.number_of_edges()
    # symmetric degree normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    # create SGC model
    model = SGConv(in_feats, n_classes, k=2, cached=True, bias=True, norm=normalize)
    if cuda:
        model = model.cuda()

    # use optimizer
    optimizer = torch.optim.LBFGS(model.parameters())

    # define loss closure: LBFGS re-evaluates the model multiple times per step
    def closure():
        optimizer.zero_grad()
        output = model(g, features)[train_mask]
        loss_train = F.cross_entropy(output, labels[train_mask])
        loss_train.backward()
        return loss_train

    # initialize graph
    for epoch in range(args.n_epochs):
        model.train()
        optimizer.step(closure)

    acc = evaluate(model, features, g, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
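# The LBFGS closure pattern above, isolated into a self-contained sketch with
# random data (purely illustrative): torch.optim.LBFGS re-evaluates the model
# several times per step, so it requires a closure that zeroes gradients,
# recomputes the loss, and backpropagates.
import torch
import torch.nn.functional as F

x = torch.randn(32, 4)
y = torch.randint(0, 3, (32,))
linear = torch.nn.Linear(4, 3)
opt = torch.optim.LBFGS(linear.parameters())

def closure():
    opt.zero_grad()
    loss = F.cross_entropy(linear(x), y)
    loss.backward()
    return loss

for _ in range(5):
    opt.step(closure)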
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    # graph preprocess: drop self loops
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create GraphSAGE model
    model = GraphSAGE(g,
                      in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.aggregator_type)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        accuracy, precision, recall, fscore, _ = evaluate(model, features, labels, val_mask)
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()

    print("=" * 80)
    print()
    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask)
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)