# NOTE(review): this fragment uses the older DGL RedditDataset attribute API
# (data.features / data.labels / data.train_mask / data.graph); newer DGL
# versions expose these through data[0].ndata — confirm the installed version.
if gpu >= 0:
    # non-negative id selects that CUDA device; anything else falls back to CPU
    device = torch.device('cuda:%d' % gpu)
else:
    device = torch.device('cpu')
# Reddit dataset
# NumNodes: 232965
# NumEdges: 114848857
# NumFeats: 602
# NumClasses: 41
# NumTrainingSamples: 153431
# NumValidationSamples: 23831
# NumTestSamples: 55703
data = RedditDataset(self_loop=True)
# boolean masks marking which nodes belong to the train/val/test splits
train_mask = data.train_mask
val_mask = data.val_mask
test_mask = data.test_mask
features = torch.Tensor(data.features)
labels = torch.LongTensor(data.labels)
in_feats = features.shape[1]   # feature dimension (602 for Reddit, per stats above)
n_classes = data.num_labels
# Construct graph: rebuild a DGLGraph from the dataset's edge list and
# attach node features/labels as ndata fields.
g = dgl.graph(data.graph.all_edges())
g.ndata['features'] = features
g.ndata['labels'] = labels
# get different node IDs
# Examples:
# Driver: build the per-process device list, partition the graph across
# workers, and spawn one training process per GPU via mp.spawn.
#
# BUG FIX: the original clobbered the freshly built device list with a
# hard-coded `args.devices_name_list = ['cuda:0','cuda:0']` (debug leftover),
# which forced every worker onto GPU 0 and made the loop below dead code.
# Keeping the loop result lets process i run on cuda:i as intended.
for i in range(args.ngpus):
    args.devices_name_list.append('cuda:' + str(i))
print("args: \n", args)
# Rendezvous address for torch.distributed; the port is set inside train().
os.environ['MASTER_ADDR'] = '127.0.0.1'
# os.environ['MASTER_PORT'] = '12345'
if args.dataset == 'cora':
    data = CoraGraphDataset()
elif args.dataset == 'citeseer':
    data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
    data = PubmedGraphDataset()
elif args.dataset == 'reddit':
    data = RedditDataset()
else:
    raise ValueError('Unknown dataset: {}'.format(args.dataset))
g = data[0]
# Partition the graph: TV_list[i] holds worker i's training nodes and
# PV_list[i] the nodes it should cache (presumably — confirm in pg.Init).
args.TV_list, args.PV_list = pg.Init.DivideGraph(g, args.ngpus, args.hop)
del data
del g  # release memory before forking the worker processes
mp.spawn(train, nprocs=args.ngpus, args=(args,))
print("Exit!")
def train(procid, args):
    """Per-worker training entry point launched by mp.spawn.

    procid: rank of this worker (0 <= procid < args.ngpus); indexes
        args.devices_name_list / args.PV_list / args.TV_list.
    args: shared argument namespace (dataset name, hyperparameters,
        and the graph-partition lists produced by pg.Init.DivideGraph).

    Each worker reloads the dataset, joins the NCCL process group, builds a
    PaGraph-style GPU feature cache (pg.Storage), and trains a GCN with
    neighbor sampling on its own partition of training nodes.
    """
    # load and preprocess dataset
    assert procid >= 0
    # rendezvous info for torch.distributed (duplicated in dist_init_method below)
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '12345'
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    #data = args.data
    #g = args.data[0]
    #g.create_formats_()
    print("New Proc! ", procid)
    #return g
    # this worker's compute device, e.g. 'cuda:<procid>'
    device = torch.device(args.devices_name_list[procid])
    dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
        master_ip='127.0.0.1', master_port='12345')
    world_size = args.ngpus
    torch.distributed.init_process_group(backend="nccl",
                                         init_method=dist_init_method,
                                         world_size=world_size,
                                         rank=procid)
    #torch.cuda.set_device(device)
    #st = pg.Storage(g,[device],[args.PV_list[procid]],[args.TV_list[procid]])
    # use pagraph: st serves 'feat'/'label' lookups, caching part of g.ndata on
    # this worker's GPU (PV_list[procid] presumably lists the nodes to cache —
    # TODO confirm against pg.Storage)
    st = pg.Storage(g=g, data=g.ndata, cache_rate=args.cache_rate,
                    nodes=args.PV_list[procid],
                    gpu=args.devices_name_list[procid], cpu='cpu')
    if(True):
        # pop the ndata fields so the graph object no longer holds the big tensors
        features = g.ndata.pop('feat')
        labels = g.ndata.pop('label')
        train_mask = g.ndata.pop('train_mask')
        val_mask = g.ndata.pop('val_mask')
        test_mask = g.ndata.pop('test_mask')
        in_feats = features.shape[1]   # input feature dimension
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.int().sum().item(),
               val_mask.int().sum().item(),
               test_mask.int().sum().item()))
    del features  # release memory; features are served via the st cache from here on
    # add self loop
    '''
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    '''
    # create GCN model
    model = MyGCN(in_feats,
                  args.n_hidden,
                  n_classes,
                  args.n_layers,
                  F.relu,
                  args.dropout,
                  )
    model = model.to(device)
    # wrap for synchronous gradient averaging across the ngpus workers
    model = DistributedDataParallel(model, device_ids=[device],
                                    output_device=device)
    # set sampler: one fanout entry per layer (n_layers + 1 of them)
    fanouts = []
    for i in range(args.n_layers + 1):
        fanouts.append(args.neighbor_number)
    '''
    example: fanout=[2,2,2,2] or [3,3,3] ...
    '''
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    # this worker only iterates over its own partition of training nodes
    train_nids = args.TV_list[procid]
    dataloader = dgl.dataloading.NodeDataLoader(
        g, train_nids, sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=0)
    # set loss function
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # initialize graph
    dur = []   # per-epoch wall-clock times, used for the KTEPS estimate below
    # Sync: make all workers start the timed loop together
    if(args.ngpus > 1):
        torch.distributed.barrier()
    # Start training
    model.train()
    for epoch in range(args.n_epochs):
        # time record: tS accumulates per-phase seconds across the epoch's
        # batches — [0]=unused, [1]=block H2D copy, [2]=cache query,
        # [3]=forward, [4]=loss, [5]=backward+step
        #if epoch >= 3:
        tS = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        t0 = time.time()
        # forward
        #Loss=torch.tensor([0.0],device=device,required_grad=False)
        for count, (in_nodes, out_nodes, blocks) in enumerate(dataloader):
            t1 = time.time()
            blocks = [b.to(device) for b in blocks]
            t2 = time.time()
            # fetch input features / output labels through the GPU cache
            feat_in = st.Query(0, in_nodes, 'feat')
            labels_out = st.Query(0, out_nodes, 'label')
            t3 = time.time()
            # forward
            feat_out = model(blocks, feat_in)
            t4 = time.time()
            loss = loss_fcn(feat_out, labels_out)
            #Loss=Loss+loss.detach()
            t5 = time.time()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            t6 = time.time()
            tS[1] = tS[1] + t2 - t1
            tS[2] = tS[2] + t3 - t2
            tS[3] = tS[3] + t4 - t3
            tS[4] = tS[4] + t5 - t4
            tS[5] = tS[5] + t6 - t5
        tE = time.time()
        #logits = model(features)
        #loss = loss_fcn(logits[train_mask], labels[train_mask])
        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()
        #if epoch >= 3:
        dur.append(time.time() - t0)
        # validation accuracy is disabled here; prints a placeholder 0.0
        acc = 0.0  #evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), acc,
                     n_edges / np.mean(dur) / 1000))
        #for i in range(1,6):
        print(tS[1:], '\nTotal:', tE - t0, " s ")
    # Finish training
    # Sync: keep workers aligned before leaving training mode
    if(args.ngpus > 1):
        torch.distributed.barrier()
    model.eval()
    print("____________________________")
def main(args):
    """Single-process, full-batch GCN training (non-distributed path).

    args: parsed argument namespace (dataset, gpu id, n_hidden, n_layers,
        dropout, lr, weight_decay, n_epochs, self_loop).
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        # move the whole graph (int dtype ids) onto the requested GPU
        g = g.int().to(args.gpu)
    features = g.ndata['feat']
    labels = g.ndata['label']
    if True:
        # default mask: use the dataset's built-in split
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
    else:
        # alternative: load custom split masks from disk (currently disabled)
        train_mask = torch.load('data/tm.pt')
        val_mask = torch.load('data/vm.pt')
        test_mask = torch.load('data/sm.pt')
    in_feats = features.shape[1]   # input feature dimension
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Vertices %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, features.shape[0], n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    # add self loop
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()
    # normalization: symmetric GCN norm D^{-1/2}; isolated nodes (deg 0)
    # would give inf, so zero them out
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # fixed seed for reproducible weight init
    torch.manual_seed(8888)
    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)
    # one_hot_labels = torch.nn.functional.one_hot(labels, num_classes=41)
    if cuda:
        model.cuda()
    # YIFAN: use builtin loss function for more stable convergence
    # loss_fcn = ZeroCrossEntropyLoss()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer (ZeroAdam is project-local — semantics not visible here)
    optimizer = ZeroAdam(model.parameters(),
                         lr=args.lr,
                         weight_decay=args.weight_decay)
    # initialize graph
    dur = []   # per-epoch times; epochs 0-2 are warm-up and are not recorded
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward (full batch: all node features at once)
        logits = model(features)
        # loss = loss_fcn(logits[train_mask], one_hot_labels[train_mask])
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        # NOTE(review): np.mean(dur) on the first 3 epochs averages an empty
        # list and prints nan — harmless for logging, but worth confirming.
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".
              format(epoch + 1, np.mean(dur), loss.item(), acc,
                     n_edges / np.mean(dur) / 1000))
    print()
    # final held-out evaluation on the test split
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))