Exemplo n.º 1
0
# Pick the compute device: the requested CUDA GPU, or fall back to CPU.
device = torch.device('cuda:%d' % gpu if gpu >= 0 else 'cpu')

# Reddit dataset (statistics for reference):
#   NumNodes 232965 | NumEdges 114848857 | NumFeats 602 | NumClasses 41
#   Train 153431 | Val 23831 | Test 55703
data = RedditDataset(self_loop=True)

# Split masks shipped with the dataset.
train_mask = data.train_mask
val_mask = data.val_mask
test_mask = data.test_mask

# Node features / labels as dense tensors.
features = torch.Tensor(data.features)
labels = torch.LongTensor(data.labels)
in_feats = features.shape[1]
n_classes = data.num_labels

# Construct a DGLGraph from the dataset's edge list and attach the
# features and labels as node data.
g = dgl.graph(data.graph.all_edges())
g.ndata['features'] = features
g.ndata['labels'] = labels

# get different node IDs
# Examples:
Exemplo n.º 2
0
    # Build one device name per worker process: 'cuda:0' .. 'cuda:{ngpus-1}'.
    for i in range(args.ngpus):
        args.devices_name_list.append('cuda:'+str(i))
    # BUG FIX: removed a debug leftover that overwrote the list just built
    # with the hard-coded ['cuda:0','cuda:0'], which forced every worker
    # onto GPU 0 and silently broke any ngpus != 2 configuration.

    print("args: \n",args)

    # Rendezvous address/port for torch.distributed; train() uses the same
    # values, so they must stay in sync.
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '12345'

    # Load the requested dataset; fail fast on an unknown name.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]

    # Partition the graph across GPUs: TV_list holds each rank's training
    # vertices, PV_list its (hop-expanded) partition vertices to cache.
    args.TV_list,args.PV_list=pg.Init.DivideGraph(g,args.ngpus,args.hop)

    # Release the parent's copy of the graph before spawning workers —
    # each worker reloads the dataset itself.
    del data
    del g

    # One training process per GPU; each receives (procid, args).
    mp.spawn(train,nprocs = args.ngpus,args = (args,))

    print("Exit!")
Exemplo n.º 3
0
def train(procid,args):
    """Distributed training worker for one GPU, spawned via mp.spawn.

    Parameters
    ----------
    procid : int
        Rank of this process (0 .. args.ngpus-1); indexes its device in
        ``args.devices_name_list`` and its graph partition in
        ``args.PV_list`` / ``args.TV_list``.
    args :
        Namespace holding the dataset name, model hyper-parameters and the
        per-rank partition lists computed by the launcher.
    """
    # load and preprocess dataset
    assert procid >= 0
    # Rendezvous address/port for torch.distributed; must match the launcher.
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '12345'

    # Each worker reloads the dataset itself (the launcher deleted its copy
    # before spawning).
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]


    #data = args.data
    #g = args.data[0]
    #g.create_formats_()
    print("New Proc! ",procid)
    #return g
    # This rank's device and the TCP init endpoint (same host/port as the
    # MASTER_ADDR/MASTER_PORT set above).
    device = torch.device(args.devices_name_list[procid])
    dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port='12345')
    world_size = args.ngpus
    # NCCL backend: one process per GPU, rank == procid.
    torch.distributed.init_process_group(backend="nccl",
                                         init_method=dist_init_method,
                                          world_size = world_size,
                                          rank = procid)
    #torch.cuda.set_device(device)
#st = pg.Storage(g,[device],[args.PV_list[procid]],[args.TV_list[procid]])


    # use pagraph
    # PaGraph-style feature store: caches node data for this rank's
    # partition vertices (PV_list[procid]) on the GPU at the configured
    # cache rate; everything else is served from CPU memory.
    st = pg.Storage(g=g,data=g.ndata,cache_rate=args.cache_rate,
                    nodes=args.PV_list[procid],gpu=args.devices_name_list[procid],cpu='cpu')
    if(True):
        # Pop node data off the graph so it is not duplicated alongside the
        # storage cache; keep only the scalars needed below.
        features = g.ndata.pop('feat')
        labels = g.ndata.pop('label')
        train_mask = g.ndata.pop('train_mask')
        val_mask = g.ndata.pop('val_mask')
        test_mask = g.ndata.pop('test_mask')
        in_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
              train_mask.int().sum().item(),
              val_mask.int().sum().item(),
              test_mask.int().sum().item()))

    del features    #release memory 

    # add self loop
    '''
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)

    '''
    # create GCN model
    model = MyGCN(
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout,

                )
    model = model.to(device)
    # Wrap for gradient synchronization across ranks; inputs/outputs stay
    # on this rank's device.
    model = DistributedDataParallel(model, device_ids = [device], output_device = device)

    # set sampler
    # One fanout entry per layer (n_layers + 1 of them), all equal to
    # args.neighbor_number.
    fanouts=[]
    for i in range(args.n_layers + 1):
        fanouts.append(args.neighbor_number)
        '''
        example: fanout=[2,2,2,2] or [3,3,3] ...
        '''
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts) 
    # This rank only trains on its own partition's training vertices.
    train_nids = args.TV_list[procid]
    dataloader = dgl.dataloading.NodeDataLoader(    
    g, train_nids, sampler,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=0)

    # set loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr = args.lr)

    # initialize graph
    dur = []


    # Sync: make sure every rank finished setup before training starts.
    if(args.ngpus > 1):
        torch.distributed.barrier()   

    #Start trainning
    model.train()

    for epoch in range(args.n_epochs):
        # time record
        #if epoch >= 3:
        # tS accumulates per-epoch seconds by phase, derived from the
        # t1..t6 deltas below:
        #   tS[1] block->device copy, tS[2] feature/label query,
        #   tS[3] forward, tS[4] loss, tS[5] backward + optimizer step.
        tS=[0.0,0.0,0.0,0.0,0.0,0.0]
        t0 = time.time()

        # forward

        #Loss=torch.tensor([0.0],device=device,required_grad=False)

        for count,(in_nodes,out_nodes,blocks) in enumerate(dataloader):

            t1=time.time()
            blocks=[b.to(device) for b in blocks]

            t2=time.time()
            # Fetch input features and output labels through the cache.
            feat_in = st.Query(0,in_nodes,'feat')
            labels_out = st.Query(0,out_nodes,'label')


            t3=time.time()
            # forward
            feat_out = model(blocks,feat_in)
            t4=time.time()

            loss = loss_fcn(feat_out,labels_out)
            #Loss=Loss+loss.detach()
            t5=time.time()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            t6=time.time()

            tS[1]=tS[1]+t2-t1
            tS[2]=tS[2]+t3-t2
            tS[3]=tS[3]+t4-t3
            tS[4]=tS[4]+t5-t4
            tS[5]=tS[5]+t6-t5



        tE=time.time()
        #logits = model(features)
        #loss = loss_fcn(logits[train_mask], labels[train_mask])
        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()

        #if epoch >= 3:
        dur.append(time.time() - t0)

        # NOTE(review): evaluation is disabled; accuracy is reported as 0.
        acc = 0.0 #evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                             acc, n_edges / np.mean(dur) / 1000))
        #for i in range(1,6):
        print(tS[1:],'\nTotal:',tE-t0," s ")


    #Finish trainning

    # Sync: wait for all ranks before switching to eval mode.
    if(args.ngpus > 1):
        torch.distributed.barrier()
    model.eval()

    print("____________________________")
Exemplo n.º 4
0
def main(args):
    """Single-process, full-batch GCN training on a standard DGL dataset.

    Loads the dataset named by ``args.dataset``, optionally moves the graph
    to GPU ``args.gpu`` (CPU when negative), trains for ``args.n_epochs``
    epochs, printing per-epoch validation accuracy, and finally reports test
    accuracy.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of cora/citeseer/pubmed/reddit.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    # Default split masks shipped with the dataset.  (A dead `if True/else`
    # branch loading masks from data/*.pt files was removed — the else arm
    # was unreachable.)
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Vertices %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, features.shape[0], n_classes,
              train_mask.int().sum().item(),
              val_mask.int().sum().item(),
              test_mask.int().sum().item()))

    # add self loop
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Symmetric normalization weights D^{-1/2}; isolated nodes (degree 0)
    # would give inf, so zero them out.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # Fixed seed so parameter initialization is reproducible across runs.
    torch.manual_seed(8888)

    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)

    if cuda:
        model.cuda()

    # YIFAN: use buildin loss function for more stable convergence
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = ZeroAdam(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            # Skip the first 3 epochs when timing, to exclude warm-up cost.
            t0 = time.time()
        # forward (full batch over the whole graph)
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # BUG FIX: np.mean([]) emits a RuntimeWarning during the first 3
        # (untimed) epochs; guard the empty case explicitly.  The printed
        # value is still nan for those epochs, matching prior output.
        avg_dur = np.mean(dur) if dur else float('nan')
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch + 1, avg_dur, loss.item(),
                                             acc, n_edges / avg_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))