def main(args):
    g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(
        args.dataset, get_norm=True)
    num_nodes = g.num_nodes()

    # Since the nodes are featureless, learn node embeddings from scratch.
    # This requires passing the node IDs to the model.
    feats = th.arange(num_nodes)

    model = RGCN(num_nodes, args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases)

    if args.gpu >= 0 and th.cuda.is_available():
        device = th.device(args.gpu)
    else:
        device = th.device('cpu')
    feats = feats.to(device)
    labels = labels.to(device)
    model = model.to(device)
    g = g.to(device)

    optimizer = th.optim.Adam(model.parameters(), lr=1e-2,
                              weight_decay=args.l2norm)

    model.train()
    for epoch in range(50):
        logits = model(g, feats)
        logits = logits[target_idx]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_acc = accuracy(logits[train_idx].argmax(dim=1),
                             labels[train_idx]).item()
        print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(
            epoch, train_acc, loss.item()))
    print()

    model.eval()
    with th.no_grad():
        logits = model(g, feats)
        logits = logits[target_idx]
        test_acc = accuracy(logits[test_idx].argmax(dim=1),
                            labels[test_idx]).item()
    print("Test Accuracy: {:.4f}".format(test_acc))
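
# A minimal sketch (an assumption, not the repo's RGCN implementation) of how a
# featureless RGCN can turn the node-ID tensor passed as `feats` into learned
# embeddings. The edge-type and normalizer field names ('etype', 'norm') are
# assumed to be set on the graph by load_data(..., get_norm=True).
import torch.nn as nn
import torch.nn.functional as F
import dgl.nn as dglnn

class RGCNSketch(nn.Module):
    def __init__(self, num_nodes, h_dim, out_dim, num_rels, num_bases=-1):
        super().__init__()
        num_bases = num_rels if num_bases < 0 else num_bases
        # One learned vector per node ID, since the graph has no input features.
        self.emb = nn.Embedding(num_nodes, h_dim)
        self.conv1 = dglnn.RelGraphConv(h_dim, h_dim, num_rels,
                                        regularizer='basis', num_bases=num_bases)
        self.conv2 = dglnn.RelGraphConv(h_dim, out_dim, num_rels,
                                        regularizer='basis', num_bases=num_bases)

    def forward(self, g, nids):
        etype, norm = g.edata['etype'], g.edata['norm']
        h = self.emb(nids)                           # look up embeddings by node ID
        h = F.relu(self.conv1(g, h, etype, norm))
        return self.conv2(g, h, etype, norm)
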
def main(args):
    g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(
        args.dataset, get_norm=True)

    model = RGCN(g.num_nodes(), args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases)

    if args.gpu >= 0 and th.cuda.is_available():
        device = th.device(args.gpu)
    else:
        device = th.device('cpu')
    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)

    optimizer = th.optim.Adam(model.parameters(), lr=1e-2, weight_decay=args.wd)

    model.train()
    for epoch in range(100):
        logits = model(g)
        logits = logits[target_idx]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_acc = accuracy(logits[train_idx].argmax(dim=1),
                             labels[train_idx]).item()
        print("Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(
            epoch, train_acc, loss.item()))
    print()

    model.eval()
    with th.no_grad():
        logits = model(g)
        logits = logits[target_idx]
        test_acc = accuracy(logits[test_idx].argmax(dim=1),
                            labels[test_idx]).item()
    print("Test Accuracy: {:.4f}".format(test_acc))
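
# A minimal sketch of an argparse entry point for the main() above. The flag
# names mirror the attributes the function reads (dataset, n_hidden, n_bases,
# gpu, wd); the defaults shown here are assumptions, not the repo's settings.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='RGCN entity classification, full-graph training')
    parser.add_argument('--dataset', type=str, default='aifb',
                        help='dataset name passed to load_data')
    parser.add_argument('--n-hidden', type=int, default=16,
                        help='hidden layer size')
    parser.add_argument('--n-bases', type=int, default=-1,
                        help='number of weight bases; -1 uses num_rels')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU id; -1 means CPU')
    parser.add_argument('--wd', type=float, default=5e-4,
                        help='weight decay (L2 penalty)')
    args = parser.parse_args()
    main(args)
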
def main(args):
    g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target = load_data(
        args.dataset, inv_target=True)

    if args.gpu >= 0 and th.cuda.is_available():
        device = th.device(args.gpu)
    else:
        device = th.device('cpu')

    train_loader, val_loader, test_loader = init_dataloaders(
        args, g, train_idx, test_idx, target_idx, args.gpu)

    model = RGCN(g.num_nodes(), args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases, dropout=args.dropout,
                 self_loop=args.use_self_loop, ns_mode=True)
    labels = labels.to(device)
    model = model.to(device)

    optimizer = th.optim.Adam(model.parameters(), lr=1e-2, weight_decay=args.wd)

    for epoch in range(args.n_epochs):
        train_acc, loss = train(model, train_loader, inv_target, labels, optimizer)
        print("Epoch {:05d}/{:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(
            epoch, args.n_epochs, train_acc, loss))

        val_logits, val_seeds = evaluate(model, val_loader, inv_target)
        val_acc = accuracy(val_logits.argmax(dim=1),
                           labels[val_seeds].cpu()).item()
        print("Validation Accuracy: {:.4f}".format(val_acc))

    test_logits, test_seeds = evaluate(model, test_loader, inv_target)
    test_acc = accuracy(test_logits.argmax(dim=1),
                        labels[test_seeds].cpu()).item()
    print("Final Test Accuracy: {:.4f}".format(test_acc))
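
# An illustrative sketch of the per-epoch train() helper the loops above rely
# on (an assumption, not the repo's implementation). It assumes the loader
# yields (input_nodes, seeds, blocks) triples, that inv_target remaps sampled
# seed node IDs into label space, that in ns_mode the model consumes a list of
# blocks and returns logits for the seed nodes, and that all tensors already
# live on the same device.
import torch.nn.functional as F

def train_sketch(model, train_loader, inv_target, labels, optimizer):
    model.train()
    total_loss, total_correct, total_seeds = 0.0, 0, 0
    for input_nodes, seeds, blocks in train_loader:
        seeds = inv_target[seeds]          # map sampled node IDs to label indices
        logits = model(blocks)             # logits for the seed (output) nodes
        loss = F.cross_entropy(logits, labels[seeds])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * len(seeds)
        total_correct += (logits.argmax(dim=1) == labels[seeds]).sum().item()
        total_seeds += len(seeds)
    # Return epoch-level accuracy and mean loss, matching how main() prints them.
    return total_correct / total_seeds, total_loss / total_seeds
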
def run(proc_id, n_gpus, n_cpus, args, devices, dataset, queue=None):
    dev_id = devices[proc_id]
    th.cuda.set_device(dev_id)
    g, num_rels, num_classes, labels, train_idx, test_idx, \
        target_idx, inv_target = dataset

    dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
        master_ip='127.0.0.1', master_port='12345')
    backend = 'nccl'
    if proc_id == 0:
        print("backend using {}".format(backend))
    th.distributed.init_process_group(backend=backend,
                                      init_method=dist_init_method,
                                      world_size=n_gpus,
                                      rank=proc_id)
    device = th.device(dev_id)
    use_ddp = True if n_gpus > 1 else False

    train_loader, val_loader, test_loader = init_dataloaders(
        args, g, train_idx, test_idx, target_idx, dev_id, use_ddp=use_ddp)

    model = RGCN(g.num_nodes(), args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases, dropout=args.dropout,
                 self_loop=args.use_self_loop, ns_mode=True)
    labels = labels.to(device)
    model = model.to(device)
    inv_target = inv_target.to(device)
    model = DistributedDataParallel(model, device_ids=[dev_id],
                                    output_device=dev_id)

    optimizer = th.optim.Adam(model.parameters(), lr=1e-2, weight_decay=args.wd)
    th.set_num_threads(n_cpus)

    for epoch in range(args.n_epochs):
        train_acc, loss = train(model, train_loader, inv_target, labels, optimizer)

        if proc_id == 0:
            print("Epoch {:05d}/{:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}".format(
                epoch, args.n_epochs, train_acc, loss))

        # garbage collection that empties the queue
        gc.collect()
        val_logits, val_seeds = evaluate(model, val_loader, inv_target)
        queue.put((val_logits, val_seeds))

        # gather evaluation result from multiple processes
        if proc_id == 0:
            val_acc = collect_eval(n_gpus, queue, labels)
            print("Validation Accuracy: {:.4f}".format(val_acc))

    # garbage collection that empties the queue
    gc.collect()
    test_logits, test_seeds = evaluate(model, test_loader, inv_target)
    queue.put((test_logits, test_seeds))
    if proc_id == 0:
        test_acc = collect_eval(n_gpus, queue, labels)
        print("Final Test Accuracy: {:.4f}".format(test_acc))
    th.distributed.barrier()
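
# A minimal launcher sketch for run() above (an assumption, not the repo's
# actual entry point): it starts one worker process per GPU and shares a queue
# that run() uses to hand evaluation results to process 0.
import torch.multiprocessing as mp

def launch(args, devices, dataset, n_cpus):
    n_gpus = len(devices)
    ctx = mp.get_context('spawn')
    queue = ctx.Queue()
    procs = []
    for proc_id in range(n_gpus):
        p = ctx.Process(target=run,
                        args=(proc_id, n_gpus, n_cpus, args, devices, dataset, queue))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
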
def train(args, device, data):
    # Unpack data
    train_nid, val_nid, test_nid, input_dim, left_pad_size, right_pad_size, \
        labels, n_classes, entity_features, g = data

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(',')])
    # The dataloader runs on CPU by default (device='cpu'); it could be moved to GPU.
    dataloader = dgl.dataloading.NodeDataLoader(
        g,
        {args.label_entity: train_nid},
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers)

    # Define model and optimizer
    model = RGCN(input_dim, args.hidden_dim, n_classes, args.num_layers,
                 F.relu, args.dropout, g.etypes)
    model = model.to(device)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)

    # Training loop
    avg = 0
    iter_tput = []
    best_eval_acc = 0
    best_test_acc = 0
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            tic_step = time.time()

            # copy block to gpu
            blocks = [blk.int().to(device) for blk in blocks]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                entity_features, labels, seeds, input_nodes,
                args.label_entity, args.is_pad, device)

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)[args.label_entity]
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_acc(batch_pred, batch_labels)
                gpu_mem_alloc = torch.cuda.max_memory_allocated() / 1000000 \
                    if torch.cuda.is_available() else 0
                print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | '
                      'Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format(
                          epoch, step, loss.item(), acc.item(),
                          np.mean(iter_tput[3:]), gpu_mem_alloc))

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= 5:
            avg += toc - tic
        if epoch % args.eval_every == 0 and epoch != 0:
            eval_acc, test_acc = evaluate(
                model, g, entity_features, labels, val_nid, test_nid, device,
                args.batch_size, args.num_workers, args.label_entity, args.is_pad)
            # if args.save_pred:
            #     np.savetxt(args.save_pred + '%02d' % epoch,
            #                pred.argmax(1).cpu().numpy(), '%d')
            print('Eval Acc {:.4f}'.format(eval_acc))
            if eval_acc > best_eval_acc:
                best_eval_acc = eval_acc
                best_test_acc = test_acc
            print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format(
                best_eval_acc, best_test_acc))

    print('Avg epoch time: {}'.format(avg / (epoch - 4)))
    return best_test_acc
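
# A minimal sketch of a load_subtensor-style helper (an assumption, not the
# repo's implementation). It assumes entity_features is a dict mapping node
# type -> feature tensor, that input_nodes and seeds are dicts keyed by node
# type (as NodeDataLoader yields for heterogeneous graphs), and it omits the
# padding logic controlled by args.is_pad.
def load_subtensor_sketch(entity_features, labels, seeds, input_nodes,
                          label_entity, device):
    # Gather input features for every sampled node type and move them to device.
    batch_inputs = {ntype: entity_features[ntype][nids].to(device)
                    for ntype, nids in input_nodes.items()
                    if ntype in entity_features}
    # Labels are defined only for the target (label) entity type.
    batch_labels = labels[seeds[label_entity]].to(device)
    return batch_inputs, batch_labels
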