Пример #1
0
def main(args):
    """Train an RGCN model on the full graph and print its test accuracy.

    Loads the dataset named by ``args.dataset``, then trains for a fixed
    50 epochs with Adam (learning rate 1e-2, weight decay ``args.l2norm``)
    using cross-entropy on the training subset of the target nodes.  The
    training accuracy and loss are printed after every epoch, and a single
    evaluation pass at the end prints the accuracy on the test subset.

    Args:
        args: Parsed options; this function reads ``dataset``, ``n_hidden``,
            ``n_bases``, ``gpu`` and ``l2norm``.
    """
    (g, num_rels, num_classes, labels,
     train_idx, test_idx, target_idx) = load_data(args.dataset, get_norm=True)

    num_nodes = g.num_nodes()

    # The nodes are featureless, so their embeddings are learned from
    # scratch; the model is therefore fed the node IDs as input.
    feats = th.arange(num_nodes)

    model = RGCN(num_nodes, args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases)

    # Honour the requested GPU only when CUDA is actually usable.
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    device = th.device(args.gpu) if use_cuda else th.device('cpu')
    feats = feats.to(device)
    labels = labels.to(device)
    model = model.to(device)
    g = g.to(device)

    optimizer = th.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=args.l2norm)

    epoch_report = "Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}"

    model.train()
    for epoch in range(50):
        # Full-graph forward pass, narrowed first to the target nodes and
        # then to the training subset of those nodes.
        train_logits = model(g, feats)[target_idx][train_idx]
        train_labels = labels[train_idx]
        loss = F.cross_entropy(train_logits, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy is measured on the logits computed before this update.
        train_acc = accuracy(train_logits.argmax(dim=1), train_labels).item()
        print(epoch_report.format(epoch, train_acc, loss.item()))
    print()

    model.eval()
    with th.no_grad():
        eval_logits = model(g, feats)
    test_logits = eval_logits[target_idx][test_idx]
    test_acc = accuracy(test_logits.argmax(dim=1), labels[test_idx]).item()
    print("Test Accuracy: {:.4f}".format(test_acc))
Пример #2
0
def main(args, devices):
    """Load the dataset and spawn one ``run`` worker per entry in ``devices``.

    Args:
        args: Parsed options; ``args.dataset`` selects the dataset and the
            whole object is forwarded to every worker.
        devices: Sequence of device identifiers; its length determines the
            number of spawned processes.
    """
    data = load_data(args.dataset, inv_target=True)

    # Build the csr/coo/csc formats once, before any training process is
    # launched.  This avoids creating certain formats again in each
    # sub-process, which saves memory and CPU.
    graph = data[0]
    graph.create_formats_()

    num_workers = len(devices)
    # required for mp.Queue() to work with mp.spawn()
    mp.set_start_method('spawn')
    total_cpus = mp.cpu_count()
    queue = mp.Queue(num_workers)

    # The available CPUs are split evenly between the workers.
    worker_args = (num_workers, total_cpus // num_workers, args, devices,
                   data, queue)
    mp.spawn(run, args=worker_args, nprocs=num_workers)
Пример #3
0
def main(args):
    """Train an RGCN model on the full graph and print its test accuracy.

    Loads the dataset named by ``args.dataset``, then trains for a fixed
    100 epochs with Adam (learning rate 1e-2, weight decay ``args.wd``)
    using cross-entropy on the training subset of the target nodes.  The
    training accuracy and loss are printed after every epoch, and a single
    evaluation pass at the end prints the accuracy on the test subset.

    Args:
        args: Parsed options; this function reads ``dataset``, ``n_hidden``,
            ``n_bases``, ``gpu`` and ``wd``.
    """
    (g, num_rels, num_classes, labels,
     train_idx, test_idx, target_idx) = load_data(args.dataset, get_norm=True)

    model = RGCN(g.num_nodes(), args.n_hidden, num_classes, num_rels,
                 num_bases=args.n_bases)

    # Fall back to the CPU when no GPU was requested or CUDA is unavailable.
    if args.gpu < 0 or not th.cuda.is_available():
        device = th.device('cpu')
    else:
        device = th.device(args.gpu)
    labels = labels.to(device)
    model = model.to(device)
    # The graph is converted with ``int()`` before being moved to the device.
    g = g.int().to(device)

    optimizer = th.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=args.wd)

    epoch_report = "Epoch {:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}"

    model.train()
    for epoch in range(100):
        # Full-graph forward pass, narrowed first to the target nodes and
        # then to the training subset of those nodes.
        train_logits = model(g)[target_idx][train_idx]
        train_labels = labels[train_idx]
        loss = F.cross_entropy(train_logits, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy is measured on the logits computed before this update.
        train_acc = accuracy(train_logits.argmax(dim=1), train_labels).item()
        print(epoch_report.format(epoch, train_acc, loss.item()))
    print()

    model.eval()
    with th.no_grad():
        eval_logits = model(g)
    test_logits = eval_logits[target_idx][test_idx]
    test_acc = accuracy(test_logits.argmax(dim=1), labels[test_idx]).item()
    print("Test Accuracy: {:.4f}".format(test_acc))
Пример #4
0
def main(args):
    """Train an RGCN model through dataloaders and print its accuracies.

    Runs ``args.n_epochs`` epochs.  Each epoch delegates to ``train`` and
    prints the returned training accuracy and loss, then scores the
    validation loader.  After the last epoch the test loader is scored once.

    Args:
        args: Parsed options; this function reads ``dataset``, ``gpu``,
            ``n_hidden``, ``n_bases``, ``dropout``, ``use_self_loop``,
            ``wd`` and ``n_epochs``, and forwards the whole object to
            ``init_dataloaders``.
    """
    (g, num_rels, num_classes, labels, train_idx, test_idx, target_idx,
     inv_target) = load_data(args.dataset, inv_target=True)

    # Honour the requested GPU only when CUDA is actually usable.
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    device = th.device(args.gpu if use_cuda else 'cpu')

    # NOTE(review): the loaders receive the raw ``args.gpu`` value rather
    # than the resolved ``device`` -- confirm this is what
    # ``init_dataloaders`` expects when falling back to the CPU.
    train_loader, val_loader, test_loader = init_dataloaders(
        args, g, train_idx, test_idx, target_idx, args.gpu)

    model = RGCN(
        g.num_nodes(),
        args.n_hidden,
        num_classes,
        num_rels,
        num_bases=args.n_bases,
        dropout=args.dropout,
        self_loop=args.use_self_loop,
        ns_mode=True,
    )
    labels = labels.to(device)
    model = model.to(device)

    optimizer = th.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=args.wd)

    def _loader_accuracy(loader):
        # Score the model on every seed produced by ``loader``; the labels
        # are moved to the CPU before the comparison.
        logits, seeds = evaluate(model, loader, inv_target)
        return accuracy(logits.argmax(dim=1), labels[seeds].cpu()).item()

    total_epochs = args.n_epochs
    epoch_report = (
        "Epoch {:05d}/{:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}")

    for epoch in range(total_epochs):
        train_acc, loss = train(
            model, train_loader, inv_target, labels, optimizer)
        print(epoch_report.format(epoch, total_epochs, train_acc, loss))

        val_acc = _loader_accuracy(val_loader)
        print("Validation Accuracy: {:.4f}".format(val_acc))

    test_acc = _loader_accuracy(test_loader)
    print("Final Test Accuracy: {:.4f}".format(test_acc))
Пример #5
0
def main(args):
    """Train a model plus a separate embedding layer and print accuracies.

    The embedding layer and the model come from ``init_models`` and are
    optimised separately: SparseAdam (learning rate ``args.sparse_lr``) for
    the embedding layer, Adam (learning rate 1e-2, weight decay
    ``args.l2norm``) for the model.  Each of the ``args.n_epochs`` epochs
    prints the training accuracy and loss returned by ``train`` followed by
    the validation accuracy; the test accuracy is printed once at the end.

    Args:
        args: Parsed options; this function reads ``dataset``, ``gpu``,
            ``sparse_lr``, ``l2norm`` and ``n_epochs``, and forwards the
            whole object to ``init_dataloaders`` and ``init_models``.
    """
    (g, num_rels, num_classes, labels, train_idx, test_idx, target_idx,
     inv_target) = load_data(args.dataset, inv_target=True)

    # Fall back to the CPU when no GPU was requested or CUDA is unavailable.
    if args.gpu < 0 or not th.cuda.is_available():
        device = th.device('cpu')
    else:
        device = th.device(args.gpu)

    # NOTE(review): the loaders receive the raw ``args.gpu`` value rather
    # than the resolved ``device`` -- confirm this is what
    # ``init_dataloaders`` expects when falling back to the CPU.
    train_loader, val_loader, test_loader = init_dataloaders(
        args, g, train_idx, test_idx, target_idx, args.gpu)
    embed_layer, model = init_models(
        args, device, g.num_nodes(), num_classes, num_rels)

    labels = labels.to(device)
    model = model.to(device)

    emb_optimizer = th.optim.SparseAdam(
        embed_layer.parameters(), lr=args.sparse_lr)
    optimizer = th.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=args.l2norm)

    def _loader_accuracy(loader):
        # Score the model on every seed produced by ``loader``; the labels
        # are moved to the CPU before the comparison.
        logits, seeds = evaluate(model, embed_layer, loader, inv_target)
        return accuracy(logits.argmax(dim=1), labels[seeds].cpu()).item()

    total_epochs = args.n_epochs
    epoch_report = (
        "Epoch {:05d}/{:05d} | Train Accuracy: {:.4f} | Train Loss: {:.4f}")

    for epoch in range(total_epochs):
        train_acc, loss = train(
            model, embed_layer, train_loader, inv_target, labels,
            emb_optimizer, optimizer)
        print(epoch_report.format(epoch, total_epochs, train_acc, loss))

        val_acc = _loader_accuracy(val_loader)
        print("Validation Accuracy: {:.4f}".format(val_acc))

    test_acc = _loader_accuracy(test_loader)
    print("Final Test Accuracy: {:.4f}".format(test_acc))
Пример #6
0
def main(args, devices):
    """Load the dataset and run one ``run`` process per entry in ``devices``.

    Every process is started as soon as it is created; the function returns
    only after all of them have been joined.

    Args:
        args: Parsed options; ``args.dataset`` selects the dataset and the
            whole object is forwarded to every worker.
        devices: Sequence of device identifiers; its length determines the
            number of worker processes.
    """
    (g, num_rels, num_classes, labels, train_idx, test_idx, target_idx,
     inv_target) = load_data(args.dataset, inv_target=True)

    # Build the csr/coo/csc formats once, before any training process is
    # launched.  This avoids creating certain formats again in each
    # sub-process, which saves memory and CPU.
    g.create_formats_()

    num_workers = len(devices)
    total_cpus = mp.cpu_count()
    queue = mp.Queue(num_workers)

    # Dataset pieces handed to every worker, in the order ``run`` receives
    # them.
    payload = (g, num_classes, num_rels, target_idx, inv_target,
               train_idx, test_idx, labels)

    def _launch(rank):
        # We use distributed data parallel dataloader to handle the data
        # splitting, so every worker is given the same payload.  The
        # available CPUs are split evenly between the workers.
        worker = mp.Process(
            target=run,
            args=(rank, num_workers, total_cpus // num_workers, args,
                  devices, payload, queue))
        worker.start()
        return worker

    workers = [_launch(rank) for rank in range(num_workers)]
    for worker in workers:
        worker.join()