# ===== Example 1 (score: 0) =====
def main(opt):
    """Train a GAT or AGNN node classifier on a single DGL graph.

    Args:
        opt: option dict with keys such as 'gpu', 'model', 'num_layers',
            'num_hidden', 'num_heads', 'num_out_heads', 'in_drop',
            'attn_drop', 'negative_slope', 'residual', 'optimizer', 'lr',
            'weight_decay', 'epochs', 'early_stop', 'fastmode'.

    Side effects:
        Prints data statistics and per-epoch progress; when early stopping
        is enabled, reads the checkpoint file 'es_checkpoint.pt' written by
        EarlyStopping; prints final test accuracy.
    """
    data = get_dataset(opt)
    g = data[0]
    cuda = opt['gpu'] >= 0
    if cuda:
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    # NOTE(review): other functions in this file read data.num_classes;
    # num_labels is the older DGL alias — confirm which attribute the
    # installed DGL version provides.
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # Add self loops (remove existing ones first so no node gets two).
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Attention heads per hidden layer, plus a separate output-layer count.
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)
    else:
        # Fail fast with a clear message instead of hitting an
        # UnboundLocalError at print(model) below.
        raise ValueError("unknown model: {}".format(opt['model']))
    print(model)
    if opt['early_stop']:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = get_optimizer(opt['optimizer'],
                              parameters=model.parameters(),
                              lr=opt['lr'],
                              weight_decay=opt['weight_decay'])

    # Per-epoch durations; the first 3 epochs are excluded as warm-up.
    dur = []
    for epoch in range(opt['epochs']):
        if epoch >= 3:
            t0 = time.time()

        loss, logits = train(model, optimizer, features, train_mask, labels)

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if opt['fastmode']:
            # Reuse the training-forward logits for validation accuracy.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if opt['early_stop'] and stopper.step(val_acc, model):
                break

        # Guard against np.mean([]) (nan + RuntimeWarning) during warm-up.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, mean_dur, loss.item(), train_acc, val_acc,
                  n_edges / mean_dur / 1000))

    print()
    if opt['early_stop']:
        # Restore the best model written by EarlyStopping during training.
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
# ===== Example 2 (score: 0) =====
if args.cuda:
    torch.cuda.manual_seed(args.seed)


# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data()

# Build the GAT classifier; the class count is inferred from the labels.
model = GAT(
    nfeat=features.shape[1],
    nhid=args.hidden,
    nclass=int(labels.max()) + 1,
    dropout=args.dropout,
    nheads=args.nb_heads,
    alpha=args.alpha,
)

optimizer = optim.Adam(
    model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

# Move the model and every tensor onto the GPU when requested.
if args.cuda:
    model.cuda()
    features, adj, labels = features.cuda(), adj.cuda(), labels.cuda()
    idx_train, idx_val, idx_test = (
        idx_train.cuda(), idx_val.cuda(), idx_test.cuda())

# Legacy autograd wrappers (pre-0.4 PyTorch idiom).
features, adj, labels = (Variable(t) for t in (features, adj, labels))

# Per-epoch metric history.
train_losses, train_accs, val_losses, val_accs = [], [], [], []
# ===== Example 3 (score: 0) =====
def train_ray(opt, checkpoint_dir=None, data_dir="../data"):
    """Ray Tune trainable: trains ``opt['num_init']`` independently
    initialised GAT/AGNN models on the same graph and reports metrics
    averaged over all of them.

    Args:
        opt: hyper-parameter dict ('model', 'num_layers', 'lr', 'epochs',
            'num_init', ...).
        checkpoint_dir: set by Ray Tune when a checkpoint must be restored.
        data_dir: unused here; part of Ray Tune's trainable signature.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    # NOTE(review): `cuda` is assigned but never read below — model
    # placement is driven by `device` instead; only the graph move uses
    # opt['gpu'].
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    # Attention heads per hidden layer, plus a separate output-layer count.
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]

    models = []      # one model per random initialisation
    optimizers = []  # matching optimizer per model
    datas = [g for i in range(opt['num_init'])]

    for split in range(opt['num_init']):
        if opt['model'] == 'GAT':
            model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                        n_classes, heads, F.elu, opt['in_drop'],
                        opt['attn_drop'], opt['negative_slope'],
                        opt['residual'], opt)
        elif opt['model'] == 'AGNN':
            model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                         n_classes, opt['in_drop'], opt)

        train_this = train
        model = model.to(device)
        models.append(model)

        # Wrap in DataParallel when multiple GPUs are visible.
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        # model = model.to(device)
        parameters = [p for p in model.parameters() if p.requires_grad]

        optimizer = get_optimizer(opt['optimizer'],
                                  parameters,
                                  lr=opt['lr'],
                                  weight_decay=opt['weight_decay'])
        optimizers.append(optimizer)

        # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
        # should be restored.
        # NOTE(review): this restores the SAME checkpoint file into every
        # one of the num_init models — confirm that is intended.
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, "checkpoint")
            model_state, optimizer_state = torch.load(checkpoint)
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    # NOTE(review): range(1, epochs) runs epochs-1 iterations — confirm the
    # off-by-one is intended.
    for epoch in range(1, opt['epochs']):
        # Mean training loss across all initialisations for this epoch.
        loss = np.mean([
            train_this(model, optimizer, features, train_mask,
                       labels)[0].item()
            for model, optimizer in zip(models, optimizers)
        ])
        train_accs, val_accs, tmp_test_accs = average_test(models, datas)
        # Checkpoint the model with the best validation accuracy this epoch.
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            best = np.argmax(val_accs)
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save(
                (models[best].state_dict(), optimizers[best].state_dict()),
                path)
        tune.report(loss=loss,
                    accuracy=np.mean(val_accs),
                    test_acc=np.mean(tmp_test_accs),
                    train_acc=np.mean(train_accs))
# ===== Example 4 (score: 0) =====
        sampler=ImbalancedDatasetSampler(
            train_db, callback_get_weight=WeightBinary(pos_ratio)))
    return trainloader


# Model and optimizer
# NOTE(review): this GAT takes extra capacity arguments (nout, nlmphid)
# compared with the other examples in this file — presumably an MLP head
# follows the attention layers; confirm against the GAT definition.
model = GAT(nfeat=args.nfeat,
            nhid=args.hidden,
            nout=128,
            nlmphid=64,
            nclass=args.nclass,
            dropout=args.dropout,
            nheads=args.nb_heads,
            alpha=args.alpha)

# Plain SGD (no momentum) with L2 regularisation via weight_decay.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=args.lr,
                            weight_decay=args.weight_decay)

# Standard multi-class classification loss.
criterion = torch.nn.CrossEntropyLoss()


def run(epoch, dataloader, phase='train'):
    """Run one epoch over `dataloader` (definition continues beyond this
    snippet; the visible part only initialises per-epoch accumulators).

    Args:
        epoch: current epoch index (presumably for logging).
        dataloader: batch iterator for this phase.
        phase: 'train' or an evaluation phase name.
    """
    loss_train = []      # per-batch losses
    labels = []          # presumably ground-truth labels collected per batch
    outputs = []         # presumably raw model outputs per batch
    preds = []           # presumably discrete predictions per batch
    pos_correct = 0.0    # correct predictions on the positive class
    neg_correct = 0.0    # correct predictions on the negative class
    pos = 0.0            # positive examples seen
    neg = 0.0            # negative examples seen
# ===== Example 5 (score: 0) =====
def train_ray_int(opt, checkpoint_dir=None, data_dir="../data"):
    """Ray Tune trainable: trains one GAT/AGNN model and reports the
    metrics observed at the best validation epoch so far.

    Args:
        opt: hyper-parameter dict ('model', 'num_layers', 'lr', 'epoch',
            'no_early', 'dataset', ...).
        checkpoint_dir: set by Ray Tune when restoring from a checkpoint.
        data_dir: unused here; part of Ray Tune's trainable signature.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    # NOTE(review): placement is driven both by opt['gpu'] (graph) and by
    # `device` (model) — confirm the two agree in the config.
    if opt['gpu'] >= 0:
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # Add self loops (remove existing ones first so no node gets two).
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Attention heads per hidden layer, plus a separate output-layer count.
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)
    else:
        # Fail fast with a clear message instead of hitting an
        # UnboundLocalError below.
        raise ValueError("unknown model: {}".format(opt['model']))

    model = model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = get_optimizer(opt["optimizer"],
                              parameters,
                              lr=opt["lr"],
                              weight_decay=opt["weight_decay"])

    # Restore model/optimizer state when Ray Tune resumes from a checkpoint.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    train_this = train
    # OGB datasets need their dedicated evaluator.
    this_test = test_OGB if opt['dataset'] == 'ogbn-arxiv' else test
    best_time = best_epoch = train_acc = val_acc = test_acc = 0
    # NOTE(review): other trainables in this file read opt['epochs']; this
    # one reads opt['epoch'] — confirm which key the config defines.
    for epoch in range(1, opt["epoch"]):
        loss = train_this(model, optimizer, features, train_mask,
                          labels)[0].item()
        # Evaluate once per epoch (the original duplicated this identical
        # call in both branches of the no_early check).
        tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
        if opt["no_early"]:
            # With early stopping disabled the full time budget is reported.
            best_time = opt['time']
        # Track the metrics of the best validation epoch seen so far.
        if tmp_val_acc > val_acc:
            best_epoch = epoch
            train_acc = tmp_train_acc
            val_acc = tmp_val_acc
            test_acc = tmp_test_acc
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)
        tune.report(loss=loss,
                    accuracy=val_acc,
                    test_acc=test_acc,
                    train_acc=train_acc,
                    best_time=best_time,
                    best_epoch=best_epoch)
# ===== Example 6 (score: 0) =====
# Validate the requested architecture up front (guard clause), then build it.
if args.model not in ('GAT', 'GCN'):
    raise ValueError("Model {} not registered".format(args.model))
if args.model == 'GAT':
    model = GAT(nfeat=features.shape[1],
                nhid=args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=args.dropout,
                nheads=args.nb_heads,
                alpha=args.alpha)
else:
    model = GCN(nfeat=features.shape[1],
                nhid=args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=args.dropout)

optimizer = optim.Adadelta(
    model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

# Legacy autograd wrappers (pre-0.4 PyTorch idiom).
features, adj, labels = [Variable(t) for t in (features, adj, labels)]

# Move the model and every tensor onto the GPU when requested.
if args.cuda:
    model.cuda()
    features, adj, labels = features.cuda(), adj.cuda(), labels.cuda()
    idx_train, idx_val, idx_test = (
        idx_train.cuda(), idx_val.cuda(), idx_test.cuda())

# ===== Example 7 (score: 0) =====
    # Seed every RNG source so runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    # adj, features, labels, idx_train, idx_val, idx_test = load_data()
    adj, features, labels, idx_train, idx_val, idx_test = load_data_gat(path='data/', dataset_str=args.dataset)
    # print(adj.sum(dim=0))

    # Number of classes, inferred from the largest label id.
    print(int(labels.max()) + 1)
    # Model and optimizer
    model = GAT(nfeat=features.shape[1], nhid=args.hidden, nclass=int(labels.max()) + 1, dropout=args.dropout, nheads=args.nb_heads, alpha=args.alpha)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Move the model and every tensor onto the GPU when enabled.
    if args.cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    # Legacy autograd wrappers (pre-0.4 PyTorch idiom).
    features, adj, labels = Variable(features), Variable(adj), Variable(labels)

    # Train model
    t_total = time.time()  # wall-clock start of total training time
    loss_values = []       # per-epoch losses (presumably for early stopping)
# ===== Example 8 (score: 0) =====
args = parser.parse_args()
# CUDA is used only when available and not explicitly disabled.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every RNG source so runs are reproducible.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data()

# Model and optimizer
model = GAT(
    nfeat=features.shape[1],
    nhid=args.hidden,
    nclass=int(labels.max()) + 1,
    dropout=args.dropout,
    nheads=args.nb_heads,
    alpha=args.alpha,
)
optimizer = optim.Adam(
    model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

# Move the model and every tensor onto the GPU when enabled.
if args.cuda:
    model.cuda()
    features, adj, labels = features.cuda(), adj.cuda(), labels.cuda()
    idx_train, idx_val, idx_test = (
        idx_train.cuda(), idx_val.cuda(), idx_test.cuda())

# Legacy autograd wrappers (pre-0.4 PyTorch idiom).
features, adj, labels = (Variable(t) for t in (features, adj, labels))


def train(epoch):
    """Run one training epoch (definition continues beyond this snippet)."""
    t = time.time()  # epoch wall-clock start
# ===== Example 9 (score: 0) =====
# NOTE(review): the result of torch.device(...) is discarded — this line is
# a no-op as written; presumably it was meant to be assigned
# (`device = torch.device(...)`) and used for tensor placement. Confirm.
torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print('loading data')
x, y, adj = load_data()

# MNIST-sized split: first 60000 samples train, the remainder test.
y_train = y[:60000]
x_train = x[:60000]
y_test = y[60000:]
x_test = x[60000:]
adj = torch.Tensor(adj)  # shared adjacency matrix used for every sample

print('loading on cuda')

# Tiny GAT: scalar features in/out, 5 hidden units, 2 attention heads.
model = GAT(nfeat=1, nhid=5, nclass=1, dropout=0.8, nheads=2, alpha=0.2)

optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# NOTE(review): only the model and adjacency are moved to the GPU here; the
# x/y tensors stay on CPU — presumably moved per-batch inside train(); verify.
model.cuda()

adj = adj.cuda()


def train(epoch):
    """Run one mini-batched training epoch (definition continues beyond
    this snippet)."""
    #	LR = [1.00e-02, 8.89e-03, 7.78e-03, 6.67e-03, 5.56e-03, 4.45e-03, 3.34e-03, 2.23e-03, 1.12e-03, 1.00e-05]
    print('Epoch: ' + str(epoch))

    n_iter = 500  # mini-batches per epoch

    batch_size = int(60000 / n_iter)  # 120 samples per batch

    t = time.time()  # epoch wall-clock start