Пример #1
0
def train(dataset):
    """Train a GNN to flag attacked nodes (binary node classification).

    Relies on module-level config (hidden_dim, num_classes, model_type,
    num_layers, learning_rate, weight_decay, epochs). Prints the per-batch
    loss and a per-epoch F1 score computed on the last batch seen.
    """
    model = models.GNNStack(dataset.num_node_features, hidden_dim, num_classes,
                            model_type, num_layers)
    # Optimize only parameters that require gradients.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    opt = optim.Adam(trainable_params, lr=learning_rate,
                     weight_decay=weight_decay)
    writer = SummaryWriter()
    for epoch in range(epochs):
        model.train()
        pred = label = None
        for batch in dataset:
            opt.zero_grad()
            prob = model(batch)
            pred = prob.argmax(dim=1)
            # Build binary targets: 1 for attacked nodes, 0 otherwise.
            label = np.zeros(len(batch.y))
            label[batch.attacked_nodes] = 1
            label = torch.tensor(label, dtype=torch.long)
            loss = model.loss(prob, label)
            print(loss.item())
            loss.backward()
            opt.step()
        # BUG FIX: the original crashed with NameError on an empty dataset
        # and with ZeroDivisionError when no positives were predicted.
        if pred is None:
            continue  # empty dataset: nothing to evaluate this epoch
        # NOTE(review): metrics are computed on the LAST batch only, matching
        # the original behavior — confirm this is intended for multi-batch data.
        true_pos = (pred[batch.attacked_nodes] == 1).sum().double()
        predicted_pos = pred.sum()
        actual_pos = label.sum()
        if true_pos > 0:
            precision = true_pos / predicted_pos
            recall = true_pos / actual_pos
            print(2 * precision * recall / (precision + recall))
        else:
            print(0.0)  # no true positives -> F1 is 0 by convention
Пример #2
0
def train(dataset, args):
    """Train a GNNStack on a node-level dataset split via boolean masks.

    Prints the average training loss every epoch and the test accuracy
    every 10 epochs.
    """
    # One loader serves training and evaluation; per-batch masks pick the split.
    test_loader = loader = DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)

    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # train
    for epoch in range(args.epochs):
        model.train()
        running_loss = 0
        for batch in loader:
            opt.zero_grad()
            out = model(batch)
            target = batch.y
            # Score only the training nodes.
            out = out[batch.train_mask]
            target = target[batch.train_mask]
            batch_loss = model.loss(out, target)
            batch_loss.backward()
            opt.step()
            running_loss += batch_loss.item() * batch.num_graphs
        running_loss /= len(loader.dataset)
        print(running_loss)

        if epoch % 10 == 0:
            test_acc = test(loader, model)
            print(test_acc, '  test')
Пример #3
0
def train(dataset, task, args):
    """Train a GNNStack for graph- or node-level classification.

    Args:
        dataset: PyG-style dataset; sliced 80/20 for 'graph', masked for 'node'.
        task: 'graph' (separate test loader) or 'node' (mask-based splits).
        args: namespace providing batch_size, hidden_dim, epochs, etc.

    Raises:
        RuntimeError: if ``task`` is neither 'graph' nor 'node'.
    """
    if task == 'graph':
        # graph classification: separate dataloader for the held-out 20%
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size,
                            shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size,
                                 shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test; one loader serves both
        test_loader = loader = DataLoader(dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features,
                            args.hidden_dim,
                            dataset.num_classes,
                            args,
                            task=task)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # train
    best_val_acc = 0
    test_acc = 0

    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # restrict the loss to training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        print("Loss in Epoch {0}: {1}".format(epoch, total_loss))

        if epoch % 10 == 0:
            val_acc = test(loader, model, is_validation=True)
            # BUG FIX: evaluate test accuracy on the held-out test_loader;
            # the original called test(loader, ...), i.e. the training split
            # for the 'graph' task (identical loaders for 'node').
            tmp_test_acc = test(test_loader, model)
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                test_acc = tmp_test_acc
            print("Current Best Val Acc {0}, with Test Acc {1}".format(
                best_val_acc, test_acc))

    # BUG FIX: report the best validation accuracy; the original printed the
    # most recent val_acc and raised NameError when args.epochs == 0.
    print('Final Val Acc {0}, Test Acc {1}'.format(best_val_acc, test_acc))
Пример #4
0
def train(dataset, task, args):
    """Train a GNNStack or an APPNP model and plot test accuracy over epochs.

    Model choice is driven by ``args.model_type``; accuracy is sampled every
    100 epochs plus once at the end, then handed to ``plot_accuracy``.
    """
    # use mask to split train/validation/test
    test_loader = loader = DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)

    # build model
    if args.model_type != 'APPNP':
        model = models.GNNStack(dataset.num_node_features,
                                args.hidden_dim,
                                dataset.num_classes,
                                args,
                                task=task)
    else:
        alpha = 0.1  # change here to adjust alpha
        niter = 10  # change here to adjust the PageRank iteration count
        appnp_prop = models.PPRPowerIteration(dataset.data.edge_index, alpha,
                                              niter, args.dropout)
        model = models.APPNP(dataset.num_node_features,
                             args.hidden_dim,
                             dataset.num_classes,
                             appnp_prop,
                             args,
                             task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    accuracy = []
    # train
    for epoch in range(args.epochs):
        model.train()
        epoch_loss = 0
        for batch in loader:
            opt.zero_grad()
            logits = model(batch)[batch.train_mask]
            target = batch.y[batch.train_mask]
            loss = model.loss(logits, target)
            loss.backward()
            opt.step()
            epoch_loss += loss.item() * batch.num_graphs
        epoch_loss /= len(loader.dataset)
        print('Epoch: ', epoch, 'Training loss: ', epoch_loss)

        if epoch % 100 == 0:
            test_acc = test(loader, model)
            print('Test acc: ', test_acc)
            accuracy.append([epoch, test_acc])
    test_acc = test(loader, model)
    accuracy.append([args.epochs, test_acc])
    plot_accuracy(np.array(accuracy), args)
    print('Final test acc: ', test_acc)
Пример #5
0
def train(dataset, task, args):
    """Train a GNNStack, recording per-epoch loss and periodic test accuracy.

    Args:
        dataset: PyG-style dataset; sliced 80/20 for 'graph', masked for 'node'.
        task: 'graph' or 'node'.
        args: namespace providing batch_size, hidden_dim, epochs, etc.

    Raises:
        RuntimeError: for any other task string.
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size,
                            shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size,
                                 shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features,
                            args.hidden_dim,
                            dataset.num_classes,
                            args,
                            task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    loss_t = []
    acc = []
    # train
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # score only the training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        loss_t.append(total_loss)
        print(total_loss)

        if epoch % 10 == 0:
            # BUG FIX: evaluate on the held-out test_loader; the original
            # called test(loader, ...) — the training split for 'graph'
            # (the two loaders are identical for 'node').
            test_acc = test(test_loader, model)
            acc.append(test_acc)
            print(test_acc, '  test')
    print(loss_t)
    print(acc)
Пример #6
0
def train(dataset, task, args):
    """Train a GNNStack and log "epoch loss test_acc" lines to a text file.

    The log file is named "<task>_<model_type>.txt" and is written every
    10 epochs.

    Raises:
        RuntimeError: if ``task`` is neither 'graph' nor 'node'.
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        print("==> There are", data_size, "graphs in the dataset.")
        loader = DataLoader(
                dataset[:int(data_size * 0.8)], batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(
                dataset[int(data_size * 0.8):], batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        print("==> There are", dataset.data.edge_index.shape[1], "edges, and", dataset.data.y.shape[0], "nodes in the dataset.")
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim, dataset.num_classes,
                            args, task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # BUG FIX: open the log with a context manager so the handle is closed
    # even if training raises; the original opened it at function entry and
    # leaked it on any exception (including the 'Unknown task' path).
    with open(task + "_" + args.model_type + '.txt', 'w') as f1:
        # train
        for epoch in range(args.epochs):
            total_loss = 0
            model.train()
            for batch in loader:
                opt.zero_grad()
                pred = model(batch)
                label = batch.y
                if task == 'node':
                    pred = pred[batch.train_mask]
                    label = label[batch.train_mask]
                loss = model.loss(pred, label)
                loss.backward()
                opt.step()
                total_loss += loss.item() * batch.num_graphs
            total_loss /= len(loader.dataset)

            if epoch % 10 == 0:
                # BUG FIX: evaluate on the held-out split for 'graph' tasks;
                # the original measured accuracy on the training loader.
                test_acc = test(test_loader, model)
                print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                    epoch, total_loss, test_acc))
                f1.write("{} {:.4f} {:.4f}\n".format(
                    epoch, total_loss, test_acc))
Пример #7
0
def train(dataset, args):
    """Train a GNNStack over stratified CV splits, logging metrics per epoch.

    The whole dataset is treated as a single batch; each epoch iterates the
    stratified splits, training on the train indices and evaluating on the
    held-out indices of every split.
    """
    # For reproducibility
    torch.manual_seed(1)
    np.random.seed(1)
    random.seed(1)

    logger = Logger(model=args.model_type)

    # build model
    if args.use_refex:
        num_feats = NUM_FEATURES + NUM_ROLX_FEATURES
    else:
        num_feats = NUM_FEATURES
    model = models.GNNStack(
        num_feats,
        args.hidden_dim,
        3,  # dataset.num_classes
        args,
        torch.tensor([1, 0, 15], device=dev).float()  # weights for each class
    )
    if torch.cuda.is_available():
        model = model.cuda(dev)

    scheduler, opt = build_optimizer(args, model.parameters())
    skf, x, y = get_stratified_batches()

    # train
    for epoch in range(args.epochs):
        epoch_loss = 0
        accs, f1s, aucs, recalls = [], [], [], []
        model.train()
        num_splits = 0
        # Single batch (the full dataset); loop over CV splits instead.
        for tr_idx, te_idx in skf.split(x, y):
            tr_nodes, te_nodes = x[tr_idx], x[te_idx]
            opt.zero_grad()
            out = model(dataset)
            loss = model.loss(out[tr_nodes], dataset.y[tr_nodes])
            loss.backward()
            opt.step()
            epoch_loss += loss.item()
            num_splits += 1

            acc_score, f1, auc_score, recall = test(dataset, model, te_nodes)
            accs.append(acc_score)
            f1s.append(f1)
            aucs.append(auc_score)
            recalls.append(recall)

        epoch_loss /= num_splits
        acc_arr = np.array(accs)
        f1_arr = np.array(f1s)
        auc_arr = np.array(aucs)
        rec_arr = np.array(recalls)
        log_metrics = {
            'total_loss': epoch_loss,
            'acc': acc_arr,
            'f1': f1_arr,
            'auc': auc_arr,
            'recall': rec_arr
        }

        logger.log(log_metrics, epoch)
        if epoch % 5 == 0:
            logger.display_status(epoch, args.epochs, epoch_loss, acc_arr,
                                  f1_arr, auc_arr, rec_arr)
    logger.close()
Пример #8
0
def train(dataset, task, args):
    """Train a GNNStack, checkpointing the best model and plotting accuracy.

    Args:
        dataset: PyG-style dataset; shuffled and split 80/20 for 'graph',
            masked for 'node'.
        task: 'graph' or 'node'.
        args: namespace providing batch_size, hidden_dim, epochs, model_type.

    Raises:
        RuntimeError: for any other task string.
    """
    global device

    if task == 'graph':
        # graph classification: separate dataloader for test set
        # shuffle dataset before splitting
        data_size = len(dataset)
        idxs = np.arange(data_size).astype(int)
        np.random.shuffle(idxs)
        idxs = list(idxs)
        dataset = dataset[idxs]

        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size,
                            shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size,
                                 shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features,
                            args.hidden_dim,
                            dataset.num_classes,
                            args,
                            task=task)
    model = model.to(device)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # train
    test_accs = []
    best_acc = 0
    timestr = time.strftime("%Y%m%d-%H%M%S")
    ckpt_path = str(args.model_type) + timestr + '.pt'
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            batch = batch.to(device)
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        print(total_loss)

        if epoch % 10 == 0:
            if task == 'graph':
                test_acc = test(test_loader, model)
            else:
                test_acc = test(loader, model, is_validation=True)
            test_accs.append(test_acc)
            print(test_acc, '  test')
            # BUG FIX: only checkpoint when accuracy improves; the original
            # saved unconditionally outside this guard, so the "best model"
            # file actually held the most recently evaluated model.
            if test_acc > best_acc:
                best_acc = test_acc
                torch.save(model.state_dict(), ckpt_path)
            # plot accuracies; clear the figure first so curves from earlier
            # epochs do not pile up in the saved image
            plt.clf()
            x = range(0, epoch + 1, 10)
            plt.plot(x, test_accs)
            plt.savefig(str(args.model_type) + timestr + '.png')

    print(f'best achieved accuracy: {best_acc}')
    if model.task == 'node':
        # Reload the best checkpoint and report its full-test-set accuracy.
        best_model = models.GNNStack(dataset.num_node_features,
                                     args.hidden_dim,
                                     dataset.num_classes,
                                     args,
                                     task=task)
        best_model.load_state_dict(torch.load(ckpt_path))
        best_model = best_model.to(device)
        test_acc = test(loader, best_model, is_validation=False)
        print(f'test accuracy: {test_acc}')
Пример #9
0
def train(dataset, task, args):
    """Train a GNNStack, print per-epoch stats, and save an accuracy plot.

    Args:
        dataset: PyG-style dataset; shuffled and split 80/20 for 'graph',
            masked for 'node'.
        task: 'graph' or 'node'.
        args: namespace providing batch_size, hidden_dim, epochs, model_type.

    Raises:
        RuntimeError: for any other task string.
    """
    test_epoch, test_acc_per_epoch = [], []

    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        dataset.shuffle()
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size,
                            shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size,
                                 shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features,
                            args.hidden_dim,
                            dataset.num_classes,
                            args,
                            task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # train
    for epoch in range(args.epochs):
        total_loss = 0
        total_correct = 0
        total_examples = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
            total_correct += pred.max(dim=1)[1].eq(label).float().sum().item()
            # BUG FIX: count the targets actually scored this batch.
            total_examples += label.size(0)
        total_loss /= len(loader.dataset)
        # BUG FIX: divide by the number of scored targets; the original used
        # len(loader.dataset) (number of graphs), which is wrong for masked
        # node classification where the numerator counts nodes.
        total_acc = total_correct / max(total_examples, 1)

        if epoch % 1 == 0:
            # BUG FIX: evaluate on the held-out test_loader; the original
            # measured "test" accuracy on the training loader for 'graph'.
            test_acc = test(test_loader, model)
            print(
                f'epoch {epoch}: train loss - {total_loss:.4f}, train acc - {total_acc:.2%}, test acc - {test_acc:.2%}'
            )
            test_epoch.append(epoch)
            test_acc_per_epoch.append(test_acc)
    f, ax = plt.subplots(1, 1)
    ax.plot(np.array(test_epoch), np.array(test_acc_per_epoch))
    ax.set_title(f'{dataset.name} - {args.model_type}')
    ax.set_xlabel('epochs')
    ax.set_ylabel('accuracy')
    f.savefig(f'{dataset.name}_{args.model_type}.png',
              bbox_inches='tight',
              dpi=400)
Пример #10
0
def train(dataset, task, args):
    """Train with validation-based model selection and optional early stopping.

    Args:
        dataset: PyG-style dataset; sliced 80/20 for 'graph', masked for 'node'.
        task: 'graph' or 'node'.
        args: namespace providing batch_size, hidden_dim, epochs, etc.

    Returns:
        (epochs_list, vals): 1-based epoch indices and the validation accuracy
        at each, aligned element-for-element.

    Raises:
        RuntimeError: for any other task string.
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size,
                            shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size,
                                 shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')

    # build model
    model = models.GNNStack(dataset.num_node_features,
                            args.hidden_dim,
                            dataset.num_classes,
                            args,
                            task=task)
    model.to(device)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())

    # train
    vals = []
    tests = []
    best_val_acc = 0
    test_acc = 0
    early_stop = 1e9  # patience in epochs; effectively disabled by default
    stop_cnt = 0

    for epoch in range(1, args.epochs + 1):
        total_loss = 0
        model.train()
        for batch in loader:
            batch.to(device)
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)

        val_acc = test(loader, model, is_validation=True)
        # BUG FIX: test accuracy must come from the held-out test_loader
        # (identical to loader for 'node'; the real test split for 'graph').
        tmp_test_acc = test(test_loader, model)
        vals.append(val_acc)
        tests.append(tmp_test_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
            stop_cnt = 0
        else:
            stop_cnt += 1
        print("Loss in Epoch {:03d}: {:.4f}. ".format(epoch, total_loss),
              end="")
        print("Current Best Val Acc {:.4f}, with Test Acc {:.4f}".format(
            best_val_acc, test_acc))

        if stop_cnt >= early_stop:
            break

    print('Final Val Acc {0}, Test Acc {1}'.format(best_val_acc, test_acc))
    # BUG FIX: size the epoch axis by the epochs actually run, so the two
    # returned lists stay aligned when early stopping truncates training
    # (the original always returned range(1, args.epochs + 1)).
    return list(range(1, len(vals) + 1)), vals