Example no. 1
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Tox21':
        from dgl.data.chem import Tox21
        dataset = Tox21()

    trainset, valset, testset = split_dataset(dataset, args['train_val_test_split'])
    train_loader = DataLoader(trainset, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_classification)
    val_loader = DataLoader(valset, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_classification)
    test_loader = DataLoader(testset, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs_for_classification)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        # Interchangeable with other models
        if args['model'] == 'GCN':
            model = model_zoo.chem.GCNClassifier(in_feats=args['in_feats'],
                                                 gcn_hidden_feats=args['gcn_hidden_feats'],
                                                 classifier_hidden_feats=args['classifier_hidden_feats'],
                                                 n_tasks=dataset.n_tasks)
        elif args['model'] == 'GAT':
            model = model_zoo.chem.GATClassifier(in_feats=args['in_feats'],
                                                 gat_hidden_feats=args['gat_hidden_feats'],
                                                 num_heads=args['num_heads'],
                                                 classifier_hidden_feats=args['classifier_hidden_feats'],
                                                 n_tasks=dataset.n_tasks)

        loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']),
                                           reduction='none')
        optimizer = Adam(model.parameters(), lr=args['lr'])
        stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_roc_auc = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_roc_auc, model)
        print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, best validation roc-auc score {:.4f}'.format(
            epoch + 1, args['num_epochs'], val_roc_auc, stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_roc_auc = run_an_eval_epoch(args, model, test_loader)
    print('test roc-auc score {:.4f}'.format(test_roc_auc))
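
The examples in this listing lean on a small EarlyStopping utility (from the DGL / DGL-LifeSci example helpers) that is not reproduced here. Below is a minimal sketch of the interface these scripts assume; the class name matches the calls above, but the body and the checkpoint filename are illustrative assumptions rather than the library's actual code. The HAN scripts later in the listing pass both a validation loss and an accuracy to step(); that two-metric variant is not sketched.

import torch

class EarlyStopping:
    """Sketch (assumption): stop when the validation score has not improved
    for `patience` epochs; keep a checkpoint of the best model seen so far."""
    def __init__(self, patience=10, mode='higher', filename='early_stop.pth'):
        assert mode in ['higher', 'lower']  # 'higher': larger score is better (e.g. ROC-AUC)
        self.patience = patience
        self.mode = mode
        self.filename = filename
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def _improved(self, score):
        if self.mode == 'higher':
            return score > self.best_score
        return score < self.best_score

    def step(self, score, model):
        if self.best_score is None or self._improved(score):
            self.best_score = score
            self.counter = 0
            torch.save({'model_state_dict': model.state_dict()}, self.filename)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop

    def load_checkpoint(self, model):
        model.load_state_dict(torch.load(self.filename)['model_state_dict'])
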
Example no. 2
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(node_input_dim=args['node_in_feats'],
                                         edge_input_dim=args['edge_in_feats'],
                                         output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'],
                                      output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'],
                                         output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
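
collate_molgraphs is the collate_fn handed to every DataLoader in these molecular examples. Roughly speaking, it merges the per-molecule DGLGraphs into one batched graph and stacks the labels (and label masks, when the dataset has missing labels). The sketch below only approximates that behaviour and is not the exact library implementation.

import dgl
import torch

def collate_molgraphs(data):
    """Sketch (assumption): batch a list of (smiles, graph, label[, mask]) tuples."""
    if len(data[0]) == 3:
        smiles, graphs, labels = map(list, zip(*data))
        masks = None
    else:
        smiles, graphs, labels, masks = map(list, zip(*data))

    bg = dgl.batch(graphs)                 # one batched DGLGraph for the whole minibatch
    labels = torch.stack(labels, dim=0)
    if masks is None:
        masks = torch.ones(labels.shape)   # every label is observed
    else:
        masks = torch.stack(masks, dim=0)
    return smiles, bg, labels, masks
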
Example no. 3
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])
    dev = torch.device("cuda:0" if args['gpu'] >= 0 else "cpu")

    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

    model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(dev)

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.
          format(test_loss.item(), test_micro_f1, test_macro_f1))
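
The HAN examples (this one and nos. 7, 8 and 15) call score and evaluate helpers defined elsewhere in their repository. A plausible sketch, assuming scikit-learn's f1_score:

import torch
from sklearn.metrics import f1_score

def score(logits, labels):
    """Sketch (assumption): accuracy and micro/macro F1 from class logits."""
    _, indices = torch.max(logits, dim=1)
    prediction = indices.long().cpu().numpy()
    labels = labels.cpu().numpy()
    accuracy = (prediction == labels).sum() / len(prediction)
    micro_f1 = f1_score(labels, prediction, average='micro')
    macro_f1 = f1_score(labels, prediction, average='macro')
    return accuracy, micro_f1, macro_f1

def evaluate(model, g, features, labels, mask, loss_func):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    loss = loss_func(logits[mask], labels[mask])
    accuracy, micro_f1, macro_f1 = score(logits[mask], labels[mask])
    return loss, accuracy, micro_f1, macro_f1
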
Example no. 4
def main(args):
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        model = load_model(args)
        if args['model'] in ['SCHNET', 'MGCN']:
            model.set_mean_std(train_set.mean, train_set.std, args['device'])
        loss_fn = nn.MSELoss(reduction='none')
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args['lr'],
                                     weight_decay=args['weight_decay'])
        stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))

        if early_stop:
            break

    if test_set is not None:
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
Example no. 5
def main(args):
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(
        args)
    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set,
                             batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)
        loss_criterion = BCEWithLogitsLoss(
            pos_weight=dataset.task_pos_weights.to(args['device']),
            reduction='none')
        optimizer = Adam(model.parameters(), lr=args['lr'])
        stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion,
                          optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
Example no. 6
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Alchemy':
        from dgl.data.chem import TencentAlchemyDataset
        train_set = TencentAlchemyDataset(mode='dev')
        val_set = TencentAlchemyDataset(mode='valid')

    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_regression)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_regression)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'], output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'], output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

    loss_fn = nn.MSELoss()
    score_fn = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, score_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader, score_fn)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation score {:.4f}, best validation score {:.4f}'.format(
            epoch + 1, args['num_epochs'], val_score, stopper.best_score))
        if early_stop:
            break
Example no. 7
def train(gpu, args):
    rank = args.nr * args.gpus + gpu
    print(rank)
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.world_size,
                            rank=rank)
    cuda_string = 'cuda' + ':' + str(gpu)
    device = torch.device(
        cuda_string) if torch.cuda.is_available() else torch.device("cpu")

    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args.dataset)

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    print(train_mask.size())
    train_mask = th.split(train_mask,
                          math.ceil(len(train_mask) / args.gpus))[rank]
    labels = th.split(labels, math.ceil(len(labels) / args.gpus))[rank]
    features = th.split(features, math.ceil(len(features) / args.gpus))[rank]
    #g = th.split(g, math.ceil(len(g) / args.gpus))[rank]
    print(train_mask.size(), labels.size(), features.size(), len(g))
    print(type(g))
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    if args.hetero:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)

        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = g.to(device)
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = [graph.to(device) for graph in g]

    stopper = EarlyStopping(patience=args.patience)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.num_epochs):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.
          format(test_loss.item(), test_micro_f1, test_macro_f1))
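
Example no. 7 initialises the process group with init_method='env://' and wraps the model in DistributedDataParallel, so a launcher has to export MASTER_ADDR/MASTER_PORT and spawn one process per GPU. A typical launcher would look roughly like the following; args.nodes is an assumed field (the function above only uses args.nr, args.gpus and args.world_size):

import os
import torch.multiprocessing as mp

def launch(args):
    """Sketch (assumption): spawn one training process per GPU on a single node."""
    args.world_size = args.gpus * args.nodes   # args.nodes is an assumed launcher field
    os.environ['MASTER_ADDR'] = 'localhost'    # rendezvous address for init_method='env://'
    os.environ['MASTER_PORT'] = '12355'
    mp.spawn(train, nprocs=args.gpus, args=(args,))  # calls train(gpu, args) per process
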
Example no. 8
def main(args):
    # If args['hetero'] is True, g will be a heterogeneous graph;
    # otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example no. 9
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
        g = g.to(ctx)

    features = g.ndata['feat']
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    mask = g.ndata['train_mask']
    mask = mx.nd.array(np.nonzero(mask.asnumpy())[0], ctx=ctx)
    val_mask = g.ndata['val_mask']
    val_mask =  mx.nd.array(np.nonzero(val_mask.asnumpy())[0], ctx=ctx)
    test_mask = g.ndata['test_mask']
    test_mask =  mx.nd.array(np.nonzero(test_mask.asnumpy())[0], ctx=ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.alpha,
                args.residual)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(), labels[mask].squeeze())
            loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))
        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if args.early_stop:
            if stopper.step(val_accuracy, model):
                break
    print()

    if args.early_stop:
        model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
Example no. 10
def main(opt):
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)
    print(model)
    if opt['early_stop']:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = get_optimizer(opt['optimizer'],
                              parameters=model.parameters(),
                              lr=opt['lr'],
                              weight_decay=opt['weight_decay'])

    # initialize graph
    dur = []
    for epoch in range(opt['epochs']):
        # model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        # logits = model(features)
        # loss = loss_fcn(logits[train_mask], labels[train_mask])
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        loss, logits = train(model, optimizer, features, train_mask, labels)

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if opt['fastmode']:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if opt['early_stop']:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if opt['early_stop']:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example no. 11
def main(args):
    # load and preprocess dataset
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    num_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = add_self_loop(g)
    # g.remove_edges_from(nx.selfloop_edges(g))
    # g = DGLGraph(g)
    # g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print(f"Time Consuming {np.sum(dur)}, Overall time {time.time() - start}")
Example no. 12
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    
    if args.num_layers <= 0:
        raise ValueError("num_layers must be a positive integer")
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = HardGAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual,
                args.k)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example no. 13
def train_idgl(args):
    data = load_data(args)
    seed_init(seed=args.seed)
    dev = torch.device("cuda:0" if args.gpu >= 0 else "cpu")

    features = torch.FloatTensor(data.features)
    features = F.normalize(features, p=1, dim=1)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))
    # print(torch.where(test_mask)) # Same train/test split with different init_seed
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)
    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    model = IDGL(args, num_feats, n_classes, dev)

    print(model)
    es_checkpoint = 'temp/' + time.strftime('%m-%d %H-%M-%S',
                                            time.localtime()) + '.pt'
    stopper = EarlyStopping(patience=100, path=es_checkpoint)

    model.to(dev)
    adj = g.adjacency_matrix()
    # adj = normalize_adj_torch(adj.to_dense())
    adj = F.normalize(adj.to_dense(), dim=1, p=1)
    adj = adj.to(dev)

    # cla_loss = torch.nn.CrossEntropyLoss()
    cla_loss = torch.nn.NLLLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    h = None

    # ! Pretrain
    res_dict = {'parameters': args.__dict__}
    for epoch in range(args.pretrain_epochs):
        logits, _ = model.GCN(features, adj)
        loss = cla_loss(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        # Stop if an anomaly (e.g. NaN/Inf gradient) is detected during backward
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)
        print(
            f"Pretrain-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break
    print(f"Pretrain Test Accuracy: {test_acc:.4f}")
    print(f"{'=' * 10}Pretrain finished!{'=' * 10}\n\n")
    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    res_dict['res'] = {'pretrain_acc': f'{test_acc:.4f}'}
    # ! Train
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    for epoch in range(args.max_epoch):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        t, adj_sim_prev = 0, None
        logits, h, adj_sim, adj_feat = model(features,
                                             h=None,
                                             adj_ori=adj,
                                             adj_feat=None,
                                             mode='feat',
                                             norm_graph_reg_loss=args.ngrl)
        loss_adj_feat = cal_loss(args, cla_loss, logits, train_mask, labels,
                                 adj_sim, features)
        loss_list = [loss_adj_feat]
        ori_adj_norm = torch.norm(adj_sim.detach(), p=2)

        while iter_condition(args, adj_sim_prev, adj_sim, ori_adj_norm, t):
            t += 1
            adj_sim_prev = adj_sim.detach()
            logits, h, adj_sim, adj_agg = model(features,
                                                h,
                                                adj,
                                                adj_feat,
                                                mode='emb',
                                                norm_graph_reg_loss=args.ngrl)
            # exists_zero_lines(h)
            loss_adj_emb = cal_loss(args, cla_loss, logits, train_mask, labels,
                                    adj_sim, features)
            loss_list.append(loss_adj_emb)
        loss = torch.mean(torch.stack(loss_list))
        optimizer.zero_grad()

        # Stop if an anomaly (e.g. NaN/Inf gradient) is detected during backward
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)

        # print(
        #     f"Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f}")
        print(
            f"IDGL-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break
    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    print(f"Test Accuracy {test_acc:.4f}")
    res_dict['res']['IDGL_acc'] = f'{test_acc:.4f}'
    print(res_dict['res'])
    print(res_dict['parameters'])
    return res_dict
Example no. 14
def main(args):
    # load and preprocess dataset
    g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args)
    num_feats = features.shape[1]
    n_edges = g.number_of_edges()

    print("""----Data statistics------'
      #use cuda: %d
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.gpu, n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        ind_features = ind_features.cuda()
        labels = labels.cuda()
        ind_labels = ind_labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual,
                args.bias)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # loss_fcn is used below but never defined in the original snippet;
    # MSE is assumed here since the script reports an R2 regression metric.
    loss_fcn = torch.nn.MSELoss()

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        pred = model(features)
        loss = loss_fcn(pred[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_r2 = compute_r2(pred[train_mask], labels[train_mask])

        if args.fastmode:
            val_r2 = compute_r2(pred[val_mask], labels[val_mask])
        else:
            val_r2 = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_r2, model):
                    break

        if epoch > 3:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |"
              " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_r2,
                     val_r2, n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012")
    evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016")
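
Example no. 14 scores a regression model with compute_r2. A short sketch of the coefficient of determination in PyTorch, assuming predictions and targets share the same shape:

import torch

def compute_r2(pred, target):
    """Sketch (assumption): R^2 = 1 - SS_res / SS_tot."""
    target = target.float()
    ss_res = torch.sum((target - pred) ** 2)
    ss_tot = torch.sum((target - target.mean()) ** 2)
    return (1.0 - ss_res / ss_tot).item()
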
Example no. 15
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features

    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)

    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])

    features = {'movie': features_m, 'actor': features_a, 'director':features_d}
    
    in_size = {'actor': features_a.shape[1], 'movie': features_m.shape[1], 'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    model = HMSG(meta_paths = [['ma','am'], ['md', 'dm'], ['am'], ['dm']],
                in_size = in_size,
                hidden_size = args['hidden_units'],
                out_size = num_classes,
                aggre_type = 'attention',
                num_heads = args['num_heads'],
                dropout = args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)

        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
             'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
           epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(model, g, features, labels, test_mask, loss_fcn)

    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt',emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))

    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example no. 16
def train_model(model,
                loss,
                optimizer,
                dataloader,
                train_size,
                valid_size,
                model_name='weights',
                num_epochs=50):

    writer = SummaryWriter(comment='--{}'.format(model_name))
    es = EarlyStopping(patience=5)
    since = time.time()

    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        start = time.time()
        print("Epoch {}/{}".format(epoch, num_epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0.

            for inputs, labels in dataloader[phase]:
                inputs = inputs.to('cuda:0')
                labels = labels.to('cuda:0')

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss_ = loss(outputs, labels)

                    if phase == 'train':
                        loss_.backward()
                        optimizer.step()

                running_loss += loss_.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                del inputs, labels, outputs, preds
                torch.cuda.empty_cache()

            data_size = train_size if phase == 'train' else valid_size
            epoch_loss = running_loss / data_size
            epoch_acc = running_corrects / data_size

            if phase == 'train':
                writer.add_scalar('Loss/train', epoch_loss, epoch)
                writer.add_scalar('Accuracy/train', epoch_acc, epoch)
            else:
                writer.add_scalar('Loss/test', epoch_loss, epoch)
                writer.add_scalar('Accuracy/test', epoch_acc, epoch)

            print('{} -> Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            print('\ttime', time.time() - start)

            if phase == 'val':
                if es.step(epoch_acc):
                    time_elapsed = time.time() - since
                    print('Early Stopping')
                    print('Training complete in {:.0f}m {:.0f}s'.format(
                        time_elapsed // 60, time_elapsed % 60))
                    print('Best val Acc: {:4f}'.format(best_acc))
                    return

                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    print('Update best acc: {:4f}'.format(best_acc))
                    torch.save(model.state_dict(), '{}.pt'.format(model_name))

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
Example no. 17
def main(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size = 128
    learning_rate = 0.001
    num_epochs = 100
    set_random_seed()

    # Interchangeable with other Dataset
    dataset = Tox21()
    atom_data_field = 'h'

    trainset, valset, testset = split_dataset(dataset, [0.8, 0.1, 0.1])
    train_loader = DataLoader(
        trainset, batch_size=batch_size, collate_fn=collate_molgraphs)
    val_loader = DataLoader(
        valset, batch_size=batch_size, collate_fn=collate_molgraphs)
    test_loader = DataLoader(
        testset, batch_size=batch_size, collate_fn=collate_molgraphs)

    if args.pre_trained:
        num_epochs = 0
        model = model_zoo.chem.load_pretrained('GCN_Tox21')
    else:
        # Interchangeable with other models
        model = model_zoo.chem.GCNClassifier(in_feats=74,
                                             gcn_hidden_feats=[64, 64],
                                             n_tasks=dataset.n_tasks)
        loss_criterion = BCEWithLogitsLoss(pos_weight=torch.tensor(
            dataset.task_pos_weights).to(device), reduction='none')
        optimizer = Adam(model.parameters(), lr=learning_rate)
        stopper = EarlyStopping(patience=10)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        print('Start training')
        train_meter = Meter()
        for batch_id, batch_data in enumerate(train_loader):
            smiles, bg, labels, mask = batch_data
            atom_feats = bg.ndata.pop(atom_data_field)
            atom_feats, labels, mask = atom_feats.to(device), labels.to(device), mask.to(device)
            logits = model(atom_feats, bg)
            # Mask non-existing labels
            loss = (loss_criterion(logits, labels)
                    * (mask != 0).float()).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('epoch {:d}/{:d}, batch {:d}/{:d}, loss {:.4f}'.format(
                epoch + 1, num_epochs, batch_id + 1, len(train_loader), loss.item()))
            train_meter.update(logits, labels, mask)
        train_roc_auc = train_meter.roc_auc_averaged_over_tasks()
        print('epoch {:d}/{:d}, training roc-auc score {:.4f}'.format(
            epoch + 1, num_epochs, train_roc_auc))
        
        val_meter = Meter()
        model.eval()
        with torch.no_grad():
            for batch_id, batch_data in enumerate(val_loader):
                smiles, bg, labels, mask = batch_data
                atom_feats = bg.ndata.pop(atom_data_field)
                atom_feats, labels = atom_feats.to(device), labels.to(device)
                logits = model(atom_feats, bg)
                val_meter.update(logits, labels, mask)
        
        val_roc_auc = val_meter.roc_auc_averaged_over_tasks()
        if stopper.step(val_roc_auc, model):
            break

        print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, best validation roc-auc score {:.4f}'.format(
            epoch + 1, num_epochs, val_roc_auc, stopper.best_score))

    test_meter = Meter()
    model.eval()
    with torch.no_grad():
        for batch_id, batch_data in enumerate(test_loader):
            smiles, bg, labels, mask = batch_data
            atom_feats = bg.ndata.pop(atom_data_field)
            atom_feats, labels = atom_feats.to(device), labels.to(device)
            logits = model(atom_feats, bg)
            test_meter.update(logits, labels, mask)
    print('test roc-auc score {:.4f}'.format(test_meter.roc_auc_averaged_over_tasks()))
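
Meter accumulates logits, labels and masks across an epoch and reports a ROC-AUC averaged over the prediction tasks, skipping entries the mask marks as unlabeled. A rough sketch, assuming scikit-learn's roc_auc_score (not the library's exact implementation):

import torch
from sklearn.metrics import roc_auc_score

class Meter:
    """Sketch (assumption) of the Meter used in the Tox21 examples."""
    def __init__(self):
        self.y_pred, self.y_true, self.mask = [], [], []

    def update(self, logits, labels, mask):
        self.y_pred.append(logits.detach().cpu())
        self.y_true.append(labels.detach().cpu())
        self.mask.append(mask.detach().cpu())

    def roc_auc_averaged_over_tasks(self):
        y_pred = torch.sigmoid(torch.cat(self.y_pred, dim=0))
        y_true = torch.cat(self.y_true, dim=0)
        mask = torch.cat(self.mask, dim=0)
        scores = []
        for task in range(y_true.shape[1]):
            keep = mask[:, task] != 0   # ignore molecules without a label for this task
            scores.append(roc_auc_score(y_true[keep, task].numpy(),
                                        y_pred[keep, task].numpy()))
        return sum(scores) / len(scores)
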
Example no. 18
File: train.py  Project: zlmtk/dgl
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    mask = mx.nd.array(np.where(data.train_mask == 1))
    test_mask = mx.nd.array(np.where(data.test_mask == 1))
    val_mask = mx.nd.array(np.where(data.val_mask == 1))
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)
        features = features.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        mask = mask.as_in_context(ctx)
        test_mask = test_mask.as_in_context(ctx)
        val_mask = val_mask.as_in_context(ctx)
    # create graph
    g = data.graph
    # add self-loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                heads, elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)

    stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
            loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print(
            "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
            .format(epoch,
                    loss.asnumpy()[0], np.mean(dur),
                    n_edges / np.mean(dur) / 1000))
        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if stopper.step(val_accuracy, model):
            break
    model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
Example no. 19
def train(model, data, current_model_dir):
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-3,
                                 weight_decay=1e-5)  # Check lr and adam
    es = EarlyStopping(mode="min",
                       patience=30,
                       threshold=0.005,
                       threshold_mode="rel")  # Check threshold

    loss_meters = []

    model.train()

    dc_folder = '{}/dc_img'.format(current_model_dir)
    if not os.path.exists(dc_folder):
        os.mkdir(dc_folder)

    start_time = time.time()

    for e in range(EPOCHS):
        loss_meter = AverageMeter()

        for d in data:
            message, img = d

            output, loss = model(message, img)

            # clear stale gradients before backprop (missing in the original,
            # which would otherwise accumulate gradients across batches)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_meter.update(loss.item())

        print('Epoch {}, loss {}'.format(e, loss_meter.avg))

        # Save only the best model
        if e == 0 or loss_meter.avg < np.min([m.avg for m in loss_meters]):
            # First delete the old model
            if e > 0:
                old_model_files = [
                    '{}/{}'.format(current_model_dir, f)
                    for f in os.listdir(current_model_dir)
                    if f.endswith('_model')
                ]
                if len(old_model_files) > 0:
                    os.remove(old_model_files[0])

            torch.save(model.state_dict(),
                       '{}/{}_model'.format(current_model_dir, e))

        loss_meters.append(loss_meter)
        es.step(loss_meter.avg)

        if e % 10 == 0:
            pic = to_img(output.cpu().data if use_gpu else output.data)
            save_image(pic, '{}/image_{}.png'.format(dc_folder, e))

        if es.is_converged:
            print("Converged in epoch {}".format(e))
            break

    print('Training took {} seconds'.format(time.time() - start_time))

    pickle.dump(loss_meters,
                open('{}/{}_loss_meters.p'.format(current_model_dir, e), 'wb'))
    return loss_meters
Example no. 20
def main(args):
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels

    stopper = EarlyStopping(patience=args.patience)

    # check cuda
    if args.gpu >= 0:
        device = torch.device('cuda:%d' % args.gpu)
    else:
        device = torch.device('cpu')

    # create model
    model = SACN(num_entities=num_nodes,
                 num_relations=num_rels * 2 + 1,
                 args=args)

    # build graph
    g = dgl.graph([])
    g.add_nodes(num_nodes)
    src, rel, dst = train_data.transpose()
    # add reverse edges, reverse relation id is between [num_rels, 2*num_rels)
    src, dst = np.concatenate((src, dst)), np.concatenate((dst, src))
    rel = np.concatenate((rel, rel + num_rels))
    # get new train_data with reverse relation
    train_data_new = np.stack((src, rel, dst)).transpose()

    # unique train data by (h,r)
    train_data_new_pandas = pandas.DataFrame(train_data_new)
    train_data_new_pandas = train_data_new_pandas.drop_duplicates([0, 1])
    train_data_unique = np.asarray(train_data_new_pandas)

    g.add_edges(src, dst)
    # add graph self loop
    g.add_edges(g.nodes(), g.nodes())
    # add self loop relation type, self loop relation's id is 2*num_rels.
    rel = np.concatenate((rel, np.ones([num_nodes]) * num_rels * 2))
    print(g)
    entity_id = torch.LongTensor([i for i in range(num_nodes)])

    model = model.to(device)
    g = g.to(device)
    all_rel = torch.LongTensor(rel).to(device)
    entity_id = entity_id.to(device)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # process the triples and get all tails corresponding to (h,r)
    # here valid_dict and test_dict are not used.
    train_dict, valid_dict, test_dict, all_dict = preprocess_data(
        train_data, valid_data, test_data, num_rels)

    train_batch_prepare = TrainBatchPrepare(train_dict, num_nodes)

    # eval needs to use all the data in train_data, valid_data and test_data
    eval_batch_prepare = EvalBatchPrepare(all_dict, num_rels)

    train_dataloader = DataLoader(dataset=train_data_unique,
                                  batch_size=args.batch_size,
                                  collate_fn=train_batch_prepare.get_batch,
                                  shuffle=True,
                                  drop_last=False,
                                  num_workers=args.num_workers)

    valid_dataloader = DataLoader(dataset=valid_data,
                                  batch_size=args.batch_size,
                                  collate_fn=eval_batch_prepare.get_batch,
                                  shuffle=False,
                                  drop_last=False,
                                  num_workers=args.num_workers)

    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=args.batch_size,
                                 collate_fn=eval_batch_prepare.get_batch,
                                 shuffle=False,
                                 drop_last=False,
                                 num_workers=args.num_workers)

    # training loop
    print("start training...")
    for epoch in range(args.n_epochs):
        model.train()
        epoch_start_time = time.time()
        for step, batch_tuple in enumerate(train_dataloader):
            e1_batch, rel_batch, labels_one_hot = batch_tuple
            e1_batch = e1_batch.to(device)
            rel_batch = rel_batch.to(device)
            labels_one_hot = labels_one_hot.to(device)
            labels_one_hot = (
                (1.0 - 0.1) * labels_one_hot) + (1.0 / labels_one_hot.size(1))

            pred = model.forward(g, all_rel, e1_batch, rel_batch, entity_id)
            optimizer.zero_grad()
            loss = model.loss(pred, labels_one_hot)
            loss.backward()
            optimizer.step()

        print("epoch : {}".format(epoch))
        print("epoch time: {:.4f}".format(time.time() - epoch_start_time))
        print("loss: {}".format(loss.data))

        model.eval()
        if epoch % args.eval_every == 0:
            with torch.no_grad():
                val_mrr = ranking_and_hits(g, all_rel, model, valid_dataloader,
                                           'dev_evaluation', entity_id, device)
            if stopper.step(val_mrr, model):
                break

    print("training done")
    model.load_state_dict(torch.load('es_checkpoint.pt'))
    ranking_and_hits(g, all_rel, model, test_dataloader, 'test_evaluation',
                     entity_id, device)
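The training loop above smooths the one-hot tail labels with ((1.0 - 0.1) * labels) + 1/N before computing the loss, which is the usual label-smoothing trick for 1-vs-all link prediction. A small illustration with made-up numbers (the entity count and index below are hypothetical, not from the dataset):

import torch

num_entities = 10000
labels = torch.zeros(1, num_entities)
labels[0, 42] = 1.0                        # the single true tail for this (h, r)

smoothed = (1.0 - 0.1) * labels + 1.0 / labels.size(1)
print(smoothed[0, 42].item())              # ~0.9001 for the true tail
print(smoothed[0, 0].item())               # 0.0001 for every other entity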
Exemplo n.º 21
0
def train(model, data, property, current_model_dir):
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.001)  # Check lr and adam
    es = EarlyStopping(mode="min",
                       patience=30,
                       threshold=0.005,
                       threshold_mode="rel")  # Check threshold

    model_prop = str(property).split('.')[-1].lower()

    loss_meters = []

    model.train()

    print('Training model {}'.format(property))
    start_time = time.time()

    for e in range(EPOCHS):
        loss_meter = AverageMeter()

        for d in data:
            message, metadata = d

            if property == Property.COLOR:
                one_hot_prop = metadata[:, 0:3]
            elif property == Property.SHAPE:
                one_hot_prop = metadata[:, 3:6]
            elif property == Property.SIZE:
                one_hot_prop = metadata[:, 6:8]
            elif property == Property.ROW:
                one_hot_prop = metadata[:, 9:12]
            elif property == Property.COLUMN:
                one_hot_prop = metadata[:, 12:15]

            loss = model(message, one_hot_prop)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_meter.update(loss.item())

        print('Epoch {}, loss {}'.format(e, loss_meter.avg))

        # Save only the best model
        if e == 0 or loss_meter.avg < np.min([m.avg for m in loss_meters]):
            # First delete the old model
            if e > 0:
                old_model_files = [
                    '{}/{}'.format(current_model_dir, f)
                    for f in os.listdir(current_model_dir)
                    if f.endswith('_model')
                ]
                if len(old_model_files) > 0:
                    os.remove(old_model_files[0])

            torch.save(
                model.state_dict(),
                '{}/{}_{}_model'.format(current_model_dir, model_prop, e))

        loss_meters.append(loss_meter)
        es.step(loss_meter.avg)

        if es.is_converged:
            print("Converged in epoch {}".format(e))
            break

    print('Training took {} seconds'.format(time.time() - start_time))

    with open('{}/{}_{}_loss_meters.p'.format(current_model_dir, model_prop, e),
              'wb') as f:
        pickle.dump(loss_meters, f)
    return loss_meters
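Both of these trainers construct an EarlyStopping helper with mode, patience, threshold, and threshold_mode, call step() on the average loss, and poll is_converged. The class itself is not part of these listings; a minimal sketch consistent with that interface (loss minimisation only) might look like:

class EarlyStopping:
    """Minimal sketch of the loss-based stopper assumed above (mode='min' only)."""
    def __init__(self, mode="min", patience=30, threshold=0.005, threshold_mode="rel"):
        assert mode == "min", "sketch only covers minimising a loss"
        self.patience = patience
        self.threshold = threshold
        self.threshold_mode = threshold_mode
        self.best = None
        self.num_bad_epochs = 0
        self.is_converged = False

    def step(self, metric):
        if self.best is None:
            self.best = metric
            return
        # Relative or absolute improvement over the best value seen so far.
        if self.threshold_mode == "rel":
            improved = metric < self.best * (1.0 - self.threshold)
        else:
            improved = metric < self.best - self.threshold
        if improved:
            self.best = metric
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1
        if self.num_bad_epochs > self.patience:
            self.is_converged = True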
Exemplo n.º 22
0
def main(args):
    # load and preprocess dataset
    if args.dataset == 'reddit':
        data = RedditDataset()
    elif args.dataset in ['photo', "computer"]:
        data = MsDataset(args)
    else:
        data = load_data(args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    current_time = time.strftime('%d_%H:%M:%S', localtime())
    writer = SummaryWriter(log_dir='runs/' + current_time + '_' + args.sess, flush_secs=30)

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.bool().cuda()
        val_mask = val_mask.bool().cuda()
        test_mask = test_mask.bool().cuda()


    g = data.graph
    # add self loop
    if args.dataset != 'reddit':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    print('edge number %d'%(n_edges))
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.idrop,
                args.adrop,
                args.alpha,
                args.bias,
                args.residual, args.l0)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=150)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    dur = []
    time_used = 0

    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        loss_l0 = args.loss_l0*( model.gat_layers[0].loss)
        optimizer.zero_grad()
        (loss + loss_l0).backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
            time_used += dur[-1]

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        writer.add_scalar('edge_num/0', model.gat_layers[0].num, epoch)

        if args.fastmode:
            val_acc, loss = accuracy(logits[val_mask], labels[val_mask], loss_fcn)
        else:
            val_acc,_ = evaluate(model, features, labels, val_mask, loss_fcn)
            if args.early_stop:
                if stopper.step(val_acc, model):   
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))
        writer.add_scalar('loss', loss.item(), epoch)
        writer.add_scalar('f1/train_f1_mic', train_acc, epoch)
        writer.add_scalar('f1/test_f1_mic', val_acc, epoch)
        writer.add_scalar('time/time', time_used, epoch)

    writer.close()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc, _ = evaluate(model,features, labels, test_mask, loss_fcn)
    print("Test Accuracy {:.4f}".format(acc))
Exemplo n.º 23
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(g.selfloop_edges())
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if stopper.step(val_acc, model):
                break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))

    print()
    model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Exemplo n.º 24
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)

    with tf.device(device):

        features = tf.convert_to_tensor(data.features, dtype=tf.float32)
        labels = tf.convert_to_tensor(data.labels, dtype=tf.int64)
        train_mask = tf.convert_to_tensor(data.train_mask, dtype=tf.bool)
        val_mask = tf.convert_to_tensor(data.val_mask, dtype=tf.bool)
        test_mask = tf.convert_to_tensor(data.test_mask, dtype=tf.bool)
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
        #Edges %d
        #Classes %d 
        #Train samples %d
        #Val samples %d
        #Test samples %d""" %
              (n_edges, n_classes, train_mask.numpy().sum(),
               val_mask.numpy().sum(), test_mask.numpy().sum()))

        g = data.graph
        # add self loop
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()
        # create model
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, tf.nn.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)

        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        #     from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr,
                                             epsilon=1e-8)

        # initialize graph
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask],
                             logits=logits[train_mask]))
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay*tf.nn.l2_loss(weight)

                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])

            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                    epoch, np.mean(dur),
                    loss_value.numpy().item(), train_acc, val_acc,
                    n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
Exemplo n.º 25
0
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    # features = features.to(args['device'])
    features = [f.to(args['device']) for f in features]
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import SS_HAN
        model = SS_HAN(muti_meta_paths=
                    [[['pa', 'ap'], ['pf', 'fp']],
                    [['ap', 'pa']],
                    [['fp', 'pf']]],
                    in_size=features[0].shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])

        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    # loss_fcn = F.binary_cross_entropy_with_logits
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    # lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)

    print('*****************************Pre-training Starting*************************************')
    for epoch in range(args['pretrain_epochs']):
        model.train()

        for idx in range(args['batch_size']):
            embeddings = model(g, features)
            pos_edge_index, neg_edge_index = sample(g, 1)
            link_logits = model.calculate_loss(embeddings, pos_edge_index, neg_edge_index)
            link_labels = get_link_labels(pos_edge_index, neg_edge_index)
            loss = loss_fcn(link_logits, link_labels)
            link_probs = link_logits.sigmoid().detach().numpy()
            acc = roc_auc_score(link_labels, link_probs)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print('link_labels : {}'.format(link_labels))
            # print('link_probs : {}'.format(link_probs))
            print('epoch: {} || batch: {} || loss: {:.4f} || auc: {:.4f}'.format(epoch, idx, loss.item(), acc))
        # lr_scheduler.step()
        early_stop = stopper.step(model, epoch, loss.item(), acc)
        if early_stop:
            break
    filename = './model/ss-han_{}_{:.2f}_{:.2f}'.format(epoch, loss.item(), acc)
    torch.save(model.state_dict(), filename)

    print('*****************************Pre-training Ending*************************************')
    print('\n')
    print('*****************************Fine-tuning Starting*************************************')

    # freeze the pretrained parameter
    for parms in model.parameters():
        parms.requires_grad = False

    from model_hetero import Classifier
    classifier = Classifier(in_size=args['hidden_units']*args['num_heads'][-1],
                            hidden_size=128,
                            out_size=num_classes)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['fine-tuning_epochs']):
        model.train()

        embeddings = model(g, features)
        output = classifier(embeddings[0])
        loss = loss_fcn(output[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(output[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 \
            = evaluate(model, classifier, g, features, labels, val_mask, loss_fcn)
        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

    print('*****************************Fine-tuning Ending*************************************')

    test_loss, test_acc, test_micro_f1, test_macro_f1 \
        = evaluate(model, classifier, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
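The pre-training loop above scores a link-prediction objective with sample(), get_link_labels(), and roc_auc_score. get_link_labels is conventionally just a 1/0 target vector over the sampled positive and negative edges; a sketch under that assumption (not the example's own helper):

import torch

def get_link_labels(pos_edge_index, neg_edge_index):
    # Ones for the positive edges, zeros for the sampled negative edges.
    num_pos = pos_edge_index.size(1)
    num_neg = neg_edge_index.size(1)
    labels = torch.zeros(num_pos + num_neg)
    labels[:num_pos] = 1.0
    return labels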
Exemplo n.º 26
0
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)
    #g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    #val_mask, test_mask = load_data(args['dataset'])
    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')

    #g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]

    num_classes = 4
    features = features.to(args['device'])
    labels = labels.to(args['device'])

    #if args['hetero']:
    #from model_hetero import HAN
    #model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
    #in_size=features.shape[1],
    #hidden_size=args['hidden_units'],
    #out_size=num_classes,
    #num_heads=args['num_heads'],
    #dropout=args['dropout']).to(args['device'])
    #else:
    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):

        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)  #
        train_X, test_X, _, _ = train_test_split(train_X,
                                                 train_X,
                                                 test_size=0.2)  #

        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)

        #train_mask = torch.tensor(data.train_mask)
        #test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask
        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])
        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    _, embedding = model(g, features)
    embed_file = open("./node_embedding.txt", "w")
    for k in range(embedding.shape[0]):
        embed_file.write('a' + str(k) + " ")
        for l in range(embedding.shape[1] - 1):
            embed_file.write(str(embedding[k][l].item()) + " ")
        embed_file.write(str(embedding[k][-1].item()) + "\n")
    embed_file.close()
    #test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    #print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
    #test_loss.item(), test_micro_f1, test_macro_f1))
    torch.save(model.state_dict(), "./model_para.pt")
Exemplo n.º 27
0
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
        args.num_workers = 0
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)
    layers_feat = feat.expand(args.num_layers, -1, -1)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # calculate the distance of each edges and sample based on the distance
        dists = []
        p = []
        for i in range(args.num_layers):
            dist = {}
            graph.ndata['nd'] = th.tanh(model.layers[i].MLP(layers_feat[i]))
            for etype in graph.canonical_etypes:
                graph.apply_edges(_l1_dist, etype=etype)
                dist[etype] = graph.edges[etype].data['ed']
            dists.append(dist)
            p.append(model.layers[i].p)
        sampler = CARESampler(p, dists, args.num_layers)

        # train
        model.train()
        tr_loss = 0
        tr_recall = 0
        tr_auc = 0
        tr_blk = 0
        train_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                          train_idx,
                                                          sampler,
                                                          batch_size=args.batch_size,
                                                          shuffle=True,
                                                          drop_last=False,
                                                          num_workers=args.num_workers
                                                          )

        for input_nodes, output_nodes, blocks in train_dataloader:
            blocks = [b.to(device) for b in blocks]
            train_feature = blocks[0].srcdata['feature']
            train_label = blocks[-1].dstdata['label']
            logits_gnn, logits_sim = model(blocks, train_feature)

            # compute loss
            blk_loss = loss_fn(logits_gnn, train_label) + args.sim_weight * loss_fn(logits_sim, train_label)
            tr_loss += blk_loss.item()
            tr_recall += recall_score(train_label.cpu(), logits_gnn.argmax(dim=1).detach().cpu())
            tr_auc += roc_auc_score(train_label.cpu(), logits_gnn[:, 1].detach().cpu())
            tr_blk += 1

            # backward
            optimizer.zero_grad()
            blk_loss.backward()
            optimizer.step()

        # Reinforcement learning module
        model.RLModule(graph, epoch, rl_idx, dists)

        # validation
        model.eval()
        val_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                        val_idx,
                                                        sampler,
                                                        batch_size=args.batch_size,
                                                        shuffle=True,
                                                        drop_last=False,
                                                        num_workers=args.num_workers
                                                        )

        val_recall, val_auc, val_loss = evaluate(model, loss_fn, val_dataloader, device)

        # Print out performance
        print("In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
              "Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".
              format(epoch, tr_recall / tr_blk, tr_auc / tr_blk, tr_loss / tr_blk, val_recall, val_auc, val_loss))

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test with mini batch after all epoch
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))
    test_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                     test_idx,
                                                     sampler,
                                                     batch_size=args.batch_size,
                                                     shuffle=True,
                                                     drop_last=False,
                                                     num_workers=args.num_workers
                                                     )

    test_recall, test_auc, test_loss = evaluate(model, loss_fn, test_dataloader, device)

    print("Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(test_recall, test_auc, test_loss))
Exemplo n.º 28
0
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # Training and validation using a full graph
        model.train()
        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + \
                  args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])

        tr_recall = recall_score(labels[train_idx].cpu(), logits_gnn.data[train_idx].argmax(dim=1).cpu())
        tr_auc = roc_auc_score(labels[train_idx].cpu(), softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu())

        # validation
        val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) + \
                   args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
        val_recall = recall_score(labels[val_idx].cpu(), logits_gnn.data[val_idx].argmax(dim=1).cpu())
        val_auc = roc_auc_score(labels[val_idx].cpu(), softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu())

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # Print out performance
        print("Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}"
              .format(epoch, tr_recall, tr_auc, tr_loss.item(), val_recall, val_auc, val_loss.item()))

        # Adjust p value with reinforcement learning module
        model.RLModule(graph, epoch, rl_idx)

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test after all epoch
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))

    # forward
    logits_gnn, logits_sim = model.forward(graph, feat)

    # compute loss
    test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + \
                args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
    test_recall = recall_score(labels[test_idx].cpu(), logits_gnn[test_idx].argmax(dim=1).cpu())
    test_auc = roc_auc_score(labels[test_idx].cpu(), softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu())

    print("Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(test_recall, test_auc, test_loss.item()))
Exemplo n.º 29
0
class ConvModel(Model):
    def __init__(self, config):
        super().__init__(config)
        if config.type == 'conv':
            # self.dynamics = ConvTransitionModel2().cuda()
            self.dynamics = ConvTransitionModel2_2().cuda()  # uses corrected action
            self.get_dataset_sample = self.get_dataset_sample_no_speed
            self.criterion = F.mse_loss
        elif config.type == 'conv_speed':
            self.dynamics = ConvTransitionModel3().cuda()
            self.get_dataset_sample = self.get_dataset_sample_with_speed
            self.criterion = F.mse_loss
        elif config.type == 'class':
            # self.dynamics = ClassificationModel().cuda()
            # self.dynamics = ClassificationModel2().cuda()  # uses corrected phase action
            self.dynamics = ClassificationModel3().cuda()  # uses limited phase history
            self.get_dataset_sample = self.get_dataset_sample_for_classification
            self.criterion = torch.nn.BCELoss()
        elif config.type == 'latent_fc':
            self.dynamics = LatentFCTransitionModel().cuda()
            self.get_dataset_sample = self.get_dataset_sample_for_latent_fc
            self.criterion = F.mse_loss
        else:
            raise NotImplementedError

        self.optim = torch.optim.Adam(self.dynamics.parameters())
        self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
        self.set_epoch_length()
        self.writer = SummaryWriter(log_dir=config.logdir, purge_step=0)

    def preprocess(self,):
        pass

    # def get_sample(self):
    #     return get_dataset_sample(self._dataset)
        # danijar style get sample
        # yield method
        # choose any episode
        # why yield episode and not sample.

        # while true
            # for files in directory:
                # if not in cache add to cache
            # for i in random set of cache:
                # length limitation?
                # yield i episode
        
        # while true
            # check for files in dir, add new files to cache
            # for i in train_steps number of episodes (sampled from episode cache):
                # yield a sample of given length
        pass

    def get_dataset_sample_no_speed(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:,  0, 0, 3, :, 0].numpy()).cuda()
        sample['y'] = self.preprocess(torch.Tensor(s['x'][:, 1, :, :, :, 0].numpy()))
        sample['v'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy())) + 0.5
        sample['x'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 0].numpy()))
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        
        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        # sample['action'] = s['action'].numpy()
        return sample

    def get_dataset_sample_with_speed(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = self.preprocess(torch.Tensor(s['phases'][:,  0, 0, 3, :, 0].numpy()))
        sample['y'] = self.preprocess(torch.Tensor(s['x'][:, 1, :, :, :, :].numpy())).permute(0, 4, 2, 3, 1).squeeze(-1).contiguous()
        sample['v'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy())) + 0.5
        sample['x'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, :].numpy())).permute(0, 4, 2, 3, 1).squeeze(-1).contiguous()
        
        sample['x'][:, 1] = sample['x'][:, 1] + 0.5
        sample['y'][:, 1] = sample['y'][:, 1] + 0.5

        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        # sample['action'] = s['action'].numpy()
        return sample

    def get_dataset_sample_for_classification(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:,  0, 0, 3, :, 0].numpy()).cuda()
        sample['y'] = torch.Tensor(s['x'][:, 1, :, :, :, 0].numpy()).cuda()
        sample['v'] = torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy()).cuda()
        sample['x'] = torch.Tensor(s['x'][:, 0, :, :, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        sample['phase_action'] = torch.Tensor(s['corrected_p_action'][:, 0].numpy()).cuda()

        # classification model only works on the last lane
        sample['x'] = sample['x'][:, 0, -1]
        sample['y'] = sample['y'][:, 0, -1]
        
        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        return sample

    def get_dataset_sample_for_classification_kstep(self, dataset):
        # to see accuracy of k-step predictions
        # need formatted samples of higher batch_length
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:,  :, 0, 3, :, 0].numpy()).cuda()
        sample['x'] = torch.Tensor(s['x'][:, :, :, :, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :].numpy()).cuda()
        sample['phase_action'] = torch.Tensor(s['corrected_p_action'][:, :].numpy()).cuda()

        # classification model only works on the last lane
        sample['x'] = sample['x'][:, :, 0, -1]

        sample['reward'] = s['reward'].numpy()
        return sample

    def get_dataset_sample_for_latent_fc(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:,  0, 0, 3, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        sample['reward'] = s['reward'].numpy()

        mu = (torch.Tensor(s['mu'].numpy())).cuda()
        logvar = (torch.Tensor(s['logvar'].numpy())).cuda()
        latent = reparameterize(mu, logvar)

        sample['x'] = latent[:, 0]
        sample['y'] = latent[:, 1]
        return sample

    def preprocess(self, x):
        x = x - 0.5
        return x.cuda()

    def set_epoch_length(self):
        """
        These many number of batches when sampled from the dataset would lead to 1 epoch.
        """
        num_episodes = len(self.train_eps)
        episode_length = 500
        batch_length = self._c.batch_length
        batch_size = self._c.batch_size
        self.epoch_length = ceil(num_episodes * (episode_length - (batch_length - 1)) / batch_size)

        test_num_episodes = len(self.test_eps)
        self.test_epoch_length = ceil(test_num_episodes * (episode_length - (batch_length - 1)) / batch_size)

    def batch_update_model(self):
        # calculate loss
        # loss.backward()
        # optim.step()

        sample = self.get_sample()
        loss = self._loss(sample)
        loss.backward()
        self.optim.step()

    def train(self):
        cur_best = None
        for epoch in range(self._c.epochs):
            self.train_dynamics(epoch)
            test_loss = self.test(epoch)
            # scheduler.step(test_loss)
            self.earlystopping.step(test_loss)
            self.writer.file_writer.flush()

            # checkpointing
            best_filename = self._c.logdir / 'best.tar'
            filename = self._c.logdir / f'checkpoint_{epoch}.tar'
            is_best = not cur_best or test_loss < cur_best
            if is_best:
                cur_best = test_loss
            
            if is_best or (epoch % 10 == 0):
                checkpoint = {
                    'epoch': epoch,
                    'state_dict': self.dynamics.state_dict(),
                    'precision': test_loss,
                    'optimizer': self.optim.state_dict(),
                    'earlystopping': self.earlystopping.state_dict(),
                    # 'scheduler': scheduler.state_dict(),
                }
                save_checkpoint(checkpoint, is_best, filename, best_filename)

            if self.earlystopping.stop:
                print("End of Training because of early stopping at epoch {}".format(epoch))
                break

    def train_dynamics(self, epoch):
        print('=======================> epoch:', epoch)
        self.dynamics.train()
        train_loss = 0
        t1 = time.time()
        for u in range(self.epoch_length):
            s = self.get_dataset_sample(self._dataset)
            self.optim.zero_grad()
            y_pred = self.dynamics(s)
            loss = self.criterion(y_pred, s['y'])
            loss.backward()
            train_loss += loss.detach()  # detach so the graph is not kept across the epoch
            self.optim.step()
        
            if (u % int(self.epoch_length/min(self.epoch_length, 5)) == 0):
                t2 = time.time()
                print(u, round(t2-t1, 2), '{:.10f}'.format(loss.item() / self._c.batch_size))
        
        norm_train_loss = (train_loss / (self.epoch_length * self._c.batch_size)).item()
        self.writer.add_scalar('train/loss', norm_train_loss, epoch)
        print('====> Epoch: {} Average loss: {:.10f}'.format(epoch, norm_train_loss))
    
    def test(self, epoch):
        self.dynamics.eval()
        test_loss = 0
        for u in range(self.test_epoch_length):
            s = self.get_dataset_sample(self._test_dataset)
            y_pred = self.dynamics(s)
            test_loss += F.mse_loss(y_pred, s['y'])
        
        norm_test_loss = (test_loss / (self.test_epoch_length * self._c.batch_size)).item()
        self.writer.add_scalar('test/loss', norm_test_loss, epoch)
        print('====> Test set loss: {:.10f}'.format(norm_test_loss))
        print()
        return norm_test_loss

    def save(self):
        raise NotImplementedError

    def load(self):
        raise NotImplementedError

    def _loss(self):
        raise NotImplementedError

    def create_reconstructions(self):
        pass
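ConvModel above (and VehModel in the next example) checkpoints through save_checkpoint(checkpoint, is_best, filename, best_filename), and the latent_fc sample path calls reparameterize(mu, logvar); neither helper appears in these listings. Minimal sketches under the usual conventions:

import shutil
import torch

def save_checkpoint(state, is_best, filename, best_filename):
    # Always write the per-epoch checkpoint; copy it over best.tar when it improves.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

def reparameterize(mu, logvar):
    # Standard VAE reparameterisation: z = mu + sigma * eps, eps ~ N(0, I).
    std = torch.exp(0.5 * logvar)
    return mu + std * torch.randn_like(std)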
Exemplo n.º 30
0
class VehModel(Model):
    def __init__(self, config):
        super().__init__(config)
        self.dynamics = VehicleTransitionModel().cuda()
        self.optim = torch.optim.Adam(self.dynamics.parameters())
        self.earlystopping = EarlyStopping(
            patience=self._c.early_stop_patience)
        self.set_epoch_length()

    def preprocess(self, ):
        pass

        # def get_sample(self):
        #     return get_dataset_sample(self._dataset)
        # danijar style get sample
        # yield method
        # choose any episode
        # why yield episode and not sample.

        # while true
        # for files in directory:
        # if not in cache add to cache
        # for i in random set of cache:
        # length limitation?
        # yield i episode

        # while true
        # check for files in dir, add new files to cache
        # for i in train_steps number of episodes (sampled from episode cache):
        # yield a sample of given length
        pass

    def get_dataset_sample(self, dataset):
        sample = next(dataset)
        sample = self.preprocess(sample)
        return sample

    def preprocess(self, e):
        bs = self._c.batch_size
        e['x'] = torch.Tensor(e['x'].numpy()).reshape(bs, -1).cuda()
        e['y'] = torch.Tensor(e['y'].numpy()).reshape(bs, -1).cuda()
        e['phases'] = torch.Tensor(e['phases'].numpy()).reshape(bs, -1).cuda()

        e['x'][:, [0, 2]] = e['x'][:, [0, 2]] / 200
        e['x'][:, [1, 3]] = e['x'][:, [1, 3]] / 35

        e['y'][:, 0] = e['y'][:, 0] / 200
        e['y'][:, 1] = e['y'][:, 1] / 35
        return e

    def set_epoch_length(self):
        """
        These many number of batches when sampled from the dataset would lead to 1 epoch.
        """
        num_episodes = len(self.train_eps)
        episode_length = 500
        batch_length = self._c.batch_length
        batch_size = self._c.batch_size
        self.epoch_length = ceil(
            num_episodes * (episode_length - (batch_length - 1)) / batch_size)

        test_num_episodes = len(self.test_eps)
        self.test_epoch_length = ceil(test_num_episodes *
                                      (episode_length -
                                       (batch_length - 1)) / batch_size)

    def batch_update_model(self):
        # calculate loss
        # loss.backward()
        # optim.step()

        sample = self.get_sample()
        loss = self._loss(sample)
        loss.backward()
        self.optim.step()

    def train(self):
        cur_best = None
        for epoch in range(self._c.epochs):
            self.train_dynamics(epoch)
            test_loss = self.test()
            # scheduler.step(test_loss)
            self.earlystopping.step(test_loss)

            # checkpointing
            best_filename = self._c.logdir / 'best.tar'
            filename = self._c.logdir / f'checkpoint_{epoch}.tar'
            is_best = not cur_best or test_loss < cur_best
            if is_best:
                cur_best = test_loss

            if is_best or (epoch % 10 == 0):
                checkpoint = {
                    'epoch': epoch,
                    'state_dict': self.dynamics.state_dict(),
                    'precision': test_loss,
                    'optimizer': self.optim.state_dict(),
                    'earlystopping': self.earlystopping.state_dict(),
                    # 'scheduler': scheduler.state_dict(),
                }
                save_checkpoint(checkpoint, is_best, filename, best_filename)

            if self.earlystopping.stop:
                print("End of Training because of early stopping at epoch {}".
                      format(epoch))
                break

    def train_dynamics(self, epoch):
        print('=======================> epoch:', epoch)
        self.dynamics.train()  # test() leaves the model in eval mode
        train_loss = 0
        t1 = time.time()
        for u in range(self.epoch_length):
            s = self.get_dataset_sample(self._dataset)
            self.optim.zero_grad()
            y_pred = self.dynamics(s['x'], s['phases'])
            loss = F.mse_loss(y_pred, s['y'])
            loss.backward()
            train_loss += loss.detach()  # detach so the graph is not kept across the epoch
            self.optim.step()

            if (u % int(self.epoch_length / min(self.epoch_length, 20)) == 0):
                t2 = time.time()
                print(u, round(t2 - t1, 2),
                      '{:.10f}'.format(loss.item() / self._c.batch_size))

        print('====> Epoch: {} Average loss: {:.10f}'.format(
            epoch, train_loss / (self.epoch_length * self._c.batch_size)))

    def test(self):
        self.dynamics.eval()
        test_loss = 0
        for u in range(self.test_epoch_length):
            s = self.get_dataset_sample(self._test_dataset)
            y_pred = self.dynamics(s['x'], s['phases'])
            test_loss += F.mse_loss(y_pred, s['y'])

        test_loss /= (self.test_epoch_length * self._c.batch_size)
        print('====> Test set loss: {:.10f}'.format(test_loss))
        print()
        return test_loss

    def save(self):
        raise NotImplementedError

    def load(self):
        raise NotImplementedError

    def _loss(self):
        raise NotImplementedError

    def create_reconstructions(self):
        pass