Exemplo n.º 1
0
def test_train(tmpdir):
    """Smoke-test the GraphGym PyTorch Lightning training loop.

    Runs a single epoch (capped at 4 optimizer steps) of the experiment
    described by the module-level ``cfg``/``args`` (defined outside this
    view) and removes every artifact written under *tmpdir* afterwards.
    """
    import pytorch_lightning as pl

    # Populate the global GraphGym config; redirect all outputs and the
    # dataset cache into the pytest tmpdir (random suffixes avoid
    # collisions between concurrently running tests).
    load_cfg(cfg, args)
    cfg.out_dir = osp.join(tmpdir, str(random.randrange(sys.maxsize)))
    cfg.run_dir = osp.join(tmpdir, str(random.randrange(sys.maxsize)))
    cfg.dataset.dir = osp.join(tmpdir, 'pyg_test_datasets', 'Planetoid')

    set_out_dir(cfg.out_dir, args.cfg_file)
    dump_cfg(cfg)  # persist the resolved config before the run
    set_printing()

    seed_everything(cfg.seed)
    auto_select_device()
    set_run_dir(cfg.out_dir)

    # Build loaders and model from the global config.
    loaders = create_loader()
    model = create_model()
    cfg.params = params_count(model)
    logger = LoggerCallback()
    # max_steps=4 keeps the smoke test fast regardless of dataset size.
    trainer = pl.Trainer(max_epochs=1, max_steps=4, callbacks=logger,
                         log_every_n_steps=1)
    train_loader, val_loader = loaders[0], loaders[1]
    trainer.fit(model, train_loader, val_loader)

    shutil.rmtree(cfg.out_dir)  # clean up run artifacts
Exemplo n.º 2
0
def main():
    """Graph classification on IMDB-BINARY with Lightning and DDP-spawn."""
    seed_everything(42)

    dataset = TUDataset(osp.join('data', 'TUDataset'), 'IMDB-BINARY',
                        pre_transform=T.OneHotDegree(135))
    dataset = dataset.shuffle()

    # 10% test / 10% val / 80% train split of the shuffled dataset.
    num_graphs = len(dataset)
    test_dataset = dataset[:num_graphs // 10]
    val_dataset = dataset[num_graphs // 10:2 * num_graphs // 10]
    train_dataset = dataset[2 * num_graphs // 10:]

    datamodule = LightningDataset(train_dataset, val_dataset, test_dataset,
                                  batch_size=64, num_workers=4)

    model = Model(dataset.num_node_features, dataset.num_classes)

    # One process per visible GPU; keep only the best checkpoint by val_acc.
    best_ckpt = pl.callbacks.ModelCheckpoint(monitor='val_acc', save_top_k=1)
    trainer = pl.Trainer(
        strategy=pl.strategies.DDPSpawnStrategy(find_unused_parameters=False),
        accelerator='gpu',
        devices=torch.cuda.device_count(),
        max_epochs=50,
        log_every_n_steps=5,
        callbacks=[best_ckpt],
    )

    trainer.fit(model, datamodule)
    trainer.test(ckpt_path='best', datamodule=datamodule)
Exemplo n.º 3
0
def test_run_single_graphgym(auto_resume, skip_train_eval, use_trivial_metric):
    """End-to-end GraphGym run via ``GraphGymDataModule`` and the Lightning
    ``train`` entry point.

    Parametrized (pytest fixtures not visible here) over auto-resume,
    skipping evaluation on the training split, and registering a custom
    'trivial' metric used for checkpoint selection.
    """
    Args = namedtuple('Args', ['cfg_file', 'opts'])
    root = osp.join(osp.dirname(osp.realpath(__file__)))
    args = Args(osp.join(root, 'example_node.yml'), [])

    # Load the example config; unique /tmp suffixes prevent collisions
    # between concurrently running parameterizations.
    load_cfg(cfg, args)
    cfg.out_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.run_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.dataset.dir = osp.join('/', 'tmp', 'pyg_test_datasets', 'Planetoid')
    cfg.train.auto_resume = auto_resume

    set_out_dir(cfg.out_dir, args.cfg_file)
    dump_cfg(cfg)
    set_printing()

    seed_everything(cfg.seed)
    auto_select_device()
    set_run_dir(cfg.out_dir)

    cfg.train.skip_train_eval = skip_train_eval
    # Checkpointing is only exercised in one parameter combination.
    cfg.train.enable_ckpt = use_trivial_metric and skip_train_eval
    if use_trivial_metric:
        # Register once; the metric registry persists across test runs.
        if 'trivial' not in register.metric_dict:
            register.register_metric('trivial', trivial_metric)
        global num_trivial_metric_calls
        # Reset the call counter — presumably incremented inside
        # trivial_metric (defined elsewhere); confirm against that helper.
        num_trivial_metric_calls = 0
        cfg.metric_best = 'trivial'
        cfg.custom_metrics = ['trivial']
    else:
        cfg.metric_best = 'auto'
        cfg.custom_metrics = []

    datamodule = GraphGymDataModule()
    assert len(datamodule.loaders) == 3  # train / val / test

    # Sanity-check the model structure produced by the config.
    model = create_model()
    assert isinstance(model, torch.nn.Module)
    assert isinstance(model.encoder, FeatureEncoder)
    assert isinstance(model.mp, GNNStackStage)
    assert isinstance(model.post_mp, GNNNodeHead)
    assert len(list(model.pre_mp.children())) == cfg.gnn.layers_pre_mp

    optimizer, scheduler = model.configure_optimizers()
    assert isinstance(optimizer[0], torch.optim.Adam)
    assert isinstance(scheduler[0], torch.optim.lr_scheduler.CosineAnnealingLR)

    cfg.params = params_count(model)
    assert cfg.params == 23883  # pinned parameter count for this config

    train(model,
          datamodule,
          logger=True,
          trainer_config={"enable_progress_bar": False})

    # The checkpoint directory must exist iff checkpointing was enabled.
    assert osp.isdir(get_ckpt_dir()) is cfg.train.enable_ckpt

    agg_runs(cfg.out_dir, cfg.metric_best)

    shutil.rmtree(cfg.out_dir)  # clean up
Exemplo n.º 4
0
def test_seed_everything():
    """``seed_everything(0)`` must make random/numpy/torch reproducible."""
    seed_everything(0)

    # Draw two values from each RNG and compare against pinned sequences.
    for expected in (49, 97):
        assert random.randint(0, 100) == expected
    for expected in (44, 47):
        assert np.random.randint(0, 100) == expected
    for expected in (44, 39):
        assert int(torch.randint(0, 100, (1, ))) == expected
Exemplo n.º 5
0
def main():
    """Train a relational GNN on OGB data on a single GPU."""
    seed_everything(42)
    data = DataModule('../../data/OGB')
    net = RelationalGNN(data.metadata(), hidden_channels=64, out_channels=349,
                        dropout=0.0)
    # Keep only the best checkpoint as ranked by validation accuracy.
    best_ckpt = ModelCheckpoint(monitor='val_acc', save_top_k=1)
    runner = Trainer(accelerator='gpu', devices=1, max_epochs=20,
                     callbacks=[best_ckpt])

    runner.fit(net, data)
    runner.test()
Exemplo n.º 6
0
def test_graphgym_module(tmpdir):
    """Unit-test the GraphGym ``LightningModule``: construction, optimizer
    configuration, and one manual train/val/test step per data split.

    Relies on module-level ``cfg`` and ``args`` (defined outside this view).
    """
    import pytorch_lightning as pl

    # Point all outputs and the dataset cache at the pytest tmpdir.
    load_cfg(cfg, args)
    cfg.out_dir = osp.join(tmpdir, str(random.randrange(sys.maxsize)))
    cfg.run_dir = osp.join(tmpdir, str(random.randrange(sys.maxsize)))
    cfg.dataset.dir = osp.join(tmpdir, 'pyg_test_datasets', 'Planetoid')

    set_out_dir(cfg.out_dir, args.cfg_file)
    dump_cfg(cfg)
    set_printing()

    seed_everything(cfg.seed)
    auto_select_device()
    set_run_dir(cfg.out_dir)

    loaders = create_loader()
    assert len(loaders) == 3  # train / val / test

    model = create_model()
    assert isinstance(model, pl.LightningModule)

    optimizer, scheduler = model.configure_optimizers()
    assert isinstance(optimizer[0], torch.optim.Adam)
    assert isinstance(scheduler[0], torch.optim.lr_scheduler.CosineAnnealingLR)

    cfg.params = params_count(model)
    assert cfg.params == 23880  # pinned parameter count for this config

    # Every *_step is expected to return exactly these keys.
    keys = {"loss", "true", "pred_score", "step_end_time"}
    # test training step
    batch = next(iter(loaders[0]))
    batch.to(model.device)
    outputs = model.training_step(batch)
    assert keys == set(outputs.keys())
    assert isinstance(outputs["loss"], torch.Tensor)

    # test validation step
    batch = next(iter(loaders[1]))
    batch.to(model.device)
    outputs = model.validation_step(batch)
    assert keys == set(outputs.keys())
    assert isinstance(outputs["loss"], torch.Tensor)

    # test test step
    batch = next(iter(loaders[2]))
    batch.to(model.device)
    outputs = model.test_step(batch)
    assert keys == set(outputs.keys())
    assert isinstance(outputs["loss"], torch.Tensor)

    shutil.rmtree(cfg.out_dir)  # clean up run artifacts
Exemplo n.º 7
0
def test_run_single_graphgym():
    """End-to-end GraphGym run through the classic (non-Lightning) pipeline:
    explicit loaders, loggers, optimizer/scheduler configs, and ``train``.
    """
    Args = namedtuple('Args', ['cfg_file', 'opts'])
    root = osp.join(osp.dirname(osp.realpath(__file__)))
    args = Args(osp.join(root, 'example_node.yml'), [])

    # Load the example config; unique /tmp suffixes prevent collisions.
    load_cfg(cfg, args)
    cfg.out_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.run_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.dataset.dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dump_cfg(cfg)
    set_printing()

    seed_everything(cfg.seed)
    auto_select_device()
    set_run_dir(cfg.out_dir, args.cfg_file)

    loaders = create_loader()
    assert len(loaders) == 3  # train / val / test

    loggers = create_logger()
    assert len(loggers) == 3  # one logger per split

    # Sanity-check the model structure produced by the config.
    model = create_model()
    assert isinstance(model, torch.nn.Module)
    assert isinstance(model.encoder, FeatureEncoder)
    assert isinstance(model.mp, GNNStackStage)
    assert isinstance(model.post_mp, GNNNodeHead)
    assert len(list(model.pre_mp.children())) == cfg.gnn.layers_pre_mp

    # Build the optimizer from an explicit config object mirroring cfg.optim.
    optimizer_config = OptimizerConfig(optimizer=cfg.optim.optimizer,
                                       base_lr=cfg.optim.base_lr,
                                       weight_decay=cfg.optim.weight_decay,
                                       momentum=cfg.optim.momentum)
    optimizer = create_optimizer(model.parameters(), optimizer_config)
    assert isinstance(optimizer, torch.optim.Adam)

    scheduler_config = SchedulerConfig(scheduler=cfg.optim.scheduler,
                                       steps=cfg.optim.steps,
                                       lr_decay=cfg.optim.lr_decay,
                                       max_epoch=cfg.optim.max_epoch)
    scheduler = create_scheduler(optimizer, scheduler_config)
    assert isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR)

    cfg.params = params_count(model)
    assert cfg.params == 23880  # pinned parameter count for this config

    train(loggers, loaders, model, optimizer, scheduler)

    agg_runs(set_agg_dir(cfg.out_dir, args.cfg_file), cfg.metric_best)

    # Clean up temporary artifacts (outputs and downloaded dataset).
    shutil.rmtree(cfg.out_dir)
    shutil.rmtree(cfg.dataset.dir)
Exemplo n.º 8
0
def main():
    """Single-GPU GraphSAGE training on the Reddit dataset."""
    seed_everything(42)
    data = RedditDataModule('data/Reddit')
    net = GraphSAGE(data.num_features, data.num_classes)
    # Keep only the best checkpoint as ranked by validation accuracy.
    best_ckpt = ModelCheckpoint(monitor='val_acc', save_top_k=1)
    trainer = Trainer(gpus=1, max_epochs=10, callbacks=[best_ckpt])

    # Uncomment to train on multiple GPUs:
    # trainer = Trainer(gpus=2, accelerator='ddp', max_epochs=10,
    #                   callbacks=[best_ckpt])

    trainer.fit(net, datamodule=data)
    trainer.test()
Exemplo n.º 9
0
def main():
    """Node classification on Reddit via ``LightningNodeData`` + DDP-spawn."""
    seed_everything(42)

    dataset = Reddit(osp.join('data', 'Reddit'))
    graph = dataset[0]

    # Neighbor-sampling loader; num_neighbors is the per-hop fan-out.
    datamodule = LightningNodeData(graph, graph.train_mask, graph.val_mask,
                                   graph.test_mask, loader='neighbor',
                                   num_neighbors=[25, 10], batch_size=1024,
                                   num_workers=8)

    model = Model(dataset.num_node_features, dataset.num_classes)

    # One spawned process per visible GPU; keep the best val_acc checkpoint.
    best_ckpt = pl.callbacks.ModelCheckpoint(monitor='val_acc', save_top_k=1)
    trainer = pl.Trainer(
        strategy=pl.strategies.DDPSpawnStrategy(find_unused_parameters=False),
        accelerator='gpu',
        devices=torch.cuda.device_count(),
        max_epochs=20,
        callbacks=[best_ckpt],
    )

    trainer.fit(model, datamodule)
    trainer.test(ckpt_path='best', datamodule=datamodule)
Exemplo n.º 10
0
def main():
    """Graph classification on IMDB-BINARY using PyTorch-Ignite engines,
    with per-split evaluators, checkpointing, and TensorBoard logging.
    """
    seed_everything(42)

    root = osp.join('data', 'TUDataset')
    dataset = TUDataset(root, 'IMDB-BINARY', pre_transform=T.OneHotDegree(135))

    # 10% test / 10% val / 80% train split of the shuffled dataset.
    dataset = dataset.shuffle()
    test_dataset = dataset[:len(dataset) // 10]
    val_dataset = dataset[len(dataset) // 10:2 * len(dataset) // 10]
    train_dataset = dataset[2 * len(dataset) // 10:]

    train_loader = DataLoader(train_dataset,
                              batch_size=64,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=64, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=64, pin_memory=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Model(dataset.num_node_features, dataset.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    metrics = {'acc': ignite.metrics.Accuracy()}

    # Ignite expects (input, target) pairs; move both to the device.
    def prepare_batch_fn(batch, device, non_blocking):
        return (batch.to(device, non_blocking=non_blocking),
                batch.y.to(device, non_blocking=non_blocking))

    trainer = ignite.engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=F.cross_entropy,
        device=device,
        prepare_batch=prepare_batch_fn,
        output_transform=lambda x, y, y_pred, loss: loss.item(),
        amp_mode='amp',
    )

    # Progress bar for each epoch:
    pbar = ignite.contrib.handlers.tqdm_logger.ProgressBar()
    pbar.attach(trainer, output_transform=lambda x: {'loss': x})

    # Factory returning an epoch-completed handler that runs *evaluator*
    # on *loader* and prints its accuracy under *tag*.
    def log_metrics(evaluator, loader, tag):
        def logger(trainer):
            evaluator.run(loader)
            print(f'{tag:10} Epoch: {trainer.state.epoch:02d}, '
                  f'Acc: {evaluator.state.metrics["acc"]:.4f}')

        return logger

    # One evaluator per split, each re-run after every training epoch.
    train_evaluator = ignite.engine.create_supervised_evaluator(
        model=model,
        metrics=metrics,
        device=device,
        prepare_batch=prepare_batch_fn,
        output_transform=lambda x, y, y_pred: (y_pred, y),
        amp_mode='amp',
    )
    trainer.on(ignite.engine.Events.EPOCH_COMPLETED(every=1))(log_metrics(
        train_evaluator, train_loader, 'Training'))

    val_evaluator = ignite.engine.create_supervised_evaluator(
        model=model,
        metrics=metrics,
        device=device,
        prepare_batch=prepare_batch_fn,
        output_transform=lambda x, y, y_pred: (y_pred, y),
        amp_mode='amp',
    )
    trainer.on(ignite.engine.Events.EPOCH_COMPLETED(every=1))(log_metrics(
        val_evaluator, val_loader, 'Validation'))

    test_evaluator = ignite.engine.create_supervised_evaluator(
        model=model,
        metrics=metrics,
        device=device,
        prepare_batch=prepare_batch_fn,
        output_transform=lambda x, y, y_pred: (y_pred, y),
        amp_mode='amp',
    )
    trainer.on(ignite.engine.Events.EPOCH_COMPLETED(every=1))(log_metrics(
        test_evaluator, test_loader, 'Test'))

    # Save checkpoint of the model based on Accuracy on the validation set:
    checkpoint_handler = ignite.handlers.Checkpoint(
        {'model': model},
        'runs/gin',
        n_saved=2,
        score_name=list(metrics.keys())[0],
        filename_pattern='best-{global_step}-{score_name}-{score}.pt',
        global_step_transform=ignite.handlers.global_step_from_engine(trainer),
    )
    val_evaluator.add_event_handler(ignite.engine.Events.EPOCH_COMPLETED,
                                    checkpoint_handler)

    # Create a tensorboard logger to write logs:
    tb_logger = ignite.contrib.handlers.tensorboard_logger.TensorboardLogger(
        log_dir=osp.join('runs/example', 'tb_logs'))

    # Training loss per iteration and per epoch, plus per-split metrics.
    tb_logger.attach_output_handler(
        trainer,
        event_name=ignite.engine.Events.ITERATION_COMPLETED,
        tag='training',
        output_transform=lambda loss: {'loss_iteration': loss})
    tb_logger.attach_output_handler(
        trainer,
        event_name=ignite.engine.Events.EPOCH_COMPLETED,
        tag='training',
        output_transform=lambda loss: {'loss_epoch': loss})
    tb_logger.attach_output_handler(
        train_evaluator,
        event_name=ignite.engine.Events.EPOCH_COMPLETED,
        tag='training',
        metric_names='all',
        global_step_transform=ignite.handlers.global_step_from_engine(trainer),
    )
    tb_logger.attach_output_handler(
        val_evaluator,
        event_name=ignite.engine.Events.EPOCH_COMPLETED,
        tag='validation',
        metric_names='all',
        global_step_transform=ignite.handlers.global_step_from_engine(trainer),
    )
    tb_logger.attach_output_handler(
        test_evaluator,
        event_name=ignite.engine.Events.EPOCH_COMPLETED,
        tag='test',
        metric_names='all',
        global_step_transform=ignite.handlers.global_step_from_engine(trainer),
    )
    # NOTE(review): tb_logger is closed before trainer.run(), so handlers
    # attached above log to a closed writer — presumably unintended; confirm.
    tb_logger.close()

    trainer.run(train_loader, max_epochs=50)
Exemplo n.º 11
0
def test_run_single_graphgym(skip_train_eval, use_trivial_metric):
    """End-to-end GraphGym run over the classic (non-Lightning) train loop.

    Parametrized (pytest fixtures not visible here) over whether evaluation
    on the training split is skipped and whether a custom 'trivial' metric
    is registered and used for checkpoint selection.
    """
    Args = namedtuple('Args', ['cfg_file', 'opts'])
    root = osp.join(osp.dirname(osp.realpath(__file__)))
    args = Args(osp.join(root, 'example_node.yml'), [])

    # Load the example config; unique /tmp suffixes prevent collisions
    # between concurrently running parameterizations.
    load_cfg(cfg, args)
    cfg.out_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.run_dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    cfg.dataset.dir = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dump_cfg(cfg)
    set_printing()

    seed_everything(cfg.seed)
    auto_select_device()
    set_run_dir(cfg.out_dir, args.cfg_file)

    cfg.train.skip_train_eval = skip_train_eval
    # Checkpointing is only exercised in one parameter combination.
    cfg.train.enable_ckpt = use_trivial_metric and skip_train_eval
    if use_trivial_metric:
        # Register once; the metric registry persists across test runs.
        if 'trivial' not in register.metric_dict:
            register.register_metric('trivial', trivial_metric)
        global num_trivial_metric_calls
        # Reset the counter — presumably incremented inside trivial_metric
        # (defined elsewhere); confirm against that helper.
        num_trivial_metric_calls = 0
        cfg.metric_best = 'trivial'
        cfg.custom_metrics = ['trivial']
    else:
        cfg.metric_best = 'auto'
        cfg.custom_metrics = []

    loaders = create_loader()
    assert len(loaders) == 3  # train / val / test

    loggers = create_logger()
    assert len(loggers) == 3  # one logger per split

    # Sanity-check the model structure produced by the config.
    model = create_model()
    assert isinstance(model, torch.nn.Module)
    assert isinstance(model.encoder, FeatureEncoder)
    assert isinstance(model.mp, GNNStackStage)
    assert isinstance(model.post_mp, GNNNodeHead)
    assert len(list(model.pre_mp.children())) == cfg.gnn.layers_pre_mp

    optimizer = create_optimizer(model.parameters(), cfg.optim)
    assert isinstance(optimizer, torch.optim.Adam)

    scheduler = create_scheduler(optimizer, cfg.optim)
    assert isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR)

    cfg.params = params_count(model)
    assert cfg.params == 23880  # pinned parameter count for this config

    train(loggers, loaders, model, optimizer, scheduler)

    if use_trivial_metric:
        # 6 total epochs, 4 eval epochs, 3 splits (1 training split)
        # BUGFIX: the original `assert x == 12 if skip_train_eval else 14`
        # parsed as `assert (x == 12) if skip_train_eval else 14`, so the
        # else-branch asserted the constant 14 (always truthy) and checked
        # nothing. Parenthesizing the conditional restores the intended
        # comparison against 14 when train-split eval is enabled.
        assert num_trivial_metric_calls == (12 if skip_train_eval else 14)

    # The checkpoint directory must exist iff checkpointing was enabled.
    assert osp.isdir(get_ckpt_dir()) is cfg.train.enable_ckpt

    agg_runs(set_agg_dir(cfg.out_dir, args.cfg_file), cfg.metric_best)

    # Clean up temporary artifacts (outputs and downloaded dataset).
    shutil.rmtree(cfg.out_dir)
    shutil.rmtree(cfg.dataset.dir)
Exemplo n.º 12
0
if __name__ == '__main__':
    # Load cmd line args
    args = parse_args()
    # Load config file
    load_cfg(cfg, args)
    set_out_dir(cfg.out_dir, args.cfg_file)
    # Set Pytorch environment
    torch.set_num_threads(cfg.num_threads)
    dump_cfg(cfg)
    # Repeat for different random seeds
    for i in range(args.repeat):
        set_run_dir(cfg.out_dir)
        set_printing()
        # Set configurations for each run
        cfg.seed = cfg.seed + 1
        seed_everything(cfg.seed)
        auto_select_device()
        # Set machine learning pipeline
        datamodule = GraphGymDataModule()
        model = create_model()
        # Print model info
        logging.info(model)
        logging.info(cfg)
        cfg.params = params_count(model)
        logging.info('Num parameters: %s', cfg.params)
        train(model, datamodule, logger=True)

    # Aggregate results from different seeds
    agg_runs(cfg.out_dir, cfg.metric_best)
    # When being launched in batch mode, mark a yaml as done
    if args.mark_done:
Exemplo n.º 13
0
                           max_epoch=cfg.optim.max_epoch)


if __name__ == '__main__':
    # Load cmd line args
    args = parse_args()
    # Load config file
    load_cfg(cfg, args)
    # Set Pytorch environment
    torch.set_num_threads(cfg.num_threads)
    dump_cfg(cfg)
    set_printing()
    # Repeat for different random seeds
    for i in range(args.repeat):
        # Set configurations for each run
        seed_everything(cfg.seed + i)
        auto_select_device()
        set_run_dir(cfg.out_dir, args.cfg_file)
        # Set machine learning pipeline
        loaders = create_loader()
        loggers = create_logger()
        model = create_model()
        optimizer = create_optimizer(model.parameters(),
                                     new_optimizer_config(cfg))
        scheduler = create_scheduler(optimizer, new_scheduler_config(cfg))
        # Print model info
        logging.info(model)
        logging.info(cfg)
        cfg.params = params_count(model)
        logging.info('Num parameters: {}'.format(cfg.params))
        # Start training
Exemplo n.º 14
0
import argparse
from itertools import product

import torch
from datasets import get_dataset
from gcn import GCN
from gin import GIN
from graph_sage import GraphSAGE
from train_eval import eval_acc, train

from torch_geometric import seed_everything
from torch_geometric.loader import DataLoader
from torch_geometric.profile import get_stats_summary, profileit, timeit

seed_everything(0)

parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--warmup_profile',
                    type=int,
                    default=1,
                    help='Skip the first few runs')
parser.add_argument('--goal_accuracy',
                    type=int,
                    default=1,
                    help='The goal test accuracy')
args = parser.parse_args()

layers = [1, 2, 3]