Example #1
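# pytest test of ConcatScheduler argument validation (presumably from
# pytorch-ignite's test suite): each block feeds an invalid schedulers/
# durations combination and asserts the expected ValueError or TypeError.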
def test_concat_scheduler_asserts():

    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0)

    scheduler_1 = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
    scheduler_2 = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)

    with pytest.raises(ValueError):
        ConcatScheduler(schedulers=[], durations=[])

    with pytest.raises(ValueError):
        ConcatScheduler(schedulers=[scheduler_1], durations=[10])

    with pytest.raises(TypeError):
        ConcatScheduler(schedulers=[scheduler_1, 12], durations=[10])

    with pytest.raises(ValueError):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2], durations=[10, 5])

    with pytest.raises(ValueError):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2, scheduler_2], durations=[15, 12.0])

    with pytest.raises(ValueError):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2], durations="abc")

    with pytest.raises(ValueError):
        ConcatScheduler.simulate_values(
            num_events=123, schedulers=[scheduler_1, scheduler_2], durations=[15], param_names="abc"
        )
Example #2
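# Round-trip test of ConcatScheduler.state_dict()/load_state_dict(): after
# 20 scheduler calls, restoring the saved state rewinds to the first
# scheduler and duration, and malformed state dicts raise the shown errors.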
def test_concat_scheduler_state_dict():
    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0)
    scheduler_1 = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
    scheduler_2 = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)
    durations = [10]
    concat_scheduler = ConcatScheduler(schedulers=[scheduler_1, scheduler_2], durations=durations, save_history=False)
    state_dict = concat_scheduler.state_dict()

    assert state_dict["durations"] == durations
    assert state_dict["_current_duration"] == durations[0]
    assert state_dict["_scheduler_index"] == 0

    for _ in range(20):
        concat_scheduler(None, None)

    concat_scheduler.load_state_dict(state_dict)
    assert concat_scheduler.durations == durations
    assert concat_scheduler._current_duration == durations[0]
    assert id(concat_scheduler._current_scheduler) == id(scheduler_1)

    with pytest.raises(ValueError, match=r"Required state attribute 'schedulers' is absent in provided state_dict"):
        concat_scheduler.load_state_dict({"a": 1})

    with pytest.raises(ValueError, match=r"Input state_dict contains 0 state_dicts of concatenated schedulers"):
        concat_scheduler.load_state_dict({"schedulers": []})

    with pytest.raises(TypeError, match=r"Argument state_dict should be a dictionary, but given"):
        concat_scheduler.load_state_dict(None)
Example #3
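# A stricter variant of the validation test in Example #1: the same invalid
# inputs, now also matching the exception messages, plus a check that all
# concatenated schedulers must be attached to the same optimizer.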
def test_concat_scheduler_asserts():

    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0)

    scheduler_1 = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
    scheduler_2 = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)

    with pytest.raises(TypeError, match=r"Argument schedulers should be a sequence"):
        ConcatScheduler(schedulers=None, durations=[])

    with pytest.raises(ValueError, match=r"Argument schedulers should be of more than one parameter schedulers"):
        ConcatScheduler(schedulers=[], durations=[])

    with pytest.raises(ValueError, match=r"Argument schedulers should be of more than one parameter schedulers"):
        ConcatScheduler(schedulers=[scheduler_1], durations=[10])

    with pytest.raises(TypeError, match=r"Value at index 1 of schedulers should be a parameter scheduler"):
        ConcatScheduler(schedulers=[scheduler_1, 12], durations=[10])

    with pytest.raises(ValueError, match=r"Incorrect number schedulers or duration values"):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2], durations=[10, 5])

    with pytest.raises(ValueError, match=r"Argument durations should be list/tuple of integers"):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2, scheduler_2], durations=[15, 12.0])

    with pytest.raises(TypeError, match=r"Argument durations should be list/tuple"):
        ConcatScheduler(schedulers=[scheduler_1, scheduler_2], durations="abc")

    with pytest.raises(TypeError, match=r"Argument param_names should be list or tuple"):
        ConcatScheduler.simulate_values(
            num_events=123, schedulers=[scheduler_1, scheduler_2], durations=[15], param_names="abc"
        )

    with pytest.raises(ValueError, match=r"Argument param_names should be list or tuple of strings"):
        ConcatScheduler.simulate_values(
            num_events=123, schedulers=[scheduler_1, scheduler_2], durations=[15], param_names=[1]
        )

    optimizer_2 = torch.optim.SGD([tensor], lr=0)
    scheduler_3 = CosineAnnealingScheduler(optimizer_2, "lr", start_value=0.0, end_value=1.0, cycle_size=10)

    with pytest.raises(ValueError, match=r"schedulers should be related to same optimizer"):
        ConcatScheduler([scheduler_1, scheduler_3], durations=[30,])
Example #4
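# Value test for CosineAnnealingScheduler: over two runs of 2 epochs x 9
# iterations, the recorded lrs must match both the hard-coded cosine curve
# (the second cycle is cut short at 18 events) and simulate_values();
# scheduler state is restored between runs.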
def test_cosine_annealing_scheduler():
    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0)

    scheduler = CosineAnnealingScheduler(optimizer, "lr", 0, 1, 10)
    state_dict = scheduler.state_dict()

    data = [0] * 9
    max_epochs = 2
    simulated_values = CosineAnnealingScheduler.simulate_values(
        num_events=len(data) * max_epochs,
        param_name="lr",
        start_value=0,
        end_value=1,
        cycle_size=10,
    )

    def save_lr(engine):
        lrs.append(optimizer.param_groups[0]["lr"])

    trainer = Engine(lambda engine, batch: None)
    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)

    for _ in range(2):
        lrs = []
        trainer.run(data, max_epochs=max_epochs)

        assert lrs == list(
            map(
                pytest.approx,
                [
                    0.0,
                    0.02447174185242318,
                    0.09549150281252627,
                    0.20610737385376332,
                    0.3454915028125263,
                    0.5,
                    0.6545084971874737,
                    0.7938926261462365,
                    0.9045084971874737,
                    0.9755282581475768,
                    0.0,
                    0.02447174185242318,
                    0.09549150281252627,
                    0.20610737385376332,
                    0.3454915028125263,
                    0.5,
                    0.6545084971874737,
                    0.7938926261462365,  # 0.9045084971874737, 0.9755282581475768
                ],
            ))
        scheduler.load_state_dict(state_dict)

        assert lrs == pytest.approx([v for i, v in simulated_values])
Example #5
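# Checks that parameter schedulers update every optimizer param group: with
# two groups starting at the same lr, both must follow identical
# trajectories under each of the scheduler types exercised below.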
def test_scheduler_with_param_groups():
    def _test(lr_scheduler, optimizer):
        num_iterations = 10
        max_epochs = 20

        state_dict = lr_scheduler.state_dict()

        trainer = Engine(lambda engine, batch: None)

        @trainer.on(Events.ITERATION_COMPLETED)
        def save_lr():
            lrs.append((optimizer.param_groups[0]["lr"], optimizer.param_groups[1]["lr"]))

        trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)

        data = [0] * num_iterations

        for _ in range(2):
            lrs = []
            trainer.run(data, max_epochs=max_epochs)
            assert [lr[0] for lr in lrs] == pytest.approx([lr[1] for lr in lrs])
            lr_scheduler.load_state_dict(state_dict)

    t1 = torch.zeros([1], requires_grad=True)
    t2 = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([{"params": t1, "lr": 0.1}, {"params": t2, "lr": 0.1}])

    lr_scheduler = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
    _test(lr_scheduler, optimizer)

    lr_scheduler = PiecewiseLinear(
        optimizer, "lr", milestones_values=[(5, 0.5), (15, 1.0), (25, 0.0), (35, 1.0), (40, 0.5)]
    )
    _test(lr_scheduler, optimizer)

    lr_scheduler = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)
    _test(lr_scheduler, optimizer)

    torch_lr_scheduler = ExponentialLR(optimizer, gamma=0.98)
    _test(LRScheduler(torch_lr_scheduler), optimizer)

    torch_lr_scheduler = StepLR(optimizer, step_size=50, gamma=0.5)
    _test(LRScheduler(torch_lr_scheduler), optimizer)
Example #6
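    # Fragment of a ConcatScheduler test: durations may be given as numpy
    # integers, save_history=True records the lrs in
    # engine.state.param_history, and the observed lrs must equal
    # simulate_values().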
    def _test(duration_vals_as_np_int):
        scheduler_1 = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
        scheduler_2 = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)

        durations = [10, ]
        if duration_vals_as_np_int:
            durations = [np.int64(t) for t in durations]

        concat_scheduler = ConcatScheduler(schedulers=[scheduler_1, scheduler_2],
                                           durations=durations, save_history=True)

        data = [0] * 10
        max_epochs = 2
        simulated_values = ConcatScheduler.simulate_values(num_events=len(data) * max_epochs,
                                                           schedulers=[scheduler_1, scheduler_2],
                                                           durations=durations)

        lrs = []

        def save_lr(engine):
            lrs.append(optimizer.param_groups[0]['lr'])

        trainer = Engine(lambda engine, batch: None)
        trainer.add_event_handler(Events.ITERATION_STARTED, concat_scheduler)
        trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)
        trainer.run(data, max_epochs=max_epochs)

        assert lrs == list(map(pytest.approx, [
            # Cycle 1 of the LinearCyclicalScheduler
            1.0, 0.8, 0.6, 0.4, 0.2,
            0.0, 0.2, 0.4, 0.6, 0.8,
            # Cycle 1 of the CosineAnnealingScheduler
            0.0, 0.02447174185242318, 0.09549150281252627, 0.20610737385376332, 0.3454915028125263,
            0.5, 0.6545084971874737, 0.7938926261462365, 0.9045084971874737, 0.9755282581475768,
        ]))

        state_lrs = trainer.state.param_history['lr']
        assert len(state_lrs) == len(lrs)
        # Unpack singleton lists
        assert [group[0] for group in state_lrs] == lrs

        assert lrs == pytest.approx([v for i, v in simulated_values])
Example #7
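# Minimal CosineAnnealingScheduler test: over 2 epochs x 10 iterations the
# lr must trace the cosine curve from 0 toward 1 once per epoch.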
def test_cosine_annealing_scheduler():
    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0)

    scheduler = CosineAnnealingScheduler(optimizer, 'lr', 0, 1, 10)
    lrs = []

    def save_lr(engine):
        lrs.append(optimizer.param_groups[0]['lr'])

    trainer = Engine(lambda engine, batch: None)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, scheduler)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)
    trainer.run([0] * 10, max_epochs=2)

    assert lrs == list(map(pytest.approx, [
        0.0, 0.02447174185242318, 0.09549150281252627, 0.20610737385376332, 0.3454915028125263,
        0.5, 0.6545084971874737, 0.7938926261462365, 0.9045084971874737, 0.9755282581475768,
        0.0, 0.02447174185242318, 0.09549150281252627, 0.20610737385376332, 0.3454915028125263,
        0.5, 0.6545084971874737, 0.7938926261462365, 0.9045084971874737, 0.9755282581475768
    ]))
Example #8
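# Training-script fragment: moves the model to the device, wraps it in
# DataParallel on multi-GPU machines, and drives SGD with a cosine-annealing
# schedule (growing cycles, decaying start value) behind a one-epoch linear
# warmup.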
print(model)
model.to(device)

# multi-gpus
if torch.cuda.device_count() > 1:
    print('==================== Use {} GPUs ===================='.format(torch.cuda.device_count()))
    model = nn.DataParallel(model)

# loss function
loss_fn = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=init_lr, momentum=0.9, weight_decay=5e-4)

# scheduler
scheduler = CosineAnnealingScheduler(optimizer, 'lr', init_lr, end_lr, 4*len(trainloader), cycle_mult=1.5, start_value_mult=0.1)
scheduler = create_lr_scheduler_with_warmup(scheduler, warmup_start_value=0., warmup_end_value=init_lr, warmup_duration=len(trainloader))

# create trainer
trainer = create_trainer(model, optimizer, loss_fn, device=device)
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

# add timer for each iteration
timer = Timer(average=False)

# logging training loss
def log_loss(engine):
    i = engine.state.iteration
    e = engine.state.epoch

    if i % 100 == 0:
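        # The source snippet is truncated here; a plausible (hypothetical)
        # body would report the running loss, e.g.:
        print('Epoch[{}] Iteration[{}] loss: {:.4f}'.format(e, i, engine.state.output))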
Example #9
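    # Fragment wiring ignite handlers into an existing setup: a score-based
    # ModelCheckpoint on the evaluator, TerminateOnNan and a single-cycle
    # cosine lr schedule on the trainer, and GPU memory reported in the
    # progress bar.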
    checkpointer = ModelCheckpoint(
        CHECKPOINTS_RUN_DIR_PATH,
        filename_prefix=RUN_NAME.lower(),
        n_saved=None,
        score_function=lambda engine: round(engine.state.metrics['WRA'], 3),
        score_name='WRA',
        atomic=True,
        require_empty=True,
        create_dir=True,
        archived=False,
        global_step_transform=global_step_from_engine(trainer))
    nan_handler = TerminateOnNan()
    coslr = CosineAnnealingScheduler(opt,
                                     "lr",
                                     start_value=LR,
                                     end_value=LR / 4,
                                     cycle_size=TOTAL_UPDATE_STEPS // 1)

    evaluator.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                                {'_': mude})

    trainer.add_event_handler(Events.ITERATION_COMPLETED, nan_handler)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, coslr)

    GpuInfo().attach(trainer, name='gpu')
    pbar.attach(trainer,
                output_transform=lambda output: {'loss': output['loss']},
                metric_names=[f"gpu:{args.gpu} mem(%)"])

    # FIRE
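    # (Snippet truncated in the source; "FIRE" presumably precedes the
    # trainer.run(...) call that starts training.)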
Example #10
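# Tests ParamScheduler.simulate_values() for several scheduler classes: lrs
# observed while an engine runs must equal the simulated values, and (except
# for LRScheduler/ConcatScheduler, whose wrapped torch scheduler state
# mutates) a second simulation must leave the results unchanged.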
def test_simulate_values():
    def _test(scheduler_cls, **scheduler_kwargs):

        optimizer = None
        if scheduler_cls == LRScheduler:
            scheduler_kwargs['optimizer'] = scheduler_kwargs[
                'lr_scheduler'].optimizer
            optimizer = scheduler_kwargs['optimizer']
        elif scheduler_cls == ConcatScheduler:
            optimizer = scheduler_kwargs['optimizer']
            del scheduler_kwargs['optimizer']
        else:
            tensor = torch.zeros([1], requires_grad=True)
            scheduler_kwargs['optimizer'] = torch.optim.SGD([tensor], lr=0.1)
            optimizer = scheduler_kwargs['optimizer']

        max_epochs = 2
        data = [0] * 10
        simulated_values = scheduler_cls.simulate_values(num_events=len(data) *
                                                         max_epochs,
                                                         **scheduler_kwargs)

        scheduler = scheduler_cls(**scheduler_kwargs)

        lrs = []

        def save_lr(engine):
            lrs.append(optimizer.param_groups[0]['lr'])

        trainer = Engine(lambda engine, batch: None)
        trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
        trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)
        trainer.run(data, max_epochs=max_epochs)

        assert lrs == pytest.approx([v for i, v in simulated_values])

        if scheduler_cls == LRScheduler or scheduler_cls == ConcatScheduler:
            # As internal state of torch lr scheduler has been changed the following checks will fail
            return

        # reexecute to check if no internal changes
        simulated_values = scheduler_cls.simulate_values(
            num_events=len(data) * max_epochs,
            save_history=True,  # this will be removed
            **scheduler_kwargs)
        assert lrs == pytest.approx([v for i, v in simulated_values])

    # LinearCyclicalScheduler
    _test(LinearCyclicalScheduler,
          param_name="lr",
          start_value=1.0,
          end_value=0.0,
          cycle_size=10)

    # CosineAnnealingScheduler
    _test(CosineAnnealingScheduler,
          param_name="lr",
          start_value=1.0,
          end_value=0.0,
          cycle_size=10)

    # LRScheduler
    tensor = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0.1)
    torch_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, gamma=0.5)

    _test(LRScheduler, lr_scheduler=torch_lr_scheduler)

    # ConcatScheduler = [LinearCyclicalScheduler, CosineAnnealingScheduler]
    scheduler_1 = LinearCyclicalScheduler(optimizer,
                                          "lr",
                                          start_value=1.0,
                                          end_value=0.0,
                                          cycle_size=20)
    scheduler_2 = CosineAnnealingScheduler(optimizer,
                                           "lr",
                                           start_value=0.0,
                                           end_value=1.0,
                                           cycle_size=10)
    durations = [
        10,
    ]
    _test(ConcatScheduler,
          optimizer=optimizer,
          schedulers=[scheduler_1, scheduler_2],
          durations=durations)

    # ConcatScheduler = [LinearCyclicalScheduler, LRScheduler]
    tensor = torch.ones([1], requires_grad=True)
    optimizer = torch.optim.SGD([tensor], lr=0.001)
    torch_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, gamma=1.5)
    scheduler_1 = LRScheduler(torch_lr_scheduler)
    scheduler_2 = LinearCyclicalScheduler(optimizer,
                                          "lr",
                                          start_value=0.1,
                                          end_value=0.0,
                                          cycle_size=10)
    durations = [
        10,
    ]
    _test(ConcatScheduler,
          optimizer=optimizer,
          schedulers=[scheduler_1, scheduler_2],
          durations=durations)
Example #11
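# Hydra-configured training script for a Transformer language model: builds
# dataset, model and criterion from the config, trains with Adam under a
# 200-step warmup followed by per-epoch cosine annealing, and logs through
# ProgressBar and WandbLogger.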
def train(cfg):
    print(cfg.pretty())

    ###################################################################
    # Dataset
    ###################################################################
    wt = Dataset(batch_size=cfg.train.batch_size,
                 bptt_len=cfg.train.bptt_len,
                 dataset_cls=hydra.utils.get_class(cfg.dataset.name))

    ###################################################################
    # Models
    ###################################################################
    base_embedding = hydra.utils.instantiate(cfg.embedding,
                                             ntokens=len(wt.text_field.vocab) +
                                             3)
    embedding = TransformerEmbedding(
        embedding=base_embedding,
        max_length=cfg.train.bptt_len,
        embedding_size=base_embedding.embedding_size,
        use_positional_embedding=False)
    encoder = TransformerEncoder(query_dim=cfg.encoder.query_dim,
                                 att_num_units=cfg.encoder.att_num_units,
                                 ffn_num_unit=cfg.encoder.ffn_num_unit,
                                 max_ext=cfg.encoder.max_ext)
    model = TransformerLanguageModel(embedding, encoder)
    model.init_weight()

    # wandb.watch(model)

    ###################################################################
    # Loss
    ###################################################################
    criterion = lm_criterion(in_features=cfg.encoder.att_num_units[-1],
                             vocab_size=len(wt.text_field.vocab))

    ###################################################################
    # Parameters + Train ops
    ###################################################################
    parameters = (list(model.parameters()) + list(criterion.parameters()))
    tot_params = 0
    for p in parameters:
        tot_params += reduce(lambda x, y: x * y, p.size())
    print("Total Parameters: ", tot_params)
    opt = optim.Adam(parameters, lr=cfg.train.lr)
    model.to(DEVICE)
    criterion.to(DEVICE)

    ###################################################################
    # Train + Evaluation
    ###################################################################
    def train_step(engine, batch):
        model.train()
        opt.zero_grad()

        text = batch.text.to(DEVICE).t().contiguous()
        target = batch.target.to(DEVICE).t().contiguous()

        out, out_past = model(text, engine.state.train_past)
        engine.state.train_past = out_past
        raw_loss = criterion(out.view(-1, out.size(2)), target.view(-1))
        loss = raw_loss[1]

        loss.backward()
        nn.utils.clip_grad_norm_(parameters, cfg.train.clip_grad)
        opt.step()

        return {"train_loss": loss.item(), "train_ppl": loss.exp().item()}

    def eval_step(engine, batch):
        model.eval()

        if not hasattr(engine.state, "eval_past"):
            engine.state.eval_past = None

        target_sample = []
        result_sample = []
        with torch.no_grad():
            text = batch.text.to(DEVICE).t().contiguous()
            target = batch.target.to(DEVICE).t().contiguous()

            out, out_past = model(text, engine.state.eval_past)

            vocab = wt.text_field.vocab
            idx = list(range(32))
            sample = random.choices(idx, k=5)
            for id_sample in sample:
                s = []
                for target_id in target[id_sample]:
                    s.append(vocab.itos[target_id])
                target_sample.append(" ".join(s))

                s = []
                for result_id in out.max(-1)[1][id_sample]:
                    s.append(vocab.itos[result_id])
                result_sample.append(" ".join(s))
            # engine.state.eval_past = out_past
            raw_loss = criterion(out.view(-1, out.size(2)), target.view(-1))
            loss = raw_loss[1]

            return {
                "val_loss": loss.item(),
                "sample": (target_sample, result_sample)
            }

    train_engine = Engine(train_step)
    eval_engine = Engine(eval_step)

    def reset_state(engine):
        engine.state.train_past = None

    def run_eval(_):
        print("start running eval")
        eval_engine.run(wt.valid_iter)
        metrics = eval_engine.state.metrics
        print("Validation loss: ", metrics["val_loss"], ", ppl: ",
              np.exp(metrics["val_loss"]))

    train_engine.add_event_handler(Events.EPOCH_STARTED, reset_state)
    train_engine.add_event_handler(Events.EPOCH_COMPLETED, run_eval)

    ###################################################################
    # LR Scheduler
    ###################################################################
    cosine_scheduler = CosineAnnealingScheduler(opt.param_groups[0],
                                                "lr",
                                                0.0,
                                                2.5e-4,
                                                cycle_size=len(wt.train_iter))
    warmup_scheduler = create_lr_scheduler_with_warmup(cosine_scheduler, 0.0,
                                                       2.5e-4, 200)
    train_engine.add_event_handler(Events.ITERATION_STARTED, warmup_scheduler)

    ###################################################################
    # Metrics
    ###################################################################
    RunningAverage(output_transform=lambda x: x["train_ppl"]).attach(
        train_engine, "train_ppl")
    RunningAverage(output_transform=lambda x: x["train_loss"]).attach(
        train_engine, "train_loss")
    RunningAverage(output_transform=lambda x: x["val_loss"]).attach(
        eval_engine, "val_loss")
    progress_bar = ProgressBar(persist=True)
    progress_bar.attach(train_engine, ["train_ppl", "train_loss"])
    progress_bar_val = ProgressBar(persist=True)
    progress_bar_val.attach(eval_engine, ["val_loss"])

    ###################################################################
    # Tensorboard
    ###################################################################
    # tb_logger = TensorboardLogger(log_dir=log_dir)
    tb_logger = WandbLogger(project="language_model", entity="akurniawan")
    tb_logger.watch(model)

    def stepn_logger(num_steps, handler):
        def logger_runner(engine, log_handler, event_name):
            if engine.state.iteration % num_steps == 0:
                handler(engine, log_handler, event_name)

        return logger_runner

    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(
                         cfg.train.log_steps,
                         OutputHandler(tag="training",
                                       output_transform=lambda loss: loss)),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(eval_engine,
                     log_handler=OutputHandler(
                         tag="validation",
                         output_transform=lambda loss: loss,
                         another_engine=train_engine),
                     event_name=Events.EPOCH_COMPLETED)
    # tb_logger.attach(train_engine,
    #                  log_handler=stepn_logger(log_steps,
    #                                           OptimizerParamsHandler(opt)),
    #                  event_name=Events.ITERATION_STARTED)
    # tb_logger.attach(train_engine,
    #                  log_handler=stepn_logger(log_steps,
    #                                           WeightsScalarHandler(model)),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(train_engine,
    #                  log_handler=stepn_logger(log_steps,
    #                                           GradsScalarHandler(model)),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(train_engine,
    #                  log_handler=stepn_logger(500, WeightsHistHandler(model)),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(train_engine,
    #                  log_handler=stepn_logger(500, GradsHistHandler(model)),
    #                  event_name=Events.ITERATION_COMPLETED)

    try:
        train_engine.run(wt.train_iter, max_epochs=cfg.train.epochs)
    except Exception:
        pass
    finally:
        tb_logger.close()
Example #12
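# Plain-argument variant of the previous script: the same Transformer
# language-model training loop, but configured via keyword arguments and
# logged to TensorBoard (optimizer params, weight/grad scalars and
# histograms) instead of wandb.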
def train(epochs=500,
          batch_size=32,
          bptt_len=70,
          lr=0.00025,
          log_steps=200,
          clip_grad=0.25,
          log_dir="experiments"):
    ###################################################################
    # Dataset
    ###################################################################
    wt = wikitext103(batch_size=batch_size, bptt_len=bptt_len)
    # wt = wikitext2(batch_size=batch_size, bptt_len=bptt_len)

    ###################################################################
    # Configs
    ###################################################################
    embedding_config = DropEmbedding.Hyperparams(len(wt.text_field.vocab) + 3,
                                                 ninp=512)
    encoder_config = TransformerEncoder.Hyperparams(
        att_num_units=[512, 512, 512, 512, 512, 512], max_ext=384)

    ###################################################################
    # Models
    ###################################################################
    base_embedding = DropEmbedding(embedding_config)
    embedding = TransformerEmbedding(embedding=base_embedding,
                                     max_length=bptt_len,
                                     embedding_size=embedding_config.ninp,
                                     use_positional_embedding=False)
    encoder = TransformerEncoder(encoder_config)
    model = TransformerLanguageModel(embedding, encoder)
    model.init_weight()

    ###################################################################
    # Loss
    ###################################################################
    criterion = lm_criterion(in_features=encoder_config.att_num_units[-1],
                             vocab_size=len(wt.text_field.vocab))

    ###################################################################
    # Parameters + Train ops
    ###################################################################
    parameters = (list(model.parameters()) + list(criterion.parameters()))
    tot_params = 0
    for p in parameters:
        tot_params += reduce(lambda x, y: x * y, p.size())
    print("Total Parameters: ", tot_params)
    opt = optim.Adam(parameters, lr=lr)
    model.to(DEVICE)
    criterion.to(DEVICE)

    ###################################################################
    # Train + Evaluation
    ###################################################################
    def train_step(engine, batch):
        model.train()
        opt.zero_grad()

        text = batch.text.to(DEVICE).t().contiguous()
        target = batch.target.to(DEVICE).t().contiguous()

        out, out_past = model(text, engine.state.train_past)
        engine.state.train_past = out_past
        raw_loss = criterion(out.view(-1, out.size(2)), target.view(-1))
        loss = raw_loss[1]

        loss.backward()
        nn.utils.clip_grad_norm_(parameters, clip_grad)
        opt.step()

        return {"train_loss": loss.item(), "train_ppl": loss.exp().item()}

    def eval_step(engine, batch):
        model.eval()

        if not hasattr(engine.state, "eval_past"):
            engine.state.eval_past = None

        with torch.no_grad():
            text = batch.text.to(DEVICE).t().contiguous()
            target = batch.target.to(DEVICE).t().contiguous()

            out, out_past = model(text, engine.state.eval_past)
            engine.state.eval_past = out_past
            raw_loss = criterion(out.view(-1, out.size(2)), target.view(-1))
            loss = raw_loss[1]

            return {"val_loss": loss.item()}

    train_engine = Engine(train_step)
    eval_engine = Engine(eval_step)

    def reset_state(engine):
        engine.state.train_past = None

    def run_eval(_):
        print("start running eval")
        eval_engine.run(wt.valid_iter)
        metrics = eval_engine.state.metrics
        print("Validation loss: ", metrics["val_loss"], ", ppl: ",
              np.exp(metrics["val_loss"]))

    train_engine.add_event_handler(Events.EPOCH_STARTED, reset_state)
    train_engine.add_event_handler(Events.EPOCH_COMPLETED, run_eval)

    ###################################################################
    # LR Scheduler
    ###################################################################
    cosine_scheduler = CosineAnnealingScheduler(opt.param_groups[0],
                                                "lr",
                                                0.0,
                                                2.5e-4,
                                                cycle_size=len(wt.train_iter))
    warmup_scheduler = create_lr_scheduler_with_warmup(cosine_scheduler, 0.0,
                                                       2.5e-4, 200)
    train_engine.add_event_handler(Events.ITERATION_STARTED, warmup_scheduler)

    ###################################################################
    # Metrics
    ###################################################################
    RunningAverage(output_transform=lambda x: x["train_ppl"]).attach(
        train_engine, "train_ppl")
    RunningAverage(output_transform=lambda x: x["train_loss"]).attach(
        train_engine, "train_loss")
    RunningAverage(output_transform=lambda x: x["val_loss"]).attach(
        eval_engine, "val_loss")
    progress_bar = ProgressBar(persist=True)
    progress_bar.attach(train_engine, ["train_ppl", "train_loss"])
    progress_bar_val = ProgressBar(persist=True)
    progress_bar_val.attach(eval_engine, ["val_loss"])

    ###################################################################
    # Tensorboard
    ###################################################################
    tb_logger = TensorboardLogger(log_dir=log_dir)

    def stepn_logger(num_steps, handler):
        def logger_runner(engine, log_handler, event_name):
            if engine.state.iteration % num_steps == 0:
                handler(engine, log_handler, event_name)

        return logger_runner

    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(
                         log_steps,
                         OutputHandler(tag="training",
                                       output_transform=lambda loss: loss)),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(eval_engine,
                     log_handler=OutputHandler(
                         tag="validation",
                         output_transform=lambda loss: loss,
                         another_engine=train_engine),
                     event_name=Events.EPOCH_COMPLETED)
    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(log_steps,
                                              OptimizerParamsHandler(opt)),
                     event_name=Events.ITERATION_STARTED)
    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(log_steps,
                                              WeightsScalarHandler(model)),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(log_steps,
                                              GradsScalarHandler(model)),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(500, WeightsHistHandler(model)),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(train_engine,
                     log_handler=stepn_logger(500, GradsHistHandler(model)),
                     event_name=Events.ITERATION_COMPLETED)

    try:
        train_engine.run(wt.train_iter, max_epochs=epochs)
    except Exception:
        pass
    finally:
        tb_logger.close()
Example #13
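# Supervised training entry point: selects model and data loaders from the
# config, anneals the lr over one cosine cycle per epoch, checkpoints
# periodically, and reports train/validation accuracy and loss via tqdm and
# TensorBoard.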
def run(config):

    if config['model'] == 'AttentionWideResNet':
        train_loader, val_loader = get_data_loaders(config['batch_size'])
        model = AttentionWideResNet(28, 100, 10, (32, 32), 0.0)
    elif config['model'] == 'AttentionRetinaNet':
        train_loader, val_loader = get_COCO_loaders(config['batch_size'])
        model = AttentionRetinaNet(num_classes=80, input_size=(5,3))
    writer = create_summary_writer(model, train_loader, config["tb_logdir"])
    model.cuda()
    
    log_interval = config['log_interval']
    epochs = config['epochs']
    model = nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=config['momentum'])
    scheduler = CosineAnnealingScheduler(optimizer, 'lr', 0.1, 0.001, len(train_loader))
    
    loss_fn = nn.CrossEntropyLoss().cuda()
    
    trainer = create_supervised_trainer(model, optimizer, loss_fn, device='cuda')
    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
    trainer_saver = ModelCheckpoint(
        config['checkpoint_dir'],
        filename_prefix="model_ckpt",
        save_interval=1000,
        n_saved=10,
        atomic=True,
        save_as_state_dict=True,
        create_dir=True
    )
    trainer.add_event_handler(Events.ITERATION_COMPLETED,
                              trainer_saver,
                              {
                                  "model": model,
                              })
    evaluator = create_supervised_evaluator(model,
                                            metrics={"accuracy": Accuracy(),
                                                     'CE': Loss(loss_fn)},
                                            device="cuda")

    desc = "ITERATION - loss: {:.2f}"
    pbar = tqdm(
        initial=0, leave=False, total=len(train_loader),
        desc=desc.format(0)
    )

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        it = (engine.state.iteration - 1) % len(train_loader) + 1

        if it % log_interval == 0:
            pbar.desc = desc.format(engine.state.output)
            pbar.update(log_interval)

        writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        pbar.refresh()
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_CE = metrics['CE']
        tqdm.write(
            "Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}".format(engine.state.epoch,
                                                                                        avg_accuracy,
                                                                                        avg_CE)
        )
        writer.add_scalar("training/avg_loss", avg_CE, engine.state.epoch)
        writer.add_scalar("training/avg_accuracy", avg_accuracy, engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_CE = metrics['CE']
        tqdm.write(
            "Validation Results - Epoch: {} Avg accuracy {:.2f} Avg loss: {:.2f}".format(engine.state.epoch,
                                                                                         avg_accuracy,
                                                                                         avg_CE)
        )
        pbar.n = pbar.last_print_n = 0

        writer.add_scalar("valdation/avg_loss", avg_CE, engine.state.epoch)
        writer.add_scalar("valdation/avg_accuracy", avg_accuracy, engine.state.epoch)

    trainer.run(train_loader, max_epochs=epochs)
    pbar.close()
    writer.close()