Example #1
import torch
from torch import nn
from torch.utils import data

# Dataset, CharRNN, collate_fn, PADDING_TOKEN, print_colored_text and
# get_sentiments are assumed to come from the project's own modules.


def main():
    ds = Dataset('imdb')
    params = {
        'batch_size': 67,
        'shuffle': True,
        'num_workers': 8,
        'collate_fn': collate_fn
    }
    epochs = 4
    lr = 0.01
    tbptt_steps = 256
    training_generator = data.DataLoader(ds, **params)
    model = CharRNN(input_size=ds.encoder.get_vocab_size(),
                    embedding_size=8,
                    hidden_size=128,
                    output_size=ds.encoder.get_vocab_size(),
                    no_sentiments=3,
                    dense_size=32,
                    padding_idx=ds.encoder.get_id(PADDING_TOKEN),
                    n_layers=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    step_no = 0
    for epoch in range(epochs):
        print('Epoch: ', epoch)
        for x_i, y_i, l_i in training_generator:
            model.reset_intermediate_vars()
            step_no += 1
            print(x_i.size())
            batch_loss = 0
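            # Number of TBPTT chunks: ceil(sequence length / tbptt_steps)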
            for step in range(l_i[0] // tbptt_steps +
                              (l_i[0] % tbptt_steps != 0)):
                von = tbptt_steps * step
                bis = min(tbptt_steps * (step + 1), l_i[0])
                out = model(x_i[:, von:bis])
                if step % 25 == 0:
                    print(model.attn[0].detach().numpy(),
                          model.attn[-1].detach().numpy())
                loss = model.loss(out, y_i, l_i, von, bis)
                batch_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1.5)
                optimizer.step()

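                # Detach the hidden state so gradients do not flow across
                # TBPTT chunk boundaries.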
                model.detach_intermediate_vars()
            print('Total loss for this batch: ', batch_loss)
            if step_no % 30 == 1:
                gen_sample, sentis = model.generate_text(
                    ds.encoder, 'T', 200, 0.7)
                print_colored_text(gen_sample, sentis, ds.encoder)
                # Print an example with sentiments
                print_colored_text(x_i[-1].data.numpy(),
                                   get_sentiments(model, x_i[-1], 0.7),
                                   ds.encoder)
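The collate_fn passed to the DataLoader above is not part of this snippet. Below is a minimal sketch of what it might look like, assuming each dataset item is a pair of equal-length 1-D LongTensors and that batches are sorted longest-first so that l_i[0] is the batch maximum; the function name matches the snippet, but the padding value and internals are assumptions.

import torch
from torch.nn.utils.rnn import pad_sequence


def collate_fn(batch, padding_value=0):
    # batch: list of (input_ids, target_ids) pairs of 1-D LongTensors.
    # Sort longest-first so l_i[0] is the longest sequence in the batch.
    batch = sorted(batch, key=lambda pair: len(pair[0]), reverse=True)
    xs, ys = zip(*batch)
    lengths = torch.tensor([len(x) for x in xs])
    # In the real project the padding id presumably comes from
    # ds.encoder.get_id(PADDING_TOKEN).
    x_padded = pad_sequence(list(xs), batch_first=True,
                            padding_value=padding_value)
    y_padded = pad_sequence(list(ys), batch_first=True,
                            padding_value=padding_value)
    return x_padded, y_padded, lengths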
Example #2
import os

import torch
from torch import nn
from torch.utils import data

# CharRNN and the helpers load_checkpoint, save_checkpoint,
# run_forward_pass_and_get_loss, print_tokens and validate are assumed to
# come from the project's own modules.


def run_training(model: CharRNN, dataset, config: dict, validation: bool,
                 valid_dataset):
    optimizer = torch.optim.Adam(model.parameters(), lr=config['initial_lr'])
    epoch = load_checkpoint(optimizer, model, config['filename'])
    if not epoch:
        epoch = 0
    epoch += 1
    params = {
        'batch_size': config['batch_size'],
        'shuffle': False,
        'num_workers': 0 if os.name == 'nt' else 8
    }
    data_generator = data.DataLoader(dataset, **params)
    while epoch < config['epochs'] + 1:
        model.reset_intermediate_vars()
        for step, (x_i, y_i, l_i) in enumerate(data_generator):
            loss = run_forward_pass_and_get_loss(model, x_i, y_i, l_i)
            # Gradient descent step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.5)
            optimizer.step()

            model.detach_intermediate_vars()
            if step % 100 == 0:
                print('Epoch: {} Loss for step {} : {}'.format(
                    epoch, step, round(loss.item(), 4)))
            if step % 1000 == 1:
                gen_sample = model.generate_text(dataset.encoder, 't', 200)
                print_tokens(dataset.encoder.map_ids_to_tokens(gen_sample),
                             config['is_bytes'])
        save_checkpoint(optimizer, model, epoch, config['filename'])
        if validation and epoch % 2:
            bpc = validate(valid_dataset, model)
            print('BPC on validation set: ', bpc)
        if epoch in config['lr_schedule']:
            # Adjust the learning rate in place so Adam's moment estimates
            # are preserved across the schedule change.
            for param_group in optimizer.param_groups:
                param_group['lr'] = config['lr_schedule'][epoch]
        epoch += 1
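The load_checkpoint and save_checkpoint helpers used in Example #2 are also external to the snippet. Below is a minimal sketch under the assumption that a checkpoint is a plain torch.save dictionary holding the model state, optimizer state and epoch number; the file layout and key names are assumptions, not taken from the original code.

import os

import torch


def save_checkpoint(optimizer, model, epoch, filename):
    # Store everything needed to resume training in a single file.
    torch.save({'epoch': epoch,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict()},
               filename)


def load_checkpoint(optimizer, model, filename):
    # Returns the stored epoch, or None when no checkpoint exists yet
    # (run_training then starts from epoch 1).
    if not os.path.isfile(filename):
        return None
    checkpoint = torch.load(filename, map_location='cpu')
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    return checkpoint['epoch']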