Example 1
def train():

    # Define the model
    model = RNNModel(len(word2ix), embed_size, hidden_dims)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        count = 0
        for ii, data_ in tqdm.tqdm(enumerate(data)):
            data_ = torch.tensor(data_).long()
            x = data_.unsqueeze(1).to(device)
            optimizer.zero_grad()
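            # Target: the input shifted left by one token, with the first token wrapped
            # to the last position (next-character prediction over the whole sequence).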
            y = torch.zeros_like(x)
            y[:-1], y[-1] = x[1:], x[0]
            output, _ = model(x)
            loss = criterion(output, y.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            count += 1
        print(epoch, 'loss=', total_loss / count)
        torch.save(model.state_dict(), 'model.bin')
        chars = test(model)
        print(chars)
Example 2
def main():
    # Add ckp
    parser = argparse.ArgumentParser(
        description='PyTorch PennTreeBank RNN/LSTM Language Model')
    parser.add_argument(
        '--data',
        type=str,
        default='/input',  # /input
        help='location of the data corpus')
    parser.add_argument('--checkpoint',
                        type=str,
                        default='',
                        help='model checkpoint to use')
    parser.add_argument(
        '--model',
        type=str,
        default='LSTM',
        help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
    parser.add_argument('--emsize',
                        type=int,
                        default=200,
                        help='size of word embeddings')
    parser.add_argument('--nhid',
                        type=int,
                        default=200,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers',
                        type=int,
                        default=2,
                        help='number of layers')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='initial learning rate')
    parser.add_argument('--clip',
                        type=float,
                        default=0.25,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=40,
                        help='upper epoch limit')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        metavar='N',
                        help='batch size')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--tied',
                        action='store_true',
                        help='tie the word embedding and softmax weights')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--cuda', action='store_true', help='use CUDA')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        metavar='N',
                        help='report interval')
    parser.add_argument(
        '--save',
        type=str,
        default='/output/model.pt',  # /output
        help='path to save the final model')
    args = parser.parse_args()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else:
            torch.cuda.manual_seed(args.seed)

    # Load checkpoint
    build_vocab = False
    if args.checkpoint != '' and os.path.exists(args.checkpoint):
        print(f'Loading field from {args.checkpoint}')
        save_dict = torch.load(args.checkpoint)
        field = save_dict['field']
        start_epoch = save_dict['start_epoch']
    else:
        save_dict = None
        field = Field(tokenize=split_tokenize, init_token='<init>')
        build_vocab = True
        start_epoch = 0

    ###############################################################################
    # Load data
    ###############################################################################

    train_data, val_data, test_data = TabularDataset.splits(
        path=args.data,
        train='train.txt',
        validation='valid.txt',
        test='test.txt',
        format='tsv',
        fields=[('text', field)])
    print(train_data, len(train_data), val_data, len(val_data), test_data,
          len(test_data))
    if build_vocab:
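        # Temporarily expose an <eos> token so it gets an entry in the vocabulary, then
        # clear it again so the iterators do not append it to every example.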
        field.eos_token = '<eos>'
        field.build_vocab(train_data, val_data, min_freq=1000)
        field.eos_token = None
    eos_id = field.vocab.stoi['<eos>']
    pad_id = field.vocab.stoi[field.pad_token]

    train_iter = BucketIterator(train_data,
                                args.batch_size,
                                train=True,
                                repeat=False,
                                device='cuda:0' if args.cuda else 'cpu:0')
    val_iter = Iterator(val_data,
                        args.batch_size,
                        repeat=False,
                        device='cuda:0' if args.cuda else 'cpu:0')
    test_iter = Iterator(test_data,
                         args.batch_size,
                         repeat=False,
                         device='cuda:0' if args.cuda else 'cpu:0')
    print(train_iter, len(train_iter), val_iter, len(val_iter), test_iter,
          len(test_iter))

    ###############################################################################
    # Build the model
    ###############################################################################

    ntokens = len(field.vocab)
    model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                     args.dropout, args.tied)

    if save_dict is not None:
        model.load_state_dict(save_dict['model'])

    if args.cuda:
        model.cuda()
    else:
        model.cpu()
    print(model)

    if save_dict:
        opt = save_dict['optimizer']
    else:
        opt = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.checkpoint:
        torch.save(
            dict(field=field,
                 model=model.state_dict(),
                 optimizer=opt,
                 start_epoch=start_epoch), args.checkpoint)

    ###############################################################################
    # Training code
    ###############################################################################

    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)

    def make_target(text):
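        # The target is the input shifted left by one position, with <eos> appended so
        # the final position predicts end-of-sequence.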
        batch_size = text.size()[1]
        eos_vector = torch.full((1, batch_size),
                                eos_id,
                                dtype=text.dtype,
                                device='cuda:0' if args.cuda else 'cpu:0')
        target = torch.cat((text[1:], eos_vector), dim=0)
        return target

    def compute_loss(output, text):
        output_flat = output.view(-1, ntokens)
        target = make_target(text)
        target_flat = target.view(-1)

        return criterion(output_flat, target_flat)

    def evaluate(data_source):
        # Turn on evaluation mode which disables dropout.
        with torch.no_grad():
            model.eval()
            total_loss = 0
            for batch in data_source:
                output, hidden = model(batch.text)
                loss = compute_loss(output, batch.text)

                total_loss += loss.item()
            return total_loss / len(data_source)

    def train():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        for i, batch in enumerate(train_iter):
            model.zero_grad()

            output, hidden = model(batch.text)

            loss = compute_loss(output, batch.text)
            loss.backward()

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            opt.step()

            total_loss += loss.item()

            if i % args.log_interval == 0 and i > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, i, len(train_iter),
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()

    # Loop over epochs.
    best_val_loss = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(start_epoch, args.epochs):
            epoch_start_time = time.time()
            train()
            val_loss = evaluate(val_iter)
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
            print('-' * 89)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                if args.checkpoint:
                    torch.save(
                        dict(field=field,
                             model=model.state_dict(),
                             optimizer=opt,
                             start_epoch=epoch), args.checkpoint)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    torch.save(
        dict(vocab=field.vocab.itos,
             model=model.state_dict(),
             settings=dict(rnn_type=args.model,
                           emsize=args.emsize,
                           nhid=args.nhid,
                           nlayers=args.nlayers)), args.save)

    # Load the best saved model.
    #with open(args.save, 'rb') as f:
    #    save_dict = torch.load(f)
    #    field = save_dict['field']
    #    if save_dict is not None:
    #        model.load_state_dict(save_dict['model'])
    #
    #    if args.cuda:
    #        model.cuda()
    #    else:
    #        model.cpu()

    # Run on test data.
    test_loss = evaluate(test_iter)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Example 3
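    # Validation pass (excerpt): valid_loss, min_loss and earlystop_count are assumed to be
    # initialised and updated by the enclosing epoch loop.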
    model.eval()
    for batch_num, batch in enumerate(valid_loader):
        lm_input, lens, lm_output = batch
        predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
        loss = loss_function(predictions, cudalize(lm_output), lens)
        valid_loss += loss.item()
        valid_avg_loss = valid_loss / (batch_num + 1)
        valid_ppl = math.exp(valid_avg_loss)
        print(f'\rvalid batch:{batch_num} loss: {valid_avg_loss:.4f} ' +
              f'ppl:{valid_ppl:.4f}',
              end='')
    print()
    if valid_avg_loss < min_loss:
        min_loss = valid_avg_loss
        earlystop_count = 0
        torch.save(model.state_dict(), './model.pt')
        print('saved model')
    else:
        earlystop_count += 1
        if earlystop_count > 20:
            print('earlystop')
            break
    print()
test_loss = 0
for batch_num, batch in enumerate(test_loader):
    lm_input, lens, lm_output = batch
    predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
    loss = loss_function(predictions, cudalize(lm_output), lens)
    test_loss += loss.item()
    test_avg_loss = test_loss / (batch_num + 1)
    test_ppl = math.exp(test_avg_loss)
Example 4
lr = cf.learning_rate
best_val_loss = None
try:
    for epoch in range(1, cf.n_epochs + 1):
        epoch_start_time = time.time()
        loop('train', lr)
        val_loss, val_acc = loop('valid')
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid acc {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, val_acc))
        print('-' * 89)

        if not best_val_loss or val_loss < best_val_loss:
            torch.save(model.state_dict(),
                       os.path.join(args.save_path, 'model.pt'))
            best_val_loss = val_loss
        else:
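            # Anneal the learning rate when the validation loss stops improving.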
            lr /= 4.0
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
model.load_state_dict(torch.load(os.path.join(args.save_path, 'model.pt')))

# Run on test data.
test_loss, test_acc = loop('test')
print('=' * 89)
print('| End of training | test loss {:5.2f} | test acc {:8.2f}'.format(
    test_loss, test_acc))
Example 5
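        # Generation step (excerpt): divide the output scores by the temperature, exponentiate,
        # and sample the next word index from the resulting multinomial distribution; the
        # sampled word is then fed back in as the next input.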
        words_weights = output.squeeze().data.div(temperature).exp().cpu()
        word_idx = torch.multinomial(words_weights, 1)[0]
        input.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]
        word = word.replace('<eos>','\n')
        # if ':' in word:
        #     word = '\n\n' + word
        print("%s "%(word), end='')
        # outf.write(word + ('\n' if i % 20 == 19 else ' '))
try:
    for epoch in range(1, epochs+1):
        train(model,
                optimizer,
                criterion,
                corpus,
                batch_size,
                train_data,
                sequence_length,
                clip,
                step_vis,
                epoch,
                learning_rate)
        torch.save(model.state_dict(), './snapchot.pth')
        generate(model, 
            words, 
            temperature, 
            corpus)
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')
Example 6
                     nclasses,
                     args.emsize,
                     args.nhid_class,
                     subdictmask.to(device),
                     args.nhid,
                     args.nlayers,
                     None,
                     args.rnndrop,
                     args.dropout,
                     reset=args.reset,
                     classnorm=args.classnorm,
                     tied=args.tied)
# Initialise with trained parameters
if args.load_from != '':
    pretrained_dict = torch.load(args.load_from).state_dict()
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

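# With --classnorm the model is assumed to output log-probabilities, so NLLLoss is used;
# otherwise CrossEntropyLoss is applied to raw logits.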
if args.classnorm:
    criterion = nn.NLLLoss()
    interpCrit = nn.NLLLoss(reduction='none')
else:
    criterion = nn.CrossEntropyLoss()
    interpCrit = nn.CrossEntropyLoss(reduction='none')
att_criterion = nn.CrossEntropyLoss(reduction='none', ignore_index=0)
Example 7
def train():
    # Load the data and configure the model
    print("Loading data...")
    corpus = Corpus(train_dir)
    print(corpus)

    config = Config()
    config.vocab_size = len(corpus.dictionary)
    train_data = batchify(corpus.train, config.batch_size)
    train_len = train_data.size(0)
    seq_len = config.seq_len

    print("Configuring model...")
    model = RNNModel(config)
    if use_cuda:
        model.cuda()
    print(model)

    criterion = nn.CrossEntropyLoss()
    lr = config.learning_rate  # initial learning rate
    start_time = time.time()

    print("Training and generating...")
    for epoch in range(1, config.num_epochs + 1):  # train for multiple epochs
        total_loss = 0.0
        model.train()  # dropout is only active in training mode
        hidden = model.init_hidden(config.batch_size)  # initialise the hidden state

        for ibatch, i in enumerate(range(0, train_len - 1, seq_len)):
            data, targets = get_batch(train_data, i, seq_len)  # fetch one batch
            # Detach the hidden state from the graph built by previous batches.
            # Otherwise backpropagation would reach all the way back to the start of the dataset.
            hidden = repackage_hidden(hidden)
            model.zero_grad()

            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, config.vocab_size), targets)
            loss.backward()  # backpropagate

            # `clip_grad_norm_` helps prevent the exploding-gradient problem in RNNs/LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
            for p in model.parameters():  # manual SGD parameter update
                p.data.add_(p.grad.data, alpha=-lr)

            total_loss += loss.item()  # accumulate the loss

            if ibatch % config.log_interval == 0 and ibatch > 0:  # report status every log_interval batches
                cur_loss = total_loss / config.log_interval
                elapsed = get_time_dif(start_time)
                print(
                    "Epoch {:3d}, {:5d}/{:5d} batches, lr {:2.3f}, loss {:5.2f}, ppl {:8.2f}, time {}"
                    .format(epoch, ibatch, train_len // seq_len, lr, cur_loss,
                            math.exp(cur_loss), elapsed))
                total_loss = 0.0
        lr /= 4.0  # anneal the learning rate after each epoch

        # save the model parameters every save_interval epochs
        if epoch % config.save_interval == 0:
            torch.save(model.state_dict(),
                       os.path.join(save_dir, model_name.format(epoch)))

        print(''.join(generate(model, corpus.dictionary.idx2word)))
Example 8
File: main.py, Project: Phlix1/exps
if args.save and os.path.isfile(args.save):
    print("Loading Saved Model")
    with open(args.save, 'rb') as f:
        net.load_state_dict(torch.load(f))
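        # Compact the RNN weights into one contiguous memory block so cuDNN runs
        # efficiently after the parameters are loaded.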
        net.rnn.flatten_parameters()
else:
    print("Random Initialization - No Saved Model")

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        if args.save:
            with open(args.save, 'wb') as f:
                torch.save(net.state_dict(), f)

#        test_loader = test_corpus.batch_generator(seq_length=1, batch_size=1, shuffle=False)
#        val_loss = evaluate(test_corpus, test_loader)
#        print('-' * 89)
#        print('Test: {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}'
#               .format(epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss)))
#        print('-' * 89)
        sys.stdout.flush()
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')
    sys.stdout.flush()

# Run on test data.