Example #1
    # train_batched is assumed to hold the batchified training token stream,
    # prepared the same way as valid_batched below
    train_data_tb = TemporalSplits(train_batched,
                                   nb_inputs_necessary=lm.model.in_len,
                                   nb_targets_parallel=args.target_seq_len)
    train_data = TransposeWrapper(train_data_tb)

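    # validation data: tokenize with the LM's vocabulary, batchify into 10 parallel
    # streams, and slice into windows of target_seq_len prediction targets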
    valid_ids = tokens_from_fn(args.valid,
                               lm.vocab,
                               randomize=False,
                               regime=tokenize_regime)
    valid_batched = batchify(valid_ids, 10, args.cuda)
    valid_data_tb = TemporalSplits(valid_batched,
                                   nb_inputs_necessary=lm.model.in_len,
                                   nb_targets_parallel=args.target_seq_len)
    valid_data = TransposeWrapper(valid_data_tb)

    print('Initial perplexity {:.2f}'.format(
        math.exp(
            evaluate_(lm, valid_data, use_ivecs=False, custom_batches=False))))

    print("training...")
    lr = args.lr
    best_val_loss = None
    for epoch in range(1, args.epochs + 1):
        logger = ProgressLogger(epoch, args.log_interval, lr,
                                len(train_batched) // args.target_seq_len)
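        # a fresh SGD optimizer is created every epoch so the current learning rate
        # takes effect; args.beta is passed as L2 weight decay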
        optim = torch.optim.SGD(lm.parameters(), lr, weight_decay=args.beta)

        train_(
            lm,
            train_data,
            optim,
            logger,
            args.clip,
            # the remaining keyword arguments are assumed to mirror the evaluate_ calls
            use_ivecs=False,
            custom_batches=False,
        )
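The excerpt ends with the per-epoch training call, but the lr and best_val_loss variables it initializes point at the usual validate-and-anneal step that follows. A minimal sketch of that step, assuming the classic halve-on-plateau schedule (not shown in the excerpt):

        val_loss = evaluate_(lm, valid_data, use_ivecs=False, custom_batches=False)
        print('epoch {} | valid ppl {:.2f}'.format(epoch, math.exp(val_loss)))

        if best_val_loss is None or val_loss < best_val_loss:
            best_val_loss = val_loss  # remember (or checkpoint) the best model so far
        else:
            lr /= 2.0  # halve the learning rate when validation stops improving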
Example #2
    print(lm)

    print("preparing data...")
    tokenize_regime = 'words'
    if args.characters:
        tokenize_regime = 'chars'

    ids = tokens_from_fn(args.data,
                         lm.vocab,
                         randomize=False,
                         regime=tokenize_regime)
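    # arrange the token stream into 10 parallel batch streams (on GPU if args.cuda)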
    batched = batchify(ids, 10, args.cuda)
    data_tb = TemporalSplits(batched,
                             nb_inputs_necessary=lm.model.in_len,
                             nb_targets_parallel=args.target_seq_len)
    data = TransposeWrapper(data_tb)

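    # out-of-vocabulary statistics: count tokens that mapped to the <unk> index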
    oov_mask = ids == lm.vocab.unk_ind
    nb_oovs = oov_mask.sum()
    print('Nb oovs: {} ({:.2f} %)\n'.format(nb_oovs,
                                            100.0 * nb_oovs / len(ids)))

    # Run on test data.
    loss = evaluate_(
        lm,
        data,
        use_ivecs=False,
        custom_batches=False,
    )
    print('loss {:5.2f} | ppl {:8.2f}'.format(loss, math.exp(loss)))
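For context, a minimal argument-parser sketch covering only the flags this excerpt reads; the option names follow the attributes used above, while defaults and help strings are assumptions:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', required=True, help='text file to evaluate')
    parser.add_argument('--characters', action='store_true',
                        help='tokenize into characters instead of words')
    parser.add_argument('--target-seq-len', type=int, default=35)
    parser.add_argument('--cuda', action='store_true')
    args = parser.parse_args()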
Example #3
    def val_loss_fn(lm):
        return evaluate_(lm, valid_data, use_ivecs=False, custom_batches=False)
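A closure like this fixes the validation data and evaluation flags so the caller only has to pass the model; hypothetical usage (the surrounding names are assumed):

    val_loss = val_loss_fn(lm)
    print('valid loss {:5.2f} | ppl {:8.2f}'.format(val_loss, math.exp(val_loss)))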