Example #1
import os

# TextConfig, TextCNN and the data helpers used below are defined
# elsewhere in the same text-classification project.

if __name__ == '__main__':
    print('Configuring CNN model...')
    config = TextConfig()
    filenames = [
        config.train_filename, config.test_filename, config.val_filename
    ]

    if not os.path.exists(config.vocab_filename):
        # Build the vocabulary from the raw text files
        build_vocab(filenames, config.vocab_filename,
                    config.vocab_size)  # writes the vocabulary file

    # Categories and a mapping from category name to id
    categories, cat_to_id = read_category()

    # Words and a mapping from word to its id in the vocabulary
    words, word_to_id = read_vocab(config.vocab_filename)
    config.vocab_size = len(words)  # the corpus may contain fewer words than the configured vocabulary size

    # Export the word2vec vectors to a binary .npz file; row i is the vector for word id i
    if not os.path.exists(config.vector_word_npz):
        export_word2vec_vectors(word_to_id, config.vector_word_filename,
                                config.vector_word_npz)

    # Load the pre-trained word vectors from the .npz file
    config.pre_trianing = get_training_word2vec_vectors(config.vector_word_npz)

    model = TextCNN(config)
    train()
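
The two word2vec helpers used above are defined elsewhere in the project. Below is a minimal sketch of what they might look like, assuming the pre-trained vectors are stored as a plain-text word2vec file (one word followed by its float components per line) and that the .npz archive keeps the matrix under an 'embeddings' key; the names, the key and the default dimension are illustrative assumptions, not the project's actual implementation.

import numpy as np


def export_word2vec_vectors(word_to_id, word2vec_path, npz_path, dim=100):
    # Hypothetical sketch: build an embedding matrix whose row i is the vector
    # for word id i, falling back to small random values for unseen words.
    embeddings = np.random.uniform(-0.25, 0.25, (len(word_to_id), dim))
    with open(word2vec_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            if len(parts) != dim + 1:
                continue  # skip the header line or malformed rows
            word = parts[0]
            if word in word_to_id:
                embeddings[word_to_id[word]] = np.asarray(parts[1:], dtype=np.float32)
    np.savez_compressed(npz_path, embeddings=embeddings)


def get_training_word2vec_vectors(npz_path):
    # Load the id-aligned embedding matrix saved above.
    return np.load(npz_path)['embeddings']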
Example #2
import time
from typing import Dict

import numpy as np
import torch

# TransfomerModel, NoamOpt, LabelSmoothing, SimpleLossCompute and the
# data/training helpers used below are defined elsewhere in the same NMT project.


def train(args: Dict):
    """Train the NMT model.

    @param args (Dict): args from the command line
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('use device: %s' % device)
    train_data, dev_data = read_data(args)
    vocab, vocab_mask = read_vocab(args)
    train_batch_size, N, d_model, d_ff, h, dropout, valid_niter, log_every, model_save_path, lr = read_model_params(
        args)

    transformer_model = TransfomerModel(vocab, N, d_model, d_ff, h, dropout,
                                        device)
    model = transformer_model.model
    optimizer = NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(),
                         lr=lr,
                         betas=(0.9, 0.999),
                         eps=1e-9))

    # criterion = nn.CrossEntropyLoss()
    # criterion = LabelSmoothing(size=len(vocab.tgt.word2id), padding_idx=vocab.tgt.word2id['<pad>'], smoothing=0.0)
    criterion = LabelSmoothing(size=len(vocab.tgt.word2id),
                               padding_idx=vocab.tgt.word2id['<pad>'],
                               smoothing=0.001)
    # criterion = partial(nllLoss, vocab.src.word2id['<pad>'])
    loss_compute_train = SimpleLossCompute(model.generator, criterion,
                                           optimizer)
    loss_compute_dev = SimpleLossCompute(model.generator,
                                         criterion,
                                         optimizer,
                                         train=False)

    train_time = start_time = time.time()
    patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    num_trial = cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []

    print('begin Maximum Likelihood Training')
    while True:
        epoch += 1
        for train_iter, batch_sents in enumerate(
                batch_iter(train_data,
                           batch_size=train_batch_size,
                           shuffle=True)):

            loss, batch_size, n_tokens = train_step(model, batch_sents, vocab,
                                                    loss_compute_train, device)
            report_loss += loss
            cum_loss += loss
            cum_examples += batch_size
            report_examples += batch_size
            report_tgt_words += n_tokens
            cum_tgt_words += n_tokens

            if train_iter % log_every == 0:
                elapsed = time.time() - start_time
                elapsed_since_last = time.time() - train_time
                print(
                    f"epoch {epoch}, iter {train_iter}, avg loss {report_loss / report_examples: .3f}, "
                    f"avg ppl {np.exp(report_loss / report_tgt_words): .3f}, cum. examples {cum_examples}, "
                    f"speed {report_tgt_words / elapsed_since_last: .3f} w/s, elapsed time {elapsed: .3f} s, lr = {optimizer._rate}"
                )
                train_time = time.time()
                report_tgt_words = report_loss = report_examples = 0.

            if train_iter % valid_niter == 0:
                print(
                    f"epoch {epoch}, iter {train_iter}, cum. loss {cum_loss / cum_examples}, "
                    f"cum. ppl {np.exp(cum_loss / cum_tgt_words)}, cum. examples {cum_examples}, lr = {optimizer._rate}"
                )
                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1
                print("begin validation ...")

                dev_loss, dev_ppl = run_dev_session(model,
                                                    dev_data,
                                                    vocab,
                                                    loss_compute_dev,
                                                    batch_size=32,
                                                    device=device)
                print(
                    f'validation: iter {train_iter}, dev. loss {dev_loss}, dev. ppl {dev_ppl}'
                )

                valid_metric = -dev_ppl
                is_better = (len(hist_valid_scores) == 0
                             or valid_metric > max(hist_valid_scores))
                hist_valid_scores.append(valid_metric)
                if is_better:
                    patience = 0
                    print(
                        f'saving the currently best model to {model_save_path}')
                    transformer_model.save(model_save_path)
                    torch.save(optimizer.optimizer.state_dict(),
                               model_save_path + ".optim")
                elif patience < int(args['--patience']):
                    patience += 1
                    print(f'hit patience {patience}')
                    if patience == int(args['--patience']):
                        num_trial += 1
                        print(f"hit #{num_trial} trial")
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!')
                            exit(0)
                if epoch == int(args['--max-epoch']):
                    print('reached max number of epochs!')
                    exit(0)
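
NoamOpt above is the warm-up learning-rate wrapper popularized by the Annotated Transformer; it is not defined in this snippet. A minimal sketch in that style, assuming the same constructor arguments (model_size, factor, warmup, optimizer) used in the call above and the `_rate` attribute read in the log messages; this is an illustrative version, not necessarily the project's exact class.

class NoamOpt:
    # Implements the schedule from "Attention Is All You Need":
    # lr = factor * model_size^(-0.5) * min(step^(-0.5), step * warmup^(-1.5))
    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self._step = 0
        self._rate = 0

    def step(self):
        # Update the learning rate of every parameter group, then step the wrapped optimizer.
        self._step += 1
        rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step=None):
        if step is None:
            step = self._step
        return self.factor * (self.model_size ** (-0.5) *
                              min(step ** (-0.5), step * self.warmup ** (-1.5)))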