Example #1
t_log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                         num_classes=vocab_size,
                                         num_layers=1,
                                         log_softmax=True)

beam_translator = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=t_log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    length_penalty=args.len_pen,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())

loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=0, smoothing=0.1)

loss_eval = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=0, smoothing=0.0)

# tie all embeddings weights
t_log_softmax.mlp.last_linear_layer.weight = \
    encoder.bert.embeddings.word_embeddings.weight
decoder.embedding_layer.token_embedding.weight = \
    encoder.bert.embeddings.word_embeddings.weight
decoder.embedding_layer.position_embedding.weight = \
    encoder.bert.embeddings.position_embeddings.weight

# training pipeline
src, src_mask, tgt, tgt_mask, labels, sent_ids = train_data_layer()

input_type_ids = zeros_transform(input_type_ids=src)
log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                       num_classes=tgt_tokenizer.vocab_size,
                                       num_layers=1,
                                       log_softmax=True)

beam_search = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    bos_token=tgt_tokenizer.bos_id(),
    pad_token=tgt_tokenizer.pad_id(),
    eos_token=tgt_tokenizer.eos_id())

loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    pad_id=tgt_tokenizer.pad_id(), label_smoothing=args.label_smoothing)

if tie_weight:
    log_softmax.mlp.last_linear_layer.weight = \
        encoder.embedding_layer.token_embedding.weight
    decoder.embedding_layer.token_embedding.weight = \
        encoder.embedding_layer.token_embedding.weight


def create_pipeline(dataset_src,
                    dataset_tgt,
                    tokens_in_batch,
                    clean=False,
                    training=True):
    data_layer = nemo_nlp.TranslationDataLayer(tokenizer_src=src_tokenizer,
                                               tokenizer_tgt=tgt_tokenizer,
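
Note: the weight-tying assignments above all rely on the same trick: the linear layer that produces the vocabulary logits has a weight with the same shape as the token embedding matrix, so both modules can share one parameter. A minimal sketch of that pattern in plain PyTorch (the names here are illustrative, not NeMo's):

import torch.nn as nn

d_model, vocab_size = 512, 32000

embedding = nn.Embedding(vocab_size, d_model)            # weight shape: (vocab_size, d_model)
projection = nn.Linear(d_model, vocab_size, bias=False)  # weight shape: (vocab_size, d_model)

# Tie the two: both modules now read and update the same tensor during training.
projection.weight = embedding.weight
assert projection.weight.data_ptr() == embedding.weight.data_ptr()
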
Example #3
    share_all_layers=args.share_decoder_layers)
log_softmax = nemo_nlp.TransformerLogSoftmaxNM(factory=neural_factory,
                                               vocab_size=vocab_size,
                                               d_model=args.d_model,
                                               d_embedding=args.d_embedding)
beam_search = nemo_nlp.BeamSearchTranslatorNM(
    factory=neural_factory,
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_sequence_length,
    beam_size=args.beam_size,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())
loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    factory=neural_factory,
    pad_id=tokenizer.pad_id(),
    label_smoothing=args.label_smoothing)

# tie weight of embedding and log_softmax layers
if args.tie_enc_dec:
    decoder.embedding_layer.token_embedding.weight = \
        encoder.embedding_layer.token_embedding.weight
    if args.tie_projs:
        decoder.embedding_layer.token2hidden.weight = \
            encoder.embedding_layer.token2hidden.weight

if args.tie_enc_softmax:
    log_softmax.log_softmax.dense.weight = \
        encoder.embedding_layer.token_embedding.weight
    if args.tie_projs:
        log_softmax.log_softmax.hidden2token.weight = \
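
Note: tie_projs is relevant when d_embedding differs from d_model, i.e. the embedding side maps token ids to d_embedding vectors and then projects them to d_model, while the softmax side projects d_model back to d_embedding before producing vocabulary logits. A hedged sketch of that factorized layout in plain PyTorch (names are illustrative, not NeMo's):

import torch.nn as nn

vocab_size, d_embedding, d_model = 32000, 256, 512

# Embedding path: token ids -> d_embedding vectors -> d_model vectors.
token_embedding = nn.Embedding(vocab_size, d_embedding)
token2hidden = nn.Linear(d_embedding, d_model, bias=False)

# Softmax path: d_model vectors -> d_embedding vectors -> vocabulary logits.
hidden2token = nn.Linear(d_model, d_embedding, bias=False)
output_projection = nn.Linear(d_embedding, vocab_size, bias=False)

# The two vocabulary matrices share the shape (vocab_size, d_embedding) and can be tied directly.
output_projection.weight = token_embedding.weight
# Whether the size projections (token2hidden / hidden2token) can also be shared directly depends
# on how they are parameterized; in this sketch their weights are transposes of one another.
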
Example #4
                                                 d_model=args.d_model,
                                                 d_embedding=args.d_embedding)

beam_translator = nemo_nlp.BeamSearchTranslatorNM(
    factory=neural_factory,
    decoder=decoder,
    log_softmax=t_log_softmax,
    max_seq_length=max_sequence_length,
    beam_size=args.beam_size,
    length_penalty=args.len_pen,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())

loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(factory=neural_factory,
                                                 pad_id=0,
                                                 smoothing=0.1)

loss_eval = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(factory=neural_factory,
                                                      pad_id=0,
                                                      smoothing=0.0)

if args.encoder == "hf":
    # tie all embeddings weights
    t_log_softmax.log_softmax.dense.weight = \
        encoder.bert.embeddings.word_embeddings.weight
    decoder.embedding_layer.token_embedding.weight = \
        encoder.bert.embeddings.word_embeddings.weight
    decoder.embedding_layer.position_embedding.weight = \
        encoder.bert.embeddings.position_embeddings.weight
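
Note: PaddedSmoothedCrossEntropyLossNM combines label smoothing with masking of padding positions, so pad tokens contribute nothing to the loss. One common way to write that computation out, in plain PyTorch and as an illustration rather than NeMo's implementation:

import torch

def padded_smoothed_xent(log_probs, labels, pad_id, smoothing=0.1):
    # log_probs: (batch, seq_len, vocab_size) log-softmax outputs
    # labels:    (batch, seq_len) gold token ids
    nll = -log_probs.gather(dim=-1, index=labels.unsqueeze(-1)).squeeze(-1)
    uniform = -log_probs.mean(dim=-1)
    # Smoothed target: weight (1 - smoothing) on the gold token, smoothing spread uniformly.
    loss = (1.0 - smoothing) * nll + smoothing * uniform
    # Zero out padding positions and average over the real tokens only.
    mask = (labels != pad_id).to(loss.dtype)
    return (loss * mask).sum() / mask.sum()
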
Example #5
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length,
    embedding_dropout=args.embedding_dropout,
    learn_positional_encodings=True,
    hidden_act="gelu")

decoder.restore_from(args.restore_from, local_rank=args.local_rank)

t_log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                         num_classes=vocab_size,
                                         num_layers=1,
                                         log_softmax=True)

loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=tokenizer.pad_id(),
                                                    smoothing=0.1)

beam_search = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=t_log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    length_penalty=args.len_pen,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())

# tie all embeddings weights
t_log_softmax.mlp.layer0.weight = \
    encoder.bert.embeddings.word_embeddings.weight
decoder.embedding_layer.token_embedding.weight = \
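
Note: the length_penalty (len_pen) argument controls how completed beam hypotheses are normalized by their length. A common choice is the GNMT-style penalty sketched below; whether BeamSearchTranslatorNM uses exactly this normalization is an assumption:

def length_normalized_score(sum_log_prob, length, alpha):
    # GNMT length penalty: lp = ((5 + length) / 6) ** alpha.
    # alpha = 0 disables normalization; larger alpha favors longer hypotheses.
    return sum_log_prob / (((5.0 + length) / 6.0) ** alpha)
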
Example #6
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    mask_future=True,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length)
log_softmax = nemo_nlp.TransformerLogSoftmaxNM(factory=neural_factory,
                                               vocab_size=vocab_size,
                                               d_model=args.d_model,
                                               d_embedding=args.d_embedding)
loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    factory=neural_factory,
    pad_id=tokenizer.pad_id(),
    label_smoothing=args.label_smoothing)
loss_eval = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    factory=neural_factory,
    pad_id=tokenizer.pad_id(),
    label_smoothing=args.label_smoothing,
    predict_last_k=args.predict_last_k)

if args.tie_enc_softmax:
    log_softmax.log_softmax.dense.weight = \
        encoder.embedding_layer.token_embedding.weight
    if args.tie_projs:
        log_softmax.log_softmax.hidden2token.weight = \
            encoder.embedding_layer.token2hidden.weight

# training pipeline