    tokens_in_batch=args.batch_size,
    clean=True)
eval_data_layer = nemo_nlp.TranslationDataLayer(
    factory=neural_factory,
    tokenizer_src=tokenizer,
    tokenizer_tgt=tokenizer,
    dataset_src=f"{args.data_root}/{args.eval_datasets[0]}.{args.src_lang}",
    dataset_tgt=f"{args.data_root}/{args.eval_datasets[0]}.{args.tgt_lang}",
    tokens_in_batch=args.eval_batch_size)
encoder = nemo_nlp.TransformerEncoderNM(
    factory=neural_factory,
    d_embedding=args.d_embedding,
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length,
    share_all_layers=args.share_encoder_layers)
decoder = nemo_nlp.TransformerDecoderNM(
    factory=neural_factory,
    d_embedding=args.d_embedding,
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length,
    share_all_layers=args.share_decoder_layers)
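# The modules above only declare the graph's building blocks; NeMo 0.x wires
# them into a DAG by calling each module on the previous module's output
# tensors. Below is a minimal sketch of that wiring for the eval pipeline.
# NOTE: illustrative only -- the output arity of TranslationDataLayer and the
# port names (input_ids_src, input_mask_src, input_ids_tgt, input_mask_tgt,
# hidden_states_src) are assumptions and may differ across NeMo versions.
src, src_mask, tgt, tgt_mask, labels, sent_ids = eval_data_layer()
src_hiddens = encoder(input_ids_src=src, input_mask_src=src_mask)
tgt_hiddens = decoder(input_ids_tgt=tgt,
                      hidden_states_src=src_hiddens,
                      input_mask_src=src_mask,
                      input_mask_tgt=tgt_mask)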
    # source and target use different tokenizers, set tie_weight to False
    tie_weight = False
else:
    nf.logger.info(
        f"Unsupported language pair: {args.src_lang}-{args.tgt_lang}.")
    exit(1)

# instantiate necessary modules for the whole translation pipeline, namely
# data layers, encoder, decoder, output log_softmax, beam_search_translator
# and loss function
encoder = nemo_nlp.TransformerEncoderNM(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=src_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length)
decoder = nemo_nlp.TransformerDecoderNM(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=tgt_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length)
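# The comment above also promises a log_softmax, a beam_search_translator and
# a loss function, which this excerpt does not show. A minimal sketch of how
# they could be instantiated follows. NOTE: assumption-laden illustration --
# the class names (TokenClassifier, BeamSearchTranslatorNM,
# PaddedSmoothedCrossEntropyLossNM), tgt_tokenizer, args.beam_size and
# args.label_smoothing are modeled on NeMo 0.x examples and may not match
# the exact version this script targets.
log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                       num_classes=tgt_vocab_size,
                                       num_layers=1,
                                       log_softmax=True)
beam_search = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    bos_token=tgt_tokenizer.bos_id(),
    pad_token=tgt_tokenizer.pad_id(),
    eos_token=tgt_tokenizer.eos_id())
loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    pad_id=tgt_tokenizer.pad_id(),
    label_smoothing=args.label_smoothing)

# tie_weight, set in the branches above, would then be consumed roughly like
# this, sharing one embedding matrix between encoder, decoder and output
# projection; the attribute paths are likewise assumptions.
if tie_weight:
    log_softmax.mlp.last_linear_layer.weight = \
        encoder.embedding_layer.token_embedding.weight
    decoder.embedding_layer.token_embedding.weight = \
        encoder.embedding_layer.token_embedding.weight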
        learn_positional_encodings=True,
        hidden_act="gelu")
    if args.restore_decoder:
        decoder.restore_from(args.decoder_restore_from,
                             local_rank=args.local_rank)
elif args.encoder == "nemo":
    encoder = nemo_nlp.TransformerEncoderNM(
        factory=neural_factory,
        d_embedding=args.d_embedding,
        d_model=args.d_model,
        d_inner=args.d_inner,
        num_layers=args.num_layers,
        num_attn_heads=args.num_attn_heads,
        ffn_dropout=args.ffn_dropout,
        vocab_size=vocab_size,
        attn_score_dropout=args.attn_score_dropout,
        attn_layer_dropout=args.attn_layer_dropout,
        max_seq_length=args.max_sequence_length,
        embedding_dropout=args.embedding_dropout,
        share_all_layers=args.share_encoder_layers,
        learn_positional_encodings=True,
        hidden_act="gelu")
    encoder.restore_from(args.encoder_restore_from,
                         local_rank=args.local_rank)
    decoder = nemo_nlp.TransformerDecoderNM(
        factory=neural_factory,
        d_embedding=args.d_embedding,
        d_model=args.d_model,
        d_inner=args.d_inner,
        num_layers=args.num_layers,
        num_attn_heads=args.num_attn_heads,
        ffn_dropout=args.ffn_dropout,
        vocab_size=vocab_size,
        attn_score_dropout=args.attn_score_dropout,
        attn_layer_dropout=args.attn_layer_dropout,
        max_seq_length=args.max_sequence_length,
        embedding_dropout=args.embedding_dropout,
        share_all_layers=args.share_decoder_layers,
        learn_positional_encodings=True,
        hidden_act="gelu")
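# With the modules built and (optionally) restored from checkpoints, the
# script would hand the pipeline's loss tensor to the factory for training.
# A minimal sketch follows. NOTE: purely illustrative -- train_loss, the
# callback arguments and the optimization_params keys are assumptions modeled
# on NeMo 0.x examples (SimpleLossLoggerCallback, CheckpointCallback and
# NeuralModuleFactory.train do exist there, but their exact parameters may
# differ), not code from this script.
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[train_loss],
    print_func=lambda x: neural_factory.logger.info(f"Loss: {x[0].item()}"))
ckpt_callback = nemo.core.CheckpointCallback(
    folder=args.checkpoint_dir,
    step_freq=args.checkpoint_save_freq)
neural_factory.train(
    tensors_to_optimize=[train_loss],
    callbacks=[train_callback, ckpt_callback],
    optimizer=args.optimizer,
    optimization_params={"num_epochs": args.num_epochs,
                         "lr": args.lr,
                         "weight_decay": args.weight_decay})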