# NOTE(review): mangled fragment — the `if args.tgt_max_len > 0:`-style header that
# pairs with this `else:` lies in a previous chunk, and the final logging call is
# cut off mid-arguments. Tokens preserved; line breaks/indentation reconstructed.
else:
    # Fall back to the maximum target length observed in the dataset.
    tgt_max_len = max_len[1]
# Build a Transformer encoder/decoder pair sized from the CLI arguments.
# Positional capacity is forced to at least 500 (presumably so decoding can run
# past the training-time maximum lengths — TODO confirm).
encoder, decoder = get_transformer_encoder_decoder(
    units=args.num_units, hidden_size=args.hidden_size, dropout=args.dropout,
    num_layers=args.num_layers, num_heads=args.num_heads,
    max_src_length=max(src_max_len, 500), max_tgt_length=max(tgt_max_len, 500),
    scaled=args.scaled)
# Embedding sharing and weight tying are enabled for every dataset except 'TOY'
# (presumably the toy set has distinct src/tgt vocabularies — TODO confirm).
model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder,
                 decoder=decoder, share_embed=args.dataset != 'TOY',
                 embed_size=args.num_units, tie_weights=args.dataset != 'TOY',
                 embed_initializer=None, prefix='transformer_')
model.initialize(init=mx.init.Xavier(magnitude=args.magnitude), ctx=ctx)
static_alloc = True
# Hybridize with static memory allocation enabled.
model.hybridize(static_alloc=static_alloc)
logging.info(model)
# Beam-search decoder with length-penalty scoring; generation capped at 200 tokens.
translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=BeamSearchScorer(alpha=args.lp_alpha,
                                                          K=args.lp_k),
                                  max_length=200)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
# NOTE(review): fragment ends here mid-call — remaining .format() args are in the next chunk.
# NOTE(review): mangled fragment — this opens mid-expression; the
# `data_val = SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)` head of
# this comprehension lies in the previous chunk. Tokens preserved; formatting reconstructed.
                                          for i, ele in enumerate(data_val)])
# Wrap the test set the same way: each item becomes
# (ele[0], ele[1], len(ele[0]), len(ele[1]), original index).
data_test = SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                           for i, ele in enumerate(data_test)])
# Device selection: CPU unless a GPU id was supplied on the command line.
if args.gpu is None:
    ctx = mx.cpu()
    print('Use CPU')
else:
    ctx = mx.gpu(args.gpu)
# Build the GNMT encoder/decoder pair from the CLI arguments.
encoder, decoder = get_gnmt_encoder_decoder(hidden_size=args.num_hidden,
                                            dropout=args.dropout,
                                            num_layers=args.num_layers,
                                            num_bi_layers=args.num_bi_layers)
model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder,
                 decoder=decoder, embed_size=args.num_hidden, prefix='gnmt_')
model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
static_alloc = True
# Hybridize with static memory allocation enabled.
model.hybridize(static_alloc=static_alloc)
logging.info(model)
# Beam search may generate up to 100 tokens beyond the training-time target cap.
translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=BeamSearchScorer(alpha=args.lp_alpha,
                                                          K=args.lp_k),
                                  max_length=args.tgt_max_len + 100)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha, args.lp_k))
def _add_lengths_and_index(dataset):
    """Return a SimpleDataset whose items are (ele[0], ele[1], len(ele[0]),
    len(ele[1]), original index) for each element of *dataset*."""
    records = []
    for idx, ele in enumerate(dataset):
        records.append((ele[0], ele[1], len(ele[0]), len(ele[1]), idx))
    return SimpleDataset(records)


# Training pairs carry their lengths; eager (non-lazy) transform.
data_train = data_train.transform(
    lambda src, tgt: (src, tgt, len(src), len(tgt)), lazy=False)
# Validation/test pairs additionally carry their original sample index.
data_val = _add_lengths_and_index(data_val)
data_test = _add_lengths_and_index(data_test)

# Device selection: CPU unless a GPU id was supplied on the command line.
ctx = mx.cpu() if args.gpu is None else mx.gpu(args.gpu)
if args.gpu is None:
    print('Use CPU')

# Build the GNMT encoder/decoder pair from the CLI arguments.
encoder, decoder = get_gnmt_encoder_decoder(
    hidden_size=args.num_hidden,
    dropout=args.dropout,
    num_layers=args.num_layers,
    num_bi_layers=args.num_bi_layers,
)
model = NMTModel(
    src_vocab=src_vocab,
    tgt_vocab=tgt_vocab,
    encoder=encoder,
    decoder=decoder,
    embed_size=args.num_hidden,
    prefix='gnmt_',
)
model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
model.hybridize()
logging.info(model)

# Beam-search decoder with length-penalty scoring, capped at the target length limit.
beam_scorer = BeamSearchScorer(alpha=args.lp_alpha, K=args.lp_k)
translator = BeamSearchTranslator(
    model=model,
    beam_size=args.beam_size,
    scorer=beam_scorer,
    max_length=args.tgt_max_len,
)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha, args.lp_k))

# Cross-entropy loss; the class name suggests padding positions are masked out —
# TODO confirm against SoftmaxCEMaskedLoss definition.
loss_function = SoftmaxCEMaskedLoss()
loss_function.hybridize()
# NOTE(review): mangled fragment — the `if args.gpu is None:` header pairing with
# the `else:` below lies in the previous chunk. Tokens preserved; formatting reconstructed.
    ctx = mx.cpu()
    print('Use CPU')
else:
    ctx = mx.gpu(args.gpu)
# GNMT encoder/decoder variant with independently configured encoder/decoder
# depths, an `input_halved_layers` option, and a selectable attention cell.
encoder, decoder = get_gnmt_encoder_decoder(hidden_size=args.num_hidden,
                                            dropout=args.dropout,
                                            num_encoder_layers=args.num_encoder_layers,
                                            num_decoder_layers=args.num_decoder_layers,
                                            num_bi_layers=args.num_bi_layers,
                                            input_halved_layers=args.input_halved_layers,
                                            attention_cell=args.attention)
# model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder, decoder=decoder,
#                  embed_size=args.num_hidden, prefix='gnmt_')
# NOTE(review): src_vocab=None with the vocabulary taken from train_data.dict() —
# presumably source and target share a single dictionary here; confirm against
# NMTModel's handling of a None src_vocab.
model = NMTModel(src_vocab=None, tgt_vocab=train_data.dict(), encoder=encoder,
                 decoder=decoder, embed_size=args.num_hidden, prefix='gnmt_')
model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
model.hybridize()
logging.info(model)
# Beam-search decoder with length-penalty scoring, capped at the target length limit.
translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=BeamSearchScorer(alpha=args.lp_alpha,
                                                          K=args.lp_k),
                                  max_length=args.tgt_max_len)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha, args.lp_k))
# Cross-entropy loss; the class name suggests padding positions are masked out —
# TODO confirm against SoftmaxCEMaskedLoss definition.
loss_function = SoftmaxCEMaskedLoss()
loss_function.hybridize()
# NOTE(review): mangled fragment — the `if args.src_max_len > 0:`-style header that
# pairs with this `else:` lies in a previous chunk. Tokens preserved; formatting reconstructed.
else:
    # Fall back to the maximum source length observed in the dataset.
    src_max_len = max_len[0]
# Target cap: explicit CLI value if positive, otherwise the observed maximum.
if args.tgt_max_len > 0:
    tgt_max_len = args.tgt_max_len
else:
    tgt_max_len = max_len[1]
# Build a Transformer encoder/decoder pair sized from the CLI arguments.
# Positional capacity is forced to at least 500 (presumably so decoding can run
# past the training-time maximum lengths — TODO confirm).
encoder, decoder = get_transformer_encoder_decoder(
    units=args.num_units, hidden_size=args.hidden_size, dropout=args.dropout,
    num_layers=args.num_layers, num_heads=args.num_heads,
    max_src_length=max(src_max_len, 500), max_tgt_length=max(tgt_max_len, 500),
    scaled=args.scaled)
# Unlike the sibling variant, embedding sharing and weight tying are always on here.
model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder,
                 decoder=decoder, share_embed=True, embed_size=args.num_units,
                 tie_weights=True, embed_initializer=None, prefix='transformer_')
model.initialize(init=mx.init.Xavier(magnitude=args.magnitude), ctx=ctx)
static_alloc = True
# NOTE(review): hybridization is commented out in this variant (the flag above is
# left assigned but unused) — confirm whether this is intentional, e.g. for
# debugging, before re-enabling.
#model.hybridize(static_alloc=static_alloc)
logging.info(model)
# Beam-search decoder with length-penalty scoring; generation capped at 200 tokens.
translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=BeamSearchScorer(alpha=args.lp_alpha,
                                                          K=args.lp_k),
                                  max_length=200)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha, args.lp_k))
# Label smoothing over the full target vocabulary with strength args.epsilon.
label_smoothing = LabelSmoothing(epsilon=args.epsilon, units=len(tgt_vocab))
# NOTE(review): also deliberately (?) left un-hybridized — see note above on model.hybridize.
#label_smoothing.hybridize(static_alloc=static_alloc)