def translate(args, net, src_vocab, tgt_vocab, active_out=None):
    "done"
    sentences = [l.split() for l in args.text]
    translated = []
    infer_dataset = ParallelDataset(args.text, args.ref_text, src_vocab, tgt_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch
    infer_dataiter = iter(infer_dataset.get_iterator(
        shuffle=True, group_by_size=True, include_indices=True))
    for (raw_batch, indices) in infer_dataiter:
        src_mask = (raw_batch.src != src_vocab.stoi[config.PAD]).unsqueeze(-2)
        if args.use_cuda:
            src, src_mask = raw_batch.src.cuda(), src_mask.cuda()
        else:
            src = raw_batch.src
        generated, gen_len = greedy(args, net, src, src_mask, src_vocab, tgt_vocab)
        new_translations = gen_batch2str(src, raw_batch.tgt, generated, gen_len,
                                         src_vocab, tgt_vocab, indices, active_out)
        translated.extend(new_translations)
    return translated
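# For reference, a minimal self-contained sketch (not from this repo) of the
# padding mask built above. It assumes PyTorch LongTensor batches shaped
# (batch, seq_len) and a hypothetical pad index of 0 standing in for
# src_vocab.stoi[config.PAD].
import torch

pad_idx = 0
src = torch.tensor([[5, 7, 3, 0, 0],
                    [2, 9, 0, 0, 0]])          # two padded source sentences
src_mask = (src != pad_idx).unsqueeze(-2)      # shape (batch, 1, seq_len), broadcastable over heads
print(src_mask)
# tensor([[[ True,  True,  True, False, False]],
#         [[ True,  True, False, False, False]]])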
def translate(args, net, src_vocab, tgt_vocab):
    "done"
    sentences = [l.split() for l in args.text]
    translated = []
    infer_dataset = ParallelDataset(args.text, args.ref_text, src_vocab, tgt_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch
    infer_dataiter = iter(infer_dataset.get_iterator(True, True))
    for raw_batch in infer_dataiter:
        # Mask out padding positions in the source batch.
        src_mask = (raw_batch.src != src_vocab.stoi[config.PAD]).unsqueeze(-2)
        if args.use_cuda:
            src, src_mask = raw_batch.src.cuda(), src_mask.cuda()
        else:
            src = raw_batch.src  # keep the batch on CPU
        if args.greedy:
            generated, gen_len = greedy(args, net, src, src_mask, src_vocab, tgt_vocab)
        else:
            generated, gen_len = generate_beam(args, net, src, src_mask, src_vocab, tgt_vocab)
        new_translations = gen_batch2str(src, raw_batch.tgt, generated, gen_len,
                                         src_vocab, tgt_vocab)
        for res_sent in new_translations:
            print(res_sent)
        translated.extend(new_translations)
    return translated
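# A rough sketch of what a greedy decoder with this call shape could look like.
# Everything here is an assumption for illustration: the encode/decode methods,
# the BOS/EOS handling, and max_len are not this repo's actual greedy() API.
import torch

def greedy_decode_sketch(model, src, src_mask, bos_idx, eos_idx, max_len=100):
    """Pick the argmax token at each step until every sentence emits EOS."""
    memory = model.encode(src, src_mask)                       # assumed method
    ys = torch.full((src.size(0), 1), bos_idx,
                    dtype=torch.long, device=src.device)       # start with BOS
    for _ in range(max_len - 1):
        logits = model.decode(memory, src_mask, ys)            # assumed method
        next_tok = logits[:, -1].argmax(dim=-1, keepdim=True)  # greedy choice
        ys = torch.cat([ys, next_tok], dim=1)
        if (next_tok == eos_idx).all():
            break
    gen_len = (ys != eos_idx).sum(dim=1)                       # rough length estimate
    return ys, gen_len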
def translate(args, net, src_vocab, tgt_vocab):
    "done"
    sentences = [l.split() for l in args.text]
    translated = []
    if args.greedy:
        # Batched greedy decoding over the whole input.
        infer_dataset = ParallelDataset(args.text, args.ref_text, src_vocab, tgt_vocab)
        if args.batch_size is not None:
            infer_dataset.BATCH_SIZE = args.batch_size
        if args.max_batch_size is not None:
            infer_dataset.max_batch_size = args.max_batch_size
        if args.tokens_per_batch is not None:
            infer_dataset.tokens_per_batch = args.tokens_per_batch
        infer_dataiter = iter(infer_dataset.get_iterator(True, True))
        num_sents = 0
        for raw_batch in infer_dataiter:
            src_mask = (raw_batch.src != src_vocab.stoi[config.PAD]).unsqueeze(-2)
            if args.use_cuda:
                src, src_mask = raw_batch.src.cuda(), src_mask.cuda()
            else:
                src = raw_batch.src
            generated, gen_len = greedy(args, net, src, src_mask, src_vocab, tgt_vocab)
            new_translations = gen_batch2str(src, raw_batch.tgt, generated, gen_len,
                                             src_vocab, tgt_vocab)
            print('src size : {}'.format(src.size()))
            for res_sent in new_translations:
                print(res_sent)
            translated.extend(new_translations)
    else:
        # Sentence-by-sentence translation, stripping BPE joins and special
        # tokens from each hypothesis before collecting it.
        for i_s, sentence in enumerate(sentences):
            s_trans = translate_sentence(sentence, net, args, src_vocab, tgt_vocab)
            s_trans = remove_special_tok(remove_bpe(s_trans))
            translated.append(s_trans)
            print(translated[-1])
    return translated
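# A hypothetical invocation of translate(), assuming the network and the two
# vocabularies are built elsewhere by the training/loading code. The attribute
# names below simply mirror the flags the function reads; the sample sentences
# and values are illustrative only.
from argparse import Namespace

args = Namespace(
    text=["the cat sleeps .", "it is raining ."],  # tokenized/BPE'd source lines
    ref_text=None,                                 # optional reference translations
    greedy=True,                                   # take the batched greedy path
    use_cuda=False,
    batch_size=32,
    max_batch_size=None,
    tokens_per_batch=None,
)
# hypothetical: net, src_vocab and tgt_vocab come from the checkpoint loader
# hyps = translate(args, net, src_vocab, tgt_vocab)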