def make_result(src_str, hypos, tgt_dict, nbest=6):
    result = Translation(
        src_str=src_str,
        hypos=[],
        attention=[],
        pos_scores=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=None,
            tgt_dict=tgt_dict,
            remove_bpe=None,
        )
        result.hypos.append((hypo['score'], hypo_str))
        att_weights = torch.t(hypo['attention'])[0].tolist()
        result.attention.append(att_weights)
        result.pos_scores.append('P\t{}'.format(' '.join(
            map(
                lambda x: '{:.4f}'.format(x),
                hypo['positional_scores'].tolist(),
            ))))
    return result
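# A minimal sketch (not part of the snippets themselves) of the `Translation`
# container the make_result variants in this file assume: a plain namedtuple.
# The field set differs per variant (attention / pos_scores / alignments), so
# adjust the field list to match the variant you use.
from collections import namedtuple

Translation = namedtuple('Translation', ['src_str', 'hypos', 'attention', 'pos_scores'])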
def make_result(src_str, hypos, align_dict, tgt_dict, args):
    result = Translation(
        src_str="O\t{}".format(src_str),
        hypos=[],
        pos_scores=[],
        alignments=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo["tokens"].int().cpu(),
            src_str=src_str,
            alignment=hypo["alignment"].int().cpu() if hypo["alignment"] is not None else None,
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        # result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
        # only keep the translation, not the score
        result.hypos.append(hypo_str)
        result.pos_scores.append("P\t{}".format(" ".join(
            map(
                lambda x: "{:.4f}".format(x),
                hypo["positional_scores"].tolist(),
            ))))
        result.alignments.append("A\t{}".format(" ".join(
            map(lambda x: str(utils.item(x)), alignment)))
            if args.print_alignment else None)
    return result
def make_result(src_str, hypos, align_dict, tgt_dict, args):
    result = Translation(
        src_str='O\t{}'.format(src_str),
        hypos=[],
        pos_scores=[],
        alignments=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        # result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
        # only keep the translation, not the score
        result.hypos.append(hypo_str)
        result.pos_scores.append('P\t{}'.format(
            ' '.join(map(
                lambda x: '{:.4f}'.format(x),
                hypo['positional_scores'].tolist(),
            ))
        ))
        result.alignments.append(
            'A\t{}'.format(' '.join(map(lambda x: str(utils.item(x)), alignment)))
            if args.print_alignment else None
        )
    return result
def make_result(src_str, hypos):
    result = Translation(
        src_str=src_str,
        hypos=[],
        pos_scores=[],
        alignments=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        hypo_str = tokenizer.Tokenizer.detokenize(hypo_str, 'de').strip()
        result.hypos.append((hypo['score'], hypo_str))
        result.pos_scores.append('P\t' + ' '.join(
            f'{x:.4f}' for x in hypo['positional_scores'].tolist()))
        result.alignments.append('A\t' + ' '.join(
            str(utils.item(x)) for x in alignment)
            if args.print_alignment else None)
    return result
def make_result(src_str, hypos):
    result = Translation(
        src_str='O\t{}'.format(src_str),
        hypos=[],
        pos_scores=[],
        alignments=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
        result.pos_scores.append('P\t{}'.format(' '.join(
            map(
                lambda x: '{:.4f}'.format(x),
                hypo['positional_scores'].tolist(),
            ))))
        result.alignments.append('A\t{}'.format(' '.join(
            map(lambda x: str(utils.item(x)), alignment)))
            if args.print_alignment else None)
    return result
def translation(args, task, max_positions, use_cuda, generator, models,
                src_dict, tgt_dict, align_dict, tokenizer, bpe, inputs):
    # if args.buffer_size > 1:
    #     print('| Sentence buffer size:', args.buffer_size)
    # print('| Type the input sentence and press return:')

    def encode_fn(x):
        if tokenizer is not None:
            x = tokenizer.encode(x)
        if bpe is not None:
            x = bpe.encode(x)
        return x

    def decode_fn(x):
        if bpe is not None:
            x = bpe.decode(x)
        if tokenizer is not None:
            x = tokenizer.decode(x)
        return x

    start_id = 0
    results = []
    for batch in make_batches(inputs, args, task, max_positions, encode_fn):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = task.inference_step(generator, models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
            results.append((start_id + id, src_tokens_i, hypos))

    # sort output to match input order
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if src_dict is not None:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            # print('S-{}\t{}'.format(id, src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'],
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            hypo_str = decode_fn(hypo_str)
            # return the top hypothesis of the first input
            return hypo_str
def __call__(self, lines, attention=False):
    start_id = 0
    results = []
    args = self.args
    src_dict = self.task.source_dictionary
    tgt_dict = self.task.target_dictionary
    align_dict = utils.load_align_dict(args.replace_unk)
    for batch, idx in self._make_batches(lines):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if self.use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = self.task.inference_step(self.generator, self.models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
            results.append((start_id + id, src_tokens_i, hypos))

    exports = []
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if src_dict is not None:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            # print('S-{}\t{}'.format(id, src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            export = {
                'src': src_str,
                'id': id,
                'tgt': hypo_str,
                # 'attn': translation['attention'].tolist(),
            }
            if not attention:
                export['attn'] = None
            exports.append(export)
    return exports
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    options.add_dataset_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        translations = translator.generate(Variable(src_tokens.view(1, -1)))
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe)
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
def main(args):
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
        )

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        src_lengths = src_tokens.new([src_tokens.numel()])
        translations = translator.generate(
            Variable(src_tokens.view(1, -1)),
            Variable(src_lengths.view(-1)),
        )
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe,
            )
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
def generate(self, src_str, verbose=False):

    def preprocess(s):
        for transform in self.in_transforms:
            s = transform(s)
        return s

    def postprocess(s):
        for transform in self.out_transforms:
            s = transform(s)
        return s

    src_str = preprocess(src_str)
    for batch in self.make_batches([src_str], self.args, self.task, self.max_positions):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if self.use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = self.task.inference_step(self.generator, self.models, sample)

    src_tokens = utils.strip_pad(src_tokens, self.tgt_dict.pad())
    if self.src_dict is not None:
        src_str = self.src_dict.string(src_tokens)
        src_str = postprocess(src_str)
    if verbose:
        print('S\t{}'.format(src_str))

    # Process top predictions (note: slice by the number of hypotheses, not batches)
    for hypo in translations[0][:min(len(translations[0]), self.args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=self.align_dict,
            tgt_dict=self.tgt_dict,
        )
        hypo_str = postprocess(hypo_str)
        if verbose:
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('P\t{}'.format(
                ' '.join(map(lambda x: '{:.4f}'.format(x), hypo['positional_scores'].tolist()))
            ))
            if self.args.print_alignment:
                print('A\t{}'.format(
                    ' '.join(map(lambda x: str(utils.item(x)), alignment))
                ))
    return html.unescape(hypo_str)
def decode(self, inputs: List[str]) -> List[Tuple[float, str]]:
    """
    Args:
        inputs: List of sequences as input to the model

    Returns:
        responses: The list of (score, response) pairs produced by the model
    """
    results = []
    for batch in self.create_batches(inputs):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if self.args.use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            }
        }
        translations = self.task.inference_step(self.generator, self.models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], self.task.target_dictionary.pad())
            results.append((id, src_tokens_i, hypos))

    # sort output to match input order
    responses = []
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if self.task.source_dictionary is not None:
            src_str = self.task.source_dictionary.string(src_tokens, self.args.remove_bpe)

        # Process top predictions
        for hypo in hypos[:min(len(hypos), self.args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'],
                align_dict=self.align_dict,
                tgt_dict=self.task.target_dictionary,
                remove_bpe=self.args.remove_bpe,
                extra_symbols_to_ignore=get_symbols_to_strip_from_output(self.generator),
            )
            if self.bpe is not None:
                hypo_str = self.bpe.decode(hypo_str)
            detok_hypo_str = self.tokenizer.decode(hypo_str)
            score = hypo['score'] / math.log(2)  # convert to base 2
            responses.append((float(score), detok_hypo_str))
    return responses
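# Illustrative use of decode() above (a sketch, not from the original source):
# `agent` stands in for an instance of the enclosing, unshown class with
# task, models, generator, bpe and tokenizer already set up.
responses = agent.decode(['Hello , how are you ?'])
for score, text in responses:
    print('{:.3f}\t{}'.format(score, text))  # score is a base-2 log probability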
def model_predict(inputs, models, generator, align_dict, max_positions, args,
                  use_cuda, task, src_dict, tgt_dict):
    results = []
    start_id = 0
    for batch in make_batches(inputs, args, task, max_positions):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = task.inference_step(generator, models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
            results.append((start_id + id, src_tokens_i, hypos))

    # sort output to match input order
    outputs = []
    ori_scores = []
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if src_dict is not None:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            # print('S-{}\t{}'.format(id, src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            outputs.append(hypo_str.replace('@@ ', ''))
            ori_scores.append(hypo['score'])
            # print('H-{}\t{}\t{}'.format(id, hypo['score'], hypo_str))
            # print('P-{}\t{}'.format(
            #     id,
            #     ' '.join(map(lambda x: '{:.4f}'.format(x), hypo['positional_scores'].tolist()))
            # ))
            # if args.print_alignment:
            #     print('A-{}\t{}'.format(
            #         id,
            #         ' '.join(map(lambda x: str(utils.item(x)), alignment))
            #     ))
    return outputs, ori_scores
def generate(self, src_str, verbose=False):

    def preprocess(s):
        if self.tokenizer is not None:
            s = self.tokenizer.encode(s)
        if self.bpe is not None:
            s = self.bpe.encode(s)
        return s

    def postprocess(s):
        if self.bpe is not None:
            s = self.bpe.decode(s)
        if self.tokenizer is not None:
            s = self.tokenizer.decode(s)
        return s

    src_str = preprocess(src_str)
    tokens = self.src_dict.encode_line(src_str, add_if_not_exist=False).long()
    if verbose:
        src_str_with_unk = self.src_dict.string(tokens)
        print('S\t{}'.format(src_str_with_unk))

    dataset = self.task.build_dataset_for_inference([tokens], [tokens.numel()])
    sample = dataset.collater([dataset[0]])
    if self.use_cuda:
        sample = utils.move_to_cuda(sample)
    translations = self.task.inference_step(self.generator, self.models, sample)

    # Process top predictions (note: slice by the number of hypotheses, not batches)
    for hypo in translations[0][:min(len(translations[0]), getattr(self.args, 'nbest', 1))]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=self.align_dict,
            tgt_dict=self.tgt_dict,
        )
        hypo_str = postprocess(hypo_str)
        if verbose:
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('P\t{}'.format(' '.join(
                map(lambda x: '{:.4f}'.format(x), hypo['positional_scores'].tolist()))))
            if getattr(self.args, 'print_alignment', False):
                print('A\t{}'.format(' '.join(
                    map(lambda x: str(utils.item(x)), alignment))))
    return hypo_str
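# Illustrative use of generate() above (a sketch, not from the original source):
# `hub` stands in for an instance of the enclosing, unshown interface, e.g. a
# loaded fairseq hub model with src_dict/tgt_dict/generator attached.
best_hypo = hub.generate('Hello world !', verbose=True)
print(best_hypo)  # detokenized hypothesis (the best one when nbest is 1)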
def generate(self, string, out_lst):
    start_id = 0
    inputs = [string]
    results = []
    for batch in make_batches(inputs, self.args, self.task, self.max_positions, self.encode_fn):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if self.use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = self.task.inference_step(self.generator, self.models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], self.tgt_dict.pad())
            # keep the per-sentence tokens so src_str below is not taken from the stale batch tensor
            results.append((start_id + id, src_tokens_i, hypos))
    # print(results[0][1])

    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if self.src_dict is not None:
            src_str = self.src_dict.string(src_tokens, self.args.remove_bpe)
        for hypo in hypos[:min(len(hypos), self.args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=self.align_dict,
                tgt_dict=self.tgt_dict,
                remove_bpe=self.args.remove_bpe,
            )
            if self.decoder is not None:
                # print('decoder is not none...')
                hypo_str = self.decoder.decode(map(int, hypo_str.strip().split()))
                # print(hypo_tokens)
                hypo_str = hypo_str.replace(' ', '3spaces').replace(' ', '').replace('3spaces', ' ')
            print(hypo_str)
            out_lst.append(hypo_str + '\n')
def make_results(self, hypos, args):
    results = []
    tgt_dict = self.task.target_dictionary
    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=None,
            alignment=None,
            align_dict=None,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        results.append(hypo_str.split())
    return results
def gen(self, ipt):
    start_id = 0
    results = []
    inputs = [ipt]
    for batch in make_batches(inputs, self.gen_utils["args"], self.gen_utils["task"],
                              self.gen_utils["max_positions"], self.gen_utils["encode_fn"]):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if self.gen_utils["use_cuda"]:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = self.gen_utils["task"].inference_step(
            self.gen_utils["generator"], self.gen_utils["models"], sample)
        for i, (idx, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], self.gen_utils["tgt_dict"].pad())
            results.append((start_id + idx, src_tokens_i, hypos))

    outputs = []
    # sort output to match input order
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        src_str = self.gen_utils["src_dict"].string(src_tokens, self.gen_utils["args"].remove_bpe)
        print('S-{}\t{}'.format(id, src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), self.gen_utils["args"].nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'],
                align_dict=self.gen_utils["align_dict"],
                tgt_dict=self.gen_utils["tgt_dict"],
                remove_bpe=self.gen_utils["args"].remove_bpe,
            )
            hypo_str = self.gen_utils["decode_fn"](hypo_str)
            outputs.append(hypo_str)

    # update running id counter
    start_id += len(inputs)
    return outputs
def make_result(src_str, hypos):
    hypo_strs = []
    # Process top predictions
    for hypo in hypos[:min(len(hypos), args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=args.remove_bpe,
        )
        hypo_strs.append(hypo_str)
    return hypo_strs
def idx2word(hypo_tokens, tgt_dict):
    """
    Args:
        hypo_tokens: tensor of token indices to convert back to words
        tgt_dict: the dictionary the indices refer to

    Returns:
        str: the de-indexed sentence
    """
    align_dict = utils.load_align_dict(None)
    al = torch.tensor([])  # empty alignment; no UNK replacement happens
    hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
        hypo_tokens=hypo_tokens.int().cpu(),
        src_str='src_str',  # placeholder; only consulted for UNK replacement
        alignment=al,
        align_dict=align_dict,
        tgt_dict=tgt_dict)
    return hypo_str
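# Illustrative use of idx2word() above with hypothetical indices (a sketch,
# not from the original source); assumes `tgt_dict` is a fairseq Dictionary
# whose entries cover these ids.
import torch

indices = torch.tensor([42, 17, 2])  # hypothetical token ids; 2 is EOS in default fairseq dictionaries
sentence = idx2word(indices, tgt_dict)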
def predict(inputs):
    results = []
    start_id = 0
    for batch in make_batches(inputs, args, task, max_positions, encode_fn):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        if use_cuda:
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        translations = task.inference_step(generator, models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
            results.append((start_id + id, src_tokens_i, hypos))

    ret_hypos = []
    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        if src_dict is not None:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            # print('S-{}\t{}'.format(id, src_str))

        # Process top predictions
        tmp_ret_hypo = []
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'],
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            hypo_str = decode_fn(hypo_str)
            tmp_ret_hypo.append([hypo['score'], hypo_str])
        ret_hypos.append(tmp_ret_hypo)
    return ret_hypos
def make_result(src_str, hypos):
    result = Translation(
        src_str='O\t{}'.format(src_str),
        hypos=[],
        alignments=[],
    )

    # Process top predictions
    for hypo in hypos[:min(len(hypos), self.args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu(),
            align_dict=self.align_dict,
            tgt_dict=self.tgt_dict,
            remove_bpe=self.args.remove_bpe,
        )
        result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
        result.alignments.append('A\t{}'.format(
            ' '.join(map(lambda x: str(utils.item(x)), alignment))))
    return result
def make_result(src_str, hypos):
    # Build a named tuple holding the source string and the processed outputs.
    result = self.Translation(
        src_str='O\t{}'.format(src_str),
        hypos=[],
        pos_scores=[],
        alignments=[],
    )

    # Process the top nbest predictions.
    for hypo in hypos[:min(len(hypos), self.args.nbest)]:
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            # tokens of the current prediction, cast to int and moved to CPU
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,  # the input string
            # only post-process the alignment when the hypothesis carries one
            alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
            align_dict=align_dict,  # word dictionary used for UNK replacement
            tgt_dict=tgt_dict,  # the target dictionary used for decoding
            remove_bpe=self.args.remove_bpe,  # strip BPE continuation markers if set
        )
        # Store the formatted hypothesis together with its score.
        result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
        # Store the per-token positional scores in the same tab-separated format.
        result.pos_scores.append('P\t{}'.format(
            ' '.join(
                map(
                    lambda x: '{:.4f}'.format(x),
                    hypo['positional_scores'].tolist(),
                ))))
        # Store the alignment only when print_alignment is enabled.
        result.alignments.append(
            'A\t{}'.format(' '.join(
                map(lambda x: str(utils.item(x)), alignment)))
            if self.args.print_alignment else None)
    return result
def get_text(cfg, generator, model, sample, bpe):
    decoder_output = task.inference_step(generator, model, sample,
                                         prefix_tokens=None, constraints=None)
    decoder_output = decoder_output[0][0]  # top-1 hypothesis
    hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
        hypo_tokens=decoder_output["tokens"].int().cpu(),
        src_str="",
        alignment=decoder_output["alignment"],
        align_dict=None,
        tgt_dict=model[0].decoder.dictionary,
        remove_bpe=cfg.common_eval.post_process,
        extra_symbols_to_ignore=generate.get_symbols_to_strip_from_output(generator),
    )
    detok_hypo_str = bpe.decode(hypo_str)
    return detok_hypo_str
def processFlatHypo(sample_id, src_tokens, target_tokens, hypos, src_str,
                    align_dict, tgt_dict, remove_bpe, has_target, target_str):
    """Not used"""
    for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
        hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
            hypo_tokens=hypo['tokens'].int().cpu(),
            src_str=src_str,
            alignment=hypo['alignment'].int().cpu(),
            align_dict=align_dict,
            tgt_dict=tgt_dict,
            remove_bpe=remove_bpe,
        )

        if not args.quiet:
            print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
            print('P-{}\t{}'.format(
                sample_id,
                ' '.join(
                    map(
                        lambda x: '{:.4f}'.format(x),
                        hypo['positional_scores'].tolist(),
                    ))))
            print('A-{}\t{}'.format(
                sample_id,
                ' '.join(map(lambda x: str(utils.item(x)), alignment))))

        # Score only the top hypothesis
        if has_target and i == 0:
            if align_dict is not None or args.remove_bpe is not None:
                # Convert back to tokens for evaluation with unk replacement and/or without BPE
                target_tokens = tokenizer.Tokenizer.tokenize(
                    target_str, tgt_dict, add_if_not_exist=True)

    # write files for ROUGE (the context manager closes the file)
    with open(os.path.join(args.decode_dir, "{}.dec".format(sample_id)), 'w') as f:
        f.write(make_html_safe(hypo_str))
def main(cfg: FairseqConfig):
    if isinstance(cfg, Namespace):
        cfg = convert_namespace_to_omegaconf(cfg)

    start_time = time.time()
    total_translate_time = 0

    utils.import_user_module(cfg.common)

    if cfg.interactive.buffer_size < 1:
        cfg.interactive.buffer_size = 1
    if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
        cfg.dataset.batch_size = 1

    assert (
        not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam
    ), "--sampling requires --nbest to be equal to --beam"
    assert (
        not cfg.dataset.batch_size
        or cfg.dataset.batch_size <= cfg.interactive.buffer_size
    ), "--batch-size cannot be larger than --buffer-size"

    logger.info(cfg)

    # Fix seed for stochastic decoding
    if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(cfg.task)

    # Load ensemble
    overrides = ast.literal_eval(cfg.common_eval.model_overrides)
    logger.info("loading model(s) from {}".format(cfg.common_eval.path))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(cfg.common_eval.path),
        arg_overrides=overrides,
        task=task,
        suffix=cfg.checkpoint.checkpoint_suffix,
        strict=(cfg.checkpoint.checkpoint_shard_count == 1),
        num_shards=cfg.checkpoint.checkpoint_shard_count,
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        if model is None:
            continue
        if cfg.common.fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)

    # Initialize generator
    generator = task.build_generator(models, cfg.generation)

    # Handle tokenization and BPE
    tokenizer = encoders.build_tokenizer(cfg.tokenizer)
    bpe = encoders.build_bpe(cfg.bpe)

    def encode_fn(x):
        if tokenizer is not None:
            x = tokenizer.encode(x)
        if bpe is not None:
            x = bpe.encode(x)
        return x

    def decode_fn(x):
        if bpe is not None:
            x = bpe.decode(x)
        if tokenizer is not None:
            x = tokenizer.decode(x)
        return x

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(cfg.generation.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(), *[model.max_positions() for model in models]
    )

    if cfg.generation.constraints:
        logger.warning(
            "NOTE: Constrained decoding currently assumes a shared subword vocabulary."
        )

    if cfg.interactive.buffer_size > 1:
        logger.info("Sentence buffer size: %s", cfg.interactive.buffer_size)
    logger.info("NOTE: hypothesis and token scores are output in base 2")
    logger.info("Type the input sentence and press return:")
    start_id = 0
    for inputs in buffered_read(cfg.interactive.input, cfg.interactive.buffer_size):
        results = []
        for batch in make_batches(inputs, cfg, task, max_positions, encode_fn):
            bsz = batch.src_tokens.size(0)
            src_tokens = batch.src_tokens
            src_lengths = batch.src_lengths
            constraints = batch.constraints
            if use_cuda:
                src_tokens = src_tokens.cuda()
                src_lengths = src_lengths.cuda()
                if constraints is not None:
                    constraints = constraints.cuda()

            sample = {
                "net_input": {
                    "src_tokens": src_tokens,
                    "src_lengths": src_lengths,
                },
            }
            translate_start_time = time.time()
            translations = task.inference_step(
                generator, models, sample, constraints=constraints
            )
            translate_time = time.time() - translate_start_time
            total_translate_time += translate_time
            list_constraints = [[] for _ in range(bsz)]
            if cfg.generation.constraints:
                list_constraints = [unpack_constraints(c) for c in constraints]
            for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
                src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
                constraints = list_constraints[i]
                results.append(
                    (
                        start_id + id,
                        src_tokens_i,
                        hypos,
                        {
                            "constraints": constraints,
                            "time": translate_time / len(translations),
                        },
                    )
                )

        # sort output to match input order
        for id_, src_tokens, hypos, info in sorted(results, key=lambda x: x[0]):
            if src_dict is not None:
                src_str = src_dict.string(src_tokens, cfg.common_eval.post_process)
                print("S-{}\t{}".format(id_, src_str))
                print("W-{}\t{:.3f}\tseconds".format(id_, info["time"]))
                for constraint in info["constraints"]:
                    print(
                        "C-{}\t{}".format(
                            id_,
                            tgt_dict.string(constraint, cfg.common_eval.post_process),
                        )
                    )

            # Process top predictions
            for hypo in hypos[:min(len(hypos), cfg.generation.nbest)]:
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo["tokens"].int().cpu(),
                    src_str=src_str,
                    alignment=hypo["alignment"],
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=cfg.common_eval.post_process,
                    extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator),
                )
                detok_hypo_str = decode_fn(hypo_str)
                score = hypo["score"] / math.log(2)  # convert to base 2
                # original hypothesis (after tokenization and BPE)
                print("H-{}\t{}\t{}".format(id_, score, hypo_str))
                # detokenized hypothesis
                print("D-{}\t{}\t{}".format(id_, score, detok_hypo_str))
                print(
                    "P-{}\t{}".format(
                        id_,
                        " ".join(
                            map(
                                lambda x: "{:.4f}".format(x),
                                # convert from base e to base 2
                                hypo["positional_scores"].div_(math.log(2)).tolist(),
                            )
                        ),
                    )
                )
                if cfg.generation.print_alignment:
                    alignment_str = " ".join(
                        ["{}-{}".format(src, tgt) for src, tgt in alignment]
                    )
                    print("A-{}\t{}".format(id_, alignment_str))

        # update running id_ counter
        start_id += len(inputs)

    logger.info(
        "Total time: {:.3f} seconds; translation time: {:.3f}".format(
            time.time() - start_time, total_translate_time
        )
    )
def _iter_first_best_bilingual(args, task, dataset, translations, align_dict):
    """Iterate over first best translations.

    This is a generator function which yields information about the first best
    translations in `translations`. It also prints the n-best translations
    to stdout.

    Args:
        args: Command-line arguments.
        task: FairseqTask object.
        dataset: Dataset set object for a specific split.
        translations: Batched translation iterator, as returned by
            SequenceGenerator.generate_batched_itr().
        align_dict: Dictionary for UNK replacement.

    Yields:
        For each sentence in `translations`, yields a TranslationInfo.
    """
    for sample_id, src_tokens, target_tokens, hypos in translations:
        # Process input and ground truth
        target_tokens = target_tokens.int().cpu()

        # Either retrieve the original sentences or regenerate them from tokens.
        if align_dict is not None:
            src_str = dataset.src.get_original_text(sample_id)
            target_str = dataset.tgt.get_original_text(sample_id)
        else:
            src_str = task.source_dictionary.string(src_tokens, args.remove_bpe)
            target_str = task.target_dictionary.string(
                target_tokens, args.remove_bpe, escape_unk=True
            )

        if not args.quiet:
            print(f"S-{sample_id}\t{src_str}")
            print(f"T-{sample_id}\t{target_str}")

        # Process top predictions
        for i, hypo in enumerate(hypos[: min(len(hypos), args.nbest)]):
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo["tokens"].int().cpu(),
                src_str=src_str,
                alignment=hypo["alignment"].int().cpu(),
                align_dict=align_dict,
                tgt_dict=task.target_dictionary,
                remove_bpe=args.remove_bpe,
            )

            if not args.quiet:
                print(f"H-{sample_id}\t{hypo['score']}\t{hypo_str}")
                print(
                    "A-{}\t{}".format(
                        sample_id,
                        " ".join(map(lambda x: str(utils.item(x)), alignment)),
                    )
                )

            if i == 0:
                if align_dict is not None or args.remove_bpe is not None:
                    # Convert back to tokens for evaluation with unk replacement
                    # and/or without BPE
                    target_tokens = tokenizer.Tokenizer.tokenize(
                        target_str, task.target_dictionary, add_if_not_exist=True
                    )
                # The probs score for the hypo_str; whether it's normalized by
                # sequence length or not depends on normalize_scores, which is
                # set by arg.nonormalize. However, as I tried, whether
                # normalize_scores is set or not, the returned scores are the
                # same (to be investigated). Here, the probs are normalized by
                # hypo length so the value is big enough to be used as weights
                # for backtranslations in dual learning.
                hypo_score = (
                    hypo["score"] / len(hypo_tokens) if len(hypo_tokens) > 0 else 0.0
                )

                yield TranslationInfo(
                    sample_id=sample_id,
                    src_tokens=src_tokens,
                    target_tokens=target_tokens,
                    hypo_tokens=hypo_tokens,
                    src_str=src_str,
                    target_str=target_str,
                    hypo_str=hypo_str,
                    hypo_score=hypo_score,
                )
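# Illustrative consumption of the generator above (a sketch, not from the
# original source); assumes args/task/dataset/translations/align_dict have
# been prepared as described in the docstring.
for info in _iter_first_best_bilingual(args, task, dataset, translations, align_dict):
    print(info.sample_id, info.hypo_score, info.hypo_str)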
def generate(self, sentence):
    if self.reverse:
        sentence = sentence[::-1]
    start_id = 0
    src_strs = []
    results = []
    res = []
    for batch in make_batches([sentence], self.args, self.task, self.max_positions):
        src_tokens = batch.src_tokens
        src_lengths = batch.src_lengths
        src_strs.extend(batch.src_strs)
        sample = {
            'net_input': {
                'src_tokens': src_tokens,
                'src_lengths': src_lengths,
            },
        }
        sample = utils.move_to_cuda(sample) if self.use_cuda else sample
        translations = self.task.inference_step(self.generator, self.models, sample)
        for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
            src_tokens_i = utils.strip_pad(src_tokens[i], self.tgt_dict.pad())
            results.append((start_id + id, src_tokens_i, hypos))

    for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
        src_str = self.src_dict.string(src_tokens, self.args.remove_bpe)
        d = {
            'id': id,
            'src_str': src_str,
            'src_raw': src_str.replace(' ', ''),
            'hypos': []
        }
        for hypo in hypos[:min(len(hypos), self.args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_strs[id],
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=self.align_dict,
                tgt_dict=self.tgt_dict,
                remove_bpe=self.args.remove_bpe,
            )
            positional_scores = [round(score, 4) for score in hypo['positional_scores'].tolist()]
            alignment = list(map(lambda x: str(utils.item(x)), alignment))
            d['hypos'].append({
                'hypo_str': hypo_str,
                'hypo_raw': hypo_str.replace(' ', ''),
                'score': hypo['score'],
                # 'positional_scores': positional_scores,
                # 'alignment': alignment if self.args.print_alignment else None,
            })
        # reranking with language model
        if self.lm:
            d = self.rerank_lm(d)
        if self.reverse:
            d = self.reverse_result(d)
        d = self.add_best_hypo(d)
        if self.print_hypos:
            pprint(d)
        res.append(d)
    return res
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    import_user_module(args)

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    tgt_file = None
    hypo_file = None
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
        tgt_fn = os.path.join(args.output_dir, 'gold')
        hypo_fn = os.path.join(args.output_dir, 'candidate')
        tgt_file = open(tgt_fn, 'w', encoding='utf-8')
        hypo_file = open(hypo_fn, 'w', encoding='utf-8')

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(task.dataset(args.gen_subset))))

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _model_args = utils.load_ensemble_for_inference(
        args.path.split(':'), task, model_arg_overrides=eval(args.model_overrides),
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=args.required_batch_size_multiple,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
        num_workers=args.num_workers,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    # Generate and compute BLEU score
    if args.sacrebleu:
        scorer = bleu.SacrebleuScorer()
    else:
        scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
    num_sentences = 0
    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        for sample in t:
            sample = utils.move_to_cuda(sample) if use_cuda else sample
            if 'net_input' not in sample:
                continue

            prefix_tokens = None
            if args.prefix_size > 0:
                prefix_tokens = sample['target'][:, :args.prefix_size]

            gen_timer.start()
            hypos = task.inference_step(generator, models, sample, prefix_tokens)
            num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)
            gen_timer.stop(num_generated_tokens)

            for i, sample_id in enumerate(sample['id'].tolist()):
                has_target = sample['target'] is not None

                # Remove padding
                src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
                target_tokens = None
                if has_target:
                    target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu()

                # Either retrieve the original sentences or regenerate them from tokens.
                if align_dict is not None:
                    src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id)
                    target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id)
                else:
                    if src_dict is not None:
                        src_str = src_dict.string(src_tokens, args.remove_bpe)
                    else:
                        src_str = ""
                    if has_target:
                        target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

                if not args.quiet:
                    if src_dict is not None:
                        print('S-{}\t{}'.format(sample_id, src_str.encode(encoding='utf-8')))
                    if has_target:
                        print('T-{}\t{}'.format(sample_id, target_str.encode(encoding='utf-8')))

                # Process top predictions
                for j, hypo in enumerate(hypos[i][:min(len(hypos[i]), args.nbest)]):
                    hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                        hypo_tokens=hypo['tokens'].int().cpu(),
                        src_str=src_str,
                        alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                        align_dict=align_dict,
                        tgt_dict=tgt_dict,
                        remove_bpe=args.remove_bpe,
                    )

                    if not args.quiet:
                        print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str.encode(encoding='utf-8')))
                        print('P-{}\t{}'.format(
                            sample_id,
                            ' '.join(
                                map(
                                    lambda x: '{:.4f}'.format(x),
                                    hypo['positional_scores'].tolist(),
                                ))))

                        if args.print_alignment:
                            print('A-{}\t{}'.format(
                                sample_id,
                                ' '.join(map(lambda x: str(utils.item(x)), alignment))))

                    # Score only the top hypothesis
                    if has_target and j == 0:
                        if align_dict is not None or args.remove_bpe is not None:
                            # Convert back to tokens for evaluation with unk replacement and/or without BPE
                            target_tokens = tgt_dict.encode_line(target_str, add_if_not_exist=True)
                        if hasattr(scorer, 'add_string'):
                            scorer.add_string(target_str, hypo_str)
                        else:
                            scorer.add(target_tokens, hypo_tokens)
                        if args.output_dir:
                            tgt_file.writelines(target_str + '\n')
                            hypo_file.writelines(hypo_str + '\n')

            wps_meter.update(num_generated_tokens)
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += sample['nsentences']

    # only close the files when they were actually opened
    if args.output_dir:
        tgt_file.close()
        hypo_file.close()

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))

    return scorer
def _iter_first_best_multilingual(args, task, dataset, translations, align_dict):
    """Like _iter_first_best_bilingual but for multilingual NMT."""
    src_dicts = task.source_dictionaries
    target_dicts = task.target_dictionaries
    for sample_id, src_tokens, target_tokens, hypos in translations:
        # Process input and ground truth
        target_tokens = target_tokens.int().cpu()
        src_lang_id = (
            src_tokens[-1] - pytorch_translate_data.MULTILING_DIALECT_ID_OFFSET
        )
        target_lang_id = (
            target_tokens[0] - pytorch_translate_data.MULTILING_DIALECT_ID_OFFSET
        )
        # remove language-ID tokens
        src_tokens = src_tokens[:-1]
        target_tokens = target_tokens[1:]
        # Select dictionaries
        src_dict = src_dicts[task.get_encoder_lang_code(src_lang_id)]
        target_dict = target_dicts[task.get_decoder_lang_code(target_lang_id)]

        # Either retrieve the original sentences or regenerate them from tokens.
        if align_dict is not None:
            src_str = dataset.src.get_original_text(sample_id)
            target_str = dataset.tgt.get_original_text(sample_id)
        else:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            target_str = target_dict.string(
                target_tokens, args.remove_bpe, escape_unk=True
            )

        if not args.quiet:
            print(f"S-{sample_id}\tsrc_lang={src_lang_id}\t{src_str}")
            print(f"T-{sample_id}\ttrg_lang={target_lang_id}\t{target_str}")

        # Process top predictions
        for i, hypo in enumerate(hypos[: min(len(hypos), args.nbest)]):
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo["tokens"].int().cpu()[1:],
                src_str=src_str,
                alignment=hypo["alignment"].int().cpu()[1:],
                align_dict=align_dict,
                tgt_dict=target_dict,
                remove_bpe=args.remove_bpe,
            )

            if not args.quiet:
                print(f"H-{sample_id}\t{hypo['score']}\t{hypo_str}")
                print(
                    "A-{}\t{}".format(
                        sample_id,
                        " ".join(map(lambda x: str(utils.item(x)), alignment)),
                    )
                )

            # Score only the top hypothesis
            if i == 0:
                if align_dict is not None or args.remove_bpe is not None:
                    # Convert back to tokens for evaluation with unk replacement
                    # and/or without BPE
                    target_tokens = tokenizer.Tokenizer.tokenize(
                        target_str, target_dict, add_if_not_exist=True
                    )
                hypo_score = hypo["score"] / len(hypo_tokens)

                yield TranslationInfo(
                    sample_id=sample_id,
                    src_tokens=src_tokens,
                    target_tokens=target_tokens,
                    hypo_tokens=hypo_tokens,
                    src_str=src_str,
                    target_str=target_str,
                    hypo_str=hypo_str,
                    hypo_score=hypo_score,
                )
def _main(cfg: DictConfig, output_file):
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=os.environ.get("LOGLEVEL", "INFO").upper(),
        stream=output_file,
    )
    logger = logging.getLogger("fairseq_cli.generate")

    utils.import_user_module(cfg.common)

    if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
        cfg.dataset.max_tokens = 12000
    logger.info(cfg)

    # Fix seed for stochastic decoding
    if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    # Load dataset splits
    task = tasks.setup_task(cfg.task)

    # Set dictionaries
    try:
        src_dict = getattr(task, "source_dictionary", None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    overrides = ast.literal_eval(cfg.common_eval.model_overrides)

    # Load ensemble
    logger.info("loading model(s) from {}".format(cfg.common_eval.path))
    models, saved_cfg = checkpoint_utils.load_model_ensemble(
        utils.split_paths(cfg.common_eval.path),
        arg_overrides=overrides,
        task=task,
        suffix=cfg.checkpoint.checkpoint_suffix,
        strict=(cfg.checkpoint.checkpoint_shard_count == 1),
        num_shards=cfg.checkpoint.checkpoint_shard_count,
    )

    # loading the dataset should happen after the checkpoint has been loaded
    # so we can give it the saved task config
    task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task)

    if cfg.generation.lm_path is not None:
        overrides["data"] = cfg.task.data

        try:
            lms, _ = checkpoint_utils.load_model_ensemble(
                [cfg.generation.lm_path], arg_overrides=overrides, task=None
            )
        except:
            logger.warning(
                f"Failed to load language model! Please make sure that the language model dict is the same "
                f"as target dict and is located in the data dir ({cfg.task.data})"
            )
            raise

        assert len(lms) == 1
    else:
        lms = [None]

    # Optimize ensemble for generation
    for model in chain(models, lms):
        if model is None:
            continue
        if cfg.common.fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(cfg.generation.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(cfg.dataset.gen_subset),
        max_tokens=cfg.dataset.max_tokens,
        max_sentences=cfg.dataset.batch_size,
        max_positions=utils.resolve_max_positions(
            task.max_positions(), *[m.max_positions() for m in models]
        ),
        ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=cfg.dataset.required_batch_size_multiple,
        seed=cfg.common.seed,
        num_shards=cfg.distributed_training.distributed_world_size,
        shard_id=cfg.distributed_training.distributed_rank,
        num_workers=cfg.dataset.num_workers,
        data_buffer_size=cfg.dataset.data_buffer_size,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=cfg.common.log_format,
        log_interval=cfg.common.log_interval,
        default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()

    extra_gen_cls_kwargs = {"lm_model": lms[0], "lm_weight": cfg.generation.lm_weight}
    generator = task.build_generator(
        models, cfg.generation, extra_gen_cls_kwargs=extra_gen_cls_kwargs
    )

    # Handle tokenization and BPE
    tokenizer = task.build_tokenizer(cfg.tokenizer)
    bpe = task.build_bpe(cfg.bpe)

    def decode_fn(x):
        if bpe is not None:
            x = bpe.decode(x)
        if tokenizer is not None:
            x = tokenizer.decode(x)
        return x

    scorer = scoring.build_scorer(cfg.scoring, tgt_dict)

    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()
    for sample in progress:
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if "net_input" not in sample:
            continue

        prefix_tokens = None
        if cfg.generation.prefix_size > 0:
            prefix_tokens = sample["target"][:, : cfg.generation.prefix_size]

        constraints = None
        if "constraints" in sample:
            constraints = sample["constraints"]

        gen_timer.start()
        hypos = task.inference_step(
            generator,
            models,
            sample,
            prefix_tokens=prefix_tokens,
            constraints=constraints,
        )
        num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos)
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample["id"].tolist()):
            has_target = sample["target"] is not None

            # Remove padding
            if "src_tokens" in sample["net_input"]:
                src_tokens = utils.strip_pad(
                    sample["net_input"]["src_tokens"][i, :], tgt_dict.pad()
                )
            else:
                src_tokens = None

            target_tokens = None
            if has_target:
                target_tokens = (
                    utils.strip_pad(sample["target"][i, :], tgt_dict.pad()).int().cpu()
                )

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(cfg.dataset.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(cfg.dataset.gen_subset).tgt.get_original_text(sample_id)
            else:
                if src_dict is not None:
                    src_str = src_dict.string(src_tokens, cfg.common_eval.post_process)
                else:
                    src_str = ""
                if has_target:
                    target_str = tgt_dict.string(
                        target_tokens,
                        cfg.common_eval.post_process,
                        escape_unk=True,
                        extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator),
                    )

            src_str = decode_fn(src_str)
            if has_target:
                target_str = decode_fn(target_str)

            if not cfg.common_eval.quiet:
                if src_dict is not None:
                    print("S-{}\t{}".format(sample_id, src_str), file=output_file)
                if has_target:
                    print("T-{}\t{}".format(sample_id, target_str), file=output_file)

            # Process top predictions
            for j, hypo in enumerate(hypos[i][: cfg.generation.nbest]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo["tokens"].int().cpu(),
                    src_str=src_str,
                    alignment=hypo["alignment"],
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=cfg.common_eval.post_process,
                    extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator),
                )
                detok_hypo_str = decode_fn(hypo_str)
                if not cfg.common_eval.quiet:
                    score = hypo["score"] / math.log(2)  # convert to base 2
                    # original hypothesis (after tokenization and BPE)
                    print("H-{}\t{}\t{}".format(sample_id, score, hypo_str), file=output_file)
                    # detokenized hypothesis
                    print("D-{}\t{}\t{}".format(sample_id, score, detok_hypo_str), file=output_file)
                    print(
                        "P-{}\t{}".format(
                            sample_id,
                            " ".join(
                                map(
                                    lambda x: "{:.4f}".format(x),
                                    # convert from base e to base 2
                                    hypo["positional_scores"].div_(math.log(2)).tolist(),
                                )
                            ),
                        ),
                        file=output_file,
                    )

                    if cfg.generation.print_alignment == "hard":
                        print(
                            "A-{}\t{}".format(
                                sample_id,
                                " ".join(
                                    [
                                        "{}-{}".format(src_idx, tgt_idx)
                                        for src_idx, tgt_idx in alignment
                                    ]
                                ),
                            ),
                            file=output_file,
                        )
                    if cfg.generation.print_alignment == "soft":
                        print(
                            "A-{}\t{}".format(
                                sample_id,
                                " ".join([",".join(src_probs) for src_probs in alignment]),
                            ),
                            file=output_file,
                        )

                    if cfg.generation.print_step:
                        print("I-{}\t{}".format(sample_id, hypo["steps"]), file=output_file)

                    if cfg.generation.retain_iter_history:
                        for step, h in enumerate(hypo["history"]):
                            _, h_str, _ = utils.post_process_prediction(
                                hypo_tokens=h["tokens"].int().cpu(),
                                src_str=src_str,
                                alignment=None,
                                align_dict=None,
                                tgt_dict=tgt_dict,
                                remove_bpe=None,
                            )
                            print("E-{}_{}\t{}".format(sample_id, step, h_str), file=output_file)

                # Score only the top hypothesis
                if has_target and j == 0:
                    if align_dict is not None or cfg.common_eval.post_process is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tgt_dict.encode_line(target_str, add_if_not_exist=True)
                        hypo_tokens = tgt_dict.encode_line(detok_hypo_str, add_if_not_exist=True)
                    if hasattr(scorer, "add_string"):
                        scorer.add_string(target_str, detok_hypo_str)
                    else:
                        scorer.add(target_tokens, hypo_tokens)

        wps_meter.update(num_generated_tokens)
        progress.log({"wps": round(wps_meter.avg)})
        num_sentences += sample["nsentences"] if "nsentences" in sample else sample["id"].numel()

    logger.info("NOTE: hypothesis and token scores are output in base 2")
    logger.info(
        "Translated {:,} sentences ({:,} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)".format(
            num_sentences,
            gen_timer.n,
            gen_timer.sum,
            num_sentences / gen_timer.sum,
            1.0 / gen_timer.avg,
        )
    )
    if has_target:
        if cfg.bpe and not cfg.generation.sacrebleu:
            if cfg.common_eval.post_process:
                logger.warning(
                    "BLEU score is being computed by splitting detokenized string on spaces, "
                    "this is probably not what you want. Use --sacrebleu for standard 13a BLEU tokenization"
                )
            else:
                logger.warning(
                    "If you are using BPE on the target side, the BLEU score is computed on BPE tokens, "
                    "not on proper words. Use --sacrebleu for standard 13a BLEU tokenization"
                )
        # use print to be consistent with other main outputs: S-, H-, T-, D- and so on
        print(
            "Generate {} with beam={}: {}".format(
                cfg.dataset.gen_subset, cfg.generation.beam, scorer.result_string()
            ),
            file=output_file,
        )

    return scorer
def _iter_translations(
    args, task, dataset, translations, align_dict, rescorer, modify_target_dict
):
    """Iterate over translations.

    This is a generator function which wraps the beam-search sequence
    generator, performing such work on the output as converting token indices
    to strings, printing output where applicable (not args.quiet), collecting
    oracle translations where applicable, and removing language-ID tokens
    for multilingual translation.

    Args:
        args: Command-line arguments.
        task: FairseqTask object.
        dataset: Dataset object for a specific split.
        translations: Batched translation iterator, as returned by
            SequenceGenerator.generate_batched_itr().
        align_dict: Dictionary for UNK replacement.

    Yields:
        For each sentence in `translations`, yields a TranslationInfo.
    """
    is_multilingual = pytorch_translate_data.is_multilingual_many_to_one(args)

    for sample_id, src_tokens, target_tokens, hypos in translations:
        # Process input and ground truth
        target_tokens = target_tokens.int().cpu()

        if is_multilingual:
            src_lang_id = (
                src_tokens[-1] - pytorch_translate_data.MULTILING_DIALECT_ID_OFFSET
            )
            target_lang_id = (
                target_tokens[0] - pytorch_translate_data.MULTILING_DIALECT_ID_OFFSET
            )

            # remove language ID tokens
            src_tokens = src_tokens[:-1]
            target_tokens = target_tokens[1:]

            # Select dictionaries
            src_dict = task.source_dictionaries[task.get_encoder_lang_code(src_lang_id)]
            target_dict = task.target_dictionaries[
                task.get_decoder_lang_code(target_lang_id)
            ]
        else:
            src_dict = task.source_dictionary
            target_dict = task.target_dictionary

        # Either retrieve the original sentences or regenerate them from tokens.
        if align_dict is not None:
            src_str = dataset.src.get_original_text(sample_id)
            target_str = dataset.tgt.get_original_text(sample_id)
        else:
            src_str = src_dict.string(src_tokens, args.remove_bpe)
            target_str = target_dict.string(
                target_tokens, args.remove_bpe, escape_unk=True
            )

        if not args.quiet:
            print(f"S-{sample_id}\t{src_str}")
            print(f"T-{sample_id}\t{target_str}")

        # used for oracle evaluation (args.report_oracle_bleu)
        best_hypo_tokens = None
        best_hypo_score = 0
        collect_oracle_hypos = args.report_oracle_bleu or (
            args.output_hypos_binary_path and args.nbest > 0
        )

        # Process top predictions
        for i, hypo in enumerate(hypos[: min(len(hypos), args.nbest)]):
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo["tokens"].int().cpu(),
                src_str=src_str,
                alignment=hypo["alignment"].int().cpu()
                if align_dict is not None
                else None,
                align_dict=align_dict,
                tgt_dict=task.target_dictionary,
                remove_bpe=args.remove_bpe,
            )

            if not args.quiet:
                print(f"H-{sample_id}\t{hypo['score']}\t{hypo_str}")
                if alignment is not None:
                    print(
                        "A-{}\t{}".format(
                            sample_id,
                            " ".join(str(utils.item(x)) for x in alignment),
                        )
                    )

            if collect_oracle_hypos:
                score = smoothed_sentence_bleu(task, target_tokens, hypo_tokens)
                if score > best_hypo_score:
                    best_hypo_tokens = hypo_tokens
                    best_hypo_score = score

            if i == 0:
                if align_dict is not None or args.remove_bpe is not None:
                    # Convert back to tokens for evaluation with unk replacement
                    # and/or without BPE
                    target_tokens = task.target_dictionary.encode_line(
                        target_str, add_if_not_exist=modify_target_dict
                    )
                # The probability score for hypo_str. Whether it is normalized
                # by sequence length depends on normalize_scores, which is set
                # by args.nonormalize. (Empirically, the returned scores appear
                # identical either way; to be investigated.) Here the score is
                # normalized by hypothesis length so that it is large enough to
                # be used as a weight for backtranslations in dual learning.
                hypo_score = (
                    hypo["score"] / len(hypo_tokens) if len(hypo_tokens) > 0 else 0.0
                )
                top_hypo_tokens = hypo_tokens
                top_hypo_str = hypo_str

        if not collect_oracle_hypos:
            best_hypo_tokens = top_hypo_tokens

        yield TranslationInfo(
            sample_id=sample_id,
            src_tokens=src_tokens,
            target_tokens=target_tokens,
            hypo_tokens=top_hypo_tokens,
            src_str=src_str,
            target_str=target_str,
            hypo_str=top_hypo_str,
            hypo_score=hypo_score,
            best_hypo_tokens=best_hypo_tokens,
            hypos=hypos,
        )
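# A minimal consumption sketch for _iter_translations. The driver below is
# hypothetical (not the canonical caller); it assumes `translations` comes from
# SequenceGenerator.generate_batched_itr() and `align_dict` from
# utils.load_align_dict(), as in the surrounding functions.
def _collect_top_hypotheses(args, task, dataset, translations, align_dict):
    # Gather (source, best hypothesis, length-normalized score) triples.
    results = []
    for info in _iter_translations(
        args, task, dataset, translations, align_dict,
        rescorer=None, modify_target_dict=False,
    ):
        results.append((info.src_str, info.hypo_str, info.hypo_score))
    return results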
def infer_onebyone(args, models, task, input):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    utils.import_user_module(args)

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Build a single-sentence sample from the input tokens/characters.
    input = ' '.join(input)
    src_tokens = tgt_dict.encode_line(input).type(torch.LongTensor)
    input_sample = {
        'id': torch.Tensor([0]),
        'nsentences': 1,
        'ntokens': len(src_tokens),
        'net_input': {
            'src_tokens': src_tokens.unsqueeze(0),
            'src_lengths': torch.tensor([len(src_tokens)]),
            'prev_output_tokens': torch.tensor([[tgt_dict.eos()]]),
        },
        'target': src_tokens.unsqueeze(0),
    }

    # Models are loaded by the caller; just report the checkpoint path.
    print('| loading model(s) from {}'.format(args.path))

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    # Generate and compute BLEU score
    if args.sacrebleu:
        scorer = bleu.SacrebleuScorer()
    else:
        scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())

    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()

    sample = input_sample
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    prefix_tokens = None
    if args.prefix_size > 0:
        prefix_tokens = sample['target'][:, :args.prefix_size]

    gen_timer.start()
    hypos = task.inference_step(generator, models, sample, prefix_tokens)
    num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)
    gen_timer.stop(num_generated_tokens)

    output = ''
    for i, sample_id in enumerate(sample['id'].tolist()):
        has_target = sample['target'] is not None

        # Remove padding
        src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
        target_tokens = None
        if has_target:
            target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu()

        # Either retrieve the original sentences or regenerate them from tokens.
        if align_dict is not None:
            src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id)
            target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id)
        else:
            if src_dict is not None:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
            else:
                src_str = ""
            if has_target:
                target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

        if not args.quiet:
            if src_dict is not None:
                print('S-{}\t{}'.format(sample_id, src_str))
            if has_target:
                print('T-{}\t{}'.format(sample_id, target_str))

        # Process top predictions
        for j, hypo in enumerate(hypos[i][:args.nbest]):
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'],
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            # Space-free surface form of the hypothesis; also the second
            # return value of this function. Computed outside the quiet check
            # so it is always defined.
            output = ''.join(hypo_str.split(' '))

            if not args.quiet:
                print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                print('P-{}\t{}'.format(
                    sample_id,
                    ' '.join('{:.4f}'.format(x) for x in hypo['positional_scores'].tolist()),
                ))
                if args.print_alignment:
                    print('A-{}\t{}'.format(
                        sample_id,
                        ' '.join('{}-{}'.format(src_idx, tgt_idx) for src_idx, tgt_idx in alignment),
                    ))
                if args.print_step:
                    print('I-{}\t{}'.format(sample_id, hypo['steps']))

            # Score only the top hypothesis
            if has_target and j == 0:
                if align_dict is not None or args.remove_bpe is not None:
                    # Convert back to tokens for evaluation with unk replacement and/or without BPE
                    target_tokens = tgt_dict.encode_line(target_str, add_if_not_exist=True)
                if hasattr(scorer, 'add_string'):
                    scorer.add_string(target_str, hypo_str)
                else:
                    scorer.add(target_tokens, hypo_tokens)

    wps_meter.update(num_generated_tokens)
    num_sentences += sample['nsentences']

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum,
        num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))

    return scorer, output
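# A hypothetical driver sketch for infer_onebyone, under the assumption that
# `args`, `models`, and `task` were prepared as in the main() functions below
# (e.g. via tasks.setup_task and checkpoint_utils.load_model_ensemble).
def translate_one(args, models, task, text):
    # infer_onebyone joins the input with spaces before encoding, so any
    # iterable of tokens works; a plain string is consumed per character.
    _scorer, output = infer_onebyone(args, models, task, text)
    return output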
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(task.dataset(args.gen_subset))))

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _ = utils.load_ensemble_for_inference(
        args.path.split(':'), task, model_arg_overrides=eval(args.model_overrides))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=8,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    if args.score_reference:
        translator = SequenceScorer(models, task.target_dictionary)
    else:
        translator = SequenceGenerator(
            models,
            task.target_dictionary,
            beam_size=args.beam,
            minlen=args.min_len,
            stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized),
            len_penalty=args.lenpen,
            unk_penalty=args.unkpen,
            sampling=args.sampling,
            sampling_topk=args.sampling_topk,
            sampling_temperature=args.sampling_temperature,
            diverse_beam_groups=args.diverse_beam_groups,
            diverse_beam_strength=args.diverse_beam_strength,
        )

    if use_cuda:
        translator.cuda()

    # Generate and compute BLEU score
    scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
    num_sentences = 0
    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t:
        if args.score_reference:
            translations = translator.score_batched_itr(t, cuda=use_cuda, timer=gen_timer)
        else:
            translations = translator.generate_batched_itr(
                t,
                maxlen_a=args.max_len_a,
                maxlen_b=args.max_len_b,
                cuda=use_cuda,
                timer=gen_timer,
                prefix_size=args.prefix_size,
            )
        wps_meter = TimeMeter()
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            has_target = target_tokens is not None
            target_tokens = target_tokens.int().cpu() if has_target else None

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id)
            else:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                if has_target:
                    target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str))

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args.remove_bpe,
                )

                if not args.quiet:
                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                    print('P-{}\t{}'.format(
                        sample_id,
                        ' '.join('{:.4f}'.format(x) for x in hypo['positional_scores'].tolist()),
                    ))
                    if args.print_alignment:
                        print('A-{}\t{}'.format(
                            sample_id,
                            ' '.join(str(utils.item(x)) for x in alignment),
                        ))

                # Score only the top hypothesis
                if has_target and i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, tgt_dict, add_if_not_exist=True)
                    scorer.add(target_tokens, hypo_tokens)

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))
def main(args):
    utils.import_user_module(args)

    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        args.path.split(':'),
        arg_overrides=eval(args.model_overrides),
        task=task,
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Initialize generator
    generator = task.build_generator(args)

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(),
        *[model.max_positions() for model in models]
    )

    if args.buffer_size > 1:
        print('| Sentence buffer size:', args.buffer_size)
    print('| Type the input sentence and press return:')
    start_id = 0
    if args.output != '':
        output_file = open(args.output, 'w', encoding='utf-8')
    for inputs in buffered_read(args.input, args.buffer_size):
        results = []
        for batch in make_batches(inputs, args, task, max_positions):
            src_tokens = batch.src_tokens
            src_lengths = batch.src_lengths
            if use_cuda:
                src_tokens = src_tokens.cuda()
                src_lengths = src_lengths.cuda()

            sample = {
                'net_input': {
                    'src_tokens': src_tokens,
                    'src_lengths': src_lengths,
                },
            }
            translations = task.inference_step(generator, models, sample)
            for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
                src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
                results.append((start_id + id, src_tokens_i, hypos))

        # sort output to match input order
        for id, src_tokens, hypos in sorted(results, key=lambda x: x[0]):
            if src_dict is not None:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                print('S-{}\t{}'.format(id, src_str))

            # Process top predictions
            for hypo in hypos[:min(len(hypos), args.nbest)]:
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args.remove_bpe,
                )
                if args.output != '':
                    output_file.write(hypo_str + '\n')
                print('H-{}\t{}\t{}'.format(id, hypo['score'], hypo_str))
                print('P-{}\t{}'.format(
                    id,
                    ' '.join('{:.4f}'.format(x) for x in hypo['positional_scores'].tolist()),
                ))
                if args.print_alignment:
                    print('A-{}\t{}'.format(
                        id,
                        ' '.join(str(utils.item(x)) for x in alignment),
                    ))

        # update running id counter
        start_id += len(inputs)

    if args.output != '':
        output_file.close()
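# The loop above relies on a buffered_read helper that is not defined in this
# file. A minimal stand-in sketch follows (assumption: the real helper also
# yields lists of stripped lines of at most buffer_size each; reading from
# stdin when the path is '-' is omitted here).
def buffered_read_sketch(input_path, buffer_size):
    # Yield successive buffers of up to buffer_size input lines.
    buffer = []
    with open(input_path, encoding='utf-8') as f:
        for line in f:
            buffer.append(line.strip())
            if len(buffer) >= buffer_size:
                yield buffer
                buffer = []
    if buffer:
        yield buffer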
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict, dataset.dst_dict)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score
    scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset, max_sentences=args.batch_size, max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None, timer=gen_timer)
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            target_tokens = target_tokens.int().cpu()

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                print('T-{}\t{}'.format(sample_id, target_str))

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)

                if not args.quiet:
                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                    print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))

                # Score only the top hypothesis
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, dataset.dst_dict, add_if_not_exist=True)
                    scorer.add(target_tokens, hypo_tokens)

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
    print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))
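# Example invocation sketch for the legacy main() above. The data directory
# and checkpoint path are placeholders, not taken from the original source:
#
#   python generate.py data-bin/wmt14.en-fr \
#       --path checkpoints/checkpoint_best.pt \
#       --batch-size 32 --beam 5 --remove-bpe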