def moses_detokenize(self, inp: Path, out: Path, col=0, lang='en', post_op=None):
    """Detokenize a tokenized file with Moses rules and write the result.

    :param inp: input file of tokenized text (column `col` if multi-column)
    :param out: output file for detokenized lines
    :param col: column index to read from the input file
    :param lang: language code passed to MosesDetokenizer
    :param post_op: optional callable applied to each detokenized line
    """
    log.info(f"detok : {inp} --> {out}")
    token_rows = IO.get_lines(inp, col=col, line_mapper=lambda text: text.split())
    with MosesDetokenizer(lang=lang) as detokenize:
        # lazily detokenize; keep everything inside the context so the
        # detokenizer is still open when the generator is consumed
        if post_op:
            out_lines = (post_op(detokenize(row)) for row in token_rows)
        else:
            out_lines = (detokenize(row) for row in token_rows)
        IO.write_lines(out, out_lines)
def evaluate_file(self, detok_hyp: Path, ref: Union[Path, List[str]], lowercase=True) -> float:
    """Compute corpus BLEU of a detokenized hypothesis file against a reference.

    :param detok_hyp: path to the detokenized hypothesis file
    :param ref: reference — a file path or an in-memory list of lines
    :param lowercase: if True, score case-insensitively
    :return: BLEU score as a float
    Side effect: writes the formatted score to ``<hyp>.lc.sacrebleu`` or
    ``<hyp>.oc.sacrebleu`` next to the hypothesis file.
    """
    hyp_lines = list(IO.get_lines(detok_hyp))
    # corpus_bleu accepts multiple reference streams; we supply exactly one
    if isinstance(ref, Path):
        ref_liness = [IO.get_lines(ref)]
    else:
        ref_liness = [ref]
    bleu: BLEUScore = corpus_bleu(sys_stream=hyp_lines, ref_streams=ref_liness,
                                  lowercase=lowercase)
    # this should be part of new sacrebleu release (i sent a PR ;)
    bleu_str = bleu.format()
    suffix = ('.lc' if lowercase else '.oc') + '.sacrebleu'
    bleu_file = detok_hyp.with_name(detok_hyp.name + suffix)
    log.info(f'BLEU {detok_hyp} : {bleu_str}')
    IO.write_lines(bleu_file, bleu_str)
    return bleu.score
def evaluate_file(self, detok_hyp: Path, ref: Union[Path, List[str]], lowercase=True) -> float:
    """Compute corpus BLEU of a detokenized hypothesis file against a reference.

    :param detok_hyp: path to the detokenized hypothesis file
    :param ref: reference — a file path or an in-memory list of lines
    :param lowercase: if True, score case-insensitively
    :return: BLEU score as a float
    Side effect: writes the formatted score next to the hypothesis file, with
    '.lc.sacrebleu' / '.oc.sacrebleu' appended to the file name.
    """
    detok_lines = IO.get_lines(detok_hyp)
    # corpus_bleu takes multiple refs, but here we have only one
    ref_liness = [IO.get_lines(ref) if isinstance(ref, Path) else ref]
    bleu: BLEU = corpus_bleu(sys_stream=detok_lines, ref_streams=ref_liness, lowercase=lowercase)
    # this should be part of new sacrebleu release (i sent a PR ;)
    bleu_str = f'BLEU = {bleu.score:.2f} {"/".join(f"{p:.1f}" for p in bleu.precisions)}' \
               f' (BP = {bleu.bp:.3f} ratio = {(bleu.sys_len / bleu.ref_len):.3f}' \
               f' hyp_len = {bleu.sys_len:d} ref_len={bleu.ref_len:d})'
    # BUG FIX: with_suffix() *replaced* the hypothesis file's last extension
    # (e.g. 'x.out.tsv' -> 'x.out.lc.sacrebleu'), losing part of the name.
    # Append to the full name instead, consistent with the sibling
    # evaluate_file variant in this file.
    bleu_file = detok_hyp.with_name(detok_hyp.name + ('.lc' if lowercase else '.oc') + '.sacrebleu')
    log.info(f'BLEU {detok_hyp} : {bleu_str}')
    IO.write_lines(bleu_file, bleu_str)
    return bleu.score
def tune_decoder_params(self, exp: Experiment, tune_src: str, tune_ref: str, batch_size: int,
                        trials: int = 10, lowercase=True, beam_size=(1, 4, 8),
                        ensemble=(1, 5, 10), lp_alpha=(0.0, 0.4, 0.6),
                        suggested: List[Tuple[int, int, float]] = None, **fixed_args):
    """Search decoder hyper-parameters (beam size, ensemble size, length-penalty alpha)
    by decoding a tuning set and scoring each configuration.

    :param exp: experiment whose last saved model is decoded
    :param tune_src: path to tuning-set source file
    :param tune_ref: path to tuning-set reference file
    :param trials: total number of configurations to try (0 => only `suggested`)
    :param lowercase: score case-insensitively
    :param beam_size, ensemble, lp_alpha: candidate values sampled at random
    :param suggested: explicit (beam, ensemble, lp_alpha) triples to try first;
        elements may be stringified tuples
    :param fixed_args: extra args forwarded unchanged to decode_eval_file
    :return: (best-params dict with keys beam_size/ensemble/lp_alpha, tune_args dict)

    Scores are persisted to <work_dir>/tune_step<N>/scores.json so an
    interrupted tuning run can be resumed.
    """
    # Snapshot this call's arguments (frame locals) so they can be returned/recorded.
    _, _, _, tune_args = inspect.getargvalues(inspect.currentframe())
    tune_args.update(fixed_args)
    ex_args = ['exp', 'self', 'fixed_args', 'batch_size', 'max_len']
    if trials == 0:
        # no random trials requested: the candidate pools are irrelevant too
        ex_args += ['beam_size', 'ensemble', 'lp_alpha']
    for x in ex_args:
        del tune_args[x]  # exclude some args

    _, step = exp.get_last_saved_model()
    tune_dir = exp.work_dir / f'tune_step{step}'
    log.info(f"Tune dir = {tune_dir}")
    tune_dir.mkdir(parents=True, exist_ok=True)
    tune_src, tune_ref = Path(tune_src), Path(tune_ref)
    assert tune_src.exists()
    assert tune_ref.exists()
    # materialize both sides so they can be length-checked and reused per trial
    tune_src, tune_ref = list(IO.get_lines(tune_src)), list(IO.get_lines(tune_ref))
    assert len(tune_src) == len(tune_ref)

    tune_log = tune_dir / 'scores.json'  # resume the tuning
    # memory maps (beam, ensemble, lp_alpha) -> BLEU score; pre-loaded from disk
    memory: Dict[Tuple, float] = {}
    if tune_log.exists():
        data = json.load(tune_log.open())
        # JSON keys cant be tuples, so they were stringified
        # NOTE(review): eval() on file contents — safe only because scores.json
        # is written by this same method below; do not point it at untrusted files
        memory = {eval(k): v for k, v in data.items()}

    beam_sizes, ensembles, lp_alphas = [], [], []
    if suggested:
        if isinstance(suggested[0], str):
            # suggestions may arrive as stringified tuples (e.g. from CLI/config)
            suggested = [eval(x) for x in suggested]
        # round alpha to 2 decimals so it matches keys stored in memory
        suggested = [(x[0], x[1], round(x[2], 2)) for x in suggested]
        suggested_new = [x for x in suggested if x not in memory]
        beam_sizes += [x[0] for x in suggested_new]
        ensembles += [x[1] for x in suggested_new]
        lp_alphas += [x[2] for x in suggested_new]

    # top up with random draws until `trials` total configs (counting resumed ones)
    new_trials = trials - len(memory)
    if new_trials > 0:
        beam_sizes += [random.choice(beam_size) for _ in range(new_trials)]
        ensembles += [random.choice(ensemble) for _ in range(new_trials)]
        lp_alphas += [round(random.choice(lp_alpha), 2) for _ in range(new_trials)]

    # ensembling is somewhat costlier, so try minimize the model ensembling, by grouping them together
    grouped_ens = defaultdict(list)
    for b, ens, l in zip(beam_sizes, ensembles, lp_alphas):
        grouped_ens[ens].append((b, l))
    try:
        for ens, args in grouped_ens.items():
            # one decoder (model ensemble) per group, reused across beam/alpha combos
            decoder = Decoder.new(exp, ensemble=ens)
            for b_s, lp_a in args:
                eff_batch_size = batch_size // b_s  # effective batch size
                name = f'tune_step{step}_beam{b_s}_ens{ens}_lp{lp_a:.2f}'
                log.info(name)
                out_file = tune_dir / f'{name}.out.tsv'
                score = self.decode_eval_file(decoder, tune_src, out_file, tune_ref,
                                              batch_size=eff_batch_size, beam_size=b_s,
                                              lp_alpha=lp_a, lowercase=lowercase,
                                              **fixed_args)
                memory[(b_s, ens, lp_a)] = score
        best_params = sorted(memory.items(), key=lambda x: x[1], reverse=True)[0][0]
        return dict(zip(['beam_size', 'ensemble', 'lp_alpha'], best_params)), tune_args
    finally:
        # persist scores even on interruption/failure so the run can resume
        # JSON keys cant be tuples, so we stringify them
        data = {str(k): v for k, v in memory.items()}
        IO.write_lines(tune_log, json.dumps(data))