def write_file_eval(self, out_fname, trans, data_prefix, alns=None):
    """Write translations to ``out_fname``, post-process and score them.

    Steps, in order:

    1. Write ``alns`` (when given) to ``out_fname + '.aln'`` and ``trans``
       to ``out_fname``.
    2. Collect the reference files that exist on disk: the path built from
       ``wargs.val_tst_dir``, ``data_prefix`` and ``wargs.val_ref_suffix``,
       followed by the same path with ``0 .. wargs.ref_cnt - 1`` appended.
    3. When ``wargs.with_bpe is True``: keep a copy as ``out_fname + '.bpe'``
       and rewrite ``out_fname`` with the ``@@`` subword markers removed
       by ``sed``.
    4. When ``wargs.with_postproc is True``: keep a copy as
       ``out_fname + '.opost'``, run ``postproc.sh`` with the copy and
       ``out_fname`` as arguments, score the copy with ``bleu_file`` and
       rename it to include that score.
    5. Score ``out_fname`` with ``bleu_file`` and rename it to include the
       score.

    Args:
        out_fname: Path of the translation file to create.
        trans: Iterable of strings handed to ``writelines``.
        data_prefix: Name prefix used to locate the reference files.
        alns: Optional iterable of strings written to the ``.aln`` file.

    Returns:
        The ``bleu_file`` result for the ``.opost`` copy when
        ``wargs.with_postproc is True``, otherwise the ``bleu_file`` result
        for ``out_fname``.
    """
    # Local imports: this block cannot assume the file imports these modules.
    import shutil
    import subprocess

    if alns is not None:
        with open('{}.aln'.format(out_fname), 'w') as fout_aln:    # valids/trans
            fout_aln.writelines(alns)

    with open(out_fname, 'w') as fout:    # valids/trans
        fout.writelines(trans)

    # *.ref first, then *.ref0, *.ref1, ... ; keep only files that exist.
    ref_base = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix, wargs.val_ref_suffix)
    candidates = [ref_base]
    candidates.extend('{}{}'.format(ref_base, idx) for idx in range(wargs.ref_cnt))
    ref_fpaths = [path for path in candidates if os.path.exists(path)]

    if wargs.with_bpe is True:
        bpe_fname = '{}.bpe'.format(out_fname)
        shutil.copyfile(out_fname, bpe_fname)
        wlog('cp {} {}.bpe'.format(out_fname, out_fname))
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed through untouched.
        with open(out_fname, 'wb') as merged:
            status = subprocess.call(
                ['sed', '-r', 's/(@@ )|(@@ ?$)//g', bpe_fname], stdout=merged)
        wlog("sed -r 's/(@@ )|(@@ ?$)//g' {}.bpe > {}".format(
            out_fname, out_fname))
        if status != 0:
            wlog('sed exited with status {}'.format(status))

    if wargs.with_postproc is True:
        opost_name = '{}.opost'.format(out_fname)
        shutil.copyfile(out_fname, opost_name)
        wlog('cp {} {}'.format(out_fname, opost_name))
        status = subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
        wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
        if status != 0:
            wlog('postproc.sh exited with status {}'.format(status))
        mteval_bleu_opost = bleu_file(opost_name, ref_fpaths)
        os.rename(opost_name, "{}_{}.txt".format(opost_name, mteval_bleu_opost))

    # NOTE: a disabled pipeline that wrapped the output in SGML and scored a
    # character-segmented '.seg.plain' file (wrap_xml.pl, chi_char_segment.pl,
    # de-xml.pl) used to live here; the plain output file is scored instead.
    mteval_bleu = bleu_file(out_fname, ref_fpaths)
    os.rename(out_fname, "{}_{}.txt".format(out_fname, mteval_bleu))

    return mteval_bleu_opost if wargs.with_postproc is True else mteval_bleu
def write_file_eval(self, out_fname, trans, data_prefix, alns=None, subw=None):
    """Write translations to ``out_fname``, post-process and score them.

    Steps, in order:

    1. Write ``alns`` (when given) to ``out_fname + '.aln'``, ``subw`` (when
       given) to ``out_fname + '.subword'`` and ``trans`` to ``out_fname``.
    2. Collect the reference files that exist on disk: the path built from
       ``wargs.val_tst_dir``, ``data_prefix`` and ``wargs.val_ref_suffix``,
       followed by the same path with ``0 .. wargs.ref_cnt - 1`` appended.
    3. When ``wargs.with_bpe is True``: keep a copy as ``out_fname + '.bpe'``
       and regenerate ``out_fname`` from it with ``proc_bpe``.
    4. When ``wargs.with_postproc is True``: keep a copy as
       ``out_fname + '.opost'``, run ``postproc.sh`` with the copy and
       ``out_fname`` as arguments, score the copy with ``bleu_file`` and
       rename it to include that score.
    5. Score ``out_fname`` with ``bleu_file`` and ``print_multi_bleu`` (and
       again with ``char=True`` when ``wargs.char is True``), then rename the
       file to include every computed score.

    Args:
        out_fname: Path of the translation file to create.
        trans: Iterable of strings handed to ``writelines``.
        data_prefix: Name prefix used to locate the reference files.
        alns: Optional iterable of strings written to the ``.aln`` file.
        subw: Optional iterable of strings written to the ``.subword`` file.

    Returns:
        One of the computed scores: the ``bleu_file`` result when
        ``wargs.use_multi_bleu is False``, otherwise the ``print_multi_bleu``
        result; the ``char=True`` variant is returned when
        ``wargs.char is True``.
    """
    # Local import: this block cannot assume the file imports subprocess.
    import subprocess

    if alns is not None:
        with open('{}.aln'.format(out_fname), 'w') as fout_aln:    # valids/trans
            fout_aln.writelines(alns)

    if subw is not None:
        with open('{}.subword'.format(out_fname), 'w') as fout_subw:    # valids/trans
            fout_subw.writelines(subw)

    with open(out_fname, 'w') as fout:    # valids/trans
        fout.writelines(trans)

    # *.ref first, then *.ref0, *.ref1, ... ; keep only files that exist.
    ref_base = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix, wargs.val_ref_suffix)
    candidates = [ref_base]
    candidates.extend('{}{}'.format(ref_base, idx) for idx in range(wargs.ref_cnt))
    ref_fpaths = [path for path in candidates if os.path.exists(path)]

    assert os.path.exists(out_fname), 'translation do not exist ...'

    if wargs.with_bpe is True:
        bpe_fname = '{}.bpe'.format(out_fname)
        wlog('copy {} to {} ... '.format(out_fname, bpe_fname), 0)
        copyfile(out_fname, bpe_fname)
        assert os.path.exists(bpe_fname), 'bpe file do not exist ...'
        wlog('done')
        wlog("sed -r 's/(@@ )|(@@ ?$)//g' {} > {} ... ".format(
            bpe_fname, out_fname), 0)
        proc_bpe(bpe_fname, out_fname)
        wlog('done')
        # NOTE: a former sed step that stripped the "##AT##" markers
        # (Luong: "rich ##AT##-##AT## text format") is disabled.

    if wargs.with_postproc is True:
        opost_name = '{}.opost'.format(out_fname)
        wlog('copy {} to {} ... '.format(out_fname, opost_name), 0)
        copyfile(out_fname, opost_name)
        assert os.path.exists(opost_name), 'opost file do not exist ...'
        wlog('done')
        wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed through untouched.
        status = subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
        if status != 0:
            wlog('postproc.sh exited with status {}'.format(status))
        wlog('done')
        mteval_bleu_opost = bleu_file(opost_name, ref_fpaths, cased=wargs.cased)
        os.rename(opost_name, "{}_{}.txt".format(opost_name, mteval_bleu_opost))

    # Evaluate the flag once: the original tested `is True` when computing the
    # character-level scores but `is False` when selecting the result, so any
    # other value (e.g. None) reached the char branch with the c_* names unset.
    use_char = wargs.char is True

    mteval_bleu = bleu_file(out_fname, ref_fpaths, cased=wargs.cased)
    multi_bleu = print_multi_bleu(out_fname, ref_fpaths, cased=wargs.cased)
    if use_char:
        c_mteval_bleu = bleu_file(out_fname, ref_fpaths, cased=wargs.cased, char=True)
        c_multi_bleu = print_multi_bleu(out_fname, ref_fpaths, cased=wargs.cased, char=True)
        scored_name = "{}_{}_{}_c_{}_{}.txt".format(
            out_fname, mteval_bleu, multi_bleu, c_mteval_bleu, c_multi_bleu)
    else:
        scored_name = "{}_{}_{}.txt".format(out_fname, mteval_bleu, multi_bleu)
    os.rename(out_fname, scored_name)

    if wargs.use_multi_bleu is False:
        final_bleu = c_mteval_bleu if use_char else mteval_bleu
    else:
        final_bleu = c_multi_bleu if use_char else multi_bleu

    return final_bleu
def write_file_eval(self, out_fname, trans, data_prefix, alns=None):
    """Write translations to ``out_fname``, post-process and score them.

    Steps, in order:

    1. Write ``alns`` (when given) to ``out_fname + '.aln'`` and ``trans``
       to ``out_fname``.
    2. Obtain the reference files from ``grab_all_trg_files``, called with
       the path built from ``wargs.val_tst_dir``, ``data_prefix`` and
       ``wargs.val_ref_suffix``.
    3. When ``wargs.with_bpe is True``: keep a copy as ``out_fname + '.bpe'``
       and regenerate ``out_fname`` from it with ``proc_bpe``.
    4. When ``wargs.with_postproc is True``: keep a copy as
       ``out_fname + '.opost'``, run ``postproc.sh`` with the copy and
       ``out_fname`` as arguments, score the copy with ``bleu_file`` and
       rename it to include that score.
    5. Score ``out_fname`` with ``bleu_file`` and ``print_multi_bleu``
       (passing ``char=wargs.char_bleu``) and rename the file to include
       both scores, with a ``_char`` tag when ``wargs.char_bleu is True``.

    Args:
        out_fname: Path of the translation file to create.
        trans: Iterable of strings handed to ``writelines``.
        data_prefix: Name prefix used to locate the reference files.
        alns: Optional iterable of strings written to the ``.aln`` file.

    Returns:
        The ``print_multi_bleu`` result when ``wargs.use_multi_bleu is True``,
        otherwise the ``bleu_file`` result.
    """
    # Local import: this block cannot assume the file imports subprocess.
    import subprocess

    if alns is not None:
        with open('{}.aln'.format(out_fname), 'w') as fout_aln:    # valids/trans
            fout_aln.writelines(alns)

    with open(out_fname, 'w') as fout:    # valids/trans
        fout.writelines(trans)

    # *.ref
    ref_fpath = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix, wargs.val_ref_suffix)
    ref_fpaths = grab_all_trg_files(ref_fpath)

    assert os.path.exists(out_fname), 'translation do not exist ...'

    if wargs.with_bpe is True:
        bpe_fname = '{}.bpe'.format(out_fname)
        wlog('copy {} to {} ... '.format(out_fname, bpe_fname), 0)
        copyfile(out_fname, bpe_fname)
        assert os.path.exists(bpe_fname), 'bpe file do not exist ...'
        wlog('done')
        wlog("sed -r 's/(@@ )|(@@ ?$)//g' {} > {} ... ".format(
            bpe_fname, out_fname), 0)
        proc_bpe(bpe_fname, out_fname)
        wlog('done')

    if wargs.with_postproc is True:
        opost_name = '{}.opost'.format(out_fname)
        wlog('copy {} to {} ... '.format(out_fname, opost_name), 0)
        copyfile(out_fname, opost_name)
        assert os.path.exists(opost_name), 'opost file do not exist ...'
        wlog('done')
        wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed through untouched.
        status = subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
        if status != 0:
            wlog('postproc.sh exited with status {}'.format(status))
        wlog('done')
        mteval_bleu_opost = bleu_file(opost_name, ref_fpaths, cased=wargs.cased)
        os.rename(opost_name, "{}_{}.txt".format(opost_name, mteval_bleu_opost))

    mteval_bleu = bleu_file(out_fname, ref_fpaths, cased=wargs.cased,
                            char=wargs.char_bleu)
    multi_bleu = print_multi_bleu(out_fname, ref_fpaths, cased=wargs.cased,
                                  char=wargs.char_bleu)

    char_tag = '_char' if wargs.char_bleu is True else ''
    os.rename(
        out_fname,
        '{}{}_{}_{}.txt'.format(out_fname, char_tag, mteval_bleu, multi_bleu))

    final_bleu = multi_bleu if wargs.use_multi_bleu is True else mteval_bleu
    return final_bleu