Exemplo n.º 1
0
    def write_file_eval(self, out_fname, trans, data_prefix, alns=None):
        """Write translations to disk, optionally post-process them, and score with BLEU.

        Args:
            out_fname: Path of the translation file to create.
            trans: Iterable of strings written verbatim with ``writelines``.
            data_prefix: Combined with ``wargs.val_tst_dir`` and
                ``wargs.val_ref_suffix`` to build the reference file paths.
            alns: Optional iterable of strings; when given it is written to
                ``<out_fname>.aln``.

        Returns:
            The value returned by ``bleu_file``. When ``wargs.with_postproc``
            is True this is the score of the ``.opost`` copy (taken before
            ``postproc.sh`` ran); otherwise it is the score of ``out_fname``.

        Side effects:
            ``out_fname`` is renamed to ``<out_fname>_<bleu>.txt``; the
            ``.opost`` copy, if created, is renamed the same way. A ``.bpe``
            copy is left behind when ``wargs.with_bpe`` is True.
        """
        # Local imports keep this method self-contained; the module's import
        # block is not visible from here.
        import shutil
        import subprocess

        if alns is not None:
            with open('{}.aln'.format(out_fname), 'w') as fout_aln:
                fout_aln.writelines(alns)

        with open(out_fname, 'w') as fout:
            fout.writelines(trans)

        # Candidate references: "<dir><prefix>.<suffix>" followed by the
        # numbered variants "<dir><prefix>.<suffix>0", "...1", ...
        # Only the ones that exist on disk are kept, in that order.
        ref_base = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix,
                                    wargs.val_ref_suffix)
        candidates = [ref_base] + [
            '{}{}'.format(ref_base, idx) for idx in range(wargs.ref_cnt)
        ]
        ref_fpaths = [path for path in candidates if os.path.exists(path)]

        if wargs.with_bpe is True:
            bpe_fname = '{}.bpe'.format(out_fname)
            # Keep the raw output as <out_fname>.bpe, then rewrite out_fname
            # with the "@@ " markers removed.
            shutil.copyfile(out_fname, bpe_fname)
            wlog('cp {} {}.bpe'.format(out_fname, out_fname))
            # An argument list plus an explicit stdout handle replaces the
            # shell redirection, so unusual characters in the paths cannot be
            # reinterpreted by a shell.
            with open(out_fname, 'w') as merged:
                subprocess.call(
                    ['sed', '-r', 's/(@@ )|(@@ ?$)//g', bpe_fname],
                    stdout=merged)
            wlog("sed -r 's/(@@ )|(@@ ?$)//g' {}.bpe > {}".format(
                out_fname, out_fname))

        if wargs.with_postproc is True:
            opost_name = '{}.opost'.format(out_fname)
            shutil.copyfile(out_fname, opost_name)
            wlog('cp {} {}'.format(out_fname, opost_name))
            # NOTE(review): postproc.sh presumably reads its first argument
            # and writes its second -- confirm against the script.
            subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
            wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
            mteval_bleu_opost = bleu_file(opost_name, ref_fpaths)
            os.rename(opost_name, "{}_{}.txt".format(opost_name,
                                                     mteval_bleu_opost))

        mteval_bleu = bleu_file(out_fname, ref_fpaths)
        # Embed the score in the file name.
        os.rename(out_fname, "{}_{}.txt".format(out_fname, mteval_bleu))

        return mteval_bleu_opost if wargs.with_postproc is True else mteval_bleu
Exemplo n.º 2
0
    def write_file_eval(self,
                        out_fname,
                        trans,
                        data_prefix,
                        alns=None,
                        subw=None):
        """Write translations to disk, optionally post-process them, and score with BLEU.

        Args:
            out_fname: Path of the translation file to create.
            trans: Iterable of strings written verbatim with ``writelines``.
            data_prefix: Combined with ``wargs.val_tst_dir`` and
                ``wargs.val_ref_suffix`` to build the reference file paths.
            alns: Optional iterable of strings written to ``<out_fname>.aln``.
            subw: Optional iterable of strings written to
                ``<out_fname>.subword``.

        Returns:
            One of the computed scores: the ``print_multi_bleu`` result unless
            ``wargs.use_multi_bleu`` is False (then the ``bleu_file`` result),
            in its ``char=True`` variant when ``wargs.char`` is True.

        Raises:
            AssertionError: If the translation file, the ``.bpe`` copy or the
                ``.opost`` copy does not exist after being written.

        Side effects:
            ``out_fname`` is renamed so that the scores are part of its name;
            the ``.opost`` copy, if created, is renamed likewise.
        """
        # Local import keeps this method self-contained; the module's import
        # block is not visible from here.
        import subprocess

        if alns is not None:
            with open('{}.aln'.format(out_fname), 'w') as fout_aln:
                fout_aln.writelines(alns)

        if subw is not None:
            with open('{}.subword'.format(out_fname), 'w') as fout_subw:
                fout_subw.writelines(subw)

        with open(out_fname, 'w') as fout:
            fout.writelines(trans)

        # Candidate references: "<dir><prefix>.<suffix>" followed by the
        # numbered variants "<dir><prefix>.<suffix>0", "...1", ...
        # Only the ones that exist on disk are kept, in that order.
        ref_base = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix,
                                    wargs.val_ref_suffix)
        candidates = [ref_base] + [
            '{}{}'.format(ref_base, idx) for idx in range(wargs.ref_cnt)
        ]
        ref_fpaths = [path for path in candidates if os.path.exists(path)]

        assert os.path.exists(out_fname), 'translation do not exist ...'
        if wargs.with_bpe is True:
            bpe_fname = '{}.bpe'.format(out_fname)
            wlog('copy {} to {} ... '.format(out_fname, bpe_fname), 0)
            copyfile(out_fname, bpe_fname)
            assert os.path.exists(bpe_fname), 'bpe file do not exist ...'
            wlog('done')
            wlog(
                "sed -r 's/(@@ )|(@@ ?$)//g' {} > {} ... ".format(
                    bpe_fname, out_fname), 0)
            # NOTE(review): proc_bpe presumably performs the substitution
            # shown in the log line above -- confirm against its definition.
            proc_bpe(bpe_fname, out_fname)
            wlog('done')

        if wargs.with_postproc is True:
            opost_name = '{}.opost'.format(out_fname)
            wlog('copy {} to {} ... '.format(out_fname, opost_name), 0)
            copyfile(out_fname, opost_name)
            assert os.path.exists(opost_name), 'opost file do not exist ...'
            wlog('done')
            wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
            # Argument list instead of a shell string, so unusual characters
            # in the paths cannot be reinterpreted by a shell.
            subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
            wlog('done')
            mteval_bleu_opost = bleu_file(opost_name,
                                          ref_fpaths,
                                          cased=wargs.cased)
            os.rename(opost_name, "{}_{}.txt".format(opost_name,
                                                     mteval_bleu_opost))

        mteval_bleu = bleu_file(out_fname, ref_fpaths, cased=wargs.cased)
        multi_bleu = print_multi_bleu(out_fname, ref_fpaths, cased=wargs.cased)

        # Evaluate the flag once and use it both for computing and for
        # selecting the char-level scores. Testing ``is True`` in one place
        # and ``is False`` in the other made any third value (e.g. None)
        # select a score that had never been computed.
        char_level = wargs.char is True
        if char_level:
            c_mteval_bleu = bleu_file(out_fname,
                                      ref_fpaths,
                                      cased=wargs.cased,
                                      char=True)
            c_multi_bleu = print_multi_bleu(out_fname,
                                            ref_fpaths,
                                            cased=wargs.cased,
                                            char=True)
            scored_name = "{}_{}_{}_c_{}_{}.txt".format(
                out_fname, mteval_bleu, multi_bleu, c_mteval_bleu,
                c_multi_bleu)
        else:
            scored_name = "{}_{}_{}.txt".format(out_fname, mteval_bleu,
                                                multi_bleu)
        os.rename(out_fname, scored_name)

        if wargs.use_multi_bleu is False:
            final_bleu = c_mteval_bleu if char_level else mteval_bleu
        else:
            final_bleu = c_multi_bleu if char_level else multi_bleu

        return final_bleu
Exemplo n.º 3
0
    def write_file_eval(self, out_fname, trans, data_prefix, alns=None):
        """Write translations to disk, optionally post-process them, and score with BLEU.

        Args:
            out_fname: Path of the translation file to create.
            trans: Iterable of strings written verbatim with ``writelines``.
            data_prefix: Combined with ``wargs.val_tst_dir`` and
                ``wargs.val_ref_suffix`` to build the reference path handed to
                ``grab_all_trg_files``.
            alns: Optional iterable of strings written to ``<out_fname>.aln``.

        Returns:
            The ``print_multi_bleu`` result when ``wargs.use_multi_bleu`` is
            True, otherwise the ``bleu_file`` result.

        Raises:
            AssertionError: If the translation file, the ``.bpe`` copy or the
                ``.opost`` copy does not exist after being written.

        Side effects:
            ``out_fname`` is renamed so that both scores (and a ``_char``
            marker when ``wargs.char_bleu`` is True) are part of its name; the
            ``.opost`` copy, if created, is renamed with its own score.
        """
        # Local import keeps this method self-contained; the module's import
        # block is not visible from here.
        import subprocess

        if alns is not None:
            with open('{}.aln'.format(out_fname), 'w') as fout_aln:
                fout_aln.writelines(alns)

        with open(out_fname, 'w') as fout:
            fout.writelines(trans)

        ref_fpath = '{}{}.{}'.format(wargs.val_tst_dir, data_prefix,
                                     wargs.val_ref_suffix)
        # NOTE(review): grab_all_trg_files presumably expands the base path
        # into the list of existing reference files -- confirm.
        ref_fpaths = grab_all_trg_files(ref_fpath)

        assert os.path.exists(out_fname), 'translation do not exist ...'
        if wargs.with_bpe is True:
            bpe_fname = '{}.bpe'.format(out_fname)
            wlog('copy {} to {} ... '.format(out_fname, bpe_fname), 0)
            copyfile(out_fname, bpe_fname)
            assert os.path.exists(bpe_fname), 'bpe file do not exist ...'
            wlog('done')
            wlog(
                "sed -r 's/(@@ )|(@@ ?$)//g' {} > {} ... ".format(
                    bpe_fname, out_fname), 0)
            # NOTE(review): proc_bpe presumably performs the substitution
            # shown in the log line above -- confirm against its definition.
            proc_bpe(bpe_fname, out_fname)
            wlog('done')

        if wargs.with_postproc is True:
            opost_name = '{}.opost'.format(out_fname)
            wlog('copy {} to {} ... '.format(out_fname, opost_name), 0)
            copyfile(out_fname, opost_name)
            assert os.path.exists(opost_name), 'opost file do not exist ...'
            wlog('done')
            wlog("sh postproc.sh {} {}".format(opost_name, out_fname))
            # Argument list instead of a shell string, so unusual characters
            # in the paths cannot be reinterpreted by a shell.
            subprocess.call(['sh', 'postproc.sh', opost_name, out_fname])
            wlog('done')
            mteval_bleu_opost = bleu_file(opost_name,
                                          ref_fpaths,
                                          cased=wargs.cased)
            os.rename(opost_name, "{}_{}.txt".format(opost_name,
                                                     mteval_bleu_opost))

        mteval_bleu = bleu_file(out_fname,
                                ref_fpaths,
                                cased=wargs.cased,
                                char=wargs.char_bleu)
        multi_bleu = print_multi_bleu(out_fname,
                                      ref_fpaths,
                                      cased=wargs.cased,
                                      char=wargs.char_bleu)

        # Embed the scores in the file name.
        char_tag = '_char' if wargs.char_bleu is True else ''
        os.rename(
            out_fname,
            '{}{}_{}_{}.txt'.format(out_fname, char_tag, mteval_bleu,
                                    multi_bleu))

        return multi_bleu if wargs.use_multi_bleu is True else mteval_bleu