Example #1
 def bleu4(length_gold, length_pred, counts_hit, counts_all, smoothing=False, report=False, ss_short=False):
     s, cc = 0., 0
     them = {}
     bp = BleuCalculator.brevity_penalty(length_gold, length_pred)
     them["bp"] = bp
     utils.zcheck_matched_length(counts_hit, counts_all)
     for h, a in zip(counts_hit, counts_all):
         if cc>0 and smoothing:
             # +1 smooth for n>1 maybe # todo(warn) may result in 0/*/*/*
             vv = (h+1)/(a+1)
         else:
             vv = h/a
         them[cc] = vv
         if vv <= 0:
             utils.zlog("Zero 1-gram counts !!", func="warn")
             s += utils.Constants.MIN_V
         else:
             s += math.log(vv)
         cc += 1
     s /= cc
     bleu = bp * math.exp(s)
     them["bleu"] = bleu
     ss = None
     if report:
         # utils.zlog("BLEU4-Counts: %s-%s" % (counts_hit, counts_all))
         ss = "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, hyp_len=%d, ref_len=%d)" \
               % (them["bleu"]*100, them[0]*100, them[1]*100, them[2]*100, them[3]*100, bp, length_pred, length_gold)
         utils.zlog(ss)
     if ss_short:
         ss = "%.2f(BP=%.3f,L=%d)" % (them["bleu"]*100, bp, length_pred)
     return bleu, ss
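The function above is the standard corpus-level BLEU-4: the brevity penalty times the geometric mean of the four clipped n-gram precisions, BLEU = BP * exp((1/4) * sum_n log p_n). A minimal self-contained sketch of the same computation (plain Python, not the project's BleuCalculator/utils helpers; the numbers at the end are made up):

import math

def brevity_penalty(ref_len, hyp_len):
    # BP = 1 if the hypothesis is at least as long as the reference, else exp(1 - ref_len/hyp_len)
    return 1.0 if hyp_len >= ref_len else math.exp(1.0 - ref_len / hyp_len)

def corpus_bleu4(ref_len, hyp_len, hits, totals, smoothing=False):
    # hits/totals: clipped n-gram matches and hypothesis n-gram counts for n = 1..4
    log_sum = 0.0
    for n, (h, a) in enumerate(zip(hits, totals)):
        p = (h + 1) / (a + 1) if (smoothing and n > 0) else h / a   # +1 smoothing for n > 1
        log_sum += math.log(p) if p > 0 else -1e10
    return brevity_penalty(ref_len, hyp_len) * math.exp(log_sum / 4)

print(corpus_bleu4(20, 18, [15, 10, 6, 3], [18, 17, 16, 15], smoothing=True))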
Example #2
 def fit_once(fit_files):
     # first fit a simple model: y = gaussian(a*x + b, sigma); xenc is not included here since that would be too large
     with utils.Timer(tag="Fit-length-once", print_date=True):
         # 1. collect length
         with utils.zopen(fit_files[0]) as f0, utils.zopen(fit_files[1]) as f1:
             # todo(warn): plus one for the <eos> tokens
             x = [LinearGaussain.trans_len(len(_l.split())+1) for _l in f0]
             y = [LinearGaussain.trans_len(len(_l.split())+1) for _l in f1]
         utils.zcheck_matched_length(x, y, _forced=True)
         ll = len(x)
         x1, y1 = np.array(x, dtype=np.float32).reshape((-1,1)), np.array(y, dtype=np.float32)
         # 2. fit linear model
         try:    # todo(warn)
             regr = linear_model.LinearRegression()
             regr.fit(x1, y1)
             a, b = float(regr.coef_[0]), float(regr.intercept_)
         except:
             utils.zlog("Cannot linear-regression, skip that.")
             a, b = 1., 0.
         # 3. fit sigma
         x1 = x1.reshape((-1,))
         errors = a*x1+b - y1
         mu = np.mean(errors)
         sigma = np.sqrt(((errors - mu)**2).mean())
         ret = (a, b, sigma, mu)
         del x, y, x1, y1
         utils.zlog("Fitting Length with %s sentences and get %s." % (ll, ret), func="score")
     return ret
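A self-contained sketch of the same fitting on toy data: regress the target length on the source length with scikit-learn's LinearRegression and take the residual standard deviation as sigma (the trans_len transform and the file reading from the example are left out; the arrays below are made up):

import numpy as np
from sklearn import linear_model

# made-up (source, target) sentence lengths standing in for the transformed lengths
x = np.array([10, 15, 20, 25, 30], dtype=np.float32)
y = np.array([12, 16, 22, 27, 33], dtype=np.float32)

regr = linear_model.LinearRegression()
regr.fit(x.reshape(-1, 1), y)
a, b = float(regr.coef_[0]), float(regr.intercept_)

errors = a * x + b - y
mu = float(np.mean(errors))
sigma = float(np.sqrt(((errors - mu) ** 2).mean()))
print("a=%.3f b=%.3f sigma=%.3f mu=%.3f" % (a, b, sigma, mu))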
Example #3
 def _argmax_list(ll, amount=10, cfs=(lambda x: x[0]>1e-5, lambda x: x[0]<=1e-5 and x[0]>=-1e-5, lambda x: x[0]<-1e-5), buckets=(0, 15, 30, 50, 1000), golds_len0=None):
     def _sort_and_report(_l2):
         rankings = sorted(_l2, key=lambda x: x[-1], reverse=True)
         for i in range(min(len(rankings), amount)):  # avoid indexing past the end when _l2 is shorter than r
             idx, content = rankings[i][0], rankings[i][1]
             utils.zlog("#%d Max-ranking, index is %s, content is %s." % (i, idx, content), func="details")
     # --
     r = len(ll)
     rs = []
     rs_descr = []
     for cf in cfs:
         cc = sum(1 if cf(one) else 0 for one in ll)
         rs.append(cc)
         rs_descr.append("%d/%d/%.3f" % (cc, r, cc/r))
     utils.zlog("Countings: %s" % (" ".join(rs_descr),))
     _sort_and_report([(i,one) for i,one in enumerate(ll)])
     # analyzing buckets of length of gold0
     for i in range(len(buckets)-1):
         a, b = buckets[i], buckets[i+1]
         _rf = lambda x: a <= x and x < b
         ll2 = []
         for i, one in enumerate(ll):
             if _rf(golds_len0[i]):
                 ll2.append((i, one))
         utils.zlog("Range [%d, %d): %d/%d/%.3f" % (a, b, len(ll2), r, len(ll2)/r), func="details")
         _sort_and_report(ll2)
Example #4
def _get_lang(gold_fn):
    # current:
    cands = ["en", "fr", "de", "zh"]
    for c in cands:
        if c in gold_fn:
            return c
    zlog("Unknown target languages for evaluating!!", func="warn")
    return "en"
Example #5
 def _report_log(self):
     utils.zlog(
         "ResultLogger: %d/%.3f/%.3f/%.3f" %
         (self.num_insts, self.num_ends / self.num_insts,
          self.num_mends / self.num_insts, self.num_mends / self.num_ends))
     with utils.zopen("length_sizes.txt", "w") as fd:
         _tmp_idx = 0
         for zl, zs in zip(self.ls[0], self.ls[1]):
             _tmp_idx += 1
             fd.write("%d %d " % (zl, zs))
             if _tmp_idx % 100 == 0:
                 fd.write("\n")
Example #6
 def __init__(self, model, xlen, xadd, xback, length_info):
     super(LinearGaussain, self).__init__(model)
     self.xlen = xlen        # dim of src repr
     self.xadd = xadd        # whether add xsrc
     self.xback = xback      # whether prop back through xsrc
     if length_info is None:
         length_info = LinearGaussain._DEFAULT_INFO
     # params
     utils.zlog("Init lg with len-info %s" % (length_info,))
     self.params["W"] = self._add_params((1, xlen), )
     self.params["A"] = self._add_params((1, 1), init=np.array([length_info[0],], dtype=np.float32))
     self.params["B"] = self._add_params((1,), init=np.array([length_info[1],], dtype=np.float32))
     self.params["SI"] = self._add_params((1,), init=np.array([length_info[2],], dtype=np.float32))
Example #7
 def _validate_len(self, dev_iter):
     # negative mean squared error of the length predictions (higher is better)
     count = 0
     loss = 0.
     with utils.Timer(tag="VALID-LEN", print_date=True) as et:
         utils.zlog("With lg as %s." % (self._mm.lg.obtain_params(), ))
         for insts in dev_iter.arrange_batches():
             ys = [i[1] for i in insts]
             ylens = np.asarray([len(_y) for _y in ys])
             count += len(ys)
             Model.new_graph()
             self._mm.refresh(False)
             preds = self._mm.predict_length(insts)
             loss += np.sum((preds - ylens)**2)
     return -loss / count
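The returned value is the negative mean squared error of the predicted lengths, so a less negative validation score means better length prediction; a tiny numeric illustration with made-up values:

import numpy as np

ylens = np.asarray([10, 12, 20])        # gold target lengths (made up)
preds = np.asarray([11.0, 12.5, 18.0])  # predicted lengths (made up)
print(-np.sum((preds - ylens) ** 2) / len(ylens))   # -1.75, higher is better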
Example #8
def main():
    # init
    opts = mt_args.init("test")
    looping = opts["loop"]
    # 1. data
    source_dict, target_dict = Vocab.read(opts["dicts"][0]), Vocab.read(opts["dicts"][1])
    # -- here usually no need for test[1], but for convenience ...
    if not looping:
        dicts = [source_dict] + [target_dict for _ in opts["test"][1:]]
        test_iter = get_arranger(opts["test"], dicts, multis=False, shuffling_corpus=False, shuflling_buckets=False, sort_prior=[0], batch_size=opts["test_batch_size"], maxibatch_size=-1, max_len=utils.Constants.MAX_V, min_len=0, one_len=opts["max_len"]+1, shuffling0=False)
    # 2. model
    mm = []
    for mn in opts["models"]:
        x = s2sModel(opts, source_dict, target_dict, None)     # rebuild from opts, thus use the same opts when testing
        x.load(mn)
        mm.append(x)
    if len(mm) == 0:
        utils.zlog("No models specified, must be testing mode?", func="warn")
        mm.append(s2sModel(opts, source_dict, target_dict, None))      # no loading, only for testing
    # 3. decode
    if not looping:
        utils.zlog("=== Start to decode ===", func="info")
        with utils.Timer(tag="Decoding", print_date=True):
            mt_decode(opts["decode_way"], test_iter, mm, target_dict, opts, opts["output"])
        utils.zlog("=== End decoding, write to %s ===" % opts["output"], func="info")
        # todo(warn) forward-compatible evaluation
        if len(opts["test"]) > 1:
            gold = opts["test"][1]
        else:
            gold = opts["gold"][0]
        mt_eval.evaluate(opts["output"], gold, opts["eval_metric"])
    else:
        ot = Outputter(opts)
        while True:
            utils.zlog("Enter the src to translate:")
            line = sys.stdin.readline()
            if len(line)==0:
                break
            # prepare one
            one_words = line.strip().split()
            one_idxes = Vocab.w2i(source_dict, one_words, add_eos=True, use_factor=False)
            one_inst = TextInstance([one_words], [one_idxes])
            rs = mt_decode(opts["decode_way"], [one_inst], mm, target_dict, opts, opts["output"])
            utils.zlog(ot.format(rs[0], target_dict, False, False))
Example #9
 def analyse(srcs, golds, preds, kbests, n=4, on_words=True):
     # mainly two goals: compare pred/oracle/gold & compare between preds
     BleuCalculator.add_clipped_counts(golds, preds, n, on_words=on_words)
     for curk in kbests:
         utils.zlog("Start for kbest: k==%s" % curk, func="time")
         for i, pred in enumerate(preds):
             utils.zlog("For file num %i" % i, func="time")
             BleuCalculator.analyse_single(golds, pred, curk, n)
             utils.zlog("", func="time")
     if len(preds) > 1:
         BleuCalculator.analyse_multi(golds, preds, 1, n)
         utils.zlog("", func="time")
Example #10
def _eval_bleu(output, gold, process_gold, lowercase=False):
    dir_name = os.path.dirname(os.path.abspath(__file__))
    restore_name = os.path.join(dir_name, "..", "scripts", "restore.sh")
    script_name = os.path.join(dir_name, "..", "scripts", "moses",
                               "multi-bleu.perl")
    # zmt_name = os.path.join(dir_name, "..")  # todo(warn) need to find mosesdecoder for restore: default $ZMT is znmt/../
    # maybe preprocess
    # todo: special treatment for files with multiple references
    if str.isnumeric(gold[-1]):
        zlog(
            "Evaluating instead on %s to deal with multiple references of original %s."
            % (gold[:-1], gold),
            func="warn")
        gold = gold[:-1]
    elif process_gold:
        gold_res = "temp.somekindofhelpless.gold.restore"
        os.system("bash %s < %s > %s" % (restore_name, gold, gold_res))
        gold = gold_res
    maybe_lc = "-lc" if lowercase else ""
    cmd = "bash %s < %s | perl %s %s %s" % (restore_name, output, script_name,
                                            maybe_lc, gold)
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    line = p.stdout.readlines()
    zlog("Evaluating %s to %s." % (output, gold), func="info")
    zlog(str(line), func="score")
    b = float(line[-1].split()[2][:-1])
    return b
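The last line printed by multi-bleu.perl normally looks like "BLEU = 33.10, 65.1/39.2/26.3/18.4 (BP=0.998, ratio=0.993, hyp_len=..., ref_len=...)", which is why the score is recovered with split()[2][:-1] above. A slightly more defensive extraction could use a regex instead (a sketch, assuming that output format):

import re

def parse_multi_bleu(last_line):
    if isinstance(last_line, bytes):
        last_line = last_line.decode("utf-8", errors="replace")
    m = re.search(r"BLEU = ([\d.]+),", last_line)
    return float(m.group(1)) if m else 0.0

print(parse_multi_bleu(b"BLEU = 33.10, 65.1/39.2/26.3/18.4 (BP=0.998, ratio=0.993)"))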
Example #11
def main():
    # init
    opts = mt_args.init("train")
    # start to train
    # 1. obtain dictionaries
    source_corpus, target_corpus = opts["train"]
    source_dict, target_dict = None, None
    if not opts["rebuild_dicts"]:
        try:
            source_dict, target_dict = Vocab.read(opts["dicts"][0]), Vocab.read(opts["dicts"][1])
        except:
            utils.zlog("Read dictionaries fail %s, rebuild them." % (opts["dicts"],), func="warn")
    if source_dict is None or target_dict is None:
        # rebuild the dictionaries from corpus
        source_dict = Vocab(fname=source_corpus, rthres=opts["dicts_rthres"], fthres=opts["dicts_fthres"])
        target_dict = Vocab(fname=target_corpus, rthres=opts["dicts_rthres"], fthres=opts["dicts_fthres"])
        # save dictionaries
        try:
            source_dict.write(opts["dicts"][0])
            target_dict.write(opts["dicts"][1])
        except:
            utils.zlog("Write dictionaries fail: %s, skip this step." % opts["dicts_final"], func="warn")
    # 2. corpus iterator
    shuffling0 = opts["shuffle_training_data_onceatstart"]
    sort_prior = {"src":[0], "trg":[1], "src-trg":[0,1], "trg-src":[1,0]}[opts["training_sort_type"]]
    train_iter = get_arranger(opts["train"], [source_dict, target_dict], multis=False, shuffling_corpus=opts["shuffle_training_data"], shuflling_buckets=opts["shuffle_training_data"], sort_prior=sort_prior, batch_size=opts["batch_size"], maxibatch_size=20, max_len=opts["max_len"]+1, min_len=2, one_len=opts["max_len"]+1, shuffling0=shuffling0)
    dev_iter = get_arranger(opts["dev"], [source_dict, target_dict], multis=False, shuffling_corpus=False, shuflling_buckets=False, sort_prior=[0], batch_size=opts["valid_batch_size"], maxibatch_size=-1, max_len=utils.Constants.MAX_V, min_len=0, one_len=opts["max_len"]+1, shuffling0=False)
    # 3. about model & trainer
    # <special one> fit a gaussian first
    length_info = LinearGaussain.fit_once(opts["train"])   # todo: train or dev?
    mm = s2sModel(opts, source_dict, target_dict, length_info)
    tt = MTTrainer(opts, mm)  # trainer + training_progress
    if opts["reload"] and os.path.exists(opts["reload_model_name"]):
        tt.load(opts["reload_model_name"], opts["reload_training_progress"])
    # 4. training
    tt.train(train_iter, dev_iter)
    utils.zlog("=== Training ok!! ===", func="info")
Example #12
 def report(self):
     utils.zlog("Outputter final: count=%s/%s, replaced=%s" %
                (self.inst_count, self.sent_count, self.replaced))
Example #13
 def _sort_and_report(_l2):
     rankings = sorted(_l2, key=lambda x: x[-1], reverse=True)
     for i in range(min(len(rankings), amount)):  # avoid indexing past the end of a short list
         idx, content = rankings[i][0], rankings[i][1]
         utils.zlog("#%d Max-ranking, index is %s, content is %s." % (i, idx, content), func="details")
Example #14
 def global_prune_ngram_greedy(cand_states,
                               rest_beam_size,
                               sig_beam_size,
                               thresh,
                               penalty,
                               ngram_n,
                               ngram_range,
                               pr_global_lreward=0.,
                               pr_global_nalpha=1.):
     # on sorted list, comparing according to normalized scores -- greedy pruning
     # todo: how could we compare diff length states? -> normalize partial score
     # todo: how to do pruning and sig-max (there might be crossings)? -> take the greedy way
     # _get_score_f = (lambda x: x.score_partial/x.length)
     _get_score_f = lambda x: (x.score_partial + pr_global_lreward * x.length) / (x.length ** pr_global_nalpha)
     sig_ngram_maxs = {}  # all-step max (for survived ones)
     sig_ngram_curnum = defaultdict(int)  # last-step state lists for sig
     sig_ngram_allnum = defaultdict(int)  # all-step state counts for sig
     temp_ret = []
     ngram_range = max(0, ngram_range)  # to be sure >= 0
     # pruning
     for one in cand_states:
         if len(temp_ret) >= rest_beam_size:
             one.state("PR_BEAM")
             continue
         # ngram sigs, according to the listing
         them = one.get_path(maxlen=ngram_range)
         if len(them) > 0:
             this_pruned = False
             # pruning according to sig-size and thresh
             cur_sig = one.sig_ngram(ngram_n)
             flag_not_best = False
             if cur_sig in sig_ngram_maxs:
                 this_score, high_score = _get_score_f(one), _get_score_f(
                     sig_ngram_maxs[cur_sig])
                 if this_score <= high_score:  # not the best until current
                     flag_not_best = True
                     if sig_ngram_allnum[cur_sig] >= sig_beam_size:
                         one.state("PR_NGRAM_EXPAND")
                         this_pruned = True
                     elif this_score <= high_score - thresh:
                         one.state("PR_NGRAM_DIFF")
                         this_pruned = True
             # check cov for pruning
             if this_pruned:
                 pruner_one = sig_ngram_maxs[cur_sig]
                 if not Pruner.cov_checker.cov_ok(one, pruner_one):
                     one.state("ZZ")
                     one.tags("FAIL_PR_COV")
                     this_pruned = False
             # adding
             if not this_pruned:
                 # todo(warn) penalize here according to two criteria
                 if penalty > 0.:
                     one_score_cur = one.action_score()
                     one_score_cur -= sig_ngram_curnum[cur_sig] * penalty
                     if flag_not_best:
                         one_score_cur -= penalty
                     one.action_score(one_score_cur)
                 # add all steps for this one
                 for one_state in them:
                     one_sig = one_state.sig_ngram(ngram_n)
                     if one_sig not in sig_ngram_maxs or _get_score_f(
                             one_state) > _get_score_f(
                                 sig_ngram_maxs[one_sig]):
                         sig_ngram_maxs[one_sig] = one_state
                     sig_ngram_allnum[one_sig] += 1
                 sig_ngram_curnum[cur_sig] += 1  # only last step
                 temp_ret.append(one)
             else:
                 pruner_one = sig_ngram_maxs[cur_sig]
                 # set pruners and record in the sg
                 one.set("PR_PRUNER", pruner_one)
                 pruner_one.add_list("PRUNING_LIST", one)
                 if one == pruner_one:
                     utils.zlog("WHAT? Self-pruning?")
         else:
             # for example, the first several steps
             temp_ret.append(one)
     return temp_ret
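The pruning above compares partial hypotheses of different lengths via a length-normalized score, roughly (score_partial + lreward * length) / length ** alpha (the _get_score_f above). A small standalone illustration of how this normalization can reorder hypotheses (made-up scores):

def norm_score(score, length, lreward=0.0, alpha=1.0):
    # (accumulated log-score + length reward) / length ** alpha
    return (score + lreward * length) / (length ** alpha)

hyps = [(-4.0, 4), (-5.5, 6)]  # (accumulated log-prob, length), made up
for s, l in hyps:
    print(s, l, norm_score(s, l))
# raw scores favor the shorter hypothesis (-4.0 > -5.5),
# but the per-token scores favor the longer one (-0.917 > -1.0)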
Example #15
def mt_decode(decode_way,
              test_iter,
              mms,
              target_dict,
              opts,
              outf,
              gold_iter=None):
    reranking = (gold_iter is not None)
    looping = isinstance(test_iter, Iterable)
    if reranking:
        cur_searcher = mt_search.search_rerank
    else:
        cur_searcher = {
            "greedy": mt_search.search_greedy,
            "beam": mt_search.search_beam,
            "sample": mt_search.search_sample,
            "branch": mt_search.search_branch
        }[decode_way]
    one_recorder = OnceRecorder("DECODE")
    num_sents = len(test_iter)
    cur_sents = 0.
    sstater = StateStater()
    if opts["decode_extract_paraf"]:
        para_extractor = ParafExtractor(opts, target_dict)
    else:
        para_extractor = DummyParafExtractor(opts)
    # decoding them all
    results = ResultLogger(outf, target_dict, opts)
    tracking_list = None
    prev_point = 0
    # init normer
    for i, _m in enumerate(mms):
        _lg_params = _m.lg.obtain_params()
        utils.zlog("Model[%s] is with lg as %s." % (
            i,
            _lg_params,
        ))
    _sigma = np.average([_m.lg.get_real_sigma() for _m in mms], axis=0)
    normer = get_normer(opts["normalize_way"], opts["normalize_alpha"], _sigma)
    # todo: ugly code here
    if looping:
        rs = cur_searcher(mms, test_iter, target_dict, opts, normer, sstater,
                          para_extractor)
        results.add(rs)
        tracking_list = None
    else:
        if reranking:
            for one_tests, one_golds in zip(test_iter.arrange_batches(),
                                            gold_iter.arrange_batches()):
                if opts["verbose"] and (cur_sents - prev_point) >= (
                        opts["report_freq"] * test_iter.bsize()):
                    utils.zlog("Reranking process: %.2f%%" %
                               (cur_sents / num_sents * 100))
                    prev_point = cur_sents
                cur_sents += len(one_tests)
                mt_search.search_init()
                rs = cur_searcher(mms, [one_tests, one_golds], target_dict,
                                  opts, normer, sstater, para_extractor)
                results.add(rs)
                one_recorder.record(one_tests, {}, 0)
        else:
            for insts in test_iter.arrange_batches():
                if opts["verbose"] and (cur_sents - prev_point) >= (
                        opts["report_freq"] * test_iter.bsize()):
                    utils.zlog("Decoding process: %.2f%%" %
                               (cur_sents / num_sents * 100))
                    prev_point = cur_sents
                cur_sents += len(insts)
                mt_search.search_init()
                # return list(batch) of list(beam) of states
                rs = cur_searcher(mms, insts, target_dict, opts, normer,
                                  sstater, para_extractor)
                results.add(rs)
                one_recorder.record(insts, {}, 0)
        one_recorder.report()
        tracking_list = test_iter.get_tracking_list()
        # restore from sorting by length
        # results = test_iter.restore_order(results)
    # output
    sstater.report()
    results.finish(tracking_list)
    para_extractor.save_parafs(outf)
    utils.zlog("COV-LOG: " + CovChecker.report())
    if looping:
        return rs
    else:
        return None
Example #16
 def report(self, s=""):
     utils.zlog(s + self.state(), func="info")
     if self._mm is not None:
         self._mm.stat_report()
Example #17
 def analyse_single(golds, pred, kbest, n):
     # sentence-level bleu score & ranking (average or max)
     len_inst = len(pred)
     golds_len0 = [len(g[0]) for g in golds]
     # sentence level smoothed bleu score
     # only consider the first k items in the list
     # t1: averaged ones
     utils.zlog("t1: Averages")
     # -- t10: average sent-bleu
     cum = 0.
     for p in pred:
         lp = len(p)
         cc = min(kbest, lp)
         cum += sum([z[0] for z in p.get("sb")[:cc]]) / cc
     utils.zlog("t10: Average sentence BLEU of kbest(k=%s) ones: BLEU=%.3f" % (kbest, cum/len_inst))
     # -- t11: average corpus-bleu
     count = 0
     cum1, cum2 = [0]*(n+1), [0]*(n+1)
     for p in pred:
         lp = len(p)
         cc = min(kbest, lp)
         count += cc
         s1, s2 = p.get("stat"), p.get("stat2")
         for i in range(cc):
             utils.Helper.add_inplace_list(cum1, s1[i])
             utils.Helper.add_inplace_list(cum2, s2[i])
     utils.zlog("t11: Average corpus BLEU of kbest(k=%s) ones, but average count is %.3f." % (kbest, count/len_inst))
     BleuCalculator.bleu4(cum1[0], cum2[0], cum1[1:], cum2[1:], report=True)
     # t2: oracle max (best sentence BLEU): influence on corpus-one-bleu
     utils.zlog("t2: About oracle max")
     # -- t20: how many is already oracle max as max & how much improvement of oracle-max
     # --- pred best
     p_cum1, p_cum2 = [0]*(n+1), [0]*(n+1)
     o_cum1, o_cum2 = [0]*(n+1), [0]*(n+1)
     hit_counts = 0
     obest_ranks = []
     for p in pred:
         lp = len(p)
         cc = min(kbest, lp)
         s1, s2, sbs = p.get("stat"), p.get("stat2"), p.get("sb")
         obest = int(np.argmax([z[0] for z in sbs[:cc]]))
         # record oracle best
         obest_ranks.append(obest)
         if obest == 0:
             hit_counts += 1
         # record pred one
         utils.Helper.add_inplace_list(p_cum1, s1[0])
         utils.Helper.add_inplace_list(p_cum2, s2[0])
         # record oracle one
         utils.Helper.add_inplace_list(o_cum1, s1[obest])
         utils.Helper.add_inplace_list(o_cum2, s2[obest])
     utils.zlog("t20: oracle hit is %s/%s/%.3f; prediction & oracle" % (hit_counts, len_inst, hit_counts/len_inst))
     bleu_base, _ = BleuCalculator.bleu4(p_cum1[0], p_cum2[0], p_cum1[1:], p_cum2[1:], report=True)
     BleuCalculator.bleu4(o_cum1[0], o_cum2[0], o_cum1[1:], o_cum2[1:], report=True)
     # --
     if kbest > 1:
         sbleu_improves = []     # list of (score, p[0], p[oracle]) -> sentence bleu improves
         cbleu_improves = []     # list of (score, final) -> corpus bleu improves after replacing
         for oidx, p in zip(obest_ranks, pred):
             s1, s2, sbs = p.get("stat"), p.get("stat2"), p.get("sb")
             sbleu_improves.append((sbs[oidx][0]-sbs[0][0], sbs[0][-1], sbs[oidx][-1], "Oracle-Rank %s"%oidx))
             repl_cum1, repl_cum2 = p_cum1.copy(), p_cum2.copy()
             utils.Helper.add_inplace_list(repl_cum1, s1[0], -1)
             utils.Helper.add_inplace_list(repl_cum1, s1[oidx], 1)
             utils.Helper.add_inplace_list(repl_cum2, s2[0], -1)
             utils.Helper.add_inplace_list(repl_cum2, s2[oidx], 1)
             bleu_change = BleuCalculator.bleu4(repl_cum1[0], repl_cum2[0], repl_cum1[1:], repl_cum2[1:])
             cbleu_improves.append((bleu_change[0]-bleu_base, bleu_change[-1], sbs[0][-1], sbs[oidx][-1]))
         # -- t21: which ones improves most (sbleu) if replaced by oracle-max
         utils.zlog("t21: improves at sentence bleus")
         BleuCalculator._argmax_list(sbleu_improves, golds_len0=golds_len0)
         # -- t22: which ones improves most (replace-cbleu) if replaced by oracle-max
         utils.zlog("t22: improves at corpus bleus with replacing")
         BleuCalculator._argmax_list(cbleu_improves, golds_len0=golds_len0)
     # t3: improvements by gold (once enough: thus only when kbest==1)
     if kbest == 1:
         utils.zlog("t3: About gold replacing")
         sbleu_goldimpr = []     # list of (score, p[0]) -> sentence bleu improves
         cbleu_goldimpr = []     # list of (score, final) -> corpus bleu improves after replacing
         for p in pred:
             s1, s2, sbs = p.get("stat"), p.get("stat2"), p.get("sb")
             sbleu_goldimpr.append((1.0-sbs[0][0], sbs[0][-1]))
             repl_cum1, repl_cum2 = p_cum1.copy(), p_cum2.copy()
             replace_counts = [s2[0][0], ] + [max(0, s2[0][0]-i) for i in range(n)]
             utils.Helper.add_inplace_list(repl_cum1, s1[0], -1)
             utils.Helper.add_inplace_list(repl_cum1, replace_counts, 1)
             utils.Helper.add_inplace_list(repl_cum2, s2[0], -1)
             utils.Helper.add_inplace_list(repl_cum2, replace_counts, 1)
             bleu_change = BleuCalculator.bleu4(repl_cum1[0], repl_cum2[0], repl_cum1[1:], repl_cum2[1:])
             cbleu_goldimpr.append((bleu_change[0]-bleu_base, bleu_change[-1], sbs[0][-1]))
         # -- t23: which ones improves most (sbleu) if replaced by gold
         utils.zlog("t31: gold_comparing at sentence bleus")
         BleuCalculator._argmax_list(sbleu_goldimpr, golds_len0=golds_len0)
         # -- t24: which ones improves most (replace-cbleu) if replaced by gold
         utils.zlog("t32: gold_comparing at corpus bleus with replacing")
         BleuCalculator._argmax_list(cbleu_goldimpr, golds_len0=golds_len0)
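The "replacing" analyses above operate on cumulative count vectors: subtract one hypothesis's per-sentence statistics from the corpus totals, add the replacement's, and recompute corpus BLEU from the new totals. A minimal sketch of that in-place swap (the helper plays the same role as utils.Helper.add_inplace_list; the count layout and values are made up):

def add_inplace(acc, delta, factor=1):
    # acc[i] += factor * delta[i]
    for i, d in enumerate(delta):
        acc[i] += factor * d

# corpus totals: [hyp_len, 1-gram hits, 2-gram hits, 3-gram hits, 4-gram hits]
cum = [1000, 700, 450, 300, 200]
old_sent = [20, 15, 10, 6, 3]   # stats of the current 1-best for one sentence
new_sent = [21, 18, 13, 9, 5]   # stats of the oracle-best candidate
add_inplace(cum, old_sent, -1)  # remove the old hypothesis
add_inplace(cum, new_sent, +1)  # insert the replacement
print(cum)                      # corpus BLEU would now be recomputed from these totals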
Example #18
 def analyse_multi(golds, preds, kbest, n):
     # todo(warn): only compares the pred[0]
     golds_len0 = [len(g[0]) for g in golds]
     if kbest > 1:
         pass
     else:
         # which one get the best max results
         utils.zlog("mt0: About comparing the predicted best ones")
         # - first get best sbleu (notice that this only takes the p[0], and the index is on the preds list)
         their_cums1 = [[0]*(n+1) for _ in range(len(preds))]
         their_cums2 = [[0]*(n+1) for _ in range(len(preds))]
         max_idxes = []
         num_pred = len(preds)
         len_inst = len(preds[0])
         for i in range(len_inst):
             their_results = []
             for j in range(num_pred):
                 p = preds[j][i]
                 s1, s2, sbs = p.get("stat"), p.get("stat2"), p.get("sb")
                 utils.Helper.add_inplace_list(their_cums1[j], s1[0])
                 utils.Helper.add_inplace_list(their_cums2[j], s2[0])
                 their_results.append(sbs[0])
             # argmax
             max_idx = BleuCalculator._cmp(their_results)
             max_idxes.append(max_idx)
         num_equal = sum(1 if one is None else 0 for one in max_idxes)
         utils.zlog("Specifically, equal rate is: %d/%d/%.3f" % (num_equal, len_inst, num_equal/len_inst))
         # analyzing for each preds
         for j in range(num_pred):
             num_hit = sum(1 if j==one else 0 for one in max_idxes)
             num_good = num_hit + num_equal
             utils.zlog("Specifically, for file #%s: %d/%d(%.3f)/%d(%.3f)" % (j, num_hit, len_inst, num_hit/len_inst, num_good, num_good/len_inst))
             bleu_base, _ = BleuCalculator.bleu4(their_cums1[j][0], their_cums2[j][0], their_cums1[j][1:], their_cums2[j][1:], report=True)
             sbleu_improves = []     # list of (score, p[0], best[0]) -> sentence bleu improves
             cbleu_improves = []     # list of (score, final) -> corpus bleu improves after replacing
             cur_ii = 0
             for oidx, p in zip(max_idxes, preds[j]):
                 if oidx is None:    # equal, count as self
                     oidx = j
                 pbest = preds[oidx][cur_ii]
                 s1, s2, sbs = p.get("stat"), p.get("stat2"), p.get("sb")
                 b1, b2, bbs = pbest.get("stat"), pbest.get("stat2"), pbest.get("sb")
                 if_in_idx = None
                 for _i, _item in enumerate(sbs):
                     if bbs[0] == _item:
                         if_in_idx = _i
                 sbleu_improves.append((bbs[0][0]-sbs[0][0], sbs[0][-1], bbs[0][-1], "Here-Rank %s" % if_in_idx))
                 repl_cum1, repl_cum2 = their_cums1[j].copy(), their_cums2[j].copy()
                 utils.Helper.add_inplace_list(repl_cum1, s1[0], -1)
                 utils.Helper.add_inplace_list(repl_cum1, b1[0], 1)
                 utils.Helper.add_inplace_list(repl_cum2, s2[0], -1)
                 utils.Helper.add_inplace_list(repl_cum2, b2[0], 1)
                 bleu_change = BleuCalculator.bleu4(repl_cum1[0], repl_cum2[0], repl_cum1[1:], repl_cum2[1:])
                 cbleu_improves.append((bleu_change[0]-bleu_base, bleu_change[-1], sbs[0][-1]))
                 cur_ii += 1
             # -- mt01: which ones improves most (sbleu) if replaced by best-sbleu one
             utils.zlog("mt01: best_comparing at sentence bleus (improves from this one to the best)")
             BleuCalculator._argmax_list(sbleu_improves, golds_len0=golds_len0)
             # -- mt02: which ones improves most (replace-cbleu) if replaced by best-sbleu one
             utils.zlog("mt02: best_comparing at corpus bleus with replacing (improves from this one to the best)")
             BleuCalculator._argmax_list(cbleu_improves, golds_len0=golds_len0)
Example #19
def main():
    # init
    opts = mt_args.init("rerank")
    # special readings from args for re-ranking mode
    # only accept spaced (multi-mode) nbest files for target & non-multi for golds
    # 1. data (only accepting nbest files)
    source_dict, target_dict = Vocab.read(opts["dicts"][0]), Vocab.read(
        opts["dicts"][1])
    dicts = [source_dict] + [target_dict for _ in opts["test"][1:]]
    test_iter = get_arranger_simple(opts["test"],
                                    dicts,
                                    multis=[False] +
                                    [True for _ in opts["test"][1:]],
                                    batch_size=opts["test_batch_size"])
    gold_iter = get_arranger_simple(opts["gold"],
                                    [target_dict for _ in opts["gold"]],
                                    multis=False,
                                    batch_size=opts["test_batch_size"])
    utils.zcheck_matched_length(test_iter, gold_iter)
    # 2. model
    mm = []
    try:
        for mn in opts["models"]:
            x = mt_mt.s2sModel(
                opts, source_dict, target_dict,
                None)  # rebuild from opts, thus use the same opts when testing
            try:
                x.load(mn)
            except:
                utils.zlog("Load model error %s!" % mn, func="warn")
            mm.append(x)
    except:
        pass
    # 3. analysis
    if len(mm) == 0:
        utils.zlog("No models specified, only analysing!", func="warn")
        num_test = len(opts["test"]) - 1
        golds = []
        srcs = []
        preds = [[] for _ in range(num_test)]
        for one in gold_iter.arrange_batches():
            golds += one
        for one in test_iter.arrange_batches():
            for zz in one:
                zzs = zz.extract()
                srcs.append(zzs[0])
                for i in range(num_test):
                    preds[i].append(zzs[i + 1])
        Analyzer.analyse(srcs, golds, preds, kbests=opts["rr_analysis_kbests"])
    # 4. rerank
    else:
        utils.zlog("=== Start to rerank ===", func="info")
        with utils.Timer(tag="Reranking", print_date=True):
            mt_decode(None,
                      test_iter,
                      mm,
                      target_dict,
                      opts,
                      opts["output"],
                      gold_iter=gold_iter)
        utils.zlog("=== End reranking, write to %s ===" % opts["output"],
                   func="info")
        mt_eval.evaluate(opts["output"], opts["gold"][0], opts["eval_metric"])