Example #1
0
class Rouge:
    """Compute ROUGE-1, ROUGE-2 and ROUGE-L scores for hypothesis/reference
    pairs.

    Texts are split into sentences on "." and whitespace-normalized before
    being handed to the ``rouge_score`` metric functions (module-level
    dependency, together with ``six``).
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 2),
        "rouge-l":
        lambda hyp, ref: rouge_score.rouge_l_summary_level(hyp, ref),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self, metrics=None, stats=None):
        """Select which metrics and stats to report.

        :param metrics: subset of ``AVAILABLE_METRICS`` keys (default: all).
        :param stats: subset of ``AVAILABLE_STATS`` (default: all).
        :raises ValueError: on an unknown metric or stat name.
        """
        self.metrics = metrics if metrics is not None \
            else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None \
            else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False, ignore_empty=False):
        """Score each hypothesis against its reference.

        :param hyps: a string or sequence of hypothesis strings.
        :param refs: a string or sequence of reference strings (same length).
        :param avg: if True, return one dict of averaged scores instead of
            a per-pair list.
        :param ignore_empty: if True, drop pairs with an empty hyp or ref.
        """
        if isinstance(hyps, six.string_types):
            hyps, refs = [hyps], [refs]

        if ignore_empty:
            # Drop pairs where either side is empty: an empty reference is
            # just as ill-defined for the scorer as an empty hypothesis.
            hyps_and_refs = zip(hyps, refs)
            hyps_and_refs = [
                _ for _ in hyps_and_refs if len(_[0]) > 0 and len(_[1]) > 0
            ]
            hyps, refs = zip(*hyps_and_refs)

        assert (type(hyps) == type(refs))
        assert (len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            # Sentence-split on "." and collapse runs of whitespace.
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[s] for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        """Return ``{metric: {stat: value}}`` averaged over all pairs."""
        scores = {m: {s: 0 for s in self.stats} for m in self.metrics}

        count = 0
        for (hyp, ref) in zip(hyps, refs):
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                # Accumulate only the requested stats. Iterating over the
                # keys of ``sc`` raised KeyError whenever ``self.stats`` was
                # a strict subset of what the metric function returns.
                scores[m] = {s: scores[m][s] + sc[s] for s in self.stats}
            count += 1
        scores = {
            m: {s: scores[m][s] / count
                for s in self.stats}
            for m in self.metrics
        }
        return scores
Example #2
0
        # NOTE(review): fragment appears truncated by the scrape — the
        # enclosing function and the ROUGE-L r/f accumulation lines are cut
        # off after the final line below.
        # Running totals for ROUGE-2 precision / recall / F1 across documents.
        sum_rouge_2_p = 0
        sum_rouge_2_r = 0
        sum_rouge_2_f = 0

        # Running totals for ROUGE-L precision / recall / F1.
        sum_rouge_l_p = 0
        sum_rouge_l_r = 0
        sum_rouge_l_f = 0

        # Score each document's LSA-extracted sentences against its labels.
        for idx in range(size_doc):
            score_1 = rouge_n(summarizer.lsa_evaluated_sentences[idx],
                              summarizer.label_sentences[idx],
                              n=1)
            # No explicit n here — presumably rouge_n defaults to n=2; confirm.
            score_2 = rouge_n(summarizer.lsa_evaluated_sentences[idx],
                              summarizer.label_sentences[idx])
            score_l = rouge_l_summary_level(
                summarizer.lsa_evaluated_sentences[idx],
                summarizer.label_sentences[idx])

            # print(score_1)
            # print(score_2)
            # print(score_l)

            # Each score is a mapping with 'p'/'r'/'f' entries.
            sum_rouge_1_p += score_1['p']
            sum_rouge_1_r += score_1['r']
            sum_rouge_1_f += score_1['f']

            sum_rouge_2_p += score_2['p']
            sum_rouge_2_r += score_2['r']
            sum_rouge_2_f += score_2['f']

            sum_rouge_l_p += score_l['p']
class Rouge:
    """ROUGE-1/2/L scorer with a 'FinRouge' tweak: averaged scores are
    boosted when finance-domain keywords appear in the hypotheses.

    Depends on module-level ``rouge_score`` and ``six`` imports.
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1":
        lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 1, **k),
        "rouge-2":
        lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 2, **k),
        "rouge-l":
        lambda hyp, ref, **k: rouge_score.rouge_l_summary_level(hyp, ref, **k),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self,
                 metrics=None,
                 stats=None,
                 return_lengths=False,
                 raw_results=False,
                 exclusive=False):
        """Configure metrics/stats to compute.

        :param return_lengths: also report hyp/ref token counts.
        :param raw_results: report raw counts ("hyp", "ref", "overlap")
            instead of f/p/r statistics.
        :param exclusive: forwarded to the underlying metric functions.
        :raises ValueError: on an unknown metric or stat name.
        """
        self.return_lengths = return_lengths
        self.raw_results = raw_results
        self.exclusive = exclusive

        if metrics is not None:
            self.metrics = [m.lower() for m in metrics]

            for m in self.metrics:
                if m not in Rouge.AVAILABLE_METRICS:
                    raise ValueError("Unknown metric '%s'" % m)
        else:
            self.metrics = Rouge.DEFAULT_METRICS

        if self.raw_results:
            # Raw mode replaces f/p/r with match-count fields.
            self.stats = ["hyp", "ref", "overlap"]
        else:
            if stats is not None:
                self.stats = [s.lower() for s in stats]

                for s in self.stats:
                    if s not in Rouge.AVAILABLE_STATS:
                        raise ValueError("Unknown stat '%s'" % s)
            else:
                self.stats = Rouge.DEFAULT_STATS

    def get_scores(self, hyps, refs, avg=False, ignore_empty=False):
        """Score each hypothesis against its reference.

        :param avg: if True, return averaged (FinRouge-adjusted) scores.
        :param ignore_empty: drop pairs where either side is empty.
        """
        if isinstance(hyps, six.string_types):
            hyps, refs = [hyps], [refs]

        if ignore_empty:
            # Filter out hyps of 0 length
            hyps_and_refs = zip(hyps, refs)
            hyps_and_refs = [
                _ for _ in hyps_and_refs if len(_[0]) > 0 and len(_[1]) > 0
            ]
            hyps, refs = zip(*hyps_and_refs)

        assert (isinstance(hyps, type(refs)))
        assert (len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}

            # Sentence-split on "." and collapse runs of whitespace.
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp,
                        ref,
                        raw_results=self.raw_results,
                        exclusive=self.exclusive)
                sen_score[m] = {s: sc[s] for s in self.stats}

            if self.return_lengths:
                lengths = {
                    "hyp": len(" ".join(hyp).split()),
                    "ref": len(" ".join(ref).split())
                }
                sen_score["lengths"] = lengths
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        # FinRouge: MODIFICATIONS MADE HERE TO IMPLEMENT FinRouge
        # Count finance-keyword hits across the raw hypothesis strings
        # (before sentence splitting). Any hit sets `double`; every
        # (keyword, hypothesis) hit increments `matches`.
        double = False
        matches = 0
        for word in [
                'corp', 'corporation', 'company', 'stock', 'share', 'shares',
                'stakeholder', 'financial', '$', 'economy', 'booming', 'bust',
                'quarter', 'earnings', 'shareholder', 'appoints', 'appointed',
                'reports'
        ]:
            for sent in hyps:
                if word in sent.split():
                    double = True
                    matches += 1

        scores = {m: {s: 0 for s in self.stats} for m in self.metrics}
        if self.return_lengths:
            scores["lengths"] = {"hyp": 0, "ref": 0}

        count = 0
        for (hyp, ref) in zip(hyps, refs):
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref, exclusive=self.exclusive)
                scores[m] = {s: scores[m][s] + sc[s] for s in self.stats}

            if self.return_lengths:
                scores["lengths"]["hyp"] += len(" ".join(hyp).split())
                scores["lengths"]["ref"] += len(" ".join(ref).split())

            count += 1

        # NOTE(review): when `double` is set, the raw keyword count is added
        # to sums of f/p/r values before dividing, so the "average" can
        # exceed 1.0 — presumably the intended FinRouge boost; confirm.
        if not double:
            avg_scores = {
                m: {s: scores[m][s] / count
                    for s in self.stats}
                for m in self.metrics
            }
        elif double:
            avg_scores = {
                m: {s: (scores[m][s] + matches) / count
                    for s in self.stats}
                for m in self.metrics
            }

        # NOTE(review): the same `matches` boost is applied to the averaged
        # token lengths, which are counts, not scores — confirm intent.
        if self.return_lengths and not double:
            avg_scores["lengths"] = {
                k: scores["lengths"][k] / count
                for k in ["hyp", "ref"]
            }
        elif self.return_lengths and double:
            avg_scores["lengths"] = {
                k: (scores["lengths"][k] + matches) / count
                for k in ["hyp", "ref"]
            }

        return avg_scores
Example #4
0
class Rouge:
    """Sentence-level and corpus-level ROUGE-1/2/L scoring.

    Hypotheses and references are split into sentences on "." and
    whitespace-normalized before being passed to the ``rouge_score``
    metric functions (module-level dependency, together with ``six``).
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1":
        lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 1, **k),
        "rouge-2":
        lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 2, **k),
        "rouge-l":
        lambda hyp, ref, **k: rouge_score.rouge_l_summary_level(hyp, ref, **k),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self,
                 metrics=None,
                 stats=None,
                 return_lengths=False,
                 raw_results=False,
                 exclusive=False):
        """Configure which metrics and stats to compute.

        :param return_lengths: also report hyp/ref token counts.
        :param raw_results: report raw counts ("hyp", "ref", "overlap")
            instead of f/p/r statistics.
        :param exclusive: forwarded to the underlying metric functions.
        :raises ValueError: on an unknown metric or stat name.
        """
        self.return_lengths = return_lengths
        self.raw_results = raw_results
        self.exclusive = exclusive

        if metrics is None:
            self.metrics = Rouge.DEFAULT_METRICS
        else:
            self.metrics = [m.lower() for m in metrics]
            for metric in self.metrics:
                if metric not in Rouge.AVAILABLE_METRICS:
                    raise ValueError("Unknown metric '%s'" % metric)

        if self.raw_results:
            # Raw mode replaces f/p/r with match-count fields.
            self.stats = ["hyp", "ref", "overlap"]
        elif stats is None:
            self.stats = Rouge.DEFAULT_STATS
        else:
            self.stats = [s.lower() for s in stats]
            for stat in self.stats:
                if stat not in Rouge.AVAILABLE_STATS:
                    raise ValueError("Unknown stat '%s'" % stat)

    def get_scores(self, hyps, refs, avg=False, ignore_empty=False):
        """Score each hypothesis against its reference.

        :param hyps: a string or sequence of hypothesis strings.
        :param refs: a string or sequence of reference strings (same length).
        :param avg: if True, return one dict of averaged scores.
        :param ignore_empty: drop pairs where either side is empty.
        """
        if isinstance(hyps, six.string_types):
            hyps, refs = [hyps], [refs]

        if ignore_empty:
            # Keep only pairs where both sides are non-empty.
            kept = [(h, r) for h, r in zip(hyps, refs)
                    if len(h) > 0 and len(r) > 0]
            hyps, refs = zip(*kept)

        assert isinstance(hyps, type(refs))
        assert len(hyps) == len(refs)

        if avg:
            return self._get_avg_scores(hyps, refs)
        return self._get_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        all_scores = []
        for hyp, ref in zip(hyps, refs):
            # Sentence-split on "." and collapse runs of whitespace.
            hyp_sents = [
                " ".join(s.split()) for s in hyp.split(".") if len(s) > 0
            ]
            ref_sents = [
                " ".join(s.split()) for s in ref.split(".") if len(s) > 0
            ]

            entry = {}
            for metric in self.metrics:
                raw = Rouge.AVAILABLE_METRICS[metric](
                    hyp_sents,
                    ref_sents,
                    raw_results=self.raw_results,
                    exclusive=self.exclusive)
                entry[metric] = {stat: raw[stat] for stat in self.stats}

            if self.return_lengths:
                entry["lengths"] = {
                    "hyp": len(" ".join(hyp_sents).split()),
                    "ref": len(" ".join(ref_sents).split()),
                }
            all_scores.append(entry)
        return all_scores

    def _get_avg_scores(self, hyps, refs):
        """Return ``{metric: {stat: value}}`` averaged over all pairs."""
        totals = {
            metric: dict.fromkeys(self.stats, 0)
            for metric in self.metrics
        }
        if self.return_lengths:
            totals["lengths"] = {"hyp": 0, "ref": 0}

        n_pairs = 0
        for hyp, ref in zip(hyps, refs):
            hyp_sents = [
                " ".join(s.split()) for s in hyp.split(".") if len(s) > 0
            ]
            ref_sents = [
                " ".join(s.split()) for s in ref.split(".") if len(s) > 0
            ]

            for metric in self.metrics:
                raw = Rouge.AVAILABLE_METRICS[metric](
                    hyp_sents, ref_sents, exclusive=self.exclusive)
                for stat in self.stats:
                    totals[metric][stat] += raw[stat]

            if self.return_lengths:
                totals["lengths"]["hyp"] += len(" ".join(hyp_sents).split())
                totals["lengths"]["ref"] += len(" ".join(ref_sents).split())

            n_pairs += 1

        averaged = {
            metric:
            {stat: totals[metric][stat] / n_pairs
             for stat in self.stats}
            for metric in self.metrics
        }

        if self.return_lengths:
            averaged["lengths"] = {
                side: totals["lengths"][side] / n_pairs
                for side in ["hyp", "ref"]
            }

        return averaged
Example #5
0
class Rouge:
    """ROUGE-1/2/L scorer with optional stemming and stop-word removal.

    Tokenization on the averaging path is delegated to a
    ``RougeCalculator`` instance; metric functions come from the
    module-level ``rouge_score`` dependency (``six`` is also required).
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 2),
        "rouge-l":
        lambda hyp, ref: rouge_score.rouge_l_summary_level(hyp, ref),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self,
                 metrics=None,
                 stats=None,
                 stem=False,
                 remove_stop=False):
        """Select metrics/stats and configure tokenization.

        :param stem: apply stemming during tokenization (avg path only).
        :param remove_stop: drop stop words during tokenization.
        :raises ValueError: on an unknown metric or stat name.
        """
        self.metrics = metrics if metrics is not None \
            else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None \
            else Rouge.DEFAULT_STATS

        # Tokenizer used only by _get_avg_scores.
        self.rouge_calc = RougeCalculator(stopwords=remove_stop,
                                          stemming=stem,
                                          lang="en")

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        """Score each hypothesis against its reference.

        :param avg: if True, return one dict of averaged scores.
        """
        if isinstance(hyps, six.string_types):
            hyps, refs = [hyps], [refs]

        assert (type(hyps) == type(refs))
        assert (len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            # Sentence-split on "." and collapse runs of whitespace.
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[s] for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        """Return ``{metric: {stat: value}}`` averaged over all pairs."""
        scores = {m: {s: 0 for s in self.stats} for m in self.metrics}

        count = 0
        for (hyp, ref) in zip(hyps, refs):

            # Sentence-split on " . " (space-padded periods) here, unlike
            # _get_scores which splits on bare ".".
            hyp = [
                " ".join(_.split()).strip() for _ in hyp.split(" . ")
                if len(_) > 0
            ]
            ref = [
                " ".join(_.split()).strip() for _ in ref.split(" . ")
                if len(_) > 0
            ]

            # Re-tokenize each sentence (stemming / stop-word removal).
            hyp = [" ".join(self.rouge_calc.tokenize(h, False)) for h in hyp]
            ref = [" ".join(self.rouge_calc.tokenize(r, True)) for r in ref]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                # Accumulate only the requested stats. Iterating over the
                # keys of ``sc`` raised KeyError whenever ``self.stats`` was
                # a strict subset of what the metric function returns.
                scores[m] = {s: scores[m][s] + sc[s] for s in self.stats}
            count += 1
        scores = {
            m: {s: scores[m][s] / count
                for s in self.stats}
            for m in self.metrics
        }
        return scores
Example #6
0
File: rouge.py  Project: yikangshen/rouge
class Rouge:
    """ROUGE scorer whose metric functions return positional tuples.

    Unlike the dict-returning variants, here a stat name is mapped to an
    index into the ``(f, p, r)`` tuple via ``AVAILABLE_STATS``. Requires
    the module-level ``rouge_score`` and ``np`` (numpy) imports.
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 2),
        "rouge-l":
        lambda hyp, ref: rouge_score.rouge_l_summary_level(hyp, ref),
    }

    DEFAULT_STATS = ["f", "p", "r"]
    # Maps a stat name to its index in the tuple a metric function returns.
    AVAILABLE_STATS = {"f": 0, "p": 1, "r": 2}

    def __init__(self, metrics=None, stats=None):
        """Select metrics/stats to report.

        :raises ValueError: on an unknown metric or stat name.
        """
        self.metrics = metrics if metrics is not None else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        """Score each hypothesis against its reference.

        :param avg: if True, return one dict of averaged scores.
        """
        # isinstance also accepts str subclasses, unlike the previous
        # ``type(hyps) == str`` check.
        if isinstance(hyps, str):
            hyps, refs = [hyps], [refs]

        assert (type(hyps) == type(refs))
        assert (len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {
                    s: sc[Rouge.AVAILABLE_STATS[s]]
                    for s in self.stats
                }
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        """Return ``{metric: {stat: value}}`` with each stat averaged
        (via ``np.mean``) over all pairs."""
        scores = {}
        for m in self.metrics:
            fn = Rouge.AVAILABLE_METRICS[m]
            sc = [fn(hyp, ref) for hyp, ref in zip(hyps, refs)]
            sc = [[sen_sc[Rouge.AVAILABLE_STATS[s]] for s in self.stats]
                  for sen_sc in sc]
            # zip(*sc) transposes per-pair rows into per-stat columns.
            scores[m] = {
                s: st
                for s, st in zip(self.stats, tuple(map(np.mean, zip(*sc))))
            }
        return scores
Example #7
0
class Rouge:
    """ROUGE-1/2/L scorer with multi-reference support on the averaging
    path: a single ``ref`` string may hold several gold references
    separated by TAB characters (see ``scoring`` modes "A" and "B").

    Depends on module-level ``rouge_score`` and ``six`` imports.
    """

    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 1, **k),
        "rouge-2": lambda hyp, ref, **k: rouge_score.rouge_n(hyp, ref, 2, **k),
        "rouge-l": lambda hyp, ref, **k:
            rouge_score.rouge_l_summary_level(hyp, ref, **k),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self, metrics=None, stats=None, return_lengths=False,
                 raw_results=False, exclusive=False):
        """Configure metrics/stats to compute.

        :param return_lengths: also report hyp/ref token counts.
        :param raw_results: report raw counts ("hyp", "ref", "overlap")
            instead of f/p/r statistics.
        :param exclusive: forwarded to the underlying metric functions.
        :raises ValueError: on an unknown metric or stat name.
        """
        self.return_lengths = return_lengths
        self.raw_results = raw_results
        self.exclusive = exclusive
        if metrics is not None:
            self.metrics = [m.lower() for m in metrics]

            for m in self.metrics:
                if m not in Rouge.AVAILABLE_METRICS:
                    raise ValueError("Unknown metric '%s'" % m)
        else:
            self.metrics = Rouge.DEFAULT_METRICS

        if self.raw_results:
            # Raw mode replaces f/p/r with match-count fields.
            self.stats = ["hyp", "ref", "overlap"]
        else:
            if stats is not None:
                self.stats = [s.lower() for s in stats]

                for s in self.stats:
                    if s not in Rouge.AVAILABLE_STATS:
                        raise ValueError("Unknown stat '%s'" % s)
            else:
                self.stats = Rouge.DEFAULT_STATS

    def get_scores(self, hyps, refs, avg=False, ignore_empty=False, scoring=None):
        """Score each hypothesis against its reference(s).

        :param avg: if True, return one dict of averaged scores.
        :param ignore_empty: drop pairs where either side is empty.
        :param scoring: "A" (average over TAB-separated references) or
            "B" (best reference); only honored when ``avg`` is True.
        """
        if isinstance(hyps, six.string_types):
            hyps, refs = [hyps], [refs]

        if ignore_empty:
            # Filter out hyps of 0 length
            hyps_and_refs = zip(hyps, refs)
            hyps_and_refs = [_ for _ in hyps_and_refs
                             if len(_[0]) > 0
                             and len(_[1]) > 0]
            hyps, refs = zip(*hyps_and_refs)

        assert(isinstance(hyps, type(refs)))
        assert(len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        # For now, `scoring` is only applied on the averaging path.
        return self._get_avg_scores(hyps, refs, scoring)

    def _get_scores(self, hyps, refs):
        """Return a per-pair list of ``{metric: {stat: value}}`` dicts."""
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            # Sentence-split on "." and collapse runs of whitespace.
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(
                    hyp,
                    ref,
                    raw_results=self.raw_results,
                    exclusive=self.exclusive)
                sen_score[m] = {s: sc[s] for s in self.stats}

            if self.return_lengths:
                lengths = {
                    "hyp": len(" ".join(hyp).split()),
                    "ref": len(" ".join(ref).split())
                }
                sen_score["lengths"] = lengths
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs, scoring=None):
        """Return ``{metric: {stat: value}}`` averaged over all pairs,
        honoring the multi-reference ``scoring`` mode ("A"/"B")."""
        scores = {m: {s: 0 for s in self.stats} for m in self.metrics}
        if self.return_lengths:
            scores["lengths"] = {"hyp": 0, "ref": 0}
        count = 0
        for (hyp, ref) in zip(hyps, refs):
            if scoring in ["A", "B"]:
                partial_scores = {m: {s: [] for s in self.stats} for m in self.metrics}
                hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
                partial_ref_length = []
                # Gold references are assumed to be TAB-separated in `ref`.
                for r in ref.split("\t"):
                    partial_ref = [" ".join(_.split()) for _ in r.split(".") if len(_) > 0]
                    partial_ref_length.append(len(" ".join(partial_ref).split()))
                    for m in self.metrics:
                        fn = Rouge.AVAILABLE_METRICS[m]
                        sc = fn(hyp, partial_ref, exclusive=self.exclusive)
                        for s in self.stats:
                            partial_scores[m][s].append(sc[s])

                # Merge scores (mode "A": average over the references).
                if scoring == "A":
                    for m in self.metrics:
                        for s in self.stats:
                            scores[m][s] += sum(partial_scores[m][s]) / len(partial_scores[m][s])

                    if self.return_lengths:
                        scores["lengths"]["hyp"] += len(" ".join(hyp).split())
                        scores["lengths"]["ref"] += sum(partial_ref_length) / len(partial_ref_length)

                else:
                    # Merge scores (mode "B": best reference).
                    # Picking the best needs a sort key; for now use the
                    # reference with the highest `rouge-l` f-score.
                    # NOTE: this requires "rouge-l" in self.metrics and
                    # "f" in self.stats, otherwise a KeyError is raised.
                    max_val = max(partial_scores["rouge-l"]["f"])
                    max_idx = partial_scores["rouge-l"]["f"].index(max_val)
                    for m in self.metrics:
                        for s in self.stats:
                            scores[m][s] += partial_scores[m][s][max_idx]

                    if self.return_lengths:
                        scores["lengths"]["hyp"] += len(" ".join(hyp).split())
                        scores["lengths"]["ref"] += partial_ref_length[max_idx]
            else:
                # With no scoring mode, if `ref` holds multiple
                # TAB-separated references, use only the first one.
                ref = ref.split("\t")[0]
                hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
                ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]
                for m in self.metrics:
                    fn = Rouge.AVAILABLE_METRICS[m]
                    sc = fn(hyp, ref, exclusive=self.exclusive)
                    scores[m] = {s: scores[m][s] + sc[s] for s in self.stats}

                if self.return_lengths:
                    scores["lengths"]["hyp"] += len(" ".join(hyp).split())
                    scores["lengths"]["ref"] += len(" ".join(ref).split())

            count += 1

        avg_scores = {
            m: {s: scores[m][s] / count for s in self.stats}
            for m in self.metrics
        }

        if self.return_lengths:
            avg_scores["lengths"] = {
                k: scores["lengths"][k] / count
                for k in ["hyp", "ref"]
            }
        return avg_scores