Example #1
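This excerpt depends on helpers that are defined elsewhere. A hedged guess at the surrounding imports (numpy and tqdm are standard; normalize_answer, flatten_iterable and f1_score are project-local utilities whose import paths are not shown here):

import numpy as np
from tqdm import tqdm

Note that any_found below is a method of an answer-detector class; the class itself, which sets self.answer_tokens via set_question, is not part of this excerpt.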
    def any_found(self, para):
        """Return every (start, end) token span in `para` that matches an answer alias."""
        # Normalize the paragraph tokens the same way the answer tokens are normalized.
        words = [normalize_answer(w) for w in flatten_iterable(para)]
        occurrences = []
        for answer in self.answer_tokens:
            answer = [normalize_answer(w) for w in answer]

            # Candidate starts: every position whose word matches the first answer token.
            word_starts = [i for i, w in enumerate(words) if answer[0] == w]
            n_tokens = len(answer)
            for start in word_starts:
                end = start + 1
                ans_token = 1
                while ans_token < n_tokens and end < len(words):
                    next_word = words[end]
                    if answer[ans_token] == next_word:
                        ans_token += 1
                        end += 1
                    elif next_word == "":
                        # Skip words that normalize to the empty string (e.g. punctuation).
                        end += 1
                    else:
                        break
                if n_tokens == ans_token:
                    occurrences.append((start, end))
        return list(set(occurrences))
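A quick illustration of what any_found returns, assuming normalize_answer lower-cases tokens and strips punctuation; the detector and its set_question method come from the surrounding code, and the input sentence is made up:

# detector.set_question([["new", "york"]])
# paragraph = [["He", "moved", "to", "New", "York", "in", "1990", "."]]
# detector.any_found(paragraph)  # -> [(3, 5)], the token span covering "New York"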
def evaluate_question_detector(questions,
                               corpus,
                               word_tokenize,
                               detector,
                               reference_detector=None,
                               compute_f1s=False):
    """ Just for debugging """
    n_no_docs = 0
    answer_per_doc = []
    answer_f1s = []

    for q in tqdm(questions):
        tokenized_aliases = [
            word_tokenize(x) for x in q.answer.normalized_aliases
        ]
        detector.set_question(tokenized_aliases)

        for doc in q.all_docs:
            doc = corpus.get_document(doc.doc_id)
            if doc is None:
                n_no_docs += 1
                continue

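            # Record every detected span in this document as (paragraph_ix, start, end).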
            output = []
            for i, para in enumerate(doc):
                for s, e in detector.any_found(para):
                    output.append((i, s, e))

            # If the primary detector found nothing, check whether the reference
            # detector finds anything, and print the discrepancy if it does.
            if len(output) == 0 and reference_detector is not None:
                reference_detector.set_question(tokenized_aliases)
                detected = []
                for i, para in enumerate(doc):
                    for s, e in reference_detector.any_found(para):
                        detected.append((i, s, e))

                if len(detected) > 0:
                    print("Found a difference")
                    print(q.answer.normalized_aliases)
                    print(tokenized_aliases)
                    for p, s, e in detected:
                        token = flatten_iterable(doc[p])[s:e]
                        print(token)

            answer_per_doc.append(output)

            if compute_f1s:
                f1s = []
                for p, s, e in output:
                    token = flatten_iterable(doc[p])[s:e]
                    answer = normalize_answer(" ".join(token))
                    f1 = 0
                    for gt in q.answer.normalized_aliases:
                        f1 = max(f1, f1_score(answer, gt))
                    f1s.append(f1)
                answer_f1s.append(f1s)

    n_answers = sum(len(x) for x in answer_per_doc)
    if n_no_docs > 0:
        print("%d docs could not be loaded" % n_no_docs)
    print("Found %d answers (av %.4f)" %
          (n_answers, n_answers / len(answer_per_doc)))
    print("%.4f docs have answers" %
          np.mean([len(x) > 0 for x in answer_per_doc]))
    if len(answer_f1s) > 0:
        print("Average f1 is %.4f" % np.mean(flatten_iterable(answer_f1s)))