Example #1
    def eval_transformer(self, subsample: int = 0):
        results = {}
        all_questions_ids = get_topics(task=self.task, subset=self.subset)
        all_questions = {int(qid): self.post_parser.map_questions[int(qid)]
                         for qid in all_questions_ids}
        # all_questions = {int(qid): "Question %s content" % qid for qid in all_questions_ids}
        if subsample:
            # a dict cannot be sliced; keep only the first `subsample` items
            all_questions = dict(list(all_questions.items())[:subsample])

        for i, (qid, question) in enumerate(all_questions.items()):
            results[str(qid)] = {}
            judged_answer_ids = get_judged_documents(task=self.task,
                                                     subset=self.subset,
                                                     topic=str(qid))
            question_e = self.model.encode([question.body], batch_size=8)
            try:
                answers_bodies = [
                    self.post_parser.map_just_answers[int(aid)].body
                    for aid in judged_answer_ids
                ]
                # answers_bodies = ["Answer %s body" % aid for aid in judged_answer_ids]
            except KeyError:
                print("Key error at qid %s" % qid)
                answers_bodies = []
                # answers_bodies = ["Answer %s body" % aid for aid in judged_answer_ids]
            if not answers_bodies:
                print("No evaluated answers for question %s, dtype %s" %
                      (qid, str(type(qid))))
                continue
            answers_e = self.model.encode(answers_bodies, batch_size=8)
            answers_sims = cosine_similarity(np.array(question_e),
                                             np.array(answers_e))[0]

            for aid, answer_sim in sorted(zip(judged_answer_ids,
                                              answers_sims),
                                          key=lambda aid_sim: aid_sim[1],
                                          reverse=True):
                print(aid, answer_sim)
                results[str(qid)][str(aid)] = float(answer_sim)

        return results
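# Minimal runnable sketch of the ranking step above: rank candidate answers
# by cosine similarity to the question embedding. The hand-written vectors
# below stand in for SentenceTransformer embeddings; the answer ids are
# made up for illustration.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

question_e = np.array([[0.1, 0.9, 0.0]])            # one query embedding
answers_e = np.array([[0.0, 1.0, 0.0],              # three answer embeddings
                      [1.0, 0.0, 0.0],
                      [0.5, 0.5, 0.0]])
sims = cosine_similarity(question_e, answers_e)[0]
ranked = sorted(zip(['a1', 'a2', 'a3'], sims), key=lambda p: p[1], reverse=True)
print(ranked)  # 'a1' comes first: it points in (almost) the same direction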
Example #2
    def index_judged_questions(self, reload_embs_dir=False, subsample_to=2000):
        relevant_qs = dict()
        for relevant_qi in get_judged_documents(task='task1-votes',
                                                subset='validation'):
            try:
                parent_id = self.post_parser.map_just_answers[int(
                    relevant_qi)].parent_id
            except KeyError as e:
                print(
                    "IREvaluator error: judged answer %s was not loaded and cannot be evaluated"
                    % relevant_qi)
                raise e
            relevant_qs[parent_id] = self.post_parser.map_questions[parent_id]
        indexed_items = list(relevant_qs.items())
        if subsample_to:
            # a fixed seed ensures the same questions are chosen every time
            random.seed(1234)
            indexed_items = random.sample(indexed_items, subsample_to)

        self.add_to_index(indexed_items, reload_embs_dir=reload_embs_dir)
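# Runnable sketch of the deterministic subsampling used above: re-seeding the
# RNG before random.sample yields the same subset on every run.
import random

items = list(range(10))
random.seed(1234)
first = random.sample(items, 4)
random.seed(1234)
second = random.sample(items, 4)
assert first == second  # same seed => same reproducible subset
print(first)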
Example #3
def eval_transformer(model_dir: str, preproc: str, subsample: int = 0):
    model = SentenceTransformer(model_dir, device=device)

    task = 'task1-votes'
    subset = 'validation'
    results = {}
    all_questions_ids = get_topics(task=task, subset=subset)
    all_questions = get_questions(all_questions_ids, preproc=preproc)
    if subsample:
        all_questions = all_questions[:subsample]

    for i, (qid, question) in enumerate(all_questions):
        results[qid] = {}
        judged_answer_ids = get_judged_documents(task=task,
                                                 subset=subset,
                                                 topic=str(qid))
        question_e = model.encode([question.body], batch_size=8)
        answers_bodies = [
            dr.post_parser.map_just_answers[int(aid)].body
            for aid in judged_answer_ids
        ]
        if not answers_bodies:
            print("No evaluated answers for question %s, dtype %s" %
                  (qid, str(type(qid))))
            continue
        answers_e = model.encode(answers_bodies, batch_size=8)
        answers_sims = cosine_similarity(np.array(question_e),
                                         np.array(answers_e))[0]
        if i % 100 == 0:
            print("Question %s of %s" % (i, len(all_questions)))
        for aid, answer_sim in sorted(zip(judged_answer_ids, answers_sims),
                                      key=lambda aid_sim: aid_sim[1],
                                      reverse=True):
            # print(aid, answer_sim)
            results[qid][aid] = float(answer_sim)

    ndcg_val = get_ndcg(results, task=task, subset=subset)
    return ndcg_val, results
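# Usage sketch (illustrative, not from the original source): eval_transformer
# above additionally relies on module-level names (`device`, `dr`,
# `get_questions`, `get_ndcg`) defined elsewhere in the project; the model
# directory and preprocessing name below are assumptions.
ndcg_val, results = eval_transformer('models/my-sbert', preproc='latex', subsample=100)
print('NDCG on the subsample: %.4f' % ndcg_val)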
Example #4
from arqmath_eval import get_topics, get_judged_documents
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

task = 'task1-votes'
subset = 'validation'
results = {}
all_questions_ids = get_topics(task=task, subset=subset)
all_questions = {int(qid): postproc_parser.map_questions[int(qid)] for qid in all_questions_ids}

for i, (qid, question) in tqdm(list(enumerate(all_questions.items())), desc="Collecting answers for %s questions" % len(all_questions)):
    results[qid] = {}
    judged_answer_ids = get_judged_documents(task=task, subset=subset, topic=str(qid))
    question_e = model_saved.encode([question.body], batch_size=8)
    answers_bodies = [postproc_parser.map_just_answers[int(aid)].body for aid in judged_answer_ids]
    if not answers_bodies:
        print("No evaluated answers for question %s, dtype %s" % (qid, str(type(qid))))
        continue
    answers_e = model_saved.encode(answers_bodies, batch_size=8)
    answers_sims = cosine_similarity(np.array(question_e), np.array(answers_e))[0]
    for aid, answer_sim in sorted(zip(judged_answer_ids, answers_sims), key=lambda aid_sim: aid_sim[1], reverse=True):
        # print("Q %s, A %s: sim: %s" % (qid, aid, answer_sim))
        results[qid][aid] = float(answer_sim)


def report_ndcg_results(result_tsv_name: str, results: dict):
    with open(result_tsv_name, 'wt') as f:
        for topic, documents in results.items():
            top_documents = sorted(documents.items(), key=lambda x: x[1], reverse=True)[:1000]
            for rank, (document, similarity_score) in enumerate(top_documents):
                # the loop body was truncated in the source; the tab-separated
                # column layout below (topic, document, 1-based rank, score)
                # is an assumption
                f.write('%s\t%s\t%d\t%f\n' % (topic, document, rank + 1, similarity_score))
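# Runnable sketch: feed a toy results dict (topic -> {document: score}, the
# shape produced by the scoring loop above) through report_ndcg_results and
# inspect the ranked TSV it writes.
toy_results = {'1': {'a': 0.9, 'b': 0.7}, '2': {'c': 0.8}}
report_ndcg_results('toy_results.tsv', toy_results)
with open('toy_results.tsv') as f:
    print(f.read())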
Example #5
def get_common_parameters(result_type, math_representation, datasets,
                          dataset_parameters):
    discard_math = math_representation == 'nomath'
    judged_results = result_type == 'judged'

    if judged_results:
        topic_corpus_filename = ARQMATH_COLLECTION_POSTS_FILENAMES[
            math_representation if not discard_math else 'latex']
        topic_corpus_num_documents = ARQMATH_COLLECTION_POSTS_NUM_DOCUMENTS
        topic_ids = get_topics(task=TASK[result_type],
                               subset=SUBSET[result_type])
        document_corpus_filename = topic_corpus_filename
        document_corpus_num_documents = topic_corpus_num_documents
        document_ids = get_judged_documents(task=TASK[result_type],
                                            subset=SUBSET[result_type])
        topic_judgements = {
            topic_id: get_judged_documents(task=TASK[result_type],
                                           subset=SUBSET[result_type],
                                           topic=topic_id)
            for topic_id in topic_ids
        }
    else:
        topic_judgements = None
        if result_type == 'task1':
            topic_corpus_filename = ARQMATH_TASK1_POSTS_FILENAMES[
                math_representation if not discard_math else 'latex']
            topic_corpus_num_documents = ARQMATH_TASK1_TEST_POSTS_NUM_DOCUMENTS
            topic_ids = [
                'A.{}'.format(query_number + 1) for query_number in range(100)
                if (query_number + 1) not in (31, 78)
            ]
            document_corpus_filename = ARQMATH_COLLECTION_POSTS_FILENAMES[
                math_representation if not discard_math else 'latex']
            document_corpus_num_documents = ARQMATH_COLLECTION_POSTS_NUM_DOCUMENTS
            document_ids = get_judged_documents(task=TASK[result_type],
                                                subset=SUBSET[result_type])
        elif result_type == 'task2':
            assert not discard_math
            topic_corpus_filename = ARQMATH_TASK2_FORMULAE_FILENAMES[
                math_representation]
            topic_corpus_num_documents = ARQMATH_TASK2_FORMULAE_NUM_FORMULAE[
                math_representation]
            topic_ids = set((
                formula_id,
                *get_judged_documents(
                    task=TASK[result_type],
                    subset=SUBSET[result_type],
                    topic=formula_id,
                ),
            ) for formula_id in get_topics(task=TASK[result_type],
                                           subset=SUBSET[result_type]))
            document_corpus_filename = ARQMATH_COLLECTION_FORMULAE_FILENAMES[
                math_representation]
            document_corpus_num_documents = ARQMATH_COLLECTION_FORMULAE_NUM_FORMULAE[
                math_representation]
            document_ids = None
        else:
            raise ValueError('unsupported result_type: %s' % result_type)

    if isinstance(datasets, str):
        datasets = [datasets]
    json_filenames = [
        DATASET_JSON_FILENAMES[dataset]
        [math_representation if not discard_math else 'latex']
        for dataset in datasets
    ]
    json_nums_paragraphs = [
        DATASET_NUMS_PARAGRAPHS[dataset] for dataset in datasets
    ]

    dataset_parameters = {**DATASET_DEFAULT_PARAMETERS, **dataset_parameters}
    dataset_formattable_parameter_string = parameters_to_string({
        **dataset_parameters,
        **{
            'phrases': '{}'
        }
    })
    phraser_filename = ARXMLIV_OUTPUT_FILENAME.format(
        math_representation,
        '{}.phraser'.format(dataset_formattable_parameter_string))

    return {
        'judged_results': judged_results,
        'topic_judgements': topic_judgements,
        'json_filenames': json_filenames,
        'json_nums_paragraphs': json_nums_paragraphs,
        'dataset_parameters': dataset_parameters,
        'phraser_filename': phraser_filename,
        'topic_ids': topic_ids,
        'topic_corpus_filename': topic_corpus_filename,
        'topic_corpus_num_documents': topic_corpus_num_documents,
        'document_ids': document_ids,
        'document_corpus_filename': document_corpus_filename,
        'document_corpus_num_documents': document_corpus_num_documents,
        'discard_math': discard_math,
        'result_type': result_type,
    }
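# Usage sketch (illustrative): get_common_parameters depends on module-level
# constants (TASK, SUBSET, the ARQMATH_* filename/count tables, and the
# DATASET_* tables) from the surrounding project; the argument values below
# are assumptions, not part of the original source.
params = get_common_parameters(
    result_type='judged',
    math_representation='latex',
    datasets='arxmliv_08_2019',
    dataset_parameters={},
)
print(sorted(params))  # keys of the assembled parameter dict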