Example #1
0
def get_bot_accuracies(bot, scored_qa_pairs=None, min_qa_bot_confidence=.2):
    """ Compare answers from bot to answers in test set

    Args:
        bot: skill object exposing `.reset_context(text)` and
            `.reply(question)`, where reply returns an iterable of
            (confidence, answer) tuples.
        scored_qa_pairs: list of dicts with 'question', 'answer', 'score'
            and 'topic' keys; or a str accepted by `load_qa_dataset`;
            or None to load the default dataset.
        min_qa_bot_confidence: stop scanning contexts once the best reply's
            confidence exceeds this threshold.

    Returns:
        list of dicts: copies of the input pairs augmented with
        'bot_answer', three edit-distance metrics, a word-vector
        similarity, and a combined 'bot_accuracy' score in [0, 1].

    >>> from qary.skills import glossary_bots
    >>> bot = glossary_bots.Bot()
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='Root Mean Square Error', score=.9, topic='ds')]
    >>> get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs)[0]['bot_accuracy']
    1.0
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='root-mean-sqr-error', score=.9, topic='ds')]
    >>> get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs)[0]
    {'question': 'What is RMSE?',
     'answer': 'root-mean-sqr-error',
     'score': 0.9,
     'topic': 'ds',
     'bot_answer': 'Root Mean Square Error',
     'bot_w2v_similarity': 0.64...,
     'bot_ed_distance': 0.52...,
     'bot_ed_distance_low': 0.31...,
     'bot_ed_distance_folded': 0.15...,
     'bot_accuracy': 0.65...}
    """
    if scored_qa_pairs is None:
        scored_qa_pairs = load_qa_dataset()
    elif isinstance(scored_qa_pairs, str):
        scored_qa_pairs = load_qa_dataset(scored_qa_pairs)
    validated_qa_pairs = []
    for truth in scored_qa_pairs:
        texts = scrape_wikipedia.find_document_texts(topic=truth['topic'],
                                                     max_results=10)
        # FIX: initialize so an empty `texts` iterable cannot leave
        # `replies` unbound (NameError) at the fallback below.
        replies = []
        for context in texts:
            bot.reset_context(context)
            replies = sorted(bot.reply(truth['question']))
            # `replies` is already sorted, so the highest-confidence reply
            # is the last element (no need to sort a second time).
            if replies and replies[-1][0] > min_qa_bot_confidence:
                break
        replies = replies or [(0, "Sorry, I don't know.")]
        truth['bot_answer'] = replies[-1][1]
        truth['bot_w2v_similarity'] = nlp(truth['bot_answer']).similarity(
            nlp(truth['answer']))
        # NOTE(review): these divisions assume truth['answer'] is non-empty;
        # an empty answer would raise ZeroDivisionError — confirm upstream
        # data guarantees this.
        truth['bot_ed_distance'] = distance(
            truth['answer'], truth['bot_answer']) / len(truth['answer'])
        truth['bot_ed_distance_low'] = distance(
            truth['answer'].lower().strip(),
            truth['bot_answer'].lower().strip()) / len(truth['answer'].strip())
        truth['bot_ed_distance_folded'] = distance(
            fold_characters(truth['answer']),
            fold_characters(truth['bot_answer'])) / len(
                truth['answer'].strip())
        # Accuracy = mean of the vector similarity and (1 - mean normalized
        # edit distance); both components lie roughly in [0, 1].
        truth['bot_accuracy'] = .5 * truth['bot_w2v_similarity'] + .5 * (
            1 - (truth['bot_ed_distance'] + truth['bot_ed_distance_low'] +
                 truth['bot_ed_distance_folded']) / 3)
        validated_qa_pairs.append(dict(truth))

    return validated_qa_pairs
Example #2
0
def load(domains=FAQ_DOMAINS):
    """ Load yaml file, use hashtags to create context tags as multihot columns

    Load faq*.yml into dictionary: question: answer

    Returns:
        dict with 'questions' (np.ndarray of str), 'answers' (np.ndarray
        of str), and 'question_vectors' (np.ndarray of normalized
        docvectors), all filtered to rows where both Q and A are nonblank.

    NOTE(review): the doctest below references 'raw'/'cleaned' keys that
    this implementation does not return — it looks stale; confirm before
    relying on it.

    >>> g = load(domains='dsdh'.split(','))
    >>> len(g['raw']) <= len(g['cleaned']) > 30
    True
    >>> sorted(g['cleaned']['Allele'])
    ['acronym', 'definition', 'hashtags', 'parenthetical']
    """
    questions, answers, question_vectors = [], [], []

    faqdirpath = os.path.join(DATA_DIR, 'faq')
    for domain in tqdm(domains):
        for filepath in Path(faqdirpath).glob(f'faq*{domain}*.yml'):
            try:
                filepointer = open(filepath)
            except FileNotFoundError as e:
                log.error(f"{e}\n    Unable to find the file path object: {filepath}")
                continue
            with filepointer:
                log.info(f"loading: {filepath.name}\n    with file pointer: {filepointer}")
                try:
                    # FIX: safe_load avoids arbitrary object construction from
                    # the YAML stream and works on PyYAML >= 6, where bare
                    # yaml.load() without a Loader raises TypeError.
                    qa_list = yaml.safe_load(filepointer)
                except ScannerError as e:
                    log.error(f"{e}\n    yaml.load unable to read {filepointer.name}")
                    continue
            # FIX: an empty YAML file parses to None; treat it as zero pairs
            # instead of raising TypeError on iteration.
            for qa_dict in qa_list or []:
                questions.append(qa_dict.get('Q', qa_dict.get('q', '')))
                answers.append(qa_dict.get('A', qa_dict.get('a', '')))
                try:
                    question_vectors.append(list(nlp(questions[-1] or '').vector))
                except TypeError:
                    # Unparseable question text: fall back to the placeholder
                    # vector so rows stay aligned across the three lists.
                    question_vectors.append(list(UNKNOWN_WORDVEC))
                    continue
                assert len(UNKNOWN_WORDVEC) == len(question_vectors[-1])

    log.debug(f'len(question_vectors): {len(question_vectors)}')

    questions = np.array(questions)
    log.debug(f'len(questions): {len(questions)}')
    answers = np.array(answers)
    log.debug(f'len(answers): {len(answers)}')
    # Keep only rows where both the question and the answer are nonblank.
    # (Names were previously swapped — zip yields (question, answer).)
    mask = np.array([(bool(q) and bool(a) and len(str(q).strip()) > 0 and len(str(a).strip()) > 0)
                     for q, a in zip(questions, answers)])

    question_vectors = normalize_docvectors(question_vectors)

    # This should be a Kendra/gensim/annoy class (with methods like .find_similar)
    return dict(
        questions=questions[mask],
        answers=answers[mask],
        question_vectors=np.array([qv for qv, m in zip(question_vectors, mask) if m])
    )
Example #3
0
def get_bot_accuracies(bot,
                       scored_qa_pairs=None,
                       min_qa_bot_confidence=.2,
                       num_questions=None,
                       shuffle_seed=None):
    """ Compare answers from bot to answers in test set

    Args:
        bot: skill object exposing `.reset_context(text)` and
            `.reply(question)` (reply returns (confidence, answer) tuples).
        scored_qa_pairs: list of dicts with 'question', 'answer', 'score',
            'topic' keys; or a str accepted by `load_qa_dataset`; or None
            for the default dataset.
        min_qa_bot_confidence: stop scanning article texts once the best
            reply's confidence exceeds this threshold.
        num_questions: optional cap on the number of pairs processed.
        shuffle_seed: if truthy, seed numpy's RNG and shuffle the pairs
            in place before iterating.

    Yields:
        dict: a copy of each valid pair augmented with 'bot_answer',
        similarity/edit-distance metrics, and 'bot_accuracy'.

    >>> from qary.skills import glossary_bots
    >>> bot = glossary_bots.Bot()
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='Root Mean Square Error', score=.9, topic='ds')]
    >>> next(get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs))['bot_accuracy']
    1.0
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='root-mean-sqr-error', score=.9, topic='ds')]
    >>> next(get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs))
    {'question': 'What is RMSE?',
     'answer': 'root-mean-sqr-error',
     'score': 0.9,
     'topic': 'ds',
     'bot_answer': 'Root Mean Square Error',
     'bot_w2v_similarity': 0.64...,
     'bot_ed_distance': 0.52...,
     'bot_ed_distance_low': 0.31...,
     'bot_ed_distance_folded': 0.15...,
     'bot_accuracy': 0.65...}
    """
    if scored_qa_pairs is None:
        scored_qa_pairs = load_qa_dataset()
    elif isinstance(scored_qa_pairs, str):
        scored_qa_pairs = load_qa_dataset(scored_qa_pairs)
    if shuffle_seed:
        # Legacy global seeding keeps the shuffle reproducible per seed.
        np.random.seed(shuffle_seed)
        np.random.shuffle(scored_qa_pairs)
    bot_answers = {}  # memoize the best answer per question string
    for i, truth in enumerate(scored_qa_pairs):
        if num_questions and i >= num_questions:
            break
        topic = truth.get('topic')
        # FIX: use .get() so malformed records are skipped instead of
        # raising KeyError on a missing 'answer'/'question' key.
        if not truth or not topic or not truth.get(
                'answer') or not truth.get('question'):
            continue
        log.warning(f"topic: {truth['topic']}, question: {truth['question']}")
        textgen = scrape_wikipedia.find_article_texts(query=[topic],
                                                      max_articles=5)
        # Topic articles first, then articles matching the question itself.
        texts = chain(
            textgen,
            scrape_wikipedia.find_article_texts(query=truth['question'],
                                                max_articles=10))

        # TODO: def get_best_bot_answer(bot, question, texts)  # memoize
        bot_answer = bot_answers.get(truth['question'], None)
        if not bot_answer:
            # FIX: initialize so an empty `texts` generator cannot leave
            # `replies` unbound (NameError) at the fallback below.
            replies = []
            for context in texts:
                bot.reset_context(context)
                replies = sorted(bot.reply(truth['question']))
                # Already sorted: the highest-confidence reply is last
                # (no need to sort a second time).
                if replies and replies[-1][0] > min_qa_bot_confidence:
                    break
            replies = replies or [(0, "Sorry, I don't know.")]
            bot_answer = replies[-1][1]
            bot_answers[truth['question']] = bot_answer
        truth['bot_answer'] = bot_answer
        # END TODO: def get_best_bot_answer(bot, question, texts)

        truth['bot_w2v_similarity'] = nlp(truth['bot_answer']).similarity(
            nlp(truth['answer']))
        truth['bot_ed_distance'] = distance(
            truth['answer'], truth['bot_answer']) / len(truth['answer'])
        truth['bot_ed_distance_low'] = distance(
            truth['answer'].lower().strip(),
            truth['bot_answer'].lower().strip()) / len(truth['answer'].strip())
        truth['bot_ed_distance_folded'] = distance(
            fold_characters(truth['answer']),
            fold_characters(truth['bot_answer'])) / len(
                truth['answer'].strip())
        # Accuracy = mean of vector similarity and (1 - mean normalized
        # edit distance); both components lie roughly in [0, 1].
        truth['bot_accuracy'] = .5 * truth['bot_w2v_similarity'] + .5 * (
            1 - (truth['bot_ed_distance'] + truth['bot_ed_distance_low'] +
                 truth['bot_ed_distance_folded']) / 3)
        log.warning(
            f"q: accuracy: {truth['question']}: {truth['bot_accuracy']}")
        yield dict(truth)
Example #4
0
def test_spacy_language_model():
    """Smoke-test that the spaCy pipeline is loaded and tokenizes text."""
    assert callable(nlp)
    tokens = list(nlp("Hello world!"))
    assert len(tokens) == 3