def calc_lexical_similarity_metrics(nn_model, train, questions, tfidf_vectorizer):
    # Non-conditioned responses serve as the baseline for every condition.
    responses_baseline = predict_for_condition_id(nn_model, questions.x)

    for condition, condition_id in nn_model.condition_to_index.items():
        if condition == DEFAULT_CONDITION:
            continue

        # Ground-truth responses for the samples labeled with this condition.
        responses_token_ids_ground_truth = train.y[train.condition_ids == condition_id]
        if not responses_token_ids_ground_truth.size:
            _logger.warning('No dataset samples found with the given condition "%s", skipping metrics.', condition)
            continue

        responses_ground_truth = transform_token_ids_to_sentences(responses_token_ids_ground_truth,
                                                                  nn_model.index_to_token)
        responses = predict_for_condition_id(nn_model, questions.x, condition_id)

        # Compare the conditioned responses against the non-conditioned baseline
        # and against the ground truth.
        lex_sim_conditioned_vs_non_conditioned = calculate_lexical_similarity(responses, responses_baseline,
                                                                              tfidf_vectorizer)
        lex_sim_conditioned_vs_groundtruth = calculate_lexical_similarity(responses, responses_ground_truth,
                                                                          tfidf_vectorizer)

        yield condition, (lex_sim_conditioned_vs_non_conditioned, lex_sim_conditioned_vs_groundtruth)
def calc_lexical_similarity_metrics(nn_model, testset, tfidf_vectorizer):
    """
    For each condition calculate lexical similarity between ground-truth responses and
    generated conditioned responses. Similarly compare ground-truth responses with non-conditioned generated responses.
    If lex_sim(gt, cond_resp) > lex_sim(gt, non_cond_resp), the conditioning on extra information proves to be useful.
    :param nn_model: trained model to evaluate
    :param testset: context-sensitive testset, instance of Dataset
    :param tfidf_vectorizer: instance of scikit-learn TfidfVectorizer, calculates lexical similariry for documents
    according to TF-IDF metric
    :return: two dictionaries:
        {condition: lex_sim(gt, cond_resp)},
        {condition: lex_sim(gt, non_cond_resp)}
    """
    gt_vs_cond_lex_sim, gt_vs_non_cond_lex_sim = {}, {}

    for condition, condition_id in nn_model.condition_to_index.items():
        sample_mask_for_condition = testset.condition_ids == condition_id
        contexts_for_condition = testset.x[sample_mask_for_condition]
        responses_for_condition = testset.y[sample_mask_for_condition]

        if not responses_for_condition.size:
        _logger.warning('No dataset samples found for condition "{}", skipping it.'.format(condition))
            continue

        gt_responses = transform_token_ids_to_sentences(responses_for_condition, nn_model.index_to_token)
        conditioned_responses = predict_for_condition_id(nn_model, contexts_for_condition, condition_id)
        non_conditioned_responses = predict_for_condition_id(nn_model, contexts_for_condition, condition_id=None)

        gt_vs_cond_lex_sim[condition] = \
            calculate_lexical_similarity(gt_responses, conditioned_responses, tfidf_vectorizer)

        gt_vs_non_cond_lex_sim[condition] = \
            calculate_lexical_similarity(gt_responses, non_conditioned_responses, tfidf_vectorizer)

    return gt_vs_cond_lex_sim, gt_vs_non_cond_lex_sim
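
# The project's calculate_lexical_similarity is not shown above. A minimal sketch of
# one plausible implementation, assuming the score is the cosine similarity between
# the mean TF-IDF vectors of the two response sets and that tfidf_vectorizer is
# already fitted (the actual implementation may differ):
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def calculate_lexical_similarity(docs_a, docs_b, tfidf_vectorizer):
    # Vectorize both response sets with the same fitted vectorizer so the
    # TF-IDF feature spaces are comparable.
    vectors_a = tfidf_vectorizer.transform(docs_a)
    vectors_b = tfidf_vectorizer.transform(docs_b)

    # Collapse each set to its mean TF-IDF vector and compare the two vectors.
    mean_a = np.asarray(vectors_a.mean(axis=0))
    mean_b = np.asarray(vectors_b.mean(axis=0))
    return float(cosine_similarity(mean_a, mean_b)[0, 0])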