def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):
    """Run the trained word tagger and stacked sentence classifier on the
    held-out test essays, then write predictions, confidences and metrics.

    Relies on module-level state that is not passed in: wd_train_tags,
    feature_transformer, tag2word_classifier, tag2sent_classifier,
    sent_input_feat_tags, sent_input_interaction_tags,
    sent_output_train_test_tags, regular_tags, SPARSE_SENT_FEATS, LOOK_BACK,
    USE_SVM, wd_td_ys_bytag, train_wd_predictions_by_code and wd_test_tags.

    Parameters
    ----------
    test_essay_feats : featurized test essays (word level)
    out_predictions_file : path for the per-sentence predicted classes
    out_predicted_margins_file : path for the per-sentence confidence scores
    out_metrics_file : path for the padded accuracy-metrics report
    out_categories_file : path passed to write_categories for the "CB" breakdown
    """

    # Flatten essays to word-level features/tags, then vectorize with the
    # transformer fitted on the training data.
    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    # Sentence-level features are stacked from the word tagger's outputs.
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                         sent_input_interaction_tags,
                                                                                         test_essay_feats, test_x,
                                                                                         wd_test_ys_bytag,
                                                                                         tag2word_classifier,
                                                                                         SPARSE_SENT_FEATS, LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    # Confidence scores: SVMs expose a decision-function margin; other
    # classifiers expose a predicted probability instead.
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    # Pipe-delimited: one row per sentence with gold codes and predictions.
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    # Same layout, but confidence scores instead of hard labels.
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags, output_confidence=True)
    """ Write out the accuracy metrics """
    # Word (tagging) level: training metrics come from module-level globals.
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)
    # NOTE(review): both "train" and "test" sentence metrics are computed from
    # the same TEST ys/predictions, so the two report rows are identical.
    # Likely a copy/paste slip — confirm the intended train-side inputs.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        # Build one padded, human-readable report covering both levels.
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics,
                                                "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics,
                                                "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print s
def evaluate_ranker(model, xs, essay2crels, ys_bytag, set_cr_tags):
    """Score a ranking model against the gold causal relations, essay by essay.

    For each essay, the ranker's top-ranked candidate parse is taken as the
    predicted set of causal-relation labels; essays with no parser input get
    an empty prediction. Returns a micro-metrics DataFrame.
    """
    ranker = model.clone()
    # Averaged-perceptron style models expose average_weights(); evaluate with
    # the averaged weights when available.
    if hasattr(model, "average_weights"):
        ranker.average_weights()

    # Index parser inputs by essay name for O(1) lookup below.
    inputs_by_essay = {parser_input.essay_name: parser_input for parser_input in xs}

    predicted_ys_bytag = defaultdict(list)
    for essay_name in essay2crels:
        candidate = inputs_by_essay.get(essay_name)
        if candidate is None:
            # No predicted crels for this essay.
            top_parse = set()
        else:
            ranking = ranker.rank(candidate.all_feats_array)
            top_parse = candidate.all_parses[ranking[0]]  # type: Tuple[str]
        add_cr_labels(set(top_parse), predicted_ys_bytag, set_cr_tags)

    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag, predicted_ys_bytag)
    return get_micro_metrics(metrics_to_df(mean_metrics))
# --- Module-level script: evaluate the stacked sentence model and write the
# --- prediction / confidence / metrics files. Uses names defined earlier in
# --- the script (sent_test_xs, tag2sent_classifier, USE_SVM, ...).

# Confidence scores: SVMs expose a decision-function margin; other
# classifiers expose a predicted probability instead.
if USE_SVM:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
else:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

""" Write out the predicted classes """
# Pipe-delimited file: one row per sentence with gold codes and predictions.
with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

# Same layout, but confidence scores instead of hard labels.
with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

""" Write out the accuracy metrics """
# Word (tagging) level: train vs test.
train_wd_metrics    = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics     = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)

# NOTE(review): both "train" and "test" sentence metrics are computed from the
# same TEST ys/predictions, so the two report rows are identical. Likely a
# copy/paste slip — confirm the intended train-side inputs.
train_sent_metrics  = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics   = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    # Build one padded, human-readable report covering both levels.
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(train_wd_metrics,   test_wd_metrics,   "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)
    print s
    #TODO - need to add logic here for GW
    #write_categories(out_predictions_file, "CB", out_categories_file)
# 示例#4 (Example #4), score: 0 — scrape-site marker left in by extraction
def evaluate_on_test_set(test_essay_feats, out_predictions_file,
                         out_predicted_margins_file, out_metrics_file,
                         out_categories_file):
    """Evaluate the word tagger plus stacked sentence classifier on the test
    essays and write predictions, confidence scores, metrics and categories.

    NOTE(review): reformatted duplicate of the earlier evaluate_on_test_set in
    this file. Relies on the same module-level globals (wd_train_tags,
    feature_transformer, tag2word_classifier, tag2sent_classifier, USE_SVM,
    regular_tags, sent_output_train_test_tags, wd_td_ys_bytag,
    train_wd_predictions_by_code, wd_test_tags, SPARSE_SENT_FEATS, LOOK_BACK).
    """

    # Flatten essays to word-level features/tags, then vectorize.
    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(
        test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    # Sentence-level features are stacked from the word tagger's outputs.
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS,
        LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    # Confidence scores: SVM margin vs predicted probability.
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=probability_for_tag)
    """ Write out the predicted classes """
    # Pipe-delimited: one row per sentence with gold codes and predictions.
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode,
                            test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    # Same layout, but confidence scores instead of hard labels.
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
        )
        predictions_to_file(f_output_file,
                            sent_test_ys_bycode,
                            test_decision_functions_by_code,
                            test_essay_feats,
                            regular_tags + sent_output_train_test_tags,
                            output_confidence=True)
    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_test_ys_bytag, test_wd_predictions_by_code)
    # NOTE(review): "train" and "test" sentence metrics use the same TEST
    # ys/predictions, so both rows are identical — likely a copy/paste slip.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        # Build one padded, human-readable report covering both levels.
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(
            train_wd_metrics, test_wd_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(
            train_sent_metrics, test_sent_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print s
def get_metrics_raw(essays, expected_tags, micro_only=False):
    """Compute mean word-level metrics for *essays* over *expected_tags*.

    Compares gold word-level labels against the predictions attached to the
    essays and returns the mean metrics structure.

    Note: *micro_only* is accepted for interface compatibility but unused here.
    """
    actual_ys_bycode = ResultsProcessor.get_wd_level_lbs(essays, expected_tags=expected_tags)
    predicted_ys_bycode = get_wd_level_preds(essays, expected_tags=expected_tags)
    return ResultsProcessor.compute_mean_metrics(actual_ys_bycode, predicted_ys_bycode)
# 示例#6 (Example #6), score: 0 — scrape-site marker left in by extraction
    predictions_to_file(f_output_file, sent_test_ys_bycode,
                        test_sent_predictions_by_code, test_essay_feats,
                        regular_tags + sent_output_train_test_tags)

# --- Duplicate script fragment (scrape artifact): writes the confidence file
# --- and recomputes metrics; mirrors the earlier copy of this script and uses
# --- the same module-level names.
with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write(
        "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
    )
    predictions_to_file(f_output_file,
                        sent_test_ys_bycode,
                        test_decision_functions_by_code,
                        test_essay_feats,
                        regular_tags + sent_output_train_test_tags,
                        output_confidence=True)
""" Write out the accuracy metrics """
# Word (tagging) level: train vs test.
train_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_test_ys_bytag, test_wd_predictions_by_code)

# NOTE(review): "train" sentence metrics use the TEST ys/predictions, so they
# duplicate the test row — likely a copy/paste slip.
train_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(
        train_wd_metrics, test_wd_metrics,
        "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(
# Final-test fold (train on all training essays, test on the held-out set)
# plus cross-validation folds built from the training essays only.
test_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any,Any]]
cv_folds = cross_validation(pred_tagged_essays_train, CV_FOLDS)  # type: List[Tuple[Any,Any]]

# Cross-validated essay-level evaluation with the best extractor combination.
result_test_essay_level = evaluate_model_essay_level(
    folds=cv_folds,
    extractor_fn_names_lst=best_extractor_names,
    all_extractor_fns=all_extractor_fns,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=1.0,
    max_epochs=max_epochs)

# NOTE(review): the same 5-tuple is unpacked twice below with different names
# for the 4th/5th elements (cv_td_preds_by_sent / cv_sent_vd_ys_by_tag vs
# cv_sent_vd_ys_by_tag / cv_sent_vd_predictions_by_tag); at most one
# interpretation can match the actual return order of
# evaluate_model_essay_level — confirm against its definition.
models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, cv_sent_vd_ys_by_tag = result_test_essay_level

# Training-fold ("td") micro metrics.
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag)
print(get_micro_metrics(metrics_to_df(mean_metrics)))

models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level

# Validation-fold ("vd") micro metrics.
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag)
print(get_micro_metrics(metrics_to_df(mean_metrics)))

result_final_test = evaluate_model_essay_level(
    folds=test_folds,
    extractor_fn_names_lst=best_extractor_names,
    all_extractor_fns=all_extractor_fns,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=1.0,