def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):
    """Run the trained word tagger and the stacked sentence model on the test
    essays, then write predictions, confidence scores, accuracy metrics and a
    category breakdown to the given output files.

    Relies on module-level state built during training (not visible in this
    chunk): feature_transformer, tag2word_classifier, tag2sent_classifier,
    wd_train_tags, wd_test_tags, wd_td_ys_bytag, train_wd_predictions_by_code,
    sent_input_feat_tags, sent_input_interaction_tags,
    sent_output_train_test_tags, regular_tags, SPARSE_SENT_FEATS, LOOK_BACK
    and USE_SVM.

    Parameters:
        test_essay_feats            -- test essays with extracted word-level features
        out_predictions_file        -- path for the per-sentence predicted classes
        out_predicted_margins_file  -- path for the per-sentence confidence values
        out_metrics_file            -- path for the accuracy-metrics report
        out_categories_file         -- path for the per-category breakdown
    """
    # Flatten essays to word-level (features, tags) and vectorize with the
    # transformer fitted on the training data.
    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)

    """ TEST Tagger """
    # Word-level predictions: one binary classifier per tag code.
    test_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)

    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    # Sentence-level stacking features built from the word tagger's outputs.
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats, test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)

    # Confidence values: SVMs expose a decision-function margin; other models
    # expose a class probability.
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

    # Same layout as the predictions file, but with raw confidence values.
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)
    # NOTE(review): train_sent_metrics is computed from the TEST sentence
    # predictions and is identical to test_sent_metrics below — this looks
    # like a copy/paste slip. Confirm whether train-set sentence ys/predictions
    # were intended here; the "Train" column of the SENTENCE report currently
    # repeats the test numbers.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)

    write_categories(out_predictions_file, "CB", out_categories_file)
    print s
def evaluate_ranker(model, xs, essay2crels, ys_bytag, set_cr_tags):
    """Evaluate *model* as a parse re-ranker.

    For every essay in essay2crels, rank that essay's candidate parses with a
    clone of the model, take the causal-relation labels of the top-ranked
    parse as the prediction (empty when the essay has no parser input), and
    return the micro-averaged metrics as a DataFrame.
    """
    # Work on a clone so averaging the weights leaves the trained model intact.
    ranker = model.clone()
    if hasattr(model, "average_weights"):
        ranker.average_weights()

    # Index the parser inputs by essay name for O(1) lookup below.
    inputs_by_essay = {inp.essay_name: inp for inp in xs}

    pred_ys_bytag = defaultdict(list)
    for ename, act_crels in essay2crels.items():
        if ename in inputs_by_essay:
            inp = inputs_by_essay[ename]
            ordering = ranker.rank(inp.all_feats_array)
            top_parse = inp.all_parses[ordering[0]]  # type: Tuple[str]
        else:
            # No predicted crels for this essay — predict nothing.
            top_parse = set()
        add_cr_labels(set(top_parse), pred_ys_bytag, set_cr_tags)

    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag, pred_ys_bytag)
    return get_micro_metrics(metrics_to_df(mean_metrics))
# NOTE(review): this span is the tail half of an evaluate_on_test_set-style
# routine — every name it reads (USE_SVM, sent_test_xs, tag2sent_classifier,
# out_predictions_file, ...) must be bound by the enclosing scope, which is
# not visible in this chunk.

# Confidence values: SVMs expose a decision-function margin; other models
# expose a class probability.
if USE_SVM:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
else:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

""" Write out the predicted classes """
with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

# Same layout as the predictions file, but with raw confidence values.
with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

""" Write out the accuracy metrics """
train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)
# NOTE(review): train_sent_metrics is computed from the TEST predictions and
# is identical to test_sent_metrics — looks like a copy/paste slip; confirm
# whether the train-set sentence ys/predictions were intended.
train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)

print s
#TODO - need to add logic here for GW
#write_categories(out_predictions_file, "CB", out_categories_file)
def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):
    """Run the trained word tagger and the stacked sentence model on the test
    essays, then write predictions, confidence scores, accuracy metrics and a
    category breakdown to the given output files.

    NOTE(review): this is a second definition of evaluate_on_test_set in this
    file; if both live in the same module, the later definition shadows the
    earlier one.

    Relies on module-level state built during training (not visible here):
    feature_transformer, tag2word_classifier, tag2sent_classifier,
    wd_train_tags, wd_test_tags, wd_td_ys_bytag, train_wd_predictions_by_code,
    sent_input_feat_tags, sent_input_interaction_tags,
    sent_output_train_test_tags, regular_tags, SPARSE_SENT_FEATS, LOOK_BACK
    and USE_SVM.
    """
    # Flatten essays to word-level (features, tags) and vectorize with the
    # transformer fitted on the training data.
    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)

    """ TEST Tagger """
    # Word-level predictions: one binary classifier per tag code.
    test_wd_predictions_by_code = test_classifier_per_code(
        test_x, tag2word_classifier, wd_test_tags)

    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    # Sentence-level stacking features built from the word tagger's outputs.
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)

    # Confidence values: SVMs expose a decision-function margin; other models
    # expose a class probability.
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs, tag2sent_classifier, sent_output_train_test_tags,
            predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs, tag2sent_classifier, sent_output_train_test_tags,
            predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

    # Same layout as the predictions file, but with raw confidence values.
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
        )
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_test_ys_bytag, test_wd_predictions_by_code)
    # NOTE(review): train_sent_metrics is computed from the TEST sentence
    # predictions and is identical to test_sent_metrics — looks like a
    # copy/paste slip; confirm whether the train-set sentence ys/predictions
    # were intended here.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)

    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(
            train_wd_metrics, test_wd_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(
            train_sent_metrics, test_sent_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)

    write_categories(out_predictions_file, "CB", out_categories_file)
    print s
def get_metrics_raw(essays, expected_tags, micro_only=False):
    """Compute mean word-level metrics for *essays* over *expected_tags*.

    Compares the actual word-level labels against the model's word-level
    predictions and returns the mean metrics.

    NOTE(review): micro_only is accepted but never read in this body — kept
    for interface compatibility; confirm whether micro-only filtering was
    intended here.
    """
    actual_ys = ResultsProcessor.get_wd_level_lbs(essays, expected_tags=expected_tags)
    predicted_ys = get_wd_level_preds(essays, expected_tags=expected_tags)
    return ResultsProcessor.compute_mean_metrics(actual_ys, predicted_ys)
predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags) with open(out_predicted_margins_file, "w+") as f_output_file: f_output_file.write( "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n" ) predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True) """ Write out the accuracy metrics """ train_wd_metrics = ResultsProcessor.compute_mean_metrics( wd_td_ys_bytag, train_wd_predictions_by_code) test_wd_metrics = ResultsProcessor.compute_mean_metrics( wd_test_ys_bytag, test_wd_predictions_by_code) train_sent_metrics = ResultsProcessor.compute_mean_metrics( sent_test_ys_bycode, test_sent_predictions_by_code) test_sent_metrics = ResultsProcessor.compute_mean_metrics( sent_test_ys_bycode, test_sent_predictions_by_code) with open(out_metrics_file, "w+") as f_metrics_file: s = "" pad = ResultsProcessor.pad_str s += ResultsProcessor.metrics_to_string( train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test"))) s += ResultsProcessor.metrics_to_string(
test_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)] # type: List[Tuple[Any,Any]] cv_folds = cross_validation(pred_tagged_essays_train, CV_FOLDS) # type: List[Tuple[Any,Any]] result_test_essay_level = evaluate_model_essay_level( folds=cv_folds, extractor_fn_names_lst=best_extractor_names, all_extractor_fns=all_extractor_fns, ngrams=ngrams, beta=beta, stemmed=stemmed, down_sample_rate=1.0, max_epochs=max_epochs) models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, cv_sent_vd_ys_by_tag = result_test_essay_level mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag) print(get_micro_metrics(metrics_to_df(mean_metrics))) models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag) print(get_micro_metrics(metrics_to_df(mean_metrics))) result_final_test = evaluate_model_essay_level( folds=test_folds, extractor_fn_names_lst=best_extractor_names, all_extractor_fns=all_extractor_fns, ngrams=ngrams, beta=beta, stemmed=stemmed, down_sample_rate=1.0,