def eval_tagger(tagger, outfilename=None, all_tags=None, trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Compute the confusion matrix for a tagger.

    Parameters:
    tagger -- function mapping (words, possible_tags) to an optimal tag sequence
    outfilename -- file to write predictions to; a temp file is used if None
    testfile -- (optional) file containing the true labels

    Returns:
    confusion_matrix -- dict of occurrences of (true_label, pred_label)
    """
    # When no output path is given, fall back to a named temp file
    # (delete=False so the scorer can reopen it by name afterwards).
    if outfilename is None:
        pred_file = tempfile.NamedTemporaryFile('w', delete=False)
    else:
        pred_file = open(outfilename, 'w')
    with pred_file:
        apply_tagger(tagger, pred_file.name, all_tags, trainfile, testfile)
        # run the scorer on the prediction file
        return scorer.get_confusion(testfile, pred_file.name)
def test_model_crf_nr_dev_accuracies():
    """Print dev-set accuracies for the bilstm / bilstm_crf models on en and nr."""
    runs = [
        (DEV_FILE, '../bilstm-dev-en.preds'),
        (DEV_FILE, '../bilstm_crf-dev-en.preds'),
        (NR_DEV_FILE, '../bilstm_crf-dev-nr.preds'),
        (NR_DEV_FILE, '../bilstm-dev-nr.preds'),
    ]
    for gold_file, pred_file in runs:
        acc = scorer.accuracy(scorer.get_confusion(gold_file, pred_file))
        print("Acc: " + str(acc))
def eval_model(model, outfilename, word_to_ix, all_tags=None, trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Compute the confusion matrix for a model.

    Parameters:
    model -- model mapping words to an optimal tag sequence
    outfilename -- file to write the model's predictions to
    testfile -- (optional) file containing the true labels

    Returns:
    confusion_matrix -- dict of occurrences of (true_label, pred_label)
    """
    apply_model(model, outfilename, word_to_ix, all_tags, trainfile, testfile)
    # run the scorer on the prediction file
    return scorer.get_confusion(testfile, outfilename)
def eval_tagger(tagger, outfilename, all_tags=None, trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Compute the confusion matrix for a tagger.

    Parameters:
    tagger -- function mapping (words, possible_tags) to an optimal tag sequence
    outfilename -- file to write tagger predictions to
    testfile -- (optional) file containing the true labels

    Returns:
    confusion_matrix -- dict of occurrences of (true_label, pred_label)
    """
    apply_tagger(tagger, outfilename, all_tags, trainfile, testfile)
    # run the scorer on the prediction file
    return scorer.get_confusion(testfile, outfilename)
def eval_tagger(tagger, outfilename=None, all_tags=None, trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Compute the confusion matrix for a tagger.

    Parameters:
    tagger -- function mapping (words, possible_tags) to an optimal tag sequence
    outfilename -- file to write predictions to; a temp file is used if None
    testfile -- (optional) file containing the true labels

    Returns:
    confusion_matrix -- dict of occurrences of (true_label, pred_label)
    """
    # delete=False lets the scorer reopen the temp file by name after writing.
    if outfilename is None:
        pred_file = tempfile.NamedTemporaryFile('w', delete=False)
    else:
        pred_file = open(outfilename, 'w')
    with pred_file:
        apply_tagger(tagger, pred_file.name, all_tags, trainfile, testfile)
        # run the scorer on the prediction file
        return scorer.get_confusion(testfile, pred_file.name)
def test_hmm_test_accuracy():
    """HMM accuracy on the English test set must exceed 0.840."""
    cm = scorer.get_confusion(TEST_FILE, 'hmm-te-en.preds')
    ok_(scorer.accuracy(cm) > .840)
def test_hmm_test_accuracy():
    """HMM accuracy on the English test set must exceed 0.840."""
    score = scorer.accuracy(scorer.get_confusion(TEST_FILE, 'hmm-te-en.preds'))
    ok_(score > .840)
def test_nr_hmm_test_accuracy():
    """HMM accuracy on the Norwegian test set must exceed 0.853."""
    cm = scorer.get_confusion(NR_TEST_FILE, 'hmm-te-nr.preds')
    ok_(scorer.accuracy(cm) > .853)
def test_bakeoff_acc_d2_6_ja_beat_the_prof():
    """Japanese bakeoff test accuracy must beat the professor's 0.87882."""
    cm = scorer.get_confusion(JA_TEST_FILE, 'avp-words-best-te.ja.preds')
    assert_greater(scorer.accuracy(cm), .87882)
def test_sp_score_d1_7():
    """Structured perceptron dev accuracy on Japanese must exceed 0.78 (expected ~.7902)."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'avp-words.ja.preds')
    assert_greater(scorer.accuracy(cm), .78)
def test_sp_score_d1_7_test():
    """Structured perceptron test accuracy on Japanese must exceed 0.741 (expected ~.7514)."""
    cm = scorer.get_confusion(JA_TEST_FILE, 'avp-words-te.ja.preds')
    assert_greater(scorer.accuracy(cm), .741)
def test_neighbor_acc_d2_5_en():
    """Neighbor-feature dev accuracy on English must exceed 0.848 (expected ~.858)."""
    cm = scorer.get_confusion(DEV_FILE, 'avp-words-neighbor.preds')
    assert_greater(scorer.accuracy(cm), .848)
def test_neighbor_acc_d2_5_ja():
    """Neighbor-feature dev accuracy on Japanese must exceed 0.792 (expected ~.802)."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'avp-words-neighbor.ja.preds')
    assert_greater(scorer.accuracy(cm), .792)
def test_bakeoff_acc_d2_6_en_half_credit():
    """English bakeoff dev accuracy above 0.87 earns half credit."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'avp-words-best.preds'))
    assert_greater(score, .87)
def test_suff_feats_acc_d2_2_ja_test():
    """Suffix-feature test accuracy on Japanese must exceed 0.834 (expected ~.844)."""
    cm = scorer.get_confusion(JA_TEST_FILE, 'avp-words-suff-te.ja.preds')
    assert_greater(scorer.accuracy(cm), .834)
def test_neighbor_acc_d2_5_en():
    """Neighbor-feature dev accuracy on English must exceed 0.848 (expected ~.858)."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'avp-words-neighbor.preds'))
    assert_greater(score, .848)
def test_suff_feats_acc_d2_2_ja_dev():
    """Suffix-feature dev accuracy on Japanese must exceed 0.872 (expected ~.882)."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'avp-words-suff.ja.preds')
    assert_greater(scorer.accuracy(cm), .872)
def test_suff_feats_acc_d2_2_en_dev():
    """Suffix-feature dev accuracy on English must exceed 0.834 (expected ~.844)."""
    cm = scorer.get_confusion(DEV_FILE, 'avp-words-suff.preds')
    assert_greater(scorer.accuracy(cm), .834)
def test_sp_score_d1_7_test():
    """Structured perceptron test accuracy on Japanese must exceed 0.741 (expected ~.7514)."""
    score = scorer.accuracy(scorer.get_confusion(JA_TEST_FILE, 'avp-words-te.ja.preds'))
    assert_greater(score, .741)
def test_nr_hmm_test_accuracy():
    """HMM accuracy on the Norwegian test set must exceed 0.853."""
    score = scorer.accuracy(scorer.get_confusion(NR_TEST_FILE, 'hmm-te-nr.preds'))
    ok_(score > .853)
def test_bakeoff_acc_d2_6_ja_full_credit():
    """Japanese bakeoff dev accuracy above 0.90 earns full credit."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'avp-words-best.ja.preds'))
    assert_greater(score, .90)
def test_sp_score_d1_6_test():
    """Structured perceptron test accuracy on English must exceed 0.815 (expected ~.8229)."""
    cm = scorer.get_confusion(TEST_FILE, 'avp-words-te.preds')
    assert_greater(scorer.accuracy(cm), .815)
def test_bakeoff_acc_d2_6_ja_beat_the_prof():
    """Japanese bakeoff test accuracy must beat the professor's 0.87882."""
    score = scorer.accuracy(scorer.get_confusion(JA_TEST_FILE, 'avp-words-best-te.ja.preds'))
    assert_greater(score, .87882)
def test_suff_feats_acc_d2_2_ja_dev():
    """Suffix-feature dev accuracy on Japanese must exceed 0.872 (expected ~.882)."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'avp-words-suff.ja.preds'))
    assert_greater(score, .872)
def test_hmm_feat_acc_d3_3_en():
    """HMM-feature dev accuracy on English must exceed 0.862 (expected ~.872)."""
    cm = scorer.get_confusion(DEV_FILE, 'sp-hmm.preds')
    assert_greater(scorer.accuracy(cm), .862)
def test_bakeoff_acc_d2_6_en_half_credit():
    """English bakeoff dev accuracy above 0.87 earns half credit."""
    cm = scorer.get_confusion(DEV_FILE, 'avp-words-best.preds')
    assert_greater(scorer.accuracy(cm), .87)
def test_hmm_feat_acc_d3_3_ja():
    """HMM-feature dev accuracy on Japanese must exceed 0.797 (expected ~.807)."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'sp-hmm.ja.preds')
    assert_greater(scorer.accuracy(cm), .797)
def test_hmm_feat_acc_d3_3_ja():
    """HMM-feature dev accuracy on Japanese must exceed 0.797 (expected ~.807)."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'sp-hmm.ja.preds'))
    assert_greater(score, .797)
def test_bakeoff_acc_d3_4_en_half_credit():
    """English d3.4 bakeoff dev accuracy above 0.885 earns half credit."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'sp-best.preds'))
    assert_greater(score, .885)
def test_hmm_dev_accuracy():
    """HMM accuracy on the English dev set must exceed 0.840."""
    cm = scorer.get_confusion(DEV_FILE, 'hmm-dev-en.preds')
    ok_(scorer.accuracy(cm) > .840)
def test_bakeoff_acc_d3_4_en_beat_the_prof():
    """English d3.4 bakeoff test accuracy must beat 0.88735.

    Same target as with the classification-based tagger.
    """
    score = scorer.accuracy(scorer.get_confusion(TEST_FILE, 'sp-best-te.preds'))
    assert_greater(score, .88735)
def test_nr_hmm_dev_accuracy():
    """HMM accuracy on the Norwegian dev set must exceed 0.861."""
    cm = scorer.get_confusion(NR_DEV_FILE, 'hmm-dev-nr.preds')
    ok_(scorer.accuracy(cm) > .861)
def test_bakeoff_acc_d3_4_ja_full_credit():
    """Japanese d3.4 bakeoff dev accuracy above 0.91 earns full credit."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'sp-best.ja.preds'))
    assert_greater(score, .91)
def test_hmm_dev_accuracy():
    """HMM accuracy on the English dev set must exceed 0.840."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'hmm-dev-en.preds'))
    ok_(score > .840)
def test_bakeoff_acc_d3_4_ja_beat_the_prof():
    """Japanese d3.4 bakeoff test accuracy must beat 0.879926."""
    score = scorer.accuracy(scorer.get_confusion(JA_TEST_FILE, 'sp-best-te.ja.preds'))
    assert_greater(score, .879926)
def test_nr_hmm_dev_accuracy():
    """HMM accuracy on the Norwegian dev set must exceed 0.861."""
    score = scorer.accuracy(scorer.get_confusion(NR_DEV_FILE, 'hmm-dev-nr.preds'))
    ok_(score > .861)
def test_bilstm_test_accuracy():
    """BiLSTM accuracy must exceed 0.83.

    NOTE(review): despite the name, this scores against DEV_FILE — confirm
    whether the gold file or the test name is the intended one.
    """
    acc = scorer.accuracy(scorer.get_confusion(DEV_FILE, '../bilstm-te-en.preds'))
    ok_(acc > .83, "Accuracy expected: 0.83, actual:" + str(acc))
def test_sp_score_d1_6():
    """Structured perceptron dev accuracy on English must exceed 0.805 (expected ~.8129)."""
    cm = scorer.get_confusion(DEV_FILE, 'avp-words.preds')
    assert_greater(scorer.accuracy(cm), .805)
def test_model_en_test_accuracy1():
    """Bakeoff model accuracy on the English test set must exceed 0.87."""
    cm = scorer.get_confusion(TEST_FILE, 'bakeoff-te-en.preds')
    ok_(scorer.accuracy(cm) > .87)
def test_sp_score_d1_7():
    """Structured perceptron dev accuracy on Japanese must exceed 0.78 (expected ~.7902)."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'avp-words.ja.preds'))
    assert_greater(score, .78)
def test_model_nr_dev_accuracy1():
    """Bakeoff model accuracy on the Norwegian dev set must exceed 0.89."""
    cm = scorer.get_confusion(NR_DEV_FILE, 'bakeoff-dev-nr.preds')
    ok_(scorer.accuracy(cm) > .89)
def test_suff_feats_acc_d2_2_en_dev():
    """Suffix-feature dev accuracy on English must exceed 0.834 (expected ~.844)."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'avp-words-suff.preds'))
    assert_greater(score, .834)
def test_model_nr_test_accuracy1():
    """Bakeoff model accuracy on the Norwegian test set must exceed 0.88."""
    cm = scorer.get_confusion(NR_TEST_FILE, 'bakeoff-te-nr.preds')
    ok_(scorer.accuracy(cm) > .88)
def test_suff_feats_acc_d2_2_ja_test():
    """Suffix-feature test accuracy on Japanese must exceed 0.834 (expected ~.844)."""
    score = scorer.accuracy(scorer.get_confusion(JA_TEST_FILE, 'avp-words-suff-te.ja.preds'))
    assert_greater(score, .834)
def test_model_en_dev_accuracy1():
    """Bakeoff model accuracy on the English dev set must exceed 0.88."""
    cm = scorer.get_confusion(DEV_FILE, 'bakeoff-dev-en.preds')
    ok_(scorer.accuracy(cm) > .88)
def test_neighbor_acc_d2_5_ja():
    """Neighbor-feature dev accuracy on Japanese must exceed 0.792 (expected ~.802)."""
    score = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'avp-words-neighbor.ja.preds'))
    assert_greater(score, .792)
def test_bakeoff_acc_d3_4_en_beat_the_prof():
    """English d3.4 bakeoff test accuracy must beat 0.88735.

    Same target as with the classification-based tagger.
    """
    cm = scorer.get_confusion(TEST_FILE, 'sp-best-te.preds')
    assert_greater(scorer.accuracy(cm), .88735)
def test_bakeoff_acc_d2_6_ja_full_credit():
    """Japanese bakeoff dev accuracy above 0.90 earns full credit."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'avp-words-best.ja.preds')
    assert_greater(scorer.accuracy(cm), .90)
def test_bakeoff_acc_d3_4_ja_beat_the_prof():
    """Japanese d3.4 bakeoff test accuracy must beat 0.879926."""
    cm = scorer.get_confusion(JA_TEST_FILE, 'sp-best-te.ja.preds')
    assert_greater(scorer.accuracy(cm), .879926)
def test_hmm_feat_acc_d3_3_en():
    """HMM-feature dev accuracy on English must exceed 0.862 (expected ~.872)."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'sp-hmm.preds'))
    assert_greater(score, .862)
def test_sp_score_d1_6():
    """Structured perceptron dev accuracy on English must exceed 0.805 (expected ~.8129)."""
    score = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'avp-words.preds'))
    assert_greater(score, .805)
def test_bakeoff_acc_d3_4_en_half_credit():
    """English d3.4 bakeoff dev accuracy above 0.885 earns half credit."""
    cm = scorer.get_confusion(DEV_FILE, 'sp-best.preds')
    assert_greater(scorer.accuracy(cm), .885)
def test_sp_score_d1_6_test():
    """Structured perceptron test accuracy on English must exceed 0.815 (expected ~.8229)."""
    score = scorer.accuracy(scorer.get_confusion(TEST_FILE, 'avp-words-te.preds'))
    assert_greater(score, .815)
def test_bakeoff_acc_d3_4_ja_full_credit():
    """Japanese d3.4 bakeoff dev accuracy above 0.91 earns full credit."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'sp-best.ja.preds')
    assert_greater(scorer.accuracy(cm), .91)
def test_ja_hmm_dev_accuracy():
    """HMM accuracy on the Japanese dev set must exceed 0.84."""
    cm = scorer.get_confusion(JA_DEV_FILE, 'hmm-dev-ja.preds')
    ok_(scorer.accuracy(cm) > .84)
def test_bilstm_test_accuracy():
    """BiLSTM accuracy must exceed 0.83.

    NOTE(review): despite the name, this scores against DEV_FILE — confirm
    whether the gold file or the test name is the intended one.
    """
    cm = scorer.get_confusion(DEV_FILE, 'bilstm-te-en.preds')
    ok_(scorer.accuracy(cm) > .83)
def test_ja_hmm_test_accuracy():
    """HMM accuracy on the Japanese test set must exceed 0.81."""
    cm = scorer.get_confusion(JA_TEST_FILE, 'hmm-test-ja.preds')
    ok_(scorer.accuracy(cm) > .81)