def test_custom_feat_avg_perceptron():
    """The averaged perceptron with custom features must beat 0.810 dev accuracy."""
    # Training and tagging are assumed to have run already; here we only score
    # the pre-written response file.
    # w, tr_acc, dv_acc = avg_perceptron.trainAvgPerceptron(10,tr_all,features.yourFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.yourFeatures,w,alltags),'classifier')
    threshold = 0.810
    conf_matrix = scorer.getConfusion(constants.DEV_FILE, 'avg_perceptron_custom.response')
    achieved = scorer.accuracy(conf_matrix)
    ok_(threshold < achieved,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, achieved))
def test_custom_str_perceptron():
    """The structured perceptron with custom HMM features must beat 0.810 dev accuracy."""
    # Training and tagging are assumed to have run already; here we only score
    # the pre-written response file.
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.yourHMMFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.yourHMMFeatures,w,alltags)[0],'custom_str_classifier')
    threshold = 0.810
    conf_matrix = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron_custom.response')
    achieved = scorer.accuracy(conf_matrix)
    ok_(threshold < achieved,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, achieved))
def test_str_perceptron():
    """The structured perceptron with word+transition features must beat 0.749 dev accuracy."""
    # Training and tagging are assumed to have run already; here we only score
    # the pre-written response file.
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.wordTransFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.wordTransFeatures,w,alltags)[0],'str_classifier')
    threshold = 0.749
    conf_matrix = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron.response')
    achieved = scorer.accuracy(conf_matrix)
    ok_(threshold < achieved,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, achieved))
def test_avg_perceptron():
    """The averaged perceptron with word+char features must beat 0.740 dev accuracy."""
    # Training and tagging are assumed to have run already; here we only score
    # the pre-written response file.
    # w, tr_acc, dv_acc = avg_perceptron.trainAvgPerceptron(10,tr_all,features.wordCharFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.wordCharFeatures,w,alltags),'classifier')
    threshold = 0.740
    conf_matrix = scorer.getConfusion(constants.DEV_FILE, 'avg_perceptron.response')
    achieved = scorer.accuracy(conf_matrix)
    ok_(threshold < achieved,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, achieved))
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Run *tagger* over *testfile*, write its predictions, and score them.

    Parameters:
    tagger -- function mapping (words, alltags) to a predicted tag sequence
    outfilename -- file to write predictions to, one tag per line with a
                   blank line after each sentence
    testfile -- (optional) CoNLL-format file containing the true labels

    Returns:
    confusion matrix from the scorer (dict keyed by (true_label, pred_label))
    """
    # Build the tag inventory from the training data.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            for tag in tagger(words, alltags):
                # file.write works on Python 2 and 3, unlike the original
                # `print >>outfile` statement, and produces identical output.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    # Run the scorer on the prediction file.
    return scorer.getConfusion(testfile, outfilename)
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Run *tagger* over *testfile*, write its predictions, and score them.

    Parameters:
    tagger -- function mapping (words, alltags) to a predicted tag sequence
    outfilename -- file to write predictions to, one tag per line with a
                   blank line after each sentence
    testfile -- (optional) CoNLL-format file containing the true labels

    Returns:
    confusion matrix from the scorer (dict keyed by (true_label, pred_label))
    """
    # Build the tag inventory from the training data.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            for tag in tagger(words, alltags):
                # file.write works on Python 2 and 3, unlike the original
                # `print >>outfile` statement, and produces identical output.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    # Run the scorer on the prediction file.
    return scorer.getConfusion(testfile, outfilename)
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Calculate confusion_matrix for a given tagger

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Build the tag inventory from the training data.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            for tag in tagger(words, alltags):
                # file.write works on Python 2 and 3, unlike the original
                # `print >>outfile` statement, and produces identical output.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    # Run the scorer on the prediction file.
    return scorer.getConfusion(testfile, outfilename)
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Calculate confusion_matrix for a given tagger

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Build the tag inventory from the training data.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            for tag in tagger(words, alltags):
                # file.write works on Python 2 and 3, unlike the original
                # `print >>outfile` statement, and produces identical output.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    # Run the scorer on the prediction file.
    return scorer.getConfusion(testfile, outfilename)