def main(): q21 = q2_1() x = [] y = [] pos = ['N', 'VG', 'ADJ', 'ADV'] print pos extractor = make_pos_extractor(pos) classifier = q21.evaluate_features(extractor, 10) x.append(1) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pos = ['N', 'V', 'VG', 'VN', 'VN', 'ADJ', 'ADV'] print pos extractor = make_pos_extractor(pos) classifier = q21.evaluate_features(extractor, 10) x.append(2) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pos = ['V', 'ADJ', 'ADV'] print pos extractor = make_pos_extractor(pos) classifier = q21.evaluate_features(extractor, 10) x.append(3) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pos = ['ADJ', 'ADV'] print pos extractor = make_pos_extractor(pos) classifier = q21.evaluate_features(extractor, 10) x.append(4) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pos = ['N', 'ADJ', 'ADV'] print pos extractor = make_pos_extractor(pos) classifier = q21.evaluate_features(extractor, 10) x.append(5) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pylab.bar(x, y, width=0.02, facecolor='blue', align='center') pylab.xlabel('POS') pylab.ylabel("Accuracy") pylab.title("Accuracy for each pos set") pylab.grid(False) pylab.show() return
def evaluate_features(self,feature_extractor, N): self.negative = movie_reviews.fileids('neg') #list of all names of the documents under neg folder self.positive = movie_reviews.fileids('pos') #list of all names of the documents under pos folder self.maintrain, self.maintest = self.stratifiedSplit(self.negative, self.positive, N) lst = [] trainvocabulary = [] for doc,lbl in self.maintrain: x = (feature_extractor(movie_reviews.words(fileids=[doc])),lbl) lst.append(x) trainvocabulary = trainvocabulary + x[0].keys() trainvocabulary = set(trainvocabulary) if q2_1.W == 0: q2_1.W = len(trainvocabulary) print "no. of features in train:", self.W nb = classifier.train(lst) self.testClassify = self.classifyTest(self.maintest, nb, feature_extractor) print "accuracy = ", accuracy(self.maintest, self.testClassify) print "Negative:" print " precision = ", self.calcPrec('neg', self.maintest, self.testClassify) print " recall = ", self.calcRecall('neg', self.maintest, self.testClassify) print " f measure = ", self.calcFMeasur('neg', self.maintest, self.testClassify) print "Positive:" print " precision = ", self.calcPrec('pos', self.maintest, self.testClassify) print " recall = ", self.calcRecall('pos', self.maintest, self.testClassify) print " f measure = ", self.calcFMeasur('pos', self.maintest, self.testClassify) nb.show_most_informative_features() return nb
def accuracy(rtetagger, gold):
    """
    Score the accuracy of the RTETagger against the Gold standard.

    @type rtetagger: ???
    @param rtetagger: The rtetagger being evaluated.
    @type gold: C{list} of L{RTEPair}
    @param gold: The list of tagged text-hypothesis pairs to score
        the tagger on.
    @rtype: C{float}
    """
    # FIX: docstring said `@param tagger` for a parameter named `rtetagger`.
    gold_values = [(rtepair.gid, rtepair.value) for rtepair in gold]
    # Tag every pair and compare (id, predicted) against (id, gold value).
    predictions = [(rtepair.gid, rtetagger.tag(rtepair)) for rtepair in gold]
    return evaluate.accuracy(gold_values, predictions)
def plotGraph(q21, K): x = [] y = [] for i in range(1, 6): newK = kVal(q21, i, K) extractor = make_topK_non_stopword_extractor(newK, stopset) print "top K without stops words, K = ", newK, ":" classifier = q21.evaluate_features(extractor, 10) x.append(float(newK) / float(q21.W)) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pylab.bar(x, y, width=0.02, facecolor='blue', align='center') pylab.xlabel('K/W') pylab.ylabel("Accuracy") pylab.title("Accuracy for each K/W value") pylab.grid(False) pylab.show() return
def plotGraph(q21, K): x = [] y = [] for i in range(1,6): newK = kVal(q21, i, K) extractor = make_topK_non_stopword_extractor(newK, stopset) print "top K without stops words, K = ", newK, ":" classifier = q21.evaluate_features(extractor, 10) x.append(float(newK)/float(q21.W)) acc = accuracy(q21.maintest, q21.testClassify) y.append(acc) pylab.bar(x, y, width=0.02, facecolor='blue', align='center') pylab.xlabel('K/W') pylab.ylabel("Accuracy") pylab.title("Accuracy for each K/W value") pylab.grid(False) pylab.show() return
def accuracy(tagger, gold):
    """
    Score the accuracy of the tagger against the gold standard.
    Strip the tags from the gold standard text, retag it using
    the tagger, then compute the accuracy score.

    @type tagger: C{TaggerI}
    @param tagger: The tagger being evaluated.
    @type gold: C{list} of C{Token}
    @param gold: The list of tagged tokens to score the tagger on.
    @rtype: C{float}
    """
    reference = []
    predicted = []
    for sent in gold:
        tagged_sent = list(sent)
        reference.extend(tagged_sent)
        # Remove the gold tags and let the tagger re-tag the bare sentence.
        predicted.extend(list(tagger.tag(untag(tagged_sent))))
    return evaluate.accuracy(reference, predicted)
def accuracy(chunker, gold):
    """
    Score the accuracy of the chunker against the gold standard.
    Strip the chunk information from the gold standard and rechunk it
    using the chunker, then compute the accuracy score.

    @type chunker: C{ChunkParserI}
    @param chunker: The chunker being evaluated.
    @type gold: C{tree}
    @param gold: The chunk structures to score the chunker on.
    @rtype: C{float}
    """
    # FIX: docstring said `@param tagger` for a parameter named `chunker`;
    # also removed dead commented-out debug prints.
    gold_tags = []
    test_tags = []
    for gold_tree in gold:
        # Flatten discards gold chunk structure so the chunker starts fresh.
        test_tree = chunker.parse(gold_tree.flatten())
        gold_tags += tree2conlltags(gold_tree)
        test_tags += tree2conlltags(test_tree)
    return evaluate.accuracy(gold_tags, test_tags)
def evaluate_features(self, feature_extractor, N): self.negative = movie_reviews.fileids( 'neg') #list of all names of the documents under neg folder self.positive = movie_reviews.fileids( 'pos') #list of all names of the documents under pos folder self.maintrain, self.maintest = self.stratifiedSplit( self.negative, self.positive, N) lst = [] trainvocabulary = [] for doc, lbl in self.maintrain: x = (feature_extractor(movie_reviews.words(fileids=[doc])), lbl) lst.append(x) trainvocabulary = trainvocabulary + x[0].keys() trainvocabulary = set(trainvocabulary) if q2_1.W == 0: q2_1.W = len(trainvocabulary) print "no. of features in train:", self.W nb = classifier.train(lst) self.testClassify = self.classifyTest(self.maintest, nb, feature_extractor) print "accuracy = ", accuracy(self.maintest, self.testClassify) print "Negative:" print " precision = ", self.calcPrec('neg', self.maintest, self.testClassify) print " recall = ", self.calcRecall('neg', self.maintest, self.testClassify) print " f measure = ", self.calcFMeasur('neg', self.maintest, self.testClassify) print "Positive:" print " precision = ", self.calcPrec('pos', self.maintest, self.testClassify) print " recall = ", self.calcRecall('pos', self.maintest, self.testClassify) print " f measure = ", self.calcFMeasur('pos', self.maintest, self.testClassify) nb.show_most_informative_features() return nb