# Part 2: compare the tagger output file with the gold standard (gs).
#
# Each line of "dummy_compare.csv" is split on tabs: column 0 is looked up in
# `words`, and columns 1-8 are passed to TagCombination. Every line ends up in
# exactly one of three counters:
#   - wordNotFoundCounter:        column 0 is not a key of `words`
#   - combinationFoundCounter:    word is known and has this tag combination
#   - combinationNotFoundCounter: word is known but lacks this tag combination
wordNotFoundCounter = 0
combinationFoundCounter = 0
combinationNotFoundCounter = 0

# `with` guarantees the file is closed even if a line fails to parse.
with codecs.open("dummy_compare.csv", "r", "utf-8") as tagger_output:
    allLines = tagger_output.readlines()

    for line in allLines:
        # NOTE(review): lines keep their line ending, so the last column of a
        # row may still end in "\n" -- confirm the gold-standard tag
        # combinations were built from equally unstripped values.
        splitValues = line.split("\t")

        if splitValues[0] in words:
            word = words[splitValues[0]]
            tagComb = TagCombination(
                splitValues[1],
                splitValues[2],
                splitValues[3],
                splitValues[4],
                splitValues[5],
                splitValues[6],
                splitValues[7],
                splitValues[8],
            )
            if word.hasTagCombination(tagComb):
                combinationFoundCounter += 1
            else:
                combinationNotFoundCounter += 1
        else:
            wordNotFoundCounter += 1
            # Echo the unknown word so it can be inspected by hand.
            print(splitValues[0])
            # NOTE(review): the original ran `word.add(tagComb)` at this
            # point. For an unknown word neither name is bound for the current
            # line: they are either undefined (NameError when the first line
            # is an unknown word) or left over from an earlier line, which
            # would add a stale tag combination to an unrelated gold-standard
            # entry. The call was dropped; confirm nothing relied on it.

# Single-argument print(...) behaves the same as the print statement on
# Python 2 and also parses on Python 3.
print("Words not found in gold standard: " + str(wordNotFoundCounter))
print("Wrong tagged words: " + str(combinationNotFoundCounter))  # Unknown tag combinations (known word)
print("Correctly tagged words: " + str(combinationFoundCounter))  # Known tag combinations