Пример #1
0
#Part 2: compare tagger output file with gs
# Reads the tab-separated tagger output and, for every line, looks up the
# word (column 0) in `words` and asks the entry whether it already has the
# tag combination built from columns 1-8. Three counters are kept and
# reported at the end.
# NOTE(review): `words` and `TagCombination` are defined outside this section;
# presumably `words` maps a word string to its gold-standard entry -- confirm.
# NOTE(review): lines are split without stripping, so the last column still
# carries the line terminator -- confirm this matches how the gold standard
# was loaded.
wordNotFoundCounter = 0
combinationFoundCounter = 0
combinationNotFoundCounter = 0

# The context manager closes the file even if reading raises.
with codecs.open("dummy_compare.csv", "r", "utf-8") as tagger_output:
    allLines = tagger_output.readlines()

for line in allLines:
    splitValues = line.split("\t")

    if splitValues[0] not in words:
        # Unknown word: report it and move on. There is no entry to compare
        # against or to add to, so `word`/`tagComb` must not be touched here
        # (they would be undefined or left over from a previous line).
        wordNotFoundCounter += 1
        print(splitValues[0])
        continue

    word = words[splitValues[0]]
    # Explicit indexing (rather than slicing) keeps the IndexError on lines
    # with fewer than nine columns.
    tagComb = TagCombination(splitValues[1], splitValues[2], splitValues[3], splitValues[4],
                             splitValues[5], splitValues[6], splitValues[7], splitValues[8])
    if word.hasTagCombination(tagComb):
        combinationFoundCounter += 1
    else:
        combinationNotFoundCounter += 1

    # Register the combination on the entry it was actually checked against.
    # NOTE(review): this mutates the gold-standard entry while evaluating, so
    # a repeated unknown combination counts as "found" the second time --
    # confirm this is intended.
    word.add(tagComb)

print("Words not found in gold standard: " + str(wordNotFoundCounter))
print("Wrong tagged words: " + str(combinationNotFoundCounter)) #Unknown tag combinations (known word)
print("Correctly tagged words: " + str(combinationFoundCounter)) #Known tag combinations