Пример #1
0
        #filter waste lines
        if splitValues[0] == 'text' or len(splitValues) < 9: 
                #print splitValues
                continue
        
        if splitValues[0] in words:
                word = words[splitValues[0]]
        else:
                word = Word(splitValues[0]) #create Word object
                words[splitValues[0]] = word 

        tagComb = TagCombination(splitValues[1], splitValues[2], splitValues[3],
                                 splitValues[4], splitValues[5], splitValues[6],
                                 splitValues[7], splitValues[8])

        word.add(tagComb) #create connection between word and tag combination

gs_long.close()  # done reading the gs file in Part 1; release its handle

#Part 2: compare tagger output file with gs
# Read every line of the tagger output up front. The context manager makes
# sure the handle is closed even if decoding fails part-way through the read
# (codecs.open decodes as UTF-8 and raises on invalid bytes).
with codecs.open("dummy_compare.csv", "r", "utf-8") as tagger_output:
        allLines = tagger_output.readlines()

# Counters for the comparison, all starting at zero.
# NOTE(review): presumably updated by the loop over allLines that follows --
# confirm against the rest of the script.
wordNotFoundCounter = 0
combinationFoundCounter = 0
combinationNotFoundCounter = 0

for line in allLines:
        splitValues = line.split("\t")
        #print splitValues
                
        if splitValues[0] in words: