#filter waste lines if splitValues[0] == 'text' or len(splitValues) < 9: #print splitValues continue if splitValues[0] in words: word = words[splitValues[0]] else: word = Word(splitValues[0]) #create Word object words[splitValues[0]] = word tagComb = TagCombination(splitValues[1], splitValues[2], splitValues[3], splitValues[4], splitValues[5], splitValues[6], splitValues[7], splitValues[8]) word.add(tagComb) #create connection between word and tag combination gs_long.close() #Part 2: compare tagger output file with gs tagger_output = codecs.open("dummy_compare.csv", "r", "utf-8") allLines = tagger_output.readlines() wordNotFoundCounter = 0 combinationFoundCounter = 0 combinationNotFoundCounter = 0 for line in allLines: splitValues = line.split("\t") #print splitValues if splitValues[0] in words: