def compareOnFile(self, fileName, encoding, resAccumulator):
    """Compare this port's output with MeCab's for every line of a file.

    Each line obtained from ``self.readFile(fileName, encoding,
    resAccumulator)`` is stripped, analysed with ``self.viterbi`` and
    rendered with ``Writer.getMecabOutput``; the same text is also sent
    to ``MecabOutputGetter.run``.  The two results are compared first by
    length and then element by element.

    Failures do not abort the run: an ``IOError`` raised while running
    MeCab, or an ``AssertionError`` from a mismatch, is converted to text
    and passed to ``resAccumulator.print``, and processing continues with
    the next line.

    Args:
        fileName: Forwarded unchanged to ``self.readFile``.
        encoding: Forwarded to ``self.readFile``; when it equals
            ``'utf-8'`` the stripped text is additionally passed through
            ``Helper.fixEncodingError`` before analysis.
        resAccumulator: Object whose ``print`` method receives error
            messages and progress reports.
    """
    helper = Helper()
    writer = Writer()
    # One runner instance is shared by all lines of the file.
    runner = MecabOutputGetter()

    # Stays 0 when the file yields no lines, so the final report is exact.
    lineNum = 0
    lines = self.readFile(fileName, encoding, resAccumulator)
    # enumerate() advances the 1-based line number on every iteration,
    # including the ones where MeCab fails, so the "at line N" messages
    # always refer to the line actually being compared.
    for lineNum, line in enumerate(lines, 1):
        text = line.strip()
        if encoding == 'utf-8':
            text = helper.fixEncodingError(text)

        nodes = self.viterbi.getBestPath(text)
        pyResult = writer.getMecabOutput(self.viterbi.getTokenizer(), nodes)

        try:
            mecabResult = runner.run(text)
        except IOError as e:
            # MeCab could not be run for this line: report and move on.
            resAccumulator.print(text_type(e))
        else:
            try:
                # A length mismatch is reported together with both
                # outputs; the per-element comparison is then skipped.
                self.assertEqual(
                    len(mecabResult), len(pyResult),
                    text
                    + '\npyPort:\n' + helper.outputNodes(pyResult)
                    + '\nmecab:\n' + helper.outputNodes(mecabResult))
                for mecabNode, pyNode in zip(mecabResult, pyResult):
                    self.assertEqual(
                        mecabNode, pyNode,
                        "at line " + str(lineNum) + ": '" + line + "'")
            except AssertionError as e:
                resAccumulator.print(text_type(e))

        # lineNum equals the number of lines handled so far.
        if lineNum % 500 == 0:
            resAccumulator.print(
                text_type(lineNum) + ' lines have been processed')

    resAccumulator.print(text_type(lineNum) + ' lines have been processed')
def compareOneSentence(self, expr):
    """Assert that this port and MeCab produce the same output for ``expr``.

    ``expr`` is analysed with ``self.viterbi`` and rendered through
    ``Writer.getMecabOutput``; the reference result comes from
    ``MecabOutputGetter.run``.  The results must have equal length and
    be equal element by element, otherwise ``assertEqual`` fails.
    """
    bestPath = self.viterbi.getBestPath(expr)
    actual = Writer().getMecabOutput(self.viterbi.getTokenizer(), bestPath)
    expected = MecabOutputGetter().run(expr)

    self.assertEqual(len(expected), len(actual))
    for index, expectedNode in enumerate(expected):
        self.assertEqual(expectedNode, actual[index])