Exemplo n.º 1
0
    def compareOnFile(self, fileName, encoding, resAccumulator):
        """Compare this port's output with the MecabOutputGetter output for every line of a file.

        Each line yielded by ``self.readFile`` is stripped, analysed with
        ``self.viterbi`` and rendered with ``Writer.getMecabOutput``; the
        same text is passed to ``MecabOutputGetter.run`` and the two results
        are compared, first by length and then element by element.

        Mismatches and ``IOError`` failures of the runner are not raised:
        their text is sent to ``resAccumulator.print`` and processing moves
        on to the next line.

        Args:
            fileName: forwarded unchanged to ``self.readFile``.
            encoding: forwarded to ``self.readFile``; when it equals
                ``'utf-8'`` each line is additionally passed through
                ``Helper.fixEncodingError``.
            resAccumulator: object whose ``print`` method receives every
                diagnostic and progress message.
        """
        helper = Helper()
        writer = Writer()
        runner = MecabOutputGetter()

        # 1-based number of the last line read. Taking it from enumerate()
        # keeps it in step with the file even when a line is abandoned after
        # an IOError, and makes the totals below the real number of lines
        # read (0 when nothing is yielded).
        lineNum = 0
        lines = self.readFile(fileName, encoding, resAccumulator)
        for lineNum, line in enumerate(lines, 1):
            text = line.strip()
            if encoding == 'utf-8':
                text = helper.fixEncodingError(text)
            nodes = self.viterbi.getBestPath(text)
            pyResult = writer.getMecabOutput(self.viterbi.getTokenizer(), nodes)

            try:
                mecabResult = runner.run(text)
            except IOError as e:
                # No reference output for this line: report and skip the
                # comparison, but still count the line as read.
                resAccumulator.print(text_type(e))
            else:
                try:
                    self.assertEqual(len(mecabResult), len(pyResult),
                        text + '\npyPort:\n' + helper.outputNodes(pyResult) +
                        '\nmecab:\n' + helper.outputNodes(mecabResult))
                    # Lengths are equal here, so zip() pairs every element.
                    for mecabNode, pyNode in zip(mecabResult, pyResult):
                        self.assertEqual(mecabNode, pyNode,
                            "at line " + str(lineNum) + ": '" + line + "'")
                except AssertionError as e:
                    # Only the first mismatch of a line is reported.
                    resAccumulator.print(text_type(e))

            if lineNum % 500 == 0:
                resAccumulator.print(text_type(lineNum) + ' lines have been processed')
        resAccumulator.print(text_type(lineNum) + ' lines have been processed')
Exemplo n.º 2
0
 def compareOneSentence(self, expr):
     """Assert that this port and MecabOutputGetter agree on a single expression.

     ``expr`` is analysed with ``self.viterbi`` and rendered through
     ``Writer.getMecabOutput``; the same ``expr`` is passed to
     ``MecabOutputGetter.run``. The two results must have the same length
     and be equal element by element, otherwise ``assertEqual`` fails.
     """
     bestPath = self.viterbi.getBestPath(expr)
     portedOutput = Writer().getMecabOutput(self.viterbi.getTokenizer(), bestPath)
     referenceOutput = MecabOutputGetter().run(expr)

     # Check the sizes first so that the pairwise walk below covers
     # every element of both results.
     self.assertEqual(len(referenceOutput), len(portedOutput))
     for expectedNode, actualNode in zip(referenceOutput, portedOutput):
         self.assertEqual(expectedNode, actualNode)