Пример #1
0
class PyWordReaderTestCases(unittest.TestCase):
    """Unit tests for WordReader.

    Covers word sanitizing, the character/index maps and their lookup
    helpers, and the line, file and word counters filled in by readWords().
    Expected values (sanitizedWords, properChrMap, properIdxMap,
    linesIn2books, noOfFiles, wordsInTestFile, ...) are module-level
    fixtures defined outside this class.
    """

    def setUp(self):
        """Create a reader over two book files before every test."""
        self.lukija = WordReader(["../../Material/Grimm's Fairy Tales.txt"],
                                 specialCharacters=["-", "'"],
                                 acceptNumerals=True, acceptUpperCase=True,
                                 acceptLowerCase=False)
        # Registering the second file here also exercises addFileName.
        self.lukija.addFileName("../../Material/The Adventures of Tom Sawyer by Mark Twain.txt")

    def tearDown(self):
        """Reset the reader after every test."""
        self.lukija.clear('all')

    def testSanitize(self):
        """ Test whether word sanitizing works """
        words = [self.lukija.sanitize(word) for word in unsanitizedWords]
        self.assertEqual(words, sanitizedWords, 'Failed to sanitize words')

    def testCreateChrMap(self):
        """ Test whether index and character maps are okay """
        chrMap, idxMap = self.lukija._createChrMap()
        self.assertEqual(chrMap, properChrMap, 'Bad character map')
        self.assertEqual(idxMap, properIdxMap, 'Bad index map')

    def testInd2char(self):
        """ Test function ind2char """
        # The expected character is looked up by position (not taken from
        # the enumerated value), exactly as the comparison was originally
        # written; only the running index is needed from enumerate().
        for index, _unused in enumerate(properChrMap):
            self.assertEqual(self.lukija.ind2char(index), properChrMap[index],
                             'ind2char function failed to map indices to characters')

    def testChar2ind(self):
        """ Test function char2ind """
        for char in properIdxMap:
            self.assertEqual(self.lukija.char2ind(char), properIdxMap[char],
                             'char2ind function failed to map characters to indices')

    def testGetCharMapSize(self):
        """ Test whether getCharMapSize returns the correct value """
        self.assertEqual(self.lukija.getCharMapSize(), len(properChrMap),
                         'getCharMapSize returned wrong map size')

    def testLineCount(self):
        """ Test whether WordReader reads all lines in all files """
        self.lukija.readWords()
        self.assertEqual(self.lukija.linecount, linesIn2books,
                         'Did not read correct number of lines from file')
        # Distinct message so a file-count failure is not reported as a
        # line-count failure.
        self.assertEqual(self.lukija.filecount, noOfFiles,
                         'Did not read correct number of files')

    def testWordCountAndClear(self):
        """ Test if the reader finds the correct number of words """
        # This test uses its own small fixture file instead of the books
        # configured in setUp.
        self.lukija = WordReader(['../../Material/50words_in_UTF-8.txt'])
        self.lukija.readWords()
        self.assertEqual(self.lukija.wordcount, wordsInTestFile,
                         'Did not get the correct number of words')
        # After clearing, we should not have any words in memory
        self.lukija.clear()
        self.assertEqual((self.lukija.words, self.lukija.wordcount,
                          self.lukija.filecount, self.lukija.linecount),
                         ([], 0, 0, 0),
                         'clear() did not reset words and counters')