class PyWordReaderTestCases(unittest.TestCase): def setUp(self): self.lukija = WordReader(["../../Material/Grimm's Fairy Tales.txt"], specialCharacters = ["-", "'"], acceptNumerals = True, acceptUpperCase = True, acceptLowerCase = False) # test addFileName self.lukija.addFileName("../../Material/The Adventures of Tom Sawyer by Mark Twain.txt") def tearDown(self): self.lukija.clear('all') def testSanitize(self): """ Test whether word sanitizing works """ self.words = [] for word in unsanitizedWords: self.words.append(self.lukija.sanitize(word)) self.assertEqual(self.words, sanitizedWords, 'Failed to sanitize words') def testCreateChrMap(self): """ Test whether index and character maps are okay """ self.chrMap, self.idxMap = self.lukija._createChrMap() self.assertEqual(self.chrMap, properChrMap, 'Bad character map') self.assertEqual(self.idxMap, properIdxMap, 'Bad index map') def testInd2char(self): """ Test function ind2char """ for index, val in enumerate(properChrMap): self.assertEqual(self.lukija.ind2char(index), properChrMap[index], 'ind2char function failed to map indices to characters') def testChar2ind(self): """ Test function ind2char """ for char in properIdxMap: self.assertEqual(self.lukija.char2ind(char), properIdxMap[char], 'char2ind function failed to map characters to indices') def testGetCharMapSize(self): """ Test whether getCharMapSize returns the correct value """ self.assertEqual(self.lukija.getCharMapSize(), len(properChrMap), 'getCharMapSize returned wrong map size') def testLineCount(self): """ Test whether WordReader reads all lines in files """ self.lukija.readWords() self.assertEqual(self.lukija.linecount , linesIn2books, 'Did not read correct number of lines from file') self.assertEqual(self.lukija.filecount , noOfFiles, 'Did not read correct number of lines from file') def testWordCountAndClear(self): """ Test if the reader finds the correct number of words """ self.lukija = WordReader(['../../Material/50words_in_UTF-8.txt']) self.lukija.readWords() self.assertEqual(self.lukija.wordcount , wordsInTestFile, 'Did not get the correct number of words') # After clearing, we should not have any words in memory self.lukija.clear() self.assertEqual((self.lukija.words, self.lukija.wordcount, self.lukija.filecount, self.lukija.linecount), ([], 0, 0, 0))