Примеры использования WordReader на Python

Язык программирования: Python

Пространство имен/Пакет: WordReader

Класс/Тип: WordReader

Примеров на hotexamples.com: 7

Python WordReader — найдено 7 примеров. Это лучшие примеры Python-кода для WordReader.WordReader, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

readWords(3)

addFileName(2)

_createChrMap(1)

char2ind(1)

clear(1)

getCharMapSize(1)

ind2char(1)

sanitize(1)

Пример #1

Показать файл

Файл: pysanaindeksi.py Проект: pahvenai/pySanaIndeksi

def testRun():

    lukija = WordReader(["../Material/Grimm's Fairy Tales.txt"])

    lukija.readWords()


    punamusta = RedBlack(lukija)
    trie = Trie(lukija)

    print "Adding words from selected material..."
    intti = 0; setti = 0
    for word in lukija.words:
        trie.add(word[0], word[1:])
        punamusta.add(word[0], word[1:])
        intti = intti + 1
        if intti > lukija.wordcount / 100.0:
            setti = setti + 1
            print setti, '% of words added'
            intti = 0

    print "Searching for words in Grimm's Fairy tales and The Adventures of Tom Sawyer"

    word = raw_input( "Find a word (or its beginning) in the text: " ).rstrip( '\n' )

    positions, count, linecount = trie.find(word)
    print "Found", count, "instances (", linecount, "lines) @", positions
    RBpositions, RBcount, RBlinecount = punamusta.find(word)
    print "Found", RBcount, "instances (", RBlinecount, "lines) @", RBpositions

Пример #2

Показать файл

Файл: WordReaderUnitTest.py Проект: pahvenai/pySanaIndeksi

 def setUp(self):
     """Create the reader under test and register a second material file.

     The reader is constructed with specialCharacters ["-", "'"],
     acceptNumerals=True, acceptUpperCase=True and acceptLowerCase=False.
     """
     readerOptions = {
         'specialCharacters': ["-", "'"],
         'acceptNumerals': True,
         'acceptUpperCase': True,
         'acceptLowerCase': False,
     }
     self.lukija = WordReader(["../../Material/Grimm's Fairy Tales.txt"],
                              **readerOptions)
     # Also exercises addFileName by adding a second book.
     secondBook = "../../Material/The Adventures of Tom Sawyer by Mark Twain.txt"
     self.lukija.addFileName(secondBook)

Пример #3

Показать файл

Файл: SearcherUnitTest.py Проект: pahvenai/pySanaIndeksi

class PySearcherTestCases(unittest.TestCase):
    """Unit tests for Searcher working on a Trie built from a WordReader."""

    def setUp(self):
        """Build a fresh reader, trie and searcher before every test."""
        self.reader = WordReader()
        self.finder = Trie(self.reader)
        self.searcher = Searcher(self.finder, '')

    def tearDown(self):
        """Drop the references created in setUp."""
        self.reader = self.finder = self.searcher = None

    def testRandomWord(self):
        """Two random words are each longer than one character and differ."""
        firstWord = self.searcher.randomWord()
        secondWord = self.searcher.randomWord()
        for candidate in (firstWord, secondWord):
            self.assertTrue(len(candidate) > 1, 'Word length too short')
        self.assertNotEqual(firstWord, secondWord, 'Found the same word')

    def testRandomWords(self):
        """A batch of five random words contains five distinct words."""
        batch = self.searcher.randomWord(5)
        distinctCount = len(set(batch))
        self.assertTrue(distinctCount == 5, 'Did not find 5 unique words')

    def testBinaryOperationsAreWorking(self):
        """
        Checks that the operations give six distinct hit counts and that
        every known search term returns its expected number of hits.
        """
        self.reader.addFileName(MaterialFilePath, readNow=True)
        self.finder.addFromReader()

        hitCounts = [self.searcher.search(operations[name], returnCount=True)
                     for name in operations]
        # Six distinct counts, i.e. the operations are not identical.
        self.assertTrue(len(set(hitCounts)) == 6,
                        'Searcher failed binary operation check')

        for term in binaryOperationsSearch:
            found = self.searcher.search(term, returnCount=True)
            expected = binaryOperationsSearch[term]
            self.assertEqual(found, expected,
                             'Searcher found wrong number of hits on some search')

Пример #4

Показать файл

Файл: WordReaderUnitTest.py Проект: pahvenai/pySanaIndeksi

 def testWordCountAndClear(self):
     """Check the word count of the 50-word file and that clear() resets it."""
     reader = WordReader(['../../Material/50words_in_UTF-8.txt'])
     self.lukija = reader
     reader.readWords()
     self.assertEqual(reader.wordcount, wordsInTestFile,
                      'Did not get the correct number of words')
     # Once cleared, the reader must hold no words and all counters are zero.
     reader.clear()
     stateAfterClear = (reader.words, reader.wordcount,
                        reader.filecount, reader.linecount)
     self.assertEqual(stateAfterClear, ([], 0, 0, 0))

Пример #5

Показать файл

Файл: WordReaderUnitTest.py Проект: pahvenai/pySanaIndeksi

class PyWordReaderTestCases(unittest.TestCase):
    """Unit tests for WordReader: sanitizing, character maps and counters."""

    def setUp(self):
        """Create a reader for the Grimm file and register a second book."""
        self.lukija = WordReader(["../../Material/Grimm's Fairy Tales.txt"],
                                 specialCharacters=["-", "'"],
                                 acceptNumerals=True, acceptUpperCase=True,
                                 acceptLowerCase=False)
        # test addFileName
        self.lukija.addFileName("../../Material/The Adventures of Tom Sawyer by Mark Twain.txt")

    def tearDown(self):
        """Clear the reader after every test, passing 'all' to clear()."""
        self.lukija.clear('all')

    def testSanitize(self):
        """ Test whether word sanitizing works """
        self.words = [self.lukija.sanitize(word) for word in unsanitizedWords]
        self.assertEqual(self.words, sanitizedWords, 'Failed to sanitize words')

    def testCreateChrMap(self):
        """ Test whether index and character maps are okay """
        self.chrMap, self.idxMap = self.lukija._createChrMap()
        self.assertEqual(self.chrMap, properChrMap, 'Bad character map')
        self.assertEqual(self.idxMap, properIdxMap, 'Bad index map')

    def testInd2char(self):
        """ Test function ind2char """
        # The expected value is looked up by index rather than taken from
        # enumerate(), so the check does not depend on the container type.
        for index, _ in enumerate(properChrMap):
            self.assertEqual(self.lukija.ind2char(index), properChrMap[index],
                             'ind2char function failed to map indices to characters')

    def testChar2ind(self):
        """ Test function char2ind """
        for char in properIdxMap:
            self.assertEqual(self.lukija.char2ind(char), properIdxMap[char],
                             'char2ind function failed to map characters to indices')

    def testGetCharMapSize(self):
        """ Test whether getCharMapSize returns the correct value """
        self.assertEqual(self.lukija.getCharMapSize(), len(properChrMap),
                         'getCharMapSize returned wrong map size')

    def testLineCount(self):
        """ Test whether WordReader reads all lines in all files """
        self.lukija.readWords()
        self.assertEqual(self.lukija.linecount, linesIn2books,
                         'Did not read correct number of lines from file')
        # The file-count check has its own message so a failure here is not
        # mistaken for a line-count failure.
        self.assertEqual(self.lukija.filecount, noOfFiles,
                         'Did not read correct number of files')

    def testWordCountAndClear(self):
        """ Test if the reader finds the correct number of words """
        self.lukija = WordReader(['../../Material/50words_in_UTF-8.txt'])
        self.lukija.readWords()
        self.assertEqual(self.lukija.wordcount, wordsInTestFile,
                         'Did not get the correct number of words')
        # After clearing, we should not have any words in memory
        self.lukija.clear()
        self.assertEqual((self.lukija.words, self.lukija.wordcount,
                          self.lukija.filecount, self.lukija.linecount),
                         ([], 0, 0, 0))

Пример #6

Показать файл

Файл: SearcherUnitTest.py Проект: pahvenai/pySanaIndeksi

 def setUp(self):
     """Build a fresh reader, a trie on top of it and a searcher on the trie."""
     self.reader = wordSource = WordReader()
     self.finder = wordIndex = Trie(wordSource)
     self.searcher = Searcher(wordIndex, '')

Пример #7

Показать файл

Файл: timing.py Проект: pahvenai/pySanaIndeksi

        return average(runtimes)
    else:
        print string + '%20.3f ms' %  (sum(runtimes) / repeats)


if __name__ == "__main__":
    print "Hello World"

    trieAddFile = openFile('trieAddToEmpty', 'w')
    punamustaAddFile = openFile('punamustaAddToEmpty', 'w')
    trieFindLengthFile = openFile('trieFindWordLength', 'w')
    punamustaFindLengthFile = openFile('punamustaFindWordLength', 'w')
    trieFindWordCountFile = openFile('trieFindWordCount', 'w')
    punamustaFindWordCountFile = openFile('punamustaFindWordCoun', 'w')

    lukija = WordReader(["../Material/Grimm's Fairy Tales.txt"])
    lukija.readWords()
    punamusta = RedBlack(lukija)
    trie = Trie(lukija)

    words = pickle.load( open( "randomWordList", "rb" ) ) # indexed by word len

    repeats = 100;
    runtimes = []
    for i in range(2,17):
        runtime = addWordsToEmptyList(trie, 2**i, repeats, '%25s\t%10d\t' % ('trie:add', 2**i), False)
        print '%25s\t%10d\t\t%14.3f ms' % ('trie:addToEmpty', 2**i, runtime)
        trieAddFile.write('%10d\t%8.3f\n' % (2**i, runtime))
        runtimes.append(findWords(trie, words[7], repeats, printout=False))
    for index, runtime in enumerate(runtimes):
        print '%25s\t%10d\t\t%14.3f ms' % ('trie:findWordCount', 2**(index+2), runtime)