def test_types(self):
    # Distinct-type count: 5 for the fish words, unchanged by re-adding them.
    words = self.fish_words
    histogram = Dictogram(words)
    # The word list itself and the histogram must agree on 5 distinct types
    assert len(set(words)) == 5
    assert histogram.types == 5
    # Counting every word a second time adds no new types
    for token in words:
        histogram.add_count(token)
    assert histogram.types == 5
def test_tokens(self):
    # Token total: 8 for the fish words, doubled after re-adding each word.
    words = self.fish_words
    histogram = Dictogram(words)
    expected_total = 8
    # The word list length and the histogram's token total must both be 8
    assert len(words) == expected_total
    assert histogram.tokens == expected_total
    # Counting every word once more should double the token total
    for token in words:
        histogram.add_count(token)
    assert histogram.tokens == expected_total * 2
def test_entries(self):
    # Entries are checked both through the histogram itself and via items().
    dictogram = Dictogram(self.fish_words)
    expected_types = 5

    # Histogram viewed as a dictionary of {word: count}
    assert len(dictogram) == expected_types
    # NOTE(review): if Dictogram iterates like a dict, this compares keys
    # only (order ignored) -- confirm counts are covered by the check below.
    self.assertCountEqual(dictogram, self.fish_dict)

    # Histogram viewed as a collection of (word, count) entries
    listogram = dictogram.items()
    assert len(listogram) == expected_types
    self.assertCountEqual(listogram, self.fish_list)  # order ignored
def make_markov(self, corpus):
    '''Generates nth order markov chain from corpus.

    The first ``self.order`` words only fill ``self.queue``. For each later
    word, the current queue contents (as a tuple) form the state key and the
    word is counted in that state's Dictogram. A state whose first word
    begins with an ASCII capital letter is additionally counted under the
    ``'START'`` key.

    Args:
        corpus: iterable of words. Presumably strings, since the first word
            of every state is passed to ``match`` -- confirm with callers.

    Returns:
        self, so calls can be chained.
    '''
    for i, word in enumerate(corpus):
        if i < self.order:
            # Prepopulate queue
            self.queue.enqueue(word)
            continue

        # Queue becomes state key in markov dict
        state = tuple(self.queue)

        # Advance the queue
        self.queue.dequeue()
        self.queue.enqueue(word)

        # The pattern requires an A-Z first character (assuming ``match``
        # is ``re.match``, which anchors at the start of the string).
        if match(r'(([A-Z])\w*)', state[0]) is not None:
            # __init__ only creates 'START' when it receives a corpus;
            # create it on demand so calling make_markov() on an empty
            # chain does not raise KeyError here.
            if 'START' not in self:
                self['START'] = Dictogram()
            self['START'].add_count(state)

        # Test membership on the mapping itself rather than on keys()
        if state not in self:
            self[state] = Dictogram()

        # State is guaranteed to exist now; add word to its dictogram
        self.get(state).add_count(word)
    return self
def test_contains(self):
    # Membership: every fish word is present, unseen words are absent.
    histogram = Dictogram(self.fish_words)

    # Each word the histogram was built from must be found
    present = self.fish_words
    for token in present:
        assert token in histogram

    # Words that were never added must not be found
    absent = ('fishy', 'food')
    for token in absent:
        assert token not in histogram
def __init__(self, corpus=None, order=2):
    '''Set up an empty chain and, when a corpus is given, build it.

    Args:
        corpus: optional words handed to make_markov(); when None the
            chain is left empty and no 'START' entry is created.
        order: stored as ``self.order`` and passed to ``Queue``.
    '''
    super(NarkovChain, self).__init__()
    self.order = order
    self.queue = Queue(order)
    self.sentence = []

    # Nothing to build without a corpus
    if corpus is None:
        return

    # 'START' must exist before make_markov() records starting states
    self['START'] = Dictogram()
    self.make_markov(corpus)
def test_frequency(self):
    # Frequency lookups for every fish word plus one unseen word.
    histogram = Dictogram(self.fish_words)

    # (word, expected count); 'food' was never added, so it must report 0
    expected_counts = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
        ('food', 0),
    )
    for token, count in expected_counts:
        assert histogram.frequency(token) == count
def test_add_count(self):
    # add_count with explicit counts updates frequencies, types and tokens.
    histogram = Dictogram(self.fish_words)

    # (word, amount to add); 'food' is new, the others already exist
    updates = (('two', 2), ('blue', 3), ('fish', 4), ('food', 5))
    for token, amount in updates:
        histogram.add_count(token, amount)

    # (word, expected frequency after the updates above)
    expected_counts = (
        ('one', 1),
        ('two', 3),
        ('red', 1),
        ('blue', 4),
        ('fish', 8),
        ('food', 5),
    )
    for token, count in expected_counts:
        assert histogram.frequency(token) == count

    # One new distinct word type was introduced ('food')
    assert histogram.types == 6
    # Original 8 tokens plus 2 + 3 + 4 + 5 added
    assert histogram.tokens == 8 + 14