def generate(namespace, mytext):
    """Assemble random text by walking the bigram table built from ``mytext``.

    Args:
        namespace: Object providing ``start`` (the first word), ``count``
            (number of loop iterations) and ``paragraph_lenght`` (number of
            finished sentences after which a paragraph break is inserted).
        mytext: Source text handed to ``pt.bigramms``.

    The bigram table is indexed by word and each entry must support
    ``len()`` and ``.items()`` (presumably a ``collections.Counter`` of
    follower words -- confirm against ``pt.bigramms``).

    NOTE(review): the visible source ends right after ``next_index`` is
    read. The step that chooses the next word, and any ``return``
    statement, are not present here, so this body is kept in step with the
    original up to that point.
    """
    bigrams = pt.bigramms(mytext)
    # Candidate words for starting a sentence. The table is not modified in
    # this function, so take the snapshot once instead of rebuilding a list
    # from the whole table on every sentence restart.
    start_words = list(bigrams)

    current_word = namespace.start
    used_words = collections.Counter()
    waswor = set()  # not referenced in the visible part of the function
    total = 0  # iterations spent on the sentence currently being built
    current_text = ' '
    current_sent = capitalizeFirst(current_word)
    sent = 0  # finished sentences in the current paragraph

    for _ in range(namespace.count):
        total += 1

        # Close the sentence when it has run long enough or when the
        # current word has no known followers.
        if total > 5 or current_word not in bigrams or not bigrams[current_word]:
            if total > 1:
                current_text += current_sent + '. '
                sent += 1
                if sent > namespace.paragraph_lenght:
                    current_text += '\n\n '
                    sent = 0

            # Restart from a uniformly chosen word of the table.
            current_word = start_words[random.randrange(len(start_words))]
            current_sent = capitalizeFirst(current_word)
            total = 0

        now = list(bigrams[current_word].items())
        next_index = used_words[current_word]
def test_bigramms(self):
    # Each word maps to a Counter of the words that directly follow it.
    # The expected table has no entry for the last word of either sentence,
    # so no pair is formed across the "!" boundary.
    sample = '''Привет всем вам! Как дела?'''
    expected = {
        'Привет': collections.Counter({'всем': 1}),
        'всем': collections.Counter({'вам': 1}),
        'Как': collections.Counter({'дела': 1}),
    }

    actual = parse_text.bigramms(sample)

    self.assertEqual(expected, actual)