示例#1
0
def generate(namespace,mytext):
    """Generate pseudo-random text by walking a bigram table built from *mytext*.

    NOTE(review): the visible body ends inside the main loop, with no
    return statement and several locals that are assigned but never read.
    The excerpt looks truncated, so this documents only what is visible.

    Args:
        namespace: Object providing the attributes read below: ``start``
            (the initial word), ``count`` (number of loop iterations) and
            ``paragraph_lenght`` (sentence count compared against before a
            paragraph break is inserted). Presumably an argparse namespace;
            verify against the caller. The attribute name is misspelled but
            must match whatever the caller defines.
        mytext: Source text handed unchanged to ``pt.bigramms``.
    """
    # Presumably maps each word to a Counter of the words that follow it
    # (it is indexed by word and the result's .items() is taken below);
    # confirm against pt.bigramms.
    bigrams = pt.bigramms(mytext)
    current_word = namespace.start
    # Per-word counter; only read (never incremented) in the visible part.
    used_words = collections.Counter()
    # Not used anywhere in the visible part of the function.
    waswor = set()
    # Iterations since the current sentence was started.
    total = 0
    # Live view over the table; materialized into a list on every restart.
    bit = bigrams.items()
    # Accumulated output; starts with a three-space paragraph indent.
    current_text = '   '
    current_sent = capitalizeFirst(current_word)
    # Sentences appended since the last paragraph break.
    sent = 0
    for i in range(namespace.count):
        total += 1
        # Start a new sentence when the current one has run for more than
        # five iterations, or when the current word has no followers.
        if total > 5 or current_word not in bigrams or len(bigrams[current_word]) == 0:
            # Flush the sentence only if this is not the first iteration
            # after a restart (total is reset to 0 below and incremented
            # at the top of the loop).
            if (total > 1):
                current_text += current_sent+'. '
                sent += 1
                # Break the paragraph once more than paragraph_lenght
                # sentences have been appended.
                if (sent > namespace.paragraph_lenght):
                    current_text += '\n\n   '
                    sent = 0
            # Restart from a uniformly random key of the bigram table.
            # NOTE(review): random.randrange(0) raises ValueError, so an
            # empty table fails here; list(bit) is also rebuilt each time.
            current_word = list(bit)[random.randrange(len(bit))][0]
            current_sent = capitalizeFirst(current_word)
            total = 0
        # Candidate (next word, count) pairs for the current word.
        # NOTE(review): bigrams[current_word] is indexed without a membership
        # check; this relies on the restart above having picked an existing key.
        now = list(bigrams[current_word].items())
        # Value of used_words for the current word; how it is used is not
        # visible in this excerpt.
        next_index = used_words[current_word]
示例#2
0
 def test_bigramms(self):
     # Every word should map to a Counter of the words that directly
     # follow it; punctuation is absent from the keys and no pair is
     # expected across the sentence boundary ('вам' has no entry).
     actual = parse_text.bigramms('''Привет всем вам! Как дела?''')
     expected = {
         'Привет': collections.Counter({'всем': 1}),
         'всем': collections.Counter({'вам': 1}),
         'Как': collections.Counter({'дела': 1}),
     }
     self.assertEqual(expected, actual)