示例#1
0
def main():
    """
    Run a small back-off generation demo on a three-sentence sample text.

    The text is tokenized, stored and encoded, then a bigram and a trigram
    trie are built from it. A BackOffGenerator is asked for the id that
    follows the context ("name", "is"); the decoded word is printed next to
    the expected one. Finally save_model / load_model are called with
    'model.txt' (the loaded value is not used).

    :return: True if the generated word equals 'rex', False otherwise
    """
    sample_text = (
        'I have a cat. His name is Bruno. '
        'I have a dog too. His name is Rex. '
        'Her name is Rex too.'
    )

    sentences = tokenize_by_sentence(sample_text)

    storage = WordStorage()
    storage.update(sentences)
    encoded_sentences = encode_text(storage, sentences)

    # Lower-order model first, then the main trigram model.
    bigram_trie = NGramTrie(2, encoded_sentences)
    trigram_trie = NGramTrie(3, encoded_sentences)

    context = tuple(storage.get_id(token) for token in ('name', 'is'))

    generator = BackOffGenerator(storage, trigram_trie, bigram_trie)

    next_word_id = generator._generate_next_word(context)
    actual = storage.get_word(next_word_id)
    expected = 'rex'

    print(f'TEXT:\n{sample_text}')
    print(f'\nEXPECTED WORD AFTER name is IS {expected}')
    print(f'ACTUAL WORD AFTER name is IS {actual}')

    model_path = 'model.txt'
    save_model(generator, model_path)
    load_model(model_path)

    return actual == expected
 def test_word_storage_get_word_ideal(self):
     """
     get_word must return the word whose stored id matches the argument
     """
     storage_under_test = WordStorage()
     # Populate the mapping directly so only get_word is exercised.
     storage_under_test.storage = {'word': 1}
     self.assertEqual('word', storage_under_test.get_word(1))
    def test_ngram_text_generator_generate_sentence_no_end(self):
        """
        the sentence returned by _generate_sentence must finish with '<END>'
        """
        tokens = (
            'i', 'have', 'a', 'cat', 'his', 'name', 'is', 'bruno',
            'i', 'have', 'a', 'dog', 'too', 'his', 'name', 'is', 'rex',
            'her', 'name', 'is', 'rex', 'too', '<END>',
        )
        storage = WordStorage()
        storage.update(tokens)
        bigram_trie = NGramTrie(2, encode_text(storage, tokens))
        start_context = (storage.get_id('cat'), )

        text_generator = NGramTextGenerator(storage, bigram_trie)
        sentence_ids = text_generator._generate_sentence(start_context)

        # Only the final id matters: it has to decode to the end marker.
        last_word = storage.get_word(sentence_ids[-1])
        self.assertEqual('<END>', last_word)
示例#4
0
if __name__ == '__main__':
    # Demo: build a trigram model from a short text and generate one
    # sentence starting from the context ("on", "you").
    text = tokenize_by_sentence(
        """Hi everyone! Nice to meet you again. What are you doing in my laboratory work?
                                    You are very nice person, do you know it? To be honest, I can't stand programming.
                                    But it doesn't depend on you! It's my personal problem and I don't know how to
                                    solve it... It doesn't matter right now""")

    word_storage = WordStorage()
    word_storage.update(text)

    encoded_text = encode_text(word_storage, text)
    n_gram_trie = NGramTrie(3, encoded_text)
    generator_of_text = NGramTextGenerator(word_storage, n_gram_trie)

    context = (word_storage.get_id('on'), word_storage.get_id('you'))
    formed_ids = generator_of_text.generate_text(context, 1)

    # Decode every generated id and drop the sentence-end markers.
    formed_text = [
        decoded
        for decoded in map(word_storage.get_word, formed_ids)
        if decoded != '<END>'
    ]

    RESULT = ' '.join(formed_text)
    print(RESULT)
    assert RESULT == 'on you', ''
示例#5
0
    On Monday, I go to work. 
    I work at the post office. 
    Everyone shops for food at the grocery store. 
    They also eat at the restaurant. 
    The restaurant serves pizza and ice cream.
    My friends and I go to the park. 
    We like to play soccer at the park. 
    On Fridays, we go to the cinema to see a movie. 
    Children don't go to school on the weekend. 
    Each day, people go to the hospital when they are sick. 
    The doctors and nurses take care of them in the city. 
    The police keep everyone safe. I am happy to live in my city.
    """

    # Tokenize the sample text and register its tokens in a fresh storage.
    corpus = tokenize_by_sentence(TEXT)
    word_storage = WordStorage()
    word_storage.update(corpus)

    # Build a trigram model over the encoded corpus.
    encoded_corpus = encode_text(word_storage, corpus)
    ngrams = NGramTrie(3, encoded_corpus)
    generator = NGramTextGenerator(word_storage, ngrams)

    # Seed generation with the ids of "the" and "post".
    context = (
        word_storage.get_id('the'),
        word_storage.get_id('post'),
    )

    RESULT = generator.generate_text(context, 1)
    # Decode the generated ids back to words for display.
    decoded_words = map(word_storage.get_word, RESULT)
    print(' '.join(decoded_words))
    assert RESULT, 'Language generator work incorrect'
示例#6
0
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator

if __name__ == '__main__':
    # Demo: build a bigram model from a short text, generate two sentences
    # and compare the decoded output with the expected string.
    text = ('I have a cat. His name is Bruno. I have a dog too. '
            'His name is Rex. Her name is Rex too')
    corpus = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(corpus)

    encoded_text = encode_text(word_storage, corpus)
    n_gram_trie = NGramTrie(2, encoded_text)
    n_gram_text_generator = NGramTextGenerator(word_storage, n_gram_trie)

    context = (word_storage.get_id('i'), word_storage.get_id('have'))
    text_generated = n_gram_text_generator.generate_text(context, 2)

    # Decode the ids and leave out the sentence-end markers.
    output_text = [
        decoded
        for decoded in map(word_storage.get_word, text_generated)
        if decoded != '<END>'
    ]

    RESULT = ' '.join(output_text)
    print(RESULT)
    assert RESULT == 'i have a cat name is rex', 'Something went wrong :('