def time_emma(self):
        """Time training on 'austen-emma.txt' and generating ten sentences.

        Loads the raw Gutenberg text through nltk, trains a fresh Vocabulary
        on it, builds ``count`` sentences from one fixed tag pattern, and
        prints both elapsed times followed by the generated sentences.
        """
        corpus_text = nltk.corpus.gutenberg.raw('austen-emma.txt')

        # The training interval includes constructing the Vocabulary.
        started = time.perf_counter()
        vocab = Vocabulary()
        vocab.train([corpus_text])
        trained = time.perf_counter()

        tag_pattern = [
            START_SENTENCE, 'DT', 'JJ', 'NN', 'VBZ', 'DT', 'NN', 'IN', 'DT',
            'NN', END_SENTENCE
        ]
        count = 10
        # The same tag list is passed to every build_sentence call.
        sentences = [vocab.build_sentence(tag_pattern) for _ in range(count)]
        generated = time.perf_counter()

        print('Time to train on Emma:', trained - started, 'seconds')
        print('Time to generate', count, 'sentences:', generated - trained)
        print('Sentences generated:')
        for built in sentences:
            print(built)
Пример #2
0
    def test_tags_to_random_words(self):
        """Check tags_to_random_words turns '<s>', 'DT', 'NN' into two words.

        After training on the two-sentence text, the expected result is the
        list ['the', 'cat'].
        """
        training_text = """The black cat saw the white cat. 
                   The white cat was easy to see."""
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        tags = ['<s>', 'DT', 'NN']
        expected_words = ['the', 'cat']
        self.assertListEqual(
            vocabulary.tags_to_random_words(tags), expected_words)
Пример #3
0
    def test_random_word_when_two(self):
        """Check random_word picks one of the two words trained after 'black'.

        The training text contains both "black cat" and "black night", and
        the test accepts either 'cat' or 'night' as the result.
        """
        vocab = Vocabulary()
        text = """The black cat saw a white cat in the black night. 
        The black night was darker than usual."""
        vocab.train([text])

        random_noun = vocab.random_word('black', 'JJ', 'NN', 'VBD')
        # assertIn shows the offending value on failure; assertTrue on a
        # boolean expression would only report "False is not true".
        self.assertIn(random_noun, ('cat', 'night'))
Пример #4
0
    def test_random_word_when_tag_not_found(self):
        """Check random_word still yields 'cat' for the 'NN', 'RB', 'RB' tags.

        The arguments ('night', 'NN', 'RB', 'RB') are queried after training
        on a text of two "cat" sentences; the expected word is 'cat'.
        """
        training_text = """The black cat saw a white cat. 
              The white cat saw the black cat."""
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        chosen = vocabulary.random_word('night', 'NN', 'RB', 'RB')
        self.assertEqual(chosen, 'cat')
Пример #5
0
    def test_random_word_when_prev_tag_not_found(self):
        """Check random_word yields 'cat' for START_SENTENCE followed by 'NN'.

        The query passes START_SENTENCE twice, then the tags 'NN' and 'VBD';
        the expected word after training is 'cat'.
        """
        training_text = """The black cat saw a white cat. 
              The white cat was easy to see."""
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        query = (START_SENTENCE, START_SENTENCE, 'NN', 'VBD')
        chosen = vocabulary.random_word(*query)
        self.assertEqual(chosen, 'cat')
Пример #6
0
    def test_random_word_when_only_one(self):
        """Check random_word yields 'the' for START_SENTENCE, 'DT', 'NN'.

        The query passes START_SENTENCE twice, then the tags 'DT' and 'NN';
        the expected word after training is 'the'.
        """
        training_text = """The black cat saw a white cat in the night. 
           The white cat was easy to see."""
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        query = (START_SENTENCE, START_SENTENCE, 'DT', 'NN')
        chosen = vocabulary.random_word(*query)
        self.assertEqual(chosen, 'the')
Пример #7
0
    def test_build_sentence_basic(self):
        """Check build_sentence reproduces the single training sentence.

        The tag sequence passed in is expected to yield exactly the text
        the vocabulary was trained on.
        """
        training_text = "The black cat was very cold."
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        tags = ['<s>', 'DT', 'JJ', 'NN', 'VBD', 'RB', 'JJ', '</s>']
        rebuilt = vocabulary.build_sentence(tags)
        self.assertEqual(rebuilt, training_text)
Пример #8
0
    def test_tokenize_by_sentence_with_line_breaks(self):
        """Check a line break inside a sentence does not split that sentence.

        'To be. Or not\\nto be?' is expected to tokenize into two sentences,
        with the newline absent from the tokens.
        """
        tokenizer = Vocabulary()

        tokenized = tokenizer._tokenize_by_sentence('To be. Or not\nto be?')

        first_expected = ['To', 'be', '.']
        second_expected = ['Or', 'not', 'to', 'be', '?']
        self.assertEqual(len(tokenized), 2)
        self.assertEqual(tokenized[0], first_expected)
        self.assertEqual(tokenized[1], second_expected)
Пример #9
0
    def test_tokenize_by_sentence_for_multiple_sentences(self):
        """Check a two-sentence utterance tokenizes into two token lists.

        'To be. Or not to be?' is expected to give one token list per
        sentence, with punctuation as separate tokens.
        """
        tokenizer = Vocabulary()

        tokenized = tokenizer._tokenize_by_sentence('To be. Or not to be?')

        first_expected = ['To', 'be', '.']
        second_expected = ['Or', 'not', 'to', 'be', '?']
        self.assertEqual(len(tokenized), 2)
        self.assertEqual(tokenized[0], first_expected)
        self.assertEqual(tokenized[1], second_expected)
Пример #10
0
    def test_tokenize_by_sentence_for_single_sentence(self):
        """Check a one-sentence utterance tokenizes into a single token list.

        The final period is expected to appear as its own token.
        """
        tokenizer = Vocabulary()

        tokenized = tokenizer._tokenize_by_sentence(
            'It was a dark and dreary morning.')

        expected_tokens = [
            'It', 'was', 'a', 'dark', 'and', 'dreary', 'morning', '.'
        ]
        self.assertEqual(len(tokenized), 1)
        self.assertEqual(tokenized[0], expected_tokens)
Пример #11
0
    def test_build_sentence_with_proper_noun(self):
        """Check build_sentence reproduces a sentence ending in 'Emma'.

        The tag sequence includes ',' and 'NNP'; the built sentence is
        expected to equal the training text exactly.
        """
        training_text = "The black cat was very cold, and said so to Emma."
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        tags = [
            '<s>', 'DT', 'JJ', 'NN', 'VBD', 'RB', 'JJ', ',', 'CC', 'VBD', 'RB',
            'TO', 'NNP', '</s>'
        ]
        rebuilt = vocabulary.build_sentence(tags)
        self.assertEqual(rebuilt, training_text)
Пример #12
0
    def test_build_sentence_with_comma(self):
        """Check build_sentence reproduces a sentence containing a comma.

        The tag sequence includes ','; the built sentence is expected to
        equal the training text exactly.
        """
        training_text = "The black cat was very cold, and looked quite sad."
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        tags = [
            '<s>', 'DT', 'JJ', 'NN', 'VBD', 'RB', 'JJ', ',', 'CC', 'VBD', 'RB',
            'JJ', '</s>'
        ]
        rebuilt = vocabulary.build_sentence(tags)
        self.assertEqual(rebuilt, training_text)
Пример #13
0
    def test_train_single_utterance(self):
        """Check freqs_by_features after training on one utterance.

        The utterance holds two sentences. The test pins the full set of
        keys in freqs_by_features, the words recorded under two of those
        keys, and a count of 2 for 'night' under the sentence-final key.
        """
        training_text = """The white cat saw the black cat in the night. 
        The black cat saw the white cat in the night."""
        vocabulary = Vocabulary()
        vocabulary.train([training_text])

        expected_keys = {
            ('<s>', '<s>', 'DT', 'JJ'),
            ('the', 'DT', 'JJ', 'NN'),
            ('white', 'JJ', 'NN', 'VBD'),
            ('black', 'JJ', 'NN', 'VBD'),
            ('white', 'JJ', 'NN', 'IN'),
            ('black', 'JJ', 'NN', 'IN'),
            ('cat', 'NN', 'VBD', 'DT'),
            ('saw', 'VBD', 'DT', 'JJ'),
            ('cat', 'NN', 'IN', 'DT'),
            ('in', 'IN', 'DT', 'NN'),
            ('the', 'DT', 'NN', '</s>'),
        }
        jj_key = ('the', 'DT', 'JJ', 'NN')
        final_key = ('the', 'DT', 'NN', '</s>')

        freqs = vocabulary.freqs_by_features
        # The key set is checked before any lookup by key.
        self.assertSetEqual(set(freqs.keys()), expected_keys)
        self.assertSetEqual(set(freqs[jj_key].keys()), {'black', 'white'})
        self.assertSetEqual(set(freqs[final_key].keys()), {'night'})
        self.assertEqual(freqs[final_key]['night'], 2)
Пример #14
0
    def test_train_different_character_utterances(self):
        """Check freqs_by_features after training on two utterances.

        Each utterance is a single sentence. The test pins the full set of
        keys in freqs_by_features, the words recorded under two of those
        keys, and a count of 2 for 'night' under the sentence-final key.
        """
        utterances = [
            'The white cat saw the black cat in the night.',
            'The black cat saw the white cat in the night.',
        ]
        vocabulary = Vocabulary()
        vocabulary.train(utterances)

        expected_keys = {
            ('<s>', '<s>', 'DT', 'JJ'),
            ('the', 'DT', 'JJ', 'NN'),
            ('white', 'JJ', 'NN', 'VBD'),
            ('black', 'JJ', 'NN', 'VBD'),
            ('white', 'JJ', 'NN', 'IN'),
            ('black', 'JJ', 'NN', 'IN'),
            ('cat', 'NN', 'VBD', 'DT'),
            ('saw', 'VBD', 'DT', 'JJ'),
            ('cat', 'NN', 'IN', 'DT'),
            ('in', 'IN', 'DT', 'NN'),
            ('the', 'DT', 'NN', '</s>'),
        }
        jj_key = ('the', 'DT', 'JJ', 'NN')
        final_key = ('the', 'DT', 'NN', '</s>')

        freqs = vocabulary.freqs_by_features
        # The key set is checked before any lookup by key.
        self.assertSetEqual(set(freqs.keys()), expected_keys)
        self.assertSetEqual(set(freqs[jj_key].keys()), {'black', 'white'})
        self.assertSetEqual(set(freqs[final_key].keys()), {'night'})
        self.assertEqual(freqs[final_key]['night'], 2)