Exemplo n.º 1
0
    def test_case_sentsitive(self):
        """Exercise the ``case_sensitive`` flag of ``naw.ReservedAug``.

        Scenarios, in order:

        1. Reserved group ``['FWD', 'Forward']`` against ``'Fwd: ...'`` with
           ``case_sensitive=True``: the text must come back unchanged.
        2. Same group with ``case_sensitive=False``: the text must change.
        3. Group ``['1', 'Regards']`` against a text containing ``'Regards'``
           (case-insensitive): every one of 10 augmentations must change it.
        4. Group ``['Best Regards', 'Regards']`` against lowercase
           ``'regards'``: the output must change and contain
           ``'Best Regards'``.
        5. Same group against lowercase ``'best regards'``: the output must
           change and contain ``'Regards'``.
        """
        texts = [
            'Fwd: Mail for solution',
        ]

        reserved_tokens = [
            ['FWD', 'Forward'],
        ]

        # Scenario 1: 'FWD' differs from 'Fwd' only by case.
        aug = naw.ReservedAug(reserved_tokens=reserved_tokens,
                              case_sensitive=True)

        for text in texts:
            augmented_text = aug.augment(text)
            self.assertEqual(augmented_text, text)

        # Scenario 2: same tokens, case-insensitive matching.
        aug = naw.ReservedAug(reserved_tokens=reserved_tokens,
                              case_sensitive=False)

        for text in texts:
            augmented_text = aug.augment(text)
            self.assertNotEqual(augmented_text, text)

        # Scenario 3: repeated 10 times, presumably to guard against a
        # random choice occasionally returning the original text.
        text = 'Dear NLP, text, texttt Thanks. Regards NLPAug'
        reserved_tokens = [
            ['1', 'Regards'],
        ]

        aug = naw.ReservedAug(reserved_tokens=reserved_tokens,
                              case_sensitive=False)
        for _ in range(10):
            augmented_text = aug.augment(text)
            self.assertNotEqual(augmented_text, text)

        # Scenario 4: lowercase single word 'regards' in the input.
        text = 'Dear NLP, text, texttt Thanks. regards NLPAug'
        reserved_tokens = [
            ['Best Regards', 'Regards']
        ]
        aug = naw.ReservedAug(reserved_tokens=reserved_tokens,
                              case_sensitive=False)
        for _ in range(10):
            augmented_text = aug.augment(text)
            self.assertNotEqual(augmented_text, text)
            # assertIn reports both operands on failure, unlike
            # assertTrue(... in ...).
            self.assertIn('Best Regards', augmented_text)

        # Scenario 5: lowercase two-word phrase 'best regards' in the input.
        text = 'Dear NLP, text, texttt Thanks. best regards NLPAug'
        reserved_tokens = [
            ['Best Regards', 'Regards']
        ]
        aug = naw.ReservedAug(reserved_tokens=reserved_tokens,
                              case_sensitive=False)
        for _ in range(10):
            augmented_text = aug.augment(text)
            self.assertNotEqual(augmented_text, text)
            self.assertIn('Regards', augmented_text)
Exemplo n.º 2
0
    def test_exact_match(self):
        """Check that a reserved token has to equal a word of the text.

        The sample contains the word ``'Regards'``.  A group listing only
        ``'Best Regards'``, ``'Sincerely'`` and ``'Regard'`` must leave the
        sample untouched, whereas a group that lists ``'Regards'`` itself
        must alter it.
        """
        samples = ['Dear NLP, Thanks. Regards NLPAug']

        # Group without the exact word 'Regards': output equals input.
        non_matching = naw.ReservedAug(
            reserved_tokens=[['Best Regards', 'Sincerely', 'Regard']])
        for sample in samples:
            self.assertEqual(non_matching.augment(sample), sample)

        # Group containing 'Regards': output differs from input.
        matching = naw.ReservedAug(
            reserved_tokens=[['Regards', 'Sincerely', 'Regard']])
        for sample in samples:
            self.assertNotEqual(matching.augment(sample), sample)
Exemplo n.º 3
0
    def test_only_match_word(self):
        """Check that reserved tokens are not matched inside longer words.

        The reserved group is ``['t', 'a']``; in the sentence those letters
        occur only as parts of other words (e.g. ``'text'``, ``'texttt'``),
        and the test asserts the sentence is returned unchanged.
        """
        sentence = 'Dear NLP, text, texttt Thanks. Regards NLPAug'
        augmenter = naw.ReservedAug(reserved_tokens=[['t', 'a']])

        self.assertEqual(augmenter.augment(sentence), sentence)
Exemplo n.º 4
0
    def test_reserved_word(self):
        """Check that a text containing a reserved token is altered.

        Each sample contains one token from one of the two reserved groups
        (``'Fwd'`` or ``'Regards'``); the augmented output must differ from
        the sample.
        """
        token_groups = [['Fwd', 'Forward'], ['Regards', 'Sincerely']]
        augmenter = naw.ReservedAug(reserved_tokens=token_groups)

        samples = ['Fwd: Mail for solution', 'Dear NLP, Thanks. Regards. NLPAug']
        for sample in samples:
            result = augmenter.augment(sample)
            self.assertNotEqual(result, sample)
Exemplo n.º 5
0
    def test_duplicate_word(self):
        """Check the result when two reserved groups overlap on one word.

        ``'best regards'`` in the text is covered by the two-word token
        ``'Best Regards'`` (first group), and its second word is also covered
        by ``'regards'`` (second group).  With case-insensitive matching the
        test asserts the output contains ``'ABCD'``, the alternative from the
        first group.
        """
        text = 'Dear NLP, text, texttt Thanks. best regards NLPAug'
        reserved_tokens = [
            ['Best Regards', 'ABCD'],
            ['regards', '1234']
        ]

        aug = naw.ReservedAug(reserved_tokens=reserved_tokens, case_sensitive=False)
        augmented_text = aug.augment(text)
        # assertIn shows the needle and the haystack when the check fails.
        self.assertIn('ABCD', augmented_text)
Exemplo n.º 6
0
    def test_case_sentsitive(self):
        """Check the effect of ``case_sensitive`` on reserved-token matching.

        The reserved token ``'FWD'`` differs from the ``'Fwd'`` in the sample
        only by case.  With ``case_sensitive=True`` the sample must be
        returned unchanged; with ``case_sensitive=False`` it must be altered.
        """
        samples = [
            'Fwd: Mail for solution',
        ]
        token_groups = [
            ['FWD', 'Forward'],
        ]

        # Each pair is (flag value, assertion expected to hold for it).  The
        # case-sensitive phase runs to completion before the second
        # augmenter is built.
        expectations = (
            (True, self.assertEqual),
            (False, self.assertNotEqual),
        )
        for flag, check in expectations:
            augmenter = naw.ReservedAug(reserved_tokens=token_groups,
                                        case_sensitive=flag)
            for sample in samples:
                check(augmenter.augment(sample), sample)
Exemplo n.º 7
0
    def test_multi_words(self):
        """Check replacement of reserved tokens that span several words.

        The text contains both ``'NLP'`` and the two-word token
        ``'Best Regards'``, each the first entry of a reserved group.  The
        test asserts that the output differs from the input and that neither
        original token is still present in it.
        """
        texts = [
            'Dear NLP, Thanks. Best Regards Augmenter'
        ]

        reserved_tokens = [
            ['Best Regards', 'Sincerely', 'Regard'],
            ['NLP', 'Natural Langauge Processing', 'Text']
        ]
        aug = naw.ReservedAug(reserved_tokens=reserved_tokens)

        for text in texts:
            augmented_text = aug.augment(text)
            self.assertNotEqual(augmented_text, text)
            for t in ['NLP', 'Best Regards']:
                # assertNotIn names the offending token and the output on
                # failure, which assertTrue(t not in ...) would not.
                self.assertNotIn(t, augmented_text)
Exemplo n.º 8
0
    def test_allow_original(self):
        """Check that ``allow_original=True`` can yield the unchanged text.

        For every sample, up to 10 augmentations are drawn; the test passes
        for that sample as soon as one of them equals the input, and fails if
        none of the 10 does.
        """
        samples = ['Fwd: Mail for solution', 'Dear NLP, Thanks. Regards. NLPAug']
        token_groups = [['Fwd', 'Forward'], ['Regards', 'Sincerely']]
        augmenter = naw.ReservedAug(reserved_tokens=token_groups,
                                    allow_original=True)

        for sample in samples:
            # any() stops at the first match, so no further augmentations
            # are drawn once the original text has been seen.
            original_seen = any(
                augmenter.augment(sample) == sample for _ in range(10)
            )
            self.assertTrue(original_seen)
Exemplo n.º 9
0
    def test_all_combination(self):
        """Check the number of outputs with ``generate_all_combinations=True``.

        The augmenter is built with ``aug_p=1`` and case-insensitive
        matching, then applied to a list of two texts.  The test asserts that
        the first text yields 35 results and the second yields 8.
        """
        texts = [
            'Dear NLP, text, texttt Thanks. best regards NLPAug',
            'Dear Natural Language Processing, text, texttt Thanks, regards NLPAug'
        ]
        reserved_tokens = [
            ['Best Regards', 'Best Regards2222', 'Best Regards3333', 'Best Regards4444'],
            ['thx', 'Thanks', 'thank you'],
            ['Dear', 'Hi', 'Hello']
        ]
        aug = naw.ReservedAug(
            aug_p=1,
            generate_all_combinations=True,
            reserved_tokens=reserved_tokens,
            case_sensitive=False)

        augmented_texts = aug.augment(texts)
        # unittest assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report both values on failure.
        # NOTE(review): the expected counts look like 4*3*3 - 1 and 3*3 - 1
        # (all combinations minus the original text) -- confirm against the
        # ReservedAug implementation.
        self.assertEqual(len(augmented_texts[0]), 35)
        self.assertEqual(len(augmented_texts[1]), 8)
Exemplo n.º 10
0
    def test_all_combination_error(self):
        """Check that all-combination mode rejects ``aug_p`` other than 1.

        The augmenter is built with ``generate_all_combinations=True`` and
        ``aug_p=0.5``; calling ``augment`` must raise ``AssertionError`` whose
        message contains the expected explanation.
        """
        texts = [
            'Dear NLP, text, texttt Thanks. best regards NLPAug',
        ]
        reserved_tokens = [
            ['Best Regards', 'Best Regards2222', 'Best Regards3333', 'Best Regards4444'],
            ['thx', 'Thanks', 'thank you'],
            ['Dear', 'Hi', 'Hello']
        ]

        aug = naw.ReservedAug(
            aug_p=0.5,
            generate_all_combinations=True,
            reserved_tokens=reserved_tokens,
            case_sensitive=False)

        # Kept byte-for-byte (including its spelling) because it is compared
        # against the message produced by the library.
        expected_message = 'Augmentation probability has to be 1 to genenerate all combinations. Set aug_p=1 in constructor.'

        with self.assertRaises(AssertionError) as error:
            aug.augment(texts)
        # assertIn prints the expected and actual messages on failure.
        self.assertIn(expected_message, str(error.exception))