Пример #1
0
    def test_empty_input_substitute(self):
        """A whitespace-only input is expected to come back as an empty string."""
        blank = ' '

        for augmenter in [naw.SpellingAug()]:
            self.assertEqual('', augmenter.augment(blank))
Пример #2
0
def spell_attack(text, n):
    """Print spelling-mistake variants of ``text``.

    A ``SpellingAug`` is built from ``models/spelling_en.txt`` and asked for
    ``n`` augmentations of ``text``; the result is printed under an
    "Attacked Texts:" header.  Nothing is returned.
    """
    speller = naw.SpellingAug('models/spelling_en.txt')
    variants = speller.augment(text, n=n)

    print("Attacked Texts:")
    print(variants)
Пример #3
0
    def test_substitute_stopwords(self):
        """Check that stopwords longer than ``aug_n`` characters are left alone.

        The first three words of the sample sentence, lowercased, are handed
        to ``SpellingAug`` as stopwords.  Original and augmented tokens are
        then compared pairwise: a token that is a stopword and longer than
        ``aug_n`` must equal its augmented counterpart; every other token is
        only counted.
        """
        texts = ['The quick brown fox jumps over the lazy dog']

        stopwords = [t.lower() for t in texts[0].split(' ')[:3]]
        aug_n = 3

        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        dict_path = os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt')
        aug = naw.SpellingAug(dict_path=dict_path, stopwords=stopwords)

        for text in texts:
            self.assertLess(0, len(text))
            augmented_text = aug.augment(text)

            augmented_tokens = aug.tokenizer(augmented_text)
            tokens = aug.tokenizer(text)

            # Number of tokens that were NOT subject to the stopword check.
            augmented_cnt = 0

            for token, augmented_token in zip(tokens, augmented_tokens):
                if token.lower() in stopwords and len(token) > aug_n:
                    self.assertEqual(token.lower(), augmented_token)
                else:
                    augmented_cnt += 1

            self.assertGreater(augmented_cnt, 0)

        self.assertLess(0, len(texts))
Пример #4
0
    def test_empty_input_substitute(self):
        """Empty or whitespace-only inputs must yield ``None`` or blank text.

        Every substitute-style augmenter is exercised twice: once with each
        blank string on its own and once with the whole list in one call.
        """
        blank_inputs = ['', '           ']

        self.word2vec_model.action = 'substitute'
        self.context_word_embs_model.action = 'substitute'

        def is_blank(result):
            return result is None or result.strip() == ''

        augmenters = [
            naw.SpellingAug(),
            naw.AntonymAug(),
            naw.RandomWordAug(action='substitute'),
            naw.SynonymAug(aug_src='wordnet'),
            naw.TfIdfAug(model_path=self.tfidf_model_path,
                         action="substitute"),
            self.word2vec_model,
            self.context_word_embs_model,
        ]

        for augmenter in augmenters:
            # One string per call.
            for blank in blank_inputs:
                self.assertTrue(is_blank(augmenter.augment(blank)))

            # All strings in a single call.
            for result in augmenter.augment(blank_inputs):
                self.assertTrue(is_blank(result))
Пример #5
0
    def test_case(self):
        """Exercise letter-case handling for swap, insert, substitute and delete.

        The randomised actions are retried up to ten times each and pass as
        soon as one attempt produces the expected casing.
        """
        # Swap
        aug = naw.RandomWordAug(action='swap')
        self.assertEqual('bB aA', aug.augment('aA bB'))

        # (sentence, positional arguments for change_case, expected tokens)
        swap_cases = [
            ('I love McDonalds', (1, 0, 1), ['Love', 'I', 'McDonalds']),
            ('I love McDonalds', (0, 1, 1), ['Love', 'I', 'McDonalds']),
            ('He loves McDonalds', (1, 0, 1), ['Loves', 'he', 'McDonalds']),
            ('He loves McDonalds', (0, 1, 1), ['Loves', 'he', 'McDonalds']),
            ('He loves McDonalds', (2, 1, 1), ['He', 'McDonalds', 'loves']),
        ]
        for sentence, positions, expected_tokens in swap_cases:
            doc = Doc(sentence, aug.tokenizer(sentence))
            swapped = aug.change_case(doc, *positions).get_augmented_tokens()
            self.assertEqual(expected_tokens, swapped)

        # Insert
        aug = naw.TfIdfAug(model_path=self.tfidf_model_path, action='insert')
        attempts = (aug.augment('Good') for _ in range(10))
        self.assertTrue(any(
            'good' in candidate
            and aug.get_word_case(candidate.split(' ')[0]) == 'capitalize'
            for candidate in attempts))

        # Substitute
        aug = naw.RandomWordAug(action='substitute', target_words=['abc'])
        self.assertTrue(any(
            aug.augment('I love') == 'Abc love' for _ in range(10)))

        aug = naw.AntonymAug()
        self.assertEqual('Unhappy', aug.augment('Happy'))

        # Do not change if target word is non-lower
        aug = naw.SpellingAug()
        self.assertEqual('RE', aug.augment('Re'))

        # Delete case
        aug = naw.RandomWordAug(action='delete')
        self.assertTrue(any(
            aug.augment('I love') == 'Love' for _ in range(10)))
Пример #6
0
    def test_read_default_dict(self):
        """``SpellingAug()`` without arguments exposes a dict path and can augment."""
        speller = naw.SpellingAug()
        self.assertTrue(speller.model.dict_path)

        # Only verifies that augmenting with the default dictionary does not raise.
        sample = 'abcdef'
        speller.augment(sample)
        self.assertTrue(True)
Пример #7
0
    def test_oov(self):
        """A long run of 'a' (used as an out-of-vocabulary word) must come back unchanged."""
        text = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'

        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        dict_path = os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt')
        aug = naw.SpellingAug(dict_path=dict_path)
        augmented_text = aug.augment(text)

        self.assertEqual(text, augmented_text)
Пример #8
0
    def test_oov(self):
        """A long run of 'a' (used as an out-of-vocabulary word) must come back unchanged."""
        unknown_word = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'

        dictionary = os.path.join(self.model_dir, 'spelling_en.txt')
        speller = naw.SpellingAug(dict_path=dictionary)

        self.assertEqual(unknown_word, speller.augment(unknown_word))
Пример #9
0
    def test_empty_input_substitute(self):
        """A whitespace-only input is expected to come back as an empty string."""
        text = ' '

        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        dict_path = os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt')
        augs = [
            naw.SpellingAug(dict_path=dict_path)
        ]

        for aug in augs:
            augmented_text = aug.augment(text)
            self.assertEqual('', augmented_text)
Пример #10
0
    def test_empty_input(self):
        """A whitespace-only input is expected to be returned exactly as given."""
        texts = [' ']

        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        dict_path = os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt')
        aug = naw.SpellingAug(dict_path=dict_path)

        for text in texts:
            augmented_text = aug.augment(text)

            self.assertEqual(text, augmented_text)

        self.assertEqual(1, len(texts))
def spelling_aug(corpus):
    """Build a corpus whose training split holds spelling-augmented sentences.

    Each sentence in ``corpus.train`` is passed to ``SpellingAug.augment``
    with ``n=3`` and the outputs for all sentences are collected into one
    list.  That list, wrapped in ``SentenceDataset``, becomes the training
    split of the returned ``Corpus``; ``dev`` and ``test`` are carried over
    from the input unchanged.

    The previous version rebound its result variable on every iteration, so
    only the last sentence's augmentations reached the new corpus and an
    empty training split raised ``UnboundLocalError``.
    """
    aug = naw.SpellingAug()
    augmented_sentences = []

    # Go through all training sentences and keep every augmentation.
    for sentence in corpus.train:
        augmented = aug.augment(sentence, n=3)
        if not augmented:
            # Nothing was produced for this sentence (None or empty result).
            continue
        if isinstance(augmented, str):
            # A single string would otherwise be extended character by character.
            augmented_sentences.append(augmented)
        else:
            augmented_sentences.extend(augmented)

    return Corpus(train=SentenceDataset(augmented_sentences),
                  dev=corpus.dev,
                  test=corpus.test)
Пример #12
0
def augment_(isear_train):
    """Return ``isear_train`` followed by two spelling-augmented copies of itself.

    In each copy, every row keeps its value at position 0 and has its value
    at position 1 replaced by the output of ``SpellingAug.augment``.  The
    copies take the column labels of ``isear_train`` and the three frames
    are concatenated with a fresh index.
    """
    import nlpaug.augmenter.word as naw

    speller = naw.SpellingAug()

    def _augment_row(row):
        return pd.Series([row[0], speller.augment(row[1])])

    def _augmented_copy():
        frame = isear_train.apply(_augment_row, axis=1)
        frame.columns = isear_train.columns
        return frame

    first_copy = _augmented_copy()
    second_copy = _augmented_copy()
    return pd.concat([isear_train, first_copy, second_copy],
                     ignore_index=True)
Пример #13
0
def nlpaug(word):
    """Run ``word`` through a ``Sometimes`` flow of character and spelling augmenters.

    The flow wraps OCR, keyboard, four random-character (insert, substitute,
    swap, delete) and spelling augmenters; the augmented value is returned.
    """
    augmenters = [
        nac.OcrAug(),
        nac.KeyboardAug(),
        nac.RandomCharAug(action="insert"),
        nac.RandomCharAug(action="substitute"),
        nac.RandomCharAug(action="swap"),
        nac.RandomCharAug(action="delete"),
        naw.SpellingAug(),
    ]
    flow = naf.Sometimes(augmenters)
    return flow.augment(word)
Пример #14
0
    def test_substitute(self):
        """Augmenting a regular sentence must produce text that differs from the input."""
        texts = ['The quick brown fox jumps over the lazy dog']

        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        dict_path = os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt')
        aug = naw.SpellingAug(dict_path=dict_path)

        for text in texts:
            self.assertLess(0, len(text))
            augmented_text = aug.augment(text)

            self.assertNotEqual(text, augmented_text)

        self.assertLess(0, len(texts))
Пример #15
0
    def test_substitute(self):
        """Augmenting a regular sentence must produce text that differs from the input."""
        sentences = ['The quick brown fox jumps over the lazy dog']

        dictionary = os.path.join(self.model_dir, 'spelling_en.txt')
        speller = naw.SpellingAug(dict_path=dictionary)

        for sentence in sentences:
            # Guard against an accidentally empty fixture.
            self.assertLess(0, len(sentence))
            self.assertNotEqual(sentence, speller.augment(sentence))

        self.assertLess(0, len(sentences))
Пример #16
0
    def test_case(self):
        """Exercise letter-case handling for swap, insert, substitute and delete.

        The randomised actions are retried up to ten times each and pass as
        soon as one attempt produces the expected casing.
        """
        # Swap
        aug = naw.RandomWordAug(action='swap')
        self.assertEqual('bB aA', aug.augment('aA bB'))
        self.assertEqual(['Love', 'I', 'McDonalds'], aug.change_case('I love McDonalds'.split(' '), 1, 0))
        self.assertEqual(['Love', 'I', 'McDonalds'], aug.change_case('I love McDonalds'.split(' '), 0, 1))
        self.assertEqual(['Loves', 'he', 'McDonalds'], aug.change_case('He loves McDonalds'.split(' '), 1, 0))
        self.assertEqual(['Loves', 'he', 'McDonalds'], aug.change_case('He loves McDonalds'.split(' '), 0, 1))
        self.assertEqual(['He', 'McDonalds', 'loves'], aug.change_case('He loves McDonalds'.split(' '), 2, 1))

        # Insert
        aug = naw.TfIdfAug(model_path=os.environ.get("MODEL_DIR"), action='insert')
        expected = False
        for _ in range(10):
            augmented_text = aug.augment('Good')
            if 'good' in augmented_text and aug.get_word_case(augmented_text.split(' ')[0]) == 'capitalize':
                expected = True
                break
        self.assertTrue(expected)

        # Substitute
        aug = naw.RandomWordAug(action='substitute', target_words=['abc'])
        expected = False
        for _ in range(10):
            augmented_text = aug.augment('I love')
            if augmented_text == 'Abc love':
                expected = True
                break
        self.assertTrue(expected)

        aug = naw.AntonymAug()
        self.assertEqual('Unhappy', aug.augment('Happy'))

        # Do not change if target word is non-lower
        # Join path components rather than concatenating strings so that
        # MODEL_DIR works with or without a trailing separator.  Indexing
        # os.environ makes an unset variable fail with its own name.
        aug = naw.SpellingAug(
            dict_path=os.path.join(os.environ["MODEL_DIR"], 'spelling_en.txt'))
        self.assertEqual('RE', aug.augment('Re'))

        # Delete case
        aug = naw.RandomWordAug(action='delete')
        expected = False
        for _ in range(10):
            augmented_text = aug.augment('I love')
            if augmented_text == 'Love':
                expected = True
                break
        self.assertTrue(expected)
Пример #17
0
def spelling_aug(text):
  """Return the result of one ``SpellingAug`` pass (``n=1``) over ``text``."""
  speller = naw.SpellingAug()
  return speller.augment(text, n=1)