Пример #1
0
def split_word(text):
    """Run the split augmenter over ``text``, print the result and return it.

    A default-configured ``naw.SplitAug`` (which splits a word into two
    tokens at random) is applied to ``text``. The output is printed under an
    "Attacked Text:" header.

    Args:
        text: Input handed unchanged to ``SplitAug.augment``.

    Returns:
        The value produced by ``SplitAug.augment`` for ``text``.
    """
    augmenter = naw.SplitAug()
    attacked_text = augmenter.augment(text)
    print("Attacked Text:")
    print(attacked_text)
    # Hand the result back so callers can reuse it instead of only seeing it
    # on stdout; callers that ignored the previous None are unaffected.
    return attacked_text
Пример #2
0
    def test_split_min_char(self):
        """SplitAug(min_char=6) must return the sample text unchanged."""
        # Both sample words are five characters long, below the min_char
        # value configured here.
        samples = ['quick brown']
        splitter = naw.SplitAug(min_char=6)

        for original in samples:
            self.assertEqual(original, splitter.augment(original))
Пример #3
0
    def test_split(self):
        """Default SplitAug output must be longer than the input sentence."""
        samples = ['The quick brown fox jumps over the lazy dog']
        splitter = naw.SplitAug()

        for original in samples:
            result = splitter.augment(original)

            # Presumably each split inserts a separator, so the output grows;
            # the test only checks the length relation.
            self.assertLess(len(original), len(result))
Пример #4
0
    def __init__(self):
        """Create the fixed pool of word-level augmenters kept in ``self.augs``.

        The pool holds, in order: a random-word augmenter, a contextual
        embedding augmenter in "substitute" mode, a WordNet synonym
        augmenter, a split augmenter, and a contextual embedding augmenter
        in "insert" mode.
        """
        bert_model = 'bert-base-uncased'

        # List elements are evaluated left to right, so the augmenters are
        # constructed in the same order as they are stored.
        self.augs = [
            naw.RandomWordAug(),
            naw.ContextualWordEmbsAug(model_path=bert_model,
                                      action="substitute"),
            naw.SynonymAug(aug_src='wordnet'),
            naw.SplitAug(),
            naw.ContextualWordEmbsAug(model_path=bert_model,
                                      action="insert"),
        ]
Пример #5
0
    def __init__(self,
                 aug_max=2,
                 aug_p=0.3,
                 tokenizer=None,
                 always_apply=False,
                 p=0.5):
        """Set up the transform and its underlying ``naw.SplitAug`` instance.

        Args:
            aug_max: Forwarded to ``naw.SplitAug`` as ``aug_max``.
            aug_p: Forwarded to ``naw.SplitAug`` as ``aug_p``.
            tokenizer: Forwarded to ``naw.SplitAug`` as ``tokenizer``.
            always_apply: Passed positionally to the parent initializer.
            p: Passed positionally to the parent initializer.
        """
        super().__init__(always_apply, p)

        # Gather the augmenter settings first, then build the augmenter.
        split_options = dict(aug_max=aug_max,
                             aug_p=aug_p,
                             tokenizer=tokenizer)
        self.aug = naw.SplitAug(**split_options)
Пример #6
0
        # Read every row from the CSV reader into memory before writing.
        rows = []
        for row in csv_reader:
            rows.append(row)

        for row in list(rows): # Delete character randomly
            # Only rows whose second column is not "flag", 'R' or empty are
            # counted and considered for augmentation.
            # NOTE(review): presumably these values mark header / excluded
            # rows -- confirm against the input file.
            if row[1] != "flag" and row[1] !='R' and row[1]!="":
                counter += 1
                # Assuming counter starts at 0 (it is initialised outside this
                # excerpt), the 1st and 3rd of every three qualifying rows are
                # augmented and the 2nd is left untouched.
                # NOTE(review): str() wraps whatever augment() returns; if it
                # returns a list, the list's repr is stored -- confirm.
                if counter != 2:
                    row[2] = str(character_delete.augment(row[2]))
                # Restart the three-row cycle.
                if counter == 3:
                    counter = 0
            # Every row is written out, whether or not it was augmented.
            csv_writer.writerow(row)

with open("input_classification_test_data.csv","r") as input:
    with open("/Users/wenyaxie/Downloads/negative_data_split_word.csv","w") as output:
        split_words = naw.SplitAug()

        csv_reader = csv.reader(input)
        csv_writer = csv.writer(output)
        counter = 0

        rows = []
        for row in csv_reader:
            rows.append(row)

        for row in list(rows): # Split word to two tokens randomly
            if row[1] != "flag" and row[1] !='R' and row[1]!="":
                counter += 1
                if counter != 2:
                    row[2] = str(split_words.augment(row[2]))
                if counter == 3: