def split_word(text):
    """Randomly split words in *text* into two tokens using nlpaug's SplitAug.

    Fix over the original: the augmented result was computed and printed but
    then discarded; it is now also returned so callers can use it
    programmatically (printing behavior is unchanged).

    Args:
        text: Input string (or list of strings) to augment.

    Returns:
        Whatever ``SplitAug.augment`` produces — a list of augmented strings
        in recent nlpaug versions (a plain string in older ones).
    """
    # Split word to two tokens randomly
    aug = naw.SplitAug()
    attacked_text = aug.augment(text)
    print("Attacked Text:")
    print(attacked_text)
    return attacked_text
def test_split_min_char(self):
    """Words shorter than ``min_char`` must be left untouched by SplitAug."""
    aug = naw.SplitAug(min_char=6)
    for sample in ['quick brown']:
        # Both tokens are under 6 chars, so augmentation is a no-op.
        self.assertEqual(sample, aug.augment(sample))
def test_split(self):
    """SplitAug should lengthen the text by splitting some word in two."""
    aug = naw.SplitAug()
    for sample in ['The quick brown fox jumps over the lazy dog']:
        result = aug.augment(sample)
        # An inserted split adds at least one character (the separator).
        self.assertLess(len(sample), len(result))
def __init__(self):
    """Assemble the pool of word-level nlpaug augmenters used by this object.

    The pool mixes deletion, contextual substitution/insertion (BERT),
    WordNet synonyms, and random word splitting.
    """
    self.augs = [
        naw.RandomWordAug(),
        naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="substitute"),
        naw.SynonymAug(aug_src='wordnet'),
        naw.SplitAug(),
        naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="insert"),
    ]
def __init__(self, aug_max=2, aug_p=0.3, tokenizer=None, always_apply=False, p=0.5):
    """Wrap nlpaug's SplitAug as a transform (albumentations-style parent — TODO confirm).

    Args:
        aug_max: Maximum number of words to split per call.
        aug_p: Probability of augmenting each eligible word.
        tokenizer: Optional custom tokenizer passed through to SplitAug.
        always_apply: Forwarded to the parent transform constructor.
        p: Probability of applying this transform; forwarded to the parent.
    """
    super().__init__(always_apply, p)
    # Delegate the actual word splitting to nlpaug.
    self.aug = naw.SplitAug(
        aug_max=aug_max,
        aug_p=aug_p,
        tokenizer=tokenizer,
    )
rows = [] for row in csv_reader: rows.append(row) for row in list(rows): # Delete character randomly if row[1] != "flag" and row[1] !='R' and row[1]!="": counter += 1 if counter != 2: row[2] = str(character_delete.augment(row[2])) if counter == 3: counter = 0 csv_writer.writerow(row) with open("input_classification_test_data.csv","r") as input: with open("/Users/wenyaxie/Downloads/negative_data_split_word.csv","w") as output: split_words = naw.SplitAug() csv_reader = csv.reader(input) csv_writer = csv.writer(output) counter = 0 rows = [] for row in csv_reader: rows.append(row) for row in list(rows): # Split word to two tokens randomly if row[1] != "flag" and row[1] !='R' and row[1]!="": counter += 1 if counter != 2: row[2] = str(split_words.augment(row[2])) if counter == 3: