def _validate_augmenter(self, method, action):
    """Reject a method/action pair that is not supported.

    :param method: Value that must be a member of ``Method.getall()``.
    :param action: Value that must be a member of ``Action.getall()``.
    :raises ValueError: When ``method`` is not supported; otherwise when
        ``action`` is not supported (the method is checked first).
    """
    allowed_methods = Method.getall()
    if method not in allowed_methods:
        method_template = 'Method must be one of {} while {} is passed'
        raise ValueError(method_template.format(allowed_methods, method))

    allowed_actions = Action.getall()
    if action not in allowed_actions:
        action_template = 'Action must be one of {} while {} is passed'
        raise ValueError(action_template.format(allowed_actions, action))
def test_augment_detail(self):
    """Check the change details reported by nested flows.

    Two pipelines are built with ``include_detail=True``. For each one the
    augmented text must differ from the input, at least one detail record
    must be returned, and every record must carry start positions greater
    than -1, a ``change_seq`` greater than 0 and an action listed in
    ``Action.getall()``.
    """
    text = 'The quick brown fox jumps over the lazy dog'

    # Pipeline 1: a Sequential wrapping an optional insert/delete stage
    # followed by a named Sequential that performs a substitution.
    optional_insert_delete = naf.Sometimes(
        [
            nac.RandomCharAug(action="insert"),
            nac.RandomCharAug(action="delete"),
        ],
        pipeline_p=0.5)
    substitute_stage = naf.Sequential(
        [
            nac.RandomCharAug(action="substitute", aug_char_min=1, aug_char_p=0.6, aug_word_p=0.6),
        ],
        name='Sub_Seq')
    sequential_pipeline = naf.Sequential(
        [optional_insert_delete, substitute_stage],
        include_detail=True)

    # Pipeline 2: a Sometimes (pipeline_p=1) wrapping a nested Sometimes and
    # a Sequential chain of three character augmenters.
    nested_insert_delete = naf.Sometimes([
        nac.RandomCharAug(action="insert"),
        nac.RandomCharAug(action="delete"),
    ])
    char_chain = naf.Sequential([
        nac.OcrAug(),
        nac.KeyboardAug(aug_char_min=1),
        nac.RandomCharAug(action="substitute", aug_char_min=1, aug_char_p=0.6, aug_word_p=0.6),
    ])
    sometimes_pipeline = naf.Sometimes(
        [nested_insert_delete, char_chain],
        pipeline_p=1, include_detail=True)

    for pipeline in (sequential_pipeline, sometimes_pipeline):
        result_text, details = pipeline.augment(text)

        self.assertNotEqual(text, result_text)
        self.assertGreater(len(details), 0)

        for detail in details:
            self.assertGreater(detail['orig_start_pos'], -1)
            self.assertGreater(detail['new_start_pos'], -1)
            self.assertGreater(detail['change_seq'], 0)
            self.assertIn(detail['action'], Action.getall())
def test_augment_detail(self):
    """Check the change details reported by the contextual sentence augmenter.

    For every path in ``self.model_paths`` an augmenter is created with
    ``include_detail=True`` and applied to ``self.text``. The augmented text
    must differ from the input, at least one detail record must be returned,
    and every record must reference a token found in the input, have
    ``orig_start_pos`` equal to -1, ``new_start_pos`` greater than -1,
    ``change_seq`` greater than 0 and an action listed in ``Action.getall()``.
    """
    for model_path in self.model_paths:
        aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path, include_detail=True)

        augmented_text, augment_details = aug.augment(self.text)

        # Asserted once; the original repeated this identical check after the
        # detail loop, where it could not observe anything new.
        self.assertNotEqual(self.text, augmented_text)
        self.assertGreater(len(augment_details), 0)

        for augment_detail in augment_details:
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn(augment_detail['orig_token'], self.text)
            # This test pins the original position to exactly -1.
            # NOTE(review): presumably the augmenter reports no position in
            # the source text for its changes - confirm against the augmenter.
            self.assertEqual(augment_detail['orig_start_pos'], -1)
            self.assertGreater(augment_detail['new_start_pos'], -1)
            self.assertGreater(augment_detail['change_seq'], 0)
            self.assertIn(augment_detail['action'], Action.getall())
def test_augment_detail(self):
    """Check the change details reported by word-level augmenters.

    Each augmenter is created with ``include_detail=True`` and applied to a
    fixed sentence. The augmented text must differ from the input, at least
    one detail record must be returned, and every record must reference a
    token found in the input, have start positions greater than -1, a
    ``change_seq`` greater than 0 and an action listed in ``Action.getall()``.
    """
    text = 'The quick brown fox jumps over the lazy dog'
    augs = [
        naw.RandomWordAug(include_detail=True),  # Delete, use SWAP later
        naw.ContextualWordEmbsAug(model_path='bert-base-uncased', include_detail=True)  # Substitute
    ]

    for aug in augs:
        augmented_text, augment_details = aug.augment(text)

        self.assertNotEqual(text, augmented_text)
        self.assertGreater(len(augment_details), 0)

        for augment_detail in augment_details:
            # assertIn reports both operands on failure, unlike assertTrue(a in b),
            # and matches the style of the action check below.
            self.assertIn(augment_detail['orig_token'], text)
            self.assertGreater(augment_detail['orig_start_pos'], -1)
            self.assertGreater(augment_detail['new_start_pos'], -1)
            self.assertGreater(augment_detail['change_seq'], 0)
            self.assertIn(augment_detail['action'], Action.getall())
def test_augment_detail(self):
    """Check character-level change details and use them to restore the input.

    Each augmenter is created with ``include_detail=True`` and applied to a
    fixed sentence. The augmented text must differ from the input, at least
    one detail record must be returned, and every record must reference a
    token found in the input, have start positions greater than -1, a
    ``change_seq`` greater than 0 and an action listed in ``Action.getall()``.
    The detail records are then replayed backwards over the augmented text,
    and the result must equal the original sentence.
    """
    text = 'The quick brown fox jumps over the lazy dog'
    augs = [
        nac.KeyboardAug(min_char=1, include_detail=True),
        nac.OcrAug(min_char=1, include_detail=True),
        nac.RandomCharAug(min_char=2, include_detail=True)
    ]

    for aug in augs:
        augmented_text, augment_details = aug.augment(text)

        self.assertNotEqual(text, augmented_text)
        self.assertGreater(len(augment_details), 0)

        for augment_detail in augment_details:
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn(augment_detail['orig_token'], text)
            self.assertGreater(augment_detail['orig_start_pos'], -1)
            self.assertGreater(augment_detail['new_start_pos'], -1)
            self.assertGreater(augment_detail['change_seq'], 0)
            self.assertIn(augment_detail['action'], Action.getall())

        # Get back original input by re-engineering. Changes are undone in
        # descending order of their original start position.
        # NOTE(review): presumably so that undoing one change does not shift
        # the offsets of changes located further left - confirm.
        reengineering_text = augmented_text
        for change_obj in sorted(augment_details, key=lambda item: item['orig_start_pos'], reverse=True):
            action = change_obj['action']
            new_start_pos = change_obj['new_start_pos']

            if action == Action.DELETE:
                # Re-insert the removed token, followed by a space, at the
                # position recorded for the change.
                text_prefix = reengineering_text[:new_start_pos]
                text_core = change_obj['orig_token'] + ' '
                text_suffix = reengineering_text[new_start_pos:]
            elif action in [Action.INSERT, Action.SUBSTITUTE]:
                # Swap the first occurrence of the new token at or after the
                # recorded position back to the original token.
                text_prefix = reengineering_text[:new_start_pos]
                text_core = reengineering_text[new_start_pos:].replace(
                    change_obj['new_token'], change_obj['orig_token'], 1)
                text_suffix = ''
            else:
                # TODO: reversing Action.SWAP is not implemented yet.
                # Fail explicitly: without this branch the pieces below would
                # be unbound (NameError) or left over from the previous
                # change, hiding the real cause of the failure.
                self.fail('Cannot re-engineer unsupported action: {}'.format(action))

            reengineering_text = (text_prefix + text_core + text_suffix).strip()

        self.assertEqual(text, reengineering_text)