def test_reset_model(self):
        """Check that sampling parameters given to a new augmenter take effect.

        For every path in ``self.model_paths`` an augmenter is first built with
        ``top_p=0.5`` and its model's ``temperature`` and ``top_k`` are read.
        A second augmenter is then built with both values increased by one,
        and its model must report exactly those increased values.
        """
        for model_path in self.model_paths:
            baseline = nas.ContextualWordEmbsForSentenceAug(model_path=model_path, top_p=0.5)
            expected_temperature = baseline.model.temperature + 1
            expected_top_k = baseline.model.top_k + 1

            reconfigured = nas.ContextualWordEmbsForSentenceAug(
                model_path=model_path,
                temperature=expected_temperature,
                top_k=expected_top_k,
            )

            # NOTE: top_p is not compared by this test.
            self.assertEqual(expected_temperature, reconfigured.model.temperature)
            self.assertEqual(expected_top_k, reconfigured.model.top_k)
示例#2
0
    def execute_by_device(self, device):
        """Exercise the sentence augmenter on ``device`` for every configured model.

        Each path in ``self.model_paths`` is loaded with ``force_reload=True``
        and the resulting augmenter is handed to this test case's
        ``empty_input`` and ``insert`` checks, in that order.
        """
        for path in self.model_paths:
            augmenter = nas.ContextualWordEmbsForSentenceAug(
                model_path=path,
                force_reload=True,
                device=device,
            )
            for check in (self.empty_input, self.insert):
                check(augmenter)

        # Fails when no model path is configured, i.e. the loop ran zero times.
        self.assertLess(0, len(self.model_paths))
    @classmethod
    def setUpClass(cls):
        """Load the ``.env`` file two directories up and build the shared augmenters.

        unittest invokes this hook on the class itself (``cls.setUpClass()``),
        so it has to be a classmethod; without the decorator the call fails
        because ``cls`` is never supplied.

        Sets ``cls.augs`` to one character-level, one word-level and one
        sentence-level augmenter, each created with its default arguments.
        """
        env_config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
        load_dotenv(env_config_path)

        cls.augs = [
            nac.RandomCharAug(),
            naw.ContextualWordEmbsAug(),
            nas.ContextualWordEmbsForSentenceAug()
        ]
    def execute_by_device(self, device):
        """Exercise the sentence augmenter on ``device`` for every configured model.

        For each path in ``self.model_paths`` the augmenter goes through the
        ``empty_input`` check and then the ``insert`` check, the latter once
        with ``self.text`` and once with ``self.texts``.
        """
        for path in self.model_paths:
            augmenter = nas.ContextualWordEmbsForSentenceAug(model_path=path, device=device)
            self.empty_input(augmenter)

            for sample in (self.text, self.texts):
                self.insert(augmenter, sample)

        # Fails when no model path is configured, i.e. the loop ran zero times.
        self.assertLess(0, len(self.model_paths))
    def test_batch_size(self):
        """Check that ``augment`` returns one result per input for several batch sizes.

        Four scenarios are run against the ``distilgpt2`` model: a batch size
        of one, a batch size equal to the input size, a batch size larger than
        the input, and an input (``self.texts`` doubled) larger than the batch.
        """
        def run_case(batch_size, data, factor=1):
            # Build a fresh augmenter per scenario and compare output and input counts.
            augmenter = nas.ContextualWordEmbsForSentenceAug(
                model_path='distilgpt2', batch_size=batch_size)
            outputs = augmenter.augment(data)
            self.assertEqual(len(outputs), len(self.texts) * factor)

        # 1 per batch
        run_case(1, self.texts)

        # batch size = input size
        run_case(len(self.texts), self.texts)

        # batch size > input size
        run_case(len(self.texts) + 1, self.texts)

        # input size > batch size
        run_case(2, self.texts * 2, factor=2)
示例#6
0
    def execute_by_device(self, device):
        """Run the full set of sampling checks on ``device`` for every configured model.

        For each path in ``self.model_paths`` the augmenter is passed, in
        order, to ``empty_input``, ``insert``, ``top_k``, ``top_p``,
        ``top_k_top_p`` and ``no_top_k_top_p``.
        """
        for path in self.model_paths:
            augmenter = nas.ContextualWordEmbsForSentenceAug(model_path=path, device=device)

            checks = (
                self.empty_input,
                self.insert,
                self.top_k,
                self.top_p,
                self.top_k_top_p,
                self.no_top_k_top_p,
            )
            for check in checks:
                check(augmenter)

        # Fails when no model path is configured, i.e. the loop ran zero times.
        self.assertLess(0, len(self.model_paths))
示例#7
0
    def test_optimize(self):
        """Check that augmentation is faster with the optimize setting enabled.

        For ``gpt2`` and ``distilgpt2`` on CPU, the augmenter is timed over
        ``epoch`` rounds of ``epoch`` ``augment`` calls, first with
        ``external_memory=1024`` and then, on the same augmenter, with
        ``external_memory=0``. The un-optimized run must take strictly longer
        in total and on average.

        Durations are compared unrounded: rounding every sample to two
        decimals before summing can turn small real differences into ties,
        which the strict ``assertGreater`` then reports as failures. Rounding
        is applied only to the printed summary.

        NOTE(review): the assertions depend on wall-clock timing and may be
        sensitive to machine load.
        """
        model_paths = ['gpt2', 'distilgpt2']
        device = 'cpu'
        enable_optimize = {'external_memory': 1024, 'return_proba': True}
        disable_optimize = {'external_memory': 0, 'return_proba': True}
        epoch = 10

        def timed_rounds(augmenter):
            # Return the raw duration (seconds) of each round of `epoch` augment calls.
            durations = []
            for _ in range(epoch):
                start_dt = time.monotonic()
                for _ in range(epoch):
                    augmenter.augment(self.text)
                durations.append(time.monotonic() - start_dt)
            return durations

        for model_path in model_paths:
            # Optimized
            aug = nas.ContextualWordEmbsForSentenceAug(
                model_path=model_path,
                device=device,
                optimize=enable_optimize,
                force_reload=True)
            durations = timed_rounds(aug)
            optimized_total_duration = sum(durations)
            optimized_average_duration = optimized_total_duration / len(durations)

            # No optimized: reuse the same augmenter with the setting switched off.
            aug.model.optimize = disable_optimize
            durations = timed_rounds(aug)
            no_optimized_total_duration = sum(durations)
            no_optimized_average_duration = no_optimized_total_duration / len(durations)

            print('Model:{}, Optimized: {}({}), No Optimized: {}({})'.format(
                model_path,
                round(optimized_total_duration, 2),
                round(optimized_average_duration, 2),
                round(no_optimized_total_duration, 2),
                round(no_optimized_average_duration, 2)))

            self.assertGreater(no_optimized_total_duration,
                               optimized_total_duration)
            self.assertGreater(no_optimized_average_duration,
                               optimized_average_duration)
示例#8
0
    def test_augment_detail(self):
        """Check the change details returned when ``include_detail=True``.

        For every path in ``self.model_paths`` the augmenter must return a
        text that differs from ``self.text`` together with a non-empty list of
        detail records. Each record is checked for its ``orig_token``,
        ``orig_start_pos``, ``new_start_pos``, ``change_seq`` and ``action``
        entries.
        """
        for model_path in self.model_paths:
            aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path,
                                                       include_detail=True)

            augmented_text, augment_details = aug.augment(self.text)

            self.assertNotEqual(self.text, augmented_text)
            self.assertGreater(len(augment_details), 0)
            for augment_detail in augment_details:
                # assertIn reports both operands on failure, unlike assertTrue(x in y).
                self.assertIn(augment_detail['orig_token'], self.text)
                # NOTE(review): -1 presumably marks "no position in the original
                # text" for generated content -- confirm against the augmenter.
                self.assertEqual(augment_detail['orig_start_pos'], -1)
                self.assertGreater(augment_detail['new_start_pos'], -1)
                self.assertGreater(augment_detail['change_seq'], 0)
                self.assertIn(augment_detail['action'], Action.getall())
示例#9
0
    @classmethod
    def setUpClass(cls):
        """Load the ``.env`` file, the sample audio clip and the shared augmenters.

        unittest invokes this hook on the class itself (``cls.setUpClass()``),
        so it has to be a classmethod; without the decorator the call fails
        because ``cls`` is never supplied.

        Sets ``cls.sample_wav_file``, ``cls.audio``, ``cls.sampling_rate``,
        ``cls.textual_augs`` and ``cls.audio_augs``.

        Raises:
            RuntimeError: if the ``DATA_DIR`` environment variable is not set
                after the ``.env`` file has been loaded.
        """
        env_config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
        load_dotenv(env_config_path)

        data_dir = os.environ.get("DATA_DIR")
        if data_dir is None:
            # Fail with an explicit message instead of a TypeError from None + str.
            raise RuntimeError(
                'DATA_DIR environment variable is not set '
                '(looked for it in the environment and in {})'.format(env_config_path))

        # https://freewavesamples.com/yamaha-v50-rock-beat-120-bpm
        # os.path.join works whether or not DATA_DIR ends with a path separator.
        cls.sample_wav_file = os.path.join(
            data_dir, 'Yamaha-V50-Rock-Beat-120bpm.wav')
        cls.audio, cls.sampling_rate = librosa.load(cls.sample_wav_file)

        cls.textual_augs = [
            nac.RandomCharAug(),
            naw.ContextualWordEmbsAug(),
            nas.ContextualWordEmbsForSentenceAug()
        ]

        cls.audio_augs = [
            naa.CropAug(sampling_rate=cls.sampling_rate),
            naa.SpeedAug(),
        ]
示例#10
0
    def test_optimize(self):
        """Check that augmentation changes the text with optimization on and off.

        For ``gpt2`` and ``distilgpt2`` the model's default optimize config is
        fetched twice, once set to ``external_memory=1024`` and once to
        ``external_memory=0``. With each config installed on the model,
        ``augment`` must return something different from ``self.text``.

        The model's previous ``optimize`` value is restored in a ``finally``
        block so that a failing assertion or an exception from ``augment``
        cannot leave the modified setting on the model.
        """
        model_paths = ['gpt2', 'distilgpt2']
        # 'xlnet-base-cased' is not exercised here.

        for model_path in model_paths:
            aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path)

            enable_optimize = aug.model.get_default_optimize_config()
            enable_optimize['external_memory'] = 1024
            disable_optimize = aug.model.get_default_optimize_config()
            disable_optimize['external_memory'] = 0

            original_optimize = aug.model.optimize
            try:
                for optimize_config in (enable_optimize, disable_optimize):
                    aug.model.optimize = optimize_config
                    augmented_data = aug.augment(self.text)
                    self.assertNotEqual(self.text, augmented_data)
            finally:
                aug.model.optimize = original_optimize
示例#11
0
 def test_none_device(self):
     """Check that ``device=None`` resolves to either ``'cuda'`` or ``'cpu'``.

     Every path in ``self.model_paths`` is loaded with ``force_reload=True``
     and ``device=None``; the augmenter's ``device`` attribute must then be
     one of the two accepted names.
     """
     for model_path in self.model_paths:
         aug = nas.ContextualWordEmbsForSentenceAug(
             model_path=model_path, force_reload=True, device=None)
         # assertIn shows the actual device on failure, unlike assertTrue(a or b).
         self.assertIn(aug.device, ('cuda', 'cpu'))
示例#12
0
    def test_incorrect_model_name(self):
        """Check that an unknown ``model_path`` raises ``ValueError`` with the expected message."""
        with self.assertRaises(ValueError) as error:
            nas.ContextualWordEmbsForSentenceAug(model_path='unknown')

        # assertIn prints the actual exception text when the message does not match.
        self.assertIn('Model name value is unexpected.', str(error.exception))
示例#13
0
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
import os

from nlpaug.util import Action

# Demo: augment one sentence with three different language models and print
# the original next to each result.
text = "Embarrassment is a common emotion. But as a bot I don't really feel it."

# XLNet, asking for three augmented outputs via n=3.
aug = nas.ContextualWordEmbsForSentenceAug(model_path='xlnet-base-cased')
augmented_texts = aug.augment(text, n=3)
print("Original:", text, "Augmented Texts:", augmented_texts, sep="\n")

# GPT-2, default number of outputs.
aug = nas.ContextualWordEmbsForSentenceAug(model_path='gpt2')
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

# DistilGPT-2, default number of outputs.
aug = nas.ContextualWordEmbsForSentenceAug(model_path='distilgpt2')
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")