Exemplos de TFWav2Vec2ForCTC em Python, exemplos de transformers.TFWav2Vec2ForCTC em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_modeling_tf_wav2vec2.py Projeto: ruyimarone/pytorch-pretrained-BERT

    def check_training(self, config, input_values, *args):
        """Run one training-mode forward pass and check the loss is not infinite.

        A ``TFWav2Vec2ForCTC`` is built from ``config`` and its feature
        extractor is frozen. Only the first three rows of ``input_values`` are
        used; each row is zeroed beyond a per-row length (a quarter, a half and
        the full width of the input). Random labels are generated two steps
        shorter than the longest feature-extractor output length and then
        padded with ``-100`` back up to that length.

        Args:
            config: configuration handed to the ``TFWav2Vec2ForCTC`` constructor.
            input_values: batch of model inputs; assumed to contain at least
                three rows because three lengths are built -- TODO confirm
                against the caller.
            *args: accepted and ignored.
        """
        model = TFWav2Vec2ForCTC(config)

        # freeze feature encoder
        # NOTE(review): newer transformers releases appear to expose this as
        # `freeze_feature_encoder` -- confirm against the pinned version.
        model.freeze_feature_extractor()

        input_values = input_values[:3]
        batch_size = input_values.shape[0]
        num_samples = input_values.shape[-1]

        # One valid length per row: num_samples // 4, // 2 and // 1.
        input_lengths = tf.constant([num_samples // divisor for divisor in [4, 2, 1]])
        max_length_labels = model.wav2vec2._get_feat_extract_output_lengths(input_lengths)
        longest_output = max(max_length_labels)

        labels = ids_tensor((batch_size, longest_output - 2), model.config.vocab_size)

        # Multiply by a 0/1 mask so samples past each row's length become zero.
        length_mask = tf.sequence_mask(input_lengths, dtype=tf.float32)
        input_values = input_values * length_mask

        # Right-pad the label matrix with -100 up to the longest output length.
        pad_size = longest_output - labels.shape[1]
        labels = tf.pad(labels, ((0, 0), (0, pad_size)), constant_values=-100)

        outputs = model(input_values, labels=labels, training=True)

        self.parent.assertFalse(tf.math.is_inf(outputs.loss))

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_modeling_tf_wav2vec2.py Projeto: ruyimarone/pytorch-pretrained-BERT

    def test_inference_ctc_robust_batched(self):
        """Check batched greedy transcriptions of the large lv60-self checkpoint.

        Four data samples are loaded, padded into one batch by the processor
        and passed through the model together with the attention mask. The
        argmax over the last logits axis is decoded and compared against the
        four reference transcriptions.
        """
        checkpoint = "facebook/wav2vec2-large-960h-lv60-self"
        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2Processor.from_pretrained(checkpoint, do_lower_case=True)

        input_speech = self._load_datasamples(4)

        inputs = processor(
            input_speech,
            return_tensors="tf",
            padding=True,
            sampling_rate=16000,
        )

        outputs = model(inputs.input_values, attention_mask=inputs.attention_mask)

        # Greedy decoding: most likely token id at every position.
        predicted_ids = tf.argmax(outputs.logits, axis=-1)
        predicted_trans = processor.batch_decode(predicted_ids)

        EXPECTED_TRANSCRIPTIONS = [
            "a man said to the universe sir i exist",
            "sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore",
            "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about",
            "his instant panic was followed by a small sharp blow high on his chest",
        ]
        self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_modeling_tf_wav2vec2.py Projeto: ruyimarone/pytorch-pretrained-BERT

    def check_ctc_loss(self, config, input_values, *args):
        """Check that the "mean" CTC loss times the batch size matches the "sum" loss.

        The first three rows of ``input_values`` are kept, and both the inputs
        and an all-ones attention mask are zeroed beyond a per-row length (a
        quarter, a half and the full width). The model is then evaluated twice
        on the same inputs and labels, once per ``ctc_loss_reduction`` setting,
        and the two losses must agree to within ``1e-2``.

        Args:
            config: configuration handed to the ``TFWav2Vec2ForCTC`` constructor.
            input_values: batch of model inputs; assumed to contain at least
                three rows because three lengths are built -- TODO confirm
                against the caller.
            *args: accepted and ignored.
        """
        model = TFWav2Vec2ForCTC(config)

        input_values = input_values[:3]
        attention_mask = tf.ones_like(input_values)

        num_samples = input_values.shape[-1]
        input_lengths = tf.constant([num_samples // divisor for divisor in [4, 2, 1]])
        max_length_labels = model.wav2vec2._get_feat_extract_output_lengths(input_lengths)

        # Labels are one step shorter than the shortest feature-extractor output.
        label_length = min(max_length_labels) - 1
        labels = ids_tensor((input_values.shape[0], label_length), model.config.vocab_size)

        # convert values that are over input_lengths to padding
        length_mask = tf.sequence_mask(input_lengths, dtype=tf.float32)
        input_values = input_values * length_mask
        attention_mask = attention_mask * length_mask

        # Same forward pass under each reduction mode; "sum" runs first.
        losses = {}
        for reduction in ("sum", "mean"):
            model.config.ctc_loss_reduction = reduction
            outputs = model(input_values, attention_mask=attention_mask, labels=labels)
            losses[reduction] = outputs.loss

        batch_size = labels.shape[0]
        difference = batch_size * losses["mean"] - losses["sum"]
        self.parent.assertTrue(abs(difference) < 1e-2)

Exemplo n.º 4

0

Exibir arquivo

 def check_labels_out_of_vocab(self, config, input_values, *args):
     """Expect a ``ValueError`` when labels are drawn from an oversized vocabulary.

     Labels are generated by ``ids_tensor`` with an upper bound 100 larger
     than ``model.config.vocab_size``; presumably this yields ids outside the
     model's vocabulary (``ids_tensor`` is defined elsewhere -- verify).

     Args:
         config: configuration handed to the ``TFWav2Vec2ForCTC`` constructor.
         input_values: batch of model inputs, passed to the model unchanged.
         *args: accepted and ignored.
     """
     model = TFWav2Vec2ForCTC(config)

     batch_size = input_values.shape[0]
     num_samples = input_values.shape[-1]

     input_lengths = tf.constant([num_samples // divisor for divisor in [4, 2, 1]])
     max_length_labels = model.wav2vec2._get_feat_extract_output_lengths(input_lengths)

     # Labels are one step shorter than the shortest feature-extractor output.
     label_length = min(max_length_labels) - 1
     oversized_vocab = model.config.vocab_size + 100
     labels = ids_tensor((batch_size, label_length), oversized_vocab)

     with pytest.raises(ValueError):
         model(input_values, labels=labels)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_modeling_tf_wav2vec2.py Projeto: zhangbo2008/transformers

    def test_inference_ctc_normal(self):
        """Check the greedy transcription of one sample with the base-960h checkpoint.

        A single data sample is processed into TensorFlow input values, run
        through the model, and the argmax over the last logits axis is decoded
        and compared against the reference transcription.
        """
        checkpoint = "facebook/wav2vec2-base-960h"
        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2Processor.from_pretrained(checkpoint, do_lower_case=True)

        input_speech = self._load_datasamples(1)
        features = processor(input_speech, return_tensors="tf", sampling_rate=16000)

        outputs = model(features.input_values)

        # Greedy decoding: most likely token id at every position.
        predicted_ids = tf.argmax(outputs.logits, axis=-1)
        predicted_trans = processor.batch_decode(predicted_ids)

        EXPECTED_TRANSCRIPTIONS = ["a man said to the universe sir i exist"]
        self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)

Exemplo n.º 6

0

Exibir arquivo

    def test_inference_ctc_normal_batched(self):
        """Check batched greedy transcriptions with the base-960h checkpoint.

        Two data samples are padded into one batch by the processor and run
        through the model without an attention mask. The argmax over the last
        logits axis is decoded and compared against the two reference strings.
        """
        checkpoint = "facebook/wav2vec2-base-960h"
        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2Processor.from_pretrained(checkpoint, do_lower_case=True)

        input_speech = self._load_datasamples(2)
        features = processor(
            input_speech,
            return_tensors="tf",
            padding=True,
            sampling_rate=16000,
        )

        outputs = model(features.input_values)

        # Greedy decoding: most likely token id at every position.
        predicted_ids = tf.argmax(outputs.logits, axis=-1)
        predicted_trans = processor.batch_decode(predicted_ids)

        EXPECTED_TRANSCRIPTIONS = [
            "a man said to the universe sir i exist",
            "sweat covered brion's body trickling into the tight lowing cloth that was the only garment he wore",
        ]
        self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)

Exemplo n.º 7

0

Exibir arquivo

    def test_wav2vec2_with_lm(self):
        """Check LM-assisted decoding of one downloaded Spanish sample.

        The sample repository is downloaded, the first file returned by the
        glob is loaded at 16 kHz, and the model logits are decoded by
        ``Wav2Vec2ProcessorWithLM``. The first decoded text must equal the
        reference sentence.
        """
        downloaded_folder = snapshot_download("patrickvonplaten/common_voice_es_sample")
        matching_files = glob.glob(downloaded_folder + "/*")
        file_path = matching_files[0]

        # librosa.load returns a pair; only the waveform (first item) is used.
        sample, _ = librosa.load(file_path, sr=16_000)

        checkpoint = "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"
        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2ProcessorWithLM.from_pretrained(checkpoint)

        features = processor(sample, return_tensors="tf")
        outputs = model(features.input_values)

        # The LM processor decodes from the logits converted to a NumPy array.
        decoded = processor.batch_decode(outputs.logits.numpy())
        transcription = decoded.text

        self.assertEqual(transcription[0], "el libro ha sido escrito por cervantes")

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_modeling_tf_wav2vec2.py Projeto: sashank06/transformers

    def test_wav2vec2_with_lm(self):
        """Check LM-assisted decoding of the first streamed Spanish test sample.

        The first example of the streamed ``common_voice`` Spanish test split
        is resampled from 48 kHz to 16 kHz, run through the model, and the
        logits are decoded by ``Wav2Vec2ProcessorWithLM``. The first decoded
        text must equal the reference sentence.
        """
        ds = load_dataset("common_voice", "es", split="test", streaming=True)
        sample = next(iter(ds))

        # Pass the sample rates by keyword: recent librosa releases make
        # `orig_sr` / `target_sr` keyword-only, so the positional form raises
        # TypeError there, while the keyword form works on old and new versions.
        resampled_audio = librosa.resample(
            sample["audio"]["array"],
            orig_sr=48_000,
            target_sr=16_000,
        )

        checkpoint = "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"
        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2ProcessorWithLM.from_pretrained(checkpoint)

        input_values = processor(resampled_audio, return_tensors="tf").input_values

        logits = model(input_values).logits

        # The LM processor decodes from the logits converted to a NumPy array.
        transcription = processor.batch_decode(logits.numpy()).text

        self.assertEqual(transcription[0],
                         "bien y qué regalo vas a abrir primero")