Exemplo n.º 1
0
class TranscribeRunner(RunnerBase):
    """Runner that transcribes one pending recording per ``call()``.

    Every invocation asks the table for the next record awaiting
    transcription, downloads the record's WAV blob to a temporary local
    file, sends that file to the Google transcriber, and writes the
    resulting transcript and status back to the table.
    """

    def __init__(self):
        # Collaborators are constructed eagerly, in this order.
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()

    def __str__(self):
        """Return the fixed display name of this runner."""
        return "TranscribeRunner"

    def call(self):
        """Transcribe the next pending record and persist the outcome.

        The transcript is printed only when it is truthy; the table is
        updated with the transcript and status in every case.
        """
        next_record = self.azure_table.retrieve_next_record_for_transcribing()
        blob, record_key = next_record

        with TmpFileCleanup() as cleanup:
            # A random UUID keeps the temporary WAV name unique.
            wav_name = "{0}.{1}".format(uuid.uuid4(), "wav")
            wav_path = "/".join((local_tmp_dir, wav_name))

            # Registered before the download starts.
            # NOTE(review): presumably TmpFileCleanup deletes everything in
            # tmp_files when the block exits -- confirm against its source.
            cleanup.tmp_files.append(wav_path)

            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                blob,
                wav_path,
            )

            result = self.googleTranscriber.transcribe_audio_file_path(
                wav_path,
            )
            text, status = result

            if text:
                print("Transcript for {partition_key}: {transcript}".format(
                    partition_key=record_key,
                    transcript=text,
                ))

            # Still inside the cleanup block, as in the original flow.
            self.azure_table.update_transcript(record_key, text, status)
Exemplo n.º 2
0
class TranscribeRunner(RunnerBase):
    """Runner that transcribes one pending recording per ``call()``.

    Each invocation downloads the next record's WAV blob, trims it with
    ``sox``, transcribes the trimmed audio, and stores the transcript and
    status in the table. Consecutive unsuccessful transcriptions are
    counted; once the count exceeds ``MAX_ALLOWABLE_ERRORS`` the runner
    refuses to continue.
    """

    def __init__(self):
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()
        # Number of unsuccessful transcriptions since the last success.
        self.consec_error_count = 0

    def __str__(self):
        """Return the fixed display name of this runner."""
        return "TranscribeRunner"

    def call(self):
        """Transcribe the next pending record and persist the outcome.

        Raises:
            TooManyErrorsException: if the consecutive-error count already
                exceeds ``self.MAX_ALLOWABLE_ERRORS`` (not defined in this
                class; presumably provided by ``RunnerBase`` -- confirm).
            TranscriptionError: if the transcription status is anything
                other than ``TranscriptionStatus.success``. The table has
                already been updated when this is raised.
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        azure_blob, partition_key = \
            self.azure_table.retrieve_next_record_for_transcribing()

        with TmpFileCleanup() as tmp_file_store:
            filename = "{0}.{1}".format(uuid.uuid4(), "wav")
            local_filename = local_tmp_dir + "/" + filename
            local_trim_filename = local_tmp_dir + "/trim_" + filename

            # Register BOTH temporary files up front. Previously only the
            # downloaded file was registered, so the sox output was never
            # handed to the cleanup helper.
            # NOTE(review): assumes TmpFileCleanup removes the files listed
            # in tmp_files on exit and tolerates paths that were never
            # created -- confirm against its implementation.
            tmp_file_store.tmp_files.append(local_filename)
            tmp_file_store.tmp_files.append(local_trim_filename)

            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                azure_blob,
                local_filename,
            )

            # "trim 0 59" keeps at most the first 59 seconds of audio.
            # NOTE(review): the sox exit code is ignored, so a failed trim
            # only surfaces later through the transcriber -- consider
            # checking it.
            subprocess.call(
                ["sox", local_filename, local_trim_filename, "trim", "0", "59"]
            )
            transcript, transcription_status = \
                self.googleTranscriber.transcribe_audio_file_path(
                    local_trim_filename,
                )

        # The result is stored whether or not transcription succeeded.
        self.azure_table.update_transcript(
            partition_key,
            transcript,
            transcription_status,
        )

        if transcription_status != TranscriptionStatus.success:
            self.consec_error_count += 1
            # str.format accepts any status object; "+" concatenation would
            # raise TypeError for a non-string status and hide this error.
            raise TranscriptionError(
                "Transcription failed, status: {0}".format(
                    transcription_status))

        self.consec_error_count = 0
        print("Transcript for {partition_key}: {transcript}".format(
            partition_key=partition_key,
            transcript=transcript,
        ))