Пример #1
0
def main():
    """Transcribe one or more audio files with Cheetah and print the results.

    Command-line arguments select the Cheetah dynamic library, the acoustic
    and language model files, the license file and a comma-separated list of
    audio files (``--audio_paths``, required).  Each file is fed to the engine
    frame by frame and its transcript is printed on its own line.

    Samples left over after the last whole frame of a file are not processed.

    Raises:
        ValueError: if an audio file's sample rate differs from the one
            Cheetah expects, or if the file holds more than one channel.
    """
    def _abs_path(rel_path):
        # Default resources are resolved two directories above this file.
        return os.path.join(os.path.dirname(__file__), '../..', rel_path)

    parser = argparse.ArgumentParser()

    parser.add_argument('--library_path',
                        help="absolute path to Cheetah's dynamic library",
                        type=str,
                        default=_abs_path('lib/linux/x86_64/libpv_cheetah.so'))

    parser.add_argument('--acoustic_model_path',
                        help='absolute path to acoustic model parameter file',
                        type=str,
                        default=_abs_path('lib/common/acoustic_model.pv'))

    parser.add_argument('--language_model_path',
                        help='absolute path to language model parameter file',
                        type=str,
                        default=_abs_path('lib/common/language_model.pv'))

    parser.add_argument(
        '--license_path',
        help='absolute path to license file',
        type=str,
        default=_abs_path('resources/license/cheetah_eval_linux_public.lic'))

    parser.add_argument(
        '--audio_paths',
        help='comma-separated absolute paths to audio files to be transcribed',
        type=str,
        required=True)

    args = parser.parse_args()

    # Skip empty entries so a stray or trailing comma does not turn into an
    # attempt to open the empty path.
    audio_paths = [
        os.path.expanduser(x.strip())
        for x in args.audio_paths.split(',')
        if x.strip()
    ]

    cheetah = Cheetah(library_path=args.library_path,
                      acoustic_model_path=args.acoustic_model_path,
                      language_model_path=args.language_model_path,
                      license_path=args.license_path)

    try:
        frame_length = cheetah.frame_length

        for audio_path in audio_paths:
            audio, sample_rate = soundfile.read(audio_path, dtype='int16')
            if sample_rate != cheetah.sample_rate:
                raise ValueError(
                    'Cheetah can only process audio data with sample rate of %d' %
                    cheetah.sample_rate)
            # soundfile returns a (frames, channels) array for multi-channel
            # files; slicing that would hand rows of samples to the engine.
            if audio.ndim != 1:
                raise ValueError(
                    'Cheetah can only process single-channel audio data')

            num_frames = len(audio) // frame_length
            pieces = []
            for i in range(num_frames):
                frame = audio[i * frame_length:(i + 1) * frame_length]
                partial_transcript, _ = cheetah.process(frame)
                pieces.append(partial_transcript)

            # flush() returns whatever text the engine is still holding back.
            pieces.append(cheetah.flush())

            print(''.join(pieces))
    finally:
        # Release the engine even when a file is rejected or fails to load.
        cheetah.delete()
Пример #2
0
    def test_transcribe(self):
        """Check that Cheetah transcribes the reference recording exactly.

        The recording is processed frame by frame; the concatenation of all
        partial transcripts plus the final flush must equal the expected
        sentence.
        """
        def abs_path(rel_path):
            # NOTE(review): every path passed below is absolute, so
            # os.path.join() discards the directory prefix and returns the
            # argument unchanged. The test therefore only runs on a machine
            # with this exact layout - consider switching to paths relative
            # to the repository.
            return os.path.join(os.path.dirname(__file__), '../..', rel_path)

        cheetah = Cheetah(
            library_path=abs_path('/home/nithin/project1/libpv_cheetah.so'),
            acoustic_model_path=abs_path(
                '/home/nithin/project1/acoustic_model.pv'),
            language_model_path=abs_path(
                '/home/nithin/project1/language_model.pv'),
            license_path=abs_path(
                '/home/nithin/project1/cheetah_eval_linux_public.lic'))
        # Release the engine even when an assertion below fails.
        self.addCleanup(cheetah.delete)

        audio, sample_rate = soundfile.read(
            abs_path('/home/nithin/project1/audio_samples/test.wav'),
            dtype='int16')
        # assertEqual rather than a bare assert: it is still checked under
        # `python -O` and reports both values on failure.
        self.assertEqual(sample_rate, cheetah.sample_rate)

        frame_length = cheetah.frame_length
        num_frames = len(audio) // frame_length
        pieces = []
        for i in range(num_frames):
            frame = audio[i * frame_length:(i + 1) * frame_length]
            partial_transcript, _ = cheetah.process(frame)
            pieces.append(partial_transcript)

        # flush() returns the text the engine has not emitted yet.
        pieces.append(cheetah.flush())
        transcript = ''.join(pieces)

        self.assertEqual(
            transcript,
            "MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL"
        )
Пример #3
0
    def run(self):
        """Stream microphone audio through Cheetah and print the transcript.

        Creates a Cheetah engine and a PyAudio input stream, then loops
        reading one frame at a time, printing each partial transcript as it
        arrives and the flushed remainder whenever an endpoint is reported.
        The loop has no exit condition of its own; it ends only when an
        exception is raised.  ``Exception`` subclasses are printed and
        swallowed, anything else (e.g. ``KeyboardInterrupt``) propagates
        after cleanup.

        When ``self._output_path`` is set, every captured frame is also
        appended to ``self._recorded_frames`` and the whole recording is
        written to that path as 16-bit PCM on exit.
        """
        import contextlib  # local import: only the cleanup below needs it

        cheetah = None
        pa = None
        audio_stream = None
        sample_rate = None
        try:
            cheetah = Cheetah(
                library_path=self._library_path,
                acoustic_model_path=self._acoustic_model_path,
                language_model_path=self._language_model_path,
                license_path=self._license_path,
                endpoint_duration_sec=1)

            # Cache these once: the sample rate is needed again after the
            # engine has been deleted, and the frame length on every pass.
            sample_rate = cheetah.sample_rate
            frame_length = cheetah.frame_length
            frame_format = "h" * frame_length

            pa = pyaudio.PyAudio()
            audio_stream = pa.open(
                rate=sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=frame_length,
                input_device_index=self._input_device_index)

            while True:
                pcm = audio_stream.read(frame_length)
                # Raw bytes -> tuple of signed 16-bit samples.
                pcm = struct.unpack_from(frame_format, pcm)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                partial_transcript, is_endpoint = cheetah.process(pcm)
                print(partial_transcript, end='', flush=True)
                if is_endpoint:
                    print(cheetah.flush())
        except Exception as e:
            print(e)
        finally:
            def _save_recording():
                # Nothing to write if recording is off, no frame was
                # captured, or the engine never reported a sample rate.
                if (self._output_path is not None and self._recorded_frames
                        and sample_rate is not None):
                    recorded_audio = np.concatenate(
                        self._recorded_frames, axis=0).astype(np.int16)
                    soundfile.write(
                        self._output_path,
                        recorded_audio,
                        samplerate=sample_rate,
                        subtype='PCM_16')

            # ExitStack runs callbacks last-in, first-out and keeps going
            # when one of them raises, so a failing step can no longer leak
            # the remaining resources or lose the recording.  Registration
            # order is therefore the reverse of the execution order:
            # flush -> delete -> close stream -> terminate -> save.
            with contextlib.ExitStack() as cleanup:
                cleanup.callback(_save_recording)
                if pa is not None:
                    cleanup.callback(pa.terminate)
                if audio_stream is not None:
                    cleanup.callback(audio_stream.close)
                if cheetah is not None:
                    cleanup.callback(cheetah.delete)
                    cleanup.callback(lambda: print(cheetah.flush()))
Пример #4
0
    parser.add_argument(
        '--audio_paths',
        help='comma-separated absolute paths to audio files to be transcribed',
        type=str,
        required=True)

    args = parser.parse_args()

    cheetah = Cheetah(library_path=args.library_path,
                      acoustic_model_file_path=args.acoustic_model_file_path,
                      language_model_file_path=args.language_model_file_path,
                      license_file_path=args.license_file_path)

    for audio_path in [
            os.path.expanduser(x.strip()) for x in args.audio_paths.split(',')
    ]:
        audio, sample_rate = soundfile.read(audio_path, dtype='int16')
        if sample_rate != cheetah.sample_rate:
            raise ValueError(
                'Cheetah can only process audio data with sample rate of %d' %
                cheetah.sample_rate)

        num_frames = len(audio) // cheetah.frame_length
        for i in range(num_frames):
            frame = audio[i * cheetah.frame_length:(i + 1) *
                          cheetah.frame_length]
            cheetah.process(frame)

        print(cheetah.transcribe())
Пример #5
0
        type=str,
        required=True)

    args = parser.parse_args()

    cheetah = Cheetah(library_path=args.library_path,
                      acoustic_model_path=args.acoustic_model_path,
                      language_model_path=args.language_model_path,
                      license_path=args.license_path)

    for audio_path in [
            os.path.expanduser(x.strip()) for x in args.audio_paths.split(',')
    ]:
        audio, sample_rate = soundfile.read(audio_path, dtype='int16')
        if sample_rate != cheetah.sample_rate:
            raise ValueError(
                'Cheetah can only process audio data with sample rate of %d' %
                cheetah.sample_rate)

        num_frames = len(audio) // cheetah.frame_length
        transcript = ''
        for i in range(num_frames):
            frame = audio[i * cheetah.frame_length:(i + 1) *
                          cheetah.frame_length]
            partial_transcript, _ = cheetah.process(frame)
            transcript += partial_transcript

        transcript += cheetah.flush()

        print(transcript)