示例#1
0
    def transcribe_proc():
        """Transcribe the live audio stream in an endless loop.

        Every pass hands a fresh ``audio_stream()`` to the transcriber, tags
        the result with the enclosing scope's ``is_timeout`` value and passes
        it to ``print_json``.
        """
        while True:
            # One transcription per pass over the live stream
            outcome = transcriber.transcribe_stream(
                audio_stream(), sample_rate, sample_width, channels
            )
            _LOGGER.debug("Transcription result: %s", outcome)

            # Fall back to an empty transcription when nothing came back
            if not outcome:
                outcome = Transcription.empty()

            payload = dataclasses.asdict(outcome)
            payload["timeout"] = is_timeout
            print_json(payload)
示例#2
0
def transcribe(args: argparse.Namespace):
    """Do speech to text on one or more WAV files.

    When ``args.wav_file`` is non-empty, every listed file is read and
    transcribed in turn. Otherwise a single WAV is read from stdin and fed
    to the transcriber in chunks of ``args.frames_in_chunk`` frames. Each
    result is passed to ``print_json``; a missing result is replaced by
    ``Transcription.empty()`` in both modes.

    A ``KeyboardInterrupt`` ends processing quietly, and the transcriber is
    always stopped before returning.

    Args:
        args: Parsed command-line arguments. Reads ``model_type``,
            ``model_dir``, ``graph_dir``, ``wav_file`` and
            ``frames_in_chunk``. ``model_dir`` and ``graph_dir`` are
            replaced in place with ``Path`` objects (``graph_dir``
            defaults to ``model_dir / "graph"``).
    """
    # Load transcriber
    args.model_dir = Path(args.model_dir)

    if args.graph_dir:
        args.graph_dir = Path(args.graph_dir)
    else:
        args.graph_dir = args.model_dir / "graph"

    transcriber = KaldiCommandLineTranscriber(
        args.model_type, args.model_dir, args.graph_dir
    )

    # Do transcription
    try:
        if args.wav_file:
            # Transcribe WAV files
            for wav_path in args.wav_file:
                _LOGGER.debug("Processing %s", wav_path)

                # Context manager closes the handle even if reading fails
                with open(wav_path, "rb") as wav_fp:
                    wav_bytes = wav_fp.read()

                result = transcriber.transcribe_wav(wav_bytes)

                if not result:
                    result = Transcription.empty()

                print_json(result)
        else:
            # Read WAV data from stdin
            if os.isatty(sys.stdin.fileno()):
                print("Reading WAV data from stdin...", file=sys.stderr)

            # Stream in chunks
            with wave.open(sys.stdin.buffer, "rb") as wav_file:

                def audio_stream(wav_file, frames_in_chunk):
                    """Yield the WAV's frames in chunks of frames_in_chunk."""
                    num_frames = wav_file.getnframes()
                    try:
                        while num_frames > frames_in_chunk:
                            yield wav_file.readframes(frames_in_chunk)
                            num_frames -= frames_in_chunk

                        if num_frames > 0:
                            # Last chunk
                            yield wav_file.readframes(num_frames)
                    except KeyboardInterrupt:
                        # Stop streaming quietly on Ctrl-C
                        pass

                result = transcriber.transcribe_stream(
                    audio_stream(wav_file, args.frames_in_chunk),
                    wav_file.getframerate(),
                    wav_file.getsampwidth(),
                    wav_file.getnchannels(),
                )

                # Same fallback as the file branch; an assert here would be
                # stripped under -O and would crash on a missing result.
                if not result:
                    result = Transcription.empty()

                print_json(result)
    except KeyboardInterrupt:
        pass
    finally:
        transcriber.stop()
示例#3
0
def _read_exactly(stream, num_bytes: int) -> bytes:
    """Read ``num_bytes`` bytes from ``stream``, accumulating short reads.

    Returns fewer than ``num_bytes`` bytes only when the stream ends first.
    """
    chunks = []
    remaining = num_bytes
    while remaining > 0:
        chunk = stream.read(remaining)
        if not chunk:
            # End of stream: stop instead of retrying forever
            break

        chunks.append(chunk)
        remaining -= len(chunk)

    return b"".join(chunks)


async def transcribe_wav(args: argparse.Namespace,
                         core: Voice2JsonCore) -> None:
    """Speech to text from WAV file(s).

    Three input modes, chosen from ``args``:

    * ``args.wav_file`` and/or ``args.stdin_files``: transcribe each listed
      file (paths from ``args.wav_file`` first, then one path per line of
      stdin). Each result gets a ``wav_name`` key holding the file name, or
      the path relative to ``args.relative_directory`` when that is set.
    * ``args.input_size``: stdin carries repeated records of a byte count
      on its own line followed by that many bytes of WAV data. Processing
      stops at a blank/missing count line, a non-positive count, or
      truncated WAV data.
    * otherwise: all of stdin is treated as a single WAV.

    Every result dict is passed to ``print_json``. The transcriber is
    always stopped before returning.

    Args:
        args: Parsed command-line arguments (reads ``open``, ``debug``,
            ``relative_directory``, ``wav_file``, ``stdin_files`` and
            ``input_size``).
        core: Profile core; provides ``check_trained``, ``get_transcriber``
            and ``maybe_convert_wav``.

    Raises:
        AssertionError: If ``core.check_trained()`` is falsy.
        ValueError: If a byte-count line is not an integer.
    """
    from rhasspyasr import Transcription

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Get speech to text transcriber for profile
    transcriber = core.get_transcriber(open_transcription=args.open,
                                       debug=args.debug)

    # Directory to report WAV file names relative to
    relative_dir = (None if args.relative_directory is None else Path(
        args.relative_directory))

    async def transcribe_data(wav_data: bytes) -> dict:
        """Run WAV data through maybe_convert_wav and the transcriber."""
        wav_data = await core.maybe_convert_wav(wav_data)
        transcription = (transcriber.transcribe_wav(wav_data)
                         or Transcription.empty())
        return dataclasses.asdict(transcription)

    try:
        if args.wav_file or args.stdin_files:
            # Read WAV file paths
            wav_files = args.wav_file or []
            if args.stdin_files:
                _LOGGER.debug("Reading file paths from stdin")
                wav_files = itertools.chain(wav_files, sys.stdin)

            for wav_path_str in wav_files:
                wav_path_str = wav_path_str.strip()
                if not wav_path_str:
                    # A blank line would resolve to the current directory
                    continue

                # Load and convert
                wav_path = Path(wav_path_str)
                _LOGGER.debug("Transcribing %s", wav_path)

                result = await transcribe_data(wav_path.read_bytes())

                if relative_dir is None:
                    # Add name of WAV file to result
                    result["wav_name"] = wav_path.name
                else:
                    # Make relative to some directory
                    result["wav_name"] = str(wav_path.absolute().relative_to(
                        relative_dir.absolute()))

                print_json(result)
        else:
            # Read WAV data from stdin
            _LOGGER.debug("Reading WAV data from stdin")

            if args.input_size:
                stdin_bytes = sys.stdin.buffer

                while True:
                    # Number of bytes is on separate line
                    line = stdin_bytes.readline().strip()
                    if not line:
                        break

                    num_bytes = int(line)
                    if num_bytes <= 0:
                        break

                    # Read in WAV; partial reads are appended, not replaced
                    wav_data = _read_exactly(stdin_bytes, num_bytes)
                    if len(wav_data) < num_bytes:
                        _LOGGER.warning(
                            "Expected %s byte(s) of WAV data but got %s",
                            num_bytes, len(wav_data))
                        break

                    # Transcribe
                    print_json(await transcribe_data(wav_data))
            else:
                # Load, convert and transcribe entire input
                print_json(await transcribe_data(sys.stdin.buffer.read()))
    finally:
        transcriber.stop()