def audio_params_gen(audio_chunks, start):
    # Fragment lifted from inside a streaming helper: `config` (a callable mapping
    # chunk length in bytes to a RecognitionConfig), `chunk_size`, and `time` come
    # from the enclosing scope. Chunks are paced with sleep to simulate real-time audio.
    for chunk in audio_chunks[:-1]:
        yield config(len(chunk)), RecognitionAudio(content=chunk)
        time.sleep(chunk_size)
    # Record when the final chunk is sent so the caller can measure latency.
    start[0] = time.time()
    yield config(len(audio_chunks[-1])), RecognitionAudio(
        content=audio_chunks[-1])
Example #2
def transcribe_audio(audio_stream, model: str, language_code: str):
    """
    Transcribe the given audio chunks
    """
    global client

    encoding = RecognitionConfig.AudioEncoding.LINEAR16

    try:
        audio = RecognitionAudio(content=audio_stream)
        
        config = RecognitionConfig(
            sample_rate_hertz=SR,
            encoding=encoding,
            language_code=language_code,
            max_alternatives=10,
            model=model,
        )
        
        response = client.recognize(config, audio, uuid="", timeout=1000)
    except Exception as e:
        print(f"error: {str(e)}")
        return []

    return parse_response(response)
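
A minimal usage sketch for transcribe_audio above; it assumes the module-level client, SR, and parse_response are already initialised, and that the input file is raw 16-bit PCM at SR Hz. The file name and the model/language values are assumptions for illustration, mirroring the other examples:

with open("sample.raw", "rb") as f:  # hypothetical raw 16-bit PCM file at SR Hz
    audio_bytes = f.read()

alternatives = transcribe_audio(audio_bytes, model="general", language_code="hi")
print(alternatives)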
Example #3
def transcribe_audio(audio_stream,
                     model: str,
                     language_code: str,
                     sample_rate=8000,
                     max_alternatives=10,
                     raw: bool = False):
    """
    Transcribe the given audio chunks
    """
    global client

    try:
        audio = RecognitionAudio(content=audio_stream)

        config = RecognitionConfig(sample_rate_hertz=sample_rate,
                                   encoding=ENCODING,
                                   language_code=language_code,
                                   max_alternatives=max_alternatives,
                                   model=model,
                                   raw=raw,
                                   data_bytes=len(audio_stream))

        response = client.recognize(config,
                                    audio,
                                    uuid=str(random.randint(1000, 100000)),
                                    timeout=1000)
    except Exception as e:
        print(f"error: {str(e)}")
        return []

    return parse_response(response)
Example #4
def transcribe_chunks(audio_chunks,
                      model: str,
                      language_code: str,
                      raw: bool = False):
    """
    Transcribe the given audio chunks
    """
    global client

    response = {}
    encoding = RecognitionConfig.AudioEncoding.LINEAR16

    try:
        if raw:
            config = lambda chunk_len: RecognitionConfig(
                sample_rate_hertz=SR,
                encoding=encoding,
                language_code=language_code,
                max_alternatives=10,
                model=model,
                raw=True,
                data_bytes=chunk_len)
            audio_params = [(config(len(chunk)),
                             RecognitionAudio(content=chunk))
                            for chunk in audio_chunks]
            response = client.streaming_recognize_raw(audio_params,
                                                      uuid="",
                                                      timeout=1000)
        else:
            audio = (RecognitionAudio(content=chunk) for chunk in audio_chunks)
            config = RecognitionConfig(
                sample_rate_hertz=SR,
                encoding=encoding,
                language_code=language_code,
                max_alternatives=10,
                model=model,
            )
            response = client.streaming_recognize(config,
                                                  audio,
                                                  uuid="",
                                                  timeout=1000)
    except Exception as e:
        traceback.print_exc()
        print(f'error: {str(e)}')
        return None

    return parse_response(response)
def transcribe_chunks_streaming(client,
                                audio_chunks,
                                model: str,
                                language_code: str,
                                sample_rate=8000,
                                max_alternatives=10,
                                raw: bool = False,
                                word_level: bool = False,
                                chunk_size: float = 0.5):
    """
    Transcribe the given audio chunks
    """

    response = {}

    try:
        if raw:
            config = lambda chunk_len: RecognitionConfig(
                sample_rate_hertz=sample_rate,
                encoding=ENCODING,
                language_code=language_code,
                max_alternatives=max_alternatives,
                model=model,
                raw=True,
                word_level=word_level,
                data_bytes=chunk_len)

            start = [None]

            def audio_params_gen(audio_chunks, start):
                for chunk in audio_chunks[:-1]:
                    yield config(len(chunk)), RecognitionAudio(content=chunk)
                    time.sleep(chunk_size)
                start[0] = time.time()
                yield config(len(audio_chunks[-1])), RecognitionAudio(
                    content=audio_chunks[-1])

            response = client.streaming_recognize_raw(
                audio_params_gen(audio_chunks, start),
                uuid=str(random.randint(1000, 100000)))
            end = time.time()
            print(f"{((end - start[0])*1000):.2f}ms")
        else:
            audio = (RecognitionAudio(content=chunk) for chunk in audio_chunks)
            config = RecognitionConfig(sample_rate_hertz=sample_rate,
                                       encoding=ENCODING,
                                       language_code=language_code,
                                       max_alternatives=max_alternatives,
                                       model=model,
                                       word_level=word_level)
            response = client.streaming_recognize(
                config, audio, uuid=str(random.randint(1000, 100000)))
    except Exception as e:
        traceback.print_exc()
        print(f'error: {str(e)}')

    pprint(parse_response(response))
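
One possible way to drive transcribe_chunks_streaming above is to split a mono 16-bit PCM WAV file into fixed-duration byte chunks using the standard-library wave module. The chunking helper, file name, and argument values below are assumptions for illustration, not part of the original source:

import wave

def wav_to_chunks(path, chunk_seconds=0.5):
    # Split a mono 16-bit PCM WAV file into raw byte chunks of chunk_seconds each.
    with wave.open(path, "rb") as w:
        frames_per_chunk = int(w.getframerate() * chunk_seconds)
        chunks = []
        while True:
            data = w.readframes(frames_per_chunk)
            if not data:
                break
            chunks.append(data)
    return chunks

chunks = wav_to_chunks("call.wav")  # hypothetical input file
transcribe_chunks_streaming(client, chunks, model="general", language_code="hi",
                            raw=True, chunk_size=0.5)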
Example #6
def transcribe_chunks_streaming(client,
                                audio_chunks,
                                model: str,
                                language_code: str,
                                sample_rate=8000,
                                max_alternatives=10,
                                raw: bool = False,
                                word_level: bool = False):
    """
    Transcribe the given audio chunks
    """

    response = {}

    try:
        if raw:
            config = lambda chunk_len: RecognitionConfig(
                sample_rate_hertz=sample_rate,
                encoding=ENCODING,
                language_code=language_code,
                max_alternatives=max_alternatives,
                model=model,
                raw=True,
                word_level=word_level,
                data_bytes=chunk_len)
            audio_params = [(config(len(chunk)),
                             RecognitionAudio(content=chunk))
                            for chunk in audio_chunks]
            response = client.streaming_recognize_raw(audio_params, uuid="")
        else:
            audio = (RecognitionAudio(content=chunk) for chunk in audio_chunks)
            config = RecognitionConfig(sample_rate_hertz=sample_rate,
                                       encoding=ENCODING,
                                       language_code=language_code,
                                       max_alternatives=max_alternatives,
                                       model=model,
                                       word_level=word_level)
            response = client.streaming_recognize(config, audio, uuid="")
    except Exception as e:
        traceback.print_exc()
        print(f'error: {str(e)}')

    pprint(parse_response(response))
Example #7
def ktranscribe():
    global KS_CLIENT
    # Decode the first 30 s of FILE to mono 16-bit PCM WAV at 8 kHz, captured on stdout.
    out, _ = (ffmpeg
              .input(FILE)
              .output("-", format="wav", acodec="pcm_s16le", ac=1, ar="8k", t="30")
              .overwrite_output()
              .run(capture_stdout=True, quiet=True))
    audio_chunks = [out]
    audio = (RecognitionAudio(content=chunk) for chunk in audio_chunks)
    return KS_CLIENT.streaming_recognize(CONFIG, audio, uuid="")
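
ktranscribe relies on the module-level globals KS_CLIENT, FILE, and CONFIG, which are not shown here. CONFIG is presumably a RecognitionConfig along the lines of the other examples; the values below are assumptions for illustration only:

# Hypothetical module-level setup assumed by ktranscribe(); the values mirror
# the configs used in the other examples rather than the original source.
FILE = "call.wav"
CONFIG = RecognitionConfig(
    sample_rate_hertz=8000,
    encoding=RecognitionConfig.AudioEncoding.LINEAR16,
    language_code="hi",
    max_alternatives=10,
    model="general",
)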
Example #8
    def decode_audio(self, index: int):
        # NOTE: These are only assumptions for now so test failures might not
        #       necessarily mean error in model/server.
        config = RecognitionConfig(
            sample_rate_hertz=8000,
            encoding=RecognitionConfig.AudioEncoding.LINEAR16,
            language_code="hi",
            max_alternatives=10,
            model="general")

        audio = dreamer((RecognitionAudio(content=chunk)
                         for chunk in self.audios[index][0]), 1)
        self.results[index] = self.client.streaming_recognize(config,
                                                              audio,
                                                              uuid="")
Example #9
def audio_chunks_gen(audio_chunks):
    for chunk in audio_chunks:
        yield RecognitionAudio(content=chunk)
Example #10
def audio_params_gen(audio_chunks):
    # `config` is a callable from the enclosing scope that maps a chunk length
    # (in bytes) to a RecognitionConfig, as in the raw-streaming examples above.
    for chunk in audio_chunks:
        yield config(len(chunk)), RecognitionAudio(content=chunk)
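
Both generator fragments above depend on an enclosing scope: audio_chunks_gen only needs the chunk list, while audio_params_gen also captures a config callable. A hedged sketch of how they are typically consumed, mirroring the streaming calls in the earlier examples (client, fixed_config, and the uuid value are assumptions):

# Raw streaming: each chunk carries its own per-chunk config.
response = client.streaming_recognize_raw(audio_params_gen(audio_chunks), uuid="")

# Non-raw streaming: a single fixed config plus a generator of RecognitionAudio messages.
response = client.streaming_recognize(fixed_config, audio_chunks_gen(audio_chunks), uuid="")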