# Example no. 1
def main():
    """Stream an audio file to the STT service and print each response."""
    args = StreamingRecognitionParser().parse_args()

    with audio_open_read(args.audio_file, args.encoding, args.rate,
                         args.num_channels, args.chunk_size,
                         args.pyaudio_max_seconds) as reader:
        auth = authorization_metadata(args.api_key, args.secret_key,
                                      "tinkoff.cloud.stt")
        stub = stt_pb2_grpc.SpeechToTextStub(make_channel(args))
        # Feed the request generator straight into the streaming call.
        requests = stt_generate_requests(args, reader)
        print_streaming_recognition_responses(
            stub.StreamingRecognize(requests, metadata=auth))
# Example no. 2
def speech2text(binary_file):
    """Recognize speech in *binary_file* via the STT service.

    Prints the full recognition response and returns the concatenation of
    every alternative transcript across all results.
    """
    stub = stt_pb2_grpc.SpeechToTextStub(
        grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
    metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
    response = stub.Recognize(build_request(binary_file), metadata=metadata)
    print_recognition_response(response)
    # ''.join runs at C speed; the original `output += ...` loop is
    # potentially quadratic in the total transcript length.
    return ''.join(str(alternative.transcript)
                   for result in response.results
                   for alternative in result.alternatives)
# Example no. 3
def main():
    """Run one-shot (non-streaming) recognition on an audio file."""
    args = BaseRecognitionParser().parse_args()
    # RAW_OPUS needs the streaming API; reject it up front.
    if args.encoding == stt_pb2.RAW_OPUS:
        raise ValueError("RAW_OPUS encoding is not supported by this script")
    with audio_open_read(args.audio_file, args.encoding, args.rate,
                         args.num_channels, args.chunk_size,
                         args.pyaudio_max_seconds) as reader:
        creds = authorization_metadata(args.api_key, args.secret_key,
                                       "tinkoff.cloud.stt")
        stub = stt_pb2_grpc.SpeechToTextStub(make_channel(args))
        print_recognition_response(
            stub.Recognize(build_recognition_request(args, reader),
                           metadata=creds))
# Example no. 4
 def __init__(self, recognize):
     """Open a streaming recognition session configured for *recognize*.

     Builds the initial StreamingRecognizeRequest (LINEAR16, 16 kHz, mono,
     single-utterance, with context phrases from recognize.context_words)
     and starts the gRPC stream; responses are kept in self._responses.
     """
     self._recognize = recognize
     request = stt_pb2.StreamingRecognizeRequest()
     # Protobuf submessage references are live views, so mutating
     # `config` mutates the request in place.
     config = request.streaming_config.config
     config.encoding = stt_pb2.AudioEncoding.LINEAR16
     config.sample_rate_hertz = 16000
     config.num_channels = 1
     config.enable_denormalization = True
     config.enable_automatic_punctuation = True
     config.vad_config.silence_duration_threshold = 1.20
     request.streaming_config.single_utterance = True
     phrases = [stt_pb2.SpeechContextPhrase(text=text, score=10.0)
                for text in self._recognize.context_words]
     config.speech_contexts.append(stt_pb2.SpeechContext(phrases=phrases))
     metadata = authorization_metadata(API_KEY, SECRET_KEY,
                                       "tinkoff.cloud.stt")
     channel = grpc.secure_channel(ENDPOINT, grpc.ssl_channel_credentials())
     stub = stt_pb2_grpc.SpeechToTextStub(channel)
     self._responses = stub.StreamingRecognize(self.requests(request),
                                               metadata=metadata)
# Example no. 5
def main():
    """EAGI entry point: record the call in the DB, publish a call event,
    then stream the call audio to STT and save the recognition results."""
    agi = AGI()
    if agi is not None:  # identity check; original used `!= None`
        agi.verbose("EAGI script started...")
        ani = agi.env['agi_callerid']
        uid = agi.env['agi_uniqueid']
        agi.verbose("Call answered from: %s with id %s" % (ani, uid))
    else:
        # No AGI environment (manual run): synthesize a unique call id.
        ani = ""
        uid = str(uuid.uuid4())

    try:
        with dbcon.cursor() as cursor:
            sql = "INSERT INTO calls SET uniqueid=%s,callerid=%s,calldate=NOW()"
            cursor.execute(sql, (uid, ani))
            call_id = cursor.lastrowid
    finally:
        # Commit even if the insert raised, matching the original
        # best-effort behavior; the exception still propagates.
        dbcon.commit()

    data = {
        "type": "call",
        # NOTE(review): "unqueid" looks like a typo for "uniqueid" — left
        # unchanged because consumers of this channel may rely on the key.
        "unqueid": uid,
        "callerid": ani[-4:],
        "calldate": time.strftime('%Y-%m-%d %H:%M:%S'),
        "call_id": call_id
    }
    client.publish(cent_channel, data)
    if agi is None:  # identity check; original used `== None`
        ic(data)

    args = StreamingRecognitionParser().parse_args()

    stub = stt_pb2_grpc.SpeechToTextStub(make_channel(args))
    metadata = authorization_metadata(cfg.api_key, cfg.secret_key,
                                      "tinkoff.cloud.stt")
    responses = stub.StreamingRecognize(generate_requests(args, agi),
                                        metadata=metadata)
    save_streaming_recognition_responses(responses, agi, ani, uid, call_id)
# Example no. 6
def main():
    """Recognize an audio file and print the concatenated transcript."""
    args = BaseRecognitionParser().parse_args()
    # RAW_OPUS needs the streaming API; reject it up front.
    if args.encoding == stt_pb2.RAW_OPUS:
        raise ValueError("RAW_OPUS encoding is not supported by this script")
    with audio_open_read(args.audio_file, args.encoding, args.rate,
                         args.num_channels, args.chunk_size,
                         args.pyaudio_max_seconds) as reader:
        stub = stt_pb2_grpc.SpeechToTextStub(make_channel(args))
        metadata = authorization_metadata(args.api_key, args.secret_key,
                                          "tinkoff.cloud.stt")
        response = stub.Recognize(build_recognition_request(args, reader),
                                  metadata=metadata)

        if not isinstance(response, dict):
            # https://developers.google.com/protocol-buffers/docs/proto3#json
            response = MessageToDict(response,
                                     including_default_value_fields=True,
                                     preserving_proto_field_name=True)
        # ''.join instead of the original quadratic `total = total + ...`.
        total = ''.join(alternative["transcript"]
                        for result in response["results"]
                        for alternative in result["alternatives"])
    print(total)
# Example no. 7
def speach2text(url, tmp_dir='./tmp'):
    """Download an OGA audio file from *url*, convert it to WAV with ffmpeg,
    stream it to STT, and return the first transcript.

    Temp files are created in *tmp_dir* and always removed, even when the
    download, conversion, or recognition fails (the original leaked them
    on any exception).
    """
    ts = int(time.time() * 10**6)
    wav_tmp = os.path.join(tmp_dir, '{}.wav'.format(ts))
    oga_tmp = os.path.join(tmp_dir, '{}.oga'.format(ts))

    try:
        # Close the HTTP response deterministically (was left to GC).
        with urlopen(url) as r, open(oga_tmp, 'wb') as f:
            f.write(r.read())

        stream = ffmpeg.input(oga_tmp)
        stream = ffmpeg.output(stream, wav_tmp)
        ffmpeg.run(stream, overwrite_output=True)

        stub = stt_pb2_grpc.SpeechToTextStub(
            grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
        metadata = authorization_metadata(api_key, secret_key,
                                          "tinkoff.cloud.stt")
        responses = stub.StreamingRecognize(generate_requests(wav_tmp),
                                            metadata=metadata)
        return next(
            responses).results[0].recognition_result.alternatives[0].transcript
    finally:
        # Remove whichever temp files actually got created.
        for path in (wav_tmp, oga_tmp):
            if os.path.exists(path):
                os.remove(path)
# Example no. 8
        with wave.open("../../audio/sample_5.wav") as f:
            yield build_first_request(f.getframerate(), f.getnchannels())
            frame_samples = f.getframerate() // 10  # Send 100ms at a time
            for data in iter(lambda: f.readframes(frame_samples), b''):
                request = stt_pb2.StreamingRecognizeRequest()
                request.audio_content = data
                yield request
    except Exception as e:
        print("Got exception in generate_requests", e)
        raise


def print_streaming_recognition_responses(responses):
    """Pretty-print streaming recognition results, one dashed block per phrase."""
    for response in responses:
        for item in response.results:
            recognition = item.recognition_result
            print("Channel", recognition.channel)
            print("Phrase start:", recognition.start_time.ToTimedelta())
            print("Phrase end:  ", recognition.end_time.ToTimedelta())
            for alternative in recognition.alternatives:
                print('"' + alternative.transcript + '"')
            print("------------------")


# Script entry: open a secure gRPC channel to the STT service, stream the
# audio produced by generate_requests(), and print each recognition response.
# `endpoint`, `api_key` and `secret_key` are expected to be defined earlier
# in this file (not visible in this chunk).
stub = stt_pb2_grpc.SpeechToTextStub(
    grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
responses = stub.StreamingRecognize(generate_requests(), metadata=metadata)
print_streaming_recognition_responses(responses)