예제 #1
0
def translate_file(filename="last5.wav"):
    """Transcribe a mono 16-bit PCM WAV file with the local Vosk model.

    :param filename: name of a WAV file in the current directory
    :return: the full recognized text as a single string
    """
    SetLogLevel(-1)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)
    filepath = "./" + filename
    wf = wave.open(filepath, "rb")
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    model = Model("./model")
    # Bug fix: use the file's actual sample rate instead of assuming 16 kHz.
    rec = KaldiRecognizer(model, wf.getframerate())
    texts = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            # Bug fix: every finalized segment is kept; previously only the
            # last one survived and the trailing flush was discarded, so most
            # of a long recording's transcript was silently lost.
            texts.append(json.loads(rec.Result())['text'])
    # FinalResult() flushes whatever audio is still buffered after the loop.
    texts.append(json.loads(rec.FinalResult())['text'])
    return " ".join(t for t in texts if t)
예제 #2
0
def speech_to_text(args):
    """Transcribe every ``*.wav`` under ``args.data`` with a Vosk model and
    write one hypothesis ``.txt`` per input file into ``args.hypothesis``.

    :param args: namespace with ``model``, ``data`` and ``hypothesis`` attrs
    """
    # NOTE(review): existence is checked under 'models/' but the model is
    # loaded from args.model directly below — confirm both refer to the same path.
    if not os.path.exists(os.path.join('models', args.model)):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack to 'models' folder.")
        exit(1)

    for filepath in glob.iglob(os.path.join(os.getcwd(), args.data, '*.wav')):
        print(filepath)

        # Bug fix: open the file found by glob, not args.data (the directory).
        wf = wave.open(filepath, "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        model = Model(args.model)
        rec = KaldiRecognizer(model, wf.getframerate())

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                print(rec.Result())
            else:
                print(rec.PartialResult())

        # Bug fix: FinalResult() flushes the recognizer; a second call returns
        # an empty result, so capture it once and reuse it for the file write.
        final = rec.FinalResult()
        print(final)

        # Bug fix: derive the hypothesis name from the basename; the old
        # `filepath.split('.')[0]` broke on absolute paths containing dots.
        base = os.path.splitext(os.path.basename(filepath))[0]
        hypothesis_path = os.path.join(args.hypothesis, base + '.txt')
        with open(hypothesis_path, 'w') as hypothesis:
            hypothesis.write(final)
예제 #3
0
def wav2str(filename, sample_rate=16000, foldername="voskmodel"):
    """Recognize speech in a WAV file and return the recognized words.

    :param filename: path to a mono 16-bit PCM WAV file
    :param sample_rate: sample rate the recognizer is configured for
    :param foldername: directory holding the Vosk model
    :return: list of recognized words, in order
    """
    model = Model(foldername)
    rec = KaldiRecognizer(model, sample_rate)

    wf = wave.open(filename, "rb")
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    results = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            results.append(rec.Result())
    results.append(rec.FinalResult())

    # 'result' carries the per-word entries; it is absent for segments in
    # which nothing was recognized, so default to an empty list.
    words = []
    for res in results:
        jres = json.loads(res)
        for entry in jres.get('result', []):
            words.append(entry['word'])
    return words
예제 #4
0
def upload_voice_input(request):
    """Django view: save an uploaded voice file, recognize it with Vosk, and
    register the recognized phrase in the voice-rec config when it is new.

    :param request: Django HttpRequest; expects a POSTed file field 'myfile'
    :return: HttpResponse describing the outcome
    """
    if request.method == "POST":
        myFile = request.FILES.get("myfile", None)
        if not myFile:
            print("no files for upload!")
            return HttpResponse("no files for upload!")
        # Bug fix: context manager closes the handle even if a chunk write raises.
        with open(os.path.join("media/voice", myFile.name), 'wb+') as destination:
            for chunk in myFile.chunks():
                destination.write(chunk)

        rec = KaldiRecognizer(vosk_model, 16000)
        # NOTE(review): reads a fixed 'voicehome.wav' rather than the uploaded
        # file's own name — confirm uploads are always named voicehome.wav.
        wf = wave.open(BASE_DIR + '/media/voice/voicehome.wav', "rb")

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # Intermediate segments are consumed and discarded; only the
                # final flush below is used for the phrase text.
                rec.Result()

        data = json.loads(rec.FinalResult())
        voicetext = data['text']
        print(voicetext)

        selectitem = dragon_cf['voicerec'][voice_section]
        item_value_array = selectitem.split(',')
        if voicetext not in item_value_array:
            newvalue = selectitem + ',' + voicetext
            dragon_cf.set('voicerec', voice_section, newvalue)
            # Bug fix: the config file handle was opened inline and never closed.
            with open(voice_rec_config, 'w') as cfg:
                dragon_cf.write(cfg)
            return HttpResponse("voice added added added")

        return HttpResponse("voice already exist")
    # NOTE(review): non-POST requests fall through and return None — confirm
    # this view is only ever routed for POST.
예제 #5
0
async def processVoice(waveChunk, recognizer: KaldiRecognizer):
    """ Recognize audio chunk and process with terminal.onText() """
    signature = None
    text = ''
    final = False
    try:
        final = recognizer.AcceptWaveform(waveChunk)

        # Pick the matching result payload and its text key in one place:
        # a finalized phrase lives under 'text', an in-progress one under 'partial'.
        raw = recognizer.FinalResult() if final else recognizer.PartialResult()
        j = json.loads(raw)
        key = 'text' if final else 'partial'
        text = str(j.get(key, '')).strip()

        # Attempt to extract the speaker-voice signature, when present.
        signature = j.get('spk', [])
    except KeyboardInterrupt as e:
        onCtrlC()
        raise e
    except Exception as e:
        logError(f'Exception processing phrase chunk : {e}')
    return (final, text, signature)
예제 #6
0
    def recognize(self):
        """Convert ``self.file_folder`` to a mono WAV and return its transcript.

        :return: recognized text with segments separated by single spaces
        """
        if not os.path.exists("Speech_Recognition/model"):
            print(
                "Please create speech model as 'model' in the current folder.")
            exit(1)
        # Force mono so the recognizer's format check below passes.
        sound = AudioSegment.from_wav(self.file_folder)
        sound = sound.set_channels(1)
        sound.export("path.wav", format="wav")
        wf = wave.open('path.wav', "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        model = Model("Speech_Recognition/model")
        rec = KaldiRecognizer(model, wf.getframerate())

        parts = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                segment_text = json.loads(rec.Result())['text']
                # Bug fix: skip empty segments; each one previously appended a
                # stray space to the transcript. (Dead `else: pass` removed.)
                if segment_text:
                    parts.append(segment_text)
        final_text = json.loads(rec.FinalResult())['text']
        if final_text:
            parts.append(final_text)

        return ' '.join(parts)
예제 #7
0
def translate_file(filename="last5.wav"):
    """Transcribe `filename` (a mono 16-bit PCM WAV in the current directory)
    with the local Vosk model, printing intermediate results along the way,
    and return the recognized text of the final flush."""
    SetLogLevel(0)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)
    wf = wave.open("./" + filename, "rb")
    acceptable = (wf.getnchannels() == 1 and wf.getsampwidth() == 2
                  and wf.getcomptype() == "NONE")
    if not acceptable:
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    rec = KaldiRecognizer(Model("./model"), wf.getframerate())

    for frames in iter(lambda: wf.readframes(4000), b''):
        if rec.AcceptWaveform(frames):
            print(rec.Result())
        else:
            print(rec.PartialResult())

    return json.loads(
        rec.FinalResult())["text"]  #["results"] for confidence of each word
예제 #8
0
    def speechtotext(string):
        """Transcribe the WAV file at path ``string`` and return its text.

        :param string: path to the input WAV file
        :return: recognized text with segments separated by spaces
        """
        # NOTE(review): no `self`/`cls` parameter — confirm this is meant to
        # be a @staticmethod on the enclosing class.
        if not os.path.exists("model-en"):
            print ("Please download the model from https://github.com/alphacep/kaldi-android-demo/releases and unpack as 'model-en' in the current folder.")
            exit (1)

        wf = wave.open(string, "rb")

        # NOTE(review): the channel count is not checked although the message
        # claims mono is required — confirm stereo input is acceptable here.
        if wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print ("Audio file must be WAV format mono PCM.")
            exit (1)

        model = Model("model-en")
        rec = KaldiRecognizer(model, wf.getframerate())
        parts = []
        while True:
            data = wf.readframes(100000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # Bug fix: segments were concatenated without a separator,
                # running the last word of one segment into the next.
                parts.append(json.loads(rec.Result())['text'])
        parts.append(json.loads(rec.FinalResult())['text'])
        return ' '.join(p for p in parts if p)
예제 #9
0
def ShiBie_ZiRanYuYan():
    """Recognize free-form speech from 'yuyin.wav' and return the cleaned text.

    :return: recognized text with spaces, double quotes and newlines removed
    """
    wf = wave.open('yuyin.wav', "rb")
    model = Model("model")
    rec = KaldiRecognizer(model, wf.getframerate())
    parts = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            # Bug fix: each finalized segment used to overwrite the previous
            # one, so only the last utterance survived; accumulate them all.
            parts.append(json.loads(rec.Result())['text'])
    # Bug fix: always flush the recognizer; the trailing text was previously
    # dropped whenever at least one full segment had been recognized.
    parts.append(json.loads(rec.FinalResult())['text'])

    wenben = ''.join(parts)
    # Strip spaces, quotes and newlines in a single pass (same character set
    # the old per-character replace loop removed).
    return wenben.translate(str.maketrans('', '', ' "\n'))
예제 #10
0
def ShiBie_ZiFu():
    """Recognize constrained-vocabulary speech from 'yuyin.wav'.

    The allowed character set depends on the module-level PanDuan flag. Each
    fully recognized segment is printed; the text of the last segment (or the
    final flush, if no segment was recognized) is returned.
    """
    if PanDuan == "":
        charset = "继 续 检 搜 索 全 部 无 损 听 歌 播 放 音 乐 停 止 诗 词 单 曲 专 辑 循 环 顺 序 随 相 声 评 书 讲 坛 朗 读 关 机 复 制 上 下 一 个 从 头 添 加 收 藏 中 文 日 语 英 更 新 升 级 清 空 谁 多 少 什 么 唱 名 叫 他 的"
    else:
        charset = "对 是 嗯 没 错"
    recognized = ""
    rec = KaldiRecognizer(Model("model"), 16000, charset)
    wav = open("yuyin.wav", "rb")
    wav.read(44)  # skip the RIFF/WAV header; the rest is raw PCM
    for chunk in iter(lambda: wav.read(4000), b''):
        if rec.AcceptWaveform(chunk):
            res = json.loads(rec.Result())
            recognized = res['text']
            print("识别结果是: " + res['text'])

    res = json.loads(rec.FinalResult())
    if recognized == "":
        recognized = res['text']
    print("最终结果是: " + recognized)

    return recognized
def recognition():
    """Recognize digits from the WAV file named in argv[1], constrained to a
    digit grammar, printing every intermediate and final result."""
    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)

    wf = wave.open(sys.argv[1], "rb")
    is_mono_pcm = (wf.getnchannels() == 1 and wf.getsampwidth() == 2
                   and wf.getcomptype() == "NONE")
    if not is_mono_pcm:
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    model = Model("model")

    # You can also specify the possible word or phrase list as JSON list, the order doesn't have to be strict
    rec = KaldiRecognizer(
        model, wf.getframerate(),
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    for frames in iter(lambda: wf.readframes(4000), b''):
        if rec.AcceptWaveform(frames):
            print(rec.Result())
        else:
            print(rec.PartialResult())

    print(rec.FinalResult())
예제 #12
0
def recognizer_process(queue_audio, queue_text):
    """
    as result: place into queue_text <- (text, True|False)  where:
        text - a str with recognizer result, to json.loads()
    """
    # NOTE(review): `model` is a module-level Model instance shared with the
    # parent — confirm it is safe to use across process boundaries.
    print('Worker started')
    rec = KaldiRecognizer(model, 8000)
    last_received = datetime.datetime.now()
    # True while the most recent audio has not yet produced a full result.
    partial = True
    while True:
        queue_bytes = b''
        # Drain everything currently queued into one buffer. An empty drain
        # feeds b'' to AcceptWaveform, which simply yields no new result.
        while not queue_audio.empty():
            last_received = datetime.datetime.now()
            queue_bytes += queue_audio.get()
        if rec.AcceptWaveform(queue_bytes):
            res = rec.Result()
            partial = False
            queue_text.put(res)

        # Stop after 60 s without incoming audio; flush the buffered tail
        # only if it never reached a full (non-partial) result.
        if datetime.datetime.now() - datetime.timedelta(
                seconds=60) > last_received:
            if partial:
                queue_text.put(rec.FinalResult())
            print(f'Worker stopped ')
            time.sleep(1)
            return
        # Pace the polling loop: one drain attempt per second.
        time.sleep(1)
예제 #13
0
파일: class_f.py 프로젝트: hewimetall/105
    def _get_data_in_audio(self, audio_wav_path: str):
        """
        :param audio_wav_path:
                    -path to wav
        :return: list of recognized segment texts (also stored on
                 ``self.raw_data``), or None if the file is not mono 16-bit PCM
        """
        wf = wave.open(audio_wav_path, "rb")
        try:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
                # Unsupported format: caller gets None, as before.
                return
            result = list()
            # wf.getframerate() -> the file's sample rate.
            rec = KaldiRecognizer(self.model, wf.getframerate())
            while True:
                data = wf.readframes(1000)
                if len(data) == 0:
                    break
                if rec.AcceptWaveform(data):
                    jsonData = json.loads(rec.Result())
                    result.append(jsonData['text'])
            jsonData = json.loads(rec.FinalResult())
            # Bug fix: this was gated on the 'result' (word-timing) key, which
            # can be absent even when 'text' is present, silently dropping the
            # trailing text; key on the text itself instead.
            if jsonData.get('text'):
                result.append(jsonData['text'])
        finally:
            # Bug fix: the handle used to leak on the early (bad-format) return.
            wf.close()
        self.raw_data = result
        return result
예제 #14
0
파일: server.py 프로젝트: wncbb/stt_vosk
    def Recognize(self, request, context):
        """Decode one MP3 request in a single pass and return the wrapped
        final transcript."""
        rec = KaldiRecognizer(self.model, vosk_sample_rate)
        # Feed the whole converted payload at once; FinalResult() then
        # flushes everything the recognizer buffered.
        rec.AcceptWaveform(self.mp3ToWav(request.audio_content))
        final = rec.FinalResult()
        print(final)
        return self.get_response(final)
예제 #15
0
 def process_file(self, file_name):
     """
     Run the Vosk model on the input file
     :param file_name: Input wav or mp3 file
     :return: List of dictionaries containing: confidence, start time, end time and the predicted word
     """
     logger.info(f'Recognising speech for {file_name}')
     wf = wave.open(file_name, "rb")
     # Reject anything that is not 16-bit mono PCM — the model cannot read it.
     is_pcm_mono = (wf.getnchannels() == 1 and wf.getsampwidth() == 2
                    and wf.getcomptype() == "NONE")
     if not is_pcm_mono:
         raise Exception(f'Invalid file format for {file_name}')
     recognizer = KaldiRecognizer(self.model, wf.getframerate())
     captured = []
     # Empty frame data marks the end of the file and ends the iteration.
     for frames in iter(lambda: wf.readframes(config.frame_to_read), b''):
         if recognizer.AcceptWaveform(frames):
             segment = json.loads(recognizer.Result())
             # A segment can hold an empty text, in which case it carries no
             # word-level result list to collect.
             if len(segment['text']) > 0:
                 captured.extend(segment['result'])
     # Flush whatever audio is still buffered after the last full segment.
     tail = json.loads(recognizer.FinalResult())
     if len(tail['text']) > 0:
         captured.extend(tail['result'])
     logger.info(f'Processed speech, captured {len(captured)} results')
     return captured
예제 #16
0
class WakeWordDetector:
    """Wake-word detector — a thin wrapper around `vosk-api
    <https://github.com/alphacep/vosk-api>`_. The default wake words are
    `'阿Q'` and `'R-Cute'`.

    To customize the wake words, see
    https://github.com/alphacep/vosk-api/blob/master/python/example/test_words.py
    """
    def __init__(
        self,
        sr=16000,
        lang='en',
        grammar='[ "a b c d e f g h i j k l m n o p q r s t u v w x y z key cute", "[unk]" ]'
    ):
        # Ensure the language model is cached, then build a recognizer
        # restricted to the wake-word grammar.
        self.load(lang)
        self._det = KaldiRecognizer(util.cache[f'vosk.{lang}'], sr, grammar)

    def _detected(self, text):
        # Map the constrained recognizer output to a canonical wake word;
        # implicitly returns None when `text` matches neither phrase.
        if text == 'r q':
            return '阿Q'
        elif text == 'r cute':
            return 'R-Cute'

    def load(self, lang='en'):
        """load language model in advance"""
        # NOTE(review): dict.get evaluates its default eagerly, so Model(...)
        # is constructed even on a cache hit — confirm this is intended.
        model = util.cache.get(f'vosk.{lang}',
                               Model(util.data_file(f'vosk/{lang}')))
        util.cache[f'vosk.{lang}'] = model

    def detect(self, source, timeout=None):
        """Start detecting.

        :param source: the audio source
        :param timeout: maximum listening time in seconds; ``None`` (default)
            means no timeout — block until a wake word is detected
        :type timeout: float, optional
        :return: the wake word corresponding to the detected phrase, or
            ``None`` if the timeout expired without detecting one
        :rtype: str
        """
        self._cancel = False  # possible race condition?
        if timeout:
            count = 0.0
        self._det.FinalResult()  # clear buffer
        while True:
            segment = source.read()
            # Check both finalized and partial hypotheses so a wake word is
            # caught as early as possible.
            if self._det.AcceptWaveform(segment.raw_data):
                p = self._detected(json.loads(self._det.Result())['text'])
            else:
                p = self._detected(
                    json.loads(self._det.PartialResult())['partial'])
            if p:
                return p
            if self._cancel:
                return
                # raise RuntimeError('Hotword detection cancelled by another thread')
            elif timeout:
                count += segment.duration_seconds
                if count > timeout:
                    return  # self._detected(self._det.FinalResult()['text'])

    def cancel(self):
        """Stop detecting (takes effect on the next loop iteration)."""
        self._cancel = True
예제 #17
0
파일: app.py 프로젝트: Aculeasis/vosk-rest
def stt(fp, buffer_size=8192) -> str:
    """Feed the whole binary stream `fp` to a fresh recognizer and return the
    final transcript, or '' if the stream produced no data at all.

    :param fp: readable binary stream of raw audio (16 kHz PCM)
    :param buffer_size: read-chunk size in bytes
    """
    kaldi = KaldiRecognizer(kaldi_model, 16000)
    buf = bytearray(buffer_size)
    got_data = False
    while True:
        n = fp.readinto(buf)
        if not n:
            break
        # Bug fix: readinto() may fill only part of the buffer on the last
        # read; passing the whole buffer appended stale bytes left over from
        # the previous chunk. Feed only the bytes actually read.
        kaldi.AcceptWaveform(bytes(buf[:n]))
        got_data = True
    return json.loads(kaldi.FinalResult())['text'] if got_data else ''
예제 #18
0
def speech(request):
    """Django view: fill a NameForm from a recognized voice recording.

    On POST, the uploaded WAV is transcribed; the word following each name
    keyword and the word two positions after the question keyword are pulled
    from the transcript and used to rebuild the form. On GET an empty form
    is rendered.
    """
    result = {
        'имя': None,
        'фамилия': None,
        'отчество': None,
    }
    questions = {
        'первый': None,
    }

    answer = None

    text = None

    if request.method == "POST":
        form = NameForm(request.POST, files=request.FILES)
        if form.is_valid():
            wf = wave.open(form.cleaned_data['file'], mode="rb")
            rec = KaldiRecognizer(model, wf.getframerate())

            # Stream the whole file through; only the final flush is parsed.
            for frames in iter(lambda: wf.readframes(5000), b''):
                rec.AcceptWaveform(frames)

            text = json.loads(rec.FinalResult())['text']
            words = text.split()

            # The value of interest immediately follows each name keyword...
            for key in result:
                if key in words:
                    result[key] = words[words.index(key) + 1]
            # ...and sits two words after each question keyword.
            for key in questions:
                if key in words:
                    questions[key] = words[words.index(key) + 2]

            if questions['первый'] == 'да':
                answer = 2
            if questions['первый'] == 'нет':
                answer = 1
            form = NameForm({
                'last_name': result['фамилия'],
                'first_name': result['имя'],
                'middle_name': result['отчество'],
                'choice': answer
            })

    else:
        form = NameForm()

    return render(request,
                  'speech/speech.html',
                  context={
                      'text': text,
                      'form': form
                  })
예제 #19
0
 def next_sentence(self, process):
     """Yield one formatted result per fully recognized sentence read from the
     subprocess's stdout, ending with the formatted final flush."""
     rec = KaldiRecognizer(self.vosk_model, self.sample_rate)
     # An empty read marks the end of the stream.
     for chunk in iter(lambda: process.stdout.read(8000), b''):
         if rec.AcceptWaveform(chunk):
             yield self.format_result(rec.Result())
     yield self.format_result(rec.FinalResult(), final=True)
예제 #20
0
def main():
    """CLI entry point: transcribe an audio file with a Vosk model.

    Options: -f/--file_name input audio file, -m/--model_path model directory
    (default ./model). ffmpeg converts the input to 16 kHz mono s16le PCM.
    """
    argv = sys.argv[1:]
    model_path = "./model"
    filename = ""

    try:
        opts, _ = getopt.getopt(argv, "f:m:", ["file_name =", "model_path ="])
    # Bug fix: the bare `except` also swallowed KeyboardInterrupt/SystemExit;
    # only argument-parsing errors should be reported here.
    except getopt.GetoptError:
        print("Error with arguments")
        return

    for opt, arg in opts:
        if opt in ['-f', '--file_name']:
            filename = arg
        elif opt in ['-m', '--model_path']:
            model_path = arg

    print("FILE: ", filename, " MODEL: ", model_path)

    if not os.path.exists(model_path):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        return

    SetLogLevel(-1)
    sample_rate = 16000
    model = Model(model_path)
    rec = KaldiRecognizer(model, sample_rate)

    # ffmpeg resamples/downmixes the input to raw 16 kHz mono s16le on stdout.
    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', filename, '-ar',
        str(sample_rate), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    parts = []
    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            # Bug fix: segment texts were concatenated with no separator,
            # running the words of adjacent segments together.
            parts.append(json.loads(rec.Result())['text'])

    parts.append(json.loads(rec.FinalResult())['text'])
    result = ' '.join(p for p in parts if p)
    print("\n")
    print(result)
예제 #21
0
 def StreamingRecognize(self, request_iterator, context):
     """gRPC streaming recognition: the first message carries the config; each
     later message carries audio. Yields a response per finalized segment, per
     partial (when enabled), and a final flush at end of stream."""
     first = next(request_iterator)
     want_partial = first.config.specification.partial_results
     rec = KaldiRecognizer(self.model,
                           first.config.specification.sample_rate_hertz)
     for message in request_iterator:
         if rec.AcceptWaveform(message.audio_content):
             yield self.get_response(rec.Result())
         elif want_partial:
             yield self.get_response(rec.PartialResult())
     yield self.get_response(rec.FinalResult())
예제 #22
0
def reconize(model_path, process):
    """Yield a formatted result per recognized segment of the raw PCM stream
    on `process.stdout` (sampled at the module-level sample_rate), ending
    with the formatted final flush."""
    recognizer = KaldiRecognizer(Model(model_path), sample_rate)
    recognizer.SetWords(True)  # include per-word details in each result

    # An empty read marks the end of the stream.
    for chunk in iter(lambda: process.stdout.read(8000), b''):
        if recognizer.AcceptWaveform(chunk):
            yield format_result(recognizer.Result())

    yield format_result(recognizer.FinalResult())
예제 #23
0
def creat_text_gpu(path):
    """Recognize the '_mono' variant of the WAV at `path` and write the parsed
    final result to a file named after the input's basename."""
    wf = wave.open(path.replace('.wav', '_mono.wav'), "rb")
    rec = KaldiRecognizer(model, wf.getframerate())

    for frames in iter(lambda: wf.readframes(4000), b''):
        if not rec.AcceptWaveform(frames):
            # Value unused; call kept to mirror the original flow exactly.
            rec.PartialResult()

    write_file(parse_json(rec.FinalResult()),
               path.split('/')[-1].replace('.wav', ''))
예제 #24
0
    def post(self):
        """Tornado handler: accept a base64 MP3 in the 'voice' argument,
        downsample it to 16 kHz WAV via sox, recognize it with Vosk, and write
        a JSON response of the form {code, result|err_msg}."""
        # global conversation
        voice_data = self.get_argument('voice')
        tmpfile = utils.write_temp_file(base64.b64decode(voice_data), '.mp3',
                                        '/home/asrdatabases')
        fname, _ = os.path.splitext(tmpfile)
        nfile = fname + '-16k.wav'
        # downsampling
        # NOTE(review): shell=True with a string-concatenated path — confirm
        # temp-file names can never contain shell metacharacters.
        soxCall = 'sox ' + tmpfile + \
                    ' ' + nfile + ' rate 16k'
        subprocess.call([soxCall], shell=True, close_fds=True)
        utils.check_and_delete(tmpfile)
        wf = wave.open(nfile, "rb")

        # The recognizer needs 16 kHz mono 16-bit audio; report an error
        # response instead of exiting the server process.
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate(
        ) != 16000:
            # print ("Audio file must be WAV format mono PCM.")
            # exit (1)
            res = {
                "code": 1,
                "err_msg": "Audio file must be WAV format mono PCM."
            }
            self.write(json.dumps(res))
        else:

            model = Model("model")
            rec = KaldiRecognizer(model, wf.getframerate())

            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                if rec.AcceptWaveform(data):
                    # print(rec.Result())
                    pass
                else:
                    # print(rec.PartialResult())
                    pass
            # Only the final flush is used; intermediate segments are discarded.
            res_json = rec.FinalResult()
            res_dict = json.loads(res_json)
            text = res_dict.get('text', -1)
            # Remove all whitespace inside the recognized text.
            text = ''.join(text.split())
            # Very short transcripts are treated as a failed recognition.
            if len(text) < 3:
                res = {"code": 1, "result": "Invalid audio Please Try again."}
                self.write(json.dumps(res))
            else:
                res = {"code": 0, "result": text}
                self.write(json.dumps(res))

        self.finish()
예제 #25
0
    def video2data(self, url):
        """Download the video at `url` with youtube-dl and return a tuple of
        (recognized speech text, video description).
        """
        # Work inside self.path so youtube-dl writes its files there.
        current_dir = os.getcwd()
        os.chdir(self.path)
        ydl_opts = {
            'format':
            'bestaudio/best',
            'writeinfojson':
            'info',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'progress_hooks': [self._catch_filename],
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Give the FFmpeg postprocessor time to finish writing the mp3.
        # NOTE(review): a fixed 20 s sleep is fragile for long videos —
        # consider polling for the output file instead.
        time.sleep(20)
        video_description = self._downloaded_data()

        model = Model(self.kaldi_path)
        rec = KaldiRecognizer(model, 16000)

        # Re-encode the downloaded audio to raw 16 kHz mono s16le on stdout.
        process = subprocess.Popen([
            'ffmpeg', '-loglevel', 'quiet', '-i',
            os.path.join(self.path, self.filename), '-ar',
            str(16_000), '-ac', '1', '-f', 's16le', '-'
        ],
                                   stdout=subprocess.PIPE)

        full_text = ''
        while True:
            data = process.stdout.read(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                res = json.loads(rec.Result())
                full_text += ' ' + res['text']
        # Flush the recognizer for the trailing, not-yet-finalized audio.
        full_text += ' ' + json.loads(rec.FinalResult())['text']

        # Clean up the downloaded artifacts before leaving the work directory.
        os.remove(os.path.join(self.path, self.description_file))
        os.remove(os.path.join(self.path, self.filename))

        os.chdir(current_dir)
        return full_text, video_description
예제 #26
0
def listen(wf):
    """Return a pair (segment generator, final-result callable) over `wf`.

    The generator yields one parsed dict per fully recognized segment; the
    callable returns the parsed final flush when invoked."""
    model = Model('model')
    rec = KaldiRecognizer(model, wf.getframerate())

    def segments():
        for frames in iter(lambda: wf.readframes(4000), b''):
            if rec.AcceptWaveform(frames):
                yield json.loads(rec.Result())

    def final():
        return json.loads(rec.FinalResult())

    return (segments(), final)
예제 #27
0
def recognize(line):
    """Recognize the WAV named in `line` ('<uid> <path>') and return the uid
    followed by a space-prefixed text per recognized segment (plus the final
    flush)."""
    uid, path = line.split()
    wf = wave.open(path, "rb")
    rec = KaldiRecognizer(model, wf.getframerate())

    pieces = []
    for frames in iter(lambda: wf.readframes(1000), b''):
        if rec.AcceptWaveform(frames):
            pieces.append(json.loads(rec.Result())['text'])
    pieces.append(json.loads(rec.FinalResult())['text'])
    # Each piece is prefixed with a single space, matching the original format.
    return uid + ''.join(' ' + piece for piece in pieces)
예제 #28
0
    def StreamingRecognize(self, request_iterator, context):
        """gRPC streaming recognition: the first message carries the config,
        later messages carry audio. Yields a response per finalized segment,
        per partial (when enabled), and a final flush at end of stream."""
        first = next(request_iterator)
        spec = first.config.specification
        want_partial = spec.partial_results
        recognizer = KaldiRecognizer(self.model, spec.sample_rate_hertz)
        recognizer.SetMaxAlternatives(spec.max_alternatives)
        recognizer.SetWords(spec.enable_word_time_offsets)

        for message in request_iterator:
            if recognizer.AcceptWaveform(message.audio_content):
                yield self.get_response(recognizer.Result())
            elif want_partial:
                yield self.get_response(recognizer.PartialResult())
        yield self.get_response(recognizer.FinalResult())
예제 #29
0
def speech_recog(fileIn):
    """Decode `fileIn` with ffmpeg, recognize it with Vosk, dump every parsed
    result to a sidecar .json file, and return the extracted word list.

    :param fileIn: path to the input audio file (any format ffmpeg can read)
    :return: whatever words_from_list() extracts from the parsed results
    """
    datalist = []
    SetLogLevel(0)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)

    sample_rate = 16000
    model = Model("model")
    rec = KaldiRecognizer(model, sample_rate)
    # ffmpeg resamples/downmixes the input to raw 16 kHz mono s16le on stdout.
    # (The former `try/except IndexError: raise` around this call was a no-op
    # — Popen does not raise IndexError and the handler only re-raised.)
    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', fileIn, '-ar',
        str(sample_rate), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            datalist.append(json.loads(rec.Result()))

    # Flush the recognizer for the trailing, not-yet-finalized audio.
    datalist.append(json.loads(rec.FinalResult()))
    print(fileIn)

    # Tag every word entry with its source file for downstream aggregation.
    for entry in datalist:
        if "result" in entry:
            for word in entry["result"]:
                word.update({"file": fileIn})

    words = words_from_list(datalist)

    with open(os.path.splitext(fileIn)[0] + ".json", "w") as output_json:
        output_json.write(json.dumps(datalist))

    return words
예제 #30
0
def transcribe_vosk_filename(filepath, model):
    """Transcribe a whole WAV file in one pass with a Vosk model.

    :param filepath: path to a mono 16-bit PCM WAV file
    :param model: path/name of the Vosk model directory
    :return: (words, stamps) — recognized words and their start times;
             both empty when no speech was recognized
    """
    # Avoid shadowing the `model` parameter with the loaded Model object.
    vosk_model = Model(model)

    wf = wave.open(filepath, "rb")
    # Validate the format before building the recognizer.
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    rec = KaldiRecognizer(vosk_model, wf.getframerate())
    # Feed the entire file in one call; FinalResult() then flushes everything.
    rec.AcceptWaveform(wf.readframes(10**8))
    result = json.loads(rec.FinalResult())

    # Bug fix: 'result' is absent when nothing was recognized; the old code
    # raised KeyError there. One pass builds both parallel lists.
    words = []
    stamps = []
    for entry in result.get("result", []):
        stamps.append(entry["start"])
        words.append(entry["word"])

    return words, stamps