Пример #1
0
def transcribe_file():
    """Transcribe an uploaded audio file and answer with a JSON payload.

    Only POST requests are processed; for any other method the function
    falls through and returns ``None``.

    Returns:
        * ``jsonify({'status': 'error', 'message': ...})`` when the request
          has no ``file`` part or the file extension is not listed in
          ``ALLOWED_EXTENSIONS``.
        * UTF-8 encoded JSON bytes ``{'status': 'OK', 'transcription': ...}``
          on success. The body is produced with ``ensure_ascii=False`` so
          non-ASCII characters are emitted unescaped.
    """
    if request.method != 'POST':
        return None

    if 'file' not in request.files:
        return jsonify({
            'status': "error",
            'message': "audio file should be passed for the transcription",
        })

    upload = request.files['file']
    extension = os.path.splitext(upload.filename)[1]
    # The membership test is case-insensitive, but the message echoes the
    # extension exactly as the client sent it.
    if extension.lower() not in ALLOWED_EXTENSIONS:
        return jsonify({
            'status': "error",
            'message': "{} is not supported format.".format(extension),
        })

    # The temporary file is removed automatically when the block exits.
    with NamedTemporaryFile(suffix=extension) as audio_tmp:
        upload.save(audio_tmp.name)
        logging.info('Transcribing file...')
        # The temporary file object itself (not its name) is handed to
        # run_transcribe as ``audio_path``.
        decoded, _ = run_transcribe(audio_path=audio_tmp,
                                    spect_parser=spect_parser,
                                    model=model,
                                    decoder=decoder,
                                    device=device,
                                    use_half=config.model.use_half)
        logging.info('File transcribed')
        payload = {'status': "OK", 'transcription': decoded[0][0]}
        return json.dumps(payload, ensure_ascii=False).encode('utf8')
Пример #2
0
def transcribe_file():
    """Transcribe an uploaded audio file and answer with a JSON response.

    Only POST requests are processed; for any other method the function
    falls through and returns ``None``.

    Returns:
        ``jsonify`` responses in every handled case:

        * ``{'status': 'error', 'message': ...}`` when the ``file`` part is
          missing, the extension is not in ``ALLOWED_EXTENSIONS``, or any
          exception is raised while handling the request;
        * ``{'status': 'OK', 'transcription': ...}`` on success.
    """
    if request.method == 'POST':
        res = {}
        try:
            if 'file' not in request.files:
                res['status'] = "error"
                res['message'] = "audio file should be passed for the transcription"
                return jsonify(res)
            file = request.files['file']
            filename = file.filename
            _, file_extension = os.path.splitext(filename)
            if file_extension.lower() not in ALLOWED_EXTENSIONS:
                res['status'] = "error"
                res['message'] = "{} is not supported format.".format(
                    file_extension)
                return jsonify(res)
            # The temporary file is removed automatically when the block exits.
            with NamedTemporaryFile(
                    suffix=file_extension) as tmp_saved_audio_file:
                file.save(tmp_saved_audio_file.name)
                logging.info('Transcribing file...')
                # The temporary file object itself (not its name) is passed
                # as ``audio_path``.
                transcription, _ = run_transcribe(
                    audio_path=tmp_saved_audio_file,
                    spect_parser=spect_parser,
                    model=model,
                    decoder=decoder,
                    device=device,
                    use_half=config.model.use_half)
                logging.info('File transcribed')
                res['status'] = "OK"
                res['transcription'] = transcription[0][0]
                return jsonify(res)
        except Exception as e:
            # logging.exception records the traceback as well as the message;
            # the client only ever sees the generic text below, so the log is
            # the sole place where the failure can be diagnosed.
            logging.exception(e)
            # Build the error payload from scratch so that nothing written to
            # ``res`` before the failure leaks into the response.
            return jsonify({
                'status': "error",
                'message': "The server encountered an internal error and was unable to complete your request.",
            })
Пример #3
0
def transcribe_file():
    """Transcribe an uploaded audio file, store the text and report WER/CER.

    Only POST requests are processed; for any other method the function
    falls through and returns ``None``.

    Request fields read:
        * ``files['file']`` -- the audio upload; its extension (compared
          case-insensitively) must be in ``ALLOWED_EXTENSIONS``.
        * ``form['model']`` -- optional; ``2`` selects ``model2``, ``3``
          selects ``model3``, anything else (or a missing / non-numeric
          value) keeps ``model``.
        * ``form['targetString']`` -- optional reference text used to
          compute the word / character error rates.

    Returns:
        * ``jsonify(res)`` with ``res['code'] == 403`` when the file part is
          missing or the extension is not supported.
        * Otherwise the plain ``res`` dict. On success it carries
          ``status`` (200), ``data``, ``path``, ``total``, ``seconds``,
          ``wer`` and ``cer``. If anything raises during processing,
          ``status`` is 403 and ``data`` holds the exception text.
          Note that validation errors use the key ``code`` while processing
          errors use ``status``; this is kept for client compatibility.
    """
    if request.method == 'POST':
        res = {}
        res['total'] = 0
        res['seconds'] = 0
        t0 = time.time()
        # Stays None until the transcription step has produced (or at least
        # started producing) a hypothesis; the metrics step checks this
        # instead of depending on an unbound-name error.
        res_text = None
        if 'file' not in request.files:
            res['code'] = 403
            res['data'] = "Missed audio files"
            return jsonify(res)
        try:
            file = request.files['file']
            filename = file.filename
            _, file_extension = os.path.splitext(filename)
            extension = file_extension.lower()
            if extension not in ALLOWED_EXTENSIONS:
                res['code'] = 403
                res['data'] = "{} is not supported format.".format(
                    file_extension)
                return jsonify(res)
            # The lower-cased extension is used as suffix: the path rewrites
            # below are case-sensitive str.replace calls, so an upload named
            # e.g. "X.MP3" would otherwise map the conversion target onto the
            # source file, which is deleted afterwards.
            with NamedTemporaryFile(prefix="product_",
                                    suffix=extension,
                                    dir='/work/dataset_product/wav',
                                    delete=False) as temp_audio:
                file.save(temp_audio.name)
                upload_path = temp_audio.name

            path = upload_path
            if extension == ".webm":
                # webm -> mp3 -> wav (16000 Hz according to the original
                # author's note); the intermediate mp3 is discarded.
                mp3_path = upload_path.replace("webm", "mp3")
                convertWebmToMp3(upload_path, mp3_path)
                wav_path = mp3_path.replace("mp3", "wav")
                convertMp3ToWav16(mp3_path, wav_path)
                os.remove(mp3_path)
                path = wav_path
            elif extension == ".mp3":
                # mp3 -> wav (16000 Hz according to the original author's
                # note).
                wav_path = upload_path.replace("mp3", "wav")
                convertMp3ToWav16(upload_path, wav_path)
                path = wav_path
            if extension != ".wav":
                # Only a wav upload is itself the file that gets transcribed;
                # every other upload is removed once handled.
                os.remove(upload_path)
            print("File name : " + str(path))

            choose = 1
            try:
                choose = int(request.form['model'])
            except (KeyError, ValueError):
                # Missing or non-numeric selector: keep the default model.
                pass

            runingModel = model
            if choose == 2:
                runingModel = model2
                print("Using model 2")
            if choose == 3:
                runingModel = model3
                print("Using model 3")
            transcription, _ = run_transcribe(audio_path=path,
                                              spect_parser=spect_parser,
                                              model=runingModel,
                                              decoder=decoder,
                                              device=device,
                                              use_half=True)
            res['status'] = 200
            res_text = ""
            if len(transcription) > 0:
                res_text = transcription[0][0]
                res['total'] = len(transcription[0])
            else:
                res_text = transcription
                res['total'] = len(transcription)

            res['data'] = transcribe_comma.runTranscribe(
                commo_model, dict_data, word_dict, char_dict, res_text)
            res['path'] = path
            # str.replace rewrites every "wav" in the path, i.e. the "wav"
            # directory component as well as the extension.
            # NOTE(review): presumably intentional (wav/ -> txt/ sibling
            # directory); confirm that the txt directory exists.
            transTxt = path.replace("wav", "txt")
            with open(transTxt, "w") as textFile:
                textFile.write(res['data'])
            logging.info('Success transcript')
            logging.debug(res)
        except Exception as exx:
            res['status'] = 403
            res['data'] = str(exx)
        total = time.time() - t0

        # Error rates default to 0 whenever they cannot be computed: no
        # hypothesis available, no 'targetString' supplied, or the metric
        # helpers themselves fail.
        wer = 0
        cer = 0
        if res_text is not None:
            try:
                targetString = request.form['targetString']
                wer = werPecentage(targetString, res_text)
                cer = cerPecentage(targetString, res_text)
            except Exception:
                wer = 0
                cer = 0
        res['seconds'] = total
        res['wer'] = round(wer, 3)
        res['cer'] = round(cer, 3)
        return res
Пример #4
0
def transcribe_file():
    """Transcribe an uploaded or downloaded recording and report WER/CER.

    Only POST requests are processed; for any other method the function
    falls through and returns ``None``.

    Request fields read:
        * ``files['file']`` -- optional audio upload; its extension
          (compared case-insensitively) must be in ``ALLOWED_EXTENSIONS``.
        * ``form['url']`` -- optional location of an ``.mp3`` or ``.mp4``
          file; only consulted when no ``file`` part is present.
        * ``form['model']`` -- optional; ``2`` selects ``model2``, ``3``
          selects ``model3``, anything else (or a missing / non-numeric
          value) keeps ``model``.
        * ``form['targetString']`` -- optional reference text used to
          compute the word / character error rates.

    Returns:
        * ``jsonify(res)`` with ``res['code'] == 403`` when neither ``file``
          nor ``url`` is given, or the extension is not supported.
        * Otherwise the plain ``res`` dict. On success it carries
          ``status`` (200), ``data``, ``greedy``, ``path``, ``total``,
          ``seconds``, ``wer`` and ``cer``. If anything raises during
          processing, ``status`` is 403 and ``data`` holds a message that
          includes the exception text.
          Note that validation errors use the key ``code`` while processing
          errors use ``status``; this is kept for client compatibility.
    """
    if request.method == 'POST':
        res = {}
        res['total'] = 0
        res['seconds'] = 0
        t0 = time.time()
        # Stays None until the transcription step has produced (or at least
        # started producing) a hypothesis; the metrics step checks this
        # instead of depending on an unbound-name error.
        res_text = None
        if 'file' not in request.files and 'url' not in request.form:
            res['code'] = 403
            res['data'] = "Missed audio files or url of mp3 file."
            return jsonify(res)
        try:
            # At this point at least one of 'file' / 'url' was supplied;
            # when both are present the uploaded file takes precedence.

            # Server-side path of the audio that will be recognised.
            path = ""
            if 'file' in request.files:
                # Case 1: a 'file' part is present.
                file = request.files['file']
                filename = file.filename
                _, file_extension = os.path.splitext(filename)
                if file_extension.lower() not in ALLOWED_EXTENSIONS:
                    res['code'] = 403
                    res['data'] = "{} is not supported format.".format(
                        file_extension)
                    return jsonify(res)
                with NamedTemporaryFile(prefix="product_",
                                        suffix=file_extension,
                                        dir='/work/dataset_product/wav',
                                        delete=False) as temp_audio:
                    # Store the upload under the temporary file's name.
                    file.save(temp_audio.name)
                    path = temp_audio.name
            elif 'url' in request.form:
                # Case 2: no 'file' part, but a 'url' field (mp3 or video).
                url = request.form['url']

                # Download the remote file.
                # NOTE(review): this folder is relative, unlike the absolute
                # '/work/dataset_product/wav' used for uploads -- confirm it
                # is intended.
                folder = 'work/dataset_recording/wav'
                absolute_path = download_file(url, folder)

                if absolute_path.endswith('.mp3'):
                    # Already an mp3: use it directly.
                    path = absolute_path
                elif absolute_path.endswith('.mp4'):
                    # Video: extract its audio track into an mp3 file.
                    my_clip = mp.VideoFileClip(absolute_path)
                    filename = os.path.splitext(absolute_path)[0]
                    # NOTE(review): if download_file returns an absolute
                    # path, os.path.join ignores ``folder``; if it returns a
                    # relative path that already includes ``folder``, the
                    # folder is duplicated here -- verify against
                    # download_file.
                    path = os.path.join(folder, filename + '.mp3')
                    my_clip.audio.write_audiofile(path)
                else:
                    # Anything else is rejected.
                    res['code'] = 403
                    res['data'] = "Extension is not supported."
                    return jsonify(res)

            # Convert the audio and continue with the path that
            # ConvertAudioToWav returns (per the original author's note: a
            # wav file named like the source file).
            path = ConvertAudioToWav(path)

            print("File name : " + str(path))
            choose = 1
            try:
                choose = int(request.form['model'])
            except (KeyError, ValueError):
                # Missing or non-numeric selector: keep the default model.
                pass

            runingModel = model
            if choose == 2:
                runingModel = model2
                print("Using model 2")
            if choose == 3:
                runingModel = model3
                print("Using model 3")
            transcription, transcriptionGreedy, _, _ = run_transcribe(
                audio_path=path,
                spect_parser=spect_parser,
                model=runingModel,
                decoder=decoder,
                device=device,
                use_half=True)
            res['status'] = 200
            res_text = ""
            if len(transcription) > 0:
                res_text = transcription[0][0]
                res['total'] = len(transcription[0])
            else:
                res_text = transcription
                res['total'] = len(transcription)

            res['data'] = transcribe_comma.runTranscribe(
                commo_model, dict_data, word_dict, char_dict, res_text)
            res['path'] = path
            res['greedy'] = transcribe_comma.runTranscribe(
                commo_model, dict_data, word_dict, char_dict,
                transcriptionGreedy[0][0])
            # str.replace rewrites every "wav" in the path, i.e. a "wav"
            # directory component as well as the extension.
            # NOTE(review): presumably intentional (wav/ -> txt/ sibling
            # directory); confirm that the txt directory exists.
            transTxt = path.replace("wav", "txt")
            with open(transTxt, "w") as textFile:
                textFile.write(res['data'])
            logging.info('Success transcript')
            logging.debug(res)
        except Exception as exx:
            res['status'] = 403
            res['data'] = "Không thể nhận dạng\n" + str(exx)
        total = time.time() - t0

        # Error rates default to 0 whenever they cannot be computed: no
        # hypothesis available, no 'targetString' supplied, or the metric
        # helpers themselves fail.
        wer = 0
        cer = 0
        if res_text is not None:
            try:
                targetString = request.form['targetString']
                wer = werPecentage(targetString, res_text)
                cer = cerPecentage(targetString, res_text)
            except Exception:
                wer = 0
                cer = 0
        res['seconds'] = total
        res['wer'] = round(wer, 3)
        res['cer'] = round(cer, 3)
        return res