import array
import logging

import miniaudio

logger = logging.getLogger(__name__)


def convert_audio_miniaudio(in_file, out_file):
    """Decode any supported audio file and rewrite it as a mono 44.1 kHz signed 16-bit WAV."""
    channels = 1
    sample_rate = 44100
    # decode_file() returns a DecodedSoundFile - contains various properties and also
    # the PCM frames of a fully decoded audio file.
    src = miniaudio.decode_file(in_file)  # optionally: dither=miniaudio.DitherMode.TRIANGLE
    tgt = miniaudio.DecodedSoundFile("result", channels, sample_rate,
                                     miniaudio.SampleFormat.SIGNED16,
                                     array.array('h'))  # 'h' = signed 16-bit samples
    converted_frames = miniaudio.convert_frames(
        src.sample_format, src.nchannels, src.sample_rate, src.samples.tobytes(),
        tgt.sample_format, tgt.nchannels, tgt.sample_rate)
    tgt.num_frames = int(len(converted_frames) / tgt.nchannels / tgt.sample_width)
    tgt.samples.frombytes(converted_frames)
    miniaudio.wav_write_file(out_file, tgt)
    logger.debug("wrote converted file to '%s'", out_file)
    return True
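# A minimal usage sketch for the helper above; "input.mp3" and "output.wav" are
# hypothetical file names, chosen only to illustrate the call.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    convert_audio_miniaudio("input.mp3", "output.wav")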
# Flask view function from a speech-to-text server. The surrounding module is
# assumed to define WORK_DIR, ENABLE_NGRAM and wav_to_text(), and to register
# this function as a POST route.
import array
import json
import os
import time
import uuid

import miniaudio
import requests
from flask import request, jsonify
from werkzeug.utils import secure_filename


def transcribe_file():
    if request.method == 'POST':
        # Authenticate first
        print("[INFO] token", request.form.get("token"))
        authResponse = requests.post('http://127.0.0.1:8001/next',
                                     json={"token": request.form.get("token")}).json()
        if "token" not in authResponse:
            return jsonify("Forbidden")

        # upload the wav file to the work directory
        f = request.files['file']
        greedy = True
        if request.form.get('beam'):
            if not ENABLE_NGRAM:
                print("Error: Beam Search with ngram LM is not enabled on this server")
            else:
                greedy = False
        file_path = os.path.join(WORK_DIR, secure_filename(str(uuid.uuid1()) + f.filename))
        f.save(file_path)

        # convert to the mono 16 kHz unsigned 8-bit format expected by the transcription model
        inputAudio = miniaudio.wav_read_file_s16(file_path)
        result = miniaudio.convert_frames(inputAudio.sample_format, inputAudio.nchannels,
                                          inputAudio.sample_rate, bytes(inputAudio.samples),
                                          miniaudio.SampleFormat.UNSIGNED8, 1, 16000)
        newFileName = secure_filename(str(uuid.uuid1()) + 'converted.wav')
        newFilePath = os.path.join(WORK_DIR, newFileName)
        outputAudio = miniaudio.DecodedSoundFile(newFileName, 1, 16000,
                                                 miniaudio.SampleFormat.UNSIGNED8,
                                                 array.array('B', result))
        miniaudio.wav_write_file(newFilePath, outputAudio)

        # create manifest
        manifest = dict()
        manifest['audio_filepath'] = newFilePath
        manifest['duration'] = 18000
        manifest['text'] = 'todo'
        with open(newFilePath + ".json", 'w') as fout:
            fout.write(json.dumps(manifest))

        start_t = time.time()
        transcription = wav_to_text(newFilePath + ".json", greedy=greedy)
        total_t = time.time() - start_t

        # clean up the uploaded file, the converted file and the manifest
        os.remove(file_path)
        os.remove(newFilePath)
        os.remove(newFilePath + ".json")
        return jsonify({
            "transcription": transcription,
            "total_time": total_t,
            "token": authResponse["token"]
        })
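# Client-side sketch for exercising the endpoint above. The URL and port are
# assumptions (the route registration is not shown in this snippet); the token
# must be one the auth service at 127.0.0.1:8001 will accept.
import requests

def transcribe_remote(wav_path, token, url="http://127.0.0.1:8000/transcribe"):
    with open(wav_path, "rb") as fh:
        response = requests.post(url,
                                 data={"token": token},   # add "beam": "1" to request beam search
                                 files={"file": fh})
    return response.json()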
def audio_iter():
    """Generator that feeds converted PCM chunks to a playback device on demand.

    Relies on the enclosing scope for format, channels, sample_rate, source,
    start, end, field_size (bytes per sample) and device, plus module-level
    imports of itertools, time and miniaudio.
    """
    conv = miniaudio.convert_frames(format, channels, sample_rate, source[start:end],
                                    device.format, device.nchannels, device.sample_rate)
    samp_iter = iter(conv)
    required_frames = yield b''  # priming yield; the device sends back the frame count it needs
    old_time = time.time()
    while True:
        sample_data = bytes(itertools.islice(samp_iter, required_frames * channels * field_size))
        if not sample_data:
            break
        new_time = time.time()  # timing bookkeeping; not used further in this snippet
        old_time = new_time
        required_frames = yield sample_data
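# A sketch of how a generator like audio_iter() is usually wired to a playback
# device; it assumes the enclosing code has already set up source, format,
# channels, sample_rate, start, end and field_size as described above.
import time
import miniaudio

device = miniaudio.PlaybackDevice()   # default output format; the generator converts to match it
stream = audio_iter()
next(stream)                          # prime the generator so it waits at its first yield
device.start(stream)                  # miniaudio now pulls sample_data chunks as needed
time.sleep(5)                         # keep playing for a bit (illustrative)
device.close()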
""" Convert an audio file to WAV and different sample formats. """ import os import array import miniaudio def samples_path(filename): return os.path.join(os.path.abspath(os.path.dirname(__file__)), 'samples', filename) src = miniaudio.decode_file(samples_path("music.ogg"), dither=miniaudio.DitherMode.TRIANGLE) print("Source: ", src) result = miniaudio.DecodedSoundFile("result", 1, 22050, miniaudio.SampleFormat.UNSIGNED8, array.array('b')) converted_frames = miniaudio.convert_frames(src.sample_format, src.nchannels, src.sample_rate, src.samples.tobytes(), result.sample_format, result.nchannels, result.sample_rate) # note: currently it is not possible to provide a dithermode to convert_frames() result.num_frames = int(len(converted_frames) / result.nchannels / result.sample_width) result.samples.frombytes(converted_frames) miniaudio.wav_write_file("converted.wav", result) print("Converted sound written to ./converted.wav") output_info = miniaudio.get_file_info("converted.wav") print(output_info)