示例#1
0
def convert_audio_miniaudio(in_file, out_file):
    """Decode *in_file* and write it to *out_file* as mono 44.1 kHz 16-bit WAV.

    Args:
        in_file: Path of the audio file handed to ``miniaudio.decode_file``.
        out_file: Path of the WAV file to write.

    Returns:
        Always ``False``; the value is kept unchanged so existing callers
        that inspect it keep working.
    """
    channels = 1
    sample_rate = 44100

    # No dither mode is passed to decode_file here.
    src = miniaudio.decode_file(in_file)

    # Empty target container; its format fields drive the conversion below.
    # Typecode 'h' (signed 16-bit) matches SampleFormat.SIGNED16, so every
    # array item is exactly one sample once the converted bytes are loaded.
    tgt = miniaudio.DecodedSoundFile("result", channels, sample_rate,
                                     miniaudio.SampleFormat.SIGNED16,
                                     array.array('h'))

    converted_frames = miniaudio.convert_frames(src.sample_format,
                                                src.nchannels, src.sample_rate,
                                                src.samples.tobytes(),
                                                tgt.sample_format,
                                                tgt.nchannels, tgt.sample_rate)

    # One frame holds one sample per channel; integer division keeps the
    # count exact instead of going through floats.
    tgt.num_frames = len(converted_frames) // (tgt.nchannels *
                                               tgt.sample_width)
    tgt.samples.frombytes(converted_frames)

    miniaudio.wav_write_file(out_file, tgt)

    # Lazy %-style arguments: the message is only built if DEBUG is enabled.
    logger.debug("wrote converted file to '%s'", out_file)

    return False
 def _save_to_file(self, file_name: str, capture: miniaudio.CaptureDevice):
     """Write the buffered capture data to *file_name* as a WAV file.

     The byte chunks collected in ``self._stream_data`` are concatenated and
     loaded into a signed 16-bit (``'h'``) array. Channel count, sample rate
     and sample format are taken from *capture*.
     """
     pcm = array.array('h')
     pcm.frombytes(b"".join(self._stream_data))
     miniaudio.wav_write_file(
         file_name,
         miniaudio.DecodedSoundFile('capture', capture.nchannels,
                                    capture.sample_rate, capture.format,
                                    pcm))
示例#3
0
def transcribe_file():
    """Authenticate the caller, transcribe an uploaded WAV file, return JSON.

    For a POST request the handler:
      1. posts the form's ``token`` to the local auth endpoint and answers
         ``jsonify("Forbidden")`` when the reply has no ``token`` key;
      2. saves the uploaded ``file`` into ``WORK_DIR`` and converts it to a
         mono, 16 kHz, unsigned 8-bit WAV;
      3. writes a one-entry manifest next to the converted file and passes
         it to ``wav_to_text`` (greedy decoding unless the ``beam`` form
         field is set and ``ENABLE_NGRAM`` is true);
      4. removes the three temporary files, also when a step raises.

    Returns:
        A JSON response with ``transcription``, ``total_time`` (seconds spent
        inside ``wav_to_text``) and the ``token`` returned by the auth
        endpoint. Requests with any other method fall through and return
        ``None``.
    """
    if request.method == 'POST':
        # Authenticate first
        # NOTE(review): this prints the caller's token to stdout.
        print("INFO] token", request.form.get("token"))

        authResponse = requests.post('http://127.0.0.1:8001/next',
                                     json={
                                         "token": request.form.get("token")
                                     }).json()

        if "token" not in authResponse:
            return jsonify("Forbidden")

        f = request.files['file']
        greedy = True
        if request.form.get('beam'):
            if not ENABLE_NGRAM:
                # Beam search was requested but is unavailable: stay greedy.
                print(
                    "Error: Beam Search with ngram LM is not enabled on this server"
                )
            else:
                greedy = False

        # Work out every temporary path up front so the cleanup below knows
        # what to delete no matter where a failure happens.
        file_path = os.path.join(
            WORK_DIR, secure_filename(str(uuid.uuid1()) + f.filename))
        newFileName = secure_filename(str(uuid.uuid1()) + 'converted.wav')
        newFilePath = os.path.join(WORK_DIR, newFileName)
        manifestPath = newFilePath + ".json"

        try:
            # upload wav_file to work directory
            f.save(file_path)

            # Convert to the format handed to the recogniser:
            # mono, 16 kHz, unsigned 8-bit.
            inputAudio = miniaudio.wav_read_file_s16(file_path)
            result = miniaudio.convert_frames(
                inputAudio.sample_format, inputAudio.nchannels,
                inputAudio.sample_rate, bytes(inputAudio.samples),
                miniaudio.SampleFormat.UNSIGNED8, 1, 16000)
            outputAudio = miniaudio.DecodedSoundFile(
                newFileName, 1, 16000, miniaudio.SampleFormat.UNSIGNED8,
                array.array('B', result))
            miniaudio.wav_write_file(newFilePath, outputAudio)

            # create manifest
            manifest = {
                'audio_filepath': newFilePath,
                'duration': 18000,
                'text': 'todo',
            }
            with open(manifestPath, 'w') as fout:
                fout.write(json.dumps(manifest))

            start_t = time.time()
            transcription = wav_to_text(manifestPath, greedy=greedy)
            total_t = time.time() - start_t
        finally:
            # The converted file lives in WORK_DIR, so it must be removed via
            # its full path (newFilePath), not the bare file name. Files that
            # were never created because an earlier step failed are skipped.
            for tmp_path in (file_path, newFilePath, manifestPath):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        return jsonify({
            "transcription": transcription,
            "total_time": total_t,
            "token": authResponse["token"]
        })
示例#4
0
"""
Convert an audio file to WAV and different sample formats.
"""

import os
import array
import miniaudio


def samples_path(filename):
    """Return the absolute path of *filename* in the ``samples`` directory
    that sits next to this script."""
    script_dir = os.path.abspath(os.path.dirname(__file__))
    return os.path.join(script_dir, 'samples', filename)


# Decode the bundled sample with triangle dithering.
src = miniaudio.decode_file(samples_path("music.ogg"), dither=miniaudio.DitherMode.TRIANGLE)
print("Source: ", src)

# Empty target container: mono, 22050 Hz, unsigned 8-bit.
# Typecode 'B' (unsigned byte) matches SampleFormat.UNSIGNED8, so the values
# read back from result.samples are the real sample values.
result = miniaudio.DecodedSoundFile("result", 1, 22050, miniaudio.SampleFormat.UNSIGNED8, array.array('B'))
converted_frames = miniaudio.convert_frames(src.sample_format, src.nchannels, src.sample_rate, src.samples.tobytes(),
                                            result.sample_format, result.nchannels, result.sample_rate)
# note: currently it is not possible to provide a dithermode to convert_frames()

# One frame holds one sample per channel; integer division keeps the count exact.
result.num_frames = len(converted_frames) // (result.nchannels * result.sample_width)
result.samples.frombytes(converted_frames)


miniaudio.wav_write_file("converted.wav", result)
print("Converted sound written to ./converted.wav")

# Read the written file's metadata back and show it.
output_info = miniaudio.get_file_info("converted.wav")
print(output_info)
# `decoded` and `producer` are defined outside this excerpt.
# The converter is configured with the source's format, channel count and
# sample rate on the input side, and unsigned 8-bit / 1 channel / 12000 Hz
# with triangle dithering on the output side.
converter = miniaudio.StreamingConverter(
    decoded.sample_format, decoded.nchannels, decoded.sample_rate,
    miniaudio.SampleFormat.UNSIGNED8, 1, 12000,
    producer, miniaudio.DitherMode.TRIANGLE)

print("Stream format conversion of source:")
framechunks = []
# Keep reading until the converter hands back an empty (falsy) result.
framedata = converter.read(4000)
while framedata:
    print("got chunk of size", len(framedata))
    framechunks.append(framedata)
    framedata = converter.read(4000)

print("\nGot", len(framechunks), "total frame chunks")

# Flatten the collected chunks into one unsigned-byte array and write it out
# as a WAV file using the converter's output parameters.
samples = array.array('B')
for f in framechunks:
    samples.extend(f)
outputfile = miniaudio.DecodedSoundFile(
    "converted", converter.out_channels, converter.out_samplerate,
    converter.out_format, samples)
miniaudio.wav_write_file("converted.wav", outputfile)

print("\nConverted sound written to ./converted.wav")
# Read the written file's metadata back and show it.
output_info = miniaudio.get_file_info("converted.wav")
print(output_info)