Пример #1
0
def audiosegment_google_speech(audio, filename, sample_rate, lang='ms'):
    """Transcribe one audio segment and store the transcript and a 16 kHz copy.

    The segment is written to a temporary file called ``filename`` in the
    current working directory and passed to ``r.recognize_google``. When a
    non-empty transcript comes back, it is saved to
    ``output-text/<filename>.txt`` and the audio, resampled to 16000, is
    saved to ``output-wav/<filename>``. The temporary file is removed before
    returning, including when one of the output writes fails.

    Args:
        audio: Object exposing its samples as ``audio.array``.
        filename: Name used for the temporary file and, with the
            ``output-wav/`` and ``output-text/`` prefixes, for the results.
        sample_rate: Sample rate of ``audio.array``.
        lang: Language code forwarded to ``recognize_google``.

    Returns:
        ``False`` when ``output-wav/<filename>`` already exists (nothing is
        done), otherwise ``True`` -- also when recognition failed or produced
        an empty transcript.
    """
    output_wav = 'output-wav/' + filename
    if os.path.exists(output_wav):
        return False

    try:
        sf.write(filename, audio.array, sample_rate)

        # Recognition is best-effort: any failure is treated as "no text".
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C and SystemExit
        # working while a long batch is running.
        try:
            with sr.AudioFile(filename) as source:
                recorded = r.record(source)
            text = r.recognize_google(recorded, language=lang)
        except Exception:
            text = ''

        if text:
            text_filename = f'output-text/{filename}.txt'
            # Explicit encoding so non-ASCII transcripts do not depend on the
            # platform's default codec.
            with open(text_filename, 'w', encoding='utf-8') as fopen:
                fopen.write(text)

            resampled = malaya_speech.resample(
                malaya_speech.astype.int_to_float(audio.array),
                sample_rate,
                16000,
            )
            sf.write(output_wav, resampled, 16000)
    finally:
        # Always clean up the temporary file, even if an output write raised.
        if os.path.exists(filename):
            os.remove(filename)

    return True
Пример #2
0
def split(file, max_duration=10.0):
    """Load an MP3 file and cut it into segments using VAD decisions.

    The file is decoded as a single channel, converted to float, passed
    through ``p_noise`` (its ``'concatenate'`` output is kept), and framed
    twice: once at the file's own frame rate and once on a copy resampled to
    16000 that is fed to ``vad``. Each native-rate frame is paired with the
    VAD result of the frame at the same index and the pairs are handed to
    ``split_vad_duration``.

    Args:
        file: Path of the MP3 file to load.
        max_duration: Forwarded to ``split_vad_duration`` as ``max_duration``.

    Returns:
        A tuple ``(segments, audio, frame_rate)`` where ``segments`` is the
        list of ``.array`` values of the pieces returned by
        ``split_vad_duration``, ``audio`` is the loaded single-channel
        ``AudioSegment`` and ``frame_rate`` is its frame rate.
    """
    print(file)
    audio = AudioSegment.from_mp3(file).set_channels(1)
    native_rate = audio.frame_rate

    signal = malaya_speech.astype.int_to_float(
        np.array(audio.get_array_of_samples())
    )
    signal = p_noise(signal)['concatenate']

    # Integer copy at 16000 used only for the VAD decisions.
    signal_as_int = malaya_speech.astype.float_to_int(signal)
    signal_16k = malaya_speech.resample(
        signal_as_int, native_rate, 16000
    ).astype(int)

    native_frames = generator.frames(signal, 30, native_rate)
    vad_frames = generator.frames(
        signal_16k, 30, 16000, append_ending_trail=False
    )

    # Pair each native-rate frame with the VAD result at the same index.
    labelled = []
    for index, vad_frame in enumerate(vad_frames):
        labelled.append((native_frames[index], vad(vad_frame)))

    pieces = split_vad_duration(
        labelled,
        max_duration=max_duration,
        negative_threshold=0.1,
    )
    return [piece.array for piece in pieces], audio, native_rate
Пример #3
0
def parallel(f):
    """Read a WAV file, sample a random excerpt and build its reduced-rate copy.

    Uses the module-level ``sr`` and ``reduction_factor`` for the source and
    target rates of the resampling step.

    Args:
        f: Path passed to ``read_wav``; only the first element of its result
            is used.

    Returns:
        A tuple ``(resampled, excerpt)``: the excerpt resampled from ``sr``
        to ``sr // reduction_factor``, followed by the excerpt itself.
    """
    excerpt = random_sampling(read_wav(f)[0], length=1000)
    target_rate = sr // reduction_factor
    resampled = malaya_speech.resample(excerpt, sr, target_rate)
    return resampled, excerpt
Пример #4
0
def downsample(y, sr, down_sr):
    """Resample ``y`` from ``sr`` to ``down_sr`` and then back to ``sr``.

    Args:
        y: Signal passed to ``malaya_speech.resample``.
        sr: Rate of ``y`` and of the returned signal.
        down_sr: Intermediate rate for the round trip.

    Returns:
        The result of resampling to ``down_sr`` and back to ``sr``.
    """
    lowered = malaya_speech.resample(y, sr, down_sr)
    restored = malaya_speech.resample(lowered, down_sr, sr)
    return restored
                  frame_duration_ms=30).batching(20).foreach_map(
                      model_v2.predict).flatten())

from glob import glob

# Collect every path in the current working directory matching '*.mp3'.
mp3s = glob('*.mp3')
# Bare expression: shows the list when run as a notebook/REPL cell and has no
# effect when executed as a plain script.
mp3s

for file in mp3s:
    print(file)
    try:
        audio = AudioSegment.from_mp3(file)
        sample_rate = audio.frame_rate
        samples = np.array(audio.get_array_of_samples())
        samples = malaya_speech.astype.int_to_float(samples)
        samples_16k = malaya_speech.resample(samples, sample_rate, 16000)
        frames_16k = list(
            malaya_speech.utils.generator.frames(samples_16k, 30, 16000))
        frames = list(
            malaya_speech.utils.generator.frames(samples, 30, sample_rate))
        result = p.emit(samples_16k)
        frames_deep_v2_batch = [(frame, result['flatten'][no])
                                for no, frame in enumerate(frames)]
        results = malaya_speech.split.split_vad(frames_deep_v2_batch,
                                                n=5,
                                                negative_threshold=0.1)

        for no in tqdm(range(len(results))):
            result = results[no]

            sf.write('test.wav', result.array, sample_rate)