Example No. 1
def audio_split(path, min_sl=300, sth=-40):
    """
    sgm: an AudioSegment object
    silence_thresh=-70      # anything quieter than -70 dBFS counts as silence
    min_silence_len=700     # split when silence lasts longer than 700 ms
    length_limit=60*1000    # no segment may exceed 1 minute after splitting
    abandon_chunk_len=500   # discard segments shorter than 500 ms
    joint_silence_len=1300  # insert 1300 ms of silence between segments when re-joining
    """
    if os.path.isfile(path):
        dp = os.path.splitext(path)
        if dp[1] in ['.mp3', '.wav', '.flv', '.ogg', '.raw', '.m4a']:
            sgm = AudioSegment.from_file(path, format=dp[1].replace('.', ''))
            return split_on_silence(sgm, min_silence_len=min_sl, silence_thresh=sth)
        else:
            print('%s is not an audio file, please pass an audio file.' % path)
            sys.exit()
    elif isinstance(path, pydub.audio_segment.AudioSegment):
        return split_on_silence(path, min_silence_len=min_sl, silence_thresh=sth)
    else:
        print('Input is neither an audio file nor an AudioSegment')
        sys.exit()
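The snippet assumes its imports are already in scope; a minimal driver, with a hypothetical input file, would be:

import os
import sys

import pydub
from pydub import AudioSegment
from pydub.silence import split_on_silence

chunks = audio_split('lecture.mp3')  # hypothetical input file
print('got %d chunks' % len(chunks))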
Example No. 2
 def split_audio(self, path):
     path = str(path)
     dirc = os.path.dirname(path)
     base = os.path.basename(dirc)
     ########CONVERT VIDEO TO AUDIO#########
     svideo = path
     saudio = dirc + '/' + 'audio.wav'
     # Pass the argument list directly; with shell=True only 'ffmpeg' would run.
     command = ['ffmpeg', '-i', svideo, '-ar', '16000', '-ac', '1', saudio]
     subprocess.call(command)
     ########SPLIT AUDIO#########
     sound = AudioSegment.from_wav(dirc + '/' + 'audio.wav')
     if (base == 'Video LK'):
         chunks = split_on_silence(sound,
                                   min_silence_len=200,
                                   silence_thresh=-40)
     else:
         chunks = split_on_silence(sound,
                                   min_silence_len=200,
                                   silence_thresh=-50)
     for i, chunk in enumerate(chunks):
         chunk.export(dirc + '/' + 'chunk{0}.wav'.format(i), format="wav")
     time.sleep(2)
     samplerate, data = wavfile.read(dirc + '/' + 'audio.wav')
     os.remove(dirc + '/' + 'audio.wav')
     return data
Example No. 3
def slice_cut_silence(audio_path_str):
    # Load your audio.
    song = AudioSegment.from_wav(audio_path_str)

    audio_path = Path(audio_path_str)
    out_dir = audio_path.parent  # avoid shadowing the builtin dir()
    stem = audio_path.stem

    print("song dBFS: {}".format(song.dBFS))

    # Split the track wherever the silence is 400 ms or longer and get chunks
    # using the imported function.
    chunks = split_on_silence(
        # Use the loaded audio.
        song,
        # Specify that a silent chunk must be at least 400 ms long.
        min_silence_len=400,
        # Consider a chunk silent if it's quieter than (the track's average dBFS - 15).
        silence_thresh=song.dBFS - 15,
        keep_silence=400)

    print(len(chunks))
    audio_slices = []

    # minimum chunk length of 3 seconds (declared here but not used below)
    min_length = 3 * 1000
    output_chunks = []
    for chunk in chunks:
        if len(chunk) > 60 * 1000:
            sub_chunks = split_on_silence(chunk,
                                          min_silence_len=200,
                                          keep_silence=100,
                                          silence_thresh=song.dBFS - 15)
            print(len(sub_chunks))
            output_chunks.extend(sub_chunks)
        else:
            # already short enough; keep the chunk as-is
            output_chunks.append(chunk)

    # Process each chunk with your parameters
    for i, chunk in enumerate(output_chunks):
        # Create a silence chunk that's 0.5 seconds (or 500 ms) long for padding.
        # set frame rate as the original track
        # silence_chunk = AudioSegment.silent(duration=200, frame_rate=song.frame_rate)

        # Add the padding chunk to beginning and end of the entire chunk.
        # audio_chunk = silence_chunk + chunk + silence_chunk

        # Normalize the entire chunk. To amplify the audio
        # normalized_chunk = match_target_amplitude(chunk, -20.0)

        # Export the audio chunk
        out_path = out_dir / "{}_silence{}.wav".format(stem, i)
        # print("Exporting {} dBFS {}.".format(out_path, normalized_chunk.dBFS))
        chunk.export(out_path, format="wav")
        audio_slices.append(str(out_path))
    return audio_slices
Example No. 4
    def audio_split_by_silence(self, pathname, j):
        total = 0
        song = AudioSegment.from_wav(pathname).set_channels(1)
        chunks = split_on_silence(
            song,

            # split on silences longer than self.min_silent ms
            min_silence_len=self.min_silent,

            # anything quieter than (the track's average dBFS - 16) is considered silence
            silence_thresh=song.dBFS - 16,

            # keep none of the leading/trailing silence
            keep_silence=False)
        base = os.path.basename(pathname)
        basefilename = os.path.splitext(base)[0]
        #print(chunks)
        i = 0

        for chunk in tqdm(chunks):
            if math.ceil(chunk.duration_seconds) <= 10:
                total += chunk.duration_seconds
                FileName = '{3}/{0}_{1:0>3}_{2}.wav'.format(
                    j, i, str(math.ceil(chunk.duration_seconds)),
                    self.outpathname)
                #                 print("Saving........  "+FileName)
                chunk.export(FileName, format="wav")
                i += 1
            else:
                # longer than 10 s: split this chunk again on the same settings
                song1 = AudioSegment.from_mono_audiosegments(chunk)
                sub_chunks = split_on_silence(
                    song1,

                    # split on silences longer than self.min_silent ms
                    min_silence_len=self.min_silent,

                    # anything quieter than (the track's average dBFS - 16) is considered silence
                    silence_thresh=song.dBFS - 16,

                    # keep none of the leading/trailing silence
                    keep_silence=False)
                for sub_chunk in sub_chunks:
                    if math.ceil(sub_chunk.duration_seconds) <= 10:
                        total += sub_chunk.duration_seconds
                        FileName = '{3}/{0}_{1:0>3}_{2}.wav'.format(
                            j, i, str(math.ceil(sub_chunk.duration_seconds)),
                            self.outpathname)
                        #                         print("Saving........  "+FileName)
                        sub_chunk.export(FileName, format="wav")
                        i += 1
        print("Total duration we extract:%s" %
              time.strftime('%H:%M:%S', time.gmtime(total)))
        print("Total duration of file:%s" % time.strftime(
            '%H:%M:%S', time.gmtime(round(song.duration_seconds, 2))))
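The method reads self.min_silent and self.outpathname from its host class, which is not shown; a minimal scaffold consistent with those names (class name and defaults hypothetical) would be:

class SilenceSplitter:
    def __init__(self, min_silent=1000, outpathname='chunks'):
        self.min_silent = min_silent      # ms of silence that triggers a split
        self.outpathname = outpathname    # directory that receives the exported .wav chunks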
Example No. 5
def contact(folder):
    file_path = './input/{0}/{0}'.format(folder)

    # parse the Word document
    file = docx.Document(file_path + ".docx")

    sentences = []
    for para in file.paragraphs:
        sen = para.text.split('.')[0].split('?')[0].split('!')[0].strip()
        sentences.append(sen)

    # process the audio
    woman_sound = AudioSegment.from_file(file_path + '_女.mp3', format="mp3")
    woman_chunks = split_on_silence(woman_sound,
                                    min_silence_len=1000,
                                    silence_thresh=-55)
    # exportChunks(woman_chunks)

    man_sound = AudioSegment.from_file(file_path + '_男.mp3', format="mp3")
    man_chunks = split_on_silence(man_sound,
                                  min_silence_len=500,
                                  silence_thresh=-55)
    # exportChunks(man_chunks)

    print("word中共{0}个句子".format(len(sentences)))
    print("女生 音频中共划分出{0}个音频".format(len(woman_chunks)))
    # print("女生慢 音频中共划分出{0}个音频".format(len(woman_slow_chunks)))
    print("男生 音频中共划分出{0}个音频".format(len(man_chunks)))

    # start exporting
    count = 0
    for i in range(1, len(sentences) + 1):
        sentence = sentences[-i]
        sentence = processSentence(sentence)

        path = "./output/{0}/{1}.mp3".format(folder, sentence)
        if os.path.exists(path):
            print("{0}已经存在".format(sentence))
            continue

        chinese_chunk = increaseDB(woman_chunks[-i * 3])
        man_chunk = increaseDB(man_chunks[-i])
        woman_slow_chunk = increaseDB(woman_chunks[-i * 3 + 2])

        contacted_chunk = silence_sound * 2 + chinese_chunk + silence_sound * 3 + man_chunk + silence_sound * 3 + woman_slow_chunk + silence_sound * 2

        contacted_chunk.export(path, format="mp3")
        count = count + 1

    print("此次共生成{0}个单词音频".format(count))
Example No. 6
def test_final(test_files_folder):
    filenames_all = []
    phone_numbers_all = []
    # loop for each of the test file
    for filename in sorted(os.listdir(test_files_folder),
                           key=lambda x: int(os.path.splitext(x)[0])):
        # only process .wav files (names containing "_" are derived files)
        if filename.endswith(".wav") and "_" not in filename:
            # full file path
            current_file = os.path.join(test_files_folder, filename)
            # print("Looping for file ", current_file)

            # In this file, take out all 10 utterances
            all_utterances = asg.from_file(current_file)
            silence_len = 205  # (in ms) minimum length of a silence to be used for a split
            thresh = -70  # (in dBFS) anything quieter than this will be considered silence. default=-16
            separate_utterances = split_on_silence(all_utterances,
                                                   min_silence_len=silence_len,
                                                   silence_thresh=thresh)
            # fail-safe
            if len(separate_utterances) != 10:
                thresh = -55  # (in dBFS) anything quieter than this will be considered silence. default=-16
                separate_utterances = split_on_silence(
                    all_utterances,
                    min_silence_len=silence_len,
                    silence_thresh=thresh)
            if len(separate_utterances) != 10:
                raise ValueError("Error in ", filename)

            # string to hold the detected phone number
            output = ""
            # for each of the numeral in this file
            for numeral in separate_utterances:
                # apply all 10 models to this numeral
                numeral_path = "test_attempt" + filename[:-4] + "_delete.wav"
                numeral.export(numeral_path, format="wav")
                max_score = -float("inf")
                output_label = None
                for item in num_models:
                    trained_model, label = item
                    current_score = score_one_word(trained_model, numeral_path)
                    if current_score > max_score:
                        max_score = current_score
                        output_label = label
                output += output_label
                os.remove(numeral_path)
            filenames_all.append(filename)
            phone_numbers_all.append(output)
    return filenames_all, phone_numbers_all
Example No. 7
def ReadAudioFile(fname, p=PATH):
    """
    Reads audio.
    
    When reading, we will first pad the file with some silence
    and then trim it. That way all the audio files in our system
    will be similarly positioned.
    """
    from pydub import AudioSegment
    from pydub.silence import split_on_silence
    from scipy.io import wavfile

    silence = AudioSegment.silent(500)
    audio = AudioSegment.from_wav(p + '/' + fname)
    audio = silence + audio + silence
    chunks = split_on_silence(audio, min_silence_len=175, silence_thresh=-50)

    c = chunks[0]
    c.export(fname, format='wav')
    f = ConvertSampleRate(fname)
    rate, data = wavfile.read(f)

    ### CLEAN-UP ###
    os.remove(fname)
    if f != fname:
        # A new file was also created. Delete it too.
        os.remove(f)

    return rate, data
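ConvertSampleRate is defined elsewhere; a minimal pydub-based stand-in matching how it is used here (the target rate is an assumption) could be:

def ConvertSampleRate(fname, target_rate=16000):
    seg = AudioSegment.from_wav(fname)
    if seg.frame_rate == target_rate:
        return fname  # unchanged name tells the caller no new file was created
    out = 'resampled_' + fname
    seg.set_frame_rate(target_rate).export(out, format='wav')
    return out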
Example No. 8
def segmentate(audios_path):
    classes = listdir(audios_path)

    for i, animal in enumerate(classes):
        if animal.find(' ') != -1: continue  # not an animal but a set of words
        current_path = path.join(audios_path, animal)
        sounds = listdir(current_path)
        for sound in sounds:
            if not sound.lower().endswith('.wav'): continue
            AUDIO_FILE = path.join(current_path, sound)
            sound_file = AudioSegment.from_wav(AUDIO_FILE)
            try:
                audio_chunks = split_on_silence(
                    sound_file,
                    min_silence_len=2,
                    # silent if below -16 dBFS or 7x the average dBFS, whichever
                    # is lower (dBFS is negative, so multiplying goes lower)
                    silence_thresh=min(-16, sound_file.dBFS*7),
                    keep_silence=0
                )
            except Exception:
                continue
            for j, chunk in enumerate(audio_chunks):
                out_file = "splitAudio/{1}/{2}chunk{0}.wav".format(j, animal, sound[:-4])
                if chunk.duration_seconds < 0.5:
                    continue
                #print(chunk.duration_seconds)
                if not exists("splitAudio/{0}".format(animal)):
                    makedirs("splitAudio/{0}".format(animal))
                #print("exporting", out_file)
                chunk.export(out_file, format="wav")
Example No. 9
def xcut(wavfn, outfd):
    cnt = 0
    sound = AudioSegment.from_wav(wavfn)
    print(len(sound))
    length = len(sound)/1000.0  # length in seconds
    print("length=", length)
    if length < LENGTH_MIN:
        print("too short, drop")
        return

    sound = sound[CUT_START:-CUT_TAIL]
    print(len(sound))

    # split where silence lasts 500 ms or more and stays below -36 dBFS,
    # keeping 400 ms of silence around each chunk
    chunks = split_on_silence(sound, min_silence_len=500, silence_thresh=-36, keep_silence=400)

    print("chunks num =", len(chunks))
    if len(chunks) <= 2:
        print("no un-head-tail chunk, drop")
        return

    chunks = chunks[1:-1]

    for ck in chunks:
        nfn = outfd + "/" + (wavfn.split('/')[-1][:-4]) + ('_%d.wav' % cnt)
        nck = ck.set_frame_rate(16000)
        nck = nck.set_channels(1)
        # keep only chunks within the effective length bounds
        if EFFECT_MIN <= len(nck) <= EFFECT_MAX:
            nck.export(nfn, format="wav")
            cnt = cnt + 1
            print("chunk name= %s length= %d" % (nfn, len(nck)))
Example No. 10
def get_large_audio_transcription(path):
    """
    Splitting the large audio file into chunks
    and apply speech recognition on each of these chunks
    """
    sound = AudioSegment.from_wav(path)

    chunks = split_on_silence(
        sound,
        min_silence_len=500,
        silence_thresh=sound.dBFS - 14,
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    print('[+] Processing Audio')
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    shutil.rmtree(folder_name)  # the folder still holds the chunk files, so os.rmdir would fail
    return whole_text
Example No. 11
def segmentation(min_silence_len, silence_thresh, chunk_silent):

    song = AudioSegment.from_file('test212.wav', format="wav")
    # print(song.frame_rate)
    shutil.rmtree("audio_chunks", ignore_errors=True)
    # Stock pydub's split_on_silence returns only the chunks, so the silent
    # ranges are recovered separately with pydub.silence.detect_silence.
    chunks = split_on_silence(song, min_silence_len=min_silence_len,
                              silence_thresh=silence_thresh)
    silent_ranges = detect_silence(song, min_silence_len=min_silence_len,
                                   silence_thresh=silence_thresh)
    try:
        os.mkdir('audio_chunks')
    except FileExistsError:
        pass
    i = 0
    # note: this rebinds the chunk_silent parameter to 100 ms of silence
    chunk_silent = AudioSegment.silent(duration=100)
    for chunk in chunks:
        audio_chunk = chunk_silent + chunk + chunk_silent
        audio_chunk.export("./audio_chunks/chunk{0}.wav".format(i), bitrate='64k', format="wav")
        i += 1
    filename = './audio_chunks/chunk' + str(i-1) + '.wav'
    with cl.closing(wave.open(filename, 'r')) as f:
        rate1 = f.getframerate()
    chunk_rate = rate1
    nf_chunks = i
    return chunk_rate, nf_chunks, silent_ranges
Example No. 12
def SplitAudio(fname):
    """
    Splits given audio file on silence and exports
    the separated chunks. 
    """
    import os
    from collections import defaultdict

    import pandas as pd
    from pydub import AudioSegment
    from pydub.silence import split_on_silence
    from scipy.io import wavfile
    from scipy import signal

    silence = AudioSegment.silent(500)
    audio = AudioSegment.from_wav(fname)
    audio = silence + audio + silence
    chunks = split_on_silence(audio, min_silence_len=175, silence_thresh=-50)

    wavs = defaultdict(list)
    # 4th-order digital Butterworth low-pass filter, cutoff at 0.1x Nyquist
    b, a = signal.butter(4, 0.1, analog=False)

    os.makedirs('Words', exist_ok=True)  # the export target must exist
    for i, c in enumerate(chunks):
        chunk_fname = 'Words/word{}.wav'.format(i)  # avoid shadowing the fname parameter
        c.export(chunk_fname, format='wav')

        rate, data = wavfile.read(chunk_fname)
        if len(data.shape) == 2:
            # Two channels were found (stereo), but we need only one
            data = data[:, 0]

        data = signal.filtfilt(b, a, data)  # Filter signal

        wavs['rate'].append(rate)
        wavs['data'].append(data)

    return pd.DataFrame(wavs)
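Usage then reduces to the following (file name hypothetical; assumes the audio splits into at least one chunk):

df = SplitAudio('sentence.wav')
print(len(df), 'word chunks at', df['rate'].iloc[0], 'Hz')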
Example No. 13
def crop2chunk(filename):
    # clear folder firstly

    if not os.path.exists('./static/segmentFile'):
        os.mkdir('./static/segmentFile')
    else:
        shutil.rmtree('./static/segmentFile')
        os.mkdir('./static/segmentFile')

    sound = AudioSegment.from_mp3(filename)

    chunks = split_on_silence(
        sound,
        #must be silent for at least 430 ms
        min_silence_len=430,
        #consider it silent if quieter than -45 dBFS
        silence_thresh=-45,
        keep_silence=400)

    # discard clips shorter than 2 s (or longer than 10 s)
    for i in list(range(len(chunks)))[::-1]:
        if len(chunks[i]) <= 2000 or len(chunks[i]) >= 10000:
            chunks.pop(i)
    print('valid segments kept (longer than 2 s, shorter than 10 s):', len(chunks))
    '''
    for x in range(0,int(len(sound)/1000)):
        print(x,sound[x*1000:(x+1)*1000].max_dBFS)
    '''

    for i, chunk in enumerate(chunks):
        chunk.export("./static/segmentFile/chunk{0}.wav".format(i),
                     format="wav")
Example No. 14
def split_aud_by_dir(src_dir, out_dir):
    src_files = os.listdir(src_dir)
    done = os.listdir(out_dir)
    if len(done) != 0:
        # resume after the last file already processed (names are 8 chars long)
        done = [i[:8] for i in done]
        src_files = src_files[src_files.index(done[-1] + ".mp3"):]
    for j in src_files:
        try:
            sound_file = AudioSegment.from_mp3(src_dir + j)
            #         sound_file = librosa.core.amplitude_to_db(sound_file0)
            audio_chunks = split_on_silence(
                sound_file,
                # must be silent for at least 25 ms
                min_silence_len=25,
                # consider it silent if quieter than -90 dBFS
                silence_thresh=-90)

            for i, chunk in enumerate(audio_chunks):
                out_file = out_dir + j.replace(".mp3",
                                               "") + "_{0}.mp3".format(i)
                # print("exporting", out_file)
                chunk.export(out_file, format="mp3")
        except Exception:
            print("error with " + j)
    return 0
Example No. 15
class Home(TemplateView):
    template_name = 'home.html'


# (def line and loop body reconstructed from the identical pattern in Examples No. 10 and No. 27)
def get_large_audio_transcription(path):
    """
    Splitting the large audio file into chunks
    and apply speech recognition on each of these chunks
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split audio where silence is 500 ms or more and get the chunks
    chunks = split_on_silence(sound,
        # experiment with this value for your target audio file
        min_silence_len = 500,
        # adjust this per requirement
        silence_thresh = sound.dBFS-14,
        # keep 500 ms of the silence, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    hindi_text = ""
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            try:
                text = r.recognize_google(audio_listened, language="fr-FR")
            except sr.UnknownValueError as e:
                print("Error:", str(e))
            else:
                #text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                text += "."
                inter_text = translator.translate(text, dest='hi')
                print(inter_text.text)
                hindi_text += inter_text.text
                whole_text += text
Example No. 16
def slice_clip(inFile, outDir, minSilence, threshold, verbose):
    print("Loading input file...")
    sound = AudioSegment.from_mp3(inFile)

    print("Setting Channels to mono...")
    channels = sound.split_to_mono()

    for c, channel in enumerate(channels):
        print("Processing Channel {}".format(c))

        print("Creating Slices...")
        chunks = split_on_silence(channel,
                                  min_silence_len=minSilence,
                                  silence_thresh=threshold)

        print("Saving slices to disk...")
        for i, chunk in enumerate(chunks):
            # Normalize the entire chunk.
            normalized_chunk = SU.match_target_amplitude(chunk, -20.0)

            fileName = "c{}s{}.wav".format(c, i)
            # Export the audio chunk with new bitrate.
            if verbose:
                print("Exporting {}...".format(fileName))
                print("\tChunk Size: {}".format(SU.ms2hms(len(chunk))))

            if outDir[-1] != "/":
                outDir += "/"
            normalized_chunk.export(outDir + fileName, format="wav")
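SU.match_target_amplitude comes from a helper module that is not shown; the usual pydub recipe behind that name (also called directly in Example No. 19) is:

def match_target_amplitude(sound, target_dBFS):
    # shift the gain so the segment's average loudness equals target_dBFS
    return sound.apply_gain(target_dBFS - sound.dBFS)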
Example No. 17
def extractWordFiles(filename, filedir, subdir, purge=False):

    sound_file = AudioSegment.from_wav(filename)
    audio_chunks = split_on_silence(
        sound_file,
        # must be silent for at least 150 ms
        min_silence_len=150,

        # consider it silent if quieter than -50 dBFS
        silence_thresh=-50)

    if not os.path.exists(filedir + subdir):
        os.makedirs(filedir + subdir)

    # Delete tmp files in directory
    files = glob.glob(filedir + subdir + '*')
    filecount = 0
    for f in files:
        if purge:
            os.remove(f)
        else:
            filecount += 1

    for i, chunk in enumerate(audio_chunks):

        out_file = filedir + subdir + "chunk{0}.wav".format(i + filecount)
        print("exporting: ", out_file)
        chunk.export(out_file, format="wav")
Example No. 18
def split_slience(audio_file):
    dbfs = audio_file.dBFS
    audio_chunks = split_on_silence(audio_file,
                                    min_silence_len=1000,
                                    silence_thresh=dbfs - 30,
                                    keep_silence=True)
    return audio_chunks
Example No. 19
def cutOnBestSilence(bestSilence, file, targetFolder):
    song = AudioSegment.from_wav(file)
    normalized_sound = match_target_amplitude(song, -20.0)

    chunks = split_on_silence(normalized_sound, min_silence_len=bestSilence, silence_thresh=-45)

    # print(str(len(chunks)) + " Chunks")
    # print("Exporting Chuncks....")
    # print("\n")

    for i, chunk in enumerate(chunks):
        normalized_chunk = chunk

        # zero-pad the index to six digits: chunk000000.wav, chunk000001.wav, ...
        normalized_chunk.export(targetFolder + '/chunk{:06d}.wav'.format(i), format="wav")



# cutOnBestSilence(222, "test.wav", r"C:\Pro\Py\MySpeechRecognizer\toAnalyse\SplittedFiles\out")
Example No. 20
def cut_audio_chunks(loadpath,
                     savepath,
                     min_silence_len,
                     silence_thresh,
                     audioformat='mp3'):
    '''Function to split raw audio into chunks corresponding to isolated events
    takes a specified loading path, a saving path, the minimum silence time length 
    in ms, and the threshold for silence in dB.'''

    if audioformat == 'mp3':
        sound_file = AudioSegment.from_mp3(loadpath)
    elif audioformat == 'wav':
        sound_file = AudioSegment.from_wav(loadpath)

    # Make sure the directory exists to store the segmented audio:
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # split audio
    audio_chunks = split_on_silence(
        sound_file,
        # must be silent for at least min_silence_len ms
        min_silence_len=min_silence_len,
        # consider it silent if quieter than silence_thresh dBFS
        silence_thresh=silence_thresh)
    # store the audio chunks
    for i, chunk in enumerate(audio_chunks):
        out_file = os.path.join(savepath, str(i) + ".wav")
        print("exporting ", out_file)
        chunk.export(out_file, format="wav")
Example No. 21
    def gapProc(self):
    #def gapProc(self , lowest):
        sound_file = AudioSegment.from_wav(self.fname)
        audio_chunks = split_on_silence(sound_file,
            # split on silences of at least 1 ms
            min_silence_len=1,
            # a threshold of 8 dBFS sits above full scale (0 dBFS), so every
            # stretch of audio qualifies as "silence" for this measurement
            silence_thresh=8)

        # List made to store all of the silence .wav chunks
        waveAry = []
        # List made to store the lengths of the silence chunks
        chunkLengthArray = []

        for i, chunk in enumerate(audio_chunks):
            out_file = ".//splitAudio//chunk{0}.wav".format(i)
            #waveAry.append(chunk)
            chunkLengthArray.append(len(chunk))

        # If there were no silences, set the mean and stdev variables to 0
        if len(chunkLengthArray) == 0:
            avgChunkLength = 0
            stdevChunkLength = 0
        # If there is exactly 1 silence, set the stdev to 0
        #   and the average chunk length to the value of the only silence
        elif len(chunkLengthArray) == 1:
            stdevChunkLength = 0
            avgChunkLength = chunkLengthArray[0]
        # Otherwise calculate the mean gap and stdev of the gaps and store
        #   them in variables
        else:
            avgChunkLength = mean(chunkLengthArray)
            stdevChunkLength = stdev(chunkLengthArray)
        # Return the array containing the lengths of the gaps
        return chunkLengthArray
Example No. 22
 def btnOpen_Click(self):
     path, _ = QFileDialog.getOpenFileName(None, "Open Audio files", "","Sound Files (*.wav)")
     # audio = path.replace("/","\\")
     # path='audiototext.wav'
     if path != "":
         self.lbl_audio.setText("Converting audio file to text...")
         r = sr.Recognizer()
         sound = AudioSegment.from_wav(path)
         chunks = split_on_silence(sound,
             min_silence_len = 500,
             silence_thresh = sound.dBFS-14,
             keep_silence=500,
         )
         folder_name = "audio-data"
         if not os.path.isdir(folder_name):
             os.mkdir(folder_name)
         whole_text = ""
         for i, audio_chunk in enumerate(chunks, start=1):
             chunk_filename = os.path.join(folder_name, f"audio{i}.wav")
             audio_chunk.export(chunk_filename, format="wav")
             with sr.AudioFile(chunk_filename) as source:
                 audio_listened = r.record(source)
                 try:
                     text = r.recognize_google(audio_listened,language="vi-VI")
                 except Exception as ex:
                     self.txtData.setText("False Loading!!!\n" + str(ex) + "\n" + path + "\n")
                 else:
                     text = f"{text.capitalize()}. "
                     # print(chunk_filename, ":", text)
                     print(text)
                     whole_text += text
         self.txtData.setText(whole_text)
         shutil.rmtree(folder_name)
         self.lbl_audio.setText("Successfuly!")
         self.btnSaveAs.setEnabled(True)
Example No. 23
def split(
    file,
    min_silence_len=500,
    silence_thresh=-20,
    max_len=7,
    keep_silence=1000,
):
    audio = AudioSegment.from_mp3(file)
    audio_chunks = split_on_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
        keep_silence=keep_silence,
    )

    audios, temp, length = [], [], 0
    for i in range(len(audio_chunks)):
        if length + audio_chunks[i].duration_seconds >= max_len and len(temp):
            audios.append(sum(temp))
            temp = []
            length = 0
        temp.append(audio_chunks[i])
        length += audio_chunks[i].duration_seconds

    if len(temp):
        audios.append(sum(temp))

    return audios, audio
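A short driver for this splitter, regrouping the chunks into roughly max_len-second pieces and exporting them (file names hypothetical):

audios, original = split('podcast.mp3', max_len=7)
for i, segment in enumerate(audios):
    segment.export('piece_{0}.mp3'.format(i), format='mp3')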
Example No. 24
def split_and_recognize(binary):
    """
    Performs automated speech recognition on input WAV audio bytes.
    Uses CMUSphinx for ASR, see https://cmusphinx.github.io/.
    Requires the speech_recognition and pocketsphinx python packages.
    Requires swig to be installed. Also possibly libpulse-dev libasound2-dev.
    """
    r = sr.Recognizer()
    bigWav = AudioSegment(binary)
    chunks = split_on_silence(bigWav, min_silence_len = 500, silence_thresh = bigWav.dBFS-14, keep_silence=500)
    whole_text = ""
    for i, audio_chunk in enumerate(chunks):
        chunk_audio = audio_chunk.export(io.BytesIO(), format="wav")
        chunk_audio.seek(0)
        with sr.AudioFile(chunk_audio) as source:
            audio = r.record(source)
        try:
            text = r.recognize_sphinx(audio)
            whole_text += f"{text} "
        except Exception:
            msg = "<inaudible>"
            whole_text += msg
        chunk_audio.close()
    return whole_text
Example No. 25
def text_from_large_clip(audio_file):
    r = sr.Recognizer()
    audio_path = os.path.join(os.getcwd(), "wav_outputs", audio_file)
    audio = AudioSegment.from_wav(audio_path)
    # split the audio into chunks at every silence of at least 1 second
    audio_chunks = split_on_silence(audio, 
                                    min_silence_len=1000, # 1 second is also 1000 milliseconds
                                    silence_thresh = audio.dBFS - 16 # silence: at least 16 dBFS below the track's average
                                    )
    # now since speech_recognition needs to listen to audio files we have to save the chunks to a folder
    save_chunks_to_folder(audio_chunks)

    audio_files = os.listdir(os.getcwd() + r"/audio_chunks")
    chunks_folder = os.path.join(os.getcwd() + r"/audio_chunks")
    translated_text = ""
    for chunk_file in audio_files:
        with sr.AudioFile(os.path.join(chunks_folder, chunk_file)) as audio:
            audio_data = r.record(audio)

            try:
                audio_chunk_text = r.recognize_google(audio_data)
                audio_chunk_text = audio_chunk_text.capitalize() + "."
                translated_text += audio_chunk_text
            except sr.UnknownValueError:
                print("No Text Detected in the Audio!")
            
    
    return translated_text
Example No. 26
def splitWavFileAndStore(filename,
                         minsillen=50,
                         silthresh=-60):  # minsillen= 100, silthresh = -60

    line = AudioSegment.from_wav(filename)

    audio_chunks = split_on_silence(
        line, min_silence_len=minsillen,
        silence_thresh=silthresh)  # isolation of words is done here

    rejectedOffset = 0
    temp = -1  # so the final count works even if no chunks pass the filter

    for i, chunk in enumerate(audio_chunks):  # audio_chunks is a python list

        if (checkChunk(chunk, i, minimumWordSize, maximumWordSize)):  #
            rejectedOffset = rejectedOffset + 1
            continue

        out_file = DEFAULT_CHUNKNAME.format(i - rejectedOffset + fileOffset)
        print("size of chunk{}: {} ".format(i - rejectedOffset + fileOffset,
                                            len(chunk)))
        print("exporting", out_file)
        chunk.export(out_file, format="wav")
        print("done exporting...")
        temp = i

    print("Total number of files:", temp + 1)

    return temp + 1
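checkChunk and the module-level constants live elsewhere; stand-ins consistent with how they are used here (all values hypothetical) would be:

minimumWordSize = 200                    # ms
maximumWordSize = 3000                   # ms
fileOffset = 0
DEFAULT_CHUNKNAME = 'words/chunk{}.wav'

def checkChunk(chunk, i, min_size, max_size):
    # True means "reject this chunk"
    return len(chunk) < min_size or len(chunk) > max_size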
Example No. 27
def get_large_audio_transcription(path):
    sound = AudioSegment.from_wav(path)
    chunks = split_on_silence(
        sound,
        min_silence_len=500,
        silence_thresh=sound.dBFS - 14,
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)

            try:
                text = r.recognize_google(audio_listened,
                                          language="en-US")
            except sr.UnknownValueError as e:
                pass
                #print("Error:",str(e))
            else:
                text = f"{text.capitalize()}. "
                #print(chunk_filename,":",text)
                whole_text += text

    return whole_text
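The function leans on a module-level recognizer named r; wiring it up and calling the function is just (file name hypothetical):

import speech_recognition as sr

r = sr.Recognizer()
print(get_large_audio_transcription('recording.wav'))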
Example No. 28
def split(filepath, save_path, time_length):
    sound = AudioSegment.from_wav(filepath)
    dBFS = sound.dBFS
    chunks = split_on_silence(
        sound,
        min_silence_len=500,
        silence_thresh=dBFS - 16,
        keep_silence=250  # optional
    )
    # setting minimum length of each chunk to x seconds
    target_length = time_length * 1000
    output_chunks = [chunks[0]]
    for chunk in chunks[1:]:
        if len(output_chunks[-1]) < target_length:
            output_chunks[-1] += chunk
        else:
            # if the last output chunk is longer than the target length,
            # we can start a new one
            output_chunks.append(chunk)

    # Attention: the save directory is wiped and recreated before export!
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.mkdir(save_path)

    for i, chunk in enumerate(output_chunks):
        chunk.export(os.path.join(save_path, "{0}.wav".format(i)),
                     format="wav")
    return len(output_chunks)
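Usage, aiming at roughly 30-second pieces (paths hypothetical):

n = split('input.wav', 'out_chunks', time_length=30)
print(n, 'chunks written')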
Example No. 29
def split_silence_hm(audio_dir, split_silence_dir, sum_dir):
    '''
    Args : 
        audio_dir : directory containing multiple ('wav') audio files
        split_silence_dir : directory for the clips cut at each silent section
        sum_dir : directory for the re-joined file built from those clips
    '''

    # collect every file in audio_dir
    audio_dir = librosa.util.find_files(audio_dir, ext=['wav'])

    # create a folder
    def createFolder(directory):
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)
        except OSError:
            print('Error: Creating directory. ' + directory)

    # process the files from audio_dir one at a time
    for path in audio_dir:
        print("removing silence from", path)

        # load the audio
        sound_file = AudioSegment.from_wav(path)

        # keep only the file name
        _, w_id = os.path.split(path)
        w_id = w_id[:-4]

        # dBFS: the digital level relative to full scale; unlike analog dB,
        # 0 dBFS is the highest possible level
        dbfs = sound_file.dBFS

        # cut at every silent section
        audio_chunks = split_on_silence(
            sound_file,
            min_silence_len=200,
            silence_thresh=dbfs - 16,
            # keep_silence= 100
            keep_silence=0)

        # create a new folder named after the file
        createFolder(split_silence_dir + w_id)

        # save each silence-delimited clip as wav
        for i, chunk in enumerate(audio_chunks):
            out_file = split_silence_dir + w_id + "\\" + w_id + f"_{i}.wav"
            # print ("exporting", out_file)
            chunk.export(out_file, format="wav")

        # join the silence-delimited clips back into a single file
        path_wav = split_silence_dir + w_id + "\\"
        print("folder holding the silence-delimited clips:", path_wav)
        path_out = sum_dir + w_id + '_silence_total.wav'
        print("path of the re-joined audio file:", path_out)
        voice_sum(form='wav',
                  audio_dir=path_wav,
                  save_dir=None,
                  out_dir=path_out)
Example No. 30
def split_words(folder):
    file_path = './input/{0}/{0}'.format(folder)
    sound = AudioSegment.from_file(file_path + '.mp3', format="mp3")
    chunks = split_on_silence(sound, min_silence_len=1000, silence_thresh=-50)

    loc = (file_path + ".xls")
    wb = xlrd.open_workbook(loc)
    sheet = wb.sheet_by_index(0)

    print("音频中共划分出{0}个单音频".format(len(chunks)))
    print("excel中共{0}个单词".format(sheet.nrows - 1))

    # 检查单词是否重复
    word_exist = {}
    for i in range(1, sheet.nrows):
        word = sheet.cell_value(i, 2)
        if word_exist.get(word):
            print("{0}重复了".format(word))
        else:
            word_exist.setdefault(word, True)

    # 开始输出
    count = 0
    for i in range(1, sheet.nrows):
        word = sheet.cell_value(sheet.nrows - i, 2).strip()
        path = "./output/{0}/{1}.mp3".format(folder, word)
        if os.path.exists(path):
            print("{0}已经存在".format(word))
        else:
            chunk = addSilence(increaseDB(chunks[-i]))
            chunk.export(path, format="mp3")
            # print(chunk.max)
            count = count + 1

    print("此次共生成{0}个单词音频".format(count))
Example No. 31
def split_silence_hm(audio_dir, split_silence_dir, sum_dir):
    # collect every file in audio_dir
    audio_dir = librosa.util.find_files(audio_dir, ext=['wav'])
    for path in audio_dir:  # process the files one at a time
        sound_file = AudioSegment.from_wav(path)
        _, w_id = os.path.split(path)
        w_id = w_id[:-4]

        dbfs = sound_file.dBFS
        audio_chunks = split_on_silence(
            sound_file,  # cut at every silent section
            silence_thresh=dbfs - 16,  # level below which audio counts as silence
            min_silence_len=200,  # minimum silence duration (ms) that triggers a cut
            keep_silence=0  # how much margin to keep before and after each cut
        )

        for i, chunk in enumerate(audio_chunks):  # save each clip as wav
            out_file = split_silence_dir + w_id + "\\" + w_id + f"_{i}.wav"
            chunk.export(out_file, format="wav")
        path_wav = split_silence_dir + w_id + "\\"  # folder holding the silence-delimited clips
        path_out = sum_dir + w_id + '_silence_total.wav'  # path of the re-joined audio file
        voice_sum(form='wav',
                  audio_dir=path_wav,
                  save_dir=None,
                  out_dir=path_out)
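voice_sum (also used by Example No. 29) is defined elsewhere; a minimal sketch with the same keyword interface, re-joining every .wav in a directory (the implementation is an assumption), might be:

def voice_sum(form='wav', audio_dir=None, save_dir=None, out_dir=None):
    # save_dir is accepted only to mirror the call sites above
    merged = AudioSegment.empty()
    for f in sorted(librosa.util.find_files(audio_dir, ext=[form])):
        merged += AudioSegment.from_file(f, format=form)
    merged.export(out_dir, format=form)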
Example No. 32
def multi_split_on_silence(wav_file, output_dir, min_silence_len, silence_thresh, pass_first_time=0, seek_step=110, keep_silence=250):
    speech = AudioSegment.from_wav(wav_file)
    file_name = os.path.basename(wav_file)
    # print(file_name)
    chunks = split_on_silence(speech, min_silence_len=min_silence_len, silence_thresh=silence_thresh,
                seek_step=seek_step, keep_silence=keep_silence)
    curr_pass_time = 0.
    # note: the last three chunks are never exported
    for i in range(0, len(chunks)-3):
        # skip leading chunks until pass_first_time seconds have gone by
        if curr_pass_time < pass_first_time:
            curr_pass_time += len(chunks[i])/1000
            continue
        # print(len(chunks[i]), os.path.join(output_dir, file_name[:-4]+'_'+str(i+1).zfill(4)+'.wav'))
        chunks[i].export(os.path.join(output_dir, file_name[:-4]+'_'+str(i+1).zfill(4)+'.wav'), format='wav', parameters=["-ar", "16000", "-ac", "1"])
Example No. 33
	def cutbySilence(self, audio=None, min_silence_len=1000, r=1):
		# operate on the whole recording by default; recursive calls pass a
		# long split in explicitly (the original recursed with the split as
		# the first argument, which landed in min_silence_len by mistake)
		if audio is None:
			audio = self.audio
		#using dBFS to normalize the silence across files
		silence_thresh = audio.dBFS - 5/r
		audio_splits = silence.split_on_silence(audio,
			min_silence_len=min_silence_len,
			keep_silence=150,
			silence_thresh=silence_thresh)
		#cuts that are still too long, maybe an area of higher overall dBFS
		long_splits = [split for split in audio_splits if math.floor(split.duration_seconds)>20]
		if r != 2:
			for split in long_splits:
				audio_splits.remove(split)
				#cut recursively
				new_splits = self.cutbySilence(split, r=r+1)
				for ns in new_splits:
					audio_splits.append(ns)
		#clean the cuts of anything too short (floor(duration) > .5 keeps only splits of 1 s or more)
		audio_splits = [split for split in audio_splits if math.floor(split.duration_seconds)>.5]
		return audio_splits
Example No. 34
def split_song(f_name, length_val, threshold_val, overwrite=True, export_all=False):
	f_prefix = f_name[0:-4]
	sound = AudioSegment.from_mp3(f_name)
	print('%s was loaded.' % f_name)
	print('Splitting by silence STARTED.')
	chunks = split_on_silence(sound, min_silence_len=length_val, silence_thresh=-threshold_val)
	print('Splitting by silence FINISHED.')
	if len(chunks) != 1:
		if export_all:
			for i, chunk in enumerate(chunks):
				chunk.export('%s_part_%i.mp3' % (f_prefix, i), format="mp3")
			print('Segments were saved to %i mp3 files.' % len(chunks))
		else:
			if overwrite:
				chunks[0].export(f_name, format="mp3")
			else:
				chunks[0].export('%s_part_0.mp3' % f_prefix, format="mp3")
		print('Splitting %s DONE' % f_name)
	else:
		print('No splitting was needed for %s!' % f_name)
Example No. 35
def main():
    voice = "Kate"
    description = "fern_hill"
    silence_length = 145
    words_per_minute = 140  # maybe this should be slower
    #TODO: maybe make it so the ambient is same bpm as wpm
    print("Doing text-to-speech synthesis.")
    generate_audio(description, voice, words_per_minute)

    print("Getting nouns")
    with open("descriptions/%s.txt" % description, "r", encoding="utf-8") as f:
        text = f.read()
    tokenizer = nltk.RegexpTokenizer(r'\w+')
    tokenized_text = tokenizer.tokenize(text)
    nouns = get_nouns(tokenized_text)

    print("Getting sfx for nouns")
    sfx = find_sfx(nouns)

    try:
        reading = AudioSegment.from_file("/Users/restd/PycharmProjects/dreamScene/readings/%s.wav" % description, format="wav") + 19
    except IOError:
        reading1 = AudioSegment.from_file("/Users/restd/PycharmProjects/dreamScene/readings/%s.mp3" % description,
                                          format="mp3")
        process_reading("/Users/restd/PycharmProjects/dreamScene/readings/%s.aiff" % description, len(reading1))
        sys.exit(0)

    #TODO: make this less of a guessing game
    word_chunks = split_on_silence(reading, min_silence_len=silence_length, silence_thresh=-16)

    print("Adding sfx to reading")
    scene = add_sfx_to_reading(word_chunks, tokenized_text, nouns, sfx, silence_length, reading)

    print("Getting and adding related (rough) ambient music.")
    ambient = get_ambient(len(scene), nouns, description)

    scene = scene.overlay(ambient - 16, loop=True)
    scene.export("scenes/%s.mp3" % description, format="mp3")
Example No. 36
def split(input_path, output_directory=None):
  print("Start cutting")
  sound = AudioSegment.from_wav(input_path)

  chunks = split_on_silence(sound, 
    # must be silent for at least half a second
    min_silence_len = 500,
    # consider it silent if quieter than -33 dBFS
    silence_thresh = -33
  )

  # export the chunks
  # i is the output file count
  i = 0
  slash_index = input_path.rfind("/") + 1
  filename = input_path[slash_index: -4]
  for i, chunk in enumerate(chunks):
    chunk.export("{dir}{name}_{count}.wav".format(
      dir=output_directory, 
      name=filename, 
      count=i), format="wav")

  print('The audio was split into {number} files'.format(number=i + 1))
  return i + 1
Example No. 37
        # 404 for LiveATC is 168 bytes   
        if os.path.getsize(mp3) == 168:
            print("Not available yet")
            os.remove(mp3)

            print("Waiting 10 minutes to ask again")
            time.sleep(600)
            continue
        else:
            file_name = next_filename

            print("Creating audio segment from " + file_name)
            podcast = AudioSegment.from_mp3(file_name)

            print("Chunking based on silence")
            chunks = split_on_silence(podcast, min_silence_len=500, silence_thresh=-50)

            output_directory = file_name[0:-4]

            os.mkdir(output_directory)

            print("Exporting chunks")
            for i, chunk in enumerate(chunks):
                chunk.export(output_directory + "/chunk{0}.mp3".format(i), format="mp3")

            print("Removing " + files_to_get.pop(0) + " from list of files to get")

            time.sleep(600)

Example No. 38
from pydub import AudioSegment
from pydub.silence import split_on_silence
import random
import sys,os

name = '01.mp3'
path = '/Users/syslot/Desktop'
file_name = os.path.join(path,name)
sound = AudioSegment.from_mp3(file_name)

chunks = split_on_silence(sound, min_silence_len=700, silence_thresh=-70)  # silence: 700 ms or more below -70 dBFS

words = chunks[2:] #first and second are not words.

len1 = len(words)

new = AudioSegment.empty()
silence = AudioSegment.silent(duration=1000)#1000ms silence


order = list(range(len1))  # random.shuffle needs a mutable sequence, not a range
random.shuffle(order)
print(order)
comments = ""

for i in order:
    new += words[i]+silence
    comments += str(i)+","

save_name = file_name.split(".")[0]+"-random{0}.".format(random.randrange(0,9))+file_name.split(".")[1]
new.export(save_name, format="mp3",tags={'artist': 'AppLeU0', 'album': file_name, 'comments': comments[:-1]})
Example No. 39
from pydub import AudioSegment
from pydub.silence import split_on_silence
import os
import sys

params = {
	'file' : sys.argv[1],
	'min_silence_len': sys.argv[2],
	'silence_thresh': sys.argv[3]
}

sound = AudioSegment.from_wav(params['file'])
chunks = split_on_silence(sound, 
    # must be silent for at least this many ms (sys.argv values are strings, hence int())
    min_silence_len=int(params['min_silence_len']),

    # consider it silent if quieter than this many dBFS
    silence_thresh=int(params['silence_thresh'])
)

os.makedirs('./cutups', exist_ok=True)  # make sure the output directory exists

for i, chunk in enumerate(chunks):
    chunk.export("./cutups/chunk{0}.wav".format(i), format="wav")
Example No. 40
from dadasql.model import Line, Fundamental, DBFS, Duration
from sqlalchemy.orm.exc import NoResultFound
import random, math
from pydub import AudioSegment, silence
path = '/root/dada-dial/sounds/'
filename = 'user.wav'
#create pydub audio file
user_audio = AudioSegment.from_wav(path+filename)

#this is a hacky way to get rests that mimic the user's
user_rests = silence.detect_silence(user_audio)
user_rests_len = [s[1]-s[0] for s in user_rests if (user_audio.duration_seconds*1000 - s[1])>3] 
user_rest_segments = [AudioSegment.silent(duration=rest_len) for rest_len in user_rests_len]
print([r.duration_seconds for r in user_rest_segments])

user_splits = silence.split_on_silence(user_audio)
split_durations = [math.ceil(s.duration_seconds) for s in user_splits]
split_dbfs = [int(s.dBFS) for s in user_splits]
split_fundamentals = []
for s in user_splits:
	s.export(path + 'temp.wav', format='wav')
	s_fft = dadaFFT(path+'temp.wav')
	fundamental, power = s_fft.get_fundamental()
	split_fundamentals.append(int(fundamental))
#got all the user input information, now we need to find lines that match
#match on duration
duration_results = []
for d in split_durations:
	try:
		duration_results.append([d[0] for d in db_session.query(Line.id).join(Duration.lines).filter(Duration.duration==d).all()])
	except NoResultFound:
Example No. 41
EXPORT_PATH = '/home/gswewf/data/五十音图'
time_start = "00:16"
time_end = "01:35"

song = AudioSegment.from_mp3(file)
start = (int(time_start.split(':')[0])*60 + int(time_start.split(':')[1]))*1000
end = (int(time_end.split(':')[0])*60 + int(time_end.split(':')[1]))*1000
# print(start, end)
# Slicing works in milliseconds, so the timestamps are converted down to ms.
word = song[start:end]

# Here silence_thresh treats anything below -42 dBFS as silence, and that level
# must hold for more than 700 ms to trigger a split.
# The key is picking these two values; foobar2000 helps: View -> Visualizations ->
# VU meter shows a track's dBFS. Normal speech sits around -25 dBFS to -10 dBFS; the
# scale runs from -96 dBFS to 0 dBFS, and the closer to 0, the louder.
# Anything below -42 dBFS is treated as silence here. foobar also suggests the gap
# between syllables is about 900 ms (0.9 s), so a slightly smaller 0.7 s is used.
words = split_on_silence(word, min_silence_len=700, silence_thresh=-42)

# Next, generate a shuffled order, map the syllables into it, and insert 1 s of silence between them.
silent = AudioSegment.silent(duration=1000)

print("Split out {} sounds in total".format(len(words)))
wushiyintu = ['あ', 'い', 'う', 'え', 'お',
              'か', 'き', 'く', 'け', 'こ',
              'さ', 'し', 'す', 'せ', 'そ',
              'た', 'ち', 'つ', 'て', 'と',
              'な', 'に', 'ぬ', 'ね', 'の',
              'は', 'ひ', 'ふ', 'へ', 'ほ',
              'ま', 'み', 'む', 'め', 'も',
              'や', 'ゆ', 'よ',
              'ら', 'り', 'る', 'れ', 'ろ',
              'わ', 'を', 'ん']