Example #1
def determineSilences(file):
    trackA = AudioSegment.from_file(file["trackA"])
    speakingA = silence.detect_nonsilent(trackA, min_silence_len=500, silence_thresh=-30)
    trackB = AudioSegment.from_file(file["trackB"])
    speakingB = silence.detect_nonsilent(trackB, min_silence_len=500, silence_thresh=-30)
    aClass = [[seg[0], seg[1], hasConflicts(seg, speakingB)] for seg in speakingA]
    bClass = [[seg[0], seg[1], hasConflicts(seg, speakingA)] for seg in speakingB]
    return aClass, bClass
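hasConflicts is not shown in this example; a minimal sketch, assuming it simply flags whether a speaking range on one track overlaps any speaking range on the other track, might look like:

def hasConflicts(speech_range, other_ranges):
    # hypothetical helper: True if [start, end] overlaps any non-silent range on the other track
    start, end = speech_range[0], speech_range[1]
    return any(start < other_end and other_start < end
               for other_start, other_end in other_ranges)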
Example #2
    def compareSpeechPeriods(self, source_in, source_out):
        segment_in = AudioSegment(data=source_in,
                                  sample_width=2,
                                  frame_rate=8000,
                                  channels=1)
        segment_out = AudioSegment(data=source_out,
                                   sample_width=2,
                                   frame_rate=8000,
                                   channels=1)

        speech_period_in = silence.detect_nonsilent(segment_in,
                                                    min_silence_len=1000,
                                                    silence_thresh=-32)
        speech_period_out = silence.detect_nonsilent(segment_out,
                                                     min_silence_len=1000,
                                                     silence_thresh=-32)

        #print('period in: ', speech_period_in)
        #print('period out: ', speech_period_out)

        if len(speech_period_out) == 0 and len(speech_period_in) == 0:
            return 0
        elif len(speech_period_out) == 0 and len(speech_period_in) > 0:
            voiceEdgePoints = [
                speech_period_in[0][0],
                speech_period_in[-1][1]
            ]
            return (1, voiceEdgePoints)
        elif len(speech_period_in) == 0 and len(speech_period_out) > 0:
            voiceEdgePoints = [
                speech_period_out[0][0],
                speech_period_out[-1][1]
            ]
            return (2, voiceEdgePoints)
        elif speech_period_in[0][0] <= speech_period_out[0][0]:
            voiceEdgePoint = max(speech_period_in[-1][1],
                                 speech_period_out[-1][1])
            voiceEdgePoints = [speech_period_in[0][0], voiceEdgePoint]
            return (3, voiceEdgePoints)
        elif speech_period_in[0][0] > speech_period_out[0][0]:
            voiceEdgePoint = max(speech_period_in[-1][1],
                                 speech_period_out[-1][1])
            voiceEdgePoints = [speech_period_out[0][0], voiceEdgePoint]
            return (4, voiceEdgePoints)
Example #3
def trim_silence(wav):
    start_end = detect_nonsilent(wav, 250, -40, 1)
    start_end = [se for se in start_end if se[1] - se[0] > 50]
    while len(start_end) == 0 or wav.dBFS > 0:  #if can't detect nonsilent
        wav = wav + 5
        start_end = detect_nonsilent(wav, 250, -40, 1)
        start_end = [se for se in start_end if se[1] - se[0] > 50]
    start = min(start_end)[0]
    end = max(start_end)[1]
    wav = wav[start - 50:end + 50]
    wav = np.array(wav.get_array_of_samples(), dtype='float32') / 2**15
    return wav
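A possible call site (the file name is only a placeholder; AudioSegment and numpy are assumed to be imported as in the snippet):

# hypothetical usage: boost quiet recordings until speech is found, trim, and get float32 samples
samples = trim_silence(AudioSegment.from_wav("speech.wav"))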
Example #4
def goAmadeus(file,targetFolder, silenceModifier):
    audio_segment = AudioSegment.from_wav(file)
    normalized_sound = match_target_amplitude(audio_segment, -20.0)
    nonsilent_data = detect_nonsilent(normalized_sound, min_silence_len=50, silence_thresh=-45, seek_step=1)

    # print("start,Stop")
    # for chunks in nonsilent_data:
    #     print([chunk / 1000 for chunk in chunks])

    activeHolder = 0
    where = 1
    howManyLoops = 0
    for i in range(len(nonsilent_data)):
        if(where-1 < 0):
            activeHolder += 0
            where += 0
        if where >= 1:
            current_silence = nonsilent_data[howManyLoops][0] - nonsilent_data[howManyLoops-1][1]
            if(current_silence >= 2000):
                activeHolder += 0
            elif current_silence <= 50 and howManyLoops < 3:
                activeHolder += 5
            elif current_silence > 50:
                activeHolder += current_silence
                where += 1
        if(where == 0):
            where += 1
        howManyLoops += 1

    allSilence = activeHolder
    # print("All silence: " + str(allSilence) + " ms")

    bestSingleSilence = allSilence / where

    add = bestSingleSilence * silenceModifier/100
    bestSingleSilence = bestSingleSilence+add

    # print("Best Silence: " + str(round(bestSingleSilence)) + " ms")

    # print("Started Chunking..")

    cutOnBestSilence(round(bestSingleSilence), file, targetFolder)

    best_nonsilent_data = detect_nonsilent(normalized_sound, min_silence_len=round(bestSingleSilence), silence_thresh=-45, seek_step=1)

    return best_nonsilent_data

# goAmadeus("test.wav",1)
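match_target_amplitude and cutOnBestSilence are project helpers that are not shown; a minimal sketch of the normalisation helper, following the usual pydub gain-matching recipe, might be:

def match_target_amplitude(sound, target_dBFS):
    # assumed helper: apply the gain needed to bring the segment's average loudness to target_dBFS
    return sound.apply_gain(target_dBFS - sound.dBFS)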
Example #5
    def run(self):
        try:
            if os.path.exists(self.wav_name) is not True:
                AudioSegment.from_file(self.video_name).export(self.wav_name,
                                                               format='mp3',
                                                               bitrate="64k")
            sound = AudioSegment.from_mp3(self.wav_name)
            chunks = detect_nonsilent(sound,
                                      min_silence_len=1000,
                                      silence_thresh=-45)

            # now recombine the chunks so that the parts are at least 60 sec long
            target_length = 60 * 1000
            output_chunks = [chunks[0]]
            for chunk in chunks[1:]:
                if output_chunks[-1][1] - output_chunks[-1][0] < target_length:
                    output_chunks[-1][1] = chunk[1]
                else:
                    # if the last output chunk is longer than the target length,
                    # we can start a new one
                    output_chunks.append(chunk)

            config = {
                'duration': sound.duration_seconds,
                'total': len(output_chunks),
                'chunks': output_chunks
            }
            self.config_sig.emit(config)
        except Exception as e:
            print(e)
            self.config_sig.emit({})
Example #6
 def get_first_nonsilent(self, sound, silence_threshold=-28):
     non_silences = silence.detect_nonsilent(
         sound,
         min_silence_len=self.fade_length,
         silence_thresh=silence_threshold,
         seek_step=1)
     return non_silences[0]
Example #7
def cut_speech(dirs):
    """
    This function is used to cut speech samples into non-silent pieces for further usage in MFCC extraction
    :return: None
    """

    logging.basicConfig(filename="logfile.log", level=logging.DEBUG)

    for d in dirs:
        for speaker in os.listdir(d):
            wav_names = glob('{}/{}/*.wav'.format(d, speaker))

            for name in wav_names:
                print "processing file {}...".format(name)
                sound = AudioSegment.from_file(name, format="wav")

                speech = silence.detect_nonsilent(sound, silence_thresh=-50)
                i = 1
                for frag in speech:
                    part = sound[frag[0]:frag[1]]
                    part.export('{}/{}/part_{}.wav'.format(d, speaker, i),
                                format="wav")
                    i += 1

    logging.info("Finished cutting audio samples")
Example #8
def splitAudioBySilence(audio_path,
                        skip_idx=0,
                        out_ext="wav",
                        silence_thresh=-40,
                        silence_chunk_len=100,
                        keep_silence=100):
    audio = read_audio(audio_path)

    not_silence_ranges = silence.detect_nonsilent(
        audio,
        min_silence_len=silence_chunk_len,
        silence_thresh=silence_thresh)

    edges = concatenate_edges(not_silence_ranges)
    intervals = get_rid_of_short_intervals(edges)

    for idx, (start_idx, end_idx) in enumerate(intervals[skip_idx:]):
        start_idx = max(0, start_idx - keep_silence)
        end_idx += keep_silence

        segment = audio[start_idx:end_idx]
        segment.export("./chunks/chunk{0}.mp3".format(idx), out_ext)
        segment = segment.set_channels(1)
        segment.export("./chunks/chunk{0}.wav".format(idx), format="wav")
        os.remove("./chunks/chunk{0}.mp3".format(idx))


# emptyFolder("./chunks/")
# splitAudioBySilence("audio.mp3")
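read_audio, concatenate_edges and get_rid_of_short_intervals come from the surrounding project and are not shown; read_audio is presumably just a thin pydub loader along these lines:

def read_audio(audio_path):
    # assumed helper: load any supported container/codec into a pydub AudioSegment
    return AudioSegment.from_file(audio_path)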
Example #9
    def split_audio(self,
                    audio_segment,
                    min_silence_len=500,
                    silence_thresh=-30,
                    keep_silence=100,
                    seek_step=1):

        not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len,
                                              silence_thresh, seek_step)

        def pairwise(iterable):
            "s -> (s0,s1), (s1,s2), (s2, s3), ..."
            a, b = itertools.tee(iterable)
            next(b, None)
            return zip(a, b)

        chunks = []
        audio_starts = []
        audio_ends = []

        # split the clip on the silence segments (the gaps between non-silent ranges)
        for (start1, end1), (start2, end2) in pairwise(not_silence_ranges):
            chunks.append(audio_segment[end1:start2])
            audio_starts.append(end1)
            audio_ends.append(start2)

        return chunks, audio_starts, audio_ends
Example #10
def get_segments(args, audio_file, segments_file):
    if os.path.exists(segments_file):
        with open(segments_file) as json_file:
            json_data = json.load(json_file)
            return json_data["sound_ranges"][0]["sounds"]
    else:
        long_nonsilence = detect_nonsilent(audio_file,
                                           min_silence_len=args.long_silence,
                                           silence_thresh=args.silence_thresh)

        silence = detect_silence(audio_file,
                                 min_silence_len=args.short_silence,
                                 silence_thresh=args.silence_thresh)

        gaps_silence = list(
            map(
                lambda x:
                [x[0] + args.short_silence / 2, x[1] - args.short_silence / 2],
                detect_silence(audio_file,
                               min_silence_len=2 * args.short_silence,
                               silence_thresh=args.silence_thresh + 20)))

        nonsilence1 = split_long_nonsilence(long_nonsilence, silence,
                                            args.min * 1000, args.max * 1000)

        segments = split_long_nonsilence(nonsilence1, gaps_silence,
                                         args.min * 1000, args.max * 1000)
        return segments
Example #11
def get_non_silent_ranges(filepath, audio_length, silence_length,
                          silence_thresh):
    """
    Given a filepath to a .wav file and a target audio length, return all the
    non-silent ranges from the audio sample

    :param filepath:        filepath to the .wav audio file
    :param audio_length:    length in seconds of audio to process
    :param silence_length:  minimum length of a silence to be used for
                                a split
    :param silence_thresh: (in dBFS) anything quieter than this will be
                                considered silence
    :return: 2D array:      array of shape [num_ranges, 2]
    """
    # Load data into AudioSegment object and extract audio_length seconds
    audio_file = AudioSegment.from_wav(filepath)
    audio_file = audio_file[:audio_length * MS]
    # Use given parameters to return non_silent ranges
    ranges = []
    for i in range(10):
        ranges = detect_nonsilent(audio_file,
                                  min_silence_len=int(silence_length),
                                  silence_thresh=silence_thresh)
        if len(ranges) > 0:
            break
        else:
            silence_length /= 2
    return ranges
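Assuming MS is the module's milliseconds-per-second constant (1000), a call might look like this (the file name is a placeholder):

# hypothetical usage: non-silent ranges in the first 30 s, splitting on >= 300 ms below -40 dBFS
ranges = get_non_silent_ranges("speech.wav", audio_length=30,
                               silence_length=300, silence_thresh=-40)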
Example #12
def cut_by_silence(precut_audio_path, output_folder, filebasename):
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    # use pydub's AudioSegment & silence modules to detect silence, cut and save;
    # return the last chunk's timestamp in milliseconds
    cut_num = 0
    audio_segment = AudioSegment.from_wav(precut_audio_path)
    silence_thresh_tries = range(-40, -5)
    for silence_thresh in silence_thresh_tries:
        chuncks = detect_nonsilent(audio_segment,
                                   min_silence_len=500,
                                   silence_thresh=silence_thresh)
        logging.debug("try {}".format(silence_thresh))
        if len(chuncks) >= 2:
            for chunck in chuncks:
                out_audio_file = os.path.join(
                    output_folder, filebasename + "_" +
                    str(TimestampMillisec64()) + "_" + str(cut_num) + ".wav")
                audio_segment[chunck[0]:chunck[1]].export(out_audio_file,
                                                          format='wav')
                cut_num = cut_num + 1
            break
    # fallback: even the most permissive threshold tried produced fewer than 2 chunks
    if silence_thresh == silence_thresh_tries[-1] and len(chuncks) < 2:
        out_audio_file = os.path.join(
            output_folder, filebasename + "_" + str(TimestampMillisec64()) +
            "_" + str(cut_num) + ".wav")
        audio_segment[chuncks[0][0]:chuncks[0][1]].export(out_audio_file,
                                                          format='wav')
    return 60, cut_num
Example #13
def preprocess_wav_files(my_dir):
    vowelsDict = defaultdict(list)
    for filename in os.listdir(my_dir):
        vowel = filename.split("_")[0][-1]
        # File read
        signal = AudioSegment.from_wav(my_dir + '/' + filename)
        # Remove silence - beginning and end
        non_sil_times = detect_nonsilent(signal,
                                         min_silence_len=50,
                                         silence_thresh=signal.dBFS * 1.5)
        if len(non_sil_times):
            signal = signal[non_sil_times[0][0]:non_sil_times[0][1]]
        # Downsampling to 16KHz
        signal = signal.set_frame_rate(RATE)
        # Wav segmentation
        segmented_signal = wav_segmentation(signal)
        segmented_signal = [
            chunk.get_array_of_samples() for chunk in segmented_signal
        ]
        if '_' in filename:
            vowelsDict[vowel].extend(segmented_signal)
            # librosa_data[vowel].append(librosa_features(my_dir + '/' + filename))
        else:
            vowelsDict['iau'].extend(segmented_signal)
    return vowelsDict
Example #14
 def splitSound(self):
     self.__soundChunks = []
     gaps = silence.detect_nonsilent(self.__audio,
                                     silence_thresh=self.__threshold)
     for start, final in gaps:
         self.__soundChunks.append(self.__audio[start:final])
     return self
Example #15
def trim_on_silence(audio_path,
                    skip_idx=0,
                    out_ext="wav",
                    silence_thresh=-40,
                    min_silence_len=400,
                    silence_chunk_len=100,
                    keep_silence=200):

    audio = read_audio(audio_path)
    not_silence_ranges = silence.detect_nonsilent(
        audio,
        min_silence_len=silence_chunk_len,
        silence_thresh=silence_thresh)

    if not not_silence_ranges:
        print(audio_path)
        return []

    start_idx = not_silence_ranges[0][0]
    end_idx = not_silence_ranges[-1][1]

    start_idx = max(0, start_idx - keep_silence)
    end_idx = min(len(audio), end_idx + keep_silence)

    trimmed = audio[start_idx:end_idx]
    trimmed.export(audio_path, out_ext)
    return []
Example #16
def split_on_silence_with_pydub(
        audio_path, skip_idx=0, out_ext="wav",
        silence_thresh=-40, silence_chunk_len=100, keep_silence=100):

    filename = os.path.basename(audio_path).split('.', 1)[0]
    audio = read_audio(audio_path)

    not_silence_ranges = silence.detect_nonsilent(
        audio, min_silence_len=silence_chunk_len,
        silence_thresh=silence_thresh)

    edges = concatenate_edges(not_silence_ranges)
    intervals = get_rid_of_short_intervals(edges)

    # Save audio files
    audio_paths = []

    for idx, (start_idx, end_idx) in enumerate(intervals[skip_idx:]):
        start_idx = max(0, start_idx - keep_silence)
        end_idx += keep_silence

        target_audio_path = "{}/pre_audio/{}.{:04d}.{}".format(os.path.dirname(audio_path), filename, idx, out_ext)
        segment = audio[start_idx:end_idx]
        segment.export(target_audio_path, out_ext)
        audio_paths.append(target_audio_path)

    return audio_paths, intervals
Example #17
    def extractor(self):
        seg = AudioSegment.from_file(self.song)

        # reduce loudness of sounds over 120Hz (focus on bass drum, etc)
        seg = seg.low_pass_filter(120.0)

        # we'll call a beat: anything above average loudness
        beat_loudness = seg.dBFS

        # the fastest tempo we'll allow is 240 bpm (60000ms / 240beats)
        minimum_silence = int(60000 / 240.0)

        nonsilent_times = detect_nonsilent(seg, minimum_silence, beat_loudness)

        spaces_between_beats = []
        last_t = nonsilent_times[0][0]

        for peak_start, _ in nonsilent_times[1:]:
            spaces_between_beats.append(peak_start - last_t)
            last_t = peak_start

        # We'll base our guess on the median space between beats
        spaces_between_beats = sorted(spaces_between_beats)
        space = spaces_between_beats[int(len(spaces_between_beats) / 2)]

        bpm = 60000 / space

        return bpm
Example #18
    def _split_on_silence_ranges(self,
                                 min_silence_len=1000,
                                 silence_thresh=-16,
                                 keep_silence=100,
                                 seek_step=1):
        # from the itertools documentation
        def pairwise(iterable):
            "s -> (s0,s1), (s1,s2), (s2, s3), ..."
            a, b = itertools.tee(iterable)
            next(b, None)
            return zip(a, b)

        if isinstance(keep_silence, bool):
            keep_silence = len(self.audio) if keep_silence else 0

        output_ranges = [[
            start - keep_silence, end + keep_silence
        ] for (start, end) in detect_nonsilent(self.audio, min_silence_len,
                                               silence_thresh, seek_step)]

        for range_i, range_ii in pairwise(output_ranges):
            last_end = range_i[1]
            next_start = range_ii[0]
            if next_start < last_end:
                range_i[1] = (last_end + next_start) // 2
                range_ii[0] = range_i[1]

        return [(max(start, 0), min(end, len(self.audio)))
                for start, end in output_ranges]
Example #19
def split_videos(filename, output):
    # Split file on silences
    video = AudioSegment.from_file(filename, "mp4")
    silences = silence.detect_nonsilent(video,
                                        silence_thresh=-40,
                                        min_silence_len=400)

    print("Done splitting audio, start splitting clips")

    print(filename)
    clip = VideoFileClip(filename)
    for i, speech_range in enumerate(silences):
        start = speech_range[0] / 1000
        end = speech_range[1] / 1000
        length = end - start
        long_vid = False
        while length > 10:
            long_vid = True
            temp = start + 5
            subpart = clip.subclip(start, temp)
            subpart.write_videofile("{}/000s{}.mp4".format(output, i))
            i += 1
            start += 5
            length -= 5
        if long_vid or length < 1:
            continue
        subpart = clip.subclip(start, end)
        subpart.write_videofile("{}/000{}.mp4".format(output, i))
Example #20
def splitAudioFile(filename_mp3, in_dir, min_silence_len=400, silence_thresh=-65):

    sound = AudioSegment.from_mp3(filename_mp3)
    nonsilence_range = detect_nonsilent(sound, min_silence_len, silence_thresh)
    chunks_range = []

    for i, chunk in enumerate(nonsilence_range):
        if i==0:
            print(chunk, len(nonsilence_range))
            start = chunk[0]
            end = (chunk[1] + nonsilence_range[i+1][0])/2
            sound[:end].export(".\\tmp\\%d\\%d.wav" % (in_dir, i), format="wav", bitrate="16k")
        elif i == len(nonsilence_range)-1:
            start = (nonsilence_range[i-1][1] + chunk[0])/2
            end = chunk[1] + 1000.0
            sound[start:].export(".\\tmp\\%d\\%d.wav" % (in_dir, i), format="wav", bitrate="16k")
        else:
            start = (nonsilence_range[i-1][1] + chunk[0])/2
            end = (chunk[1] + nonsilence_range[i+1][0])/2
            sound[start:end].export(".\\tmp\\%d\\%d.wav" % (in_dir, i), format="wav", bitrate="16k")

        start = round(start/1000, 1)
        end = round(end/1000, 1)
        chunks_range.append((start,end))

    return chunks_range
Example #21
def split_on_silence(audio_segment,
                     min_silence_len=1000,
                     silence_thresh=-16,
                     keep_silence=100,
                     seek_step=1):
    """
    audio_segment - original pydub.AudioSegment() object

    min_silence_len - (in ms) minimum length of a silence to be used for
        a split. default: 1000ms

    silence_thresh - (in dBFS) anything quieter than this will be
        considered silence. default: -16dBFS

    keep_silence - (in ms) amount of silence to leave at the beginning
        and end of the chunks. Keeps the sound from sounding like it is
        abruptly cut off. (default: 100ms)
    """

    not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len,
                                          silence_thresh, seek_step)

    chunks = []
    starttime = []
    endtime = []
    for start_i, end_i in not_silence_ranges:
        start_i = max(0, start_i - keep_silence)
        end_i += keep_silence

        chunks.append(audio_segment[start_i:end_i])
        starttime.append(start_i)
        endtime.append(end_i)

    return chunks, starttime, endtime
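A possible call, using roughly the defaults documented in the docstring (the file name is a placeholder):

# hypothetical usage: split a recording on >= 1 s of silence quieter than -40 dBFS
song = AudioSegment.from_file("speech.wav")
chunks, starttime, endtime = split_on_silence(song, min_silence_len=1000, silence_thresh=-40)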
Example #22
def get_segmented_samples(my_dir, dictsList, iter_num=5):
    all_pitchs = []
    for dictL in dictsList:
        all_speakers = []
        for speakerNum in dictL.keys():
            segments = []
            for file in dictL[speakerNum]:
                signal = AudioSegment.from_wav(my_dir + '\\' + file)
                # Remove silence - beginning and end
                non_sil_times = detect_nonsilent(signal, min_silence_len=50, silence_thresh=signal.dBFS * 1.5)
                if len(non_sil_times): signal = signal[non_sil_times[0][0]:non_sil_times[0][1]]
                # Downsampling to 16KHz
                signal = signal.set_frame_rate(RATE)
                # Wav segmentation
                segmented_signal = wav_segmentation(signal)
                segmented_signal = [chunk.get_array_of_samples() for chunk in segmented_signal]
                segments.append(segmented_signal)
            iter_segments_per_speaker = []
            for n in range(iter_num):
                tmp = []
                for segList in segments:
                    rand_idx = np.random.randint(len(segList))
                    tmp.append(segList[rand_idx])
                iter_segments_per_speaker.append(tmp)
            all_speakers.append(iter_segments_per_speaker)
        all_pitchs.append(all_speakers)
    return all_pitchs
Example #23
def shorter_filler(json_result, audio_file, min_silence_len, start_time, non_silence_start):
  
  # use a shorter minimum silence length than the caller's
  min_silence_length = int(min_silence_len / 1.2)

  intervals = detect_nonsilent(audio_file,
                              min_silence_len=min_silence_length,
                              silence_thresh=-32.64
                              )
  
  for interval in intervals:

    interval_audio = audio_file[interval[0]:interval[1]]

    # if the interval is at least 460 ms (padding 40), recurse with a shorter silence length
    if (interval[1]-interval[0] >= 460):
      non_silence_start = shorter_filler(json_result, interval_audio, min_silence_length, interval[0]+start_time, non_silence_start)
    else:  # shorter than the padding-40 length, so run the filler-word prediction
      if interval[1]-interval[0] > 10 :
        if predict_filler(interval_audio) == 0 :  # it is a filler word
          json_result.append({'start':non_silence_start,'end':start_time+interval[0],'tag':'1000'}) # tag: 1000 means non-silence
          non_silence_start = start_time + interval[0]

          # tag the filler word
          json_result.append({'start':start_time+interval[0],'end':start_time+interval[1],'tag':'1111'}) # tag: 1111 means filler word
        
    
  return non_silence_start
Example #24
def volume_trial(file):
    # The volume threshold (decibels)
    threshhold = -45
    # The threshold for total time above the volume threshold (seconds)
    length_seconds = 5

    # Get audio from file and data about audio
    audio = AudioSegment.from_file(file)
    data = mediainfo(file)

    # The number of samples in the audio per second of audio
    samples_per_second = int(len(audio) / float(data["duration"]))

    # The time threshold in samples
    length_samples = length_seconds * samples_per_second

    # Find all sets of nonsilent samples
    nonsilences = silence.detect_nonsilent(audio, 1, silence_thresh=threshhold)

    # calculate total amount of nonsilences
    total = 0
    for i in nonsilences:
        total += i[1] - i[0]

    print(total / samples_per_second)
    if total >= length_samples:
        return True
    else:
        return False
Example #25
def calc_bpm(audio: AudioSegment) -> int:
    tmp_audio = effects.low_pass_filter(audio,
                                        120)  # cut off sounds below 120 Hz
    beat_volume = tmp_audio.dBFS
    min_silence = int(60000 / 240.0)  # Allow up to 240 bpm
    nonsilent_ranges = detect_nonsilent(tmp_audio, min_silence, beat_volume)
    spaces_between_beats = []
    last_t = nonsilent_ranges[0][0]

    for peak_start, _ in nonsilent_ranges[1:]:
        spaces_between_beats.append(peak_start - last_t)
        last_t = peak_start

    spaces_between_beats = sorted(spaces_between_beats)
    temp = len(spaces_between_beats) / 2
    temp = int(temp)
    print(temp)
    if temp == 0:
        # This just means that no beats were detected in this segment after the
        # 120 Hz low-pass filter; might as well discard it since we aren't going
        # to get a good bpm measurement from quieter sections of the song
        return 0
    space = spaces_between_beats[temp]
    bpm = 60000 / space
    return bpm
Example #26
    def trim_silent(self):
        """ Trims extraneous silence at the ends of the audio """
        a = AudioSegment.empty()
        for seg in silence.detect_nonsilent(self.audio):
            a = a.append(self.audio[seg[0]:seg[1]], crossfade=0)

        self.audio = a
        return self
Example #27
 def chunk(self):
     print('In Audio_Object.Chunk')
     ''' Chunk the audio in smaller files and save to chunk_path '''
     
     # open the audio file using pydub
     sound = AudioSegment.from_wav(self.audio_path + 
                                           self.audio_filename +
                                           self.audio_extn)
 
     silence_to_keep = 100
     # split the audio where the silence is 700 milliseconds or more and get chunks
     chunks = split_on_silence(sound,
         # experiment with this value for your target audio file
         min_silence_len = 700,
         # adjust this per requirement
         silence_thresh = sound.dBFS-14,
         # keep 100 ms of silence around each chunk, adjustable as well
         keep_silence=silence_to_keep,
     )
     
     # create a directory to store the audio chunks
     if not os.path.isdir(self.chunk_path):
         os.mkdir(self.chunk_path)
         
     times = []
     # Detect non-silent chunks, which in our case would be spoken words.
     nonsilent_data = detect_nonsilent(sound, min_silence_len=700, silence_thresh=sound.dBFS-14, seek_step=1)
     
     #convert ms to seconds
     print("start,Stop")
     for chunks_times in nonsilent_data:
         times.append( [ct/1000 for ct in chunks_times])
         
     self.ts = pd.DataFrame(times, columns=["start_time", "stop_time"])
     
     # print(self.ts.head())
     
     # process each chunk 
     
     dur_sec = []
     text = []
     
     for i, audio_chunk in enumerate(chunks, start=1):
         # export audio chunk and save it in
         # the `folder_name` directory.
         chunk_filename = os.path.join(self.chunk_path, f"chunk{i}.wav")
         audio_chunk.export(chunk_filename, format="wav")
         
         dur_sec.append(audio_chunk.duration_seconds - 
                        ((silence_to_keep*2)/1000))
         text.append(Audio_Object.extract_text(chunk_filename))
     # return the text for all chunks detected
     # return(self.whole_text)
 
     self.ts['duration'] = dur_sec
     self.ts['text'] = text
     self.ts['label'] = self.audio_filename
     self.save_ts_df()
Example #28
 def detect_nonsilences(self, sound):
     snd = AudioSegment.from_wav(sound)
     dBFS = snd.dBFS
     non_silent = detect_nonsilent(snd,
                                   min_silence_len=1000,
                                   silence_thresh=dBFS - 16)
     #Convert to seconds
     non_silence = [((start / 1000), (stop / 1000))
                    for start, stop in non_silent]
     return non_silence
Example #29
    def cutspeech(self, song1):
        not_silence_ranges = detect_nonsilent(song1,
                                              min_silence_len=100,
                                              silence_thresh=-32)
        # return the original audio unchanged when nothing non-silent was detected
        if len(not_silence_ranges) == 0:
            return song1

        starti = not_silence_ranges[0][0]
        endi = not_silence_ranges[-1][1]
        return song1[starti:endi]
Example #30
def detect_nonsilence_audiotime(filename, format):
    sound = AudioSegment.from_file(filename, format=format)
    dbfs = sound.dBFS
    print("正在查找非静音片段,根据视频大小,等待时间会有不同~请耐心等待")
    timestamp_list = detect_nonsilent(sound,
                                      min_silence_len=700,
                                      silence_thresh=dbfs - 16,
                                      seek_step=1)
    print("Done! Found " + str(len(timestamp_list)) + " speech segments in total")
    return timestamp_list
Example #31
def split_on_silence_with_pydub(
		audio_path, deepspeech, skip_idx=0, out_ext="wav",
		silence_thresh=-40, min_silence_len=400,
		silence_chunk_len=100, keep_silence=100,
		min_segment_length=0): #silence_chunk_len 100->200

	filename = os.path.basename(audio_path).split('.', 1)[0]
	in_ext = audio_path.rsplit(".")[1]

	audio = read_audio(audio_path)
	audio= audio.set_channels(1)
	audio_sample_width=audio.sample_width
	min_chunk_len=int(float(1000)*min_segment_length)

	
	not_silence_ranges = silence.detect_nonsilent(
		audio, min_silence_len=silence_chunk_len,
		silence_thresh=silence_thresh)

	edges = [not_silence_ranges[0]]

	for idx in range(1, len(not_silence_ranges)-1):
		cur_start = not_silence_ranges[idx][0]
		prev_end = edges[-1][1]
		prev_start = edges[-1][0]
		prev_len = prev_end - prev_start

		# if silence is too short or nonsilent is too short
		# merge current nonsilence with prev one 
		if cur_start - prev_end < min_silence_len or (min_chunk_len!=0 and prev_len < min_chunk_len) : 
			edges[-1][1] = not_silence_ranges[idx][1]
		else:
			edges.append(not_silence_ranges[idx])
	print("Finished finding Edges")
	
	audio_paths = []
	for idx, (start_idx, end_idx) in enumerate(edges[skip_idx:]):
		start_idx = max(0, start_idx - keep_silence)
		end_idx += keep_silence

		target_audio_path = "{}/{}.{:04d}.{}".format(
				os.path.dirname(audio_path), filename, idx, out_ext)
		segment=audio[start_idx:end_idx]
		
		# Set this to deepspeech compatible 
		if deepspeech:
			temp=segment.set_frame_rate(16000)
			temp=temp.set_sample_width(2)
			temp=temp.set_channels(1)
			temp.export(target_audio_path, out_ext)	# for soundsegment
		else:
			segment.export(target_audio_path, out_ext)	# for soundsegment
		audio_paths.append(target_audio_path)

	return audio_paths
Example #32
def bpm(seg):
    l_seg = seg.low_pass_filter(120.0)
    beat_loudness = l_seg.dBFS
    minimum_silence = int(60000/240.0)
    nonsilent_times = detect_nonsilent(l_seg, minimum_silence, beat_loudness)
    spaces_between_beats = []
    last_t = nonsilent_times[0][0]
    for peak_start, _ in nonsilent_times[1:]:
        spaces_between_beats.append(peak_start - last_t)
        last_t = peak_start

    spaces_between_beats = sorted(spaces_between_beats)
    space = spaces_between_beats[len(spaces_between_beats) // 2]
    bpm = 60000 / space
    return bpm
Example #33
    def process(self, dl_entry):
        """Make new audio file in the media directory.

        Take the audio file pointed to by dl_entry, normalize, remove silence,
        convert to output_format.
        """
        input_format = dl_entry.file_extension.lstrip('.')
        try:
            loader = load_functions[input_format]
        except KeyError:
            loader = lambda file: AudioSegment.from_file(
                file=file, format=input_format)
        # This sometimes raised a pydub.exceptions.CouldntDecodeError
        segment = loader(dl_entry.file_path)
        segment = segment.normalize()  # First normalize
        # Try to remove silence
        loud_pos = detect_nonsilent(
            segment, min_silence_len=minimum_silence_length,
            silence_thresh=silence_threshold)
        fade_in_length = rapid_fade_length
        fade_out_length = rapid_fade_length
        if len(loud_pos) == 1:
            loud_p = loud_pos[0]
            if loud_p[0] > silence_fade_length:
                fade_in_length = silence_fade_length
            if loud_p[1] < len(segment) - silence_fade_length:
                fade_out_length = silence_fade_length
            if loud_p[0] > 0 or loud_p[1] < len(segment):
                segment = segment[loud_p[0] : loud_p[1]]
        segment = segment.fade_in(fade_in_length).fade_out(fade_out_length)
        # Now write
        tof = tempfile.NamedTemporaryFile(
            delete=False, suffix=output_suffix, prefix=u'anki_audio_')
        temp_out_file_name = tof.name
        tof.close()
        segment.export(temp_out_file_name, output_format)
        os.unlink(dl_entry.file_path)  # Get rid of unprocessed version
        return temp_out_file_name, output_suffix
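The module-level names used above (load_functions, minimum_silence_length, silence_threshold, rapid_fade_length, silence_fade_length, output_format, output_suffix) are defined elsewhere in the module; plausible values, purely as an assumption, would be:

# assumed module-level configuration (not part of the original example)
load_functions = {'mp3': AudioSegment.from_mp3, 'wav': AudioSegment.from_wav}
minimum_silence_length = 300   # ms of silence required before trimming is attempted
silence_threshold = -50        # dBFS; anything quieter counts as silence
rapid_fade_length = 10         # ms fade used when the clip starts/ends on sound
silence_fade_length = 50       # ms fade used when leading/trailing silence was trimmed
output_format = 'mp3'
output_suffix = '.mp3'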