def get_element(datafile, indv=None, element_number=1, element="syllable", hparams=None):
    # if an individual isn't specified, grab the first one
    if indv is None:
        indv = datafile.indvs[0]
    # get the element
    element = datafile.data["indvs"][indv][element]
    # get the part of the wav we want to load
    st = element["start_times"][element_number]
    et = element["end_times"][element_number]
    # load the data
    rate, element = load_wav(
        datafile.data["wav_loc"], offset=st, duration=et - st, sr=None
    )
    # convert from int16 to float32 if needed
    if np.issubdtype(type(element[0]), np.integer):
        element = int16_to_float32(element)
    # bandpass filter
    if hparams is not None:
        element = butter_bandpass_filter(
            element, hparams.butter_lowcut, hparams.butter_highcut, rate, order=5
        )
    return rate, element
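# Usage sketch (hedged): `datafile` below stands in for a DataFile object with
# `indvs` and `data` attributes, as read by get_element above; the variable and
# the plotting step are illustrative, not part of the original cell.
rate, syll = get_element(datafile, element_number=1, element="syllable", hparams=hparams)
plt.plot(np.linspace(0, len(syll) / rate, len(syll)), syll)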
def prepare_wav(wav_loc, hparams=None):
    """ load wav and convert to correct format """
    # get rate and data
    rate, data = load_wav(wav_loc)
    # convert from int16 to float32 if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)
    if hparams is not None:
        # bandpass filter
        data = butter_bandpass_filter(
            data, hparams.butter_lowcut, hparams.butter_highcut, rate, order=5
        )
        # reduce noise
        if hparams.reduce_noise:
            data = nr.reduce_noise(
                audio_clip=data, noise_clip=data, **hparams.noise_reduce_kwargs
            )
    return rate, data
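# butter_bandpass_filter is called throughout this section but not defined here.
# A minimal sketch of a typical implementation with scipy.signal, assuming the
# same argument order used above -- a plausible stand-in, not necessarily the
# exact implementation used in this codebase.
from scipy.signal import butter, lfilter

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    # normalize the cutoffs by the Nyquist frequency and apply a Butterworth bandpass
    nyq = 0.5 * fs
    b, a = butter(order, [lowcut / nyq, highcut / nyq], btype="band")
    return lfilter(b, a, data)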
def subset_syllables(json_dict, indv, unit="syllables", hparams=None, include_labels=True):
    """ Grab syllables from wav data """
    if isinstance(indv, list):
        indv = indv[0]
    if not isinstance(json_dict, OrderedDict):
        json_dict = read_json(json_dict)
    # get unit info
    start_times = json_dict["indvs"][indv][unit]["start_times"]
    # "stop_times" vs "end_times" is a quick fix that should be resolved on the parsing side
    if "end_times" in json_dict["indvs"][indv][unit].keys():
        end_times = json_dict["indvs"][indv][unit]["end_times"]
    else:
        end_times = json_dict["indvs"][indv][unit]["stop_times"]
    if include_labels:
        labels = json_dict["indvs"][indv][unit]["labels"]
    else:
        labels = None
    # get rate and data
    rate, data = load_wav(json_dict["wav_loc"])
    # convert from int16 to float32 if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)
    if hparams is not None:
        # bandpass filter
        data = butter_bandpass_filter(
            data, hparams.butter_lowcut, hparams.butter_highcut, rate, order=5
        )
        # reduce noise
        if hparams.reduce_noise:
            data = nr.reduce_noise(
                audio_clip=data, noise_clip=data, **hparams.noise_reduce_kwargs
            )
    # slice out each unit by converting start/end times (s) to sample indices
    syllables = [
        data[int(st * rate):int(et * rate)]
        for st, et in zip(start_times, end_times)
    ]
    return syllables, rate, labels
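# Usage sketch (hedged): assumes a segmented JSON like those written by
# segment_spec_custom below; `json_path` is an illustrative placeholder.
json_dict = read_json(json_path)
indv = list(json_dict["indvs"].keys())[0]
syllables, rate, labels = subset_syllables(json_dict, indv, unit="syllables", hparams=hparams)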
def segment_spec_custom(key, df, DT_ID, save=False, plot=False):
    # load wav
    rate, data = load_wav(df.data["wav_loc"])
    # filter data
    data = butter_bandpass_filter(data, butter_min, butter_max, rate)

    # segment
    # (older call signature, kept for reference; these arguments are now carried in hparams)
    # results = dynamic_threshold_segmentation(
    #     data, rate, n_fft=n_fft, hop_length_ms=hop_length_ms, win_length_ms=win_length_ms,
    #     min_level_db_floor=min_level_db_floor, db_delta=db_delta, ref_level_db=ref_level_db,
    #     pre=pre, min_silence_for_spec=min_silence_for_spec, max_vocal_for_spec=max_vocal_for_spec,
    #     min_level_db=min_level_db, silence_threshold=silence_threshold, verbose=True,
    #     min_syllable_length_s=min_syllable_length_s, spectral_range=spectral_range,
    # )
    results = dynamic_threshold_segmentation(
        data,
        hparams,
        verbose=True,
        min_syllable_length_s=min_syllable_length_s,
        spectral_range=spectral_range,
    )
    if results is None:
        return
    if plot:
        plot_segmentations(
            results["spec"],
            results["vocal_envelope"],
            results["onsets"],
            results["offsets"],
            hop_length_ms,
            rate,
            figsize=(15, 3),
        )
        plt.show()

    # where to save the results
    json_out = (
        DATA_DIR
        / "processed"
        / (DATASET_ID + "_segmented")
        / DT_ID
        / "JSON"
        / (key + ".JSON")
    )
    # add the segmentation to a copy of the datafile's JSON
    json_dict = df.data.copy()
    json_dict["indvs"][list(df.data["indvs"].keys())[0]]["syllables"] = {
        "start_times": list(results["onsets"]),
        "end_times": list(results["offsets"]),
    }
    # save json
    if save:
        ensure_dir(json_out.as_posix())
        with open(json_out.as_posix(), "w") as json_file:
            json.dump(json_dict, json_file, cls=NoIndentEncoder, indent=2)
    return results
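# Driver sketch (hedged): segment_spec_custom is typically mapped over every
# data file in a dataset; `dataset.data_files` as a dict of key -> DataFile and
# the joblib pattern are assumptions, not part of the original cell.
from joblib import Parallel, delayed

with Parallel(n_jobs=-1, verbose=10) as parallel:
    parallel(
        delayed(segment_spec_custom)(key, df, DT_ID, save=True)
        for key, df in dataset.data_files.items()
    )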
# 'ebr_min': 0.05,         # expected syllabic rate (/s) low
# 'ebr_max': 0.2,          # expected syllabic rate (/s) high
# 'max_thresh': 0.02,      # maximum pct of syllabic envelope to threshold at in second pass
# 'thresh_delta': 0.005,   # delta change in threshold to match second pass syllabification
# 'slow_threshold': 0.005, # starting threshold for second pass syllabification
# 'pad_length': syll_size, # length to pad spectrograms to

####
DATASET_ID = 'bengalese_finch_sakata'
dataset = DataSet(DATASET_ID, hparams=hparams)

# print a sample datafile's JSON
dataset.sample_json
rate, data = load_wav(dataset.sample_json["wav_loc"])

mypath = r'I:\avgn_paper-vizmerge\data\processed\bengalese_finch_sakata\2020-04-29_21-12-51\WAV'
# file_current = 'br81bl41_0016.wav'
# file_current = 'br82bl42_0016.wav'
file_current = 'tutor_bl5w5_0017.WAV'
rate, data_loaded = load_wav(mypath + '\\' + file_current)
data = data_loaded
times = np.linspace(0, len(data) / rate, len(data))

# filter data
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(times, data)
# 'ebr_min': 0.05,         # expected syllabic rate (/s) low
# 'ebr_max': 0.2,          # expected syllabic rate (/s) high
# 'max_thresh': 0.02,      # maximum pct of syllabic envelope to threshold at in second pass
# 'thresh_delta': 0.005,   # delta change in threshold to match second pass syllabification
# 'slow_threshold': 0.005, # starting threshold for second pass syllabification
# 'pad_length': syll_size, # length to pad spectrograms to

####
import numpy as np

DATASET_ID = 'bengalese_finch_sakata'
dataset = DataSet(DATASET_ID, hparams=hparams)

# print a sample datafile's JSON
dataset.sample_json
rate, data = load_wav(dataset.sample_json["wav_loc"])
hparams.sample_rate = rate

### Individual data file segmentation
# import librosa
# rate, data = load_wav(dataset.sample_json["wav_loc"])
# np.min(data), np.max(data)
# data = data / np.max(np.abs(data))

# filter data
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)
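# Single-file segmentation sketch: this mirrors the dynamic_threshold_segmentation
# call inside segment_spec_custom above, applied to the filtered `data` from this
# cell; placing it here as the next step is an assumption.
results = dynamic_threshold_segmentation(
    data,
    hparams,
    verbose=True,
    min_syllable_length_s=min_syllable_length_s,
    spectral_range=spectral_range,
)
if results is not None:
    plot_segmentations(
        results["spec"],
        results["vocal_envelope"],
        results["onsets"],
        results["offsets"],
        hop_length_ms,
        rate,
        figsize=(15, 3),
    )
    plt.show()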
hparams.db_delta = db_delta
hparams.min_silence_for_spec = min_silence_for_spec
hparams.max_vocal_for_spec = max_vocal_for_spec
hparams.silence_threshold = silence_threshold

syll_size = 128
hparams.mel_filter = True     # should a mel filter be used?
hparams.num_mels = syll_size  # how many channels to use in the mel-spectrogram (e.g. 128)
hparams.fmin = 300            # low frequency cutoff for mel filter
hparams.fmax = None           # high frequency cutoff for mel filter
hparams.power = 1.5

# rate, data = load_wav('I:/avgn_paper-vizmerge/data/processed/bengalese_finch_sakata/2020-04-29_21-12-51/WAV/br82bl42_0001.WAV')
rate, data = load_wav('I:/avgn_paper-vizmerge/data/processed/bengalese_finch_sakata/2020-04-29_21-12-51/WAV/tutor_bl5w5_0000.WAV')
hparams.sample_rate = rate

data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

hparams.ref_level_db = 90
spec_orig = spectrogram(data, rate, hparams)
plot_spec(
    norm(spec_orig),
    fig=None,
    ax=None,
)
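# `norm` rescales the spectrogram before plotting; a minimal sketch assuming
# simple min-max normalization to [0, 1] (the actual helper may differ).
def norm(x):
    # rescale an array to the [0, 1] range
    return (x - np.min(x)) / (np.max(x) - np.min(x))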
hparams.db_delta = db_delta
hparams.min_silence_for_spec = min_silence_for_spec
hparams.max_vocal_for_spec = max_vocal_for_spec
hparams.silence_threshold = silence_threshold

syll_size = 128
hparams.mel_filter = True     # should a mel filter be used?
hparams.num_mels = syll_size  # how many channels to use in the mel-spectrogram (e.g. 128)
hparams.fmin = 300            # low frequency cutoff for mel filter
hparams.fmax = None           # high frequency cutoff for mel filter
hparams.power = 1.5

rate, data = load_wav(
    'I:/avgn_paper-vizmerge/data/processed/bengalese_finch_sakata/2020-04-29_21-12-51/WAV/br81bl41_0000.WAV'
)
hparams.sample_rate = rate

data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

# hparams.ref_level_db = 10
spec_orig = spectrogram_nn(data, hparams)
plot_spec(
    spec_orig,
    fig=None,
    ax=None,
    rate=None,
)
# mypath = r'F:\data_for_avishek\Dir_Undir\done\o122p123'
# file_current = 'motif.1614.898o22p23_dir5.wav'
# mypath = r'F:\data_for_avishek\Dir_Undir\done\o121p122'
# file_current = 'motif.2210.9524o21p22_dir7_new.wav'
# mypath = r'F:\data_for_avishek\motif_all_dataset'
# file_current = 'motif.2636.5533blublu50-58_dir4_new.wav'
# file_current = 'motif.53.8549blublu50-58_dir5_new.wav'
mypath = r'F:\data_for_avishek\Stimuli\blu19org61\hctsadata'
file_current = 'directed_b19o61.3-041103.385.wav'

# rate, data_loaded = wavfile.read(mypath + '\\' + file_current)
rate, data = load_wav(mypath + '\\' + file_current)

# filter data
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

hparams.ref_level_db = 70
spec_orig = spectrogram(data, rate, hparams)

figsize = (5, 3)
# norm = matplotlib.colors.Normalize(vmin=np.min(spec_dir), vmax=np.max(spec_dir))
# current_map = cm.get_cmap('RdPu', 512)
current_map = cm.get_cmap('Blues', 512)
# current_map = cm.get_cmap('Reds', 512)
# current_map = cm.get_cmap('Oranges', 512)
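# Plotting sketch (hedged): reuses plot_spec as in the cells above with the
# colormap and figure size chosen here; `cmap` as a plot_spec keyword is an
# assumption.
fig, ax = plt.subplots(figsize=figsize)
plot_spec(norm(spec_orig), fig=fig, ax=ax, cmap=current_map)
plt.show()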
def process_bird_wav(
    bird,
    wav_info,
    wav_time,
    params,
    save_to_folder,
    visualize=False,
    skip_created=False,
    seconds_timeout=300,
    save_spectrograms=True,
    verbose=False,
):
    """splits a wav file into periods of silence and periods of sound based on params"""
    # load up the WAV
    rate, data = load_wav(wav_info)
    params["sample_rate"] = rate
    if rate is None or data is None:
        return

    # bandpass filter
    data = butter_bandpass_filter(
        data.astype("float32"), params["lowcut"], params["highcut"], rate, order=2
    )
    data = float32_to_int16(data)

    # we only want one channel
    if len(np.shape(data)) == 2:
        data = data[:, 0]

    # threshold the (root mean squared of the) audio
    rms_data, sound_threshed = RMS(
        data,
        rate,
        params["rms_stride"],
        params["rms_window"],
        params["rms_padding"],
        params["noise_thresh"],
    )

    # find the onsets/offsets of sound
    onset_sounds, offset_sounds = detect_onsets_offsets(
        np.repeat(sound_threshed, int(params["rms_stride"] * rate)),
        threshold=0,
        min_distance=0,
    )
    # make sure all onset sounds are at least zero (due to downsampling in RMS)
    onset_sounds[onset_sounds < 0] = 0

    # threshold clips of sound
    for onset_sound, offset_sound in zip(onset_sounds, offset_sounds):
        # segment the clip
        clip = data[onset_sound:offset_sound]
        # if the clip is thresholded as noise, do not save it into the dataset
        # bin width in Hz of spectrogram
        freq_step_size_Hz = (rate / 2) / params["num_freq"]
        bout_spec = threshold_clip(
            clip, rate, freq_step_size_Hz, params, visualize=visualize, verbose=verbose
        )
        if bout_spec is None:
            # visualize spectrogram if desired
            if visualize:
                # compute spectrogram of clip
                wav_spectrogram = spectrogram(int16_to_float32(clip), params)
                visualize_spec(wav_spectrogram, show=True)
            continue

        # determine the datetime of this clip
        start_time = wav_time + timedelta(seconds=onset_sound / float(rate))
        time_string = start_time.strftime("%Y-%m-%d_%H-%M-%S-%f")

        # create a subfolder for the individual bird if it doesn't already exist
        bird_folder = Path(save_to_folder).resolve() / bird
        ensure_dir(bird_folder)

        # save data
        save_bout_wav(data, rate, bird_folder, bird, wav_info, time_string, skip_created)

        # save the spectrogram of the data
        if save_spectrograms:
            save_bout_spec(bird_folder, bout_spec, time_string, skip_created)
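# Usage sketch (hedged): the params keys match those read inside
# process_bird_wav above; the concrete values, paths, and bird name are
# illustrative placeholders.
from datetime import datetime

params = {
    "lowcut": 500,         # bandpass low cutoff (Hz)
    "highcut": 15000,      # bandpass high cutoff (Hz)
    "rms_stride": 0.05,    # stride (s) between RMS windows
    "rms_window": 0.1,     # RMS window length (s)
    "rms_padding": 0.1,    # padding (s) around detected sound
    "noise_thresh": 0.01,  # RMS threshold separating sound from silence
    "num_freq": 512,       # number of spectrogram frequency bins
}
process_bird_wav(
    "br82bl42",
    "path/to/recording.wav",
    datetime(2020, 4, 29, 21, 12, 51),
    params,
    "path/to/output",
    visualize=False,
)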