from ecosound.core.measurement import Measurement

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## Load netcdf measurement folder from folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
# outfile = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset\dataset_noise.nc'
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
# # meas.to_netcdf(outfile)

# ## Load netcdf measurement from a single file
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2\JASCOAMARHYDROPHONE742_20140913T115018.797Z.wav.nc'
meas = Measurement()
meas.from_netcdf(netcdf_files, verbose=True)
print(len(meas))

# ## Load netcdf measurements from a list of files
# netcdf_files = []
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017121114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017151114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017181114.wav.nc")
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
# Assumed imports for this fragment (the top of the script was not included):
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from ecosound.core.measurement import Measurement
# plot_spectrogram, plot_top_view, plot_side_view, and plot_video_frame are
# helper functions defined elsewhere in this project.


def plot_full_figure(time_sec=None):
    # Input files (large array, quillback rockfish example)
    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\large-array_quillback\AMAR173.4.20190920T161248Z.nc'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\AMAR173.1.20190920T161248Z.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\hydrophones_config_07-HI.csv'

    # Time window of interest (s)
    t1_sec = 1570
    t2_sec = 1587  # 1590

    # Localization filters (position bounds and uncertainty thresholds, m)
    filter_x = [-1.5, 1.5]
    filter_y = [-1.5, 1.5]
    filter_z = [-1.5, 1.5]
    filter_x_std = 0.5
    filter_y_std = 0.5
    filter_z_std = 0.5

    # Plotting parameters
    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],    # 0.7
        'uncertainty_width': [0.2],  # 0.2
        'x_min': [-1.26],
        'x_max': [1.26],
        'y_min': [-1.26],
        'y_max': [1.26],
        'z_min': [-1.5],
        'z_max': [2.1],
    })

    ## #######################################################################

    # Load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data

    # Load hydrophone locations
    hydrophones_config = pd.read_csv(hp_config_file)

    # Filter localizations
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaNs
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x)) &
                            (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]

    # Adjust detection times relative to the start of the plotted window
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec
    if time_sec is not None:
        loc_data = loc_data.loc[(loc_data['time_max_offset'] <= time_sec)]
    else:
        print('Static')

    # Update loc object
    loc.data = loc_data

    # Plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec - t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors * 2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file, loc, t1_sec, t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')
    if time_sec:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # Plot detection points on top of the spectrogram
    # gs0 = fig_final.add_gridspec(60, 1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x, det_y, c=loc_data['time_min_offset'],
                     cmap=cmap, norm=norm, s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # # pos = [left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)
    # size = fig_final.get_size_inches()
    plt.subplots_adjust(left=0.08, bottom=0.1, right=0.95, top=0.95,
                        wspace=0, hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x, det_y, '.r')
    # cax2.set_xlim(ax_spectro.get_xlim())
    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # Plot localizations: top view
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # Plot localizations: side view
    # ax_sideloc = fig_final.add_subplot(3, 3, 7, sharex=ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm, ax_sideloc)
    ax_sideloc.set_anchor('W')

    # Set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # Plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1, 1)
    # frame1_sec = 152.8  # second detection -> 16:38:59.8
    # # ax_video1 = fig_final.add_subplot(3, 3, 5)
    # plot_video_frame(video_file, frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # Plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1, 1)
    # frame2_sec = 160  # 4th detection -> 16:39:07
    # # ax_video2 = fig_final.add_subplot(3, 3, 6)
    # plot_video_frame(video_file, frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(8.6, 6.72)
    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
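# Example usage (a sketch; assumes the imports above and that the input files
# exist). Calling plot_full_figure() with no argument renders the static
# figure with all localizations; passing time_sec draws a red time cursor on
# the spectrogram and only keeps detections up to that time, which is useful
# for building frame-by-frame animations. The output file name is hypothetical.
fig = plot_full_figure()                # static figure
# fig = plot_full_figure(time_sec=5.0)  # snapshot at t = 5 s
fig.savefig('quillback_localization_figure.png', dpi=300)
plt.show()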
import pandas as pd

from ecosound.core.measurement import Measurement

"""
Gathers measurements for all annotations and noise, merges them into a single
dataset, and re-labels classes to create a 2-class dataset: 'FS' vs. 'NN'.
"""

# Define input and output files
annot_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only.nc'
noise_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
outfile = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_FS-NN_modified_20201105145300.nc'

# Load measurements
meas_annot = Measurement()
meas_annot.from_netcdf(annot_file)
meas_noise = Measurement()
meas_noise.from_netcdf(noise_file)

## Label noise measurements as 'NN'
meas_noise.insert_values(label_class='NN')
print(meas_noise.summary())

## Re-label annotations that are not 'FS' as 'NN'
print(meas_annot.summary())
meas_annot.data['label_class'].replace(to_replace=['', 'ANT', 'HS', 'KW', 'UN'],
                                       value='NN', inplace=True)
print(meas_annot.summary())

## Merge the two datasets
meas_NN_FS = meas_noise + meas_annot
print(meas_NN_FS.summary())
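# 'outfile' is defined above but never written to in this fragment; presumably
# the final step is to save the merged 2-class dataset, using to_netcdf() as
# in the other scripts:
meas_NN_FS.to_netcdf(outfile)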
# Assumed imports for this fragment (the top of the script was not included):
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from ecosound.core.measurement import Measurement
# read_yaml, defineReceiverPairs, predict_tdoa, calc_loc_errors,
# plot_spectrogram, plot_top_view, plot_side_view, and plot_video_frame are
# helper functions defined elsewhere in this project.


def plot_full_figure(time_sec=None):
    # Input files (mobile array, copper rockfish example)
    # loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_1m_5cm.nc'
    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_2cm_3m.nc'
    loc_file_matlab = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_matlab_with_CI.csv'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\671404070.190918222812.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\hydrophones_config_HI-201909.csv'
    localization_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\config_files\localization_config_mobile_array.yaml'

    # Time window of interest (s)
    t1_sec = 214  # 216
    t2_sec = 224  # 223

    # Localization filters (position bounds and uncertainty thresholds, m)
    filter_x = [-5, 5]
    filter_y = [-5, 5]
    filter_z = [-2, 5]
    filter_x_std = 6
    filter_y_std = 9
    filter_z_std = 6

    # Plotting parameters
    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],    # 0.7
        'uncertainty_width': [0.2],  # 0.2
        'x_min': [-1.5],
        'x_max': [1.5],
        'y_min': [-0.5],
        'y_max': [3],
        'z_min': [-1.5],
        'z_max': [1.5],
    })

    ## #######################################################################

    localization_config = read_yaml(localization_config_file)
    hydrophones_config = pd.read_csv(hp_config_file)
    sound_speed_mps = localization_config['ENVIRONMENT']['sound_speed_mps']
    ref_channel = localization_config['TDOA']['ref_channel']
    hydrophone_pairs = defineReceiverPairs(len(hydrophones_config),
                                           ref_receiver=ref_channel)

    # Load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data
    # Use the localizations with confidence intervals computed in Matlab
    loc_data = pd.read_csv(loc_file_matlab)

    # ## Recalculate data errors
    # diff = []
    # for idx in range(len(loc_data)):
    #     m = loc_data.loc[[idx], ['x', 'y', 'z']]
    #     tdoa_m = predict_tdoa(m, sound_speed_mps, hydrophones_config,
    #                           hydrophone_pairs)
    #     tdoa_measured = loc_data.loc[[idx], ['tdoa_sec_1', 'tdoa_sec_2',
    #                                          'tdoa_sec_3']].to_numpy()
    #     # diff_temp = (tdoa_m - tdoa_measured.T)**2
    #     if idx == 0:
    #         diff = (tdoa_m - tdoa_measured.T)**2
    #     else:
    #         diff = np.vstack((diff, (tdoa_m - tdoa_measured.T)**2))
    # Q = len(loc_data)
    # # M = m.size  # number of dimensions of the model (here: X, Y, and Z)
    # # N = len(tdoa_sec)  # number of measurements
    # # error_std = np.sqrt((1 / (Q * (N - M))) * (sum((tdoa_sec - tdoa_m)**2)))
    # tdoa_errors_std = np.sqrt((1 / Q) * (sum(diff)))
    # # tdoa_errors_std = calc_data_error(tdoa_sec, m, sound_speed_mps,
    # #                                   hydrophones_config, hydrophone_pairs)
    # for idx in range(len(loc_data)):
    #     loc_errors_std = calc_loc_errors(tdoa_errors_std,
    #                                      loc_data.loc[[idx], ['x', 'y', 'z']],
    #                                      sound_speed_mps, hydrophones_config,
    #                                      hydrophone_pairs)
    #     print('m')

    # Filter localizations
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaNs
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x)) &
                            (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]

    # Adjust detection times relative to the start of the plotted window
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec
    if time_sec is not None:
        loc_data = loc_data.loc[(loc_data['time_max_offset'] <= time_sec)]
    else:
        print('Static')

    # Update loc object
    loc.data = loc_data

    # Plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec - t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors * 2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file, loc, t1_sec, t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')
    if time_sec:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # Plot detection points on top of the spectrogram
    # gs0 = fig_final.add_gridspec(60, 1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x, det_y, c=loc_data['time_min_offset'],
                     cmap=cmap, norm=norm, s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # # pos = [left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)
    # size = fig_final.get_size_inches()
    plt.subplots_adjust(left=0.08, bottom=0.1, right=0.95, top=0.95,
                        wspace=0, hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x, det_y, '.r')
    # cax2.set_xlim(ax_spectro.get_xlim())
    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # Plot localizations: top view
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # Plot localizations: side view
    # ax_sideloc = fig_final.add_subplot(3, 3, 7, sharex=ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm, ax_sideloc)
    ax_sideloc.set_anchor('W')

    # Set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # Plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1, 1)
    # frame1_sec = 152.8  # second detection -> 16:38:59.8
    # # ax_video1 = fig_final.add_subplot(3, 3, 5)
    # plot_video_frame(video_file, frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # Plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1, 1)
    # frame2_sec = 160  # 4th detection -> 16:39:07
    # # ax_video2 = fig_final.add_subplot(3, 3, 6)
    # plot_video_frame(video_file, frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(9.08, 6.72)
    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder from Measurements
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# ## netcdf folder from Measurements
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
annot4 = Measurement()
annot4.from_netcdf(netcdf_files, verbose=True)
print(len(annot4))

# import xarray as xr
# d = annot3.data
# index = range(0, len(d), 1)
# d['index'] = index
# # d = d.set_index(['index', 'entry_date', 'frequency_min', 'label_class'])
# d = d.set_index(['index'])
# data = d.to_xarray()
# data2 = data.sel(index=0)
    'uncertainty_alpha': [1],    # 0.7
    'uncertainty_width': [0.2],  # 0.2
    'x_min': [-3],
    'x_max': [3],
    'y_min': [-3],
    'y_max': [3],
    'z_min': [-3],
    'z_max': [3],
})

## ###########################################################################

## Load localization results: 0 degrees
file1 = '0_deg.nc'
loc1 = Measurement()
loc1.from_netcdf(os.path.join(indir, file1))
# loc1_data = loc1.data

# # Filter
# loc1_data = loc1_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
# loc1_data = loc1_data.loc[(loc1_data['x'] >= min(filter_x)) &
#                           (loc1_data['x'] <= max(filter_x)) &
#                           (loc1_data['y'] >= min(filter_y)) &
#                           (loc1_data['y'] <= max(filter_y)) &
#                           (loc1_data['z'] >= min(filter_z)) &
#                           (loc1_data['z'] <= max(filter_z)) &
#                           (loc1_data['x_std'] <= filter_x_std) &
#                           (loc1_data['y_std'] <= filter_y_std) &
#                           (loc1_data['z_std'] <= filter_z_std)
#                           ]

# # Adjust detection times
# filter_y = [-1.5, 1.5]
# filter_z = [-2, 2]
# filter_x_std = 0.3
# filter_y_std = 0.3
# filter_z_std = 0.3

# Load data
print('')
print('Loading dataset')
idx = 0
for infile in os.listdir(indir):
    if infile.endswith(".nc"):
        print(infile)
        locs = Measurement()
        locs.from_netcdf(os.path.join(indir, infile))
        loc_data = locs.data

        # Filter
        loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
        loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x)) &
                                (loc_data['x'] <= max(filter_x)) &
                                (loc_data['y'] >= min(filter_y)) &
                                (loc_data['y'] <= max(filter_y)) &
                                (loc_data['z'] >= min(filter_z)) &
                                (loc_data['z'] <= max(filter_z)) &
                                (loc_data['x_std'] <= filter_x_std) &
                                (loc_data['y_std'] <= filter_y_std) &
                                (loc_data['z_std'] <= filter_z_std)]

        if idx == 0:
import pandas as pd

from ecosound.core.annotation import Annotation
from ecosound.core.measurement import Measurement

## Input parameters ##########################################################
annotation_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\Master_annotations_dataset.nc"
detection_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2"
outfile = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only2.nc'

# Load annotations
annot = Annotation()
annot.from_netcdf(annotation_file)

# Load detections
detec = Measurement()
detec.from_netcdf(detection_file)
print(detec)

freq_ovp = True              # default: True
dur_factor_max = None        # default: None
dur_factor_min = 0.1         # default: None
ovlp_ratio_min = 0.3         # default: None
remove_duplicates = True     # default: False
inherit_metadata = True      # default: False
filter_deploymentID = False  # default: True

# Only keep detections that overlap in time and frequency with annotations
detec.filter_overlap_with(annot,
                          freq_ovp=freq_ovp,
                          dur_factor_max=dur_factor_max,
                          dur_factor_min=dur_factor_min,
                          ovlp_ratio_min=ovlp_ratio_min,
                          remove_duplicates=remove_duplicates,
                          inherit_metadata=inherit_metadata,
                          filter_deploymentID=filter_deploymentID)
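# 'outfile' is defined above but not used in this fragment; presumably the
# filtered detections are meant to be saved to it, using to_netcdf() as in
# the other scripts:
detec.to_netcdf(outfile)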
# Assumed imports for this fragment (the top of the script was not included);
# indir, ext, and outdir are assumed to be defined earlier in the original
# script:
import os
import platform

import ecosound.core.tools
from ecosound.core.measurement import Measurement
from ecosound.core.metadata import DeploymentInfo

deployment_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\deployment_info.csv'
data_dir = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\audio_data'

# Load metadata
operator_name = platform.uname().node
dep_info = DeploymentInfo()
dep_info.read(deployment_file)

# List files
files = ecosound.core.tools.list_files(indir,
                                       ext,
                                       recursive=False,
                                       case_sensitive=True)

for idx, file in enumerate(files):
    print(str(idx) + r'/' + str(len(files)) + ': ' + file)
    meas = Measurement()
    meas.from_netcdf(file)
    meas.insert_metadata(deployment_file)
    file_name = os.path.splitext(os.path.basename(file))[0]
    meas.insert_values(
        operator_name=operator_name,
        audio_file_name=os.path.splitext(os.path.basename(file_name))[0],
        audio_file_dir=data_dir,
        audio_file_extension='.wav',
        audio_file_start_date=ecosound.core.tools.filename_to_datetime(
            file_name)[0])
    meas.to_netcdf(os.path.join(outdir, file_name + '.nc'))
# Assumed imports for this fragment (the top of the script was not included):
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedGroupKFold  # scikit-learn >= 1.0
from xgboost import XGBClassifier

from ecosound.core.measurement import Measurement
# add_class_ID, add_subclass, add_group, plot_datasets_distrib,
# plot_dataset_distrib, plot_datasets_groups, cross_validation,
# summarize_performance, plot_PR_curves, plot_F_curves, classification_train,
# classification_predict, and plot_2d_space are helper functions defined
# elsewhere in this project.


def main():
    # Input arguments
    input_args = dict()
    input_args['positive_class_label'] = 'FS'
    input_args['train_ratio'] = 0.75
    input_args['cv_splits'] = 10  # 5
    input_args['cv_repeats'] = 1
    input_args['rebalance_classes'] = True
    # input_args['data_file'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20201105145300.nc'
    input_args['data_file'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20200902194334.nc'
    input_args['out_dir'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\Classification'
    input_args['run_CV'] = False
    input_args['train_final_model'] = True
    input_args['final_model_name'] = 'RF50'

    ## DEFINITION OF CLASSIFIERS ----------------------------------------------
    models = []
    models.append(('Dummy', DummyClassifier(strategy="constant", constant=1)))
    models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    # models.append(('KNN', KNeighborsClassifier()))
    # models.append(('KNN', KNeighborsClassifier(n_neighbors=4, metric='euclidean')))
    models.append(('CART', DecisionTreeClassifier()))
    # models.append(('NB', GaussianNB()))
    models.append(('XGBoost', XGBClassifier()))
    # models.append(('MLP', MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=0)))
    models.append(('RF5', RandomForestClassifier(n_estimators=5, min_samples_split=100, min_samples_leaf=50, random_state=0)))
    models.append(('RF10', RandomForestClassifier(n_estimators=10, min_samples_split=100, min_samples_leaf=50, random_state=0)))
    models.append(('RF30', RandomForestClassifier(n_estimators=30, min_samples_split=100, min_samples_leaf=50, random_state=0)))
    models.append(('RF50', RandomForestClassifier(n_estimators=50, min_samples_split=100, min_samples_leaf=50, random_state=0)))
    # models.append(('RF100', RandomForestClassifier(n_estimators=100, min_samples_split=100, min_samples_leaf=50, random_state=0)))

    ## Set up output folder
    now = datetime.now()
    now_str = now.strftime("%Y%m%dT%H%M%S")
    out_dir = os.path.join(input_args['out_dir'], now_str)
    os.mkdir(out_dir)

    ## Save input args to a txt file
    with open(os.path.join(out_dir, 'input_args_' + now_str + '.txt'), "w") as text_file:
        text_file.write(str(input_args))

    ## Check that the model name exists before running all the processing
    if input_args['train_final_model']:
        model_idx = [model[0] for model in models].index(input_args['final_model_name'])

    ## LOAD DATASET -----------------------------------------------------------
    dataset = Measurement()
    dataset.from_netcdf(input_args['data_file'])
    print(dataset.summary())

    ## DATA PREPARATION -------------------------------------------------------
    # List of features used for the classification
    features = dataset.metadata['measurements_name'][0]
    # Data
    data = dataset.data
    # Drop FS observations at Mill Bay
    indexNames = data[(data['label_class'] == 'FS') &
                      (data['location_name'] == 'Mill bay')].index
    data.drop(indexNames, inplace=True)
    # Add subclass + IDs
    data, class_encoder = add_class_ID(data, input_args['positive_class_label'])
    data, _ = add_subclass(data)
    # subclass2class_table = subclass2class_conversion(data)
    # Add group ID
    data, group_encoder = add_group(data)

    ## DATA CLEAN-UP ----------------------------------------------------------
    # Basic stats on all features
    data_stats = data[features].describe()
    # print(data_stats)
    # Count NaNs and Infs per column
    data = data.replace([np.inf, -np.inf], np.nan)
    Nnan = data[features].isna().sum()
    ax = Nnan.plot(kind='bar', title='Number of NaN/Inf', grid=True)
    ax.set_ylabel('Number of observations with NaNs/Infs')
    # Drop features with too many NaNs (and the non-informative uuid field)
    features.remove('freq_flatness')
    features.remove('snr')
    features.remove('uuid')
    # Drop observations/rows with NaNs
    data.dropna(subset=features, axis=0, how='any', thresh=None, inplace=True)
    data_stats2 = data[features].describe()

    # ## VISUALIZATION ---------------------------------------------------------
    # # Box and whisker plots
    # data[features].plot(kind='box', subplots=True, layout=(7, 7), sharex=False, sharey=False)
    # # Histograms
    # data[features].hist()
    # # Scatter plot matrix
    # pd.plotting.scatter_matrix(data[features])
    # # Scatter plot PCA
    # pca = PCA(n_components=2)
    # X = pca.fit_transform(data[features])
    # y = data['class_ID']
    # plot_2d_space(X, y, 'Imbalanced dataset (2 PCA components)')

    ## SPLIT DATA INTO TRAIN & TEST SETS ---------------------------------------
    n_splits = round(1 / (1 - input_args['train_ratio']))
    skf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=None)
    for train_index, test_index in skf.split(data, data['subclass_ID'],
                                             groups=data['group_ID']):
        data_train, data_test = data.iloc[train_index], data.iloc[test_index]
        break  # only keep the first train/test split

    # Plot class repartition
    plot_datasets_distrib(data_train, data_test)
    plot_dataset_distrib(data, attr_list=['subclass_label', 'label_class'], title='Full dataset')
    plot_dataset_distrib(data_train, attr_list=['subclass_label', 'label_class'], title='Training set')
    plot_dataset_distrib(data_test, attr_list=['subclass_label', 'label_class'], title='Test set')

    # Verify that groups are not used in both datasets
    groups_intersection = plot_datasets_groups(data_train, data_test, show=True)

    ## CROSS VALIDATION ON TRAIN SET -------------------------------------------
    if input_args['run_CV']:
        # Run train/test experiments
        cv_predictions, cv_performance = cross_validation(
            data_train,
            models,
            features,
            cv_splits=input_args['cv_splits'],
            cv_repeats=input_args['cv_repeats'],
            rebalance=input_args['rebalance_classes'])
        # Display summary results
        performance_report = summarize_performance(cv_performance, threshold=0.5)
        print(performance_report)
        # Plot mean Precision and Recall curves
        plot_PR_curves(cv_performance)
        plot_F_curves(cv_performance)
        # Save results
        CV_results = {
            'cv_predictions': cv_predictions,
            'cv_performance': cv_performance,
            'models': models,
            'input_args': input_args,
        }
        pickle.dump(CV_results,
                    open(os.path.join(out_dir, 'CV_' + now_str + '.sav'), 'wb'))

    ## FINAL EVALUATION ON TEST SET --------------------------------------------
    if input_args['train_final_model']:
        print(' ')
        print('Final evaluation on test set:')
        print(' ')
        model_name = models[model_idx][0]
        model = models[model_idx][1]  # RF50
        print(model)
        X_train = data_train[features]    # features
        Y_train = data_train['class_ID']  # labels
        X_test = data_test[features]      # features
        Y_test = data_test['class_ID']    # labels
        # Feature normalization
        Norm_mean = X_train.mean()
        Norm_std = X_train.std()
        X_train = (X_train - Norm_mean) / Norm_std
        X_test = (X_test - Norm_mean) / Norm_std
        # Train on the entire train set
        final_model = classification_train(X_train, Y_train, model,
                                           rebalance=input_args['rebalance_classes'])
        # Evaluate on the full test set
        pred_class, pred_prob = classification_predict(X_test, final_model)
        # Print evaluation report
        CR = classification_report(Y_test, pred_class)
        print(CR)
        # Save the model to disk
        model = {
            'name': model_name,
            'model': final_model,
            'features': features,
            'normalization_mean': Norm_mean,
            'normalization_std': Norm_std,
            'classes': class_encoder,
            'input_args': input_args,
        }
        pickle.dump(model,
                    open(os.path.join(out_dir, model_name + '_model_' + now_str + '.sav'), 'wb'))
print('-----------------')
print('   Annotations   ')
# Load annotations (annot_perfile is needed for the comparison below, so this
# block must not stay commented out)
annot = Annotation()
annot.from_netcdf(annotation_file)
print(annot.summary())
annot_perfile = annot.summary(rows='audio_file_name', columns='label_class')
annot_perfile.rename(columns={"FS": "FS-annot"}, inplace=True)
annot_perfile = annot_perfile['FS-annot'].to_frame()
# annot_perfile.to_csv('annot.csv')

print(' ')
print('-----------------')
print('   Detections    ')
# Load detections
detec = Measurement()
detec.from_netcdf(detec_file)
print(detec.summary())
detec_perfile = detec.summary(rows='audio_file_name', columns='label_class')
detec_perfile.rename(columns={"FS": "FS-detec"}, inplace=True)
detec_perfile = detec_perfile['FS-detec'].to_frame()

# Compare annotation and detection counts per file
dd = pd.concat([annot_perfile, detec_perfile], axis=1)
dd['diff'] = dd['FS-annot'] - dd['FS-detec']
dd.plot()

# outdir = r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\detec_export'
# detec.to_pamlab(outdir, single_file=False)
# outdir = r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\annot_export'
# annot.to_pamlab(outdir, single_file=False)
min_threshold = 0.7
noise_label = 'NN'

# Load names of files and start/stop times where false alarms have been
# manually identified
df = pd.read_excel(xls_file, header=None)
for idx in range(0, len(df)):
    # File name and time window to process
    wav_file_name = df[0][idx]
    tmin_sec = df[1][idx]
    tmax_sec = df[2][idx]
    print(wav_file_name, tmin_sec, tmax_sec)
    detec_file_path = os.path.join(in_dir, wav_file_name + '.nc')

    # Load detection/measurement file
    meas = Measurement()
    meas.from_netcdf(detec_file_path)
    data_df = meas.data

    # Only keep fish detections above the given confidence threshold and
    # within the given time window
    data_df_filt = data_df[(data_df.label_class == fish_label) &
                           (data_df.confidence >= min_threshold) &
                           (data_df.time_min_offset >= tmin_sec) &
                           (data_df.time_max_offset <= tmax_sec)]
    data_df_filt.reset_index(inplace=True, drop=True)
    meas.data = data_df_filt

    # Change fish labels to noise labels
    meas.insert_values(label_class=noise_label)

    # Save to a new nc file
    meas.to_netcdf(os.path.join(out_dir, wav_file_name + str(idx)))
print('done')
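# Expected layout of 'xls_file', as implied by the column indexing above
# (pd.read_excel(..., header=None), so there is no header row). The file
# names and times below are hypothetical examples:
#
#   column 0 (wav file name)     column 1 (tmin_sec)   column 2 (tmax_sec)
#   67391492.181017121114.wav    120.0                 180.0
#   67391492.181017151114.wav    42.5                  63.0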
classif_model = pickle.load(open(classif_model_file, 'rb'))
features = classif_model['features']
model = classif_model['model']
Norm_mean = classif_model['normalization_mean']
Norm_std = classif_model['normalization_std']
classes_encoder = classif_model['classes']

# Loop through each file
files_list = os.listdir(indir)  # list of files
for file in files_list:
    if os.path.isfile(os.path.join(indir, file)) and file.endswith(file_ext):
        # Skip files that have already been processed
        if not os.path.isfile(os.path.join(outdir, file)):
            # Load file
            print(file)
            meas = Measurement()
            meas.from_netcdf(os.path.join(indir, file))

            # Reclassify
            data = meas.data
            n1 = len(data)
            # Drop observations/rows with NaNs
            data = data.replace([np.inf, -np.inf], np.nan)
            data.dropna(subset=features, axis=0, how='any', thresh=None,
                        inplace=True)
            n2 = len(data)
            print('Deleted observations (due to NaNs): ' + str(n1 - n2))

            # Classification - predictions
            X = data[features]
            X = (X - Norm_mean) / Norm_std
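            # The fragment ends mid-script. A minimal sketch of a plausible
            # continuation, reusing classification_predict() as in the
            # training script above and to_netcdf() as in the other scripts;
            # the exact post-processing (label decoding, saved fields) is an
            # assumption, so it is left commented out:
            # pred_class, pred_prob = classification_predict(X, model)
            # data['label_class'] = classes_encoder.inverse_transform(pred_class)  # hypothetical decoding step
            # data['confidence'] = pred_prob
            # meas.data = data
            # meas.to_netcdf(os.path.join(outdir, file))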