def plot_full_figure(time_sec=None):

    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\large-array_quillback\AMAR173.4.20190920T161248Z.nc'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\AMAR173.1.20190920T161248Z.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\hydrophones_config_07-HI.csv'
    t1_sec = 1570
    t2_sec = 1587  #1590

    filter_x = [-1.5, 1.5]
    filter_y = [-1.5, 1.5]
    filter_z = [-1.5, 1.5]
    filter_x_std = 0.5
    filter_y_std = 0.5
    filter_z_std = 0.5

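    # Plotting style and axis-limit parameters, stored as a single-row
    # DataFrame so they can be passed to plot_top_view()/plot_side_view()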
    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],  #0.7
        'uncertainty_width': [0.2],  #0.2
        'x_min': [-1.26],
        'x_max': [1.26],
        'y_min': [-1.26],
        'y_max': [1.26],
        'z_min': [-1.5],
        'z_max': [2.1],
    })

    ## ###########################################################################

    ## load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data

    ## load hydrophone locations
    hydrophones_config = pd.read_csv(hp_config_file)

    # Filter
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                            & (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]
    # Adjust detection times
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec

    if time_sec is not None:
        loc_data = loc_data.loc[loc_data['time_max_offset'] <= time_sec]
    else:
        print('Static')

    # update loc object
    loc.data = loc_data

    # plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec-t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors*2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                 norm=norm,
    #                                 orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file,
                                             loc,
                                             t1_sec,
                                             t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax,
                                        cmap=cmap,
                                        norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')

    if time_sec is not None:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # plot detection points on top of spectrogram
    #gs0 = fig_final.add_gridspec(60,1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.ones(len(loc_data['time_min_offset']))
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x,
                     det_y,
                     c=loc_data['time_min_offset'],
                     cmap=cmap,
                     norm=norm,
                     s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # #pos =[left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)

    #size = fig_final.get_size_inches()

    plt.subplots_adjust(left=0.08,
                        bottom=0.1,
                        right=0.95,
                        top=0.95,
                        wspace=0,
                        hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1,len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x,det_y,'.r')
    # cax2.set_xlim(ax_spectro.get_xlim())

    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # plot localization top
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # plot localization side
    #ax_sideloc = fig_final.add_subplot(3,3,7,sharex = ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm,
                   ax_sideloc)
    ax_sideloc.set_anchor('W')

    # set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1,1)
    # frame1_sec = 152.8 # second detection -> 16:38:59.8
    # #ax_video1 = fig_final.add_subplot(3,3,5)
    # plot_video_frame(video_file,frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1,1)
    # frame2_sec = 160 # 4th detection -> 16:39:07
    # #ax_video2 = fig_final.add_subplot(3,3,6)
    # plot_video_frame(video_file,frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(8.6, 6.72)

    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
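# A minimal usage sketch (not part of the original example), assuming the
# module-level matplotlib.pyplot import (plt) used above:
# fig = plot_full_figure()             # all detections ('Static' mode)
# fig.savefig('full_figure_static.png', dpi=300)
# fig = plot_full_figure(time_sec=5)   # detections up to 5 s, with time cursor
# fig.savefig('full_figure_5s.png', dpi=300)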
Example #2
def plot_full_figure(time_sec=None):

    #loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_1m_5cm.nc'
    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_2cm_3m.nc'
    loc_file_matlab = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_matlab_with_CI.csv'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\671404070.190918222812.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\hydrophones_config_HI-201909.csv'
    localization_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\config_files\localization_config_mobile_array.yaml'
    t1_sec = 214  #216
    t2_sec = 224  #223

    filter_x = [-5, 5]
    filter_y = [-5, 5]
    filter_z = [-2, 5]
    filter_x_std = 6
    filter_y_std = 9
    filter_z_std = 6

    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],  #0.7
        'uncertainty_width': [0.2],  #0.2
        'x_min': [-1.5],
        'x_max': [1.5],
        'y_min': [-0.5],
        'y_max': [3],
        'z_min': [-1.5],
        'z_max': [1.5],
    })

    ## ###########################################################################
    localization_config = read_yaml(localization_config_file)
    hydrophones_config = pd.read_csv(hp_config_file)
    sound_speed_mps = localization_config['ENVIRONMENT']['sound_speed_mps']
    ref_channel = localization_config['TDOA']['ref_channel']
    hydrophone_pairs = defineReceiverPairs(len(hydrophones_config),
                                           ref_receiver=ref_channel)
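    # Note: sound_speed_mps, ref_channel, and hydrophone_pairs are only used
    # by the commented-out error-recalculation block below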

    ## load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data

    # Use the localizations with Matlab-computed confidence intervals
    # (this replaces the data loaded from the netCDF file above)
    loc_data = pd.read_csv(loc_file_matlab)

    # ## recalculate data errors
    # diff=[]
    # idx = 0
    # for idx in range(len(loc_data)):
    #     m = loc_data.loc[[idx],['x','y','z']]
    #     tdoa_m = predict_tdoa(m, sound_speed_mps, hydrophones_config, hydrophone_pairs)
    #     tdoa_measured = loc_data.loc[[idx],['tdoa_sec_1','tdoa_sec_2','tdoa_sec_3']].to_numpy()
    #     #diff_temp = (tdoa_m-tdoa_measured.T)**2
    #     if idx==0:
    #         diff = (tdoa_m-tdoa_measured.T)**2
    #     else:
    #         diff = np.vstack((diff,(tdoa_m-tdoa_measured.T)**2))

    # Q = len(loc_data)
    # #M = m.size # number of dimensions of the model (here: X, Y, and Z)
    # #N = len(tdoa_sec) # number of measurements
    # #error_std = np.sqrt((1/(Q*(N-M))) * (sum((tdoa_sec-tdoa_m)**2)))
    # tdoa_errors_std = np.sqrt( (1/Q)*(sum(diff)))

    # #tdoa_errors_std = calc_data_error(tdoa_sec, m, sound_speed_mps,hydrophones_config, hydrophone_pairs)
    # for idx in range(len(loc_data)):
    #     loc_errors_std = calc_loc_errors(tdoa_errors_std, loc_data.loc[[idx],['x','y','z']] , sound_speed_mps, hydrophones_config, hydrophone_pairs)
    #     print('m')

    # Filter
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                            & (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]
    # Adjust detection times
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec

    if time_sec is not None:
        loc_data = loc_data.loc[loc_data['time_max_offset'] <= time_sec]
    else:
        print('Static')

    # update loc object
    loc.data = loc_data

    # plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec-t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors*2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                 norm=norm,
    #                                 orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file,
                                             loc,
                                             t1_sec,
                                             t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax,
                                        cmap=cmap,
                                        norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')

    if time_sec is not None:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # plot detection points on top of spectrogram
    #gs0 = fig_final.add_gridspec(60,1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.ones(len(loc_data['time_min_offset']))
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x,
                     det_y,
                     c=loc_data['time_min_offset'],
                     cmap=cmap,
                     norm=norm,
                     s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # #pos =[left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)

    #size = fig_final.get_size_inches()

    plt.subplots_adjust(left=0.08,
                        bottom=0.1,
                        right=0.95,
                        top=0.95,
                        wspace=0,
                        hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1,len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x,det_y,'.r')
    # cax2.set_xlim(ax_spectro.get_xlim())

    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # plot localization top
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # plot localization side
    #ax_sideloc = fig_final.add_subplot(3,3,7,sharex = ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm,
                   ax_sideloc)
    ax_sideloc.set_anchor('W')

    # set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1,1)
    # frame1_sec = 152.8 # second detection -> 16:38:59.8
    # #ax_video1 = fig_final.add_subplot(3,3,5)
    # plot_video_frame(video_file,frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1,1)
    # frame2_sec = 160 # 4th detection -> 16:39:07
    # #ax_video2 = fig_final.add_subplot(3,3,6)
    # plot_video_frame(video_file,frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(9.08, 6.72)

    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
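# The x/y/z-bounds + standard-deviation filtering block is repeated verbatim
# in both functions above (and in later examples). A minimal refactoring
# sketch (an assumption, not part of the original code), using the same
# column names:
def filter_localizations(loc_data, filter_x, filter_y, filter_z,
                         filter_x_std, filter_y_std, filter_z_std):
    """Drop NaN positions and keep points within bounds and uncertainty limits."""
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])
    return loc_data.loc[
        loc_data['x'].between(min(filter_x), max(filter_x)) &
        loc_data['y'].between(min(filter_y), max(filter_y)) &
        loc_data['z'].between(min(filter_z), max(filter_z)) &
        (loc_data['x_std'] <= filter_x_std) &
        (loc_data['y_std'] <= filter_y_std) &
        (loc_data['z_std'] <= filter_z_std)]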
Example #3
    'uncertainty_color': ['black'],
    'uncertainty_style': ['-'],
    'uncertainty_alpha': [1],  #0.7
    'uncertainty_width': [0.2],  #0.2
    'x_min': [-1.26],
    'x_max': [1.26],
    'y_min': [-1.26],
    'y_max': [1.26],
    'z_min': [-1.5],
    'z_max': [2.1],
})

## ###########################################################################

## load localization results
loc = Measurement()
loc.from_netcdf(loc_file)
loc_data = loc.data

## load hydrophone locations
hydrophones_config = pd.read_csv(hp_config_file)

# Filter
loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                        & (loc_data['x'] <= max(filter_x)) &
                        (loc_data['y'] >= min(filter_y)) &
                        (loc_data['y'] <= max(filter_y)) &
                        (loc_data['z'] >= min(filter_z)) &
                        (loc_data['z'] <= max(filter_z)) &
                        (loc_data['x_std'] <= filter_x_std) &
                        (loc_data['y_std'] <= filter_y_std) &
                        (loc_data['z_std'] <= filter_z_std)]
Example #4
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder from Measurements
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# ## netcdf folder from Measurements
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
annot4 = Measurement()
annot4.from_netcdf(netcdf_files, verbose=True)
print(len(annot4))

# import xarray as xr
# d=annot3.data
# index = range(0,len(d),1)
# d['index']=index
# #d = d.set_index(['index','entry_date', 'frequency_min','label_class'])
# d = d.set_index(['index'])

# data = d.to_xarray()

# data2=data.sel(index=0)
Example #5
    'uncertainty_style': ['-'],
    'uncertainty_alpha': [1], #0.7
    'uncertainty_width': [0.2], #0.2
    'x_min':[-3],
    'x_max':[3],
    'y_min':[-3],
    'y_max':[3],
    'z_min':[-3],
    'z_max':[3],    
    })
    
## ###########################################################################

## load localization results 0 degrees
file1 = '0_deg.nc'
loc1 = Measurement()
loc1.from_netcdf(os.path.join(indir,file1))
# loc1_data = loc1.data
# # Filter
# loc1_data = loc1_data.dropna(subset=['x', 'y','z']) # remove NaN
# loc1_data = loc1_data.loc[(loc1_data['x']>=min(filter_x)) & 
#                         (loc1_data['x']<=max(filter_x)) &
#                         (loc1_data['y']>=min(filter_y)) & 
#                         (loc1_data['y']<=max(filter_y)) &
#                         (loc1_data['z']>=min(filter_z)) & 
#                         (loc1_data['z']<=max(filter_z)) &
#                         (loc1_data['x_std']<= filter_x_std) & 
#                         (loc1_data['y_std']<= filter_y_std) &
#                         (loc1_data['z_std']<= filter_z_std)
#                         ]
Example #6
detection_config = read_yaml(detection_config_file)
localization_config = read_yaml(localization_config_file)

# Look up data files for all channels
audio_files = find_audio_files(infile, hydrophones_config)

# run detector on selected channel
print('DETECTION')
# detections = run_detector(audio_files['path'][detection_config['AUDIO']['channel']],
#                           audio_files['channel'][detection_config['AUDIO']['channel']],
#                           detection_config,
#                           chunk = [t1, t2],
#                           deployment_file=deployment_info_file)
# detections.insert_values(frequency_min=20)

detections = Measurement()
detections.from_pamlab(annotation_file)

print(str(len(detections)) + ' detections')

# # plot spectrogram/waveforms of all channels and detections
# plot_data(audio_files,
#           detection_config['SPECTROGRAM']['frame_sec'],
#           detection_config['SPECTROGRAM']['window_type'],
#           detection_config['SPECTROGRAM']['nfft_sec'],
#           detection_config['SPECTROGRAM']['step_sec'],
#           detection_config['SPECTROGRAM']['fmin_hz'],
#           detection_config['SPECTROGRAM']['fmax_hz'],
#           chunk = [t1, t2],
#           detections=detections,
#           detections_channel=detection_config['AUDIO']['channel'])
Example #7
    # sources['phi'] = phi

# Define Measurement object for the localization results
# if localization_config['METHOD']['linearized_inversion']:
#     localizations = Measurement()
#     localizations.metadata['measurer_name'] = localization_method_name
#     localizations.metadata['measurer_version'] = '0.1'
#     localizations.metadata['measurements_name'] = [['x', 'y', 'z', 'x_std', 'y_std', 'z_std', 'tdoa_errors_std']]

# if localization_config['METHOD']['grid_search']:
#     localizations = Measurement()
#     localizations.metadata['measurer_name'] = localization_method_name
#     localizations.metadata['measurer_version'] = '0.1'
#     localizations.metadata['measurements_name'] = [['theta', 'phi', 'theta_std', 'phi_std', 'tdoa_errors_std']]

localizations = Measurement()
localizations.metadata['measurer_name'] = localization_method_name
localizations.metadata['measurer_version'] = '0.1'
localizations.metadata['measurements_name'] = [[
    'x', 'y', 'z', 'x_std', 'y_std', 'z_std', 'tdoa_errors_std',
    'tdoa_sec_1', 'tdoa_sec_2', 'tdoa_sec_3', 'tdoa_sec_4', 'tdoa_sec_5'
]]


# need to define what output is for grid search


# pick single detection (will use loop after)
print('LOCALIZATION')
for detec_idx, detec in detections.data.iterrows():

    if 'detec_idx_forced' in locals():
        print('Warning: forced to only process detection #', str(detec_idx_forced))
        detec = detections.data.iloc[detec_idx_forced]
Example #8
# # Plot
# graph = GrapherFactory('SoundPlotter', title='Recording', frequency_max=1000)
# graph.add_data(sound) # add waveform data
# graph.add_data(spectro) # add spectrogram
# graph.add_annotation(detec, panel=0, color='green', label='Detections') # overlay detections on waveform plot
# graph.add_annotation(detec, panel=1, color='green', label='Detections') # overlay detections on spectrogram plot
# graph.colormap = 'binary'
# #graph.colormap = 'jet'
# graph.show()

from ecosound.core.measurement import Measurement
import seaborn as sns
# load detections
detection_file = r"C:\Users\xavier.mouy\Desktop\Tutorial\results\67674121.181017060806.wav.nc"
detec = Measurement()
detec.from_netcdf(detection_file)

# per-class summary statistics of detection peak frequency
print(detec.data.groupby('label_class')['freq_peak'].describe())
ax = sns.violinplot(y=detec.data['label_class'], x=detec.data['freq_peak'])
ax.set(xlabel='Detections peak frequency (Hz)', ylabel='Detections class')
# plt.show()  # violinplot returns an Axes; display via matplotlib

# ## Input parameters ##########################################################

# audio_file = r"C:\Users\xavier.mouy\Desktop\Tutorial\data\67674121.181017060806.wav"

# #audio_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_hornby-island_2019\audio_data\AMAR173.4.20190916T011248Z.wav"
# #annotation_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_hornby-island_2019\manual_annotations\AMAR173.4.20190916T011248Z.Table.1.selections.txt"
# #detection_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset\AMAR173.4.20190916T011248Z.wav.nc"
Example #9
# filter_x=[-1.5, 1.5]
# filter_y=[-1.5, 1.5]
# filter_z=[-2, 2]
# filter_x_std=0.3
# filter_y_std=0.3
# filter_z_std=0.3

# load data
print('')
print('Loading dataset')
idx = 0
for infile in os.listdir(indir):
    if infile.endswith(".nc"):
        print(infile)

        locs = Measurement()
        locs.from_netcdf(os.path.join(indir, infile))
        loc_data = locs.data

        # Filter
        loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
        loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                                & (loc_data['x'] <= max(filter_x)) &
                                (loc_data['y'] >= min(filter_y)) &
                                (loc_data['y'] <= max(filter_y)) &
                                (loc_data['z'] >= min(filter_z)) &
                                (loc_data['z'] <= max(filter_z)) &
                                (loc_data['x_std'] <= filter_x_std) &
                                (loc_data['y_std'] <= filter_y_std) &
                                (loc_data['z_std'] <= filter_z_std)]
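        # (sketch, not in the original snippet) presumably the filtered
        # results are accumulated across files, e.g.:
        # if idx == 0:
        #     all_locs = loc_data
        # else:
        #     all_locs = pd.concat([all_locs, loc_data], ignore_index=True)
        # idx += 1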
Example #10
def main():
    # input arguments
    input_args = dict()
    input_args['positive_class_label'] = 'FS'
    input_args['train_ratio'] = 0.75
    input_args['cv_splits'] = 10  #5
    input_args['cv_repeats'] = 1
    input_args['rebalance_classes'] = True
    #input_args['data_file']= r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20201105145300.nc'
    input_args['data_file'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20200902194334.nc'
    input_args['out_dir'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\Classification'
    input_args['run_CV'] = False
    input_args['train_final_model'] = True
    input_args['final_model_name'] = 'RF50'

    ## DEFINITION OF CLASSIFIERS -------------------------------------------------
    models = []
    models.append(('Dummy', DummyClassifier(strategy="constant", constant=1)))
    models.append(
        ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    #models.append(('KNN', KNeighborsClassifier(n_neighbors=4, metric='euclidean')))
    models.append(('CART', DecisionTreeClassifier()))
    #models.append(('NB', GaussianNB()))
    models.append(('XGBoost', XGBClassifier()))
    #models.append(('MLP', MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=0)))
    models.append(('RF5',
                   RandomForestClassifier(n_estimators=5,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF10',
                   RandomForestClassifier(n_estimators=10,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF30',
                   RandomForestClassifier(n_estimators=30,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF50',
                   RandomForestClassifier(n_estimators=50,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    #models.append(('RF100', RandomForestClassifier(n_estimators=100,min_samples_split= 100, min_samples_leaf=50,random_state=0)))

    ## setup output folder
    now = datetime.now()
    now_str = now.strftime("%Y%m%dT%H%M%S")
    out_dir = os.path.join(input_args['out_dir'], now_str)
    os.mkdir(out_dir)

    ## Save input args to txt file
    with open(os.path.join(out_dir, 'input_args_' + now_str + '.txt'),
              "w") as text_file:
        text_file.write(str(input_args))

    ## Check that the model name exists before running all the processing
    if input_args['train_final_model']:
        model_names = [model[0] for model in models]
        model_idx = model_names.index(input_args['final_model_name'])

    ## LOAD DATASET ---------------------------------------------------------------
    dataset = Measurement()
    dataset.from_netcdf(input_args['data_file'])
    print(dataset.summary())

    ## DATA PREPARATION ----------------------------------------------------------
    # features
    features = dataset.metadata['measurements_name'][0]  # list of features used for the classification
    # data
    data = dataset.data
    # drop FS observations at Mill Bay
    indexNames = data[(data['label_class'] == 'FS')
                      & (data['location_name'] == 'Mill bay')].index
    data.drop(indexNames, inplace=True)
    # add subclass + IDs
    data, class_encoder = add_class_ID(data,
                                       input_args['positive_class_label'])
    data, _ = add_subclass(data)
    #subclass2class_table = subclass2class_conversion(data)
    # add group ID
    data, group_encoder = add_group(data)

    ## DATA CLEAN-UP -------------------------------------------------------------
    # Basic stats on all features
    data_stats = data[features].describe()
    #print(data_stats)

    # how many NaNs and Infs per column
    data = data.replace([np.inf, -np.inf], np.nan)
    Nnan = data[features].isna().sum()
    ax = Nnan.plot(kind='bar', title='Number of NaN/Inf', grid=True)
    ax.set_ylabel('Number of observations with NaNs/Infs')

    # Drop some features with too many NaNs
    features.remove('freq_flatness')
    features.remove('snr')
    features.remove('uuid')

    # drop observations/rows with NaNs
    data.dropna(subset=features, axis=0, how='any', thresh=None, inplace=True)
    data_stats2 = data[features].describe()

    # ## VISUALIZATION -------------------------------------------------------------
    # # box and whisker plots
    # data[features].plot(kind='box', subplots=True, layout=(7,7), sharex=False, sharey=False)
    # # histograms
    # data[features].hist()
    # # scatter plot matrix
    # pd.plotting.scatter_matrix(data[features])
    # scatter plot PCA
    # pca = PCA(n_components=2)
    # X  = pca.fit_transform(data[features])
    # y = data['class_ID']
    # plot_2d_space(X, y, 'Imbalanced dataset (2 PCA components)')

    ## SPLIT DATA INTO TRAIN & TEST SETS ------------------------------------------
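    # e.g. train_ratio = 0.75 -> n_splits = 4: holding out one
    # group-stratified fold below yields an ~75/25 train/test split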
    n_splits = round(1 / (1 - input_args['train_ratio']))
    skf = StratifiedGroupKFold(n_splits=n_splits,
                               shuffle=True,
                               random_state=None)
    for train_index, test_index in skf.split(data,
                                             data['subclass_ID'],
                                             groups=data['group_ID']):
        data_train, data_test = data.iloc[train_index], data.iloc[test_index]
        break
    # plot class repartition
    plot_datasets_distrib(data_train, data_test)
    plot_dataset_distrib(data,
                         attr_list=['subclass_label', 'label_class'],
                         title='Full dataset')
    plot_dataset_distrib(data_train,
                         attr_list=['subclass_label', 'label_class'],
                         title='Training set')
    plot_dataset_distrib(data_test,
                         attr_list=['subclass_label', 'label_class'],
                         title='Test set')
    # verify groups are not used in both datasets
    groups_intersection = plot_datasets_groups(data_train,
                                               data_test,
                                               show=True)

    ## CROSS VALIDATION ON TRAIN SET ----------------------------------------------
    if input_args['run_CV']:
        # run train/test experiments
        cv_predictions, cv_performance = cross_validation(
            data_train,
            models,
            features,
            cv_splits=input_args['cv_splits'],
            cv_repeats=input_args['cv_repeats'],
            rebalance=input_args['rebalance_classes'])
        # display summary results
        performance_report = summarize_performance(cv_performance,
                                                   threshold=0.5)
        print(performance_report)
        # plot mean Precision and Recall curves
        plot_PR_curves(cv_performance)
        plot_F_curves(cv_performance)
        # save results
        CV_results = {
            'cv_predictions': cv_predictions,
            'cv_performance': cv_performance,
            'models': models,
            'input_args': input_args,
        }
        pickle.dump(
            CV_results,
            open(os.path.join(out_dir, 'CV_' + now_str + '.sav'), 'wb'))

    ## FINAL EVALUATION ON TEST SET -----------------------------------------------
    if input_args['train_final_model']:

        print(' ')
        print('Final evaluation on test set:')
        print(' ')

        model_name = models[model_idx][0]
        model = models[model_idx][1]  # RF50
        print(model)
        X_train = data_train[features]  # features
        Y_train = data_train['class_ID']  #labels
        X_test = data_test[features]  # features
        Y_test = data_test['class_ID']  #labels
        # feature normalization
        Norm_mean = X_train.mean()
        Norm_std = X_train.std()
        X_train = (X_train - Norm_mean) / Norm_std
        X_test = (X_test - Norm_mean) / Norm_std
        # Train on entire train set
        final_model = classification_train(
            X_train, Y_train, model, rebalance=input_args['rebalance_classes'])
        # Evaluate on full test set
        pred_class, pred_prob = classification_predict(X_test, final_model)
        # Print evaluation report
        CR = classification_report(Y_test, pred_class)
        print(CR)
        # save the model to disk
        model = {
            'name': model_name,
            'model': final_model,
            'features': features,
            'normalization_mean': Norm_mean,
            'normalization_std': Norm_std,
            'classes': class_encoder,
            'input_args': input_args,
        }
        pickle.dump(
            model,
            open(
                os.path.join(out_dir,
                             model_name + '_model_' + now_str + '.sav'), 'wb'))
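        # A minimal sketch (not in the original) of reloading the saved model
        # dict; the same keys are read back in a later example:
        # classif_model = pickle.load(open(model_path, 'rb'))  # model_path: hypothetical
        # final_model = classif_model['model']
        # Norm_mean = classif_model['normalization_mean']
        # Norm_std = classif_model['normalization_std']
        # X_new_norm = (X_new - Norm_mean) / Norm_std  # reuse the training stats
        # pred_class, pred_prob = classification_predict(X_new_norm, final_model)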
Example #11
deployment_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\deployment_info.csv'
data_dir = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\audio_data'

# load meta data
operator_name = platform.uname().node
dep_info = DeploymentInfo()
dep_info.read(deployment_file)

#list files
files = ecosound.core.tools.list_files(indir,
                                       ext,
                                       recursive=False,
                                       case_sensitive=True)

for idx, file in enumerate(files):
    print(str(idx) + r'/' + str(len(files)) + ': ' + file)
    meas = Measurement()
    meas.from_netcdf(file)

    meas.insert_metadata(deployment_file)

    file_name = os.path.splitext(os.path.basename(file))[0]
    meas.insert_values(
        operator_name=operator_name,  # reuse the value computed above
        audio_file_name=os.path.splitext(os.path.basename(file_name))[0],
        audio_file_dir=data_dir,
        audio_file_extension='.wav',
        audio_file_start_date=ecosound.core.tools.filename_to_datetime(
            file_name)[0])
    meas.to_netcdf(os.path.join(outdir, file_name + '.nc'))
Example #12
def run_localization(infile, deployment_info_file, detection_config,
                     hydrophones_config, localization_config):
    t1 = 0
    t2 = 70
    # Look up data files for all channels
    audio_files = find_audio_files(infile, hydrophones_config)

    # run detector on selected channel
    print('DETECTION')
    detections = run_detector(
        audio_files['path'][detection_config['AUDIO']['channel']],
        audio_files['channel'][detection_config['AUDIO']['channel']],
        detection_config,
        chunk=[t1, t2],
        deployment_file=deployment_info_file)
    #detections.insert_values(frequency_min=20)

    print(str(len(detections)) + ' detections')

    # # plot spectrogram/waveforms of all channels and detections
    # plot_data(audio_files,
    #           detection_config['SPECTROGRAM']['frame_sec'],
    #           detection_config['SPECTROGRAM']['window_type'],
    #           detection_config['SPECTROGRAM']['nfft_sec'],
    #           detection_config['SPECTROGRAM']['step_sec'],
    #           detection_config['SPECTROGRAM']['fmin_hz'],
    #           detection_config['SPECTROGRAM']['fmax_hz'],
    #           chunk = [t1, t2],
    #           detections=detections,
    #           detections_channel=detection_config['AUDIO']['channel'])

    # localization
    sound_speed_mps = localization_config['ENVIRONMENT']['sound_speed_mps']
    ref_channel = localization_config['TDOA']['ref_channel']

    # define search window based on hydrophone separation and sound speed
    hydrophones_dist_matrix = calc_hydrophones_distances(hydrophones_config)
    TDOA_max_sec = np.max(hydrophones_dist_matrix) / sound_speed_mps

    # define hydrophone pairs
    hydrophone_pairs = defineReceiverPairs(len(hydrophones_config),
                                           ref_receiver=ref_channel)

    # pre-compute grid search if needed
    if localization_config['METHOD']['grid_search']:
        sources = defineSphereVolumeGrid(
            localization_config['GRIDSEARCH']['spacing_m'],
            localization_config['GRIDSEARCH']['radius_m'],
            origin=localization_config['GRIDSEARCH']['origin'])
        #sources = defineCubeVolumeGrid(0.2, 2, origin=[0, 0, 0])
        sources_tdoa = np.zeros(shape=(len(hydrophone_pairs), len(sources)))
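        # predicted TDOAs for every candidate grid point (one column per source)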
        for source_idx, source in sources.iterrows():
            sources_tdoa[:, source_idx] = predict_tdoa(source, sound_speed_mps,
                                                       hydrophones_config,
                                                       hydrophone_pairs).T
        theta = np.arctan2(sources['y'].to_numpy(),
                           sources['x'].to_numpy()) * (180 / np.pi)  # azimuth
        phi = np.arctan2(
            np.sqrt(sources['y'].to_numpy()**2 + sources['x'].to_numpy()**2),
            sources['z'].to_numpy()) * (180 / np.pi)  # elevation (polar) angle
        sources['theta'] = theta
        sources['phi'] = phi

    # Define Measurement object for the localization results
    if localization_config['METHOD']['linearized_inversion']:
        localizations = Measurement()
        localizations.metadata['measurer_name'] = localization_method_name
        localizations.metadata['measurer_version'] = '0.1'
        localizations.metadata['measurements_name'] = [[
            'x', 'y', 'z', 'x_std', 'y_std', 'z_std', 'tdoa_errors_std'
        ]]
    # need to define what output is for grid search

    # pick single detection (will use loop after)
    print('LOCALIZATION')
    for detec_idx, detec in detections.data.iterrows():

        if 'detec_idx_forced' in locals():
            print('Warning: forced to only process detection #',
                  str(detec_idx_forced))
            detec = detections.data.iloc[detec_idx_forced]

        print(str(detec_idx + 1) + '/' + str(len(detections)))

        # load data from all channels for that detection
        waveform_stack = stack_waveforms(audio_files, detec, TDOA_max_sec)

        # readjust signal boundaries to only focus on section with most energy
        percentage_max_energy = 90
        chunk = ecosound.core.tools.tighten_signal_limits_peak(
            waveform_stack[detection_config['AUDIO']['channel']],
            percentage_max_energy)
        waveform_stack = [x[chunk[0]:chunk[1]] for x in waveform_stack]

        # calculate TDOAs
        tdoa_sec, corr_val = calc_tdoa(
            waveform_stack,
            hydrophone_pairs,
            detec['audio_sampling_frequency'],
            TDOA_max_sec=TDOA_max_sec,
            upsample_res_sec=localization_config['TDOA']['upsample_res_sec'],
            normalize=localization_config['TDOA']['normalize'],
            doplot=False,
        )

        if localization_config['METHOD']['grid_search']:
            delta_tdoa = sources_tdoa - tdoa_sec
            delta_tdoa_norm = np.linalg.norm(delta_tdoa, axis=0)
            sources['delta_tdoa'] = delta_tdoa_norm
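            # the grid point minimizing this norm is the most likely source
            # position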

            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            colors = matplotlib.cm.tab10(hydrophones_config.index.values)
            #alphas = delta_tdoa_norm - min(delta_tdoa_norm)
            #alphas = alphas/max(alphas)
            #alphas = alphas - 1
            #alphas = abs(alphas)
            #alphas = np.array(alphas)
            alphas = 0.5
            for index, hp in hydrophones_config.iterrows():
                point = ax.scatter(
                    hp['x'],
                    hp['y'],
                    hp['z'],
                    s=40,
                    color=colors[index],
                    label=hp['name'],
                )
            ax.scatter(
                sources['x'],
                sources['y'],
                sources['z'],
                c=sources['delta_tdoa'],
                s=2,
                alpha=alphas,
            )
            # Axes labels
            ax.set_xlabel('X (m)', labelpad=10)
            ax.set_ylabel('Y (m)', labelpad=10)
            ax.set_zlabel('Z (m)', labelpad=10)
            # legend
            ax.legend(bbox_to_anchor=(1.07, 0.7, 0.3, 0.2), loc='upper left')
            plt.tight_layout()
            plt.show()

            plt.figure()
            sources.plot.hexbin(x="theta",
                                y="phi",
                                C="delta_tdoa",
                                reduce_C_function=np.mean,
                                gridsize=40,
                                cmap="viridis")

        # Linearized inversion
        if localization_config['METHOD']['linearized_inversion']:
            m, iterations_logs = linearized_inversion(
                tdoa_sec,
                hydrophones_config,
                hydrophone_pairs,
                localization_config['INVERSION'],
                sound_speed_mps,
                doplot=False)

            # Estimate uncertainty
            tdoa_errors_std = calc_data_error(tdoa_sec, m, sound_speed_mps,
                                              hydrophones_config,
                                              hydrophone_pairs)
            loc_errors_std = calc_loc_errors(tdoa_errors_std, m,
                                             sound_speed_mps,
                                             hydrophones_config,
                                             hydrophone_pairs)

        # Bring all detection and localization information together
        detec.loc['x'] = m['x'].values[0]
        detec.loc['y'] = m['y'].values[0]
        detec.loc['z'] = m['z'].values[0]
        detec.loc['x_std'] = loc_errors_std['x_std'].values[0]
        detec.loc['y_std'] = loc_errors_std['y_std'].values[0]
        detec.loc['z_std'] = loc_errors_std['z_std'].values[0]
        detec.loc['tdoa_errors_std'] = tdoa_errors_std[0]

        # stack the results into the localization object (DataFrame.append was
        # removed in pandas 2.0; pd.concat is the equivalent)
        localizations.data = pd.concat([localizations.data, detec.to_frame().T],
                                       ignore_index=True)

    return localizations
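# A minimal call sketch (not part of the original), assuming the config
# readers used elsewhere in these examples and hypothetical file paths:
# hydrophones_config = pd.read_csv(hp_config_file)
# detection_config = read_yaml(detection_config_file)
# localization_config = read_yaml(localization_config_file)
# locs = run_localization(wav_file, deployment_info_file, detection_config,
#                         hydrophones_config, localization_config)
# locs.to_netcdf(wav_file + '.nc')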
Example #13
# # load annotations
# print('-----------------')
# print('  Annotations    ')
# annot = Annotation()
# annot.from_netcdf(annotation_file)
# print(annot.summary())
# annot_perfile = annot.summary(rows='audio_file_name',columns='label_class')
# annot_perfile.rename(columns={"FS": "FS-annot"}, inplace=True)
# annot_perfile = annot_perfile['FS-annot'].to_frame()
# #annot_perfile.to_csv('annot.csv')

print(' ')
print('-----------------')
print('  Detections     ')
# load detections
detec = Measurement()
detec.from_netcdf(detec_file)
print(detec.summary())
detec_perfile = detec.summary(rows='audio_file_name', columns='label_class')
detec_perfile.rename(columns={"FS": "FS-detec"}, inplace=True)
detec_perfile = detec_perfile['FS-detec'].to_frame()

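# NOTE: the concatenation below needs annot_perfile from the commented-out
# annotation block above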
dd = pd.concat([annot_perfile, detec_perfile], axis=1)
dd['diff'] = dd['FS-annot'] - dd['FS-detec']
dd.plot()

# outdir=r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\detec_export'
# detec.to_pamlab(outdir, single_file=False)

# outdir=r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\annot_export'
# annot.to_pamlab(outdir, single_file=False)
Example #14
min_threshold = 0.7
noise_label = 'NN'

# load the names of files and the start/stop times where false alarms have
# been manually identified
df = pd.read_excel(xls_file, header=None)

for idx in range(0, len(df)):
    # file name to load
    wav_file_name = df[0][idx]
    tmin_sec = df[1][idx]
    tmax_sec = df[2][idx]
    print(wav_file_name, tmin_sec, tmax_sec)
    detec_file_path = os.path.join(in_dir, wav_file_name + '.nc')
    # load detection/measurement file
    meas = Measurement()
    meas.from_netcdf(detec_file_path)
    data_df = meas.data
    # Only keep fish detections above the confidence threshold and within the
    # given time window
    data_df_filt = data_df[(data_df.label_class == fish_label)
                           & (data_df.confidence >= min_threshold)
                           & (data_df.time_min_offset >= tmin_sec)
                           & (data_df.time_max_offset <= tmax_sec)]
    data_df_filt.reset_index(inplace=True, drop=True)
    meas.data = data_df_filt
    # Change fish labels to noise labels
    meas.insert_values(label_class=noise_label)
    # Save to new nc file
    meas.to_netcdf(os.path.join(out_dir, wav_file_name + str(idx)))

print('done')
Example #15
from ecosound.core.annotation import Annotation
from ecosound.core.measurement import Measurement
import pandas as pd

"""
Gathers measurements for all annotations and noise, merges them into a single
dataset, and re-labels classes to create a 2-class dataset: 'FS' vs 'NN'.

"""

# Define input and output files
annot_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only.nc'
noise_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
outfile = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_FS-NN_modified_20201105145300.nc'

# Load measurements
meas_annot = Measurement()
meas_annot.from_netcdf(annot_file)
meas_noise = Measurement()
meas_noise.from_netcdf(noise_file)

## Label noise measurement as 'NN'
meas_noise.insert_values(label_class='NN')
print(meas_noise.summary())

## relabel annotations that are not 'FS' as 'NN'
print(meas_annot.summary())
meas_annot.data['label_class'].replace(
    to_replace=['', 'ANT', 'HS', 'KW', 'UN'], value='NN', inplace=True)
print(meas_annot.summary())

## merge the 2 datasets
meas_NN_FS = meas_noise + meas_annot
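# Presumably the merged dataset is then written to the outfile defined above
# (a sketch, not in the original snippet):
# meas_NN_FS.to_netcdf(outfile)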
Example #16
import time
import pandas as pd


## Input parameters ##########################################################

annotation_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\Master_annotations_dataset.nc"
detection_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2"
outfile = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only2.nc'

# load annotations
annot = Annotation()
annot.from_netcdf(annotation_file)

# load detections
detec = Measurement()
detec.from_netcdf(detection_file)
print(detec)

freq_ovp = True              # default True
dur_factor_max = None        # default None
dur_factor_min = 0.1         # default None
ovlp_ratio_min = 0.3         # default None
remove_duplicates = True     # default False
inherit_metadata = True      # default False
filter_deploymentID = False  # default True

detec.filter_overlap_with(annot,
                          freq_ovp=freq_ovp,
                          dur_factor_max=dur_factor_max,
                          dur_factor_min=dur_factor_min,
                          ovlp_ratio_min=ovlp_ratio_min,
                          remove_duplicates=remove_duplicates,
                          inherit_metadata=inherit_metadata,
                          filter_deploymentID=filter_deploymentID)
Example #17
from ecosound.core.measurement import Measurement

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## Load netcdf measurement files from a folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
# outfile=r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset\dataset_noise.nc'
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
# #meas.to_netcdf(outfile)

# # ## Load netcdf measurement files from a single file
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2\JASCOAMARHYDROPHONE742_20140913T115018.797Z.wav.nc'
meas = Measurement()
meas.from_netcdf(netcdf_files, verbose=True)
print(len(meas))

# # ## Load netcdf measurement files from a list of files
# netcdf_files = []
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017121114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017151114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017181114.wav.nc")
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
Example #18
# load the classification model
classif_model = pickle.load(open(classif_model_file, 'rb'))
features = classif_model['features']
model = classif_model['model']
Norm_mean = classif_model['normalization_mean']
Norm_std = classif_model['normalization_std']
classes_encoder = classif_model['classes']

# loop through each file
files_list = os.listdir(indir)  # list of files
for file in files_list:
    if os.path.isfile(os.path.join(indir, file)) and file.endswith(file_ext):
        if os.path.isfile(os.path.join(outdir, file)) is False:
            # load file
            print(file)
            meas = Measurement()
            meas.from_netcdf(os.path.join(indir, file))
            # reclassify
            data = meas.data
            n1 = len(data)
            # drop observations/rows with NaNs
            data = data.replace([np.inf, -np.inf], np.nan)
            data.dropna(subset=features,
                        axis=0,
                        how='any',
                        thresh=None,
                        inplace=True)
            n2 = len(data)
            print('Deleted observations (due to NaNs): ' + str(n1 - n2))
            # Classification - predictions
            X = data[features]