def preprocess_array(array_idx, output_dir, total_duration):
    """Run the tridesclous signal-preprocessing stage for one electrode array.

    Opens the DataIO at *output_dir* for channel group *array_idx*, applies a
    250-3000 Hz band-pass with common-reference removal, estimates the signal
    noise on a short initial window, then filters the full recording without
    peak detection.

    Parameters
    ----------
    array_idx : int
        Channel-group index of the array inside the DataIO directory.
    output_dir : str
        Path of the tridesclous working directory.
    total_duration : float
        Recording duration (seconds) to run the preprocessor over.
    """
    dataio = DataIO(dirname=output_dir, ch_grp=array_idx)

    # Band-pass + common-reference preprocessing, chunked numpy engine.
    preprocessor_params = {
        'highpass_freq': 250.,
        'lowpass_freq': 3000.,
        'smooth_size': 0,
        'common_ref_removal': True,
        'chunksize': 32768,
        'lostfront_chunksize': 0,
        'signalpreprocessor_engine': 'numpy',
    }

    catalogue = CatalogueConstructor(dataio=dataio, chan_grp=array_idx)
    catalogue.set_preprocessor_params(**preprocessor_params)

    # TODO offer noise estimation duration somewhere.
    # Use at most 10 s, capped by the total duration and by 99% of the
    # first segment's length.
    noise_duration = min(
        10.,
        total_duration,
        dataio.get_segment_length(seg_num=0) / dataio.sample_rate * .99)

    def _run_timed(label, step, **kwargs):
        # Execute one pipeline step and print its wall-clock time.
        start = time.perf_counter()
        step(**kwargs)
        print(label, time.perf_counter() - start)

    _run_timed('estimate_signals_noise', catalogue.estimate_signals_noise,
               seg_num=0, duration=noise_duration)
    # Filter the whole recording; peaks are detected in a later stage.
    _run_timed('run_signalprocessor', catalogue.run_signalprocessor,
               duration=total_duration, detect_peak=False)
def compute_array_catalogue(array_idx, preprocess_dir, subject, recording_date, data_files, cluster_merge_threshold):
    """Build a tridesclous spike-sorting catalogue for one electrode array.

    Reads the preprocessed raw signals for *array_idx* from *preprocess_dir*,
    creates a fresh DataIO working directory under
    cfg['single_unit_spike_sorting_dir']/<subject>/<recording_date>, and then,
    for each channel treated as its own channel group, runs the full
    catalogue pipeline: noise estimation, signal processing with peak
    detection, waveform extraction/cleaning, PCA features, 'sawchaincut'
    clustering, diagnostic figures, and finally the peeler catalogue.

    Parameters
    ----------
    array_idx : int
        Index of the electrode array (selects 'channel_group_<idx>' inputs).
    preprocess_dir : str
        Directory containing the preprocessed per-segment raw files.
    subject, recording_date : str
        Used to build the output directory path.
    data_files : list of dict
        One entry per segment; each must carry a 'duration' key (seconds).
    cluster_merge_threshold : float
        NOTE(review): currently unused — 'auto_merge_threshold' is hard-coded
        to 2. below, and the commented-out clust_kargs show the intended use.
        Confirm whether the hard-coding is deliberate.
    """
    # Only proceed if the preprocessing stage produced data for this array.
    if os.path.exists(
            os.path.join(preprocess_dir, 'channel_group_%d' % array_idx,
                         'catalogue_constructor')):
        output_dir = os.path.join(cfg['single_unit_spike_sorting_dir'],
                                  subject, recording_date,
                                  'array_%d' % array_idx)
        # Remove the output directory if it already exists (fresh run).
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)

        # Collect one preprocessed raw file per segment (want to use all
        # data for clustering).
        data_file_names = []
        for seg in range(len(data_files)):
            data_file_names.append(
                os.path.join(preprocess_dir,
                             'channel_group_%d' % array_idx,
                             'segment_%d' % seg,
                             'processed_signals.raw'))

        dataio = DataIO(dirname=output_dir)
        dataio.set_data_source(type='RawData',
                               filenames=data_file_names,
                               dtype='float32',
                               sample_rate=cfg['intan_srate'],
                               total_channel=cfg['n_channels_per_array'])
        # Intan amplifier scale factor (presumably µV per bit — TODO confirm).
        dataio.datasource.bit_to_microVolt = 0.195
        # Each channel becomes its own single-channel group so channels are
        # sorted independently.
        for ch_grp in range(cfg['n_channels_per_array']):
            dataio.add_one_channel_group(channels=[ch_grp], chan_grp=ch_grp)

        # Total duration across all segments.
        total_duration = np.sum([x['duration'] for x in data_files])

        figure_out_dir = os.path.join(output_dir, 'figures')
        os.mkdir(figure_out_dir)

        # Run the full catalogue pipeline once per channel group.
        for ch_grp in range(cfg['n_channels_per_array']):
            print(ch_grp)
            cc = CatalogueConstructor(dataio=DataIO(dirname=output_dir,
                                                    ch_grp=ch_grp),
                                      chan_grp=ch_grp)

            # Pipeline parameters. Note: no filtering here — the signals were
            # already band-passed by the preprocessing stage.
            fullchain_kargs = {
                'duration': total_duration,
                'preprocessor': {
                    'highpass_freq': None,
                    'lowpass_freq': None,
                    'smooth_size': 0,
                    'common_ref_removal': False,
                    'chunksize': 32768,
                    'lostfront_chunksize': 0,
                    'signalpreprocessor_engine': 'numpy',
                },
                'peak_detector': {
                    'peakdetector_engine': 'numpy',
                    'peak_sign': '-',
                    'relative_threshold': 2.,
                    'peak_span': 0.0002,
                },
                'noise_snippet': {
                    'nb_snippet': 300,
                },
                'extract_waveforms': {
                    'n_left': -20,
                    'n_right': 30,
                    'mode': 'all',
                    'nb_max': 2000000,
                    'align_waveform': False,
                },
                'clean_waveforms': {
                    'alien_value_threshold': 100.,
                },
            }
            feat_method = 'pca_by_channel'
            feat_kargs = {'n_components_by_channel': 5}
            clust_method = 'sawchaincut'
            clust_kargs = {
                'max_loop': 1000,
                'nb_min': 20,
                'break_nb_remain': 30,
                'kde_bandwith': 0.01,
                # NOTE(review): hard-coded 2. instead of the function's
                # cluster_merge_threshold argument (see commented-out
                # variant below) — confirm this is intentional.
                'auto_merge_threshold': 2.,
                'print_debug': False
                # 'max_loop': 1000,
                # 'nb_min': 20,
                # 'break_nb_remain': 30,
                # 'kde_bandwith': 0.01,
                # 'auto_merge_threshold': cluster_merge_threshold,
                # 'print_debug': False
            }

            # Preprocessor and peak-detector params are passed together.
            p = {}
            p.update(fullchain_kargs['preprocessor'])
            p.update(fullchain_kargs['peak_detector'])
            cc.set_preprocessor_params(**p)

            # TODO offer noise estimation duration somewhere
            # At most 10 s, capped by total duration and 99% of segment 0.
            noise_duration = min(
                10.,
                fullchain_kargs['duration'],
                dataio.get_segment_length(seg_num=0) / dataio.sample_rate * .99)
            # ~ print('noise_duration', noise_duration)

            t1 = time.perf_counter()
            cc.estimate_signals_noise(seg_num=0, duration=noise_duration)
            t2 = time.perf_counter()
            print('estimate_signals_noise', t2 - t1)

            # Filter + detect peaks over the whole recording.
            t1 = time.perf_counter()
            cc.run_signalprocessor(duration=fullchain_kargs['duration'])
            t2 = time.perf_counter()
            print('run_signalprocessor', t2 - t1)

            t1 = time.perf_counter()
            cc.extract_some_waveforms(**fullchain_kargs['extract_waveforms'])
            t2 = time.perf_counter()
            print('extract_some_waveforms', t2 - t1)

            # Figure: raw extracted waveforms for this channel.
            fname = 'chan_%d_init_waveforms.png' % ch_grp
            fig = plot_waveforms(np.squeeze(cc.some_waveforms).T)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            t1 = time.perf_counter()
            # ~ duration = d['duration'] if d['limit_duration'] else None
            # ~ d['clean_waveforms']
            cc.clean_waveforms(**fullchain_kargs['clean_waveforms'])
            t2 = time.perf_counter()
            print('clean_waveforms', t2 - t1)

            # Figure: waveforms after artifact ("alien") rejection.
            fname = 'chan_%d_clean_waveforms.png' % ch_grp
            fig = plot_waveforms(np.squeeze(cc.some_waveforms).T)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # ~ t1 = time.perf_counter()
            # ~ n_left, n_right = cc.find_good_limits(mad_threshold = 1.1,)
            # ~ t2 = time.perf_counter()
            # ~ print('find_good_limits', t2-t1)

            t1 = time.perf_counter()
            cc.extract_some_noise(**fullchain_kargs['noise_snippet'])
            t2 = time.perf_counter()
            print('extract_some_noise', t2 - t1)

            # Plot noise
            fname = 'chan_%d_noise.png' % ch_grp
            fig = plot_noise(cc)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # Project waveforms into feature space (PCA per channel).
            t1 = time.perf_counter()
            cc.extract_some_features(method=feat_method, **feat_kargs)
            t2 = time.perf_counter()
            print('project', t2 - t1)

            t1 = time.perf_counter()
            cc.find_clusters(method=clust_method, **clust_kargs)
            t2 = time.perf_counter()
            print('find_clusters', t2 - t1)

            # Remove empty clusters
            cc.trash_small_cluster(n=0)

            # Centroids are needed before ordering; compute if missing.
            if cc.centroids_median is None:
                cc.compute_all_centroid()

            # order cluster by waveforms rms
            cc.order_clusters(by='waveforms_rms')

            # Figure: per-cluster mean waveforms.
            fname = 'chan_%d_init_clusters.png' % ch_grp
            cluster_labels = cc.clusters['cluster_label']
            fig = plot_cluster_waveforms(cc, cluster_labels)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # save the catalogue
            cc.make_catalogue_for_peeler()

            # Free per-channel memory before the next iteration.
            gc.collect()