def run(standardized_path, standardized_params, whiten_filter,
        output_directory='tmp/', if_file_exists='skip', save_results=False):
    """Execute detect step

    Cat: THIS CODE KEEPS TENSORFLOW OPEN FOR DETECTION AND THEN COMPUTES
    corrections post-detection

    Parameters
    ----------
    standardized_path: str or pathlib.Path
        Path to standardized data binary file

    standardized_params: dict, str or pathlib.Path
        Dictionary with standardized data parameters or path to a yaml file

    whiten_filter: numpy.ndarray, str or pathlib.Path
        Whitening matrix or path to a npy file

    output_directory: str, optional
        Location to store partial results, relative to
        CONFIG.data.root_folder, defaults to tmp/

    if_file_exists: str, optional
        One of 'overwrite', 'abort', 'skip'. Controls the behavior for every
        generated file. If 'overwrite', it replaces the files if they exist;
        if 'abort', it raises a ValueError exception if any file exists;
        if 'skip', it skips the operation if any file exists

    save_results: bool, optional
        Whether to save results to disk, defaults to False

    Returns
    -------
    clear_scores: numpy.ndarray (n_spikes, n_features, n_channels)
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels

    spike_index_clear: numpy.ndarray (n_clear_spikes, 2)
        2D array with indexes for clear spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    spike_index_all: numpy.ndarray (n_spikes, 2)
        2D array with indexes for all spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/ (if save_results is True):

    * ``spike_index_clear.npy`` - Same as spike_index_clear returned
    * ``spike_index_all.npy`` - Same as spike_index_all returned
    * ``rotation.npy`` - Rotation matrix for dimensionality reduction
    * ``scores_clear.npy`` - Scores for clear spikes

    The threshold detector runs on the CPU; the neural network detector runs
    on CPU or GPU, depending on how tensorflow is configured.

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/detect.py
    """
    CONFIG = read_config()

    # load files in case they are strings or Path objects
    standardized_params = file_loader(standardized_params)
    whiten_filter = file_loader(whiten_filter)

    # run detection
    if CONFIG.detect.method == 'threshold':
        return run_threshold(standardized_path, standardized_params,
                             whiten_filter, output_directory, if_file_exists,
                             save_results)
    elif CONFIG.detect.method == 'nn':
        return run_neural_network(standardized_path, standardized_params,
                                  whiten_filter, output_directory,
                                  if_file_exists, save_results)
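# A minimal usage sketch for the detect step above (illustrative, not part of
# the original module). It assumes the preprocess step already produced the
# standardized binary, its params yaml and the whitening matrix; the paths
# below are hypothetical.
#
#     scores, spike_index_clear, spike_index_all = run(
#         'tmp/preprocess/standardized.bin',
#         'tmp/preprocess/standardized.yaml',
#         'tmp/preprocess/whitening.npy',
#         output_directory='tmp/',
#         if_file_exists='overwrite',
#         save_results=True)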
def run(spike_index, templates, output_directory='tmp/',
        recordings_filename='standarized.bin'):
    """Deconvolute spikes

    Parameters
    ----------
    spike_index: numpy.ndarray (n_data, 2), str or pathlib.Path
        A 2D array for all potential spikes whose first column indicates the
        spike time and the second column the principal channels. Or path to
        a npy file

    templates: numpy.ndarray (n_channels, waveform_size, n_templates), str
    or pathlib.Path
        A 3D array with the templates. Or path to a npy file

    output_directory: str, optional
        Output directory (relative to CONFIG.data.root_folder) used to load
        the recordings to generate templates, defaults to tmp/

    recordings_filename: str, optional
        Recordings filename (relative to CONFIG.data.root_folder/
        output_directory) used to draw the waveforms from, defaults to
        standarized.bin

    Returns
    -------
    spike_train: numpy.ndarray (n_clear_spikes, 2)
        A 2D array with the spike train, first column indicates the spike
        time and the second column the neuron ID

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/deconvolute.py
    """
    spike_index = file_loader(spike_index)
    templates = file_loader(templates)

    logger = logging.getLogger(__name__)

    # read config file
    CONFIG = read_config()

    # read recording
    recording_path = os.path.join(CONFIG.data.root_folder, output_directory,
                                  'preprocess', recordings_filename)
    bp = BatchProcessor(recording_path, buffer_size=templates.shape[1])

    logger.debug('Starting deconvolution. templates.shape: {}, '
                 'spike_index.shape: {}'.format(templates.shape,
                                                spike_index.shape))

    # convert the refractory period (n_rf, in ms) to samples
    n_rf = int(CONFIG.deconvolution.n_rf * CONFIG.recordings.sampling_rate /
               1000)

    # run the deconvolution algorithm batch-wise
    mc = bp.multi_channel_apply
    res = mc(deconvolve,
             mode='memory',
             cleanup_function=fix_indexes,
             pass_batch_info=True,
             templates=templates,
             spike_index=spike_index,
             spike_size=CONFIG.spike_size,
             n_explore=CONFIG.deconvolution.n_explore,
             n_rf=n_rf,
             upsample_factor=CONFIG.deconvolution.upsample_factor,
             threshold_a=CONFIG.deconvolution.threshold_a,
             threshold_dd=CONFIG.deconvolution.threshold_dd)

    spike_train = np.concatenate([element for element in res], axis=0)

    logger.debug('spike_train.shape: {}'.format(spike_train.shape))

    # sort spikes by time
    spike_train = spike_train[np.argsort(spike_train[:, 0])]

    # save spike train
    path_to_spike_train = os.path.join(CONFIG.data.root_folder,
                                       output_directory, 'spike_train.npy')
    logger.info('Spike train saved in %s', path_to_spike_train)
    file_saver(spike_train, path_to_spike_train)

    return spike_train
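# A minimal usage sketch for the deconvolution step (illustrative, not part
# of the original module). spike_index and templates may be in-memory arrays
# or paths to npy files, as documented above; the paths are hypothetical.
#
#     spike_train = run('tmp/spike_index_all.npy',
#                       'tmp/templates.npy',
#                       output_directory='tmp/',
#                       recordings_filename='standarized.bin')
#     # spike_train[:, 0] -> spike time, spike_train[:, 1] -> neuron ID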
def run(scores, spike_index, output_directory='tmp/', if_file_exists='skip',
        save_results=False):
    """Spike clustering

    Parameters
    ----------
    scores: numpy.ndarray (n_spikes, n_features, n_channels), str or Path
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels. Or path to a npy file

    spike_index: numpy.ndarray (n_clear_spikes, 2), str or Path
        2D array with indexes for spikes, first column contains the spike
        location in the recording and the second the main channel (channel
        whose amplitude is maximum). Or path to an npy file

    output_directory: str, optional
        Location to store/look for the generated spike train, relative to
        CONFIG.data.root_folder

    if_file_exists: str, optional
        One of 'overwrite', 'abort', 'skip'. Controls the behavior for the
        spike_train_cluster.npy file. If 'overwrite', it replaces the file
        if it exists; if 'abort', it raises a ValueError exception if the
        file exists; if 'skip', it skips the operation if the file exists
        (and returns the stored file)

    save_results: bool, optional
        Whether to save the spike train to disk (in
        CONFIG.data.root_folder/relative_to/spike_train_cluster.npy),
        defaults to False

    Returns
    -------
    spike_train: (TODO add documentation)

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/cluster.py
    """
    # load files in case they are strings or Path objects
    scores = file_loader(scores)
    spike_index = file_loader(spike_index)

    CONFIG = read_config()

    startTime = datetime.datetime.now()

    Time = {'t': 0, 'c': 0, 'm': 0, 's': 0, 'e': 0}

    logger = logging.getLogger(__name__)

    scores_all = np.copy(scores)
    spike_index_all = np.copy(spike_index)

    ##########
    # Triage #
    ##########

    _b = datetime.datetime.now()
    logger.info("Randomly subsampling...")
    scores, spike_index = random_subsample(scores, spike_index,
                                           CONFIG.cluster.max_n_spikes)
    logger.info("Triaging...")
    scores, spike_index = triage(scores, spike_index,
                                 CONFIG.cluster.triage.nearest_neighbors,
                                 CONFIG.cluster.triage.percent,
                                 CONFIG.cluster.method == 'location')
    Time['t'] += (datetime.datetime.now() - _b).total_seconds()

    if CONFIG.cluster.method == 'location':
        ##############
        # Clustering #
        ##############

        _b = datetime.datetime.now()
        logger.info("Clustering...")
        vbParam, tmp_loc, scores, spike_index = run_cluster_location(
            scores, spike_index, CONFIG.cluster.min_spikes, CONFIG)
        Time['s'] += (datetime.datetime.now() - _b).total_seconds()

    else:
        ###########
        # Coreset #
        ###########

        _b = datetime.datetime.now()
        logger.info("Coresetting...")
        groups = coreset(scores, spike_index,
                         CONFIG.cluster.coreset.clusters,
                         CONFIG.cluster.coreset.threshold)
        Time['c'] += (datetime.datetime.now() - _b).total_seconds()

        ###########
        # Masking #
        ###########

        _b = datetime.datetime.now()
        logger.info("Masking...")
        masks = getmask(scores, spike_index, groups,
                        CONFIG.cluster.masking_threshold)
        Time['m'] += (datetime.datetime.now() - _b).total_seconds()

        ##############
        # Clustering #
        ##############

        _b = datetime.datetime.now()
        logger.info("Clustering...")
        vbParam, tmp_loc, scores, spike_index = run_cluster(
            scores, masks, groups, spike_index,
            CONFIG.cluster.min_spikes, CONFIG)
        Time['s'] += (datetime.datetime.now() - _b).total_seconds()

    vbParam.rhat = calculate_sparse_rhat(vbParam, tmp_loc, scores_all,
                                         spike_index_all,
                                         CONFIG.neigh_channels)
    idx_keep = get_core_data(vbParam, scores_all, np.inf, 2)
    spike_train = vbParam.rhat[idx_keep]
    spike_train[:, 0] = spike_index_all[spike_train[:, 0].astype('int32'), 0]

    # report timing
    currentTime = datetime.datetime.now()
logger.info("Mainprocess done in {0} seconds.".format( (currentTime - startTime).seconds)) logger.info("\ttriage:\t{0} seconds".format(Time['t'])) logger.info("\tcoreset:\t{0} seconds".format(Time['c'])) logger.info("\tmasking:\t{0} seconds".format(Time['m'])) logger.info("\tclustering:\t{0} seconds".format(Time['s'])) return spike_train, tmp_loc, vbParam
def run(spike_train, tmp_loc, output_directory='tmp/',
        recordings_filename='standarized.bin', if_file_exists='skip',
        save_results=True):
    """Compute templates

    Parameters
    ----------
    spike_train: numpy.ndarray, str or pathlib.Path
        Spike train from cluster step or path to a npy file

    tmp_loc: np.array(n_templates)
        At which channel the clustering is done.

    output_directory: str, optional
        Output directory (relative to CONFIG.data.root_folder) used to load
        the recordings to generate templates, defaults to tmp/

    recordings_filename: str, optional
        Recordings filename (relative to CONFIG.data.root_folder/
        output_directory) used to generate the templates, defaults to
        standarized.bin

    if_file_exists: str, optional
        One of 'overwrite', 'abort', 'skip'. Controls the behavior for the
        templates.npy file. If 'overwrite', it replaces the file if it
        exists; if 'abort', it raises a ValueError exception if the file
        exists; if 'skip', it skips the operation if the file exists (and
        returns the stored file)

    save_results: bool, optional
        Whether to save templates to disk (in
        CONFIG.data.root_folder/relative_to/templates.npy),
        defaults to False

    Returns
    -------
    templates: numpy.ndarray
        templates

    spike_train: np.array(n_data, 3)
        The 3 columns represent spike time, unit id, weight (from soft
        assignment)

    groups: list(n_units)
        After template merge, shows which units were merged together

    idx_good_templates: np.array
        Indexes of the templates kept after clean up

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/templates.py
    """
    spike_train = file_loader(spike_train)

    CONFIG = read_config()

    startTime = datetime.datetime.now()

    Time = {'t': 0, 'c': 0, 'm': 0, 's': 0, 'e': 0}

    logger = logging.getLogger(__name__)

    _b = datetime.datetime.now()
    logger.info("Getting Templates...")

    path_to_recordings = os.path.join(CONFIG.data.root_folder,
                                      output_directory, 'preprocess',
                                      recordings_filename)

    # relevant parameters
    merge_threshold = CONFIG.templates.merge_threshold
    spike_size = CONFIG.spike_size
    template_max_shift = CONFIG.templates.max_shift
    neighbors = CONFIG.neigh_channels
    geometry = CONFIG.geom

    # make templates
    templates, weights = get_templates(spike_train, path_to_recordings,
                                       CONFIG.resources.max_memory,
                                       2 * (spike_size + template_max_shift))

    # clean up bad templates
    snr_threshold = 2
    spread_threshold = 100
    templates, weights, spike_train, idx_good_templates = clean_up_templates(
        templates, weights, spike_train, tmp_loc, geometry, neighbors,
        snr_threshold, spread_threshold)

    # align templates
    templates = align_templates(templates, template_max_shift)

    # merge templates
    templates, spike_train, groups = merge_templates(templates, weights,
                                                     spike_train, neighbors,
                                                     template_max_shift,
                                                     merge_threshold)

    # remove the edges since they are noisy
    templates = templates[:, template_max_shift:(template_max_shift +
                                                 (4 * spike_size + 1))]

    Time['e'] += (datetime.datetime.now() - _b).total_seconds()

    # report timing
    currentTime = datetime.datetime.now()
    logger.info("Templates done in {0} seconds.".format(
        (currentTime - startTime).seconds))

    return templates, spike_train, groups, idx_good_templates
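# A minimal usage sketch for the templates step (illustrative, not part of
# the original module). It consumes the spike train and per-template main
# channels (tmp_loc) produced by the cluster step.
#
#     templates, spike_train, groups, idx_good_templates = run(
#         spike_train, tmp_loc,
#         output_directory='tmp/',
#         recordings_filename='standarized.bin')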
def run(standarized_path, standarized_params, channel_index, whiten_filter,
        output_directory='tmp/', if_file_exists='skip', save_results=False,
        gmm_params=None):
    """Execute detect step

    Parameters
    ----------
    standarized_path: str or pathlib.Path
        Path to standardized data binary file

    standarized_params: dict, str or pathlib.Path
        Dictionary with standardized data parameters or path to a yaml file

    channel_index: numpy.ndarray, str or pathlib.Path
        Channel index or path to a npy file

    whiten_filter: numpy.ndarray, str or pathlib.Path
        Whitening matrix or path to a npy file

    output_directory: str, optional
        Location to store partial results, relative to
        CONFIG.data.root_folder, defaults to tmp/

    if_file_exists: str, optional
        One of 'overwrite', 'abort', 'skip'. Controls the behavior for every
        generated file. If 'overwrite', it replaces the files if they exist;
        if 'abort', it raises a ValueError exception if any file exists;
        if 'skip', it skips the operation if any file exists

    save_results: bool, optional
        Whether to save results to disk, defaults to False

    gmm_params: dict, optional
        Dictionary with the following keys (use None for default values):

        * ``gmtype``: str - One of 'idist', 'iinf' or 'ipeak'. Controls
          which separability metric to use. See [1] for further details.
        * ``max_samples``: int, optional - Number of samples used to compute
          the Gaussian mixture model (GMM)
        * ``replicates``: int, optional - Number of replicates of the GMM
        * ``max_iter``: int, optional - Maximum number of iterations in the
          GMMs
        * ``n_components``: int, optional - Number of Gaussians in the model
        * ``use_channel_features``: bool, optional - If True, computes the
          GMMs using only the main samples for each channel, as implemented
          in the original PCA. If False, computes the GMMs using all the
          samples.

    Returns
    -------
    clear_scores: numpy.ndarray (n_spikes, n_features, n_channels)
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels

    spike_index_clear: numpy.ndarray (n_clear_spikes, 2)
        2D array with indexes for clear spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    spike_index_all: numpy.ndarray (n_spikes, 2)
        2D array with indexes for all spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/ (if save_results is True):

    * ``spike_index_clear.npy`` - Same as spike_index_clear returned
    * ``spike_index_all.npy`` - Same as spike_index_all returned
    * ``rotation.npy`` - Rotation matrix for dimensionality reduction
    * ``scores_clear.npy`` - Scores for clear spikes

    The threshold detector runs on the CPU; the neural network detector runs
    on CPU or GPU, depending on how tensorflow is configured.

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/detect.py
    """
    if gmm_params is None:
        gmm_params = {'gmtype': 'idist'}

    CONFIG = read_config()

    # load files in case they are strings or Path objects
    standarized_params = file_loader(standarized_params)
    channel_index = file_loader(channel_index)
    whiten_filter = file_loader(whiten_filter)

    # run detection
    if CONFIG.detect.method == 'threshold':
        return run_threshold(standarized_path, standarized_params,
                             channel_index, whiten_filter, output_directory,
                             if_file_exists, save_results, gmm_params)
    elif CONFIG.detect.method == 'nn':
        # note: the neural network path does not use gmm_params
        return run_neural_network(standarized_path, standarized_params,
                                  channel_index, whiten_filter,
                                  output_directory, if_file_exists,
                                  save_results)
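# A minimal usage sketch for this GMM-enabled detect step (illustrative, not
# part of the original module). The gmm_params keys are those documented
# above; the values and paths below are hypothetical.
#
#     gmm_params = {'gmtype': 'idist', 'max_samples': 10000,
#                   'replicates': 3, 'max_iter': 100, 'n_components': 5,
#                   'use_channel_features': True}
#     scores, spike_index_clear, spike_index_all = run(
#         'tmp/preprocess/standarized.bin',
#         'tmp/preprocess/standarized.yaml',
#         'tmp/preprocess/channel_index.npy',
#         'tmp/preprocess/whitening.npy',
#         gmm_params=gmm_params)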