import os

import pytest

from yass.explore import RecordingExplorer


def test_error_raised_if_cannot_read_complete_waveform(path_to_data_folder):
    e = RecordingExplorer(os.path.join(path_to_data_folder, 'filtered.bin'),
                          spike_size=15, dtype='float32', n_channels=10,
                          data_order='channels', loader='array')

    with pytest.raises(ValueError):
        e.read_waveform(time=0)
def test_can_read_waveform(path_to_standardized_data):
    spike_size = 15

    e = RecordingExplorer(path_to_standardized_data, spike_size=spike_size,
                          dtype='float32', n_channels=10,
                          data_order='channels', loader='array')

    assert len(e.read_waveform(time=100)) == 2 * spike_size + 1
def test_can_read_waveform(path_to_data_folder):
    spike_size = 15

    e = RecordingExplorer(os.path.join(path_to_data_folder, 'filtered.bin'),
                          spike_size=spike_size, dtype='float32',
                          n_channels=10, data_order='channels',
                          loader='array')

    assert len(e.read_waveform(time=100)) == 2 * spike_size + 1
def test_returns_empty_if_cannot_get_complete_wf(path_to_standarized_data):
    e = RecordingExplorer(path_to_standarized_data, spike_size=15,
                          dtype='float32', n_channels=10,
                          data_order='channels', loader='array')

    assert len(e.read_waveform(time=0)) == 0
def test_error_raised_if_cannot_read_complete_waveform(
        path_to_standarized_data):
    e = RecordingExplorer(path_to_standarized_data, spike_size=15,
                          dtype='float32', n_channels=10,
                          data_order='channels', loader='array')

    with pytest.raises(ValueError):
        e.read_waveform(time=0)
def test_can_use_detect_and_triage_after_reload(path_to_tests,
                                                path_to_sample_pipeline_folder,
                                                tmp_folder,
                                                path_to_standarized_data):
    # NOTE: spike_train, chosen_templates, min_amplitude, n_spikes and
    # filters are module-level constants defined in the original test module
    yass.set_config(path.join(path_to_tests, 'config_nnet.yaml'))
    CONFIG = yass.read_config()

    (x_detect, y_detect,
     x_triage, y_triage,
     x_ae, y_ae) = make_training_data(CONFIG, spike_train, chosen_templates,
                                      min_amplitude, n_spikes,
                                      path_to_sample_pipeline_folder)

    _, waveform_length, n_neighbors = x_detect.shape

    # train and reload the detector network
    path_to_model = path.join(tmp_folder, 'detect-net.ckpt')

    detector = NeuralNetDetector(path_to_model, filters, waveform_length,
                                 n_neighbors, threshold=0.5,
                                 channel_index=CONFIG.channel_index,
                                 n_iter=10)
    detector.fit(x_detect, y_detect)

    detector = NeuralNetDetector.load(path_to_model, threshold=0.5,
                                      channel_index=CONFIG.channel_index)

    # train and reload the triage network on the triage training data, using
    # its own checkpoint so the detector weights are not overwritten
    # (checkpoint filename assumed)
    path_to_triage_model = path.join(tmp_folder, 'triage-net.ckpt')

    triage = NeuralNetTriage(path_to_triage_model, filters, waveform_length,
                             n_neighbors, threshold=0.5, n_iter=10)
    triage.fit(x_triage, y_triage)

    triage = NeuralNetTriage.load(path_to_triage_model, threshold=0.5)

    data = RecordingExplorer(path_to_standarized_data).reader.data

    output_names = ('spike_index', 'waveform', 'probability')

    (spike_index, waveform,
     proba) = detector.predict(data, output_names=output_names)

    triage.predict(waveform[:, :, :n_neighbors])
def test_can_use_detector_after_fit(path_to_config,
                                    path_to_sample_pipeline_folder,
                                    make_tmp_folder,
                                    path_to_standardized_data):
    yass.set_config(path_to_config, make_tmp_folder)
    CONFIG = yass.read_config()

    spike_train = np.load(path.join(path_to_sample_pipeline_folder,
                                    'spike_train.npy'))
    chosen_templates = np.unique(spike_train[:, 1])
    min_amplitude = 4
    max_amplitude = 60
    n_spikes_to_make = 100

    templates = make.load_templates(path_to_sample_pipeline_folder,
                                    spike_train, CONFIG, chosen_templates)

    path_to_standardized = path.join(path_to_sample_pipeline_folder,
                                     'preprocess', 'standarized.bin')

    (x_detect, y_detect,
     x_triage, y_triage,
     x_ae, y_ae) = make.training_data(CONFIG, templates, min_amplitude,
                                      max_amplitude, n_spikes_to_make,
                                      path_to_standardized)

    _, waveform_length, n_neighbors = x_detect.shape

    path_to_model = path.join(make_tmp_folder, 'detect-net.ckpt')

    detector = NeuralNetDetector(path_to_model, [8, 4], waveform_length,
                                 n_neighbors, threshold=0.5,
                                 channel_index=CONFIG.channel_index,
                                 n_iter=10)
    detector.fit(x_detect, y_detect)

    data = RecordingExplorer(path_to_standardized_data).reader.data

    output_names = ('spike_index', 'waveform', 'probability')

    (spike_index, waveform,
     proba) = detector.predict_recording(data, output_names=output_names)

    detector.predict(x_detect)
def run(config, logger_level='INFO', clean=False, output_dir='tmp/',
        complete=False, set_zero_seed=False):
    """Run YASS built-in pipeline

    Parameters
    ----------
    config: str or mapping (such as dictionary)
        Path to YASS configuration file or mapping object

    logger_level: str
        Logger level

    clean: bool, optional
        Delete CONFIG.data.root_folder/output_dir/ before running

    output_dir: str, optional
        Output directory to store the output data, defaults to tmp/.
        A relative path is interpreted relative to
        CONFIG.data.root_folder; an absolute path is used as-is.

    complete: bool, optional
        Generates extra files (needed to generate phy files)

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/:

    * ``config.yaml`` - Copy of the configuration file
    * ``metadata.yaml`` - Experiment metadata
    * ``filtered.bin`` - Filtered recordings (from preprocess)
    * ``filtered.yaml`` - Filtered recordings metadata (from preprocess)
    * ``standardized.bin`` - Standardized recordings (from preprocess)
    * ``standardized.yaml`` - Standardized recordings metadata
      (from preprocess)
    * ``whitening.npy`` - Whitening filter (from preprocess)

    Returns
    -------
    numpy.ndarray
        Spike train
    """
    # load yass configuration parameters
    set_config(config, output_dir)
    CONFIG = read_config()
    TMP_FOLDER = CONFIG.path_to_output_directory

    # remove tmp folder if needed
    if os.path.exists(TMP_FOLDER) and clean:
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(TMP_FOLDER,
                                                               'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger
    logger = logging.getLogger(__name__)

    # print yass version
    logger.info('YASS version: %s', yass.__version__)

    ''' **********************************************
        ******** SET ENVIRONMENT VARIABLES ***********
        **********************************************
    '''
    os.environ["OPENBLAS_NUM_THREADS"] = "1"
    os.environ["MKL_NUM_THREADS"] = "1"
    os.environ["GIO_EXTRA_MODULES"] = "/usr/lib/x86_64-linux-gnu/gio/modules/"

    ''' **********************************************
        ************** PREPROCESS ********************
        **********************************************
    '''
    # preprocess
    start = time.time()
    (standardized_path,
     standardized_params,
     whiten_filter) = preprocess.run(
        if_file_exists=CONFIG.preprocess.if_file_exists)
    time_preprocess = time.time() - start

    ''' **********************************************
        ************** DETECT EVENTS *****************
        **********************************************
    '''
    # detect
    # Cat: This code now runs with open tensorflow calls
    start = time.time()
    spike_index_all = detect.run(standardized_path,
                                 standardized_params,
                                 whiten_filter,
                                 if_file_exists=CONFIG.detect.if_file_exists,
                                 save_results=CONFIG.detect.save_results)
    spike_index_clear = None
    time_detect = time.time() - start

    ''' **********************************************
        ***************** CLUSTER ********************
        **********************************************
    '''
    # cluster
    start = time.time()
    path_to_spike_train_cluster = path.join(TMP_FOLDER,
                                            'spike_train_cluster.npy')

    if not os.path.exists(path_to_spike_train_cluster):
        cluster.run(spike_index_clear, spike_index_all)
    else:
        print("\nClustering completed previously...\n\n")

    spike_train_cluster = np.load(path_to_spike_train_cluster)
    templates_cluster = np.load(os.path.join(TMP_FOLDER,
                                             'templates_cluster.npy'))
    time_cluster = time.time() - start

    # print("Spike train clustered: ", spike_index_cluster.shape,
    #       "spike train clear: ", spike_train_clear.shape,
    #       " templates: ", templates.shape)

    ''' **********************************************
        ************** DECONVOLUTION *****************
        **********************************************
    '''
    # run deconvolution
    start = time.time()
    spike_train, postdeconv_templates = deconvolve.run(spike_train_cluster,
                                                       templates_cluster)
    time_deconvolution = time.time() - start

    # save spike train
    path_to_spike_train = path.join(TMP_FOLDER,
                                    'spike_train_post_deconv_post_merge.npy')
    np.save(path_to_spike_train, spike_train)
    logger.info('Spike train saved in: {}'.format(path_to_spike_train))

    # save templates
    path_to_templates = path.join(TMP_FOLDER,
                                  'templates_post_deconv_post_merge.npy')
    np.save(path_to_templates, postdeconv_templates)
    logger.info('Templates saved in: {}'.format(path_to_templates))

    ''' **********************************************
        ************** POST PROCESSING ***************
        **********************************************
    '''
    # save metadata in tmp
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logging.info('Saving metadata in {}'.format(path_to_metadata))
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')

    if isinstance(config, Mapping):
        with open(path_to_config_copy, 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
    else:
        shutil.copy2(config, path_to_config_copy)

    logging.info('Saving copy of config: {} in {}'.format(
        config, path_to_config_copy))

    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARDIZED_PATH = path.join(TMP_FOLDER, 'standardized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standardized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARDIZED_PATH,
                                     spike_size=CONFIG.spike_size,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_order=PARAMS['data_order'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in {}...'
                    .format(path_to_waveforms))

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in {}...'
                    .format(path_to_main_channels))

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info('Saved all scores in {}...'
                    .format(path_to_waveforms_score))

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        templates_ = postdeconv_templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in {}...'
                    .format(path_to_templates_main_c))

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in {}...'
                    .format(path_to_templates_score))

    logger.info('Finished YASS execution. Timing summary:')

    total = (time_preprocess + time_detect + time_cluster
             + time_deconvolution)
    logger.info('\t Preprocess: %s (%.2f %%)',
                human_readable_time(time_preprocess),
                time_preprocess / total * 100)
    logger.info('\t Detection: %s (%.2f %%)',
                human_readable_time(time_detect),
                time_detect / total * 100)
    logger.info('\t Clustering: %s (%.2f %%)',
                human_readable_time(time_cluster),
                time_cluster / total * 100)
    logger.info('\t Deconvolution: %s (%.2f %%)',
                human_readable_time(time_deconvolution),
                time_deconvolution / total * 100)

    return spike_train
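# Minimal usage sketch for the `run` pipeline defined above (illustrative
# only; 'config.yaml' is a placeholder path to a YASS configuration file).
if __name__ == '__main__':
    spike_train = run('config.yaml', clean=True)

    # spike_train: first column is the spike time (in samples),
    # second column is the unit id (see the explorer example at the end)
    print('found {} spikes'.format(len(spike_train)))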
def _run_pipeline(config, output_file, logger_level='INFO', clean=True,
                  output_dir='tmp/', complete=False):
    """
    Run the entire pipeline given a path to a config file and output path
    """
    # load yass configuration parameters
    set_config(config)
    CONFIG = read_config()
    ROOT_FOLDER = CONFIG.data.root_folder
    TMP_FOLDER = path.join(ROOT_FOLDER, output_dir)

    # remove tmp folder if needed
    if os.path.exists(TMP_FOLDER) and clean:
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(TMP_FOLDER,
                                                               'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger
    logger = logging.getLogger(__name__)

    # run preprocessor
    (score, spike_index_clear,
     spike_index_collision) = preprocess.run(output_directory=output_dir)

    # run processor
    (spike_train_clear, templates,
     spike_index_collision) = process.run(score, spike_index_clear,
                                          spike_index_collision,
                                          output_directory=output_dir)

    # run deconvolution
    spike_train = deconvolute.run(spike_train_clear, templates,
                                  spike_index_collision,
                                  output_directory=output_dir)

    # save metadata in tmp
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logging.info('Saving metadata in {}'.format(path_to_metadata))
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')
    shutil.copy2(config, path_to_config_copy)
    logging.info('Saving copy of config: {} in {}'.format(
        config, path_to_config_copy))

    # save templates
    path_to_templates = path.join(TMP_FOLDER, 'templates.npy')
    logging.info('Saving templates in {}'.format(path_to_templates))
    np.save(path_to_templates, templates)

    # save spike train
    path_to_spike_train = path.join(TMP_FOLDER, output_file)
    np.save(path_to_spike_train, spike_train)
    logger.info('Spike train saved in: {}'.format(path_to_spike_train))

    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARIZED_PATH = path.join(TMP_FOLDER, 'standarized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standarized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARIZED_PATH,
                                     spike_size=CONFIG.spikeSize,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_format=PARAMS['data_format'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in {}...'
                    .format(path_to_waveforms))

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in {}...'
                    .format(path_to_main_channels))

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neighChannels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info('Saved all scores in {}...'
                    .format(path_to_waveforms_score))

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        templates_ = templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in {}...'
                    .format(path_to_templates_main_c))

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neighChannels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in {}...'
                    .format(path_to_templates_score))
def _neural_network_detection(standarized_path, standarized_params,
                              n_observations, output_directory):
    """Run neural network detection and autoencoder dimensionality reduction
    """
    logger = logging.getLogger(__name__)

    CONFIG = read_config()
    OUTPUT_DTYPE = CONFIG.preprocess.dtype
    TMP_FOLDER = os.path.join(CONFIG.data.root_folder, output_directory)

    # detect spikes
    bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory, buffer_size=0)

    # check if all scores, clear and collision spikes exist
    path_to_score = os.path.join(TMP_FOLDER, 'score_clear.npy')
    path_to_spike_index_clear = os.path.join(TMP_FOLDER,
                                             'spike_index_clear.npy')
    path_to_spike_index_collision = os.path.join(TMP_FOLDER,
                                                 'spike_index_collision.npy')

    if all([os.path.exists(path_to_score),
            os.path.exists(path_to_spike_index_clear),
            os.path.exists(path_to_spike_index_collision)]):
        logger.info('Loading "{}", "{}" and "{}"'
                    .format(path_to_score, path_to_spike_index_clear,
                            path_to_spike_index_collision))

        scores = np.load(path_to_score)
        clear = np.load(path_to_spike_index_clear)
        collision = np.load(path_to_spike_index_collision)

    else:
        logger.info('One or more of "{}", "{}" or "{}" files were missing, '
                    'computing...'.format(path_to_score,
                                          path_to_spike_index_clear,
                                          path_to_spike_index_collision))

        # apply neural network detector on standarized data
        autoencoder_filename = CONFIG.neural_network_autoencoder.filename
        mc = bp.multi_channel_apply
        res = mc(
            neuralnetwork.nn_detection,
            mode='memory',
            cleanup_function=neuralnetwork.fix_indexes,
            neighbors=CONFIG.neighChannels,
            geom=CONFIG.geom,
            temporal_features=CONFIG.spikes.temporal_features,
            # FIXME: what is this?
            temporal_window=3,
            th_detect=CONFIG.neural_network_detector.threshold_spike,
            th_triage=CONFIG.neural_network_triage.threshold_collision,
            detector_filename=CONFIG.neural_network_detector.filename,
            autoencoder_filename=autoencoder_filename,
            triage_filename=CONFIG.neural_network_triage.filename)

        # save clear spikes
        clear = np.concatenate([element[1] for element in res], axis=0)
        logger.info('Removing clear indexes outside the allowed range to '
                    'draw a complete waveform...')
        clear, idx = detect.remove_incomplete_waveforms(
            clear, CONFIG.spikeSize + CONFIG.templatesMaxShift,
            n_observations)
        np.save(path_to_spike_index_clear, clear)
        logger.info('Saved spike index clear in {}...'
                    .format(path_to_spike_index_clear))

        # save collided spikes
        collision = np.concatenate([element[2] for element in res], axis=0)
        logger.info('Removing collision indexes outside the allowed range to '
                    'draw a complete waveform...')
        collision, _ = detect.remove_incomplete_waveforms(
            collision, CONFIG.spikeSize + CONFIG.templatesMaxShift,
            n_observations)
        np.save(path_to_spike_index_collision, collision)
        logger.info('Saved spike index collision in {}...'
                    .format(path_to_spike_index_collision))

        if CONFIG.clustering.clustering_method == 'location':
            #######################
            # Waveform extraction #
            #######################

            # TODO: what should the behaviour be for spike indexes that are
            # near the start/end of the recordings where it is not possible
            # to draw a complete waveform?
            logger.info('Computing whitening matrix...')
            bp = BatchProcessor(standarized_path,
                                standarized_params['dtype'],
                                standarized_params['n_channels'],
                                standarized_params['data_format'],
                                CONFIG.resources.max_memory)
            batches = bp.multi_channel()
            first_batch, _, _ = next(batches)
            Q = whiten.matrix(first_batch, CONFIG.neighChannels,
                              CONFIG.spikeSize)

            path_to_whitening_matrix = os.path.join(TMP_FOLDER,
                                                    'whitening.npy')
            np.save(path_to_whitening_matrix, Q)
            logger.info('Saved whitening matrix in {}'
                        .format(path_to_whitening_matrix))

            # apply whitening to every batch
            (whitened_path,
             whitened_params) = bp.multi_channel_apply(
                np.matmul,
                mode='disk',
                output_path=os.path.join(TMP_FOLDER, 'whitened.bin'),
                if_file_exists='skip',
                cast_dtype=OUTPUT_DTYPE,
                b=Q)

            main_channel = clear[:, 1]

            # load and dump waveforms from clear spikes
            path_to_waveforms_clear = os.path.join(TMP_FOLDER,
                                                   'waveforms_clear.npy')

            if os.path.exists(path_to_waveforms_clear):
                logger.info('Found clear waveforms in {}, loading them...'
                            .format(path_to_waveforms_clear))
                waveforms_clear = np.load(path_to_waveforms_clear)
            else:
                logger.info('Did not find clear waveforms in {}, reading '
                            'them from {}'.format(path_to_waveforms_clear,
                                                  whitened_path))
                explorer = RecordingExplorer(whitened_path,
                                             spike_size=CONFIG.spikeSize)
                waveforms_clear = explorer.read_waveforms(clear[:, 0], 'all')
                np.save(path_to_waveforms_clear, waveforms_clear)
                logger.info('Saved waveform from clear spikes in: {}'
                            .format(path_to_waveforms_clear))

            # save rotation
            detector_filename = CONFIG.neural_network_detector.filename
            autoencoder_filename = CONFIG.neural_network_autoencoder.filename
            rotation = neuralnetwork.load_rotation(detector_filename,
                                                   autoencoder_filename)
            path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
            logger.info("rotation_matrix_shape = {}".format(rotation.shape))
            np.save(path_to_rotation, rotation)
            logger.info('Saved rotation matrix in {}...'
                        .format(path_to_rotation))

            logger.info('Denoising...')
            path_to_denoised_waveforms = os.path.join(
                TMP_FOLDER, 'denoised_waveforms.npy')

            if os.path.exists(path_to_denoised_waveforms):
                logger.info('Found denoised waveforms in {}, loading them...'
                            .format(path_to_denoised_waveforms))
                denoised_waveforms = np.load(path_to_denoised_waveforms)
            else:
                logger.info('Did not find denoised waveforms in {}, '
                            'evaluating them from {}'
                            .format(path_to_denoised_waveforms,
                                    path_to_waveforms_clear))
                waveforms_clear = np.load(path_to_waveforms_clear)
                denoised_waveforms = dim_red.denoise(waveforms_clear,
                                                     rotation, CONFIG)
                logger.info('Saving denoised waveforms to {}'
                            .format(path_to_denoised_waveforms))
                np.save(path_to_denoised_waveforms, denoised_waveforms)

            isolated_index, x, y = get_isolated_spikes_and_locations(
                denoised_waveforms, main_channel, CONFIG)

            x = (x - np.mean(x)) / np.std(x)
            y = (y - np.mean(y)) / np.std(y)

            corrupted_index = np.logical_not(
                np.in1d(np.arange(clear.shape[0]), isolated_index))
            collision = np.concatenate([collision, clear[corrupted_index]],
                                       axis=0)
            clear = clear[isolated_index]
            waveforms_clear = waveforms_clear[isolated_index]

            #################################################
            # Dimensionality reduction (Isolated Waveforms) #
            #################################################

            scores = dim_red.main_channel_scores(waveforms_clear, rotation,
                                                 clear, CONFIG)
            scores = (scores - np.mean(scores, axis=0)) / np.std(scores)
            scores = np.concatenate([x[:, np.newaxis, np.newaxis],
                                     y[:, np.newaxis, np.newaxis],
                                     scores[:, :, np.newaxis]], axis=1)
        else:
            # save scores
            scores = np.concatenate([element[0] for element in res], axis=0)

            logger.info('Removing scores for indexes outside the allowed '
                        'range to draw a complete waveform...')
            scores = scores[idx]

            # compute Q for whitening
            logger.info('Computing whitening matrix...')
            bp = BatchProcessor(standarized_path,
                                standarized_params['dtype'],
                                standarized_params['n_channels'],
                                standarized_params['data_format'],
                                CONFIG.resources.max_memory)
            batches = bp.multi_channel()
            first_batch, _, _ = next(batches)
            Q = whiten.matrix_localized(first_batch, CONFIG.neighChannels,
                                        CONFIG.geom, CONFIG.spikeSize)

            path_to_whitening_matrix = os.path.join(TMP_FOLDER,
                                                    'whitening.npy')
            np.save(path_to_whitening_matrix, Q)
            logger.info('Saved whitening matrix in {}'
                        .format(path_to_whitening_matrix))

            scores = whiten.score(scores, clear[:, 1], Q)

            np.save(path_to_score, scores)
            logger.info('Saved spike scores in {}...'.format(path_to_score))

            # save rotation
            detector_filename = CONFIG.neural_network_detector.filename
            autoencoder_filename = CONFIG.neural_network_autoencoder.filename
            rotation = neuralnetwork.load_rotation(detector_filename,
                                                   autoencoder_filename)
            path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
            np.save(path_to_rotation, rotation)
            logger.info('Saved rotation matrix in {}...'
                        .format(path_to_rotation))

        np.save(path_to_score, scores)
        logger.info('Saved spike scores in {}...'.format(path_to_score))

    return scores, clear, collision
def _threshold_detection(standarized_path, standarized_params, n_observations,
                         output_directory):
    """Run threshold detector and dimensionality reduction using PCA
    """
    logger = logging.getLogger(__name__)

    CONFIG = read_config()
    OUTPUT_DTYPE = CONFIG.preprocess.dtype
    TMP_FOLDER = os.path.join(CONFIG.data.root_folder, output_directory)

    ###############
    # Whiten data #
    ###############

    # compute Q for whitening
    logger.info('Computing whitening matrix...')
    bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory)
    batches = bp.multi_channel()
    first_batch, _, _ = next(batches)
    Q = whiten.matrix(first_batch, CONFIG.neighChannels, CONFIG.spikeSize)

    path_to_whitening_matrix = os.path.join(TMP_FOLDER, 'whitening.npy')
    np.save(path_to_whitening_matrix, Q)
    logger.info('Saved whitening matrix in {}'
                .format(path_to_whitening_matrix))

    # apply whitening to every batch
    (whitened_path,
     whitened_params) = bp.multi_channel_apply(
        np.matmul,
        mode='disk',
        output_path=os.path.join(TMP_FOLDER, 'whitened.bin'),
        if_file_exists='skip',
        cast_dtype=OUTPUT_DTYPE,
        b=Q)

    ###################
    # Spike detection #
    ###################

    path_to_spike_index_clear = os.path.join(TMP_FOLDER,
                                             'spike_index_clear.npy')

    bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory, buffer_size=0)

    # clear spikes
    if os.path.exists(path_to_spike_index_clear):
        # if it exists, load it...
        logger.info('Found file in {}, loading it...'
                    .format(path_to_spike_index_clear))
        spike_index_clear = np.load(path_to_spike_index_clear)
    else:
        # if it doesn't, detect spikes...
        logger.info('Did not find file in {}, finding spikes using threshold'
                    ' detector...'.format(path_to_spike_index_clear))

        # apply threshold detector on standarized data
        spikes = bp.multi_channel_apply(detect.threshold,
                                        mode='memory',
                                        cleanup_function=detect.fix_indexes,
                                        neighbors=CONFIG.neighChannels,
                                        spike_size=CONFIG.spikeSize,
                                        std_factor=CONFIG.stdFactor)
        spike_index_clear = np.vstack(spikes)

        logger.info('Removing clear indexes outside the allowed range to '
                    'draw a complete waveform...')
        spike_index_clear, _ = detect.remove_incomplete_waveforms(
            spike_index_clear, CONFIG.spikeSize + CONFIG.templatesMaxShift,
            n_observations)

        logger.info('Saving spikes in {}...'
                    .format(path_to_spike_index_clear))
        np.save(path_to_spike_index_clear, spike_index_clear)

    path_to_spike_index_collision = os.path.join(TMP_FOLDER,
                                                 'spike_index_collision.npy')

    # collided spikes
    if os.path.exists(path_to_spike_index_collision):
        # if it exists, load it...
        logger.info('Found collided spikes in {}, loading them...'
                    .format(path_to_spike_index_collision))
        spike_index_collision = np.load(path_to_spike_index_collision)

        if spike_index_collision.shape[0] != 0:
            raise ValueError('Found non-empty collision spike index in {}, '
                             'but threshold detector is selected, collision '
                             'detection is not implemented for threshold '
                             'detector so array must have dimensions (0, 2) '
                             'but had ({}, {})'
                             .format(path_to_spike_index_collision,
                                     *spike_index_collision.shape))
    else:
        # triage is not implemented on threshold detector, return empty array
        logger.info('Creating empty array for collided spikes (collision '
                    'detection is not implemented with threshold detector). '
                    'Saving them in {}'
                    .format(path_to_spike_index_collision))
        spike_index_collision = np.zeros((0, 2), 'int32')
        np.save(path_to_spike_index_collision, spike_index_collision)

    #######################
    # Waveform extraction #
    #######################

    # load and dump waveforms from clear spikes
    path_to_waveforms_clear = os.path.join(TMP_FOLDER, 'waveforms_clear.npy')

    if os.path.exists(path_to_waveforms_clear):
        logger.info('Found clear waveforms in {}, loading them...'
                    .format(path_to_waveforms_clear))
        waveforms_clear = np.load(path_to_waveforms_clear)
    else:
        logger.info('Did not find clear waveforms in {}, reading them from {}'
                    .format(path_to_waveforms_clear, standarized_path))
        explorer = RecordingExplorer(standarized_path,
                                     spike_size=CONFIG.spikeSize)
        waveforms_clear = explorer.read_waveforms(spike_index_clear[:, 0])
        np.save(path_to_waveforms_clear, waveforms_clear)
        logger.info('Saved waveform from clear spikes in: {}'
                    .format(path_to_waveforms_clear))

    #########################
    # PCA - rotation matrix #
    #########################

    # compute per-batch sufficient statistics for PCA on standarized data
    logger.info('Computing PCA sufficient statistics...')
    stats = bp.multi_channel_apply(dim_red.suff_stat,
                                   mode='memory',
                                   spike_index=spike_index_clear,
                                   spike_size=CONFIG.spikeSize)
    suff_stats = reduce(lambda x, y: np.add(x, y), [e[0] for e in stats])
    spikes_per_channel = reduce(lambda x, y: np.add(x, y),
                                [e[1] for e in stats])

    # compute rotation matrix
    logger.info('Computing PCA projection matrix...')
    rotation = dim_red.project(suff_stats, spikes_per_channel,
                               CONFIG.spikes.temporal_features,
                               CONFIG.neighChannels)
    path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
    np.save(path_to_rotation, rotation)
    logger.info('Saved rotation matrix in {}...'.format(path_to_rotation))

    main_channel = spike_index_clear[:, 1]

    ###########################################
    # PCA - waveform dimensionality reduction #
    ###########################################

    if CONFIG.clustering.clustering_method == 'location':
        logger.info('Denoising...')

        path_to_denoised_waveforms = os.path.join(TMP_FOLDER,
                                                  'denoised_waveforms.npy')

        if os.path.exists(path_to_denoised_waveforms):
            logger.info('Found denoised waveforms in {}, loading them...'
                        .format(path_to_denoised_waveforms))
            denoised_waveforms = np.load(path_to_denoised_waveforms)
        else:
            logger.info('Did not find denoised waveforms in {}, evaluating '
                        'them from {}'.format(path_to_denoised_waveforms,
                                              path_to_waveforms_clear))
            waveforms_clear = np.load(path_to_waveforms_clear)
            denoised_waveforms = dim_red.denoise(waveforms_clear, rotation,
                                                 CONFIG)
            logger.info('Saving denoised waveforms to {}'
                        .format(path_to_denoised_waveforms))
            np.save(path_to_denoised_waveforms, denoised_waveforms)

        isolated_index, x, y = get_isolated_spikes_and_locations(
            denoised_waveforms, main_channel, CONFIG)

        x = (x - np.mean(x)) / np.std(x)
        y = (y - np.mean(y)) / np.std(y)

        corrupted_index = np.logical_not(
            np.in1d(np.arange(spike_index_clear.shape[0]), isolated_index))
        spike_index_collision = np.concatenate(
            [spike_index_collision, spike_index_clear[corrupted_index]],
            axis=0)
        spike_index_clear = spike_index_clear[isolated_index]
        waveforms_clear = waveforms_clear[isolated_index]

        #################################################
        # Dimensionality reduction (Isolated Waveforms) #
        #################################################

        scores = dim_red.main_channel_scores(waveforms_clear, rotation,
                                             spike_index_clear, CONFIG)
        scores = (scores - np.mean(scores, axis=0)) / np.std(scores)
        scores = np.concatenate([x[:, np.newaxis, np.newaxis],
                                 y[:, np.newaxis, np.newaxis],
                                 scores[:, :, np.newaxis]], axis=1)
    else:
        logger.info('Reducing spikes dimensionality with PCA matrix...')
        scores = dim_red.score(waveforms_clear, rotation,
                               spike_index_clear[:, 1],
                               CONFIG.neighChannels, CONFIG.geom)

    # save scores
    path_to_score = os.path.join(TMP_FOLDER, 'score_clear.npy')
    np.save(path_to_score, scores)
    logger.info('Saved spike scores in {}...'.format(path_to_score))

    return scores, spike_index_clear, spike_index_collision
def run(config, logger_level='INFO', clean=False, output_dir='tmp/',
        complete=False, set_zero_seed=False):
    """Run YASS built-in pipeline

    Parameters
    ----------
    config: str or mapping (such as dictionary)
        Path to YASS configuration file or mapping object

    logger_level: str
        Logger level

    clean: bool, optional
        Delete CONFIG.data.root_folder/output_dir/ before running

    output_dir: str, optional
        Output directory (relative to CONFIG.data.root_folder) to store the
        output data, defaults to tmp/

    complete: bool, optional
        Generates extra files (needed to generate phy files)

    set_zero_seed: bool, optional
        Set the numpy random seed to zero before running

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/:

    * ``config.yaml`` - Copy of the configuration file
    * ``metadata.yaml`` - Experiment metadata
    * ``filtered.bin`` - Filtered recordings (from preprocess)
    * ``filtered.yaml`` - Filtered recordings metadata (from preprocess)
    * ``standarized.bin`` - Standardized recordings (from preprocess)
    * ``standarized.yaml`` - Standardized recordings metadata
      (from preprocess)
    * ``whitening.npy`` - Whitening filter (from preprocess)

    Returns
    -------
    numpy.ndarray
        Spike train
    """
    # load yass configuration parameters
    set_config(config)
    CONFIG = read_config()
    ROOT_FOLDER = CONFIG.data.root_folder
    TMP_FOLDER = path.join(ROOT_FOLDER, output_dir)

    # remove tmp folder if needed
    if os.path.exists(TMP_FOLDER) and clean:
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(TMP_FOLDER,
                                                               'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger and start coloredlogs
    logger = logging.getLogger(__name__)
    coloredlogs.install(logger=logger)

    if set_zero_seed:
        logger.warning('Set numpy seed to zero')
        np.random.seed(0)

    # print yass version
    logger.info('YASS version: %s', yass.__version__)

    # preprocess
    start = time.time()
    (standarized_path,
     standarized_params,
     channel_index,
     whiten_filter) = preprocess.run(
        output_directory=output_dir,
        if_file_exists=CONFIG.preprocess.if_file_exists)
    time_preprocess = time.time() - start

    # detect
    start = time.time()
    (score,
     spike_index_clear,
     spike_index_all) = detect.run(
        standarized_path,
        standarized_params,
        channel_index,
        whiten_filter,
        output_directory=output_dir,
        if_file_exists=CONFIG.detect.if_file_exists,
        save_results=CONFIG.detect.save_results)
    time_detect = time.time() - start

    # cluster
    start = time.time()
    spike_train_clear, tmp_loc, vbParam = cluster.run(
        score,
        spike_index_clear,
        output_directory=output_dir,
        if_file_exists=CONFIG.cluster.if_file_exists,
        save_results=CONFIG.cluster.save_results)
    time_cluster = time.time() - start

    # get templates
    start = time.time()
    (templates,
     spike_train_clear_after_templates,
     groups,
     idx_good_templates) = get_templates.run(
        spike_train_clear,
        tmp_loc,
        output_directory=output_dir,
        if_file_exists=CONFIG.templates.if_file_exists,
        save_results=CONFIG.templates.save_results)
    time_templates = time.time() - start

    # run deconvolution
    start = time.time()
    spike_train = deconvolute.run(spike_index_all, templates,
                                  output_directory=output_dir)
    time_deconvolution = time.time() - start

    # save metadata in tmp
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logging.info('Saving metadata in {}'.format(path_to_metadata))
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')

    if isinstance(config, Mapping):
        with open(path_to_config_copy, 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
    else:
        shutil.copy2(config, path_to_config_copy)

    logging.info('Saving copy of config: {} in {}'.format(
        config, path_to_config_copy))

    # TODO: complete flag saves other files needed for integrating phy
    # with yass, the integration hasn't been completed yet

    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARIZED_PATH = path.join(TMP_FOLDER, 'standarized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standarized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARIZED_PATH,
                                     spike_size=CONFIG.spike_size,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_order=PARAMS['data_order'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in {}...'
                    .format(path_to_waveforms))

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in {}...'
                    .format(path_to_main_channels))

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info('Saved all scores in {}...'
                    .format(path_to_waveforms_score))

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        templates_ = templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in {}...'
                    .format(path_to_templates_main_c))

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in {}...'
                    .format(path_to_templates_score))

    logger.info('Finished YASS execution. Timing summary:')

    total = (time_preprocess + time_detect + time_cluster + time_templates
             + time_deconvolution)
    logger.info('\t Preprocess: %s (%.2f %%)',
                human_readable_time(time_preprocess),
                time_preprocess / total * 100)
    logger.info('\t Detection: %s (%.2f %%)',
                human_readable_time(time_detect),
                time_detect / total * 100)
    logger.info('\t Clustering: %s (%.2f %%)',
                human_readable_time(time_cluster),
                time_cluster / total * 100)
    logger.info('\t Templates: %s (%.2f %%)',
                human_readable_time(time_templates),
                time_templates / total * 100)
    logger.info('\t Deconvolution: %s (%.2f %%)',
                human_readable_time(time_deconvolution),
                time_deconvolution / total * 100)

    return spike_train
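# The `config` argument accepts either a path or a mapping (see the docstring
# above). Hypothetical sketch passing an in-memory mapping instead of a YAML
# path; the keys shown are placeholders, not a complete YASS configuration.
if __name__ == '__main__':
    config_mapping = {
        'data': {'root_folder': '/path/to/data'},
        # ... remaining YASS configuration sections ...
    }
    spike_train = run(config_mapping, clean=True, output_dir='tmp/')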
""" Checking results from threshold detector """ import matplotlib.pyplot as plt from yass.explore import SpikeTrainExplorer, RecordingExplorer path_to_data = '/Users/Edu/data/yass/tmp/standarized.bin' path_to_spike_train = '/Users/Edu/data/yass/tmp/spike_train.npy' exp = RecordingExplorer(path_to_data, spike_size=15) spe = SpikeTrainExplorer(path_to_spike_train, exp) spe.plot_templates(group_ids=range(10)) plt.show()
# imports from the earlier notebook cells, repeated here so the excerpt
# runs standalone
from os import path

from yass.explore import SpikeTrainExplorer, RecordingExplorer


# In[ ]:

# path to whitened recordings, geometry file and spike train
ROOT = path.join(path.expanduser('~'), 'data/yass')
path_to_data = path.join(ROOT, 'tmp/whitened.bin')
path_to_geom = path.join(ROOT, 'ej49_geometry1.txt')
path_to_spike_train = path.join(ROOT, 'middle_spike_train.npy')


# In[ ]:

# initialize recordings explorer, this is another helper class
# to explore recordings and is needed to initialize the spike
# train explorer
rce = RecordingExplorer(path_to_data, path_to_geom, spike_size=15,
                        neighbor_radius=70, dtype='float64', n_channels=49,
                        data_format='long')


# In[ ]:

# we now initialize the spike train explorer
spe = SpikeTrainExplorer(path_to_spike_train, rce)


# In[ ]:

# we can take a look at the spike train, first column is the spike
# index, second column is the spike ID
spe.spike_train
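# In[ ]:

# illustrative follow-up cell (not part of the original notebook): read the
# waveforms around the first few spike times and plot some templates,
# mirroring how the pipeline's `complete` step uses the recording explorer
import matplotlib.pyplot as plt

spike_times = spe.spike_train[:, 0]
waveforms = rce.read_waveforms(spike_times[:100])
print(waveforms.shape)

spe.plot_templates(group_ids=range(10))
plt.show()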