def test_cell_type_subsamples(session='1', animal='MD0ST5', lab='dipoppa',
                              expt='full_trial', sample='sst85_sample_0', cell_id=3):
    '''
    Check one stored cell-type subsample against the labels in the info_cells file.

    The expected indexes are the positions of every "good" cell whose type equals
    `cell_id`; the dataset `samples/cell_types/<sample>` must contain exactly that many
    entries and no repeated entries.

    Params:
        session:str: session directory name
        animal:str: animal directory name
        lab:str: lab directory name
        expt:str: experiment directory name
        sample:str: name of the dataset inside `samples/cell_types` to check
        cell_id:int: cell type key to compare against (0 = excitatory, 3 = inhibitory)

    Prints a message and returns early if the data.hdf5 file does not exist.
    Raises AssertionError if the stored sample has the wrong size or repeats.
    '''
    data_dir = get_user_dir('data')
    path = os.path.join(data_dir, lab, expt, animal, session, 'data.hdf5')
    if not os.path.exists(path):
        print(path, ' does not exist.')
        return

    # Get cell types (Need to change this for different mice)
    info_cells = loadmat('neural_dir/info_cells_MD0ST5_2018-04-04.mat')['info_cells']
    cell_type_idxs = info_cells[0][0][0][0]  # 0 = excitatory, 3 = inhibitory
    good_cell_idxs = info_cells[0][0][1][0]

    # Label all "bad" cells as -1 so they can never match a real cell type key
    labelled = [
        cell_type_idxs[i] if good == 1 else -1
        for i, good in enumerate(good_cell_idxs)
    ]
    expected_idxs = [i for i, label in enumerate(labelled) if label == cell_id]

    # The context manager closes the file even when an assertion below fails
    with h5py.File(path, 'r', libver='latest', swmr=True) as data:
        stored = data['samples']['cell_types'][sample][:]
        # Correct number of indexes
        assert len(stored) == len(expected_idxs)
        # No repeats
        assert len(set(stored)) == len(expected_idxs)

    print('Cell Type Subsample: Success')
def get_all_params(search_type='grid_search', args=None):
    '''
    Build the full hyperparameter namespace from the four config files.

    Params:
        search_type:str: strategy passed to HyperOptArgumentParser
        args:list: command line tokens to parse; when None, `sys.argv[1:]` is used

    Returns:
        the namespace produced by the parser once every config entry, the save/data
        directories and the dependent parameters have been registered

    Raises:
        ValueError: if the number of command line tokens is not exactly 8
    '''
    # Raise error if user has other command line arguments specified (as could override configs
    # in confusing ways). 8 tokens presumably means the four --*_config flags, each followed by
    # its file name.
    cli_tokens = sys.argv[1:] if args is None else args
    if len(cli_tokens) != 8:
        raise ValueError(
            'No command line arguments allowed other than config file names')

    # Create parser
    parser = HyperOptArgumentParser(strategy=search_type)
    for flag in ('--data_config', '--model_config', '--training_config',
                 '--compute_config'):
        parser.add_argument(flag, type=str)

    namespace, _ = parser.parse_known_args(args)

    # Add arguments from all configs
    configs = [
        namespace.data_config,
        namespace.model_config,
        namespace.training_config,
        namespace.compute_config,
    ]
    for config in configs:
        # Close each config file as soon as it has been parsed
        with open(config, 'r') as config_file:
            config_json = commentjson.load(config_file)
        for key, value in config_json.items():
            add_to_parser(parser, key, value)

    # Add save/user dirs
    parser.add_argument('--save_dir', default=get_user_dir('save'), type=str)
    parser.add_argument('--data_dir', default=get_user_dir('data'), type=str)

    # Add parameters dependent on previous inputs; the second partial parse picks up the
    # values of the arguments registered from the config files above
    namespace, _ = parser.parse_known_args(args)
    add_dependent_params(parser, namespace)

    return parser.parse_args(args)
def main():
    '''
    Add subsample datasets to the SB028 MSP_pupil session files.

    Opens the data.hdf5 file of sessions 1, 3, 4, 5 and 6 in append mode, derives the
    inhibitory and excitatory cell indexes from the info_cells file, then hands the open
    files to add_subsamples, add_cell_type and cell_types_powerlaw before closing them.
    '''
    # Get hdf5 files
    data_dir = get_user_dir('data')
    fs = [
        h5py.File(data_dir + '/dipoppa/MSP_pupil/SB028/{}/data.hdf5'.format(number),
                  'a', libver='latest', swmr=True)
        for number in (1, 3, 4, 5, 6)
    ]
    print(fs)

    # Get cell types
    mat_contents = loadmat('neural_dir/SB028/2019-11-08/info_cells_SB028_2019-11-08.mat')
    info_cells = mat_contents['info_cells']
    raw_types = info_cells[0][0][0][0]  # 0 = excitatory, 3 = inhibitory
    good_flags = info_cells[0][0][1][0]

    # Label all "bad" cells as -1
    cell_type_idxs = []
    for position, flag in enumerate(good_flags):
        cell_type_idxs.append(raw_types[position] if flag == 1 else -1)

    # The first label that is neither excitatory (0) nor bad (-1) is taken as the
    # inhibitory cell type key
    non_excitatory = [
        label for label in cell_type_idxs if not (label == 0 or label == -1)
    ]
    inh_id = non_excitatory[0]
    print('Inhibitory Cell Type: ' + str(inh_id))

    inh_idxs = [pos for pos, label in enumerate(cell_type_idxs) if label == inh_id]
    exc_idxs = [pos for pos, label in enumerate(cell_type_idxs) if label == 0]
    num_cells = len(cell_type_idxs)
    print(num_cells, 'NUM CELLS')
    print('inh: ', len(inh_idxs))
    print('exc: ', len(exc_idxs))

    # Disabled clean-up kept from the original for reference:
    #   for f in fs:
    #       if f.get('samples'):
    #           del f['samples']

    print('Adding subsamples')
    add_subsamples(fs, num_cells)
    # print('Adding Histogram Trials')
    add_cell_type(fs, inh_idxs, exc_idxs)
    print('Adding powerlaw samples')
    cell_types_powerlaw(fs, inh_idxs, exc_idxs)
    print('...done')
    close(fs)
def sample_n_neurons(session_params, sample_sizes, n_samples, group_class='samples',
                     group_name='subsamples', prefix=''):
    '''
    Create N samples of randomly selected neurons in behavenet's HDF5 files.
    Supports multiple sessions for a given animal, but not multiple
    animals/experiments/labs.

    Any existing group called `group_name` under `group_class` is deleted and rebuilt, so
    the call can be repeated on the same file.

    Params:
        session_params:dict: dictionary specifying the following fields, which together
            define a dataset:
                - lab (str)
                - experiment_name (str)
                - animal (str)
                - sessions (list)
        sample_sizes:list: list specifying the number of neurons in each sample
        n_samples:int: how many repeats to take of each sample size (ie; n=10 would add 10
            samples for each sample size)
        group_class:str: HDF5 upper level group name (created if missing)
        group_name:str: name of second level group to add these samples under
        prefix:str: identifying prefix for datasets

    Raises:
        FileNotFoundError: if a session's data.hdf5 file does not exist
    '''
    lab = session_params.get('lab')
    experiment_name = session_params.get('experiment_name')
    animal = session_params.get('animal')
    sessions = session_params.get('sessions')

    for session in sessions:
        data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name,
                                 animal, str(session), 'data.hdf5')
        if not os.path.isfile(data_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), data_path)
        logging.info('HDF5 File: {}'.format(data_path))

        # The context manager closes the file even if sampling or writing fails
        with h5py.File(data_path, 'a', libver='latest', swmr=True) as data:
            neural = data['neural']
            ex_trial = list(neural.keys())[0]
            # Read the shape from the dataset metadata instead of loading the whole trial;
            # axis 1 is used as the neuron count
            n_neural = neural[ex_trial].shape[1]

            # Make sure the upper level group exists before looking inside it
            parent = data.require_group(group_class)

            # Delete old data
            if group_name in parent:
                logging.info('Deleting old group: {}'.format(group_name))
                del parent[group_name]
            group = parent.require_group(group_name)

            for size in sample_sizes:
                for i in range(n_samples):
                    dset = np.random.choice(n_neural, size, replace=False)
                    dset_name = prefix + 'n{}_t{}'.format(size, i)
                    logging.info('Adding dataset: {}'.format(dset_name))
                    group.create_dataset(dset_name, data=dset)

        # Count of datasets written to this file
        logging.info('Added {} total datasets'.format(n_samples * len(sample_sizes)))
def add_cell_type_samples(session_params, inh_idxs, exc_idxs, n_samples,
                          group_class='samples', group_name='cell_types', prefix=''):
    '''
    Adds n_samples of inhibitory and excitatory neurons. Every sample contains as many
    neurons as the smaller of the two populations.

    One inhibitory dataset (`<prefix>inh_all`) and `n_samples` excitatory datasets
    (`<prefix>exc_t{trial_number}`) are written per session. The group itself is kept, so
    datasets written under other names or prefixes survive; only datasets whose names this
    call writes are replaced, which makes the call repeatable on the same file.

    params:
        session_params:dict: Dictionary containing information to extract data.hdf5 file
            (lab, experiment_name, animal, sessions)
        inh_idxs:list: list of inhibitory cell indexes
        exc_idxs:list: excitatory cell indexes
        n_samples:int: how many random excitatory samples to take
        group_class:str: higher level group to add data to (created if missing)
        group_name:str: lower level group to add data to (created if missing)
        prefix:str: unique naming prefix (otherwise will be labelled exc_t{trial_number},
            inh_all)

    Raises:
        FileNotFoundError: if a session's data.hdf5 file does not exist
    '''
    lab = session_params.get('lab')
    experiment_name = session_params.get('experiment_name')
    animal = session_params.get('animal')
    sessions = session_params.get('sessions')

    for session_number in sessions:
        data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name,
                                 animal, str(session_number), 'data.hdf5')
        if not os.path.isfile(data_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), data_path)
        logging.info('HDF5 File: {}'.format(data_path))

        # The context manager closes the file even if sampling or writing fails
        with h5py.File(data_path, 'a', libver='latest', swmr=True) as data:
            group = data.require_group(group_class).require_group(group_name)

            n_neurons = min(len(inh_idxs), len(exc_idxs))

            # Assumes that there are fewer inhibitory neurons than excitatory (so we only
            # need one inhibitory sample)
            # TODO: Probably a fair assumption, but correct this
            inh_dataset = np.random.choice(inh_idxs, n_neurons, replace=False)
            _replace_dataset(group, prefix + 'inh_all', inh_dataset)
            logging.info('Added Inhibitory dataset: {}'.format(inh_dataset.shape))

            for i in range(n_samples):
                exc_dataset = np.random.choice(exc_idxs, n_neurons, replace=False)
                _replace_dataset(group, prefix + 'exc_t{}'.format(i), exc_dataset)
                logging.info('Added excitatory dataset {}: {}'.format(i, exc_dataset.shape))


def _replace_dataset(group, name, values):
    '''Write `values` as dataset `name` in `group`, dropping any older dataset of that name.'''
    if name in group:
        # create_dataset refuses to overwrite an existing name, so remove it first
        logging.info('Replacing existing dataset: {}'.format(name))
        del group[name]
    group.create_dataset(name, data=values)
def test_subsamples(session='1', animal='MD0ST5', lab='dipoppa', expt='full_trial',
                    sample='sample_100_t0', cell_id=3):
    '''
    Check that one stored random subsample contains no repeated neuron indexes.

    Params:
        session:str: session directory name
        animal:str: animal directory name
        lab:str: lab directory name
        expt:str: experiment directory name
        sample:str: name of the dataset inside `samples/subsamples` to check
        cell_id:int: unused here; kept so the signature matches
            test_cell_type_subsamples

    Prints a message and returns early if the data.hdf5 file does not exist.
    Raises AssertionError if the stored sample contains duplicates.
    '''
    data_dir = get_user_dir('data')
    path = os.path.join(data_dir, lab, expt, animal, session, 'data.hdf5')
    if not os.path.exists(path):
        print(path, ' does not exist.')
        return

    # The context manager closes the file even when the assertion below fails
    with h5py.File(path, 'r', libver='latest', swmr=True) as data:
        stored = data['samples']['subsamples'][sample][:]
        # No duplicates
        assert len(set(stored)) == len(stored)
        print('Subsample Test: Success')
def get_cell_idxs(session_params, inh_key=3):
    '''
    Return indexes of all the inhibitory, excitatory, and overall good cells.

    The info_cells file is looked up at
    "<data dir>/<lab>/<experiment_name>/<animal>/info_cells_<animal>_<date>.mat", i.e. next
    to the session folders that hold the data.hdf5 files. Bad cells are removed before
    indexing, so every returned index refers to a position among the good cells only.

    Params:
        session_params:dict: Dictionary specifying relevant details for session
            (lab, experiment_name, animal, date)
        inh_key:int: key indicating inhibitory cell type (2: PV, 3: SST, 5: GAD)

    Returns:
        inhibitory indexes, excitatory indexes, all good cell indexes

    Raises:
        FileNotFoundError: if the info_cells file does not exist
    '''
    lab, experiment_name, animal, date = (
        session_params.get(field)
        for field in ('lab', 'experiment_name', 'animal', 'date')
    )

    # Load file and separate contents
    filename = 'info_cells_' + animal + '_' + date + '.mat'
    cell_data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name,
                                  animal, filename)
    if not os.path.exists(cell_data_path):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cell_data_path)

    record = loadmat(cell_data_path)['info_cells'][0][0]
    good_cells = record[1][0]
    cell_types = record[0][0]

    # Remove bad cells from the labelled cell data
    bad_cells = [pos for pos, is_good in enumerate(good_cells) if is_good == 0]
    cell_types = np.delete(cell_types, bad_cells, axis=0)

    # Collect indexes of inhibitory and excitatory neurons in a single pass
    inh_idxs = []
    exc_idxs = []
    for pos, label in enumerate(cell_types):
        if label == inh_key:
            inh_idxs.append(pos)
        if label == 0:
            exc_idxs.append(pos)

    all_idxs = list(range(len(cell_types)))
    return inh_idxs, exc_idxs, all_idxs
def process(sess_id, lab='dipoppa', expt='SSSVAE', animal='MD0ST5', date='2018-04-04',
            neural_data_root='/home/yoni/behavenet/neural_dir'):
    '''
    Convert one recording session into a behavenet-style data.hdf5 file.

    Reads the eye video, the trial index tables, the neural activity, the cell labels and
    the facemap pupil output from "<neural_data_root>/<animal>/<date>", and writes
    "<data dir>/<lab>/<expt>/<animal>/<sess_id>/data.hdf5" containing:
        - images/trial_XXXX: video frames resized to 256 x 128 (uint8)
        - labels/trial_XXXX: z-scored pupil area and centre of mass (float32)
        - labels_sc/trial_XXXX: the same labels without normalisation (float32)
        - neural/trial_XXXX: z-scored activity of the good cells (float32)
        - samples/subsamples/n{size}_t{k}: random neuron subsamples, 10 per size
        - samples/cell_types/inh_t0, exc_t{k}: inhibitory indexes and 50 size-matched
          random excitatory samples

    Params:
        sess_id:str: session directory name (also part of every input file name)
        lab:str: lab directory name for the output path
        expt:str: experiment directory name for the output path
        animal:str: animal identifier
        date:str: recording date as used in the input file names
        neural_data_root:str: directory holding the raw "<animal>/<date>" folders

    Raises:
        IOError: if the video file cannot be opened
    '''
    vid_id = '{}_{}_{}'.format(date, sess_id, animal)
    neural_dir = os.path.join(neural_data_root, animal, date)
    session = sess_id
    session_dir = os.path.join(neural_dir, session)

    # (can be any format loadable by open cv)
    mp4_file = os.path.join(session_dir, vid_id + '_eye.mj2')

    # video frames will be resized to these dimensions
    # downsampling is preferable as it speeds up autoencoder fitting time
    # we typically use images with the largest dimension <= 256
    xpix = 256
    ypix = 128

    # processed data in behavenet format will be stored in
    # data_dir/lab/expt/animal/session/data.hdf5
    data_dir = get_user_dir('data')
    proc_data_filepath = os.path.join(data_dir, lab, expt, animal, session)
    print('Writing to: ', proc_data_filepath, 'Video from: ', mp4_file)

    # set up hdf5 file
    hdf5_file = os.path.join(proc_data_filepath, 'data.hdf5')
    os.makedirs(os.path.dirname(hdf5_file), exist_ok=True)

    # read video file and check
    cap = cv2.VideoCapture(mp4_file)
    if not cap.isOpened():
        raise IOError('error opening video file at %s' % mp4_file)

    try:
        print('Loading and processing data...')

        # Load trial info to sync video/neural data, and neural activity
        vvi = loadmat(
            os.path.join(session_dir, 'batch_video_id' + vid_id + '.mat'))['batch_video_id']
        nni = loadmat(
            os.path.join(session_dir, 'batch_neural_id' + vid_id + '.mat'))['batch_neural_id']
        nna = loadmat(
            os.path.join(session_dir, 'neural_activity' + vid_id + '.mat'))['neural_activity']

        # Load cell labels
        cell_data_path = os.path.join(neural_dir,
                                      'info_cells_{}_{}.mat'.format(animal, date))
        info_cells = loadmat(cell_data_path)['info_cells']
        good_cells = info_cells[0][0][1][0]
        cell_types = info_cells[0][0][0][0]

        # Remove bad cells from neural activity and labelled cell data
        bad_cells = [i for i, is_good in enumerate(good_cells) if is_good == 0]
        cell_types = np.delete(cell_types, bad_cells, axis=0)
        nna = np.delete(nna, bad_cells, axis=1)
        nna = ZScore(nna)

        # Collect indexes of inhibitory and excitatory neurons
        inh_idxs = [i for i, label in enumerate(cell_types) if label == 3]
        exc_idxs = [i for i, label in enumerate(cell_types) if label == 0]

        # Load facemap data and create labels
        facemap_data = np.load(
            os.path.join(session_dir, 'facemap', vid_id + '_eye_proc.npy'),
            allow_pickle=True).item()
        pupil_area = facemap_data['pupil'][0]['area_smooth'].reshape(-1, 1)
        pupil_com = facemap_data['pupil'][0]['com_smooth']
        print('Neural data: ', nna.shape, '# Inhibitory neurons: ', len(inh_idxs),
              '# Excitatory neurons: ', len(exc_idxs))

        # Labels_sc are the un-normalized versions of the labels
        labels_sc = np.concatenate([pupil_area, pupil_com], axis=1)

        # Labels are a collection of ZScored pupil area and center of mass
        p_area_norm = ZScore(pupil_area, axis=0)
        p_com_norm = ZScore(pupil_com, axis=0)
        labels = np.concatenate([p_area_norm, p_com_norm], axis=1)
        print('Label shape (scaled+unscaled): ', labels.shape, labels_sc.shape)

        n_trials = vvi.shape[1]

        print(hdf5_file)
        print('Creating hdf5 file')
        t_beg = time.time()
        with h5py.File(hdf5_file, 'a', libver='latest', swmr=True) as f:
            # single write multi-read
            f.swmr_mode = True
            n_written = _write_process_trials(f, cap, vvi, nni, nna, labels, labels_sc,
                                              xpix, ypix)
            _write_process_samples(f, nna.shape[1], inh_idxs, exc_idxs)
    finally:
        # Release the video handle whether or not processing succeeded
        cap.release()

    # print out timing info
    t_tot = time.time() - t_beg
    # The counter tracks trials (one increment per trial), not individual frames
    print('Processed {} trials in total'.format(n_written))
    print('total processing time: %f sec' % t_tot)
    # max() avoids a ZeroDivisionError when the session has no trials
    print('time per trial: %f sec' % (t_tot / max(n_trials, 1)))


def _write_process_trials(f, cap, vvi, nni, nna, labels, labels_sc, xpix, ypix):
    '''Write one images/labels/labels_sc/neural dataset per trial; return the trial count.'''
    group_i = f.require_group('images')
    group_n = f.require_group('neural')
    # Groups for behavioral variables (normalized and un-normalized)
    group_labels = f.require_group('labels')
    group_labels_sc = f.require_group('labels_sc')

    n_trials = vvi.shape[1]
    n_written = 0
    for trial in range(n_trials):
        if trial % 10 == 0:
            print('processing trial %03i' % trial)
        dset_name = 'trial_%04i' % n_written

        # find video indices during this trial; the tables are presumably 1-based with an
        # inclusive end (hence the "- 1" on the start only) - TODO confirm
        ts_idxs = np.arange(vvi[0, trial] - 1, vvi[1, trial])

        # load and process corresponding frames; only channel 0 of each frame is resized
        # and stored, any further channels stay zero
        frames = get_frames_from_idxs(cap, ts_idxs)
        sh = frames.shape
        frames_proc = np.zeros((sh[0], sh[1], ypix, xpix), dtype='uint8')
        for i in range(sh[0]):
            frames_proc[i, 0, :, :] = cv2.resize(frames[i, 0], (xpix, ypix))

        # save image data and the labels of the same frames
        group_i.create_dataset(dset_name, data=frames_proc, dtype='uint8')
        group_labels.create_dataset(dset_name, data=labels[ts_idxs], dtype='float32')
        group_labels_sc.create_dataset(dset_name, data=labels_sc[ts_idxs],
                                       dtype='float32')

        # pick out the neural activity recorded during this trial
        neural = nna[nni[0, trial] - 1:nni[1, trial], :]
        group_n.create_dataset(dset_name, data=neural, dtype='float32')

        n_written += 1
    return n_written


def _write_process_samples(f, n_cells, inh_idxs, exc_idxs):
    '''Write the random neuron subsamples and the inhibitory/excitatory index samples.'''
    samples = f.create_group('samples')

    # Add variable size subsamples
    subsamples = samples.create_group('subsamples')
    subsample_sizes = [
        20, 40, 60, 80, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300
    ]
    for size in subsample_sizes:
        if size > n_cells:
            # Sampling without replacement cannot return more cells than exist
            print('Skipping subsample size {}: only {} cells available'.format(size, n_cells))
            continue
        for repeat in range(10):
            # Choose random sample w/out replacement from all neural indexes
            subsample = np.random.choice(n_cells, size, replace=False)
            subsamples.create_dataset('n{}_t{}'.format(size, repeat), data=subsample)

    # Add Inh/Exc subsamples. No dtype is forced: cell indexes can exceed 255, which a
    # uint8 dataset cannot represent.
    cell_type_group = samples.create_group('cell_types')
    cell_type_group.create_dataset('inh_t0', data=inh_idxs)
    for i in range(50):
        exc_sample = np.random.choice(exc_idxs, len(inh_idxs), replace=False)
        cell_type_group.create_dataset('exc_t{}'.format(i), data=exc_sample)
import os import h5py from tqdm import tqdm import behavenet import numpy as np mouse = 'MD0ST5' expt = 'full_trial' sessions = ['1', '2', '3', '4'] for session in sessions: data_path = os.path.join(behavenet.get_user_dir('data'), 'dipoppa', expt, mouse, session, 'data.hdf5') f = h5py.File(data_path, 'a', swmr=True, libver='latest') # Concatenate all trials together data = np.array([]) neural = f['neural'] print('Loading data for session: ', session) for trial_name in tqdm(neural): trial = neural[trial_name][:] if data.size == 0: data = trial else: data = np.concatenate([data, trial], axis=0) print('Computing mean, std...')