def extract_traces(dataset: SplitDataset, rois, output_dir=None, block_duration=40):
    new_dataset = EmptySplitDataset(
        shape_full=dataset.shape,
        shape_block=(block_duration,) + dataset.shape[1:],
        root=output_dir or dataset.root.parent,
        name="traces",
    )
    Parallel(n_jobs=20)(
        delayed(_extract_rois_block)(
            dataset,
            new_block,
            str(new_dataset.root / new_dataset.files[i_block]),
            rois=rois,
        )
        for i_block, (_, new_block) in enumerate(new_dataset.slices(as_tuples=True))
    )

    trace_dset = new_dataset.finalize()
    traces = np.concatenate(
        [fl.load(str(f), "/traces") for f in trace_dset.files.flatten()], 1
    )
    first_file = trace_dset.files.flatten()[0]
    coords = fl.load(str(first_file), "/coords")
    areas = fl.load(str(first_file), "/areas")

    trace_data = dict(traces=traces, coords=coords, areas=areas)
    fl.save(str(trace_dset.root.parent / "traces.h5"), trace_data)
    return trace_data
def test_load_multiple_groups(self):
    with tmp_filename() as fn:
        x = dict(one=np.ones(10), two='string', three=200)
        fl.save(fn, x)

        one, three = fl.load(fn, ['/one', '/three'])
        np.testing.assert_array_equal(one, x['one'])
        assert three == x['three']

        three, two = fl.load(fn, ['/three', '/two'])
        assert three == x['three']
        assert two == x['two']
def test_load_group(self):
    with tmp_filename() as fn:
        x = dict(one=np.ones(10), two='string')
        fl.save(fn, x)

        one = fl.load(fn, '/one')
        np.testing.assert_array_equal(one, x['one'])

        two = fl.load(fn, '/two')
        assert two == x['two']

        full = fl.load(fn, '/')
        np.testing.assert_array_equal(x['one'], full['one'])
        assert x['two'] == full['two']
def load_tracks(filepath):
    """Load tracker data."""
    with h5py.File(filepath, 'r') as f:
        if 'data' in f.keys():
            # in old-style or unfixed tracks, everything is in the 'data' group
            data = dd_io.load(filepath)
            chbb = data['chambers_bounding_box'][:]
            heads = data['lines'][:, 0, :, 0, :]  # nframe, fly id, coordinates
            tails = data['lines'][:, 0, :, 1, :]  # nframe, fly id, coordinates
            box_centers = data['centers'][:, 0, :, :]  # nframe, fly id, coordinates
            background = data['background'][:]
            first_tracked_frame = data['start_frame']
            last_tracked_frame = data['frame_count']
        else:
            chbb = f['chambers_bounding_box'][:]
            heads = f['lines'][:, 0, :, 0, :]  # nframe, fly id, coordinates
            tails = f['lines'][:, 0, :, 1, :]  # nframe, fly id, coordinates
            box_centers = f['centers'][:, 0, :, :]  # nframe, fly id, coordinates
            background = f['background'][:]
            first_tracked_frame = f.attrs['start_frame']
            last_tracked_frame = f.attrs['frame_count']

    # everything to frame coords
    heads = heads[..., ::-1]
    tails = tails[..., ::-1]
    heads = heads + chbb[1][0][:]
    tails = tails + chbb[1][0][:]
    box_centers = box_centers + chbb[1][0][:]
    body_parts = ['head', 'center', 'tail']
    x = np.stack((heads, box_centers, tails), axis=2)
    x = x[first_tracked_frame:last_tracked_frame, ...]
    return x, body_parts, first_tracked_frame, last_tracked_frame, background
def test_tracking_experiments(self):
    """
    Note: this test assumes that the default parameters for the tracking
    functions are the correct ones to track the videos in the examples/assets
    folder, from which the correct results have been calculated.
    """
    self.app = QApplication([])
    video_file = str(
        Path(__file__).parent.parent / "examples" / "assets" / "fish_compressed.h5"
    )

    for method in ["eyes", "tail"]:
        self.run_experiment(
            protocol=TestProtocol(),
            camera=dict(video_file=video_file),
            tracking=dict(method=method),
            log_format="hdf5",
        )

        with open(self.metadata_path, "r") as f:
            data = json.load(f)

        behavior_log = fl.load(
            self.metadata_path.parent / data["tracking"]["behavior_log"], "/data"
        )

        assert (
            method
            == data["general"]["program_version"]["arguments"]["tracking"]["method"]
        )

        if method == "tail":
            for k in ["theta_00", "theta_08"]:
                self.check_result(behavior_log[k].values, k)
        elif method == "eyes":
            for k in ["th_e0", "th_e1"]:
                self.check_result(behavior_log[k].values, k)

        self.clear_dir()
def test_force_pickle(self):
    with tmp_filename() as fn:
        x = dict(one=dict(two=np.arange(10)), three='string')
        xf = dict(one=dict(two=x['one']['two']), three=x['three'])
        fl.save(fn, xf)
        xs = fl.load(fn)
        np.testing.assert_array_equal(x['one']['two'], xs['one']['two'])
        assert x['three'] == xs['three']

        # Try loading the nested group /one/two directly
        two = fl.load(fn, '/one/two')
        np.testing.assert_array_equal(x['one']['two'], two)
def apply_shifts(dataset, output_dir=None, block_size=120, n_jobs=10, verbose=False):
    new_dataset = EmptySplitDataset(
        root=output_dir or dataset.root.parent,
        name="aligned",
        shape_full=dataset.shape,
        shape_block=(block_size,) + dataset.shape_block[1:],
    )
    # Look for the shifts file next to the output, falling back to the dataset
    # parent when no output_dir is given (matching the default root above):
    shifts_dir = output_dir or dataset.root.parent
    shifts_data = fl.load(str(next(shifts_dir.glob("*shifts*"))))

    Parallel(n_jobs=n_jobs)(
        delayed(_apply_shifts)(
            dataset,
            new_block,
            str(new_dataset.root / new_dataset.files[i_block]),
            shifts_data["shifts"],
            shifts_data["shift_times"],
        )
        for i_block, (_, new_block) in enumerate(new_dataset.slices(as_tuples=True))
    )

    return new_dataset.finalize()
def extract_traces_coords(
    dataset: SplitDataset, coords, output_dir=None, block_duration=60, n_jobs=5, **kwargs
):
    new_dataset = EmptySplitDataset(
        shape_full=dataset.shape,
        shape_block=(block_duration,) + dataset.shape[1:],
        root=output_dir or dataset.root.parent,
        name="traces",
    )
    Parallel(n_jobs=n_jobs)(
        delayed(_extract_traces_coords)(
            dataset,
            new_block,
            str(new_dataset.root / new_dataset.files[i_block]),
            coords=coords,
            **kwargs,
        )
        for i_block, (_, new_block) in enumerate(new_dataset.slices(as_tuples=True))
    )

    trace_dset = new_dataset.finalize()
    traces = np.concatenate(
        [fl.load(str(f), "/traces") for f in trace_dset.files.flatten()], 1
    )
    trace_data = dict(traces=traces, coords=coords)
    fl.save(str(trace_dset.root.parent / "traces.h5"), trace_data)
    return trace_data
def test_compression_true(self):
    rs = np.random.RandomState(1234)
    with tmp_filename() as fn:
        x = rs.normal(size=(1000, 5))
        for comp in [None, True, 'blosc', 'zlib', ('zlib', 5)]:
            fl.save(fn, x, compression=comp)
            x1 = fl.load(fn)
            assert (x == x1).all()
def return_cached(*args, **kwargs):
    f_name = filename or func.__name__
    output_file = output_dir / (f_name + ".h5")
    if output_file.is_file():
        return fl.load(output_file)
    else:
        res = func(*args, **kwargs)
        fl.save(output_file, res)
        return res
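
The closure above references filename, func, and output_dir from an enclosing scope. A minimal sketch of what such an enclosing decorator could look like follows; the names cache_results and the mkdir call are assumptions for illustration, not the original API.

from functools import wraps
from pathlib import Path

import flammkuchen as fl


def cache_results(output_dir, filename=None):
    # Hypothetical decorator factory: results of the wrapped function are
    # cached as HDF5 files in output_dir and reloaded on later calls.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    def decorator(func):
        @wraps(func)
        def return_cached(*args, **kwargs):
            f_name = filename or func.__name__
            output_file = output_dir / (f_name + ".h5")
            if output_file.is_file():
                # Reuse the cached result instead of recomputing:
                return fl.load(output_file)
            res = func(*args, **kwargs)
            fl.save(output_file, res)
            return res

        return return_cached

    return decorator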
def test_load_slice(self):
    with tmp_filename() as fn:
        x = np.arange(3 * 4 * 5).reshape((3, 4, 5))
        fl.save(fn, dict(x=x))

        s = fl.aslice[:2]
        xs = fl.load(fn, '/x', sel=s)
        np.testing.assert_array_equal(xs, x[s])

        s = fl.aslice[:, 1:3]
        xs = fl.load(fn, '/x', sel=s)
        np.testing.assert_array_equal(xs, x[s])

        xs = fl.load(fn, sel=s, unpack=True)
        np.testing.assert_array_equal(xs, x[s])

        fl.save(fn, x)
        xs = fl.load(fn, sel=s)
        np.testing.assert_array_equal(xs, x[s])
def existing_file_background(filepath):
    """Returns a numpy array from an image stored at filepath."""
    if filepath.endswith(".h5"):
        return fl.load(filepath)
    else:
        # If using OpenCV, we have to get RGB, not BGR
        try:
            return cv2.imread(filepath)[:, :, [2, 1, 0]]
        except TypeError:
            log = logging.getLogger()
            log.info("Could not load " + filepath)
            return np.zeros((10, 10), dtype=np.uint8)
def existing_file_background(filepath):
    """Returns a numpy array from an image stored at filepath."""
    filepath = Path(filepath)
    if filepath.suffix == ".h5":
        return fl.load(filepath)
    else:
        # imageio returns RGB directly (unlike OpenCV, which returns BGR)
        try:
            return imageio.imread(str(filepath))
        except TypeError:
            log = logging.getLogger()
            log.info("Could not load " + str(filepath))
            return np.zeros((10, 10), dtype=np.uint8)
def test_softlinks_recursion(self):
    with tmp_filename() as fn:
        A = np.random.randn(3, 3)
        df = pd.DataFrame({'int': np.arange(3), 'name': ['zero', 'one', 'two']})
        AA = 4
        s = dict(A=A, B=A, c=A, d=A, f=A, g=[A, A, A], AA=AA, h=AA, df=df, df2=df)
        s['g'].append(s)

        n = reconstruct(fn, s)
        assert n['g'][0] is n['A']
        assert (n['A'] is n['B'] is n['c'] is n['d'] is n['f']
                is n['g'][0] is n['g'][1] is n['g'][2])
        assert n['g'][3] is n
        assert n['AA'] == AA == n['h']
        assert n['df'] is n['df2']
        assert (n['df'] == df).all().all()

        # test 'sel' option on link ... need to read two vars
        # to ensure at least one is a link:
        col1 = fl.load(fn, '/A', fl.aslice[:, 1])
        assert np.all(A[:, 1] == col1)
        col1 = fl.load(fn, '/B', fl.aslice[:, 1])
        assert np.all(A[:, 1] == col1)
def read_hdf5(path):
    # read the image if the h5 file is an array or part of split dataset files
    try:
        data = fl.load(path)
    except (OSError, RuntimeError):
        return None

    if not isinstance(data, (np.ndarray, dict)):
        return None
    elif isinstance(data, dict):
        key = [key for key in list(data.keys()) if "stack" in key]
        if key:
            data = data[key[0]]
        else:
            return None

    add_kwargs = {}
    return [(data, add_kwargs)]
def load_segmentation(filepath):
    """Load output produced by DeepSongSegmenter.

    File should have at least 'event_names' and 'event_indices' datasets.
    """
    res = dd_io.load(filepath)

    # extract event_seconds
    event_seconds = {}
    for indices, name in zip(res['event_indices'], res['event_names']):
        event_seconds[name] = indices / res['samplerate_Hz']

    # event_categories
    if 'event_categories' in res:
        event_categories = res['event_categories']
    else:
        event_categories = {}
        for name, times in event_seconds.items():
            if times.ndim == 1 or times.shape[1] == 1:
                event_categories[name] = 'event'
            else:
                event_categories[name] = 'segment'

    return event_seconds, event_categories
def __init__(self, fd, name, load_matrix=True, keep_barcodes=None, groups=None):
    self.name = name
    if load_matrix:
        dd = fl.load(fd)
        self.barcodes = dd['barcodes']
        self.snvs = dd['ann']
        self.strand_ref = dd['strand_ref_mat']
        self.strand_alt = dd['strand_alt_mat']
        if keep_barcodes is not None:
            cidx = [i for i, b in enumerate(self.barcodes) if b in keep_barcodes]
            print(f'Keeping {len(cidx)} barcodes out of {len(self.barcodes)}')
            self.strand_ref = self.strand_ref[:, cidx]
            self.strand_alt = self.strand_alt[:, cidx]
            self.barcodes = self.barcodes[cidx]
    else:
        with tables.open_file(fd, mode='r') as fp:
            hp = _HDFStoreWithHandle(fp)
            self.snvs = hp.get('ann')

    self.snvs['snv_idx'] = NP.arange(0, len(self.snvs))
    self.snvs['g1000_raw'] = self.snvs['g1000']
    self.snvs['strand_change'] = self.snvs['strand_ref'] + self.snvs['strand_alt']
    if 'known' not in self.snvs.columns:
        self.snvs['known'] = ''
    self._build_annotations(groups)
def __init__(self, root, prefix=None):
    """
    :param root: The directory containing the files
    :param prefix: The class assumes individual file names to be xxxx.h5.
        If there is a prefix to this, for example if the files are
        stack_xxxx.h5, it has to be passed to the object as a string;
        in this particular case it would be prefix="stack_"
    """
    # Load information about stack and splitting. Use the json metadata
    # file if possible:
    self.root = Path(root)
    try:
        stack_meta_f = next(self.root.glob("*stack_metadata.json"))
        with open(str(stack_meta_f), "r") as f:
            block_metadata = json.load(f)
    except StopIteration:
        last_data_f = sorted(list(self.root.glob("{}*.h5".format(prefix or ""))))[-1]
        block_metadata = fl.load(str(last_data_f), "/stack_metadata")

        # Ugly keyword fix to handle transition to new json system:
        for new_k, old_k in zip(["shape_block", "shape_full"],
                                ["block_size", "full_size"]):
            block_metadata[new_k] = block_metadata.pop(old_k)

        # By putting this here, we generate the proper stack_metadata
        # file when we open old version data (int conversion for some
        # weird format problem with flammkuchen dictionary):
        clean_metadata = dict()
        for k in block_metadata.keys():
            if isinstance(block_metadata[k], tuple):
                clean_metadata[k] = tuple(
                    int(n) if n is not None else None for n in block_metadata[k]
                )
            else:
                clean_metadata[k] = block_metadata[k]
        _save_metadata_json(clean_metadata, self.root)

    # Start the parent BlockSplitter:
    super().__init__(
        shape_full=block_metadata["shape_full"],
        shape_block=block_metadata["shape_block"],
    )

    if prefix is None:
        files = sorted(self.root.glob("*[0-9]*.h5"))
    else:
        files = sorted(self.root.glob("*{}_[0-9]*.h5".format(prefix)))
    self.files = np.array(files).reshape(self.block_starts.shape[:-1])

    # If available, read resolution
    try:
        self.resolution = block_metadata["resolution"]
    except KeyError:
        self.resolution = (1, 1, 1)

    # TODO check this
    self.shape = self.shape_cropped
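
A hypothetical usage sketch of the constructor above, assuming this __init__ belongs to the SplitDataset class referenced in the other examples; the folder path is made up for illustration.

from pathlib import Path

# Open a folder of blocks named stack_0000.h5, stack_0001.h5, ... and
# inspect the reassembled shape and the per-block file grid:
dset = SplitDataset(Path("/data/experiment/aligned"), prefix="stack_")
print(dset.shape, dset.resolution)
print(dset.files.shape)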
mversion = args['--mversion']
if speaker is not None and origin is not None:
    if mversion is None:
        mversion = metadata.speakers_info[speaker]['default_measurement']
    mformat = metadata.speakers_info[speaker]['measurements'][mversion]['format']
    brand = metadata.speakers_info[speaker]['brand']
    df = {}
    df[speaker] = {}
    df[speaker][origin] = {}
    df[speaker][origin][mversion] = parse_graphs_speaker(
        './datas', brand, speaker, mformat, mversion)
else:
    parse_max = args['--parse-max']
    if parse_max is not None:
        parse_max = int(parse_max)
    df = fl.load('cache.parse_all_speakers.h5')
    if df is None:
        df = parse_all_speakers(metadata.speakers_info, origin, './datas', None, parse_max)

if sanity_check(df, metadata.speakers_info) != 0:
    logging.error('Sanity checks failed!')
    sys.exit(1)

# add computed data to metadata
logging.info('Compute estimates per speaker')
add_estimates(df)

# check that json is valid
# try:
#     json.loads(metadata.speakers_info)
# except ValueError as ve:
#     logging.fatal('Metadata Json is not valid {0}'.format(ve))
ERA5 = '/work_users/b.legras/ERA5/STC'
TPPdir = os.path.join(ERA5, 'TPP/LR')
ENPsourceDir = os.path.join(ERA5, 'ENPsource')
ENPdir = os.path.join(ERA5, 'ENP')
tmpFile = 'tpp.grib'

date = datetime(2017, 9, 1, 0)
if (date.hour % 3) != 0:
    print('hour must be a multiple of 3')
    raise ValueError
else:
    # read first tropopause file
    tppFile = date.strftime('TPP%y%m%d%H.hdf5')
    TPPfullName = os.path.join(TPPdir, date.strftime('%Y/%m'), tppFile)
    tppN = fl.load(TPPfullName)

w1 = 1 / 3
w2 = 2 / 3
lons = np.arange(-10, 160.1, 0.25)
lats = np.arange(0, 50.1, 0.25)

while date < datetime(2017, 10, 1, 0):
    print('processing ', date)
    # Read the tropopause data
    if (date.hour % 3) == 0:
        tppO = tppN.copy()
        # read the tropopause data for the next 3h slot
def reconstruct(fn, x):
    fl.save(fn, x)
    return fl.load(fn)
def load(self, filename, compression='blosc'):
    return AttrDict(flammkuchen.load(filename))
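
A minimal sketch of the attribute-access idea assumed above; this AttrDict is a stand-in for whatever class the original code imports, and the file name is made up.

import flammkuchen
import numpy as np


class AttrDict(dict):
    # Illustrative stand-in: expose dictionary keys as attributes.
    __getattr__ = dict.__getitem__


flammkuchen.save("session.h5", dict(traces=np.zeros((10, 5)), fs=30.0))
d = AttrDict(flammkuchen.load("session.h5"))
assert d.fs == d["fs"] == 30.0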
# Windows for stimulus cropping for reliability index,
# padding with pre- and post- pause (in seconds):
PRE_INT_S = 2
POST_INT_S = 5

# Windows for computing the average response, in seconds from stim start:
BL_START_S = -2
BL_END_S = 0
RSP_START_S = 0
RSP_END_S = 4

# Microscope pixel size, not logged in metadata:
PX_SIZE = 0.6

# Manually defined offsets for rigid transformation:
all_offsets = fl.load(IMAGING_DATA_MASTER_PATH / "manual_alignment_offsets.h5")

# Abbreviation of genotype from logging:
GEN_ABBR_DICT = {
    "Huc:H2B-GCaMP6s;olig1:Ntr": "OPC-abl",
    "Huc:H2B-GCaMP6s": "MTZ-cnt",
}

# find all data-containing folders:
path_list = [
    f.parent
    for f in IMAGING_DATA_MASTER_PATH.glob("*/data_from_suite2p_unfiltered.h5")
]

for path in tqdm(path_list):
    # Load experiment metadata using bouter class to read Stytra data:
    exp = EmbeddedExperiment(path)
def assert_array(fn, x):
    fl.save(fn, x)
    x1 = fl.load(fn)
    np.testing.assert_array_equal(x, x1)
def load(self, filename: Optional[str] = None):
    """Load output produced by DeepAudioSegmenter wrapper."""
    if filename is None:
        filename = self.path

    onsets = []
    offsets = []
    names = []

    res = flammkuchen.load(filename)

    if 'event_indices' in res or 'segment_labels' in res:
        # load old style format
        logging.info('   Converting legacy DAS format...')
        res['event_seconds'] = np.zeros((0,))
        res['event_sequence'] = []
        for event_name, event_idx in zip(res['event_names'], res['event_indices']):
            res['event_seconds'] = np.append(
                res['event_seconds'], np.array(event_idx) / res['samplerate_Hz'])
            res['event_sequence'] = np.append(
                res['event_sequence'], [event_name for _ in range(len(event_idx))])

        res['segment_onsets_seconds'] = np.zeros((0,))
        res['segment_offsets_seconds'] = np.zeros((0,))
        res['segment_sequence'] = []
        for segment_name, segment_labels in zip(res['segment_names'],
                                                res['segment_labels']):
            if 'sine' in segment_name:
                logging.info(f'   Postprocessing {segment_name}')
                segment_labels = fill_gaps(segment_labels,
                                           gap_dur=0.02 * res['samplerate_Hz'])
                segment_labels = remove_short(segment_labels,
                                              min_len=0.02 * res['samplerate_Hz'])
            # detect on- and offsets from binary labels
            segment_onset_idx = np.where(
                np.diff(segment_labels.astype(float), prepend=0) == 1)[0].astype(float)
            segment_offset_idx = np.where(
                np.diff(segment_labels.astype(float), append=0) == -1)[0].astype(float)
            res['segment_onsets_seconds'] = np.append(
                res['segment_onsets_seconds'],
                segment_onset_idx / res['samplerate_Hz'])
            res['segment_offsets_seconds'] = np.append(
                res['segment_offsets_seconds'],
                segment_offset_idx / res['samplerate_Hz'])
            res['segment_sequence'] = np.append(
                res['segment_sequence'],
                [segment_name for _ in range(len(segment_onset_idx))])

    for event_seconds, event_names in zip(res['event_seconds'], res['event_sequence']):
        onsets.append(event_seconds)
        offsets.append(event_seconds)
        names.append(event_names)

    if 'event_names' in res:
        # ensure empty event types are initialized
        for name in res['event_names']:
            if name not in names:
                names.append(name)
                onsets.append(np.nan)
                offsets.append(np.nan)

    for segment_onsets, segment_offsets, segment_names in zip(
            res['segment_onsets_seconds'], res['segment_offsets_seconds'],
            res['segment_sequence']):
        onsets.append(segment_onsets)
        offsets.append(segment_offsets)
        names.append(segment_names)

    if 'segment_names' in res:
        # ensure empty segment types are initialized
        for name in res['segment_names']:
            if name not in names:
                names.append(name)
                onsets.append(np.nan)
                offsets.append(0)

    et = annot.Events.from_lists(names=names, start_seconds=onsets, stop_seconds=offsets)
    return et, et.categories
# parameters for circle of subplots:
ax_w = 0.12
ax_c = (0.5, 0.5)
ax_r = 0.37
n_stims = 36
scaling_percentiles = [1, 99.89]
xlims = (-2, 7)
p_w = 0.22  # side of the central histogram

path = IMAGING_DATA_MASTER_PATH / "210611_f5"

# Load traces and experiment metadata:
print("loading data...")
rois = fl.load(path / "data_from_suite2p_unfiltered.h5", "/rois_stack")
coords = fl.load(path / "data_from_suite2p_unfiltered.h5", "/coords")
ot_mask = fl.load(path / "anatomy.mask", "/mask")
exp = EmbeddedExperiment(path)
cells_df = fl.load(path / "cell_df.h5")

print("preprocessing traces...")
traces = preprocess_traces(
    fl.load(path / "data_from_suite2p_unfiltered.h5", "/traces").T
)

# Read original frequency:
fs = int(
    exp["imaging"]["microscope_config"]["lightsheet"]["scanning"]["z"]["frequency"]
)

stim_df = stimulus_df_from_exp0070(exp)
import flammkuchen as fl
import numpy as np
import seaborn as sns
from bouter import EmbeddedExperiment
from matplotlib import pyplot as plt

from xiao_et_al_utils.defaults import IMAGING_DATA_MASTER_PATH
from xiao_et_al_utils.plotting_utils import add_fish, despine, save_figure

sns.set(palette="deep", style="ticks")
cols = sns.color_palette()

path = IMAGING_DATA_MASTER_PATH / "210611_f5"

# Load traces and experiment metadata:
print("loading data...")
anatomy = fl.load(path / "data_from_suite2p_unfiltered.h5", "/anatomy_stack")
ot_mask = fl.load(path / "anatomy.mask", "/mask")
exp = EmbeddedExperiment(path)

print("generating figure...")
fig_a = plt.figure(figsize=(2.5, 3))
xpos, ypos, side = 0.1, 0.7, 0.16
axs = [
    fig_a.add_axes((xpos + side * 1.1 * i, ypos, side, side)) for i in range(5)
]

clip_masks = [s["clip_mask"] for s in exp["stimulus"]["log"][1::2]]
titles = ["4 s", "2 s", "4 s", "2 s", "4 s"]
stimuli = [0, None, 20, None, 10]
import flammkuchen as fl
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from xiao_et_al_utils.defaults import IMAGING_DATA_MASTER_PATH, REL_SCORE_THR
from xiao_et_al_utils.plotting_utils import add_fish, despine, save_figure

sns.set(palette="deep", style="ticks")
cols = sns.color_palette()


def _shift_90_deg(array):
    out = array + np.pi / 2
    out[out > np.pi] = -np.pi + np.mod(out[out > np.pi], np.pi)
    return out


stim_thetas = np.array(fl.load(IMAGING_DATA_MASTER_PATH / "stim_pos.h5"))
pooled_data = fl.load(IMAGING_DATA_MASTER_PATH / "pooled_dfs.h5", "/all_cells_df")

all_responses = pooled_data.loc[
    :, [f"rel_{i}" for i in range(len(stim_thetas))]
].values.T
all_coords = pooled_data.loc[:, ["z_trasf", "x_trasf", "y_trasf"]].values
all_in_tectum = pooled_data["in_tectum"].values

responsive = all_responses.max(0) > REL_SCORE_THR
all_peaks = np.argmax(all_responses, 0)

fig_c = plt.figure(figsize=(4.5, 2))
m_xpos, m_ypos, xside, yside = 0.05, 0.1, 0.7, 0.7
anat_scatt_size = 1
import flammkuchen as fl
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import ranksums

from xiao_et_al_utils.defaults import IMAGING_DATA_MASTER_PATH, REL_SCORE_THR
from xiao_et_al_utils.plotting_utils import despine, save_figure

sns.set(palette="deep", style="ticks")
cols = sns.color_palette()

stim_thetas = np.array(fl.load(IMAGING_DATA_MASTER_PATH / "stim_pos.h5"))
pooled_data_df = fl.load(IMAGING_DATA_MASTER_PATH / "pooled_dfs.h5", "/all_cells_df")
exp_df = fl.load(IMAGING_DATA_MASTER_PATH / "pooled_dfs.h5", "/exp_df")
popt = fl.load(IMAGING_DATA_MASTER_PATH / "gaussian_fit.h5", "/popt")

fit_params = np.array(popt)
for i, par_name in enumerate(["fit_amp", "fit_mn", "fit_sigma"]):
    pooled_data_df[par_name] = fit_params[:, i]
pooled_data_df["fit_sigma"] = np.abs(pooled_data_df["fit_sigma"])

pooled_data_df["mean_sigma"] = np.nan
for f in exp_df.index:
    s = pooled_data_df.loc[
        (pooled_data_df["fid"] == f)
        & (pooled_data_df["max_rel"] > REL_SCORE_THR)
        & pooled_data_df["in_tectum"],
        "fit_sigma",
    ]
def load_array(x):
    return fl.load(str(x), "/stack_3D")
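
A hedged counterpart showing how a file readable by load_array could be written with flammkuchen; the block file name and array contents are made up for illustration.

import flammkuchen as fl
import numpy as np

stack = np.random.rand(5, 64, 64).astype(np.float32)
fl.save("block_0000.h5", {"stack_3D": stack})  # stored under the /stack_3D group
assert load_array("block_0000.h5").shape == stack.shape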