def get_dendritic_trace_path(orig_file, check=True):
    """
    get_dendritic_trace_path(orig_file)

    Returns path to traces for EXTRACT dendritic trace data.

    The dendritic file is located in the same directory as the original file, 
    and carries the same name, with "_dendritic" inserted right before the 
    file extension, e.g. "roi_traces.h5" -> "roi_traces_dendritic.h5".

    Required args:
        - orig_file (Path): path to allen ROI traces

    Optional args:
        - check (bool): if True, the existence of the dendritic file is checked
                        default: True

    Returns:
        - dend_file (Path): path to corresponding EXTRACT dendritic ROI traces
    """

    orig_file = Path(orig_file)

    # Assemble the file name from the stem and suffix directly. Appending to 
    # the stem and then calling with_suffix() would mistake any dot within the 
    # stem for the start of a suffix, and drop part of the file name.
    dend_name = f"{orig_file.stem}_dendritic{orig_file.suffix}"
    dend_file = Path(orig_file.parent, dend_name)

    if check:
        file_util.checkfile(dend_file)

    return dend_file
def get_stim_fr_timestamps(stim_sync_h5, time_sync_h5=None, stim_align=None):
    """
    get_stim_fr_timestamps(stim_sync_h5)

    Returns time stamps for stimulus frames, optionally adjusted to experiment 
    start, recorded in 2-photon imaging timestamps.

    Adapted from allensdk.brain_observatory.running_processing.__main__.main().

    Required args:
        - stim_sync_h5 (Path): full path name of the stimulus sync h5 file

    Optional args:
        - time_sync_h5 (Path)  : full path to the time synchronization hdf5 
                                 file, used to adjust stimulus frame timestamps 
                                 to experiment start (must be provided 
                                 together with stim_align)
                                 default: None
        - stim_align (1D array): stimulus to 2p alignment array, used to adjust 
                                 stimulus frame timestamps to experiment start
                                 (must be provided together with time_sync_h5)
                                 default: None

    Returns:
        - stim_fr_timestamps (1D array): time stamp for each stimulus frame 
                                         (seconds)
    """

    # the sync file must exist before it is opened
    file_util.checkfile(stim_sync_h5)

    sync_data = sync_dataset.Dataset(str(stim_sync_h5))

    # Why the rising edge? See Sweepstim.update in camstim. This method does:
    # 1. updates the stimuli
    # 2. updates the "items", causing a running speed sample to be acquired
    # 3. sets the vsync line high
    # 4. flips the buffer
    stim_fr_timestamps = sync_data.get_edges(
        "rising", sync_dataset.Dataset.FRAME_KEYS, units="seconds"
    )

    # the adjustment is all or nothing: both inputs, or neither
    missing = [arg is None for arg in (time_sync_h5, stim_align)]
    if all(missing):
        return stim_fr_timestamps

    if any(missing):
        raise ValueError(
            "If providing time_sync_h5 or stim_align, must provide both.")

    # express the timestamps relative to the first stimulus frame
    relative_timestamps = stim_fr_timestamps - stim_fr_timestamps[0]

    with h5py.File(time_sync_h5, "r") as f:
        twop_timestamps = f["twop_vsync_fall"][:]

    # Convert to the two photon reference frame, using the 2p timestamp to 
    # which the first stimulus frame is aligned
    offset = twop_timestamps[stim_align[0]]

    return offset + relative_timestamps
def get_dendritic_mask_path(maindir, sessid, expid, mouseid, runtype="prod", 
                            mouse_dir=True, check=True):
    """
    get_dendritic_mask_path(maindir, sessid, expid, mouseid)

    Returns path to dendritic mask file.

    Required args:
        - maindir (Path): path of the main data directory
        - sessid (int)  : session ID (9 digits), e.g. 712483302
        - expid (str)   : experiment ID (9 digits), e.g. "715925563"
        - mouseid (str) : mouse 6-digit ID string used for session files
                          e.g. "389778"

    Optional args:
        - runtype (str)   : "prod" (production) or "pilot" data
                            default: "prod"
        - mouse_dir (bool): if True, session information is in a "mouse_*"
                            subdirectory
                            default: True
        - check (bool)    : if True, checks whether the mask file exists 
                            (also passed on to get_sess_dirs())
                            default: True

    Returns:
        - maskfile (Path): full path name of the extract masks hdf5 file
    """

    # no segmentation ID is passed, as only the processed directory (third 
    # value returned) is needed here
    sess_dirs = get_sess_dirs(
        maindir, sessid, expid, None, mouseid, runtype, mouse_dir, check=check
    )
    procdir = sess_dirs[2]

    maskfile = Path(procdir) / f"{sessid}_dendritic_masks.h5"

    if check:
        file_util.checkfile(maskfile)

    return maskfile
def get_monitor_delay(stim_sync_h5):
    """
    get_monitor_delay(stim_sync_h5)

    Returns monitor delay lag.

    Required args:
        - stim_sync_h5 (Path): full path name of the experiment sync hdf5 file

    Returns:
        - monitor_display_lag: display lag, as provided by the "display_lag" 
                               attribute of the Dataset2p object created from 
                               the sync file
    """

    # the sync file must exist before it is loaded
    file_util.checkfile(stim_sync_h5)

    # the Dataset2p object is what allows the delay to be calculated
    sync_obj = Dataset2p.Dataset2p(str(stim_sync_h5))

    return sync_obj.display_lag
def get_vsync_falls(stim_sync_h5):
    """
    get_vsync_falls(stim_sync_h5)

    Calculates vsyncs for 2p and stimulus frames.

    Required args:
        - stim_sync_h5 (Path): full path name of the experiment sync hdf5 file

    Returns:
        - stim_vsync_fall_adj (1D array)  : vsyncs for each stimulus frame, 
                                            adjusted by monitor delay
        - valid_twop_vsync_fall (1D array): vsyncs for each 2p frame
    """

    # the sync file must exist before it is loaded
    file_util.checkfile(stim_sync_h5)

    # load the sync file into a Dataset object, silencing the user warning 
    # raised for deprecated keys
    with gen_util.TempWarningFilter("The loaded sync file", UserWarning):
        sync_data = sync_dataset.Dataset(str(stim_sync_h5))

    sample_frequency = sync_data.meta_data["ni_daq"]["counter_output_freq"]

    # valid 2p vsync falls
    valid_twop_vsync_fall = Dataset2p.calculate_valid_twop_vsync_fall(
        sync_data, sample_frequency
    )

    # stimulus vsync falls, before any monitor delay adjustment
    stim_vsync_fall = Dataset2p.calculate_stim_vsync_fall(
        sync_data, sample_frequency
    )

    # the monitor delay is obtained through get_monitor_delay(), instead of 
    # calculate_delay(sync_data, stim_vsync_fall, sample_frequency)
    monitor_delay = get_monitor_delay(stim_sync_h5)

    # shift the stimulus times by the monitor delay
    return stim_vsync_fall + monitor_delay, valid_twop_vsync_fall
def get_nway_match_path_from_sessid(maindir, sessid, runtype="prod", 
                                    check=True):
    """
    get_nway_match_path_from_sessid(maindir, sessid)

    Returns the full path name for the nway match file in the main directory 
    for the specified session.

    The mouse, experiment and segmentation IDs needed to locate the processed 
    directory are retrieved from the session directory.

    Required args:
        - maindir (path): main directory
        - sessid (int)  : session ID

    Optional args:
        - runtype (str): "prod" (production) or "pilot" data
                         default: "prod"
        - check (bool) : if True, checks whether the n-way match file exists 
                         (also passed on to get_sess_dirs())
                         default: True

    Returns:
        - nway_match_path (path): n-way match path
    """

    # collect the IDs for this session from its directory
    sessdir, mouse_dir = get_sess_dir_path(maindir, sessid, runtype)
    mouseid = get_mouseid(sessdir, mouse_dir)
    expid = get_expid(sessdir)
    segid = get_segid(sessdir)

    # only the processed directory is needed
    sess_dirs = get_sess_dirs(
        maindir, sessid, expid, segid, mouseid, runtype, mouse_dir, check
    )
    _, _, procdir, _, _ = sess_dirs

    nway_match_name = \
        f"mouse_{mouseid}__session_{sessid}__nway_matched_rois.json"
    nway_match_path = Path(procdir) / nway_match_name

    if check:
        file_util.checkfile(nway_match_path)

    return nway_match_path
def get_run_velocity(stim_sync_h5, stim_pkl="", stim_dict=None, filter_ks=5):
    """
    get_run_velocity(stim_sync_h5)

    Adapted from allensdk.brain_observatory.running_processing.__main__.main().
    Loads and calculates the linear running velocity from the raw running data.

    Required args:
        - stim_sync_h5 (Path): full path name of the stimulus sync h5 file

    Optional args:
        - stim_pkl (Path) : full path name of the experiment stim pickle file 
                            ("" or None if not provided)
                            default: ""
        - stim_dict (dict): stimulus dictionary, with keys "fps" and "items", 
                            from which running velocity is extracted.
                            If not None, overrides stim_pkl.
                            default: None
        - filter_ks (int) : kernel size to use in median filtering the linear 
                            running velocity (0 to skip filtering).
                            default: 5

    Returns:
        - running_velocity (array): array of length equal to the number of 
                                    stimulus frames, each element corresponds 
                                    to the linear running velocity for that 
                                    stimulus frame

    Raises:
        - ValueError: if neither stim_pkl nor stim_dict is provided, or if the 
                      number of stimulus frame timestamps does not match the 
                      number of running samples.
    """

    if stim_dict is None:
        # None is treated like the "" default, so that a missing input is 
        # reported here, instead of failing later in the file utilities
        if stim_pkl is None or stim_pkl == "":
            raise ValueError("Must provide either the pickle file name or the "
                "stimulus dictionary.")

        # check that the pickle file exists, then load it
        file_util.checkfile(stim_pkl)
        stim_dict = file_util.loadfile(stim_pkl)

    stim_fr_timestamps = get_stim_fr_timestamps(stim_sync_h5)

    # occasionally an extra set of frame times are acquired after the rest of 
    # the signals. We detect and remove these
    stim_fr_timestamps = sync_utilities.trim_discontiguous_times(
        stim_fr_timestamps)
    num_raw_timestamps = len(stim_fr_timestamps)

    raw_running_deg = running_main.running_from_stim_file(
        stim_dict, "dx", num_raw_timestamps)

    if num_raw_timestamps != len(raw_running_deg):
        raise ValueError(
            f"found {num_raw_timestamps} rising edges on the vsync line, "
            f"but only {len(raw_running_deg)} rotation samples")

    use_median_duration = False
    use_filter_ks = filter_ks

    # for running alignment test analyses: the module-level flag overrides 
    # the requested filtering
    if TEST_RUNNING_BLIPS:
        logger.warning("Pre-processing running data using median duration "
            "and no filter, for testing purposes.")
        use_median_duration = True
        use_filter_ks = 0

    running_velocity = calculate_running_velocity(
        stim_fr_timestamps=stim_fr_timestamps,
        raw_running_deg=raw_running_deg,
        wheel_radius=WHEEL_RADIUS,
        subject_position=SUBJECT_POSITION,
        use_median_duration=use_median_duration,
        filter_ks=use_filter_ks,
    )

    return running_velocity
def get_stim_frames(pkl_file_name, stim_sync_h5, time_sync_h5, df_pkl_name, 
                    sessid, runtype="prod"):
    """
    get_stim_frames(pkl_file_name, stim_sync_h5, time_sync_h5, df_pkl_name, 
                    sessid)

    Pulls out the stimulus frame information from the stimulus pickle file, as
    well as synchronization information from the stimulus sync file, and stores
    synchronized stimulus frame information in the output pickle file along 
    with the stimulus alignment array.

    Required args:
        - pkl_file_name (Path): full path name of the experiment stim pickle 
                                file (or the already loaded stimulus 
                                dictionary, which is then used directly)
        - stim_sync_h5 (Path) : full path name of the experiment sync hdf5 file
        - time_sync_h5 (Path) : full path to the time synchronization hdf5 file
        - df_pkl_name (Path)  : full path name of the output pickle file to 
                                create
        - sessid (int)        : session ID, needed to check whether this 
                                session needs to be treated differently 
                                (e.g., for alignment bugs)

    Optional argument:
        - runtype (str): the type of run, either "pilot" or "prod"
                         default: "prod"

    Raises:
        - ValueError: if runtype is not recognized, if the number of stimuli 
                      or segments does not match expectations, if a stimulus 
                      type is not recognized, or if 2p frames are associated 
                      with two stimulus segments.
        - OSError   : if the output pickle file cannot be saved.
    """

    # read the pickle file and call it "pkl"
    if isinstance(pkl_file_name, dict):
        pkl = pkl_file_name
    else:
        # check that the pickle file exists
        file_util.checkfile(pkl_file_name)
        pkl = file_util.loadfile(pkl_file_name, filetype="pickle")

    if runtype == "pilot":
        num_stimtypes = 2 # visual flow (bricks) and Gabors
        stim_params_key = "stimParams"
    elif runtype == "prod":
        num_stimtypes = 3 # 2 visual flow (bricks) and 1 set of Gabors
        stim_params_key = "stim_params"
    else:
        # fail explicitly, instead of hitting an unbound variable below
        raise ValueError(
            f"runtype must be 'pilot' or 'prod', but found '{runtype}'.")

    if len(pkl["stimuli"]) != num_stimtypes:
        raise ValueError(
            f"{num_stimtypes} stimuli types expected, but "
            f"{len(pkl['stimuli'])} found.")

    # get vsyncs for the stimulus (monitor delay adjusted) and 2p frames
    stim_vsync_fall_adj, valid_twop_vsync_fall = get_vsync_falls(stim_sync_h5)

    # calculate the alignment
    logger.info("Calculating stimulus alignment.")
    stimulus_alignment = Dataset2p.calculate_stimulus_alignment(
        stim_vsync_fall_adj, valid_twop_vsync_fall)

    # get the second stimulus alignment
    # (imported locally, presumably to avoid a circular import - TODO confirm)
    from sess_util.sess_load_util import load_beh_sync_h5_data
    second_stimulus_alignment = load_beh_sync_h5_data(time_sync_h5)[2]

    # drop the extra trailing value, if the second alignment has one
    if len(second_stimulus_alignment) == len(stimulus_alignment) + 1:
        second_stimulus_alignment = second_stimulus_alignment[:-1]

    if int(sessid) in ADJUST_SECOND_ALIGNMENT:
        diff = second_stimulus_alignment - stimulus_alignment
        # most frequent difference
        # (np.atleast_1d() allows for scipy versions in which mode() returns 
        # a scalar, instead of a length 1 array, for 1D input)
        adjustment = np.atleast_1d(scist.mode(diff)[0])[0]
        stimulus_alignment += adjustment

    # compare alignments
    compare_alignments(stimulus_alignment, second_stimulus_alignment)

    # number of stimulus frames in the initial grayscreen period
    offset = int(pkl["pre_blank_sec"] * pkl["fps"])

    logger.info("Creating the stim_df:")

    # get number of segments expected and actually recorded for each stimulus
    segs = []
    segs_exp = []
    frames_per_seg = []
    stim_types = []
    stim_type_names = []

    for i in range(num_stimtypes):
        stimulus = pkl["stimuli"][i]

        # records the max num of segs in the frame list for each stimulus
        segs.append(np.max(stimulus["frame_list"]) + 1)

        # calculates the expected number of segs based on fps, 
        # display duration (s) and seg length
        fps = stimulus["fps"]

        name = stimulus[stim_params_key]["elemParams"]["name"]

        stim_type_names.append(name)
        stim_types.append(name[0])

        if name == "bricks":
            frames_per_seg.append(fps)
            segs_exp.append(
                int(60. * np.sum(np.diff(stimulus["display_sequence"])) / 
                frames_per_seg[i])
            )
        elif name == "gabors":
            frames_per_seg.append(fps / 1000. * 300)
            # to exclude grey seg
            segs_exp.append(
                int(60. * np.sum(np.diff(stimulus["display_sequence"])) / 
                frames_per_seg[i] * 4. / 5)
            )
        else:
            raise ValueError(f"{name} stimulus type not recognized.")

        # check whether the actual number of frames is within a small range of 
        # expected about two frames per sequence?
        # NOTE(review): the tolerance is always derived from the display 
        # sequence of stimulus 0, not stimulus i - confirm this is intended.
        n_seq = pkl["stimuli"][0]["display_sequence"].shape[0] * 2
        if np.abs(segs[i] - segs_exp[i]) > n_seq:
            raise ValueError(
                f"Expected {segs_exp[i]} frames for stimulus {i}, "
                f"but found {segs[i]}.")

    total_stimsegs = np.sum(segs)

    # the dataframe is extended as needed when rows are added through .loc 
    # (e.g., for the additional pre_blank row)
    stim_df = pd.DataFrame(
        index=list(range(total_stimsegs)), 
        columns=[
            "stimType", "stimPar1", "stimPar2", "surp", "stimSeg", 
            "GABORFRAME", "start_frame", "end_frame", "num_frames"
        ])

    zz = 0
    # For gray-screen pre_blank
    stim_df.loc[zz, "stimType"] = -1
    stim_df.loc[zz, "stimPar1"] = -1
    stim_df.loc[zz, "stimPar2"] = -1
    stim_df.loc[zz, "surp"] = -1
    stim_df.loc[zz, "stimSeg"] = -1
    stim_df.loc[zz, "GABORFRAME"] = -1
    stim_df.loc[zz, "start_frame"] = stimulus_alignment[0] # 2p start frame
    stim_df.loc[zz, "end_frame"] = stimulus_alignment[offset] # 2p end frame
    stim_df.loc[zz, "num_frames"] = \
        (stimulus_alignment[offset] - stimulus_alignment[0])
    zz += 1

    for stype_n in range(num_stimtypes):
        logger.info(f"Stimtype: {stim_type_names[stype_n]}", 
            extra={"spacing": TAB})
        movie_segs = pkl["stimuli"][stype_n]["frame_list"]

        for segment in range(segs[stype_n]):
            # stimulus frames that belong to the current segment
            seg_inds = np.where(movie_segs == segment)[0]

            # 2p frames aligned to the first stimulus frame of the segment, 
            # and to the stimulus frame that follows its last one
            start_frame = int(stimulus_alignment[seg_inds[0] + offset])
            end_frame = int(stimulus_alignment[seg_inds[-1] + 1 + offset])

            stim_df.loc[zz, "stimType"] = stim_types[stype_n][0]
            stim_df.loc[zz, "stimSeg"] = segment
            stim_df.loc[zz, "start_frame"] = start_frame
            stim_df.loc[zz, "end_frame"] = end_frame
            stim_df.loc[zz, "num_frames"] = end_frame - start_frame

            get_seg_params(
                stim_types, stype_n, stim_df, zz, pkl, segment, runtype)

            zz += 1

    # check whether any 2P frames are associated with 2 stimuli
    overlap = np.any((np.sort(stim_df["start_frame"])[1:] - 
        np.sort(stim_df["end_frame"])[:-1]) < 0)
    if overlap:
        raise ValueError("Some 2P frames associated with two stimulus "
            "segments.")

    # create a dictionary for pickling
    stim_dict = {"stim_df": stim_df, "stim_align": stimulus_alignment}

    # store in the pickle file
    try:
        file_util.saveinfo(stim_dict, df_pkl_name, overwrite=True)
    except Exception as err:
        # keep the underlying error attached, and let KeyboardInterrupt and 
        # SystemExit through
        raise OSError(
            f"Could not save stimulus pickle file {df_pkl_name}") from err
def get_roi_trace_paths(maindir, sessid, expid, segid, mouseid, 
                        runtype="prod", mouse_dir=True, dendritic=False, 
                        check=True):
    """
    get_roi_trace_paths(maindir, sessid, expid, segid, mouseid)

    Returns the full path names of all of the expected ROI trace files in the 
    main directory.

    Required arguments:
        - maindir (Path): path of the main data directory
        - sessid (int)  : session ID (9 digits)
        - expid (str)   : experiment ID (9 digits)
        - segid (str)   : segmentation ID (9 digits)
        - mouseid (str) : mouse 6-digit ID string used for session files
                          e.g. "389778"

    Optional arguments
        - runtype (str)   : "prod" (production) or "pilot" data
                            default: "prod"
        - mouse_dir (bool): if True, session information is in a "mouse_*"
                            subdirectory
                            default: True
        - dendritic (bool): if True, paths are changed to EXTRACT dendritic 
                            version
                            default: False
        - check (bool)    : if True, checks whether the files in the output 
                            dictionary exist
                            default: True

    Returns:
        - roi_trace_paths (dict): ROI trace paths dictionary
            ["demixed_trace_h5"] (Path)   : full path to demixed trace hdf5 
                                            file
            ["neuropil_trace_h5"] (Path)  : full path to neuropil trace hdf5 
                                            file
            ["roi_trace_h5"] (Path)       : full path name of the ROI raw 
                                            processed fluorescence trace hdf5 
                                            file
            ["roi_trace_dff_h5"] (Path)   : full path name of the ROI dF/F 
                                            trace hdf5 file
            ["unproc_roi_trace_h5"] (Path): full path to unprocessed ROI trace 
                                            hdf5 file (data stored under "FC")
    """

    sess_dirs = get_sess_dirs(
        maindir, sessid, expid, segid, mouseid, runtype, mouse_dir, check
    )
    _, expdir, procdir, demixdir, _ = sess_dirs

    # allen versions of the trace files
    roi_trace_paths = {
        "unproc_roi_trace_h5": Path(procdir) / "roi_traces.h5",
        "neuropil_trace_h5"  : Path(procdir) / "neuropil_traces.h5",
        "demixed_trace_h5"   : Path(demixdir) / f"{expid}_demixed_traces.h5",
        "roi_trace_h5"       : Path(expdir) / "neuropil_correction.h5",
        "roi_trace_dff_h5"   : Path(expdir) / f"{expid}_dff.h5",
    }

    if dendritic:
        # swap each path for its dendritic counterpart (existence is checked 
        # on the dendritic paths only, if applicable)
        roi_trace_paths = {
            key: get_dendritic_trace_path(allen_path, check=check) 
            for key, allen_path in roi_trace_paths.items()
        }
    elif check:
        for allen_path in roi_trace_paths.values():
            file_util.checkfile(allen_path)

    return roi_trace_paths
def get_file_names(maindir, sessid, expid, segid, date, mouseid, 
                   runtype="prod", mouse_dir=True, check=True):
    """
    get_file_names(maindir, sessid, expid, segid, date, mouseid)

    Returns the full path names of all of the expected data files in the main 
    directory for the specified session and experiment on the given date that 
    can be used for the Credit Assignment analysis.

    Required args:
        - maindir (Path): path of the main data directory
        - sessid (int)  : session ID (9 digits)
        - expid (str)   : experiment ID (9 digits)
        - segid (str)   : segmentation ID (9 digits)
        - date (str)    : date for the session in YYYYMMDD, e.g. "20160802"
        - mouseid (str) : mouse 6-digit ID string used for session files

    Optional args:
        - runtype (str)   : "prod" (production) or "pilot" data
                            default: "prod"
        - mouse_dir (bool): if True, session information is in a "mouse_*"
                            subdirectory
                            default: True
        - check (bool)    : if True, checks whether the files and directories 
                            in the output dictionaries exist (with a few 
                            exceptions)
                            default: True

    Returns:
        - dirpaths (dict): dictionary of directory paths
            ["expdir"] (Path)  : full path name of the experiment directory
            ["procdir"] (Path) : full path name of the processed directory
            ["demixdir"] (Path): full path name of the demixed directory
            ["segdir"] (Path)  : full path name of the segmentation directory
        - filepaths (dict): dictionary of file paths
            ["behav_video_h5"] (Path)    : full path name of the behavioral 
                                           hdf5 video file
            ["max_proj_png"] (Path)      : full path to max projection of 
                                           stack in png format
            ["pupil_video_h5"] (Path)    : full path name of the pupil hdf5 
                                           video file
            ["roi_extract_json"] (Path)  : full path name of the ROI extraction 
                                           json
            ["roi_objectlist_txt"] (Path): full path to ROI object list txt
            ["stim_pkl"] (Path)          : full path name of the stimulus
                                           pickle file
            ["stim_sync_h5"] (Path)      : full path name of the stimulus
                                           synchronization hdf5 file
            ["time_sync_h5"] (Path)      : full path name of the time 
                                           synchronization hdf5 file
            
            Existence not checked:
            ["align_pkl"] (Path)         : full path name of the stimulus
                                           alignment pickle file
            ["correct_data_h5"] (Path)   : full path name of the motion
                                           corrected 2p data hdf5 file
            ["roi_trace_h5"] (Path)      : full path name of the ROI raw 
                                           processed fluorescence trace hdf5 
                                           file (allen version)
            ["roi_trace_dff_h5"] (Path)  : full path name of the ROI dF/F trace 
                                           hdf5 file (allen version)
            ["zstack_h5"] (Path)         : full path name of the zstack 2p hdf5 
                                           file
    """

    sessdir, expdir, procdir, demixdir, segdir = get_sess_dirs(
        maindir, sessid, expid, segid, mouseid, runtype, mouse_dir, check
    )

    # allen ROI trace paths (not checked here, and excluded from the checks 
    # below)
    roi_trace_paths = get_roi_trace_paths(
        maindir, sessid, expid, segid, mouseid, runtype, mouse_dir, 
        dendritic=False, check=False
    )

    # shared prefix for the session level files
    sess_m_d = f"{sessid}_{mouseid}_{date}"

    dirpaths = {
        "expdir"  : expdir,
        "procdir" : procdir,
        "segdir"  : segdir,
        "demixdir": demixdir,
    }

    sessdir, expdir = Path(sessdir), Path(expdir)
    procdir, segdir = Path(procdir), Path(segdir)

    filepaths = {
        "align_pkl"         : sessdir / f"{sess_m_d}_df.pkl",
        "behav_video_h5"    : sessdir / f"{sess_m_d}_video-0.h5",
        "correct_data_h5"   : procdir / "concat_31Hz_0.h5",
        "max_proj_png"      : procdir / "max_downsample_4Hz_0.png",
        "pupil_video_h5"    : sessdir / f"{sess_m_d}_video-1.h5",
        "roi_extract_json"  : procdir / f"{expid}_input_extract_traces.json",
        "roi_trace_h5"      : roi_trace_paths["roi_trace_h5"],
        "roi_trace_dff_h5"  : roi_trace_paths["roi_trace_dff_h5"],
        "roi_objectlist_txt": segdir / "objectlist.txt",
        "stim_pkl"          : sessdir / f"{sess_m_d}_stim.pkl",
        "stim_sync_h5"      : sessdir / f"{sess_m_d}_sync.h5",
        "time_sync_h5"      : expdir / f"{expid}_time_synchronization.h5",
        "zstack_h5"         : sessdir / f"{sessid}_zstack_column.h5",
    }

    if check:
        # files that are not checked here (they are created if needed or 
        # should be checked when needed, due to size)
        unchecked = {
            "align_pkl", 
            "correct_data_h5", 
            "zstack_h5", 
            "roi_trace_h5", 
            "roi_trace_dff_h5",
        }

        for key, filepath in filepaths.items():
            if key in unchecked:
                continue
            file_util.checkfile(filepath)

    return dirpaths, filepaths