def get_unit_analysis_metrics_for_session(self, session_id, annotate: bool = True, filter_by_validity: bool = True, **unit_filter_kwargs):
    """Cache and return a table of analysis metrics calculated on each unit
    from a specified session. See get_session_table for a list of sessions.

    Parameters
    ----------
    session_id : int
        identifies the session from which to fetch analysis metrics.
    annotate : bool, optional
        if True, information from the annotated units table will be merged
        onto the outputs
    filter_by_validity : bool, optional
        Filter units used by analysis so that only 'valid' units are
        returned, by default True
    **unit_filter_kwargs :
        Additional keyword arguments that can be used to filter units
        (for power users).

    Returns
    -------
    metrics : pd.DataFrame
        Each row corresponds to a single unit, describing a set of analysis
        metrics calculated on that unit.

    """
    cache_path = self.get_cache_path(
        None, self.SESSION_ANALYSIS_METRICS_KEY, session_id, session_id)
    metrics = one_file_call_caching(
        cache_path,
        partial(self.fetch_api.get_unit_analysis_metrics,
                ecephys_session_ids=[session_id]),
        write_metrics_csv,
        read_metrics_csv,
        num_tries=self.fetch_tries)

    if annotate:
        # Restrict the annotated units table to this session, then join on
        # the shared unit-id index.
        annotated_units = self.get_units(
            filter_by_validity=filter_by_validity, **unit_filter_kwargs)
        annotated_units = annotated_units[
            annotated_units["ecephys_session_id"] == session_id]
        metrics = pd.merge(annotated_units, metrics,
                           left_index=True, right_index=True, how="inner")
        metrics.index.rename("ecephys_unit_id", inplace=True)

    return metrics
def _get_units(self, filter_by_validity: bool = True, **unit_filter_kwargs) -> pd.DataFrame:
    """Fetch (and cache) the units table, renaming ambiguous waveform metric
    columns and applying the standard quality-metric filters."""
    cache_path = self.get_cache_path(None, self.UNITS_KEY)
    units = one_file_call_caching(cache_path,
                                  self.fetch_api.get_units,
                                  write_csv,
                                  read_csv,
                                  num_tries=self.fetch_tries)

    # Prefix metric names so it is clear they are waveform-derived.
    column_renames = {
        'PT_ratio': 'waveform_PT_ratio',
        'amplitude': 'waveform_amplitude',
        'duration': 'waveform_duration',
        'halfwidth': 'waveform_halfwidth',
        'recovery_slope': 'waveform_recovery_slope',
        'repolarization_slope': 'waveform_repolarization_slope',
        'spread': 'waveform_spread',
        'velocity_above': 'waveform_velocity_above',
        'velocity_below': 'waveform_velocity_below',
        'l_ratio': 'L_ratio',
    }
    units = units.rename(columns=column_renames)

    # Quality-metric thresholds; each can be overridden via
    # unit_filter_kwargs (power-user feature).
    amplitude_cutoff_max = get_unit_filter_value(
        "amplitude_cutoff_maximum", **unit_filter_kwargs)
    presence_ratio_min = get_unit_filter_value(
        "presence_ratio_minimum", **unit_filter_kwargs)
    isi_violations_max = get_unit_filter_value(
        "isi_violations_maximum", **unit_filter_kwargs)

    passes_quality_metrics = (
        (units["amplitude_cutoff"] <= amplitude_cutoff_max)
        & (units["presence_ratio"] >= presence_ratio_min)
        & (units["isi_violations"] <= isi_violations_max)
    )
    units = units[passes_quality_metrics]

    if filter_by_validity and "quality" in units.columns:
        # Keep only units curated as "good"; the column is then redundant.
        units = units[units["quality"] == "good"]
        units = units.drop(columns="quality")

    if "ecephys_structure_id" in units.columns \
            and unit_filter_kwargs.get("filter_out_of_brain_units", True):
        # Units without an assigned structure are outside the brain.
        units = units[units["ecephys_structure_id"].notna()]

    return units
def get_natural_scene_template(self, number):
    """Download (or load from cache) the template image for natural scene
    stimulus `number`."""
    cache_path = self.get_cache_path(None, self.NATURAL_SCENE_KEY, number)
    fetch_scene = partial(
        self.fetch_api.get_natural_scene_template, number=number)
    return one_file_call_caching(
        cache_path,
        fetch_scene,
        self.stream_writer,
        read_scene,
        num_tries=self.fetch_tries)
def get_experiment_table(
        self,
        suppress: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Return summary table of all ophys_experiment_ids in the database.
    :param suppress: optional list of columns to drop from the resulting
        dataframe.
    :type suppress: list of str
    :rtype: pd.DataFrame
    """
    # These columns round-trip through csv as serialized arrays.
    array_fields = ["reporter_line", "driver_line"]
    writer = partial(_write_csv, array_fields=array_fields)
    reader = partial(
        _read_csv,
        index_col="ophys_experiment_id",
        array_fields=array_fields,
        array_types=[str, str])

    experiments = one_file_call_caching(
        self.get_cache_path(None, self.OPHYS_EXPERIMENTS_KEY),
        self.fetch_api.get_experiment_table,
        writer,
        reader)

    if suppress:
        experiments = experiments.drop(columns=suppress, errors="ignore")
    return experiments
def _get_channels(self):
    """Fetch (and cache) the channels table as a DataFrame."""
    cache_path = self.get_cache_path(None, self.CHANNELS_KEY)
    return one_file_call_caching(
        cache_path,
        self.fetch_api.get_channels,
        write_csv,
        read_csv,
        num_tries=self.fetch_tries)
def get_behavior_session_table(
        self,
        suppress: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Return summary table of all behavior_session_ids in the database.
    :param suppress: optional list of columns to drop from the resulting
        dataframe.
    :type suppress: list of str
    :rtype: pd.DataFrame
    """
    # These columns round-trip through csv as serialized arrays.
    array_fields = ["reporter_line", "driver_line"]
    writer = partial(_write_csv, array_fields=array_fields)
    reader = partial(
        _read_csv,
        index_col="behavior_session_id",
        array_fields=array_fields,
        array_types=[str, str])

    sessions = one_file_call_caching(
        self.get_cache_path(None, self.BEHAVIOR_SESSIONS_KEY),
        self.fetch_api.get_behavior_only_session_table,
        writer,
        reader)
    # Standardize on the column name used elsewhere in this API.
    sessions = sessions.rename(columns={"genotype": "full_genotype"})

    if suppress:
        sessions = sessions.drop(columns=suppress, errors="ignore")
    return sessions
def get_natural_movie_template(self, number):
    """Download (or load from cache) the template for natural movie
    stimulus `number`."""
    cache_path = self.get_cache_path(None, self.NATURAL_MOVIE_KEY, number)
    fetch_movie = partial(
        self.fetch_api.get_natural_movie_template, number=number)
    return one_file_call_caching(
        cache_path,
        fetch_movie,
        write_from_stream,
        read_movie,
        num_tries=self.fetch_tries)
def _get_probes(self):
    """Fetch (and cache) the probes table, reporting the effective LFP
    sampling rate when a temporal subsampling factor is available."""
    cache_path: str = self.get_cache_path(None, self.PROBES_KEY)
    probes = one_file_call_caching(cache_path,
                                   self.fetch_api.get_probes,
                                   write_csv,
                                   read_csv,
                                   num_tries=self.fetch_tries)

    # Divide the lfp sampling by the subsampling factor for clearer
    # presentation (if provided)
    rate_columns = ("lfp_sampling_rate", "lfp_temporal_subsampling_factor")
    if all(column in probes.columns for column in rate_columns):
        probes["lfp_sampling_rate"] = (
            probes["lfp_sampling_rate"]
            / probes["lfp_temporal_subsampling_factor"])

    return probes
def _get_sessions(self):
    """Fetch (and cache) the sessions table, parsing the stringified
    structure_acronyms column back into real lists."""
    cache_path = self.get_cache_path(None, self.SESSIONS_KEY)
    sessions = one_file_call_caching(cache_path,
                                     self.fetch_api.get_sessions,
                                     write_csv,
                                     read_csv,
                                     num_tries=self.fetch_tries)

    if "structure_acronyms" in sessions.columns:
        # unfortunately, structure_acronyms arrives as the string repr of a
        # list of str; evaluate each entry back into a list and expose it
        # under the ecephys_-prefixed name.
        sessions["ecephys_structure_acronyms"] = [
            ast.literal_eval(acronyms)
            for acronyms in sessions["structure_acronyms"]
        ]
        sessions = sessions.drop(columns=["structure_acronyms"])

    return sessions
def get_unit_analysis_metrics_by_session_type(
        self, session_type, annotate: bool = True,
        filter_by_validity: bool = True, **unit_filter_kwargs):
    """Cache and return a table of analysis metrics calculated on each unit
    from a specified session type. See get_all_session_types for a list of
    session types.

    Parameters
    ----------
    session_type : str
        identifies the session type for which to fetch analysis metrics.
    annotate : bool, optional
        if True, information from the annotated units table will be merged
        onto the outputs
    filter_by_validity : bool, optional
        Filter units used by analysis so that only 'valid' units are
        returned, by default True
    **unit_filter_kwargs :
        Additional keyword arguments that can be used to filter units
        (for power users).

    Returns
    -------
    metrics : pd.DataFrame
        Each row corresponds to a single unit, describing a set of analysis
        metrics calculated on that unit.

    """
    valid_session_types = self.get_all_session_types()
    if session_type not in valid_session_types:
        raise ValueError(
            f"unrecognized session type: {session_type}. "
            f"Available types: {valid_session_types}")

    cache_path = self.get_cache_path(
        None, self.TYPEWISE_ANALYSIS_METRICS_KEY, session_type)
    metrics = one_file_call_caching(
        cache_path,
        partial(self.fetch_api.get_unit_analysis_metrics,
                session_types=[session_type]),
        write_metrics_csv,
        read_metrics_csv,
        num_tries=self.fetch_tries)

    if annotate:
        # Join the annotated units table on the shared unit-id index.
        annotated_units = self.get_units(
            filter_by_validity=filter_by_validity, **unit_filter_kwargs)
        metrics = pd.merge(annotated_units, metrics,
                           left_index=True, right_index=True, how="inner")
        metrics.index.rename("ecephys_unit_id", inplace=True)

    return metrics
def get_session_data(self, session_id: int, filter_by_validity: bool = True, **unit_filter_kwargs):
    """ Obtain an EcephysSession object containing detailed data for a single session
    """
    cache_path = self.get_cache_path(
        None, self.SESSION_NWB_KEY, session_id, session_id)

    def construct_session(nwb_path):
        # Wrap the cached NWB file in a session api, applying unit filters.
        session_api = self._build_nwb_api_for_session(
            nwb_path, session_id, filter_by_validity, **unit_filter_kwargs)
        return EcephysSession(api=session_api, test=True)

    return one_file_call_caching(
        cache_path,
        partial(self.fetch_api.get_session_data, session_id),
        self.stream_writer,
        construct_session,
        num_tries=self.fetch_tries)
def get_session_table(
        self, suppress: Optional[List[str]] = None,
        by: str = "ophys_session_id") -> pd.DataFrame:
    """
    Return summary table of all ophys_session_ids in the database.
    :param suppress: optional list of columns to drop from the resulting
        dataframe.
    :type suppress: list of str
    :param by: (default="ophys_session_id"). Column to index on, either
        "ophys_session_id" or "ophys_experiment_id".
        If by="ophys_experiment_id", then each row will only have one
        experiment id, of type int (vs. an array of 1>more).
    :type by: str
    :rtype: pd.DataFrame
    """
    # These columns round-trip through csv as serialized arrays.
    array_fields = [
        "reporter_line", "driver_line", "ophys_experiment_id"]
    writer = partial(_write_csv, array_fields=array_fields)
    reader = partial(_read_csv,
                     index_col="ophys_session_id",
                     array_fields=array_fields,
                     array_types=[str, str, int])

    sessions = one_file_call_caching(
        self.get_cache_path(None, self.OPHYS_SESSIONS_KEY),
        self.fetch_api.get_session_table,
        writer,
        reader)

    if suppress:
        sessions = sessions.drop(columns=suppress, errors="ignore")

    # Possibly explode and reindex
    if by == "ophys_experiment_id":
        sessions = (sessions
                    .reset_index()
                    .explode("ophys_experiment_id")
                    .set_index("ophys_experiment_id"))
    elif by != "ophys_session_id":
        # Unknown value: warn and fall back to the default indexing.
        self.logger.warning(
            f"Invalid value for `by`, '{by}', passed to get_session_table."
            " Valid choices for `by` are 'ophys_experiment_id' and "
            "'ophys_session_id'.")
    return sessions
def test_one_file_call_caching(tmpdir_factory, existing):
    """one_file_call_caching should read an existing cache file without
    calling the getter, and fetch-write-read when the file is missing."""
    cache_dir = str(tmpdir_factory.mktemp("foo"))
    cache_path = os.path.join(cache_dir, "baz.csv")

    getter = get_data
    data = getter()
    if existing:
        # Pre-populate the cache. The getter must then never run, so swap
        # in one whose return value would break the writer if it were used.
        data.to_csv(cache_path, index=False)

        def getter():
            return "foo"

    def writer(path, df):
        df.to_csv(path, index=False)

    def reader(path):
        return pd.read_csv(path)

    obtained = cu.one_file_call_caching(
        cache_path,
        getter,
        writer,
        reader,
        num_tries=2)

    pd.testing.assert_frame_equal(
        get_data(), obtained, check_like=True, check_dtype=False)
def get_session_data(self, session_id: int, filter_by_validity: bool = True, **unit_filter_kwargs):
    """ Obtain an EcephysSession object containing detailed data for a single session
    """
    cache_path = self.get_cache_path(
        None, self.SESSION_NWB_KEY, session_id, session_id)

    def construct_session(nwb_path):
        # Wrap the downloaded NWB file in a session api, applying filters.
        session_api = self._build_nwb_api_for_session(
            nwb_path, session_id, filter_by_validity, **unit_filter_kwargs)
        return EcephysSession(api=session_api, test=True)

    def no_op_writer(*args, **kwargs):
        # The "fetch" step (s3fs.get) already wrote the file to cache_path,
        # so the caching helper's write step has nothing to do.
        return None

    Manifest.safe_make_parent_dirs(cache_path)
    return one_file_call_caching(
        cache_path,
        partial(self.s3fs.get, self._get_s3_path(cache_path), cache_path),
        no_op_writer,
        construct_session,
        num_tries=self.fetch_tries)
def get_experiment_table(
        self,
        suppress: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Return summary table of all ophys_experiment_ids in the database.
    :param suppress: optional list of columns to drop from the resulting
        dataframe.
    :type suppress: list of str
    :rtype: pd.DataFrame
    """
    if self.cache:
        path = self.get_cache_path(None, self.OPHYS_EXPERIMENTS_KEY)
        experiments = one_file_call_caching(
            path,
            self.fetch_api.get_experiment_table,
            _write_json,
            _read_json)
        # BUG FIX: DataFrame.set_index is not in-place by default; the
        # original discarded its return value, so the cached table kept a
        # RangeIndex instead of being indexed by ophys_experiment_id.
        experiments = experiments.set_index("ophys_experiment_id")
    else:
        experiments = self.fetch_api.get_experiment_table()
    if suppress:
        experiments.drop(columns=suppress, inplace=True, errors="ignore")
    return experiments
def get_behavior_session_table(
        self,
        suppress: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Return summary table of all behavior_session_ids in the database.
    :param suppress: optional list of columns to drop from the resulting
        dataframe.
    :type suppress: list of str
    :rtype: pd.DataFrame
    """
    if self.cache:
        path = self.get_cache_path(None, self.BEHAVIOR_SESSIONS_KEY)
        sessions = one_file_call_caching(
            path,
            self.fetch_api.get_behavior_only_session_table,
            _write_json,
            _read_json)
        # BUG FIX: DataFrame.set_index is not in-place by default; the
        # original discarded its return value, so the cached table kept a
        # RangeIndex instead of being indexed by behavior_session_id.
        sessions = sessions.set_index("behavior_session_id")
    else:
        sessions = self.fetch_api.get_behavior_only_session_table()
    # Standardize on the column name used elsewhere in this API.
    sessions = sessions.rename(columns={"genotype": "full_genotype"})
    if suppress:
        sessions.drop(columns=suppress, inplace=True, errors="ignore")
    return sessions