Exemplo n.º 1
0
    def get_unit_analysis_metrics_for_session(self, session_id, annotate: bool = True, filter_by_validity: bool = True, **unit_filter_kwargs):
        """ Cache and return the per-unit analysis metrics of a single session. See get_session_table for a
        list of sessions.

        Parameters
        ----------
        session_id : int
            identifies the session whose analysis metrics are requested.
        annotate : bool, optional
            if True (default), the metrics are inner-joined, index on index, with the rows of the units
            table whose "ecephys_session_id" equals session_id.
        filter_by_validity : bool, optional
            Forwarded to get_units so that only 'valid' units are returned, by default True. Only used
            when annotate is True.
        **unit_filter_kwargs :
            Additional keyword arguments forwarded to get_units (for power users). Only used when
            annotate is True.

        Returns
        -------
        metrics : pd.DataFrame
            Each row corresponds to a single unit, describing a set of analysis metrics calculated on that unit.

        """

        cache_file = self.get_cache_path(
            None, self.SESSION_ANALYSIS_METRICS_KEY, session_id, session_id
        )
        fetch_for_session = partial(
            self.fetch_api.get_unit_analysis_metrics,
            ecephys_session_ids=[session_id],
        )
        metrics = one_file_call_caching(
            cache_file,
            fetch_for_session,
            write_metrics_csv,
            read_metrics_csv,
            num_tries=self.fetch_tries,
        )

        # Without annotation the raw metrics table is the final answer.
        if not annotate:
            return metrics

        all_units = self.get_units(filter_by_validity=filter_by_validity, **unit_filter_kwargs)
        session_units = all_units[all_units["ecephys_session_id"] == session_id]

        # Inner join: only units present in both tables survive.
        annotated = pd.merge(session_units, metrics, left_index=True, right_index=True, how="inner")
        annotated.index.rename("ecephys_unit_id", inplace=True)
        return annotated
Exemplo n.º 2
0
    def _get_units(self, filter_by_validity: bool = True, **unit_filter_kwargs) -> pd.DataFrame:
        """ Load the units table through the file cache and apply the unit filters.

        Parameters
        ----------
        filter_by_validity : bool, optional
            If True (default) and the table has a "quality" column, keep only rows whose quality is
            "good" and drop the "quality" column from the result.
        **unit_filter_kwargs :
            Passed to get_unit_filter_value to resolve the "amplitude_cutoff_maximum",
            "presence_ratio_minimum" and "isi_violations_maximum" thresholds. The optional key
            "filter_out_of_brain_units" (default True) controls whether rows with a missing
            "ecephys_structure_id" are removed.

        Returns
        -------
        units : pd.DataFrame
            The filtered units table, with the waveform columns renamed to carry a "waveform_" prefix
            and "l_ratio" renamed to "L_ratio".

        """
        path = self.get_cache_path(None, self.UNITS_KEY)

        units = one_file_call_caching(path, self.fetch_api.get_units, write_csv, read_csv, num_tries=self.fetch_tries)
        units = units.rename(columns={
            'PT_ratio': 'waveform_PT_ratio',
            'amplitude': 'waveform_amplitude',
            'duration': 'waveform_duration',
            'halfwidth': 'waveform_halfwidth',
            'recovery_slope': 'waveform_recovery_slope',
            'repolarization_slope': 'waveform_repolarization_slope',
            'spread': 'waveform_spread',
            'velocity_above': 'waveform_velocity_above',
            'velocity_below': 'waveform_velocity_below',
            'l_ratio': 'L_ratio',
        })

        # Keep only the rows that satisfy all three threshold filters.
        units = units[
            (units["amplitude_cutoff"] <= get_unit_filter_value("amplitude_cutoff_maximum", **unit_filter_kwargs))
            & (units["presence_ratio"] >= get_unit_filter_value("presence_ratio_minimum", **unit_filter_kwargs))
            & (units["isi_violations"] <= get_unit_filter_value("isi_violations_maximum", **unit_filter_kwargs))
        ]

        if "quality" in units.columns and filter_by_validity:
            # Reassign instead of dropping in place: ``units`` is a filtered selection
            # at this point, and mutating it in place can raise pandas'
            # SettingWithCopyWarning.
            units = units[units["quality"] == "good"].drop(columns="quality")

        if "ecephys_structure_id" in units.columns and unit_filter_kwargs.get("filter_out_of_brain_units", True):
            units = units[~(units["ecephys_structure_id"].isna())]

        return units
Exemplo n.º 3
0
 def get_natural_scene_template(self, number):
     """Return natural scene template ``number`` via the one-file cache.

     The template is fetched with ``fetch_api.get_natural_scene_template``,
     written with ``self.stream_writer`` and loaded with ``read_scene``.
     """
     cache_file = self.get_cache_path(None, self.NATURAL_SCENE_KEY, number)
     fetch_template = partial(
         self.fetch_api.get_natural_scene_template, number=number)
     return one_file_call_caching(
         cache_file,
         fetch_template,
         self.stream_writer,
         read_scene,
         num_tries=self.fetch_tries)
Exemplo n.º 4
0
 def get_experiment_table(
         self,
         suppress: Optional[List[str]] = None) -> pd.DataFrame:
     """
     Return summary table of all ophys_experiment_ids in the database.

     The table goes through the one-file CSV cache; "reporter_line" and
     "driver_line" are treated as array-valued columns on both write and
     read, and the table is read back indexed by "ophys_experiment_id".

     :param suppress: optional list of columns to drop from the resulting
         dataframe. Names that are not present are ignored.
     :type suppress: list of str
     :rtype: pd.DataFrame
     """
     array_columns = ("reporter_line", "driver_line")
     csv_writer = partial(_write_csv, array_fields=list(array_columns))
     csv_reader = partial(
         _read_csv,
         index_col="ophys_experiment_id",
         array_fields=list(array_columns),
         array_types=[str, str])

     cache_file = self.get_cache_path(None, self.OPHYS_EXPERIMENTS_KEY)
     experiments = one_file_call_caching(
         cache_file,
         self.fetch_api.get_experiment_table,
         csv_writer,
         csv_reader)

     if not suppress:
         return experiments

     experiments.drop(columns=suppress, inplace=True, errors="ignore")
     return experiments
Exemplo n.º 5
0
 def _get_channels(self):
     """Return the channels table via the one-file CSV cache."""
     cache_file = self.get_cache_path(None, self.CHANNELS_KEY)
     channels = one_file_call_caching(
         cache_file,
         self.fetch_api.get_channels,
         write_csv,
         read_csv,
         num_tries=self.fetch_tries)
     return channels
Exemplo n.º 6
0
 def get_behavior_session_table(
         self,
         suppress: Optional[List[str]] = None) -> pd.DataFrame:
     """
     Return summary table of all behavior_session_ids in the database.

     The table goes through the one-file CSV cache and is read back indexed
     by "behavior_session_id"; the "genotype" column is renamed to
     "full_genotype" before any suppression is applied.

     :param suppress: optional list of columns to drop from the resulting
         dataframe. Names that are not present are ignored.
     :type suppress: list of str
     :rtype: pd.DataFrame
     """
     array_columns = ("reporter_line", "driver_line")
     csv_reader = partial(
         _read_csv,
         index_col="behavior_session_id",
         array_fields=list(array_columns),
         array_types=[str, str])
     csv_writer = partial(_write_csv, array_fields=list(array_columns))

     cache_file = self.get_cache_path(None, self.BEHAVIOR_SESSIONS_KEY)
     cached = one_file_call_caching(
         cache_file,
         self.fetch_api.get_behavior_only_session_table,
         csv_writer,
         csv_reader)

     sessions = cached.rename(columns={"genotype": "full_genotype"})
     if not suppress:
         return sessions

     sessions.drop(columns=suppress, inplace=True, errors="ignore")
     return sessions
Exemplo n.º 7
0
 def get_natural_movie_template(self, number):
     """Return natural movie template ``number`` via the one-file cache.

     The template is fetched with ``fetch_api.get_natural_movie_template``,
     written with ``write_from_stream`` and loaded with ``read_movie``.
     """
     cache_file = self.get_cache_path(None, self.NATURAL_MOVIE_KEY, number)
     fetch_template = partial(
         self.fetch_api.get_natural_movie_template, number=number)
     return one_file_call_caching(
         cache_file,
         fetch_template,
         write_from_stream,
         read_movie,
         num_tries=self.fetch_tries)
Exemplo n.º 8
0
 def _get_probes(self):
     """Return the probes table via the one-file CSV cache.

     When both "lfp_sampling_rate" and "lfp_temporal_subsampling_factor"
     are present, the sampling rate is replaced by the rate divided by the
     subsampling factor, for clearer presentation.
     """
     rate_col = "lfp_sampling_rate"
     factor_col = "lfp_temporal_subsampling_factor"

     cache_file: str = self.get_cache_path(None, self.PROBES_KEY)
     probes = one_file_call_caching(
         cache_file,
         self.fetch_api.get_probes,
         write_csv,
         read_csv,
         num_tries=self.fetch_tries)

     # Only rescale when both inputs of the division were provided.
     available = list(probes)
     if all(name in available for name in (rate_col, factor_col)):
         probes[rate_col] = probes[rate_col] / probes[factor_col]
     return probes
Exemplo n.º 9
0
    def _get_sessions(self):
        """ Return the sessions table via the one-file CSV cache.

        If a "structure_acronyms" column is present, each of its entries is parsed with
        ast.literal_eval into a new "ecephys_structure_acronyms" column and the original
        column is dropped.
        """
        cache_file = self.get_cache_path(None, self.SESSIONS_KEY)
        sessions = one_file_call_caching(
            cache_file,
            self.fetch_api.get_sessions,
            write_csv,
            read_csv,
            num_tries=self.fetch_tries,
        )

        if "structure_acronyms" not in sessions.columns:
            return sessions

        # unfortunately, structure_acronyms is a list of str, so each entry is parsed back from its literal
        parsed_acronyms = list(map(ast.literal_eval, sessions["structure_acronyms"]))
        sessions["ecephys_structure_acronyms"] = parsed_acronyms
        sessions.drop(columns=["structure_acronyms"], inplace=True)
        return sessions
Exemplo n.º 10
0
    def get_unit_analysis_metrics_by_session_type(
            self,
            session_type,
            annotate: bool = True,
            filter_by_validity: bool = True,
            **unit_filter_kwargs):
        """ Cache and return the per-unit analysis metrics for every session of a given session type. See
        get_all_session_types for a list of session types.

        Parameters
        ----------
        session_type : str
            identifies the session type whose analysis metrics are requested.
        annotate : bool, optional
            if True (default), the metrics are inner-joined, index on index, with the units table
        filter_by_validity : bool, optional
            Forwarded to get_units so that only 'valid' units are returned, by default True. Only used
            when annotate is True.
        **unit_filter_kwargs :
            Additional keyword arguments forwarded to get_units (for power users). Only used when
            annotate is True.

        Returns
        -------
        metrics : pd.DataFrame
            Each row corresponds to a single unit, describing a set of analysis metrics calculated on that unit.

        Raises
        ------
        ValueError
            If session_type is not among the values returned by get_all_session_types.

        """

        known_session_types = self.get_all_session_types()
        if session_type not in known_session_types:
            raise ValueError(
                f"unrecognized session type: {session_type}. Available types: {known_session_types}"
            )

        cache_file = self.get_cache_path(
            None, self.TYPEWISE_ANALYSIS_METRICS_KEY, session_type)
        fetch_for_type = partial(
            self.fetch_api.get_unit_analysis_metrics,
            session_types=[session_type])
        metrics = one_file_call_caching(
            cache_file,
            fetch_for_type,
            write_metrics_csv,
            read_metrics_csv,
            num_tries=self.fetch_tries)

        # Without annotation the raw metrics table is the final answer.
        if not annotate:
            return metrics

        units = self.get_units(
            filter_by_validity=filter_by_validity, **unit_filter_kwargs)

        # Inner join: only units present in both tables survive.
        annotated = pd.merge(
            units, metrics, left_index=True, right_index=True, how="inner")
        annotated.index.rename("ecephys_unit_id", inplace=True)
        return annotated
Exemplo n.º 11
0
    def get_session_data(self, session_id: int, filter_by_validity: bool = True, **unit_filter_kwargs):
        """ Obtain an EcephysSession object containing detailed data for a single session

        The session file goes through the one-file cache: it is fetched with
        fetch_api.get_session_data, written with self.stream_writer, and opened by building an
        NWB api for the cached path (filter_by_validity and unit_filter_kwargs are forwarded to
        _build_nwb_api_for_session).
        """

        def open_session(_path):
            # Wrap the cached file in a session api, then in the session object itself.
            nwb_api = self._build_nwb_api_for_session(_path, session_id, filter_by_validity, **unit_filter_kwargs)
            return EcephysSession(api=nwb_api, test=True)

        cache_file = self.get_cache_path(None, self.SESSION_NWB_KEY, session_id, session_id)
        fetch_session = partial(self.fetch_api.get_session_data, session_id)
        return one_file_call_caching(
            cache_file,
            fetch_session,
            self.stream_writer,
            open_session,
            num_tries=self.fetch_tries,
        )
Exemplo n.º 12
0
    def get_session_table(self,
                          suppress: Optional[List[str]] = None,
                          by: str = "ophys_session_id") -> pd.DataFrame:
        """
        Return summary table of all ophys_session_ids in the database.

        The table goes through the one-file CSV cache and is read back
        indexed by "ophys_session_id".

        :param suppress: optional list of columns to drop from the resulting
            dataframe. Names that are not present are ignored.
        :type suppress: list of str
        :param by: (default="ophys_session_id"). Column to index on, either
            "ophys_session_id" or "ophys_experiment_id".
            If by="ophys_experiment_id", then each row will only have one
            experiment id, of type int (vs. an array of 1>more).
            Any other value logs a warning and the table is returned
            indexed by "ophys_session_id".
        :type by: str
        :rtype: pd.DataFrame
        """
        array_columns = ("reporter_line", "driver_line", "ophys_experiment_id")
        csv_writer = partial(_write_csv, array_fields=list(array_columns))
        csv_reader = partial(_read_csv,
                             index_col="ophys_session_id",
                             array_fields=list(array_columns),
                             array_types=[str, str, int])

        cache_file = self.get_cache_path(None, self.OPHYS_SESSIONS_KEY)
        sessions = one_file_call_caching(cache_file,
                                         self.fetch_api.get_session_table,
                                         csv_writer, csv_reader)
        if suppress:
            sessions.drop(columns=suppress, inplace=True, errors="ignore")

        # Explode and reindex: one row per experiment id.
        if by == "ophys_experiment_id":
            flattened = sessions.reset_index()
            exploded = flattened.explode("ophys_experiment_id")
            return exploded.set_index("ophys_experiment_id")

        if by != "ophys_session_id":
            self.logger.warning(
                f"Invalid value for `by`, '{by}', passed to get_session_table."
                " Valid choices for `by` are 'ophys_experiment_id' and "
                "'ophys_session_id'.")
        return sessions
Exemplo n.º 13
0
def test_one_file_call_caching(tmpdir_factory, existing):
    """Check that one_file_call_caching returns the expected frame.

    When ``existing`` is truthy the CSV is written up front and the getter
    is swapped for one returning "foo", so a matching result can only have
    come from the file already on disk.
    """
    cache_file = os.path.join(str(tmpdir_factory.mktemp("foo")), "baz.csv")

    def fetch_sentinel():
        return "foo"

    def write_frame(path, df):
        return df.to_csv(path, index=False)

    def read_frame(path):
        return pd.read_csv(path)

    fetch = get_data
    frame = fetch()

    if existing:
        frame.to_csv(cache_file, index=False)
        fetch = fetch_sentinel

    obtained = cu.one_file_call_caching(
        cache_file, fetch, write_frame, read_frame, num_tries=2)

    pd.testing.assert_frame_equal(
        get_data(), obtained, check_like=True, check_dtype=False)
Exemplo n.º 14
0
    def get_session_data(self,
                         session_id: int,
                         filter_by_validity: bool = True,
                         **unit_filter_kwargs):
        """ Obtain an EcephysSession object containing detailed data for a single session

        The fetch step hands the local cache path straight to self.s3fs.get, so the
        writer passed to the one-file cache does nothing. The cached file is then opened
        by building an NWB api for it (filter_by_validity and unit_filter_kwargs are
        forwarded to _build_nwb_api_for_session).
        """

        local_path = self.get_cache_path(
            None, self.SESSION_NWB_KEY, session_id, session_id)

        def open_session(_path):
            # Wrap the cached file in a session api, then in the session object itself.
            nwb_api = self._build_nwb_api_for_session(
                _path, session_id, filter_by_validity, **unit_filter_kwargs)
            return EcephysSession(api=nwb_api, test=True)

        def skip_write(*a, **k):
            # The fetch callable is already given the destination path.
            return None

        Manifest.safe_make_parent_dirs(local_path)
        fetch_from_s3 = partial(
            self.s3fs.get, self._get_s3_path(local_path), local_path)
        return one_file_call_caching(
            local_path,
            fetch_from_s3,
            skip_write,
            open_session,
            num_tries=self.fetch_tries)
Exemplo n.º 15
0
 def get_experiment_table(
         self,
         suppress: Optional[List[str]] = None) -> pd.DataFrame:
     """
     Return summary table of all ophys_experiment_ids in the database.

     With caching enabled (``self.cache``) the table goes through the
     one-file JSON cache and is indexed by "ophys_experiment_id"; otherwise
     it is returned as produced by ``fetch_api.get_experiment_table``.

     :param suppress: optional list of columns to drop from the resulting
         dataframe. Names that are not present are ignored.
     :type suppress: list of str
     :rtype: pd.DataFrame
     """
     if self.cache:
         path = self.get_cache_path(None, self.OPHYS_EXPERIMENTS_KEY)
         experiments = one_file_call_caching(
             path,
             self.fetch_api.get_experiment_table,
             _write_json, _read_json)
         # set_index returns a new frame; the result must be kept or the
         # index is never actually applied.
         experiments = experiments.set_index("ophys_experiment_id")
     else:
         experiments = self.fetch_api.get_experiment_table()
     if suppress:
         experiments.drop(columns=suppress, inplace=True, errors="ignore")
     return experiments
Exemplo n.º 16
0
    def get_behavior_session_table(
            self,
            suppress: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Return summary table of all behavior_session_ids in the database.

        With caching enabled (``self.cache``) the table goes through the
        one-file JSON cache and is indexed by "behavior_session_id";
        otherwise it is returned as produced by
        ``fetch_api.get_behavior_only_session_table``. In both cases the
        "genotype" column is renamed to "full_genotype".

        :param suppress: optional list of columns to drop from the resulting
            dataframe. Names that are not present are ignored.
        :type suppress: list of str
        :rtype: pd.DataFrame
        """

        if self.cache:
            path = self.get_cache_path(None, self.BEHAVIOR_SESSIONS_KEY)
            sessions = one_file_call_caching(
                path,
                self.fetch_api.get_behavior_only_session_table,
                _write_json, _read_json)
            # set_index returns a new frame; the result must be kept or the
            # index is never actually applied.
            sessions = sessions.set_index("behavior_session_id")
        else:
            sessions = self.fetch_api.get_behavior_only_session_table()
        sessions = sessions.rename(columns={"genotype": "full_genotype"})
        if suppress:
            sessions.drop(columns=suppress, inplace=True, errors="ignore")
        return sessions