Example #1
    def get_natural_scene_template(self, number):
        return one_file_call_caching(
            self.get_cache_path(None, self.NATURAL_SCENE_KEY, number),
            partial(self.fetch_api.get_natural_scene_template, number=number),
            self.stream_writer,
            read_scene,
            num_tries=self.fetch_tries)
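Every example in this list uses the same call shape for one_file_call_caching: a cache file path, a zero-argument fetch callable, a writer, a reader, and an optional num_tries retry count. The sketch below illustrates that contract with hypothetical helpers (cached_call, write_json, read_json, and example_cache.json are all made-up names); it is a rough approximation of the behavior these call sites rely on, not the actual implementation of one_file_call_caching.

import json
import os


def write_json(path, obj):
    with open(path, "w") as f:
        json.dump(obj, f)


def read_json(path):
    with open(path) as f:
        return json.load(f)


def cached_call(path, fetch, write, read, num_tries=1):
    # Approximation of the contract the call sites above rely on: if the
    # cache file is missing, call fetch() and persist it with write();
    # either way, return whatever read() loads from disk. On failure,
    # discard the cache file and retry up to num_tries times.
    error = None
    for _ in range(num_tries):
        try:
            if not os.path.exists(path):
                write(path, fetch())
            return read(path)
        except Exception as err:
            error = err
            if os.path.exists(path):
                os.remove(path)
    raise error


data = cached_call(
    "example_cache.json",           # hypothetical cache path
    lambda: {"greeting": "hello"},  # stands in for a fetch_api call
    write_json,
    read_json,
    num_tries=2,
)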
Example #2
    def get_ophys_experiment_table(
            self,
            suppress: Optional[List[str]] = None,
            as_df=True) -> Union[pd.DataFrame, ExperimentsTable]:
        """
        Return summary table of all ophys_experiment_ids in the database.
        :param suppress: optional list of columns to drop from the resulting
            dataframe.
        :type suppress: list of str
        :param as_df: whether to return as df or as ExperimentsTable
        :rtype: pd.DataFrame
        """
        if isinstance(self.fetch_api, BehaviorProjectCloudApi):
            return self.fetch_api.get_ophys_experiment_table()
        if self.cache is not None:
            path = self.cache.get_cache_path(None,
                                             self.cache.OPHYS_EXPERIMENTS_KEY)
            experiments = one_file_call_caching(
                path, self.fetch_api.get_ophys_experiment_table, _write_json,
                lambda path: _read_json(path, index_name='ophys_experiment_id'
                                        ))
        else:
            experiments = self.fetch_api.get_ophys_experiment_table()

        # Merge behavior data in
        behavior_sessions_table = self.get_behavior_session_table(
            suppress=suppress, as_df=True, include_ophys_data=False)
        experiments = behavior_sessions_table.merge(
            experiments,
            left_index=True,
            right_on='behavior_session_id',
            suffixes=('_behavior', '_ophys'))
        experiments = ExperimentsTable(df=experiments, suppress=suppress)
        return experiments.table if as_df else experiments
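The merge here joins the behavior sessions table, which is indexed by behavior_session_id, onto the behavior_session_id column of the experiments table. Below is a small, self-contained pandas sketch of that join pattern; the column names mirror the example, but the values are invented.

import pandas as pd

behavior = pd.DataFrame(
    {"session_type": ["TRAINING_1", "OPHYS_1"]},
    index=pd.Index([101, 102], name="behavior_session_id"),
)
experiments = pd.DataFrame(
    {"behavior_session_id": [102], "ophys_experiment_id": [9001]}
)

# left_index=True joins on behavior's index, right_on joins on the column of
# the same name in experiments; the default inner join keeps only session 102.
merged = behavior.merge(
    experiments,
    left_index=True,
    right_on="behavior_session_id",
    suffixes=("_behavior", "_ophys"),
)
print(merged)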
Example #3
    def _get_channels(self):
        path = self.get_cache_path(None, self.CHANNELS_KEY)
        return one_file_call_caching(path,
                                     self.fetch_api.get_channels,
                                     write_csv,
                                     read_csv,
                                     num_tries=self.fetch_tries)
Example #4
    def get_ophys_session_table(
            self,
            suppress: Optional[List[str]] = None,
            index_column: str = "ophys_session_id",
            as_df=True,
            include_behavior_data=True,
            passed_only=True) -> \
            Union[pd.DataFrame, BehaviorOphysSessionsTable]:
        """
        Return summary table of all ophys_session_ids in the database.
        :param suppress: optional list of columns to drop from the resulting
            dataframe.
        :type suppress: list of str
        :param index_column: (default="ophys_session_id"). Column to index
            on, either "ophys_session_id" or "ophys_experiment_id".
            If index_column="ophys_experiment_id", then each row will only
            have one experiment id, of type int (vs. an array of one or more).
        :type index_column: str
        :param as_df: whether to return as df or as BehaviorOphysSessionsTable
        :param include_behavior_data: whether to include behavior data
        :rtype: pd.DataFrame
        """
        if isinstance(self.fetch_api, BehaviorProjectCloudApi):
            return self.fetch_api.get_ophys_session_table()
        if self.cache is not None:
            path = self.cache.get_cache_path(None,
                                             self.cache.OPHYS_SESSIONS_KEY)
            ophys_sessions = one_file_call_caching(
                path, self.fetch_api.get_ophys_session_table, _write_json,
                lambda path: _read_json(path, index_name='ophys_session_id'))
        else:
            ophys_sessions = self.fetch_api.get_ophys_session_table()

        if include_behavior_data:
            # Merge behavior data in
            behavior_sessions_table = self.get_behavior_session_table(
                suppress=suppress, as_df=True, include_ophys_data=False)
            ophys_sessions = behavior_sessions_table.merge(
                ophys_sessions,
                left_index=True,
                right_on='behavior_session_id',
                suffixes=('_behavior', '_ophys'))

        sessions = BehaviorOphysSessionsTable(df=ophys_sessions,
                                              suppress=suppress,
                                              index_column=index_column)
        if passed_only:
            oet = self.get_ophys_experiment_table(passed_only=True)
            for i in sessions.table.index:
                sub_df = oet.query(f"ophys_session_id=={i}")
                values = list(set(sub_df["ophys_container_id"].values))
                values.sort()
                sessions.table.at[i, "ophys_container_id"] = values

        return sessions.table if as_df else sessions
Example #5
    def get_unit_analysis_metrics_by_session_type(
            self,
            session_type,
            annotate: bool = True,
            filter_by_validity: bool = True,
            **unit_filter_kwargs):
        """ Cache and return a table of analysis metrics calculated on each unit from a specified session type. See
        get_all_session_types for a list of session types.

        Parameters
        ----------
        session_type : str
            identifies the session type for which to fetch analysis metrics.
        annotate : bool, optional
            if True, information from the annotated units table will be merged onto the outputs
        filter_by_validity : bool, optional
            Filter units used by analysis so that only 'valid' units are returned, by default True
        **unit_filter_kwargs :
            Additional keyword arguments that can be used to filter units (for power users).

        Returns
        -------
        metrics : pd.DataFrame
            Each row corresponds to a single unit, describing a set of analysis metrics calculated on that unit.

        """

        known_session_types = self.get_all_session_types()
        if session_type not in known_session_types:
            raise ValueError(
                f"unrecognized session type: {session_type}. Available types: {known_session_types}"
            )

        path = self.get_cache_path(None, self.TYPEWISE_ANALYSIS_METRICS_KEY,
                                   session_type)
        fetch_metrics = partial(self.fetch_api.get_unit_analysis_metrics,
                                session_types=[session_type])

        metrics = one_file_call_caching(path,
                                        fetch_metrics,
                                        write_metrics_csv,
                                        read_metrics_csv,
                                        num_tries=self.fetch_tries)

        if annotate:
            units = self.get_units(filter_by_validity=filter_by_validity,
                                   **unit_filter_kwargs)
            metrics = pd.merge(units,
                               metrics,
                               left_index=True,
                               right_index=True,
                               how="inner")
            metrics.index.rename("ecephys_unit_id", inplace=True)

        return metrics
Example #6
    def _get_probes(self):
        path: str = self.get_cache_path(None, self.PROBES_KEY)
        probes = one_file_call_caching(path,
                                       self.fetch_api.get_probes,
                                       write_csv,
                                       read_csv,
                                       num_tries=self.fetch_tries)
        # Report the effective LFP sampling rate: divide lfp_sampling_rate by
        # the temporal subsampling factor when both columns are present
        if all(c in list(probes) for c in
               ["lfp_sampling_rate", "lfp_temporal_subsampling_factor"]):
            probes["lfp_sampling_rate"] = (
                probes["lfp_sampling_rate"] /
                probes["lfp_temporal_subsampling_factor"])
        return probes
Example #7
    def get_unit_analysis_metrics_for_session(self,
                                              session_id,
                                              annotate: bool = True,
                                              filter_by_validity: bool = True,
                                              **unit_filter_kwargs):
        """ Cache and return a table of analysis metrics calculated on each unit from a specified session. See
        get_session_table for a list of sessions.

        Parameters
        ----------
        session_id : int
            identifies the session from which to fetch analysis metrics.
        annotate : bool, optional
            if True, information from the annotated units table will be merged onto the outputs
        filter_by_validity : bool, optional
            Filter units used by analysis so that only 'valid' units are returned, by default True
        **unit_filter_kwargs :
            Additional keyword arguments that can be used to filter units (for power users).

        Returns
        -------
        metrics : pd.DataFrame
            Each row corresponds to a single unit, describing a set of analysis metrics calculated on that unit.

        """

        path = self.get_cache_path(None, self.SESSION_ANALYSIS_METRICS_KEY,
                                   session_id, session_id)
        fetch_metrics = partial(self.fetch_api.get_unit_analysis_metrics,
                                ecephys_session_ids=[session_id])

        metrics = one_file_call_caching(path,
                                        fetch_metrics,
                                        write_metrics_csv,
                                        read_metrics_csv,
                                        num_tries=self.fetch_tries)

        if annotate:
            units = self.get_units(filter_by_validity=filter_by_validity,
                                   **unit_filter_kwargs)
            units = units[units["ecephys_session_id"] == session_id]
            metrics = pd.merge(units,
                               metrics,
                               left_index=True,
                               right_index=True,
                               how="inner")
            metrics.index.rename("ecephys_unit_id", inplace=True)

        return metrics
Example #8
    def get_ophys_cells_table(self) -> pd.DataFrame:
        """
        Return summary table of all cells in this project cache
        :rtype: pd.DataFrame
        """
        if isinstance(self.fetch_api, BehaviorProjectCloudApi):
            return self.fetch_api.get_ophys_cells_table()
        if self.cache is not None:
            path = self.cache.get_cache_path(None, self.cache.OPHYS_CELLS_KEY)
            ophys_cells_table = one_file_call_caching(
                path, self.fetch_api.get_ophys_cells_table, _write_json,
                lambda path: _read_json(path, index_name='cell_roi_id'))
        else:
            ophys_cells_table = self.fetch_api.get_ophys_cells_table()

        return ophys_cells_table
Example #9
    def _get_sessions(self):
        path = self.get_cache_path(None, self.SESSIONS_KEY)
        response = one_file_call_caching(path,
                                         self.fetch_api.get_sessions,
                                         write_csv,
                                         read_csv,
                                         num_tries=self.fetch_tries)

        if "structure_acronyms" in response.columns:  # unfortunately, structure_acronyms is a list of str
            response["ecephys_structure_acronyms"] = [
                ast.literal_eval(item)
                for item in response["structure_acronyms"]
            ]
            response.drop(columns=["structure_acronyms"], inplace=True)

        return response
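The ast.literal_eval step exists because caching the table as CSV turns list-valued structure_acronyms cells into their string representation; literal_eval parses them back into real Python lists. A minimal illustration (the value is made up):

import ast

raw = "['VISp', 'CA1']"           # how a list-valued cell survives a CSV round trip
acronyms = ast.literal_eval(raw)  # -> ['VISp', 'CA1'], a real list of str
print(type(acronyms), acronyms)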
Example #10
    def get_session_data(self,
                         session_id: int,
                         filter_by_validity: bool = True,
                         **unit_filter_kwargs):
        """ Obtain an EcephysSession object containing detailed data for a single session
        """
        def read(_path):
            session_api = self._build_nwb_api_for_session(
                _path, session_id, filter_by_validity, **unit_filter_kwargs)
            return EcephysSession(api=session_api, test=True)

        return one_file_call_caching(self.get_cache_path(
            None, self.SESSION_NWB_KEY, session_id, session_id),
                                     partial(self.fetch_api.get_session_data,
                                             session_id),
                                     self.stream_writer,
                                     read,
                                     num_tries=self.fetch_tries)
Example #11
    def get_behavior_session_table(
            self,
            suppress: Optional[List[str]] = None,
            as_df=True,
            include_ophys_data=True,
            passed_only=True) -> Union[pd.DataFrame, SessionsTable]:
        """
        Return summary table of all behavior_session_ids in the database.
        :param suppress: optional list of columns to drop from the resulting
            dataframe.
        :type suppress: list of str
        :param as_df: whether to return as df or as SessionsTable
        :param include_ophys_data: whether to include ophys data
        :rtype: pd.DataFrame
        """
        if isinstance(self.fetch_api, BehaviorProjectCloudApi):
            return self.fetch_api.get_behavior_session_table()
        if self.cache is not None:
            path = self.cache.get_cache_path(None,
                                             self.cache.BEHAVIOR_SESSIONS_KEY)
            sessions = one_file_call_caching(
                path, self.fetch_api.get_behavior_session_table, _write_json,
                lambda path: _read_json(path, index_name='behavior_session_id'
                                        ))
        else:
            sessions = self.fetch_api.get_behavior_session_table()

        if include_ophys_data:
            ophys_session_table = self.get_ophys_session_table(
                suppress=suppress,
                as_df=False,
                include_behavior_data=False,
                passed_only=passed_only)
        else:
            ophys_session_table = None
        sessions = SessionsTable(df=sessions,
                                 suppress=suppress,
                                 fetch_api=self.fetch_api,
                                 ophys_session_table=ophys_session_table)

        return sessions.table if as_df else sessions
Example #12
def test_one_file_call_caching(tmpdir_factory, existing):
    tmpdir = str(tmpdir_factory.mktemp("foo"))
    path = os.path.join(tmpdir, "baz.csv")

    getter = get_data
    data = getter()

    if existing:
        data.to_csv(path, index=False)
        getter = lambda: "foo"

    obtained = cu.one_file_call_caching(
        path,
        getter,
        lambda path, df: df.to_csv(path, index=False),
        lambda path: pd.read_csv(path),
        num_tries=2
    )

    pd.testing.assert_frame_equal(get_data(), obtained, check_like=True, check_dtype=False)
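The test above depends on two things defined elsewhere in its module: a get_data helper that builds the reference DataFrame, and an existing argument supplied by pytest parametrization (cu is presumably the module alias that provides one_file_call_caching). The sketch below shows plausible minimal stand-ins, as assumptions rather than the actual test code. Note that when existing is True the getter is swapped for a dummy, so the final assertion only passes if the data really comes back from the pre-written cache file.

import pandas as pd


def get_data():
    # Hypothetical stand-in: any small, deterministic frame works, since the
    # test only checks that the cached round trip reproduces it.
    return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})


# `existing` would typically be supplied by pytest parametrization on the
# test itself, covering both the cold-cache and warm-cache paths, e.g.:
#     @pytest.mark.parametrize("existing", [True, False])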
Example #13
    def _get_units(self,
                   filter_by_validity: bool = True,
                   **unit_filter_kwargs) -> pd.DataFrame:
        path = self.get_cache_path(None, self.UNITS_KEY)

        units = one_file_call_caching(path,
                                      self.fetch_api.get_units,
                                      write_csv,
                                      read_csv,
                                      num_tries=self.fetch_tries)
        units = units.rename(
            columns={
                'PT_ratio': 'waveform_PT_ratio',
                'amplitude': 'waveform_amplitude',
                'duration': 'waveform_duration',
                'halfwidth': 'waveform_halfwidth',
                'recovery_slope': 'waveform_recovery_slope',
                'repolarization_slope': 'waveform_repolarization_slope',
                'spread': 'waveform_spread',
                'velocity_above': 'waveform_velocity_above',
                'velocity_below': 'waveform_velocity_below',
                'l_ratio': 'L_ratio',
            })

        units = units[(units["amplitude_cutoff"] <= get_unit_filter_value(
            "amplitude_cutoff_maximum", **unit_filter_kwargs))
                      & (units["presence_ratio"] >= get_unit_filter_value(
                          "presence_ratio_minimum", **unit_filter_kwargs))
                      & (units["isi_violations"] <= get_unit_filter_value(
                          "isi_violations_maximum", **unit_filter_kwargs))]

        if "quality" in units.columns and filter_by_validity:
            units = units[units["quality"] == "good"]
            units.drop(columns="quality", inplace=True)

        if "ecephys_structure_id" in units.columns and unit_filter_kwargs.get(
                "filter_out_of_brain_units", True):
            units = units[~(units["ecephys_structure_id"].isna())]

        return units