Пример #1
0
    def download(self, fpath: Optional[PathLike] = None, **kwargs: Any) -> Any:
        """
        Fetch the dataset and store it under ``fpath``.

        Parameters
        ----------
        fpath
            Destination file. ``self.path`` is used when omitted. ``self._extension``
            is appended when the name does not already end with it.
        kwargs
            Forwarded verbatim to ``self._download``.

        Returns
        -------
        Whatever ``self._download`` returns.

        Raises
        ------
        ValueError
            If ``self.shape`` is set and differs from the shape of the obtained data.
        """
        target = str(fpath if fpath is not None else self.path)
        if not target.endswith(self._extension):
            target = target + self._extension

        if not os.path.isfile(target):
            logg.debug(
                f"Downloading dataset `{self.name}` from `{self.url}` as `{target}`"
            )
        else:
            logg.debug(f"Loading dataset `{self.name}` from `{target}`")

        parent = Path(target).parent
        try:
            if not parent.is_dir():
                logg.info(f"Creating directory `{parent}`")
                parent.mkdir(parents=True, exist_ok=True)
        except OSError as err:
            # best effort: the failure is reported and the download is still attempted
            logg.error(f"Unable to create directory `{parent}`. Reason `{err}`")

        data = self._download(fpath=target, backup_url=self.url, **kwargs)

        shape_mismatch = self.shape is not None and data.shape != self.shape
        if shape_mismatch:
            raise ValueError(
                f"Expected the data to have shape `{self.shape}`, found `{data.shape}`."
            )

        return data
Пример #2
0
    def density_normalize(
        self, other: Union[ndarray, spmatrix]
    ) -> Union[ndarray, spmatrix]:
        """
        Density normalization by the underlying KNN graph.

        Computes ``Q @ other @ Q`` where ``Q`` is the diagonal matrix holding the
        reciprocals of the column sums of the connectivities (``self._conn``).

        Params
        ------
        other:
            Matrix to normalize.

        Returns
        -------
        :class:`np.ndarray` or :class:`scipy.sparse.spmatrix`
            Density normalized transition matrix.
        """

        logg.debug("DEBUG: Density-normalizing the transition matrix")

        # Summing a sparse matrix yields a 2-D `(1, n)` result. Flatten it so that
        # `np.diag` builds an `(n, n)` diagonal matrix instead of extracting the
        # 1-element diagonal of that 2-D array.
        q = np.asarray(self._conn.sum(axis=0)).ravel()
        inv_q = 1.0 / q

        if not issparse(other):
            Q = np.diag(inv_q)
        else:
            Q = spdiags(inv_q, 0, other.shape[0], other.shape[0])

        return Q @ other @ Q
Пример #3
0
    def _set_iroot_via_xroot(self, xroot):
        """Determine the index of the root cell.

        Given an expression vector, find the observation index that is closest
        to this vector.

        Parameters
        ----------
        xroot : np.ndarray
            Vector that marks the root cell, the vector storing the initial
            condition, only relevant for computing pseudotime.
        """
        if self._adata.shape[1] != xroot.size:
            raise ValueError('The root vector you provided does not have the '
                             'correct dimension.')
        # this is the squared distance
        dsqroot = 1e10
        iroot = 0
        for i in range(self._adata.shape[0]):
            diff = self._adata.X[i, :] - xroot
            dsq = diff @ diff
            if dsq < dsqroot:
                dsqroot = dsq
                iroot = i
                if np.sqrt(dsqroot) < 1e-10: break
        logg.debug(f'setting root index to {iroot}')
        if self.iroot is not None and iroot != self.iroot:
            logg.warning(
                f'Changing index of iroot from {self.iroot} to {iroot}.')
        self.iroot = iroot
Пример #4
0
    def write_to_adata(self, key_added: Optional[str] = None):
        """
        Store the transition matrix and its parameters in the underlying adata object.

        Params
        ------
        key_added
            Postfix appended to the key under which the results are saved in
            :paramref:`.adata` ``.uns``.

        Returns
        -------
        None
            Updates the underlying :paramref:`.adata` object with the following:
                - ``.uns[<key>]['T']`` - transition matrix
                - ``.uns[<key>]['params']`` - parameters used for calculation

            where ``<key>`` is the direction-dependent key, optionally followed by
            ``_`` and :paramref:`key_added`.

        Raises
        ------
        ValueError
            If the transition matrix has not been computed yet.
        """

        if self.transition_matrix is None:
            raise ValueError(
                "Compute transition matrix first as `.compute_transition_matrix()`.`"
            )

        key = _transition(self._direction)
        if key_added is not None:
            key = key + f"_{key_added}"

        uns = self.adata.uns
        if uns.get(key) is not None:
            logg.debug(f"DEBUG: Overwriting key `{key!r}` in `adata.uns`")

        uns[key] = {"params": str(self), "T": self.transition_matrix}

        logg.debug(f"DEBUG: Added `{key!r}` to `adata.uns`")
Пример #5
0
    def _(self,
          img: xr.DataArray,
          copy: bool = True,
          **_: Any) -> xr.DataArray:
        """
        Bring an :class:`xarray.DataArray` into ``y``/``x``/channel form.

        A 2-dimensional array gets a trailing ``channels`` dimension. If ``y`` or
        ``x`` is missing, the first and second dimension respectively are renamed
        (with a warning). The index of the remaining (channel) dimension is
        dropped when there is one.

        Parameters
        ----------
        img
            The image data.
        copy
            Whether to return a copy of the result.

        Returns
        -------
        The normalized array.

        Raises
        ------
        ValueError
            If the array is not 3-dimensional after the optional expansion.
        """
        logg.debug(f"Loading data `xarray.DataArray` of shape `{img.shape}`")

        if img.ndim == 2:
            img = img.expand_dims("channels", -1)
        if img.ndim != 3:
            raise ValueError(
                f"Expected image to have `3` dimensions, found `{img.ndim}`.")

        # positional fallback: `y` is assumed first, `x` second
        renames: Dict[Hashable, str] = {}
        for position, wanted in enumerate(("y", "x")):
            if wanted in img.dims:
                continue
            fallback = img.dims[position]
            logg.warning(
                f"Dimension `{wanted}` not found in the data. Assuming it's `{fallback}`"
            )
            renames[fallback] = wanted

        img = img.rename(renames)
        non_spatial = [dim for dim in img.dims if dim not in ("y", "x")]
        channel_dim = non_spatial[0]
        try:
            img = img.reset_index(dims_or_levels=channel_dim, drop=True)
        except KeyError:
            # the channel dimension might carry no index, nothing to reset
            pass

        if copy:
            return img.copy()
        return img
Пример #6
0
    def __init__(
        self,
        adata: AnnData,
        *,
        metric: Literal["alignment", "identity", "levenshtein"] = "identity",
        cutoff: float = 0,
        receptor_arms: Literal["TRA", "TRB", "all", "any"] = "all",
        dual_tcr: Literal["primary_only", "all", "any"] = "primary_only",
        sequence: Literal["aa", "nt"] = "aa",
    ):
        """Set up the computation of neighborhood graphs of CDR3 sequences.

        For documentation of the parameters, see :func:`tcr_neighbors`.

        Raises
        ------
        ValueError
            If the identity metric is combined with a non-zero cutoff, or
            the alignment metric with nucleotide sequences.
        """
        if cutoff != 0 and metric == "identity":
            raise ValueError("Identity metric only works with cutoff = 0")
        if metric == "alignment" and sequence == "nt":
            raise ValueError(
                "Using nucleotide sequences with alignment metric is not supported. "
            )

        self.adata, self.metric, self.cutoff = adata, metric, cutoff
        self.receptor_arms, self.dual_tcr = receptor_arms, dual_tcr
        self.sequence = sequence

        # the index needs all of the settings above to be in place
        self._build_index_dict()
        self._dist_mat = None
        logging.debug("Finished initalizing TcrNeighbors object. ")
Пример #7
0
    def _(self, img: np.ndarray, **_: Any) -> xr.DataArray:
        """
        Wrap a :class:`numpy.ndarray` in a ``(y, x, channels)`` :class:`xarray.DataArray`.

        A 2-dimensional array gets a trailing channel axis of length 1.

        Raises
        ------
        ValueError
            If the array is not 3-dimensional after the optional expansion.
        """
        logg.debug(f"Loading data `numpy.array` of shape `{img.shape}`")

        if img.ndim == 2:
            img = img[..., np.newaxis]

        n_dims = img.ndim
        if n_dims != 3:
            raise ValueError(f"Expected image to have `3` dimensions, found `{n_dims}`.")

        return xr.DataArray(img, dims=["y", "x", "channels"])
Пример #8
0
    def _trim_data(self) -> None:
        """
        Subset genes :attr:`_data` to those present in interactions.

        The result, containing only the columns of :attr:`_data` that occur as a
        source or a target in :attr:`interactions`, is stored in ``_filtered_data``.
        """
        if TYPE_CHECKING:
            assert isinstance(self._data, pd.DataFrame)
            assert isinstance(self.interactions, pd.DataFrame)

        logg.debug("DEBUG: Removing genes not in any interaction")
        genes = set(self.interactions[SOURCE]) | set(self.interactions[TARGET])
        # `.loc` no longer accepts a `set` as an indexer in recent pandas
        # versions, so hand it a list instead
        self._filtered_data = self._data.loc[:, list(genes)]
Пример #9
0
    def _read_from_adata(self, time_key: str, **kwargs):
        """
        Read the pseudotime from ``adata.obs[time_key]`` after the parent's setup.

        Raises
        ------
        KeyError
            If ``time_key`` is not present in ``adata.obs``.
        ValueError
            If any pseudotime value is negative.
        """
        super()._read_from_adata(variance_key="palantir", **kwargs)

        obs = self.adata.obs
        if time_key not in obs.keys():
            raise KeyError(f"Could not find time key `{time_key!r}` in `adata.obs`.")

        logg.debug("Adding `.pseudotime`")
        values = np.array(obs[time_key])
        self.pseudotime = values.astype(_dtype)

        has_negative = np.min(self.pseudotime) < 0
        if has_negative:
            raise ValueError("Pseudotime must be positive")
Пример #10
0
    def _write_eig_to_adata(self, eig):
        # write to class and AnnData object
        if self._eig is not None:
            logg.debug("DEBUG: Overwriting `.eigendecomposition`")
        else:
            logg.debug(
                f"DEBUG: Adding `.eigendecomposition` and `adata.uns['eig_{self._direction}']`"
            )

        self._eig = eig
        self._adata.uns[f"eig_{self._direction}"] = eig
Пример #11
0
    def _(self, img: Pathlike_t, chunks: Optional[int] = None, **_: Any) -> Optional[xr.DataArray]:
        """
        Load image data from a path, dispatching on the file suffix.

        - ``.jpg``/``.jpeg`` and ``.tif``/``.tiff`` files are read and returned
          through ``self._load_img``.
        - A directory is opened as a Zarr store and ``.nc``/``.cdf`` as a NetCDF
          dataset; both replace ``self._data`` and return ``None``.

        Parameters
        ----------
        img
            Path to the data.
        chunks
            Forwarded as ``chunks`` to the Zarr, NetCDF and TIFF readers.

        Returns
        -------
        The loaded array, or ``None`` when ``self._data`` was replaced instead.

        Raises
        ------
        OSError
            If the path does not exist.
        ValueError
            If the container is not empty when loading Zarr/NetCDF data, or if the
            suffix is not recognized.
        """
        def transform_metadata(data: xr.Dataset) -> xr.Dataset:
            """Fill in default values for missing metadata attributes; modifies ``data.attrs`` in place."""
            # coordinates and padding are read as tuples and converted to their objects
            data.attrs[Key.img.coords] = CropCoords.from_tuple(data.attrs.get(Key.img.coords, _NULL_COORDS.to_tuple()))
            data.attrs[Key.img.padding] = CropPadding.from_tuple(
                data.attrs.get(Key.img.padding, _NULL_PADDING.to_tuple())
            )
            if Key.img.mask_circle not in data.attrs:
                data.attrs[Key.img.mask_circle] = False

            if Key.img.scale not in data.attrs:
                data.attrs[Key.img.scale] = 1

            return data

        img = Path(img)
        logg.debug(f"Loading data from `{img}`")

        if not img.exists():
            raise OSError(f"Path `{img}` does not exist.")

        suffix = img.suffix.lower()

        # NOTE(review): the JPEG check runs before the directory check, so a
        # directory named `*.jpg` would be passed to `imread` - confirm this is intended
        if suffix in (".jpg", ".jpeg"):
            return self._load_img(imread(str(img)))

        # any directory is treated as a Zarr store
        if img.is_dir():
            if len(self._data):
                raise ValueError("Loading data from `Zarr` store is disallowed if the container is not empty.")

            self._data = transform_metadata(xr.open_zarr(str(img), chunks=chunks))
            return None

        if suffix in (".nc", ".cdf"):
            if len(self._data):
                raise ValueError("Loading data from `NetCDF` is disallowed if the container is not empty.")

            self._data = transform_metadata(xr.open_dataset(img, chunks=chunks))
            return None

        if suffix in (".tif", ".tiff"):
            # calling _load_img ensures we can safely do the transpose
            # each page of the TIFF is opened separately and the pages are
            # concatenated along the `band` dimension
            return self._load_img(
                xr.concat(
                    [
                        _open_rasterio(f"GTIFF_DIR:{i}:{img}", chunks=chunks, parse_coordinates=False)
                        for i in range(1, _num_pages(img) + 1)
                    ],
                    dim="band",
                ),
                copy=False,
            ).transpose("y", "x", ...)

        raise ValueError(f"Unknown suffix `{img.suffix}`.")
Пример #12
0
def test_formats(capsys, logging_state):
    """Every logging level prefixes its message in the expected way."""
    s.logfile = sys.stderr
    s.verbosity = Verbosity.debug

    cases = (
        (l.error, '0', 'ERROR: 0\n'),
        (l.warning, '1', 'WARNING: 1\n'),
        (l.info, '2', '2\n'),
        (l.hint, '3', '--> 3\n'),
        (l.debug, '4', '    4\n'),
    )
    for emit, message, expected in cases:
        emit(message)
        assert capsys.readouterr().err == expected
Пример #13
0
    def _filter_interactions_by_genes(self) -> None:
        """
        Keep only the :attr:`interactions` whose source and target are both in the data.

        Raises
        ------
        ValueError
            If no interactions are left after the filtering.
        """
        if TYPE_CHECKING:
            assert isinstance(self.interactions, pd.DataFrame)

        logg.debug("DEBUG: Removing interactions with no genes in the data")

        source_known = self.interactions[SOURCE].isin(self._data.columns)
        target_known = self.interactions[TARGET].isin(self._data.columns)
        self._interactions = self.interactions[source_known & target_known]

        if not self.interactions.empty:
            return

        raise ValueError(
            "After filtering by genes, no interactions remain.")
Пример #14
0
 def compute_transitions(self):
     """Compute the confidence of directed transitions between groups.

     Reads the graph stored in ``adata.uns[self.vkey + '_graph']`` (falling
     back to the legacy key ``'velocyto_transitions'``), aggregates its edges
     per group given by ``adata.obs[self._groups_key]``, takes the net flow
     between each pair of groups and normalizes the positive part. The result
     is stored, transposed, in ``self.transitions_confidence``.

     Raises
     ------
     ValueError
         If the graph is missing from ``adata.uns`` or is not of shape
         ``(n_obs, n_obs)``.
     """
     vkey = self.vkey + '_graph'
     if vkey not in self._adata.uns:
         # legacy key: copy it over to the expected key
         if 'velocyto_transitions' in self._adata.uns:
             self._adata.uns[vkey] = self._adata.uns['velocyto_transitions']
             sclogg.debug(
                 "The key 'velocyto_transitions' has been changed to 'velocity_graph'."
             )
         else:
             # NOTE(review): the messages hard-code 'velocity_graph' although
             # the actual key is derived from `self.vkey`
             raise ValueError(
                 'The passed AnnData needs to have an `uns` annotation '
                 "with key 'velocity_graph' - a sparse matrix from RNA velocity."
             )
     if self._adata.uns[vkey].shape != (self._adata.n_obs,
                                        self._adata.n_obs):
         raise ValueError(
             f"The passed 'velocity_graph' have shape {self._adata.uns[vkey].shape} "
             f"but shoud have shape {(self._adata.n_obs, self._adata.n_obs)}"
         )
     # restore this at some point
     # if 'expected_n_edges_random' not in self._adata.uns['paga']:
     #     raise ValueError(
     #         'Before running PAGA with `use_rna_velocity=True`, run it with `False`.')
     import igraph
     from scanpy.utils import get_igraph_from_adjacency, get_sparse_from_igraph
     # only the presence of an edge is used, the weights are discarded
     g = get_igraph_from_adjacency(self._adata.uns[vkey].astype('bool'),
                                   directed=True)
     vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups_key].cat.codes.values)
     # set combine_edges to False if you want self loops
     cg_full = vc.cluster_graph(combine_edges='sum')
     transitions = get_sparse_from_igraph(cg_full, weight_attr='weight')
     # net flow: edges i -> j minus edges j -> i
     transitions = transitions - transitions.T
     transitions_conf = transitions.copy()
     transitions = transitions.tocoo()
     total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
     # total_n_sum = sum(total_n)
     # expected_n_edges_random = self._adata.uns['paga']['expected_n_edges_random']
     for i, j, v in zip(transitions.row, transitions.col, transitions.data):
         # if expected_n_edges_random[i, j] != 0:
         #     # factor 0.5 because of asymmetry
         #     reference = 0.5 * expected_n_edges_random[i, j]
         # else:
         #     # approximate
         #     reference = self._neighbors.n_neighbors * total_n[i] * total_n[j] / total_n_sum
         reference = np.sqrt(total_n[i] * total_n[j])
         # negative net flow is dropped, positive net flow is normalized
         transitions_conf[i, j] = 0 if v < 0 else v / reference
     transitions_conf.eliminate_zeros()
     # transpose in order to match convention of stochastic matrices
     # entry ij means transition from j to i
     self.transitions_confidence = transitions_conf.T
Пример #15
0
    def _read_from_adata(self, **kwargs):
        """
        Import the base-KNN graph and check it for connectivity and symmetry.

        If ``variance_key`` is passed as a keyword argument, the variances stored
        in ``adata.uns['<variance_key>_variances']`` are loaded as a sparse matrix,
        or set to ``None`` if they cannot be found.

        Raises
        ------
        KeyError
            If the KNN graph has not been computed.
        """

        if not has_neighs(self.adata):
            raise KeyError("Compute KNN graph first as `scanpy.pp.neighbors()`.")

        self._conn = get_neighs(self.adata, "connectivities").astype(_dtype)

        checks = (
            (is_connected, "connectedness", "connected"),
            (is_symmetric, "symmetry", "symmetric"),
        )
        for predicate, noun, adjective in checks:
            start = logg.debug(f"Checking the KNN graph for {noun}")
            if not predicate(self._conn):
                logg.warning(f"KNN graph is not {adjective}", time=start)

        variance_key = kwargs.pop("variance_key", None)
        if variance_key is None:
            logg.debug("DEBUG: No variance key specified")
            return

        logg.debug(f"DEBUG: Loading variances from `adata.uns[{variance_key!r}]`")
        variance_key = f"{variance_key}_variances"
        if variance_key not in self.adata.uns.keys():
            self._variances = None
            logg.debug(
                f"DEBUG: Unable to load variances`{variance_key}` from `adata.uns`"
            )
            return

        # keep it sparse
        self._variances = csr_matrix(self.adata.uns[variance_key].astype(_dtype))
Пример #16
0
def partition(conn: Union[nx.DiGraph, np.ndarray, spmatrix],
              sort: bool = True) -> Tuple[List[List[Any]], List[List[Any]]]:
    """
    Partition a directed graph into its transient and recurrent classes.

    In a directed graph *G*, node *j* is accessible from node *i* if there exists a path from *i* to *j*.
    If *i* is accessible from *j* and the converse holds as well, then *i* and *j* communicate.
    Communication forms an equivalence relation on directed graphs, so every directed graph can be uniquely
    partitioned into its communication classes (also called strongly connected components).

    If *G* describes the state space of a Markov chain, then communication classes are often
    characterized as either recurrent or transient. Intuitively, once the process enters a recurrent class, it will
    never leave it again. See [Tolver16]_ for more formal definition.

    Params
    ------
    conn
        Directed graph to partition.
    sort
        Whether to sort the nodes within each class, and the classes themselves by
        decreasing size (ties broken by the first node).

    Returns
    -------
    (:class:`list`, :class:`list`)
        Recurrent and transient classes respectively.
    """

    start = logg.debug(
        "Partitioning the graph into recurrent and transient classes")

    graph = conn if isinstance(conn, nx.DiGraph) else nx.DiGraph(conn)

    def is_recurrent(scc):
        # a class is recurrent iff no node outside of it can be reached from it
        return all(not nx.has_path(graph, s, t)
                   for s, t in product(scc, graph.nodes - scc))

    def maybe_sort(classes):
        if sort:
            return sorted(classes, key=lambda x: (-len(x), x[0]))
        return list(map(list, classes))

    rec_classes, trans_classes = [], []
    for scc in nx.strongly_connected_components(graph):
        members = sorted(scc) if sort else scc
        (rec_classes if is_recurrent(scc) else trans_classes).append(members)

    rec_classes, trans_classes = maybe_sort(rec_classes), maybe_sort(trans_classes)

    # log only once the work is done, so the reported time covers the computation
    logg.debug("    Finish", time=start)

    return rec_classes, trans_classes
Пример #17
0
    def _cell_dist_mat_reduce(self):
        """Compute the distance matrix by using custom reduction functions. 
        More flexible than `_build_cell_dist_mat_min`, but requires more memory.
        Reduce dual is called before reduce arms. 
        """
        coord_dict = dict()

        def _add_to_dict(d, c1, c2, cell_row, cell_col, value):
            """Add a value to the nested coord dict"""
            try:
                tmp_dict = d[(cell_row, cell_col)]
                try:
                    tmp_dict2 = tmp_dict[arm]
                    try:
                        if (c1, c2) in tmp_dict2:
                            # can be in arbitrary order apprarently
                            assert (c2, c1) not in tmp_dict2
                            tmp_dict2[(c2, c1)] = value
                        tmp_dict2[(c1, c2)] = value
                    except KeyError:
                        tmp_dict2 = {(c1, c2): value}
                except KeyError:
                    tmp_dict[arm] = {(c1, c2): value}
            except KeyError:
                d[(cell_row, cell_col)] = {arm: {(c1, c2): value}}

        for arm, arm_info in self.index_dict.items():
            dist_mat, seq_to_cell, chain_inds = (
                arm_info["dist_mat"],
                arm_info["seq_to_cell"],
                arm_info["chain_inds"],
            )
            for row, col, value in zip(dist_mat.row, dist_mat.col, dist_mat.data):
                for c1, c2 in itertools.product(chain_inds, repeat=2):
                    for cell_row, cell_col in itertools.product(
                        seq_to_cell[c1][row], seq_to_cell[c2][col]
                    ):
                        # fill upper diagonal. Important: these are dist-mat row,cols
                        # not cell-mat row cols. This is required, because the
                        # itertools.product returns all combinations for the diagonal
                        # but not for the other values.
                        _add_to_dict(coord_dict, c1, c2, cell_row, cell_col, value)
                        if row != col:
                            _add_to_dict(coord_dict, c1, c2, cell_col, cell_row, value)

        logging.debug("Finished constructing coord-dictionary")

        yield from self._reduce_coord_dict(coord_dict)
Пример #18
0
    def maybe_create_lineage(direction: Direction):
        """
        Wrap the lineage array in ``adata.obsm`` into a `Lineage` object, if present.

        Names and colors are taken from ``adata.uns`` when they exist and match the
        number of lineages (colors must also be color-like); otherwise new ones are
        created. The names and colors used are written back to ``adata.uns``.
        """
        is_forward = direction == Direction.FORWARD
        dir_name = 'forward' if is_forward else 'backward'
        lin_key = str(LinKey.FORWARD if is_forward else LinKey.BACKWARD)
        names_key, colors_key = _lin_names(lin_key), _colors(lin_key)

        if lin_key not in adata.obsm.keys():
            logg.debug(
                f"DEBUG: Unable to load {dir_name} "
                f"`Lineage` from `adata.obsm[{lin_key!r}]`")
            return

        _, n_lineages = adata.obsm[lin_key].shape
        logg.info(f"Creating {dir_name} `Lineage` object")

        def dummy_names():
            return [f"Lineage {i}" for i in range(n_lineages)]

        # lineage names
        if names_key not in adata.uns.keys():
            logg.warning(
                f"Lineage names not found in `adata.uns[{names_key!r}]`, creating dummy names"
            )
            names = dummy_names()
        elif len(adata.uns[names_key]) != n_lineages:
            logg.warning(
                f"Lineage names are don't have the required length ({n_lineages}), creating dummy names"
            )
            names = dummy_names()
        else:
            logg.info("Succesfully loaded names")
            names = adata.uns[names_key]

        # lineage colors
        if colors_key not in adata.uns.keys():
            logg.warning(
                f"Lineage colors not found in `adata.uns[{colors_key!r}]`, creating new colors"
            )
            colors = _create_categorical_colors(n_lineages)
        elif len(adata.uns[colors_key]) != n_lineages or not all(
                is_color_like(c) for c in adata.uns[colors_key]):
            logg.warning(
                f"Lineage colors don't have the required length ({n_lineages}) "
                f"or are not color-like, creating new colors")
            colors = _create_categorical_colors(n_lineages)
        else:
            logg.info("Succesfully loaded colors")
            colors = adata.uns[colors_key]

        adata.obsm[lin_key] = Lineage(adata.obsm[lin_key],
                                      names=names,
                                      colors=colors)
        adata.uns[colors_key] = colors
        adata.uns[names_key] = names
Пример #19
0
    def _read_from_adata(self, vkey: str, **kwargs):
        """
        Read the velocity correlations stored under ``vkey`` after the parent's setup.

        The positive (``<vkey>_graph``) and negative (``<vkey>_graph_neg``) graphs
        from ``adata.uns`` are summed and stored in ``velo_corr``.

        Raises
        ------
        KeyError
            If either of the two graphs is missing from ``adata.uns``.
        """
        super()._read_from_adata(variance_key="velocity", **kwargs)

        pos_key = vkey + "_graph"
        neg_key = vkey + "_graph_neg"
        uns_keys = self.adata.uns.keys
        if not (pos_key in uns_keys() and neg_key in uns_keys()):
            raise KeyError(
                "Compute cosine correlations first as `scvelo.tl.velocity_graph()`."
            )

        velo_corr_pos = csr_matrix(self.adata.uns[pos_key]).copy()
        velo_corr_neg = csr_matrix(self.adata.uns[neg_key]).copy()
        logg.debug("Adding `.velo_corr`, the velocity correlations")

        combined = velo_corr_pos + velo_corr_neg
        self.velo_corr = combined.astype(_dtype)
Пример #20
0
def test_logfile(tmp_path, logging_state):
    """Logging can be redirected first to a stream and then to a file path."""
    s.verbosity = Verbosity.hint

    # redirect to an in-memory stream
    buffer = StringIO()
    s.logfile = buffer
    assert s.logfile is buffer
    assert s.logpath is None
    l.error('test!')
    assert buffer.getvalue() == 'ERROR: test!\n'

    # redirect to a file
    log_path = tmp_path / 'test.log'
    s.logpath = log_path
    assert s.logpath == log_path
    assert s.logfile.name == str(log_path)
    l.hint('test2')
    # `debug` is below the `hint` verbosity, so it must not be written
    l.debug('invisible')
    assert s.logpath.read_text() == '--> test2\n'
Пример #21
0
def _chain_pairing(
    adata: AnnData,
    mask_ambiguous: np.ndarray,
    mask_has_ir: np.ndarray,
    mask_multichain: np.ndarray,
) -> np.ndarray:
    """Annotate chain pairing categories.

    Categories are assigned from the least to the most specific, so later
    assignments overwrite earlier ones.

    Parameters
    ----------
    adata
        Annotated data; the ``IR_V{J,DJ}_{1,2}_junction_aa`` columns of
        `adata.obs` are read.
    mask_ambiguous
        boolean array of the same length as `adata.obs`, marking
        which cells have an ambiguous receptor configuration.
    mask_has_ir
        boolean array of the same length as `adata.obs`, marking
        which cells have an immune receptor.
    mask_multichain
        boolean array of the same length as `adata.obs`, marking
        which cells are multichain.

    Returns
    -------
    Array with one category string per cell.
    """
    x = adata.obs
    string_length = len("two full chains")
    # Start from empty strings (and not from uninitialized memory as `np.empty`
    # would give) so that the final check can detect cells without a category.
    results = np.full(x.shape[0], "", dtype=f"<U{string_length}")

    logging.debug("Done initalizing")

    mask_has_vj1 = ~_is_na(x["IR_VJ_1_junction_aa"].values)
    mask_has_vdj1 = ~_is_na(x["IR_VDJ_1_junction_aa"].values)
    mask_has_vj2 = ~_is_na(x["IR_VJ_2_junction_aa"].values)
    mask_has_vdj2 = ~_is_na(x["IR_VDJ_2_junction_aa"].values)

    logging.debug("Done with masks")

    for m in [mask_has_vj1, mask_has_vdj1, mask_has_vj2, mask_has_vdj2]:
        # no cell can have a junction_aa sequence but no TCR
        assert np.setdiff1d(np.where(m)[0], np.where(mask_has_ir)[0]).size == 0

    results[~mask_has_ir] = "no IR"
    results[mask_has_vj1] = "orphan VJ"
    results[mask_has_vdj1] = "orphan VDJ"
    results[mask_has_vj1 & mask_has_vdj1] = "single pair"
    results[mask_has_vj1 & mask_has_vdj1 & mask_has_vj2] = "extra VJ"
    results[mask_has_vj1 & mask_has_vdj1 & mask_has_vdj2] = "extra VDJ"
    results[mask_has_vj1 & mask_has_vdj1 & mask_has_vj2
            & mask_has_vdj2] = "two full chains"
    results[mask_ambiguous] = "ambiguous"
    results[mask_multichain] = "multichain"

    assert "" not in results, "One or more chains are not characterized"

    return results
Пример #22
0
    def _check_and_create_colors(self):
        """
        Make sure there is exactly one color per metastable-state category.

        Existing colors of the right length are kept. When none are set, matching
        colors from ``adata.uns`` are loaded. In all other cases new colors are
        created and also written to ``adata.uns``.
        """
        n_cats = len(self._meta_states.cat.categories)
        color_key = _colors(self._rc_key)

        if self._meta_states_colors is not None:
            if len(self._meta_states_colors) == n_cats:
                # already consistent, nothing to do
                return
        elif color_key in self._adata.uns and n_cats == len(
            self._adata.uns[color_key]
        ):
            logg.debug("DEBUG: Loading colors from `.adata` object")
            self._meta_states_colors = _convert_to_hex_colors(
                self._adata.uns[color_key]
            )
            return

        self._meta_states_colors = _create_categorical_colors(n_cats)
        self._adata.uns[color_key] = self._meta_states_colors
def scale_array(
    X,
    *,
    zero_center: bool = True,
    max_value: Optional[float] = None,
    copy: bool = False,
    return_mean_std: bool = False,
):
    """
    Scale the columns of ``X`` to unit variance and optionally zero mean.

    Parameters
    ----------
    X
        Dense array or sparse matrix. Modified in place unless ``copy`` is set
        or the input has an integer dtype (it is then cast to float).
    zero_center
        Whether to subtract the column means. Not possible for sparse input.
    max_value
        If given, values above it are clipped to it after scaling.
    copy
        Whether to operate on a copy of ``X``.
    return_mean_std
        Whether to also return the column means and standard deviations.

    Returns
    -------
    The scaled ``X``, or the tuple ``(X, mean, std)`` if ``return_mean_std`` is set.

    Raises
    ------
    ValueError
        If ``zero_center`` is requested for a sparse matrix.
    """
    if copy:
        X = X.copy()

    clip = max_value is not None
    if clip and not zero_center:
        logg.info(  # Be careful of what? This should be more specific
            "... be careful when using `max_value` without `zero_center`."
        )

    if np.issubdtype(X.dtype, np.integer):
        logg.info(
            '... as scaling leads to float results, integer '
            'input is cast to float, returning copy.'
        )
        X = X.astype(float)

    mean, var = _get_mean_var(X)
    std = np.sqrt(var)
    # constant columns are left unscaled instead of being divided by zero
    std[std == 0] = 1

    if not issparse(X):
        if zero_center:
            X -= mean
        X /= std
    elif zero_center:
        raise ValueError("Cannot zero-center sparse matrix.")
    else:
        sparsefuncs.inplace_column_scale(X, 1 / std)

    # do the clipping
    if clip:
        logg.debug(f"... clipping at max_value {max_value}")
        X[X > max_value] = max_value

    return (X, mean, std) if return_mean_std else X
Пример #24
0
def save_fig(fig: Figure, path: Union[str, Path], make_dir: bool = True, ext: str = "png", **kwargs: Any) -> None:
    """
    Save a figure.

    Parameters
    ----------
    fig
        Figure to save.
    path
        Path where to save the figure. If path is relative, save it under :attr:`scanpy.settings.figdir`.
    make_dir
        Whether to try making the directory if it does not exist.
    ext
        Extension to use if none is provided.
    kwargs
        Keyword arguments for :meth:`matplotlib.figure.Figure.savefig`.
        ``bbox_inches`` defaults to ``'tight'`` and ``transparent`` to ``True``.

    Returns
    -------
    None
        Just saves the plot.
    """
    if os.path.splitext(path)[1] == "":
        path = f"{path}.{ext}"

    path = Path(path)

    if not path.is_absolute():
        path = Path(settings.figdir) / path

    if make_dir:
        try:
            # use the parent of the resolved target, not the `Path.parent`
            # class attribute, which is a property object and not a directory
            path.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            # best effort: saving below reports the actual problem, if any
            logg.debug(f"Unable to create directory `{path.parent}`. Reason: `{e}`")

    logg.debug(f"Saving figure to `{path!r}`")

    kwargs.setdefault("bbox_inches", "tight")
    kwargs.setdefault("transparent", True)

    fig.savefig(path, **kwargs)
Пример #25
0
def _get_categorical(
    adata: AnnData,
    key: str,
    palette: Optional[str] = None,
    vec: Optional[pd.Series] = None,
) -> np.ndarray:
    """
    Return one RGB color per observation for the categorical ``adata.obs[key]``.

    Parameters
    ----------
    adata
        Annotated data object.
    key
        Key in ``adata.obs``.
    palette
        Palette to use. When given, the stored colors are force-updated.
    vec
        Categorical values that are written to ``adata.obs[key]`` first.

    Returns
    -------
    Array with the RGB color of each observation's category.

    Raises
    ------
    TypeError
        If ``vec`` is given and is not categorical.
    """
    if vec is not None:
        if not is_categorical_dtype(vec):
            raise TypeError(f"Expected a `categorical` type, found `{infer_dtype(vec)}`.")
        if key in adata.obs:
            logg.debug(f"Overwriting `adata.obs[{key!r}]`")

        adata.obs[key] = vec.values

    add_colors_for_categorical_sample_annotation(
        adata, key=key, force_update_colors=palette is not None, palette=palette
    )

    categories = adata.obs[key].cat.categories
    rgb_colors = [to_rgb(color) for color in adata.uns[Key.uns.colors(key)]]
    color_of = dict(zip(categories, rgb_colors))

    return np.array([color_of[value] for value in adata.obs[key]])
Пример #26
0
def _knn_smooth(diff_kernel, velo_graph, trans_graph, weight_diffusion):
    """
    Combine the velocity kernel with the KNN (similarity) kernel.

    Params
    ------
    diff_kernel
        How to combine the two graphs:

            - `'mult'` - element-wise product.
            - `'sum'` - convex combination of the two normalized graphs.
            - `'both'` - element-wise product followed by the convex combination.
    velo_graph
        Velocity graph.
    trans_graph
        Similarity graph. It is copied, not modified.
    weight_diffusion
        Weight of the similarity graph in the convex combination, in `[0, 1]`.

    Returns
    -------
    The combined graph.

    Raises
    ------
    ValueError
        If `diff_kernel` is not one of the valid options.
    """
    assert weight_diffusion >= 0, "Weight diffusion must be non-negative."
    assert weight_diffusion <= 1, "Weight diffusion must be <= 1."

    # this is necessary because I don't want to normalize this graph (density correction)
    G_sim = trans_graph.copy()

    if diff_kernel == "mult":
        logg.debug("DEBUG: Using a multiplicative diffusion kernel")
        # element wise multiplication
        velo_graph = velo_graph.multiply(G_sim)
    elif diff_kernel == "sum":
        logg.debug("DEBUG: Using an additive diffusion kernel")
        velo_graph, trans_graph = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (
            1 - weight_diffusion) * velo_graph + weight_diffusion * trans_graph
    elif diff_kernel == "both":
        logg.debug(
            "DEBUG: Using first a multiplicative and then an additive diffusion kernel"
        )
        # restrict the similarity graph to edges with positive velocity
        G_sim = G_sim.multiply(velo_graph > 0)
        velo_graph = velo_graph.multiply(G_sim)
        # as in the `'sum'` case, mix the *normalized* graphs; previously a typo
        # discarded the normalized similarity graph and the raw one was used
        velo_graph, trans_graph = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (
            1 - weight_diffusion) * velo_graph + weight_diffusion * trans_graph
    else:
        raise ValueError(
            f"Invalid kernel type `{diff_kernel}`. Valid options are: `'mult', 'sum', 'both'`."
        )

    return velo_graph
Пример #27
0
    def compute_transition_matrix(
        self, density_normalize: bool = True, **kwargs
    ) -> "ConnectivityKernel":
        """
        Compute transition matrix based on transcriptomic similarity.

        Uses symmetric, weighted KNN graph to compute symmetric transition matrix. The connectivities are computed
        using :func:`scanpy.pp.neighbors`. Depending on the parameters used there, they can be UMAP connectivities or
        gaussian-kernel-based connectivities with adaptive kernel width.

        If the parameters equal those of the previous call, the cached matrix is kept.

        Params
        ------
        density_normalize
            Whether or not to use the underlying KNN graph for density normalization.

        Returns
        -------
        :class:`ConnectivityKernel`
            Itself; makes :paramref:`transition_matrix` available.
        """

        start = logg.info("Computing transition matrix based on connectivities")

        params = {"dnorm": density_normalize}
        if params == self._params:
            # same parameters as last time, reuse the result
            assert self.transition_matrix is not None, _ERROR_EMPTY_CACHE_MSG
            logg.debug(_LOG_USING_CACHE)
            logg.info("    Finish", time=start)
            return self

        self._params = params

        conn = self._conn.copy()
        conn = self.density_normalize(conn) if density_normalize else conn
        logg.info("    Finish", time=start)

        self.transition_matrix = csr_matrix(conn)

        return self
Пример #28
0
    def connectivities(self):
        """Get the weighted adjacency matrix derived from the distance matrix.

        The cutoff will be used to normalize the distances. With a cutoff of
        zero, the distance matrix itself is returned.
        """
        cutoff = self.cutoff
        if cutoff == 0:
            return self._dist_mat

        start = logging.debug(
            "Started converting distances to connectivities. ")

        result = self._dist_mat.copy()

        # subtracting one gives the actual distances
        distances = result.data - 1

        # the structure of the matrix stays the same, we can safely change the data only
        result.data = (cutoff - distances) / cutoff
        result.eliminate_zeros()
        logging.debug("Finished converting distances to connectivities. ",
                      time=start)
        return result
Пример #29
0
    def compute_transition_matrix(self, *args, **kwargs) -> "SimpleNaryExpression":
        """
        Combine the transition matrices of all sub-expressions via ``self._fn``.

        Sub-expressions without a transition matrix are computed first, except for
        kernels, which must have been computed by the user.

        Returns
        -------
        Itself, with ``transition_matrix`` set to the combined sparse matrix.

        Raises
        ------
        RuntimeError
            If a kernel has no transition matrix.
        """
        # must be done before, because the underlying expressions don't have to be normed
        if isinstance(self, KernelSimpleAdd):
            self._maybe_recalculate_constants(Constant)
        elif isinstance(self, KernelAdaptiveAdd):
            self._maybe_recalculate_constants(ConstantMatrix)

        for kexpr in self:
            is_computed = kexpr.transition_matrix is not None
            is_kernel = isinstance(kexpr, Kernel)

            if is_computed:
                if is_kernel:
                    logg.debug(_LOG_USING_CACHE)
                continue

            if is_kernel:
                raise RuntimeError(
                    f"Kernel `{kexpr}` is uninitialized. "
                    f"Compute its transition matrix as `.compute_transition_matrix()`."
                )
            kexpr.compute_transition_matrix()

        matrices = [kexpr.transition_matrix for kexpr in self]
        self.transition_matrix = csr_matrix(self._fn(matrices))

        return self
Пример #30
0
def save_fig(fig,
             path: Union[str, os.PathLike],
             make_dir: bool = True,
             ext: str = "png") -> None:
    """
    Write a figure to disk.

    Params
    ------
    fig: :class:`matplotlib.figure.Figure`
        Figure to save.
    path:
        Path where to save the figure.
        If path is relative, save it under `sc.settings.figdir`.
    make_dir:
        Whether to try making the directory if it does not exist.
    ext:
        Extension to use if :paramref:`path` has none.

    Returns
    -------
    None
        Just saves the plot.
    """

    _, suffix = os.path.splitext(path)
    if suffix == "":
        path = f"{path}.{ext}"

    if not os.path.isabs(path):
        path = os.path.join(sc.settings.figdir, path)

    if make_dir:
        target_dir = os.path.dirname(path)
        _maybe_create_dir(target_dir)

    logg.debug(f"Saving figure to `{path!r}`")

    fig.savefig(path, bbox_inches="tight", transparent=True)