Exemplo n.º 1
0
    def download(self, fpath: Optional[PathLike] = None, **kwargs: Any) -> Any:
        """
        Fetch the dataset and return it, using ``fpath`` as the local file.

        Parameters
        ----------
        fpath
            Location of the dataset file. When `None`, ``self.path`` is used.
            ``self._extension`` is appended if the path does not end with it.
        kwargs
            Keyword arguments forwarded to ``self._download``.

        Returns
        -------
        The object returned by ``self._download``.

        Raises
        ------
        ValueError
            If ``self.shape`` is set and differs from the shape of the data.
        """
        target = str(fpath if fpath is not None else self.path)
        if not target.endswith(self._extension):
            target = target + self._extension

        if not os.path.isfile(target):
            logg.debug(
                f"Downloading dataset `{self.name}` from `{self.url}` as `{target}`"
            )
        else:
            logg.debug(f"Loading dataset `{self.name}` from `{target}`")

        # Best effort: a failure to create the parent directory is only logged,
        # the download below is attempted regardless.
        parent = Path(target).parent
        try:
            if not parent.is_dir():
                logg.info(f"Creating directory `{parent}`")
                parent.mkdir(parents=True, exist_ok=True)
        except OSError as err:
            logg.error(f"Unable to create directory `{parent}`. Reason `{err}`")

        data = self._download(fpath=target, backup_url=self.url, **kwargs)

        # Nothing to validate, or the shape matches the expectation.
        if self.shape is None or data.shape == self.shape:
            return data

        raise ValueError(
            f"Expected the data to have shape `{self.shape}`, found `{data.shape}`."
        )
Exemplo n.º 2
0
def test_formats(capsys, logging_state):
    """Check the prefix each log level puts in front of its message on stderr."""
    s.logfile = sys.stderr
    s.verbosity = Verbosity.debug

    # (logging function, message, exact text expected on stderr)
    cases = [
        (l.error, '0', 'ERROR: 0\n'),
        (l.warning, '1', 'WARNING: 1\n'),
        (l.info, '2', '2\n'),
        (l.hint, '3', '--> 3\n'),
        (l.debug, '4', '    4\n'),
    ]
    for emit, message, expected in cases:
        emit(message)
        assert capsys.readouterr().err == expected
Exemplo n.º 3
0
def test_logfile(tmp_path, logging_state):
    """Check that logging can be redirected to a stream and to a file path."""
    s.verbosity = Verbosity.hint

    # Redirect to an in-memory stream: no path is associated with it.
    buffer = StringIO()
    s.logfile = buffer
    assert s.logfile is buffer
    assert s.logpath is None
    l.error('test!')
    assert buffer.getvalue() == 'ERROR: test!\n'

    # Redirect to a file on disk: `logfile` follows `logpath`.
    log_path = tmp_path / 'test.log'
    s.logpath = log_path
    assert s.logpath == log_path
    assert s.logfile.name == str(log_path)
    l.hint('test2')
    # The debug message must not show up in the file at `hint` verbosity.
    l.debug('invisible')
    assert s.logpath.read_text() == '--> test2\n'
Exemplo n.º 4
0
    def show(self, restore: bool = False) -> None:
        """
        Display the :class:`napari.Viewer`.

        Parameters
        ----------
        restore
            If `True`, rebuild the GUI and show it again when showing the
            current viewer fails with a :class:`RuntimeError`.

        Returns
        -------
        Nothing, just launches the viewer.
        """
        try:
            self.view.viewer.show()
        except RuntimeError:
            if not restore:
                logg.error(
                    "The viewer has already been closed. Try specifying `restore=True`"
                )
                return
            # Recreate the UI and retry once.
            self.view._init_UI()
            self.view.viewer.show()
Exemplo n.º 5
0
    def screenshot(
        self,
        return_result: bool = False,
        dpi: Optional[float] = 180,
        save: Optional[str] = None,
        **kwargs: Any,
    ) -> Optional[np.ndarray]:
        """
        Capture the viewer's canvas and plot it with matplotlib.

        Parameters
        ----------
        return_result
            If `True`, return the captured image array.
        dpi
            Dots per inch of the created figure.
        save
            Where to save the figure. If `None`, the figure is not saved.
        kwargs
            Keyword arguments for :meth:`matplotlib.axes.Axes.imshow`.

        Returns
        -------
        The image array if ``return_result = True``, otherwise nothing.
        `None` is also returned when the screenshot could not be taken.
        """
        try:
            image = self._controller.screenshot(path=None)
        except RuntimeError as err:
            logg.error(f"Unable to take a screenshot. Reason: {err}")
            return None

        fig, ax = plt.subplots(nrows=1, ncols=1, dpi=dpi)
        fig.tight_layout()

        ax.imshow(image, **kwargs)
        plt.axis("off")

        if save is not None:
            save_fig(fig, save)

        if return_result:
            return image
        return None
Exemplo n.º 6
0
def prepare_dataframe(
    adata: AnnData,
    var_names: Union[_VarNames, Mapping[str, _VarNames]],
    groupby: Optional[str] = None,
    use_raw: Optional[bool] = None,
    log: bool = False,
    num_categories: int = 7,
    layer=None,
    gene_symbols: Optional[str] = None,
):
    """
    Given the anndata object, prepares a data frame in which the row index are the categories
    defined by group by and the columns correspond to var_names.

    Parameters
    ----------
    adata
        Annotated data matrix.
    var_names
        `var_names` should be a valid subset of  `adata.var_names`.
        A single string is treated as a one-element list.
    groupby
        The key of the observation grouping to consider. It is expected that
        groupby is a categorical. If groupby is not a categorical observation,
        it would be subdivided into `num_categories`.
    use_raw
        Use `raw` attribute of `adata` if present.
    log
        Use the log of the values (computed as `log1p`).
    num_categories
        Only used if groupby observation is not categorical. This value
        determines the number of groups into which the groupby observation
        should be subdivided.
    layer
        Key in `adata.layers` to take the values from. Takes precedence
        over `use_raw`.
    gene_symbols
        Key for field in .var that stores gene symbols. If the key is not a
        column of `adata.var`, no translation is performed.

    Returns
    -------
    Tuple of the categories (the categories of the data frame's index) and
    the `pandas.DataFrame`, in that order. If a gene symbol cannot be found
    in the `gene_symbols` column, an error is logged and `None` is returned.

    Raises
    ------
    ValueError
        If `groupby` is not a key of `adata.obs`.
    KeyError
        If `layer` is not a key of `adata.layers`.
    """
    from scipy.sparse import issparse

    sanitize_anndata(adata)
    if use_raw is None and adata.raw is not None:
        use_raw = True
    if isinstance(var_names, str):
        var_names = [var_names]

    if groupby is not None and groupby not in adata.obs_keys():
        raise ValueError(
            'groupby has to be a valid observation. '
            f'Given {groupby}, valid observations: {adata.obs_keys()}')

    # Stays None unless a symbol -> var_name translation actually happens;
    # the column renaming at the end is keyed on this, so a `gene_symbols`
    # value that is not a column of `adata.var` no longer hits an unbound name.
    symbols = None
    if gene_symbols is not None and gene_symbols in adata.var.columns:
        # translate gene_symbols to var_names
        # slow method but gives a meaningful error if no gene symbol is found:
        symbol_column = adata.var[gene_symbols]
        translated_var_names = []
        for symbol in var_names:
            if symbol not in symbol_column.values:
                logg.error(f"Gene symbol {symbol!r} not found in given "
                           f"gene_symbols column: {gene_symbols!r}")
                return
            # the first variable carrying this symbol wins
            translated_var_names.append(
                adata.var[symbol_column == symbol].index[0])
        symbols = var_names
        var_names = translated_var_names

    if layer is not None:
        if layer not in adata.layers.keys():
            raise KeyError(
                f'Selected layer: {layer} is not in the layers list. '
                f'The list of valid layers is: {adata.layers.keys()}')
        matrix = adata[:, var_names].layers[layer]
    elif use_raw:
        matrix = adata.raw[:, var_names].X
    else:
        matrix = adata[:, var_names].X

    if issparse(matrix):
        matrix = matrix.toarray()
    if log:
        matrix = np.log1p(matrix)

    obs_tidy = pd.DataFrame(matrix, columns=var_names)
    if groupby is None:
        # no grouping requested: put every observation into one empty-named category
        categorical = pd.Series(np.repeat('',
                                          len(obs_tidy))).astype('category')
    elif not is_categorical_dtype(adata.obs[groupby]):
        # if the groupby column is not categorical, turn it into one
        # by subdividing into  `num_categories` categories
        categorical = pd.cut(adata.obs[groupby], num_categories)
    else:
        categorical = adata.obs[groupby]

    # `groupby` used to be passed positionally here, where it landed in the
    # `drop` parameter of `set_index`. That has no effect for array-like keys,
    # and newer pandas releases only accept `drop` as a keyword.
    obs_tidy.set_index(categorical, inplace=True)
    if symbols is not None:
        # translate the column names to the symbol names
        obs_tidy.rename(columns=dict(zip(var_names, symbols)), inplace=True)
    categories = obs_tidy.index.categories

    return categories, obs_tidy
Exemplo n.º 7
0
def select_affinity(adata: AnnData,
                    level: str = '1',
                    threshold: float = 0.9999,
                    inverse: bool = False,
                    key: Optional[str] = 'nsbm',
                    update_state: Optional[bool] = False,
                    filter: Optional[bool] = True,
                    copy: bool = False):
    """\
    Deprecated: formerly selected cells based on the affinity values at a
    specified level.

    The function is kept only so that existing callers fail with a clear
    message; it raises unconditionally and none of the arguments are used.

    Parameters
    ----------
    adata
        Annotated data matrix. Ignored.
    level
        The level formerly used for selection. Ignored.
    threshold
        The affinity threshold formerly used for selection. Ignored.
    inverse
        Formerly, whether to return cells with affinity lower than the
        threshold. Ignored.
    key
        Formerly, key of the groupings used to evaluate the model. Ignored.
    update_state
        Formerly, whether to update the state removing unselected cells.
        Ignored.
    filter
        Formerly, whether to filter cells or only mark them in
        `adata.obs['selected']`. Ignored.
    copy
        Formerly, whether to return a subsetted object instead of working
        in place. Ignored.

    Raises
    ------
    ValueError
        Always, since the function has been deprecated.

    """
    # The previous implementation followed this statement and was therefore
    # unreachable; it has been removed.
    raise ValueError('This function has been deprecated')
Exemplo n.º 8
0
def count_fragments_features(
    data: Union[AnnData, MuData],
    features: Optional[pd.DataFrame] = None,
    extend_upstream: int = 2000,
    extend_downstream: int = 0,
) -> AnnData:
    """
    Count fragments overlapping given Features. Returns cells x features matrix.

    Parameters
    ----------
    data
        AnnData object with peak counts or multimodal MuData object with 'atac' modality.
    features
        A DataFrame with feature annotation, e.g. genes.
        Annotation has to contain columns: Chromosome, Start, End.
        If not provided and `data` is a MuData object whose 'rna' modality
        has an "interval" column in `.var`, the annotation is taken from there.
    extend_upstream
        Number of nucleotides to extend every gene upstream (2000 by default to extend gene coordinates to promoter regions)
    extend_downstream
        Number of nucleotides to extend every gene downstream (0 by default)

    Returns
    -------
    AnnData object with the counts as `X`, `obs` taken from the input
    and `features` as `var`.

    Raises
    ------
    TypeError
        If `data` is neither an AnnData nor a MuData with 'atac' modality.
    ValueError
        If `features` is missing and cannot be derived from `data`.
    KeyError
        If no fragments file is registered in `.uns["files"]["fragments"]`.
    ImportError
        If pysam is not installed.
    """
    if isinstance(data, AnnData):
        adata = data
    elif isinstance(data, MuData) and "atac" in data.mod:
        adata = data.mod["atac"]
    else:
        raise TypeError(
            "Expected AnnData or MuData object with 'atac' modality")

    if features is None:
        # Try to get gene annotation from data.mod['rna']
        if (isinstance(data, MuData) and "rna" in data.mod
                and "interval" in data.mod["rna"].var.columns):
            features = get_gene_annotation_from_rna(data)
        else:
            raise ValueError(
                "Argument `features` is required. It should be a BED-like DataFrame with gene coordinates and names."
            )

    if "files" not in adata.uns or "fragments" not in adata.uns["files"]:
        raise KeyError(
            "There is no fragments file located yet. Run muon.atac.tl.locate_fragments first."
        )

    try:
        import pysam
    except ImportError as err:
        raise ImportError(
            "pysam is not available. It is required to work with the fragments file. Install pysam from PyPI (`pip install pysam`) or from GitHub (`pip install git+https://github.com/pysam-developers/pysam`)"
        ) from err

    n = adata.n_obs
    n_features = features.shape[0]

    # Dictionary with matrix positions: cell barcode -> position of the cell
    d = {barcode: pos for pos, barcode in enumerate(adata.obs.index)}

    fragments = pysam.TabixFile(adata.uns["files"]["fragments"],
                                parser=pysam.asBed())
    try:
        # List of lists matrix is quick and convenient to fill by row
        mx = lil_matrix((n_features, n), dtype=int)

        logging.info(
            f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Counting fragments in {n} cells for {features.shape[0]} features..."
        )

        for i in tqdm(range(n_features)):  # iterate over features (e.g. genes)
            f = features.iloc[i]
            # Extending upstream must not produce a negative coordinate
            start = max(0, f.Start - extend_upstream)
            end = f.End + extend_downstream

            # Cuts per cell for this feature. Aggregating first stores every
            # (feature, cell) entry once and keeps column indices sorted,
            # instead of appending duplicate, unordered entries to the row.
            cuts_per_cell = {}
            for fr in fragments.fetch(f.Chromosome, start, end):
                try:
                    barcode = fr.name  # cell barcode (e.g. GTCAGTCAGTCAGTCA-1)
                    n_cuts = int(fr.score)  # number of cuts per fragment (e.g. 2)
                except (AttributeError, KeyError, TypeError, ValueError):
                    # Best effort, as before: skip records whose name/score
                    # cannot be read or parsed.
                    continue
                ind = d.get(barcode)
                if ind is None:
                    # fragment belongs to a barcode that is not among the cells
                    continue
                cuts_per_cell[ind] = cuts_per_cell.get(ind, 0) + n_cuts

            cols = sorted(cuts_per_cell)
            mx.rows[i].extend(cols)
            mx.data[i].extend(cuts_per_cell[c] for c in cols)

        # Faster to convert to csr first and then transpose
        mx = mx.tocsr().transpose()

        return AnnData(X=mx, obs=adata.obs, var=features)

    except Exception as e:
        logging.error(e)
        # bare raise keeps the original traceback
        raise

    finally:
        # The connection has to be closed
        fragments.close()