def download(self, fpath: Optional[PathLike] = None, **kwargs: Any) -> Any:
    """
    Fetch the dataset and store it under ``fpath``.

    Parameters
    ----------
    fpath
        Destination file. When `None`, ``self.path`` is used. ``self._extension``
        is appended if the path does not already end with it.
    kwargs
        Keyword arguments forwarded to ``self._download``.

    Returns
    -------
    The object returned by ``self._download``.

    Raises
    ------
    ValueError
        If ``self.shape`` is set and differs from the shape of the returned data.
    """
    target = str(fpath if fpath is not None else self.path)
    if not target.endswith(self._extension):
        target = f"{target}{self._extension}"

    if os.path.isfile(target):
        logg.debug(f"Loading dataset `{self.name}` from `{target}`")
    else:
        logg.debug(
            f"Downloading dataset `{self.name}` from `{self.url}` as `{target}`"
        )

    # Failing to create the parent directory is only logged, not raised;
    # the download below is attempted regardless.
    parent = Path(target).parent
    try:
        if not parent.is_dir():
            logg.info(f"Creating directory `{parent}`")
            parent.mkdir(parents=True, exist_ok=True)
    except OSError as err:
        logg.error(f"Unable to create directory `{parent}`. Reason `{err}`")

    data = self._download(fpath=target, backup_url=self.url, **kwargs)

    expected_shape = self.shape
    if expected_shape is not None and data.shape != expected_shape:
        raise ValueError(
            f"Expected the data to have shape `{expected_shape}`, found `{data.shape}`."
        )

    return data
def test_formats(capsys, logging_state):
    """Check the text each log level writes to stderr for a given message."""
    s.logfile = sys.stderr
    s.verbosity = Verbosity.debug

    # (log method name, message, expected stderr output), checked in this order.
    cases = (
        ('error', '0', 'ERROR: 0\n'),
        ('warning', '1', 'WARNING: 1\n'),
        ('info', '2', '2\n'),
        ('hint', '3', '--> 3\n'),
        ('debug', '4', ' 4\n'),
    )
    for level, message, expected in cases:
        getattr(l, level)(message)
        assert capsys.readouterr().err == expected
def test_logfile(tmp_path, logging_state):
    """Check that log output can be directed to a stream and then to a file path."""
    s.verbosity = Verbosity.hint

    # Phase 1: an in-memory stream as the log target; no path is associated.
    buffer = StringIO()
    s.logfile = buffer
    assert s.logfile is buffer
    assert s.logpath is None
    l.error('test!')
    assert buffer.getvalue() == 'ERROR: test!\n'

    # Phase 2: a file path as the log target; `logfile` reflects the new path.
    log_path = tmp_path / 'test.log'
    s.logpath = log_path
    assert s.logpath == log_path
    assert s.logfile.name == str(log_path)

    l.hint('test2')
    l.debug('invisible')  # the final assertion expects this not to be written
    assert s.logpath.read_text() == '--> test2\n'
def show(self, restore: bool = False) -> None:
    """
    Display the :class:`napari.Viewer`.

    Parameters
    ----------
    restore
        Whether to reinitialize the GUI after it has been destroyed.

    Returns
    -------
    Nothing, just launches the viewer.
    """
    try:
        self.view.viewer.show()
    except RuntimeError:
        # A RuntimeError here is treated as "the viewer was closed".
        if not restore:
            logg.error(
                "The viewer has already been closed. Try specifying `restore=True`"
            )
            return
        # Rebuild the UI, then try showing once more.
        self.view._init_UI()
        self.view.viewer.show()
def screenshot(
    self,
    return_result: bool = False,
    dpi: Optional[float] = 180,
    save: Optional[str] = None,
    **kwargs: Any,
) -> Optional[np.ndarray]:
    """
    Capture the viewer's canvas and plot it.

    Parameters
    ----------
    return_result
        If `True`, return the image as an :class:`numpy.uint8`.
    dpi
        Dots per inch.
    save
        If not `None`, passed together with the figure to ``save_fig``.
    kwargs
        Keyword arguments for :meth:`matplotlib.axes.Axes.imshow`.

    Returns
    -------
    Nothing, if ``return_result = False``, otherwise the image array.
    `None` is also returned when taking the screenshot fails.
    """
    try:
        canvas = self._controller.screenshot(path=None)
    except RuntimeError as err:
        logg.error(f"Unable to take a screenshot. Reason: {err}")
        return None

    figure, axes = plt.subplots(nrows=1, ncols=1, dpi=dpi)
    figure.tight_layout()
    axes.imshow(canvas, **kwargs)
    plt.axis("off")

    if save is not None:
        save_fig(figure, save)

    if return_result:
        return canvas
    return None
def prepare_dataframe(
    adata: AnnData,
    var_names: Union[_VarNames, Mapping[str, _VarNames]],
    groupby: Optional[str] = None,
    use_raw: Optional[bool] = None,
    log: bool = False,
    num_categories: int = 7,
    layer=None,
    gene_symbols: Optional[str] = None,
):
    """
    Given the anndata object, prepares a data frame in which the row index
    are the categories defined by group by and the columns correspond to
    var_names.

    Parameters
    ----------
    adata
        Annotated data matrix.
    var_names
        `var_names` should be a valid subset of `adata.var_names`.
    groupby
        The key of the observation grouping to consider. It is expected that
        groupby is a categorical. If groupby is not a categorical observation,
        it would be subdivided into `num_categories`.
    use_raw
        Use `raw` attribute of `adata` if present.
    log
        Use the log of the values
    num_categories
        Only used if groupby observation is not categorical. This value
        determines the number of groups into which the groupby observation
        should be subdivided.
    layer
        Key in `adata.layers` to take the values from. Takes precedence over
        `use_raw`.
    gene_symbols
        Key for field in .var that stores gene symbols. Only used when it is
        a column of `adata.var`; `var_names` are then treated as symbols.

    Returns
    -------
    Tuple ``(categories, obs_tidy)``: the categories of the row index and the
    `pandas.DataFrame`. `None` is returned (after logging an error) when a
    requested gene symbol is not found in the `gene_symbols` column.

    Raises
    ------
    ValueError
        If `groupby` is not a key of the observations.
    KeyError
        If `layer` is not a key of `adata.layers`.
    """
    from scipy.sparse import issparse

    sanitize_anndata(adata)
    if use_raw is None and adata.raw is not None:
        use_raw = True
    if isinstance(var_names, str):
        var_names = [var_names]

    if groupby is not None and groupby not in adata.obs_keys():
        raise ValueError(
            'groupby has to be a valid observation. '
            f'Given {groupby}, valid observations: {adata.obs_keys()}'
        )

    # `symbols` stays None unless a translation actually took place; the
    # column rename at the end keys off this rather than off `gene_symbols`,
    # which may be set without being a column of `adata.var`.
    symbols = None
    if gene_symbols is not None and gene_symbols in adata.var.columns:
        # translate gene_symbols to var_names
        # slow method but gives a meaningful error if no gene symbol is found:
        symbol_column = adata.var[gene_symbols]
        translated_var_names = []
        for symbol in var_names:
            if symbol not in symbol_column.values:
                logg.error(
                    f"Gene symbol {symbol!r} not found in given "
                    f"gene_symbols column: {gene_symbols!r}"
                )
                return None
            # first var name carrying this symbol
            translated_var_names.append(
                adata.var[symbol_column == symbol].index[0]
            )
        symbols = var_names
        var_names = translated_var_names

    if layer is not None:
        if layer not in adata.layers.keys():
            raise KeyError(
                f'Selected layer: {layer} is not in the layers list. '
                f'The list of valid layers is: {adata.layers.keys()}'
            )
        matrix = adata[:, var_names].layers[layer]
    elif use_raw:
        matrix = adata.raw[:, var_names].X
    else:
        matrix = adata[:, var_names].X

    if issparse(matrix):
        matrix = matrix.toarray()
    if log:
        matrix = np.log1p(matrix)

    obs_tidy = pd.DataFrame(matrix, columns=var_names)
    if groupby is None:
        # single, unnamed category covering every observation
        categorical = pd.Series(np.repeat('', len(obs_tidy))).astype('category')
    elif not is_categorical_dtype(adata.obs[groupby]):
        # if the groupby column is not categorical, turn it into one
        # by subdividing into `num_categories` categories
        categorical = pd.cut(adata.obs[groupby], num_categories)
    else:
        categorical = adata.obs[groupby]

    # The key is a Series, so `drop` has nothing to act on; pass only the
    # key and keep the remaining arguments keyword-only.
    obs_tidy.set_index(categorical, inplace=True)

    if symbols is not None:
        # translate the column names to the symbol names
        obs_tidy.rename(columns=dict(zip(var_names, symbols)), inplace=True)

    categories = obs_tidy.index.categories
    return categories, obs_tidy
def select_affinity(
    adata: AnnData,
    level: str = '1',
    threshold: float = 0.9999,
    inverse: bool = False,
    key: Optional[str] = 'nsbm',
    update_state: Optional[bool] = False,
    filter: Optional[bool] = True,
    copy: bool = False,
):
    """
    Deprecated: formerly selected cells based on the affinity values at a
    specified level.

    The function no longer performs any selection; every call raises
    `ValueError`. The parameters are kept only so that existing call
    signatures remain valid.

    Parameters
    ----------
    adata
        Annotated data matrix. Unused.
    level
        The level that was used for selection. Unused.
    threshold
        The affinity threshold that was used for selection. Unused.
    inverse
        Whether cells below the threshold were to be returned. Unused.
    key
        Key of the groupings used to evaluate the model. Unused.
    update_state
        Whether the state was to be updated. Unused.
    filter
        Whether cells were to be filtered or only marked. Unused.
    copy
        Whether a subsetted copy was to be returned. Unused.

    Raises
    ------
    ValueError
        Always.
    """
    # The former implementation sat after this raise and could never run,
    # so it has been dropped.
    raise ValueError('This function has been deprecated')
def count_fragments_features(
    data: Union[AnnData, MuData],
    features: Optional[pd.DataFrame] = None,
    extend_upstream: int = 2000,
    extend_downstream: int = 0,
) -> AnnData:
    """
    Count fragments overlapping given Features.
    Returns cells x features matrix.

    Parameters
    ----------
    data
        AnnData object with peak counts or multimodal MuData object with
        'atac' modality.
    features
        A DataFrame with feature annotation, e.g. genes.
        Annotation has to contain columns: Chromosome, Start, End.
        If `None`, the annotation is taken from the 'rna' modality of a
        MuData object when its `.var` has an 'interval' column.
    extend_upstream
        Number of nucleotides to extend every gene upstream
        (2000 by default to extend gene coordinates to promoter regions)
    extend_downstream
        Number of nucleotides to extend every gene downstream
        (0 by default)

    Raises
    ------
    TypeError
        If `data` is neither an AnnData nor a MuData with an 'atac' modality.
    ValueError
        If `features` is not given and cannot be taken from `data`.
    KeyError
        If no fragments file is registered in `adata.uns["files"]`.
    ImportError
        If pysam is not installed.
    """
    if isinstance(data, AnnData):
        adata = data
    elif isinstance(data, MuData) and "atac" in data.mod:
        adata = data.mod["atac"]
    else:
        raise TypeError("Expected AnnData or MuData object with 'atac' modality")

    if features is None:
        # Try to get gene annotation from data.mod['rna']
        if (
            isinstance(data, MuData)
            and "rna" in data.mod
            and "interval" in data.mod["rna"].var.columns
        ):
            features = get_gene_annotation_from_rna(data)
        else:
            raise ValueError(
                "Argument `features` is required. It should be a BED-like DataFrame with gene coordinates and names."
            )

    if "files" not in adata.uns or "fragments" not in adata.uns["files"]:
        raise KeyError(
            "There is no fragments file located yet. Run muon.atac.tl.locate_fragments first."
        )

    try:
        import pysam
    except ImportError as err:
        raise ImportError(
            "pysam is not available. It is required to work with the fragments file. "
            "Install pysam from PyPI (`pip install pysam`) or from GitHub (`pip install git+https://github.com/pysam-developers/pysam`)"
        ) from err

    # Callers may pass floats (e.g. 2e3); region coordinates are integers.
    extend_upstream = int(extend_upstream)
    extend_downstream = int(extend_downstream)

    n = adata.n_obs
    n_features = features.shape[0]

    # Dictionary with matrix positions: cell barcode -> position in adata.obs
    barcode_pos = {barcode: pos for pos, barcode in enumerate(adata.obs.index)}

    fragments = pysam.TabixFile(adata.uns["files"]["fragments"], parser=pysam.asBed())
    try:
        # List of lists matrix is quick and convenient to fill by row
        mx = lil_matrix((n_features, n), dtype=int)

        logging.info(
            f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Counting fragments in {n} cells for {features.shape[0]} features..."
        )

        for i in tqdm(range(n_features)):  # iterate over features (e.g. genes)
            f = features.iloc[i]
            # Clamp so that extending a feature near the start of a
            # chromosome does not produce a negative coordinate.
            start = max(0, int(f.Start) - extend_upstream)
            end = int(f.End) + extend_downstream
            for fr in fragments.fetch(f.Chromosome, start, end):
                # Resolve both values before touching the matrix so that the
                # row's index and data lists always stay the same length.
                try:
                    ind = barcode_pos[fr.name]  # cell barcode (e.g. GTCAGTCAGTCAGTCA-1)
                    n_cuts = int(fr.score)  # number of cuts per fragment (e.g. 2)
                except (KeyError, ValueError, TypeError):
                    # Unknown barcode or unusable score: skip this fragment.
                    continue
                # NOTE(review): a barcode seen more than once for a feature
                # adds repeated entries to the row — presumably combined when
                # the matrix is converted; confirm.
                mx.rows[i].append(ind)
                mx.data[i].append(n_cuts)

        # Faster to convert to csr first and then transpose
        mx = mx.tocsr().transpose()

        return AnnData(X=mx, obs=adata.obs, var=features)

    except Exception as e:
        logging.error(e)
        raise

    finally:
        # The connection has to be closed
        fragments.close()