def __getitem__(self, thing: Any) -> np.ndarray:
    """
    Access a layer by name, or slice through all the layers

    Args:
        thing:  if string, return the specified layer ("" is the default layer)
                otherwise (e.g. a slice 2-tuple), return a new LayerManager with all layers sliced

    Returns:
        For a string, whatever ``self.__getattr__(thing)`` returns.
        For anything else, a new LayerManager holding one MemoryLoomLayer per
        layer, each built from ``layer[thing]``. (The ``np.ndarray`` return
        annotation is kept unchanged for interface compatibility.)
    """
    # isinstance rather than an exact type comparison, so that str subclasses
    # (for example numpy.str_ values taken from a numpy string array) are
    # still looked up as layer names instead of being mistaken for a slice
    if isinstance(thing, str):
        return self.__getattr__(thing)

    # Assume some kind of slice, and apply it to every layer
    lm = LayerManager(None)
    for key, layer in self.items():
        lm[key] = loompy.MemoryLoomLayer(key, layer[thing])
    return lm
def __getitem__(self, thing: Any) -> np.ndarray:
    """
    Access a layer by name, or slice through all the layers

    Args:
        thing:  if string, return the specified layer ("" is the default layer)
                if tuple of strings, return the first of those names that is present in storage
                otherwise (e.g. a slice 2-tuple), return a new LayerManager with all layers sliced

    Returns:
        For a string or a tuple of strings, whatever ``self.__getattr__``
        returns for the selected name.
        For anything else, a new LayerManager holding one MemoryLoomLayer per
        layer, each built from ``layer[thing]``. (The ``np.ndarray`` return
        annotation is kept unchanged for interface compatibility.)

    Raises:
        AttributeError: if a tuple of strings is given and none of the names
            is present in storage
    """
    # isinstance rather than an exact type comparison, so that str subclasses
    # (for example numpy.str_) are still treated as layer names
    if isinstance(thing, str):
        return self.__getattr__(thing)

    # Only a tuple made up entirely of strings is a list of alternative names.
    # Any other tuple (such as a pair of slices) must fall through to the
    # slicing code below, otherwise the documented slice 2-tuple can never work.
    if isinstance(thing, tuple) and all(isinstance(name, str) for name in thing):
        storage = self.__dict__["storage"]
        for name in thing:
            if name in storage:
                return self.__getattr__(name)
        raise AttributeError(f"'{type(self)}' object has no attribute {thing}")

    # Assume some kind of slice, and apply it to every layer
    lm = LayerManager(None)
    for key, layer in self.items():
        lm[key] = loompy.MemoryLoomLayer(key, layer[thing])
    return lm
def scan(self, *, items: np.ndarray = None, axis: int = None, layers: Iterable = None, key: str = None, batch_size: int = 8 * 64) -> Iterable[Tuple[int, np.ndarray, loompy.LoomView]]:
    """
    Scan across one axis and return batches of rows (columns) as LoomView objects

    This is a generator: argument errors are raised when iteration starts,
    not when ``scan`` is called.

    Args
    ----
    items: np.ndarray
        the indexes [0, 2, 13, ... ,973] of the rows/cols to include along the axis
        OR: boolean mask array giving the rows/cols to include
        (any sequence accepted by ``np.asarray`` works; None means everything)
    axis: int
        0:rows or 1:cols
    batch_size: int
        the size of the chunks read from the file at every step of the iterator
        (must be at least 1)
    layers: iterable
        if specified it will batch scan only across some of the layers of the loom file
        if layers == None, all layers will be scanned
        if layers == [""] or "", only the default layer will be scanned
        a single layer name given as a string scans only that layer
    key: str
        Name of primary key attribute on the *other* axis (a row attribute when
        axis == 1, a column attribute when axis == 0). If specified, the values
        along that other axis are returned sorted by the key

    Returns
    -------
    Iterable that yields triplets
    (ix, indexes, view)

    ix: int
        offset along the axis of the first row/col of the current chunk
    indexes: np.ndarray[int]
        the selected positions along the axis, in the same numbering as the
        input ``items`` (i.e. positions in np.arange(ds.shape[axis])); this is
        ix + selection
    view: LoomView
        a view corresponding to the selected rows/cols of the current chunk

    Chunks that contain none of the requested items are skipped without
    yielding anything.

    Raises
    ------
    ValueError
        if axis is missing or not 0/1, or if batch_size is smaller than 1
    """
    if axis is None:
        raise ValueError("Axis must be given (0 = rows, 1 = cols)")
    if axis not in (0, 1):
        raise ValueError("axis must be 0 or 1")
    # A chunk size of zero (or less) would never advance the scan position
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    if layers is None:
        layers = self.layers.keys()
    if isinstance(layers, str):
        # A bare string names one layer ("" is the default layer); without
        # this it would be iterated character by character
        layers = [layers]
    else:
        # Materialize once: the names are re-used for every chunk, so a
        # one-shot iterator would be exhausted after the first batch
        layers = list(layers)

    if items is not None:
        items = np.asarray(items)
        if np.issubdtype(items.dtype, np.bool_):
            # Convert a boolean mask to the positions it selects
            items = np.where(items)[0]

    if axis == 1:
        # Rows are always returned in full, optionally sorted by a row attribute
        if key is not None:
            ordering = np.argsort(self.ra[key])
        else:
            ordering = np.arange(self.shape[0])
        if items is None:
            items = np.arange(self.shape[1])
        cols_per_chunk = batch_size
        ix = 0
        while ix < self.shape[1]:
            # The last chunk may be shorter than batch_size
            cols_per_chunk = min(self.shape[1] - ix, cols_per_chunk)

            # Pick out the cells that are in this batch (chunk-relative positions)
            selection = items - ix
            selection = selection[(selection >= 0) & (selection < cols_per_chunk)]
            if selection.shape[0] == 0:
                ix += cols_per_chunk
                continue

            # Load the whole chunk from the file, then extract genes and cells using fancy indexing
            vals = {}
            for layer_name in layers:
                temp = self.layers[layer_name][:, ix:ix + cols_per_chunk]
                temp = temp[ordering, :]
                temp = temp[:, selection]
                vals[layer_name] = loompy.MemoryLoomLayer(layer_name, temp)
            lm = loompy.LayerManager(None)
            for layer_name, layer in vals.items():
                lm[layer_name] = loompy.MemoryLoomLayer(layer_name, layer)
            view = loompy.LoomView(lm, self.ra[ordering], self.ca[ix + selection], self.row_graphs[ordering], self.col_graphs[ix + selection], filename=self.filename, file_attrs=self.attrs)
            yield (ix, ix + selection, view)
            ix += cols_per_chunk
    else:
        # axis == 0: columns are always returned in full, optionally sorted
        # by a column attribute
        if key is not None:
            ordering = np.argsort(self.ca[key])
        else:
            ordering = np.arange(self.shape[1])
        if items is None:
            items = np.arange(self.shape[0])
        rows_per_chunk = batch_size
        ix = 0
        while ix < self.shape[0]:
            # The last chunk may be shorter than batch_size
            rows_per_chunk = min(self.shape[0] - ix, rows_per_chunk)

            # Pick out the genes that are in this batch (chunk-relative positions)
            selection = items - ix
            selection = selection[(selection >= 0) & (selection < rows_per_chunk)]
            if selection.shape[0] == 0:
                ix += rows_per_chunk
                continue

            # Load the whole chunk from the file, then extract genes and cells using fancy indexing
            vals = {}
            for layer_name in layers:
                temp = self.layers[layer_name][ix:ix + rows_per_chunk, :]
                temp = temp[:, ordering]
                temp = temp[selection, :]
                vals[layer_name] = loompy.MemoryLoomLayer(layer_name, temp)
            lm = loompy.LayerManager(None)
            for layer_name, layer in vals.items():
                lm[layer_name] = loompy.MemoryLoomLayer(layer_name, layer)
            view = loompy.LoomView(lm, self.ra[ix + selection], self.ca[ordering], self.row_graphs[ix + selection], self.col_graphs[ordering], filename=self.filename, file_attrs=self.attrs)
            yield (ix, ix + selection, view)
            ix += rows_per_chunk