Пример #1
0
    def __getitem__(self, thing: Any) -> Any:
        """
        Access a layer by name, or slice through all the layers

        Args:
            thing:      if string, return the specified layer ("" is the default layer)
                        otherwise it is treated as an index/slice (e.g. a 2-tuple of slices)
                        that is applied to every layer

        Returns:
            For a string, whatever ``self.__getattr__(thing)`` returns.
            Otherwise a new LayerManager holding one MemoryLoomLayer per layer, each
            built from ``layer[thing]``.
        """
        # isinstance (not an exact type match) so that str subclasses such as
        # numpy.str_ are treated as layer names instead of being used as an index
        if isinstance(thing, str):
            return self.__getattr__(thing)

        # Anything that is not a name is assumed to be some kind of slice
        lm = LayerManager(None)
        for key, layer in self.items():
            lm[key] = loompy.MemoryLoomLayer(key, layer[thing])
        return lm
Пример #2
0
    def __getitem__(self, thing: Any) -> Any:
        """
        Access a layer by name, or slice through all the layers

        Args:
            thing:      if string, return the specified layer ("" is the default layer)
                        if tuple of strings, the strings are alternative layer names and the
                        first one found in storage is returned
                        otherwise (e.g. a slice 2-tuple), return a new LayerManager with all
                        layers sliced

        Returns:
            For a name (or tuple of names), whatever ``self.__getattr__`` returns for the
            matching name. Otherwise a new LayerManager holding one MemoryLoomLayer per
            layer, each built from ``layer[thing]``.

        Raises:
            AttributeError: if ``thing`` is a tuple of strings and none of them is stored
        """
        # isinstance (not an exact type match) so that str subclasses such as
        # numpy.str_ are treated as layer names instead of being used as an index
        if isinstance(thing, str):
            return self.__getattr__(thing)

        # Only a tuple made up entirely of strings is a list of alternative names.
        # Any other tuple (for instance a pair of slices) must reach the slicing
        # path below, otherwise the documented "slice 2-tuple" access cannot work.
        if isinstance(thing, tuple) and all(isinstance(t, str) for t in thing):
            storage = self.__dict__["storage"]
            for t in thing:
                if t in storage:
                    return self.__getattr__(t)
            raise AttributeError(
                f"'{type(self)}' object has no attribute {thing}")

        # Anything else is assumed to be some kind of slice
        lm = LayerManager(None)
        for key, layer in self.items():
            lm[key] = loompy.MemoryLoomLayer(key, layer[thing])
        return lm
Пример #3
0
	def scan(self, *, items: np.ndarray = None, axis: int = None, layers: Iterable = None, key: str = None, batch_size: int = 8 * 64) -> Iterable[Tuple[int, np.ndarray, loompy.LoomView]]:
		"""
		Scan across one axis and yield batches of rows (columns) as LoomView objects

		This is a generator, so the argument checks below run (and raise) when
		iteration starts, not when scan() is called.

		Args
		----
		items: np.ndarray
			the indexes [0, 2, 13, ... ,973] of the rows/cols to include along the axis
			OR: boolean mask array giving the rows/cols to include
			if None, every row/col along the axis is included
		axis: int
			0:rows or 1:cols
		batch_size: int
			the size of the chunks loaded at every step of the iterator (must be >= 1)
		layers: iterable
			if specified it will batch scan only across some of the layers of the loom file
			if layers == None, all layers will be scanned
			if layers == [""] or "", only the default layer will be scanned
			a single layer name given as a string is treated as a one-element list
		key:
			Name of primary key attribute. If specified, the other axis is reordered
			by np.argsort of that attribute (row attribute when axis == 1, column
			attribute when axis == 0)

		Returns
		------
		Iterable that yields triplets
		(ix, indexes, view)

		ix: int
			first position of the current chunk / how many rows/cols have been scanned already
		indexes: np.ndarray[int]
			the selected positions along the axis, in the numbering of the whole
			dataset (i.e. that of np.arange(ds.shape[axis]))
			this is ix + selection
		view: LoomView
			a view corresponding to the current chunk

		Raises
		------
		ValueError
			if axis is missing or not 0/1, or if batch_size is smaller than 1
		"""
		if axis is None:
			raise ValueError("Axis must be given (0 = rows, 1 = cols)")
		if batch_size < 1:
			# A non-positive batch size would never advance ix and loop forever
			raise ValueError("batch_size must be a positive integer")

		if layers is None:
			layers = self.layers.keys()
		elif isinstance(layers, str):
			# A bare string is one layer name ("" is the default layer), not an iterable of characters
			layers = [layers]
		# Materialise once: the layer names are iterated again for every chunk, and a
		# one-shot iterator would be empty from the second chunk onwards
		layers = list(layers)

		if items is not None:
			items = np.asarray(items)
			if np.issubdtype(items.dtype, np.bool_):
				# Convert a boolean mask into the positions it selects
				items = np.where(items)[0]

		if axis == 1:
			if key is not None:
				ordering = np.argsort(self.ra[key])
			else:
				ordering = np.arange(self.shape[0])
			if items is None:
				items = np.arange(self.shape[1])
			ix = 0
			while ix < self.shape[1]:
				cols_per_chunk = min(self.shape[1] - ix, batch_size)
				# Positions of the requested columns relative to the start of this chunk
				selection = items - ix
				# Pick out the cells that are in this batch
				selection = selection[(selection >= 0) & (selection < cols_per_chunk)]
				if selection.shape[0] == 0:
					ix += cols_per_chunk
					continue
				indexes = ix + selection

				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
				vals: Dict[str, loompy.MemoryLoomLayer] = {}
				for layer in layers:
					temp = self.layers[layer][:, ix:ix + cols_per_chunk]
					temp = temp[ordering, :]
					temp = temp[:, selection]
					vals[layer] = loompy.MemoryLoomLayer(layer, temp)
				lm = loompy.LayerManager(None)
				# NOTE(review): each layer is wrapped in MemoryLoomLayer a second time here;
				# kept exactly as before - confirm whether the double wrapping is intended
				for name, mem_layer in vals.items():
					lm[name] = loompy.MemoryLoomLayer(name, mem_layer)
				view = loompy.LoomView(lm, self.ra[ordering], self.ca[indexes], self.row_graphs[ordering], self.col_graphs[indexes], filename=self.filename, file_attrs=self.attrs)
				yield (ix, indexes, view)
				ix += cols_per_chunk
		elif axis == 0:
			if key is not None:
				ordering = np.argsort(self.ca[key])
			else:
				ordering = np.arange(self.shape[1])
			if items is None:
				items = np.arange(self.shape[0])
			ix = 0
			while ix < self.shape[0]:
				rows_per_chunk = min(self.shape[0] - ix, batch_size)
				# Positions of the requested rows relative to the start of this chunk
				selection = items - ix
				# Pick out the genes that are in this batch
				selection = selection[(selection >= 0) & (selection < rows_per_chunk)]
				if selection.shape[0] == 0:
					ix += rows_per_chunk
					continue
				indexes = ix + selection

				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
				vals: Dict[str, loompy.MemoryLoomLayer] = {}
				for layer in layers:
					temp = self.layers[layer][ix:ix + rows_per_chunk, :]
					temp = temp[:, ordering]
					temp = temp[selection, :]
					vals[layer] = loompy.MemoryLoomLayer(layer, temp)
				lm = loompy.LayerManager(None)
				# NOTE(review): each layer is wrapped in MemoryLoomLayer a second time here;
				# kept exactly as before - confirm whether the double wrapping is intended
				for name, mem_layer in vals.items():
					lm[name] = loompy.MemoryLoomLayer(name, mem_layer)
				view = loompy.LoomView(lm, self.ra[indexes], self.ca[ordering], self.row_graphs[indexes], self.col_graphs[ordering], filename=self.filename, file_attrs=self.attrs)
				yield (ix, indexes, view)
				ix += rows_per_chunk
		else:
			raise ValueError("axis must be 0 or 1")