def test_create_dataset_from_dataset(tmp_path):
    """Create a sparse dataset from another file's dataset and compare.

    A CSR matrix is written to ``from.h5``; the dataset object returned by
    that call is then passed as ``data`` when creating ``sparse/matrix`` in
    ``to.h5``, and the stored copy is compared with the original matrix.
    """
    from_h5_path = tmp_path / 'from.h5'
    to_h5_path = tmp_path / 'to.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0],
                                   [0, 0, 1],
                                   [0, 0, 0],
                                   [1, 1, 0]],
                                  dtype=np.float64)

    # Pass the mode explicitly: h5py >= 3.0 changed the default File mode to
    # read-only, which cannot create a file that does not exist yet.
    with h5py.File(from_h5_path, 'w') as from_h5f:
        from_dset = from_h5f.create_dataset('sparse/matrix',
                                            data=sparse_matrix)

        # The source file is kept open while its dataset is used as input.
        with h5py.File(to_h5_path, 'w') as to_h5f:
            to_h5f.create_dataset('sparse/matrix', data=from_dset)
            # The ``!=`` result is expected to be a sparse matrix whose
            # ``size`` is the number of stored (differing) entries.
            assert (to_h5f['sparse/matrix'].value != sparse_matrix).size == 0
def test_create_and_read_dataset(tmp_path):
    """Write a sparse matrix, reopen the file, and check row slices.

    The matrix is stored under ``sparse/matrix``; after reopening, several
    row slices and the full matrix are compared with the in-memory original.
    """
    h5_path = tmp_path / 'test.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0],
                                   [0, 0, 1],
                                   [0, 0, 0],
                                   [1, 1, 0]],
                                  dtype=np.float64)

    # Pass the mode explicitly: h5py >= 3.0 changed the default File mode to
    # read-only, which cannot create a file that does not exist yet.
    with h5py.File(h5_path, 'w') as h5f:
        h5f.create_dataset('sparse/matrix', data=sparse_matrix)

    row_slices = (
        slice(1, 3),
        slice(2, None),
        slice(None, 2),
        slice(-2, None),
        slice(None, -2),
    )
    with h5py.File(h5_path, 'r') as h5f:
        for rows in row_slices:
            # The ``!=`` result is expected to be a sparse matrix whose
            # ``size`` is the number of stored (differing) entries.
            mismatch = h5f['sparse']['matrix'][rows] != sparse_matrix[rows]
            assert mismatch.size == 0
        assert (h5f['sparse']['matrix'].value != sparse_matrix).size == 0
def test_create_dataset_from_dataset():
    """Create a sparse dataset from another file's dataset and compare.

    Two temporary ``.h5`` files are used: a CSR matrix is written to the
    first, the resulting dataset object is passed as ``data`` when creating
    ``sparse/matrix`` in the second, and the copy is compared with the
    original. Both files are removed afterwards, even if an assertion fails.
    """
    # mkstemp returns an already-open OS-level descriptor together with the
    # path; close it right away so the descriptor is not leaked.
    from_fd, from_h5_path = mkstemp(suffix=".h5")
    os.close(from_fd)
    to_fd, to_h5_path = mkstemp(suffix=".h5")
    os.close(to_fd)

    sparse_matrix = ss.csr_matrix([[0, 1, 0],
                                   [0, 0, 1],
                                   [0, 0, 0],
                                   [1, 1, 0]],
                                  dtype=np.float64)
    try:
        # 'w' truncates the empty placeholder created by mkstemp; relying on
        # the default mode would open read-only under h5py >= 3.0.
        with h5py.File(from_h5_path, 'w') as from_h5f:
            from_dset = from_h5f.create_dataset('sparse/matrix',
                                                data=sparse_matrix)

            # The source file is kept open while its dataset is used as input.
            with h5py.File(to_h5_path, 'w') as to_h5f:
                to_h5f.create_dataset('sparse/matrix', data=from_dset)
                # The ``!=`` result is expected to be a sparse matrix whose
                # ``size`` is the number of stored (differing) entries.
                assert (to_h5f['sparse/matrix'].value
                        != sparse_matrix).size == 0
    finally:
        os.remove(from_h5_path)
        os.remove(to_h5_path)
def test_create_and_read_dataset():
    """Write a sparse matrix to a temp file, reopen it, and check row slices.

    The matrix is stored under ``sparse/matrix``; after reopening, several
    row slices and the full matrix are compared with the in-memory original.
    The temporary file is removed afterwards, even if an assertion fails.
    """
    # mkstemp returns an already-open OS-level descriptor together with the
    # path; close it right away so the descriptor is not leaked.
    fd, h5_path = mkstemp(suffix=".h5")
    os.close(fd)

    sparse_matrix = ss.csr_matrix([[0, 1, 0],
                                   [0, 0, 1],
                                   [0, 0, 0],
                                   [1, 1, 0]],
                                  dtype=np.float64)
    row_slices = (
        slice(1, 3),
        slice(2, None),
        slice(None, 2),
        slice(-2, None),
        slice(None, -2),
    )
    try:
        # 'w' truncates the empty placeholder created by mkstemp; relying on
        # the default mode would open read-only under h5py >= 3.0.
        with h5py.File(h5_path, 'w') as h5f:
            h5f.create_dataset('sparse/matrix', data=sparse_matrix)

        with h5py.File(h5_path, 'r') as h5f:
            for rows in row_slices:
                # The ``!=`` result is expected to be a sparse matrix whose
                # ``size`` is the number of stored (differing) entries.
                mismatch = h5f['sparse']['matrix'][rows] != sparse_matrix[rows]
                assert mismatch.size == 0
            assert (h5f['sparse']['matrix'].value != sparse_matrix).size == 0
    finally:
        os.remove(h5_path)
def test_dataset_append(tmp_path):
    """Append rows to a stored sparse dataset and compare with ``vstack``.

    A 4x3 CSR matrix is written as ``matrix``, a 2x3 CSR matrix is appended
    through the dataset's ``append`` method, and the stored result is compared
    with the two matrices stacked vertically in memory.
    """
    h5_path = tmp_path / 'test.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0],
                                   [0, 0, 1],
                                   [0, 0, 0],
                                   [1, 1, 0]],
                                  dtype=np.float64)
    to_append = ss.csr_matrix([[0, 1, 1],
                               [1, 0, 0]],
                              dtype=np.float64)
    appended_matrix = ss.vstack((sparse_matrix, to_append))

    # Pass the mode explicitly: h5py >= 3.0 changed the default File mode to
    # read-only, which cannot create a file that does not exist yet.
    with h5py.File(h5_path, 'w') as h5f:
        h5f.create_dataset('matrix', data=sparse_matrix, chunks=(100000, ))
        h5f['matrix'].append(to_append)
        # The ``!=`` result is expected to be a sparse matrix whose ``size``
        # is the number of stored (differing) entries.
        assert (h5f['matrix'].value != appended_matrix).size == 0
def read_h5(self, input_file):
    """Open *input_file* read-only and attach its ``/GRCh38`` datasets.

    The dataset objects are stored on the instance as ``data``, ``indices``,
    ``indptr``, ``genes``, ``barcodes``, ``shape`` and ``gene_names``. A CSR
    matrix is built from the first three using the stored shape with its two
    entries swapped, and its transpose is stored as ``trans_m``.

    NOTE(review): the file handle is neither closed nor kept on ``self``;
    presumably it has to stay open while the stored dataset objects are in
    use -- confirm before adding a close.
    """
    handle = h5py.File(input_file, 'r')

    self.data = handle['/GRCh38/data']
    self.indices = handle['/GRCh38/indices']
    self.indptr = handle['/GRCh38/indptr']
    self.genes = handle['/GRCh38/genes']
    self.barcodes = handle['/GRCh38/barcodes']
    self.shape = handle['/GRCh38/shape']
    self.gene_names = handle['/GRCh38/gene_names']

    # The matrix is built with the two stored dimensions in swapped order.
    swapped_dims = (self.shape[1], self.shape[0])
    csr_components = (self.data, self.indices, self.indptr)
    matrix = csr_matrix(csr_components, shape=swapped_dims)
    self.trans_m = np.transpose(matrix)
def getAnnData(input_file):
    """Build an ``AnnData`` object from the ``/GRCh38`` group of an HDF5 file.

    The ``data``/``indices``/``indptr`` datasets are interpreted as a CSR
    matrix whose shape is the stored ``shape`` with its two entries swapped,
    so that rows are labelled by ``barcodes`` (obs) and columns by ``genes``
    (var).

    Args:
        input_file: Path of the HDF5 file to read.

    Returns:
        The ``AnnData`` object holding the float32 sparse matrix.

    Raises:
        KeyError: If one of the required datasets is missing from the file.
    """
    # Read everything into memory inside the ``with`` block so the file
    # handle is released instead of being leaked. ``[()]`` replaces the
    # ``Dataset.value`` attribute, which was removed in h5py 3.0.
    with h5py.File(input_file, 'r') as h5:
        data = h5['/GRCh38/data'][()]
        indices = h5['/GRCh38/indices'][()]
        barcodes = h5['/GRCh38/barcodes'][()]
        indptr = h5['/GRCh38/indptr'][()]
        genes = h5['/GRCh38/genes'][()]
        dims = h5['/GRCh38/shape'][()]

    X = csr_matrix((data, indices, indptr),
                   shape=(dims[1], dims[0]),
                   dtype='float32')
    adata = AnnData(X,
                    obs=pd.DataFrame(index=barcodes),
                    var=pd.DataFrame(index=genes),
                    dtype=X.dtype.name,
                    filemode=True)
    return adata
def getNormAnnData(input_file, normalize_type):
    """Build an ``AnnData`` object from a normalized ``/GRCh38`` HDF5 group.

    The ``data``/``indices``/``indptr`` datasets are interpreted as a CSR
    matrix whose shape is the stored ``shape`` with its two entries swapped,
    so that rows are labelled by ``barcodes`` (obs) and columns by ``genes``
    (var).

    When ``normalize_type`` is ``"rank"`` the matrix is densified and every
    entry whose absolute value is below ``1e-5`` is replaced by the
    ``zero_value`` entry of its row. For any other ``normalize_type`` the
    matrix stays sparse and the ``size_factor`` dataset is returned as well.

    Args:
        input_file: Path of the HDF5 file to read.
        normalize_type: ``"rank"`` selects the dense, zero-filled variant;
            any other value selects the sparse variant with size factors.

    Returns:
        ``adata`` when ``normalize_type == "rank"``, otherwise the tuple
        ``(adata, size_factor)``.

    Raises:
        KeyError: If one of the required datasets is missing from the file.
    """
    is_rank = normalize_type == "rank"

    # Read everything into memory inside the ``with`` block so the file
    # handle is released instead of being leaked. ``[()]`` replaces the
    # ``Dataset.value`` attribute, which was removed in h5py 3.0.
    with h5py.File(input_file, 'r') as h5:
        data = h5['/GRCh38/data'][()]
        indices = h5['/GRCh38/indices'][()]
        barcodes = h5['/GRCh38/barcodes'][()]
        indptr = h5['/GRCh38/indptr'][()]
        genes = h5['/GRCh38/genes'][()]
        dims = h5['/GRCh38/shape'][()]
        if is_rank:
            # Flattened so it can be indexed by row number.
            # NOTE(review): assumes one zero_value entry per matrix row --
            # confirm against the file writer.
            zero_value = np.ravel(h5['/GRCh38/zero_value'][()])
        else:
            size_factor = h5['/GRCh38/size_factor'][:]

    X = csr_matrix((data, indices, indptr),
                   shape=(dims[1], dims[0]),
                   dtype='float32')

    if is_rank:
        X = X.toarray()
        # Vectorized form of the per-element loop: locate the near-zero
        # entries once and overwrite them in place (the in-place assignment
        # keeps the float32 dtype of X).
        rows, cols = np.nonzero(np.abs(X) < 1e-5)
        X[rows, cols] = zero_value[rows]

    adata = AnnData(X,
                    obs=pd.DataFrame(index=barcodes),
                    var=pd.DataFrame(index=genes),
                    dtype=X.dtype.name,
                    filemode=True)

    if is_rank:
        return adata
    return adata, size_factor