Exemplo n.º 1
0
def test_create_dataset_from_dataset(tmp_path):
    """Check that a sparse dataset can be created from another dataset.

    A CSR matrix is written to ``from.h5``; the resulting dataset object is
    then passed as ``data=`` when creating a dataset in ``to.h5``.  The copy
    must compare equal to the original matrix.
    """
    from_h5_path = tmp_path / 'from.h5'
    to_h5_path = tmp_path / 'to.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 0]],
                                  dtype=np.float64)
    # The mode is passed explicitly: h5py 3.0 changed the default to
    # read-only, which makes creating a new file without a mode fail.
    with h5py.File(from_h5_path, 'w') as from_h5f:
        from_dset = from_h5f.create_dataset('sparse/matrix',
                                            data=sparse_matrix)

        # The source file must stay open while its dataset is copied.
        with h5py.File(to_h5_path, 'w') as to_h5f:
            to_h5f.create_dataset('sparse/matrix', data=from_dset)
            # `!=` yields a sparse matrix of differences; no stored entries
            # means the two matrices are identical.
            assert (to_h5f['sparse/matrix'].value != sparse_matrix).size == 0
Exemplo n.º 2
0
def test_create_and_read_dataset(tmp_path):
    """Write a CSR matrix to an HDF5 file and read it back, whole and sliced.

    The file is written and closed, then reopened so the assertions exercise
    data that actually round-tripped through the file.
    """
    h5_path = tmp_path / 'test.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 0]],
                                  dtype=np.float64)
    # Explicit modes: h5py 3.0 made read-only the default, so creating the
    # file without a mode fails on current releases.
    with h5py.File(h5_path, 'w') as h5f:
        h5f.create_dataset('sparse/matrix', data=sparse_matrix)
    with h5py.File(h5_path, 'r') as h5f:
        stored = h5f['sparse']['matrix']
        # `!=` yields a sparse matrix of differences; `.size == 0` means no
        # element differs.  Cover bounded, open-ended and negative slices.
        assert (stored[1:3] != sparse_matrix[1:3]).size == 0
        assert (stored[2:] != sparse_matrix[2:]).size == 0
        assert (stored[:2] != sparse_matrix[:2]).size == 0
        assert (stored[-2:] != sparse_matrix[-2:]).size == 0
        assert (stored[:-2] != sparse_matrix[:-2]).size == 0
        assert (stored.value != sparse_matrix).size == 0
Exemplo n.º 3
0
def test_create_dataset_from_dataset():
    """Check that a sparse dataset can be created from another dataset.

    Uses two temporary ``.h5`` files: a CSR matrix is written to the first,
    and the resulting dataset object is passed as ``data=`` when creating a
    dataset in the second.  Both files are removed afterwards, even when an
    assertion fails.
    """
    from_fd, from_h5_path = mkstemp(suffix=".h5")
    to_fd, to_h5_path = mkstemp(suffix=".h5")
    # mkstemp returns an already-open OS-level descriptor; close it so it is
    # not leaked and does not keep the file locked while the test reopens it.
    os.close(from_fd)
    os.close(to_fd)
    try:
        sparse_matrix = ss.csr_matrix(
            [[0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 0]],
            dtype=np.float64)
        # 'w' truncates the empty placeholder created by mkstemp.  The mode
        # is explicit because h5py 3.0 changed the default to read-only.
        with h5py.File(from_h5_path, 'w') as from_h5f:
            from_dset = from_h5f.create_dataset('sparse/matrix',
                                                data=sparse_matrix)

            # The source file must stay open while its dataset is copied.
            with h5py.File(to_h5_path, 'w') as to_h5f:
                to_h5f.create_dataset('sparse/matrix', data=from_dset)
                assert (to_h5f['sparse/matrix'].value
                        != sparse_matrix).size == 0
    finally:
        os.remove(from_h5_path)
        os.remove(to_h5_path)
Exemplo n.º 4
0
def test_create_and_read_dataset():
    """Write a CSR matrix to a temporary HDF5 file and read it back.

    The file is written and closed, then reopened so the assertions exercise
    data that round-tripped through the file.  The temporary file is removed
    afterwards, even when an assertion fails.
    """
    fd, h5_path = mkstemp(suffix=".h5")
    # mkstemp returns an already-open OS-level descriptor; close it so it is
    # not leaked and does not keep the file locked while the test reopens it.
    os.close(fd)
    try:
        sparse_matrix = ss.csr_matrix(
            [[0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 0]],
            dtype=np.float64)
        # 'w' truncates the empty placeholder created by mkstemp.  Modes are
        # explicit because h5py 3.0 changed the default to read-only.
        with h5py.File(h5_path, 'w') as h5f:
            h5f.create_dataset('sparse/matrix', data=sparse_matrix)
        with h5py.File(h5_path, 'r') as h5f:
            stored = h5f['sparse']['matrix']
            # `!=` yields a sparse matrix of differences; `.size == 0` means
            # no element differs.
            assert (stored[1:3] != sparse_matrix[1:3]).size == 0
            assert (stored[2:] != sparse_matrix[2:]).size == 0
            assert (stored[:2] != sparse_matrix[:2]).size == 0
            assert (stored[-2:] != sparse_matrix[-2:]).size == 0
            assert (stored[:-2] != sparse_matrix[:-2]).size == 0
            assert (stored.value != sparse_matrix).size == 0
    finally:
        os.remove(h5_path)
Exemplo n.º 5
0
def test_dataset_append(tmp_path):
    """Check that appending rows to a stored sparse dataset works.

    A 4x3 CSR matrix is stored, a 2x3 CSR matrix is appended to it, and the
    result is compared with the vertical stack of the two matrices.
    """
    h5_path = tmp_path / 'test.h5'
    sparse_matrix = ss.csr_matrix([[0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 0]],
                                  dtype=np.float64)
    to_append = ss.csr_matrix([[0, 1, 1], [1, 0, 0]], dtype=np.float64)
    appended_matrix = ss.vstack((sparse_matrix, to_append))

    # Explicit 'w': h5py 3.0 made read-only the default mode, so creating
    # the file without a mode fails on current releases.
    with h5py.File(h5_path, 'w') as h5f:
        h5f.create_dataset('matrix', data=sparse_matrix, chunks=(100000, ))
        h5f['matrix'].append(to_append)
        # No stored entries in the `!=` result means the matrices are equal.
        assert (h5f['matrix'].value != appended_matrix).size == 0
Exemplo n.º 6
0
 def read_h5(self, input_file):
     """Open ``input_file`` read-only and attach its '/GRCh38' datasets.

     Stores the ``data``, ``indices``, ``indptr``, ``genes``, ``barcodes``,
     ``shape`` and ``gene_names`` dataset handles as attributes of the same
     name, then builds a CSR matrix from them and stores its transpose in
     ``self.trans_m``.
     """
     handle = h5py.File(input_file, 'r')
     # The attributes hold dataset handles obtained from the file, not
     # copies, so the file is deliberately not closed here.
     for field in ('data', 'indices', 'indptr', 'genes', 'barcodes',
                   'shape', 'gene_names'):
         setattr(self, field, handle['/GRCh38/' + field])

     # The stored shape is swapped when the matrix is built.
     n_rows, n_cols = self.shape[1], self.shape[0]
     csr_m = csr_matrix((self.data, self.indices, self.indptr),
                        shape=(n_rows, n_cols))
     self.trans_m = np.transpose(csr_m)
Exemplo n.º 7
0
def getAnnData(input_file):
    """Load the '/GRCh38' sparse matrix of an HDF5 file into an AnnData.

    Args:
        input_file: Path of the HDF5 file to read.  It must contain the
            datasets ``data``, ``indices``, ``indptr``, ``barcodes``,
            ``genes`` and ``shape`` under ``/GRCh38``.

    Returns:
        An ``AnnData`` whose matrix is a float32 CSR matrix of shape
        ``(shape[1], shape[0])``, with ``barcodes`` as the observation index
        and ``genes`` as the variable index.
    """
    # Read every dataset into memory inside the context manager so the file
    # handle is released on return instead of being leaked.  `[()]` replaces
    # the `.value` accessor, which was removed in h5py 3.0.
    with h5py.File(input_file, 'r') as h5:
        data = h5['/GRCh38/data'][()]
        indices = h5['/GRCh38/indices'][()]
        indptr = h5['/GRCh38/indptr'][()]
        barcodes = h5['/GRCh38/barcodes'][()]
        genes = h5['/GRCh38/genes'][()]
        shape = h5['/GRCh38/shape'][()]

    # The stored shape is swapped when the matrix is built.
    X = csr_matrix((data, indices, indptr),
                   shape=(shape[1], shape[0]),
                   dtype='float32')
    adata = AnnData(X,
                    obs=pd.DataFrame(index=barcodes),
                    var=pd.DataFrame(index=genes),
                    dtype=X.dtype.name,
                    filemode=True)
    return adata
Exemplo n.º 8
0
def getNormAnnData(input_file, normalize_type):
    """Load a normalized '/GRCh38' matrix of an HDF5 file into an AnnData.

    Args:
        input_file: Path of the HDF5 file to read.  It must contain the
            datasets ``data``, ``indices``, ``indptr``, ``barcodes``,
            ``genes`` and ``shape`` under ``/GRCh38``, plus ``zero_value``
            (when ``normalize_type == "rank"``) or ``size_factor``
            (otherwise).
        normalize_type: ``"rank"`` selects the dense, zero-filled variant;
            any other value selects the sparse variant.

    Returns:
        For ``"rank"``: an ``AnnData`` holding a dense float32 matrix in
        which every entry with absolute value below 1e-5 is replaced by the
        ``zero_value`` of its row.
        Otherwise: a tuple ``(adata, size_factor)`` where ``adata`` holds the
        float32 CSR matrix and ``size_factor`` is the array read from the
        file.
    """
    is_rank = normalize_type == "rank"

    # Read every dataset into memory inside the context manager so the file
    # handle is released on return instead of being leaked.  `[()]` replaces
    # the `.value` accessor, which was removed in h5py 3.0.
    with h5py.File(input_file, 'r') as h5:
        data = h5['/GRCh38/data'][()]
        indices = h5['/GRCh38/indices'][()]
        indptr = h5['/GRCh38/indptr'][()]
        barcodes = h5['/GRCh38/barcodes'][()]
        genes = h5['/GRCh38/genes'][()]
        shape = h5['/GRCh38/shape'][()]
        if is_rank:
            zero_value = h5['/GRCh38/zero_value'][()]
        else:
            size_factor = h5['/GRCh38/size_factor'][:]

    # The stored shape is swapped when the matrix is built.
    X = csr_matrix((data, indices, indptr),
                   shape=(shape[1], shape[0]),
                   dtype='float32')

    if is_rank:
        X = X.toarray()
        # Vectorised form of the per-element scan: locate the near-zero
        # entries once and fill each with the zero_value of its row.
        # Assumes zero_value is one-dimensional with one entry per row of X,
        # as the original per-row indexing implies; confirm against the
        # file layout.
        near_zero_rows, near_zero_cols = (abs(X) < 1e-5).nonzero()
        X[near_zero_rows, near_zero_cols] = zero_value[near_zero_rows]

    adata = AnnData(X,
                    obs=pd.DataFrame(index=barcodes),
                    var=pd.DataFrame(index=genes),
                    dtype=X.dtype.name,
                    filemode=True)

    if is_rank:
        return adata
    return adata, size_factor