def test_append_matrix(self):
    """Check append_matrix against a VariationsH5 dataset and numpy arrays.

    The 1000snps.hdf5 fixture is copied to a temporary file and only that
    copy is opened in 'r+' mode, so the fixture itself is not written to.
    """
    fixture_path = join(TEST_DATA_DIR, '1000snps.hdf5')

    new_rows = [[1, 1, 1], [2, 2, 2]]
    # Rows the first assertion implies '/calls/DP' holds before any append.
    initial_rows = [[1, 8, 5], [3, 5, 3], [6, 0, 4], [7, 4, 2], [4, 2, 3]]
    expected = initial_rows + new_rows
    # new_rows followed by the dataset once it has been appended to itself.
    expected2 = new_rows + expected + expected
    array = numpy.array(new_rows)

    with NamedTemporaryFile(suffix='.h5') as tmp_h5:
        shutil.copy(fixture_path, tmp_h5.name)
        variations = VariationsH5(tmp_h5.name, mode='r+')
        dset = variations['/calls/DP']
        # Read before the dataset is modified; presumably an in-memory
        # numpy array -- it is used as an append target further down.
        in_memory = dset[()]

        # An array appended to the dataset.
        append_matrix(dset, array)
        assert numpy.all(dset[()] == expected)

        # The dataset appended to itself, then its content appended to a
        # fresh array.
        append_matrix(dset, dset)
        array2 = numpy.array(new_rows)
        append_matrix(array2, dset[()])
        assert numpy.all(expected2 == array2)

        # An array appended to the matrix read at the beginning.
        append_matrix(in_memory, array)
        assert numpy.all(in_memory == expected)
def _get_mats_for_chunk(self, variations):
    """Append the matrices of ``variations`` to the ones stored in self.

    Args:
        variations: chunk indexed by field path; ``keys()`` gives the
            paths to process.

    Returns:
        dict mapping each field path of the chunk to the matrix taken
        from ``self`` for that path, after ``append_matrix`` was called
        on it.

    Raises:
        ValueError: if self has a field that the chunk lacks.
    """
    chunk_paths = variations.keys()

    # Only the fields of self that are missing from the chunk are detected
    # here; extra fields in the chunk are not covered by this check.
    missing_in_chunk = set(self.keys()) - set(chunk_paths)
    if missing_in_chunk:
        raise ValueError('Previous matrices do not match matrices in chunk')

    extended = {}
    for path in chunk_paths:
        incoming = variations[path]
        stored = self[path]
        self._check_shape_matches(incoming, stored, path)
        append_matrix(stored, incoming)
        extended[path] = stored
    return extended
def put_chunks(self, chunks, kept_fields=None, ignored_fields=None):
    """Append every chunk in ``chunks`` to the matrices of this object.

    The first chunk is handed to ``self._create_or_get_mats_from_chunk``
    and the value it returns is used as the path -> matrix mapping for the
    rest of the call.  Every later chunk is appended, path by path, to
    those matrices with ``append_matrix``.

    Args:
        chunks: iterable of chunks, or None (the call is then a no-op).
            Chunks after the first one are indexed by path and each item
            must expose ``.data.shape``.
        kept_fields: accepted but not used by this method.
        ignored_fields: accepted but not used by this method.

    Raises:
        AssertionError: if the matrices of one chunk do not all have the
            same number of rows.
    """
    if chunks is None:
        return

    matrices = None
    for chunk in chunks:
        if matrices is None:
            matrices = self._create_or_get_mats_from_chunk(chunk)
            continue

        # check all the matrices in the chunk have the same number of snps.
        # Going through a set (instead of comparing against nsnps[0]) makes
        # a chunk without fields a no-op rather than an IndexError.
        nsnps = {chunk[path].data.shape[0] for path in chunk.keys()}
        assert len(nsnps) <= 1, 'Matrices in chunk differ in number of snps'

        for path in chunk.keys():
            dset_chunk = chunk[path]
            dset = matrices[path]
            append_matrix(dset, dset_chunk)

    self._index = None

    # Flush through the attribute that is actually tested for; the previous
    # code tested for 'flush' but then reached into self._h5file.
    if hasattr(self, 'flush'):
        self.flush()
def test_append_matrix(self):
    """Check append_matrix against a VariationsH5 dataset and numpy arrays.

    The dataset content is read with ``dset[()]``; the ``Dataset.value``
    property used previously was deprecated and is no longer available in
    h5py 3.
    """
    in_fpath = join(TEST_DATA_DIR, '1000snps.hdf5')
    array = numpy.array([[1, 1, 1], [2, 2, 2]])
    expected = [[1, 8, 5], [3, 5, 3], [6, 0, 4], [7, 4, 2], [4, 2, 3],
                [1, 1, 1], [2, 2, 2]]
    expected2 = [[1, 1, 1], [2, 2, 2],
                 [1, 8, 5], [3, 5, 3], [6, 0, 4], [7, 4, 2], [4, 2, 3],
                 [1, 1, 1], [2, 2, 2],
                 [1, 8, 5], [3, 5, 3], [6, 0, 4], [7, 4, 2], [4, 2, 3],
                 [1, 1, 1], [2, 2, 2]]

    # Work on a temporary copy: the file is opened in 'r+' mode and the
    # appends below modify it.
    with NamedTemporaryFile(suffix='.h5') as fhand_out:
        shutil.copy(in_fpath, fhand_out.name)
        hdf5 = VariationsH5(fhand_out.name, mode='r+')
        dset = hdf5['/calls/DP']
        # Content read before any append; reused as a target at the end.
        orig_array = dset[()]

        # array appended to the dataset
        append_matrix(dset, array)
        assert numpy.all(dset[()] == expected)

        # dataset appended to itself, then its content appended to an array
        append_matrix(dset, dset)
        array2 = numpy.array([[1, 1, 1], [2, 2, 2]])
        append_matrix(array2, dset[()])
        assert numpy.all(expected2 == array2)

        # array appended to the matrix read at the beginning
        append_matrix(orig_array, array)
        assert numpy.all(orig_array == expected)