def test_merge(path1, path2):
    """Merge two coolers and compare the merged total count with the input.

    The merged file's summed ``count`` column must equal twice the summed
    ``count`` column of ``path1``.
    """
    out_uri = "test.cool"
    with isolated_filesystem():
        merge_coolers(out_uri, [path1, path2], mergebuf=int(15e6))

        single_total = cooler.Cooler(path1).pixels()["count"][:].sum()
        merged_total = cooler.Cooler(out_uri).pixels()["count"][:].sum()

        assert merged_total == 2 * single_total
def test_ln():
    """Exercise ``fileops.ln`` for hard, soft and between-file links.

    Each scenario works on a fresh copy of ``toy.symm.upper.2.mcool`` and
    checks that the link target compares equal to the linked source via
    ``cooler_cmp``.
    """
    with isolated_filesystem():
        src_file = op.join(testdir, 'data', 'toy.symm.upper.2.mcool')

        # within-file hard link
        test_file = 'test.hardlink.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', test_file + '::abc/d')
        # Only reads are needed here, so the mode is passed explicitly
        # instead of relying on h5py's default.
        with h5py.File(test_file, 'r') as f:
            assert 'resolutions/2' in f
            assert 'abc/d' in f
            assert f['resolutions/2'].id == f['abc/d'].id
        cooler_cmp(test_file + '::resolutions/2', test_file + '::abc/d')

        # within-file soft link
        test_file = 'test.softlink.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', test_file + '::abc/d',
                   soft=True)
        with h5py.File(test_file, 'r') as f:
            assert 'resolutions/2' in f
            assert 'abc/d' in f
            assert f['resolutions/2'].id == f['abc/d'].id
        cooler_cmp(test_file + '::resolutions/2', test_file + '::abc/d')

        # between-file external link
        test_file = 'test.extlink.mcool'
        dst_file = 'test.dst.cool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', dst_file + '::abc/d',
                   soft=True)
        cooler_cmp(test_file + '::resolutions/2', dst_file + '::abc/d')
def test_merge(path1, path2):
    """Check that merging ``path1`` and ``path2`` doubles the total count.

    The expected value is twice the summed ``count`` column of ``path1``.
    """
    def total_count(uri):
        # Sum of the whole 'count' column of the cooler at ``uri``.
        return cooler.Cooler(uri).pixels()['count'][:].sum()

    with isolated_filesystem():
        merge_coolers('test.cool', [path1, path2], mergebuf=int(15e6))
        expected = 2 * total_count(path1)
        assert total_count('test.cool') == expected
def test_create_scool(fp):
    """Create a .scool file from per-cell pixels and list its cells.

    ``create_scool`` is called twice on the same output path: once with a
    dict of per-cell bin tables and once with a single shared bin table.
    After each call all three cell groups must be listed.
    """
    from copy import deepcopy

    clr = cooler.Cooler(fp)
    bins = clr.bins()[:]
    pixels = clr.pixels()[:]

    # Give every cell different 'weight' and 'KR' columns to prove that only
    # chrom, start, end are linked and the rest is independent for each cell.
    n_bins = len(bins["start"])
    cell_names = ['cell1', 'cell2', 'cell3']
    name_pixel_dict = {name: pixels for name in cell_names}
    name_bins_dict = {}
    for idx, name in enumerate(cell_names):
        cell_bins = deepcopy(bins)
        cell_bins['weight'] = np.array([idx] * n_bins)
        cell_bins['KR'] = np.array([idx + 3] * n_bins)
        name_bins_dict[name] = cell_bins

    out_path = 'outfile_test.scool'
    content_expected = ['/cells/cell1', '/cells/cell2', '/cells/cell3']

    with isolated_filesystem():
        # First per-cell bin tables, then one bin table shared by all cells.
        for bins_arg in (name_bins_dict, bins):
            cooler.create_scool(out_path, bins_arg, name_pixel_dict)
            content_of_scool = cooler.fileops.list_scool_cells(out_path)
            for content in content_expected:
                assert content in content_of_scool
def test_recursive_agg():
    """Run ``legacy_zoomify`` on the 2000kb GM12878 matrix.

    The test makes no assertions; it passes if the call completes.
    """
    source_path = op.join(datadir, "hg19.GM12878-MboI.matrix.2000kb.cool")
    n_cpus, chunksize = 1, int(10e6)

    with isolated_filesystem():
        legacy_zoomify(source_path, "test.multires.cool", n_cpus, chunksize)
def test_zoomify():
    """Zoomify the 2bp toy cooler and compare each level with its reference.

    The zoomify step runs twice, without and with the base resolution in the
    requested list, and the same five resolutions are checked each time.
    A resolution list containing 5 must raise ``ValueError``.
    """
    kwargs = dict(chunksize=10, nproc=1, columns=None, dtypes=None, agg=None)
    base_path = op.join(datadir, "toy.asymm.2.cool")
    out_path = "test.2.mcool"
    checked = [2, 4, 8, 16, 32]

    with isolated_filesystem():
        # Second pass includes the base resolution.
        for requested in ([4, 8, 16, 32], [2, 4, 8, 16, 32]):
            zoomify_cooler(base_path, out_path, resolutions=requested,
                           **kwargs)
            for res in checked:
                produced = "test.2.mcool::resolutions/{}".format(res)
                reference = op.join(datadir, "toy.asymm.{}.cool".format(res))
                cooler_cmp(produced, reference)

        # impossible resolution to obtain
        with pytest.raises(ValueError):
            zoomify_cooler(base_path, out_path, resolutions=[4, 5, 32],
                           **kwargs)
def test_ln():
    """Exercise ``fileops.ln`` for hard, soft and between-file links.

    Each scenario works on a fresh copy of ``toy.symm.upper.2.mcool`` and
    checks that the link target compares equal to the linked source via
    ``cooler_cmp``.
    """
    with isolated_filesystem():
        src_file = op.join(testdir, "data", "toy.symm.upper.2.mcool")

        # within-file hard link
        test_file = "test.hardlink.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", test_file + "::abc/d")
        # Only reads are needed here, so the mode is passed explicitly
        # instead of relying on h5py's default.
        with h5py.File(test_file, "r") as f:
            assert "resolutions/2" in f
            assert "abc/d" in f
            assert f["resolutions/2"].id == f["abc/d"].id
        cooler_cmp(test_file + "::resolutions/2", test_file + "::abc/d")

        # within-file soft link
        test_file = "test.softlink.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", test_file + "::abc/d",
                   soft=True)
        with h5py.File(test_file, "r") as f:
            assert "resolutions/2" in f
            assert "abc/d" in f
            assert f["resolutions/2"].id == f["abc/d"].id
        cooler_cmp(test_file + "::resolutions/2", test_file + "::abc/d")

        # between-file external link
        test_file = "test.extlink.mcool"
        dst_file = "test.dst.cool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", dst_file + "::abc/d",
                   soft=True)
        cooler_cmp(test_file + "::resolutions/2", dst_file + "::abc/d")
def test_rename_chroms():
    """Rename chromosomes on a copy of the 4bp toy cooler.

    After ``cooler.rename_chroms`` the same ``Cooler`` object must report
    the new chromosome names.
    """
    from shutil import copyfile

    fname = "toy.asymm.4.cool"
    with isolated_filesystem():
        # Work on a copy so the shared data file is left untouched.
        copyfile(op.join(datadir, "toy.asymm.4.cool"), fname)
        clr = cooler.Cooler(fname)
        assert clr.chromnames == ["chr1", "chr2"]

        cooler.rename_chroms(clr, {"chr1": "1", "chr2": "2"})
        # the Cooler object is refreshed
        assert clr.chromnames == ["1", "2"]
def test_rename_chroms():
    """Rename chromosomes on a copy of the 4bp toy cooler.

    After ``cooler.rename_chroms`` the same ``Cooler`` object must report
    the new chromosome names.
    """
    from shutil import copyfile

    # The context manager's value was bound to an unused name; it is no
    # longer captured.
    with isolated_filesystem():
        copyfile(op.join(testdir, 'data', 'toy.asymm.4.cool'),
                 'toy.asymm.4.cool')
        clr = cooler.Cooler('toy.asymm.4.cool')
        assert clr.chromnames == ['chr1', 'chr2']
        cooler.rename_chroms(clr, {'chr1': '1', 'chr2': '2'})
        assert clr.chromnames == ['1', '2']  # the Cooler object is refreshed
def test_zoomify():
    """Zoomify the 2bp toy cooler and compare each level with its reference.

    Resolutions 4, 8, 16 and 32 are requested; resolution 2 is checked as
    well.
    """
    source = op.join(datadir, "toy.asymm.2.cool")
    with isolated_filesystem():
        zoomify_cooler(
            source,
            "test.2.mcool",
            resolutions=[4, 8, 16, 32],
            chunksize=10,
            nproc=1,
            columns=None,
            dtypes=None,
            agg=None,
        )
        for res in [2, 4, 8, 16, 32]:
            produced = "test.2.mcool::resolutions/{}".format(res)
            reference = op.join(datadir, "toy.asymm.{}.cool".format(res))
            cooler_cmp(produced, reference)
def test_mv():
    """Move a group inside an .mcool file with ``fileops.mv``.

    The old path must disappear, the new path must exist, and the moved
    group must compare equal to the same group in an untouched reference
    copy.
    """
    with isolated_filesystem():
        ref_file = "test.ref.mcool"
        src_file = "test.src.mcool"
        original = op.join(testdir, "data", "toy.symm.upper.2.mcool")
        shutil.copyfile(original, ref_file)
        shutil.copyfile(original, src_file)

        fileops.mv(src_file + "::resolutions/2", src_file + "::abc/d")

        # Only membership is checked, so the file is opened read-only with
        # an explicit mode instead of relying on h5py's default.
        with h5py.File(src_file, "r") as f:
            assert "resolutions/2" not in f
            assert "abc/d" in f
        cooler_cmp(ref_file + "::resolutions/2", src_file + "::abc/d")
def test_mv():
    """Move a group inside an .mcool file with ``fileops.mv``.

    The old path must disappear, the new path must exist, and the moved
    group must compare equal to the same group in an untouched reference
    copy.
    """
    with isolated_filesystem():
        ref_file = 'test.ref.mcool'
        src_file = 'test.src.mcool'
        original = op.join(testdir, 'data', 'toy.symm.upper.2.mcool')
        shutil.copyfile(original, ref_file)
        shutil.copyfile(original, src_file)

        fileops.mv(src_file + '::resolutions/2', src_file + '::abc/d')

        # Only membership is checked, so the file is opened read-only with
        # an explicit mode instead of relying on h5py's default.
        with h5py.File(src_file, 'r') as f:
            assert 'resolutions/2' not in f
            assert 'abc/d' in f
        cooler_cmp(ref_file + '::resolutions/2', src_file + '::abc/d')
def test_coarsen_partitions_correctly():
    """Coarsen ``odd.1.cool`` by 4 with ``chunksize=2`` and check the pixels.

    The result must have as many pixels as ``odd.4.cool``, contain no
    repeated ``(bin1_id, bin2_id)`` pair, and be numerically close to the
    reference pixel table.
    """
    reference_path = op.join(datadir, "odd.4.cool")
    input_path = op.join(datadir, "odd.1.cool")
    output_path = "odd.1.coarsen_4.cool"

    with isolated_filesystem():
        coarsen_cooler(
            input_path,
            output_path,
            factor=4,
            chunksize=2,
            nproc=1,
            columns=None,
            dtypes=None,
            agg=None,
        )
        expected = cooler.Cooler(reference_path).pixels()[:]
        observed = cooler.Cooler(output_path).pixels()[:]

        assert len(expected) == len(observed)
        n_repeated = observed[["bin1_id", "bin2_id"]].duplicated().sum()
        assert n_repeated == 0
        assert np.allclose(expected, observed)
def test_coarsen(input_uri, factor, ref_uri):
    """Coarsen ``input_uri`` by ``factor`` and compare with ``ref_uri``."""
    out_uri = 'test.cool'
    with isolated_filesystem():
        coarsen_cooler(
            input_uri,
            out_uri,
            factor,
            chunksize=10,
            nproc=1,
            columns=None,
            dtypes=None,
            agg=None,
        )
        cooler_cmp(out_uri, ref_uri)
def test_zoomify():
    """Zoomify the 2bp toy cooler and compare each level with its reference.

    Resolutions 4, 8, 16 and 32 are requested; resolution 2 is checked as
    well.
    """
    data_dir = op.join(testdir, 'data')
    with isolated_filesystem():
        zoomify_cooler(
            op.join(data_dir, 'toy.asymm.2.cool'),
            'test.2.mcool',
            resolutions=[4, 8, 16, 32],
            chunksize=10,
            nproc=1,
            columns=None,
            dtypes=None,
            agg=None,
        )
        for res in [2, 4, 8, 16, 32]:
            produced = 'test.2.mcool::resolutions/{}'.format(res)
            reference = op.join(data_dir, 'toy.asymm.{}.cool'.format(res))
            cooler_cmp(produced, reference)
def test_cp():
    """Copy a cooler group with ``fileops.cp``, across and within files.

    In both cases the copy must compare equal to its source. For the
    within-file copy the two groups must also have different HDF5 object
    ids.
    """
    # The context manager's value was bound to an unused name; it is no
    # longer captured.
    with isolated_filesystem():
        src_file = op.join(testdir, 'data', 'toy.symm.upper.2.mcool')

        # file-to-file
        src_uri = src_file + '::resolutions/2'
        fileops.cp(src_uri, 'test.2.cool')
        cooler_cmp(src_uri, 'test.2.cool')

        # within-file
        test_file = 'test.src.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.cp(test_file + '::resolutions/2', test_file + '::abc/d')
        cooler_cmp(test_file + '::resolutions/2', test_file + '::abc/d')
        # Only reads are needed here, so the mode is passed explicitly
        # instead of relying on h5py's default.
        with h5py.File(test_file, 'r') as f:
            assert 'resolutions/2' in f
            assert 'abc/d' in f
            assert f['resolutions/2'].id != f['abc/d'].id
def test_cp():
    """Copy a cooler group with ``fileops.cp``, across and within files.

    In both cases the copy must compare equal to its source. For the
    within-file copy the two groups must also have different HDF5 object
    ids.
    """
    with isolated_filesystem():
        src_file = op.join(testdir, "data", "toy.symm.upper.2.mcool")

        # file-to-file
        src_uri = src_file + "::resolutions/2"
        fileops.cp(src_uri, "test.2.cool")
        cooler_cmp(src_uri, "test.2.cool")

        # within-file
        test_file = "test.src.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.cp(test_file + "::resolutions/2", test_file + "::abc/d")
        cooler_cmp(test_file + "::resolutions/2", test_file + "::abc/d")
        # Only reads are needed here, so the mode is passed explicitly
        # instead of relying on h5py's default.
        with h5py.File(test_file, "r") as f:
            assert "resolutions/2" in f
            assert "abc/d" in f
            assert f["resolutions/2"].id != f["abc/d"].id
def test_create_custom_cols():
    """Create coolers whose pixel table has custom value columns.

    The pixel frame has ``foo`` and ``bar`` but no ``count`` column. Both
    the default and the ``ordered=True`` creation paths must round-trip all
    four columns; omitting ``columns`` must raise ``ValueError``.
    """
    pixel_cols = ["bin1_id", "bin2_id", "foo", "bar"]
    value_cols = ["foo", "bar"]

    with isolated_filesystem():
        df = pd.DataFrame(
            {
                "bin1_id": [0, 1, 1, 1, 2, 2, 3, 4, 5],
                "bin2_id": [1, 1, 3, 4, 5, 6, 7, 8, 9],
                "foo": [1, 1, 1, 1, 1, 2, 2, 2, 2],
                "bar": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
            },
            columns=pixel_cols,
        )
        bins = pd.DataFrame({
            "chrom": ["chr1"] * 5 + ["chr2"] * 5,
            "start": list(range(5)) * 2,
            "end": list(range(1, 6)) * 2,
        })

        # First pass: works in unordered mode (no ``ordered`` argument).
        # Second pass: works in ordered mode.
        for mode_kwargs in ({}, {"ordered": True}):
            cooler.create_cooler("test.cool", bins, df, columns=value_cols,
                                 **mode_kwargs)
            clr = cooler.Cooler("test.cool")
            assert len(clr.pixels().columns) == 4
            stored = clr.pixels()[["bin1_id", "bin2_id", "foo", "bar"]][:]
            assert np.allclose(df, stored)

        # raises if no custom columns specified and 'count' does not exist
        with pytest.raises(ValueError):
            cooler.create_cooler("test.cool", bins, df, columns=None,
                                 ordered=True)
def test_coarsen(input_uri, factor, ref_uri):
    """Coarsen ``input_uri`` by ``factor`` under several option sets.

    Covers the default options (compared with ``ref_uri``), a float dtype
    for ``count``, a custom aggregator, two worker processes, and a missing
    value column, which must raise ``ValueError``.
    """
    out_uri = "test.cool"
    with isolated_filesystem():
        # default options, checked against the reference
        coarsen_cooler(input_uri, out_uri, factor, chunksize=10, nproc=1,
                       columns=None, dtypes=None, agg=None)
        cooler_cmp(out_uri, ref_uri)

        # custom dtype (``agg`` is deliberately not passed in this case)
        coarsen_cooler(input_uri, out_uri, factor, chunksize=10, nproc=1,
                       columns=None, dtypes={'count': np.float64})
        with h5py.File('test.cool', 'r') as f:
            assert f['pixels/count'].dtype.kind == 'f'

        # custom aggregator
        coarsen_cooler(input_uri, out_uri, factor, chunksize=10, nproc=1,
                       columns=None, dtypes=None, agg={'count': 'mean'})

        # parallel
        coarsen_cooler(input_uri, out_uri, factor, chunksize=10, nproc=2,
                       columns=None, dtypes=None, agg=None)

        # raise on missing value column
        with pytest.raises(ValueError):
            coarsen_cooler(input_uri, out_uri, factor, chunksize=10, nproc=2,
                           columns=['missing'], dtypes=None, agg=None)
def test_ls_data_tree():
    """List the ``resolutions/2`` tree of the toy .mcool with ``fileops.ls``.

    The listing must contain the group itself plus its ``chroms``, ``bins``
    and ``pixels`` groups and the datasets named below.
    """
    expected_paths = (
        "/resolutions/2",
        "/resolutions/2/chroms",
        "/resolutions/2/chroms/name",
        "/resolutions/2/chroms/length",
        "/resolutions/2/bins",
        "/resolutions/2/bins/chrom",
        "/resolutions/2/bins/start",
        "/resolutions/2/bins/end",
        "/resolutions/2/pixels",
        "/resolutions/2/pixels/bin1_id",
        "/resolutions/2/pixels/bin2_id",
        "/resolutions/2/pixels/count",
    )
    with isolated_filesystem():
        src_file = op.join(testdir, "data", "toy.symm.upper.2.mcool")
        uri = src_file + '::' + 'resolutions/2'
        listing = fileops.ls(uri)
        for expected in expected_paths:
            assert expected in listing
def test_create_custom_cols():
    """Create coolers whose pixel table has custom value columns.

    The pixel frame has ``foo`` and ``bar`` but no ``count`` column. Both
    the default and the ``ordered=True`` creation paths must round-trip all
    four columns; omitting ``columns`` must raise ``ValueError``.
    """
    # The context manager's value was bound to an unused name; it is no
    # longer captured.
    with isolated_filesystem():
        df = pd.DataFrame(
            {
                'bin1_id': [0, 1, 1, 1, 2, 2, 3, 4, 5],
                'bin2_id': [1, 1, 3, 4, 5, 6, 7, 8, 9],
                'foo': [1, 1, 1, 1, 1, 2, 2, 2, 2],
                'bar': [.1, .2, .3, .4, .5, .6, .7, .8, .9],
            },
            columns=['bin1_id', 'bin2_id', 'foo', 'bar'],
        )
        bins = pd.DataFrame({
            'chrom': ['chr1'] * 5 + ['chr2'] * 5,
            'start': list(range(5)) * 2,
            'end': list(range(1, 6)) * 2,
        })

        # works in unordered mode
        cooler.create_cooler('test.cool', bins, df, columns=['foo', 'bar'])
        clr = cooler.Cooler('test.cool')
        assert len(clr.pixels().columns) == 4
        assert np.allclose(
            df,
            clr.pixels()[['bin1_id', 'bin2_id', 'foo', 'bar']][:])

        # works in ordered mode
        cooler.create_cooler('test.cool', bins, df, columns=['foo', 'bar'],
                             ordered=True)
        clr = cooler.Cooler('test.cool')
        assert len(clr.pixels().columns) == 4
        assert np.allclose(
            df,
            clr.pixels()[['bin1_id', 'bin2_id', 'foo', 'bar']][:])

        # raises if no custom columns specified and 'count' does not exist
        with pytest.raises(ValueError):
            cooler.create_cooler('test.cool', bins, df, columns=None,
                                 ordered=True)
def test_merge2():
    """Merge with a mean aggregator, then check that bad inputs are rejected.

    Merging the same file with itself under ``agg={'count': 'mean'}`` must
    keep the total count unchanged. Every incompatible combination listed
    below must raise ``ValueError``.
    """
    def data_path(name):
        # Absolute location of a file in the shared test data directory.
        return op.join(datadir, name)

    failing_cases = [
        # different resolution
        (("toy.symm.upper.2.cool", "toy.symm.upper.4.cool"), {}),
        # incompatible bins (both argument orders)
        (("toy.symm.upper.var.cool", "toy.symm.upper.2.cool"), {}),
        (("toy.symm.upper.2.cool", "toy.symm.upper.var.cool"), {}),
        # incompatible symmetry
        (("toy.symm.upper.2.cool", "toy.asymm.2.cool"), {}),
        # missing value column
        (("toy.symm.upper.2.cool", "toy.symm.upper.2.cool"),
         {"columns": ["missing"]}),
    ]

    with isolated_filesystem():
        path1 = data_path("toy.symm.upper.2.cool")
        path2 = data_path("toy.symm.upper.2.cool")
        merge_coolers("test.cool", [path1, path2], mergebuf=3,
                      agg={'count': 'mean'})
        single_total = cooler.Cooler(path1).pixels()["count"][:].sum()
        merged_total = cooler.Cooler("test.cool").pixels()["count"][:].sum()
        assert merged_total == single_total

        for (first, second), extra in failing_cases:
            inputs = [data_path(first), data_path(second)]
            with pytest.raises(ValueError):
                merge_coolers("test.cool", inputs, mergebuf=3, **extra)
def test_create_cooler_from_dask():
    """Create a cooler from a dask dataframe of pixels.

    The test is skipped when ``dask.dataframe`` cannot be imported. It makes
    no assertions; it passes if creation completes.
    """
    dd = pytest.importorskip("dask.dataframe")

    sizes_path = op.join(datadir, "toy.chrom.sizes")
    bins = cooler.util.binnify(cooler.util.read_chromsizes(sizes_path), 1)

    pixel_frame = pd.read_csv(
        op.join(datadir, "toy.symm.upper.1.zb.coo"),
        sep='\t',
        names=['bin1_id', 'bin2_id', 'count'],
    )
    dask_pixels = dd.from_pandas(pixel_frame, npartitions=10)

    with isolated_filesystem():
        cooler.create.create_cooler("test.cool", bins, dask_pixels,
                                    ordered=True)
def test_create_cooler():
    """Call ``create_cooler`` with many input shapes and option sets.

    The successful calls carry no assertions; they pass if creation
    completes. The final three calls must raise ``ValueError``.
    """
    create = cooler.create.create_cooler
    out = "test.cool"

    chromsizes = cooler.util.read_chromsizes(
        op.join(datadir, "toy.chrom.sizes"))
    bins = cooler.util.binnify(chromsizes, 1)
    pixels = pd.read_csv(
        op.join(datadir, "toy.symm.upper.1.zb.coo"),
        sep='\t',
        names=['bin1_id', 'bin2_id', 'count'],
    )
    pixels['foo'] = 42.0

    with isolated_filesystem():
        # assembly name and metadata
        create(out, bins, pixels, assembly='toy',
               metadata={'hello': 'world', 'list': [1, 2, 3]})

        # a group path inside the file
        create("test.cool::foo/bar", bins, pixels)

        # symmetric_upper switched off
        create(out, bins, pixels, symmetric_upper=False)

        # extra value column with an explicit dtype
        create(out, bins, pixels, columns=['count', 'foo'],
               dtypes={'foo': np.float64})

        # pixels given as a dict of series, and as one-element tuples
        create(out, bins, pixels.to_dict(orient='series'))
        create(out, bins, (pixels,))
        create(out, bins, (pixels.to_dict(orient='series'),))

        # pixels split in two halves: in order, then reversed and unordered
        half = len(pixels) // 2
        two_piece = (pixels.iloc[:half], pixels.iloc[half:])
        create(out, bins, two_piece, ordered=True)
        create(out, bins, two_piece[::-1], ordered=False)

        # many reversed pieces with max_merge=10
        many_piece = tuple(
            pixels.iloc[lo:hi]
            for lo, hi in cooler.util.partition(0, len(pixels), 5)
        )[::-1]
        create(out, bins, many_piece, ordered=False, max_merge=10)

        # a requested value column that does not exist
        with pytest.raises(ValueError):
            create(out, bins, pixels, columns=['count', 'missing'])

        # NOTE(review): the next two cases each pass two invalid inputs at
        # once (a bin table without 'chrom' plus a missing column or an
        # unknown h5opts key), so it is unclear from here which check
        # raises - confirm the intent.
        with pytest.raises(ValueError):
            create(out, bins[['start', 'end']], pixels,
                   columns=['count', 'missing'])

        with pytest.raises(ValueError):
            create(out, bins[['start', 'end']], pixels,
                   h5opts={'shuffuffle': 'boing'})