Пример #1
0
def test_load_zero_one_based_bg2():
    """Round-trip 1-based and 0-based BG2 pixel files through ``load``.

    Each input file is loaded into ``testcool_path`` with the matching
    ``one_based`` flag, then the joined pixel table read back from the
    cooler is compared against the raw input file.
    """
    bg2_columns = [
        "chrom1", "start1", "end1", "chrom2", "start2", "end2", "count"
    ]
    load_options = {
        "format": "bg2",
        "metadata": None,
        "assembly": "toy",
        "chunksize": 10,
        "field": (),
        "count_as_float": False,
        "comment_char": "#",
        "input_copy_status": "unique",
        "no_symmetric_upper": False,
        "storage_options": None,
    }
    # (input file, whether its start coordinates are 1-based)
    cases = (
        ("toy.symm.upper.1.ob.bg2", True),
        ("toy.symm.upper.1.zb.bg2", False),
    )

    for fname, is_one_based in cases:
        # NOTE(review): the ":1" suffix presumably selects a bin size of 1
        # for the chromsizes file -- confirm against load's argument spec.
        bins_path = op.join(testdir, "data", "toy.chrom.sizes") + ":1"
        pixels_path = op.join(testdir, "data", fname)
        load.callback(
            bins_path,
            pixels_path,
            testcool_path,
            one_based=is_one_based,
            **load_options
        )

        # Reference table: the input file exactly as written on disk.
        expected = pd.read_csv(pixels_path, sep="\t", names=bg2_columns)

        # Table read back from the freshly written cooler.
        observed = cooler.Cooler(testcool_path).pixels(join=True)[:]
        if is_one_based:
            # Shift the stored starts by +1 so they line up with the
            # 1-based reference file.
            observed["start1"] += 1
            observed["start2"] += 1

        assert np.all(observed == expected)
Пример #2
0
def test_load_zero_one_based_bg2():
    """Check that ``load`` honours ``one_based`` for BG2 input.

    A 1-based and a 0-based BG2 file are each loaded into
    ``testcool_path``; the joined pixels read back from the cooler must
    equal the contents of the corresponding input file.
    """
    column_names = [
        'chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'count'
    ]
    load_opts = {
        'format': 'bg2',
        'metadata': None,
        'assembly': 'toy',
        'chunksize': 10,
        'field': (),
        'count_as_float': False,
        'comment_char': '#',
        'input_copy_status': 'unique',
        'no_symmetric_upper': False,
        'storage_options': None,
    }

    def roundtrip(fname, one_based):
        """Load ``fname`` and return ``(stored, reference)`` pixel tables."""
        # NOTE(review): ':1' presumably requests a bin size of 1 -- confirm.
        bins_path = op.join(testdir, 'data', 'toy.chrom.sizes') + ':1'
        pixels_path = op.join(testdir, 'data', fname)
        load.callback(
            bins_path,
            pixels_path,
            testcool_path,
            one_based=one_based,
            **load_opts
        )
        reference = pd.read_csv(pixels_path, sep='\t', names=column_names)
        stored = cooler.Cooler(testcool_path).pixels(join=True)[:]
        return stored, reference

    # 1-based-start BG2 input: shift the stored starts by +1 before
    # comparing with the 1-based reference file.
    out_df, ref_df = roundtrip('toy.symm.upper.1.ob.bg2', True)
    for start_col in ('start1', 'start2'):
        out_df[start_col] += 1
    assert np.all(out_df == ref_df)

    # 0-based-start BG2 input: stored table is compared unchanged.
    out_df, ref_df = roundtrip('toy.symm.upper.1.zb.bg2', False)
    assert np.all(out_df == ref_df)
Пример #3
0
def test_load_zero_one_based_coo():
    """Round-trip 1-based and 0-based COO pixel files through ``load``.

    Each input file is loaded into ``testcool_path`` with the matching
    ``one_based`` flag, and the (unjoined) pixel table read back from the
    cooler is compared against the raw input file.
    """
    coo_columns = ['bin1_id', 'bin2_id', 'count']
    load_options = {
        'format': 'coo',
        'metadata': None,
        'assembly': 'toy',
        'chunksize': 10,
        'field': (),
        'count_as_float': False,
        'comment_char': '#',
        'symmetric_input': 'unique',
        'no_symmetric_storage': False,
        'storage_options': None,
    }
    # (input file, whether its bin IDs are 1-based)
    cases = [
        ('toy.symm.upper.1.ob.coo', True),
        ('toy.symm.upper.1.zb.coo', False),
    ]

    for fname, is_one_based in cases:
        # NOTE(review): ':1' presumably requests a bin size of 1 -- confirm.
        bins_path = op.join(testdir, 'data', 'toy.chrom.sizes') + ':1'
        pixels_path = op.join(testdir, 'data', fname)
        load.callback(
            bins_path,
            pixels_path,
            testcool_path,
            one_based=is_one_based,
            **load_options
        )

        # Reference table: the input file exactly as written on disk.
        expected = pd.read_table(pixels_path, names=coo_columns)

        # Table read back from the freshly written cooler.
        observed = cooler.Cooler(testcool_path).pixels()[:]
        if is_one_based:
            # Shift the stored bin IDs by +1 so they line up with the
            # 1-based reference file.
            observed['bin1_id'] += 1
            observed['bin2_id'] += 1

        assert np.all(observed == expected)
Пример #4
0
def test_load_bg2_vs_coo():
    """Load one matrix from BG2 and from COO input and compare the results.

    Both inputs are loaded against the same bin table into two separate
    cooler files under ``tmp``; the ``bin1_id``, ``bin2_id`` and ``count``
    pixel columns of the two files must be identical.

    The output files are removed in a ``finally`` clause so that a failing
    load or assertion does not leave them behind.
    """
    kwargs = dict(
        metadata=None,
        assembly='hg19',
        chunksize=int(20e6),
        field=(),
        count_as_float=False,
        one_based=False,
        comment_char='#',
        symmetric_input='unique',
        no_symmetric_storage=False,
        storage_options=None,
    )

    out_path1 = op.join(tmp, 'test1.cool')
    out_path2 = op.join(tmp, 'test2.cool')

    try:
        load.callback(
            op.join(testdir, 'data', 'hg19.bins.2000kb.bed.gz'),
            op.join(testdir, 'data', 'hg19.GM12878-MboI.matrix.2000kb.bg2.gz'),
            out_path1,
            format='bg2',
            **kwargs
        )
        load.callback(
            op.join(testdir, 'data', 'hg19.bins.2000kb.bed.gz'),
            op.join(testdir, 'data', 'hg19.GM12878-MboI.matrix.2000kb.coo.txt'),
            out_path2,
            format='coo',
            **kwargs
        )

        with h5py.File(out_path1, 'r') as f1, \
             h5py.File(out_path2, 'r') as f2:

            for col in ['bin1_id', 'bin2_id', 'count']:
                # array_equal also checks the shapes, so columns of
                # different length fail the assertion instead of being
                # broadcast or raising inside the comparison.
                assert np.array_equal(
                    f1['pixels'][col][:], f2['pixels'][col][:]
                )
    finally:
        # Best-effort cleanup: a file may not exist if its load failed.
        for fp in [out_path1, out_path2]:
            try:
                os.remove(fp)
            except OSError:
                pass
Пример #5
0
def test_load_bg2_vs_coo():
    """Load one matrix from BG2 and from COO input and compare the results.

    Both inputs are loaded against the same bin table into two separate
    cooler files under ``tmp``; the ``bin1_id``, ``bin2_id`` and ``count``
    pixel columns of the two files must be identical.

    The output files are removed in a ``finally`` clause so that a failing
    load or assertion does not leave them behind.
    """
    kwargs = dict(
        metadata=None,
        assembly="hg19",
        chunksize=int(20e6),
        field=(),
        count_as_float=False,
        one_based=False,
        comment_char="#",
        input_copy_status="unique",
        no_symmetric_upper=False,
        storage_options=None,
    )

    out_path1 = op.join(tmp, "test1.cool")
    out_path2 = op.join(tmp, "test2.cool")

    try:
        load.callback(op.join(testdir, "data", "hg19.bins.2000kb.bed.gz"),
                      op.join(testdir, "data",
                              "hg19.GM12878-MboI.matrix.2000kb.bg2.gz"),
                      out_path1,
                      format="bg2",
                      **kwargs)
        load.callback(op.join(testdir, "data", "hg19.bins.2000kb.bed.gz"),
                      op.join(testdir, "data",
                              "hg19.GM12878-MboI.matrix.2000kb.coo.txt"),
                      out_path2,
                      format="coo",
                      **kwargs)

        with h5py.File(out_path1, "r") as f1, h5py.File(out_path2, "r") as f2:

            for col in ["bin1_id", "bin2_id", "count"]:
                # array_equal also checks the shapes, so columns of
                # different length fail the assertion instead of being
                # broadcast or raising inside the comparison.
                assert np.array_equal(f1["pixels"][col][:],
                                      f2["pixels"][col][:])
    finally:
        # Best-effort cleanup: a file may not exist if its load failed.
        for fp in [out_path1, out_path2]:
            try:
                os.remove(fp)
            except OSError:
                pass