def test_binnify():
    """Check the number of bins ``bioframe.binnify`` yields for the test chromsizes.

    The chromsizes fixture is expected to hold exactly two entries. Three bin
    widths are exercised: the largest chromosome size, the smallest chromosome
    size, and a width of one.
    """
    sizes_path = testdir + "/test_data/test.chrom.sizes"
    chromsizes = bioframe.read_chromsizes(sizes_path, filter_chroms=False)
    n_chroms = len(chromsizes)
    assert n_chroms == 2

    sizes = chromsizes.values
    widest = int(np.max(sizes))
    narrowest = int(np.min(sizes))

    # Bin width equal to the largest size: expect as many bins as chromosomes.
    bins_at_max = bioframe.binnify(chromsizes, widest)
    assert len(bins_at_max) == n_chroms

    # Bin width equal to the smallest size: expect exactly one additional bin.
    bins_at_min = bioframe.binnify(chromsizes, narrowest)
    assert len(bins_at_min) == (n_chroms + 1)

    # Unit-width bins: expect one bin per unit of total chromosome size.
    unit_bins = bioframe.binnify(chromsizes, 1)
    assert len(unit_bins) == np.sum(sizes)
# Example #2
# 0
def binnify(chromsizes_path, binsize, all_names):
    """Print a table of genomic bins for a chromsizes file as tab-separated text.

    Parameters
    ----------
    chromsizes_path
        Location of the chromsizes file, forwarded to
        ``bioframe.read_chromsizes``.
    binsize
        Bin width, forwarded to ``bioframe.binnify``.
    all_names
        When truthy, ``read_chromsizes`` is called with
        ``filter_chroms=False``; otherwise with ``filter_chroms=True``.

    Returns
    -------
    None
        The table is written to standard output (no index column).
    """
    import bioframe

    apply_filter = not all_names
    chromsizes = bioframe.read_chromsizes(
        chromsizes_path,
        filter_chroms=apply_filter,
    )
    bin_table = bioframe.binnify(chromsizes, binsize)
    tsv_text = bin_table.to_csv(sep="\t", index=False)
    print(tsv_text)
# Example #3
# 0
def gene_content(genome, binsize, gc=True, *, fasta_path=None):
    """Compute per-bin gene coverage for a genome, optionally with GC fraction.

    Parameters
    ----------
    genome
        Assembly name. It is passed to ``bioframe.fetch_chromsizes`` and
        ``frac_gene_coverage``, and is used to build the default FASTA path.
    binsize
        Bin width passed to ``binnify``.
    gc
        When truthy, a ``'frac_gc'`` column computed by ``frac_gc`` is added
        to the result.
    fasta_path
        Optional location of the genome FASTA file, used only when ``gc`` is
        truthy. When ``None`` (the default) the original site-specific
        location ``/net/levsha/share/lab/genomes/<genome>/<genome>.fa`` is
        used, so existing callers behave exactly as before.

    Returns
    -------
    The object returned by ``frac_gene_coverage``, with a ``'frac_gc'`` entry
    added when ``gc`` is truthy.
    """
    chrom_sizes = bioframe.fetch_chromsizes(genome)
    # NOTE(review): ``binnify`` is called with two arguments here, so it is
    # presumably bioframe's binnify imported elsewhere - confirm.
    chrom_table = binnify(chrom_sizes, binsize)

    gene_count = frac_gene_coverage(chrom_table, genome)
    if gc:
        if fasta_path is None:
            # Fall back to the historical hard-coded location so the default
            # behaviour is unchanged.
            fasta_path = f'/net/levsha/share/lab/genomes/{genome}/{genome}.fa'
        fasta_records = load_fasta(fasta_path)
        gene_count['frac_gc'] = frac_gc(chrom_table, fasta_records)

    return gene_count
def test_frac_mapped():
    """Check ``bioframe.frac_mapped`` on the test FASTA at bin widths 1, 5 and 7.

    The test is skipped when ``pysam`` cannot be imported. Expected values are
    the per-bin mapped fractions; they are compared with a floating-point
    tolerance rather than exact equality, so an equivalent computation that
    rounds differently does not cause a spurious failure.
    """
    pytest.importorskip("pysam")
    chromsizes = bioframe.read_chromsizes(
        testdir + "/test_data/test.chrom.sizes",
        filter_chroms=False,
    )
    fasta_records = bioframe.load_fasta(testdir + "/test_data/test.fa")

    # Expected mapped fraction per bin, keyed by bin width.
    expected_by_binsize = {
        1: [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
        5: [0.8, 0.8, 0],
        7: [0.8, 4 / 7],
    }

    for binsize, expected in expected_by_binsize.items():
        observed = bioframe.frac_mapped(
            bioframe.binnify(chromsizes, binsize),
            fasta_records,
            return_input=False,
        ).values
        # assert_allclose reports the mismatching elements on failure and
        # rejects incompatible shapes instead of silently comparing them.
        np.testing.assert_allclose(
            observed,
            expected,
            err_msg=f"frac_mapped mismatch for binsize={binsize}",
        )
def test_frac_gc():
    """Check ``bioframe.frac_gc`` on the test FASTA with and without ``mapped_only``.

    The test is skipped when ``pysam`` cannot be imported. All numeric
    expectations use ``np.testing.assert_allclose``, which compares with a
    floating-point tolerance and treats NaN as equal to NaN by default, so
    every check follows the same comparison rule.
    """
    pytest.importorskip("pysam")
    chromsizes = bioframe.read_chromsizes(
        testdir + "/test_data/test.chrom.sizes",
        filter_chroms=False,
    )
    fasta_records = bioframe.load_fasta(testdir + "/test_data/test.fa")

    def gc_values(binsize, mapped_only):
        # GC fraction per bin for the given bin width, as a plain array.
        return bioframe.frac_gc(
            bioframe.binnify(chromsizes, binsize),
            fasta_records,
            return_input=False,
            mapped_only=mapped_only,
        ).values

    # Boolean mask of unit-width bins whose mapped fraction is zero.
    unmapped_bp = (0 == bioframe.frac_mapped(
        bioframe.binnify(chromsizes, 1),
        fasta_records,
        return_input=False,
    ).values)
    assert np.isnan(gc_values(1, True)[unmapped_bp]).all()

    ## mapped_only=True should ignore N or return np.nan if interval only contains N
    np.testing.assert_allclose(gc_values(5, True), [0.5, 0.5, np.nan])
    np.testing.assert_allclose(gc_values(7, True), [0.5, 0.5])

    ## mapped_only=False should count N as zero
    np.testing.assert_allclose(gc_values(5, False), [0.4, 0.4, 0])
    np.testing.assert_allclose(gc_values(7, False), [0.4, 2 / 7])