示例#1
0
def test_strandness():
    """Check the values returned by ``BAM.infer_strandness`` on the hg38 chr18 data.

    The first element of the result must be the string ``'Paired-end'`` and
    the next three numeric elements must fall within the recorded bounds.
    """
    bam = BAM(sequana_data("test_hg38_chr18.bam"))
    bed_file = sequana_data("hg38_chr18.bed")
    outcome = bam.infer_strandness(bed_file, 200000)

    # Library layout reported for this dataset.
    assert outcome[0] == 'Paired-end'

    # Numeric components, each checked against its own threshold.
    assert outcome[1] > 0.94
    assert outcome[2] < 0.06
    assert outcome[3] < 0.0011
示例#2
0
def test_alignment():
    """Wrap the first record of the test BAM in ``Alignment`` and export it as a dict."""
    bam = BAM(datatest)

    # Rewinding is not strictly needed here, but it does no harm and is a
    # reminder that reset() should generally be called to make sure
    # iteration starts at the beginning of the iterator.
    bam.reset()

    first_record = next(bam)
    alignment = Alignment(first_record)
    alignment.as_dict()
示例#3
0
def test_alignment():
    """Wrap the first record of ``test.bam`` in ``Alignment`` and export it as a dict."""
    bam_path = sequana_data("test.bam", "testing")
    bam = BAM(bam_path)

    # Rewinding is not strictly needed here, but it does no harm and is a
    # reminder that reset() should generally be called to make sure
    # iteration starts at the beginning of the iterator.
    bam.reset()

    first_record = next(bam)
    alignment = Alignment(first_record)
    alignment.as_dict()
示例#4
0
def test_cs_in_bam():
    """Check the summary and the concordance total of the tiny CS-tagged BAM."""
    import math

    bam = BAM(sequana_data("test_CS_tiny.bam"))

    expected_summary = {
        'flags': {0: 2, 16: 2},
        'mapq': {60: 4},
        'mean_quality': 0.0,
        'read_length': {1772: 1, 10779: 1, 13726: 1, 20480: 1},
    }
    assert bam.summary == expected_summary

    # Sum over every cell of the concordance table; only the integer part is
    # compared (exact value is 103769.5600734975).
    concordance = bam.get_df_concordance()
    grand_total = concordance.sum().sum()
    assert math.floor(grand_total) == 103769
示例#5
0
def test_cs_in_bam():
    """Check the summary and the concordance total of the tiny CS-tagged BAM."""
    import math

    bam = BAM(sequana_data("test_CS_tiny.bam"))

    expected_summary = {
        'flags': {0: 2, 16: 2},
        'mapq': {60: 4},
        'mean_quality': 0.0,
        'read_length': {1772: 1, 10779: 1, 13726: 1, 20480: 1},
    }
    assert bam.summary == expected_summary

    # Sum over every cell of the concordance table; only the integer part is
    # compared.
    # NOTE(review): a previously recorded exact value was 103769.5600734975,
    # which does not floor to 103813 -- confirm which figure is current.
    concordance = bam.get_df_concordance()
    grand_total = concordance.sum().sum()
    assert math.floor(grand_total) == 103813
示例#6
0
def test_bam(tmpdir):
    """Exercise the main ``BAM`` API on the shared ``datatest`` file.

    Covers the read count, mapped/unmapped iteration, JSON export,
    statistics, plotting, FASTQ export and the GC-content helpers.

    :param tmpdir: not used by the body (presumably the pytest ``tmpdir``
        fixture -- kept so the signature is unchanged).
    """

    s = BAM(datatest)
    assert len(s) == 1000
    assert s.is_sorted is True

    # The iterator is rewound after each full pass.
    assert len(list(s.iter_unmapped_reads())) == 2
    s.reset()
    assert len(list(s.iter_mapped_reads())) == 998
    s.reset()

    # call this here before other computations on purpose
    with TempFile(suffix=".json") as fh:
        s.bam_analysis_to_json(fh.name)

    assert s.get_read_names()
    s.get_mapped_read_length()

    s.get_stats()
    s.get_full_stats_as_df()

    with TempFile(suffix='.png') as fh:
        s.plot_bar_flags(filename=fh.name, logy=True)
        s.plot_bar_flags(filename=fh.name)

    with TempFile(suffix='.png') as fh:
        s.plot_bar_mapq(filename=fh.name)

    with TempFile() as fh:
        s.to_fastq(fh.name)
        from sequana import FastQ
        ff = FastQ(fh.name)
        # NOTE(review): this comparison is evaluated but not asserted, so it
        # checks nothing -- confirm the lengths are expected to match before
        # turning it into an assertion.
        len(ff) == len(s)

    s.get_gc_content()
    s.get_length_count()
    s.plot_gc_content()

    # This ``bins`` value is expected to be rejected. The failure is raised
    # from the ``else`` clause so that the handler cannot swallow it (an
    # ``assert False`` inside the ``try`` would be caught by the ``except``).
    try:
        s.plot_gc_content(bins=[1, 2, 10])
    except Exception:
        pass
    else:
        raise AssertionError(
            "plot_gc_content(bins=[1, 2, 10]) was expected to raise")
示例#7
0
def test_bam_others():
    """Run the read-length check and the plotting helpers on the measles BAM."""
    bam = BAM(sequana_data("measles.fa.sorted.bam"))
    assert len(bam) == 2998

    # plot_read_length and its underlying data
    xs, ys = bam._get_read_length()
    assert sum(ys) == 2623

    # Smoke-test the plotting methods; only the absence of errors is checked.
    bam.plot_read_length()
    bam.plot_acgt_content()
    bam.hist_coverage()
    bam.plot_coverage()
    bam.boxplot_qualities()
    bam.plot_indel_dist()
示例#8
0
def test_bam_others():
    """Run the read-length check and the plotting helpers on the measles BAM."""
    bam = BAM(sequana_data("measles.fa.sorted.bam"))
    assert len(bam) == 2998

    # plot_read_length and its underlying data
    xs, ys = bam._get_read_length()
    assert sum(ys) == 2623

    # Smoke-test the plotting methods; only the absence of errors is checked.
    bam.plot_read_length()
    bam.hist_coverage()
    bam.plot_coverage()
    bam.boxplot_qualities()
    bam.plot_indel_dist()
示例#9
0
def test_bam(tmpdir):
    """Exercise the main ``BAM`` API on the ``test.bam`` testing file.

    Covers the read count, the concordance table, JSON export, statistics,
    FASTQ export, plotting and the GC-content helpers.

    :param tmpdir: not used by the body (presumably the pytest ``tmpdir``
        fixture -- kept so the signature is unchanged).
    """
    datatest = sequana_data("test.bam", "testing")
    s = BAM(datatest)
    assert len(s) == 1000
    assert s.is_sorted is True
    df = s.get_df_concordance()
    assert s.is_paired is True
    assert int(df.length.sum()) == 67938
    assert int(df.M.sum()) == 67788

    # call this here before other computations on purpose
    with TempFile(suffix=".json") as fh:
        s.bam_analysis_to_json(fh.name)

    assert s.get_read_names()
    s.get_mapped_read_length()

    s.get_stats()
    s.get_full_stats_as_df()

    with TempFile() as fh:
        s.to_fastq(fh.name)
        from sequana import FastQ
        ff = FastQ(fh.name)
        # NOTE(review): this comparison is evaluated but not asserted, so it
        # checks nothing -- confirm the lengths are expected to match before
        # turning it into an assertion.
        len(ff) == len(s)

    # plotting
    with TempFile(suffix='.png') as fh:
        s.plot_bar_flags(filename=fh.name, logy=True)
        s.plot_bar_flags(filename=fh.name)

    with TempFile(suffix='.png') as fh:
        s.plot_bar_mapq(filename=fh.name)

    s.get_gc_content()
    s.get_length_count()
    s.plot_gc_content()
    s.boxplot_qualities()
    s.boxplot_qualities(max_sample=50)

    # This ``bins`` value is expected to be rejected. The failure is raised
    # from the ``else`` clause so that the handler cannot swallow it (an
    # ``assert False`` inside the ``try`` would be caught by the ``except``).
    try:
        s.plot_gc_content(bins=[1, 2, 10])
    except Exception:
        pass
    else:
        raise AssertionError(
            "plot_gc_content(bins=[1, 2, 10]) was expected to raise")
示例#10
0
def test_bam(tmpdir):
    """Exercise the main ``BAM`` API on the ``test.bam`` testing file.

    Covers the read count, the concordance and full data frames, JSON
    export, statistics, FASTQ export, plotting and the GC-content helpers.

    :param tmpdir: not used by the body (presumably the pytest ``tmpdir``
        fixture -- kept so the signature is unchanged).
    """
    datatest = sequana_data("test.bam", "testing")
    s = BAM(datatest)
    assert len(s) == 1000
    assert s.is_sorted is True
    df = s.get_df_concordance()
    assert s.is_paired is True
    assert int(df.length.sum()) == 67938
    assert int(df.M.sum()) == 67788

    # Only checks that the call succeeds; the result is not inspected.
    df = s.get_df()

    # call this here before other computations on purpose
    with TempFile(suffix=".json") as fh:
        s.bam_analysis_to_json(fh.name)

    assert s.get_read_names()
    s.get_mapped_read_length()

    s.get_stats()
    s.get_stats_full()
    s.get_samtools_stats_as_df()

    with TempFile() as fh:
        s.to_fastq(fh.name)
        from sequana import FastQ
        ff = FastQ(fh.name)
        # NOTE(review): this comparison is evaluated but not asserted, so it
        # checks nothing -- confirm the lengths are expected to match before
        # turning it into an assertion.
        len(ff) == len(s)

    # plotting
    with TempFile(suffix='.png') as fh:
        s.plot_bar_flags(filename=fh.name, logy=True)
        s.plot_bar_flags(filename=fh.name)

    with TempFile(suffix='.png') as fh:
        s.plot_bar_mapq(filename=fh.name)

    s.get_gc_content()
    s.get_length_count()
    s.plot_gc_content()
    s.boxplot_qualities()
    s.boxplot_qualities(max_sample=50)

    # This ``bins`` value is expected to be rejected. The failure is raised
    # from the ``else`` clause so that the handler cannot swallow it (an
    # ``assert False`` inside the ``try`` would be caught by the ``except``).
    try:
        s.plot_gc_content(bins=[1, 2, 10])
    except Exception:
        pass
    else:
        raise AssertionError(
            "plot_gc_content(bins=[1, 2, 10]) was expected to raise")
示例#11
0
def test_mRNA_inner_distance():
    """Check the mean of the ``'val'`` column returned by ``mRNA_inner_distance``."""
    bam = BAM(sequana_data("test_hg38_chr18.bam"))
    bed_file = sequana_data("hg38_chr18.bed")
    result = bam.mRNA_inner_distance(bed_file)

    # Figures recorded alongside this test:
    #   Total read pairs used: 382
    #   mean insert size: 88.3975155279503
    # NOTE(review): the bounds asserted below (1436..1437) do not match the
    # mean insert size quoted above -- confirm what result[0]['val'] holds.
    mean_val = result[0]['val'].mean()
    assert 1436 < mean_val < 1437
示例#12
0
def test_insert_size():
    """Run the insert-size helpers on several alignment files."""

    sam_path = sequana_data("test_measles.sam", "testing")
    # CRAM input is currently disabled:
    # cram_path = sequana_data("test_measles.cram", "testing")
    bam_path = sequana_data("test.bam", "testing")
    cs_path = sequana_data("test_CS_tiny.bam")

    measles = BAM(sam_path)
    # Exercise the max_entries argument.
    insert_sizes = measles._get_insert_size_data(10)
    assert len(insert_sizes) == 7
    measles.get_estimate_insert_size(100)
    measles.get_estimate_insert_size()
    measles.plot_insert_size()

    # Disabled together with the CRAM input above:
    # BAM(cram_path).get_estimate_insert_size()

    paired = BAM(bam_path)
    paired.get_estimate_insert_size()

    # For the tiny CS file the estimate is expected to be exactly 0.
    cs_bam = BAM(cs_path)
    assert cs_bam.get_estimate_insert_size() == 0