Exemplo n.º 1
0
def test_hist_metrics(insert_metrics):
    """Parse ``medium.insert_size_metrics`` as default and ``hist`` tables.

    The fixture is unpacked as a 5-tuple whose last item is the directory
    containing the file. The default table must report a median insert
    size of 367 and the ``hist`` table must start with sizes 19, 22, 23.
    """
    _module, _command, _version, _end, pdir = insert_metrics
    metrics_file = pdir.join("medium.insert_size_metrics")
    metrics_df = odo(str(metrics_file), DataFrame)
    hist_df = odo(str(metrics_file), DataFrame, key="hist")

    median_matches = metrics_df["MEDIAN_INSERT_SIZE"] == [367]
    assert all(median_matches)

    leading_sizes_match = hist_df["insert_size"][0:3] == [19, 22, 23]
    assert all(leading_sizes_match)
Exemplo n.º 2
0
def test_vsearch_fastq_stats(data):
    """Check column names and index names of two tables in the stats file.

    ``medium.fastq_stats.txt`` is converted once without a key and once with
    ``key="Truncate at first Q"``; each result is checked for its expected
    column labels and index name.
    """
    _module, _command, _version, _end, pdir = data
    stats_path = str(pdir.join("medium.fastq_stats.txt"))

    # Table returned when no key is given.
    default_table = odo(stats_path, DataFrame)
    assert list(default_table.columns) == ["N", "Pct", "AccPct"]
    assert default_table.index.name == "L"

    # Table selected explicitly by key.
    truncate_table = odo(stats_path, DataFrame, key="Truncate at first Q")
    assert list(truncate_table.columns) == ["Q=5", "Q=10", "Q=15", "Q=20"]
    assert truncate_table.index.name == "Len"
Exemplo n.º 3
0
def test_qualimap(data):
    """Parse the first file in the fixture directory, by command type.

    Commands starting with ``qualimap_bamqc_genome_results`` are read with
    ``key='Coverage_per_contig'`` and checked for column names and the first
    index label. Every other command is read without a key and only checked
    for the absence of ``#`` in the first column label.
    """
    _module, command, _version, _end, pdir = data
    is_genome_results = command.startswith("qualimap_bamqc_genome_results")
    first_file = str(pdir.listdir()[0])

    if not is_genome_results:
        table = odo(first_file, DataFrame)
        assert "#" not in table.columns[0]
        return

    coverage = odo(first_file, DataFrame, key='Coverage_per_contig')
    expected_columns = ['chrlen', 'mapped_bases', 'mean_coverage', 'sd']
    assert list(coverage.columns) == expected_columns
    assert list(coverage.index)[0] == 'scaffold1'
Exemplo n.º 4
0
def test_QUAL(bcftools_stats):
    """Check the ``QUAL`` table parsed from ``medium.call.stats``.

    The table must expose the ``number_of_transitions_(1st_ALT)`` column, and
    the ``number_of_SNPs`` value at index label 3 must be 83 when ``end`` is
    ``"pe"`` and 90 otherwise.
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    qual = odo(stats_path, DataFrame, key="QUAL")

    column_names = list(qual.columns)
    assert "number_of_transitions_(1st_ALT)" in column_names

    if end == "pe":
        expected_snps = 83
    else:
        expected_snps = 90
    assert qual.loc[3]["number_of_SNPs"] == expected_snps
Exemplo n.º 5
0
def test_basic_statistics(bcftools_stats):
    """Check the default table parsed from ``medium.call.stats``.

    The first index label must be ``number of samples``; the value stored for
    ``number of records`` must be 10667 when ``end`` is ``"pe"`` and 7400
    otherwise.
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    summary = odo(stats_path, DataFrame)

    first_label = list(summary.index)[0]
    assert first_label == 'number of samples'

    if end == "pe":
        expected_records = 10667
    else:
        expected_records = 7400
    assert summary.loc["number of records", "value"] == expected_records
Exemplo n.º 6
0
def test_cutadapt(cutadapt_metrics):
    """Check the adapter read count parsed from ``cutadapt_metrics.txt``.

    The row label that carries the count differs between ``"se"`` and
    ``"pe"`` runs. Any other value of ``end`` results in no assertion.
    """
    _module, _command, _version, end, pdir = cutadapt_metrics
    metrics_path = str(pdir.join("cutadapt_metrics.txt"))
    table = odo(metrics_path, DataFrame)

    # Row label to inspect for each supported value of ``end``.
    adapter_row_by_end = {
        "se": "Reads with adapters",
        "pe": "Read 1 with adapter",
    }
    adapter_row = adapter_row_by_end.get(end)
    if adapter_row is not None:
        assert table.loc[adapter_row]["value"] == 792
Exemplo n.º 7
0
def test_metrics(align_metrics):
    """Check the mean read length parsed from ``medium.align_metrics``.

    For ``end == "pe"`` the ``FIRST_OF_PAIR`` row must be within 0.01 of
    92.29; otherwise the ``UNPAIRED`` row must be within 0.001 of 92.29975.

    The comparison uses the absolute difference. A plain
    ``value - reference < tolerance`` check is one-sided and would accept any
    value smaller than the reference, however far off it is.
    """
    module, command, version, end, pdir = align_metrics
    fn = pdir.join("medium.align_metrics")
    metrics = odo(str(fn), DataFrame)
    if end == "pe":
        category, expected, tolerance = "FIRST_OF_PAIR", 92.29, 0.01
    else:
        category, expected, tolerance = "UNPAIRED", 92.29975, 0.001
    observed = metrics.loc[category]["MEAN_READ_LENGTH"]
    # abs() makes the tolerance two-sided.
    assert abs(observed - expected) < tolerance
Exemplo n.º 8
0
def test_bamtools_pivot(bamtools_data):
    """Convert the first fixture file with pivoting options and check a cell.

    The ``values``/``columns``/``index``/``regex`` keyword arguments are
    forwarded to ``odo`` unchanged. The ``("value", "Mapped reads")`` column
    at row ``medium.stats`` must be 59499 when ``end`` is ``"se"`` and 119413
    otherwise.
    """
    _module, _command, _version, end, pdir = bamtools_data
    pivot_options = {
        "values": ["value", "percent"],
        "columns": "statistic",
        "index": "sample",
        "regex": ".*/(?P<sample>medium.*)",
    }
    pivoted = odo(str(pdir.listdir()[0]), DataFrame, **pivot_options)

    if end == "se":
        expected_mapped = 59499
    else:
        expected_mapped = 119413
    mapped_reads = pivoted["value", "Mapped reads"]
    assert mapped_reads.loc["medium.stats"] == expected_mapped
Exemplo n.º 9
0
def test_per_base_sequence_quality(fastqc_data):
    """Check the shape of the ``Per_base_sequence_quality`` table.

    The expected row count depends on the minor component of the
    three-part ``version`` string: 28 rows when it is at most 10, 55 rows
    otherwise. The table always has 6 columns.
    """
    _module, _command, version, _end, pdir = fastqc_data
    archive_path = str(pdir.join("medium_fastqc.zip"))
    quality = odo(archive_path, DataFrame, key="Per_base_sequence_quality")

    _major, minor, _patch = version.split(".")
    expected_rows = 28 if int(minor) <= 10 else 55

    assert quality.shape[0] == expected_rows
    assert quality.shape[1] == 6
Exemplo n.º 10
0
def test_rseqc_parse(data):
    """Smoke-test that the fixture's output files convert without raising.

    The first entry of the directory listing is always converted. For the
    ``rseqc_read_duplication`` command the second entry is converted as well.
    No assertions are made on the resulting tables.
    """
    _module, command, _version, _end, pdir = data

    first_entry = pdir.listdir()[0]
    odo(str(first_entry), DataFrame)

    if command == "rseqc_read_duplication":
        second_entry = pdir.listdir()[1]
        odo(str(second_entry), DataFrame)
Exemplo n.º 11
0
def test_basic_statistics(fastqc_data):
    """Check the default table parsed from ``medium_fastqc.zip``.

    The index labels are identical across versions except for the fifth
    entry, which depends on the minor component of ``version``. The
    ``Filename`` row must hold ``medium.bam`` in the ``Value`` column.
    """
    _module, _command, version, _end, pdir = fastqc_data
    archive_path = str(pdir.join("medium_fastqc.zip"))
    basic = odo(archive_path, DataFrame)

    _major, minor, _patch = version.split(".")
    if int(minor) >= 11:
        fifth_label = 'Sequences flagged as poor quality'
    else:
        fifth_label = 'Filtered Sequences'

    expected_index = [
        'Filename',
        'File type',
        'Encoding',
        'Total Sequences',
        fifth_label,
        'Sequence length',
        '%GC',
    ]
    assert list(basic.index) == expected_index
    assert basic.loc["Filename", "Value"] == "medium.bam"
Exemplo n.º 12
0
def test_GCC(samtools_stats):
    """Check one cell of the ``GCC`` table parsed from ``medium.stats.txt``.

    The expected value of column ``A`` at index label 1 is looked up by
    ``version`` and ``end``; an unlisted version raises ``KeyError``.
    """
    # Expected value keyed by version, then by end.
    expected_by_version = {
        '1.2': {'se': 30.12, 'pe': 30.21},
        '1.3.1': {'se': 30.19, 'pe': 30.27},
        '1.4.1': {'se': 30.19, 'pe': 30.27},
    }
    _module, _command, version, end, pdir = samtools_stats
    stats_path = str(pdir.join("medium.stats.txt"))
    resource = samtools.resource_samtools_stats(stats_path, key="GCC")
    gcc = odo(resource, DataFrame)

    observed = gcc.loc[1]["A"]
    expected = expected_by_version[version][end]
    assert observed == expected
Exemplo n.º 13
0
def test_FFQ(samtools_stats):
    """Check one cell of the ``FFQ`` table parsed from ``medium.stats.txt``.

    The expected value of column ``33`` at index label 1 is looked up by
    ``version`` and ``end``; an unlisted version raises ``KeyError``.
    """
    # Expected value keyed by version, then by end.
    expected_by_version = {
        '1.2': {'se': 27624, 'pe': 27630},
        '1.3.1': {'se': 27598, 'pe': 27598},
        '1.4.1': {'se': 27598, 'pe': 27598},
    }
    _module, _command, version, end, pdir = samtools_stats
    stats_path = str(pdir.join("medium.stats.txt"))
    resource = samtools.resource_samtools_stats(stats_path, key="FFQ")
    ffq = odo(resource, DataFrame)

    observed = ffq.loc[1][33]
    expected = expected_by_version[version][end]
    assert observed == expected
Exemplo n.º 14
0
def test_basic_statistics(samtools_stats):
    """Check the default table parsed from ``medium.stats.txt``.

    The first index label must be ``raw total sequences`` and the value of
    the ``sequences`` row must match the number looked up by ``version`` and
    ``end``; an unlisted version raises ``KeyError``.
    """
    # Expected ``sequences`` value keyed by version, then by end.
    expected_by_version = {
        '1.2': {'se': 60037, 'pe': 120110},
        '1.3.1': {'se': 60000, 'pe': 120000},
        '1.4.1': {'se': 60000, 'pe': 120000},
    }
    _module, _command, version, end, pdir = samtools_stats
    stats_path = str(pdir.join("medium.stats.txt"))
    summary = odo(samtools.resource_samtools_stats(stats_path), DataFrame)

    first_label = list(summary.index)[0]
    assert first_label == 'raw total sequences'

    observed = summary.loc["sequences", "value"]
    expected = expected_by_version[version][end]
    assert observed == expected
Exemplo n.º 15
0
def test_mapdamage_5pCtoT(mapdamage_data):
    """Check that ``5pCtoT_freq.txt`` parses with an index named ``pos``."""
    _module, _command, _version, _end, pdir = mapdamage_data
    freq_path = str(pdir.join("5pCtoT_freq.txt"))
    freq = odo(freq_path, DataFrame)
    index_name = freq.index.name
    assert index_name == "pos"
Exemplo n.º 16
0
def test_mapdamage_dnacomp_genome(mapdamage_data):
    """Check the first ``A`` value parsed from ``dnacomp_genome.csv``.

    The value must be within 0.0001 of 0.265786. The absolute difference is
    used because a plain ``value - reference < tolerance`` check is one-sided
    and would accept any value smaller than the reference.
    """
    module, command, version, end, pdir = mapdamage_data
    fn = pdir.join("dnacomp_genome.csv")
    df = odo(str(fn), DataFrame)
    observed = list(df["A"])[0]
    # abs() makes the tolerance two-sided.
    assert abs(observed - 0.265786) < 0.0001
Exemplo n.º 17
0
def test_bamtools(bamtools_data):
    """Check the ``Mapped reads`` value parsed from the first fixture file.

    The expected count is 59499 when ``end`` is ``"se"`` and 119413
    otherwise.
    """
    _module, _command, _version, end, pdir = bamtools_data
    first_file = str(pdir.listdir()[0])
    stats = odo(first_file, DataFrame)

    if end == "se":
        expected_mapped = 59499
    else:
        expected_mapped = 119413
    assert stats.loc["Mapped reads", "value"] == expected_mapped
Exemplo n.º 18
0
def test_mapdamage_lgdistribution(mapdamage_data):
    """Check the column labels parsed from ``lgdistribution.txt``."""
    _module, _command, _version, _end, pdir = mapdamage_data
    distribution_path = str(pdir.join("lgdistribution.txt"))
    distribution = odo(distribution_path, DataFrame)
    # 'Occurences' is the exact spelling the test expects in the data.
    expected_columns = ['Std', 'Length', 'Occurences']
    assert list(distribution.columns) == expected_columns
Exemplo n.º 19
0
def test_sga_filter(sga_filter_data):
    """Check the ``Reads failed kmer check`` value from the first fixture file.

    The expected count is looked up by ``version`` and ``end``; only version
    ``0.10.13`` is listed, so any other version raises ``KeyError``.
    """
    expected_by_version = {
        '0.10.13': {'se': 9400, 'pe': 16670},
    }
    _module, _command, version, end, pdir = sga_filter_data
    first_file = str(pdir.listdir()[0])
    table = odo(first_file, DataFrame)

    observed = table.loc["Reads failed kmer check", "value"]
    expected = expected_by_version[version][end]
    assert observed == expected
Exemplo n.º 20
0
def test_sga_preprocess(sga_preprocess_data):
    """Check the ``Reads parsed`` value from the first fixture file.

    The expected count is 10000 when ``end`` is ``"se"`` and 20000 otherwise.
    """
    _module, _command, _version, end, pdir = sga_preprocess_data
    first_file = str(pdir.listdir()[0])
    table = odo(first_file, DataFrame)

    if end == "se":
        expected_parsed = 10000
    else:
        expected_parsed = 20000
    assert table.loc["Reads parsed", "value"] == expected_parsed
Exemplo n.º 21
0
def test_star_final_log(data):
    """Check the input read count parsed from ``medium.Log.final.out``."""
    _module, _command, _version, _end, pdir = data
    log_path = str(pdir.join("medium.Log.final.out"))
    log_table = odo(log_path, DataFrame)
    input_reads = log_table.loc["Number of input reads", "value"]
    assert input_reads == 30483
Exemplo n.º 22
0
def test_mapdamage_misincorporation(mapdamage_data):
    """Check that ``misincorporation.txt`` parses into 30 columns."""
    _module, _command, _version, _end, pdir = mapdamage_data
    table_path = str(pdir.join("misincorporation.txt"))
    table = odo(table_path, DataFrame)
    column_count = table.shape[1]
    assert column_count == 30
Exemplo n.º 23
0
def test_IDD(bcftools_stats):
    """Check one ``count`` value of the ``IDD`` table in ``medium.call.stats``.

    The row with index label -1 must hold 123 when ``end`` is ``"pe"`` and 95
    otherwise.
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    idd = odo(stats_path, DataFrame, key="IDD")

    if end == "pe":
        expected_count = 123
    else:
        expected_count = 95
    assert idd.loc[-1]["count"] == expected_count
Exemplo n.º 24
0
def test_TSTV(bcftools_stats):
    """Check the ``ts/tv`` value of the ``TSTV`` table in ``medium.call.stats``.

    The row with index label 0 must equal 2.12 when ``end`` is ``"pe"`` and
    2.19 otherwise (exact equality).
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    tstv_table = odo(stats_path, DataFrame, key="TSTV")

    if end == "pe":
        expected_ratio = 2.12
    else:
        expected_ratio = 2.19
    assert tstv_table.loc[0]["ts/tv"] == expected_ratio
Exemplo n.º 25
0
def test_mapdamage_dnacomp(mapdamage_data):
    """Check the ``Chr`` entry at label 0 parsed from ``dnacomp.txt``."""
    _module, _command, _version, _end, pdir = mapdamage_data
    composition_path = str(pdir.join("dnacomp.txt"))
    composition = odo(composition_path, DataFrame)
    first_chromosome = composition["Chr"][0]
    assert first_chromosome == "scaffold1"
Exemplo n.º 26
0
def test_summary(fastqc_data):
    """Check that the ``Summary`` table reports ``pass`` for basic statistics."""
    _module, _command, _version, _end, pdir = fastqc_data
    archive_path = str(pdir.join("medium_fastqc.zip"))
    summary = odo(archive_path, DataFrame, key="Summary")
    basic_statistics_status = summary.loc['Basic_Statistics', 'Value']
    assert basic_statistics_status == "pass"
Exemplo n.º 27
0
def test_mapdamage_mcmc_correct_prob(mapdamage_data):
    """Check index name and width of ``Stats_out_MCMC_correct_prob.csv``.

    The parsed table must be indexed by ``Position`` and have two columns.
    """
    _module, _command, _version, _end, pdir = mapdamage_data
    csv_path = str(pdir.join("Stats_out_MCMC_correct_prob.csv"))
    table = odo(csv_path, DataFrame)

    index_name = table.index.name
    assert index_name == "Position"

    column_count = table.shape[1]
    assert column_count == 2
Exemplo n.º 28
0
def test_wrong_key(fastqc_data):
    """Check that an unknown ``key`` makes the conversion raise ``KeyError``."""
    _module, _command, _version, _end, pdir = fastqc_data
    archive_path = str(pdir.join("medium_fastqc.zip"))
    unknown_key = "foo"
    with pytest.raises(KeyError):
        odo(archive_path, DataFrame, key=unknown_key)
Exemplo n.º 29
0
def test_idxstats(samtools_idxstats):
    """Check the first cell of the table parsed from ``medium.idxstats.txt``."""
    _module, _command, _version, _end, pdir = samtools_idxstats
    idxstats_path = str(pdir.join("medium.idxstats.txt"))
    table = odo(idxstats_path, DataFrame)
    first_cell = table.loc[0][0]
    assert first_cell == "scaffold1"
Exemplo n.º 30
0
def test_mapdamage_mcmc_iter_summ(mapdamage_data):
    """Check that ``Stats_out_MCMC_iter_summ_stat.csv`` parses into 6 columns."""
    _module, _command, _version, _end, pdir = mapdamage_data
    csv_path = str(pdir.join("Stats_out_MCMC_iter_summ_stat.csv"))
    table = odo(csv_path, DataFrame)
    column_count = table.shape[1]
    assert column_count == 6