Exemplo n.º 1
0
def gatk_test_lanes():
    """Build the nested list of aligned samples used as GATK test input.

    Returns:
        A list containing two one-element lists. Each inner list holds a
        single ``mbf_align.AlignedSample`` created with ``is_paired=False``
        and ``vid=None``; both share one
        ``EnsemblGenome("Homo_sapiens", 96)`` instance.
    """
    reference = mbf_genomes.EnsemblGenome("Homo_sapiens", 96)
    # NOTE(review): the first BAM path is absolute while the second is
    # relative to the current working directory -- confirm this is intended.
    bam_by_sample = (
        (
            "Test1GATK",
            "/project/code/mvariants/data/base_raw_test_hg36_Subread_gatk_rg.bam",
        ),
        (
            "Test2GATK",
            "data/base_raw_test_hg3612_Subread_gatk_rg.bam",
        ),
    )
    # One single-sample group per entry, in the order listed above.
    return [
        [
            mbf_align.AlignedSample(
                sample_name,
                bam_path,
                reference,
                is_paired=False,
                vid=None,
            )
        ]
        for sample_name, bam_path in bam_by_sample
    ]
# Run everything from a dedicated benchmark directory, creating it on
# first use; all relative paths below resolve inside it.
work_dir = Path("_benchmark_read_counting")
work_dir.mkdir(exist_ok=True)
os.chdir(work_dir)

# Relative location of the BAM file whose existence is checked below.
bam_name = Path("results").joinpath(
    "aligned",
    "STAR_2.6.1d",
    "Drosophila_melanogaster_94",
    "ERR2984187",
    "ERR2984187.bam",
)

# Only set up the pipeline when the benchmark BAM is not already on disk.
if not bam_name.exists():
    # Leverage the pipeline to get some sample data.

    # Imported here, so these packages are only loaded when the BAM is missing.
    import mbf_align
    import mbf_externals

    ppg.new_pipegraph()

    genome = mbf_genomes.EnsemblGenome("Drosophila_melanogaster", 94)
    aligner = mbf_externals.aligners.STAR()

    # Just some random Drosophila lane.
    # Maps sample name -> accession handed to FASTQsFromAccession.
    samples = {"ERR2984187": "ERR2984187"}
    # One mbf_align.Sample per entry in `samples`, keyed by sample name.
    raw = {
        name: mbf_align.Sample(
            name,
            mbf_align.strategies.FASTQsFromAccession(err),
            reverse_reads=False,
            pairing="only_first",
        )
        for name, err in samples.items()
    }

    aligned = {
Exemplo n.º 3
0
def gatk_preprocessor(tmpdir):
    """Create the ``GATKPreprocess`` instance named "TestGATK_Preprocessor".

    Args:
        tmpdir: Passed through unchanged as the preprocessor's ``cache_dir``.

    Returns:
        A ``GATKPreprocess`` built on ``EnsemblGenome("Homo_sapiens", 96)``.
    """
    return GATKPreprocess(
        genome=mbf_genomes.EnsemblGenome("Homo_sapiens", 96),
        name="TestGATK_Preprocessor",
        cache_dir=tmpdir,
    )
Exemplo n.º 4
0
def test_mutect_init():
    """Check ``Mutect2`` construction with and without a genome.

    A plain string as the only positional argument must raise
    ``ValueError``; passing an ``EnsemblGenome`` via ``genome=`` must not
    raise.
    """
    # Rejected: a bare string instead of a genome object.
    with pytest.raises(ValueError):
        Mutect2("this has no genome")

    # Accepted: a genome supplied by keyword.
    Mutect2(genome=mbf_genomes.EnsemblGenome("Homo_sapiens", 96))
Exemplo n.º 5
0
import mbf_genomes
import subprocess


# Export the gene and transcript tables of the Homo_sapiens (94) genome,
# restricted to the chromosomes in `allowed`, as gzip-compressed msgpack.
genome = mbf_genomes.EnsemblGenome('Homo_sapiens', 94)
allowed = {'22'}


def _only_allowed_chromosomes(frame):
    """Return the rows of *frame* whose 'chr' value is in ``allowed``."""
    return frame[frame['chr'].isin(allowed)]


genes = _only_allowed_chromosomes(genome.df_genes)
transcripts = _only_allowed_chromosomes(genome.df_transcripts)

# (table, output file) pairs; both loops below walk them in this order.
_outputs = (
    (genes, "hs_22_genes.msgpack"),
    (transcripts, "hs_22_transcripts.msgpack"),
)

# NOTE(review): DataFrame.to_msgpack is not available in pandas >= 1.0 --
# confirm the pandas version this script is pinned to.
for _frame, _filename in _outputs:
    _frame.to_msgpack(_filename)

# Compress only after both files are written, using the external `gzip`
# command; check_call raises if it exits with a non-zero status.
for _frame, _filename in _outputs:
    subprocess.check_call(['gzip', _filename])