Exemplo n.º 1
0
def test_unpaired_unsorted():
    """Map unpaired reads with ``sort=False`` and check the output BAM.

    Runs ``minimap.run`` on the ``ref.fa`` / ``reads.fq`` fixtures found in
    ``data_dir`` and asserts that the output file is created and that
    ``bam_is_sorted_and_indexed`` reports False for it.
    """
    ref = os.path.join(data_dir, "ref.fa")
    reads = os.path.join(data_dir, "reads.fq")
    bam = "tmp.minimap_unpaired_unsorted.bam"
    # Remove any leftover from an earlier run without shelling out to `rm`,
    # so the test does not depend on a POSIX shell being available.
    if os.path.exists(bam):
        os.unlink(bam)
    try:
        minimap.run(bam, ref, reads, sort=False)
        assert os.path.exists(bam)
        assert not bam_is_sorted_and_indexed(bam)
    finally:
        # Clean up even if an assertion (or the mapping itself) fails, so a
        # failed run never leaves a stale file behind for the next one.
        if os.path.exists(bam):
            os.unlink(bam)
Exemplo n.º 2
0
def test_paired_sorted():
    """Map paired reads with ``sort=True`` and check the BAM is sorted/indexed.

    Runs ``minimap.run`` on the ``ref.fa`` / ``reads_1.fq`` / ``reads_2.fq``
    fixtures found in ``data_dir`` and asserts that the output file is
    created and that ``bam_is_sorted_and_indexed`` reports True for it.
    """
    ref = os.path.join(data_dir, "ref.fa")
    reads1 = os.path.join(data_dir, "reads_1.fq")
    reads2 = os.path.join(data_dir, "reads_2.fq")
    bam = "tmp.minimap_paired_sorted.bam"
    bam_index = f"{bam}.bai"
    # Remove any leftover BAM from an earlier run without shelling out to
    # `rm`, so the test does not depend on a POSIX shell being available.
    if os.path.exists(bam):
        os.unlink(bam)
    try:
        minimap.run(bam, ref, reads1, fq2=reads2, sort=True)
        assert os.path.exists(bam)
        assert bam_is_sorted_and_indexed(bam)
    finally:
        # Clean up both the BAM and its index even when the test fails.
        for path in (bam, bam_index):
            if os.path.exists(path):
                os.unlink(path)
Exemplo n.º 3
0
 def map_reads_to_viridian_consensus(self):
     """Map the sampler's output reads against ``self.viridian_fasta``.

     Uses ``self.sampler.fq_out1`` / ``fq_out2`` when ``self.paired`` is
     truthy, otherwise the single ``self.sampler.fq_out`` file with no mate
     file. The sorted mapping is written to ``self.bam_reads_v_viridian``,
     after which the latest pipeline stage is recorded.
     """
     logging.info("Mapping reads to consensus from Viridian")
     if not self.paired:
         # Unpaired data: a single reads file and no mates.
         reads_fwd, reads_rev = self.sampler.fq_out, None
     else:
         reads_fwd, reads_rev = self.sampler.fq_out1, self.sampler.fq_out2

     minimap.run(
         self.bam_reads_v_viridian,
         self.viridian_fasta,
         reads_fwd,
         fq2=reads_rev,
         sample_name=self.sample_name,
         sort=True,
     )
     self.update_json_latest_stage("Map reads to Viridian consensus")
def run(options):
    """Map reads to the reference, gather primer statistics and write JSON.

    Steps:
      1. Resolve the input read files from ``options``.
      2. Load the amplicon schemes; when neither ``built_in_amp_schemes``
         nor ``amp_schemes_tsv`` is given, every built-in scheme is used
         (``options.built_in_amp_schemes`` is overwritten in that case).
      3. Map the reads (unsorted) to ``options.ref_fasta`` into a temporary
         file named ``<outprefix>.tmp.sam``.
      4. Gather per-read primer statistics, optionally writing
         ``<outprefix>.bam`` when ``options.make_bam`` is truthy.
      5. Write the results to ``<outprefix>.json``; the temporary mapping
         file is deleted unless ``options.debug`` is truthy.

    Args:
        options: object exposing the attributes read above
            (``built_in_amp_schemes``, ``amp_schemes_tsv``, ``outprefix``,
            ``ref_fasta``, ``sample_name``, ``make_bam``, ``debug``), plus
            whatever ``utils.check_tech_and_reads_opts_and_get_reads`` needs.
    """
    fq1, fq2 = utils.check_tech_and_reads_opts_and_get_reads(options)

    logging.info("Gathering amplicons scheme files")
    nothing_requested = (
        options.built_in_amp_schemes is None and options.amp_schemes_tsv is None
    )
    if nothing_requested:
        logging.info("No primer schemes provided. Using all built in schemes")
        all_built_in = amplicon_schemes.get_built_in_schemes()
        options.built_in_amp_schemes = list(all_built_in.keys())

    # The name -> TSV mapping is returned alongside the list but is not
    # needed here; only the list of amplicon sets is used.
    _name_to_tsv, scheme_sets = amplicon_schemes.load_list_of_amplicon_sets(
        built_in_names_to_use=options.built_in_amp_schemes,
        tsv_others_to_use=options.amp_schemes_tsv,
    )

    mapping_tmp = f"{options.outprefix}.tmp.sam"
    logging.info("Mapping reads to reference")
    minimap.run(
        mapping_tmp,
        options.ref_fasta,
        fq1,
        fq2=fq2,
        sample_name=options.sample_name,
        sort=False,
    )

    logging.info("Detecting primers for each read")
    if options.make_bam:
        tagged_bam = f"{options.outprefix}.bam"
    else:
        tagged_bam = None
    stats = detect_primers.gather_stats_from_bam(mapping_tmp, tagged_bam, scheme_sets)

    # Convert the set-match counts into a form that can be JSON-serialised.
    matches_key = "amplicon_scheme_set_matches"
    stats[matches_key] = detect_primers.amplicon_set_counts_to_json_friendly(
        stats[matches_key]
    )

    summary_json = f"{options.outprefix}.json"
    logging.info(f"Tidying files and writing final JSON {summary_json}")
    if not options.debug:
        os.unlink(mapping_tmp)
    utils.write_json(summary_json, stats)
Exemplo n.º 5
0
    def initial_read_map_and_detect_amplicon_scheme(self):
        """Map the reads to the reference and settle on an amplicon scheme.

        The reads are mapped (unsorted) into
        ``<processing_dir>/map_reads.unsorted.bam``. Primer statistics are
        then gathered from that file and stored in
        ``self.log_dict["read_and_primer_stats"]``.

        The scheme that is used is ``self.force_amp_scheme`` when it is not
        None, otherwise the scheme reported by the statistics under
        ``"chosen_amplicon_scheme"``. Either way the name is stored in
        ``self.log_dict["amplicon_scheme_name"]`` and is used to set
        ``self.amplicon_tsv`` and ``self.amplicon_set``.

        Raises:
            KeyError: if the selected scheme name is not a key of
                ``self.amplicon_scheme_name_to_tsv``.
        """
        logging.info("Mapping reads to reference")
        unsorted_bam = os.path.join(self.processing_dir,
                                    "map_reads.unsorted.bam")
        minimap.run(
            unsorted_bam,
            self.ref_genome,
            self.fq1,
            fq2=self.fq2,
            sample_name=self.sample_name,
            sort=False,
        )
        self.update_json_latest_stage("Initial map reads")
        logging.info("Detecting amplicon scheme and gathering read statistics")

        primer_stats = detect_primers.gather_stats_from_bam(
            unsorted_bam, self.unsorted_read_tagged_bam,
            self.amplicon_scheme_list)

        self.log_dict["read_and_primer_stats"] = primer_stats

        # Convert the set-match counts into a form that can be
        # JSON-serialised along with the rest of the log.
        logged_stats = self.log_dict["read_and_primer_stats"]
        logged_stats["amplicon_scheme_set_matches"] = (
            detect_primers.amplicon_set_counts_to_json_friendly(
                logged_stats["amplicon_scheme_set_matches"]))

        # Previously the forced scheme was written to a different log key
        # ("chosen_amplicon_scheme") and then ignored: the detected scheme
        # was always the one loaded. Record the name under a single key and
        # let a forced scheme actually override detection.
        if self.force_amp_scheme is None:
            chosen_scheme = logged_stats["chosen_amplicon_scheme"]
        else:
            chosen_scheme = self.force_amp_scheme
        self.log_dict["amplicon_scheme_name"] = chosen_scheme

        self.amplicon_tsv = self.amplicon_scheme_name_to_tsv[chosen_scheme]

        self.amplicon_set = primers.AmpliconSet.from_tsv(self.amplicon_tsv,
                                                         name=chosen_scheme)
        self.update_json_latest_stage(
            "Gather read stats and detect primer scheme")
def test_gather_stats_from_bam():
    """Check ``detect_primers.gather_stats_from_bam`` on toy mapped reads.

    Builds a random 1100 bp reference (fixed seed), simulates a handful of
    unpaired and paired reads from known coordinates, maps each set into an
    unsorted BAM, and compares the gathered statistics against hard-coded
    expectations for two amplicon schemes. Also checks that the tagged
    output BAM is written on each call.

    All temporary files are created in the current working directory and
    are removed when the test finishes, whether it passes or fails.
    """
    ref_fasta = "tmp.gather_stats_from_bam.ref.fa"
    unpaired_reads_fa = "tmp.gather_stats_from_bam.reads.fa"
    unpaired_bam = "tmp.gather_stats_from_bam.unpaired.bam"
    reads1_fa = "tmp.gather_stats_from_bam.reads_1.fa"
    reads2_fa = "tmp.gather_stats_from_bam.reads_2.fa"
    paired_bam = "tmp.gather_stats_from_bam.paired.bam"
    tmp_bam_out = "tmp.bam"
    temp_files = (
        ref_fasta,
        reads1_fa,
        reads2_fa,
        unpaired_reads_fa,
        unpaired_bam,
        paired_bam,
        tmp_bam_out,
    )

    try:
        # Make a toy genome, two amplicon schemes, and a few reads. Map to
        # make the unsorted BAM, then we can test gather_stats_from_bam().
        random.seed(42)
        ref_seq = "".join(random.choices(["A", "C", "G", "T"], k=1100))
        with open(ref_fasta, "w") as f:
            print(">ref", file=f)
            print(ref_seq, file=f)

        tsv_files = {
            "scheme1": os.path.join(data_dir,
                                    "gather_stats_from_bam.amplicons_1.tsv"),
            "scheme2": os.path.join(data_dir,
                                    "gather_stats_from_bam.amplicons_2.tsv"),
        }
        # scheme1:
        # 100-300, 290-800, 790-1000
        #
        # scheme2:
        # 100-300, 290-500, 490-700, 790-1001
        unpaired_read_coords = [(100, 290), (110, 290), (300, 800),
                                (750, 900)]
        _write_sim_reads(ref_seq, unpaired_read_coords, unpaired_reads_fa)
        minimap.run(unpaired_bam, ref_fasta, unpaired_reads_fa, sort=False)

        reads1_coords = [(100, 200), (110, 210), (310, 410)]
        reads2_coords = [(200, 300), (200, 300), (900, 1000)]
        _write_sim_reads(ref_seq, reads1_coords, reads1_fa, suffix="/1")
        _write_sim_reads(ref_seq,
                         reads2_coords,
                         reads2_fa,
                         revcomp=True,
                         suffix="/2")
        minimap.run(paired_bam,
                    ref_fasta,
                    reads1_fa,
                    fq2=reads2_fa,
                    sort=False)

        amplicon_sets = [
            primers.AmpliconSet.from_tsv(v, name=k)
            for k, v in tsv_files.items()
        ]
        # Remove any stale output from an earlier run without shelling out
        # to `rm`, so the existence check below is meaningful.
        if os.path.exists(tmp_bam_out):
            os.unlink(tmp_bam_out)
        got = detect_primers.gather_stats_from_bam(unpaired_bam, tmp_bam_out,
                                                   amplicon_sets)
        assert got == {
            "total_reads": 4,
            "reads1": 0,
            "reads2": 0,
            "unpaired_reads": 4,
            "mapped": 4,
            "match_any_amplicon": 3,
            "read_lengths": {
                190: 1,
                180: 1,
                500: 1,
                150: 1
            },
            "template_lengths": {
                190: 1,
                180: 1,
                500: 1,
                150: 1
            },  # TODO: check this
            "amplicon_scheme_set_matches": {
                ("scheme1", ): 1,
                ("scheme1", "scheme2"): 2
            },
            "amplicon_scheme_simple_counts": {
                "scheme1": 3,
                "scheme2": 2
            },
            "chosen_amplicon_scheme": "scheme1",
        }
        assert os.path.exists(tmp_bam_out)
        # Delete before the second call so its existence check is not
        # satisfied by the file from the first call.
        os.unlink(tmp_bam_out)

        got = detect_primers.gather_stats_from_bam(paired_bam, tmp_bam_out,
                                                   amplicon_sets)
        assert got == {
            "total_reads": 6,
            "reads1": 3,
            "reads2": 3,
            "unpaired_reads": 0,
            "mapped": 6,
            "match_any_amplicon": 2,
            "read_lengths": {
                100: 6
            },
            "template_lengths": {
                200: 1,
                190: 1,
                690: 1
            },  # TODO: check this
            "amplicon_scheme_set_matches": {
                ("scheme1", "scheme2"): 2
            },
            "amplicon_scheme_simple_counts": {
                "scheme1": 2,
                "scheme2": 2
            },
            "chosen_amplicon_scheme": "scheme2",
        }
        assert os.path.exists(tmp_bam_out)
    finally:
        # Clean up every temporary file even when an assertion fails, so a
        # failed run cannot leave stale inputs/outputs for the next one.
        for path in temp_files:
            if os.path.exists(path):
                os.unlink(path)