Пример #1
0
def main_deplete_human(args):
    '''Deplete human reads from a BAM file: bmtagger, mvicuna, then blastn.

    args.inBam is first passed through Picard RevertSam into args.revertBam.
    Each later stage reads the previous stage's output:

      * bmtagger depletion       -> args.bmtaggerBam
      * mvicuna duplicate removal -> args.rmdupBam
      * blastn depletion         -> args.blastnBam

    If both args.taxfiltBam and args.lastDb are set, lastal is run on the
    blastn output to select a specific taxon of interest.

    Returns 0.
    '''
    # Stage 1: revert the input BAM, queryname-sorted and sanitized.
    reverter = tools.picard.RevertSamTool()
    reverter.execute(args.inBam, args.revertBam,
                     picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'])

    # Stage 2: deplete against every bmtagger database.
    multi_db_deplete_bam(args.revertBam, args.bmtaggerDbs, deplete_bmtagger_bam,
                         args.bmtaggerBam,
                         threads=args.threads, JVMmemory=args.JVMmemory)

    # Stage 3: remove duplicates with mvicuna.
    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam,
                                 JVMmemory=args.JVMmemory)

    # Stage 4: deplete against every blastn database.
    multi_db_deplete_bam(args.rmdupBam, args.blastDbs, deplete_blastn_bam,
                         args.blastnBam,
                         threads=args.threads, JVMmemory=args.JVMmemory)

    # Optional stage 5: taxon selection, only when both an output path and
    # a lastal database were supplied.
    if args.taxfiltBam and args.lastDb:
        filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam,
                          JVMmemory=args.JVMmemory)

    return 0
Пример #2
0
def main_deplete_human(args):
    '''Run the human-depletion pipeline end to end.

    Order of operations: Picard RevertSam (args.inBam -> args.revertBam),
    bmtagger depletion (-> args.bmtaggerBam), mvicuna duplicate removal
    (-> args.rmdupBam), blastn depletion (-> args.blastnBam).

    The lastal taxon filter (-> args.taxfiltBam) is optional and only runs
    when both args.taxfiltBam and args.lastDb are set.

    Returns 0.
    '''
    revert_options = ['SORT_ORDER=queryname', 'SANITIZE=true']
    tools.picard.RevertSamTool().execute(args.inBam, args.revertBam, picardOptions=revert_options)

    # bmtagger pass over the reverted reads
    multi_db_deplete_bam(
        args.revertBam, args.bmtaggerDbs, deplete_bmtagger_bam, args.bmtaggerBam,
        threads=args.threads, JVMmemory=args.JVMmemory
    )

    # duplicate removal between the two depletion passes
    read_utils.rmdup_mvicuna_bam(
        args.bmtaggerBam,
        args.rmdupBam,
        JVMmemory=args.JVMmemory
    )

    # blastn pass over the de-duplicated reads
    multi_db_deplete_bam(
        args.rmdupBam, args.blastDbs, deplete_blastn_bam, args.blastnBam,
        threads=args.threads, JVMmemory=args.JVMmemory
    )

    # Without both an output path and a database there is nothing left to do.
    if not (args.taxfiltBam and args.lastDb):
        return 0

    filter_lastal_bam(
        args.blastnBam,
        args.lastDb,
        args.taxfiltBam,
        JVMmemory=args.JVMmemory
    )
    return 0
Пример #3
0
def main_deplete(args):
    ''' Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

        Stages and the files they write:
          1. bwa depletion against args.bwaDbs            -> args.bwaBam
          2. bmtagger depletion against args.bmtaggerDbs  -> args.bmtaggerBam
          3. mvicuna duplicate removal                    -> args.rmdupBam
          4. blastn depletion against args.blastDbs       -> args.blastnBam

        At least one database must be given across the three database lists.
        Returns 0.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0, \
        "at least one bwa, bmtagger or blastn database is required"

    # Only RevertSam if inBam is already aligned. Most of the time the input
    # will be unaligned, so we save time by skipping RevertSam in that case.
    #
    # Via the SAM/BAM spec, if the file is aligned, an SQ line should be
    # present in the header:
    #   https://samtools.github.io/hts-specs/SAMv1.pdf
    #
    # The context manager yields the BAM that should be fed to depletion.
    # NOTE(review): presumably it also owns cleanup of any temporary reverted
    # BAM it creates -- confirm in read_utils.revert_bam_if_aligned.
    with read_utils.revert_bam_if_aligned(
            args.inBam,
            revert_bam=args.revertBam,
            clear_tags=args.clear_tags,
            tags_to_clear=args.tags_to_clear,
            picardOptions=['MAX_DISCARD_FRACTION=0.5'],
            JVMmemory=args.JVMmemory,
            sanitize=not args.do_not_sanitize) as bamToDeplete:
        multi_db_deplete_bam(bamToDeplete,
                             args.bwaDbs,
                             deplete_bwa_bam,
                             args.bwaBam,
                             threads=args.threads)

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Bind the user's srprism memory setting so the wrapper matches the
        # call signature multi_db_deplete_bam uses for its deplete method.
        return deplete_bmtagger_bam(inBam,
                                    db,
                                    outBam,
                                    srprism_memory=args.srprism_memory,
                                    JVMmemory=JVMmemory)

    multi_db_deplete_bam(args.bwaBam,
                         args.bmtaggerDbs,
                         bmtagger_wrapper,
                         args.bmtaggerBam,
                         JVMmemory=args.JVMmemory)

    # No manual removal of a temporary reverted BAM here: this function never
    # defines such a path, and an `os.unlink(revertBamOut)` at this point
    # raised NameError whenever args.revertBam was not set.

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam,
                                 args.rmdupBam,
                                 JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(args.rmdupBam,
                         args.blastDbs,
                         deplete_blastn_bam,
                         args.blastnBam,
                         chunkSize=args.chunkSize,
                         threads=args.threads,
                         JVMmemory=args.JVMmemory)
    return 0
Пример #4
0
def main_deplete_human(args):
    '''Run the human-depletion pipeline: bmtagger, mvicuna, blastn.

    RevertSam is only run when the input BAM looks aligned, i.e. its header
    carries a non-empty SQ section (see the SAM spec,
    https://samtools.github.io/hts-specs/SAMv1.pdf). Unaligned input is fed
    to bmtagger directly, which skips the RevertSam step.

    When both args.taxfiltBam and args.lastDb are set, lastal is run last to
    select a specific taxon of interest.

    Returns 0.
    '''
    # The reverted BAM goes to the user's path if one was given; otherwise a
    # scratch file is used and deleted once bmtagger has consumed it.
    reverted_path = args.revertBam or mkstempfname('.bam')
    depletion_input = args.inBam

    with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as header_probe:
        looks_aligned = 'SQ' in header_probe.header and len(header_probe.header['SQ']) > 0
        if looks_aligned:
            reverter = tools.picard.RevertSamTool()
            reverter.execute(args.inBam,
                             reverted_path,
                             picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'])
            depletion_input = reverted_path
        elif args.revertBam:
            # Nothing to revert, but an output path was requested anyway:
            # touch it so the requested file exists.
            log.warning("An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output.")
            util.file.touch(reverted_path)
            # TODO: error out? run RevertSam anyway?

    multi_db_deplete_bam(depletion_input,
                         args.bmtaggerDbs,
                         deplete_bmtagger_bam,
                         args.bmtaggerBam,
                         threads=args.threads,
                         JVMmemory=args.JVMmemory)

    # The scratch reverted BAM is no longer needed after the bmtagger pass.
    if not args.revertBam:
        os.unlink(reverted_path)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam,
                                 args.rmdupBam,
                                 JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(args.rmdupBam,
                         args.blastDbs,
                         deplete_blastn_bam,
                         args.blastnBam,
                         threads=args.threads,
                         JVMmemory=args.JVMmemory)

    if args.taxfiltBam and args.lastDb:
        filter_lastal_bam(args.blastnBam,
                          args.lastDb,
                          args.taxfiltBam,
                          JVMmemory=args.JVMmemory)
    return 0
Пример #5
0
 def test_mvicuna_empty_input(self):
     """rmdup_mvicuna_bam on the 'empty.bam' fixture yields a BAM with zero reads."""
     sam_counter = tools.samtools.SamtoolsTool()
     source_bam = os.path.join(util.file.get_test_input_path(), 'empty.bam')
     deduped_bam = util.file.mkstempfname("output.bam")

     read_utils.rmdup_mvicuna_bam(source_bam, deduped_bam)

     reads_out = sam_counter.count(deduped_bam)
     self.assertEqual(reads_out, 0)
Пример #6
0
def main_deplete(args):
    ''' Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

        Each stage consumes the output of the previous one:
          bwa       (args.bwaDbs)       -> args.bwaBam
          bmtagger  (args.bmtaggerDbs)  -> args.bmtaggerBam
          mvicuna   (duplicate removal) -> args.rmdupBam
          blastn    (args.blastDbs)     -> args.blastnBam

        At least one database must be given across the three database lists.
        Returns 0.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0, \
        "at least one bwa, bmtagger or blastn database is required"

    # Only RevertSam if inBam is already aligned. Most of the time the input
    # will be unaligned, so we save time by skipping RevertSam in that case.
    #
    # Via the SAM/BAM spec, if the file is aligned, an SQ line should be
    # present in the header:
    #   https://samtools.github.io/hts-specs/SAMv1.pdf
    #
    # The context manager yields the BAM that should be fed to depletion.
    # NOTE(review): presumably it also owns cleanup of any temporary reverted
    # BAM it creates -- confirm in read_utils.revert_bam_if_aligned.
    with read_utils.revert_bam_if_aligned(
        args.inBam,
        revert_bam=args.revertBam,
        clear_tags=args.clear_tags,
        tags_to_clear=args.tags_to_clear,
        picardOptions=['MAX_DISCARD_FRACTION=0.5'],
        JVMmemory=args.JVMmemory,
        sanitize=not args.do_not_sanitize
    ) as bamToDeplete:
        multi_db_deplete_bam(
            bamToDeplete,
            args.bwaDbs,
            deplete_bwa_bam,
            args.bwaBam,
            threads=args.threads
        )

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Bind the user's srprism memory setting so the wrapper matches the
        # call signature multi_db_deplete_bam uses for its deplete method.
        return deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=args.srprism_memory, JVMmemory=JVMmemory)

    multi_db_deplete_bam(
        args.bwaBam,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        JVMmemory=args.JVMmemory
    )

    # No manual removal of a temporary reverted BAM here: this function never
    # defines such a path, and an `os.unlink(revertBamOut)` at this point
    # raised NameError whenever args.revertBam was not set.

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    return 0
Пример #7
0
 def test_mvicuna_empty_input(self):
     """Check that de-duplicating the 'empty.bam' fixture leaves no reads."""
     counter = tools.samtools.SamtoolsTool()
     fixture_dir = util.file.get_test_input_path()
     bam_in = os.path.join(fixture_dir, 'empty.bam')
     bam_out = util.file.mkstempfname("output.bam")
     read_utils.rmdup_mvicuna_bam(bam_in, bam_out)
     self.assertEqual(counter.count(bam_out), 0)
Пример #8
0
    def test_mvicuna_canned_input(self):
        """De-duplicating 'input.bam' must give as many reads as 'expected.bam'."""
        read_counter = tools.samtools.SamtoolsTool()

        # Resolve both fixtures from this test's input directory.
        source_bam, reference_bam = (
            os.path.join(util.file.get_test_input_path(self), fixture_name)
            for fixture_name in ('input.bam', 'expected.bam')
        )
        result_bam = util.file.mkstempfname("output.bam")

        read_utils.rmdup_mvicuna_bam(source_bam, result_bam)

        observed = read_counter.count(result_bam)
        wanted = read_counter.count(reference_bam)
        self.assertEqual(observed, wanted)
Пример #9
0
    def test_mvicuna_canned_input(self):
        """Compare the read count after mvicuna rmdup with the canned expected BAM."""
        samtools = tools.samtools.SamtoolsTool()

        def fixture(file_name):
            # Path to a file inside this test's input directory.
            return os.path.join(util.file.get_test_input_path(self), file_name)

        bam_before = fixture('input.bam')
        bam_expected = fixture('expected.bam')
        bam_after = util.file.mkstempfname("output.bam")
        read_utils.rmdup_mvicuna_bam(bam_before, bam_after)

        n_after = samtools.count(bam_after)
        n_expected = samtools.count(bam_expected)
        self.assertEqual(n_after, n_expected)
Пример #10
0
def main_deplete_human(args):
    '''Run the human-depletion pipeline: bmtagger, mvicuna, blastn.

    RevertSam is only applied when the input BAM appears to be aligned,
    judged by a non-empty SQ section in its header (SAM spec:
    https://samtools.github.io/hts-specs/SAMv1.pdf). Unaligned input goes
    straight to bmtagger, which skips the RevertSam step.

    Outputs are written to args.bmtaggerBam, args.rmdupBam and
    args.blastnBam in that order. Returns 0.
    '''
    # Use the caller's path for the reverted BAM if given, else a scratch
    # file that is removed after the bmtagger stage.
    scratch_or_user_bam = args.revertBam or mkstempfname('.bam')
    first_stage_input = args.inBam

    with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as probe:
        has_reference_seqs = 'SQ' in probe.header and len(probe.header['SQ']) > 0
        if has_reference_seqs:
            # Looks aligned: revert before depleting.
            tools.picard.RevertSamTool().execute(
                args.inBam, scratch_or_user_bam, picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true']
            )
            first_stage_input = scratch_or_user_bam
        elif args.revertBam:
            # RevertSam is unnecessary, yet an output path was requested:
            # touch it so the requested file exists.
            log.warning(
                "An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output."
            )
            util.file.touch(scratch_or_user_bam)
            # TODO: error out? run RevertSam anyway?

    def bmtagger_wrapper(inBam, db, outBam, threads, JVMmemory=None):
        # Forward to deplete_bmtagger_bam with the user's srprism memory setting.
        return deplete_bmtagger_bam(
            inBam, db, outBam,
            threads=threads,
            srprism_memory=args.srprism_memory,
            JVMmemory=JVMmemory
        )

    multi_db_deplete_bam(
        first_stage_input,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )

    # The scratch reverted BAM has served its purpose once bmtagger is done.
    if not args.revertBam:
        os.unlink(scratch_or_user_bam)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    return 0
Пример #11
0
 def test_mvicuna_empty_input(self):
     """Smoke test: rmdup_mvicuna_bam runs on the 'empty.bam' fixture without raising."""
     source_bam = os.path.join(util.file.get_test_input_path(), 'empty.bam')
     scratch_output = util.file.mkstempfname()
     read_utils.rmdup_mvicuna_bam(source_bam, scratch_output)