示例#1
0
def convert_to_fastq(fp_obj, logger=None):
    """Convert a SAM or BAM file to FASTQ file(s) for alignment.

    Launches ``seriesoftubes.converters.bamtofastq2`` as a subprocess that
    writes into ``<fp_obj.output_dir>/fastq_input`` and then repoints
    ``fp_obj`` at the converted file(s).

    Args:
        fp_obj: file-description object. ``input_file``, ``output_dir``,
            ``protoname`` and ``paired_end`` are read; ``check_output_dir``
            is called; ``input_file``, ``second_file`` (paired-end only),
            ``use_pysam`` and ``format`` are overwritten.
        logger: logger used for progress messages. Defaults to this module's
            logger when omitted.

    Returns:
        None. A non-zero exit status from the converter is only logged at
        CRITICAL level; ``fp_obj`` is updated either way.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    input_file = fp_obj.input_file
    fastq_dir = join(fp_obj.output_dir, 'fastq_input')
    fp_obj.check_output_dir(fastq_dir)
    protoname = fp_obj.protoname

    if fp_obj.paired_end:
        fastq_filenames = (join(fastq_dir, '%s.1.txt.gz' % protoname),
                           join(fastq_dir, '%s.2.txt.gz' % protoname))
        logger.info('Converting file %s to FASTQ files %s, %s',
                    input_file, fastq_filenames[0], fastq_filenames[1])
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.bamtofastq2',
                   '--gzip', input_file,
                   fastq_filenames[0], fastq_filenames[1]]
    else:
        fastq_filename = join(fastq_dir, '%s.txt.gz' % protoname)
        logger.info('Converting file %s to FASTQ file %s',
                    input_file, fastq_filename)
        # NOTE(review): unlike the paired-end call, '--gzip' is not passed
        # here although the target name ends in '.gz' -- confirm against the
        # converter's command-line interface.
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.bamtofastq2',
                   input_file, fastq_filename]

    logger.debug('Launching %s', ' '.join(in_args))
    # stderr is merged into stdout so a single pipe carries all output
    polledpipe = PolledPipe(logger=logger, level=logging.ERROR)
    job = Popen(in_args, stdout=polledpipe.w, stderr=STDOUT)
    wait_for_job(job, [polledpipe], logger)

    if fp_obj.paired_end:
        logger.debug('Settings input_file to %s', fastq_filenames[0])
        fp_obj.input_file = fastq_filenames[0]
        logger.debug('Settings second_file to %s', fastq_filenames[1])
        fp_obj.second_file = fastq_filenames[1]
    else:
        logger.debug('Settings input_file to %s', fastq_filename)
        # Single-end runs only define fastq_filename; the tuple
        # fastq_filenames exists in the paired-end branch alone.
        fp_obj.input_file = fastq_filename
    logger.debug('Setting use_pysam to False')
    fp_obj.use_pysam = False
    logger.debug('Setting format to FASTQ')
    fp_obj.format = 'FASTQ'
    logger.debug('Ignoring open_func, it will not be used')

    if job.returncode != 0:
        logger.critical('Conversion FAILED!')
    else:
        logger.info('Conversion successful')
    return
示例#2
0
def align_once(fp_obj, flags, ref, use_quality=False,
               path_to_bowtie2=None, path_to_samtools=None, logger=None,
               passthru_args=None,
               **kwargs):
    """Align reads with bowtie2 and produce a sorted, indexed BAM file.

    Runs four subprocesses in sequence: bowtie2 piped into ``samtools view``
    (unsorted BAM), then ``samtools sort``, then ``samtools index``. The
    unsorted BAM is deleted once the sorted file has been checked.

    Args:
        fp_obj: file-description object. ``input_file``, ``second_file``,
            ``paired_end`` and ``use_pysam`` are read; ``tmp_filename(name)``
            supplies the unsorted BAM path and ``check_output_dir`` is called
            on its directory.
        flags: list of bowtie2 command-line flags. The caller's list is not
            modified.
        ref: bowtie2 index passed to ``-x``; its basename names the output.
        use_quality: when true, add ``--phred33`` (if ``fp_obj.use_pysam``)
            or ``--phred64`` to the bowtie2 flags.
        path_to_bowtie2: bowtie2 executable.
        path_to_samtools: samtools executable.
        logger: logger for progress messages. Defaults to this module's
            logger when omitted.
        passthru_args: optional list of extra bowtie2 arguments, inserted
            between ``flags`` and the file arguments.
        **kwargs: accepted and ignored.

    Returns:
        None in every case; a failing step is logged at CRITICAL level and
        ends the function early.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # Copy so that appending the quality flag does not leak into the
    # caller's list (which may be reused for further alignments).
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33')
        else:
            flags.append('--phred64')

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname)
    output_dir = os.path.split(path_to_unsorted)[0]
    fp_obj.check_output_dir(output_dir)
    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if fp_obj.paired_end:
        file_args = ['-x', ref, '-1', filename1, '-2', filename2]
    else:
        file_args = ['-x', ref, '-U', filename1]

    if passthru_args:
        bowtie2_args = [path_to_bowtie2] + flags + passthru_args + file_args
    else:
        bowtie2_args = [path_to_bowtie2] + flags + file_args

    # finish parsing input here
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info('Launching bowtie2 (output will be piped to samtools for BAM '
                'encoding)')
    logger.info(' '.join(bowtie2_args))
    bowtie2_aligner = Popen(bowtie2_args, stdout=PIPE, stderr=bowtie2_stderr.w,
                            bufsize=-1)

    # samtools reads bowtie2's stdout ('-') and writes the unsorted BAM
    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie2 output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie2_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie2 to finish')
    pollables = [bowtie2_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie2_aligner, pollables, logger)

    if bowtie2_aligner.returncode != 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        # The downstream encoder has nothing valid to read; stop it too.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return

    logger.debug('Unsorted BAM file successfully written')

    logger.info('Launching samtools again to sort BAM output')
    output_dir, output_file = os.path.split(path_to_unsorted)
    # Second positional argument is the output prefix; the sorted file is
    # expected at <prefix>.bam (checked below). Paths are relative because
    # the job runs with cwd=output_dir.
    bam_file = os.path.splitext(output_file)[0]
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
示例#3
0
def counteralign_once(fp_obj, flags, ref, use_quality=False,
                      path_to_bowtie2=None, path_to_samtools=None, logger=None,
                      passthru_args=None,
                      **kwargs):
    """Produce counter-alignments with bowtie2.

    Runs bowtie2 against ``ref`` with its SAM output discarded. Reads that
    align are written (gzipped) under ``<output_dir>/counteraligned`` via
    ``--al-gz`` / ``--al-conc-gz``; reads that do not align are written to
    ``<output_dir>`` via ``--un-gz`` / ``--un-conc-gz``.

    Args:
        fp_obj: file-description object. ``input_file``, ``second_file``,
            ``paired_end`` and ``use_pysam`` are read; ``tmp_filename(name)``
            supplies the output directory and ``check_output_dir`` is called.
        flags: list of bowtie2 command-line flags. The caller's list is not
            modified.
        ref: bowtie2 index passed to ``-x``.
        use_quality: when true, add ``--phred33`` (if ``fp_obj.use_pysam``)
            or ``--phred64`` to the bowtie2 flags.
        path_to_bowtie2: bowtie2 executable.
        path_to_samtools: unused here; accepted for a signature parallel to
            the aligner functions.
        logger: logger for progress messages. Defaults to this module's
            logger when omitted.
        passthru_args: optional list of extra bowtie2 arguments.
        **kwargs: accepted and ignored.

    Returns:
        A 2-tuple ``(first_file, second_file_or_None)`` of paths under
        ``output_dir`` on success, or None if bowtie2 exits non-zero.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # Copy so that appending the quality flag does not leak into the
    # caller's list.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33')
        else:
            flags.append('--phred64')

    refname = os.path.basename(ref)
    output_dir, output_file = os.path.split(fp_obj.tmp_filename(refname))
    fp_obj.check_output_dir(output_dir)
    fp_obj.check_output_dir(join(output_dir, 'counteraligned'))
    filename1 = os.path.abspath(fp_obj.input_file)
    input_file = fp_obj.input_file
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if fp_obj.paired_end:
        try:
            paired_file = make_paired_name(input_file, second_file)
        except ValueError:
            # No paired name could be derived: fall back to the first file's
            # name for both outputs so paired_file is always bound.
            paired_file = input_file
        counteraligned = os.path.abspath(join(output_dir, 'counteraligned',
                                              paired_file))
        file_args = ['-x', ref, '-1', filename1, '-2', filename2,
                     '--al-conc-gz', counteraligned,
                     '--un-conc-gz', join(output_dir, paired_file)]
        new_filenames = (join(output_dir, input_file),
                         join(output_dir, second_file))
    else:
        file_args = ['-x', ref, '-U', filename1,
                     '--al-gz', join(output_dir, 'counteraligned', input_file),
                     '--un-gz', join(output_dir, input_file)]
        new_filenames = (join(output_dir, input_file), None)

    if passthru_args is not None:
        bowtie2_args = [path_to_bowtie2] + flags + passthru_args + file_args
    else:
        bowtie2_args = [path_to_bowtie2] + flags + file_args

    # finish parsing input here
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    # NOTE(review): the message below mentions samtools, but this function
    # sends bowtie2's stdout to the null device.
    logger.info('Launching bowtie2 (output will be piped to samtools for '
                'BAM encoding)')
    logger.info(' '.join(bowtie2_args))
    # Hold the null-device handle in a context manager so it is closed once
    # the job has finished.
    with open(devnull, 'w') as discard:
        bowtie2_aligner = Popen(bowtie2_args, stdout=discard,
                                stderr=bowtie2_stderr.w,
                                bufsize=-1)
        logger.info('counteraligned reads will be saved as GZIPed FASTQ files '
                    'in counteraligned/')

        logger.debug('Waiting for bowtie2 to finish')
        pollables = [bowtie2_stderr]
        wait_for_job(bowtie2_aligner, pollables, logger)

    if bowtie2_aligner.returncode != 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        return

    logger.debug('Alignment successfully completed')

    return new_filenames
示例#4
0
def align_once(fp_obj, flags, ref, match_type, use_quality=False,
               quals_type='solexa1.3',
               path_to_bowtie=None, path_to_samtools=None, logger=None,
               **kwargs):
    """Align reads with bowtie and produce a sorted, indexed BAM file.

    Builds a three-stage pipeline: a ``seriesoftubes.converters`` reader
    process feeds bowtie on stdin, and bowtie's stdout feeds
    ``samtools view``, which writes an unsorted BAM. The BAM is then sorted
    and indexed with samtools, and the unsorted file is removed.

    Args:
        fp_obj: file-description object. ``input_file``, ``second_file``,
            ``paired_end``, ``use_pysam`` and ``format`` are read;
            ``tmp_filename(name, match_type)`` supplies the unsorted BAM path
            and ``check_output_dir`` is called on its directory.
        flags: list of bowtie command-line flags. The caller's list is not
            modified.
        ref: bowtie index; its basename names the output.
        match_type: forwarded to ``fp_obj.tmp_filename``.
        use_quality: when true, add ``--phred33-quals`` (if
            ``fp_obj.use_pysam``) or ``--<quals_type>-quals``.
        quals_type: quality-encoding name used to build the bowtie flag.
        path_to_bowtie: bowtie executable.
        path_to_samtools: samtools executable.
        logger: logger for progress messages. Defaults to this module's
            logger when omitted.
        **kwargs: accepted and ignored.

    Returns:
        None in every case; an unsupported input format or a failing step is
        logged at CRITICAL level and ends the function early.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname, match_type)
    output_dir = os.path.split(path_to_unsorted)[0]
    fp_obj.check_output_dir(output_dir)
    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    # Copy so that appending the quality flag does not leak into the
    # caller's list.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append(''.join(['--', quals_type, '-quals']))
    # bowtie always reads from stdin ('-'); the reader process chosen below
    # supplies the data.
    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # finish parsing input here
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtotab',
                       filename1]
        else:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtofastq',
                       '--no-gzip',
                       filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m', 'seriesoftubes.converters.fastqtotab',
                   filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m', 'seriesoftubes.converters.cat',
                   filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s",
                        fp_obj.input_file, fp_obj.format)
        # No reader command exists for this input; continuing would fail on
        # the unbound in_args, so stop here like the other failure paths.
        return

    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)
    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w,
                           bufsize=-1)

    # samtools reads bowtie's stdout ('-') and writes the unsorted BAM
    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)

    if bowtie_aligner.returncode != 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        # The downstream encoder has nothing valid to read; stop it too.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return

    logger.debug('Unsorted BAM file successfully written')

    logger.info('Launching samtools again to sort BAM output')
    output_dir, output_file = os.path.split(path_to_unsorted)
    # Second positional argument is the output prefix; the sorted file is
    # expected at <prefix>.bam (checked below). Paths are relative because
    # the job runs with cwd=output_dir.
    bam_file = os.path.splitext(output_file)[0]
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
示例#5
0
def counteralign_once(fp_obj, flags, ref,
                      match_type, use_quality=False,
                      quals_type='solexa1.3',
                      path_to_bowtie=None, path_to_samtools=None, logger=None,
                      **kwargs):
    """Produce counter-alignments with bowtie, keeping only unaligned reads.

    Builds a three-stage pipeline: a ``seriesoftubes.converters`` reader
    process feeds bowtie on stdin, and bowtie's stdout feeds
    ``samtools view -f 0x4``, which writes the unmapped reads to an unsorted
    BAM file.

    Args:
        fp_obj: file-description object. ``input_file``, ``second_file``,
            ``paired_end``, ``use_pysam`` and ``format`` are read;
            ``tmp_filename(name)`` supplies the output path and
            ``check_output_dir`` is called on its directory.
        flags: list of bowtie command-line flags. The caller's list is not
            modified.
        ref: bowtie index; its basename names the output.
        match_type: accepted but not used by this function.
        use_quality: when true, add ``--phred33-quals`` (if
            ``fp_obj.use_pysam``) or ``--<quals_type>-quals``.
        quals_type: quality-encoding name used to build the bowtie flag.
        path_to_bowtie: bowtie executable.
        path_to_samtools: samtools executable.
        logger: logger for progress messages. Defaults to this module's
            logger when omitted.
        **kwargs: accepted and ignored.

    Returns:
        ``(path_to_unaligned_bam, None)`` on success, or None when the input
        format is unsupported or a subprocess exits non-zero.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    refname = os.path.basename(ref)
    output_dir, output_file = os.path.split(fp_obj.tmp_filename(refname))
    fp_obj.check_output_dir(output_dir)
    unaligned_path = join(output_dir, output_file)
    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    # Copy so that appending the quality flag does not leak into the
    # caller's list.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append(''.join(['--', quals_type, '-quals']))
    # bowtie always reads from stdin ('-'); the reader process chosen below
    # supplies the data.
    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # finish parsing input here
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtotab',
                       filename1]
        else:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtofastq',
                       '--no-gzip',
                       filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m', 'seriesoftubes.converters.fastqtotab',
                   filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m', 'seriesoftubes.converters.cat',
                   filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s",
                        fp_obj.input_file, fp_obj.format)
        # No reader command exists for this input; continuing would fail on
        # the unbound in_args, so stop here like the other failure paths.
        return

    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)
    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w,
                           bufsize=-1)
    logger.info('Only unaligned reads will be saved.')
    # '-o' consumes the argument that follows it, so the output path must
    # come directly after it; the '-f 0x4' filter pair goes before.
    samtools_args = [path_to_samtools, 'view', '-b', '-S',
                     '-f', '0x4',  # ONLY SAVE UNALIGNED READS
                     '-o', unaligned_path, '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)

    if bowtie_aligner.returncode != 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        # The downstream encoder has nothing valid to read; stop it too.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return

    logger.debug('Unsorted BAM file successfully written')

    return (unaligned_path, None)
示例#6
0
def run_macs(f, subpeaks=True, path_to_macs=None, logging_level=10,
             user_gsize=None, qvalue=0.01, passthru_args=None,
             **kwargs):
    """Run MACS2 ``callpeak`` on a BAM file.

    Args:
        f: file-description object. ``input_file``, ``control_file``,
            ``sample_name`` and ``output_dir`` are read; the job runs with
            ``output_dir`` as its working directory.
        subpeaks: when true, pass ``--call-summits``.
        path_to_macs: MACS executable; looked up with
            ``path_to_executable("macs2")`` when None.
        logging_level: level handed to ``get_logger``.
        user_gsize: genome size for ``-g``. When falsy, the size is derived
            from the genome build guessed from the BAM file.
        qvalue: value for ``-q``.
        passthru_args: optional list of extra MACS arguments. Every ``+`` in
            them is replaced by ``-`` before use (presumably so the options
            survive the caller's own argument parsing -- confirm). The
            caller's list is not modified.
        **kwargs: accepted and ignored.

    Returns:
        The launched command line as a string followed by two newlines.

    Raises:
        Usage: if no genome size was given and the genome build cannot be
            determined from the input file.
    """
    logger = get_logger(logging_level)
    if path_to_macs is None:
        path_to_macs = path_to_executable("macs2")

    input_file = f.input_file
    control_file = f.control_file
    logger.debug('Processing %s', input_file)
    if control_file is not None:
        logger.debug('with control %s', control_file)

    # determine genome name and size
    if user_gsize:
        genome_size = user_gsize
        # The guessed build is not used further on this path; the lookup is
        # kept so the input file is probed exactly as before.
        try:
            genome_build = guess_bam_genome(input_file)
        except NoMatchFoundError:
            genome_build = None
    else:
        try:
            genome_build = guess_bam_genome(input_file)
        except NoMatchFoundError:
            raise Usage('Could not determine genome / genome size for file %s'
                        % input_file)

        # Strip digits and punctuation from the build name, e.g. to map a
        # build onto the short genome names used for '-g' below.
        gname = ''.join([x for x in genome_build if x.isalpha()])
        if gname == 'hg':
            genome_size = 'hs'
        elif gname in ['mm', 'ce', 'dm']:
            genome_size = gname
        else:
            genome_size = '%.1e' % sum(genome(genome_build).itervalues())

    fmt = decide_format(input_file, control_file, logger)
    name = f.sample_name.replace(' ', '_')
    if passthru_args is not None:
        # Build a new list rather than rewriting the caller's in place.
        passthru_args = [arg.replace('+', '-') for arg in passthru_args]
        logger.debug('Passing thru arguments %s', ' '.join(passthru_args))
    # Every element must be a string: the list is both joined for logging
    # and handed to Popen as argv.
    macs_options = ['--trackline',
                    '-f', fmt,  # correct file format BAM or BAMPE
                    '-B', '--SPMR',  # bedgraphs, SPMR
                    '-g', str(genome_size),
                    '-q', str(qvalue),
                    '-n', name,  # run name
                    '-t', join(getcwd(), input_file)]  # treatment
    if control_file is not None:
        macs_options.extend(['-c', join(getcwd(), control_file)])
    if subpeaks:
        macs_options.append('--call-summits')
    if passthru_args is not None:
        macs_options.extend(passthru_args)

    step = [path_to_macs, 'callpeak'] + macs_options
    if platform.system() == 'Windows':
        # Launch the MACS script through the current Python interpreter.
        step.insert(0, sys.executable)

    macs_stdout = PolledPipe(logger=logger, level=WARN)
    macs_stderr = PolledPipe(logger=logger, level=ERROR)
    logger.debug('Launching %s', ' '.join(step))
    job = Popen(step, stdout=macs_stdout.w, stderr=macs_stderr.w,
                cwd=f.output_dir)

    pollables = [macs_stdout, macs_stderr]
    wait_for_job(job, pollables, logger)
    if job.returncode != 0:
        logger.critical('MACS did not run properly [%s]', job.returncode)

    return '%s\n\n' % ' '.join(step)
示例#7
0
    bwa_aligner = Popen(bwa_args, stdout=PIPE, stderr=bwa_stderr.w,
                        bufsize=-1)

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bwa output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bwa_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bwa to finish')
    pollables = [bwa_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bwa_aligner, pollables, logger)

    if not bwa_aligner.returncode == 0:
        logger.critical("bwa did not run properly [%d]",
                        bwa_aligner.returncode)
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_viewer.returncode == 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)