def convert_to_fastq(fp_obj, logger=None):
    """Convert a SAM or BAM file to FASTQ file(s) for alignment

    Runs ``seriesoftubes.converters.bamtofastq2`` in a subprocess (using the
    current Python interpreter) and writes the result under
    ``<fp_obj.output_dir>/fastq_input``.  Afterwards ``fp_obj`` is updated in
    place so later steps read the FASTQ output instead of the original file:
    ``input_file`` (and ``second_file`` for paired-end data) point to the new
    files, ``use_pysam`` is set to False and ``format`` to ``'FASTQ'``.

    fp_obj -- file-pair object providing input_file, output_dir, protoname,
              paired_end and check_output_dir()
    logger -- logger used for progress messages; it is used unconditionally,
              so a real logger must be supplied despite the None default

    Returns None.  A non-zero exit status of the converter is only logged
    (at critical level); fp_obj is updated either way.
    """
    input_file = fp_obj.input_file
    output_dir = fp_obj.output_dir
    fastq_dir = join(output_dir, 'fastq_input')
    fp_obj.check_output_dir(fastq_dir)
    protoname = fp_obj.protoname

    converter = [sys.executable, '-m',
                 'seriesoftubes.converters.bamtofastq2']
    if fp_obj.paired_end:
        fastq_filenames = (join(fastq_dir, '%s.1.txt.gz' % protoname),
                           join(fastq_dir, '%s.2.txt.gz' % protoname))
        logger.info('Converting file %s to FASTQ files %s, %s',
                    input_file, fastq_filenames[0], fastq_filenames[1])
        in_args = converter + ['--gzip', input_file,
                               fastq_filenames[0], fastq_filenames[1]]
    else:
        fastq_filename = join(fastq_dir, '%s.txt.gz' % protoname)
        logger.info('Converting file %s to FASTQ file %s',
                    input_file, fastq_filename)
        # NOTE(review): unlike the paired-end call, '--gzip' is not passed
        # here although the target name ends in .gz -- confirm against the
        # converter's defaults.
        in_args = converter + [input_file, fastq_filename]

    logger.debug('Launching %s', ' '.join(in_args))
    # The converter's stdout and stderr are merged into one polled pipe.
    polledpipe = PolledPipe(logger=logger, level=logging.ERROR)
    job = Popen(in_args, stdout=polledpipe.w, stderr=STDOUT)
    wait_for_job(job, [polledpipe], logger)

    if fp_obj.paired_end:
        logger.debug('Settings input_file to %s', fastq_filenames[0])
        fp_obj.input_file = fastq_filenames[0]
        logger.debug('Settings second_file to %s', fastq_filenames[1])
        fp_obj.second_file = fastq_filenames[1]
    else:
        logger.debug('Settings input_file to %s', fastq_filename)
        # fastq_filenames only exists in the paired-end branch; the
        # single-end output is fastq_filename.
        fp_obj.input_file = fastq_filename
    logger.debug('Setting use_pysam to False')
    fp_obj.use_pysam = False
    logger.debug('Setting format to FASTQ')
    fp_obj.format = 'FASTQ'
    logger.debug('Ignoring open_func, it will not be used')

    if not job.returncode == 0:
        logger.critical('Conversion FAILED!')
    else:
        logger.info('Conversion successful')
    return
def align_once(fp_obj, flags, ref, use_quality=False,
               path_to_bowtie2=None, path_to_samtools=None, logger=None,
               passthru_args=None, **kwargs):
    """Align the input of fp_obj against one bowtie2 index

    Pipeline: bowtie2 -> ``samtools view`` (BAM encoding) ->
    ``samtools sort`` -> ``samtools index``.  The unsorted BAM is written to
    ``fp_obj.tmp_filename(<basename of ref>)`` and removed once the sorted
    file has been checked with assert_path().

    fp_obj           -- file-pair object (input_file, second_file,
                        paired_end, use_pysam, tmp_filename(),
                        check_output_dir())
    flags            -- list of bowtie2 command-line flags; not modified
    ref              -- bowtie2 index (passed to ``-x``)
    use_quality      -- if true, add ``--phred33`` (when fp_obj.use_pysam)
                        or ``--phred64`` (otherwise)
    path_to_bowtie2  -- bowtie2 executable
    path_to_samtools -- samtools executable
    logger           -- logger; used unconditionally despite the None default
    passthru_args    -- optional extra bowtie2 arguments, inserted between
                        flags and the file arguments
    kwargs           -- accepted and ignored

    Returns None.  If any subprocess exits with a non-zero status the
    failure is logged at critical level and the function returns early.
    """
    # Work on a copy: appending to the caller's list would accumulate a
    # duplicate quality flag on every call that shares the same list.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33')
        else:
            flags.append('--phred64')

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname)
    output_dir = os.path.split(path_to_unsorted)[0]
    fp_obj.check_output_dir(output_dir)

    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if fp_obj.paired_end:
        file_args = ['-x', ref, '-1', filename1, '-2', filename2]
    else:
        file_args = ['-x', ref, '-U', filename1]

    if passthru_args:
        bowtie2_args = [path_to_bowtie2] + flags + passthru_args + file_args
    else:
        bowtie2_args = [path_to_bowtie2] + flags + file_args

    # finish parsing input here
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info('Launching bowtie2 (output will be piped to samtools for BAM '
                'encoding)')
    logger.info(' '.join(bowtie2_args))
    bowtie2_aligner = Popen(bowtie2_args, stdout=PIPE,
                            stderr=bowtie2_stderr.w, bufsize=-1)

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie2 output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie2_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie2 to finish')
    pollables = [bowtie2_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie2_aligner, pollables, logger)
    if not bowtie2_aligner.returncode == 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        # The downstream encoder has nothing useful to read; stop it.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return
    logger.debug('Alignment successfully completed')

    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_viewer.returncode == 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return
    logger.debug('Unsorted BAM file successfully written')

    logger.info('Launching samtools again to sort BAM output')
    output_dir, output_file = os.path.split(path_to_unsorted)
    bam_file = os.path.splitext(output_file)[0]
    # NOTE(review): "sort <input> <output prefix>" looks like the legacy
    # samtools calling convention -- confirm against the installed version.
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    # Relative file names are used, so the sorter runs inside output_dir.
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if not samtools_sorter.returncode == 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_indexer.returncode == 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
def counteralign_once(fp_obj, flags, ref, use_quality=False,
                      path_to_bowtie2=None, path_to_samtools=None,
                      logger=None, passthru_args=None, **kwargs):
    """Produce counter-alignements

    Runs bowtie2 against ``ref`` with its alignment output discarded.  Reads
    that align are written (gzipped) under ``<output_dir>/counteraligned``
    via ``--al-gz`` / ``--al-conc-gz``; reads that do not align are written
    (gzipped) into ``<output_dir>`` via ``--un-gz`` / ``--un-conc-gz``.
    ``output_dir`` is the directory of ``fp_obj.tmp_filename(<basename of
    ref>)``.

    fp_obj           -- file-pair object (input_file, second_file,
                        paired_end, use_pysam, tmp_filename(),
                        check_output_dir())
    flags            -- list of bowtie2 command-line flags; not modified
    ref              -- bowtie2 index (passed to ``-x``)
    use_quality      -- if true, add ``--phred33`` (when fp_obj.use_pysam)
                        or ``--phred64`` (otherwise)
    path_to_bowtie2  -- bowtie2 executable
    path_to_samtools -- accepted for signature parity; not used here
    logger           -- logger; used unconditionally despite the None default
    passthru_args    -- optional extra bowtie2 arguments
    kwargs           -- accepted and ignored

    Returns a 2-tuple ``(first_file, second_file_or_None)`` with the paths
    expected to hold the unaligned reads, or None if bowtie2 exits with a
    non-zero status.
    """
    # Work on a copy: appending to the caller's list would accumulate a
    # duplicate quality flag on every call that shares the same list.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33')
        else:
            flags.append('--phred64')

    refname = os.path.basename(ref)
    output_dir = os.path.split(fp_obj.tmp_filename(refname))[0]
    fp_obj.check_output_dir(output_dir)
    fp_obj.check_output_dir(join(output_dir, 'counteraligned'))

    input_file = fp_obj.input_file
    second_file = fp_obj.second_file
    filename1 = os.path.abspath(input_file)
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if fp_obj.paired_end:
        try:
            paired_file = make_paired_name(input_file, second_file)
        except ValueError:
            # No common paired name could be built: fall back to the first
            # file's name for both output templates (previously only the
            # aligned template fell back, leaving paired_file unbound).
            # NOTE(review): with this fallback the files bowtie2 writes may
            # not match new_filenames below -- confirm the naming scheme.
            paired_file = input_file
        counteraligned = os.path.abspath(join(output_dir, 'counteraligned',
                                              paired_file))
        file_args = ['-x', ref, '-1', filename1, '-2', filename2,
                     '--al-conc-gz', counteraligned,
                     '--un-conc-gz', join(output_dir, paired_file)]
        new_filenames = (join(output_dir, input_file),
                         join(output_dir, second_file))
    else:
        file_args = ['-x', ref, '-U', filename1,
                     '--al-gz', join(output_dir, 'counteraligned',
                                     input_file),
                     '--un-gz', join(output_dir, input_file)]
        new_filenames = (join(output_dir, input_file), None)

    if passthru_args is not None:
        bowtie2_args = [path_to_bowtie2] + flags + passthru_args + file_args
    else:
        bowtie2_args = [path_to_bowtie2] + flags + file_args

    # finish parsing input here
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    # NOTE(review): this message mentions samtools, but stdout is sent to
    # the null device below; the text is kept unchanged.
    logger.info('Launching bowtie2 (output will be piped to samtools for '
                'BAM encoding)')
    logger.info(' '.join(bowtie2_args))
    # The child gets its own copy of the descriptor at spawn time, so the
    # parent's handle can be closed as soon as Popen returns.
    with open(devnull, 'w') as sink:
        bowtie2_aligner = Popen(bowtie2_args, stdout=sink,
                                stderr=bowtie2_stderr.w, bufsize=-1)
    logger.info('counteraligned reads will be saved as GZIPed FASTQ files '
                'in counteraligned/')

    logger.debug('Waiting for bowtie2 to finish')
    pollables = [bowtie2_stderr]
    wait_for_job(bowtie2_aligner, pollables, logger)
    if not bowtie2_aligner.returncode == 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        return
    logger.debug('Alignment successfully completed')
    return new_filenames
def align_once(fp_obj, flags, ref, match_type, use_quality=False,
               quals_type='solexa1.3', path_to_bowtie=None,
               path_to_samtools=None, logger=None, **kwargs):
    """Align the input of fp_obj against one bowtie index

    Pipeline: converter subprocess (feeds reads on stdout) -> bowtie ->
    ``samtools view`` (BAM encoding) -> ``samtools sort`` ->
    ``samtools index``.  The unsorted BAM is written to
    ``fp_obj.tmp_filename(<basename of ref>, match_type)`` and removed once
    the sorted file has been checked with assert_path().

    fp_obj           -- file-pair object (input_file, second_file,
                        paired_end, use_pysam, format, tmp_filename(),
                        check_output_dir())
    flags            -- list of bowtie command-line flags; not modified
    ref              -- bowtie index
    match_type       -- passed through to fp_obj.tmp_filename()
    use_quality      -- if true, add ``--phred33-quals`` (when
                        fp_obj.use_pysam) or ``--<quals_type>-quals``
    quals_type       -- quality-encoding name used to build that flag
    path_to_bowtie   -- bowtie executable
    path_to_samtools -- samtools executable
    logger           -- logger; used unconditionally despite the None default
    kwargs           -- accepted and ignored

    Returns None.  An unsupported input format, or a non-zero exit status of
    bowtie or samtools, is logged at critical level and ends the function
    early.
    """
    # Work on a copy: appending to the caller's list would accumulate a
    # duplicate quality flag on every call that shares the same list.
    flags = list(flags)

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname, match_type)
    output_dir = os.path.split(path_to_unsorted)[0]
    fp_obj.check_output_dir(output_dir)

    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append(''.join(['--', quals_type, '-quals']))

    # bowtie always reads from stdin ('-'); '--12' selects the tab-delimited
    # paired input produced by the converters below.
    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # Choose the converter that feeds bowtie's stdin.
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtotab', filename1]
        else:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtofastq', '--no-gzip',
                       filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.fastqtotab',
                   filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.cat', filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s", fp_obj.input_file, fp_obj.format)
        # Nothing can be launched without a converter; previously execution
        # fell through and failed on the unbound in_args.
        return

    # finish parsing input here
    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)

    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w, bufsize=-1)

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr, samtools_stdout,
                 samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)
    if not bowtie_aligner.returncode == 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        # The downstream encoder has nothing useful to read; stop it.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return
    logger.debug('Alignment successfully completed')

    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_viewer.returncode == 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return
    logger.debug('Unsorted BAM file successfully written')

    logger.info('Launching samtools again to sort BAM output')
    output_dir, output_file = os.path.split(path_to_unsorted)
    bam_file = os.path.splitext(output_file)[0]
    # NOTE(review): "sort <input> <output prefix>" looks like the legacy
    # samtools calling convention -- confirm against the installed version.
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    # Relative file names are used, so the sorter runs inside output_dir.
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if not samtools_sorter.returncode == 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_indexer.returncode == 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
def counteralign_once(fp_obj, flags, ref, match_type, use_quality=False,
                      quals_type='solexa1.3', path_to_bowtie=None,
                      path_to_samtools=None, logger=None, **kwargs):
    """Produce counter-alignements

    Pipeline: converter subprocess (feeds reads on stdout) -> bowtie ->
    ``samtools view -f 0x4``, so only reads flagged as unmapped are kept.
    The resulting (unsorted) BAM file is written to
    ``fp_obj.tmp_filename(<basename of ref>)``.

    fp_obj           -- file-pair object (input_file, second_file,
                        paired_end, use_pysam, format, tmp_filename(),
                        check_output_dir())
    flags            -- list of bowtie command-line flags; not modified
    ref              -- bowtie index
    match_type       -- accepted but not used by this function
    use_quality      -- if true, add ``--phred33-quals`` (when
                        fp_obj.use_pysam) or ``--<quals_type>-quals``
    quals_type       -- quality-encoding name used to build that flag
    path_to_bowtie   -- bowtie executable
    path_to_samtools -- samtools executable
    logger           -- logger; used unconditionally despite the None default
    kwargs           -- accepted and ignored

    Returns ``(path_to_bam, None)`` on success, or None when the input
    format is unsupported or bowtie / samtools exits with a non-zero status
    (logged at critical level).
    """
    # Work on a copy: appending to the caller's list would accumulate a
    # duplicate quality flag on every call that shares the same list.
    flags = list(flags)

    refname = os.path.basename(ref)
    # NOTE(review): the bowtie align_once passes match_type to
    # tmp_filename(); here it is omitted -- confirm that is intended.
    output_dir, output_file = os.path.split(fp_obj.tmp_filename(refname))
    fp_obj.check_output_dir(output_dir)

    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    if second_file is not None:
        filename2 = os.path.abspath(second_file)
    else:
        filename2 = None

    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append(''.join(['--', quals_type, '-quals']))

    # bowtie always reads from stdin ('-'); '--12' selects the tab-delimited
    # paired input produced by the converters below.
    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # Choose the converter that feeds bowtie's stdin.
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtotab', filename1]
        else:
            in_args = [sys.executable, '-m',
                       'seriesoftubes.converters.bamtofastq', '--no-gzip',
                       filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.fastqtotab',
                   filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = [sys.executable, '-m',
                   'seriesoftubes.converters.cat', filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s", fp_obj.input_file, fp_obj.format)
        # Nothing can be launched without a converter; previously execution
        # fell through and failed on the unbound in_args.
        return

    # finish parsing input here
    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)

    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w, bufsize=-1)

    logger.info('Only unaligned reads will be saved.')
    # '-o' consumes the argument that follows it as the output path, so the
    # filter option has to come before it (it used to sit in between, which
    # made '-f' the output file name).
    samtools_args = [path_to_samtools, 'view', '-b', '-S',
                     '-f', '0x4',  # ONLY SAVE UNALIGNED READS
                     '-o', join(output_dir, output_file), '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr, samtools_stdout,
                 samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)
    if not bowtie_aligner.returncode == 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        # The downstream encoder has nothing useful to read; stop it.
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return
    logger.debug('Alignment successfully completed')

    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_viewer.returncode == 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return
    logger.debug('Unsorted BAM file successfully written')
    return (join(output_dir, output_file), None)
def run_macs(f, subpeaks=True, path_to_macs=None, logging_level=10,
             user_gsize=None, qvalue=0.01, passthru_args=None, **kwargs):
    """Run MACS on a BAM file

    Builds a ``callpeak`` command line for the treatment file
    ``f.input_file`` (and ``f.control_file`` if present), runs it with
    ``f.output_dir`` as working directory and waits for it to finish.

    f             -- file object providing input_file, control_file,
                     sample_name and output_dir
    subpeaks      -- if true, add ``--call-summits``
    path_to_macs  -- MACS executable; looked up with
                     path_to_executable("macs2") when None
    logging_level -- level handed to get_logger()
    user_gsize    -- genome size for ``-g``; when falsy it is derived from
                     the genome build guessed from the BAM file
    qvalue        -- value for ``-q``
    passthru_args -- optional extra MACS arguments; every '+' in them is
                     replaced by '-' before use (the caller's list is not
                     modified)
    kwargs        -- accepted and ignored

    Returns the command line that was run, followed by two newlines.
    Raises Usage when no genome size was given and the genome build cannot
    be guessed from the input file.
    """
    logger = get_logger(logging_level)
    if path_to_macs is None:
        path_to_macs = path_to_executable("macs2")

    input_file = f.input_file
    control_file = f.control_file
    logger.debug('Processing %s', input_file)
    if control_file is not None:
        logger.debug('with control %s', control_file)

    # determine genome name and size
    if user_gsize:
        genome_size = user_gsize
        # NOTE(review): the guessed build is not used on this path; the call
        # is kept so that any other error it raises still surfaces here.
        try:
            genome_build = guess_bam_genome(input_file)
        except NoMatchFoundError:
            genome_build = None
    else:
        try:
            genome_build = guess_bam_genome(input_file)
        except NoMatchFoundError:
            raise Usage('Could not determine genome / genome size for '
                        'file %s' % input_file)

        # Strip digits etc. from the build name, e.g. to compare 'hg'.
        gname = ''.join([x for x in genome_build if x.isalpha()])
        if gname == 'hg':
            genome_size = 'hs'
        elif gname in ['mm', 'ce', 'dm']:
            genome_size = gname
        else:
            genome_size = '%.1e' % sum(genome(genome_build).itervalues())

    fmt = decide_format(input_file, control_file, logger)
    name = f.sample_name.replace(' ', '_')

    if passthru_args is not None:
        # Build a new list rather than rewriting the caller's in place.
        passthru_args = [arg.replace('+', '-') for arg in passthru_args]
        logger.debug('Passing thru arguments %s', ' '.join(passthru_args))

    # Every argv element must be a string, both for Popen and for the
    # ' '.join() calls below, hence the str() conversions.
    macs_options = ['--trackline',
                    '-f', fmt,  # correct file format BAM or BAMPE
                    '-B', '--SPMR',  # bedgraphs, SPMR
                    '-g', str(genome_size),
                    '-q', str(qvalue),
                    '-n', name,  # run name
                    '-t', join(getcwd(), input_file)]  # treatment
    if control_file is not None:
        macs_options.extend(['-c', join(getcwd(), control_file)])
    if subpeaks:
        macs_options.append('--call-summits')
    if passthru_args is not None:
        macs_options.extend(passthru_args)

    step = [path_to_macs, 'callpeak'] + macs_options
    if platform.system() == 'Windows':
        # Run the MACS script through the current Python interpreter.
        step.insert(0, sys.executable)

    macs_stdout = PolledPipe(logger=logger, level=WARN)
    macs_stderr = PolledPipe(logger=logger, level=ERROR)
    logger.debug('Launching %s', ' '.join(step))
    job = Popen(step, stdout=macs_stdout.w, stderr=macs_stderr.w,
                cwd=f.output_dir)

    pollables = [macs_stdout, macs_stderr]
    wait_for_job(job, pollables, logger)
    if job.returncode != 0:
        logger.critical("MACS did not run properly [%s]", job.returncode)

    return '%s\n\n' % ' '.join(step)
bwa_aligner = Popen(bwa_args, stdout=PIPE, stderr=bwa_stderr.w, bufsize=-1) samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o', path_to_unsorted, '-'] logger.info('Launching samtools to encode bwa output as BAM') logger.info(' '.join(samtools_args)) samtools_stdout = PolledPipe(logger=logger, level=logging.WARN) samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR) samtools_viewer = Popen(samtools_args, stdin=bwa_aligner.stdout, stdout=samtools_stdout.w, stderr=samtools_stderr.w, bufsize=-1) logger.debug('Waiting for bwa to finish') pollables = [bwa_stderr, samtools_stdout, samtools_stderr] wait_for_job(bwa_aligner, pollables, logger) if not bwa_aligner.returncode == 0: logger.critical("bwa did not run properly [%d]", bwa_aligner.returncode) samtools_viewer.terminate() samtools_viewer.poll() logger.critical("samtools terminated") return logger.debug('Alignment successfully completed') logger.debug('Waiting for samtools to finish') wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger) if not samtools_viewer.returncode == 0: logger.critical("samtools view did not run properly [%d]", samtools_viewer.returncode)