def check_binaries():
    """
    Verifies that every external tool required for alignment can be located.

    The tools are probed in a fixed order (minimap2, samtools, sort) and
    AlignmentException is raised for the first one that "which" cannot find.
    """
    required_tools = (
        (MINIMAP_BIN, "minimap2 is not installed"),
        (SAMTOOLS_BIN, "samtools is not installed"),
        ("sort", "UNIX sort utility is not available"),
    )
    for binary, complaint in required_tools:
        if not which(binary):
            raise AlignmentException(complaint)
def _run_minimap(reference_file, reads_files, num_proc, mode, out_file,
                 sam_output):
    """
    Runs minimap2 through bash and writes the result into out_file.

    If sam_output is set, minimap2 emits SAM which is piped through
    "samtools view" and "samtools sort", so out_file receives sorted BAM.
    Otherwise minimap2's default (PAF) output is written unchanged.

    reference_file: path to the reference sequences
    reads_files:    list of paths to the read files
    num_proc:       number of minimap2 threads (-t)
    mode:           minimap2 preset (-x)
    out_file:       output path; its directory also receives the stderr
                    log and the temporary files of samtools sort
    sam_output:     selects the sorted BAM pipeline instead of PAF

    Raises AlignmentException if the pipeline fails or cannot be started.
    The stderr log (minimap.stderr) is removed on success and kept on
    failure so that it can be inspected.

    NOTE(review): another _run_minimap definition appears later in this
    file; if both live in the same module the later one wins - confirm
    which one is intended.
    """
    #local import keeps the block self-contained:
    #shlex.quote on Python 3, pipes.quote on Python 2
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    work_dir = os.path.dirname(out_file)
    stderr_file = os.path.join(work_dir, "minimap.stderr")

    SORT_THREADS = "4"
    #larger sort memory budget for references above 100Mb
    large_reference = os.path.getsize(reference_file) > 100 * 1024 * 1024
    SORT_MEM = "4G" if large_reference else "1G"

    minimap_cmd = [MINIMAP_BIN, reference_file]
    minimap_cmd.extend(reads_files)
    minimap_cmd.extend(["-x", mode, "-t", str(num_proc)])
    pipeline = [minimap_cmd]

    if sam_output:
        #Since the output is not sorted by read name anymore,
        #it's important that all reads have SEQ.
        #-a = SAM output, -p = min primary-to-secondary score ratio
        #-N = max secondary alignments
        #--sam-hit-only = don't output unmapped reads
        #--secondary-seq = custom option to output SEQ for secondary
        #                  alignments with hard clipping
        #-L = move CIGAR strings of ultra-long reads to a separate tag
        #-Q = don't output fastq quality
        minimap_cmd.extend(["-a", "-p", "0.5", "-N", "10", "--sam-hit-only",
                            "-L", "-Q", "--secondary-seq"])

        #timestamped prefix for the temporary files of samtools sort
        tmp_prefix = os.path.join(
            work_dir,
            "sort_" + datetime.datetime.now().strftime("%y%m%d_%H%M%S"))
        pipeline.append([SAMTOOLS_BIN, "view", "-T", reference_file,
                         "-u", "-"])
        pipeline.append([SAMTOOLS_BIN, "sort", "-T", tmp_prefix, "-O", "bam",
                         "-@", SORT_THREADS, "-l", "1", "-m", SORT_MEM])
    #otherwise: PAF output is enabled by default, nothing to add

    #every argument is quoted, so paths with spaces or shell
    #metacharacters cannot break (or alter) the pipeline
    shell_cmd = " | ".join(" ".join(quote(arg) for arg in stage)
                           for stage in pipeline)

    try:
        with open(stderr_file, "w") as stderr_handle, \
                open(out_file, "w") as stdout_handle:
            #pipefail: a failure of any stage fails the whole command
            subprocess.check_call(
                ["/bin/bash", "-c", "set -o pipefail; " + shell_cmd],
                stderr=stderr_handle, stdout=stdout_handle)
        #the log is only interesting if something went wrong
        os.remove(stderr_file)

    except (subprocess.CalledProcessError, OSError) as e:
        logger.error("Error running minimap2, terminating. "
                     "See the alignment error log "
                     " for details: " + stderr_file)
        raise AlignmentException(str(e))
def _run_minimap(reference_file, reads_files, num_proc, mode, out_file,
                 sam_output):
    """
    Runs minimap2 directly (no shell) and writes its stdout into out_file.

    reference_file: path to the reference sequences
    reads_files:    list of paths to the read files
    num_proc:       number of minimap2 threads (-t)
    mode:           minimap2 preset (-x)
    out_file:       path of the file that receives minimap2's stdout
    sam_output:     if set, SAM output options are added to the command

    minimap2's stderr is discarded.

    Raises AlignmentException if minimap2 exits with a non-zero status
    or cannot be started.
    """
    cmdline = [MINIMAP_BIN, reference_file]
    cmdline.extend(reads_files)
    cmdline.extend(["-x", mode, "-t", str(num_proc)])
    if sam_output:
        #-a = SAM output, -p = min primary-to-secondary score ratio
        #-N = max secondary alignments
        cmdline.extend(["-a", "-p", "0.7", "-N", "10"])

    try:
        #context managers make sure both handles are closed on every path
        with open(os.devnull, "wb") as devnull, \
                open(out_file, "wb") as out_handle:
            subprocess.check_call(cmdline, stderr=devnull, stdout=out_handle)

    except (subprocess.CalledProcessError, OSError) as e:
        #OSError (e.g. the executable is missing) has no returncode
        #attribute, so it must not be accessed unconditionally.
        #A return code of -9 means the child was terminated by signal 9.
        if getattr(e, "returncode", None) == -9:
            logger.error("Looks like the system ran out of memory")
        raise AlignmentException(str(e))