def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Run bwa on the input reads and record the sorted BAM under ``data["work_bam"]``.

    The output BAM lives in ``align_dir``.  A lane-named file
    (``<lane>-sort.bam``) is reused when it already exists; otherwise the
    output is named after the sample (``<sample>-sort.bam``).

    When ``data`` has a truthy ``align_split`` entry, or ``fastq_file`` ends
    with ``.sdf``, the output path and inputs are prepared through
    ``alignprep.setup_combine`` and ``alignprep.split_namedpipe_cls``.
    Otherwise, inputs whose configured ``quality_format`` is ``illumina`` are
    passed through ``alignprep.fastq_convert_pipe_cl``.

    Alignment is skipped when the working output already exists, or when the
    final combined file exists in the split case.

    Args:
        fastq_file: Path of the first read input (string; may end in ``.sdf``).
        pair_file: Path of the second read input; any falsy value is
            normalized to ``""``.
        ref_file: Reference handed through to the aligner helper.
        names: Mapping providing ``"lane"``; also given to
            ``novoalign.get_rg_info`` to build read group information.
        align_dir: Directory in which the output BAM path is built.
        data: Sample dictionary; must provide ``data["config"]["algorithm"]``.

    Returns:
        The sample dictionary (possibly the one returned by
        ``alignprep.setup_combine``) with ``"work_bam"`` set to the output path.
    """
    pair_file = pair_file or ""

    # Backwards compatibility: earlier runs named the BAM after the lane.
    # Keep using such a file if present, otherwise name it after the sample.
    bam_template = "{0}-sort.bam"
    out_file = os.path.join(align_dir, bam_template.format(names["lane"]))
    if not utils.file_exists(out_file):
        out_file = os.path.join(align_dir, bam_template.format(dd.get_sample_name(data)))

    # Read before ``data`` can be replaced by setup_combine below.
    qual_format = data["config"]["algorithm"].get("quality_format", "").lower()

    if data.get("align_split") or fastq_file.endswith(".sdf"):
        # Only .sdf inputs supply a minimum read size for the bwa-mem check.
        min_size = rtg.min_read_size(fastq_file) if fastq_file.endswith(".sdf") else None
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file, pair_file = alignprep.split_namedpipe_cls(fastq_file, pair_file, data)
    else:
        min_size = None
        final_file = None
        if qual_format == "illumina":
            fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
            if pair_file:
                pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)

    already_done = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not already_done:
        # Fall back to the older backtrack (bwa aln) approach when bwa-mem is
        # not explicitly enabled and is either disabled or not usable.
        if "bwa-mem" in dd.get_tools_on(data):
            use_backtrack = False
        elif "bwa-mem" in dd.get_tools_off(data):
            use_backtrack = True
        else:
            use_backtrack = not _can_use_mem(fastq_file, data, min_size)

        aligner = _align_backtrack if use_backtrack else _align_mem
        out_file = aligner(fastq_file, pair_file, ref_file, out_file,
                           names, rg_info, data)

    data["work_bam"] = out_file
    return data
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Run bwa on the input reads and record the sorted BAM under ``data["work_bam"]``.

    The output BAM lives in ``align_dir``.  If the lane name differs from the
    sample name and a lane-named file (``<lane>-sort.bam``) already exists, it
    is reused.  Otherwise the output is ``<sample>-sort.bam``, or
    ``<sample>-sort-cumi.bam`` when ``data`` contains a ``"umi_bam"`` key.

    When ``data`` has a truthy ``align_split`` entry, or ``fastq_file`` ends
    with ``.sdf``, the output path and inputs are prepared through
    ``alignprep.setup_combine`` and ``alignprep.split_namedpipe_cls``.
    Otherwise, inputs whose configured ``quality_format`` is ``illumina`` are
    passed through ``alignprep.fastq_convert_pipe_cl``.

    Alignment is skipped when the working output already exists, or when the
    final combined file exists in the split case.  If
    ``needs_separate_hla(data)`` is true, an additional ``HLA-`` prefixed BAM
    is produced next to the main output and stored under ``data["hla_bam"]``.

    Args:
        fastq_file: Path of the first read input (string; may end in ``.sdf``).
        pair_file: Path of the second read input; any falsy value is
            normalized to ``""``.
        ref_file: Reference handed through to the aligner helper.
        names: Mapping providing ``"lane"``; also given to
            ``novoalign.get_rg_info`` to build read group information.
        align_dir: Directory in which the output BAM path is built.
        data: Sample dictionary; must provide ``data["config"]["algorithm"]``.

    Returns:
        The sample dictionary (possibly the one returned by
        ``alignprep.setup_combine``) with ``"work_bam"`` set, plus
        ``"hla_bam"`` when a separate HLA alignment was requested.
    """
    pair_file = pair_file or ""

    # Backwards compatibility: earlier runs named the BAM after the lane.
    # Only consider that name when it is distinct from the sample name.
    lane_name = names["lane"]
    out_file = None
    if lane_name != dd.get_sample_name(data):
        out_file = os.path.join(align_dir, "{0}-sort.bam".format(lane_name))
    if not (out_file and utils.file_exists(out_file)):
        if "umi_bam" in data:
            umi_ext = "-cumi"
        else:
            umi_ext = ""
        sample_bam = "{0}-sort{1}.bam".format(dd.get_sample_name(data), umi_ext)
        out_file = os.path.join(align_dir, sample_bam)

    # Read before ``data`` can be replaced by setup_combine below.
    qual_format = data["config"]["algorithm"].get("quality_format", "").lower()

    if data.get("align_split") or fastq_file.endswith(".sdf"):
        # Only .sdf inputs supply a minimum read size for the bwa-mem check.
        min_size = rtg.min_read_size(fastq_file) if fastq_file.endswith(".sdf") else None
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file, pair_file = alignprep.split_namedpipe_cls(fastq_file, pair_file, data)
    else:
        min_size = None
        final_file = None
        if qual_format == "illumina":
            fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
            if pair_file:
                pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)

    already_done = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not already_done:
        # Fall back to the older backtrack (bwa aln) approach when bwa-mem is
        # not explicitly enabled and is either disabled or not usable.
        if "bwa-mem" in dd.get_tools_on(data):
            use_backtrack = False
        elif "bwa-mem" in dd.get_tools_off(data):
            use_backtrack = True
        else:
            use_backtrack = not _can_use_mem(fastq_file, data, min_size)

        if use_backtrack:
            aligner = _align_backtrack
        elif is_precollapsed_bam(data) or not hla_on(data) or needs_separate_hla(data):
            aligner = _align_mem
        else:
            aligner = _align_mem_hla
        out_file = aligner(fastq_file, pair_file, ref_file, out_file,
                           names, rg_info, data)

    data["work_bam"] = out_file

    # bwakit will corrupt the non-HLA alignments in a UMI collapsed BAM file
    # (see https://github.com/bcbio/bcbio-nextgen/issues/3069), so the HLA
    # alignment is written to its own file alongside the main BAM.
    if needs_separate_hla(data):
        bam_dir, bam_name = os.path.split(out_file)
        hla_target = os.path.join(bam_dir, "HLA-" + bam_name)
        data["hla_bam"] = _align_mem_hla(fastq_file, pair_file, ref_file, hla_target,
                                         names, rg_info, data)
    return data