def _bcbio_tophat_wrapper(fastq_file, pair_file, ref_file, stage_name, config):
    """Run ``tophat.align`` with a bcbio-style config derived from ``config``.

    Stage-level settings are read from ``config["stage"][stage_name]``,
    program locations from ``config["program"]`` and the output directory is
    ``config["dir"]["results"]`` joined with ``stage_name``.

    Args:
        fastq_file: Path of the FASTQ file to align; its basename (with the
            suffix removed by ``remove_suffix``) is used as the output base.
        pair_file: Passed through unchanged to ``tophat.align``.
        ref_file: Passed through unchanged to ``tophat.align``.
        stage_name: Key of this stage in ``config["stage"]``; also the name
            of the results subdirectory.
        config: Run configuration mapping. Must contain ``"stage"``,
            ``"program"`` (including a ``"picard"`` entry) and
            ``"dir"``/``"results"``; ``"gtf"`` is optional.

    Returns:
        Whatever ``tophat.align`` returns for the alignment output.

    Raises:
        ValueError: If a ``gtf`` value is configured but ``file_exists``
            reports that the file is missing.
        KeyError: If a required configuration key is absent.
    """
    stage_config = config["stage"][stage_name]
    cores = stage_config.get("cores", 1)

    # Use the listed quality format; if there isn't one, try to figure out
    # what format the reads are in.
    quality_format = stage_config.get("quality_format", None)
    if quality_format is None:
        fastq_format = fastqc.detect_fastq_format(fastq_file)
        quality_format = FASTQ_FORMAT_TO_BCBIO[fastq_format]

    max_errors = stage_config.get("max_errors", None)
    options = stage_config.get("options", {})
    tophat_loc = config["program"].get("tophat", "tophat")
    bowtie_loc = config["program"].get("bowtie", "bowtie")
    out_base = remove_suffix(os.path.basename(fastq_file))
    align_dir = os.path.join(config["dir"]["results"], stage_name)

    # A GTF is optional, but a configured path that does not exist is a
    # configuration error. Raising is sufficient to abort; nothing placed
    # after the raise could ever run.
    gtf_file = config.get("gtf", None)
    if gtf_file and not file_exists(gtf_file):
        raise ValueError("GTF file does not exist. Please check to make sure that "
                         "the value of gtf is set correctly in the configuration file.")

    bcbio_config = {
        "resources": {"tophat": {"cores": cores, "options": options}},
        "algorithm": {"quality_format": quality_format,
                      "max_errors": max_errors},
        "program": {
            "tophat": tophat_loc,
            "bowtie": bowtie_loc,
            "picard": config["program"]["picard"],
            # NOTE(review): empty gatk dir looks like a placeholder entry
            # expected by the downstream code -- confirm.
            "gatk": {"dir": ""},
        },
        "gtf": gtf_file,
    }

    return tophat.align(fastq_file, pair_file, ref_file, out_base,
                        align_dir, bcbio_config)
def _bcbio_tophat_wrapper(fastq_file, pair_file, ref_file, stage_name, config):
    """Align with ``tophat.align`` and expose the result as ``<base>.sam``.

    Builds the nested configuration handed to ``tophat.align`` from
    ``config`` (core count from ``config["cluster"]``, quality format and
    error limit from ``config["stage"][stage_name]``, program locations from
    ``config["program"]``, optional ``gtf``). After alignment, the file
    returned by ``tophat.align`` is removed and a symlink named
    ``<base>.sam`` is created in the same directory with the relative target
    ``accepted_hits.sam``.

    Args:
        fastq_file: Path of the FASTQ file to align; its basename with the
            suffix removed by ``remove_suffix`` becomes ``<base>``.
        pair_file: Passed through unchanged to ``tophat.align``.
        ref_file: Passed through unchanged to ``tophat.align``.
        stage_name: Key of this stage in ``config["stage"]``; also the name
            of the results subdirectory.
        config: Run configuration mapping.

    Returns:
        Path of the newly created ``<base>.sam`` symlink.
    """
    stage_settings = config["stage"][stage_name]
    n_cores = config["cluster"].get("cores")

    # Prefer the configured quality format; fall back to detecting it from
    # the reads when none is listed.
    qual_fmt = stage_settings.get("quality_format")
    if qual_fmt is None:
        detected = fastqc.detect_fastq_format(fastq_file)
        qual_fmt = FASTQ_FORMAT_TO_BCBIO[detected]
    error_limit = stage_settings.get("max_errors")

    tophat_cmd = config["program"].get("tophat", "tophat")
    bowtie_cmd = config["program"].get("bowtie", "bowtie")

    sample_base = remove_suffix(os.path.basename(fastq_file))
    results_dir = os.path.join(config["dir"]["results"], stage_name)

    align_config = {
        "resources": {"tophat": {"cores": n_cores}},
        "algorithm": {"quality_format": qual_fmt, "max_errors": error_limit},
        "program": {"tophat": tophat_cmd, "bowtie": bowtie_cmd},
        "gtf": config.get("gtf"),
    }
    aligned = tophat.align(fastq_file, pair_file, ref_file, sample_base,
                           results_dir, align_config)

    # Swap the file returned by the aligner for a link carrying the sample
    # name; the link target is relative to the output directory.
    os.remove(aligned)
    renamed = os.path.join(os.path.dirname(aligned), sample_base + ".sam")
    os.symlink("accepted_hits.sam", renamed)
    return renamed
def _get_quality_type(in_file):
    """Return the quality-type flag for the FASTQ file ``in_file``.

    The candidate formats come from ``detect_fastq_format``. If multiple
    types are detected, the first one is used. If no quality type is found,
    or the detected type has no entry in ``_FASTQ_TYPE_TO_FLAG``, ``"sanger"``
    is assumed.

    Args:
        in_file: FASTQ file handed to ``detect_fastq_format``.

    Returns:
        The flag mapped to the first detected format, or ``"sanger"``.
    """
    fastq_format = detect_fastq_format(in_file)
    # An empty detection result has no first element; indexing it would
    # raise instead of falling back to the documented default.
    if not fastq_format:
        return "sanger"
    return _FASTQ_TYPE_TO_FLAG.get(fastq_format[0], "sanger")