def default(inputs, outputs, wildcards, log):
    """
    Default host filtering operations.

    Runs Bowtie 2 against the host index and keeps the reads that it reports as unaligned. Empty (dummy) inputs
    produce empty outputs.

    :param inputs: Object containing the input file names (forward, reverse, singleton)
    :param outputs: Output file names, indexed by position (0: forward, 1: reverse, 2: singleton)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # Set locations of reference files
    host_index = op.host_index

    # If either of the paired read files are non-empty, filter them for host reads
    if not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse):

        # The mates are expected at the --un-conc path with ".1"/".2" inserted before the final extension
        forward_unaligned = re.sub(r'(\.[^.]*)$', r'.1\1', outputs[0])
        reverse_unaligned = re.sub(r'(\.[^.]*)$', r'.2\1', outputs[0])

        # Share one log handle across the commands and close it afterwards instead of leaking one per call
        with open(log[0], "a") as log_file:

            # Run Bowtie 2 on the paired read files
            subprocess.run([
                required_programs["bowtie2"],
                "-x", host_index,
                "-1", inputs.forward,
                "-2", inputs.reverse,
                "--un-conc", outputs[0],
                "--no-unal",
                "-S", "/dev/null"
            ], stdout=log_file, stderr=subprocess.STDOUT)

            # Fix output file names
            subprocess.run(["mv", forward_unaligned, outputs[0]], stdout=log_file, stderr=subprocess.STDOUT)
            subprocess.run(["mv", reverse_unaligned, outputs[1]], stdout=log_file, stderr=subprocess.STDOUT)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1]])

    # If the singleton read file is non-empty, filter it for host reads
    if not lf.is_empty(inputs.singleton):

        # Run Bowtie 2 on the singleton read file
        with open(log[0], "a") as log_file:
            subprocess.run([
                required_programs["bowtie2"],
                "-x", host_index,
                "-U", inputs.singleton,
                "--un", outputs[2],
                "--no-unal",
                "-S", "/dev/null"
            ], stdout=log_file, stderr=subprocess.STDOUT)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2]])
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog aggregation operations.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0 and 1)
    :param wildcards: Wildcards determined from input file name patterns; wildcards.mapping selects the grouping
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # If the input file is non-empty, map the reads
    if not lf.is_empty(inputs.input):
        mapping = fo.ortholog_to_grouping_directory + wildcards.mapping + op.ortholog_to_grouping_suffix

        if operating_params["method"] == "empanada":
            command = [
                required_programs["empanada"],
                "-ko", inputs.input,
                "-ko2path", mapping,
                "-o", outputs[0],
                "-oc", outputs[1]
            ] + operating_params["empanada_method"]

            # Close the log handle once the command finishes instead of leaking it
            with open(log[0], "a") as log_file:
                subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)

        # Otherwise, if the method is unrecognized, just copy the input file to the output
        else:
            subprocess.run(["cp", inputs.input, outputs[0]])
            subprocess.run(["touch", outputs[1]])

    # Otherwise, if the input file is a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1]])
def default(inputs, outputs, wildcards, log):
    """
    Default gene map operations.

    Runs ortholog_map.py from the source directory on the input file using the configured method and the
    gene-to-ortholog reference file. An empty (dummy) input produces an empty output.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # Set locations of reference files if necessary
    gene_to_ortholog = op.gene_to_ortholog_file

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    # Otherwise, map the genes, closing the log handle afterwards instead of leaking it
    with open(log[0], "a") as log_file:
        subprocess.run([
            fo.source_directory + "ortholog_map.py",
            inputs.input,
            operating_params["method"],
            gene_to_ortholog,
            "--output", outputs[0]
        ], stdout=log_file, stderr=subprocess.STDOUT)
def default(inputs, outputs, wildcards, log):
    """
    Default quality filter summary operations.

    Runs quality_filter_summary.py on the pre- and post-filtering read files when at least one of them is
    non-empty; otherwise creates an empty output.

    :param inputs: Object containing the input file names (pre_forward, pre_reverse, post_forward, post_reverse,
                   new_singleton, old_singleton)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # Order matters: the files are passed to the summary script positionally
    read_files = [
        inputs.pre_forward,
        inputs.pre_reverse,
        inputs.post_forward,
        inputs.post_reverse,
        inputs.new_singleton,
        inputs.old_singleton
    ]

    # If any of the read files is non-empty, summarize them
    if any(not lf.is_empty(read_file) for read_file in read_files):
        command = [fo.source_directory + "quality_filter_summary.py"] + read_files
        command += ["--output", outputs[0], "--use_sample"]

        # Close the log handle once the command finishes instead of leaking it
        with open(log[0], "a") as log_file:
            subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)

    # Otherwise, if all read files are dummy files, create a dummy output
    else:
        subprocess.run(["touch", outputs[0]])
def default(inputs, outputs, wildcards, log):
    """
    Default map reads to genes summary operations.

    Runs map_reads_to_genes_summary.py on the input file; an empty (dummy) input produces an empty output.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    # Otherwise, summarize the mapping, closing the log handle afterwards instead of leaking it
    with open(log[0], "a") as log_file:
        subprocess.run([
            fo.source_directory + "map_reads_to_genes_summary.py",
            inputs.input,
            "--output", outputs[0],
            "--use_sample",
            "--use_type"
        ], stdout=log_file, stderr=subprocess.STDOUT)
def default(inputs, outputs, wildcards, log):
    """
    Default hit filtering operations.

    Runs hit_filter.py on the input file with the configured method. Ortholog-based methods additionally
    receive the gene-to-ortholog map, and "best N" methods receive the configured N.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    method = operating_params["method"]
    n_method = method in ("best_n_hits", "best_n_orthologs")
    ortholog_method = method in ("best_ortholog", "best_n_orthologs")

    # Set locations of reference files if necessary
    gene_to_ortholog = op.gene_to_ortholog_file if ortholog_method else None

    # If the input file is non-empty, filter the hits
    if not lf.is_empty(inputs.input):

        # Create the shell command to run, adding optional parameters as necessary
        command = [fo.source_directory + "hit_filter.py", inputs.input, method, "--output", outputs[0]]
        if ortholog_method:
            command += ["--gene_to_ortholog_map", gene_to_ortholog]
        if n_method:
            # subprocess arguments must be strings; str() keeps an integer setting from raising a TypeError
            command += ["-n", str(operating_params["best_n"])]

        # Close the log handle once the command finishes instead of leaking it
        with open(log[0], "a") as log_file:
            subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)

    # Otherwise, if the input file is a dummy file, create a dummy output
    else:
        subprocess.run(["touch", outputs[0]])
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog aggregation summary operations.

    Runs ortholog_aggregation_summary.py on the input file, labelling it with the grouping named by the mapping
    wildcard; an empty (dummy) input produces an empty output.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns; wildcards.mapping names the grouping
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    # Otherwise, summarize the aggregation, closing the log handle afterwards instead of leaking it
    with open(log[0], "a") as log_file:
        subprocess.run([
            fo.source_directory + "ortholog_aggregation_summary.py",
            inputs.input,
            "--grouping_name", wildcards.mapping,
            "--output", outputs[0]
        ], stdout=log_file, stderr=subprocess.STDOUT)
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog abundance correction operations.

    Depending on the configured method, runs the "musicc" program, runs ortholog_abundance_correction.py
    ("relative"), or copies the input through unchanged. An empty (dummy) input produces an empty output.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    method = operating_params["method"]

    if method == "musicc":
        command = [required_programs["musicc"], inputs.input, "-o", outputs[0]] + operating_params["musicc_method"]
    elif method == "relative":
        command = [fo.source_directory + "ortholog_abundance_correction.py", inputs.input, "--output", outputs[0]]

    # Otherwise, if the method is unrecognized, just copy the input file to the output (not logged)
    else:
        subprocess.run(["cp", inputs.input, outputs[0]])
        return

    # Close the log handle once the command finishes instead of leaking it
    with open(log[0], "a") as log_file:
        subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)
def default(inputs, outputs, wildcards, log):
    """
    Default read-to-gene mapping operations.

    Runs the "diamond" program with the configured method against the target database; an empty (dummy) input
    produces an empty output.

    :param inputs: Object containing the input file names (input)
    :param outputs: Output file names, indexed by position (0)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # Set locations of reference files
    target_database = op.target_database_file

    # If the input file is non-empty, map the reads
    if not lf.is_empty(inputs.input):
        command = [
            required_programs["diamond"],
            operating_params["method"],
            "--block-size", str(non_essential_params["block_size"]),
            "--index-chunks", str(non_essential_params["index_chunks"]),
            "--threads", str(cluster_params["cores"] * op.cpu_to_thread_multiplier),
            "--db", target_database,
            "--query", inputs.input,
            "--out", outputs[0],
            "--top", str(operating_params["top_percentage"]),
            "--evalue", str(operating_params["evalue_cutoff"])
        ]

        # Extending a list with a string appends it one character at a time, so split a string setting into
        # whole arguments first; a list of arguments is appended as-is, and an empty setting adds nothing
        sensitivity = operating_params["sensitivity"]
        if isinstance(sensitivity, str):
            command += sensitivity.split()
        elif sensitivity:
            command += list(sensitivity)

        # Close the log handle once the command finishes instead of leaking it
        with open(log[0], "a") as log_file:
            subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)

    # Otherwise, if the input file is a dummy file, create a dummy output
    else:
        subprocess.run(["touch", outputs[0]])
def default(inputs, outputs, wildcards, log):
    """
    Default quality filtering operations.

    Runs the configured trimmer in paired-end mode on the forward/reverse reads and in single-end mode on the
    singleton reads, then builds a combined singleton file. Empty (dummy) inputs produce empty outputs.

    :param inputs: Object containing the input file names (forward, reverse, singleton)
    :param outputs: Output file names, indexed by position (0: forward, 1: reverse, 2: combined singletons,
                    3: singletons newly created by paired-end filtering, 4: filtered input singletons)
    :param wildcards: Wildcards determined from input file name patterns (unused here)
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # str() keeps numeric settings from raising a TypeError during concatenation
    trimming_parameters = [
        "MAXINFO:" + str(operating_params["max_info"]),
        "MINLEN:" + str(operating_params["min_len"])
    ]

    paired_present = not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse)

    # If either of the paired read files are non-empty, quality filter them
    if paired_present:

        # Assign intermediate files for the separate new singletons
        new_forward_singletons = outputs[3] + ".forward_singletons.fastq"
        new_reverse_singletons = outputs[3] + ".reverse_singletons.fastq"

        # Share one log handle across the commands and close it afterwards instead of leaking one per call
        with open(log[0], "a") as log_file:

            # Perform paired-end quality filtering and trimming
            subprocess.run([
                required_programs["trimmer"], "PE",
                inputs.forward, inputs.reverse,
                outputs[0], new_forward_singletons,
                outputs[1], new_reverse_singletons
            ] + trimming_parameters, stdout=log_file, stderr=subprocess.STDOUT)

            # Merge new singletons into single new singleton file
            with open(outputs[3], "w") as output_file:
                subprocess.run(["cat", new_forward_singletons, new_reverse_singletons],
                               stdout=output_file, stderr=log_file)
            subprocess.run(["rm", new_forward_singletons, new_reverse_singletons])

            # Add new singletons to combined singleton output file
            with open(outputs[2], "w") as output_file:
                subprocess.run(["cat", outputs[3]], stdout=output_file, stderr=log_file)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1], outputs[3]])

    # If the singleton read file is non-empty, quality filter it
    if not lf.is_empty(inputs.singleton):
        with open(log[0], "a") as log_file:

            # Perform single-end quality filtering and trimming
            subprocess.run([required_programs["trimmer"], "SE", inputs.singleton, outputs[4]] + trimming_parameters,
                           stdout=log_file, stderr=subprocess.STDOUT)

            # If the paired-end reads were filtered, the combined file already holds the new singletons, so
            # append to it; otherwise the combined file is just the filtered input singletons
            combined_mode = "a" if paired_present else "w"
            with open(outputs[2], combined_mode) as output_file:
                subprocess.run(["cat", outputs[4]], stdout=output_file, stderr=log_file)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2], outputs[4]])
def default(inputs, outputs, wildcards, log):
    """
    Default duplicate filtering operations.

    For the paired reads and for the singleton reads separately: converts the FASTQ input to SAM with Picard,
    marks duplicates, dumps the marked SAM with "samtools view", extracts the duplicate reads with
    extract_duplicates.py, and removes them from the original FASTQ files with remove_marked_reads.py.
    Empty (dummy) inputs produce empty outputs.

    :param inputs: Object containing the input file names (forward, reverse, singleton)
    :param outputs: Output file names, indexed by position (0: forward, 1: reverse, 2: singleton,
                    3: extracted paired duplicates, 4: extracted singleton duplicates,
                    5: paired "M=" file from duplicate marking, 6: singleton "M=" file from duplicate marking)
    :param wildcards: Wildcards determined from input file name patterns; wildcards.sample is passed as "SM="
    :param log: The log file; log[0] is opened in append mode
    :return: None.
    """

    # If either of the paired read files are non-empty, filter them for duplicate reads
    if not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse):

        # Intermediate files are written next to the forward input
        paired_sam = inputs.forward + ".paired.sam"
        marked_output = inputs.forward + ".marked.sam"
        formatted_marked_output = inputs.forward + ".samview"

        # Share one log handle across the commands and close it afterwards instead of leaking one per call
        with open(log[0], "a") as log_file:

            # Convert the paired read files to SAM format
            subprocess.run([
                required_programs["picard"],
                non_essential_params["fastq_to_sam"],
                "F1=%s" % inputs.forward,
                "F2=%s" % inputs.reverse,
                "O=%s" % paired_sam,
                "V=%s" % non_essential_params["quality_format"],
                "SO=%s" % non_essential_params["sort_order"],
                "SM=%s" % wildcards.sample
            ], stdout=log_file, stderr=subprocess.STDOUT)

            # Mark duplicates (output is logged, matching the singleton branch)
            subprocess.run([
                required_programs["picard"],
                non_essential_params["mark_duplicates"],
                "I=%s" % paired_sam,
                "O=%s" % marked_output,
                "M=%s" % outputs[5]
            ], stdout=log_file, stderr=subprocess.STDOUT)
            subprocess.run(["rm", paired_sam], stdout=log_file, stderr=subprocess.STDOUT)

            # Convert the marked output to a parse-able format
            # NOTE(review): the ".samview" file is never removed - confirm whether that is intentional
            with open(formatted_marked_output, "w") as formatted_marked_output_file:
                subprocess.run([required_programs["samtools"], "view", marked_output],
                               stdout=formatted_marked_output_file, stderr=log_file)
            subprocess.run(["rm", marked_output])

            # Extract duplicate reads from the formatted marked output
            with open(outputs[3], "w") as marked_read_file:
                subprocess.run([fo.source_directory + "extract_duplicates.py", formatted_marked_output],
                               stdout=marked_read_file, stderr=log_file)

            # Remove the marked reads from the original FASTQs
            with open(outputs[0], "w") as forward_output:
                subprocess.run([fo.source_directory + "remove_marked_reads.py", outputs[3], inputs.forward],
                               stdout=forward_output, stderr=log_file)
            with open(outputs[1], "w") as reverse_output:
                subprocess.run([fo.source_directory + "remove_marked_reads.py", outputs[3], inputs.reverse],
                               stdout=reverse_output, stderr=log_file)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1], outputs[3], outputs[5]])

    # If the singleton read file is non-empty, filter it for duplicate reads
    if not lf.is_empty(inputs.singleton):

        # Intermediate files are written next to the singleton input
        singleton_sam = inputs.singleton + ".singleton.sam"
        marked_output = inputs.singleton + ".marked.sam"
        formatted_marked_output = inputs.singleton + ".samview"

        with open(log[0], "a") as log_file:

            # Convert the singleton read file to SAM format; the singleton file (not the forward file) must be
            # the one converted, otherwise duplicates are marked on the wrong set of reads
            subprocess.run([
                required_programs["picard"],
                non_essential_params["fastq_to_sam"],
                "F1=%s" % inputs.singleton,
                "O=%s" % singleton_sam,
                "V=%s" % non_essential_params["quality_format"],
                "SO=%s" % non_essential_params["sort_order"],
                "SM=%s" % wildcards.sample
            ], stdout=log_file, stderr=subprocess.STDOUT)

            # Mark duplicates
            subprocess.run([
                required_programs["picard"],
                non_essential_params["mark_duplicates"],
                "I=%s" % singleton_sam,
                "O=%s" % marked_output,
                "M=%s" % outputs[6]
            ], stdout=log_file, stderr=subprocess.STDOUT)
            subprocess.run(["rm", singleton_sam])

            # Convert the marked output to a parse-able format
            with open(formatted_marked_output, "w") as formatted_marked_output_file:
                subprocess.run([required_programs["samtools"], "view", marked_output],
                               stdout=formatted_marked_output_file, stderr=log_file)
            subprocess.run(["rm", marked_output])

            # Extract duplicate reads from the formatted marked output
            with open(outputs[4], "w") as marked_read_file:
                subprocess.run([fo.source_directory + "extract_duplicates.py", formatted_marked_output],
                               stdout=marked_read_file, stderr=log_file)

            # Remove the marked reads from the original FASTQs
            with open(outputs[2], "w") as singleton_output:
                subprocess.run([fo.source_directory + "remove_marked_reads.py", outputs[4], inputs.singleton],
                               stdout=singleton_output, stderr=log_file)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2], outputs[4], outputs[6]])