Пример #1
0
def default(inputs, outputs, wildcards, log):
    """
    Default host read filtering operations.

    Runs Bowtie 2 against the host index and keeps the reads that Bowtie 2
    reports as unaligned (``--un-conc`` for the pairs, ``--un`` for the
    singletons). Empty (dummy) inputs yield empty outputs created with
    ``touch``.

    :param inputs: Object containing the input file names (``forward``, ``reverse``, ``singleton``)
    :param outputs: Indexable collection of output file names; [0] and [1] receive the paired reads, [2] the singleton reads
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # Set locations of reference files
    host_index = op.host_index

    # If either of the paired read files are non-empty, filter them for host reads
    if not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse):

        # The renames below expect Bowtie 2 to have written the unaligned mates to
        # the --un-conc path with ".1"/".2" inserted before the final extension
        unaligned_mate_1 = re.sub('(\\.[^.]*)$', r'.1\1', outputs[0])
        unaligned_mate_2 = re.sub('(\\.[^.]*)$', r'.2\1', outputs[0])

        # Open the log once for this branch so the handle is closed when we are done
        with open(log[0], "a") as log_file:

            # Run Bowtie 2 on the paired read files
            subprocess.run([
                required_programs["bowtie2"], "-x", host_index, "-1",
                inputs.forward, "-2", inputs.reverse, "--un-conc", outputs[0],
                "--no-unal", "-S", "/dev/null"
            ],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # Fix output file names
            subprocess.run(["mv", unaligned_mate_1, outputs[0]],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)
            subprocess.run(["mv", unaligned_mate_2, outputs[1]],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1]])

    # If the singleton read file is non-empty, filter it for host reads
    if not lf.is_empty(inputs.singleton):

        # Run Bowtie 2 on the singleton read file
        with open(log[0], "a") as log_file:
            subprocess.run([
                required_programs["bowtie2"], "-x", host_index, "-U",
                inputs.singleton, "--un", outputs[2], "--no-unal", "-S",
                "/dev/null"
            ],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2]])
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog aggregation operations.

    When the configured method is "empanada", runs the EMPANADA program on the
    input ortholog table using the grouping file selected by
    ``wildcards.mapping``. Any other method copies the input to the first
    output unchanged. An empty (dummy) input yields empty outputs.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; [0] is passed to ``-o`` and [1] to ``-oc``
    :param wildcards: Wildcards determined from input file name patterns (``mapping`` selects the grouping file)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # If the input file is a dummy file, create dummy outputs
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0], outputs[1]])
        return

    mapping = fo.ortholog_to_grouping_directory + wildcards.mapping + op.ortholog_to_grouping_suffix

    if operating_params["method"] == "empanada":
        command = [
            required_programs["empanada"], "-ko", inputs.input, "-ko2path",
            mapping, "-o", outputs[0], "-oc", outputs[1]
        ] + operating_params["empanada_method"]

        # Use a context manager so the log handle is closed after the run
        with open(log[0], "a") as log_file:
            subprocess.run(command,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

    # Otherwise, if the method is unrecognized, just copy the input file to the output
    else:
        subprocess.run(["cp", inputs.input, outputs[0]])
        subprocess.run(["touch", outputs[1]])
Пример #3
0
def default(inputs, outputs, wildcards, log):
    """
    Default gene map operations.

    Runs ``ortholog_map.py`` on the input with the configured method and the
    gene-to-ortholog reference file. An empty (dummy) input yields an empty
    output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # Set locations of reference files if necessary
    gene_to_ortholog = op.gene_to_ortholog_file

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    command = [
        fo.source_directory + "ortholog_map.py", inputs.input,
        operating_params["method"], gene_to_ortholog, "--output",
        outputs[0]
    ]

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
Пример #4
0
def default(inputs, outputs, wildcards, log):
    """
    Default quality filter summary operations.

    Runs ``quality_filter_summary.py`` over the pre- and post-filtering read
    files when at least one of them is non-empty. If every input is an empty
    (dummy) file, an empty output is created instead.

    :param inputs: Object containing the input file names (``pre_forward``, ``pre_reverse``, ``post_forward``, ``post_reverse``, ``new_singleton``, ``old_singleton``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    read_files = [
        inputs.pre_forward, inputs.pre_reverse, inputs.post_forward,
        inputs.post_reverse, inputs.new_singleton, inputs.old_singleton
    ]

    # Summarize only if at least one of the read files has content
    if any(not lf.is_empty(read_file) for read_file in read_files):
        command = [fo.source_directory + "quality_filter_summary.py"] + read_files + ["--output", outputs[0], "--use_sample"]

        # Use a context manager so the log handle is closed after the run
        with open(log[0], "a") as log_file:
            subprocess.run(command,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

    # Otherwise, if all read files were dummy files, create a dummy output
    else:
        subprocess.run(["touch", outputs[0]])
Пример #5
0
def default(inputs, outputs, wildcards, log):
    """
    Default read mapping summary operations.

    Runs ``map_reads_to_genes_summary.py`` on the input file. An empty (dummy)
    input yields an empty output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    command = [
        fo.source_directory + "map_reads_to_genes_summary.py", inputs.input,
        "--output", outputs[0], "--use_sample", "--use_type"
    ]

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
Пример #6
0
def default(inputs, outputs, wildcards, log):
    """
    Default hit filtering operations.

    Runs ``hit_filter.py`` on the input with the configured method. The
    gene-to-ortholog map is passed for the ortholog-based methods, and ``-n``
    is passed for the "best N" methods. An empty (dummy) input yields an empty
    output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    method = operating_params["method"]

    # Work out which optional parameters the chosen method needs
    n_method = method in ["best_n_hits", "best_n_orthologs"]
    ortholog_method = method in ["best_ortholog", "best_n_orthologs"]

    # Set locations of reference files if necessary
    gene_to_ortholog = op.gene_to_ortholog_file if ortholog_method else None

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    # Create the command to run, adding optional parameters as necessary
    command = [
        fo.source_directory + "hit_filter.py", inputs.input, method,
        "--output", outputs[0]
    ]
    if ortholog_method:
        command += ["--gene_to_ortholog_map", gene_to_ortholog]
    if n_method:
        # subprocess arguments must be strings; the configured value may be an int
        command += ["-n", str(operating_params["best_n"])]

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
Пример #7
0
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog aggregation summary operations.

    Runs ``ortholog_aggregation_summary.py`` on the input, passing
    ``wildcards.mapping`` as the grouping name. An empty (dummy) input yields
    an empty output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (``mapping`` is the grouping name)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    command = [
        fo.source_directory + "ortholog_aggregation_summary.py",
        inputs.input, "--grouping_name", wildcards.mapping, "--output",
        outputs[0]
    ]

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
def default(inputs, outputs, wildcards, log):
    """
    Default ortholog abundance correction operations.

    Dispatches on the configured method: "musicc" runs the MUSiCC program,
    "relative" runs ``ortholog_abundance_correction.py``, and any other value
    copies the input to the output unchanged. An empty (dummy) input yields an
    empty output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    method = operating_params["method"]

    if method == "musicc":
        command = [
            required_programs["musicc"], inputs.input, "-o", outputs[0]
        ] + operating_params["musicc_method"]

    elif method == "relative":
        command = [
            fo.source_directory + "ortholog_abundance_correction.py",
            inputs.input, "--output", outputs[0]
        ]

    # Otherwise, if the method is unrecognized, just copy the input file to the output
    else:
        subprocess.run(["cp", inputs.input, outputs[0]])
        return

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
Пример #9
0
def default(inputs, outputs, wildcards, log):
    """
    Default read mapping operations.

    Runs DIAMOND with the configured method against the target database and
    writes the hits to the first output. An empty (dummy) input yields an
    empty output.

    :param inputs: Object containing the input file names (``input``)
    :param outputs: Indexable collection of output file names; only [0] is written
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    # Set locations of reference files
    target_database = op.target_database_file

    # If the input file is a dummy file, create a dummy output
    if lf.is_empty(inputs.input):
        subprocess.run(["touch", outputs[0]])
        return

    command = [
        required_programs["diamond"], operating_params["method"],
        "--block-size",
        str(non_essential_params["block_size"]), "--index-chunks",
        str(non_essential_params["index_chunks"]), "--threads",
        str(cluster_params["cores"] * op.cpu_to_thread_multiplier), "--db",
        target_database, "--query", inputs.input, "--out", outputs[0],
        "--top",
        str(operating_params["top_percentage"]), "--evalue",
        str(operating_params["evalue_cutoff"])
    ]

    # Add the optional sensitivity flag(s). A string must be split into whole
    # arguments; extending the list with a string directly would add one
    # argument per character.
    sensitivity = operating_params["sensitivity"]
    if isinstance(sensitivity, str):
        command += sensitivity.split()
    elif sensitivity:
        command += list(sensitivity)

    # Use a context manager so the log handle is closed after the run
    with open(log[0], "a") as log_file:
        subprocess.run(command,
                       stdout=log_file,
                       stderr=subprocess.STDOUT)
Пример #10
0
def default(inputs, outputs, wildcards, log):
    """
    Default quality filtering and trimming operations.

    Runs the configured trimmer in paired-end ("PE") mode on the paired reads
    and in single-end ("SE") mode on the singleton reads, then builds a
    combined singleton file from the newly orphaned reads and the filtered
    original singletons. Empty (dummy) inputs yield empty outputs.

    :param inputs: Object containing the input file names (``forward``, ``reverse``, ``singleton``)
    :param outputs: Indexable collection of output file names; [0]/[1] filtered pairs, [2] combined singletons, [3] new singletons from the paired run, [4] filtered original singletons
    :param wildcards: Wildcards determined from input file name patterns (not used by this step)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    trimming_parameters = ["MAXINFO:" + operating_params["max_info"], "MINLEN:" + operating_params["min_len"]]

    # Evaluated once up front; also decides below whether the combined
    # singleton file is appended to or created
    paired_reads_present = not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse)

    # If either of the paired read files are non-empty, quality filter and trim them
    if paired_reads_present:

        # Assign intermediate files for the separate new singletons
        new_forward_singletons = outputs[3] + ".forward_singletons.fastq"
        new_reverse_singletons = outputs[3] + ".reverse_singletons.fastq"

        # Open the log once for this branch so the handle is closed when we are done
        with open(log[0], "a") as log_file:

            # Perform paired-end quality filtering and trimming
            subprocess.run([
                required_programs["trimmer"], "PE", inputs.forward,
                inputs.reverse, outputs[0], new_forward_singletons,
                outputs[1], new_reverse_singletons
            ] + trimming_parameters,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # Merge new singletons into single new singleton file
            with open(outputs[3], "w") as output_file:
                subprocess.run(["cat", new_forward_singletons, new_reverse_singletons],
                               stdout=output_file,
                               stderr=log_file)
            subprocess.run(["rm", new_forward_singletons, new_reverse_singletons])

            # Add new singletons to combined singleton output file
            with open(outputs[2], "w") as output_file:
                subprocess.run(["cat", outputs[3]],
                               stdout=output_file,
                               stderr=log_file)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1], outputs[3]])

    # If the singleton read file is non-empty, quality filter and trim it
    if not lf.is_empty(inputs.singleton):

        with open(log[0], "a") as log_file:

            # Perform single-end quality filtering and trimming
            subprocess.run([required_programs["trimmer"], "SE", inputs.singleton, outputs[4]] + trimming_parameters,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # If the paired-end run already wrote the combined singleton file,
            # append to it; otherwise the combined file is just these results
            combined_mode = "a" if paired_reads_present else "w"
            with open(outputs[2], combined_mode) as output_file:
                subprocess.run(["cat", outputs[4]],
                               stdout=output_file,
                               stderr=log_file)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2], outputs[4]])
Пример #11
0
def default(inputs, outputs, wildcards, log):
    """
    Default duplicate read filtering operations.

    For the paired reads and, separately, for the singleton reads: converts
    the FASTQ input to SAM with Picard, marks duplicates with Picard, dumps
    the marked SAM with ``samtools view``, extracts the duplicate reads with
    ``extract_duplicates.py`` and removes them from the original FASTQ with
    ``remove_marked_reads.py``. Empty (dummy) inputs yield empty outputs.

    :param inputs: Object containing the input file names (``forward``, ``reverse``, ``singleton``)
    :param outputs: Indexable collection of output file names; [0]/[1] filtered pairs, [2] filtered singletons, [3]/[4] marked paired/singleton reads, [5]/[6] paired/singleton duplicate metrics
    :param wildcards: Wildcards determined from input file name patterns (``sample`` is passed to Picard as ``SM``)
    :param log: Indexable collection whose first element is the log file name
    :return: None.
    """

    picard = required_programs["picard"]
    samtools = required_programs["samtools"]
    extract_duplicates = fo.source_directory + "extract_duplicates.py"
    remove_marked_reads = fo.source_directory + "remove_marked_reads.py"

    # Picard options shared by both FASTQ-to-SAM conversions
    sam_conversion_options = [
        "V=%s" % non_essential_params["quality_format"],
        "SO=%s" % non_essential_params["sort_order"],
        "SM=%s" % wildcards.sample
    ]

    # If either of the paired read files are non-empty, filter them for duplicate reads
    if not lf.is_empty(inputs.forward) or not lf.is_empty(inputs.reverse):

        # Intermediate files are placed next to the forward input
        paired_sam = inputs.forward + ".paired.sam"
        marked_output = inputs.forward + ".marked.sam"
        formatted_marked_output = inputs.forward + ".samview"

        # Open the log once for this branch so the handle is closed when we are done
        with open(log[0], "a") as log_file:

            # Convert the paired read files to SAM format
            subprocess.run([
                picard, non_essential_params["fastq_to_sam"],
                "F1=%s" % inputs.forward, "F2=%s" % inputs.reverse,
                "O=%s" % paired_sam
            ] + sam_conversion_options,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # Mark duplicates (logged, matching the singleton branch)
            subprocess.run([
                picard, non_essential_params["mark_duplicates"],
                "I=%s" % paired_sam, "O=%s" % marked_output,
                "M=%s" % outputs[5]
            ],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)
            subprocess.run(["rm", paired_sam],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # Convert the marked output to a parse-able format
            with open(formatted_marked_output, "w") as formatted_marked_output_file:
                subprocess.run([samtools, "view", marked_output],
                               stdout=formatted_marked_output_file,
                               stderr=log_file)
            subprocess.run(["rm", marked_output])

            # Extract duplicate reads from the formatted marked output
            with open(outputs[3], "w") as marked_read_file:
                subprocess.run([extract_duplicates, formatted_marked_output],
                               stdout=marked_read_file,
                               stderr=log_file)

            # Remove the marked reads from the original FASTQs
            with open(outputs[0], "w") as forward_output:
                subprocess.run([remove_marked_reads, outputs[3], inputs.forward],
                               stdout=forward_output,
                               stderr=log_file)
            with open(outputs[1], "w") as reverse_output:
                subprocess.run([remove_marked_reads, outputs[3], inputs.reverse],
                               stdout=reverse_output,
                               stderr=log_file)

    # Otherwise, if both paired read files were dummy files, create dummy outputs
    else:
        subprocess.run(["touch", outputs[0], outputs[1], outputs[3], outputs[5]])

    # If the singleton read file is non-empty, filter it for duplicate reads
    if not lf.is_empty(inputs.singleton):

        # Intermediate files are placed next to the singleton input
        singleton_sam = inputs.singleton + ".singleton.sam"
        marked_output = inputs.singleton + ".marked.sam"
        formatted_marked_output = inputs.singleton + ".samview"

        with open(log[0], "a") as log_file:

            # Convert the singleton read file to SAM format. The input must be the
            # singleton file itself, since the duplicates found here are later
            # removed from inputs.singleton.
            subprocess.run([
                picard, non_essential_params["fastq_to_sam"],
                "F1=%s" % inputs.singleton, "O=%s" % singleton_sam
            ] + sam_conversion_options,
                           stdout=log_file,
                           stderr=subprocess.STDOUT)

            # Mark duplicates
            subprocess.run([
                picard, non_essential_params["mark_duplicates"],
                "I=%s" % singleton_sam, "O=%s" % marked_output,
                "M=%s" % outputs[6]
            ],
                           stdout=log_file,
                           stderr=subprocess.STDOUT)
            subprocess.run(["rm", singleton_sam])

            # Convert the marked output to a parse-able format
            with open(formatted_marked_output, "w") as formatted_marked_output_file:
                subprocess.run([samtools, "view", marked_output],
                               stdout=formatted_marked_output_file,
                               stderr=log_file)
            subprocess.run(["rm", marked_output])

            # Extract duplicate reads from the formatted marked output
            with open(outputs[4], "w") as marked_read_file:
                subprocess.run([extract_duplicates, formatted_marked_output],
                               stdout=marked_read_file,
                               stderr=log_file)

            # Remove the marked reads from the original FASTQs
            with open(outputs[2], "w") as singleton_output:
                subprocess.run([remove_marked_reads, outputs[4], inputs.singleton],
                               stdout=singleton_output,
                               stderr=log_file)

    # Otherwise, if the singleton read file was a dummy file, create dummy outputs
    else:
        subprocess.run(["touch", outputs[2], outputs[4], outputs[6]])