示例#1
0
def macs2_call_chipseq_peak_job(signal_samples, control_samples, output_dir,
                                name,
                                macs2_path="/home/arendeiro/.local/bin/macs2"):
    """
    Call ChIP-seq peaks with MACS2 in a slurm job.

    A job script running ``macs2 callpeak`` is written to
    ``<output_dir>/<name>/<name>.sh`` and handed to
    ``NGSTk.slurm_submit_job``. The job log is placed next to it as
    ``macs2_<name>.log``.

    :param list signal_samples: Signal Sample objects. The ``mapped``
        attribute of each one is passed to ``-t``.
    :param list control_samples: Background Sample objects. The ``mapped``
        attribute of each one is passed to ``-c``.
    :param str output_dir: Parent directory where MACS2 outputs will be
        stored. A sub-directory called ``name`` is created inside it.
    :param str name: Name of the MACS2 comparison being performed.
    :param str macs2_path: MACS2 executable written into the job script.
        Defaults to the path that was previously hard-coded.
    """
    from pypiper.ngstk import NGSTk
    import textwrap

    tk = NGSTk()

    output_path = os.path.join(output_dir, name)

    # Create the output directory including missing parents. Attempting the
    # creation first and re-checking afterwards avoids failing when another
    # process creates the directory between a check and the creation.
    try:
        os.makedirs(output_path)
    except OSError:
        if not os.path.isdir(output_path):
            raise

    job_name = "macs2_{}".format(name)

    # Build job script
    # slurm header
    cmd = tk.slurm_header(job_name,
                          os.path.join(output_path, job_name + ".log"),
                          cpus_per_task=4)

    # MACS2 call; the leading tabs are stripped by textwrap.dedent below
    cmd += (
        "\n\t\t{macs2} callpeak -t {signal} -c {control}"
        " -n {name} --outdir {outdir}\n"
    ).format(macs2=macs2_path,
             signal=" ".join([s.mapped for s in signal_samples]),
             control=" ".join([s.mapped for s in control_samples]),
             name=name,
             outdir=output_path)

    # Slurm footer
    cmd += "\t\t" + tk.slurm_footer() + "\n"

    # Write job to file
    job_file = os.path.join(output_path, name + ".sh")
    with open(job_file, "w") as handle:
        handle.write(textwrap.dedent(cmd))

    # Submit
    tk.slurm_submit_job(job_file)
示例#2
0
def homer_call_chipseq_peak_job(signal_samples, control_samples, output_dir,
                                name):
    """
    Call ChIP-seq peaks with HOMER in a slurm job.

    The job script runs ``makeTagDirectory`` once for the signal samples and
    once for the background samples, then runs ``findPeaks`` twice: with
    ``-style factor`` and with ``-style histone``. The script is written to
    ``<output_dir>/<name>/<name>.sh`` and handed to
    ``NGSTk.slurm_submit_job``.

    :param list signal_samples: Signal Sample objects. The ``filtered``
        attribute of each one is passed to ``makeTagDirectory``.
    :param list control_samples: Background Sample objects. The ``filtered``
        attribute of each one is passed to ``makeTagDirectory``.
    :param str output_dir: Parent directory where HOMER outputs will be
        stored. Tag directory paths are placed directly inside it; peak
        files, the job log and the job script go to ``<output_dir>/<name>``.
    :param str name: Name of the HOMER comparison being performed.
    """
    from pypiper.ngstk import NGSTk
    import textwrap

    tk = NGSTk()

    output_path = os.path.join(output_dir, name)

    # Create the output directory including missing parents. Attempting the
    # creation first and re-checking afterwards avoids failing when another
    # process creates the directory between a check and the creation.
    try:
        os.makedirs(output_path)
    except OSError:
        if not os.path.isdir(output_path):
            raise

    job_name = "homer_findPeaks_{}".format(name)

    # Build job script
    # slurm header
    cmd = tk.slurm_header(job_name,
                          os.path.join(output_path, job_name + ".log"),
                          cpus_per_task=4)

    # make tag directory for the signal samples
    signal_tag_directory = os.path.join(output_dir,
                                        "homer_tag_dir_" + name + "_signal")
    cmd += "\n\t\tmakeTagDirectory {0} {1}\n".format(
        signal_tag_directory,
        " ".join([s.filtered for s in signal_samples]))

    # make tag directory for the background samples
    background_tag_directory = os.path.join(
        output_dir, "homer_tag_dir_" + name + "_background")
    cmd += "\n\t\tmakeTagDirectory {0} {1}\n".format(
        background_tag_directory,
        " ".join([s.filtered for s in control_samples]))

    # call peaks, once per findPeaks style; each style gets its own output
    # file inside output_path (already created above)
    for style in ("factor", "histone"):
        output_file = os.path.join(
            output_path, name + "_homer_peaks." + style + ".narrowPeak")
        cmd += (
            "\n\t\tfindPeaks {signal} -style {style}"
            " -o {output_file} -i {background}\n"
        ).format(signal=signal_tag_directory,
                 style=style,
                 output_file=output_file,
                 background=background_tag_directory)

    # Slurm footer
    cmd += "\t\t" + tk.slurm_footer() + "\n"

    # Write job to file; dedent strips the common leading tabs and blanks
    # out whitespace-only lines
    job_file = os.path.join(output_path, name + ".sh")
    with open(job_file, "w") as handle:
        handle.write(textwrap.dedent(cmd))

    # Submit
    tk.slurm_submit_job(job_file)