示例#1
0
def merge(assembled_gtfs, ref_file, gtf_file, num_cores, data):
    """
    Run cuffmerge on a set of assembled GTF files.

    The result is cached on disk: if ``assembly/cuffmerge/assembled.gtf``
    already exists it is returned untouched, and cuffmerge itself is only
    invoked when ``assembly/cuffmerge/merged.gtf`` is missing.

    Args:
        assembled_gtfs: iterable of GTF path strings; written one per line
            to the manifest file handed to cuffmerge.
        ref_file: passed to cuffmerge as ``--ref-sequence`` and to the
            downstream annotation helpers.
        gtf_file: passed to cuffmerge as ``--ref-gtf`` and to the
            downstream clean-up/annotation helpers.
        num_cores: passed to cuffmerge as ``--num-threads``.
        data: opaque project object forwarded to ``file_transaction`` and
            the post-processing helpers.

    Returns:
        The path ``assembly/cuffmerge/assembled.gtf``.
    """
    out_dir = os.path.join("assembly", "cuffmerge")
    merged_file = os.path.join(out_dir, "merged.gtf")
    out_file = os.path.join(out_dir, "assembled.gtf")
    # Check the cache first so no temporary manifest is created (and left
    # behind) when there is nothing to do.
    if file_exists(out_file):
        return out_file
    if not file_exists(merged_file):
        # delete=False: the manifest must outlive this handle because
        # cuffmerge opens it by name; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(mode="w",
                                         delete=False) as temp_handle:
            assembled_file = temp_handle.name
            for assembled in assembled_gtfs:
                temp_handle.write(assembled + "\n")
        try:
            with file_transaction(data, out_dir) as tmp_out_dir:
                cmd = ("cuffmerge -o {tmp_out_dir} --ref-gtf {gtf_file} "
                       "--num-threads {num_cores} --ref-sequence {ref_file} "
                       "{assembled_file}")
                cmd = cmd.format(tmp_out_dir=tmp_out_dir, gtf_file=gtf_file,
                                 num_cores=num_cores, ref_file=ref_file,
                                 assembled_file=assembled_file)
                message = ("Merging the following transcript assemblies with "
                           "Cuffmerge: %s" % ", ".join(assembled_gtfs))
                # NOTE(review): assumes do.run blocks until cuffmerge exits,
                # so the manifest can be removed right after -- confirm.
                do.run(cmd, message)
        finally:
            # The manifest is only an input list for cuffmerge; never leave
            # it behind in the temp directory.
            os.remove(assembled_file)
    # merged_file is expected to have been produced in out_dir by the
    # cuffmerge run above (or by a previous, interrupted call).
    clean, _ = clean_assembly(merged_file)
    fixed = fix_cufflinks_attributes(gtf_file, clean, data)
    classified = annotate_gtf.annotate_novel_coding(fixed, gtf_file, ref_file,
                                                    data)
    filtered = annotate_gtf.cleanup_transcripts(classified, gtf_file, ref_file)
    shutil.move(filtered, out_file)
    return out_file
示例#2
0
def merge(assembled_gtfs, ref_file, gtf_file, num_cores, data):
    """
    Run cuffmerge on a set of assembled GTF files.

    The result is cached on disk: if ``assembly/cuffmerge/assembled.gtf``
    already exists it is returned untouched; otherwise cuffmerge is run and
    its ``merged.gtf`` output is post-processed into that file.

    Args:
        assembled_gtfs: iterable of GTF path strings; written one per line
            to the manifest file handed to cuffmerge.
        ref_file: passed to cuffmerge as ``--ref-sequence`` and to the
            downstream annotation helpers.
        gtf_file: passed to cuffmerge as ``--ref-gtf`` and to the
            downstream clean-up/annotation helpers.
        num_cores: passed to cuffmerge as ``--num-threads``.
        data: opaque project object forwarded to ``file_transaction`` and
            ``fix_cufflinks_attributes``.

    Returns:
        The path ``assembly/cuffmerge/assembled.gtf``.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    out_dir = os.path.join("assembly", "cuffmerge")
    merged_file = os.path.join(out_dir, "merged.gtf")
    out_file = os.path.join(out_dir, "assembled.gtf")
    # Check the cache first so no temporary manifest is created (and left
    # behind) when there is nothing to do.
    if file_exists(out_file):
        return out_file
    # delete=False: the manifest must outlive this handle because cuffmerge
    # opens it by name; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_handle:
        assembled_file = temp_handle.name
        for assembled in assembled_gtfs:
            temp_handle.write(assembled + "\n")
    try:
        with file_transaction(data, out_dir) as tmp_out_dir:
            cmd = ("cuffmerge -o {tmp_out_dir} --ref-gtf {gtf_file} "
                   "--num-threads {num_cores} --ref-sequence {ref_file} "
                   "{assembled_file}")
            cmd = cmd.format(tmp_out_dir=tmp_out_dir, gtf_file=gtf_file,
                             num_cores=num_cores, ref_file=ref_file,
                             assembled_file=assembled_file)
            # NOTE(review): assumes do.run blocks until cuffmerge exits, so
            # the manifest can be removed right after -- confirm.
            do.run(cmd, "Merging transcript assemblies with reference.")
    finally:
        # The manifest is only an input list for cuffmerge; never leave it
        # behind in the temp directory.
        os.remove(assembled_file)
    # merged_file is expected to have been produced in out_dir by the
    # cuffmerge run above.
    clean, _ = clean_assembly(merged_file)
    fixed = fix_cufflinks_attributes(gtf_file, clean, data)
    classified = annotate_gtf.annotate_novel_coding(fixed, gtf_file, ref_file)
    filtered = annotate_gtf.cleanup_transcripts(classified, gtf_file, ref_file)
    # shutil.move instead of os.rename: rename fails with OSError when the
    # filtered file lives on a different filesystem than out_dir.
    shutil.move(filtered, out_file)
    return out_file