def merge(assembled_gtfs, ref_file, gtf_file, num_cores, data): """ run cuffmerge on a set of assembled GTF files """ assembled_file = tempfile.NamedTemporaryFile(delete=False).name with open(assembled_file, "w") as temp_handle: for assembled in assembled_gtfs: temp_handle.write(assembled + "\n") out_dir = os.path.join("assembly", "cuffmerge") merged_file = os.path.join(out_dir, "merged.gtf") out_file = os.path.join(out_dir, "assembled.gtf") if file_exists(out_file): return out_file if not file_exists(merged_file): with file_transaction(data, out_dir) as tmp_out_dir: cmd = ("cuffmerge -o {tmp_out_dir} --ref-gtf {gtf_file} " "--num-threads {num_cores} --ref-sequence {ref_file} " "{assembled_file}") cmd = cmd.format(**locals()) message = ("Merging the following transcript assemblies with " "Cuffmerge: %s" % ", ".join(assembled_gtfs)) do.run(cmd, message) clean, _ = clean_assembly(merged_file) fixed = fix_cufflinks_attributes(gtf_file, clean, data) classified = annotate_gtf.annotate_novel_coding(fixed, gtf_file, ref_file, data) filtered = annotate_gtf.cleanup_transcripts(classified, gtf_file, ref_file) shutil.move(filtered, out_file) return out_file
def merge(assembled_gtfs, ref_file, gtf_file, num_cores, data): """ run cuffmerge on a set of assembled GTF files """ assembled_file = tempfile.NamedTemporaryFile(delete=False).name with open(assembled_file, "w") as temp_handle: for assembled in assembled_gtfs: temp_handle.write(assembled + "\n") out_dir = os.path.join("assembly", "cuffmerge") merged_file = os.path.join(out_dir, "merged.gtf") out_file = os.path.join(out_dir, "assembled.gtf") if file_exists(out_file): return out_file with file_transaction(data, out_dir) as tmp_out_dir: cmd = ("cuffmerge -o {tmp_out_dir} --ref-gtf {gtf_file} " "--num-threads {num_cores} --ref-sequence {ref_file} " "{assembled_file}") cmd = cmd.format(**locals()) do.run(cmd, "Merging transcript assemblies with reference.") clean, _ = clean_assembly(merged_file) fixed = fix_cufflinks_attributes(gtf_file, clean, data) classified = annotate_gtf.annotate_novel_coding(fixed, gtf_file, ref_file) filtered = annotate_gtf.cleanup_transcripts(classified, gtf_file, ref_file) os.rename(filtered, out_file) return out_file