def compute_tss_type(args_srna, cdss, genes, wigs_f, wigs_r):
    '''Re-classify the TSSs of args_srna.tss_file and write them as GFF.

    Each TSS returned by read_tss is compared with the CDSs and genes via
    compare_tss_cds, receives a running ID ("tss0", "tss1", ...) and is
    then passed through fix_primary_type together with the wiggle data
    prepared by modify_wigs_for_tss_type. The resulting entries are
    written, one tab-separated line each, to
    <out_folder>/TSS_class/<prefix>_TSS.gff.

    Args:
        args_srna: option container; tss_file, out_folder and prefix
            are read from it.
        cdss: CDS entries handed to compare_tss_cds.
        genes: gene entries handed to compare_tss_cds.
        wigs_f: wiggle data handed to modify_wigs_for_tss_type with
            strand "+".
        wigs_r: wiggle data handed to modify_wigs_for_tss_type with
            strand "-".

    Returns:
        None. The TSS objects are updated in place and the GFF file is
        written as a side effect.
    '''
    # The count returned by read_tss is not needed; IDs are renumbered.
    tsss, _ = read_tss(args_srna.tss_file)
    tss_dir = os.path.join(args_srna.out_folder, "TSS_class")
    try:
        os.mkdir(tss_dir)
    except FileExistsError:
        # Already present from an earlier run; reuse it.
        pass
    new_tss_file = os.path.join(
        tss_dir, "_".join([args_srna.prefix, "TSS.gff"]))
    for num_tss, tss in enumerate(tsss):
        tss_type = compare_tss_cds(tss, cdss, genes)
        tss.attributes = tss_type[1]
        tss.attributes["ID"] = "tss" + str(num_tss)
        tss.attribute_string = "".join(
            [tss_type[0], ";ID=tss", str(num_tss)])
    wigs_fm = modify_wigs_for_tss_type(wigs_f, "+")
    wigs_rm = modify_wigs_for_tss_type(wigs_r, "-")
    final_tsss = fix_primary_type(tsss, wigs_fm, wigs_rm)
    # Open the output only once the results exist, and let the context
    # manager close it even if writing fails.
    with open(new_tss_file, "w") as new_tss_fh:
        for tss in final_tsss:
            # Rebuild the attribute column from the (possibly changed)
            # attributes; every value has to be a string for "=".join.
            tss.attribute_string = ";".join(
                ["=".join(items) for items in tss.attributes.items()])
            fields = [tss.seq_id, tss.source, tss.feature, tss.start,
                      tss.end, tss.score, tss.strand, tss.phase,
                      tss.attribute_string]
            new_tss_fh.write(
                "\t".join([str(field) for field in fields]) + "\n")
def compute_tss_type(args_srna, cdss, genes, wigs_f, wigs_r):
    '''Re-classify the TSSs of args_srna.tss_file and write them as GFF.

    Each TSS returned by read_tss is compared with the CDSs and genes via
    compare_tss_cds, receives a running ID ("tss0", "tss1", ...) and is
    then passed through fix_primary_type together with the wiggle data
    prepared by modify_wigs_for_tss_type. The resulting entries are
    written, one tab-separated line each, to
    <out_folder>/TSS_class/<prefix>_TSS.gff.

    Args:
        args_srna: option container; tss_file, out_folder and prefix
            are read from it.
        cdss: CDS entries handed to compare_tss_cds.
        genes: gene entries handed to compare_tss_cds.
        wigs_f: wiggle data handed to modify_wigs_for_tss_type with
            strand "+".
        wigs_r: wiggle data handed to modify_wigs_for_tss_type with
            strand "-".

    Returns:
        None. The TSS objects are updated in place and the GFF file is
        written as a side effect.
    '''
    # The count returned by read_tss is not needed; IDs are renumbered.
    tsss, _ = read_tss(args_srna.tss_file)
    tss_dir = os.path.join(args_srna.out_folder, "TSS_class")
    try:
        os.mkdir(tss_dir)
    except FileExistsError:
        # Already present from an earlier run; reuse it.
        pass
    new_tss_file = os.path.join(
        tss_dir, "_".join([args_srna.prefix, "TSS.gff"]))
    for num_tss, tss in enumerate(tsss):
        tss_type = compare_tss_cds(tss, cdss, genes)
        tss.attributes = tss_type[1]
        tss.attributes["ID"] = "tss" + str(num_tss)
        tss.attribute_string = "".join(
            [tss_type[0], ";ID=tss", str(num_tss)])
    wigs_fm = modify_wigs_for_tss_type(wigs_f, "+")
    wigs_rm = modify_wigs_for_tss_type(wigs_r, "-")
    final_tsss = fix_primary_type(tsss, wigs_fm, wigs_rm)
    # Open the output only once the results exist, and let the context
    # manager close it even if writing fails.
    with open(new_tss_file, "w") as new_tss_fh:
        for tss in final_tsss:
            # Rebuild the attribute column from the (possibly changed)
            # attributes; every value has to be a string for "=".join.
            tss.attribute_string = ";".join(
                ["=".join(items) for items in tss.attributes.items()])
            fields = [tss.seq_id, tss.source, tss.feature, tss.start,
                      tss.end, tss.score, tss.strand, tss.phase,
                      tss.attribute_string]
            new_tss_fh.write(
                "\t".join([str(field) for field in fields]) + "\n")
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro, prefix):
    '''Get the upstream sequence of TSS.

    Two modes, selected by args_pro.source:

    * truthy: every TSS must already carry a "type" attribute; each one
      is handed to print_fasta unchanged.
    * falsy: every TSS is classified with compare_tss_cds, gets the ID
      "<seq_id>_tss<n>", is passed through fix_primary_type, written to
      the GFF3 file out_class, and then handed to print_fasta.

    print_fasta is only called when fasta_file is given; it receives
    handles to the five files tmp/{primary,secondary,internal,
    antisense,orphan}.fa, which are created relative to the current
    working directory.

    Args:
        tss_file: TSS input, read together with fasta_file by read_data.
        fasta_file: sequence input, or None to skip all FASTA output.
        gff_file: annotation file read by read_gff (non-source mode).
        out_class: path of the GFF3 file written in non-source mode.
        args_pro: option container; source, nt_before, tex_wigs,
            input_libs and wig_path are read from it.
        prefix: file-name prefix of the "<prefix>_forward.wig" and
            "<prefix>_reverse.wig" files inside args_pro.wig_path.

    Returns:
        None.

    Raises:
        SystemExit: in source mode when a TSS has no "type" attribute.
    '''
    fasta_paths = (("pri", "tmp/primary.fa"),
                   ("sec", "tmp/secondary.fa"),
                   ("inter", "tmp/internal.fa"),
                   ("anti", "tmp/antisense.fa"),
                   ("orph", "tmp/orphan.fa"))
    files = {}
    out = None
    try:
        if fasta_file is not None:
            # Opened one by one so that a failure half-way still leaves
            # the earlier handles registered for cleanup.
            for key, path in fasta_paths:
                files[key] = open(path, "w")
        tsss, seq = read_data(tss_file, fasta_file)
        if args_pro.source:
            for tss in tsss:
                if "type" not in tss.attributes:
                    print("Error: The TSS gff file may not generated from ANNOgesic."
                          "Please run with --tss_source!")
                    sys.exit()
                # Without a fasta file there are no output handles, so
                # skip instead of failing on an unbound name.
                if fasta_file is not None:
                    name = ">" + "_".join(
                        [str(tss.start), tss.strand, tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
        else:
            out = open(out_class, "w")
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
            for num_tss, tss in enumerate(tsss):
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss_id = tss.seq_id + "_tss" + str(num_tss)
                tss.attributes["ID"] = tss_id
                tss.attribute_string = "".join(
                    [tss_type[0], ";ID=", tss_id])
            if args_pro.tex_wigs is not None:
                libs, _ = read_libs(args_pro.input_libs, args_pro.tex_wigs)
                wigs_f = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_forward.wig"), "+", libs)
                # NOTE(review): the reverse wig is read with strand "+";
                # this looks like a copy-paste slip for "-" -- confirm
                # against read_wig before changing.
                wigs_r = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_reverse.wig"), "+", libs)
            else:
                wigs_f = None
                wigs_r = None
            sort_tsss = sorted(
                tsss, key=lambda k: (k.seq_id, k.start, k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                # Rebuild the attribute column from the (possibly
                # changed) attributes; values must be strings.
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                fields = [tss.seq_id, tss.source, tss.feature, tss.start,
                          tss.end, tss.score, tss.strand, tss.phase,
                          tss.attribute_string]
                out.write(
                    "\t".join([str(field) for field in fields]) + "\n")
                if fasta_file is not None:
                    name = ">" + "_".join(
                        [str(tss.start), tss.strand, tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
    finally:
        # Close whatever was opened, also on errors and sys.exit().
        for handle in files.values():
            handle.close()
        if out is not None:
            out.close()
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro, prefix):
    '''Get the upstream sequence of TSS.

    Two modes, selected by args_pro.source:

    * truthy: every TSS must already carry a "type" attribute; each one
      is handed to print_fasta unchanged.
    * falsy: every TSS is classified with compare_tss_cds, gets the ID
      "<seq_id>_tss<n>", is passed through fix_primary_type, written to
      the GFF3 file out_class, and then handed to print_fasta.

    print_fasta is only called when fasta_file is given; it receives
    handles to the five files tmp/{primary,secondary,internal,
    antisense,orphan}.fa, which are created relative to the current
    working directory.

    Args:
        tss_file: TSS input, read together with fasta_file by read_data.
        fasta_file: sequence input, or None to skip all FASTA output.
        gff_file: annotation file read by read_gff (non-source mode).
        out_class: path of the GFF3 file written in non-source mode.
        args_pro: option container; source, nt_before, tex_wigs,
            input_libs and wig_path are read from it.
        prefix: file-name prefix of the "<prefix>_forward.wig" and
            "<prefix>_reverse.wig" files inside args_pro.wig_path.

    Returns:
        None.

    Raises:
        SystemExit: in source mode when a TSS has no "type" attribute.
    '''
    fasta_paths = (("pri", "tmp/primary.fa"),
                   ("sec", "tmp/secondary.fa"),
                   ("inter", "tmp/internal.fa"),
                   ("anti", "tmp/antisense.fa"),
                   ("orph", "tmp/orphan.fa"))
    files = {}
    out = None
    try:
        if fasta_file is not None:
            # Opened one by one so that a failure half-way still leaves
            # the earlier handles registered for cleanup.
            for key, path in fasta_paths:
                files[key] = open(path, "w")
        tsss, seq = read_data(tss_file, fasta_file)
        if args_pro.source:
            for tss in tsss:
                if "type" not in tss.attributes:
                    print("Error: The TSS gff file may not generated from ANNOgesic."
                          "Please run with --tss_source!")
                    sys.exit()
                # Without a fasta file there are no output handles, so
                # skip instead of failing on an unbound name.
                if fasta_file is not None:
                    name = ">" + "_".join(
                        [str(tss.start), tss.strand, tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
        else:
            out = open(out_class, "w")
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
            for num_tss, tss in enumerate(tsss):
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss_id = tss.seq_id + "_tss" + str(num_tss)
                tss.attributes["ID"] = tss_id
                tss.attribute_string = "".join(
                    [tss_type[0], ";ID=", tss_id])
            if args_pro.tex_wigs is not None:
                libs, _ = read_libs(args_pro.input_libs, args_pro.tex_wigs)
                wigs_f = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_forward.wig"), "+", libs)
                # NOTE(review): the reverse wig is read with strand "+";
                # this looks like a copy-paste slip for "-" -- confirm
                # against read_wig before changing.
                wigs_r = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_reverse.wig"), "+", libs)
            else:
                wigs_f = None
                wigs_r = None
            sort_tsss = sorted(
                tsss, key=lambda k: (k.seq_id, k.start, k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                # Rebuild the attribute column from the (possibly
                # changed) attributes; values must be strings.
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                fields = [tss.seq_id, tss.source, tss.feature, tss.start,
                          tss.end, tss.score, tss.strand, tss.phase,
                          tss.attribute_string]
                out.write(
                    "\t".join([str(field) for field in fields]) + "\n")
                if fasta_file is not None:
                    name = ">" + "_".join(
                        [str(tss.start), tss.strand, tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
    finally:
        # Close whatever was opened, also on errors and sys.exit().
        for handle in files.values():
            handle.close()
        if out is not None:
            out.close()
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro):
    '''Get the upstream sequence of TSS.

    Two modes, selected by args_pro.source:

    * truthy: each TSS is handed to print_fasta unchanged.
    * falsy: each TSS is classified with compare_tss_cds, gets the ID
      "tss<n>", is passed through fix_primary_type together with the
      wiggle data read from tmp/merge_forward.wig and
      tmp/merge_reverse.wig, written to the GFF3 file out_class, and
      then handed to print_fasta.

    print_fasta receives handles to the five files tmp/{primary,
    secondary,internal,antisense,orphan}.fa; all tmp/ paths are
    relative to the current working directory.

    Args:
        tss_file: TSS input, read together with fasta_file by read_data.
        fasta_file: sequence input for read_data.
        gff_file: annotation file read by read_gff (non-source mode).
        out_class: path of the GFF3 file written in non-source mode.
        args_pro: option container; source and nt_before are read.

    Returns:
        None.
    '''
    fasta_paths = (("pri", "tmp/primary.fa"),
                   ("sec", "tmp/secondary.fa"),
                   ("inter", "tmp/internal.fa"),
                   ("anti", "tmp/antisense.fa"),
                   ("orph", "tmp/orphan.fa"))
    files = {}
    out = None
    try:
        # Opened one by one so that a failure half-way still leaves the
        # earlier handles registered for cleanup.
        for key, path in fasta_paths:
            files[key] = open(path, "w")
        tsss, seq = read_data(tss_file, fasta_file)
        # Plain truthiness in both places: the former mix of "is True"
        # and "not ..." sent truthy non-True values into the classify
        # branch without cdss/genes/out ever being set up.
        if args_pro.source:
            for tss in tsss:
                name = ">" + "_".join(
                    [str(tss.start), tss.strand, tss.seq_id])
                print_fasta(seq, tss, files, name, args_pro.nt_before)
        else:
            out = open(out_class, "w")
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
            for num_tss, tss in enumerate(tsss):
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss.attributes["ID"] = "tss" + str(num_tss)
                tss.attribute_string = "".join(
                    [tss_type[0], ";ID=tss", str(num_tss)])
            wigs_f = read_wig("tmp/merge_forward.wig", "+")
            wigs_r = read_wig("tmp/merge_reverse.wig", "-")
            sort_tsss = sorted(
                tsss, key=lambda k: (k.seq_id, k.start, k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                name = ">" + "_".join(
                    [str(tss.start), tss.strand, tss.seq_id])
                # Rebuild the attribute column from the (possibly
                # changed) attributes; values must be strings.
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                fields = [tss.seq_id, tss.source, tss.feature, tss.start,
                          tss.end, tss.score, tss.strand, tss.phase,
                          tss.attribute_string]
                out.write(
                    "\t".join([str(field) for field in fields]) + "\n")
                print_fasta(seq, tss, files, name, args_pro.nt_before)
    finally:
        # Close whatever was opened, also on errors.
        for handle in files.values():
            handle.close()
        if out is not None:
            out.close()