def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many benchmark sORFs are overlapped by a predicted sORF.

    Reference entries are read from ``args.benchmark_file`` and predictions
    from ``args.predict_file``, both through ``Gff3Parser().entries``.
    A reference counts as detected when at least one prediction on the same
    strand overlaps it; detected references get ``attributes["detect"]``
    set to True.  Sequence IDs are not compared.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    # Materialise both inputs inside ``with`` so the handles are closed
    # before the comparison starts.
    with open(args.benchmark_file) as ref_fh:
        sorfs = list(Gff3Parser().entries(ref_fh))
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))

    num_ref = len(sorfs)
    detect = 0
    for sorf in sorfs:
        for pre in pres:
            if (pre.strand == sorf.strand) and _overlaps(
                    pre.start, pre.end, sorf.start, sorf.end):
                detect += 1
                sorf.attributes["detect"] = True
                # One hit is enough; never count a reference twice.
                break

    print("the number of known sORFs which can be detected by ANNOgesic:" +
          str(detect))
    print("the total number of known sORFs:" + str(num_ref))
    # An empty benchmark used to raise ZeroDivisionError here.
    print("the detection rate:" + _format_rate(detect, num_ref))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many EcoCyc transcripts are overlapped by a prediction.

    Predictions come from ``args.predict_file`` (via ``Gff3Parser``).  The
    reference is the tab-separated ``args.ecocyc_file``; column 0 is used
    as the transcript key and columns 1 and 2 as integer start and end.
    Rows sharing a key are merged into the widest span.  Strand is not
    compared in this script.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))

    trans = {}
    with open(args.ecocyc_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv yields [] for blank lines; they carry no record.
                continue
            name, start, end = row[0], int(row[1]), int(row[2])
            if name not in trans:
                trans[name] = {"start": start, "end": end}
            else:
                # Widen the stored span so it covers every row of the key.
                trans[name]["start"] = min(trans[name]["start"], start)
                trans[name]["end"] = max(trans[name]["end"], end)

    total = len(trans)
    detect = 0
    for ref in trans.values():
        for pre in pres:
            if _overlaps(pre.start, pre.end, ref["start"], ref["end"]):
                detect += 1
                break

    print("the number of published transcripts which "
          "can be detected by ANNOgesic:" + str(detect))
    print("total number of transcripts in EcoCyc:" + str(total))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, total))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many RegulonDB terminators are overlapped by a prediction.

    The reference is the tab-separated ``args.regulondb_file``: column 0 is
    stored as the id, columns 1 and 2 as integer start and end, and column
    3 is the orientation.  "forward" maps to "+", anything else to "-";
    a "both" row is additionally recorded on "+", so it contributes two
    reference entries.  Predictions come from ``args.predict_file`` via
    ``Gff3Parser``.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    terms = []
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv yields [] for blank lines; they carry no record.
                continue
            strand = "+" if row[3] == "forward" else "-"
            terms.append({"id": row[0], "start": int(row[1]),
                          "end": int(row[2]), "strand": strand})
            if row[3] == "both":
                terms.append({"id": row[0], "start": int(row[1]),
                              "end": int(row[2]), "strand": "+"})
    total = len(terms)

    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))

    # Iterate references on the outside: the figure reported below is
    # "published terminators detected", so each reference may be counted at
    # most once.  Looping over predictions first counted matching
    # predictions instead and could push the rate above 1.
    detect = 0
    for ref in terms:
        for pre in pres:
            if (pre.strand == ref["strand"]) and _overlaps(
                    pre.start, pre.end, ref["start"], ref["end"]):
                detect += 1
                break

    print("the number of published terminators which "
          "can be detected by ANNOgesic:" + str(detect))
    print("total number of terminators in RegulonDB:" + str(total))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, total))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many EcoCyc terminators are overlapped by a prediction.

    The reference is the tab-separated ``args.ecocyc_file``; only rows with
    at least four columns are used, taking column 0 as the id and columns
    1 and 2 as integer start and end.  Predictions come from
    ``args.predict_file`` via ``Gff3Parser``.  Strand is not compared in
    this script.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    terms = []
    with open(args.ecocyc_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if len(row) >= 4:
                terms.append({"id": row[0], "start": int(row[1]),
                              "end": int(row[2])})
    total = len(terms)

    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))

    # Iterate references on the outside: the figure reported below is
    # "published terminators detected", so each reference may be counted at
    # most once.  Looping over predictions first counted matching
    # predictions instead and could push the rate above 1.
    detect = 0
    for ref in terms:
        for pre in pres:
            if _overlaps(pre.start, pre.end, ref["start"], ref["end"]):
                detect += 1
                break

    print("the number of published terminators "
          "can be detected by ANNOgesic:" + str(detect))
    print("total number of terminators in EcoCyc:" + str(total))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, total))
def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many RegulonDB TSSs have a predicted TSS close by.

    Predictions come from ``args.predict_file`` via ``Gff3Parser``.  The
    reference is the tab-separated ``args.regulondb_file``: rows whose
    first column starts with "#" or whose last column is "weak" are
    ignored; column 1 is the key, column 3 the integer position and
    column 5 the orientation ("forward" maps to "+", anything else to
    "-").  A reference is detected when a prediction on the same strand
    starts within ``args.fuzzy`` positions of it.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    with open(args.predict_file) as pre_fh:
        tsss = list(Gff3Parser().entries(pre_fh))

    pros = {}
    total = 0
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv yields [] for blank lines; they carry no record.
                continue
            if row[0].startswith("#") or (row[-1] == "weak"):
                continue
            # NOTE(review): total counts every accepted row while pros
            # keeps one entry per row[1]; repeated keys would make the
            # denominator larger than the set that is actually tested.
            # Confirm the keys are unique in the input.
            total += 1
            strand = "+" if row[5] == "forward" else "-"
            pros[row[1]] = {"start": int(row[3]), "strand": strand}

    detect = 0
    for ref in pros.values():
        for pre in tsss:
            if (pre.strand == ref["strand"]) and (
                    abs(ref["start"] - pre.start) <= args.fuzzy):
                detect += 1
                break

    print("the number of published TSSs which "
          "can be detected by ANNOgesic:" + str(detect))
    # The label previously ran straight into the number (no separator).
    print("the total number of TSSs from "
          "Mendoza-Vargas in Regulon DB:" + str(total))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, total))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many RefSeq ncRNA entries are overlapped by a prediction.

    Predictions come from ``args.predict_file`` and references from
    ``args.refseq_file``, both through ``Gff3Parser().entries``; only
    reference entries whose ``feature`` is "ncRNA" are kept.  A reference
    is detected when a prediction on the same strand overlaps it.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    with open(args.refseq_file) as ref_fh:
        refs = [entry for entry in Gff3Parser().entries(ref_fh)
                if entry.feature == "ncRNA"]

    num_ref = len(refs)
    detect = 0
    for ref in refs:
        for pre in pres:
            if (pre.strand == ref.strand) and _overlaps(
                    pre.start, pre.end, ref.start, ref.end):
                detect += 1
                break

    print("the number of published sRNAs which "
          "can be detected by ANNOgesic:" + str(detect))
    print("total number of sRNAs in RefSeq:" + str(num_ref))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, num_ref))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def main():
    """Print how many RegulonDB TSS ranges are hit by a predicted TSS.

    Predictions come from ``args.predict_file`` via ``Gff3Parser``.  The
    reference is the tab-separated ``args.regulondb_file``: rows whose
    first column starts with "#" are ignored; columns 0 and 1 are the
    integer start and end, and column 5 the orientation ("forward" maps to
    "+", anything else to "-").  Each reference range is widened by
    ``args.fuzzy`` on both sides before it is compared with same-strand
    predictions.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    with open(args.predict_file) as pre_fh:
        tsss = list(Gff3Parser().entries(pre_fh))

    refs = []
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv yields [] for blank lines; they carry no record.
                continue
            if row[0].startswith("#"):
                continue
            strand = "+" if row[5] == "forward" else "-"
            refs.append({"start": int(row[0]), "end": int(row[1]),
                         "strand": strand})
    total = len(refs)

    detect = 0
    for ref in refs:
        # Tolerance window around the published position.
        win_start = ref["start"] - args.fuzzy
        win_end = ref["end"] + args.fuzzy
        for pre in tsss:
            if (pre.strand == ref["strand"]) and _overlaps(
                    pre.start, pre.end, win_start, win_end):
                detect += 1
                break

    print("the number of reported TSSs which "
          "can be detected by ANNOgesic:" + str(detect))
    # This line previously printed ``detect`` instead of the total.
    print("total number of TSSs from "
          "Salgado et. al in RegulonDB:" + str(total))
    # An empty reference used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect, total))
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True when a predicted interval overlaps a reference interval.

    The four clauses are, in order: prediction inside the reference,
    prediction spanning the reference, prediction crossing the reference
    end, prediction crossing the reference start.
    """
    return (
        ((pre_start >= ref_start) and (pre_end <= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_end))
        or ((pre_start >= ref_start) and (pre_start <= ref_end)
            and (pre_end >= ref_end))
        or ((pre_start <= ref_start) and (pre_end >= ref_start)
            and (pre_end <= ref_end))
    )


def _format_rate(detected, total):
    """Return detected/total as text, or "NA" when there is no reference."""
    if total == 0:
        return "NA"
    return str(float(detected) / float(total))


def _add_utr(utrs, key, span, strand):
    """Store the "start-end" text ``span`` under ``key``, widening repeats."""
    start = int(span.split("-")[0])
    end = int(span.split("-")[-1])
    if key not in utrs:
        utrs[key] = {"start": start, "end": end, "strand": strand}
    else:
        # Keep the smallest start and the LARGEST end.  The end comparison
        # used to be "<", which shrank the merged span instead.
        utrs[key]["start"] = min(utrs[key]["start"], start)
        utrs[key]["end"] = max(utrs[key]["end"], end)


def _count_detected(refs, pres):
    """Count references overlapped by a same-strand prediction (once each)."""
    detected = 0
    for ref in refs:
        for pre in pres:
            if (pre.strand == ref["strand"]) and _overlaps(
                    pre.start, pre.end, ref["start"], ref["end"]):
                detected += 1
                break
    return detected


def main():
    """Print how many RegulonDB 5'UTRs and 3'UTRs are overlapped by predictions.

    The reference is the tab-separated ``args.regulondb_file``: rows whose
    first column starts with "#" are ignored; column 0 is the key, column
    4 the orientation ("forward" maps to "+", anything else to "-"),
    column 9 the 5'UTR span and column 11 the 3'UTR span, each written as
    "start-end" and skipped when empty.  Rows sharing a key are merged.
    Predictions come from ``args.predict5_file`` and ``args.predict3_file``
    via ``Gff3Parser``.

    ``args`` and ``Gff3Parser`` are module-level names defined outside this
    function.
    """
    utr5s = {}
    utr3s = {}
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv yields [] for blank lines; they carry no record.
                continue
            if row[0].startswith("#"):
                continue
            strand = "+" if row[4] == "forward" else "-"
            if len(row[9]) != 0:
                _add_utr(utr5s, row[0], row[9], strand)
            if len(row[11]) != 0:
                _add_utr(utr3s, row[0], row[11], strand)
    total5 = len(utr5s)
    total3 = len(utr3s)

    with open(args.predict5_file) as pre5_fh:
        pres5 = list(Gff3Parser().entries(pre5_fh))
    with open(args.predict3_file) as pre3_fh:
        pres3 = list(Gff3Parser().entries(pre3_fh))

    # References are the outer loop so that each published UTR is counted
    # at most once; iterating predictions first counted matching
    # predictions and could push the rate above 1.
    detect5 = _count_detected(utr5s.values(), pres5)
    detect3 = _count_detected(utr3s.values(), pres3)

    print("the number of published 5'UTRs which "
          "can be detected by ANNOgesic:" + str(detect5))
    # The two 3'UTR lines were previously labelled "5'UTRs".
    print("the number of published 3'UTRs which "
          "can be detected by ANNOgesic:" + str(detect3))
    print("total number of 5'UTRs in RegulonDB:" + str(total5))
    print("total number of 3'UTRs in RegulonDB:" + str(total3))
    # Empty references used to raise ZeroDivisionError here.
    print("detection rate:" + _format_rate(detect5, total5))
    print("detection rate:" + _format_rate(detect3, total3))
editable=True, height=4000, width=5000) return data_table def main(): gos = {} goh = open(args.go_file, "r") for row in csv.reader(goh, delimiter='\t'): gos[row[0]] = row[1].split(";") genes = [] names = {} srnas = [] nh = open(args.name_file, "r") for row in csv.reader(nh, delimiter='\t'): names[row[0]] = row[3] gh = open(args.gff_file, "r") for entry in Gff3Parser().entries(gh): if entry.feature == "gene": genes.append(entry) elif entry.feature == "ncRNA": srnas.append(entry) for srna in srnas: members = {"role": [], "feature": [], "start": [], "end": [], "strand": [], "link": [], "gene_name": [], "GO": [], "cc": [], "pMEM_OD_0.2": [], "pMEM_OD_0.5": [], "pMEM_OD_1": [], "pMEM_t0": [], "pMEM_t1": [], "pMEM_t2": [], "pMEM_ON": [], "TSB_OD_0.2": [], "TSB_OD_0.5": [], "TSB_OD_1": [], "TSB_t0": [], "TSB_t1": [], "TSB_t2": [], "TSB_ON": []} exps ={} infos = [] print("_".join([srna.feature, str(srna.start),
def main():
    """Build a table of annotated features and save it as an HTML page.

    Reads three inputs named on the module-level ``args`` object:
    ``name_file`` (tab-separated; column 0 is mapped to column 3),
    ``gene_quanti_file`` (tab-separated; rows whose first column starts
    with "Orientation" are dropped) and ``input_file`` (parsed with
    ``Gff3Parser``, entries grouped by their ``feature``).  For every
    feature type selected through ``args.features`` the matching
    ``get_*_info`` helper is called with the shared ``data`` dict, which is
    then wrapped in a ``ColumnDataSource``/``DataTable`` and saved.

    NOTE(review): the source this was recovered from had lost its
    indentation.  The layout below assumes that only ``output_file`` is
    guarded by the ``args.features == "all"`` test and that the table is
    built once, after the feature loop -- confirm against the original.
    """
    gffs = {}
    names = {}
    quants = []
    if args.features == "all":
        output_file("main.html", mode='inline')
    # One list per output column; passed to every get_*_info helper below.
    data = {"link": [], "features": [], "start": [], "end": [],
            "strand": [], "gene_name": [],
            "pMEM_OD_0.2": [], "pMEM_OD_0.5": [], "pMEM_OD_1": [],
            "pMEM_t0": [], "pMEM_t1": [], "pMEM_t2": [], "pMEM_ON": [],
            "TSB_OD_0.2": [], "TSB_OD_0.5": [], "TSB_OD_1": [],
            "TSB_t0": [], "TSB_t1": [], "TSB_t2": [], "TSB_ON": [],
            "pMEM_OD_0.2_TEX": [], "pMEM_OD_0.5_TEX": [],
            "pMEM_OD_1_TEX": [], "pMEM_t0_TEX": [], "pMEM_t1_TEX": [],
            "pMEM_t2_TEX": [], "pMEM_ON_TEX": [],
            "TSB_OD_0.2_TEX": [], "TSB_OD_0.5_TEX": [],
            "TSB_OD_1_TEX": [], "TSB_t0_TEX": [], "TSB_t1_TEX": [],
            "TSB_t2_TEX": [], "TSB_ON_TEX": [],
            "frag": [], "tss": [], "ps": [], "term": [], "tran": []}
    # NOTE(review): none of the three handles opened below is closed.
    nh = open(args.name_file, "r")
    for row in csv.reader(nh, delimiter='\t'):
        names[row[0]] = row[3]
    fh = open(args.gene_quanti_file, "r")
    for row in csv.reader(fh, delimiter='\t'):
        # Skip rows beginning with "Orientation" (presumably the header).
        if not row[0].startswith("Orientation"):
            quants.append(row)
    gff_f = open(args.input_file, "r")
    # Group the parsed entries by feature type.
    for entry in Gff3Parser().entries(gff_f):
        if entry.feature not in gffs.keys():
            gffs[entry.feature] = []
        gffs[entry.feature].append(entry)
    for feature in gffs.keys():
        # Handle either the single requested feature type or all of them.
        if (feature == args.features) or (args.features == "all"):
            if feature == "riboswitch":
                gen_html("riboswitch.html")
                get_rfam_info(gffs["riboswitch"], gffs, data, quants,
                              "riboswitch")
            elif feature == "RNA_thermometer":
                gen_html("RNA_thermometer.html")
                get_rfam_info(gffs["RNA_thermometer"], gffs, data, quants,
                              "RNA_thermometer")
            elif feature == "CRISPR":
                gen_html("CRISPR.html")
                get_crispr_info(gffs["CRISPR"], gffs, data, quants)
            elif feature == "tRNA":
                gen_html("tRNA.html")
                get_rna_info(gffs["tRNA"], gffs, data, quants, "tRNA")
            elif feature == "rRNA":
                gen_html("rRNA.html")
                get_rna_info(gffs["rRNA"], gffs, data, quants, "rRNA")
            elif feature == "sORF":
                gen_html("sORF.html")
                get_sorf_info(gffs["sORF"], gffs, data, quants)
            elif feature == "ncRNA":
                gen_html("ncRNA.html")
                get_srna_info(gffs["ncRNA"], gffs, data, quants)
            elif feature == "CDS":
                gen_html("CDS.html")
                # CDS is the only branch that also receives ``names``.
                get_cds_info(gffs["CDS"], gffs, data, quants, names)
    source = ColumnDataSource(data)
    columns = gen_column()
    data_table = DataTable(source=source, columns=columns,
                           height=3000, width=6000)
    save(widgetbox(data_table))