Exemplo n.º 1
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many benchmark sORFs are recovered by the predictions.

    Reads the entries of ``args.benchmark_file`` and ``args.predict_file``
    through ``Gff3Parser``. A benchmark sORF counts as detected when at
    least one prediction on the same strand overlaps it; detected entries
    additionally get ``attributes["detect"] = True``. The counts and the
    detection rate are printed to stdout. When the benchmark is empty the
    rate is printed as ``NA`` instead of dividing by zero.
    """
    # Materialise both files inside ``with`` so the handles are closed even
    # if parsing fails part-way through.
    with open(args.benchmark_file) as bench_fh:
        sorfs = list(Gff3Parser().entries(bench_fh))
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    num_ref = len(sorfs)
    detect = 0
    for sorf in sorfs:
        for pre in pres:
            if pre.strand == sorf.strand and _overlaps(
                    pre.start, pre.end, sorf.start, sorf.end):
                detect += 1
                sorf.attributes["detect"] = True
                # One hit is enough; do not count the same sORF twice.
                break
    print("the number of known sORFs which can be detected by ANNOgesic:" +
          str(detect))
    print("the total number of known sORFs:" + str(num_ref))
    if num_ref:
        print("the detection rate:" + str(float(detect) / float(num_ref)))
    else:
        print("the detection rate:NA")
Exemplo n.º 2
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many EcoCyc transcripts are recovered by the predictions.

    ``args.ecocyc_file`` is read as a tab-separated table whose first three
    columns are used as name, start and end. Rows sharing a name are merged
    into one transcript spanning the smallest start to the largest end.
    A transcript counts as detected when any entry of ``args.predict_file``
    overlaps it (strand is not considered here). Counts and the detection
    rate are printed; an empty reference prints the rate as ``NA``.
    """
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    trans = {}
    with open(args.ecocyc_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            name, start, end = row[0], int(row[1]), int(row[2])
            known = trans.get(name)
            if known is None:
                trans[name] = {"start": start, "end": end}
            else:
                # Widen the stored span so it covers every row of this name.
                known["start"] = min(known["start"], start)
                known["end"] = max(known["end"], end)
    # One reference transcript per distinct name.
    total = len(trans)
    detect = 0
    for ref in trans.values():
        if any(_overlaps(pre.start, pre.end, ref["start"], ref["end"])
               for pre in pres):
            detect += 1
    print("the number of published transcripts which can be detected by ANNOgesic:" + str(detect))
    print("total number of transcripts in EcoCyc:" + str(total))
    if total:
        print("detection rate:" + str(float(detect)/float(total)))
    else:
        print("detection rate:NA")
Exemplo n.º 3
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many RegulonDB terminators are recovered by predictions.

    ``args.regulondb_file`` is read as a tab-separated table using columns
    0-3 as id, start, end and orientation. ``"forward"`` maps to ``+``,
    anything else to ``-``; a ``"both"`` row is additionally recorded on
    ``+`` so it is represented on both strands. A reference terminator
    counts as detected when a same-strand entry of ``args.predict_file``
    overlaps it.

    The original loop iterated over predictions and counted *predictions*
    that hit any reference, so several predictions on one terminator
    inflated the count and the rate could exceed 1. The count is now taken
    per reference terminator, matching the printed labels and the
    denominator.
    """
    terms = []
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            start, end = int(row[1]), int(row[2])
            strand = "+" if row[3] == "forward" else "-"
            terms.append({"id": row[0], "start": start,
                          "end": end, "strand": strand})
            if row[3] == "both":
                # The entry above landed on "-"; add the "+" counterpart.
                terms.append({"id": row[0], "start": start,
                              "end": end, "strand": "+"})
    total = len(terms)
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    detect = 0
    for ref in terms:
        if any(pre.strand == ref["strand"] and
               _overlaps(pre.start, pre.end, ref["start"], ref["end"])
               for pre in pres):
            detect += 1
    print("the number of published terminators which can be detected by ANNOgesic:" + str(detect))
    print("total number of terminators in RegulonDB:" + str(total))
    if total:
        print("detection rate:" + str(float(detect)/float(total)))
    else:
        print("detection rate:NA")
Exemplo n.º 4
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many EcoCyc terminators are recovered by the predictions.

    Rows of the tab-separated ``args.ecocyc_file`` with at least four
    columns are kept, using columns 0-2 as id, start and end. A reference
    terminator counts as detected when any entry of ``args.predict_file``
    overlaps it (strand is not considered here).

    The original loop counted *predictions* that hit any reference, so the
    figure did not match its label and the rate could exceed 1; the count
    is now taken per reference terminator. The unused prediction counter
    was dropped.
    """
    terms = []
    with open(args.ecocyc_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if len(row) >= 4:
                terms.append({"id": row[0], "start": int(row[1]),
                              "end": int(row[2])})
    total = len(terms)
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    detect = 0
    for ref in terms:
        if any(_overlaps(pre.start, pre.end, ref["start"], ref["end"])
               for pre in pres):
            detect += 1
    print("the number of published terminators can be detected by ANNOgesic:" + str(detect))
    print("total number of terminators in EcoCyc:" + str(total))
    if total:
        print("detection rate:" + str(float(detect)/float(total)))
    else:
        print("detection rate:NA")
Exemplo n.º 5
0
def main():
    """Report how many published TSSs are recovered by the predictions.

    ``args.regulondb_file`` is read as a tab-separated table. Rows whose
    first field starts with ``#`` or whose last field is ``"weak"`` are
    skipped. For the remaining rows column 1 is the key, column 3 the
    position and column 5 the orientation (``"forward"`` -> ``+``,
    otherwise ``-``). A reference counts as detected when a same-strand
    entry of ``args.predict_file`` starts within ``args.fuzzy`` positions
    of it. An empty reference prints the rate as ``NA``.
    """
    with open(args.predict_file) as pre_fh:
        tsss = list(Gff3Parser().entries(pre_fh))
    pros = {}
    total = 0
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            if row[0].startswith("#") or row[-1] == "weak":
                continue
            # NOTE(review): total counts rows while pros keeps one entry per
            # row[1]; repeated keys would make total larger than len(pros).
            total += 1
            strand = "+" if row[5] == "forward" else "-"
            pros[row[1]] = {"start": int(row[3]), "strand": strand}
    detect = 0
    for ref in pros.values():
        if any(pre.strand == ref["strand"] and
               abs(ref["start"] - pre.start) <= args.fuzzy
               for pre in tsss):
            detect += 1
    print("the number of published TSSs which can be detected by ANNOgesic:" + str(detect))
    # The separator was missing, which glued the number onto the label.
    print("the total number of TSSs from Mendoza-Vargas in Regulon DB:" + str(total))
    if total:
        print("detection rate:" + str(float(detect)/float(total)))
    else:
        print("detection rate:NA")
Exemplo n.º 6
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many RefSeq sRNAs are recovered by the predictions.

    Entries of ``args.refseq_file`` whose feature is ``"ncRNA"`` form the
    reference set. A reference counts as detected when a same-strand entry
    of ``args.predict_file`` overlaps it. Counts and the detection rate are
    printed; an empty reference prints the rate as ``NA``.
    """
    with open(args.predict_file) as pre_fh:
        pres = list(Gff3Parser().entries(pre_fh))
    with open(args.refseq_file) as ref_fh:
        refs = [entry for entry in Gff3Parser().entries(ref_fh)
                if entry.feature == "ncRNA"]
    num_ref = len(refs)
    detect = 0
    for ref in refs:
        if any(pre.strand == ref.strand and
               _overlaps(pre.start, pre.end, ref.start, ref.end)
               for pre in pres):
            detect += 1
    print("the number of published sRNAs which can be detected by ANNOgesic:" +
          str(detect))
    print("total number of sRNAs in RefSeq:" + str(num_ref))
    if num_ref:
        print("detection rate:" + str(float(detect) / float(num_ref)))
    else:
        print("detection rate:NA")
Exemplo n.º 7
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def main():
    """Report how many reported TSSs are recovered by the predictions.

    ``args.regulondb_file`` is read as a tab-separated table; rows whose
    first field starts with ``#`` are skipped. Columns 0 and 1 give the
    reference start and end, column 5 the orientation (``"forward"`` ->
    ``+``, otherwise ``-``). Each reference window is widened by
    ``args.fuzzy`` on both sides and counts as detected when a same-strand
    entry of ``args.predict_file`` overlaps the widened window.

    The "total number" line previously printed the detected count; it now
    prints the reference total. An empty reference prints the rate as
    ``NA``.
    """
    with open(args.predict_file) as pre_fh:
        tsss = list(Gff3Parser().entries(pre_fh))
    refs = []
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row or row[0].startswith("#"):
                # Blank lines come back as [] from csv.reader.
                continue
            strand = "+" if row[5] == "forward" else "-"
            refs.append({"start": int(row[0]),
                         "end": int(row[1]), "strand": strand})
    total = len(refs)
    detect = 0
    for ref in refs:
        # Tolerance window around the reported position.
        win_start = ref["start"] - args.fuzzy
        win_end = ref["end"] + args.fuzzy
        if any(pre.strand == ref["strand"] and
               _overlaps(pre.start, pre.end, win_start, win_end)
               for pre in tsss):
            detect += 1
    print("the number of reported TSSs which can be detected by ANNOgesic:" + str(detect))
    print("total number of TSSs from Salgado et. al in RegulonDB:" + str(total))
    if total:
        print("detection rate:" + str(float(detect)/float(total)))
    else:
        print("detection rate:NA")
Exemplo n.º 8
0
def _overlaps(pre_start, pre_end, ref_start, ref_end):
    """Return True if the predicted interval overlaps the reference interval.

    The four clauses cover, in order: prediction inside the reference,
    prediction spanning the reference, prediction running over the
    reference's end, and prediction running over the reference's start.
    """
    return (
        (pre_start >= ref_start and pre_end <= ref_end)
        or (pre_start <= ref_start and pre_end >= ref_end)
        or (ref_start <= pre_start <= ref_end and pre_end >= ref_end)
        or (pre_start <= ref_start and ref_start <= pre_end <= ref_end)
    )


def _record_utr(utrs, key, field, strand):
    """Add the ``start-end`` range in *field* to ``utrs[key]``, widening it."""
    parts = field.split("-")
    start, end = int(parts[0]), int(parts[-1])
    known = utrs.get(key)
    if known is None:
        utrs[key] = {"start": start, "end": end, "strand": strand}
        return
    # Keep the union of all ranges seen for this key. The original compared
    # the end with "<", which shrank the span instead of extending it.
    known["start"] = min(known["start"], start)
    known["end"] = max(known["end"], end)


def _count_detected(refs, pres):
    """Count references overlapped by at least one same-strand prediction."""
    return sum(
        1 for ref in refs
        if any(pre.strand == ref["strand"] and
               _overlaps(pre.start, pre.end, ref["start"], ref["end"])
               for pre in pres))


def main():
    """Report how many RegulonDB 5'UTRs and 3'UTRs are recovered.

    ``args.regulondb_file`` is read as a tab-separated table; rows whose
    first field starts with ``#`` are skipped. Column 0 is the key, column
    4 the orientation (``"forward"`` -> ``+``, otherwise ``-``), and
    columns 9 and 11 hold ``start-end`` ranges used as the 5'UTR and 3'UTR
    respectively when non-empty. Ranges sharing a key are merged; the
    strand of the first row for a key is kept.

    Fixes relative to the original: merged ranges now extend their end
    instead of shrinking it; the count is taken per reference UTR rather
    than per prediction (so the rate cannot exceed 1); the 3'UTR figures
    are labelled as 3'UTRs; and an empty reference prints the rate as
    ``NA`` instead of dividing by zero.
    """
    utr5s = {}
    utr3s = {}
    with open(args.regulondb_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if not row or row[0].startswith("#"):
                # Blank lines come back as [] from csv.reader.
                continue
            strand = "+" if row[4] == "forward" else "-"
            if row[9]:
                _record_utr(utr5s, row[0], row[9], strand)
            if row[11]:
                _record_utr(utr3s, row[0], row[11], strand)
    total5 = len(utr5s)
    total3 = len(utr3s)
    with open(args.predict5_file) as pre5_fh:
        pre5s = list(Gff3Parser().entries(pre5_fh))
    with open(args.predict3_file) as pre3_fh:
        pre3s = list(Gff3Parser().entries(pre3_fh))
    detect5 = _count_detected(utr5s.values(), pre5s)
    detect3 = _count_detected(utr3s.values(), pre3s)
    print(
        "the number of published 5'UTRs which can be detected by ANNOgesic:" +
        str(detect5))
    print(
        "the number of published 3'UTRs which can be detected by ANNOgesic:" +
        str(detect3))
    print("total number of 5'UTRs in RegulonDB:" + str(total5))
    print("total number of 3'UTRs in RegulonDB:" + str(total3))
    # Rates are printed in the same order as above: 5'UTR first, then 3'UTR.
    for detect, total in ((detect5, total5), (detect3, total3)):
        if total:
            print("detection rate:" + str(float(detect) / float(total)))
        else:
            print("detection rate:NA")
Exemplo n.º 9
0
                           editable=True, height=4000, width=5000)
    return data_table

def main():
    gos = {}
    goh = open(args.go_file, "r")
    for row in csv.reader(goh, delimiter='\t'):
        gos[row[0]] = row[1].split(";")
    genes = []
    names = {}
    srnas = []
    nh = open(args.name_file, "r")
    for row in csv.reader(nh, delimiter='\t'):
        names[row[0]] = row[3]
    gh = open(args.gff_file, "r")
    for entry in Gff3Parser().entries(gh):
        if entry.feature == "gene":
            genes.append(entry)
        elif entry.feature == "ncRNA":
            srnas.append(entry)
    for srna in srnas:
        members = {"role": [], "feature": [], "start": [], "end": [],
                   "strand": [], "link": [], "gene_name": [], "GO": [],
                   "cc": [], "pMEM_OD_0.2": [], "pMEM_OD_0.5": [],
                   "pMEM_OD_1": [], "pMEM_t0": [], "pMEM_t1": [], "pMEM_t2": [],
                   "pMEM_ON": [], "TSB_OD_0.2": [], "TSB_OD_0.5": [],
                   "TSB_OD_1": [], "TSB_t0": [], "TSB_t1": [], "TSB_t2": [],
                   "TSB_ON": []}
        exps ={}
        infos = []
        print("_".join([srna.feature, str(srna.start),
Exemplo n.º 10
0
def main():
    """Build the HTML data table for the requested feature type(s).

    Reads ``args.name_file`` (column 0 -> column 3 mapping), the
    quantification rows of ``args.gene_quanti_file`` (rows whose first
    field starts with ``"Orientation"`` are skipped) and the entries of
    ``args.input_file`` grouped by feature. For every feature matching
    ``args.features`` (or every feature when it is ``"all"``) that has a
    handler, ``gen_html("<feature>.html")`` is called followed by the
    feature's ``get_*_info`` collector, which receives ``data``. The
    resulting table is saved through bokeh's ``save``.

    All three input files are now read inside ``with`` blocks so their
    handles are closed; the original left them open.
    """
    if args.features == "all":
        output_file("main.html", mode='inline')
    data = {"link": [], "features": [], "start": [], "end": [], "strand": [],
            "gene_name": [], "pMEM_OD_0.2": [], "pMEM_OD_0.5": [],
            "pMEM_OD_1": [], "pMEM_t0": [], "pMEM_t1": [], "pMEM_t2": [],
            "pMEM_ON": [], "TSB_OD_0.2": [], "TSB_OD_0.5": [],
            "TSB_OD_1": [], "TSB_t0": [], "TSB_t1": [], "TSB_t2": [],
            "TSB_ON": [], "pMEM_OD_0.2_TEX": [], "pMEM_OD_0.5_TEX": [],
            "pMEM_OD_1_TEX": [], "pMEM_t0_TEX": [], "pMEM_t1_TEX": [],
            "pMEM_t2_TEX": [], "pMEM_ON_TEX": [], "TSB_OD_0.2_TEX": [],
            "TSB_OD_0.5_TEX": [], "TSB_OD_1_TEX": [], "TSB_t0_TEX": [],
            "TSB_t1_TEX": [], "TSB_t2_TEX": [], "TSB_ON_TEX": [],
            "frag": [], "tss": [], "ps": [], "term": [], "tran": []}
    with open(args.name_file, "r") as nh:
        names = {row[0]: row[3] for row in csv.reader(nh, delimiter='\t')}
    with open(args.gene_quanti_file, "r") as fh:
        quants = [row for row in csv.reader(fh, delimiter='\t')
                  if not row[0].startswith("Orientation")]
    gffs = {}
    with open(args.input_file, "r") as gff_f:
        for entry in Gff3Parser().entries(gff_f):
            gffs.setdefault(entry.feature, []).append(entry)

    # Feature name -> collector call; each receives that feature's entries.
    handlers = {
        "riboswitch": lambda entries: get_rfam_info(
            entries, gffs, data, quants, "riboswitch"),
        "RNA_thermometer": lambda entries: get_rfam_info(
            entries, gffs, data, quants, "RNA_thermometer"),
        "CRISPR": lambda entries: get_crispr_info(
            entries, gffs, data, quants),
        "tRNA": lambda entries: get_rna_info(
            entries, gffs, data, quants, "tRNA"),
        "rRNA": lambda entries: get_rna_info(
            entries, gffs, data, quants, "rRNA"),
        "sORF": lambda entries: get_sorf_info(
            entries, gffs, data, quants),
        "ncRNA": lambda entries: get_srna_info(
            entries, gffs, data, quants),
        "CDS": lambda entries: get_cds_info(
            entries, gffs, data, quants, names),
    }
    for feature, entries in gffs.items():
        if feature != args.features and args.features != "all":
            continue
        handler = handlers.get(feature)
        if handler is None:
            # Features without a collector are ignored, as before.
            continue
        gen_html(feature + ".html")
        handler(entries)
    source = ColumnDataSource(data)
    columns = gen_column()
    data_table = DataTable(source=source, columns=columns, height=3000, width=6000)
    save(widgetbox(data_table))