Пример #1
0
 def run_analysis(self):
     """Score real versus randomly drawn miRNA-TF pairs for every document.

     ``self.tfs`` and ``self.mirnas`` are both indexed by the same keys
     (called ``pmid`` here).  For each key, all TFs are collected together
     with the miRNA objects whose ``eid`` equals the first element of one
     of the TF's ``targets`` entries.  Ten (TF, miRNA) pairs are then drawn
     at random with replacement.  A pair counts as "correct" when
     ``(mirna.eid, "miRNA-gene")`` is in the TF's ``targets`` and as
     "incorrect" otherwise.  Each pair contributes
     ``ssm.simui_go(mirna.best_go, tf.best_go)`` when both ``best_go``
     values carry a GO prefix, and ``1`` otherwise.

     Nothing is returned; one summary line per sampled document is printed.
     """
     n_samples = 10
     for pmid in self.tfs:
         correct_count = 0  # accumulated score of sampled real miRNA-gene pairs
         incorrect_count = 0  # accumulated score of sampled random (non-related) pairs
         all_tfs = []
         all_mirnas = []
         for tf in self.tfs[pmid]:
             all_tfs.append(tf)
             for target in tf.targets:
                 # First miRNA of this document whose eid matches the target.
                 match = next(
                     (m for m in self.mirnas[pmid] if m.eid == target[0]),
                     None)
                 # Only keep real matches: appending None (or a stale match
                 # from a previous target) would crash or skew the sampling.
                 if match is not None:
                     all_mirnas.append(match)
         if len(all_tfs) > 1 and len(all_mirnas) > 1:
             for _ in range(n_samples):
                 random_tf = random.choice(all_tfs)
                 random_mirna = random.choice(all_mirnas)
                 is_related = (random_mirna.eid,
                               "miRNA-gene") in random_tf.targets
                 # NOTE(review): the TF prefix check is "GO" without the
                 # colon, unlike the miRNA one; kept as found - confirm.
                 has_go = (random_mirna.best_go.startswith("GO:")
                           and random_tf.best_go.startswith("GO"))
                 if has_go:
                     score = ssm.simui_go(random_mirna.best_go,
                                          random_tf.best_go)
                 else:
                     # Without usable GO terms the pair counts as 1.
                     score = 1
                 if is_related:
                     correct_count += score
                 else:
                     if has_go:
                         print("incorrect: {}".format(score))
                     incorrect_count += score
             print("{}-{} ({} mirnas, {} tfs)".format(
                 correct_count, incorrect_count, len(all_mirnas),
                 len(all_tfs)))
Пример #2
0
 def run_analysis(self):
     """Print, per document, the scores of sampled real and random pairs.

     For every key ``pmid`` of ``self.tfs`` the method gathers the TFs of
     that document and, for each entry of a TF's ``targets``, the miRNA in
     ``self.mirnas[pmid]`` whose ``eid`` equals the entry's first element.
     When more than one TF and more than one miRNA were gathered, ten
     (TF, miRNA) pairs are sampled with ``random.choice``.  A sampled pair
     is "correct" if ``(mirna.eid, "miRNA-gene")`` is among the TF's
     targets, otherwise "incorrect".  Its score is
     ``ssm.simui_go(mirna.best_go, tf.best_go)`` when both ``best_go``
     strings have a GO prefix and ``1`` when they do not.

     The method returns nothing; results are written with ``print``.
     """

     def find_mirna(candidates, eid):
         # Return the first candidate with the given eid, or None.
         for candidate in candidates:
             if candidate.eid == eid:
                 return candidate
         return None

     def pair_score(mirna, tf):
         # Return (score, used_similarity) for one sampled pair.
         # NOTE(review): the TF check uses the prefix "GO" without a colon,
         # as in the original code - confirm whether "GO:" was intended.
         if mirna.best_go.startswith("GO:") and tf.best_go.startswith("GO"):
             return ssm.simui_go(mirna.best_go, tf.best_go), True
         return 1, False

     for pmid in self.tfs:
         all_tfs = list(self.tfs[pmid])
         all_mirnas = []
         for tf in all_tfs:
             for target in tf.targets:
                 found = find_mirna(self.mirnas[pmid], target[0])
                 # Skip targets without a matching miRNA object; a None
                 # entry here would break the random sampling below.
                 if found is not None:
                     all_mirnas.append(found)

         if len(all_tfs) <= 1 or len(all_mirnas) <= 1:
             continue

         correct_count = 0  # summed score of sampled real miRNA-gene pairs
         incorrect_count = 0  # summed score of sampled unrelated pairs
         for _ in range(10):
             random_tf = random.choice(all_tfs)
             random_mirna = random.choice(all_mirnas)
             related = (random_mirna.eid, "miRNA-gene") in random_tf.targets
             score, used_similarity = pair_score(random_mirna, random_tf)
             if related:
                 correct_count += score
             else:
                 if used_similarity:
                     print("incorrect: {}".format(score))
                 incorrect_count += score
         print("{}-{} ({} mirnas, {} tfs)".format(
             correct_count, incorrect_count, len(all_mirnas), len(all_tfs)))
Пример #3
0
def annotate_corpus_relations(corpus, model, corpuspath):

    logging.info("getting relations...")
    # entities, relations = load_gold_relations(reltype)
    logging.info("finding relations...")
    # print entities.keys()[:20]
    for did in corpus.documents:
        for sentence in corpus.documents[did].sentences:
            sentences_mirnas = []
            sentence_tfs = []
            #print sentence.entities.elist
            for entity in sentence.entities.elist[model]:
                if entity.type == "mirna":
                    sentences_mirnas.append(entity)
                elif entity.type == "protein":
                    sentence_tfs.append(entity)
            for mirna in sentences_mirnas:
                for tf in sentence_tfs:
                    ss = ssm.simui_go(mirna.best_go, tf.best_go)
                    if ss > 0:
                        print ss, mirna.text, tf.text, mirna.best_go, tf.best_go

    print "saving corpus..."
    corpus.save(corpuspath)
Пример #4
0
def annotate_corpus_relations(corpus, model, corpuspath):

    logging.info("getting relations...")
    # entities, relations = load_gold_relations(reltype)
    logging.info("finding relations...")
    # print entities.keys()[:20]
    for did in corpus.documents:
        for sentence in corpus.documents[did].sentences:
            sentences_mirnas = []
            sentence_tfs = []
            #print sentence.entities.elist
            for entity in sentence.entities.elist[model]:
                if entity.type == "mirna":
                    sentences_mirnas.append(entity)
                elif entity.type == "protein":
                    sentence_tfs.append(entity)
            for mirna in sentences_mirnas:
                for tf in sentence_tfs:
                    ss = ssm.simui_go(mirna.best_go, tf.best_go)
                    if ss > 0:
                        print ss, mirna.text, tf.text, mirna.best_go, tf.best_go

    print "saving corpus..."
    corpus.save(corpuspath)
Пример #5
0
    def run_ss_analysis(self, pairtype):
        """Compare GO similarity of related and unrelated miRNA-protein pairs.

        For each document in ``self.documents`` the 'goldstandard_mirna' and
        'goldstandard_protein' entities are fetched with ``get_entities``.
        Random (protein, miRNA) pairs are drawn until either ``nexp`` related
        or ``nexp`` unrelated pairs have been seen; a pair is related when
        ``(protein.eid, pairtype)`` is in the miRNA's ``targets``.  Each pair
        is scored with the mean of ``ssm.simui_go`` over all combinations of
        the two entities' ``go_ids``.  For documents where both score sums
        are non-zero, the difference between the mean related score and the
        mean unrelated score is printed and recorded; the average of the
        recorded differences is printed at the end.

        :param pairtype: relation label looked up in the miRNA ``targets``.
        :return: None; all results are printed.
        """
        nexp = 10  # sampling stops once one category has this many pairs

        def mean_similarity(mirna, tf):
            # Mean simui_go over every (miRNA GO, protein GO) combination;
            # 0 when either entity has no GO ids.
            scores = [ssm.simui_go(mirna_go, tf_go)
                      for mirna_go in mirna.go_ids
                      for tf_go in tf.go_ids]
            if len(scores) == 0:
                return 0
            return sum(scores) * 1.0 / len(scores)

        diff_count = []
        for did in self.documents:
            document = self.documents[did]
            all_mirnas = document.get_entities('goldstandard_mirna')
            all_tfs = document.get_entities('goldstandard_protein')
            if len(all_mirnas) == 0 or len(all_tfs) == 0:
                continue

            correct = 0  # number of related pairs sampled
            incorrect = 0  # number of unrelated pairs sampled
            correct_count = 0  # summed similarity of related pairs
            incorrect_count = 0  # summed similarity of unrelated pairs
            while correct < nexp and incorrect < nexp:
                random_tf = random.choice(all_tfs)
                random_mirna = random.choice(all_mirnas)
                if (random_tf.eid, pairtype) in random_mirna.targets:
                    correct_count += mean_similarity(random_mirna, random_tf)
                    correct += 1
                else:
                    incorrect_count += mean_similarity(random_mirna,
                                                       random_tf)
                    incorrect += 1

            if correct_count != 0 and incorrect_count != 0:
                # The loop ends when only ONE category reached nexp, so each
                # sum is averaged over the pairs actually drawn for it.
                # Both sums are non-zero floats here, hence both sample
                # counts are at least 1 and the division is a true division.
                correct_mean = correct_count / correct
                incorrect_mean = incorrect_count / incorrect
                print("{}={}-{} ({} mirnas, {} tfs)".format(
                    correct_mean - incorrect_mean, correct_mean,
                    incorrect_mean, len(all_mirnas), len(all_tfs)))
                diff_count.append(correct_mean - incorrect_mean)

        # No document may have contributed; avoid dividing by zero then.
        if diff_count:
            print(sum(diff_count) * 1.0 / len(diff_count))
Пример #6
0
    def run_ss_analysis(self, pairtype):
        """Print how much more GO-similar related pairs are than unrelated ones.

        Every document of ``self.documents`` supplies its
        'goldstandard_mirna' and 'goldstandard_protein' entities (via
        ``get_entities``).  Pairs of one protein and one miRNA are sampled
        with ``random.choice`` until ten pairs of one kind were collected:
        "correct" pairs have ``(protein.eid, pairtype)`` in the miRNA's
        ``targets``, all others are "incorrect".  A pair's score is the
        average ``ssm.simui_go`` value over the cross product of both
        entities' ``go_ids`` (0 when there is no combination).  Documents
        whose correct and incorrect score sums are both non-zero print
        ``difference=correct_mean-incorrect_mean`` and contribute the
        difference to the overall average printed last.

        :param pairtype: relation label expected in ``mirna.targets`` tuples.
        :return: None.
        """
        nexp = 10  # target number of samples per category

        def sample_score(mirna, tf):
            # Average similarity across all GO id combinations of the pair.
            total = 0
            combinations = 0
            for mirna_go in mirna.go_ids:
                for tf_go in tf.go_ids:
                    total += ssm.simui_go(mirna_go, tf_go)
                    combinations += 1
            if combinations == 0:
                return 0
            return total * 1.0 / combinations

        diff_count = []
        for did in self.documents:
            all_mirnas = self.documents[did].get_entities('goldstandard_mirna')
            all_tfs = self.documents[did].get_entities('goldstandard_protein')
            if len(all_mirnas) > 0 and len(all_tfs) > 0:
                # Per-category [number of samples, summed score].
                drawn = {True: [0, 0], False: [0, 0]}
                while drawn[True][0] < nexp and drawn[False][0] < nexp:
                    random_tf = random.choice(all_tfs)
                    random_mirna = random.choice(all_mirnas)
                    is_correct = (random_tf.eid, pairtype) in random_mirna.targets
                    stats = drawn[is_correct]
                    stats[1] += sample_score(random_mirna, random_tf)
                    stats[0] += 1

                correct, correct_count = drawn[True]
                incorrect, incorrect_count = drawn[False]
                if correct_count != 0 and incorrect_count != 0:
                    # Sampling stops when either category is full, so the
                    # other one holds fewer than nexp samples; average each
                    # sum over its own sample count.  Non-zero sums are
                    # floats, which also guarantees non-zero counts.
                    correct_avg = correct_count / correct
                    incorrect_avg = incorrect_count / incorrect
                    print("{}={}-{} ({} mirnas, {} tfs)".format(correct_avg - incorrect_avg,
                                                                correct_avg, incorrect_avg,
                                                                len(all_mirnas), len(all_tfs)))
                    diff_count.append(correct_avg - incorrect_avg)

        # Guard against ZeroDivisionError when no document produced a value.
        if len(diff_count) > 0:
            print(sum(diff_count) * 1.0 / len(diff_count))