Пример #1
0
    def computeGlobalStats(self):
        '''
        Print sense counts and save global performance statistics.

        Reads self.corpus, self.mmap, self.bnet, self.tagm and self.wnet
        (sequences of sense identifiers; the string 'NaN' appears to mark
        a missing sense, judging by the "non-null" report labels).  The
        distinct senses of each annotator are scored against the distinct
        corpus senses with precision_score, recall_score and f1_score,
        which are defined outside this method.

        Side effects:
            prints two count reports to stdout;
            self.g_csv -- CSV text: a header row plus "Pre", "Rec" and
                          "F-1" rows, scores formatted with two decimals;
            self.g_tex -- the same table as a LaTeX tabular environment.
        '''

        def report(label, value, width):
            # Pad the label so that every "=" lines up in the report.
            print("%s= %s" % (label.ljust(width), value))

        # (report label, raw annotations), in CSV column order.
        systems = [
            ("mmap", self.mmap),
            ("bnet", self.bnet),
            ("tagm", self.tagm),
            ("wnet", self.wnet),
        ]

        print("\n====================================")
        # NOTE(review): 428 * 2 is hard-coded here; presumably the number
        # of CUIs in the evaluation corpus -- confirm.
        report("# of CUIs in corpus", 428 * 2, 30)
        print("------------------------------------")
        report("# of DBpedia senses in corpus", len(self.corpus), 30)
        for name, senses in systems:
            report("# of %s senses" % name, len(senses), 30)
            report("# of %s non-null senses" % name,
                   len(senses) - senses.count('NaN'), 30)
        print("====================================\n")

        # Build every set once; they are reused by the report below and
        # by all three metrics (assumes the scorers do not mutate them).
        gold = set(self.corpus)
        distinct = [set(senses) for _, senses in systems]

        print("\n====================================")
        report("# of red. senses in corpus", len(gold), 28)
        for (name, _), senses in zip(systems, distinct):
            report("# of red. %s senses" % name, len(senses), 28)
        print("====================================\n")

        # One CSV row per metric, one column per annotator.
        rows = ["(avg.),MetaMap,BabelFly,TagMe,WordNet"]
        for tag, metric in (("Pre", precision_score),
                            ("Rec", recall_score),
                            ("F-1", f1_score)):
            cells = [format(metric(gold, senses), '.2f')
                     for senses in distinct]
            rows.append(",".join([tag] + cells))
        table = "\n".join(rows)

        self.g_csv = table.replace("'", "")

        # generate .tex table: every newline becomes the LaTeX row
        # terminator "\\ " and every comma a column separator.  The
        # backslashes are escaped explicitly instead of relying on
        # unrecognised escape sequences being passed through.
        tex = "\\begin{tabular}{ccccc}\n" + table.replace("\n", "\\\\ \n")
        self.g_tex = tex.replace(",", " & ") + "\n\\end{tabular}"
Пример #2
0
    def computeResStats(self):
        '''
        Print sense counts and save global performance statistics
        (resolved).

        Reads self.corpus, self.mmap, self.bnet, self.tagm and self.wnet
        (sequences of sense URIs; the string 'NaN' appears to mark a
        missing sense, judging by the "non-null" report labels).

        Before scoring, the distinct BabelFly, TagMe and WordNet
        annotations are normalised: for every distinct corpus URI, each
        annotation for which self.spar.isEqual(corpus_uri, annotation)
        compares equal to True is replaced by the corpus URI.  MetaMap
        annotations are scored as they are, without this resolution pass.
        Scores come from precision_score, recall_score and f1_score,
        which are defined outside this method.

        Side effects:
            prints count reports, progress messages and the compared
            sets/lists to stdout;
            sleeps one second before each resolution pass;
            self.g_csv -- CSV text: a header row plus "Pre", "Rec" and
                          "F-1" rows, scores formatted with two decimals;
            self.g_tex -- the same table as a LaTeX tabular environment.
        '''

        def report(label, value, width):
            # Pad the label so that every "=" lines up in the report.
            print("%s= %s" % (label.ljust(width), value))

        def resolve(gold_uri, candidates):
            # Swap in the corpus URI for every candidate that self.spar
            # reports as equal to it.  The explicit "== True" is kept on
            # purpose: isEqual's return type is not visible here, and a
            # plain truthiness test could accept other values.
            return [
                gold_uri if self.spar.isEqual(gold_uri, uri) == True else uri
                for uri in candidates
            ]

        # (report label, raw annotations), in CSV column order.
        systems = [
            ("mmap", self.mmap),
            ("bnet", self.bnet),
            ("tagm", self.tagm),
            ("wnet", self.wnet),
        ]

        print("\n====================================")
        # NOTE(review): 428 * 2 is hard-coded here; presumably the number
        # of CUIs in the evaluation corpus -- confirm.
        report("# of CUIs in corpus", 428 * 2, 30)
        print("------------------------------------")
        report("# of DBpedia senses in corpus", len(self.corpus), 30)
        for name, senses in systems:
            report("# of %s senses" % name, len(senses), 30)
            report("# of %s non-null senses" % name,
                   len(senses) - senses.count('NaN'), 30)
        print("====================================\n")

        # Build every set once (assumes the scorers do not mutate them).
        gold = set(self.corpus)
        distinct = dict((name, set(senses)) for name, senses in systems)

        print("\n====================================")
        report("# of red. senses in corpus", len(gold), 28)
        for name, _ in systems:
            report("# of red. %s senses" % name, len(distinct[name]), 28)
        print("====================================\n")

        # collapse repetitions (MetaMap is deliberately left out: its
        # annotations are not resolved)
        to_resolve = ("bnet", "tagm", "wnet")
        resolved = dict((name, list(distinct[name])) for name in to_resolve)

        # resolve/normalize URI variants
        print("resolving annotations...\n")
        for count, gold_uri in enumerate(gold):
            print("resolving URI # %s" % count)
            for name in to_resolve:
                # NOTE(review): presumably throttles the lookups made by
                # self.spar -- confirm before shortening or removing.
                time.sleep(1)
                resolved[name] = resolve(gold_uri, resolved[name])
                print("%s updated" % name)
        print("annotations resolved!\n")

        # Sets actually scored, in CSV column order.
        scored = [distinct["mmap"]] + [set(resolved[name])
                                       for name in to_resolve]

        # One CSV row per metric, one column per annotator.
        rows = ["(avg.),MetaMap,BabelFly,TagMe,WordNet"]
        for tag, metric in (("Pre", precision_score),
                            ("Rec", recall_score),
                            ("F-1", f1_score)):
            cells = [format(metric(gold, senses), '.2f')
                     for senses in scored]
            rows.append(",".join([tag] + cells))
        table = "\n".join(rows)

        # Dump what was compared, for manual inspection.
        print(gold)
        print(distinct["mmap"])
        print(resolved["bnet"])
        print(resolved["wnet"])
        print(resolved["tagm"])

        self.g_csv = table.replace("'", "")

        # generate .tex table: every newline becomes the LaTeX row
        # terminator "\\ " and every comma a column separator.  The
        # backslashes are escaped explicitly instead of relying on
        # unrecognised escape sequences being passed through.
        tex = "\\begin{tabular}{ccccc}\n" + table.replace("\n", "\\\\ \n")
        self.g_tex = tex.replace(",", " & ") + "\n\\end{tabular}"
Пример #3
0
    def computeResStats(self):
        '''
        Print sense counts and save global performance statistics
        (resolved).

        Reads self.corpus, self.mmap, self.bnet, self.tagm and self.wnet
        (sequences of sense URIs; the string 'NaN' appears to mark a
        missing sense, judging by the "non-null" report labels).

        Before scoring, the distinct BabelFly, TagMe and WordNet
        annotations are normalised: for every distinct corpus URI, each
        annotation for which self.spar.isEqual(corpus_uri, annotation)
        compares equal to True is replaced by the corpus URI.  MetaMap
        annotations are scored as they are, without this resolution pass.
        Scores come from precision_score, recall_score and f1_score,
        which are defined outside this method.

        Side effects:
            prints count reports, progress messages and the compared
            sets/lists to stdout;
            sleeps one second before each resolution pass;
            self.g_csv -- CSV text: a header row plus "Pre", "Rec" and
                          "F-1" rows, scores formatted with two decimals.
        '''

        def report(label, value, width):
            # Pad the label so that every "=" lines up in the report.
            print("%s= %s" % (label.ljust(width), value))

        def resolve(gold_uri, candidates):
            # Swap in the corpus URI for every candidate that self.spar
            # reports as equal to it.  The explicit "== True" is kept on
            # purpose: isEqual's return type is not visible here, and a
            # plain truthiness test could accept other values.
            return [
                gold_uri if self.spar.isEqual(gold_uri, uri) == True else uri
                for uri in candidates
            ]

        # (report label, raw annotations), in CSV column order.
        systems = [
            ("mmap", self.mmap),
            ("bnet", self.bnet),
            ("tagm", self.tagm),
            ("wnet", self.wnet),
        ]

        print("\n====================================")
        # NOTE(review): 428 * 2 is hard-coded here; presumably the number
        # of CUIs in the evaluation corpus -- confirm.
        report("# of CUIs in corpus", 428 * 2, 30)
        print("------------------------------------")
        report("# of DBpedia senses in corpus", len(self.corpus), 30)
        for name, senses in systems:
            report("# of %s senses" % name, len(senses), 30)
            report("# of %s non-null senses" % name,
                   len(senses) - senses.count('NaN'), 30)
        print("====================================\n")

        # Build every set once (assumes the scorers do not mutate them).
        gold = set(self.corpus)
        distinct = dict((name, set(senses)) for name, senses in systems)

        print("\n====================================")
        report("# of red. senses in corpus", len(gold), 28)
        for name, _ in systems:
            report("# of red. %s senses" % name, len(distinct[name]), 28)
        print("====================================\n")

        # collapse repetitions (MetaMap is deliberately left out: its
        # annotations are not resolved)
        to_resolve = ("bnet", "tagm", "wnet")
        resolved = dict((name, list(distinct[name])) for name in to_resolve)

        # resolve/normalize URI variants
        print("resolving annotations...\n")
        for count, gold_uri in enumerate(gold):
            print("resolving URI # %s" % count)
            for name in to_resolve:
                # NOTE(review): presumably throttles the lookups made by
                # self.spar -- confirm before shortening or removing.
                time.sleep(1)
                resolved[name] = resolve(gold_uri, resolved[name])
                print("%s updated" % name)
        print("annotations resolved!\n")

        # Sets actually scored, in CSV column order.
        scored = [distinct["mmap"]] + [set(resolved[name])
                                       for name in to_resolve]

        # One CSV row per metric, one column per annotator.
        rows = ["(avg.),MetaMap,BabelFly,TagMe,WordNet"]
        for tag, metric in (("Pre", precision_score),
                            ("Rec", recall_score),
                            ("F-1", f1_score)):
            cells = [format(metric(gold, senses), '.2f')
                     for senses in scored]
            rows.append(",".join([tag] + cells))
        table = "\n".join(rows)

        # Dump what was compared, for manual inspection.
        print(gold)
        print(distinct["mmap"])
        print(resolved["bnet"])
        print(resolved["wnet"])
        print(resolved["tagm"])

        self.g_csv = table.replace("'", "")
Пример #4
0
    def computeGlobalStats(self):
        '''
        Print sense counts and save global performance statistics.

        Reads self.corpus, self.mmap, self.bnet, self.tagm and self.wnet
        (sequences of sense identifiers; the string 'NaN' appears to mark
        a missing sense, judging by the "non-null" report labels).  The
        distinct senses of each annotator are scored against the distinct
        corpus senses with precision_score, recall_score and f1_score,
        which are defined outside this method.

        Side effects:
            prints two count reports to stdout;
            self.g_csv -- CSV text: a header row plus "Pre", "Rec" and
                          "F-1" rows, scores formatted with two decimals.
        '''

        def report(label, value, width):
            # Pad the label so that every "=" lines up in the report.
            print("%s= %s" % (label.ljust(width), value))

        # (report label, raw annotations), in CSV column order.
        systems = [
            ("mmap", self.mmap),
            ("bnet", self.bnet),
            ("tagm", self.tagm),
            ("wnet", self.wnet),
        ]

        print("\n====================================")
        # NOTE(review): 428 * 2 is hard-coded here; presumably the number
        # of CUIs in the evaluation corpus -- confirm.
        report("# of CUIs in corpus", 428 * 2, 30)
        print("------------------------------------")
        report("# of DBpedia senses in corpus", len(self.corpus), 30)
        for name, senses in systems:
            report("# of %s senses" % name, len(senses), 30)
            report("# of %s non-null senses" % name,
                   len(senses) - senses.count('NaN'), 30)
        print("====================================\n")

        # Build every set once; they are reused by the report below and
        # by all three metrics (assumes the scorers do not mutate them).
        gold = set(self.corpus)
        distinct = [set(senses) for _, senses in systems]

        print("\n====================================")
        report("# of red. senses in corpus", len(gold), 28)
        for (name, _), senses in zip(systems, distinct):
            report("# of red. %s senses" % name, len(senses), 28)
        print("====================================\n")

        # One CSV row per metric, one column per annotator.
        rows = ["(avg.),MetaMap,BabelFly,TagMe,WordNet"]
        for tag, metric in (("Pre", precision_score),
                            ("Rec", recall_score),
                            ("F-1", f1_score)):
            cells = [format(metric(gold, senses), '.2f')
                     for senses in distinct]
            rows.append(",".join([tag] + cells))

        self.g_csv = "\n".join(rows).replace("'", "")
Пример #5
0
    def computeGlobalStats(self):
        '''
        Print sense counts and save global performance statistics.

        Reads self.corpus, self.mmap, self.bnet, self.tagm and self.wnet
        (sequences of sense identifiers; the string 'NaN' appears to mark
        a missing sense, judging by the "non-null" report labels).  The
        distinct senses of each annotator are scored against the distinct
        corpus senses with precision_score, recall_score and f1_score,
        which are defined outside this method.

        Side effects:
            prints two count reports to stdout;
            self.g_csv -- CSV text: a header row plus "Pre", "Rec" and
                          "F-1" rows, scores formatted with two decimals,
                          every row (including the last) newline-ended;
            self.g_tex -- the same table as a LaTeX tabular environment.
        '''

        def report(label, value, width):
            # Pad the label so that every "=" lines up in the report.
            print("%s= %s" % (label.ljust(width), value))

        # (report label, raw annotations), in CSV column order.
        systems = [
            ("mmap", self.mmap),
            ("bnet", self.bnet),
            ("tagm", self.tagm),
            ("wnet", self.wnet),
        ]

        print("\n====================================")
        # NOTE(review): 428 * 2 is hard-coded here; presumably the number
        # of CUIs in the evaluation corpus -- confirm.
        report("# of CUIs in corpus", 428 * 2, 31)
        print("------------------------------------")
        report("# of DBpedia senses in corpus", len(self.corpus), 31)
        report("# of non-null senses in corpus",
               len(self.corpus) - self.corpus.count('NaN'), 31)
        for name, senses in systems:
            report("# of %s senses" % name, len(senses), 31)
            report("# of %s non-null senses" % name,
                   len(senses) - senses.count('NaN'), 31)
        print("====================================\n")

        # Build every set once; they are reused by all three metrics and
        # by the report below (assumes the scorers do not mutate them).
        gold = set(self.corpus)
        distinct = [set(senses) for _, senses in systems]

        # One CSV row per metric, one column per annotator.
        rows = ["(avg.),MetaMap,BabelFly,TagMe,WordNet"]
        for tag, metric in (("Pre", precision_score),
                            ("Rec", recall_score),
                            ("F-1", f1_score)):
            cells = [format(metric(gold, senses), '.2f')
                     for senses in distinct]
            rows.append(",".join([tag] + cells))

        print("\n====================================")
        report("# of red. senses in corpus", len(gold), 28)
        for (name, _), senses in zip(systems, distinct):
            report("# of red. %s senses" % name, len(senses), 28)
        print("====================================\n")

        # Every row, the last one included, is newline-terminated.
        table = "\n".join(rows) + "\n"

        self.g_csv = table.replace("'", "")

        # generate .tex table: every newline becomes the LaTeX row
        # terminator "\\ " and every comma a column separator.  The
        # backslashes are escaped explicitly instead of relying on
        # unrecognised escape sequences being passed through.
        tex = "\\begin{tabular}{ccccc}\n" + table.replace("\n", "\\\\ \n")
        self.g_tex = tex.replace(",", " & ") + "\n\\end{tabular}"


#     def computeStats(self):
#         '''
#         save performance statistics
#         and test for statistical
#         significance
#         '''
#
#         csv = "(avg.),MetaMap,BabelFly,TagMe,WordNet\n"
#
#         bnet_acc = avg(self.bnet_a)
#         wnet_acc = avg(self.wnet_a)
#         tagm_acc = avg(self.tagm_a)
#         mmap_acc = avg(self.mmap_a)
#
#         bnet_rec = avg(self.bnet_r)
#         wnet_rec = avg(self.wnet_r)
#         tagm_rec = avg(self.tagm_r)
#         mmap_rec = avg(self.mmap_r)
#
#         bnet_pre = avg(self.bnet_p)
#         wnet_pre = avg(self.wnet_p)
#         tagm_pre = avg(self.tagm_p)
#         mmap_pre = avg(self.mmap_p)
#
#         bnet_f1  = avg(self.bnet_f)
#         wnet_f1  = avg(self.wnet_f)
#         tagm_f1  = avg(self.tagm_f)
#         mmap_f1  = avg(self.mmap_f)
#
#         csv = csv + "Acc," + `format(mmap_acc,'.2f')` + ","
#         csv = csv + `format(bnet_acc,'.2f')` + ","
#         csv = csv + `format(tagm_acc,'.2f')` + ","
#         csv = csv + `format(wnet_acc,'.2f')` + "\n"
#
#         csv = csv + "Pre," + `format(mmap_pre,'.2f')`  + ","
#         csv = csv + `format(bnet_pre,'.2f')` + ","
#         csv = csv + `format(tagm_pre,'.2f')` + ","
#         csv = csv + `format(wnet_pre,'.2f')` + "\n"
#
#         csv = csv + "Rec," + `format(mmap_rec,'.2f')`  + ","
#         csv = csv + `format(bnet_rec,'.2f')` + ","
#         csv = csv + `format(tagm_rec,'.2f')` + ","
#         csv = csv + `format(wnet_rec,'.2f')` + "\n"
#
#         csv = csv + "F-1," + `format(mmap_f1,'.2f')`  + ","
#         csv = csv + `format(bnet_f1,'.2f')`  + ","
#         csv = csv + `format(tagm_f1,'.2f')` + ","
#         csv = csv + `format(wnet_f1,'.2f')` + "\n"
#
#         self.csv = csv.replace("'","")
#
#         # generate .tex table
#         tex  = "\\begin{tabular}{ccccc}\n"
#         tex  = tex + self.csv.replace("\n","\\\ \n")
#         tex  = tex.replace(","," & ")
#         tex  = tex + "\n\end{tabular}"
#         self.tex = tex
#
#         # save files
#         self.json2file("avg")
#
#         # check for statistically significant differences
#         mys = STest()
#
#         print "###################################################"
#         print "TESTS: \taccuracy"
#         print "###################################################"
#         # Kruskal
#         mys.myKruskal(self.mmap_a,self.bnet_a,self.wnet_a,self.tagm_a)
#
#         print "###################################################"
#         print "TESTS: \tprecision"
#         print "###################################################"
#         # Kruskal
#         mys.myKruskal(self.mmap_p,self.bnet_p,self.wnet_p,self.tagm_p)
#
#         print "###################################################"
#         print "TESTS: \trecall"
#         print "###################################################"
#         # pairwise Kruskal
#         mys.myKruskal(self.mmap_r,self.bnet_r,self.wnet_r,self.tagm_r)
#
#         print "###################################################"
#         print "TESTS: \tF-1 measure"
#         print "###################################################"
#         mys.myKruskal(self.mmap_f,self.bnet_f,self.wnet_f,self.tagm_f)
#
#         # generate plot (averages)
#         ExpPlotD()