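# ---------------------------------------------------------------------------
# NOTE (sketch): the methods below assume module-level imports (e.g.
# `import time`) and sense-overlap metrics named precision_score,
# recall_score and f1_score. Since those metrics are applied to Python
# *sets* of DBpedia URIs rather than to aligned label vectors, they are
# presumably plain set-overlap measures and not the scikit-learn functions
# of the same name. The helpers below are a minimal, hypothetical sketch of
# that assumed behaviour, not necessarily the implementation used here.
# ---------------------------------------------------------------------------
import time


def precision_score(gold, pred):
    ''' fraction of predicted senses that also occur in the gold set '''
    gold, pred = set(gold), set(pred)
    if not pred:
        return 0.0
    return len(gold & pred) / float(len(pred))


def recall_score(gold, pred):
    ''' fraction of gold senses recovered by the annotator '''
    gold, pred = set(gold), set(pred)
    if not gold:
        return 0.0
    return len(gold & pred) / float(len(gold))


def f1_score(gold, pred):
    ''' harmonic mean of set-based precision and recall '''
    p = precision_score(gold, pred)
    r = recall_score(gold, pred)
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0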
def computeResStats(self):
    '''
    save global performance statistics (resolved)
    '''
    csv = "(avg.),MetaMap,BabelFly,TagMe,WordNet\n"

    r1 = self.mmap.count('NaN')
    r2 = self.bnet.count('NaN')
    r3 = self.tagm.count('NaN')
    r4 = self.wnet.count('NaN')

    print "\n===================================="
    print "# of CUIs in corpus =", 428 * 2
    print "------------------------------------"
    print "# of DBpedia senses in corpus =", len(self.corpus)
    print "# of mmap senses =", len(self.mmap)
    print "# of mmap non-null senses =", len(self.mmap) - r1
    print "# of bnet senses =", len(self.bnet)
    print "# of bnet non-null senses =", len(self.bnet) - r2
    print "# of tagm senses =", len(self.tagm)
    print "# of tagm non-null senses =", len(self.tagm) - r3
    print "# of wnet senses =", len(self.wnet)
    print "# of wnet non-null senses =", len(self.wnet) - r4
    print "====================================\n"

    print "\n===================================="
    print "# of red. senses in corpus =", len(set(self.corpus))
    print "# of red. mmap senses =", len(set(self.mmap))
    print "# of red. bnet senses =", len(set(self.bnet))
    print "# of red. tagm senses =", len(set(self.tagm))
    print "# of red. wnet senses =", len(set(self.wnet))
    print "====================================\n"

    # collapse repetitions
    bnet = list(set(self.bnet))
    tagm = list(set(self.tagm))
    wnet = list(set(self.wnet))
    # mmap = list(set(self.mmap))

    # resolve/normalize URI variants
    print "resolving annotations...\n"
    count = 0
    for uri1 in set(self.corpus):
        print "resolving URI #", count
        time.sleep(1)
        bnet = [uri1 if self.spar.isEqual(uri1, uri2) else uri2 for uri2 in bnet]
        print "bnet updated"
        time.sleep(1)
        tagm = [uri1 if self.spar.isEqual(uri1, uri2) else uri2 for uri2 in tagm]
        print "tagm updated"
        time.sleep(1)
        wnet = [uri1 if self.spar.isEqual(uri1, uri2) else uri2 for uri2 in wnet]
        print "wnet updated"
        count = count + 1
        # mmap = [uri1 if self.spar.isEqual(uri1, uri2) else uri2 for uri2 in mmap]
    print "annotations resolved!\n"

    mmap_pre = precision_score(set(self.corpus), set(self.mmap))
    # mmap_pre = precision_score(set(self.corpus), set(mmap))
    bnet_pre = precision_score(set(self.corpus), set(bnet))
    tagm_pre = precision_score(set(self.corpus), set(tagm))
    wnet_pre = precision_score(set(self.corpus), set(wnet))

    mmap_rec = recall_score(set(self.corpus), set(self.mmap))
    # mmap_rec = recall_score(set(self.corpus), set(mmap))
    bnet_rec = recall_score(set(self.corpus), set(bnet))
    tagm_rec = recall_score(set(self.corpus), set(tagm))
    wnet_rec = recall_score(set(self.corpus), set(wnet))

    mmap_f1 = f1_score(set(self.corpus), set(self.mmap))
    # mmap_f1 = f1_score(set(self.corpus), set(mmap))
    bnet_f1 = f1_score(set(self.corpus), set(bnet))
    tagm_f1 = f1_score(set(self.corpus), set(tagm))
    wnet_f1 = f1_score(set(self.corpus), set(wnet))

    print set(self.corpus)
    print set(self.mmap)
    print bnet
    print wnet
    print tagm

    csv = csv + "Pre," + format(mmap_pre, '.2f') + ","
    csv = csv + format(bnet_pre, '.2f') + ","
    csv = csv + format(tagm_pre, '.2f') + ","
    csv = csv + format(wnet_pre, '.2f') + "\n"
    csv = csv + "Rec," + format(mmap_rec, '.2f') + ","
    csv = csv + format(bnet_rec, '.2f') + ","
    csv = csv + format(tagm_rec, '.2f') + ","
    csv = csv + format(wnet_rec, '.2f') + "\n"
    csv = csv + "F-1," + format(mmap_f1, '.2f') + ","
    csv = csv + format(bnet_f1, '.2f') + ","
    csv = csv + format(tagm_f1, '.2f') + ","
    csv = csv + format(wnet_f1, '.2f')

    self.g_csv = csv.replace("'", "")

    # generate .tex table
    tex = "\\begin{tabular}{ccccc}\n"
    tex = tex + csv.replace("\n", "\\\\ \n")
    tex = tex.replace(",", " & ")
    tex = tex + "\n\\end{tabular}"
    self.g_tex = tex
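# ---------------------------------------------------------------------------
# NOTE (sketch): self.spar.isEqual(uri1, uri2) above decides whether two
# URIs are variants of the same DBpedia resource, but that helper is not
# defined in this fragment. The class below is a hypothetical stand-in that
# issues a SPARQL ASK against the public DBpedia endpoint and treats
# identical strings, owl:sameAs links and dbo:wikiPageRedirects links as
# equivalent; the actual self.spar object may work differently.
# ---------------------------------------------------------------------------
from SPARQLWrapper import SPARQLWrapper, JSON


class SparHelperSketch(object):
    ''' hypothetical equivalent of the self.spar helper used above '''

    def __init__(self, endpoint="http://dbpedia.org/sparql"):
        self.sparql = SPARQLWrapper(endpoint)
        self.sparql.setReturnFormat(JSON)

    def isEqual(self, uri1, uri2):
        ''' True if the two URIs appear to denote the same resource '''
        if uri1 == uri2:
            return True
        query = """
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX dbo: <http://dbpedia.org/ontology/>
        ASK {
            { <%s> owl:sameAs <%s> } UNION { <%s> owl:sameAs <%s> } UNION
            { <%s> dbo:wikiPageRedirects <%s> } UNION
            { <%s> dbo:wikiPageRedirects <%s> }
        }""" % (uri1, uri2, uri2, uri1, uri1, uri2, uri2, uri1)
        self.sparql.setQuery(query)
        # SPARQL ASK results in JSON carry a top-level "boolean" field
        return self.sparql.query().convert().get("boolean", False)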
def computeGlobalStats(self):
    '''
    save global performance statistics
    '''
    csv = "(avg.),MetaMap,BabelFly,TagMe,WordNet\n"

    r0 = self.corpus.count('NaN')
    r1 = self.mmap.count('NaN')
    r2 = self.bnet.count('NaN')
    r3 = self.tagm.count('NaN')
    r4 = self.wnet.count('NaN')

    print "\n===================================="
    print "# of CUIs in corpus =", 428 * 2
    print "------------------------------------"
    print "# of DBpedia senses in corpus =", len(self.corpus)
    print "# of non-null senses in corpus =", len(self.corpus) - r0
    print "# of mmap senses =", len(self.mmap)
    print "# of mmap non-null senses =", len(self.mmap) - r1
    print "# of bnet senses =", len(self.bnet)
    print "# of bnet non-null senses =", len(self.bnet) - r2
    print "# of tagm senses =", len(self.tagm)
    print "# of tagm non-null senses =", len(self.tagm) - r3
    print "# of wnet senses =", len(self.wnet)
    print "# of wnet non-null senses =", len(self.wnet) - r4
    print "====================================\n"

    mmap_pre = precision_score(set(self.corpus), set(self.mmap))
    bnet_pre = precision_score(set(self.corpus), set(self.bnet))
    tagm_pre = precision_score(set(self.corpus), set(self.tagm))
    wnet_pre = precision_score(set(self.corpus), set(self.wnet))

    mmap_rec = recall_score(set(self.corpus), set(self.mmap))
    bnet_rec = recall_score(set(self.corpus), set(self.bnet))
    tagm_rec = recall_score(set(self.corpus), set(self.tagm))
    wnet_rec = recall_score(set(self.corpus), set(self.wnet))

    mmap_f1 = f1_score(set(self.corpus), set(self.mmap))
    bnet_f1 = f1_score(set(self.corpus), set(self.bnet))
    tagm_f1 = f1_score(set(self.corpus), set(self.tagm))
    wnet_f1 = f1_score(set(self.corpus), set(self.wnet))

    print "\n===================================="
    print "# of red. senses in corpus =", len(set(self.corpus))
    print "# of red. mmap senses =", len(set(self.mmap))
    print "# of red. bnet senses =", len(set(self.bnet))
    print "# of red. tagm senses =", len(set(self.tagm))
    print "# of red. wnet senses =", len(set(self.wnet))
    print "====================================\n"

    csv = csv + "Pre," + format(mmap_pre, '.2f') + ","
    csv = csv + format(bnet_pre, '.2f') + ","
    csv = csv + format(tagm_pre, '.2f') + ","
    csv = csv + format(wnet_pre, '.2f') + "\n"
    csv = csv + "Rec," + format(mmap_rec, '.2f') + ","
    csv = csv + format(bnet_rec, '.2f') + ","
    csv = csv + format(tagm_rec, '.2f') + ","
    csv = csv + format(wnet_rec, '.2f') + "\n"
    csv = csv + "F-1," + format(mmap_f1, '.2f') + ","
    csv = csv + format(bnet_f1, '.2f') + ","
    csv = csv + format(tagm_f1, '.2f') + ","
    csv = csv + format(wnet_f1, '.2f') + "\n"

    self.g_csv = csv.replace("'", "")

    # generate .tex table
    tex = "\\begin{tabular}{ccccc}\n"
    tex = tex + csv.replace("\n", "\\\\ \n")
    tex = tex.replace(",", " & ")
    tex = tex + "\n\\end{tabular}"
    self.g_tex = tex

# def computeStats(self):
#     '''
#     save performance statistics
#     and test for statistical
#     significance
#     '''
#
#     csv = "(avg.),MetaMap,BabelFly,TagMe,WordNet\n"
#
#     bnet_acc = avg(self.bnet_a)
#     wnet_acc = avg(self.wnet_a)
#     tagm_acc = avg(self.tagm_a)
#     mmap_acc = avg(self.mmap_a)
#
#     bnet_rec = avg(self.bnet_r)
#     wnet_rec = avg(self.wnet_r)
#     tagm_rec = avg(self.tagm_r)
#     mmap_rec = avg(self.mmap_r)
#
#     bnet_pre = avg(self.bnet_p)
#     wnet_pre = avg(self.wnet_p)
#     tagm_pre = avg(self.tagm_p)
#     mmap_pre = avg(self.mmap_p)
#
#     bnet_f1 = avg(self.bnet_f)
#     wnet_f1 = avg(self.wnet_f)
#     tagm_f1 = avg(self.tagm_f)
#     mmap_f1 = avg(self.mmap_f)
#
#     csv = csv + "Acc," + `format(mmap_acc,'.2f')` + ","
#     csv = csv + `format(bnet_acc,'.2f')` + ","
#     csv = csv + `format(tagm_acc,'.2f')` + ","
#     csv = csv + `format(wnet_acc,'.2f')` + "\n"
#
#     csv = csv + "Pre," + `format(mmap_pre,'.2f')` + ","
#     csv = csv + `format(bnet_pre,'.2f')` + ","
#     csv = csv + `format(tagm_pre,'.2f')` + ","
#     csv = csv + `format(wnet_pre,'.2f')` + "\n"
#
#     csv = csv + "Rec," + `format(mmap_rec,'.2f')` + ","
#     csv = csv + `format(bnet_rec,'.2f')` + ","
#     csv = csv + `format(tagm_rec,'.2f')` + ","
#     csv = csv + `format(wnet_rec,'.2f')` + "\n"
#
#     csv = csv + "F-1," + `format(mmap_f1,'.2f')` + ","
#     csv = csv + `format(bnet_f1,'.2f')` + ","
#     csv = csv + `format(tagm_f1,'.2f')` + ","
#     csv = csv + `format(wnet_f1,'.2f')` + "\n"
#
#     self.csv = csv.replace("'","")
#
#     # generate .tex table
#     tex = "\\begin{tabular}{ccccc}\n"
#     tex = tex + self.csv.replace("\n","\\\ \n")
#     tex = tex.replace(","," & ")
#     tex = tex + "\n\end{tabular}"
#     self.tex = tex
#
#     # save files
#     self.json2file("avg")
#
#     # check for statistically significant differences
#     mys = STest()
#
#     print "###################################################"
#     print "TESTS: \taccuracy"
#     print "###################################################"
#     # Kruskal
#     mys.myKruskal(self.mmap_a,self.bnet_a,self.wnet_a,self.tagm_a)
#
#     print "###################################################"
#     print "TESTS: \tprecision"
#     print "###################################################"
#     # Kruskal
#     mys.myKruskal(self.mmap_p,self.bnet_p,self.wnet_p,self.tagm_p)
#
#     print "###################################################"
#     print "TESTS: \trecall"
#     print "###################################################"
#     # pairwise Kruskal
#     mys.myKruskal(self.mmap_r,self.bnet_r,self.wnet_r,self.tagm_r)
#
#     print "###################################################"
#     print "TESTS: \tF-1 measure"
#     print "###################################################"
#     mys.myKruskal(self.mmap_f,self.bnet_f,self.wnet_f,self.tagm_f)
#
#     # generate plot (averages)
#     ExpPlotD()
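# ---------------------------------------------------------------------------
# NOTE (sketch): the commented-out computeStats above relies on an STest
# helper with a myKruskal(a, b, c, d) method, which is not defined in this
# fragment. The wrapper below shows one hypothetical way to back it with
# scipy.stats.kruskal; it is an illustration, not the original STest class.
# ---------------------------------------------------------------------------
from scipy.stats import kruskal


class STestSketch(object):
    ''' hypothetical Kruskal-Wallis wrapper for per-annotator score lists '''

    def myKruskal(self, mmap, bnet, wnet, tagm, alpha=0.05):
        ''' test whether the four annotators' score distributions differ '''
        h_stat, p_value = kruskal(mmap, bnet, wnet, tagm)
        print "Kruskal-Wallis H =", format(h_stat, '.3f'), \
            "p =", format(p_value, '.4f')
        if p_value < alpha:
            print "at least one annotator differs significantly"
        else:
            print "no statistically significant difference detected"
        return p_value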