Exemplo n.º 1
0
 def load_bless(self):
     """Build the BLESS database object and record its entries.

     Constructs a ``BlessDB`` from ``self.parameters``, keeps it on
     ``self.blessDB``, stores whatever ``getEntries()`` returns under
     ``self.parameters["filterwords"]`` and finally asks the database to
     print its statistics.
     """
     # Parenthesised single-argument print behaves the same on Python 2
     # and Python 3, whereas the bare statement form is Python 2 only.
     print("Loading Bless database")
     self.blessDB = BlessDB(self.parameters)
     self.parameters["filterwords"] = self.blessDB.getEntries()
     self.blessDB.printstats()
Exemplo n.º 2
0
class Evaluator:
    """Drive a BLESS-based evaluation of a thesaurus and plot the results.

    Configuration is read from a YAML file into ``self.parameters``.  Keys
    read by this class: ``rellist``, ``debug``, ``show``, ``nprop``,
    ``bestn``, ``pos``, ``thesdir`` and ``blesseval``.  ``filterwords`` is
    written by :meth:`load_bless`.
    """

    def __init__(self, config):
        """Load parameters from the YAML file at path *config*.

        The parsed parameters are echoed to stdout after defaults have been
        filled in by :meth:`check_config`.
        """
        with open(config) as fp:
            # safe_load returns None for an empty document; fall back to an
            # empty mapping so the defaults in check_config can still apply.
            self.parameters = yaml.safe_load(fp) or {}
        print("Using parameters from " + config)
        self.check_config()
        print(self.parameters)

    def check_config(self):
        """Fill in defaults for the optional list-valued parameters."""
        params = self.parameters
        params["rellist"] = params.get("rellist", [])
        params["debug"] = params.get("debug", [])
        params["show"] = params.get("show", ["ranks"])

    def load_thes(self):
        """Build the thesaurus, z-normalise it and optionally display it.

        The thesaurus is displayed only when ``"thes"`` is listed in the
        ``debug`` parameter.
        """
        print("Loading thesaurus")
        self.thes = BlessThes(self.parameters)
        self.thes.znorm()
        if "thes" in self.parameters.get("debug"):
            self.thes.displayall()

    def load_bless(self):
        """Build the BLESS database object and record its entries.

        The value returned by ``getEntries()`` is stored under
        ``self.parameters["filterwords"]``; database statistics are printed.
        """
        print("Loading Bless database")
        self.blessDB = BlessDB(self.parameters)
        self.parameters["filterwords"] = self.blessDB.getEntries()
        self.blessDB.printstats()

    def run_blesseval(self):
        """Score every BLESS concept per relation and show box plots.

        For each relation in ``rellist`` and each concept in the BLESS
        database, the words related to the concept are scored against the
        thesaurus via ``self.thes.get_sim``.  The collected ranks and/or
        similarities are then plotted, depending on the ``show`` parameter.
        """
        print("Running Bless Evaluation")

        print("Creating boxplots for relations in:")
        print(self.parameters["rellist"])

        # The choice between "best n" and "best proportion" does not depend
        # on the relation or the concept, so decide it once up front.
        nprop = self.parameters.get("nprop", 0)
        use_bestn = nprop <= 0
        if use_bestn:
            bestn = self.parameters.get("bestn", 1)
            key = "best " + str(bestn)
        else:
            proportion = float(self.parameters["nprop"])
            key = "best " + str(proportion * 100) + "%"

        relranks = []
        relsims = []
        for rel in self.parameters["rellist"]:
            ranks = []
            sims = []

            for concept, entry in self.blessDB.entrydict.items():
                # get the semantically related words from BLESS
                blessed = entry.getRel(rel)

                if use_bestn:
                    n = bestn
                else:
                    # NOTE(review): math.floor returns a float on Python 2
                    # and an int on Python 3 -- confirm get_sim accepts both.
                    n = math.floor(len(blessed) * proportion)

                # score according to thesaurus
                (rank, sim) = self.thes.get_sim(
                    (concept, self.parameters['pos']), blessed, n)
                ranks.append(rank)
                sims.append(sim)
            relranks.append(ranks)
            relsims.append(sims)

        plt.figure(1)

        if "sims" in self.parameters["show"]:
            self.show_sims(relsims, self.get_position("sims"))

        if "ranks" in self.parameters["show"]:
            self.show_ranks(relranks, self.get_position("ranks"))

        plt.title(self.parameters["thesdir"] + ": " + key)
        plt.show()

    def get_position(self, item):
        """Return the subplot position code for *item*.

        Returns ``1`` when only one thing is shown (callers treat that as
        "no subplot").  Otherwise returns ``101 + 10 * count`` plus the
        index of *item* within the ``show`` list, e.g. 121 and 122 for two
        shown items.

        Raises:
            ValueError: if more than three items are configured in ``show``.
                Previously this case printed the message and then failed
                with an UnboundLocalError on the return statement.
        """
        shown = self.parameters["show"]
        count = len(shown)
        if count == 1:
            return 1
        if count >= 4:
            raise ValueError("Do not know how to position more than 3 subplots")

        position = 101 + count * 10
        for thing in shown:
            if item == thing:
                break
            position += 1
        return position

    def show_ranks(self, relranks, position):
        """Draw a box plot of ranks per relation.

        A subplot is selected only when *position* is greater than 1.  The
        y axis runs from 1000 down to 0 with ticks every 50.
        """
        if position > 1:
            plt.subplot(position)
        plt.boxplot(relranks)
        plt.ylim(1000, 0)
        plt.yticks(np.arange(1000, 0, -50))
        plt.grid(True)
        plt.ylabel('Rank')
        plt.setp(plt.gca(), 'xticklabels', self.parameters["rellist"])

    def show_sims(self, relsims, position):
        """Draw a box plot of similarities per relation.

        A subplot is selected only when *position* is greater than 1.  The
        y axis runs from 0 to 1 with ticks every 0.05.
        """
        if position > 1:
            plt.subplot(position)
        plt.boxplot(relsims)

        plt.ylim(0, 1)
        plt.yticks(np.arange(0, 1, 0.05))
        plt.ylabel('Similarity')
        plt.grid(True)
        plt.setp(plt.gca(), 'xticklabels', self.parameters["rellist"])

    def run(self):
        """Load BLESS and the thesaurus, then evaluate if ``blesseval`` is set."""
        self.load_bless()
        self.load_thes()
        if self.parameters.get("blesseval", False):
            self.run_blesseval()