示例#1
0
class BaseIr(object):
    """Base class for ranking the documents of a job collection.

    On construction every document returned by ``jobCollection.find()`` is
    tokenized and annotated with its term frequencies and token count; the
    annotated documents are kept in ``self.jobs``.

    ``matchResume`` calls ``self.calculateScores``, which is not defined in
    this class, so it has to be supplied elsewhere (presumably by a
    subclass) before ``matchResume`` can be used.
    """

    def __init__(self, jobCollection):
        """Store *jobCollection* and process all of its documents.

        Args:
            jobCollection: object whose ``find()`` yields dict-like
                documents that have a ``"summary"`` entry.
        """
        self.tfgetter = TfGetter()
        self.jobCollection = jobCollection
        self.processColl(self.jobCollection)

    def processColl(self, jobcoll):
        """Annotate every document of *jobcoll* and collect statistics.

        Each document's ``"summary"`` is passed through
        ``irutils.processText`` and tokenized with ``self.tfgetter``.  The
        document then receives a ``'tf'`` entry (result of
        ``self.tfgetter.getTf``) and a ``'length'`` entry (number of
        tokens).

        Sets ``self.jobs`` (list of annotated documents), ``self.doc_num``
        (number of documents) and ``self.avgLength`` (mean token count as a
        float; ``0.0`` when the collection yields no documents).
        """
        self.jobs = []
        sum_length = 0
        for item in jobcoll.find():
            content = irutils.processText(item["summary"])
            tokens = self.tfgetter.getTokens(content)
            item['tf'] = self.tfgetter.getTf(tokens)
            item['length'] = len(tokens)
            self.jobs.append(item)
            sum_length += item['length']
        self.doc_num = len(self.jobs)

        if self.doc_num:
            # float() forces true division; plain "/" on two ints floors the
            # result under Python 2 and would truncate the average.
            self.avgLength = float(sum_length) / self.doc_num
        else:
            # An empty collection must not raise ZeroDivisionError.
            self.avgLength = 0.0
        # Single-argument call form prints the same text on Python 2 and 3.
        print("self.avgLength = %s" % (self.avgLength,))

    def matchResume(self, resume):
        """Score the loaded jobs against *resume* and return them best first.

        ``self.calculateScores(resume)`` is called first; the sort below
        requires every job to carry a ``"score"`` entry afterwards.
        ``self.jobs`` is sorted in place by descending score and that same
        list object is returned.
        """
        self.calculateScores(resume)
        self.jobs.sort(key=lambda job: job["score"], reverse=True)
        return self.jobs
示例#2
0
class TfIdfGetter():
    """Derive tf, idf and combined weights for the documents of a collection.

    The weighting itself is delegated to the helpers ``getwtf``,
    ``dfAddTf``, ``getIdf`` and ``getWtfIdf``, which are defined outside
    this class.
    """

    def __init__(self):
        """Create the TfGetter used for tokenizing and term counting."""
        self.tfgetter = TfGetter()

    def getTf(self, content):
        """Tokenize *content* and return ``self.tfgetter.getTf`` of the tokens."""
        terms = self.tfgetter.getTokens(content)
        return self.tfgetter.getTf(terms)

    def saveJobTfIdf(self, jobcoll, idfColl):
        """Compute weights for every document of *jobcoll* and persist them.

        First pass: each document gets ``'tf'`` and ``'wtf'`` entries and is
        written back with ``jobcoll.save``; document frequencies are
        accumulated through ``dfAddTf``.  An idf record (document count, df,
        idf, collection name, current local time) is then saved to
        *idfColl*.  Second pass: the collection is read again and each
        document receives ``'wtfidf'`` and ``'length'`` from ``getWtfIdf``
        before being saved once more.

        Returns:
            The idf record dict that was saved to *idfColl*.
        """
        docFreq = {}
        total = 0
        for job in jobcoll.find():
            text = irutils.processText(job["summary"])
            termFreq = self.getTf(text)
            job['tf'] = termFreq
            job['wtf'] = getwtf(termFreq)
            jobcoll.save(job)
            dfAddTf(docFreq, termFreq)
            total += 1

        idf = getIdf(docFreq, total)
        idfitem = {
            'doc_num': total,
            'df': docFreq,
            'idf': idf,
            'coll_name': jobcoll.name,
            'date': datetime.datetime.now(),
        }
        idfColl.save(idfitem)

        # The documents are re-read so that the stored 'wtf' values are used.
        for job in jobcoll.find():
            weights, length = getWtfIdf(job['wtf'], idf)
            job['wtfidf'] = weights
            job['length'] = length
            jobcoll.save(job)

        return idfitem

    def getJobTfIdf(self, jobcoll):
        """Compute weights for every document of *jobcoll* without saving.

        Works like ``saveJobTfIdf`` but keeps the annotated documents in
        memory: nothing is written back, and no ``'tf'`` entry is stored on
        the documents.

        Returns:
            A tuple ``(idf, jobs)`` where ``jobs`` is the list of documents,
            each carrying ``'wtf'``, ``'wtfidf'`` and ``'length'`` entries.
        """
        collected = []
        docFreq = {}
        total = 0
        for job in jobcoll.find():
            text = irutils.processText(job["summary"])
            termFreq = self.getTf(text)
            job['wtf'] = getwtf(termFreq)
            collected.append(job)
            dfAddTf(docFreq, termFreq)
            total += 1

        idf = getIdf(docFreq, total)
        for job in collected:
            weights, length = getWtfIdf(job['wtf'], idf)
            job['wtfidf'] = weights
            job['length'] = length

        return idf, collected
示例#3
0
 def __init__(self):
     """Create the TfGetter instance kept as ``self.tfgetter``."""
     self.tfgetter = TfGetter()
示例#4
0
 def __init__(self):
     """Instantiate a TfGetter and keep it as ``self.tfgetter``."""
     getter = TfGetter()
     self.tfgetter = getter
示例#5
0
 def __init__(self, jobCollection):
     """Store *jobCollection* and process it right away.

     A TfGetter is created first and kept as ``self.tfgetter``; the stored
     collection is then handed to ``self.processColl``.
     """
     self.tfgetter = TfGetter()
     self.jobCollection = jobCollection
     self.processColl(self.jobCollection)