Exemplo n.º 1
0
 def __init__(self):
     """Build the word lookups, reset the test containers and run the pipeline.

     Two ``diction`` objects are created: one with a single argument and one
     with an extra second argument of ``1``.  Data needed later is copied
     from them onto ``self``, every ``_test*`` container is initialised
     empty, and finally ``setDicts``, ``retrieveFeatures``,
     ``writejobTimetoCSV`` and ``calculateTfIdf`` are called in that order.

     ``print`` is written in the parenthesised single-argument form so the
     method parses on both Python 2 and Python 3 with identical output.
     """
     dictionObj = diction(146860)

     # Only the first 10000 entries of the word/idf list are kept.
     self._prevIdfList = dictionObj._wordIdfList[:10000]
     # Each entry is used as entry[1] -> key and entry[0] -> value
     # (presumably (idf, word) pairs -- confirm against diction).
     self._eligibleWordsList = [entry[1] for entry in self._prevIdfList]
     # On duplicate keys the last entry wins, as in a plain assignment loop.
     self._eligibleWords = {entry[1]: entry[0] for entry in self._prevIdfList}

     self._WordDict = {}
     self._testdictOfWordCount = {}
     self._dictOfWordCount = {}

     # Containers filled later by other methods of this class.
     self._testcategoryDict = {}  # e.g. {'teaching job': [1000, 15000, ...], ...}
     self._testlocationDict = {}  # e.g. {'location': [1000, 15000, ...], ...}
     self._testcompanyDict = {}   # e.g. {'company': [1000, 5000, ...], ...}
     self._testsourceDict = {}    # e.g. {'source': [1000, 5000, ...], ...}
     # NOTE(review): this is a list, but the original example showed a
     # mapping such as {1: [full, permanent], 2: [None, None],
     # 3: [part, contact]} -- confirm the intended shape ('contact' is
     # presumably 'contract').
     self._testjobTimeTermList = []
     self._testlocationDocs = {}
     self._testcompanyDocs = {}
     self._testcatDocs = {}

     print('training features')
     # Second diction instance, built with the extra argument 1; it supplies
     # the per-source/category/company/location document attributes.
     di = diction(146860, 1)
     self._ssourceDocs = di._sourceDocs
     self._scatDocs = di._catDocs
     self._scompanyDocs = di._companyDocs
     self._slocDocs = di._locationDocs
     self._testsourceDocs = {}

     print('dicting...')
     # The order of these calls is preserved from the original code.
     self.setDicts()
     self.retrieveFeatures()
     self.writejobTimetoCSV()
     self.calculateTfIdf()
Exemplo n.º 2
0
 def __init__(self):
     """Copy word statistics and location/company data from ``diction``.

     A first ``diction`` object provides document lengths, word dictionaries
     and tf-idf data; ``timeTermDict`` is called once the ``_timeDict`` and
     ``_termDict`` buckets exist.  A second ``diction`` object, created with
     an extra argument of ``1``, provides the location, company, category
     and source attributes; ``locationSalary`` and ``companySalary`` are
     called before the category and source attributes are stored.
     """
     vocab = diction(190)
     self._docLength = vocab._documentLength
     self._wordDict = vocab._universalWordDict
     self._dictOfWordCount = vocab._dictOfWordCount

     # Buckets keyed by +1 / -1.
     # _timeDict: 1 => full time, -1 => part time.
     self._timeDict = {1: [], -1: []}
     # _termDict: 1 => permanent; the original comment also said
     # "-1 => part time" -- presumably the non-permanent case; confirm.
     self._termDict = {1: [], -1: []}
     self.timeTermDict()

     self.tfIdfDict = vocab.tfidfInDict()
     self.idfList = vocab._wordIdfList

     features = diction(190, 1)

     # Location data; _locDocs records which docs each location occurs in.
     self._tempLocDict = features._locationDict
     self._locDocs = features._locationDocs
     self._locDict = {}

     # Company data; _companyDocs records which docs each company occurs in.
     self._tempCompanyDict = features._companyDict
     self._companyDocs = features._companyDocs
     self._companyDict = {}

     # e.g. _locDict = {'loc1': [1000, 5000, 2000], ...}
     self.locationSalary()
     # e.g. _companyDict = {'comp1': [1000, 5000, 2000], ...}
     self.companySalary()

     # e.g. {'cat1': [1, 5, 8, ...], ...}
     self._catDocs = features._catDocs
     # Same layout as the category mapping, per the original comment.
     self._sourceDocs = features._sourceDocs