def execute(self):
    self.readConfiguration()
    if self.foldInfo == '[1]':
        self.printAlgorConfig()
    # load model from disk or build model
    if self.isLoad:
        print 'Loading model %s...' % (self.foldInfo)
        self.loadModel()
    else:
        print 'Initializing model %s...' % (self.foldInfo)
        self.initModel()
        print 'Building Model %s...' % (self.foldInfo)
        self.buildModel()
    # predict the ratings or item ranking
    print 'Predicting %s...' % (self.foldInfo)
    prediction = self.predict()
    report = classification_report(self.testLabels, prediction, digits=4)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    FileIO.writeFile(self.output['-dir'], self.algorName + '@' + currentTime + self.foldInfo, report)
    # save model
    if self.isSave:
        print 'Saving model %s...' % (self.foldInfo)
        self.saveModel()
    print report
    return report
def evalRatings(self):
    res = []  # used to contain the text of the result
    res.append('userId itemId original prediction\n')
    # predict
    for userId in self.dao.testSet_u:
        for ind, item in enumerate(self.dao.testSet_u[userId]):
            itemId = item[0]
            originRating = item[1]
            # predict
            prediction = self.predict(userId, itemId)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            self.dao.testSet_u[userId][ind].append(pred)
            res.append(userId + ' ' + itemId + ' ' + str(originRating) + ' ' + str(pred) + '\n')
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    measure = Measure.ratingMeasure(self.dao.testSet_u)
    FileIO.writeFile(outDir, fileName, measure)
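# denormalize, used above, comes from the repository's math utilities and is
# defined elsewhere. Its call sites pass (prediction, rScale[-1], rScale[0]),
# i.e. the top and bottom of the rating scale; a minimal sketch consistent
# with that usage, mapping a normalized score back onto the original scale
# (an assumption, not the repository's actual code):
def denormalize(prediction, maxVal, minVal):
    # linearly rescale a score in [0, 1] to [minVal, maxVal]
    return minVal + (maxVal - minVal) * prediction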
def dataSplit(data, test_ratio=0.3, output=False, path='./', order=1, binarized=False):
    if test_ratio >= 1 or test_ratio <= 0:
        test_ratio = 0.3
    testSet = []
    trainingSet = []
    for entry in data:
        if random() < test_ratio:
            if binarized:
                # in binarized mode only positive entries enter the test set;
                # zero-rated entries drawn here are discarded
                if entry[2]:
                    testSet.append(entry)
            else:
                testSet.append(entry)
        else:
            trainingSet.append(entry)
    if output:
        FileIO.writeFile(path, 'testSet[' + str(order) + ']', testSet)
        FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet)
    return trainingSet, testSet
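# A hedged usage sketch of the dataSplit above: holding out roughly 30% of a
# small list of (user, item, rating) triples. With binarized=True, zero-rated
# entries drawn into the test fold are discarded rather than returned.
# (The sample data is hypothetical.)
from random import random

ratings = [('u1', 'i1', 4.0), ('u1', 'i2', 0.0), ('u2', 'i1', 5.0)]
trainingSet, testSet = dataSplit(ratings, test_ratio=0.3, binarized=True)
print len(trainingSet), len(testSet)  # sizes vary between runs; the split is random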
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        print 'N can not be larger than 100! It has been reassigned with 10'
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    rawRes = {}
    for i, user in enumerate(self.data.testSet):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predict(user)
        recList[user] = predictedItems
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            # item ids returned by predict are compared directly against the test set
            if self.data.testSet[user].has_key(item):
                line += '*'
            line += item + ','
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.data.testSet, recList, rawRes, N)
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def execute(self):
    exec ('from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender'])
    if self.evaluation.contains('-cv'):
        i = 1
        for train, test in DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv'])):
            fold = '[' + str(i) + ']'
            recommender = self.config['recommender'] + "(self.config,train,test,fold)"
            measure = eval(recommender).execute()
            self.measure.append(measure)
            i += 1
        res = []
        for i in range(len(self.measure[0])):
            measure = self.measure[0][i].split(':')[0]
            total = 0
            for j in range(len(self.measure)):
                total += float(self.measure[j][i].split(':')[1])
            res.append(measure + ':' + str(total / len(self.measure)) + '\n')
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['recommender'] + '@' + str(int(self.evaluation['-cv'])) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
    else:
        recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
        eval(recommender).execute()
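# DataSplit.crossValidation is defined elsewhere in the repository; judging
# only from how it is consumed above (a generator yielding k train/test
# pairs), a minimal k-fold splitter could look like this sketch -- an
# assumption for illustration, not the repository's actual implementation:
from random import shuffle

def crossValidation(data, k):
    data = list(data)
    shuffle(data)
    for i in range(k):
        # every k-th entry (offset i) forms the test fold; the rest train
        test = [entry for j, entry in enumerate(data) if j % k == i]
        train = [entry for j, entry in enumerate(data) if j % k != i]
        yield train, test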
def execute(self):
    self.readConfiguration()
    if self.foldInfo == '[1]':
        self.printAlgorConfig()
    # load model from disk or build model
    if self.isLoad:
        print ('Loading model %s...' % (self.foldInfo))
        self.loadModel()
    else:
        print ('Initializing model %s...' % (self.foldInfo))
        self.initModel()
        print ('Building Model %s...' % (self.foldInfo))
        self.buildModel()
    # predict the ratings or item ranking
    print ('Predicting %s...' % (self.foldInfo))
    prediction = self.predict()
    report = classification_report(self.testLabels, prediction, digits=4)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    FileIO.writeFile(self.output['-dir'], self.algorName + '@' + currentTime + self.foldInfo, report)
    # save model
    if self.isSave:
        print ('Saving model %s...' % (self.foldInfo))
        self.saveModel()
    # print (report)
    # return report
    # return the third non-empty line of the report (the second class row,
    # i.e. the positive label in a binary task) split into its fields
    return [i for i in report.split('\n') if len(i) > 0][2].split()
def execute(self):
    self.readConfiguration()
    if self.foldInfo == '[1]':
        self.printAlgorConfig()
    # load model from disk or build model
    if self.isLoad:
        print 'Loading model %s...' % (self.foldInfo)
        self.loadModel()
    else:
        print 'Initializing model %s...' % (self.foldInfo)
        self.initModel()
        print 'Building Model %s...' % (self.foldInfo)
        self.buildModel()
    # predict the ratings or item ranking
    print 'Predicting %s...' % (self.foldInfo)
    report = self.predict()
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    FileIO.writeFile(self.output['-dir'], self.algorName + '@' + currentTime + self.foldInfo, report)
    # save model
    if self.isSave:
        print 'Saving model %s...' % (self.foldInfo)
        self.saveModel()
    return report
def evalRatings(self):
    res = []  # used to contain the text of the result
    res.append('userId itemId original prediction\n')
    # predict
    for ind, entry in enumerate(self.data.testData):
        user, item, rating = entry
        # predict
        prediction = self.predict(user, item)
        # denormalize
        # prediction = denormalize(prediction,self.data.rScale[-1],self.data.rScale[0])
        pred = self.checkRatingBoundary(prediction)
        # add prediction in order to measure
        self.data.testData[ind].append(pred)
        res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.ratingMeasure(self.data.testData)
    FileIO.writeFile(outDir, fileName, self.measure)
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        N = 100
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n')
    # predict
    topNSet = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = []
        line = user + ':'
        for item in self.dao.item:
            if not self.dao.rating(user, item):
                # predict
                prediction = self.predict(user, item)
                # denormalize
                prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
                prediction = round(prediction, 4)
                # pred = self.checkRatingBoundary(prediction)
                # add prediction in order to measure
                itemSet.append((item, prediction))
        itemSet.sort(key=lambda d: d[1], reverse=True)
        topNSet[user] = itemSet[0:N]
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in topNSet[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
    FileIO.writeFile(outDir, fileName, measure)
def execute(self):
    # import the algorithm module
    importStr = 'from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender']
    exec(importStr)
    if self.evaluation.contains('-cv'):
        k = int(self.evaluation['-cv'])
        if k <= 1 or k > 10:
            k = 3
        # create the manager used for communication across processes
        manager = Manager()
        m = manager.dict()
        i = 1
        tasks = []
        for train, test in DataSplit.crossValidation(self.trainingData, k):
            fold = '[' + str(i) + ']'
            if self.config.contains('social'):
                recommender = self.config['recommender'] + "(self.config,train,test,self.relation,fold)"
            else:
                recommender = self.config['recommender'] + "(self.config,train,test,fold)"
            # create the process
            p = Process(target=run, args=(m, eval(recommender), i))
            tasks.append(p)
            i += 1
        # start the processes
        for p in tasks:
            p.start()
        # wait until all processes are completed
        for p in tasks:
            p.join()
        # compute the mean error of k-fold cross validation
        self.measure = [dict(m)[i] for i in range(1, k + 1)]
        res = []
        for i in range(len(self.measure[0])):
            measure = self.measure[0][i].split(':')[0]
            total = 0
            for j in range(k):
                total += float(self.measure[j][i].split(':')[1])
            res.append(measure + ':' + str(total / k) + '\n')
        # output result
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['recommender'] + '@' + str(k) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
    else:
        if self.config.contains('social'):
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
        else:
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
        eval(recommender).execute()
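# The Process target above is a module-level helper named run, defined
# elsewhere in the repository. A minimal version consistent with its call
# signature here, and with dict(m)[i] later holding each fold's measure
# list keyed by fold number, might look like this sketch (an assumption,
# not the repository's actual code):
def run(measure, algor, order):
    # execute one fold and record its measure list under the fold index
    measure[order] = algor.execute()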
def __init__(self, conf):
    self.conf = conf
    if not conf.contains('ratings') and not conf.contains('social'):
        print 'The config file is not in the correct format!'
        exit(-1)
    if conf.contains('ratings'):
        ratingData = FileIO.loadDataSet(conf, conf['ratings'])
        self.dao = RatingDAO(conf, ratingData)
    if conf.contains('social'):
        relationData = FileIO.loadRelationship(conf, conf['social'])
        self.sao = SocialDAO(conf, relationData)
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        binarized = False
        bottom = 0
        if self.evaluation.contains('-b'):
            binarized = True
            bottom = float(self.evaluation['-b'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True, binarized=binarized, threshold=bottom)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.trainingData, self.testData = DataSplit.dataSplit(
                self.trainingData, test_ratio=float(self.evaluation['-ap']), binarized=binarized)
        elif self.evaluation.contains('-cv'):
            # cross validation; the split itself is performed later
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
        else:
            print('Evaluation is not well configured!')
            exit(-1)
    if config.contains('social'):
        self.socialConfig = LineConfig(self.config['social.setup'])
        self.relation = FileIO.loadRelationship(config, self.config['social'])
    print('preprocessing...')
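# LineConfig, used throughout these constructors, is the repository's parser
# for option strings such as '-ap 0.2 -b 3.0'. It is defined elsewhere; a
# minimal stand-in consistent with the contains()/[] usage seen in this file
# might look like this sketch (an assumption for illustration only, hence the
# distinct name):
class LineConfigSketch(object):
    def __init__(self, line):
        self.options = {}
        key = None
        for token in line.strip().split():
            if token.startswith('-'):
                key = token
                self.options[key] = ''
            elif key is not None:
                # keep the value following each flag
                self.options[key] = token

    def contains(self, key):
        return key in self.options

    def __getitem__(self, key):
        return self.options[key]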
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.measure = []
    self.config = config
    setup = LineConfig(config['record.setup'])
    columns = {}
    labels = setup['-columns'].split(',')
    delim = ''
    if setup.contains('-delim'):
        delim = setup['-delim']
    for col in labels:
        label = col.split(':')
        columns[label[0]] = int(label[1])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        binarized = False
        bottom = 0
        if self.evaluation.contains('-b'):
            binarized = True
            bottom = float(self.evaluation['-b'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.testData = FileIO.loadDataSet(self.evaluation['-testSet'], binarized=binarized, columns=columns, threshold=bottom, delim=delim)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.trainingData, self.testData = DataSplit.dataSplit(
                self.trainingData, test_ratio=float(self.evaluation['-ap']))
        elif self.evaluation.contains('-byTime'):
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.testData = []
        elif self.evaluation.contains('-cv'):
            # cross validation; the split itself is performed later
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
        else:
            print 'Evaluation is not well configured!'
            exit(-1)
    # if config.contains('social'):
    #     self.socialConfig = LineConfig(self.config['social.setup'])
    #     self.relation = FileIO.loadRelationship(config,self.config['social'])
    print 'preprocessing...'
def dataSplit(data, test_ratio=0.3, output=False, path='./', order=1):
    testSet = []
    trainingSet = []
    for entry in data:
        if random() < test_ratio:
            testSet.append(entry)
        else:
            trainingSet.append(entry)
    if output:
        FileIO.writeFile(path, 'testSet[' + str(order) + ']', testSet)
        FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet)
    return trainingSet, testSet
def dataSplit(data, test_ratio=0.3, output=False, path='./', order=1):
    if test_ratio >= 1 or test_ratio <= 0:
        test_ratio = 0.3
    testSet = {}
    trainingSet = {}
    for user in data:
        if random() < test_ratio:
            testSet[user] = data[user].copy()
        else:
            trainingSet[user] = data[user].copy()
    if output:
        FileIO.writeFile(path, 'testSet[' + str(order) + ']', testSet)
        FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet)
    return trainingSet, testSet
def __init__(self, conf):
    self.config = Config(conf)
    self.userProfile = FileIO.loadDataSet(self.config, self.config['ratings'])
    self.itemProfile = defaultdict(dict)
    self.attackSize = float(self.config['attackSize'])
    self.fillerSize = float(self.config['fillerSize'])
    self.selectedSize = float(self.config['selectedSize'])
    self.targetCount = int(self.config['targetCount'])
    self.targetScore = float(self.config['targetScore'])
    self.threshold = float(self.config['threshold'])
    self.minCount = int(self.config['minCount'])
    self.maxCount = int(self.config['maxCount'])
    self.minScore = float(self.config['minScore'])
    self.maxScore = float(self.config['maxScore'])
    self.outputDir = self.config['outputDir']
    if not os.path.exists(self.outputDir):
        os.makedirs(self.outputDir)
    # invert the user profiles into item profiles
    for user in self.userProfile:
        for item in self.userProfile[user]:
            self.itemProfile[item][user] = self.userProfile[user][item]
    self.spamProfile = defaultdict(dict)
    self.spamItem = defaultdict(list)  # items rated by spammers
    self.targetItems = []
    self.itemAverage = {}
    self.getAverageRating()
    self.selectTarget()
    self.startUserID = 0
def __init__(self, conf):
    self.config = Config(conf)
    self.userProfile = FileIO.loadDataSet(self.config, self.config['ratings'])
    self.itemProfile = defaultdict(dict)
    self.attackSize = float(self.config['attackSize'])
    self.fillerSize = float(self.config['fillerSize'])
    self.selectedSize = float(self.config['selectedSize'])
    self.targetCount = int(self.config['targetCount'])
    self.targetScore = float(self.config['targetScore'])
    self.threshold = float(self.config['threshold'])
    self.minCount = int(self.config['minCount'])
    self.maxCount = int(self.config['maxCount'])
    self.outputDir = self.config['outputDir']
    if not os.path.exists(self.outputDir):
        os.makedirs(self.outputDir)
    for user in self.userProfile:
        for item in self.userProfile[user]:
            self.itemProfile[item][user] = self.userProfile[user][item]
    self.spamProfile = defaultdict(dict)
    self.spamItem = defaultdict(list)  # items rated by spammers
    self.targetItems = []
    self.itemAverage = {}
    self.getAverageRating()
    self.selectTarget()
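# getAverageRating and selectTarget are methods of the same attack class,
# defined elsewhere. A minimal sketch of the former, consistent with
# itemAverage holding each item's mean rating (an assumption, not the
# repository's actual code):
def getAverageRating(self):
    for item in self.itemProfile:
        ratings = self.itemProfile[item].values()
        # mean rating over all users who rated this item
        self.itemAverage[item] = float(sum(ratings)) / len(ratings)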
def render(self):
    self.draw()
    html = "<html><head><title>Data Analysis</title>\n" \
           "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n" \
           "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n" \
           "<div class='main'><div class='area1'>\n" \
           "<div class='title'><h3>Data Files</h3></div><div class='text'>"
    if self.conf.contains('ratings'):
        html += "<b>Rating Data</b>: {rating}".format(rating=abspath(self.conf['ratings']))
    if self.conf.contains('social'):
        html += "<br><b>Social Data</b>: {social}".format(social=abspath(self.conf['social']))
    html += "</div></div><div style='padding-top:20px'><center>" \
            "<img src='images/header2.png'/></center></div>\n"
    if self.conf.contains('ratings'):
        html += "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n"
        html += "<div class='text'><b>Rating Scale</b>: {scale}<br>".format(scale=' '.join([str(item) for item in self.dao.rScale]))
        html += "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br><b>Record Count</b>: {record}<br><b>Global Mean</b>: {mean}</div>\n" \
            .format(user=str(len(self.dao.user)), item=str(len(self.dao.item)), record=str(len(self.dao.trainingData)),
                    mean=str(round(denormalize(self.dao.globalMean, self.dao.rScale[-1], self.dao.rScale[0]), 3)))
        html += "<center><div class='img'><img src='images/rh.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/rcu.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/rci.png' width='640px' height='480px'/></div></center>\n"
        html += "</div><div style='padding-top:20px'><center>" \
                "<img src='images/header2.png'/></center></div>\n"
    if self.conf.contains('social'):
        html += "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n"
        html += "<div class='text'><b>User Count</b>: {user}<br><b>Relation Count</b>: {relation}<br></div>\n" \
            .format(user=str(len(self.sao.user)), relation=str(len(self.sao.relation)))
        html += "<center><div class='img'><img src='images/ff.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/fd1.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/fd2.png' width='640px' height='480px'/></div></center>\n"
        html += "</div><div style='padding-top:20px'><center>" \
                "<img src='images/header2.png'/></center></div>\n"
    html += "</div></body></html>"
    FileIO.writeFile('../visual/visualization/', 'analysis.html', html)
    print 'The report has been output to', abspath('../visual/visualization/analysis.html')
    webbrowser.open(abspath('../visual/visualization/analysis.html'), new=0, autoraise=True)
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        N = 100
    res.append('userId: recommendations in (itemId, ranking score) pairs\n')
    # predict
    topNSet = {}
    userCount = len(self.dao.testSet_u)
    for i, userId in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = userId + ':'
        for itemId in self.dao.item:
            pred = self.predict(userId, itemId)
            # add prediction in order to measure
            itemSet[itemId] = pred
        topNSet[userId] = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)[0:N]
        if i % 100 == 0:
            print 'Progress:' + str(i) + '/' + str(userCount)
        for item in topNSet[userId]:
            line += '(' + item[0] + ',' + str(item[1]) + ') '
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
    FileIO.writeFile(outDir, fileName, measure)
def __init__(self, conf):
    super(RelationAttack, self).__init__(conf)
    self.spamLink = defaultdict(list)
    self.relation = FileIO.loadRelationship(self.config, self.config['social'])
    self.trustLink = defaultdict(list)
    self.trusteeLink = defaultdict(list)
    for u1, u2, t in self.relation:
        self.trustLink[u1].append(u2)
        self.trusteeLink[u2].append(u1)
    self.activeUser = {}  # normal users who follow spam users
    self.linkedUser = {}  # users to whom spam users have planted links
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.labels = FileIO.loadLabels(config['label'])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            self.trainingData, self.testData = DataSplit.dataSplit(
                self.trainingData, test_ratio=float(self.evaluation['-ap']))
        elif self.evaluation.contains('-cv'):
            # cross validation; the split itself is performed later
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
        else:
            print 'Evaluation is not well configured!'
            exit(-1)
    if config.contains('social'):
        self.socialConfig = LineConfig(self.config['social.setup'])
        self.relation = FileIO.loadRelationship(config, self.config['social'])
    print 'preprocessing...'
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    bThres = False
    bTopN = False
    if self.ranking.contains('-topN'):
        bTopN = True
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N can not be larger than 100! It has been reassigned with 100'
            N = 100
    elif self.ranking.contains('-threshold'):
        threshold = float(self.ranking['-threshold'])
        bThres = True
    else:
        print 'No correct evaluation metric is specified!'
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = user + ':'
        for item in self.dao.item:
            # predict
            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            # prediction = self.checkRatingBoundary(prediction)
            # pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            if bThres:
                if prediction > threshold:
                    itemSet[item] = prediction
            else:
                itemSet[item] = prediction
        ratedList, ratingList = self.dao.userRated(user)
        for item in ratedList:
            del itemSet[self.dao.id2item[item]]
        itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        if self.ranking.contains('-topN'):
            recList[user] = itemSet[0:N]
        elif self.ranking.contains('-threshold'):
            recList[user] = itemSet[:]
            userN[user] = len(itemSet)
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N)
    elif self.ranking.contains('-threshold'):
        origin = self.dao.testSet_u.copy()
        for user in origin:
            temp = {}
            for item in origin[user]:
                if origin[user][item] >= threshold:
                    temp[item] = threshold
            origin[user] = temp
        self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
    FileIO.writeFile(outDir, fileName, self.measure)
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    top = self.ranking['-topN'].split(',')
    top = [int(num) for num in top]
    N = int(top[-1])
    if N > 100 or N < 0:
        print 'N can not be larger than 100! It has been reassigned with 10'
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    for i, user in enumerate(self.data.testSet):
        line = user + ':'
        if self.data.userRecord.has_key(user):
            predictedItems = self.predict(user)
        else:
            predictedItems = ['0'] * N
        predicted = {}
        for k, item in enumerate(predictedItems):
            predicted[item] = k
        # remove items the user has already consumed
        for item in self.data.userRecord[user]:
            if predicted.has_key(item[self.recType]):
                del predicted[item[self.recType]]
        predicted = sorted(predicted.iteritems(), key=lambda d: d[1])
        predictedItems = [item[0] for item in predicted]
        recList[user] = predictedItems[:N]
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            if self.data.testSet[user].has_key(item):
                line += '*'
            line += item + ','
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN'] \
                       + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet, recList, top, self.data.getSize(self.recType))
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def __init__(self, config, account_DAO=None):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    # self.accountDAO = account_DAO
    # self.currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        binarized = False
        bottom = 0
        if self.evaluation.contains('-b'):
            binarized = True
            bottom = float(self.evaluation['-b'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True, binarized=binarized, threshold=bottom)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.trainingData, self.testData = DataSplit.dataSplit(
                self.trainingData, test_ratio=float(self.evaluation['-ap']), binarized=binarized)
        elif self.evaluation.contains('-cv'):
            # cross validation; the split itself is performed later
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
        elif self.evaluation.contains('--account'):
            self.training_user_item = account_DAO.training_user_item
            self.training_account_item = account_DAO.training_account_item
            self.relation = account_DAO.relation
            self.test_user_item = account_DAO.test_user_item
        else:
            print('Evaluation is not well configured!')
            exit(-1)
    if config.contains('social'):
        self.socialConfig = LineConfig(self.config['social.setup'])
        self.relation = FileIO.loadRelationship(config, self.config['social'])
    print('preprocessing...')
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    bThres = False
    bTopN = False
    if self.ranking.contains('-topN'):
        bTopN = True
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N can not be larger than 100! It has been reassigned with 100'
            N = 100
        if N > len(self.dao.item):
            N = len(self.dao.item)
    elif self.ranking.contains('-threshold'):
        threshold = float(self.ranking['-threshold'])
        bThres = True
    else:
        print 'No correct evaluation metric is specified!'
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predictForRanking(user)
        # predictedItems = denormalize(predictedItems, self.dao.rScale[-1], self.dao.rScale[0])
        for id, rating in enumerate(predictedItems):
            # if not self.dao.rating(user, self.dao.id2item[id]):
            # if bThres:
            #     if rating > threshold:
            #         itemSet[self.dao.id2item[id]] = rating
            # else:
            itemSet[self.dao.id2item[id]] = rating
        ratedList, ratingList = self.dao.userRated(user)
        for item in ratedList:
            del itemSet[item]
        # seed the running top-N list with the first N items
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        # if bTopN:
        # find the N biggest scores: binary-search each score against the running list
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = (l + r) / 2
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    else:
                        ind = mid
                        break
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            # shift the lower-ranked items backwards before inserting,
            # so the running top-N list stays ordered
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = zip(resNames, recommendations)
        # elif bThres:
        #     itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        #     recList[user] = itemSet[:]
        #     userN[user] = len(itemSet)
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N)
    # elif self.ranking.contains('-threshold'):
    #     origin = self.dao.testSet_u.copy()
    #     for user in origin:
    #         temp = {}
    #         for item in origin[user]:
    #             if origin[user][item] >= threshold:
    #                 temp[item] = threshold
    #         origin[user] = temp
    #     self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def evalRanking(self):
    res = []  # used to contain the text of the result
    if self.ranking.contains('-topN'):
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = int(top[-1])
        if N > 100 or N < 0:
            print('N can not be larger than 100! It has been reassigned with 10')
            N = 10
        if N > len(self.data.item):
            N = len(self.data.item)
    else:
        print('No correct evaluation metric is specified!')
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.data.testSet_u)
    # rawRes = {}
    for i, user in enumerate(self.data.testSet_u):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predictForRanking(user)
        # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
        for id, rating in enumerate(predictedItems):
            # add prediction in order to measure
            itemSet[self.data.id2item[id]] = rating
        ratedList, ratingList = self.data.userRated(user)
        for item in ratedList:
            del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while r >= l:
                    mid = (r - l) / 2 + l
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    if r < l:
                        ind = r
                        break
            # move the items backwards
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = zip(resNames, recommendations)
        if i % 100 == 0:
            print(self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.data.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet_u, recList, top)
    FileIO.writeFile(outDir, fileName, self.measure)
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
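# Design note on the evalRanking variants above: the hand-rolled binary-search
# insertion maintains a running top-N list in roughly O(len(itemSet) * log N).
# Under the assumption that itemSet maps item ids to scores, the standard
# library yields the same ranking far more simply; a minimal sketch:
import heapq

def topN(itemSet, N):
    # return the N highest-scoring (item, score) pairs, best first
    return heapq.nlargest(N, itemSet.items(), key=lambda pair: pair[1])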
import sys
sys.path.append("..")
from tool.config import Config
from tool.file import FileIO
from algorithm.SoReg import SoReg

if __name__ == '__main__':
    print '=' * 80
    print 'This is a test of the SoReg algorithm'
    algor = -1
    conf = -1
    conf = Config('../conf/SoReg.conf')
    trainset = []
    testset = []
    relation = []
    ui = {}
    vj = {}
    user_item_avg = {}
    trainset = FileIO.loadDataSet(conf, conf['ratings'], bTest=False)
    testset = FileIO.loadDataSet(conf, conf['testset'], bTest=True)
    relation = FileIO.loadRelationship(conf, conf['social'])
    ui, vj, user_item_avg = SoReg.buildmodel(trainset, relation)
    SoReg.pred(testset, ui, vj, user_item_avg)
    # store the data
def execute(self):
    # import the algorithm module
    try:
        importStr = 'from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender']
        exec(importStr)
    except ImportError:
        importStr = 'from algorithm.ranking.' + self.config['recommender'] + ' import ' + self.config['recommender']
        exec(importStr)
    if self.evaluation.contains('-cv'):
        k = int(self.evaluation['-cv'])
        if k <= 1 or k > 10:
            k = 3
        mkl.set_num_threads(max(1, mkl.get_max_threads() / k))
        # create the manager used for communication across processes
        manager = Manager()
        m = manager.dict()
        i = 1
        tasks = []
        binarized = False
        if self.evaluation.contains('-b'):
            binarized = True
        for train, test in DataSplit.crossValidation(self.trainingData, k, binarized=binarized):
            fold = '[' + str(i) + ']'
            if self.config.contains('social'):
                recommender = self.config['recommender'] + "(self.config,train,test,self.relation,fold)"
            else:
                recommender = self.config['recommender'] + "(self.config,train,test,fold)"
            # create the process
            p = Process(target=run, args=(m, eval(recommender), i))
            tasks.append(p)
            i += 1
        # start the processes; without -p they run one at a time
        for p in tasks:
            p.start()
            if not self.evaluation.contains('-p'):
                p.join()
        # wait until all processes are completed
        if self.evaluation.contains('-p'):
            for p in tasks:
                p.join()
        # compute the mean error of k-fold cross validation
        self.measure = [dict(m)[i] for i in range(1, k + 1)]
        res = []
        for i in range(len(self.measure[0])):
            if self.measure[0][i][:3] == 'Top':
                res.append(self.measure[0][i])
                continue
            measure = self.measure[0][i].split(':')[0]
            total = 0
            for j in range(k):
                total += float(self.measure[j][i].split(':')[1])
            res.append(measure + ':' + str(total / k) + '\n')
        # output result
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-' + str(k) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result of %d-fold cross validation:\n%s' % (k, ''.join(res)))
    else:
        if self.config.contains('social'):
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
        else:
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
        eval(recommender).execute()
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    top = self.ranking['-topN'].split(',')
    top = [int(num) for num in top]
    N = max(top)
    if N > 100 or N < 0:
        print ('N can not be larger than 100! It has been reassigned with 10')
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    for i, user in enumerate(self.data.testSet):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predict(user)
        for id, score in enumerate(predictedItems):
            itemSet[self.data.id2name[self.recType][id]] = score
        for item in self.data.userRecord[user]:
            try:
                del itemSet[item[self.recType]]
            except KeyError:
                pass
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        # if bTopN:
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = (l + r) // 2
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    else:
                        ind = mid
                        break
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            # move the lower-ranked items backwards before inserting,
            # so the running top-N list stays ordered
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = resNames
        if i % 100 == 0:
            print (self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
        for item in recList[user]:
            line += item
            if item in self.data.testSet[user]:
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN'] \
                       + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print ('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet, recList, top, self.data.getSize(self.recType))
    FileIO.writeFile(outDir, fileName, self.measure)
    print ('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
def execute(self):
    # import the method module
    importStr = 'from method.' + self.config['methodName'] + ' import ' + self.config['methodName']
    exec (importStr)
    if self.evaluation.contains('-cv'):
        k = int(self.evaluation['-cv'])
        if k <= 1 or k > 10:
            k = 3
        # create the manager used for communication across processes
        manager = Manager()
        m = manager.dict()
        i = 1
        tasks = []
        for train, test in DataSplit.crossValidation(self.trainingData, k):
            fold = '[' + str(i) + ']'
            if self.config.contains('social'):
                method = self.config['methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
            else:
                method = self.config['methodName'] + "(self.config,train,test,self.labels,fold)"
            # create the process
            p = Process(target=run, args=(m, eval(method), i))
            tasks.append(p)
            i += 1
        # start the processes
        for p in tasks:
            p.start()
        # wait until all processes are completed
        for p in tasks:
            p.join()
        # average the k classification reports of k-fold cross validation
        self.measure = [dict(m)[i] for i in range(1, k + 1)]
        res = []
        pattern = re.compile(r'(\d+\.\d+)')
        countPattern = re.compile(r'\d+\n')
        labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
        labels = re.findall(labelPattern, self.measure[0])
        values = np.array([0] * 9, dtype=float)
        count = np.array([0, 0, 0], dtype=int)
        for report in self.measure:
            values += np.array(re.findall(pattern, report), dtype=float)
            count += np.array(re.findall(countPattern, report), dtype=int)
        values /= k
        values = np.around(values, decimals=4)
        res.append(' precision recall f1-score support\n\n')
        res.append(' ' + labels[0] + ' ' + ' '.join(np.array(values[0:3], dtype=str).tolist()) + ' ' + str(count[0]) + '\n')
        res.append(' ' + labels[1] + ' ' + ' '.join(np.array(values[3:6], dtype=str).tolist()) + ' ' + str(count[1]) + '\n\n')
        res.append(' avg/total ' + ' '.join(np.array(values[6:9], dtype=str).tolist()) + ' ' + str(count[2]) + '\n')
        print 'Total:'
        print ''.join(res)
        # for line in lines[1:]:
        #     measure = self.measure[0][i].split(':')[0]
        #     total = 0
        #     for j in range(k):
        #         total += float(self.measure[j][i].split(':')[1])
        #     res.append(measure+':'+str(total/k)+'\n')
        # output result
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['methodName'] + '@' + currentTime + '-' + str(k) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The results have been output to ' + abspath(LineConfig(self.config['output.setup'])['-dir']) + '\n'
    else:
        if self.config.contains('social'):
            method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
        else:
            method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
        eval(method).execute()
def execute(self):
    # import the method module
    importStr = 'from method.' + self.config['methodName'] + ' import ' + self.config['methodName']
    exec(importStr)
    if self.evaluation.contains('-cv'):
        k = int(self.evaluation['-cv'])
        if k <= 1 or k > 10:
            k = 3
        # create the manager used for communication across processes
        manager = Manager()
        m = manager.dict()
        i = 1
        tasks = []
        for train, test in DataSplit.crossValidation(self.trainingData, k):
            fold = '[' + str(i) + ']'
            if self.config.contains('social'):
                method = self.config['methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
            else:
                method = self.config['methodName'] + "(self.config,train,test,self.labels,fold)"
            # create the process
            p = Process(target=run, args=(m, eval(method), i))
            tasks.append(p)
            i += 1
        # start the processes
        for p in tasks:
            p.start()
        # wait until all processes are completed
        for p in tasks:
            p.join()
        # average the k classification reports of k-fold cross validation
        self.measure = [dict(m)[i] for i in range(1, k + 1)]
        res = []
        pattern = re.compile(r'(\d+\.\d+)')
        countPattern = re.compile(r'\d+\n')
        labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
        labels = re.findall(labelPattern, self.measure[0])
        values = np.array([0] * 9, dtype=float)
        count = np.array([0, 0, 0], dtype=int)
        for report in self.measure:
            values += np.array(re.findall(pattern, report), dtype=float)
            count += np.array(re.findall(countPattern, report), dtype=int)
        values /= k
        values = np.around(values, decimals=4)
        res.append(' precision recall f1-score support\n\n')
        res.append(' ' + labels[0] + ' ' + ' '.join(np.array(values[0:3], dtype=str).tolist()) + ' ' + str(count[0]) + '\n')
        res.append(' ' + labels[1] + ' ' + ' '.join(np.array(values[3:6], dtype=str).tolist()) + ' ' + str(count[1]) + '\n\n')
        res.append(' avg/total ' + ' '.join(np.array(values[6:9], dtype=str).tolist()) + ' ' + str(count[2]) + '\n')
        print('Total:')
        print(''.join(res))
        # for line in lines[1:]:
        #     measure = self.measure[0][i].split(':')[0]
        #     total = 0
        #     for j in range(k):
        #         total += float(self.measure[j][i].split(':')[1])
        #     res.append(measure+':'+str(total/k)+'\n')
        # output result
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['methodName'] + '@' + currentTime + '-' + str(k) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The results have been output to ' + abspath(LineConfig(self.config['output.setup'])['-dir']) + '\n')
    else:
        if self.config.contains('social'):
            method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
        else:
            method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
        result = eval(method).execute()
        return result
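# An illustration of what the regex averaging above extracts from one line of
# a two-class sklearn classification_report (the sample text and values are
# hypothetical):
import re

sample = ' 1 0.9132 0.8774 0.8950 155\n'
print(re.findall(r'(\d+\.\d+)', sample))  # ['0.9132', '0.8774', '0.8950']
print(re.findall(r'\d+\n', sample))       # ['155\n']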
def evalRanking(self, write_to_file=True, use_now_time=False):
    res = []  # used to contain the text of the result
    if self.ranking.contains('-topN'):
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = max(top)
        if N > 100 or N < 0:
            print('N can not be larger than 100! It has been reassigned with 10')
            N = 10
        if N > len(self.data.item):
            N = len(self.data.item)
    else:
        print('No correct evaluation metric is specified!')
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    testSample = self.testSample
    # # multiprocessing way
    # pool = Pool(12)
    # dataset = []
    # for user, testSample_u in testSample.items():
    #     identified_user = self.map_from_true_to_identify.get(user, -1)
    #     if identified_user == -1:
    #         continue
    #     dataset.append([user, identified_user, testSample_u])
    # result_generator = pool.imap_unordered(partial(self.get_recommendation, N=N), dataset)
    # for result in tqdm(result_generator, total=len(dataset), desc='Measuring [{}]'):
    #     user, line, recList_user = result
    #     recList[user] = recList_user
    #     res.append(line)
    # pool.close()
    # pool.join()
    testSample_copy = testSample.copy()
    for i, user in tqdm(enumerate(testSample), total=len(testSample), desc='Measuring [{}]'.format(self.algorName)):
        identified_user = self.map_from_true_to_identify.get(user, -1)
        if identified_user == -1:
            del testSample_copy[user]
            continue
        user, line, recList_user = self.get_recommendation((user, identified_user, testSample[user]), N)
        recList[user] = recList_user
        res.append(line)
    self.measure = Measure.rankingMeasure(testSample_copy, recList, top)
    # record hyper-parameters when the corresponding attributes exist
    try:
        self.measure.append("C:{}\n".format(self.C))
    except AttributeError:
        pass
    try:
        self.measure.append("L:{}\n".format(self.L))
    except AttributeError:
        pass
    try:
        self.measure.append("K:{}\n".format(self.K))
    except AttributeError:
        pass
    try:
        self.measure.append("N:{}\n".format(self.N))
    except AttributeError:
        pass
    if use_now_time:
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    else:
        currentTime = self.currentTime
    if write_to_file:
        # output prediction result (currently disabled)
        if False and self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
        # output evaluation result
        outDir = self.output['-dir']
        try:
            fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '_C{}'.format(self.C) + '.txt'
        except AttributeError:
            fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, self.measure)
        # FileIO.writeFile(outDir, fileName, "C:{}".format(self.C))
        print('The result has been output to ', abspath(outDir), '.')
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))