def evalRatings(self): res = [] #used to contain the text of the result res.append('userId itemId original prediction\n') #predict for userId in self.dao.testSet_u: for ind,item in enumerate(self.dao.testSet_u[userId]): itemId = item[0] originRating = item[1] #predict prediction = self.predict(userId,itemId) #denormalize prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0]) ##################################### pred = self.checkRatingBoundary(prediction) # add prediction in order to measure self.dao.testSet_u[userId][ind].append(pred) res.append(userId+' '+itemId+' '+str(originRating)+' '+str(pred)+'\n') currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time())) #output prediction result if self.isOutput: outDir = self.output['-dir'] fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt' FileIO.writeFile(outDir,fileName,res) print 'The Result has been output to ',abspath(outDir),'.' #output evaluation result outDir = self.output['-dir'] fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt' measure = Measure.ratingMeasure(self.dao.testSet_u) FileIO.writeFile(outDir, fileName, measure)
def performance(self):
    """Score every record in testData and return the rating measures.

    Returns the list produced by Measure.ratingMeasure, which is also
    cached on self.measure.
    """
    scale_hi = self.dao.rScale[-1]
    scale_lo = self.dao.rScale[0]
    scored = []
    for user, item, rating in self.dao.testData:
        # predict, map back to the rating scale, then clamp to bounds
        raw = denormalize(self.predict(user, item), scale_hi, scale_lo)
        scored.append([user, item, rating, self.checkRatingBoundary(raw)])
    self.measure = Measure.ratingMeasure(scored)
    return self.measure
def evalRatings(self): res = [] #used to contain the text of the result res.append('userId itemId original prediction\n') #predict for ind,entry in enumerate(self.dao.testData): user,item,rating = entry #predict prediction = self.predict(user,item) #denormalize prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0]) ##################################### pred = self.checkRatingBoundary(prediction) # add prediction in order to measure self.dao.testData[ind].append(pred) res.append(user+' '+item+' '+str(rating)+' '+str(pred)+'\n') currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time())) #output prediction result if self.isOutput: outDir = self.output['-dir'] fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt' FileIO.writeFile(outDir,fileName,res) print 'The result has been output to ',abspath(outDir),'.' #output evaluation result outDir = self.output['-dir'] fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt' self.measure = Measure.ratingMeasure(self.dao.testData) FileIO.writeFile(outDir, fileName, self.measure) print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def buildModel(self): print 'run the MF_ALS algorithm' print 'training...' iteration = 0 while iteration < self.maxIter: self.loss = 0 I = np.ones(len(self.dao.item)) for user in self.dao.user: C_u = np.ones(len(self.dao.item)) P_u = np.zeros(len(self.dao.item)) uid = self.dao.user[user] for item in self.dao.trainSet_u[user]: iid = self.dao.getItemId(item) r_ui = denormalize(self.dao.trainSet_u[user][item], self.dao.rScale[-1], self.dao.rScale[0]) C_u[iid] += log(1 + r_ui) P_u[iid] = 1 error = (P_u[iid] - self.P[uid].dot(self.Q[iid])) self.loss += C_u[iid] * error**2 Temp = (self.Q.T.dot(self.Q) + (self.Q.T * (C_u - I)).dot(self.Q) + self.regU * np.eye(self.k))**-1 self.P[uid] = (Temp.dot(self.Q.T) * C_u).dot(P_u) I = np.ones(len(self.dao.user)) for item in self.dao.item: C_i = np.ones(len(self.dao.user)) P_i = np.zeros(len(self.dao.user)) iid = self.dao.item[item] for user in self.dao.trainSet_i[item]: uid = self.dao.getUserId(user) r_ui = denormalize(self.dao.trainSet_i[item][user], self.dao.rScale[-1], self.dao.rScale[0]) C_i[uid] += log(r_ui + 1) P_i[uid] = 1 Temp = (self.P.T.dot(self.P) + (self.P.T * (C_i - I)).dot(self.P) + self.regU * np.eye(self.k))**-1 self.Q[iid] = (Temp.dot(self.P.T) * C_i).dot(P_i) # self.loss += (self.P * self.P).sum() + (self.Q * self.Q).sum() iteration += 1 if self.isConverged(iteration): break
def evalRanking(self): res = [] # used to contain the text of the result N = int(self.ranking['-topN']) if N > 100 or N < 0: N = 100 res.append( 'userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n' ) # predict topNSet = {} userCount = len(self.dao.testSet_u) for i, user in enumerate(self.dao.testSet_u): itemSet = [] line = user + ':' for item in self.dao.item: if not self.dao.rating(user, item): # predict prediction = self.predict(user, item) # denormalize prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0]) prediction = round(prediction, 4) #pred = self.checkRatingBoundary(prediction) ##################################### # add prediction in order to measure itemSet.append((item, prediction)) itemSet.sort(key=lambda d: d[1], reverse=True) topNSet[user] = itemSet[0:N] if i % 100 == 0: print self.algorName, self.foldInfo, 'progress:' + str( i) + '/' + str(userCount) for item in topNSet[user]: line += ' (' + item[0] + ',' + str(item[1]) + ')' if self.dao.testSet_u[user].has_key(item[0]): line += '*' line += '\n' res.append(line) currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) # output prediction result if self.isOutput: outDir = self.output['-dir'] fileName = self.config[ 'recommender'] + '@' + currentTime + '-top-' + str( N) + 'items' + self.foldInfo + '.txt' FileIO.writeFile(outDir, fileName, res) print 'The Result has been output to ', abspath(outDir), '.' #output evaluation result outDir = self.output['-dir'] fileName = self.config[ 'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt' self.measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N) FileIO.writeFile(outDir, fileName, self.measure)
def rating_performance(self):
    """Score every record in self.data.testData and return the rating
    measures (also cached on self.measure).

    Note: reads self.data, not self.dao, unlike the sibling methods.
    """
    scale_hi = self.data.rScale[-1]
    scale_lo = self.data.rScale[0]
    scored = []
    for user, item, rating in self.data.testData:
        # predict, map back to the rating scale, then clamp to bounds
        raw = denormalize(self.predict(user, item), scale_hi, scale_lo)
        scored.append([user, item, rating, self.checkRatingBoundary(raw)])
    self.measure = Measure.ratingMeasure(scored)
    return self.measure
def render(self):
    """Render the data-analysis HTML report, write it to
    ../visual/visualization/analysis.html and open it in a browser.

    Assumes self.draw() has produced the referenced PNG charts
    (rh/rcu/rci for ratings, ff/fd1/fd2 for social) — TODO confirm.
    """
    self.draw()
    # static page header and "Data Files" section
    html = "<html><head><title>Data Analysis</title>\n" \
           "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n" \
           "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n" \
           "<div class='main'><div class='area1'>\n" \
           "<div class='title'><h3>Data Files</h3></div><div class='text'>"
    if self.conf.contains('ratings'):
        html += "<b>Rating Data</b>: {rating}".format(
            rating=abspath(self.conf['ratings']))
    if self.conf.contains('social'):
        html += "<br><b>Social Data</b>: {social}".format(
            social=abspath(self.conf['social']))
    html += "</div></div><div style='padding-top:20px'><center>" \
            "<img src='images/header2.png'/></center></div>\n"
    # "Rating Data" section: scale, counts, global mean and the three charts
    if self.conf.contains('ratings'):
        html += "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n"
        html += "<div class='text'><b>Rating Scale</b>: {scale}</br>".format(
            scale=' '.join([str(item) for item in self.dao.rScale]))
        html += "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br><b>Record Count</b>: {record}<br><b>Global Mean</b>: {mean}</div>\n"\
            .format(user=str(len(self.dao.user)), item=str(len(self.dao.item)), record=str(len(self.dao.trainingData)),
                    mean=str(round(denormalize(self.dao.globalMean, self.dao.rScale[-1], self.dao.rScale[0]), 3)))
        html += "<center><div class='img'><img src='images/rh.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/rcu.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/rci.png' width='640px' height='480px'/></div></center>\n"
        html += "</div><div style='padding-top:20px'><center>" \
                "<img src='images/header2.png'/></center></div>\n"
    # "Social Data" section: counts and the three social charts
    if self.conf.contains('social'):
        html += "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n"
        html += "<div class='text'><b>User Count</b>: {user}<br><b>Relation Count</b>: {relation}<br></div>\n" \
            .format(user=str(len(self.sao.user)),
                    relation=str(len(self.sao.relation)))
        html += "<center><div class='img'><img src='images/ff.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/fd1.png' width='640px' height='480px'/></div></center>\n"
        html += "<center><div class='img'><img src='images/fd2.png' width='640px' height='480px'/></div></center>\n"
        html += "</div><div style='padding-top:20px'><center>" \
                "<img src='images/header2.png'/></center></div>\n"
    html += "</div></body></html>"
    # write the report and pop it open in the default browser
    FileIO.writeFile('../visual/visualization/', 'analysis.html', html)
    print 'The report has been output to', abspath(
        '../visual/visualization/analysis.html')
    webbrowser.open(abspath('../visual/visualization/analysis.html'),
                    new=0, autoraise=True)
def evalRanking(self): res = [] # used to contain the text of the result N = 0 threshold = 0 bThres = False bTopN = False if self.ranking.contains('-topN'): bTopN = True N = int(self.ranking['-topN']) if N > 100 or N < 0: print 'N can not be larger than 100! It has been reassigned with 100' N = 100 elif self.ranking.contains('-threshold'): threshold = float(self.ranking['-threshold']) bThres = True else: print 'No correct evaluation metric is specified!' exit(-1) res.append( 'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n' ) # predict recList = {} userN = {} userCount = len(self.dao.testSet_u) for i, user in enumerate(self.dao.testSet_u): itemSet = {} line = user + ':' for item in self.dao.item: # predict prediction = self.predict(user, item) # denormalize prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0]) #prediction = self.checkRatingBoundary(prediction) #pred = self.checkRatingBoundary(prediction) ##################################### # add prediction in order to measure if bThres: if prediction > threshold: itemSet[item] = prediction else: itemSet[item] = prediction ratedList, ratingList = self.dao.userRated(user) for item in ratedList: del itemSet[self.dao.id2item[item]] itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True) if self.ranking.contains('-topN'): recList[user] = itemSet[0:N] elif self.ranking.contains('-threshold'): recList[user] = itemSet[:] userN[user] = len(itemSet) if i % 100 == 0: print self.algorName, self.foldInfo, 'progress:' + str( i) + '/' + str(userCount) for item in recList[user]: line += ' (' + item[0] + ',' + str(item[1]) + ')' if self.dao.testSet_u[user].has_key(item[0]): line += '*' line += '\n' res.append(line) currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) # output prediction result if self.isOutput: fileName = '' outDir = self.output['-dir'] if self.ranking.contains('-topN'): fileName = self.config[ 'recommender'] + '@' + 
currentTime + '-top-' + str( N) + 'items' + self.foldInfo + '.txt' elif self.ranking.contains('-threshold'): fileName = self.config[ 'recommender'] + '@' + currentTime + '-threshold-' + str( threshold) + self.foldInfo + '.txt' FileIO.writeFile(outDir, fileName, res) print 'The Result has been output to ', abspath(outDir), '.' #output evaluation result outDir = self.output['-dir'] fileName = self.config[ 'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt' if self.ranking.contains('-topN'): self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N) elif self.ranking.contains('-threshold'): origin = self.dao.testSet_u.copy() for user in origin: temp = {} for item in origin[user]: if origin[user][item] >= threshold: temp[item] = threshold origin[user] = temp self.measure = Measure.rankingMeasure_threshold( origin, recList, userN) FileIO.writeFile(outDir, fileName, self.measure)