Exemplo n.º 1
0
class Display(object):
    """Render an HTML data-analysis report for the configured data sets.

    The configuration must contain a ``ratings`` option, a ``social`` option
    or both. For each option present the matching data set is loaded in the
    constructor, ``draw`` produces its charts through ``Chart`` and ``render``
    writes an HTML page that references those charts.
    """

    # Folder the report page is written to.
    _reportDir = '../visual/visualization/'
    # Path prefix handed to ``Chart`` for every generated image.
    _imagePrefix = '../visual/visualization/images/'
    # The rating histogram is drawn only for scales with fewer levels than this.
    _maxHistogramLevels = 20
    # Banner markup that follows every section of the report.
    _banner = ("<div style='padding-top:20px'><center>"
               "<img src='images/header2.png'/></center></div>\n")

    def __init__(self, conf):
        """Load the data sets named in ``conf``.

        :param conf: configuration object offering ``contains(key)`` and
            item access by key.
        :raises SystemExit: with code -1 when ``conf`` has neither a
            ``ratings`` nor a ``social`` option.
        """
        self.conf = conf
        hasRatings = conf.contains('ratings')
        hasSocial = conf.contains('social')
        if not hasRatings and not hasSocial:
            print('The config file is not in the correct format!')
            # Same exit status as before, without relying on the ``exit``
            # helper that only exists when the ``site`` module is loaded.
            raise SystemExit(-1)
        if hasRatings:
            ratingData = FileIO.loadDataSet(conf, conf['ratings'])
            self.dao = RatingDAO(conf, ratingData)
        if hasSocial:
            relationData = FileIO.loadRelationship(conf, conf['social'])
            self.sao = SocialDAO(conf, relationData)

    def _hasHistogram(self):
        """Tell whether the rating histogram is part of the report."""
        return len(self.dao.rScale) < self._maxHistogramLevels

    def _drawRatingCharts(self):
        """Draw the rating histogram and both rating-count distributions."""
        dao = self.dao
        if self._hasHistogram():
            ratings = [triple[2] for triple in dao.trainingData]
            Chart.hist(dao.rScale, ratings, len(dao.rScale), '#058edc',
                       'Rating Histogram', 'Rating Scale', 'Count',
                       self._imagePrefix + 'rh')
        perUser = [len(dao.userRated(u)[0]) for u in dao.user]
        Chart.distribution(perUser, 'Rating Count Distribution', '',
                           'Rated items count per user',
                           self._imagePrefix + 'rcu')
        perItem = [len(dao.itemRated(i)[0]) for i in dao.item]
        Chart.distribution(perItem, 'Rating Count Distribution', '',
                           'user Rated count per item',
                           self._imagePrefix + 'rci')

    def _drawSocialCharts(self):
        """Draw the follower/followee scatter plot and distributions."""
        sao = self.sao
        # Each list is computed once and shared by the charts that need it.
        followers = [len(sao.getFollowers(u)) for u in sao.user]
        followees = [len(sao.getFollowees(u)) for u in sao.user]
        Chart.scatter(followers, followees, 'red', 'Follower&Followee',
                      'Follower count', 'Followee count',
                      self._imagePrefix + 'ff')
        Chart.distribution(followers, 'Followers Distribution', '',
                           'Followers count per user',
                           self._imagePrefix + 'fd1')
        Chart.distribution(followees, 'Followees Distribution', '',
                           'Followees count per user',
                           self._imagePrefix + 'fd2')

    def draw(self):
        """Generate the chart images for every configured data set."""
        print('draw chart...')
        if self.conf.contains('ratings'):
            self._drawRatingCharts()
        if self.conf.contains('social'):
            self._drawSocialCharts()

    @staticmethod
    def _imageTag(name):
        """Return the markup that embeds chart ``images/<name>.png``."""
        return ("<center><div class='img'><img src='images/%s.png' "
                "width='640px' height='480px'/></div></center>\n" % name)

    def _ratingSection(self):
        """Return the report section describing the rating data."""
        dao = self.dao
        mean = round(denormalize(dao.globalMean, dao.rScale[-1],
                                 dao.rScale[0]), 3)
        charts = ['rcu', 'rci']
        if self._hasHistogram():
            # Only reference the histogram when draw() actually produced it.
            charts.insert(0, 'rh')
        parts = [
            "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n",
            "<div class='text'><b>Rating Scale</b>: {scale}<br>".format(
                scale=' '.join([str(level) for level in dao.rScale])),
            "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br>"
            "<b>Record Count</b>: {record}<br>"
            "<b>Global Mean</b>: {mean}</div>\n".format(
                user=str(len(dao.user)),
                item=str(len(dao.item)),
                record=str(len(dao.trainingData)),
                mean=str(mean)),
        ]
        parts.extend([self._imageTag(name) for name in charts])
        parts.append("</div>" + self._banner)
        return ''.join(parts)

    def _socialSection(self):
        """Return the report section describing the social data."""
        sao = self.sao
        parts = [
            "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n",
            "<div class='text'><b>User Count</b>: {user}<br>"
            "<b>Relation Count</b>: {relation}<br></div>\n".format(
                user=str(len(sao.user)),
                relation=str(len(sao.relation))),
        ]
        parts.extend([self._imageTag(name) for name in ('ff', 'fd1', 'fd2')])
        parts.append("</div>" + self._banner)
        return ''.join(parts)

    def render(self):
        """Draw the charts, write ``analysis.html`` and open it in a browser."""
        self.draw()
        hasRatings = self.conf.contains('ratings')
        hasSocial = self.conf.contains('social')

        parts = [(
            "<html><head><title>Data Analysis</title>\n"
            "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n"
            "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n"
            "<div class='main'><div class='area1'>\n"
            "<div class='title'><h3>Data Files</h3></div><div class='text'>"
        )]
        if hasRatings:
            parts.append("<b>Rating Data</b>: {rating}".format(
                rating=abspath(self.conf['ratings'])))
        if hasSocial:
            parts.append("<br><b>Social Data</b>: {social}".format(
                social=abspath(self.conf['social'])))
        parts.append("</div></div>" + self._banner)
        if hasRatings:
            parts.append(self._ratingSection())
        if hasSocial:
            parts.append(self._socialSection())
        parts.append("</div></body></html>")

        FileIO.writeFile(self._reportDir, 'analysis.html', ''.join(parts))
        reportPath = abspath(self._reportDir + 'analysis.html')
        print('The report has been output to %s' % (reportPath,))
        webbrowser.open(reportPath, new=0, autoraise=True)
Exemplo n.º 2
0
class Recommender(object):
    """Base class of the recommenders: configuration, evaluation and output.

    Subclasses override the model hooks (``initModel``, ``buildModel``,
    ``buildModel_tf``, ``saveModel``, ``loadModel``) and the prediction hooks
    (``predict``, ``predictForRanking``); ``execute`` drives the whole run.
    """

    def __init__(self, conf, trainingSet, testSet, fold='[1]'):
        """Wrap the data in a ``RatingDAO`` and read the evaluation setup.

        :param conf: configuration object, indexed by option name.
        :param trainingSet: training records handed to ``RatingDAO``.
        :param testSet: test records handed to ``RatingDAO``.
        :param fold: label of the current fold, used in messages and in the
            names of the output files.
        """
        self.config = conf
        self.isSaveModel = False
        self.ranking = None
        self.isLoadModel = False
        self.output = None
        self.isOutput = True
        self.data = RatingDAO(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.evalSettings = LineConfig(self.config['evaluation.setup'])
        self.measure = []
        self.record = []
        if self.evalSettings.contains('-cold'):
            # evaluation on cold-start users
            self._keepColdStartUsers(int(self.evalSettings['-cold']))

        (self.num_users, self.num_items,
         self.train_size) = self.data.trainingSize()

    def _keepColdStartUsers(self, threshold):
        """Drop test users having more than ``threshold`` training entries."""
        trainSet = self.data.trainSet_u
        removedUser = set(
            user for user in self.data.testSet_u
            if user in trainSet and len(trainSet[user]) > threshold)
        for user in removedUser:
            del self.data.testSet_u[user]
        self.data.testData = [entry for entry in self.data.testData
                              if entry[0] not in removedUser]

    def readConfiguration(self):
        """Read the algorithm name, output setup and ranking setup."""
        self.algorName = self.config['recommender']
        self.output = LineConfig(self.config['output.setup'])
        self.isOutput = self.output.isMainOn()
        self.ranking = LineConfig(self.config['item.ranking'])

    def printAlgorConfig(self):
        """Show the algorithm's configuration and the data set sizes."""
        print('Algorithm:', self.config['recommender'])
        print('Ratings dataset:', abspath(self.config['ratings']))
        evalSetup = LineConfig(self.config['evaluation.setup'])
        if evalSetup.contains('-testSet'):
            print('Test set:', abspath(evalSetup.getOption('-testSet')))
        print(
            'Training set size: (user count: %d, item count %d, record count: %d)'
            % (self.data.trainingSize()))
        print(
            'Test set size: (user count: %d, item count %d, record count: %d)'
            % (self.data.testSize()))
        print('=' * 80)

    def initModel(self):
        """Hook: initialise the model; does nothing in the base class."""
        pass

    def buildModel(self):
        """Hook: build the model (for model-based algorithms)."""
        pass

    def buildModel_tf(self):
        """Hook: train the model on tensorflow."""
        pass

    def saveModel(self):
        """Hook: persist the model; does nothing in the base class."""
        pass

    def loadModel(self):
        """Hook: restore a saved model; does nothing in the base class."""
        pass

    def predict(self, u, i):
        """Hook: predict the rating of user ``u`` for item ``i``."""
        pass

    def predictForRanking(self, u):
        """Hook: return the ranking scores of user ``u``, indexed by item id."""
        pass

    def checkRatingBoundary(self, prediction):
        """Clamp ``prediction`` to the rating scale.

        :return: the largest / smallest scale value when ``prediction`` lies
            above / below the scale, otherwise ``prediction`` rounded to three
            decimals.
        """
        if prediction > self.data.rScale[-1]:
            return self.data.rScale[-1]
        elif prediction < self.data.rScale[0]:
            return self.data.rScale[0]
        else:
            return round(prediction, 3)

    def _outputFileName(self, currentTime, tag):
        """Return ``<recommender>@<time><tag><fold>.txt``."""
        return (self.config['recommender'] + '@' + currentTime + tag +
                self.foldInfo + '.txt')

    def _reportMeasure(self, currentTime):
        """Write ``self.measure`` to the output directory and print it."""
        outDir = self.output['-dir']
        fileName = self._outputFileName(currentTime, '-measure')
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))

    def evalRatings(self):
        """Predict every test rating, then write predictions and measures.

        The bounded prediction is appended to each entry of
        ``self.data.testData`` because ``Measure.ratingMeasure`` receives
        that list.
        """
        res = ['userId  itemId  original  prediction\n']
        for entry in self.data.testData:
            user, item, rating = entry
            pred = self.checkRatingBoundary(self.predict(user, item))
            # add prediction in order to measure
            entry.append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' +
                       str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self._outputFileName(currentTime,
                                            '-rating-predictions')
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        self.measure = Measure.ratingMeasure(self.data.testData)
        self._reportMeasure(currentTime)

    @staticmethod
    def _topN(scores, N):
        """Return the ``N`` best ``(item, score)`` pairs, best first.

        Fewer pairs are returned when ``scores`` holds fewer than ``N``
        items; every item appears at most once.
        """
        import heapq
        return heapq.nlargest(N, scores.items(), key=lambda pair: pair[1])

    def evalRanking(self):
        """Recommend the top-N unrated items to every test user.

        Writes the recommendation lists (when output is enabled) and the
        ranking measures, and stores the latter in ``self.measure``.

        :raises SystemExit: with code -1 when the ranking setup has no
            ``-topN`` option.
        """
        if not self.ranking.contains('-topN'):
            print('No correct evaluation metric is specified!')
            raise SystemExit(-1)

        top = [int(num) for num in self.ranking['-topN'].split(',')]
        # The last cut-off decides how long the recommendation lists are.
        N = top[-1]
        if N > 100 or N < 0:
            print(
                'N can not be larger than 100! It has been reassigned with 10'
            )
            N = 10
        N = min(N, len(self.data.item))

        res = [
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        ]
        # predict
        recList = {}
        userCount = len(self.data.testSet_u)
        for i, user in enumerate(self.data.testSet_u):
            predictedItems = self.predictForRanking(user)
            itemSet = {}
            for itemId, score in enumerate(predictedItems):
                itemSet[self.data.id2item[itemId]] = score

            # Items the user already rated are not recommended again.
            for item in self.data.userRated(user)[0]:
                itemSet.pop(item, None)

            recList[user] = self._topN(itemSet, N)

            if i % 100 == 0:
                print(self.algorName, self.foldInfo,
                      'progress:' + str(i) + '/' + str(userCount))
            line = user + ':'
            for item, score in recList[user]:
                line += ' (' + item + ',' + str(score) + ')'
                if item in self.data.testSet_u[user]:
                    line += '*'
            res.append(line + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self._outputFileName(
                currentTime, '-top-' + str(N) + 'items')
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        self.measure = Measure.rankingMeasure(self.data.testSet_u, recList,
                                              top)
        self._reportMeasure(currentTime)

    @staticmethod
    def _tensorflowAvailable():
        """Tell whether the ``tensorflow`` package can be imported."""
        try:
            import tensorflow  # noqa: F401 -- availability probe only
        except ImportError:
            return False
        return True

    def execute(self):
        """Run the whole pipeline and return the evaluation measures.

        The model is loaded or built, ratings or rankings are evaluated
        according to the ranking setup, and the model is saved on request.
        """
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoadModel:
            print('Loading model %s...' % (self.foldInfo))
            self.loadModel()
        else:
            print('Initializing model %s...' % (self.foldInfo))
            self.initModel()
            print('Building Model %s...' % (self.foldInfo))
            # Only the import is guarded: an ImportError raised while a
            # model is being built must not trigger a second build.
            if (self.evalSettings.contains('-tf')
                    and self._tensorflowAvailable()):
                self.buildModel_tf()
            else:
                self.buildModel()

        # predict the ratings or item ranking
        print('Predicting %s...' % (self.foldInfo))
        if self.ranking.isMainOn():
            self.evalRanking()
        else:
            self.evalRatings()

        # save model
        if self.isSaveModel:
            print('Saving model %s...' % (self.foldInfo))
            self.saveModel()
        return self.measure
Exemplo n.º 3
0
class Recommender(object):
    """Base class of the recommenders: configuration, evaluation and output.

    Subclasses override the model hooks (``initModel``, ``buildModel``,
    ``saveModel``, ``loadModel``) and ``predict``; ``execute`` drives the
    whole run.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        """Wrap the data in a ``RatingDAO``.

        :param conf: configuration object, indexed by option name.
        :param trainingSet: training records handed to ``RatingDAO``.
        :param testSet: test records handed to ``RatingDAO``.
        :param fold: label of the current fold, used in messages and in the
            names of the output files.
        """
        self.config = conf
        self.isSaveModel = False
        self.ranking = None
        self.isLoadModel = False
        self.output = None
        self.isOutput = True
        self.dao = RatingDAO(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.measure = []

    def readConfiguration(self):
        """Read the algorithm name, output setup and ranking setup."""
        self.algorName = self.config['recommender']
        self.output = LineConfig(self.config['output.setup'])
        self.isOutput = self.output.isMainOn()
        self.ranking = LineConfig(self.config['item.ranking'])

    def printAlgorConfig(self):
        """Show the algorithm's configuration and the data set sizes."""
        # Single-argument print calls emit the same text on Python 2 and 3.
        print('Algorithm: %s' % (self.config['recommender'],))
        print('Ratings dataset: %s' % (abspath(self.config['ratings']),))
        evalSetup = LineConfig(self.config['evaluation.setup'])
        if evalSetup.contains('-testSet'):
            print('Test set: %s' %
                  (abspath(evalSetup.getOption('-testSet')),))
        print(
            'Training set size: (user count: %d, item count %d, record count: %d)'
            % (self.dao.trainingSize()))
        print(
            'Test set size: (user count: %d, item count %d, record count: %d)'
            % (self.dao.testSize()))
        print('=' * 80)

    def initModel(self):
        """Hook: initialise the model; does nothing in the base class."""
        pass

    def buildModel(self):
        """Hook: build the model (for model-based algorithms)."""
        pass

    def saveModel(self):
        """Hook: persist the model; does nothing in the base class."""
        pass

    def loadModel(self):
        """Hook: restore a saved model; does nothing in the base class."""
        pass

    def predict(self, u, i):
        """Hook: predict the rating of user ``u`` for item ``i``."""
        pass

    def predictForRanking(self, u):
        """Hook: return the ranking scores of user ``u``."""
        pass

    def checkRatingBoundary(self, prediction):
        """Clamp ``prediction`` to the rating scale.

        :return: the largest / smallest scale value when ``prediction`` lies
            above / below the scale, otherwise ``prediction`` rounded to three
            decimals.
        """
        if prediction > self.dao.rScale[-1]:
            return self.dao.rScale[-1]
        elif prediction < self.dao.rScale[0]:
            return self.dao.rScale[0]
        else:
            return round(prediction, 3)

    def _denormalizedPrediction(self, user, item):
        """Return ``predict(user, item)`` passed through ``denormalize``."""
        return denormalize(self.predict(user, item), self.dao.rScale[-1],
                           self.dao.rScale[0])

    def _boundedPrediction(self, user, item):
        """Return the denormalized prediction clamped to the rating scale."""
        return self.checkRatingBoundary(
            self._denormalizedPrediction(user, item))

    def _outputFileName(self, currentTime, tag):
        """Return ``<recommender>@<time><tag><fold>.txt``."""
        return (self.config['recommender'] + '@' + currentTime + tag +
                self.foldInfo + '.txt')

    def evalRatings(self):
        """Predict every test rating, then write predictions and measures.

        The bounded prediction is appended to each entry of
        ``self.dao.testData`` because ``Measure.ratingMeasure`` receives
        that list.
        """
        res = ['userId  itemId  original  prediction\n']
        for entry in self.dao.testData:
            user, item, rating = entry
            pred = self._boundedPrediction(user, item)
            # add prediction in order to measure
            entry.append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' +
                       str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        outDir = self.output['-dir']
        # output prediction result
        if self.isOutput:
            fileName = self._outputFileName(currentTime,
                                            '-rating-predictions')
            FileIO.writeFile(outDir, fileName, res)
            print('The Result has been output to  %s .' %
                  (abspath(outDir),))
        # output evaluation result
        fileName = self._outputFileName(currentTime, '-measure')
        self.measure = Measure.ratingMeasure(self.dao.testData)
        FileIO.writeFile(outDir, fileName, self.measure)

    def evalRanking(self):
        """Build a recommendation list for every test user and measure it.

        With ``-topN`` the ``N`` best-scored unrated items are recommended;
        with ``-threshold`` every unrated item scored above the threshold is.
        The lists (when output is enabled) and the measures are written to
        the output directory; the measures are stored in ``self.measure``.

        :raises SystemExit: with code -1 when the ranking setup has neither
            ``-topN`` nor ``-threshold``.
        """
        N = 0
        threshold = 0
        bThres = False
        bTopN = self.ranking.contains('-topN')
        if bTopN:
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print(
                    'N can not be larger than 100! It has been reassigned with 100'
                )
                N = 100
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print('No correct evaluation metric is specified!')
            raise SystemExit(-1)

        res = [
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        ]
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            for item in self.dao.item:
                prediction = self._denormalizedPrediction(user, item)
                if not bThres or prediction > threshold:
                    itemSet[item] = prediction

            # Rated items are not recommended again. In threshold mode a
            # rated item may already be absent, hence pop() and not del.
            for item in self.dao.userRated(user)[0]:
                itemSet.pop(self.dao.id2item[item], None)
            ranked = sorted(itemSet.items(),
                            key=lambda d: d[1],
                            reverse=True)
            if bTopN:
                recList[user] = ranked[0:N]
            else:
                recList[user] = ranked
                userN[user] = len(ranked)

            if i % 100 == 0:
                print('%s %s progress:%s/%s' %
                      (self.algorName, self.foldInfo, i, userCount))
            line = user + ':'
            for item, score in recList[user]:
                line += ' (' + item + ',' + str(score) + ')'
                if item in self.dao.testSet_u[user]:
                    line += '*'
            res.append(line + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        outDir = self.output['-dir']
        # output prediction result
        if self.isOutput:
            if bTopN:
                tag = '-top-' + str(N) + 'items'
            else:
                tag = '-threshold-' + str(threshold)
            FileIO.writeFile(outDir, self._outputFileName(currentTime, tag),
                             res)
            print('The Result has been output to  %s .' %
                  (abspath(outDir),))
        # output evaluation result
        fileName = self._outputFileName(currentTime, '-measure')
        if bTopN:
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        else:
            # Keep only the test items rated at or above the threshold; the
            # shallow copy leaves self.dao.testSet_u itself untouched.
            origin = self.dao.testSet_u.copy()
            for user in origin:
                ratings = origin[user]
                origin[user] = dict((item, threshold) for item in ratings
                                    if ratings[item] >= threshold)
            self.measure = Measure.rankingMeasure_threshold(
                origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)

    def execute(self):
        """Run the whole pipeline and return the evaluation measures.

        The model is loaded or built, ratings or rankings are evaluated
        according to the ranking setup, and the model is saved on request.
        """
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoadModel:
            print('Loading model %s...' % (self.foldInfo))
            self.loadModel()
        else:
            print('Initializing model %s...' % (self.foldInfo))
            self.initModel()
            print('Building Model %s...' % (self.foldInfo))
            self.buildModel()

        # predict the ratings or item ranking
        print('Predicting %s...' % (self.foldInfo))
        if self.ranking.isMainOn():
            self.evalRanking()
        else:
            self.evalRatings()

        # save model
        if self.isSaveModel:
            print('Saving model %s...' % (self.foldInfo))
            self.saveModel()

        return self.measure

    def performance(self):
        """Measure the rating predictions without writing any file.

        Unlike ``evalRatings`` the test data is left unmodified.

        :return: the result of ``Measure.ratingMeasure``, also stored in
            ``self.measure``.
        """
        res = []
        for entry in self.dao.testData:
            user, item, rating = entry
            res.append(
                [user, item, rating,
                 self._boundedPrediction(user, item)])
        self.measure = Measure.ratingMeasure(res)
        return self.measure