Пример #1
0
def recommendByCF(userId):
    '''Recommend news to a user from the collaborative-filtering predictions.

    Looks up the news the user already rated in the module-level
    ``ratings_df``, removes them from ``predicted_user_df`` and returns the
    remaining news ordered by descending ``prediction``.

    input:
        @userId: value compared with the ``userId`` column of ``ratings_df``
    output:
        a list of at most 1000 newsId, or False when the user has no rating
        at all or when any exception is raised (the error is logged).
    '''
    try:
        user_rated_newsId = ratings_df.filter(col('userId') == userId).select(
            col('newsId')).rdd.flatMap(lambda x: x).collect()
        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print(user_rated_newsId)
        if not user_rated_newsId:
            return False
        predicted_not_rated_df = predicted_user_df.filter(
            ~predicted_user_df.newsId.isin(user_rated_newsId))
        predicted_highest_not_rated_df = predicted_not_rated_df.sort(
            predicted_not_rated_df.prediction, ascending=False)
        # Only the sorted frame is evaluated twice (show + take), so it is the
        # only one cached; it is released afterwards because it is specific to
        # this call and would otherwise stay in the cache.
        predicted_highest_not_rated_df.cache()
        try:
            predicted_highest_not_rated_df.show(20, truncate=False)
            recommendNewsList = predicted_highest_not_rated_df.select(
                col('newsId')).rdd.flatMap(lambda x: x).take(1000)
        finally:
            predicted_highest_not_rated_df.unpersist()
        # %s (not %d) so a non-integer userId cannot break the log call.
        Log().write("Recommend to user %s by CF success.Recommend %d news" %
                    (userId, len(recommendNewsList)))
        return recommendNewsList
    except Exception as e:
        # %s keeps the handler itself from raising TypeError on odd userIds.
        Log().write(
            "Something wrong when recommend to user(%s) by CF.Err info : %s" %
            (userId, e), "ERROR")
        return False
Пример #2
0
 def sendByPost(self, dataList, url='127.0.0.1', port=8887):
     '''POST every item of dataList to the server as a JSON document.

     Each item is serialised with json.dumps and sent in its own request to
     http://<url>:<port>/sendactions.
     input:
     @dataList: iterable of JSON-serialisable items (the original note
                describes it as the result of GetNews.getNewFromFile)
     @url: host name or address of the server
     @port: TCP port of the server
     '''
     # Requests to 127.0.0.1 must not go through a proxy: the proxy gives up
     # and answers 504. An empty ProxyHandler disables proxying; note that
     # install_opener makes this the opener used by every urllib2.urlopen.
     proxy_build = urllib2.ProxyHandler({})
     opener = urllib2.build_opener(proxy_build)
     urllib2.install_opener(opener)
     requrl = 'http://' + url + ':' + str(port) + '/sendactions'
     Log().write("Begin send news to server")
     sent = 0  # stays 0 when dataList is empty
     for sent, d in enumerate(dataList, 1):
         req = urllib2.Request(requrl)
         req.add_header('Content-Type', 'application/json')
         # Close the response right away so the connection is not leaked.
         urllib2.urlopen(req, json.dumps(d)).close()
         if sent % 10 == 0:
             Log().write('Already send %d actions to server' % sent)
     Log().write('send %d actions to server' % sent)
Пример #3
0
def recommendByTag(userId, userTagList=[u'娱乐', u'新闻']):
    '''Recommend, per tag, news the user has not read yet.

    A news item counts as read when ``ratings_df`` holds a row for this user
    with ``rating`` > 0 (per the original note, a pushed but unread news has
    rating 0). Unread news are taken from the module-level ``sorted_news_df``
    and matched against each tag with a SQL LIKE '%tag%' on the ``tag``
    column.

    input:
        @userId: value compared with the ``userId`` column of ``ratings_df``
        @userTagList: tags to recommend for; the default list is only read,
                      never modified
    output:
        a dict whose keys are tags and whose values are lists of at most 500
        news ids, ex: {'fun':[1,2,3] , 'phone':[5,6,7]}.
        False when userTagList is empty or when any exception is raised
        (the error is logged).
    '''
    try:
        # Checked first so that no Spark job runs when there is nothing to do.
        if len(userTagList) == 0:
            return False
        user_readed_newsId = ratings_df.filter(col('userId') == userId).filter(
            col('rating') > 0).select(
                col('newsId')).rdd.flatMap(lambda x: x).collect()
        has_history = len(user_readed_newsId) > 0
        if has_history:
            user_not_readed_df = sorted_news_df.filter(
                ~sorted_news_df.id.isin(user_readed_newsId))
        else:
            user_not_readed_df = sorted_news_df
        user_not_readed_df.cache()
        returnDict = {}
        total = 0
        try:
            for tag in userTagList:
                recommendListByTag = user_not_readed_df.filter(
                    col('tag').like(
                        u'%%%s%%' %
                        tag)).select('id').rdd.flatMap(lambda x: x).take(500)
                returnDict.setdefault(tag, recommendListByTag)
                total += len(recommendListByTag)
        finally:
            # Release only the per-user filtered frame; when the user has no
            # history the frame is the shared sorted_news_df and stays cached.
            if has_history:
                user_not_readed_df.unpersist()
        # %s (not %d) so a non-integer userId cannot break the log call.
        Log().write(
            "Recommend to user %s by tag %s success. Recommend %d news" %
            (userId, userTagList, total))

        return returnDict
    except Exception as e:
        # %s keeps the handler itself from raising TypeError on odd userIds.
        Log().write(
            "Something wrong when recommend to user(%s) by tag(%s).Err info : %s"
            % (userId, str(userTagList), e), "ERROR")
        return False
Пример #4
0
 def append(self, onelineData, fileName=fileName):
     '''Append one record to a file as a single JSON line.

     The record is serialised with json.dumps and written followed by a
     newline. When it is a dict holding a 'title' key, the title is logged.
     input:
         @onelineData: JSON-serialisable record, ex: {"a": 1, "b": 2}
         @fileName: path of the file to append to
     '''
     # Serialise and log before opening the file so that a failure in either
     # step cannot leave an open file handle behind.
     data = json.dumps(onelineData)
     if isinstance(onelineData, dict) and 'title' in onelineData:
         Log().write(u"Server get and save new : %s" %
                     onelineData['title'])
     with open(fileName, 'a') as f:
         f.write(data)
         f.write('\n')
Пример #5
0
 def getNewFromFile(self, folder, source=u'凤凰新闻'):
     '''Read every news file of a folder into a list of news dicts.

     The part of each file name before its first '.' becomes the title and
     the file content (lines stripped and concatenated) becomes the news
     content. Files whose encoding chardet cannot detect are skipped. Tags,
     page views and publish time are filled with random sample values taken
     from the module-level tagList / timeList.
     input:
     @folder : the folder containing the news files.
     @source : value stored under the 'from' key of every news.
     output:
     @result : a list of dicts.
              ex :[{title:***,tag:****,publishTime:***,from:***,\
              content:**** , 'pv':*** , id:*****},*****]
              tag: ex 'joy,military'
              id is the running count of news read successfully.
     '''
     debug = False
     result = []
     i = 0
     for new_file in os.listdir(folder):
         full_file = os.path.join(folder, new_file)
         # "with" closes the file on every path, including the skip below.
         with open(full_file, 'r') as f:
             content = ''.join(line.strip() for line in f)
         codetype = chardet.detect(content)['encoding']
         if codetype is None:
             continue
         one_new = {}
         one_new['title'] = new_file.split('.')[0]
         one_new['content'] = content.decode(codetype, 'ignore')
         # The random calls keep their original order (tags, pv, publish
         # time) so that a seeded generator yields the same values as before.
         one_new['tag'] = ','.join(random.sample(tagList, 2))
         one_new['from'] = source
         one_new['pv'] = random.randint(10, 10000)
         one_new['publishTime'] = random.sample(timeList, 1)[0]
         one_new['id'] = i
         result.append(one_new)
         Debug().debug(one_new['content'], isdebug=debug)
         Debug().debug(one_new['title'], isdebug=debug)
         i += 1
         if i % 10 == 0:
             Log().write('Finish read %d news' % i)
     return result
Пример #6
0
 def append(self,
            onelineData,
            fileName=fileName,
            structureList=structureList):
     '''Append one user-action record to a file as a single JSON line.

     The record is serialised with json.dumps and written followed by a
     newline. When it is a dict holding a 'userId' key, the userId / newsId
     pair is logged.
     input:
         @onelineData: JSON-serialisable record,
                       ex: {"userId": 1, "newsId": 2, "rating": 10}
         @fileName: path of the file to append to
         @structureList: not used by this method
     '''
     # Serialise and log before opening the file so that a failure in either
     # step (e.g. a record without 'newsId') cannot leak an open file handle.
     data = json.dumps(onelineData)
     if isinstance(onelineData, dict) and 'userId' in onelineData:
         Log().write(
             u'user %s read new %s' %
             (str(onelineData['userId']), str(onelineData['newsId'])))
     with open(fileName, 'a') as f:
         f.write(data)
         f.write('\n')
Пример #7
0
                               labelCol='rating',
                               metricName='rmse')
# Candidate values for als.setRank; one model is trained and scored per value.
ranks = [4, 8, 12]
errors = []
models = []
err = 0  # index into ranks of the rank currently being evaluated
min_err = float('inf')
best_rank = -1  # index into ranks of the lowest-error model seen so far
for rank in ranks:
    als.setRank(rank)
    model = als.fit(training_df)
    predict_df = model.transform(validation_df)
    # Keep only rows whose prediction is not NaN before scoring them.
    predict_ratings_df = predict_df.filter(
        predict_df.prediction != float('nan'))
    error = reg_eval.evaluate(predict_ratings_df)
    Log().write("When training als model , for %d rank the rmse is %f" %
                (rank, error))
    if error < min_err:
        min_err = error
        best_rank = err
    # Advance on every iteration, not only on improvement; otherwise err
    # stops matching the position of the current rank and best_rank can
    # point at the wrong entry of ranks.
    err += 1
    errors.append(error)
    models.append(model)
als.setRank(ranks[best_rank])
Log().write("The best model was traind with rank %d" % ranks[best_rank])

# Retrain with the selected rank on all ratings and keep the non-NaN
# predictions cached for the recommendation functions.
all_rating_model = als.fit(ratings_df)
predict_df = all_rating_model.transform(ratings_df)
predicted_user_df = predict_df.filter(col('prediction') != float('nan'))
predicted_user_df.cache()

Пример #8
0
        f.write('\n')
        f.close()


class SendNewsHandler(tornado.web.RequestHandler):
    '''Tornado handler that stores posted news through SaveData2File.'''

    def post(self):
        '''Decode the JSON request body and append it to the data file.

        A body that is not valid JSON is answered with HTTP 400 instead of
        surfacing as an unhandled exception (HTTP 500).
        '''
        try:
            data = tornado.escape.json_decode(self.request.body)
        except ValueError as e:
            raise tornado.web.HTTPError(400, "malformed JSON body: %s", e)
        SaveData2File().append(data)

    def get(self):
        '''Set a demo cookie and print the ``name`` query argument.

        get url
        http://127.0.0.1:8888?name=hp
        '''
        # Cookie expires 900 seconds from now.
        self.set_cookie('username', 'peng', expires=time.time() + 900)
        nowamagic = self.get_argument('name')
        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print(nowamagic)

def make_app():
    '''Build the tornado application that serves /sendnews.'''
    routes = [(r"/sendnews", SendNewsHandler)]
    return tornado.web.Application(routes)


if __name__ == "__main__":
    # Script entry point: bind the news server to its port, log it, then
    # hand control to the tornado IO loop.
    port = 8888
    app = make_app()
    app.listen(port)
    Log().write('Begin to listen port %d' % port)
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()
Пример #9
0
        f.write(data)
        f.write('\n')
        f.close()


class SendActionsHandler(tornado.web.RequestHandler):
    '''Tornado handler that stores posted user actions through SaveData2File.'''

    def post(self):
        '''Decode the JSON request body and append it to the data file.

        A body that is not valid JSON is answered with HTTP 400 instead of
        surfacing as an unhandled exception (HTTP 500).
        '''
        try:
            data = tornado.escape.json_decode(self.request.body)
        except ValueError as e:
            raise tornado.web.HTTPError(400, "malformed JSON body: %s", e)
        SaveData2File().append(data)

    def get(self):
        '''Set a demo cookie and print the ``name`` query argument.

        get url
        http://127.0.0.1:8888?name=hp
        '''
        # Cookie expires 900 seconds from now.
        self.set_cookie('username', 'peng', expires=time.time() + 900)
        nowamagic = self.get_argument('name')
        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print(nowamagic)


def make_app():
    '''Build the tornado application that serves /sendactions.'''
    routes = [(r"/sendactions", SendActionsHandler)]
    return tornado.web.Application(routes)


if __name__ == "__main__":
    # Script entry point: bind the action server to its port, log it, then
    # hand control to the tornado IO loop.
    port = 8887
    app = make_app()
    app.listen(port)
    Log().write('Save action server Begin to listen port %d' % port)
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()