Exemplo n.º 1
0
def adsPreferred(user_topic_dic, weibo_all, topic_word_weight_dic, k=30):
    '''
    Return the ad weibos whose topic the user prefers most.

    Each candidate is classified, mapped to a topic, and scored with the
    user's preference for that topic; the k best-scoring ones are returned.
    The returned dicts are the hits' own "_source" dicts, modified in place.

    :param user_topic_dic: the user's topic preferences, indexed by topic
    :param weibo_all: list of weibo/ad-weibo hits, each with a "_source" dict
    :param topic_word_weight_dic: TF-IDF weights of words under each topic
    :param k: maximum number of ad weibos to return
    :return: list of at most k weibo dicts, highest preference first
    '''
    source_by_mid = {}
    author_uids = set()
    for hit in weibo_all:
        source = hit["_source"]
        author_uids.add(source["uid"])
        # Data from non-production environments may lack the repost/comment
        # counters, so default them to 0 regardless of RUN_TYPE.
        source.setdefault('retweeted', 0)
        source.setdefault('comment', 0)
        source_by_mid[hit["_source"]["mid"]] = source

    # Profiles of the authors of the candidate weibos.
    weibo_user_profiles = search_user_profile_by_user_ids(author_uids)

    classifier = adsClassify()
    words_by_mid = classifier.adsPredict(weibo_all)
    # Score every classified ad with the user's preference for its topic.
    score_by_mid = {
        mid: user_topic_dic[judge_ads_topic(words, topic_word_weight_dic)]
        for mid, words in words_by_mid.items()
    }

    ranked = list(score_by_mid.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    limit = min(k, len(ranked))

    preferred = []
    for mid, _score in ranked[:limit]:
        entry = source_by_mid[mid]
        author = entry["uid"]
        entry["weibo_url"] = weiboinfo2url(author, mid)
        # Many authors may have no profile on record; fall back to "None".
        if author not in weibo_user_profiles:
            entry["photo_url"] = "None"
            entry["nick_name"] = "None"
        else:
            profile = weibo_user_profiles[author]
            entry["photo_url"] = profile["photo_url"]
            entry["nick_name"] = profile["nick_name"]
        preferred.append(entry)

    return preferred
Exemplo n.º 2
0
def adsPreferred(user_topic_dic, weibo_all, topic_word_weight_dic, k=30):
    '''
    Return the ad weibos whose topic the user prefers most.

    The input weibos are already ads carrying an "ads_keywords" field. Each
    one is mapped to a topic (stored back under "ads_topic") and scored with
    the user's preference for that topic; the k best-scoring ones are
    returned. The returned dicts are the hits' own "_source" dicts, modified
    in place.

    :param user_topic_dic: the user's topic preferences, indexed by topic
    :param weibo_all: list of ad-weibo hits, each with a "_source" dict
    :param topic_word_weight_dic: TF-IDF weights of words under each topic
    :param k: maximum number of ad weibos to return
    :return: list of at most k weibo dicts, highest preference first
    '''
    source_by_mid = {}
    score_by_mid = {}
    author_uids = set()

    for hit in weibo_all:
        source = hit["_source"]
        author_uids.add(source["uid"])
        mid = source["mid"]
        topic = judge_ads_topic(source["ads_keywords"], topic_word_weight_dic)
        source["ads_topic"] = topic
        score_by_mid[mid] = user_topic_dic[topic]

        # Data from non-production environments may lack the repost/comment
        # counters, so default them to 0.
        source.setdefault('retweeted', 0)
        source.setdefault('comment', 0)
        source_by_mid[mid] = source

    # Profiles of the authors of the candidate weibos.
    weibo_user_profiles = search_user_profile_by_user_ids(author_uids)

    ranked = list(score_by_mid.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    limit = min(k, len(ranked))

    preferred = []
    for mid, _score in ranked[:limit]:
        entry = source_by_mid[mid]
        author = entry["uid"]
        entry["weibo_url"] = weiboinfo2url(author, mid)
        # Many authors may have no profile on record; fall back to "None".
        if author not in weibo_user_profiles:
            entry["photo_url"] = "None"
            entry["nick_name"] = "None"
        else:
            profile = weibo_user_profiles[author]
            entry["photo_url"] = profile["photo_url"]
            entry["nick_name"] = profile["nick_name"]
        preferred.append(entry)

    return preferred
Exemplo n.º 3
0
def localRec(uid, k=200):
    '''
    Recommend weibos posted from IPs that share a prefix with the user's IP.

    Searches the flow-text indices of the seven days before "now" for weibos
    matching the user's IP with its last two dot-separated parts removed,
    keeps at most one weibo per distinct IP, drops those rejected by
    is_suit(), and enriches the rest with "len", "weibo_url", "photo_url" and
    "nick_name". The returned dicts are the hits' own "_source" dicts,
    modified in place.

    :param uid: id of the user to build recommendations for
    :param k: not used by this function; kept so existing callers still work
              (NOTE(review): presumably meant as a result cap -- confirm)
    :return: list of enriched weibo dicts
    '''
    # RUN_TYPE selects the reference time:
    #   0 -> the fixed test time RUN_TEST_TIME
    #   otherwise -> the current date
    if RUN_TYPE == 0:
        now_timestamp = datetime2ts(RUN_TEST_TIME)
    else:
        now_timestamp = datetime2ts(ts2datetime(time.time()))

    # One index per day, oldest (7 days back) first.
    flow_text_index_list = [
        flow_text_index_name_pre + ts2datetime(now_timestamp - DAY * days_back)
        for days_back in range(7, 0, -1)
    ]

    # An earlier approach matched on keywords and the user's geo location.
    # Filtering on text length (> 100 chars) directly in the query with a
    # regexp filter is possible but very slow, so suitability is checked
    # client-side with is_suit() instead.
    ip = get_user_ip(uid)
    # Keep everything but the last two dot-separated parts of the address
    # (the first two octets, assuming a dotted IPv4 string).
    ip_prefix = ".".join(ip.split(".")[:-2])
    # NOTE(review): the query filters on "text.ip" while the loop below reads
    # the "ip" field of each hit -- confirm the field name against the mapping.
    weibo_all = es_flow_text.search(index=flow_text_index_list,
                                    doc_type=ads_weibo_index_type,
                                    body={
                                        "query": {
                                            "bool": {
                                                "must": [{
                                                    "prefix": {
                                                        "text.ip": ip_prefix
                                                    }
                                                }]
                                            }
                                        },
                                        "size": 2000
                                    })["hits"]["hits"]

    # Hits without a "uid" are skipped in the loop below, so they must not
    # break the profile lookup either.
    weibo_user_uids = [hit["_source"]["uid"] for hit in weibo_all
                       if "uid" in hit["_source"]]
    user_profiles = search_user_profile_by_user_ids(weibo_user_uids)

    local_weibo_rec = []
    seen_ips = set()
    for hit in weibo_all:
        weibo = hit["_source"]
        weibo_text = weibo["text"]
        weibo_ip = weibo["ip"]
        if weibo_ip in seen_ips:
            continue
        # Only one weibo per IP: the IP is marked as seen before the
        # suitability check, so only the first hit of each IP is considered.
        seen_ips.add(weibo_ip)
        if not is_suit(weibo_text):
            continue
        try:
            mid = weibo["mid"]
            author_uid = weibo["uid"]
        except KeyError:
            # Incomplete document: cannot build a URL for it, skip it.
            continue
        weibo["len"] = len(weibo_text)
        weibo["weibo_url"] = weiboinfo2url(author_uid, mid)
        # Many authors may have no profile on record; fall back to "None".
        if author_uid in user_profiles:
            profile = user_profiles[author_uid]
            weibo["photo_url"] = profile["photo_url"]
            weibo["nick_name"] = profile["nick_name"]
        else:
            weibo["photo_url"] = "None"
            weibo["nick_name"] = "None"
        # Append regardless of whether a profile was found; previously the
        # append sat inside the else branch and dropped every weibo whose
        # author did have a profile.
        local_weibo_rec.append(weibo)
    return local_weibo_rec