예제 #1
0
def executeES(indexName, typeName, listData):
    current_time = int(time.time())
    # indexName += '_' + ts2datetime(current_time)

    # print 'listData:',listData
    for list_data in listData:

        data = {}
        jsonData = json.loads(list_data)
        for key, val in jsonData.items():
            # print key, '====', val
            data[key] = val
            data['update_time'] = current_time
        print 'indexName', indexName
        print indexName == 'weibo_feedback_follow'
        #print indexName
        if indexName != 'weibo_feedback_group':
            #print data
            # xnr_user_no = uid2xnr_user_no(data["root_uid"])
            if uid2xnr_user_no(data["uid"]):
                xnr_user_no = uid2xnr_user_no(data["uid"])
            else:
                xnr_user_no = uid2xnr_user_no(data["root_uid"])
            #if not xnr_user_no:
            #    continue
            #else:
            #    pass
            #print data["root_uid"]
            #print data['uid']
            try:
                sensor_mark = judge_sensing_sensor(xnr_user_no, data['uid'])
                data['sensor_mark'] = sensor_mark

                trace_follow_mark = judge_trace_follow(xnr_user_no,
                                                       data['uid'])
                data['trace_follow_mark'] = trace_follow_mark

                data['sensitive_info'] = get_sensitive_info(
                    data['timestamp'], data['mid'])
                data['sensitive_user'] = get_sensitive_user(
                    data['timestamp'], data['uid'])
            except:
                pass

        # else:
        #     print 'group index else'
        #     _id = data["mid"]
            """
            # 旧的关注关系存储方式,弃用。@hanmc 2019-1-16 11:49:50
            print 'indexName:', indexName
            if indexName == 'weibo_feedback_follow':
                # 修改 _id、保存至fans_followers_es表
                print "root_uid", data["root_uid"]
                _id = data["root_uid"]+'_'+data["mid"]
                
                save_type = 'followers'
                follow_type = 'follow'
                try:
                    xnr_user_no = uid2xnr_user_no(data["root_uid"])
                    if xnr_user_no:
                        save_to_fans_follow_ES(xnr_user_no,data["uid"],save_type,follow_type)
                        save_to_redis_fans_follow(xnr_user_no,data["uid"],save_type)
                except Exception, e:
                    traceback.print_exc(e)

                    # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                    # data['sensor_mark'] = sensor_mark

                    # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                    # data['trace_follow_mark'] = trace_follow_mark
                #print 1111111111111111111111111111111111111111111111111111111
                print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)
            """

            # 新的关注关系存储方式
            if indexName == 'weibo_feedback_follow':
                root_uid = data['root_uid']
                uid = data['uid']
                xnr_user_no = uid2xnr_user_no(root_uid)

                sex_info = data['sex']
                if sex_info == 'male':
                    sex = 1
                elif sex_info == 'female':
                    sex = 2
                else:
                    sex = 0

                user_data = {
                    'platform': 'weibo',
                    'xnr_no': xnr_user_no,
                    'xnr_uid': root_uid,
                    'uid': uid,
                    'nickname': data.get('nick_name', ''),
                    'sex': sex,
                    'geo': data.get('geo', ''),
                    'fensi_num': data.get('fans', 0),
                    'guanzhu_num': data.get('follower', 0),
                    'photo_url': data.get('photo_url', ''),
                    'pingtaiguanzhu': 1,
                }
                update_result = update_weibo_xnr_relations(
                    root_uid, uid, user_data, update_portrait_info=True)

            # 新的粉丝关系存储方式
            elif indexName == 'weibo_feedback_fans':
                root_uid = data['root_uid']
                uid = data['uid']
                xnr_user_no = uid2xnr_user_no(root_uid)

                sex_info = data['sex']
                if sex_info == 'male':
                    sex = 1
                elif sex_info == 'female':
                    sex = 2
                else:
                    sex = 0

                user_data = {
                    'platform': 'weibo',
                    'xnr_no': xnr_user_no,
                    'xnr_uid': root_uid,
                    'uid': uid,
                    'nickname': data.get('nick_name', ''),
                    'sex': sex,
                    'geo': data.get('geo', ''),
                    'fensi_num': data.get('fans', 0),
                    'guanzhu_num': data.get('follower', 0),
                    'photo_url': data.get('photo_url', ''),
                    'pingtaifensi': 1,
                }
                update_result = update_weibo_xnr_relations(
                    root_uid, uid, user_data, update_portrait_info=True)
                """
                # 旧的关注关系存储方式,弃用。@hanmc 2019-1-17 11:47:08
                elif indexName == 'weibo_feedback_fans':
                    _id = data["root_uid"]+'_'+data["mid"]
                    xnr_user_no = uid2xnr_user_no(data["root_uid"])
                    save_type = 'fans'
                    follow_type = 'follow'
    
                    if xnr_user_no:
                        save_to_fans_follow_ES(xnr_user_no,data["uid"],save_type,follow_type)
                        save_to_redis_fans_follow(xnr_user_no,data["uid"],save_type)
    
                        # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                        # data['sensor_mark'] = sensor_mark
    
                        # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                        # data['trace_follow_mark'] = trace_follow_mark
                    try:
                        print 1111111
                        es.get(index=indexName,doc_type=typeName,id=_id)
                    except:
                        print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)
                """

            # print 'indexName', indexName
            # print indexName == 'weibo_feedback_comment'
            elif indexName == 'weibo_feedback_comment':
                print '+++++++++++++++++++++++++++++++++++++++++++++++'
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])
                date_time = ts2datetime(data['timestamp'])
                # print 'date!!!!!!!',date_time
                # print 'indexName_date:::',indexName_date
                mappings_func = weibo_feedback_comment_mappings
                _id = data["mid"]
                # print "_id", _id
                # print 'comment_id........',_id
                mappings_func(date_time)
                #print 'data:::',data
                #print indexName_date, typeName
                print 'indexName_date', indexName_date
                print 'typeName', typeName
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_retweet':
                indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_retweet_mappings
                _id = data["mid"]
                mappings_func(date_time)
                #print json.dumps(data, ensure_ascii=False)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_at':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_at_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'intex: ', indexName_date
                print 'doc_type: ', typeName
                print 'id: ', _id

                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_like':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_like_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_private':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])
                mappings_func = weibo_feedback_private_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

        else:

            _id = data["mid"]
            print 'save to es!!!!', es.index(index=indexName,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)

        # print 'data.........',data
        # print 'indexName....',indexName
        # print '_id......',_id
        # #print 'typeName.....',typeName
        # print 'es...',es

        #print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)

    print 'update %s ES done' % indexName
예제 #2
0
def executeES(indexName, typeName, listData):
    #current_time = int(time.time())
    #indexName += '_' + ts2datetime(current_time)

    #print 'listData:',listData
    for list_data in listData:

        data = {}
        jsonData = json.loads(list_data)
        for key, val in jsonData.items():
            # print key, '====', val
            data[key] = val
            # data['update_time'] = current_time

        if indexName != 'weibo_feedback_group':

            xnr_user_no = uid2xnr_user_no(data["root_uid"])

            sensor_mark = judge_sensing_sensor(xnr_user_no, data['uid'])
            data['sensor_mark'] = sensor_mark

            trace_follow_mark = judge_trace_follow(xnr_user_no, data['uid'])
            data['trace_follow_mark'] = trace_follow_mark

            data['sensitive_info'] = get_sensitive_info(
                data['timestamp'], data['mid'])
            data['sensitive_user'] = get_sensitive_user(
                data['timestamp'], data['uid'])

            if indexName == 'weibo_feedback_follow':
                # 修改 _id、保存至fans_followers_es表
                _id = data["root_uid"] + '_' + data["mid"]
                xnr_user_no = uid2xnr_user_no(data["root_uid"])

                save_type = 'followers'
                follow_type = 'follow'

                if xnr_user_no:
                    save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                           follow_type)
                    save_to_redis_fans_follow(xnr_user_no, data["uid"],
                                              save_type)

                    # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                    # data['sensor_mark'] = sensor_mark

                    # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                    # data['trace_follow_mark'] = trace_follow_mark
                print 'save to es!!!!', es.index(index=indexName,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_fans':
                _id = data["root_uid"] + '_' + data["mid"]
                xnr_user_no = uid2xnr_user_no(data["root_uid"])
                save_type = 'fans'
                follow_type = 'follow'

                if xnr_user_no:
                    save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                           follow_type)
                    save_to_redis_fans_follow(xnr_user_no, data["uid"],
                                              save_type)

                    # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                    # data['sensor_mark'] = sensor_mark

                    # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                    # data['trace_follow_mark'] = trace_follow_mark
                try:
                    es.get(index=indexName, doc_type=typeName, id=_id)
                except:
                    print 'save to es!!!!', es.index(index=indexName,
                                                     doc_type=typeName,
                                                     id=_id,
                                                     body=data)

            elif indexName == 'weibo_feedback_comment':
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])
                date_time = ts2datetime(data['timestamp'])
                # print 'date!!!!!!!',date_time
                # print 'indexName_date:::',indexName_date
                mappings_func = weibo_feedback_comment_mappings
                _id = data["mid"]
                # print 'comment_id........',_id
                mappings_func(date_time)
                # print 'data:::',data
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_retweet':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_retweet_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_at':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_at_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_like':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_like_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_private':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])
                mappings_func = weibo_feedback_private_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

        else:

            _id = data["mid"]
            print 'save to es!!!!', es.index(index=indexName,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)

        # print 'data.........',data
        # print 'indexName....',indexName
        # print '_id......',_id
        # #print 'typeName.....',typeName
        # print 'es...',es

        # print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)

    print 'update %s ES done' % indexName
예제 #3
0
def get_related_recommendation(task_detail):
    """Return recommended accounts for an XNR, each with sample weibos attached.

    The XNR's comma-separated ``monitor_keywords`` are turned into wildcard
    clauses on ``keywords_string``.  Candidate uids come from a terms
    aggregation over the flow-text index of ``S_DATE`` (test mode) or of the
    date 24 hours ago, ordered by the average of the sort field:

    * ``sort_item != 'friend'``: all matching weibos, averaged on
      ``sort_item`` (``'influence'`` is mapped to ``'user_fansnum'``).
    * ``sort_item == 'friend'``: in test mode the fixed ``FRIEND_LIST``;
      otherwise weibos by friends of the XNR's followers, averaged on
      ``fansnum``.

    Each uid is then looked up in the user-portrait index.  Portrait hits are
    returned with ``portrait_status=True`` and up to five weibos; uids without
    a portrait get a minimal record with ``portrait_status=False``.

    Args:
        task_detail: dict with ``'xnr_user_no'`` and ``'sort_item'``.

    Returns:
        list of dicts, one per recommended account (possibly empty).
    """
    xnr_user_no = task_detail['xnr_user_no']
    sort_item = task_detail['sort_item']
    es_result = es.get(index=weibo_xnr_index_name,
                       doc_type=weibo_xnr_index_type,
                       id=xnr_user_no)['_source']

    nest_query_list = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in es_result['monitor_keywords'].split(',')
    ]

    if S_TYPE == 'test':
        current_date = S_DATE
    else:
        current_date = ts2datetime(int(time.time() - 24 * 3600))
    flow_text_index_name = flow_text_index_name_pre + current_date

    # uid -> {'sort_item_value': averaged sort field}; stays empty when the
    # uid list does not come from an aggregation (friend mode under test).
    avg_sort_uid_dict = {}
    uid_list = []

    def _ranked_uid_aggs(field):
        # Top uids ordered by the average of `field` over their weibos.
        return {
            'uid_list': {
                'terms': {'field': 'uid', 'size': TOP_ACTIVE_SOCIAL,
                          'order': {'avg_sort': 'desc'}},
                'aggs': {'avg_sort': {'avg': {'field': field}}}
            }
        }

    def _collect_buckets(query, field, as_int):
        # Run the aggregation and record each uid with its averaged value.
        body = {'query': query, 'aggs': _ranked_uid_aggs(field)}
        buckets = es_flow_text.search(
            index=flow_text_index_name, doc_type='text',
            body=body)['aggregations']['uid_list']['buckets']
        for bucket in buckets:
            # The average is None when no weibo in the bucket carries the
            # field; treat that as 0 instead of failing in int()/round().
            raw = bucket['avg_sort']['value'] or 0
            value = int(raw) if as_int else round(raw, 2)
            uid_list.append(bucket['key'])
            avg_sort_uid_dict[bucket['key']] = {'sort_item_value': value}

    if sort_item != 'friend':
        if sort_item == 'influence':
            sort_item = 'user_fansnum'
        _collect_buckets({'bool': {'should': nest_query_list}},
                         sort_item,
                         as_int=(sort_item == 'user_fansnum'))
    elif S_TYPE == 'test':
        uid_list = FRIEND_LIST
    else:
        # The followers list is only needed on this path; a missing document
        # or field simply means there is nobody to expand from.
        try:
            recommend_list = es.get(
                index=weibo_xnr_fans_followers_index_name,
                doc_type=weibo_xnr_fans_followers_index_type,
                id=xnr_user_no)['_source']['followers_list']
        except Exception:
            recommend_list = []
        recommend_set_list = list(set(recommend_list))
        friends_set_list = list(set(get_friends_list(recommend_set_list)))

        friend_query = {
            'bool': {
                'must': [
                    {'terms': {'uid': friends_set_list}},
                    {'bool': {'should': nest_query_list}}
                ]
            }
        }
        _collect_buckets(friend_query, 'fansnum', as_int=True)

    results_all = []
    if not uid_list:
        return results_all

    # Loop-invariant: resolve the flow-text indices to search once.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name = get_flow_text_index_list(current_time)

    def _apply_sort_value(record, record_uid):
        # Overwrite the displayed metric with the aggregated value when one
        # exists.  Without an entry (friend mode under test) the record keeps
        # whatever it already has instead of raising KeyError.
        entry = avg_sort_uid_dict.get(record_uid)
        if entry is None:
            return
        if sort_item == 'sensitive':
            record['sensitive'] = entry['sort_item_value']
        else:
            record['fansnum'] = entry['sort_item_value']

    for uid in uid_list:
        portrait_query = {
            'query': {
                'filtered': {
                    'filter': {
                        'term': {'uid': uid}
                    }
                }
            }
        }
        portrait_hits = es_user_portrait.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=portrait_query)['hits']['hits']

        if portrait_hits:
            for hit in portrait_hits:
                source = hit['_source']
                portrait_uid = source['uid']
                # Nickname / photo lookup is disabled; the uid stands in.
                source['nick_name'] = portrait_uid
                source['photo_url'] = ''
                source['weibo_type'] = judge_follow_type(xnr_user_no,
                                                         portrait_uid)
                source['sensor_mark'] = judge_sensing_sensor(xnr_user_no,
                                                             portrait_uid)
                # Drop each field independently; either may be absent.
                source.pop('group', None)
                source.pop('activity_geo_dict', None)

                _apply_sort_value(source, portrait_uid)

                weibo_query = {
                    'query': {
                        'bool': {
                            'must': [
                                {'term': {'uid': portrait_uid}},
                                {'terms': {'message_type': [1, 3]}}
                            ]
                        }
                    },
                    'sort': {'retweeted': {'order': 'desc'}},
                    'size': 5
                }
                weibo_hits = es_flow_text.search(
                    index=index_name, doc_type=flow_text_index_type,
                    body=weibo_query)['hits']['hits']

                source['weibo_list'] = [w['_source'] for w in weibo_hits]
                source['portrait_status'] = True
                results_all.append(source)
        else:
            item_else = {
                'uid': uid,
                # Nickname / photo lookup is disabled; the uid stands in.
                'nick_name': uid,
                'photo_url': '',
                'weibo_type': judge_follow_type(xnr_user_no, uid),
                'sensor_mark': judge_sensing_sensor(xnr_user_no, uid),
                'portrait_status': False,
            }

            weibo_query = {
                'query': {
                    'term': {'uid': uid}
                },
                'sort': {'retweeted': {'order': 'desc'}}
            }
            weibo_hits = es_flow_text.search(
                index=index_name, doc_type=flow_text_index_type,
                body=weibo_query)['hits']['hits']

            weibo_list = [w['_source'] for w in weibo_hits]
            if weibo_list:
                # Same result as the former per-weibo overwrite: the last
                # returned weibo provides the fallback fan count.
                item_else['fansnum'] = weibo_list[-1]['user_fansnum']
            item_else['weibo_list'] = weibo_list
            item_else['friendsnum'] = 0
            item_else['statusnum'] = 0
            _apply_sort_value(item_else, uid)

            results_all.append(item_else)

    return results_all