Example #1
def search_user_type(uid_list):
    # Split the given uids into ordinary users and organizations based on
    # the 'verified_type' field stored in the user profile index.
    type_list = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                     body={'ids': uid_list}, _source=False,
                                     fields=['id', 'verified_type'])['docs']
    user_list1 = []
    org_list1 = []
    for i in type_list:
        if i['found'] == False:
            # No profile document for this uid: treat it as an ordinary user.
            user_list1.append(i['_id'])
        else:
            if 'verified_type' not in i.get('fields', {}):
                user_list1.append(i['_id'])
                continue
            verified_type = i['fields']['verified_type'][0]
            if int(verified_type) in org_list:
                org_list1.append(i['_id'])
            else:
                user_list1.append(i['_id'])
    return user_list1, org_list1
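
The loop above relies on the shape of an mget response when `_source` is disabled and specific `fields` are requested, as in the 1.x-era Elasticsearch client these examples appear to target. A minimal illustrative sketch of two entries from `['docs']` (values are made up; the structure is what search_user_type() reads):

# Illustrative mget entries for _source=False, fields=['id', 'verified_type'].
found_entry = {
    '_id': '1234567890',
    'found': True,
    'fields': {
        'id': ['1234567890'],
        'verified_type': [2],   # requested fields come back as lists, hence the [0] above
    },
}
missing_entry = {
    '_id': '9876543210',
    'found': False,             # missing docs carry no 'fields' key at all
}
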
Example #2
def show_weibo_list(message_type, ts, sort_item):
    # Return up to 100 weibos of the given type detected at timestamp `ts`,
    # sorted by `sort_item`, with the author's profile attached where available.
    query_body = {
        "query": {
            "bool":{
                "must":[
                    {"term":{"type": int(message_type)}},
                    {"term": {"detect_ts": int(ts)}}
                ]
            }
        },
        "size": 100,
        "sort":{sort_item:{"order": "desc"}}
    }

    text_results = []
    uid_list = []
    text_keys = ["text", "retweeted", "keywords_string", "mid", "comment", "user_fansnum", "timestamp", "geo", "uid"]
    es_results = es_prediction.search(index="social_sensing_text", doc_type="text", body=query_body)["hits"]["hits"]
    if not es_results:
        return []

    for item in es_results:
        item = item["_source"]
        tmp = dict()
        for key in text_keys:
            tmp[key] = item[key]
        text_results.append(tmp)
        uid_list.append(item["uid"])

    profile_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list})["docs"]
    # mget preserves request order, so profile_results[i] corresponds to uid_list[i].
    for i in range(len(uid_list)):
        tmp_profile = profile_results[i]
        if tmp_profile["found"]:
            tmp = dict()
            tmp["photo_url"] = tmp_profile["_source"]["photo_url"]
            tmp["nick_name"] = tmp_profile["_source"]["nick_name"]
            if not tmp["nick_name"]:
                tmp["nick_name"] = uid_list[i]
            text_results[i].update(tmp)

    return text_results
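
A hedged usage sketch for the function above; the argument values are hypothetical, and `sort_item` has to be a sortable field of the social_sensing_text documents, e.g. one of the keys in text_keys:

# Hypothetical call: type-2 messages detected at timestamp 1480003200,
# sorted by retweet count.
weibo_list = show_weibo_list("2", 1480003200, "retweeted")
for weibo in weibo_list[:3]:
    # nick_name is only present when the author's profile was found
    print weibo.get("nick_name", weibo["uid"]), weibo["retweeted"]
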
Example #3
def get_final_submit_user_info(uid_list):
    # Build rows of [uid, uname, location, fansnum, statusnum, normal_bci]
    # from the user profile index and the BCI history index.
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
    except:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list})['docs']
    except:
        bci_history_results = []
    # Get yesterday's maximum bci value; it is used below to normalize scores.
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{
            bci_key: {
                'order': 'desc'
            }
        }],
        'size': 1
    }
    #try:
    bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                           doc_type=bci_history_index_type,
                                           body=query_body,
                                           _source=False,
                                           fields=[bci_key])['hits']['hits']
    #except:
    #    bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    iter_count = 0
    for uid in uid_list:
        try:
            profile_item = profile_results[iter_count]
        except:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                normal_bci = math.log(bci / bci_max_value * 9 + 1, 10) * 100
            except:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append(
            [uid, uname, location, fansnum, statusnum, normal_bci])
        iter_count += 1

    return final_results
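
The normalization above maps a raw bci score onto a 0-100 scale logarithmically: bci = 0 yields 0 and bci = bci_max_value yields exactly 100, since log10(9 + 1) = 1. A small standalone check with illustrative float values (with integer operands Python 2 would truncate bci / bci_max_value, so the stored values are presumably floats):

import math

bci_max_value = 2500.0   # illustrative maximum taken from the sort-by-bci query
for bci in (0.0, 250.0, 2500.0):
    normal_bci = math.log(bci / bci_max_value * 9 + 1, 10) * 100
    print bci, round(normal_bci, 1)   # 0.0 -> 0.0, 250.0 -> ~27.9, 2500.0 -> 100.0
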
Example #4
def submit_identify_in_uid(input_data):
    # Register uploaded uids for computation, skipping uids that are already
    # in the user portrait index or already queued in the 'compute' hash.
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    compute_status = input_data['compute_status']
    relation_string = input_data['relation_string']
    recommend_style = input_data['recommend_style']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    compute_hash_name = 'compute'
    # submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    line_list = upload_data.split('\n')
    uid_list = []
    invalid_uid_list = []
    for line in line_list:
        # Uploaded lines may end with '\r' (Windows line endings); keep only the uid part.
        uid = line.split('\r')[0]
        #if len(uid)==10:
        #    uid_list.append(uid)
        if uid != '':
            uid_list.append(uid)
    if len(invalid_uid_list) != 0:
        return False, 'invalid user info', invalid_uid_list
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es_user_profile.mget(
            index=profile_index_name,
            doc_type=profile_index_type,
            body={'ids': uid_list},
            _source=False)['docs']
    except:
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list

    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    print 'new_uid_set:', new_uid_set
    print 'in_uid_set:', in_uid_set
    if len(in_uid_set) == 0:
        return False, 'all user in'
    #identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            # 'date' is the submit date taken from input_data above.
            r.hset(
                compute_hash_name, in_item,
                json.dumps([
                    date, compute_status, relation_string, recommend_style,
                    submit_user, 0
                ]))
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            # r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
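
Note that the return value has a different shape on each path: (False, 'invalid user info', invalid_uid_list), (False, 'all user in'), or (True, invalid_uid_list, have_in_uid_list, final_submit_user_list). A hedged sketch of a caller; the payload keys mirror the ones the function reads, the values are hypothetical:

# Hypothetical payload; keys match what submit_identify_in_uid() reads.
input_data = {
    'date': '2016-11-25',
    'user': 'admin',
    'operation_type': 'submit',
    'compute_status': 1,
    'relation_string': '',
    'recommend_style': 'uid',
    'upload_data': '1234567890\r\n2345678901\r\n',
}
result = submit_identify_in_uid(input_data)
if result[0]:
    status, invalid_uids, already_in, submitted = result
else:
    print 'submit failed:', result[1:]
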
Example #5
         doc_type=DOCTYPE_SENSITIVE_INDEX,
         body=query_sensitive_body,
         _source=False,
         fields=[sensitive_string])['hits']['hits']
     top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
 except Exception, reason:
     print Exception, reason
     top_sensitive = 400
 index_type = 'bci'
 user_bci_result = es_cluster.mget(
     index=index_name,
     doc_type=index_type,
     body={'ids': uid_list},
     _source=True)['docs']  #INFLUENCE,fans,status
 user_profile_result = es_user_profile.mget(index='weibo_user',
                                            doc_type='user',
                                            body={'ids': uid_list},
                                            _source=True)['docs']  # nick name, registered location
 # bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
 # sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
 max_evaluate_influ = get_evaluate_max(index_name)
 for i in range(0, len(uid_list)):
     uid = uid_list[i]
     bci_dict = user_bci_result[i]
     profile_dict = user_profile_result[i]
     # bci_history_dict = bci_history_result[i]
     # sensitive_history_dict = sensitive_history_result[i]
     #print sensitive_history_dict
     try:
         bci_source = bci_dict['_source']
     except:
         bci_source = None
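
With `_source=True`, each entry in `['docs']` either carries a '_source' dict or, for uids with no document, only metadata and 'found': False; that is why the snippet falls back to None in the except branch. An illustrative sketch (field names are made up):

# Illustrative mget entries for the _source=True calls above.
found_doc = {'_id': '1234567890', 'found': True,
             '_source': {'user_fansnum': 1024}}      # field names are illustrative
missing_doc = {'_id': '9876543210', 'found': False}  # no '_source' key -> KeyError -> except branch
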
Example #6
def current_status(mid):
    # Summarize how weibo `mid` is currently spreading: the top retweeting and
    # commenting users, each enriched with profile information.
    es_results = es_prediction.get(index="social_sensing_text",
                                   doc_type="text",
                                   id=mid)["_source"]
    uid = es_results["uid"]
    ts = es_results["timestamp"]
    print "mid result: ", es_results
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "root_mid": mid
                    }
                }, {
                    "term": {
                        "message_type": 3
                    }
                }]
            }
        },
        "aggs": {
            "hot_uid": {
                "terms": {
                    "field": "directed_uid",
                    "size": 11
                }
            }
        }
    }
    index_list = []
    # Flow-text indices are per day; cover the weibo's own day and the next one.
    for i in range(2):
        index_name = flow_text_index_name_pre + ts2datetime(ts)
        if es_flow_text.indices.exists(index=index_name):
            index_list.append(index_name)
        ts = ts + 3600 * 24

    results = es_flow_text.search(
        index=index_list, doc_type=flow_text_index_type,
        body=query_body)["aggregations"]["hot_uid"]["buckets"]
    retweet_dict = dict()
    for item in results:
        iter_uid = item["key"]
        if str(iter_uid) == str(uid):
            continue
        else:
            retweet_dict[str(iter_uid)] = item["doc_count"]

    print "retweet_dict: ", retweet_dict

    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "root_mid": mid
                    }
                }, {
                    "term": {
                        "message_type": 2
                    }
                }]
            }
        },
        "aggs": {
            "hot_uid": {
                "terms": {
                    "field": "directed_uid",
                    "size": 11
                }
            }
        }
    }

    index_name = flow_text_index_name_pre + ts2datetime(ts)
    results = es_flow_text.search(
        index=index_list, doc_type=flow_text_index_type,
        body=query_body)["aggregations"]["hot_uid"]["buckets"]
    comment_dict = dict()
    for item in results:
        iter_uid = str(item["key"])
        if iter_uid == str(uid):
            continue
        else:
            comment_dict[iter_uid] = item["doc_count"]

    print "comment_dict: ", comment_dict

    # user_profile
    uid_list = list(set(comment_dict.keys()) | set(retweet_dict.keys()))
    profile_results = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={"ids": uid_list})["docs"]
    profile_dict = dict()
    for item in profile_results:
        if item["found"]:
            item = item["_source"]
            iter_uid = str(item["uid"])
            tmp = dict()
            tmp["nick_name"] = item["nick_name"]
            if not tmp["nick_name"]:
                tmp["nick_name"] = iter_uid
            tmp["photo_url"] = item["photo_url"]
            profile_dict[iter_uid] = tmp
        else:
            # Profile not found: key by the missing doc's id and fall back to
            # the uid itself as the display name.
            iter_uid = str(item["_id"])
            tmp = dict()
            tmp["nick_name"] = iter_uid
            tmp["photo_url"] = ""
            profile_dict[iter_uid] = tmp

    hot_retweet_list = []
    retweet_uid_list = retweet_dict.keys()
    retweet_list = es_flow_text.search(index=index_list,
                                       doc_type="text",
                                       body={
                                           "query": {
                                               "bool": {
                                                   "must": [{
                                                       "terms": {
                                                           "uid":
                                                           retweet_uid_list
                                                       }
                                                   }, {
                                                       "term": {
                                                           "root_mid": mid
                                                       }
                                                   }]
                                               }
                                           },
                                           "size": 100
                                       })["hits"]["hits"]
    in_set = set()
    for item in retweet_list:
        item = item["_source"]
        iter_uid = str(item["uid"])
        if iter_uid in in_set:
            continue
        else:
            in_set.add(iter_uid)
        item["retweeted"] = retweet_dict[iter_uid]
        item["comment"] = query_retweeted(iter_uid, mid, ts, 2)  # 获取转发微博的评论量
        item.update(profile_dict[iter_uid])
        hot_retweet_list.append(item)

    hot_retweet_list = sorted(hot_retweet_list,
                              key=lambda x: x["retweeted"],
                              reverse=True)

    hot_comment_list = []
    comment_uid_list = comment_dict.keys()
    comment_list = es_flow_text.search(index=index_list,
                                       doc_type="text",
                                       body={
                                           "query": {
                                               "bool": {
                                                   "must": [{
                                                       "terms": {
                                                           "uid":
                                                           comment_uid_list
                                                       }
                                                   }, {
                                                       "term": {
                                                           "root_mid": mid
                                                       }
                                                   }]
                                               }
                                           },
                                           "size": 100
                                       })["hits"]["hits"]
    in_set = set()
    for item in comment_list:
        item = item["_source"]
        iter_uid = str(item["uid"])
        if iter_uid in in_set:
            continue
        else:
            in_set.add(iter_uid)
        item["comment"] = comment_dict[iter_uid]
        item["retweeted"] = query_retweeted(iter_uid, mid, ts, 3)  # 获取转发微博的评论量
        item.update(profile_dict[iter_uid])
        hot_comment_list.append(item)

    hot_comment_list = sorted(hot_comment_list,
                              key=lambda x: x["comment"],
                              reverse=True)

    results = dict()
    results["hot_retweeted"] = hot_retweet_list
    results["hot_comment"] = hot_comment_list

    return results
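
Both aggregations above are consumed the same way: the 'hot_uid' terms aggregation returns buckets keyed by directed_uid with a doc_count per bucket. A minimal illustrative sketch of the slice of the response the loops read (values are made up):

# Illustrative 'hot_uid' buckets and how they are turned into a dict.
buckets = [
    {"key": "1111111111", "doc_count": 42},   # directed_uid -> number of retweets/comments
    {"key": "2222222222", "doc_count": 17},
]
retweet_dict = dict()
for item in buckets:
    retweet_dict[str(item["key"])] = item["doc_count"]
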