Example #1
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}]
    }

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    uid_list = []
    for item in result:
        uid_list.append(item['_id'])

    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids":uid_list}, _source=True)['docs']
    profile_result = es_profile.mget(index="weibo_user",doc_type="user", body={"ids":uid_list}, _source=True)['docs']

    return_list = []
    rank = 1
    for i in range(len(result)):
        info = ['','','','','']
        info[0] = rank
        if profile_result[i]['found']:
            info[1] = profile_result[i]['_source'].get('photo_url','')
            info[3] = profile_result[i]['_source'].get('nick_name','')
        info[2] = result[i].get('_id','')
        info[4] = result[i]['_source']['vary']
        if portrait_result[i]['found']:
            info.append('1')
        else:
            info.append('0')
        return_list.append(info)
        rank += 1

    return return_list
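As a usage note (not part of the original example): the rows returned by query_vary_top_k are positional lists, and a hypothetical call might look like the sketch below. 'user_activity' and 'bci' are placeholder index/doc-type names, and es, es_portrait and es_profile are the module-level clients the function already assumes.

# hypothetical call; 'user_activity'/'bci' are placeholder index and doc-type names
rows = query_vary_top_k('user_activity', 'bci', top_k=10)
for rank, photo_url, uid, nick_name, vary, in_portrait in rows:
    # in_portrait is '1' if the uid exists in the user_portrait index, else '0'
    print rank, uid, vary, in_portrait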
Example #2
def get_vary_detail_info(vary_detail_dict, uid_list):
    results = {}
    #get uname
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                            body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found']==True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid

    #get new vary detail information
    for vary_pattern in vary_detail_dict:
        user_info_list = vary_detail_dict[vary_pattern]
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            uname= uname_dict[uid]
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list

    return results
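These examples lean on small time helpers (ts2datetime, ts2date, datetime2ts, DAY) imported from the surrounding project. A plausible minimal sketch of them, assuming ts2datetime/ts2date format local time and datetime2ts inverts ts2datetime; the real project may implement them differently:

import time

DAY = 3600 * 24

def ts2datetime(ts):
    # unix timestamp -> 'YYYY-MM-DD'
    return time.strftime('%Y-%m-%d', time.localtime(int(ts)))

def ts2date(ts):
    # unix timestamp -> 'YYYY-MM-DD HH:MM:SS'
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(ts)))

def datetime2ts(date):
    # 'YYYY-MM-DD' -> unix timestamp at the start of that day
    return int(time.mktime(time.strptime(date, '%Y-%m-%d')))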
Example #3
def get_group_list(task_name, submit_user):
    
    results = []
    task_id = submit_user + '-' + task_name
    if RUN_TYPE == 0:
        # use the test index in test mode; assigning to group_index_name here
        # would shadow the module-level name, so bind a separate local instead
        query_index_name = 'test_group_result'
    else:
        query_index_name = group_index_name
    try:
        es_results = es_group_result.get(index=query_index_name, doc_type=group_index_type, id=task_id)['_source']
        #jln: not present in the current ES instance on 9200
    except:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            importance = source['importance']
            normal_importance = math.log(importance / evaluate_max['importance'] * 9 + 1, 10) * 100
            influence = source['influence']
            normal_influence = math.log(influence / evaluate_max['influence'] * 9 + 1, 10) * 100
            activeness = source['activeness']
            normal_activeness = math.log(activeness / evaluate_max['activeness']* 9 + 1, 10) * 100
            sensitive = source['sensitive']
            normal_sensitive = math.log(sensitive/ evaluate_max['sensitive'] * 9 + 1, 10) * 100
            results.append([uid, uname, gender, location, normal_importance, normal_influence, normal_activeness, normal_sensitive])
        except:
            results.append([uid, '', '', '', '', '', '', ''])
    return results
Example #4
def get_group_member_name(task_name, submit_user):
    results = []
    task_id = submit_user + '-' + task_name
    #print es_group_result,group_index_name,group_index_type
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    print len(uid_list)
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    print len(user_portrait_result)
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        #results[uid] = uname
        dic = {}
        dic['ID'] = uid
        dic['name'] = uname
        results.append(dic)

    return results
Example #5
def ajax_get_group_detail():
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["fields"][iter_item][0].split('&'))
                            temp.append(item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                        elif iter_item == "importance":
                            temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                        elif iter_item == "influence":
                            temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)

    return json.dumps(portrait_detail)
Example #6
def get_group_member_name(task_name, submit_user):
    results = []
    task_id = submit_user + '-' + task_name
    #print es_group_result,group_index_name,group_index_type
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    print len(uid_list)
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    print len(user_portrait_result)
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        #results[uid] = uname
        dic = {}
        dic['ID'] = uid
        dic['name'] = uname
        results.append(dic)


    return results
Example #7
def get_vary_detail_info(vary_detail_dict, uid_list):
    results = {}
    #get uname
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                            body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid

    #get new vary detail information
    for vary_pattern in vary_detail_dict:
        user_info_list = vary_detail_dict[vary_pattern]
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            uname = uname_dict[uid]
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list

    return results
Example #8
def search_portrait_user(es,
                         number,
                         active_index,
                         active_type,
                         portrait_index,
                         portrait_type,
                         field="user_index"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:
            # no more active users to page through; avoid looping forever
            return return_list
        start += 100
        for item in user_list:
            if field == "vary":
                uid = item.get('uid',
                               '0')  # obtain uid, notice "uid" or "user"
            else:
                uid = item.get('user', '0')
            search_list.append(uid)  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        for item in search_result:
            if item["found"]:
                info = ['', '', '', '', '', '']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get(
                        'photo_url', '')
                    info[3] = profile_result[index]['_source'].get(
                        'nick_name', '')
                info[2] = search_result[index].get('_id', '')
                info[4] = user_list[index][field]
                info[5] = "1"
                return_list.append(info)
                rank += 1
                count_c += 1

                if count_c >= int(number):
                    return return_list
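A detail these examples rely on throughout: mget returns one entry per requested id, in the same order as the ids list, with found set to False for misses. That is what makes the positional pairing of search_result and profile_result valid. A minimal sketch (index and type names follow Example #1; the uids are placeholders):

docs = es_portrait.mget(index='user_portrait', doc_type='user',
                        body={'ids': ['uid_a', 'uid_b']}, _source=True)['docs']
for requested_uid, doc in zip(['uid_a', 'uid_b'], docs):
    # doc['_id'] matches the requested id even when doc['found'] is False
    print requested_uid, doc['found']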
Example #9
def search_portrait_user_in_activity(es, number, active_index, active_type, portrait_index, portrait_type, field="user_index"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:
            # no more active users to page through; avoid looping forever
            return return_list
        start += 100
        for item in user_list:
            if field == "vary":
                uid = item.get('uid', '0') # obtain uid, notice "uid" or "user"
            else:
                uid = item.get('user', '0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

        key_list = ["origin_weibo_retweeted_total_number", "origin_weibo_retweeted_average_number", "origin_weibo_retweeted_top_number", "origin_weibo_retweeted_brust_average", \
                   "origin_weibo_comment_total_number", "origin_weibo_comment_average_number", "origin_weibo_comment_top_number", "origin_weibo_retweeted_brust_average", \
                   "retweeted_weibo_retweeted_total_number", "retweeted_weibo_retweeted_average_number", "retweeted_weibo_retweeted_top_number", "retweeted_weibo_retweeted_brust_average", \
                   "retweeted_weibo_comment_total_number", "retweeted_weibo_comment_average_number", "retweeted_weibo_comment_top_number", "retweeted_weibo_retweeted_brust_average"]
        for item in search_result:
            if item["found"]:
                info = ['','','','','','']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get('photo_url','')
                    info[3] = profile_result[index]['_source'].get('nick_name','')
                info[2] = search_result[index].get('_id','')
                info[4] = user_list[index]['user_index']
                info[5] = "1"
                if field == 'origin_weibo_retweeted_brust_average':
                    info.append(user_list[index]['origin_weibo_retweeted_brust_average'])
                    for key in key_list:
                        info.append(user_list[index][key])
                elif field == 'origin_weibo_comment_brust_average':
                    info.append(user_list[index]['origin_weibo_comment_brust_average'])
                    for key in key_list:
                        info.append(user_list[index][key])
                else:
                    pass
                return_list.append(info)
                rank += 1
                count_c += 1

                if count_c >= int(number):
                    return return_list
Example #10
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }

    if top:
        result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits'][0]['_source'][sort_order]
    else:
        search_result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']

        uid_list = []
        for item in search_result:
            uid_list.append(item['_id'])
        profile_result = es_profile.mget(index=profile_index_name,doc_type=profile_index_type, body={"ids":uid_list}, _source=True)['docs']
        portrait_result = es_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, _source=True)['docs']

        result = []
        rank = 1
        for i in range(len(search_result)):
            info = ['','','','']
            info[0] = rank
            if profile_result[i]['found']:
                info[1] = profile_result[i]['_source'].get('photo_url','')
                info[3] = profile_result[i]['_source'].get('nick_name','')

            info[2] = search_result[i].get('_id','')
            if sort_order in ["user_index","origin_weibo_retweeted_brust_average","origin_weibo_comment_brust_average"]:
                info.append(search_result[i]['_source'][sort_order])
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_retweeted_top_number":
                info.append(search_result[i]['_source']['origin_weibo_retweeted_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_retweeted_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_comment_top_number":
                info.append(search_result[i]['_source']['origin_weibo_comment_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_comment_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")

            rank += 1
            result.append(info)

    return result
Example #11
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    #get two type relation about uid1 and uid2
    #search weibo list
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                                body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs']
    except:
        portrait_result = []
    
    for item in portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i*DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({'bool':{'must':[{'term':{'uid':uid1}}, {'term':{'directed_uid': int(uid2)}}]}})
        if type_mark=='out':
            query.append({'bool':{'must':[{'term':{'uid':uid2}}, {'term':{'directed_uid': int(uid1)}}]}})
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits']
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] =  source['uid']
            weibo['uname'] = uid2uname[weibo['uid']]
            weibo['directed_uid'] = str(source['directed_uid'])
            weibo['directed_uname'] = uid2uname[str(source['directed_uid'])]
            weibo_list.append(weibo)

    return weibo_list
Example #12
def search_max_single_field(field, index_name, doctype, top_k=3):

    # field = "origin_weibo_retweeted_top_number", "origin_weibo_comment_top_number"
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": [{field: {"order": "desc"}}],
        "size": top_k
    }

    
    return_list = []
    rank = 1
    count_c = 0
    start = 0

    while 1:
        search_list = []
        user_list = search_k(es, index_name, doctype, start, field, 100)
        if not user_list:
            # no more users to page through; avoid looping forever
            return return_list
        start += 100
        for item in user_list:
            uid = item.get('user','0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": search_list}, _source=True)["docs"]

        for i in range(len(search_result)):
            if search_result[i]['found']:
                info = ['','','','','','','1']
                info[0] = rank
                info[2] = search_result[i].get('_id','')

                if profile_result[i]['found']:
                    info[1] = profile_result[i]['_source'].get('photo_url','')
                    info[3] = profile_result[i]['_source'].get('nick_name','')

                if 'retweeted' in field:
                    temp_mid = user_list[i]['origin_weibo_top_retweeted_id']
                    info[5] = weiboinfo2url(info[2], temp_mid)
                    info[4] = user_list[i]['origin_weibo_retweeted_top_number']
                else:
                    temp_mid = user_list[i]['origin_weibo_top_comment_id']
                    info[5] = weiboinfo2url(info[2], temp_mid)
                    info[4] = user_list[i]['origin_weibo_comment_top_number']

                rank += 1
                return_list.append(info)

                if rank >= int(top_k)+1:
                    return return_list
Example #13
def get_activity_weibo(task_name, start_ts, submit_user):
    results = []
    task_id = submit_user + '-' + task_name
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type ,\
                id=task_id, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body = {'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
        else:
            uname = ''
        uid2uname[uid] = uname
    #step3: search time_segment weibo
    time_segment = FOUR_HOUR
    end_ts = start_ts + time_segment
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = []
    query.append({'terms': {'uid': uid_list}})
    query.append({'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}})
    try:
        flow_text_es_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, \
                body={'query':{'bool':{'must':query}}, 'sort':'timestamp', 'size':MAX_VALUE})['hits']['hits']
    except:
        flow_text_es_result = []
    for item in flow_text_es_result:
        weibo = {}
        source = item['_source']
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        weibo['geo'] = '\t'.join(source['geo'])
        results.append(weibo)

    return results
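The time-window search in step 3 combines a terms filter on the group's uids with a range filter on timestamp inside a bool/must query. The same query body, shown in isolation with placeholder values (the date, timestamps and uids are assumptions):

window_query = {
    'query': {'bool': {'must': [
        {'terms': {'uid': ['uid_a', 'uid_b']}},                                      # group members
        {'range': {'timestamp': {'gte': 1450000000, 'lt': 1450000000 + 4 * 3600}}}   # four-hour slice
    ]}},
    'sort': 'timestamp',
    'size': 100
}
hits = es_flow_text.search(index=flow_text_index_name_pre + '2015-12-13',
                           doc_type=flow_text_index_type, body=window_query)['hits']['hits']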
Example #14
def search_tag(es, number, active_index, active_type, portrait_index,
               portrait_type, tag):

    #field_dict = {"domain":"art"}
    return_list = []
    count_s = 0
    count_c = 0
    start = 0
    rank = 1

    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start,
                             "user_index", 10000)
        if not user_list:
            # no more active users to page through; avoid looping forever
            return return_list
        start += 10000
        for item in user_list:
            uid = item.get('user', '0')
            search_list.append(uid)  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        for item in search_result:
            count_s += 1
            if item['found'] and tag in item['_source']['domain']:
                info = ['', '', '', '', '', '', '']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get(
                        'photo_url', '')
                    info[3] = profile_result[index]['_source'].get(
                        'nick_name', '')
                info[2] = search_result[index].get('_id', '')
                info[4] = user_list[index]['user_index']
                info[5] = search_result[index]['_source'].get('activeness', '')
                info[6] = search_result[index]['_source'].get('importance', '')

                rank += 1
                return_list.append(info)

                if rank >= int(number) + 1:
                    return return_list

        if count_s > 100000:
            return return_list
Example #15
def get_activity_weibo(task_name, start_ts, submit_user):
    results = []
    task_id = submit_user + '-' + task_name
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type ,\
                id=task_id, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body = {'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found']==True:
            uname = item['fields']['uname'][0]
        else:
            uname = ''
        uid2uname[uid] = uname
    #step3: search time_segment weibo
    time_segment = FOUR_HOUR
    end_ts = start_ts + time_segment
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = []
    query.append({'terms':{'uid': uid_list}})
    query.append({'range':{'timestamp':{'gte':start_ts, 'lt':end_ts}}})
    try:
        flow_text_es_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, \
                body={'query':{'bool':{'must':query}}, 'sort':'timestamp', 'size':MAX_VALUE})['hits']['hits']
    except:
        flow_text_es_result = []
    for item in flow_text_es_result:
        weibo = {}
        source = item['_source']
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        weibo['geo'] = '\t'.join(source['geo'])
        results.append(weibo)

    return results
Example #16
def delete_group_results(task_name, submit_user):
    task_id = submit_user + '-' + task_name
    #step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return False
    uid_list = group_result['uid_list']
    #step2: update group_tag in user_portrait
    query_body = {'query': {'term': {'group': task_id}}}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids': uid_list})['docs']
    except:
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            try:
                source = item['_source']
            except:
                source = {}
            try:
                group_tag = source['group']
            except:
                group_tag = ''
            if group_tag != '':
                new_group_tag_list = []
                group_tag_list = group_tag.split('&')
                for group_tag_item in group_tag_list:
                    if group_tag_item != task_id and group_tag_item != '[email protected]':
                        new_group_tag_list.append(group_tag_item)
                new_group_tag = '&'.join(new_group_tag_list)
            else:
                new_group_tag = ''
            action = {'update': {'_id': uid}}
            bulk_action.extend([action, {'doc': {'group': new_group_tag}}])
    if bulk_action:
        # print 'bulk_action:', bulk_action
        es_user_portrait.bulk(bulk_action,
                              index=portrait_index_name,
                              doc_type=portrait_index_type)
    #step3: delete group results in group_manage
    try:
        print 'yes delete'
        result = es.delete(index=index_name, doc_type=index_type, id=task_id)
    except:
        return False
    return True
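The bulk_action list built above interleaves an action dict with its partial document, which is the action/payload pairing the Elasticsearch bulk API expects for updates. The pattern in isolation, with placeholder uids and tags:

bulk_action = []
for uid, new_group_tag in [('uid_a', 'tag1&tag2'), ('uid_b', '')]:  # placeholder data
    bulk_action.append({'update': {'_id': uid}})            # action line
    bulk_action.append({'doc': {'group': new_group_tag}})   # partial document
if bulk_action:
    es_user_portrait.bulk(bulk_action, index=portrait_index_name,
                          doc_type=portrait_index_type)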
Example #17
def ajax_get_group_detail():
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage,
                           doc_type=doc_type_group,
                           id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name,
                                     doc_type=portrait_index_type,
                                     body={"ids": uid_list},
                                     fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(
                                item["fields"][iter_item][0].split('&'))
                            temp.append(
                                item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(
                                math.log(
                                    item['fields']['activeness'][0] /
                                    float(top_activeness) * 9 + 1, 10) * 100)
                        elif iter_item == "importance":
                            temp.append(
                                math.log(
                                    item['fields']['importance'][0] /
                                    float(top_importance) * 9 + 1, 10) * 100)
                        elif iter_item == "influence":
                            temp.append(
                                math.log(
                                    item['fields']['influence'][0] /
                                    float(top_influence) * 9 + 1, 10) * 100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)

    return json.dumps(portrait_detail)
Example #18
def delete_group_results(task_name, submit_user):
    task_id = submit_user + '-' + task_name
    #step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return False
    uid_list = group_result['uid_list']
    #step2: update group_tag in user_portrait
    query_body = {'query':{'term':{'group': task_id}}}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids': uid_list})['docs']
    except:
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            try:
                source = item['_source']
            except:
                source = {}
            try:
                group_tag = source['group']
            except:
                group_tag = ''
            if group_tag != '':
                new_group_tag_list = []
                group_tag_list = group_tag.split('&')
                for group_tag_item in group_tag_list:
                    if group_tag_item != task_id and group_tag_item != '[email protected]':
                        new_group_tag_list.append(group_tag_item)
                new_group_tag = '&'.join(new_group_tag_list)
            else:
                new_group_tag = ''
            action = {'update':{'_id': uid}}
            bulk_action.extend([action, {'doc': {'group': new_group_tag}}])
    if bulk_action:
        print 'bulk_action:', bulk_action
        es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type)
    #step3: delete group results in group_manage
    try:
        print 'yes delete'
        result = es.delete(index=index_name, doc_type=index_type, id=task_id)
    except:
        return False
    return True
Example #19
def show_vary_detail(task_name, submit_user, vary_pattern):
    results = []
    task_id = submit_user + '-' + task_name
    #identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return 'group task is not exist'
    #identify the task status=1
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    #get vary_detail
    vary_pattern_list = vary_pattern.split('-')
    vary_pattern_key = '&'.join(vary_pattern_list)
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]
    #get user name
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid
    #get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uname = uname_dict[vary_item[0]]
        start_date = ts2datetime(vary_item[1])
        end_date = ts2datetime(vary_item[2])
        new_detail.append([vary_item[0], uname, start_date, end_date])

    return new_detail
Example #20
def ajax_get_task_detail_info():
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    #task_detail['keywords'] = json.loads(task_detail['keywords'])
    #task_detail["sensitive_words"]= json.loads(task_detail["sensitive_words"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        temp_list = []
        """
        temp_list.append(history_status[-1])
        print history_status
        for item in history_status[:-1]:
            temp_list.append(item)
        """
        sorted_list = sorted(history_status, key=lambda x: x, reverse=True)
        task_detail['history_status'] = sorted_list
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []

    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids":
                                       task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail,
                                     key=lambda x: x[5],
                                     reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
Example #21
def show_vary_detail(task_name, submit_user, vary_pattern):
    results = []
    task_id = submit_user + '-' + task_name
    #identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return 'group task is not exist'
    #identify the task status=1
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    #get vary_detail
    vary_pattern_list = vary_pattern.split('-')
    vary_pattern_key = '&'.join(vary_pattern_list)
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]
    #get user name
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found']==True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid
    #get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uname = uname_dict[vary_item[0]]
        start_date = ts2datetime(vary_item[1])
        end_date = ts2datetime(vary_item[2])
        new_detail.append([vary_item[0], uname, start_date, end_date])
    
    return new_detail
Example #22
def get_group_list(task_name, submit_user):

    results = []
    task_id = submit_user + '-' + task_name
    if RUN_TYPE == 0:
        # use the test index in test mode; assigning to group_index_name here
        # would shadow the module-level name, so bind a separate local instead
        query_index_name = 'test_group_result'
    else:
        query_index_name = group_index_name
    try:
        es_results = es_group_result.get(index=query_index_name,
                                         doc_type=group_index_type,
                                         id=task_id)['_source']
        #jln: not present in the current ES instance on 9200
    except:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            importance = source['importance']
            normal_importance = math.log(
                importance / evaluate_max['importance'] * 9 + 1, 10) * 100
            influence = source['influence']
            normal_influence = math.log(
                influence / evaluate_max['influence'] * 9 + 1, 10) * 100
            activeness = source['activeness']
            normal_activeness = math.log(
                activeness / evaluate_max['activeness'] * 9 + 1, 10) * 100
            sensitive = source['sensitive']
            normal_sensitive = math.log(
                sensitive / evaluate_max['sensitive'] * 9 + 1, 10) * 100
            results.append([
                uid, uname, gender, location, normal_importance,
                normal_influence, normal_activeness, normal_sensitive
            ])
        except:
            results.append([uid, '', '', '', '', '', '', ''])
    return results
Example #23
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{
            sort_index: {
                "order": "desc"
            }
        }]
    }

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    uid_list = []
    for item in result:
        uid_list.append(item['_id'])

    portrait_result = es_portrait.mget(index="user_portrait",
                                       doc_type="user",
                                       body={"ids": uid_list},
                                       _source=True)['docs']
    profile_result = es_profile.mget(index="weibo_user",
                                     doc_type="user",
                                     body={"ids": uid_list},
                                     _source=True)['docs']

    return_list = []
    rank = 1
    for i in range(len(result)):
        info = ['', '', '', '', '']
        info[0] = rank
        if profile_result[i]['found']:
            info[1] = profile_result[i]['_source'].get('photo_url', '')
            info[3] = profile_result[i]['_source'].get('nick_name', '')
        info[2] = result[i].get('_id', '')
        info[4] = result[i]['_source']['vary']
        if portrait_result[i]['found']:
            info.append('1')
        else:
            info.append('0')
        return_list.append(info)
        rank += 1

    return return_list
Example #24
def portrait_user_vary(es, number, active_index, active_type, portrait_index, portrait_type, field="vary"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    try:
        while 1:
            search_list = []
            user_list = search_k(es, active_index, active_type, start, field, 100)
            if not user_list:
                # no more active users to page through; avoid looping forever
                break
            start += 100
            for item in user_list:
                uid = item.get('uid', '0') # obtain uid, notice "uid" or "user"
                search_list.append(uid) # uid list
            search_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids": search_list}, _source=True)["docs"]
            profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

            for item in search_result:
                count_c += 1
                if item["found"]:
                    info = ['','','','','','1']
                    info[0] = rank
                    index = search_result.index(item)

                    if profile_result[index]['found']:
                        info[1] = profile_result[index]['_source'].get('photo_url','')
                        info[3] = profile_result[index]['_source'].get('nick_name','')
                    info[2] = search_result[index].get('_id','')
                    info[4] = user_list[index]['vary']
                    return_list.append(info)
                    rank += 1
                    if rank == int(number)+1:
                        return return_list

            if count_c > 10000:
                break
    except RequestError:
        print "timeout"

    return return_list
Example #25
def filter_in_uid(input_dict):
    input_uid = input_dict.keys()
    all_count = len(input_uid)
    iter_count = 0
    in_portrait_result = []

    while iter_count < all_count:
        iter_user_list = input_uid[iter_count: iter_count+FILTER_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                    body={'ids': iter_user_list}, _source=False, fields=['photo_url', 'uname'])['docs']
        except:
            portrait_result = []
        if portrait_result:
            iter_in_portrait = [[item['_id'], item['fields']['uname'][0], item['fields']['photo_url'][0], input_dict[item['_id']]] for item in portrait_result if item['found']==True]
            in_portrait_result.extend(iter_in_portrait)
        iter_count += FILTER_ITER_COUNT
    
    return in_portrait_result
Example #26
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, tag):

    #field_dict = {"domain":"art"}
    return_list = []
    count_s = 0
    count_c = 0
    start = 0
    rank = 1

    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, "user_index", 10000)
        if not user_list:
            # no more active users to page through; avoid looping forever
            return return_list
        start += 10000
        for item in user_list:
            uid = item.get('user', '0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]
        for item in search_result:
            count_s += 1
            if item['found'] and tag in item['_source']['domain']:
                info = ['','','','','','','']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get('photo_url','')
                    info[3] = profile_result[index]['_source'].get('nick_name','')
                info[2] = search_result[index].get('_id','')
                info[4] = user_list[index]['user_index']
                info[5] = search_result[index]['_source'].get('activeness','')
                info[6] = search_result[index]['_source'].get('importance','')

                rank += 1
                return_list.append(info)

                if rank >= int(number)+1:
                    return return_list

        if count_s > 100000:
            return return_list
Example #27
def ajax_get_task_detail_info():
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    #task_detail['keywords'] = json.loads(task_detail['keywords'])
    #task_detail["sensitive_words"]= json.loads(task_detail["sensitive_words"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        temp_list = []
        """
        temp_list.append(history_status[-1])
        print history_status
        for item in history_status[:-1]:
            temp_list.append(item)
        """
        sorted_list = sorted(history_status, key=lambda x:x, reverse=True)
        task_detail['history_status'] = sorted_list
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []

    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
Example #28
def get_group_member_name(task_name, submit_user):
    results = {}
    task_id = submit_user + '-' + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        results[uid] = uname

    return results
Example #29
def get_group_member_name(task_name, submit_user):
    results = {}
    task_id = submit_user + '-' + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        results[uid] = uname

    return results
Example #30
def group_user_weibo(task_name, submit_user, sort_type):
    weibo_list = []
    now_date = ts2datetime(time.time())
    if sort_type == 'retweet':
        sort_type = 'retweeted'
    #run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    #step1: get group user
    task_id = submit_user + '-' + task_name
    try:
        group_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        group_exist_result = {}
    if not group_exist_result:
        return 'group no exist'
    #step2: get user weibo list
    uid_list = group_exist_result['uid_list']
    for i in range(6, -1, -1):
        iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        index_name = flow_text_index_name_pre + iter_date
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'sort':[{sort_type: {'order': 'desc'}}], 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    #sort_weibo_list = sorted(weibo_list, key=lambda x:x['_source'][sort_type], reverse=True)[:100]
    sort_weibo_list = weibo_list
    #step3: get user name
    try:
        portrait_exist_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':uid_list})['docs']
    except:
        portrait_exist_result = []
    uid2uname_dict = {}
    for portrait_item in portrait_exist_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            source = portrait_item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        uid2uname_dict[uid] = uname
    weibo_list = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        uname = uid2uname_dict[uid]
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        sentiment = source['sentiment']
        weibo_url = weiboinfo2url(uid, mid)
        #run_type:
        if RUN_TYPE == 1:
            try:
                retweet_count = source['retweeted']
            except:
                retweet_count = 0
            try:
                comment_count = source['comment']
            except:
                comment_count = 0
            try:
                sensitive_score = source['sensitive']
            except:
                sensitive_score = 0
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        weibo_list.append([
            mid, uid, uname, text, ip, city, timestamp, date, retweet_count,
            comment_count, sensitive_score, weibo_url
        ])
    if sort_type == 'timestamp':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[6], reverse=True)
    elif sort_type == 'retweeted':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[8], reverse=True)
    elif sort_type == 'comment':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[9], reverse=True)
    elif sort_type == 'sensitive':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[10], reverse=True)
    else:
        # fall back to the unsorted list so new_weibo_list is always defined
        new_weibo_list = weibo_list
    return new_weibo_list
Example #31
def get_temporal_rank(task_type, sort="retweeted", number=100):
    number = int(number) - 1
    if int(task_type) == 0:  # cumulative, up to now
        sort_list = r.zrange("influence_%s" % sort,
                             0,
                             number,
                             withscores=True,
                             desc=True)
    elif int(task_type) == 1:
        sort_list = r.zrange("influence_%s_1" % sort,
                             0,
                             number,
                             withscores=True,
                             desc=True)
    elif int(task_type) == 2:
        sort_list = r.zrange("influence_%s_2" % sort,
                             0,
                             number,
                             withscores=True,
                             desc=True)
    elif int(task_type) == 3:
        sort_list = r.zrange("influence_%s_3" % sort,
                             0,
                             number,
                             withscores=True,
                             desc=True)
    else:
        sort_list = r.zrange("influence_%s_4" % sort,
                             0,
                             number,
                             withscores=True,
                             desc=True)

    uid_list = []
    for item in sort_list:
        uid_list.append(item[0])

    if sort == "retweeted":
        other = "comment"
    else:
        other = "retweeted"

    results = []
    # look up background profile information
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name,
                                              doc_type=profile_index_type,
                                              body={"ids": uid_list})["docs"]
        bci_result = es_user_profile.mget(
            index="bci_history",
            doc_type="bci",
            body={"ids": uid_list},
            _source=False,
            fields=['user_fansnum', "weibo_month_sum"])["docs"]
        count = 0
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            tmp.append(item['_id'])
            if item['found']:
                item = item['_source']
                tmp.append(item['nick_name'])
                tmp.append(item['statusnum'])
                tmp.append(item['user_location'])
                tmp.append(item['fansnum'])
            else:
                tmp.extend(['', 0, '', 0])
            try:
                user_fansnum = bci_result[count]['fields']['user_fansnum'][0]
                tmp[4] = user_fansnum
            except:
                pass
            try:
                weibo_number = bci_result[count]['fields']["weibo_month_sum"][
                    0]
                tmp[2] = weibo_number
            except:
                pass
            count_1 = int(sort_list[index][1])
            if int(task_type) == 0:
                tmp_count = r.zscore("influence_%s" % other, _id)
                if tmp_count:
                    count_2 = int(tmp_count)
                else:
                    count_2 = 0
            else:
                tmp_count = r.zscore("influence_%s_%s" % (other, task_type),
                                     _id)
                if tmp_count:
                    count_2 = int(tmp_count)
                else:
                    count_2 = 0
            if sort == "retweeted":
                tmp.append(count_1)
                tmp.append(count_2)
            else:
                tmp.append(count_2)
                tmp.append(count_1)
            results.append(tmp)
            count += 1

    if uid_list:
        count = 0
        portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                doc_type=portrait_index_type,
                                                body={"ids": uid_list})["docs"]
        for item in portrait_result:
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
            count += 1

    return results
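
A usage sketch for get_temporal_rank (hedged: it depends on the module-level Redis client r and the Elasticsearch clients/index names configured elsewhere in this project, so it only runs inside that environment; the argument values are placeholders). Each returned row is [uid, nick_name, status/weibo count, location, fans count, retweeted count, comment count, in_portrait flag]:

top_retweeted = get_temporal_rank(0, sort="retweeted", number=20)
for row in top_retweeted:
    uid, nick_name = row[0], row[1]
    in_portrait = (row[-1] == "1")
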
Пример #32
0
def search_group_results(task_name, module, submit_user):
    result = {}
    if RUN_TYPE == 0:
        #jln
        #task_id = '媒体'
        #group_index_type='text'
        task_id = submit_user + '-' + task_name
        group_index_type = 'group'
    else:
        task_id = submit_user + '-' + task_name
    #print es_group_result,group_index_name,group_index_type,task_id
    #step1:identify the task_name exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
               id=task_id)['_source']
        print source
    except:
        return 'group task does not exist'
    #step2: identify the task status=1(analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #step3:get module result
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        #need to delete
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        result['activity_geo_disribution'] = json.loads(
            source['activity_geo_distribution'])
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])

        #yuanhuiru
        uid_list = source['uid_list']

        user_photo_result = es_user_portrait.mget(index='user_portrait_1222',
                                                  doc_type='user',
                                                  body={'ids': uid_list},
                                                  fields=['photo_url'])['docs']
        influ_value_result = es_user_portrait.mget(index='user_portrait_1222',
                                                   doc_type='user',
                                                   body={'ids': uid_list},
                                                   fields=['influence'
                                                           ])['docs']
        result['photo_url'] = []
        result['influence'] = []
        for item in user_photo_result:
            #uid = item['_id']
            if item['found'] == True:
                # use a distinct name so the group document held in 'source' is not overwritten
                item_fields = item['fields']
                photo_url = item_fields['photo_url']
            else:
                photo_url = 'unknown'

            result['photo_url'].append(photo_url)

        #print 'user_photo', result['photo_url']

        for item in influ_value_result:
            #uid = item['_id']
            if item['found'] == True:
                item_fields = item['fields']
                influence = item_fields['influence']
            else:
                influence = 'unknown'

            result['influence'].append(influence)

        #print 'influence', result['influence']

        new_geo = {}
        for uid, geos in result['activity_geo_disribution'].iteritems():
            for geo, count in geos.iteritems():
                geo = geo.split('\t')
                if geo[0] == u'中国':
                    if len(geo) == 1:
                        geo.extend([u'未知', u'未知'])
                    elif len(geo) == 2:
                        geo.append(u'未知')
                    try:
                        new_geo[geo[1]]['total'] += count
                    except:
                        new_geo[geo[1]] = {'total': count}
                    try:
                        new_geo[geo[1]][geo[2]] += count
                    except:
                        new_geo[geo[1]][geo[2]] = count

        result['new_geo'] = new_geo
        try:
            vary_detail_geo_dict = json.loads(source['vary_detail_geo'])
        except:
            vary_detail_geo_dict = {}

        #uid_list = source['uid_list']

        if vary_detail_geo_dict != {}:
            result['vary_detail_geo'] = get_vary_detail_info(
                vary_detail_geo_dict, uid_list)
        else:
            result['vary_detail_geo'] = {}

        try:
            main_start_geo_dict = json.loads(source['main_start_geo'])
        except:
            main_start_geo_dict = {}
        result['main_start_geo'] = sorted(main_start_geo_dict.items(),
                                          key=lambda x: x[1],
                                          reverse=True)

        try:
            main_end_geo_dict = json.loads(source['main_end_geo'])
        except:
            main_end_geo_dict = {}
        result['main_end_geo'] = sorted(main_end_geo_dict.items(),
                                        key=lambda x: x[1],
                                        reverse=True)
        #all_geo_list = list(set(main_start_geo_dict.keys()) | set(main_end_geo_dict.keys()))
        #result['geo_lat_lng'] = get_lat_lng(all_geo_list)
        print 'result!!!!!!', result
    elif module == 'preference':
        try:
            result['keywords'] = json.loads(source['filter_keyword'])
        except:
            f_keyword = json.loads(source['keywords'])
            key_str = ','.join([key[0] for key in f_keyword])
            filter_dict = get_weibo_single(key_str, n_count=100)
            result['keywords'] = sorted(filter_dict.iteritems(),
                                        key=lambda x: x[1],
                                        reverse=True)
        '''
        keyword_list = json.loads(source['keywords'])
        keyword_dict = dict()
        for item in keyword_list:
            keyword_dict[item[0]] = item[1]

        filter_keyword_dict = keyword_filter(keyword_dict)
        sort_keyword = sorted(filter_keyword_dict.items(), key=lambda x:x[1], reverse=True)
        result['keywords'] = sort_keyword
        '''
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        try:
            result['topic_model'] = json.loads(source['topic_model'])
        except:
            result['topic_model'] = []
        #need to delete
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result
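
A usage sketch for search_group_results (hedged: the ES clients, index names and RUN_TYPE come from this project's configuration; 'example_task' and 'example_user' are made-up arguments). A string return value signals an error condition:

overview = search_group_results('example_task', 'overview', 'example_user')
if isinstance(overview, dict):
    stars = (overview['density_star'], overview['activeness_star'],
             overview['influence_star'], overview['importance_star'])
else:
    print overview  # e.g. 'group task does not exist' or 'group task is not completed'
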
Пример #33
0
def portrait_user_vary(es,
                       number,
                       active_index,
                       active_type,
                       portrait_index,
                       portrait_type,
                       field="vary"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "active_index does not exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    try:
        while 1:
            search_list = []
            user_list = search_k(es, active_index, active_type, start, field,
                                 100)
            start += 100
            for item in user_list:
                uid = item.get('uid',
                               '0')  # obtain uid, notice "uid" or "user"
                search_list.append(uid)  # uid list
            search_result = es_portrait.mget(index="user_portrait",
                                             doc_type="user",
                                             body={"ids": search_list},
                                             _source=True)["docs"]
            profile_result = es_profile.mget(index="weibo_user",
                                             doc_type="user",
                                             body={"ids": search_list},
                                             _source=True)["docs"]

            for index, item in enumerate(search_result):
                count_c += 1
                if item["found"]:
                    info = ['', '', '', '', '', '1']
                    info[0] = rank

                    if profile_result[index]['found']:
                        info[1] = profile_result[index]['_source'].get(
                            'photo_url', '')
                        info[3] = profile_result[index]['_source'].get(
                            'nick_name', '')
                    info[2] = search_result[index].get('_id', '')
                    info[4] = user_list[index]['vary']
                    return_list.append(info)
                    rank += 1
                    if rank == int(number) + 1:
                        return return_list

            if count_c > 10000:
                break
    except RequestError:
        print "timeout"

    return return_list
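
A usage sketch for portrait_user_vary (hedged: es, es_portrait, es_profile and the search_k helper are this module's objects; 'activity_index' and 'activity_type' are made-up index names). Each returned row is [rank, photo_url, uid, nick_name, vary value, '1']:

top_vary = portrait_user_vary(es, 50, 'activity_index', 'activity_type',
                              'user_portrait', 'user', field='vary')
for rank, photo_url, uid, nick_name, vary, flag in top_vary:
    pass  # render one row of the ranking
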
Пример #34
0
def search_group_results(task_name, module, submit_user):
    result = {}
    if RUN_TYPE == 0:
        #jln
        #task_id = '媒体'
        #group_index_type='text'
        task_id = submit_user + '-' + task_name
        group_index_type = 'group'
    else:
        task_id = submit_user + '-' + task_name
    #print es_group_result,group_index_name,group_index_type,task_id
    #step1:identify the task_name exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
               id=task_id)['_source']
        print source
    except:
        return 'group task does not exist'
    #step2: identify the task status=1(analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #step3:get module result
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        #need to delete
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        result['activity_geo_disribution'] = json.loads(source['activity_geo_distribution'])
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])
        
        #yuanhuiru
        uid_list = source['uid_list']
        
        user_photo_result= es_user_portrait.mget(index='user_portrait_1222', doc_type='user', body={'ids':uid_list}, fields=['photo_url'])['docs']
        influ_value_result= es_user_portrait.mget(index='user_portrait_1222', doc_type='user', body={'ids':uid_list}, fields=['influence'])['docs']
        result['photo_url']=[]
        result['influence']=[]
        for item in user_photo_result:
            #uid = item['_id']
            if item['found']==True:
                # use a distinct name so the group document held in 'source' is not overwritten
                item_fields = item['fields']
                photo_url = item_fields['photo_url']
            else:
                photo_url = 'unknown'

            result['photo_url'].append(photo_url)
           
        #print 'user_photo', result['photo_url']
        
        for item in influ_value_result:
            #uid = item['_id']
            if item['found']==True:
                item_fields = item['fields']
                influence = item_fields['influence']
            else:
                influence = 'unknown'

            result['influence'].append(influence)

        #print 'influence', result['influence']
       

        new_geo = {}
        for uid,geos in result['activity_geo_disribution'].iteritems():
            for geo,count in geos.iteritems():
                geo = geo.split('\t')
                if geo[0] == u'中国':
                    if len(geo) == 1:
                        geo.extend([u'未知', u'未知'])
                    elif len(geo) == 2:
                        geo.append(u'未知')
                    try:
                        new_geo[geo[1]]['total'] += count
                    except:
                        new_geo[geo[1]] = {'total':count}
                    try:
                        new_geo[geo[1]][geo[2]] += count
                    except:
                        new_geo[geo[1]][geo[2]] = count

        result['new_geo'] = new_geo
        try:
            vary_detail_geo_dict = json.loads(source['vary_detail_geo'])
        except:
            vary_detail_geo_dict = {}
        
        #uid_list = source['uid_list']
        
        if vary_detail_geo_dict != {}:
            result['vary_detail_geo'] = get_vary_detail_info(vary_detail_geo_dict, uid_list)
        else:
            result['vary_detail_geo'] = {}

        try:
            main_start_geo_dict = json.loads(source['main_start_geo'])
        except:
            main_start_geo_dict = {}
        result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x:x[1], reverse=True)

        try:
            main_end_geo_dict = json.loads(source['main_end_geo'])
        except:
            main_end_geo_dict = {}
        result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x:x[1], reverse=True)
        #all_geo_list = list(set(main_start_geo_dict.keys()) | set(main_end_geo_dict.keys()))
        #result['geo_lat_lng'] = get_lat_lng(all_geo_list)
        print 'result!!!!!!',result
    elif module == 'preference':
        try:
            result['keywords'] = json.loads(source['filter_keyword'])
        except:
            f_keyword = json.loads(source['keywords'])
            key_str = ','.join([key[0] for key in f_keyword])
            filter_dict = get_weibo_single(key_str,n_count=100)
            result['keywords'] = sorted(filter_dict.iteritems(),key=lambda x:x[1],reverse= True)
        '''
        keyword_list = json.loads(source['keywords'])
        keyword_dict = dict()
        for item in keyword_list:
            keyword_dict[item[0]] = item[1]

        filter_keyword_dict = keyword_filter(keyword_dict)
        sort_keyword = sorted(filter_keyword_dict.items(), key=lambda x:x[1], reverse=True)
        result['keywords'] = sort_keyword
        '''
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        try:
            result['topic_model'] = json.loads(source['topic_model'])
        except:
            result['topic_model'] = []
        #need to delete
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result
Пример #35
0
def influenced_people(uid, mid, influence_style, date, default_number=20):
    # uid: the user whose influence is examined
    # mid: which weibo; for a retweeted weibo, look up its root_mid
    # influence_style: retweeted (0) or comment (1)
    date1 = ts2datetime(datetime2ts(date)).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    text_result = es.get(index=index_flow_text, doc_type=flow_text_index_type, id=mid)["_source"]
    temp_mid = text_result.get("root_mid",'') # check whether the weibo is an original weibo
    if temp_mid:
        mid_type = 1 # not an original weibo (it has a root_mid)
    else:
        mid_type = 0 # original weibo
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                        ]
                    }
                }
            }
        },
        "size": 30000
    }

    if mid_type == 0:
        if int(influence_style) == 0: # origin weibo, all retweeted people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"root_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": mid}}])
        else: # commented people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": mid}}])
    else:
        if int(influence_style) == 0: # origin weibo, all retweeted people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": temp_mid}}])
        else: # commented people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": temp_mid}}])
    search_results = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_body, fields=["uid"], timeout=30)["hits"]["hits"]
    results = [] # uid_list
    if search_results:
        for item in search_results:
            if int(item["fields"]["uid"][0]) == int(uid):
                pass
            else:
                results.append(item["fields"]["uid"][0])
        results = list(set(results))
    else:
        results = []

    bci_index = "bci_" + date.replace('-','')

    if results:
        portrait_results = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": results}, fields=["domain", "topic_string", "activity_geo_dict","importance", "influence"])["docs"]
        bci_results = es_user_portrait.mget(index=bci_index, doc_type='bci', body={"ids":results}, fields=['user_index'])['docs']
    else:
        portrait_results = {}
        bci_results = {}


    in_portrait = []
    out_portrait = []
    in_portrait_info = []
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    average_influence = 0
    total_influence = 0
    count = 0

    if bci_results:
        total_influence = 0
        for item in bci_results:
            if item['found']:
                total_influence += item['fields']['user_index'][0]
    try:
        average_influence = total_influence/len(results)
    except:
        average_influence = 0

    if portrait_results:
        for item in portrait_results:
            if item["found"]:
                temp = []
                count += 1
                temp.append(item['_id'])
                temp.append(item["fields"]["importance"][0])
                in_portrait.append(temp)
                temp_domain = item["fields"]["domain"][0].split('&')
                temp_topic = item["fields"]["topic_string"][0].split('&')
                temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
                #total_influence += item["fields"]["influence"][0]
                retweeted_domain = aggregation(temp_domain, retweeted_domain)
                retweeted_topic = aggregation(temp_topic, retweeted_topic)
                retweeted_geo = aggregation(temp_geo, retweeted_geo)
            else:
                out_portrait.append(item['_id'])
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)
        #try:
        #    average_influence = total_influence/count
        #except:
        #    average_influence = 0
    sorted_retweeted_domain = sorted(retweeted_domain.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x:x[1], reverse=True)

    retweeted_results = dict()
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence
    in_portrait = sorted(in_portrait, key=lambda x:x[1], reverse=True)


    temp_list = []
    for item in in_portrait:
        temp_list.append(item[0])
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)
    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])

    return_results = dict()
    return_results["influence_users"] = [in_portrait_url, out_portrait_url]
    return_results["influence_distribution"] = retweeted_results

    return return_results
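
A usage sketch for influenced_people (hedged: es, es_user_portrait, the index-name prefixes and the aggregation/proportion/get_user_url helpers are defined elsewhere in this project; the uid, mid and date values are placeholders). influence_style '0' aggregates retweeters, '1' aggregates commenters:

detail = influenced_people('1234567890', 'example_mid', '0', '2016-11-25')
in_portrait_urls, out_portrait_urls = detail['influence_users']
distribution = detail['influence_distribution']  # top domains/topics/geo plus average influence
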
Пример #36
0
def influenced_user_detail(uid, date, origin_retweeted_mid, retweeted_retweeted_mid, message_type, default_number=20):
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must": [
                        ]
                    }
                }
            }
        },
        "size":100000,
        "sort":{"user_fansnum":{"order":"desc"}}
    }
    # detailed list of influenced users
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    origin_retweeted_uid = [] # influenced user uid_list
    retweeted_retweeted_uid = []
    origin_comment_uid = []
    retweeted_comment_uid = []
    query_origin = copy.deepcopy(query_body)
    query_retweeted = copy.deepcopy(query_body)
    if origin_retweeted_mid: # all users who retweeted this original weibo
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": origin_retweeted_mid}})
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term":{"message_type": message_type}}, {"term":{"root_uid": uid}}])
        origin_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_origin, fields=["uid"])["hits"]["hits"]
        if origin_retweeted_result:
            for item in origin_retweeted_result:
                origin_retweeted_uid.append(item["fields"]["uid"][0])
    if retweeted_retweeted_mid: # all users who commented on this original weibo
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": retweeted_retweeted_mid}})
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term":{"message_type": message_type}},{"term": {"directed_uid": uid}}])
        retweeted_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_retweeted, fields=["uid"])["hits"]["hits"]
        if retweeted_retweeted_result:
            for item in retweeted_retweeted_result:
                retweeted_retweeted_uid.append(item["fields"]["uid"][0])
    retweeted_uid_list = [] # all retweeted user list
    retweeted_results = {} # statistics of all retweeted uid information
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    bci_results = {}
    in_portrait = []
    out_portrait = []
    average_influence = 0
    total_influence = 0
    count = 0
    all_uid_set = set(origin_retweeted_uid) | set(retweeted_retweeted_uid)

    retweeted_uid_list.extend(origin_retweeted_uid)
    retweeted_uid_list.extend(retweeted_retweeted_uid)
    retweeted_uid_list = list(set(retweeted_uid_list) - set([uid])) # filter uids
    if retweeted_uid_list:
        user_portrait_result = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": retweeted_uid_list}, fields=["domain", "topic_string", "activity_geo_dict","importance", "influence"])["docs"]
        bci_index = "bci_" + date.replace('-', '')
        bci_results = es_cluster.mget(index=bci_index, doc_type="bci", body={"ids":retweeted_uid_list}, fields=['user_index'])["docs"]
        for item in user_portrait_result:
            if item["found"]:
                temp = []
                count += 1
                temp.append(item['_id'])
                temp.append(item["fields"]["importance"][0])
                in_portrait.append(temp)
                temp_domain = item["fields"]["domain"][0].split('&')
                temp_topic = item["fields"]["topic_string"][0].split('&')
                temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
                #total_influence += item["fields"]["influence"][0]
                retweeted_domain = aggregation(temp_domain, retweeted_domain)
                retweeted_topic = aggregation(temp_topic, retweeted_topic)
                retweeted_geo = aggregation(temp_geo, retweeted_geo)
            else:
                out_portrait.append(item['_id'])
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)


    if bci_results:
        total_influence = 0
        for item in bci_results:
            if item['found']:
                total_influence += item['fields']['user_index'][0]
    try:
        average_influence = total_influence/len(retweeted_uid_list)
    except:
        average_influence = 0

    sorted_retweeted_domain = sorted(retweeted_domain.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x:x[1], reverse=True)
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence
    in_portrait = sorted(in_portrait, key=lambda x:x[1], reverse=True)

    temp_list = []
    for item in in_portrait:
        temp_list.append(item[0])
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)
    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])
    retweeted_results["in_portrait"] = in_portrait_url
    retweeted_results["out_portrait"] = out_portrait_url
    retweeted_results["total_number"] = len(temp_list) + len(out_portrait)
 

    return retweeted_results
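
A usage sketch for influenced_user_detail (hedged: it relies on the same module-level ES clients and helpers as the functions above; the mid lists are placeholders). message_type 3 collects retweeters and 2 collects commenters, matching the term filters built above:

detail = influenced_user_detail('1234567890', '2016-11-25',
                                origin_retweeted_mid=['mid_a'],
                                retweeted_retweeted_mid=['mid_b'],
                                message_type=3)
print detail['total_number'], detail['influence']
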
Пример #37
0
def group_user_weibo(task_name, submit_user, sort_type):
    weibo_list = []
    now_date = ts2datetime(time.time())
    if sort_type == 'retweet':
        sort_type = 'retweeted'
    #run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    #step1: get group user
    task_id = submit_user + '-' + task_name
    try:
        group_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        group_exist_result = {}
    if not group_exist_result:
        return 'group does not exist'
    #step2: get user weibo list
    uid_list = group_exist_result['uid_list']
    for i in range(6,-1,-1):
        iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        index_name = flow_text_index_name_pre + iter_date
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'sort':[{sort_type: {'order': 'desc'}}], 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    #sort_weibo_list = sorted(weibo_list, key=lambda x:x['_source'][sort_type], reverse=True)[:100]
    sort_weibo_list = weibo_list
    #step3: get user name
    try:
        portrait_exist_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':uid_list})['docs']
    except:
        portrait_exist_result = []
    uid2uname_dict = {}
    for portrait_item in portrait_exist_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            source = portrait_item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        uid2uname_dict[uid] = uname
    weibo_list = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        uname = uid2uname_dict[uid]
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        sentiment = source['sentiment']
        weibo_url = weiboinfo2url(uid, mid)
        #run_type:
        if RUN_TYPE == 1:
            try:
                retweet_count = source['retweeted']
            except:
                retweet_count = 0
            try:
                comment_count = source['comment']
            except:
                comment_count = 0
            try:
                sensitive_score = source['sensitive']
            except:
                sensitive_score = 0
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        weibo_list.append([mid, uid, uname, text, ip, city, timestamp, date, retweet_count, comment_count, sensitive_score, weibo_url])
    if sort_type == 'timestamp':
        new_weibo_list = sorted(weibo_list, key=lambda x:x[6], reverse=True)
    elif sort_type == 'retweeted':
        new_weibo_list = sorted(weibo_list, key=lambda x:x[8], reverse=True)
    elif sort_type == 'comment':
        new_weibo_list = sorted(weibo_list, key=lambda x:x[9], reverse=True)
    elif sort_type == 'sensitive':
        new_weibo_list = sorted(weibo_list, key=lambda x:x[10], reverse=True)
    else:
        new_weibo_list = weibo_list
    return new_weibo_list
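
A usage sketch for group_user_weibo (hedged: the group-result and flow-text ES clients and RUN_TYPE/RUN_TEST_TIME come from the project configuration; the task and user names are placeholders). The rows use the same positional layout as above, so sort_type again maps to a column index:

rows = group_user_weibo('example_task', 'example_user', 'retweet')
if isinstance(rows, list):
    latest = rows[:20]
else:
    print rows  # 'group does not exist' when the task id is missing
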
Пример #38
0
def search_group_sentiment_weibo(task_name, start_ts, sentiment, submit_user):
    weibo_list = []
    task_id = submit_user + '-' + task_name
    #print es_group_result,group_index_name,group_index_type
    #step1:get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                        id=task_id, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step3: get ui2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                        body={'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found']==True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #step4:iter date to search weibo
    weibo_list = []
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    #step4: get query_body
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}}, {'term':{'sentiment': sentiment}}, \
                {'range':{'timestamp':{'gte':start_ts, 'lt': start_ts+DAY}}}]
    else:
        query_body = [{'terms':{'uid':uid_list}}, {'terms':{'sentiment': SENTIMENT_SECOND}},\
                {'range':{'timestamp':{'gte':start_ts, 'lt':start_ts+DAY}}}]
    try:
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                body={'query':{'bool':{'must': query_body}}, 'sort': [{'timestamp':{'order':'asc'}}], 'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        weibo['uname'] = uid2uname[weibo['uid']]
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)

    return weibo_list
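
A usage sketch for search_group_sentiment_weibo (hedged: the group/portrait/flow-text ES clients and the DAY, SENTIMENT_SECOND and MAX_VALUE constants are project-level settings; the arguments are placeholders). A sentiment of '2' expands to the SENTIMENT_SECOND set; any other value is matched exactly:

import time
start_ts = int(time.time()) - 86400
weibos = search_group_sentiment_weibo('example_task', start_ts, '1', 'example_user')
if isinstance(weibos, list):
    for weibo in weibos:
        print weibo['uname'], weibo['sentiment'], weibo['text']
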
Пример #39
0
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0 # counter

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")

    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":social_sensors}, fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    elif iter_item == "activeness":
                        temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                    elif iter_item == "importance":
                        temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                    elif iter_item == "influence":
                        temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
        portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)

    time_series = [] # timestamps
    #positive_sentiment_list = [] # sentiment series
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = [] # weibo count series
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # number of times others retweeted this user
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # burst time list
    important_user_set = set() # set of important users
    out_portrait_users = set() # users not yet in the portrait database

    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # all timestamps up to now

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list) # not yet in the portrait database
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)

            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # tally the burst reasons and draw the corresponding conclusions
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # 每个用户的热度
    """

    # fetch personal information for the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    social_sensor_set = set(social_sensors)
    user_detail_info = []
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                if item['fields']['uid'][0] in social_sensor_set:
                    temp.append(1)
                else:
                    temp.append(0)
                user_detail_info.append(temp)
    # sort by normalized influence (descending)
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    #temp.append(item['_source']['fansnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)
    print len(out_user_detail_info)
    if len(out_user_detail_info):
        print "sort"
        out_user_detail_info = sorted(out_user_detail_info, key=lambda x:x[4], reverse=True)


    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # burst time points and their reasons
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #esults['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count
    results['social_sensors_detail'] = portrait_detail

    return results
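
A usage sketch for get_task_detail_2 (hedged: the sensing-task indices, the portrait index and the get_top_influence/get_top_all_influence helpers are defined elsewhere in this project; the task name, timestamp and user are placeholders):

import time
detail = get_task_detail_2('example_task', int(time.time()), 'example_user')
important_users = detail['important_user_detail']      # sorted by normalized influence
out_portrait_users = detail['out_portrait_user_detail']
trend = zip(detail['time_series'], detail['all_weibo_list'])
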
Пример #40
0
def search_group_sentiment_weibo(task_name, start_ts, sentiment, submit_user):
    weibo_list = []
    task_id = submit_user + '-' + task_name
    #step1:get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                        id=task_id, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step3: get ui2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                        body={'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #step4:iter date to search weibo
    weibo_list = []
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    #step4: get query_body
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}}, {'term':{'sentiment': sentiment}}, \
                {'range':{'timestamp':{'gte':start_ts, 'lt': start_ts+DAY}}}]
    else:
        query_body = [{'terms':{'uid':uid_list}}, {'terms':{'sentiment': SENTIMENT_SECOND}},\
                {'range':{'timestamp':{'gte':start_ts, 'lt':start_ts+DAY}}}]
    try:
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                body={'query':{'bool':{'must': query_body}}, 'sort': [{'timestamp':{'order':'asc'}}], 'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        weibo['uname'] = uid2uname[weibo['uid']]
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)

    return weibo_list
Пример #41
0
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    #get two type relation about uid1 and uid2
    #search weibo list
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                                body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs']
    except:
        portrait_result = []

    for item in portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({
            'bool': {
                'must': [{
                    'term': {
                        'uid': uid1
                    }
                }, {
                    'term': {
                        'directed_uid': int(uid2)
                    }
                }]
            }
        })
        if type_mark == 'out':
            query.append({
                'bool': {
                    'must': [{
                        'term': {
                            'uid': uid2
                        }
                    }, {
                        'term': {
                            'directed_uid': int(uid1)
                        }
                    }]
                }
            })
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits']
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] = source['uid']
            weibo['uname'] = uid2uname[weibo['uid']]
            weibo['directed_uid'] = str(source['directed_uid'])
            weibo['directed_uname'] = uid2uname[str(source['directed_uid'])]
            weibo_list.append(weibo)

    return weibo_list
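
A usage sketch for get_social_inter_content (hedged: the portrait/flow-text ES clients, DAY, MAX_VALUE and RUN_TYPE/RUN_TEST_TIME come from project configuration; the uids are placeholders). With type_mark 'out' the should-query covers both directions of interaction between the two users:

dialog = get_social_inter_content('1111111111', '2222222222', 'out')
for weibo in dialog:
    print weibo['uname'], '->', weibo['directed_uname'], ':', weibo['text']
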