Example #1
def get_group_user_track(uid):
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x:x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY
    # sample results:
    # [['2017-03-31', ''], ['2017-04-01', u'中国\t四川\t2成都'],
    #  ['2017-04-02', u'中国\t四川\t3成都'], ['2017-04-03', u'中国\t四川\t7成都'],
    #  ['2017-04-04', u'中国\t四川\t成都'], ['2017-04-05', u'中国\t四川\t成都'],
    #  ['2017-04-06', u'中国\t四川\t成都']]
    # keep distinct domestic (中国) locations for the map
    geolist = []
    line_list = []
    for item in results:
        if item[1] and item[1].split('\t')[0] == u'中国':
            geolist.append(item[1])
    geolist = list(set(geolist))
    # build movement lines between consecutive days with different main cities
    for x in range(len(results) - 1):
        start_geo = results[x][1]
        end_geo = results[x + 1][1]
        if start_geo and end_geo and \
           start_geo.split('\t')[0] == u'中国' and end_geo.split('\t')[0] == u'中国':
            if start_geo != end_geo:
                line_list.append([start_geo, end_geo])
    return {'city': geolist, 'line': line_list}
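
A minimal usage sketch for the example above, assuming the ES/portrait configuration is already set up and using a made-up uid; it only illustrates how the returned {'city': [...], 'line': [...]} structure can be consumed (each geo string is a tab-separated country/province/city value, as in the sample comment).

track = get_group_user_track('1234567890')  # hypothetical uid
if isinstance(track, dict):
    # distinct cities the user appeared in
    print(track['city'])
    # day-to-day moves, rendered as "city -> city"
    for start_geo, end_geo in track['line']:
        print('%s -> %s' % (start_geo.split('\t')[-1], end_geo.split('\t')[-1]))
else:
    # the function returns an error string when the uid is not in user_portrait
    print(track)
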
Example #2
def get_group_user_track(uid):
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(
        portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(),
                               key=lambda x: x[1],
                               reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    return results
def get_people_org_track(activity_geo_dict):  # compute the activity track from a user's geo-location history

    results = []
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x:x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    # keep distinct domestic (中国) locations for the map
    geolist = []
    line_list = []
    for item in results:
        if item[1] and item[1].split('\t')[0] == u'中国':
            geolist.append(item[1])
    geolist = list(set(geolist))
    # build movement lines between consecutive days with different main cities
    for x in range(len(results) - 1):
        start_geo = results[x][1]
        end_geo = results[x + 1][1]
        if start_geo and end_geo and \
           start_geo.split('\t')[0] == u'中国' and end_geo.split('\t')[0] == u'中国':
            if start_geo != end_geo:
                line_list.append([start_geo, end_geo])
    return {'city': geolist, 'line': line_list}
Example #4
def get_user_detail(date, input_result):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    uid_list = input_result
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    # influence, fans, status
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type,
                                      body={'ids': uid_list}, _source=True)['docs']
    # nick name and registered location
    user_profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                               body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]

        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence/float(max_evaluate_influ['user_index']) * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
        else:
            uname = uid
            location = ''
        # fansnum/statusnum come from the bci doc; default to 0 when absent
        try:
            fansnum = bci_dict['fields']['user_fansnum'][0]
        except:
            fansnum = 0
        try:
            statusnum = bci_dict['fields']['weibo_month_sum'][0]
        except:
            statusnum = 0

        results.append({'uid': uid, 'uname': uname, 'location': location,
                        'fansnum': fansnum, 'statusnum': statusnum,
                        'influence': round(influence, 2) if influence != '' else influence})

    return results
Example #5
def get_user_detail(date,
                    input_result,
                    status,
                    user_type="influence",
                    auth=""):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    tmp_ts = str(datetime2ts(date) - DAY)
    sensitive_string = "sensitive_score_" + tmp_ts
    query_sensitive_body = {
        "query": {
            "match_all": {}
        },
        "size": 1,
        "sort": {
            sensitive_string: {
                "order": "desc"
            }
        }
    }
    try:
        top_sensitive_result = es_bci_history.search(
            index=ES_SENSITIVE_INDEX,
            doc_type=DOCTYPE_SENSITIVE_INDEX,
            body=query_sensitive_body,
            _source=False,
            fields=[sensitive_string])['hits']['hits']
        top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
    except Exception, reason:
        print Exception, reason
        top_sensitive = 400
Example #6
def ajax_recommentation_in():
    # recommend by influence or by sensitivity
    date = request.args.get('date', '2016-11-27') # '2013-09-01'
    recomment_type = request.args.get('type', 'influence')  #influence  sensitive
    submit_user = request.args.get('submit_user', 'admin')  # submitting user
    input_ts = datetime2ts(date)
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = test_time
    if now_ts - 3600*24*7 >= input_ts:
        return json.dumps([])
    else:
        results = recommentation_in(input_ts, recomment_type, submit_user)
    return json.dumps(results)
Example #7
def recommentation_in_auto(date, submit_user):
    results = []
    #run type
    if RUN_TYPE == 1:
        now_date = search_date
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME))
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    # print recomment_hash_name,'============'
    recomment_influence_hash_name = 'recomment_' + now_date + '_influence'
    recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive'
    recomment_submit_hash_name = 'recomment_' + submit_user + '_' + now_date
    recomment_compute_hash_name = 'compute'
    # #step1: get auto
    # auto_result = r.hget(recomment_hash_name, 'auto')
    # if auto_result:
    #     auto_user_list = json.loads(auto_result)
    # else:
    #     auto_user_list = []
    #step2: get admin user result
    admin_result = r.hget(recomment_hash_name, submit_user)
    admin_user_list = []
    if admin_result:
        admin_result_dict = json.loads(admin_result)
    else:
        return None
    final_result = []
    #step3: get union user and filter compute/influence/sensitive
    for k, v in admin_result_dict.iteritems():
        admin_user_list = v
        union_user_auto_set = set(admin_user_list)
        influence_user = set(r.hkeys(recomment_influence_hash_name))
        sensitive_user = set(r.hkeys(recomment_sensitive_hash_name))
        compute_user = set(r.hkeys(recomment_compute_hash_name))
        been_submit_user = set(r.hkeys(recomment_submit_hash_name))
        filter_union_user = union_user_auto_set - (
            influence_user | sensitive_user | compute_user | been_submit_user)
        auto_user_list = list(filter_union_user)
        #step4: get user detail
        if auto_user_list == []:
            return auto_user_list
        results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto')
        for detail in results:  #add root
            re_detail = detail
            re_detail.append(k)
            final_result.append(re_detail)
    return final_result
Example #8
# imports needed by the view code below; login_required is assumed to come from Flask-Login
from flask import Blueprint, render_template
from flask_login import login_required
from knowledge.global_utils import es_event, R_RECOMMENTATION as r
from knowledge.global_utils import es_user_portrait as es, portrait_index_name, portrait_index_type
from knowledge.global_utils import es_related_docs, user_docs_name, user_docs_type, event_docs_name, event_docs_type
from knowledge.global_config import event_task_name, event_task_type 
from utils import recommentation_in, recommentation_in_auto, submit_task, identify_in, submit_event, submit_event_file,\
                  relation_add, search_user, search_event, search_node_time_limit, show_node_detail, edit_node,\
                  deal_user_tag, create_node_or_node_rel, show_relation, update_event, submit_identify_in,\
                  node_delete, delete_relation, deal_event_tag, show_weibo_list, show_wiki, show_wiki_related,show_wiki_basic,\
                  wikinode_exist
from knowledge.time_utils import ts2datetime, datetime2ts, ts2datetimestr
from knowledge.parameter import RUN_TYPE, RUN_TEST_TIME, DAY
from knowledge.global_config import event_analysis_name, event_type
from knowledge.model import PeopleHistory, EventHistory
from knowledge.extensions import db

test_time = datetime2ts(RUN_TEST_TIME)
# from draw_redis import *

# from knowledge.global_utils import event_name_search

mod = Blueprint('construction', __name__, url_prefix='/construction')

@mod.route('/')
@login_required
def construction_main():  # navigation page
    return render_template('construction/construction_main.html')

@mod.route('/graph_add/')
@login_required
def construction_graph_add():  # graph add page
    return render_template('construction/graph_add.html')
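
A minimal registration sketch for the blueprint above, assuming a plain Flask application object; the module path in the import is hypothetical.

from flask import Flask

# hypothetical import path for the module shown above
from knowledge.views.construction import mod as construction_mod

app = Flask(__name__)
app.register_blueprint(construction_mod)
# the blueprint then serves /construction/ and /construction/graph_add/
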
Example #9
def get_final_submit_user_info(uid_list):
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
    except:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list})['docs']
    except:
        bci_history_results = []
    #get bci_history max value
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{
            bci_key: {
                'order': 'desc'
            }
        }],
        'size': 1
    }
    #try:
    bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                           doc_type=bci_history_index_type,
                                           body=query_body,
                                           _source=False,
                                           fields=[bci_key])['hits']['hits']
    #except:
    #    bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    iter_count = 0
    for uid in uid_list:
        try:
            profile_item = profile_results[iter_count]
        except:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                normal_bci = math.log(bci / float(bci_max_value) * 9 + 1, 10) * 100
            except:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append(
            [uid, uname, location, fansnum, statusnum, normal_bci])
        iter_count += 1

    return final_results
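
The influence figures above (and in Example #4) are normalized with a log10 scaling so that 0 maps to 0 and the daily maximum maps to 100; a standalone sketch of that mapping, with illustrative values only:

import math

def normalize_bci(bci, bci_max_value):
    # same scaling as above: log10(x/max * 9 + 1) * 100, so 0 -> 0 and max -> 100
    return math.log(bci / float(bci_max_value) * 9 + 1, 10) * 100

print(normalize_bci(0, 5000))     # 0.0
print(normalize_bci(5000, 5000))  # 100.0
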
Example #10
    except:
        statusnum = 0
else:
    uname = uid
    location = ''
    try:
        fansnum = bci_dict['fields']['user_fansnum'][0]
    except:
        fansnum = 0
    try:
        statusnum = bci_dict['fields']['weibo_month_sum'][0]
    except:
        statusnum = 0
if status == 'show_in':
    if user_type == "sensitive":
        tmp_ts = datetime2ts(date) - DAY
        tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
        if tmp_data:
            sensitive_dict = json.loads(tmp_data)
            sensitive_words = sensitive_dict.keys()
        else:
            sensitive_words = []
        if sensitive_history_dict.get('fields', 0):
            #print sensitive_history_dict['fields'][sensitive_string][0]
            #print top_sensitive
            sensitive_value = math.log(
                sensitive_history_dict['fields'][sensitive_string][0] /
                float(top_sensitive) * 9 + 1, 10) * 100
            #print "sensitive_value", sensitive_value
        else:
            sensitive_value = 0
Example #11
def group_geo_vary(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    activity_geo_vary = {}
    main_start_geo = {}
    main_end_geo = {}
    vary_detail_geo = {}
    activity_geo_distribution_date = {}
    if RUN_TYPE == 1:
        now_ts = int(time.time())
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    try:
        iter_user_dict_list = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':uid_list})['docs']
    except:
        iter_user_dict_list = []
    for user_dict in iter_user_dict_list:
        uid = user_dict['_id']
        source = user_dict['_source']
        #attr8: activity_geo_dict---distribution by date
        user_activity_geo = {}
        activity_geo_dict_list = json.loads(source['activity_geo_dict'])
        activity_geo_date_count = len(activity_geo_dict_list)
        iter_ts = now_date_ts - activity_geo_date_count * DAY
        user_date_main_list = []
        for i in range(0, activity_geo_date_count):
            date_item = activity_geo_dict_list[i]
            if iter_ts in activity_geo_distribution_date:
                activity_geo_distribution_date[iter_ts] = union_dict_list(
                    [activity_geo_distribution_date[iter_ts], date_item])
            else:
                activity_geo_distribution_date[iter_ts] = date_item
            #use to get activity_geo vary
            sort_date_item = sorted(date_item.items(),
                                    key=lambda x: x[1],
                                    reverse=True)
            if date_item != {}:
                main_date_city = sort_date_item[0][0]
                try:
                    last_user_date_main_item = user_date_main_list[-1][0]
                except:
                    last_user_date_main_item = ''
                if main_date_city != last_user_date_main_item:
                    user_date_main_list.append([main_date_city, iter_ts])

            iter_ts += DAY
        #attr8: activity_geo_dict---location vary
        if len(user_date_main_list) > 1:
            for i in range(1, len(user_date_main_list)):
                vary_city = [
                    geo_ts_item[0]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_ts = [
                    geo_ts_item[1]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_item = '&'.join(vary_city)
                #vary_item = '&'.join(user_date_main_list[i-1:i+1])
                #get activity geo vary for vary table and map
                try:
                    activity_geo_vary[vary_item] += 1
                except:
                    activity_geo_vary[vary_item] = 1
                #get main start geo
                try:
                    main_start_geo[vary_city[0]] += 1
                except:
                    main_start_geo[vary_city[0]] = 1
                #get main end geo
                try:
                    main_end_geo[vary_city[1]] += 1
                except:
                    main_end_geo[vary_city[1]] = 1
                #get vary detail geo
                try:
                    vary_detail_geo[vary_item].append(
                        [uid, vary_ts[0], vary_ts[1]])
                except:
                    vary_detail_geo[vary_item] = [[
                        uid, vary_ts[0], vary_ts[1]
                    ]]
    all_activity_geo = union_dict_list(activity_geo_distribution_date.values())
    sort_all_activity_geo = sorted(all_activity_geo.items(),
                                   key=lambda x: x[1],
                                   reverse=True)
    try:
        main_activity_geo = sort_all_activity_geo[0][0]
    except:
        main_activity_geo = ''


    return {'main_start_geo': main_start_geo,
            'main_end_geo': main_end_geo,
            'vary_detail_geo': vary_detail_geo,
            'activity_geo_vary': activity_geo_vary,
            'main_activity_geo': main_activity_geo,
            'activity_geo_distribution_date': activity_geo_distribution_date}
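
A usage sketch for the example above; the group name and submit_user are placeholders, and it only shows how the returned activity_geo_vary counter might be summarized.

vary = group_geo_vary(u'example group', 'admin')  # hypothetical arguments
top_moves = sorted(vary['activity_geo_vary'].items(),
                   key=lambda x: x[1], reverse=True)[:5]
for move, count in top_moves:
    # each key is the two geo strings of a move, joined by '&'
    start_geo, end_geo = move.split('&')
    print('%s -> %s: %s transitions' % (start_geo, end_geo, count))
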