예제 #1
0
def show_daily_trend():
    """Return the network-count document stored for the current day.

    The document id is ``ts2datetime(time.time())`` (presumably a date
    string such as '2013-09-01' -- confirm against ``ts2datetime``). It is
    looked up in the ``user_portrait_network_count`` index under the
    ``network`` doc type.

    Returns:
        The ``_source`` dict of the matching document, or an empty dict
        when the lookup fails.
    """
    date = ts2datetime(time.time())
    index_name = 'user_portrait_network_count'
    index_type = 'network'
    try:
        results = es_network_task.get(index=index_name, doc_type=index_type, id=date)['_source']
    except Exception:
        # Best-effort read: any lookup failure yields an empty result, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        results = {}
    return results
예제 #2
0
def show_daily_trend():
    """Return the network-count document stored for the current day.

    The document id is ``ts2datetime(time.time())`` (presumably a date
    string such as '2013-09-01' -- confirm against ``ts2datetime``). It is
    looked up in the ``user_portrait_network_count`` index under the
    ``network`` doc type.

    Returns:
        The ``_source`` dict of the matching document, or an empty dict
        when the lookup fails.
    """
    date = ts2datetime(time.time())
    index_name = 'user_portrait_network_count'
    index_type = 'network'
    try:
        results = es_network_task.get(index=index_name, doc_type=index_type, id=date)['_source']
    except Exception:
        # Best-effort read: any lookup failure yields an empty result, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        results = {}
    return results
예제 #3
0
def show_keywords_rank(task_id, sort_type, count):
    """Build the ranked user rows for one keyword-network task.

    The task document is read from the keyword-network task index. Its
    ``results`` field is JSON-decoded and the ranking stored under
    ``sort_type`` (an iterable of ``(uid, sort_value)`` pairs) is enriched
    with data from the profile, BCI-history and portrait indices.

    Args:
        task_id: id of the task document to load.
        sort_type: key into the decoded ``results`` selecting the ranking.
        count: kept for interface compatibility and currently unused.
            NOTE(review): the previous implementation overwrote it as a
            loop counter and never limited the output -- confirm whether
            the ranking should be truncated to ``count`` rows.

    Returns:
        ``{}`` when the task document cannot be loaded. Otherwise a list
        with one row per ranked uid, in ranking order:
        ``[uid, nick_name, user_location, sort_value, user_fansnum,
        weibo_month_sum, in_portrait]``. Missing profile/history fields are
        ``''`` and ``in_portrait`` is ``"1"`` or ``"0"``.
    """
    try:
        task_found = es_network_task.get(index=network_keywords_index_name,
                doc_type=network_keywords_index_type, id=task_id)['_source']
    except Exception:
        # Best-effort: an unreadable task yields an empty result.
        return {}

    search_results = json.loads(task_found['results'])
    sort_results = search_results[sort_type]

    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)

    if not uid_list:
        return []

    # Each lookup is issued with the same id list; the rows below rely on
    # the i-th doc of every response describing the i-th uid.
    profile_docs = es_user_profile.mget(index=profile_index_name,
            doc_type=profile_index_type, body={"ids": uid_list})["docs"]
    history_docs = es_bci_history.mget(index=bci_history_index_name,
            doc_type=bci_history_index_type, body={"ids": uid_list})["docs"]
    portrait_docs = es_user_portrait.mget(index=portrait_index_name,
            doc_type=portrait_index_type, body={"ids": uid_list})["docs"]

    results = []
    # Pair by position instead of list.index(): index() is quadratic and
    # returns the first equal doc, which picks the wrong sort value when
    # the same uid appears more than once.
    for sort_value, profile, history, portrait in zip(
            sort_list, profile_docs, history_docs, portrait_docs):
        if profile['found']:
            source = profile['_source']
            row = [source['uid'], source['nick_name'], source['user_location']]
        else:
            row = [profile['_id'], '', '']
        row.append(sort_value)

        if history['found']:
            source = history['_source']
            row.extend([source['user_fansnum'], source['weibo_month_sum']])
        else:
            row.extend(['', ''])

        row.append("1" if portrait['found'] else "0")
        results.append(row)

    return results
예제 #4
0
def search_retweet_network_keywords(task_id, uid):
    """Count a user's retweet neighbours within a keyword task's date range.

    The task document supplies ``start_date``, ``end_date`` and
    ``query_keywords`` (keywords joined by ``'&'``). For every day in the
    range, the per-day flow-text index (``flow_text_index_name_pre`` + date)
    is searched for texts matching any keyword, once per direction.

    Args:
        task_id: id of the keyword-network task document.
        uid: the user whose neighbours are counted.

    Returns:
        A dict with two entries, each produced by
        ``retweet_dict2results(uid, counts)``:
        ``'retweet'`` -- texts whose ``uid`` equals ``uid``, counted per
        ``directed_uid``;
        ``'be_retweet'`` -- texts whose ``directed_uid`` equals ``uid``,
        counted per ``uid``.

    Raises:
        Whatever the Elasticsearch clients raise (e.g. for a missing task
        document or a missing per-day index); nothing is caught here.
    """
    task_results = es_network_task.get(index=network_keywords_index_name,
                doc_type=network_keywords_index_type, id=task_id)['_source']

    start_ts = datetime2ts(task_results['start_date'])
    # Fixed: this line used the misspelled name ``task_resuts`` and raised
    # NameError on every call.
    end_ts = datetime2ts(task_results['end_date'])

    # Every day from start to end, inclusive, e.g. ['2013-09-01', '2013-09-02'].
    iter_query_date_list = []
    iter_date_ts = start_ts
    while iter_date_ts <= end_ts:
        iter_query_date_list.append(ts2datetime(iter_date_ts))
        iter_date_ts += DAY

    # A text matches when it contains at least one of the task keywords.
    keyword_nest_body_list = []
    for keywords_item in task_results['query_keywords'].split('&'):
        keyword_nest_body_list.append({'wildcard': {'text': '*' + keywords_item + '*'}})
    keyword_clause = {'bool': {'should': keyword_nest_body_list}}

    # (result key, field that must equal uid, field counted as neighbour).
    # NOTE(review): the previous be_retweet pass counted 'directed_uid',
    # which its own filter pins to `uid`; counting the author ('uid') is
    # assumed to be the intent -- confirm against retweet_dict2results.
    directions = (
        ('retweet', 'uid', 'directed_uid'),
        ('be_retweet', 'directed_uid', 'uid'),
    )

    network_results = {}
    for result_key, filter_field, counted_field in directions:
        # A fresh must-list per direction: the old code shared one list, so
        # the be_retweet query also carried the retweet 'uid' term.
        query_body = {
            'query': {
                'bool': {
                    'must': [keyword_clause, {'term': {filter_field: uid}}]
                }
            },
            'size': 100
        }
        item_results = {}
        for iter_date in iter_query_date_list:
            flow_text_index_name = flow_text_index_name_pre + iter_date
            flow_text_result = es_flow_text.search(index=flow_text_index_name,
                        doc_type=flow_text_index_type,
                        body=query_body)['hits']['hits']
            for item in flow_text_result:
                neighbour_uid = item['_source'][counted_field]
                item_results[neighbour_uid] = item_results.get(neighbour_uid, 0) + 1
        network_results[result_key] = retweet_dict2results(uid, item_results)

    return network_results
예제 #5
0
def search_retweet_network_keywords(task_id, uid):
    """Count a user's retweet neighbours within a keyword task's date range.

    The task document supplies ``start_date``, ``end_date`` and
    ``query_keywords`` (keywords joined by ``'&'``). For every day in the
    range, the per-day flow-text index (``flow_text_index_name_pre`` + date)
    is searched for texts matching any keyword, once per direction.

    Args:
        task_id: id of the keyword-network task document.
        uid: the user whose neighbours are counted.

    Returns:
        A dict with two entries, each produced by
        ``retweet_dict2results(uid, counts)``:
        ``'retweet'`` -- texts whose ``uid`` equals ``uid``, counted per
        ``directed_uid``;
        ``'be_retweet'`` -- texts whose ``directed_uid`` equals ``uid``,
        counted per ``uid``.

    Raises:
        Whatever the Elasticsearch clients raise (e.g. for a missing task
        document or a missing per-day index); nothing is caught here.
    """
    task_results = es_network_task.get(index=network_keywords_index_name,
                doc_type=network_keywords_index_type, id=task_id)['_source']

    start_ts = datetime2ts(task_results['start_date'])
    # Fixed: this line used the misspelled name ``task_resuts`` and raised
    # NameError on every call.
    end_ts = datetime2ts(task_results['end_date'])

    # Every day from start to end, inclusive, e.g. ['2013-09-01', '2013-09-02'].
    iter_query_date_list = []
    iter_date_ts = start_ts
    while iter_date_ts <= end_ts:
        iter_query_date_list.append(ts2datetime(iter_date_ts))
        iter_date_ts += DAY

    # A text matches when it contains at least one of the task keywords.
    keyword_nest_body_list = []
    for keywords_item in task_results['query_keywords'].split('&'):
        keyword_nest_body_list.append({'wildcard': {'text': '*' + keywords_item + '*'}})
    keyword_clause = {'bool': {'should': keyword_nest_body_list}}

    # (result key, field that must equal uid, field counted as neighbour).
    # NOTE(review): the previous be_retweet pass counted 'directed_uid',
    # which its own filter pins to `uid`; counting the author ('uid') is
    # assumed to be the intent -- confirm against retweet_dict2results.
    directions = (
        ('retweet', 'uid', 'directed_uid'),
        ('be_retweet', 'directed_uid', 'uid'),
    )

    network_results = {}
    for result_key, filter_field, counted_field in directions:
        # A fresh must-list per direction: the old code shared one list, so
        # the be_retweet query also carried the retweet 'uid' term.
        query_body = {
            'query': {
                'bool': {
                    'must': [keyword_clause, {'term': {filter_field: uid}}]
                }
            },
            'size': 100
        }
        item_results = {}
        for iter_date in iter_query_date_list:
            flow_text_index_name = flow_text_index_name_pre + iter_date
            flow_text_result = es_flow_text.search(index=flow_text_index_name,
                        doc_type=flow_text_index_type,
                        body=query_body)['hits']['hits']
            for item in flow_text_result:
                neighbour_uid = item['_source'][counted_field]
                item_results[neighbour_uid] = item_results.get(neighbour_uid, 0) + 1
        network_results[result_key] = retweet_dict2results(uid, item_results)

    return network_results