Exemplo n.º 1
0
def lookup_active_user(classify_id,xnr_id,start_time,end_time):
    time_gap = end_time - start_time
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_TW)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_TW)
        start_time = start_time - test_time_gap
        end_time = end_time - test_time_gap

    from_date_ts=datetime2ts(ts2datetime(start_time))
    to_date_ts=datetime2ts(ts2datetime(end_time))
    
    bci_index_name = tw_bci_index_name_pre + ''.join(ts2datetime(end_time - DAY))

    userlist = lookup_xnr_concernedusers(xnr_id)

    if classify_id == 1:      
        condition_list=[{'bool':{'must':{'terms':{'uid':userlist}}}}]
    elif classify_id == 2:    
        condition_list=[{'bool':{'must_not':[{'terms':{'uid':userlist}}]}}] 
    elif classify_id == 0:
        condition_list=[{'match_all':{}}]
    print userlist,classify_id,condition_list

    results = []
    for item in condition_list:
        query_body={
            'query':item,
            'size':HOT_WEIBO_NUM,       #查询影响力排名前50的用户即可
            'sort':{'influence':{'order':'desc'}}
            }
        try:
            flow_text_exist=es_xnr.search(index=bci_index_name,\
                    doc_type=tw_bci_index_type,body=query_body)['hits']['hits']
            search_uid_list = [item['_source']['uid'] for item in flow_text_exist]
            user_exist = es_xnr.search(index=twitter_user_index_name,\
                    doc_type=twitter_user_index_type,body={'query':{'terms':{'uid':search_uid_list}}})['hits']['hits']

            user_dict = dict()
            for item in user_exist:
                uid = item['_source']['uid']
                user_dict[uid] = item['_source']
            for item in flow_text_exist:
                influence = item['_source']['influence']
                active = item['_source']['active']
                uid = item['_source']['uid']
                try:
                    user_info = user_dict[uid]
                    uname = user_info['name']
                    location = user_info['locale']
                    link = user_info['link']
                except:
                    uname = ''
                    location = ''
                    link = ''
                results.append({'uid':uid, 'influence':influence, 'active':active, \
                        'uname': uname, 'location':location, 'link': link})
        except Exception,e:
            print e
            results = []
Exemplo n.º 2
0
def lookup_todaytwitter_date_warming(keywords, today_datetime):
    '''
    Fetch today's tweets that match any of the sensitive keywords, enriched
    with engagement counters and the author's nickname.

    keywords       -- iterable of unicode keywords, matched as substrings.
    today_datetime -- timestamp used to pick today's flow-text index.

    Returns a list of enriched _source dicts ([] on failure).
    '''
    keyword_query_list = []
    for keyword in keywords:
        # substring match on the tweet text for each keyword
        keyword_query_list.append({'wildcard': {'text': '*' + keyword.encode('utf-8') + '*'}})

    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)

    query_body = {
        'query': {
            'bool': {'should': keyword_query_list}
        },
        'size': MAX_WARMING_SIZE
    }
    try:
        temp_result = es_xnr_2.search(index=twitter_flow_text_index_name,
                doc_type=twitter_flow_text_index_type,
                body=query_body)['hits']['hits']
        date_result = []
        for item in temp_result:
            # attach the three engagement counters (comment/share/favorite)
            tid_result = lookup_tid_attend_index(item['_source']['tid'], today_datetime)
            if tid_result:
                # BUG FIX: lookup_tid_attend_index returns a LIST of hit
                # sources; the original indexed it like a dict, raising
                # TypeError that the outer except swallowed, so the whole
                # function returned [] whenever counter data existed
                latest = tid_result[0]
                item['_source']['comment'] = latest['comment']
                item['_source']['share'] = latest['share']
                item['_source']['favorite'] = latest['favorite']
            else:
                item['_source']['comment'] = 0
                item['_source']['share'] = 0
                item['_source']['favorite'] = 0
            # attach the author's nickname
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            date_result.append(item['_source'])
    except Exception:
        date_result = []
    return date_result
Exemplo n.º 3
0
def lookup_today_keywords(from_ts, to_ts, xnr_user_no):
    '''
    Aggregate keyword counts over the concerned users' tweets in
    [from_ts, to_ts]; returns {keyword: doc_count}.
    '''
    concerned_uids = lookup_xnr_concernedusers(xnr_user_no)
    uid_filter = {'terms': {'uid': concerned_uids}}
    time_filter = {'range': {'timestamp': {'gte': from_ts, 'lte': to_ts}}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': [uid_filter, time_filter]}}
            }
        },
        'aggs': {
            'keywords': {
                'terms': {'field': 'keywords_string', 'size': 50}
            }
        }
    }
    # daily index is named after the end of the window
    index_name = twitter_flow_text_index_name_pre + ts2datetime(to_ts)
    buckets = es_xnr.search(index=index_name,
            doc_type=twitter_flow_text_index_type,
            body=query_body)['aggregations']['keywords']['buckets']
    # bucket key -> document count
    return dict((bucket['key'], bucket['doc_count']) for bucket in buckets)
Exemplo n.º 4
0
def lookup_history_keywords(from_ts, to_ts, xnr_user_no):
    '''
    Merge one XNR's stored keyword counts over [from_ts, to_ts].

    Each hit carries a JSON-encoded {keyword: count} map; the per-day maps
    are folded together with union_dict.  Returns {} when nothing matches.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'xnr_user_no': xnr_user_no}},
                            {'range': {'timestamp': {'gte': from_ts,
                                                     'lte': to_ts}}}
                        ]
                    }
                }
            }
        }
    }
    hits = es_xnr.search(index=facebook_keyword_count_index_name,
            doc_type=facebook_keyword_count_index_type,
            body=query_body)['hits']['hits']
    if not hits:
        return dict()
    merged = dict()
    for hit in hits:
        daily_counts = json.loads(hit['_source']['keyword_value_string'])
        merged = union_dict(merged, daily_counts)
    return merged
Exemplo n.º 5
0
def get_show_fb_xnr(submitter):
    '''
    Map xnr_user_no -> nick_name for the submitter's Facebook XNRs with
    create_status == 2.
    '''
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'submitter': submitter}},
                    # presumably 2 == creation fully finished — confirm
                    {'term': {'create_status': 2}}
                ]
            }
        },
        'size': MAX_SEARCH_SIZE
    }
    hits = es.search(index=fb_xnr_index_name,
                     doc_type=fb_xnr_index_type,
                     body=query_body)['hits']['hits']
    fb_xnr_dict = {}
    for hit in hits:
        source = hit['_source']
        fb_xnr_dict[source['xnr_user_no']] = source['nick_name']
    return fb_xnr_dict
Exemplo n.º 6
0
def lookup_history_fullkeywords(from_ts, to_ts):
    '''
    Merge the stored full-keyword count maps over [from_ts, to_ts]
    (Facebook full-keyword index).  Returns {} when nothing matches.
    '''
    time_condition = {'range': {'timestamp': {'gte': from_ts, 'lte': to_ts}}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': [time_condition]}}
            }
        },
        'size': 100
    }
    hits = es_xnr.search(index=facebook_full_keyword_index_name,
            doc_type=facebook_full_keyword_index_type,
            body=query_body)['hits']['hits']
    if not hits:
        return dict()
    merged = dict()
    for hit in hits:
        # each document stores a JSON-encoded {keyword: count} map
        merged = union_dict(merged,
                            json.loads(hit['_source']['keyword_value_string']))
    return merged
Exemplo n.º 7
0
def get_tw_xnr_list(user_account, status_start, status_end):
    '''
    Return the xnr_user_no values of the account's Twitter XNRs whose
    create_status lies in [status_start, status_end).  [] on failure.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'submitter': user_account}},
                            {'range': {'create_status': {'gte': status_start,
                                                         'lt': status_end}}}
                        ]
                    }
                }
            }
        },
        'size': USER_XNR_NUM
    }
    try:
        hits = es_xnr_2.search(index=tw_xnr_index_name,
                               doc_type=tw_xnr_index_type,
                               body=query_body)['hits']['hits']
        return [hit['_source']['xnr_user_no'] for hit in hits]
    except:
        return []
Exemplo n.º 8
0
def get_save_step_one(task_detail):
    es_results = es.search(index=fb_xnr_index_name,doc_type=fb_xnr_index_type,body={'query':{'match_all':{}},\
                    'sort':{'user_no':{'order':'desc'}}})['hits']['hits']
    if es_results:
        user_no_max = es_results[0]['_source']['user_no']
        user_no_current = user_no_max + 1
    else:
        user_no_current = 1
    task_detail['user_no'] = user_no_current
    task_id = user_no2fb_id(user_no_current)  #五位数 WXNR0001
    print 'task_id'
    print task_id
    try:
        item_exist = dict()
        item_exist['user_no'] = task_detail['user_no']
        item_exist['domain_name'] = task_detail['domain_name']
        item_exist['role_name'] = task_detail['role_name']
        item_exist['psy_feature'] = '&'.join(
            task_detail['psy_feature'].encode('utf-8').split(','))
        item_exist['political_side'] = task_detail['political_side']
        item_exist['business_goal'] = '&'.join(
            task_detail['business_goal'].encode('utf-8').split(','))
        # item_exist['daily_interests'] = '&'.join(task_detail['daily_interests'].encode('utf-8').split(','))
        item_exist['monitor_keywords'] = '&'.join(
            task_detail['monitor_keywords'].encode('utf-8').split(','))
        item_exist['create_status'] = 0  # 第一步完成
        print es.index(index=fb_xnr_index_name,
                       doc_type=fb_xnr_index_type,
                       id=task_id,
                       body=item_exist)
        mark = True
    except:
        mark = False
    return mark
Exemplo n.º 9
0
def lookup_history_user_warming(xnr_user_no, start_time, end_time):
    '''
    Fetch one XNR's historical user warnings in [start_time, end_time],
    returned most-sensitive first.  [] on failure.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'xnr_user_no': xnr_user_no}},
                            {'range': {'timestamp': {'gte': start_time,
                                                     'lte': end_time}}}
                        ]
                    }
                }
            }
        },
        'sort': {'user_sensitive': {'order': 'asc'}},
        'size': MAX_WARMING_SIZE
    }
    index_names = get_xnr_warming_index_listname(
        twitter_user_warning_index_name_pre, start_time, end_time)
    try:
        hits = es_xnr_2.search(index=index_names,
                doc_type=twitter_user_warning_index_type,
                body=query_body)['hits']['hits']
        results = [hit['_source'] for hit in hits]
        # ES sorted ascending; callers want the most sensitive first
        results.sort(key=lambda record: record.get('user_sensitive', 0),
                     reverse=True)
    except:
        results = []
    return results
Exemplo n.º 10
0
def lookup_today_fullkeywords(from_ts, to_ts):
    '''
    Aggregate today's keyword counts from the FB flow-text index, then keep
    only the words the keyword extractor also picks out of the combined
    '&'-joined string.

    Returns {keyword: count}; count falls back to 1 when the extractor emits
    a word the aggregation buckets did not contain.  {} on failure.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'timestamp': {'gte': from_ts, 'lte': to_ts}
                            }
                        }]
                    }
                }
            }
        },
        'aggs': {
            'keywords': {
                'terms': {'field': 'keywords_string', 'size': 80}
            }
        }
    }
    flow_text_index_name = facebook_flow_text_index_name_pre + ts2datetime(to_ts)
    try:
        buckets = es_xnr.search(index=flow_text_index_name,
                doc_type=facebook_flow_text_index_type,
                body=query_body)['aggregations']['keywords']['buckets']

        word_dict = dict()
        for bucket in buckets:
            word_dict[bucket['key']] = bucket['doc_count']
        # FIX: build the string with join instead of quadratic `+=`;
        # keeps the original leading '&' and '&' separators
        keywords_string = ''.join('&' + bucket['key'] for bucket in buckets)

        k_dict = extract_keywords(keywords_string)

        word_dict_new = dict()
        for token in k_dict:
            keyword = token.word
            # FIX: dict.has_key() is removed in Python 3; use `in`
            if keyword in word_dict:
                word_dict_new[keyword] = word_dict[keyword]
            else:
                word_dict_new[keyword] = 1
    except:
        word_dict_new = dict()
    return word_dict_new
Exemplo n.º 11
0
def lookup_history_speech_warming(xnr_user_no, show_type, start_time, end_time):
    '''
    Historical speech warnings of one XNR, most sensitive first.

    show_type -- 0: all users, 1: friends only, 2: non-friends only.
    Returns [] on search failure.
    '''
    base_terms = [
        {'term': {'xnr_user_no': xnr_user_no}},
        {'range': {'timestamp': {'gte': start_time, 'lte': end_time}}}
    ]
    show_condition_list = []
    if show_type == 0:       # all users
        show_condition_list.append({'must': base_terms})
    elif show_type == 1:     # friends only
        show_condition_list.append(
            {'must': [{'term': {'content_type': 'friends'}}] + base_terms})
    elif show_type == 2:     # non-friends only
        show_condition_list.append(
            {'must': [{'term': {'content_type': 'unfriends'}}] + base_terms})

    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': show_condition_list[0]}
            }
        },
        'size': SPEECH_WARMING_NUM,
        'sort': {'sensitive': {'order': 'desc'}}
    }

    index_names = get_xnr_warming_index_listname(
        twitter_speech_warning_index_name_pre, start_time, end_time)
    try:
        hits = es_xnr_2.search(index=index_names,
                doc_type=twitter_speech_warning_index_type,
                body=query_body)['hits']['hits']
        results = [hit['_source'] for hit in hits]
        results.sort(key=lambda record: record.get('sensitive', 0),
                     reverse=True)
    except:
        results = []
    return results
Exemplo n.º 12
0
def get_modify_userinfo(task_detail):
    '''
    Build a profile-update payload from task_detail and push it to the XNR's
    Facebook account via change_userinfo.

    Looks the XNR up by nick_name, prefers its mail account over the phone
    account for login, and returns the change_userinfo result (False on
    failure).
    '''
    item_dict = {}
    nick_name = task_detail['nick_name']

    # "province,city"; when only one part is given it fills both fields
    location_list = task_detail['location'].encode('utf-8').split(',')
    item_dict['location_province'] = location_list[0]
    if len(location_list) > 1:
        item_dict['location_city'] = location_list[1]
    else:
        item_dict['location_city'] = location_list[0]

    item_dict['description'] = task_detail['description']
    if task_detail['gender'] == u'男':   # "male"
        item_dict['gender'] = 'man'
    else:
        item_dict['gender'] = 'woman'

    # random but VALID birthday consistent with the requested age.
    # BUG FIX: randint(0, 13) / randint(0, 29) could yield month 00 or 13
    # and day 00; clamp to real calendar values (day <= 28 is always valid)
    birth_year = time.localtime().tm_year - int(task_detail['age'])
    month = '%02d' % random.randint(1, 12)
    day = '%02d' % random.randint(1, 28)
    item_dict['birth'] = [str(birth_year), month, day]

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'nick_name': nick_name}
                }
            }
        }
    }
    es_results = es.search(index=fb_xnr_index_name,
                           doc_type=fb_xnr_index_type,
                           body=query_body)['hits']['hits']
    xnr_result = es_results[0]['_source']
    fb_mail_account = xnr_result.get('fb_mail_account', '')
    fb_phone_account = xnr_result.get('fb_phone_account', '')

    # prefer the mail account; fall back to the phone account
    if fb_mail_account:
        account_name = fb_mail_account
    else:
        account_name = fb_phone_account
    password = xnr_result['password']
    uid = xnr_result['uid']
    try:
        result = change_userinfo(account_name, password, uid, item_dict)
    except:
        result = False
    return result
Exemplo n.º 13
0
def get_show_example_model():
    '''Return the _source of every document in the Twitter example-model index.'''
    hits = es.search(index=tw_example_model_index_name,
                     doc_type=tw_example_model_index_type,
                     body={'query': {'match_all': {}}})['hits']['hits']
    return [hit['_source'] for hit in hits]
Exemplo n.º 14
0
def get_show_domain():
    '''Map domain_pinyin -> domain_name over every FB domain document.'''
    search_body = {'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE}
    hits = es.search(index=fb_domain_index_name,
                     doc_type=fb_domain_index_type,
                     body=search_body)['hits']['hits']
    domain_name_dict = {}
    for hit in hits:
        source = hit['_source']
        domain_name_dict[source['domain_pinyin']] = source['domain_name']
    return domain_name_dict
Exemplo n.º 15
0
def get_fb_xnr_no():
    '''
    Current maximum FB-XNR user_no: taken from redis when the counter key
    exists, otherwise from the newest ES document (0 when the index is empty).
    '''
    if r.exists(fb_xnr_max_no):
        # redis already tracks the counter
        return int(r.get(fb_xnr_max_no))
    # no redis record -- fall back to the highest user_no stored in ES
    hits = es.search(index=fb_xnr_index_name, doc_type=fb_xnr_index_type,
                     body={'query': {'match_all': {}},
                           'sort': {'user_no': {'order': 'desc'}}})['hits']['hits']
    if hits:
        return hits[0]['_source']['user_no']
    return 0
Exemplo n.º 16
0
def get_xnr_info(task_detail):
    '''Raw ES hits for the FB XNR whose nick_name matches task_detail's.'''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'nick_name': task_detail['nick_name']}
                }
            }
        }
    }
    return es.search(index=fb_xnr_index_name,
                     doc_type=fb_xnr_index_type,
                     body=query_body)['hits']['hits']
Exemplo n.º 17
0
def get_nick_name_unique(nick_name):
    '''
    True when nick_name is free to use, False when it is already taken.

    Checks both the global user-profile index and the FB-XNR index.
    '''
    query_body = {'query': {'term': {'nick_name': nick_name}}}
    es_profile_results = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body=query_body)['hits']['hits']
    es_xnr_results = es.search(index=fb_xnr_index_name,
                               doc_type=fb_xnr_index_type,
                               body=query_body)['hits']['hits']
    # BUG FIX: the original used `and`, so a name present in only ONE of the
    # two indexes was reported as unique; taken in either index means taken
    if es_profile_results or es_xnr_results:
        mark = False
    else:
        mark = True
    return mark
Exemplo n.º 18
0
def lookup_twitter_date_warming_content(start_year, end_year, date_time,
                                        date_name, start_time, end_time, keywords):
    '''
    Collect stored date-warning content for date_name across the years
    [start_year, end_year].

    One timing-warning index exists per "<year>-<date_time>"; only indexes
    that actually exist are queried.  When "now" falls inside
    [start_time, end_time], today's live warnings are appended as one extra
    element.  Returns a list of warning entries.
    '''
    twitter_timing_warning_index_name_list = []
    if start_year != end_year:
        start_year_int = int(start_year)
        iter_year = int(end_year)
        while iter_year >= start_year_int:
            # BUG FIX: the original built the name from start_year_int on
            # every pass, so only a single year's index was ever considered
            index_name = twitter_timing_warning_index_name_pre + str(iter_year) + '-' + date_time
            if es_xnr_2.indices.exists(index=index_name):
                twitter_timing_warning_index_name_list.append(index_name)
            iter_year = iter_year - 1
    else:
        index_name = twitter_timing_warning_index_name_pre + start_year + '-' + date_time
        if es_xnr_2.indices.exists(index=index_name):
            twitter_timing_warning_index_name_list.append(index_name)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'date_name': date_name}}
                        ]
                    }
                }
            }
        },
        'sort': {'timestamp': {'order': 'asc'}},
        'size': MAX_WARMING_SIZE
    }
    result = es_xnr_2.search(index=twitter_timing_warning_index_name_list,
            doc_type=twitter_timing_warning_index_type,
            body=query_body)['hits']['hits']
    warming_content = []
    for item in result:
        # each hit stores a JSON-encoded list of warning entries
        warming_content.extend(json.loads(item['_source']['twitter_date_warming_content']))

    # live warnings when the requested window covers the present moment
    now_time = int(time.time())
    if start_time <= now_time <= end_time:
        today_warming = lookup_todaytwitter_date_warming(keywords, now_time)
        warming_content.append(today_warming)
    return warming_content
Exemplo n.º 19
0
def get_all_access_level_info(account_name):
    account_name = account_name
    query_body = {"query": {"match_all": {}}, "size": 9999}
    try:
        result = es_xnr_2.search(index=access_control_index_name,
                                 doc_type=access_control_index_type,
                                 body=query_body)['hits']['hits']
        print result
    except Exception as e:
        print e
        return []
    results = []

    if result != []:
        for item in result:
            results.append(item['_source'])
        print(results)
    return results
Exemplo n.º 20
0
def get_hashtag(today_datetime):
    '''
    Top hashtags among today's sensitive tweets (sensitive >= 1), each with
    its document count and summed sensitivity, ordered by sensitivity then
    count, descending.  Empty hashtag buckets are skipped.
    '''
    index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'range': {'sensitive': {'gte': 1}}}
                        ]
                    }
                }
            }
        },
        'aggs': {
            'all_hashtag': {
                'terms': {'field': 'hashtag'},
                'aggs': {
                    'sum_sensitive': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': 5
    }
    buckets = es_xnr_2.search(index=index_name,
            doc_type=twitter_flow_text_index_type,
            body=query_body)['aggregations']['all_hashtag']['buckets']

    hashtag_list = []
    for bucket in buckets:
        if not bucket['key']:   # skip the empty-hashtag bucket
            continue
        hashtag_list.append({
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        })

    hashtag_list.sort(key=lambda event: (event.get('event_sensitive', 0),
                                         event.get('event_count', 0)),
                      reverse=True)
    return hashtag_list
Exemplo n.º 21
0
def get_show_domain_group_summary(submitter):
    '''
    Summaries of the submitter's penetration domains.

    Returns a list of summary dicts, or a Chinese notice string when the
    account has not created any domain yet (callers rely on that string).
    '''
    es_result = es.search(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                body={'query': {'term': {'submitter': submitter}}})['hits']['hits']
    if not es_result:
        return '当前账户尚未创建渗透领域'
    result_all = []
    for hit in es_result:
        source = hit['_source']
        # FIX: dropped an unused `json.loads(source['create_type'])` whose
        # only effect was to raise ValueError on non-JSON content and abort
        item = {
            'group_size': source['group_size'],
            'domain_name': source['domain_name'],
            'create_time': source['create_time'],
            'compute_status': source['compute_status'],
            'create_type': source['create_type'],
            'remark': source['remark'],
            'description': source['description'],
        }
        result_all.append(item)
    return result_all
Exemplo n.º 22
0
def show_corpus_facebook(corpus_type):
    '''
    All Facebook corpus documents of the given corpus_type, each _source
    tagged with its ES document id under the 'id' key.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'corpus_type': corpus_type}
                }
            }
        },
        'size': MAX_VALUE
    }
    hits = es.search(index=facebook_xnr_corpus_index_name,
                     doc_type=facebook_xnr_corpus_index_type,
                     body=query_body)['hits']['hits']
    results = []
    for hit in hits:
        source = hit['_source']
        source['id'] = hit['_id']   # expose the document id to callers
        results.append(source)
    return results
Exemplo n.º 23
0
def show_condition_corpus_tw(corpus_condition):
    '''
    Twitter corpus documents matching the caller-supplied bool-must
    condition list, each _source tagged with its ES id under 'id'.
    '''
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': corpus_condition}
                }
            }
        },
        'size': MAX_VALUE
    }
    hits = es.search(index=twitter_xnr_corpus_index_name,
                     doc_type=twitter_xnr_corpus_index_type,
                     body=query_body)['hits']['hits']
    results = []
    for hit in hits:
        source = hit['_source']
        source['id'] = hit['_id']   # expose the document id to callers
        results.append(source)
    return results
Exemplo n.º 24
0
def lookup_tid_attend_index(tid, from_ts, to_ts):
    '''
    Latest engagement-counter record for one tweet id in [from_ts, to_ts],
    returned as a list with at most one _source dict ([] on failure).

    NOTE(review): a later two-argument definition with the same name shadows
    this one at import time.
    '''
    index_name = get_timets_set_indexset_list(twitter_count_index_name_pre,
                                              from_ts, to_ts)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'term': {'tid': tid}}}
                }
            }
        },
        'size': 1,
        'sort': {'update_time': {'order': 'desc'}}
    }
    try:
        hits = es_xnr.search(index=index_name,
                doc_type=twitter_count_index_type,
                body=query_body)['hits']['hits']
        tid_result = [hit['_source'] for hit in hits]
    except:
        tid_result = []
    return tid_result
Exemplo n.º 25
0
def lookup_tid_attend_index(tid,today_datetime):
    twitter_count_index_name=get_timets_set_indexset_list(twitter_count_index_name_pre,today_datetime,today_datetime)
    
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{'must':{'term':{'tid':tid}}}
                }
            }
        },
        'size':MAX_WARMING_SIZE,
        'sort':{'update_time':{'order':'desc'}}
    }
    try:
        result=es_xnr_2.search(index=twitter_count_index_name,doc_type=twitter_count_index_type,body=query_body)['hits']['hits']
        print result
        tid_result=[]
        for item in result:
            tid_result.append(item['_source'])
    except:
        tid_result=[]
    return tid_result
Exemplo n.º 26
0
def lookup_today_personal_warming(xnr_user_no, start_time, end_time):
    '''
    Build today's per-user sensitivity warnings for one XNR.

    Sums sensitivity per uid over the day's flow text, weights each user by
    judge_user_type against the XNR's followers list, then attaches every
    sensitive user's sensitive tweets (with engagement counters and
    nicknames) as a JSON string.

    Returns warning dicts sorted by user_sensitive descending.
    '''
    # followers list used to weight each user's sensitivity
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)

    # NOTE(review): result unused below; call kept in case it matters — confirm
    xnr_uid = lookup_xnr_uid(xnr_user_no)

    # per-uid summed sensitivity over the day's flow text
    query_body = {
        'aggs': {
            'friends_sensitive_num': {
                'terms': {'field': 'uid'},
                'aggs': {
                    'sensitive_num': {
                        'sum': {'field': 'sensitive'}
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, start_time, end_time)

    try:
        first_sum_result = es_xnr_2.search(index=twitter_flow_text_index_name,
                doc_type=twitter_flow_text_index_type,
                body=query_body)['aggregations']['friends_sensitive_num']['buckets']
    except:
        first_sum_result = []

    # keep only users with positive sensitivity, weighted by follow status
    top_userlist = []
    for bucket in first_sum_result:
        user_sensitive = bucket['sensitive_num']['value']
        if user_sensitive > 0:
            uid = bucket['key']
            followers_mark = judge_user_type(uid, followers_list)
            top_userlist.append({'uid': uid,
                                 'sensitive': user_sensitive * followers_mark})

    # attach each sensitive user's sensitive tweets
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(user['uid'])

        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'uid': user['uid']}},
                                {'range': {'sensitive': {'gte': 1, 'lte': 100}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }

        try:
            second_result = es_xnr_2.search(index=twitter_flow_text_index_name,
                    doc_type=twitter_flow_text_index_type,
                    body=query_body)['hits']['hits']
        except:
            second_result = []

        s_result = []
        for item in second_result:
            # attach the three engagement counters (comment/share/favorite)
            tid_result = lookup_tid_attend_index(item['_source']['tid'], start_time)
            if tid_result:
                # BUG FIX: lookup_tid_attend_index returns a LIST of hit
                # sources; the original indexed it with string keys, raising
                # an uncaught TypeError whenever counter data existed
                latest = tid_result[0]
                item['_source']['comment'] = latest['comment']
                item['_source']['share'] = latest['share']
                item['_source']['favorite'] = latest['favorite']
            else:
                item['_source']['comment'] = 0
                item['_source']['share'] = 0
                item['_source']['favorite'] = 0

            # attach the author's nickname
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            s_result.append(item['_source'])

        s_result.sort(key=lambda k: k.get('sensitive', 0), reverse=True)
        user_detail['content'] = json.dumps(s_result)

        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = end_time

        results.append(user_detail)

    results.sort(key=lambda k: k.get('user_sensitive', 0), reverse=True)
    return results
Exemplo n.º 27
0
def create_event_warning(xnr_user_no,today_datetime,write_mark):
    """Build sensitive-event warning records for one virtual user (xnr).

    For every hashtag/event found around ``today_datetime``, collects the
    sensitive tweets under that hashtag, scores each tweet's influence
    (engagement counters weighted by sensitivity and by whether the author
    is followed by the xnr), aggregates per-event averages and the main
    participating users, and either persists each record (``write_mark``
    truthy; the write status is collected) or collects the record dicts.

    Parameters:
        xnr_user_no   -- virtual-user id; used for the follower lookup and
                         stamped into each warning record / task id
        today_datetime-- timestamp selecting the daily flow-text index
        write_mark    -- truthy: write each warning via
                         write_envent_warming() and collect its status;
                         falsy: collect the warning dicts themselves

    Returns:
        list of write-status marks (when write_mark) or warning dicts.
    """
    # Fetch the candidate event names (hashtags) for the day.
    hashtag_list = get_hashtag(today_datetime)
    print 'hashtag_list/:',hashtag_list

    # NOTE(review): this rebinds the module-level index-name variable to a
    # local (list of) daily index name(s) — confusing but harmless here.
    twitter_flow_text_index_name=get_timets_set_indexset_list(twitter_flow_text_index_name_pre,today_datetime,today_datetime)

    # Look up the accounts this xnr follows; followed authors get extra
    # weight both in the per-user tally and in the influence score.
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)

    event_warming_list=[]
    for event_item in hashtag_list:
        # Per-event record: event name, main participating users, typical
        # tweets, event influence, and average event time.
        event_warming_content=dict()
        event_warming_content['event_name']=event_item['event_name']
        event_influence_sum=0
        event_time_sum=0       
        # Sensitive tweets carrying this hashtag, most sensitive first.
        query_body={
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'hashtag':event_item['event_name']}},
                                {'range':{'sensitive':{'gte':1}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }       
        event_results=es_xnr_2.search(index=twitter_flow_text_index_name,doc_type=twitter_flow_text_index_type,body=query_body)['hits']['hits']
        if event_results:
            twitter_result=[]
            alluser_num_dict=dict()
            #print 'sencond_time:::',int(time.time())
            for item in event_results:
                # Pull the three engagement counters for this tweet;
                # default to 0 when no attend-index entry exists.
                tid_result=lookup_tid_attend_index(item['_source']['tid'],today_datetime)
                if tid_result:
                    item['_source']['comment']=tid_result['comment']
                    item['_source']['share']=tid_result['share']
                    item['_source']['favorite']=tid_result['favorite']
                else:
                    item['_source']['comment']=0
                    item['_source']['share']=0
                    item['_source']['favorite']=0  
                #print 'event_content:',item['_source']['text']          
                # Tally tweets per author; authors the xnr follows count
                # double per tweet.
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    followers_mark=set_intersection(item['_source']['uid'],followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1*2
                    else:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1
                else:
                    alluser_num_dict[str(item['_source']['uid'])]=1
                                   

                # Influence = (1 + engagement) * (1 + sensitivity),
                # scaled by the author's relationship to the xnr.
                origin_influence_value=(1+item['_source']['comment']+item['_source']['share']+item['_source']['favorite'])*(1+item['_source']['sensitive'])
                followers_value=judge_user_type(item['_source']['uid'],followers_list)
                item['_source']['twitter_influence_value']=origin_influence_value*followers_value
                
                # Attach the author's nickname for display.
                item['_source']['nick_name']=get_user_nickname(item['_source']['uid']) 
                twitter_result.append(item['_source'])

                # Accumulate influence and timestamps for the averages.
                event_influence_sum=event_influence_sum+item['_source']['twitter_influence_value']
                event_time_sum=event_time_sum+item['_source']['timestamp']            
        
            # print 'third_time:::',int(time.time())
            # Typical tweets: store all matched tweets, highest influence first.
            twitter_result.sort(key=lambda k:(k.get('twitter_influence_value',0)),reverse=True)
            event_warming_content['main_twitter_info']=json.dumps(twitter_result)

            # Per-event averages.  NOTE(review): Python 2 integer division
            # when all operands are ints — presumably acceptable here.
            number=len(event_results)
            event_warming_content['event_influence']=event_influence_sum/number
            event_warming_content['event_time']=event_time_sum/number


            # Rank the participating users by their (weighted) tweet count.
            alluser_num_dict=sorted(alluser_num_dict.items(),key=lambda d:d[1],reverse=True)
            main_userid_list=[]
            for i in xrange(0,len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])

            # Fetch profile info for the main participating users; missing
            # profiles get empty/zero placeholders.
            main_user_info=[]
            user_es_result=es_xnr_2.mget(index=twitter_user_index_name,doc_type=twitter_user_index_type,body={'ids':main_userid_list})['docs']
            # print 'user_es_result:',user_es_result
            for item in user_es_result:

                user_dict=dict()
                if item['found']:
                    user_dict['uid']=item['_id']
                    user_dict['username']=item['_source']['username']
                    if item['_source'].has_key('profileimageurl'):
                        user_dict['profileimageurl']=item['_source']['profileimageurl']
                    else:
                        user_dict['profileimageurl']=''
                    if item['_source'].has_key('statuscount'):
                        user_dict['statuscount']=item['_source']['statuscount']
                    else:
                        user_dict['statuscount']=0
                    if item['_source'].has_key('followerscount'):
                        user_dict['followerscount']=item['_source']['followerscount']
                    else:
                        user_dict['followerscount']=0
                    if item['_source'].has_key('friendscount'):
                        user_dict['friendscount']=item['_source']['friendscount']
                    else:
                        user_dict['friendscount']=0
                else:
                    # user_dict['icon']=''
                    user_dict['uid']=item['_id']
                    user_dict['username']=''
                    user_dict['profileimageurl']=''
                    user_dict['statuscount']=0
                    user_dict['followerscount']=0
                    user_dict['friendscount']=0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info']=json.dumps(main_user_info)



            # print 'fourth_time:::',int(time.time())
            event_warming_content['xnr_user_no']=xnr_user_no
            event_warming_content['validity']=0
            event_warming_content['timestamp']=today_datetime
            now_time=int(time.time())
            task_id=xnr_user_no+'_'+str(now_time) 
        
            # Persist or collect, depending on write_mark.
            if write_mark:
                # print 'today_datetime:::',ts2datetime(today_datetime)
                mark=write_envent_warming(today_datetime,event_warming_content,task_id)
                event_warming_list.append(mark)
            else:
                event_warming_list.append(event_warming_content)

        else:
            # No sensitive tweets for this hashtag: skip the event entirely.
            pass
        # print 'fifth_time:::',int(time.time())
    return event_warming_list
Exemplo n.º 28
0
def lookup_hot_posts(from_ts, to_ts, xnr_id, classify_id, order_id):
    time_gap = to_ts - from_ts
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_FB)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_FB)
        from_ts = from_ts - test_time_gap
        to_ts = to_ts - test_time_gap

    from_date_ts = datetime2ts(ts2datetime(from_ts))
    to_date_ts = datetime2ts(ts2datetime(to_ts))
    print 'from_date_ts, to_date_ts:', ts2date(from_date_ts), ts2date(
        to_date_ts)
    print from_date_ts, to_date_ts

    flow_text_index_name_list = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, from_ts, to_ts)

    userslist = lookup_xnr_friends(xnr_id)
    #全部用户 0,好友 1,非好友-1
    range_time_list = {
        'range': {
            'timestamp': {
                'gte': int(from_ts),
                'lt': int(to_ts)
            }
        }
    }
    # print range_time_list

    user_condition_list = []
    if classify_id == 1:
        user_condition_list = [{
            'bool': {
                'must': [{
                    'terms': {
                        'uid': userslist
                    }
                }, range_time_list]
            }
        }]
    elif classify_id == 2:
        user_condition_list = [{
            'bool': {
                'must': [range_time_list],
                'must_not': [{
                    'terms': {
                        'uid': userslist
                    }
                }]
            }
        }]
    elif classify_id == 0:
        user_condition_list = [{'bool': {'must': [range_time_list]}}]

    query_body = {
        'query': {
            'filtered': {
                'filter': user_condition_list
            }
        },
        'size': MAX_HOT_POST_SIZE,
        'sort': {
            'timestamp': {
                'order': 'desc'
            }
        }
    }

    # try:
    es_result=es_xnr.search(index=flow_text_index_name_list,doc_type=facebook_flow_text_index_type,\
        body=query_body)['hits']['hits']
    hot_result = []
    for item in es_result:
        #查询三个指标字段
        fid_result = lookup_fid_attend_index(item['_source']['fid'], from_ts,
                                             to_ts)
        if fid_result:
            item['_source']['comment'] = fid_result['comment']
            item['_source']['share'] = fid_result['share']
            item['_source']['favorite'] = fid_result['favorite']
        else:
            item['_source']['comment'] = 0
            item['_source']['share'] = 0
            item['_source']['favorite'] = 0
            #查询用户昵称
        item['_source']['nick_name'] = get_user_nickname(
            item['_source']['uid'])
        hot_result.append(item['_source'])
    # except:
    # hot_result=[]

    if order_id == 1:  #按时间排序
        sort_condition = 'timestamp'
    elif order_id == 2:  #按热度排序
        sort_condition = 'retweeted'
    elif order_id == 3:  #按敏感度排序
        sort_condition = 'sensitive'
    else:  #默认设为按时间排序
        sort_conditiont = 'timestamp'
    if hot_result:
        hot_result.sort(key=lambda k: (k.get(sort_condition, 0)), reverse=True)
        hot_result = hot_result[:50]
    return hot_result
Exemplo n.º 29
0
def show_corpus_class(create_type, corpus_type):
    query_condition = []
    if create_type and corpus_type:
        query_condition.append({
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'term': {
                                'create_type': create_type
                            }
                        }, {
                            'term': {
                                'corpus_type': corpus_type
                            }
                        }]
                    }
                }
            }
        })
    else:
        if create_type:
            query_condition.append({
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': {
                                'term': {
                                    'create_type': create_type
                                }
                            }
                        }
                    }
                }
            })
        elif corpus_type:
            query_condition.append({
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': {
                                'term': {
                                    'corpus_type': corpus_type
                                }
                            }
                        }
                    }
                }
            })
        else:
            query_condition.append({'match_all': {}})

    print 'query_condition', query_condition
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': query_condition
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }
    result = es.search(index=facebook_xnr_corpus_index_name,
                       doc_type=facebook_xnr_corpus_index_type,
                       body=query_body)['hits']['hits']
    results = []
    for item in result:
        item['_source']['id'] = item['_id']
        results.append(item['_source'])
    return results
Exemplo n.º 30
0
def export_group_info(domain_name, mail):
    """Export a domain (group) profile as JSON and mail the file.

    Gathers the domain's aggregate statistics, its creation metadata,
    and per-member profile info, dumps everything to a timestamped JSON
    file under EXAMPLE_MODEL_PATH, then attempts to send that file to
    ``mail``.

    Returns:
        True on success; False when the export file could not be
        written.  A failed mail send is printed but still counts as
        success.
    """
    mark = True
    # Result skeleton; the commented-out keys document the intended
    # shapes of the nested dicts filled in below.
    res = {
        'domain_name': domain_name,
        'members_num': 0,
        'create_info': {
            'submitter': '',
            'remark': '',
            'create_type': '',
            'create_time': '',
        },
        'members_uid': [],
        'members_info': {
            #         'uid1': {
            #           'nickname': '',
            #           'gender': '',
            #           'location': '',
            #           'link': '',
            #         }
        },
        'count_info': {
            'location_count': {
                #           'zh_TW': 10,
                #           'us': 5
            },
            #         'gender_count': {
            # #           'f': 0,
            # #           'm': 40
            #         },
            'role_count': {
                #           'role1': 12,
                #           'role2': 7
            },
            'words_preference': {
                #           'w1': 20,
                #           'w2': 10
            },
            'topic_preference': {
                #           't1': 20,
                #           't2': 10
            },
            'political_side': {},
        }
    }
    # The pinyin transliteration of the domain name doubles as its ES id.
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')

    # Aggregate statistics for the domain.
    domain_details = get_show_domain_description(domain_name)
    res['count_info']['political_side'] = domain_details['political_side']
    res['count_info']['role_count'] = domain_details['role_distribute']
    res['count_info']['topic_preference'] = domain_details['topic_preference']
    res['count_info']['words_preference'] = domain_details['word_preference']
    res['members_num'] = domain_details['group_size']

    # Creation metadata comes from the domain document itself.
    domain_info = es.get(index=tw_domain_index_name,
                         doc_type=tw_domain_index_type,
                         id=domain_pinyin)['_source']
    res['create_info']['remark'] = domain_info['remark']
    res['create_info']['submitter'] = domain_info['submitter']
    res['create_info']['create_type'] = domain_info['create_type']
    res['create_info']['create_time'] = ts2datetime_full(
        domain_info['create_time'])
    res['members_uid'] = domain_info['member_uids']

    # Bulk-fetch member profiles, projecting only the listed fields.
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "uid": res['members_uid'],
                        }
                    },
                ]
            }
        },
        "size": 9999,
        "fields": ["locale", "link", "uid", "gender", "username"]
    }
    user_info = es.search(profile_index_name, profile_index_type,
                          query_body)['hits']['hits']
    members_info = {}
    gender_count = {}
    location_count = {}
    for user in user_info:
        # With a 'fields' projection, every value arrives as a list.
        item = user['fields']
        uid = item.get('uid', [''])[0]
        #         gender = item.get('gender', [''])[0]
        # NOTE(review): 'location' is read here but the query above only
        # requests 'locale', so this is always '' and location_count stays
        # empty — confirm which field name the profile index stores.
        location = item.get('location', [''])[0]
        members_info[uid] = {
            'nickname': item.get('username', [''])[0],
            #             'gender': gender,
            'location': location,
            # NOTE(review): 'userscreenname' is not in the requested
            # fields list either, so the link is always the bare prefix.
            'link':
            'https://twitter.com/' + item.get('userscreenname', [''])[0]
        }
        #         if gender:
        #             if gender in gender_count:
        #                 gender_count[gender] += 1
        #             else:
        #                 gender_count[gender] = 1

        if location:
            if location in location_count:
                location_count[location] += 1
            else:
                location_count[location] = 1

    res['members_info'] = members_info
    res['count_info']['location_count'] = location_count
    # gender_count is never populated (the tallying code is commented out).
    res['count_info']['gender_count'] = gender_count

    # Write the export file, then best-effort mail it; only a failed
    # file write flips the return value to False.
    export_filename = EXAMPLE_MODEL_PATH + domain_pinyin + '_' + ts2datetime_full(
        time.time()) + '.json'
    try:
        with open(export_filename, "w") as f:
            json.dump(res, f)
        try:
            sendfile2mail(mail, export_filename)
        except Exception, e:
            print e
    except:
        mark = False
    return mark