def get_users(topic, begin_ts, end_ts):
    """Store the 100 highest-ranked users who posted on ``topic``.

    Steps performed:

    1. Search ``event_text`` for documents whose ``en_name`` equals ``topic``
       and whose ``timestamp`` lies in ``[begin_ts, end_ts)``, collecting the
       distinct ``uid`` values.
    2. Fetch those uids from the daily index ``bci_day_pre + <date suffix>``
       and map each found ``user`` to its ``user_index``.
    3. Keep the 100 users with the largest ``user_index`` and classify each
       one as ``auth_type`` or ``user_type`` from the ``verified_type`` field
       of its profile document.
    4. Write the result, JSON-encoded, to the ``user_results`` field of the
       ``topic`` document in ``event_analysis_name``.

    NOTE(review): SOURCE also contains a later ``get_users`` that takes an
    extra ``relation`` argument; if both live in the same module, that later
    definition replaces this one.

    :param topic: value matched against ``en_name``; also the id of the
        analysis document that is updated.
    :param begin_ts: inclusive lower bound for ``timestamp``.
    :param end_ts: exclusive upper bound for ``timestamp``.
    :returns: None; the result is written to Elasticsearch.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'en_name': topic}},
                    {'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}},
                ]
            }
        },
        # NOTE(review): "size" is used as "return every hit"; a scan/scroll
        # would bound memory but is a larger change than this fix.
        'size': 999999999,
    }
    hits = es_event.search(index=event_text, doc_type=event_text_type,
                           fields=['uid'], body=query_body)['hits']['hits']
    uid_list = set(hit['fields']['uid'][0] for hit in hits)
    print(len(uid_list))

    # Date suffix of the daily index: fixed test date or the current time.
    if RUN_TYPE == 0:
        post = ts2datetimestr(datetime2ts(RUN_TEST_TIME))
    else:
        post = ts2datetimestr(time.time())
    bci_index_name = bci_day_pre + post
    print('%s %s %s' % (bci_index_name, bci_day_type, es_user_portrait))

    user_influence_dict = {}
    # mget rejects an empty id list, so skip the round trip when nobody posted.
    if uid_list:
        docs = es_bci.mget(index=bci_index_name, doc_type=bci_day_type,
                           body={'ids': list(uid_list)})['docs']
        for doc in docs:
            # Error entries carry no "found" key; treat them as missing.
            if doc.get('found'):
                source = doc['_source']
                user_influence_dict[source['user']] = source['user_index']

    top_users = sorted(user_influence_dict.items(),
                       key=lambda pair: pair[1], reverse=True)[:100]

    user_dict = {}
    for uid, influence in top_users:
        try:
            profile = es_user_profile.get(index=profile_index_name,
                                          doc_type=profile_index_type, id=uid)
            is_auth = profile['_source']['verified_type'] in auth_list
        except Exception:
            # Best effort: any lookup failure falls back to ``user_type``.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            is_auth = False
        u_type = auth_type if is_auth else user_type
        user_dict[uid] = {'user_type': u_type, 'influ': influence}

    payload = {'user_results': json.dumps(user_dict)}
    try:
        es_event.update(index=event_analysis_name, doc_type=event_type,
                        id=topic, body={'doc': payload})
    except Exception:
        # Presumably the document does not exist yet; fall back to indexing
        # it, as the original code did for any update failure.
        es_event.index(index=event_analysis_name, doc_type=event_type,
                       id=topic, body=payload)
def get_users(topic, begin_ts, end_ts, relation):
    """Store the top 100 users for ``topic`` and optionally link them to it.

    Steps performed:

    1. Search the index named ``topic`` for documents whose ``en_name``
       equals ``topic`` and whose ``timestamp`` lies in
       ``[begin_ts, end_ts)``, collecting the distinct ``uid`` values.
    2. Fetch those uids from the daily index ``bci_day_pre + <date suffix>``
       and map each found ``user`` to its ``user_index``.
    3. Keep the 100 users with the largest ``user_index`` and pass their ids
       to ``event_user_portrait``.
    4. Classify each user from the ``verified_type`` field of its profile
       document: ``auth_type`` when the value is in ``org_list``, otherwise
       (or when the lookup fails) ``user_type``.
    5. When ``'discuss'`` is one of the ``&``-separated entries of
       ``relation``, create a node per user via ``create_person`` and hand
       the resulting ``discuss`` relations to ``nodes_rels``.
    6. Write the classification, JSON-encoded, to the ``user_results`` field
       of the ``topic`` document in ``event_analysis_name``.

    :param topic: value matched against ``en_name``; also used as the search
        index name and as the id of the analysis document.
    :param begin_ts: inclusive lower bound for ``timestamp``.
    :param end_ts: exclusive upper bound for ``timestamp``.
    :param relation: ``&``-separated relation names (a string).
    :returns: None; results are written to the backing stores.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'en_name': topic}},
                    {'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}},
                ]
            }
        },
        # NOTE(review): "size" is used as "return every hit"; a scan/scroll
        # would bound memory but is a larger change than this fix.
        'size': 999999999,
    }
    hits = es_event.search(index=topic, doc_type=event_text_type,
                           fields=['uid'], body=query_body)['hits']['hits']
    uid_list = set(hit['fields']['uid'][0] for hit in hits)
    print(len(uid_list))

    # Date suffix of the daily index: fixed test date or the current time.
    if RUN_TYPE == 0:
        post = ts2datetimestr(datetime2ts(RUN_TEST_TIME))
    else:
        post = ts2datetimestr(time.time())
    bci_index_name = bci_day_pre + post
    print('%s %s %s' % (bci_index_name, bci_day_type, es_user_portrait))

    user_influence_dict = {}
    # mget rejects an empty id list, so skip the round trip when nobody posted.
    if uid_list:
        docs = es_bci.mget(index=bci_index_name, doc_type=bci_day_type,
                           body={'ids': list(uid_list)})['docs']
        for doc in docs:
            # Error entries carry no "found" key; treat them as missing.
            if doc.get('found'):
                source = doc['_source']
                user_influence_dict[source['user']] = source['user_index']

    top_users = sorted(user_influence_dict.items(),
                       key=lambda pair: pair[1], reverse=True)[:100]

    # The return value was never read by the original code; the call is kept
    # because it may have side effects (not visible from this block).
    event_user_portrait([uid for uid, _ in top_users])

    user_dict = {}
    p_list = []  # uids classified as ``user_type``
    a_list = []  # uids classified as ``auth_type``
    for uid, influence in top_users:
        try:
            result = es_user_profile.get(index=profile_index_name,
                                         doc_type=profile_index_type, id=uid)
            print(result)
            verified_type = result['_source']['verified_type']
            print(verified_type)
            is_org = verified_type in org_list
        except Exception:
            # Best effort: any lookup failure falls back to ``user_type``.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            is_org = False
        if is_org:
            u_type = auth_type
            a_list.append(uid)
        else:
            u_type = user_type
            p_list.append(uid)
        user_dict[uid] = {'user_type': u_type, 'influ': influence}
    print('%s %s' % (len(a_list), len(p_list)))

    if 'discuss' in relation.split('&'):
        rel_list = []
        for uid in p_list:
            resu = create_person(people_node, people_primary, uid,
                                 node_index_name)
            if resu != 'Node Wrong':
                rel_list.append([[2, topic], 'discuss', [1, uid]])
        for uid in a_list:
            resu = create_person(org_node, org_primary, uid, org_index_name)
            if resu != 'Node Wrong':
                rel_list.append([[2, topic], 'discuss', [0, uid]])
        try:
            nodes_rels(rel_list)
        except Exception as exc:
            # Still best effort (the analysis document is written regardless),
            # but report the failure instead of hiding it. %r avoids encoding
            # errors on non-ASCII exception messages.
            print('nodes_rels failed for topic %r: %r' % (topic, exc))

    payload = {'user_results': json.dumps(user_dict)}
    try:
        es_event.update(index=event_analysis_name, doc_type=event_type,
                        id=topic, body={'doc': payload})
    except Exception:
        # Presumably the document does not exist yet; fall back to indexing
        # it, as the original code did for any update failure.
        es_event.index(index=event_analysis_name, doc_type=event_type,
                       id=topic, body=payload)
if RUN_TYPE == 1: now_date = ts2datetime(ts - 24 * 3600) else: now_date = RUN_TEST_TIME bci_index_name = bci_day_pre + ''.join(now_date.split('-')) #print 'bci_index_name:', bci_index_name try: fansnum_max_results = es_bci.search(index=bci_index_name, doc_type=bci_day_type, body=query_body)['hits']['hits'] except Exception, e: raise e fansnum_max = int(fansnum_max_results[0]['_source']['user_fansnum']) #user_fansnum_dict search_result = es_bci.mget(index=bci_index_name, doc_type=bci_day_type, body={'ids': uid_list}, _source=True)['docs'] user_fansnum_dict = dict() for item in search_result: uid = item['_id'] try: user_fansnum_dict[uid] = item['_source']['user_fansnum'] except: user_fansnum_dict[uid] = 0 return fansnum_max, user_fansnum_dict def topic_en2ch(topic_label): insert_topic_label_list = [] for en_label in topic_label: ch_label = topic_en2ch_dict[en_label]