def domain_create_task(domain_name, create_type, create_time, submitter, description, remark, compute_status=0):
    """Queue a domain-detection task and store its creation record.

    The pinyin form of ``domain_name`` is used as the document id.  When a
    document with that id can already be read from the domain index, nothing
    is queued or written.

    Args:
        domain_name: Display name of the domain.
        create_type: Creation descriptor; stored JSON-encoded.
        create_time: Creation time, stored as given.
        submitter: Name of the submitting user.
        description: Free-text description.
        remark: Free-text remark.
        compute_status: Status placed in the queued task (the indexed
            creation record always starts with status ``0``).

    Returns:
        ``'domain name exists!'`` when the domain document already exists,
        ``True`` when the task was queued and the record indexed, ``False``
        when queueing or indexing raised.
    """
    task_id = pinyin.get(domain_name, format='strip', delimiter='_')

    try:
        es.get(index=fb_domain_index_name, doc_type=fb_domain_index_type, id=task_id)['_source']
    except Exception:
        # Any lookup failure is treated as "not created yet" (best effort);
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        pass
    else:
        return 'domain name exists!'

    try:
        domain_task_dict = {
            'domain_pinyin': task_id,
            'domain_name': domain_name,
            'create_type': json.dumps(create_type),
            'create_time': create_time,
            'submitter': submitter,
            'description': description,
            'remark': remark,
            'compute_status': compute_status,
        }
        r.lpush(fb_target_domain_detect_queue_name, json.dumps(domain_task_dict))

        # Store the creation info: same fields as the queued task, plus an
        # empty group size and an initial compute status of 0.
        item_exist = dict(domain_task_dict)
        item_exist['group_size'] = ''
        item_exist['compute_status'] = 0
        print(es.index(index=fb_domain_index_name, doc_type=fb_domain_index_type,
                       id=item_exist['domain_pinyin'], body=item_exist))
        mark = True
    except Exception as e:
        print(e)
        mark = False
    return mark
def addto_twitter_corpus(task_detail):
    """Enrich ``task_detail`` from the Twitter flow-text index and store it in the corpus index.

    The enrichment step (text, comment/share/favorite counters, nickname) is
    best effort: if any part of it fails, whatever is already in
    ``task_detail`` is still indexed.  ``task_detail`` is modified in place.

    Args:
        task_detail: dict with at least ``'tid'`` and ``'timestamp'``.

    Returns:
        ``True`` if the corpus document was indexed, ``False`` otherwise.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=twitter_flow_text_index_type,
                                   id=task_detail['tid'])['_source']
        task_detail['text'] = corpus_result['text']

        # Look up the three metric fields.
        tid_result = lookup_tid_attend_index(task_detail['tid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        if tid_result:
            task_detail['comment'] = tid_result['comment']
            task_detail['share'] = tid_result['share']
            task_detail['favorite'] = tid_result['favorite']
        else:
            task_detail['comment'] = 0
            task_detail['share'] = 0
            task_detail['favorite'] = 0

        # Look up the author's nickname.  The original referenced an undefined
        # name here, so the nickname was never filled in.
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except Exception:
        # Enrichment is optional; fall through and index what we have.
        pass

    try:
        es_xnr.index(index=twitter_xnr_corpus_index_name,
                     doc_type=twitter_xnr_corpus_index_type,
                     id=task_detail['tid'], body=task_detail)
        mark = True
    except Exception:
        mark = False
    return mark
def get_save_step_three_1(task_detail):
    """Copy the account fields of ``task_detail`` onto the stored fb_xnr document.

    Reads the document whose id is ``task_detail['task_id']``, overwrites its
    account fields, sets ``create_status`` to 2 (creation finished) and writes
    it back with a partial update.  Always returns ``True``; lookup or update
    errors propagate to the caller.
    """
    task_id = task_detail['task_id']
    stored = es.get(index=fb_xnr_index_name, doc_type=fb_xnr_index_type, id=task_id)['_source']

    # The stored field is named 'uid' while the incoming one is 'id'.
    stored['uid'] = task_detail['id']
    copied_fields = (
        'nick_name',
        'fb_mail_account',
        'fb_phone_account',
        'password',
        'career',
        'description',
        'age',
        'location',
    )
    for field in copied_fields:
        stored[field] = task_detail[field]
    stored['create_status'] = 2  # creation finished

    # Update the fb_xnr table.
    print(es.update(index=fb_xnr_index_name, doc_type=fb_xnr_index_type,
                    id=task_id, body={'doc': stored}))
    return True
def addto_warning_corpus(task_detail):
    """Copy a tweet from the flow-text index into the Twitter warning corpus.

    The flow-text document is annotated with the reporting fields from
    ``task_detail``, the author's nickname, the comment/share/favorite
    counters and a ``content_type`` of ``'friends'`` or ``'unfriends'``
    depending on whether the author is in the xnr's fans list.

    Args:
        task_detail: dict with ``'tid'``, ``'timestamp'``, ``'uid'``,
            ``'xnr_user_no'``, ``'warning_source'`` and ``'create_time'``.

    Returns:
        ``True`` when the warning document was indexed, ``False`` if any
        step raised.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr_2.get(index=flow_text_index_name,
                                     doc_type=twitter_flow_text_index_type,
                                     id=task_detail['tid'])['_source']
        corpus_result['xnr_user_no'] = task_detail['xnr_user_no']
        corpus_result['warning_source'] = task_detail['warning_source']
        corpus_result['create_time'] = task_detail['create_time']
        corpus_result['validity'] = 1
        corpus_result['nick_name'] = get_user_nickname(task_detail['uid'])

        tid_result = lookup_tid_attend_index(task_detail['tid'], task_detail['timestamp'])
        if tid_result:
            corpus_result['comment'] = tid_result['comment']
            corpus_result['share'] = tid_result['share']
            corpus_result['favorite'] = tid_result['favorite']
        else:
            corpus_result['comment'] = 0
            corpus_result['share'] = 0
            corpus_result['favorite'] = 0

        # Look up the friend list.
        lookup_type = 'fans_list'
        friends_list = lookup_xnr_fans_followers(task_detail['xnr_user_no'], lookup_type)
        set_mark = set_intersection(task_detail['uid'], friends_list)
        corpus_result['content_type'] = 'friends' if set_mark > 0 else 'unfriends'

        es_xnr_2.index(index=twitter_warning_corpus_index_name,
                       doc_type=twitter_warning_corpus_index_type,
                       id=task_detail['tid'], body=corpus_result)
        mark = True
    except Exception:
        # Narrowed from a bare except so interpreter-exit signals propagate.
        mark = False
    return mark
def get_role_info(domain_pinyin, role_name):
    """Return the stored ``_source`` of a role document.

    The document id is ``<domain_pinyin>_<english role name>``.

    Args:
        domain_pinyin: Pinyin id of the domain.
        role_name: Role name, used as a key of ``fb_domain_ch2en_dict``.

    Raises:
        KeyError: if ``role_name`` is not in ``fb_domain_ch2en_dict``.
        Any exception raised by ``es.get`` propagates.
    """
    # fb_domain_ch2en_dict is subscripted everywhere else in this file;
    # calling it like a function raised TypeError.
    role_en = fb_domain_ch2en_dict[role_name]
    role_info_id = domain_pinyin + '_' + role_en
    role_info = es.get(index=fb_domain_index_name, doc_type=fb_domain_index_type,
                       id=role_info_id)['_source']
    return role_info
def lookup_xnr_uid(xnr_user_no):
    """Return the ``uid`` stored for ``xnr_user_no`` in the tw_xnr index.

    Returns an empty string when the document or its ``uid`` field cannot be
    read.
    """
    try:
        xnr_result = es_xnr_2.get(index=tw_xnr_index_name, doc_type=tw_xnr_index_type,
                                  id=xnr_user_no)['_source']
        xnr_uid = xnr_result['uid']
    except Exception:
        # Best-effort lookup; narrowed from a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        xnr_uid = ''
    return xnr_uid
def lookup_xnr_fans_followers(xnr_user_no, lookup_type):
    """Return one list field of the xnr's fans/followers document.

    Args:
        xnr_user_no: Document id in the tw fans/followers index.
        lookup_type: Name of the field to return (callers in this file pass
            ``'fans_list'``).

    Returns:
        The stored value of ``lookup_type``, or ``[]`` when the document or
        the field cannot be read.
    """
    try:
        xnr_result = es_xnr_2.get(index=tw_xnr_fans_followers_index_name,
                                  doc_type=tw_xnr_fans_followers_index_type,
                                  id=xnr_user_no)['_source']
        lookup_list = xnr_result[lookup_type]
    except Exception:
        # Best-effort lookup; narrowed from a bare except.
        lookup_list = []
    return lookup_list
def lookup_xnr_concernedusers(xnr_user_no):
    """Return the ``followers_list`` stored for ``xnr_user_no``.

    Returns ``[]`` when the fans/followers document or the field cannot be
    read.
    """
    try:
        result = es_xnr.get(index=tw_xnr_fans_followers_index_name,
                            doc_type=tw_xnr_fans_followers_index_type,
                            id=xnr_user_no)
        followers_list = result['_source']['followers_list']
    except Exception:
        # Best-effort lookup; narrowed from a bare except.
        followers_list = []
    return followers_list
def get_user_nickname(uid):
    """Return the ``username`` stored for ``uid`` in the Twitter user index.

    Returns an empty string when the user document or the field cannot be
    read.
    """
    try:
        result = es_xnr.get(index=twitter_user_index_name,
                            doc_type=twitter_user_index_type, id=uid)
        user_name = result['_source']['username']
    except Exception:
        # Best-effort lookup; narrowed from a bare except.
        user_name = ''
    return user_name
def get_show_domain_group_detail_portrait(domain_name):
    """Return one portrait summary per member of a Twitter domain group.

    The member uids are read from the domain document and fetched in a single
    ``mget`` from the portrait index.  Members whose portrait is not found
    still produce an entry, with every field left as an empty string.

    Returns:
        list of dicts with keys ``uid``, ``nick_name``, ``photo_url``,
        ``domain``, ``sensitive``, ``location``, ``fans_num``,
        ``friends_num``, ``home_page`` and ``influence``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    es_result = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                       id=domain_pinyin)['_source']
    member_uids = es_result['member_uids']
    es_mget_result = es_user_portrait.mget(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'ids': member_uids})['docs']

    # (portrait field, output field) pairs that are copied verbatim.
    copied_fields = (
        ('uname', 'nick_name'),
        ('photo_url', 'photo_url'),
        ('domain', 'domain'),
        ('sensitive', 'sensitive'),
        ('location', 'location'),
        ('fansnum', 'fans_num'),
    )

    result_all = []
    for result in es_mget_result:
        item = {
            'uid': '',
            'nick_name': '',
            'photo_url': '',
            'domain': '',
            'sensitive': '',
            'location': '',
            'fans_num': '',
            'friends_num': '',
            'home_page': "",
            'influence': '',
        }
        if result['found']:
            item['uid'] = result['_id']
            source = result['_source']
            for source_key, item_key in copied_fields:
                if source_key in source:
                    item[item_key] = source[source_key]

            # The original tested for 'friends_num' but read 'friendsnum',
            # which either raised KeyError or never copied the value.
            # Accept either spelling.
            if 'friendsnum' in source:
                item['friends_num'] = source['friendsnum']
            elif 'friends_num' in source:
                item['friends_num'] = source['friends_num']

            if 'screenname' in source:
                item['home_page'] = "https://twitter.com/" + source['screenname']
            if 'influence' in source:
                item['influence'] = get_influence_relative(item['uid'], source['influence'])
        result_all.append(item)
    return result_all
def get_show_domain_role_info(domain_name, role_name):
    """Return the stored ``_source`` of a Twitter role document.

    The document id is the pinyin form of ``domain_name`` joined with the
    English role name by an underscore.  A role name missing from
    ``tw_domain_ch2en_dict`` raises ``KeyError``; lookup errors propagate.
    """
    pinyin_part = pinyin.get(domain_name, format='strip', delimiter='_')
    role_part = tw_domain_ch2en_dict[role_name]
    role_doc = es.get(index=tw_role_index_name,
                      doc_type=tw_role_index_type,
                      id=pinyin_part + '_' + role_part)
    return role_doc['_source']
def lookup_xnr_friends(xnr_user_no):
    """Return the ``fans_list`` stored for ``xnr_user_no`` in the fb fans/followers index.

    Returns ``[]`` when the document or the field cannot be read.
    """
    try:
        xnr_result = es_xnr.get(index=fb_xnr_fans_followers_index_name,
                                doc_type=fb_xnr_fans_followers_index_type,
                                id=xnr_user_no)['_source']
        lookup_list = xnr_result['fans_list']
    except Exception:
        # Best-effort lookup; narrowed from a bare except.
        lookup_list = []
    return lookup_list
def get_access_level_info(account_name):
    """Return the access level recorded for ``account_name``.

    The raw Elasticsearch response is printed.  The result is a dict with
    keys ``'account_name'`` and ``'access_level'``; the level is ``None``
    when the stored document has no ``access_level`` field.  Lookup errors
    propagate.
    """
    response = es_xnr_2.get(index=access_control_index_name,
                            doc_type=access_control_index_type,
                            id=account_name)
    print(response)
    stored_level = response['_source'].get('access_level')
    return {
        'account_name': account_name,
        'access_level': stored_level,
    }
def get_role_sort_list(domain_name):
    """Return the domain's role names, translated, in stored order.

    ``role_distribute`` of the domain document is a JSON list whose entries
    start with the English role name; each name is mapped through
    ``tw_domain_en2ch_dict``.

    Returns:
        list of translated role names, or ``[]`` if the document cannot be
        read, decoded or translated.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es_result = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                           id=domain_pinyin)['_source']
        role_sort_list_en = json.loads(es_result['role_distribute'])
        return [tw_domain_en2ch_dict[item[0]] for item in role_sort_list_en]
    except Exception:
        # Best-effort; narrowed from a bare except.
        return []
def get_modify_base_info(task_detail):
    """Update the optional base-info fields of a tw_xnr document.

    Only the keys present in ``task_detail`` are changed:
    ``active_time`` and ``monitor_keywords`` are stored as given,
    ``day_post_average`` is split on ``'-'`` and stored JSON-encoded.

    Args:
        task_detail: dict with ``'xnr_user_no'`` plus any of the optional keys.

    Returns:
        ``True`` if the update succeeded, ``False`` if it raised.  (The
        original computed this flag but fell off the end and returned
        ``None``.)
    """
    xnr_user_no = task_detail['xnr_user_no']
    item_exists = es.get(index=tw_xnr_index_name, doc_type=tw_xnr_index_type,
                         id=xnr_user_no)['_source']

    if 'active_time' in task_detail:
        item_exists['active_time'] = task_detail['active_time']
    if 'day_post_average' in task_detail:
        day_post_average = task_detail['day_post_average'].split('-')
        item_exists['day_post_average'] = json.dumps(day_post_average)
    if 'monitor_keywords' in task_detail:
        item_exists['monitor_keywords'] = task_detail['monitor_keywords']

    try:
        es.update(index=tw_xnr_index_name, doc_type=tw_xnr_index_type,
                  body={'doc': item_exists}, id=xnr_user_no)
        mark = True
    except Exception as e:
        print(e)
        mark = False
    return mark
def get_save_step_three_1(task_detail):
    """Copy the account fields of ``task_detail`` onto the stored tw_xnr document.

    Reads the document whose id is ``task_detail['task_id']``, overwrites its
    account fields, sets ``create_status`` to 2 (creation finished) and writes
    it back with a partial update.  Always returns ``True``; lookup or update
    errors propagate to the caller.
    """
    task_id = task_detail['task_id']
    stored = es.get(index=tw_xnr_index_name, doc_type=tw_xnr_index_type, id=task_id)['_source']

    copied_fields = (
        'uid',
        'nick_name',
        'tw_mail_account',
        'tw_phone_account',
        'password',
        'description',
        'age',
        'location',
    )
    for field in copied_fields:
        stored[field] = task_detail[field]
    stored['create_status'] = 2  # creation finished

    # Update the tw_xnr table.
    print(es.update(index=tw_xnr_index_name, doc_type=tw_xnr_index_type,
                    id=task_id, body={'doc': stored}))
    return True
def get_show_domain_group_detail_portrait(domain_name):
    """Return one portrait summary per member of a Facebook domain group.

    Member uids come from the domain document and are fetched with one
    ``mget`` from the portrait index.  A member without a portrait still
    yields an entry whose fields are all empty strings.

    Returns:
        list of dicts with keys ``uid``, ``nick_name``, ``domain``,
        ``sensitive``, ``location``, ``home_page`` and ``influence``.
    """
    group_id = pinyin.get(domain_name, format='strip', delimiter='_')
    group_doc = es.get(index=fb_domain_index_name, doc_type=fb_domain_index_type,
                       id=group_id)['_source']
    portraits = es_user_portrait.mget(index=portrait_index_name,
                                      doc_type=portrait_index_type,
                                      body={'ids': group_doc['member_uids']})['docs']

    # (portrait field, output field) pairs copied as-is when present.
    direct_copies = (
        ('uname', 'nick_name'),
        ('domain', 'domain'),
        ('sensitive', 'sensitive'),
        ('location', 'location'),
    )

    rows = []
    for doc in portraits:
        row = dict.fromkeys(
            ('uid', 'nick_name', 'domain', 'sensitive', 'location', 'home_page', 'influence'),
            '')
        if doc['found']:
            member_id = doc['_id']
            portrait = doc['_source']
            row['uid'] = member_id
            row['home_page'] = "https://www.facebook.com/profile.php?id=" + str(member_id)
            for portrait_key, row_key in direct_copies:
                if portrait_key in portrait:
                    row[row_key] = portrait[portrait_key]
            if 'influence' in portrait:
                row['influence'] = get_influence_relative(row['uid'], portrait['influence'])
        rows.append(row)
    return rows
def get_role2feature_info(domain_name, role_name):
    """Return the decoded political-side and psychological features of a role.

    Args:
        domain_name: Display name of the domain (converted to pinyin).
        role_name: Role name, used as a key of ``fb_domain_ch2en_dict``.

    Returns:
        dict with keys ``'political_side'`` and ``'psy_feature'`` (the latter
        is ``[]`` when it is missing or cannot be decoded).  For backward
        compatibility an empty list is returned when the role document or its
        ``political_side`` cannot be read.

    Raises:
        KeyError: if ``role_name`` is not in ``fb_domain_ch2en_dict``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_name_en = fb_domain_ch2en_dict[role_name]
    _id = domain_pinyin + '_' + role_name_en

    try:
        feature_info_dict = es.get(index=fb_role_index_name, doc_type=fb_role_index_type,
                                   id=_id)['_source']
        feature_filter_dict = dict()
        feature_filter_dict['political_side'] = json.loads(feature_info_dict['political_side'])
        try:
            feature_filter_dict['psy_feature'] = json.loads(feature_info_dict['psy_feature'])
        except Exception:
            # psy_feature is optional.
            feature_filter_dict['psy_feature'] = []
        return feature_filter_dict
    except Exception:
        # Narrowed from a bare except; the failure value stays a list.
        return []
def update_access_level_info(account_name, access_level):
    """Set the ``access_level`` of an access-control document.

    The stored document is read, its ``access_level`` replaced and the whole
    document sent back as a partial update.

    Returns:
        ``{"status": "ok"}`` on success, ``{"status": "fail"}`` if reading or
        updating raised.
    """
    try:
        record = es_xnr_2.get(index=access_control_index_name,
                              doc_type=access_control_index_type,
                              id=account_name)['_source']
        record['access_level'] = access_level
        es_xnr_2.update(index=access_control_index_name,
                        doc_type=access_control_index_type,
                        id=account_name,
                        body={'doc': record})
    except Exception:
        return {"status": "fail"}
    else:
        return {"status": "ok"}
def addto_facebook_corpus(task_detail):
    """Enrich ``task_detail`` from the Facebook flow-text index and store it in the corpus index.

    The enrichment step (text, comment/share/favorite counters, nickname) is
    best effort: if any part of it fails, whatever is already in
    ``task_detail`` is still indexed.  ``task_detail`` is modified in place.

    Args:
        task_detail: dict with at least ``'fid'`` and ``'timestamp'``.

    Returns:
        ``True`` if the corpus document was indexed, ``False`` otherwise.
    """
    flow_text_index_name = facebook_flow_text_index_name_pre + ts2datetime(
        task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=facebook_flow_text_index_type,
                                   id=task_detail['fid'])['_source']
        task_detail['text'] = corpus_result['text']

        # Look up the three metric fields.
        fid_result = lookup_fid_attend_index(task_detail['fid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        if fid_result:
            task_detail['comment'] = fid_result['comment']
            task_detail['share'] = fid_result['share']
            task_detail['favorite'] = fid_result['favorite']
        else:
            task_detail['comment'] = 0
            task_detail['share'] = 0
            task_detail['favorite'] = 0

        # Look up the author's nickname.
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except Exception:
        # Enrichment is optional; the flag the original set here was always
        # overwritten below, so nothing needs recording.
        pass

    try:
        es_xnr.index(index=facebook_xnr_corpus_index_name,
                     doc_type=facebook_xnr_corpus_index_type,
                     id=task_detail['fid'], body=task_detail)
        mark = True
    except Exception:
        mark = False
    return mark
def get_show_domain_description(domain_name):
    """Return a display-ready summary of a Twitter domain document.

    The result has keys ``group_size``, ``description``, ``topic_preference``,
    ``word_preference``, ``role_distribute`` and ``political_side``.  The
    three distribution lists are decoded from JSON and the first element of
    each entry is translated: topics through ``fb_tw_topic_en2ch_dict``,
    roles through ``tw_domain_en2ch_dict`` and political sides through a
    fixed mapping.  Lookup, decoding and translation errors propagate.
    """
    def side_label(side):
        # Anything that is neither 'mid' nor 'right' is labelled as left.
        if side == 'mid':
            return u'中立'
        if side == 'right':
            return u'右倾'
        return u'左倾'

    doc_id = pinyin.get(domain_name, format='strip', delimiter='_')
    source = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                    id=doc_id)['_source']

    summary = {}
    summary['group_size'] = source['group_size']
    summary['description'] = source['description']

    summary['topic_preference'] = [
        [fb_tw_topic_en2ch_dict[entry[0]], entry[1]]
        for entry in json.loads(source['topic_preference'])
    ]
    summary['word_preference'] = json.loads(source['top_keywords'])
    summary['role_distribute'] = [
        [tw_domain_en2ch_dict[entry[0]], entry[1]]
        for entry in json.loads(source['role_distribute'])
    ]
    summary['political_side'] = [
        [side_label(entry[0]), entry[1]]
        for entry in json.loads(source['political_side'])
    ]
    return summary
def _tw_report_as_list(value):
    """Return ``value`` as a list, JSON-decoding it when it is stored as a string."""
    if isinstance(value, (list, tuple)):
        return value
    return json.loads(value)


def report_warming_content(task_detail):
    """Build a warning report from ``task_detail`` and index it.

    For every tweet in ``task_detail['tw_info']`` the matching entry is looked
    up in the warning index selected by ``report_type`` (user, speech, event
    or timing).  Tweets that are not found there are read from the flow-text
    index instead.  Users listed in ``task_detail['user_info']`` are resolved
    against the Twitter user index, with default values when a user cannot
    be read.

    Fixes relative to the previous revision:
      * stored warning content is decoded instead of being re-encoded with
        ``json.dumps`` (which made the loops iterate over characters);
      * the timing search passes the query as ``body`` and reads ``_source``;
      * the undefined names ``fb_result``, ``user_dict`` and ``weibo_info``
        are replaced, so a non-empty ``user_info`` or a speech report no
        longer raises ``NameError``.

    Args:
        task_detail: dict with ``report_type``, ``report_time``,
            ``xnr_user_no``, ``event_name``, ``uid``, ``tw_info``,
            ``user_info`` and, for timing reports, ``date_time``.

    Returns:
        ``True`` when at least one tweet was collected and the report was
        indexed, ``False`` otherwise.
    """
    report_type = task_detail['report_type']

    report_dict = dict()
    report_dict['report_type'] = report_type
    report_dict['report_time'] = task_detail['report_time']
    report_dict['xnr_user_no'] = task_detail['xnr_user_no']
    report_dict['event_name'] = task_detail['event_name']
    report_dict['uid'] = task_detail['uid']
    report_dict['nick_name'] = get_user_nickname(task_detail['uid'])

    tw_list = []
    user_list = []
    tw_info = task_detail['tw_info']

    for item in tw_info:
        lookup_mark = False
        item['timestamp'] = int(item['timestamp'])

        if report_type == u'人物':
            index_name = twitter_user_warning_index_name_pre + ts2datetime(item['timestamp'])
            warning_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
            try:
                user_result = es_xnr_2.get(index=index_name,
                                           doc_type=twitter_user_warning_index_type,
                                           id=warning_id)['_source']
                for content in _tw_report_as_list(user_result['content']):
                    if content['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(content)
            except Exception:
                print('user_error!')

        elif report_type == u'言论':
            index_name = twitter_speech_warning_index_name_pre + ts2datetime(item['timestamp'])
            try:
                speech_result = es_xnr_2.get(index=index_name,
                                             doc_type=twitter_speech_warning_index_type,
                                             id=task_detail['xnr_user_no'] + '_' + item['tid'])['_source']
                report_dict['uid'] = speech_result['uid']
                lookup_mark = True
                tw_list.append(speech_result)
            except Exception:
                print('speech_error!')

        elif report_type == u'事件':
            index_name = twitter_event_warning_index_name_pre + ts2datetime(item['timestamp'])
            event_warning_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
            try:
                event_result = es_xnr_2.get(index=index_name,
                                            doc_type=twitter_event_warning_index_type,
                                            id=event_warning_id)['_source']
                for event in _tw_report_as_list(event_result['main_twitter_info']):
                    if event['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(event)
            except Exception:
                print('event_error!')

        elif report_type == u'时间':
            year = ts2yeartime(item['timestamp'])
            index_name = twitter_timing_warning_index_name_pre + year + '_' + task_detail['date_time']
            try:
                time_result = es_xnr_2.search(index=index_name,
                                              doc_type=twitter_timing_warning_index_type,
                                              body={'query': {'match_all': {}}})['hits']['hits']
                for timedata in time_result:
                    # NOTE(review): assumes the warning content lives under
                    # '_source' like every other hit read in this file.
                    warming_content = timedata['_source']['twitter_date_warming_content']
                    for data in _tw_report_as_list(warming_content):
                        if data['tid'] == item['tid']:
                            lookup_mark = True
                            tw_list.append(data)
            except Exception:
                print('time_error!')

        if not lookup_mark:
            # Fall back to the raw flow-text document.
            flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(item['timestamp'])
            try:
                tw_result = es_xnr_2.get(index=flow_text_index_name,
                                         doc_type=twitter_flow_text_index_type,
                                         id=item['tid'])['_source']
                tw_result['nick_name'] = get_user_nickname(tw_result['uid'])
                tid_result = lookup_tid_attend_index(item['tid'], item['timestamp'])
                if tid_result:
                    tw_result['comment'] = tid_result['comment']
                    tw_result['share'] = tid_result['share']
                    tw_result['favorite'] = tid_result['favorite']
                else:
                    tw_result['comment'] = 0
                    tw_result['share'] = 0
                    tw_result['favorite'] = 0
                tw_list.append(tw_result)
            except Exception:
                print('flow_text error!')

    user_info = task_detail['user_info']
    if user_info:
        for uid in user_info:
            # Start from the defaults; they are what gets reported when the
            # user document cannot be read.
            user = {
                'uid': uid,
                'username': '',
                'talking_about_count': 0,
                'likes': 0,
                'category': '',
            }
            try:
                user_result = es_xnr_2.get(index=twitter_user_index_name,
                                           doc_type=twitter_user_index_type,
                                           id=uid)['_source']
                # 'username' is required; a missing key leaves the defaults.
                user['username'] = user_result['username']
                user['talking_about_count'] = user_result.get('talking_about_count', 0)
                user['likes'] = user_result.get('likes', 0)
                user['category'] = user_result.get('category', '')
            except Exception:
                print('user_list error!')
            user_list.append(user)

    report_content = dict()
    report_content['user_list'] = user_list
    report_content['tw_list'] = tw_list
    report_dict['report_content'] = json.dumps(report_content)

    report_id = ''
    if report_type == u'言论':
        if tw_info:
            report_id = tw_info[0]['tid']
    elif report_type == u'人物':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
    elif report_type == u'事件':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
    elif report_type == u'时间':
        if tw_info:
            report_id = tw_info[0]['tid']
        else:
            report_id = str(task_detail['report_time'])

    report_mark = bool(tw_list)

    # Make sure today's report-management index exists before writing.
    now_time = int(time.time())
    twitter_report_management_index_name = twitter_report_management_index_name_pre + ts2datetime(now_time)
    if not es_xnr_2.indices.exists(index=twitter_report_management_index_name):
        twitter_report_management_mappings()

    if report_id and report_mark:
        try:
            es_xnr_2.index(index=twitter_report_management_index_name,
                           doc_type=twitter_report_management_index_type,
                           id=report_id, body=report_dict)
            mark = True
        except Exception:
            mark = False
    else:
        mark = False
    return mark
def get_domain_info(domain_pinyin):
    """Return the stored ``_source`` of the fb domain document ``domain_pinyin``.

    Lookup errors propagate to the caller.
    """
    response = es.get(index=fb_domain_index_name,
                      doc_type=fb_domain_index_type,
                      id=domain_pinyin)
    return response['_source']
def get_xnr_info_new(xnr_user_no):
    """Return the stored ``_source`` of the fb_xnr document ``xnr_user_no``.

    Lookup errors propagate to the caller.
    """
    response = es.get(index=fb_xnr_index_name,
                      doc_type=fb_xnr_index_type,
                      id=xnr_user_no)
    return response['_source']
def get_recommend_step_two(task_detail):
    """Build profile recommendations for a new account from a role's members.

    The role document (id ``<domain pinyin>_<english role name>``) supplies
    the member uids, the active-time histogram and the per-day post counts.
    Member profiles are fetched with one ``mget`` and sampled until more than
    ``NICK_NAME_TOP`` found profiles have been processed.

    Fixes relative to the previous revision:
      * a profile without ``name`` no longer reuses the previous member's
        nickname (or raises on the first member) - it gets ``''``;
      * a ``gender`` other than ``'male'``/``'female'`` no longer appends a
        stale or unbound value to the sex statistics;
      * an empty ``day_post_num`` yields an average of ``0`` instead of
        ``ZeroDivisionError``.

    Args:
        task_detail: dict with ``'domain_name'`` and ``'role_name'``.

    Returns:
        dict with keys ``role_example`` (uid -> ``[nick_name, profile url]``),
        ``active_time``, ``day_post_num_average``, ``nick_name``, ``sex``
        (1 male, 2 female, ``''`` unknown), ``user_location``,
        ``description``, ``age`` and ``career``.
    """
    domain_name = task_detail['domain_name']
    role_name = task_detail['role_name']
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_name_en = fb_domain_ch2en_dict[role_name]
    _id = domain_pinyin + '_' + role_name_en

    recommend_results = dict()

    # Everything below is derived from the role document.
    es_result = es.get(index=fb_role_index_name, doc_type=fb_role_index_type,
                       id=_id)['_source']

    # Role examples.
    nick_name_list = []
    user_location_top_list = []  # never populated here; kept for the output key
    description_list = []
    sex_list = []
    role_example_dict = {}

    member_uids = json.loads(es_result['member_uids'])
    member_uids_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': member_uids})['docs']
    count = 0
    for result in member_uids_results:
        if not result['found']:
            continue
        profile = result['_source']
        person_url = "https://www.facebook.com/profile.php?id=" + str(profile['uid'])

        nick_name = ''
        if 'name' in profile:
            nick_name = profile['name']
            nick_name_list.append(nick_name)

        if 'gender' in profile:
            if profile['gender'] == 'male':
                sex_list.append(1)
            elif profile['gender'] == 'female':
                sex_list.append(2)

        if 'description' in profile:
            description_list.append(profile['description'])

        role_example_dict[profile['uid']] = [nick_name, person_url]
        count += 1
        if count > NICK_NAME_TOP:
            break

    recommend_results['role_example'] = role_example_dict

    # Indices of the most active time slots, busiest first.
    active_time_list_np = np.array(json.loads(es_result['active_time']))
    active_time_list_np_sort = list(np.argsort(-active_time_list_np)[:ACTIVE_TIME_TOP])
    recommend_results['active_time'] = active_time_list_np_sort

    # Average posts per day, counting missing values as 0.
    day_post_num = json.loads(es_result['day_post_num'])
    day_post_num_new = list(pd.Series(day_post_num).fillna(0))
    if day_post_num_new:
        day_post_num_average = sum(day_post_num_new) / float(len(day_post_num_new))
    else:
        day_post_num_average = 0
    recommend_results['day_post_num_average'] = day_post_num_average

    # Most frequent sex among the sampled members.
    sex_sort = ''
    if sex_list:
        sex_list_count = Counter(sex_list)
        sex_sort = sorted(sex_list_count.items(), key=lambda x: x[1],
                          reverse=True)[:1][0][0]

    recommend_results['nick_name'] = '&'.join(nick_name_list)
    recommend_results['sex'] = sex_sort
    recommend_results['user_location'] = '&'.join(user_location_top_list)
    recommend_results['description'] = '&'.join(description_list[:DESCRIPTION_TOP])
    recommend_results['age'] = ''
    recommend_results['career'] = ''
    return recommend_results
def get_generate_example_model(domain_name, role_name, mail):
    """Generate an example-model JSON file for a Twitter role and register it.

    Steps, in order:
      1. export the group info of the domain (``export_group_info``);
      2. read the role document and turn its statistics into display values
         (political side label, top psychological features, most active
         time slots, mean posts per day);
      3. extract monitor keywords from the members' recent flow text;
      4. take nickname / location / description from the member profiles
         (the last found profile wins);
      5. dump the model to ``EXAMPLE_MODEL_PATH`` and index a small record
         pointing at it.

    Fixes relative to the previous revision: the psychological-feature list
    is sliced instead of indexed, so a role with fewer than
    ``TOP_PSY_FEATURE`` features no longer raises ``IndexError``; the keyword
    string is built with ``join``; the final bare ``except`` is narrowed.

    Returns:
        ``True`` when the file was written and the record indexed, ``False``
        when that last step raised.  Errors in the earlier steps propagate.
    """
    export_group_info(domain_name, mail)

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = tw_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    item = es.get(index=tw_role_index_name, doc_type=tw_role_index_type,
                  id=task_id)['_source']

    # Political leaning: label of the first entry.
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'

    # Psychological features: names of the leading entries.
    psy_feature = json.loads(item['psy_feature'])
    psy_feature_list = [feature[0] for feature in psy_feature[:TOP_PSY_FEATURE]]
    item['psy_feature'] = '&'.join(psy_feature_list)

    role_group_uids = json.loads(item['member_uids'])

    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name_list = get_flow_text_index_list(current_time)

    query_body_search = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': role_group_uids
                    }
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    es_keyword_results = es_flow_text.search(index=index_name_list,
                                             doc_type=flow_text_index_type,
                                             body=query_body_search)['hits']['hits']

    # Every keyword string is prefixed with '&', exactly as before.
    keywords_string = ''.join(
        '&' + hit['_source']['keywords_string'] for hit in es_keyword_results)
    k_dict = extract_keywords(keywords_string)
    monitor_keywords_list = [keyword.word.encode('utf-8') for keyword in k_dict]
    item['monitor_keywords'] = ','.join(monitor_keywords_list)

    mget_results_user = es_user_portrait.mget(index=profile_index_name,
                                              doc_type=profile_index_type,
                                              body={'ids': role_group_uids})['docs']
    item['nick_name'] = []
    for mget_item in mget_results_user:
        if mget_item['found']:
            content = mget_item['_source']
            # Each found profile overwrites the previous one.
            item['nick_name'] = content.get('username', '')
            item['location'] = content.get('location', '')
            item['description'] = content.get('description', '')

    item['business_goal'] = u'渗透'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the most active time slots, busiest first.
    active_time_list_np = np.array(json.loads(item['active_time']))
    active_time_list_np_sort = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME]
    item['active_time'] = active_time_list_np_sort.tolist()

    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name

    task_id_new = 'tw_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'

    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)

        item_dict = dict()
        item_dict['domain_name'] = domain_name
        item_dict['role_name'] = role_name
        es.index(index=tw_example_model_index_name,
                 doc_type=tw_example_model_index_type,
                 body=item_dict, id=task_id_new)
        mark = True
    except Exception:
        mark = False
    return mark
def export_group_info(domain_name, mail):
    """Export a Twitter domain group's details to a JSON file and mail it.

    The export combines the domain description (political side, role
    distribution, topic and word preferences, group size), the creation
    info of the domain document and per-member profile data with a location
    histogram.  The file is written under ``EXAMPLE_MODEL_PATH`` and then
    sent with ``sendfile2mail``; a mailing failure is printed but does not
    fail the export.

    Fixes relative to the previous revision:
      * the profile query now also requests ``location`` and
        ``userscreenname``, which the code reads but never asked for, so
        locations and links were always empty;
      * hits without a ``fields`` section no longer raise ``KeyError``;
      * the file is closed before it is mailed.

    Args:
        domain_name: Display name of the domain.
        mail: Recipient passed to ``sendfile2mail``.

    Returns:
        ``True`` when the file was written, ``False`` when writing raised.
        Errors while collecting the data propagate.
    """
    res = {
        'domain_name': domain_name,
        'members_num': 0,
        'create_info': {
            'submitter': '',
            'remark': '',
            'create_type': '',
            'create_time': '',
        },
        'members_uid': [],
        'members_info': {},      # uid -> {'nickname', 'location', 'link'}
        'count_info': {
            'location_count': {},    # location -> number of members
            'role_count': {},
            'words_preference': {},
            'topic_preference': {},
            'political_side': {},
        }
    }

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')

    domain_details = get_show_domain_description(domain_name)
    res['count_info']['political_side'] = domain_details['political_side']
    res['count_info']['role_count'] = domain_details['role_distribute']
    res['count_info']['topic_preference'] = domain_details['topic_preference']
    res['count_info']['words_preference'] = domain_details['word_preference']
    res['members_num'] = domain_details['group_size']

    domain_info = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                         id=domain_pinyin)['_source']
    res['create_info']['remark'] = domain_info['remark']
    res['create_info']['submitter'] = domain_info['submitter']
    res['create_info']['create_type'] = domain_info['create_type']
    res['create_info']['create_time'] = ts2datetime_full(domain_info['create_time'])
    res['members_uid'] = domain_info['member_uids']

    query_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "uid": res['members_uid'],
                        }
                    },
                ]
            }
        },
        "size": 9999,
        # The previously requested fields are kept; 'location' and
        # 'userscreenname' are the ones actually read below.
        "fields": ["locale", "link", "uid", "gender", "username",
                   "location", "userscreenname"]
    }
    user_info = es.search(profile_index_name, profile_index_type, query_body)['hits']['hits']

    members_info = {}
    gender_count = {}  # gender is not collected; exported empty
    location_count = {}
    for user in user_info:
        # Each requested field comes back as a list of values.
        item = user.get('fields', {})
        uid = item.get('uid', [''])[0]
        location = item.get('location', [''])[0]
        members_info[uid] = {
            'nickname': item.get('username', [''])[0],
            'location': location,
            'link': 'https://twitter.com/' + item.get('userscreenname', [''])[0]
        }
        if location:
            location_count[location] = location_count.get(location, 0) + 1

    res['members_info'] = members_info
    res['count_info']['location_count'] = location_count
    res['count_info']['gender_count'] = gender_count

    export_filename = EXAMPLE_MODEL_PATH + domain_pinyin + '_' + ts2datetime_full(
        time.time()) + '.json'
    try:
        with open(export_filename, "w") as f:
            json.dump(res, f)
    except Exception:
        return False

    # Mail only after the file has been flushed and closed.
    try:
        sendfile2mail(mail, export_filename)
    except Exception as e:
        print(e)
    return True