def get_show_domain_group_detail_portrait(domain_name):
    """Return one display record per member of the given domain group.

    The domain document is fetched from ``weibo_domain_index_name`` using the
    pinyin form of ``domain_name`` as its id, and the portraits of its
    ``member_uids`` are fetched in a single ``mget`` call.

    Each record is a dict with the keys ``uid``, ``nick_name``, ``photo_url``,
    ``domain``, ``sensitive``, ``location``, ``fans_num``, ``friends_num``,
    ``gender``, ``home_page`` and ``influence``.  For a member without a
    portrait document every field except ``uid`` and ``home_page`` is the
    empty string.
    """
    url_head = 'http://weibo.com/'
    url_tail = '/profile?topnav=1&wvr=6&is_all=1'
    # Fields left blank when no portrait document exists for a member.
    blank_fields = (
        'nick_name', 'photo_url', 'domain', 'sensitive', 'location',
        'fans_num', 'friends_num', 'gender', 'influence',
    )

    group_key = pinyin.get(domain_name, format='strip', delimiter='_')
    group_doc = es.get(
        index=weibo_domain_index_name,
        doc_type=weibo_domain_index_type,
        id=group_key,
    )
    uid_list = group_doc['_source']['member_uids']

    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list},
    )['docs']

    members = []
    for doc in portrait_docs:
        if not doc['found']:
            # No portrait: only the id (and the URL derived from it) is known.
            missing_uid = doc['_id']
            record = dict.fromkeys(blank_fields, '')
            record['uid'] = missing_uid
            record['home_page'] = url_head + missing_uid + url_tail
            members.append(record)
            continue

        portrait = doc['_source']
        uid = portrait['uid']
        record = {
            'uid': uid,
            'nick_name': portrait['uname'],
            'photo_url': portrait['photo_url'],
            'domain': portrait['domain'],
            'sensitive': portrait['sensitive'],
            'location': portrait['location'],
            'fans_num': portrait['fansnum'],
            'friends_num': portrait['friendsnum'],
            'gender': portrait['gender'],
            'home_page': url_head + uid + url_tail,
        }
        record['influence'] = get_influence_relative(uid, portrait['influence'])
        members.append(record)

    return members
def get_generate_example_model(domain_name,role_name):
    """Build the example model of a (domain, role) pair and persist it.

    The role document is read from ``weibo_role_index_name`` (id
    ``<domain pinyin>_<role english name>``) and enriched in place:

    * ``political_side`` is replaced by its Chinese label,
    * ``psy_feature`` becomes the first ``TOP_PSY_FEATURE`` feature names
      joined with ``&``,
    * ``monitor_keywords`` is extracted from the ``keywords_string`` of the
      members' flow-text documents,
    * ``nick_name`` / ``location`` / ``gender`` / ``description`` are copied
      from member profile documents,
    * ``active_time`` becomes the indices of the ``TOP_ACTIVE_TIME`` largest
      entries and ``day_post_num`` becomes the mean of the stored values,
    * a few fields (``business_goal``, ``daily_interests``, ``age``,
      ``career``) are filled with fixed defaults.

    The result is dumped to ``EXAMPLE_MODEL_PATH + <task id> + '.json'`` and a
    small ``{domain_name, role_name}`` document is indexed into
    ``weibo_example_model_index_name``.

    Args:
        domain_name: Domain name; converted to pinyin to build the task id.
        role_name: Role name; must be a key of ``domain_ch2en_dict``.

    Returns:
        True if the file was written and the index call succeeded, False if
        either of those two steps raised an exception.

    Raises:
        Anything raised while reading or transforming the role document
        (e.g. ``KeyError`` for an unknown role or a missing field); only the
        final persistence step is guarded.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en

    item = es.get(index=weibo_role_index_name, doc_type=weibo_role_index_type,
                  id=task_id)['_source']
    # Single-argument form: prints the same text under Python 2's print
    # statement and also parses as a function call.
    print('es_result::: %s' % (item,))

    # Political leaning
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'

    # Psychological features: slicing (instead of indexing 0..TOP-1) keeps
    # this working when fewer than TOP_PSY_FEATURE features are stored.
    psy_feature = json.loads(item['psy_feature'])
    item['psy_feature'] = '&'.join(
        [entry[0] for entry in psy_feature[:TOP_PSY_FEATURE]])

    role_group_uids = json.loads(item['member_uids'])

    # Monitor keywords come from the members' flow-text documents.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name_list = get_flow_text_index_list(current_time)

    query_body_search = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {'uid': role_group_uids}
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    es_keyword_results = es_flow_text.search(
        index=index_name_list, doc_type=flow_text_index_type,
        body=query_body_search)['hits']['hits']

    # Every entry is prefixed with '&' (so the string starts with '&' when
    # there is at least one hit), matching what extract_keywords received
    # before.
    keywords_string = ''.join(
        ['&' + hit['_source']['keywords_string'] for hit in es_keyword_results])

    k_dict = extract_keywords(keywords_string)
    item['monitor_keywords'] = ','.join(
        [keyword.word.encode('utf-8') for keyword in k_dict])

    # Basic profile fields: each found member overwrites the previous values;
    # the loop stops at the first member whose profile has a description.
    mget_results_user = es_user_portrait.mget(
        index=profile_index_name, doc_type=profile_index_type,
        body={'ids': role_group_uids})['docs']

    # NOTE(review): the default is a list while found profiles store a
    # string; kept as-is because consumers of the JSON may rely on it.
    item['nick_name'] = []
    for mget_item in mget_results_user:
        if not mget_item['found']:
            continue
        profile_source = mget_item['_source']
        item['nick_name'] = profile_source['nick_name']
        item['location'] = profile_source['user_location']
        item['gender'] = profile_source['sex']
        uid = profile_source['uid']
        try:
            profile_results = es_user_portrait.get(
                index=profile_index_name, doc_type=profile_index_type,
                id=uid)['_source']
        except Exception:
            # Best effort: a failed lookup just means we try the next member.
            continue
        description = profile_results.get('description')
        if description:
            item['description'] = description
            break

    # Fixed defaults.
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the TOP_ACTIVE_TIME largest activity values, descending.
    active_time_values = np.array(json.loads(item['active_time']))
    item['active_time'] = np.argsort(-active_time_values)[:TOP_ACTIVE_TIME].tolist()

    day_post_num_values = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_values).tolist()

    item['role_name'] = role_name

    example_model_file_name = EXAMPLE_MODEL_PATH + task_id + '.json'
    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)

        item_dict = {
            'domain_name': domain_name,
            'role_name': role_name,
        }
        es.index(index=weibo_example_model_index_name,
                 doc_type=weibo_example_model_index_type,
                 body=item_dict, id=task_id)
    except Exception:
        # Persistence failures are reported through the return value.
        return False
    return True