def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    """Return the ``top_k`` hits of an index, ranked descending by ``sort_index``.

    The index is searched with a ``match_all`` query.  Each hit is then
    looked up by id in the ``weibo_user`` profile index and in the
    ``user_portrait`` index.

    Returns a list of rows
    ``[rank, photo_url, uid, nick_name, vary, in_portrait]``: ``rank``
    starts at 1, ``photo_url`` and ``nick_name`` are ``''`` when the
    profile document is missing, ``vary`` is read from the hit's
    ``_source`` and ``in_portrait`` is ``'1'`` or ``'0'``.
    """
    search_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=doctype,
                     body=search_body)['hits']['hits']
    uids = [hit['_id'] for hit in hits]

    portrait_docs = es_portrait.mget(index="user_portrait", doc_type="user",
                                     body={"ids": uids}, _source=True)['docs']
    profile_docs = es_profile.mget(index="weibo_user", doc_type="user",
                                   body={"ids": uids}, _source=True)['docs']

    rows = []
    for position, hit in enumerate(hits):
        profile_doc = profile_docs[position]
        photo_url = ''
        nick_name = ''
        if profile_doc['found']:
            photo_url = profile_doc['_source'].get('photo_url', '')
            nick_name = profile_doc['_source'].get('nick_name', '')
        uid = hit.get('_id', '')
        vary = hit['_source']['vary']
        in_portrait = '1' if portrait_docs[position]['found'] else '0'
        rows.append([position + 1, photo_url, uid, nick_name, vary, in_portrait])
    return rows
def ajax_get_group_detail():
    """Return a JSON list with one row of portrait fields per group member.

    The group document id is ``<user>-<task_name>``, built from the request
    arguments.  Its ``uid_list`` (a JSON string or an already decoded
    value) is looked up in the portrait index, requesting the fields named
    in ``SOCIAL_SENSOR_INFO``.

    For every member that is found, the row holds the fields in
    ``SOCIAL_SENSOR_INFO`` order.  ``topic_string`` is split on ``'&'`` and
    emitted twice.  ``activeness``, ``importance`` and ``influence`` are
    rescaled with ``log10(value / top * 9 + 1) * 100``, where ``top`` comes
    from ``get_top_influence``.
    """
    task_name = request.args.get('task_name', '')
    user = request.args.get('user', '')
    group_id = user + '-' + task_name
    rows = []

    # Normalisation ceilings, fetched up front in a fixed order.
    maxima = {}
    for metric in ("activeness", "influence", "importance"):
        maxima[metric] = get_top_influence(metric)

    group_doc = es.get(index=index_group_manage, doc_type=doc_type_group,
                       id=group_id).get('_source', {})
    if not group_doc:
        return json.dumps(rows)

    try:
        members = json.loads(group_doc['uid_list'])
    except:
        # Not decodable as JSON: use the stored value directly.
        members = group_doc['uid_list']
    if not members:
        return json.dumps(rows)

    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={"ids": members}, fields=SOCIAL_SENSOR_INFO)['docs']
    for doc in docs:
        if not doc['found']:
            continue
        row = []
        for name in SOCIAL_SENSOR_INFO:
            raw = doc["fields"][name][0]
            if name == "topic_string":
                # The topic list is emitted twice in the row.
                row.append(raw.split('&'))
                row.append(raw.split('&'))
            elif name in maxima:
                row.append(math.log(raw / float(maxima[name]) * 9 + 1, 10) * 100)
            else:
                row.append(raw)
        rows.append(row)
    return json.dumps(rows)
def search_history_delete(date):
    """Return, as a JSON string, portrait details of users deleted on ``date``.

    The uid list is read from the ``decide_delete_list`` Redis hash under
    the key ``date`` and looked up in the portrait index.  Each row holds
    ``uid, uname, domain, topic_string, influence, importance, activeness``.
    ``topic_string`` has ``'&'`` replaced by ``','``, and the three scores
    are expressed as ``ceil(value / top * 100)``, where ``top`` comes from
    ``get_top_influence``.

    Users whose portrait document no longer exists are skipped.  An empty
    JSON list is returned when nothing is stored for ``date``.
    """
    rows = []
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    ceilings = {
        "influence": top_influence,
        "importance": top_importance,
        "activeness": top_activeness,
    }
    fields = ['uid', 'uname', 'domain', 'topic_string',
              'influence', 'importance', 'activeness']

    # Read the hash once; a second read could see a different (or no) value.
    stored = r_out.hget("decide_delete_list", date)
    if not stored:
        return json.dumps(rows)
    history_uid_list = json.loads(stored)
    if not history_uid_list:
        return json.dumps(rows)

    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={"ids": history_uid_list}, _source=True)['docs']
    for doc in docs:
        source = doc.get('_source')
        if source is None:
            # The document is not in the portrait index; nothing to show.
            continue
        row = []
        for name in fields:
            value = source[name]
            if name == "topic_string":
                row.append(','.join(value.split("&")))
            elif name in ceilings:
                row.append(math.ceil(value / float(ceilings[name]) * 100))
            else:
                row.append(value)
        rows.append(row)
    return json.dumps(rows)
def compare_user_portrait(uid_list):
    """Return ``{uid: portrait attributes}`` for the given users.

    Documents are fetched from the ``user_portrait_1222`` index.  A uid
    whose document has no ``_source`` maps to an empty dict.  For the
    others, the JSON-encoded fields ``psycho_status``, ``psycho_feature``
    and ``hashtag_dict`` fall back to ``{}`` when absent or undecodable.
    ``activity_geo`` holds the last tab-separated component of the two
    highest-valued keys of ``activity_geo_dict`` (``[]`` on failure).

    Raises ``KeyError``/``ValueError`` when a mandatory field such as
    ``uname``, ``topic`` or ``keywords`` is missing or not valid JSON.
    """
    user_portrait_result = {}
    index_name = 'user_portrait_1222'
    index_type = 'user'
    user_result = es.mget(index=index_name, doc_type=index_type,
                          body={'ids': uid_list})['docs']

    def load_json(source, key, default):
        # Decode an optional JSON-encoded field, tolerating absence/garbage.
        try:
            return json.loads(source[key])
        except (KeyError, TypeError, ValueError):
            return default

    for item in user_result:
        uid = item['_id']
        user_portrait_result[uid] = {}
        source = item.get('_source')
        if source is None:
            # No portrait for this uid: keep the empty placeholder and move
            # on instead of reusing data from a previous iteration.
            continue

        psycho_status = load_json(source, 'psycho_status', {})
        psycho_feature = load_json(source, 'psycho_feature', {})
        hashtag_dict = load_json(source, 'hashtag_dict', {})

        activity_geo_dict = load_json(source, 'activity_geo_dict', {})
        try:
            top_geo = sorted(activity_geo_dict.items(),
                             key=lambda pair: pair[1], reverse=True)[:2]
            activity_list = [geo.split('\t')[-1] for geo, _ in top_geo]
        except (AttributeError, TypeError):
            activity_list = []

        user_portrait_result[uid] = {
            'uname': source['uname'],
            'gender': source['gender'],
            'location': source['location'],
            'importance': source['importance'],
            'activeness': source['activeness'],
            'influence': source['influence'],
            'fansnum': source['fansnum'],
            'statusnum': source['statusnum'],
            'friendsnum': source['friendsnum'],
            'domain': source['domain'],
            'topic': json.loads(source['topic']),
            'keywords': json.loads(source['keywords']),
            'psycho_status': psycho_status,
            'psycho_feature': psycho_feature,
            'activity_geo': activity_list,
            'hashtag_dict': hashtag_dict,
        }
    return user_portrait_result
def show_out_uid(fields):
    """Return detail rows for users recommended for removal.

    Every value of the ``recommend_delete_list`` Redis hash is a JSON list
    of uids.  The lists are merged, de-duplicated and looked up in the
    portrait index.  Users returned by ``all_delete_uid()`` and users
    without a portrait document are left out.

    ``fields`` selects the columns of each row.  ``"topic"`` yields the
    ``topic_string`` with ``'&'`` replaced by ``','``.  ``"influence"``,
    ``"importance"`` and ``"activeness"`` are expressed as
    ``ceil(value / top * 100)``.  Any other name is copied from the
    document.  Returns ``[]`` when nobody is recommended.
    """
    out_list = []
    # hgetall already carries the values; no per-key hget round trip needed.
    recommend_dict = r_out.hgetall("recommend_delete_list")
    for stored in recommend_dict.values():
        out_list.extend(json.loads(stored))
    if not out_list:
        return out_list  # no one is recommended to out

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    ceilings = {
        "influence": top_influence,
        "importance": top_importance,
        "activeness": top_activeness,
    }

    out_list = list(set(out_list))
    return_list = []
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()
    for doc in detail:
        source = doc.get('_source')
        if source is None:
            # uid has no portrait document; there is nothing to display.
            continue
        if source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in ceilings:
                detail_info.append(
                    math.ceil(source[item] / float(ceilings[item]) * 100))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)
    return return_list
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Return one day of weibo posts of a group, filtered by sentiment.

    Steps: read the group's ``uid_list``, map uids to user names via the
    portrait index, then search the flow-text index of the day starting at
    ``start_ts`` for posts by those uids in ``[start_ts, start_ts + DAY)``.
    When ``sentiment`` is ``'2'`` the posts are matched against
    ``SENTIMENT_SECOND``, otherwise against ``sentiment`` itself.

    Returns ``'task name invalid'`` when the group cannot be read,
    ``'task uid list null'`` when it has no members, otherwise a list of
    dicts with the keys ``uid, uname, ip, geo, text, timestamp, sentiment``
    ordered by ascending timestamp.  ``uname`` is ``'unknown'`` for authors
    without a portrait entry.
    """
    # step1: get the uids of the task
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type,
            id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: build the uid -> uname mapping (best effort)
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found']:
            uid2uname[item['_id']] = item['fields']['uname'][0]

    # step3: build the query for the requested day
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    query_body = [
        {'terms': {'uid': uid_list}},
        sentiment_clause,
        {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}},
    ]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []

    # step4: shape the hits
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # The mapping only holds users found in the portrait index (and is
        # empty when that lookup failed), so a plain [] lookup would raise.
        weibo['uname'] = uid2uname.get(weibo['uid'], 'unknown')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except (KeyError, AttributeError):
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)
    return weibo_list
def get_vary_detail_info(vary_detail_dict, uid_list):
    """Attach user names and formatted dates to the vary-pattern records.

    ``vary_detail_dict`` maps a pattern to a list of
    ``(uid, start_ts, end_ts)`` records.  The result maps each pattern to a
    list of ``[uid, uname, start_date, end_date]``, with the dates produced
    by ``ts2datetime``.

    User names come from the portrait index.  The uid itself is used as
    the name when the user is not found there, or when the lookup failed.
    """
    results = {}
    # get uname (best effort: a failed lookup leaves the mapping empty)
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item.get('found'):
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid

    # get new vary detail information
    for vary_pattern, user_info_list in vary_detail_dict.items():
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            # Fall back to the uid when it is absent from the mapping, which
            # happens after a failed lookup or for uids not in uid_list.
            uname = uname_dict.get(uid, uid)
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list
    return results
def search_history_delete(date):
    """Return, as a JSON string, portrait details of users deleted on ``date``.

    ``date`` is the key into the ``decide_delete_list`` Redis hash.  When it
    is empty, today's local date formatted as ``%Y%m%d`` is used.  Each row
    holds ``uid, uname, domain, topic_string, influence, importance,
    activeness`` from the ``user_portrait`` index, with ``'&'`` in
    ``topic_string`` replaced by ``','``.

    Users whose portrait document no longer exists are skipped.  An empty
    JSON list is returned when nothing is stored for the date.
    """
    rows = []
    if date:
        now_date = date
    else:
        now_date = time.strftime('%Y%m%d', time.localtime(time.time()))
    fields = ['uid', 'uname', 'domain', 'topic_string',
              'influence', 'importance', 'activeness']

    # Read the hash once; a second read could see a different (or no) value.
    stored = r_out.hget("decide_delete_list", now_date)
    if not stored:
        return json.dumps(rows)
    history_uid_list = json.loads(stored)
    if not history_uid_list:
        return json.dumps(rows)

    docs = es.mget(index="user_portrait", doc_type="user",
                   body={"ids": history_uid_list}, _source=True)['docs']
    for doc in docs:
        source = doc.get('_source')
        if source is None:
            # The document is not in the portrait index; nothing to show.
            continue
        row = []
        for name in fields:
            if name == "topic_string":
                row.append(','.join(source[name].split("&")))
            else:
                row.append(source[name])
        rows.append(row)
    return json.dumps(rows)
def get_group_list(task_name, submit_user):
    """List the members of a group with log-scaled importance and influence.

    The group document id is ``<submit_user>-<task_name>``.  Returns ``[]``
    when that document cannot be read.  Otherwise there is one row per
    member: ``[uid, uname, gender, location, importance, influence]``,
    where both scores are ``log10(value / max * 9 + 1) * 100`` with ``max``
    taken from ``get_evaluate_max()``.  A member whose row cannot be built
    for any reason is reported as ``[uid, '', '', '', '', '']``.
    """
    rows = []
    group_id = submit_user + '-' + task_name
    try:
        group_doc = es_group_result.get(index=group_index_name,
                                        doc_type=group_index_type,
                                        id=group_id)['_source']
    except:
        return rows

    members = group_doc['uid_list']
    portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                          doc_type=portrait_index_type,
                                          body={'ids': members})['docs']
    ceilings = get_evaluate_max()

    def scaled(source, metric):
        # Map a raw score onto the log scale used by the callers.
        return math.log(source[metric] / ceilings[metric] * 9 + 1, 10) * 100

    for doc in portrait_docs:
        uid = doc['_id']
        try:
            source = doc['_source']
            row = [uid, source['uname'], source['gender'], source['location'],
                   scaled(source, 'importance'), scaled(source, 'influence')]
        except:
            row = [uid, '', '', '', '', '']
        rows.append(row)
    return rows
def get_group_member_name(task_name, submit_user):
    """Return ``[{'ID': uid, 'name': uname}, ...]`` for the members of a group.

    The group document id is ``<submit_user>-<task_name>``.  Members that
    are not found in the portrait index get the name ``'unknown'``.  An
    empty list is returned when either the group document or the portrait
    documents cannot be fetched.
    """
    results = []
    task_id = submit_user + '-' + task_name
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return results
    uid_list = group_result['uid_list']
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        return results

    for item in user_portrait_result:
        if item['found']:
            uname = item['_source']['uname']
        else:
            uname = 'unknown'
        results.append({'ID': item['_id'], 'name': uname})
    return results
def show_out_uid(fields):
    """Return detail rows for users recommended for removal.

    Every value of the ``recommend_delete_list`` Redis hash is a JSON list
    of uids.  The merged list is looked up in the ``user_portrait`` index.
    Users returned by ``all_delete_uid()`` and users without a portrait
    document are left out.

    ``fields`` selects the columns of each row.  ``"topic"`` yields the
    ``topic_string`` with ``'&'`` replaced by ``','``.  Any other name is
    copied from the document.  Returns ``[]`` when nobody is recommended.
    """
    out_list = []
    # hgetall already carries the values; no per-key hget round trip needed.
    recommend_dict = r_out.hgetall("recommend_delete_list")
    for stored in recommend_dict.values():
        out_list.extend(json.loads(stored))
    if not out_list:
        return out_list  # no one is recommended to out

    return_list = []
    detail = es.mget(index="user_portrait", doc_type="user",
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()
    for doc in detail:
        source = doc.get('_source')
        if source is None:
            # uid has no portrait document; there is nothing to display.
            continue
        if source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)
    return return_list
def submit_identify_in_uname(input_data):
    """Submit uploaded user names for inclusion in the portrait library.

    ``input_data`` provides ``date``, ``user`` (the submitter) and
    ``upload_data`` (one nick name per line).  Names are resolved to uids
    through the profile index.  Uids already in the portrait index or in
    the ``compute`` Redis hash are dropped.  The remaining uids are written
    to the ``submit_recomment_<date>`` and ``recomment_<user>_<date>``
    hashes.

    Returns ``'uname list valid'`` when no name resolves to a uid,
    ``'uname list all in'`` when nothing is left to submit, and ``True``
    after a successful submission.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']

    # One name per line.  A '\r' left over from a '\r\n' line ending would
    # keep the name from matching the terms query, so it is cut off here.
    uname_list = []
    for raw_name in upload_data.split('\n'):
        name = raw_name.split('\r')[0]
        if name:
            uname_list.append(name)
    if not uname_list:
        # Nothing to resolve: same answer as "no uid found" below.
        return 'uname list valid'

    # step1: get uid list from uname
    profile_exist_result = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False)['hits']['hits']
    uid_list = [profile_item['_id'] for profile_item in profile_exist_result]
    if not uid_list:
        return 'uname list valid'

    # step2.1: keep only the users that are not in user_portrait yet
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if exist_item['found'] == False]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: drop the users that are already being computed
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return 'uname list all in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # NOTE(review): assumes an auto-recommended uid always has an
            # entry in the submit hash; hget returns None otherwise and
            # json.loads fails - confirm against the writer of that hash.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def get_vary_detail_info(vary_detail_dict, uid_list):
    """Expand vary-pattern records into ``[uid, uname, start, end]`` rows.

    ``vary_detail_dict`` maps a pattern to records whose first three items
    are a uid, a start timestamp and an end timestamp.  Both timestamps are
    converted with ``ts2datetime``.  Names are read from the portrait index
    for ``uid_list``.  The uid doubles as the name whenever no name is
    available (user not found, lookup failed, uid not part of
    ``uid_list``).

    Returns a dict with the same keys as ``vary_detail_dict``.
    """
    # get uname; a failing lookup must not prevent the rows being built
    try:
        portrait_docs = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        portrait_docs = []

    uname_dict = {}
    for doc in portrait_docs:
        found = doc.get('found')
        uname_dict[doc['_id']] = doc['_source']['uname'] if found else doc['_id']

    # get new vary detail information
    results = {}
    for vary_pattern in vary_detail_dict:
        rows = []
        for user_item in vary_detail_dict[vary_pattern]:
            uid = user_item[0]
            rows.append([
                uid,
                # A plain [] lookup raised KeyError for unknown uids.
                uname_dict.get(uid, uid),
                ts2datetime(int(user_item[1])),
                ts2datetime(int(user_item[2])),
            ])
        results[vary_pattern] = rows
    return results
def get_group_list(task_name):
    """List the members of group ``task_name`` with log-scaled scores.

    Returns ``[]`` when the group document cannot be read.  Otherwise one
    row per member: ``[uid, uname, gender, location, importance,
    influence]``, both scores being ``log10(value / max * 9 + 1) * 100``
    with ``max`` from ``get_evaluate_max()``.  A member whose row cannot be
    built for any reason is reported as ``[uid]`` only.
    """
    members_info = []
    try:
        group_source = es.get(index=index_name, doc_type=index_type,
                              id=task_name)['_source']
    except:
        return members_info

    docs = es.mget(index='user_portrait', doc_type='user',
                   body={'ids': group_source['uid_list']})['docs']
    maxima = get_evaluate_max()
    for doc in docs:
        member_id = doc['_id']
        try:
            portrait = doc['_source']
            name = portrait['uname']
            sex = portrait['gender']
            place = portrait['location']
            importance_score = math.log(
                portrait['importance'] / maxima['importance'] * 9 + 1, 10) * 100
            influence_score = math.log(
                portrait['influence'] / maxima['influence'] * 9 + 1, 10) * 100
        except:
            members_info.append([member_id])
        else:
            members_info.append([member_id, name, sex, place,
                                 importance_score, influence_score])
    return members_info
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Return the group's weibo posts of one day that carry a given sentiment.

    The group's ``uid_list`` is read first, then a uid -> uname mapping is
    built from the portrait index.  The flow-text index of the day that
    starts at ``start_ts`` is searched for posts by those uids with a
    timestamp in ``[start_ts, start_ts + DAY)``.  ``sentiment == '2'``
    selects every value in ``SENTIMENT_SECOND``; any other value is
    matched as is.

    Returns ``'task name invalid'`` or ``'task uid list null'`` when the
    group cannot be read or is empty.  Otherwise returns a list of dicts
    (``uid, uname, ip, geo, text, timestamp, sentiment``) in ascending
    timestamp order.  Authors without a portrait entry are named
    ``'unknown'``.
    """
    # step1: get the uids of the task
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type,
            id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        return 'task name invalid'
    if not group_result:
        return 'task name invalid'
    uid_list = (group_result.get('fields') or {}).get('uid_list') or []
    if not uid_list:
        return 'task uid list null'

    # step2: uid -> uname, best effort
    try:
        portrait_docs = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        portrait_docs = []
    uid2uname = {}
    for doc in portrait_docs:
        if doc['found']:
            uid2uname[doc['_id']] = doc['fields']['uname'][0]

    # step3: query the flow-text index of that day
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    search_body = {
        'query': {'bool': {'must': [
            {'terms': {'uid': uid_list}},
            sentiment_clause,
            {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}},
        ]}},
        'sort': [{'timestamp': {'order': 'asc'}}],
        'size': MAX_VALUE,
    }
    try:
        hits = es_flow_text.search(index=flow_text_index_name,
                                   doc_type=flow_text_index_type,
                                   body=search_body)['hits']['hits']
    except Exception:
        hits = []

    # step4: shape the hits
    weibo_list = []
    for hit in hits:
        source = hit['_source']
        try:
            geo = '\t'.join(source['geo'].split('&'))
        except (KeyError, AttributeError):
            geo = ''
        weibo_list.append({
            'uid': source['uid'],
            # uid2uname only covers users found in the portrait index, so a
            # plain [] lookup raised KeyError for everyone else.
            'uname': uid2uname.get(source['uid'], 'unknown'),
            'ip': source['ip'],
            'geo': geo,
            'text': source['text'],
            'timestamp': source['timestamp'],
            'sentiment': source['sentiment'],
        })
    return weibo_list
def ajax_get_group_detail():
    """Return a JSON list with one row of portrait fields per group member.

    The request arguments ``user`` and ``task_name`` form the group
    document id ``<user>-<task_name>``.  The group's ``uid_list`` (a JSON
    string or an already decoded value) is looked up in the portrait
    index, requesting the fields named in ``SOCIAL_SENSOR_INFO``.

    For every member that is found, the row holds the fields in
    ``SOCIAL_SENSOR_INFO`` order.  ``topic_string`` is split on ``'&'`` and
    emitted twice.  ``activeness``, ``importance`` and ``influence`` are
    expressed as ``ceil(value / top * 100)``, where ``top`` comes from
    ``get_top_influence``.
    """
    task_name = request.args.get('task_name', '')
    user = request.args.get('user', '')
    doc_id = user + '-' + task_name
    portrait_rows = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")

    def percent_of(value, top):
        # Share of the top value, rounded up to a whole percent.
        return math.ceil(value / float(top) * 100)

    group_source = es.get(index=index_group_manage, doc_type=doc_type_group,
                          id=doc_id).get('_source', {})
    if group_source:
        try:
            uids = json.loads(group_source['uid_list'])
        except:
            uids = group_source['uid_list']
        if uids:
            found_docs = [
                doc for doc in es.mget(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body={"ids": uids},
                                       fields=SOCIAL_SENSOR_INFO)['docs']
                if doc['found']
            ]
            for doc in found_docs:
                cells = []
                for field_name in SOCIAL_SENSOR_INFO:
                    first = doc["fields"][field_name][0]
                    if field_name == "topic_string":
                        cells.append(first.split('&'))
                        cells.append(first.split('&'))
                    elif field_name == "activeness":
                        cells.append(percent_of(first, top_activeness))
                    elif field_name == "importance":
                        cells.append(percent_of(first, top_importance))
                    elif field_name == "influence":
                        cells.append(percent_of(first, top_influence))
                    else:
                        cells.append(first)
                portrait_rows.append(cells)
    return json.dumps(portrait_rows)
def get_group_list(task_name, submit_user):
    """List the members of a group with four log-scaled evaluation scores.

    The group document id is ``<submit_user>-<task_name>``.  Returns ``[]``
    when that document cannot be read.  Otherwise one row per member:
    ``[uid, uname, gender, location, importance, influence, activeness,
    sensitive]``.  Every score is ``log10(value / max * 9 + 1) * 100`` with
    ``max`` taken from ``get_evaluate_max()``.  A member whose row cannot
    be built for any reason is reported as the uid followed by seven empty
    strings.
    """
    output = []
    try:
        group_source = es_group_result.get(
            index=group_index_name, doc_type=group_index_type,
            id=submit_user + '-' + task_name)['_source']
    except:
        return output

    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': group_source['uid_list']})['docs']
    evaluate_max = get_evaluate_max()
    score_names = ('importance', 'influence', 'activeness', 'sensitive')

    for doc in portrait_docs:
        member = doc['_id']
        try:
            attrs = doc['_source']
            line = [member, attrs['uname'], attrs['gender'], attrs['location']]
            for score_name in score_names:
                raw_score = attrs[score_name]
                line.append(
                    math.log(raw_score / evaluate_max[score_name] * 9 + 1, 10) * 100)
        except:
            line = [member, '', '', '', '', '', '', '']
        output.append(line)
    return output
def compare_user_portrait_new(uid_list):
    """Return ``{uid: attributes}`` used to compare several users.

    Returns the string ``'uid_list not exist'`` when the portrait lookup
    fails, yields nothing, or contains a user that is not found.

    Per user the attributes are: ``uname``, ``location``, ``domain``,
    ``topic`` (``topic_string`` split on ``'&'``), ``keywords`` and
    ``hashtag`` (decoded JSON), ``psycho_status`` (from
    ``get_psycho_status``), ``activity_geo`` (the two places present in the
    most of the last seven entries of ``activity_geo_dict``), and the
    scores ``importance``, ``influence`` and ``activeness`` as
    ``int(log10(value / max * 9 + 1) * 100)`` with ``max`` from
    ``get_evaluate_max()``.
    """
    try:
        portrait_docs = es.mget(index=portrait_index_name,
                                doc_type=portrait_index_type,
                                body={'ids': uid_list})['docs']
    except:
        portrait_docs = []
    if portrait_docs == []:
        return 'uid_list not exist'

    # get max evaluate
    ceilings = get_evaluate_max()
    comparison = {}
    # get user psycho status from flow_text
    psycho_by_uid = get_psycho_status(uid_list)

    for doc in portrait_docs:
        if doc['found'] != True:
            return 'uid_list not exist'
        uid = doc['_id']
        source = doc['_source']
        entry = {}
        entry['uname'] = source['uname']
        entry['location'] = source['location']

        # evaluate indexes, log-scaled
        for metric in ('importance', 'influence', 'activeness'):
            scaled = math.log(source[metric] / ceilings[metric] * 9 + 1, 10)
            entry[metric] = int(scaled * 100)

        entry['domain'] = source['domain']
        entry['topic'] = source['topic_string'].split('&')

        # count in how many of the last seven entries each place occurs
        daily_geo = json.loads(source['activity_geo_dict'])
        geo_days = {}
        for day_geo in daily_geo[-7:]:
            for place in day_geo:
                geo_days[place] = geo_days.get(place, 0) + 1
        ranked_places = sorted(geo_days.items(), key=lambda pair: pair[1],
                               reverse=True)
        entry['activity_geo'] = [pair[0] for pair in ranked_places[:2]]

        entry['keywords'] = json.loads(source['keywords'])
        entry['hashtag'] = json.loads(source['hashtag_dict'])
        entry['psycho_status'] = psycho_by_uid[uid]
        comparison[uid] = entry
    return comparison
def search_portrait_user(es, number, active_index, active_type, portrait_index, portrait_type, field="user_index"):
    """Return the top ``number`` ranked users that are in the portrait index.

    ``search_k`` is paged through 100 users at a time, ordered by
    ``field``.  Only users found in ``portrait_index`` are kept.  The uid is
    read from ``'uid'`` when ``field == "vary"`` and from ``'user'``
    otherwise.

    Returns the string ``"no active_index exist"`` when ``active_index``
    does not exist.  Otherwise returns a list of rows
    ``[rank, photo_url, uid, nick_name, value_of_field, "1"]``.  The list is
    shorter than ``number`` when ``search_k`` runs out of users first.
    """
    return_list = []
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    limit = int(number)
    uid_key = 'uid' if field == "vary" else 'user'
    start = 0
    rank = 1
    while True:
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:
            # Ranking exhausted: stop instead of paging forever.
            break
        start += 100
        search_list = [item.get(uid_key, '0') for item in user_list]
        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        # enumerate gives the position directly; list.index() was quadratic
        # and returned the first equal document rather than this one.
        for index, item in enumerate(search_result):
            if not item["found"]:
                continue
            info = [rank, '', '', '', '', '']
            if profile_result[index]['found']:
                profile_source = profile_result[index]['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')
            info[2] = item.get('_id', '')
            info[4] = user_list[index][field]
            info[5] = "1"
            return_list.append(info)
            rank += 1
            if len(return_list) >= limit:
                return return_list
    return return_list
def search_portrait_user_in_activity(es, number, active_index, active_type, portrait_index, portrait_type, field="user_index"):
    """Return the top ``number`` ranked portrait users with activity details.

    ``search_k`` is paged through 100 users at a time, ordered by
    ``field``.  Only users found in ``portrait_index`` are kept.  The uid is
    read from ``'uid'`` when ``field == "vary"`` and from ``'user'``
    otherwise.

    Returns the string ``"no active_index exist"`` when ``active_index``
    does not exist.  Otherwise returns a list of rows
    ``[rank, photo_url, uid, nick_name, user_index, "1"]``.  When ``field``
    is one of the two ``origin_weibo_*_brust_average`` orderings, the row is
    extended with that value followed by every value named in ``key_list``.
    The list is shorter than ``number`` when ``search_k`` runs out of users
    first.
    """
    return_list = []
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    limit = int(number)
    uid_key = 'uid' if field == "vary" else 'user'
    # NOTE(review): "origin_weibo_retweeted_brust_average" and
    # "retweeted_weibo_retweeted_brust_average" each appear twice; the second
    # occurrences look like they were meant to be the *_comment_brust_average
    # keys.  Kept as is because it changes the columns callers receive.
    key_list = ["origin_weibo_retweeted_total_number", "origin_weibo_retweeted_average_number", "origin_weibo_retweeted_top_number", "origin_weibo_retweeted_brust_average",
                "origin_weibo_comment_total_number", "origin_weibo_comment_average_number", "origin_weibo_comment_top_number", "origin_weibo_retweeted_brust_average",
                "retweeted_weibo_retweeted_total_number", "retweeted_weibo_retweeted_average_number", "retweeted_weibo_retweeted_top_number", "retweeted_weibo_retweeted_brust_average",
                "retweeted_weibo_comment_total_number", "retweeted_weibo_comment_average_number", "retweeted_weibo_comment_top_number", "retweeted_weibo_retweeted_brust_average"]
    detailed_fields = ('origin_weibo_retweeted_brust_average',
                       'origin_weibo_comment_brust_average')

    start = 0
    rank = 1
    while True:
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:
            # Ranking exhausted: stop instead of paging forever.
            break
        start += 100
        search_list = [item.get(uid_key, '0') for item in user_list]
        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        # enumerate gives the position directly; list.index() was quadratic
        # and returned the first equal document rather than this one.
        for index, item in enumerate(search_result):
            if not item["found"]:
                continue
            info = [rank, '', '', '', '', '']
            if profile_result[index]['found']:
                profile_source = profile_result[index]['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')
            info[2] = item.get('_id', '')
            info[4] = user_list[index]['user_index']
            info[5] = "1"
            if field in detailed_fields:
                info.append(user_list[index][field])
                for key in key_list:
                    info.append(user_list[index][key])
            return_list.append(info)
            rank += 1
            if len(return_list) >= limit:
                return return_list
    return return_list
def compare_user_portrait(uid_list):
    """Return ``{uid: portrait attributes}`` for the given users.

    Documents are fetched from the ``user_portrait`` index.  A uid whose
    document has no ``_source`` maps to an empty dict.  For the others, the
    JSON-encoded fields ``psycho_status``, ``psycho_feature`` and
    ``hashtag_dict`` fall back to ``{}`` when absent or undecodable.
    ``activity_geo`` holds the last tab-separated component of the two
    highest-valued keys of ``activity_geo_dict`` (``[]`` on failure).

    Raises ``KeyError``/``ValueError`` when a mandatory field such as
    ``uname``, ``topic`` or ``keywords`` is missing or not valid JSON.
    """
    user_portrait_result = {}
    index_name = 'user_portrait'
    index_type = 'user'
    user_result = es.mget(index=index_name, doc_type=index_type,
                          body={'ids': uid_list})['docs']

    for item in user_result:
        uid = item['_id']
        user_portrait_result[uid] = {}
        source = item.get('_source')
        if source is None:
            # `except: next` used to fall through and build the entry from
            # an unbound or stale `source`; skip the user instead.
            continue

        optional = {}
        for key in ('psycho_status', 'psycho_feature', 'hashtag_dict'):
            try:
                optional[key] = json.loads(source[key])
            except (KeyError, TypeError, ValueError):
                optional[key] = {}

        activity_list = []
        try:
            activity_geo_dict = json.loads(source['activity_geo_dict'])
            sort_activity_geo = sorted(activity_geo_dict.items(),
                                       key=lambda x: x[1], reverse=True)
            # Own loop variable: reusing `item` clobbered the outer document.
            for geo_item in sort_activity_geo[:2]:
                activity_list.append(geo_item[0].split('\t')[-1])
        except (KeyError, TypeError, ValueError, AttributeError):
            activity_list = []

        user_portrait_result[uid] = {
            'uname': source['uname'],
            'gender': source['gender'],
            'location': source['location'],
            'importance': source['importance'],
            'activeness': source['activeness'],
            'influence': source['influence'],
            'fansnum': source['fansnum'],
            'statusnum': source['statusnum'],
            'friendsnum': source['friendsnum'],
            'domain': source['domain'],
            'topic': json.loads(source['topic']),
            'keywords': json.loads(source['keywords']),
            'psycho_status': optional['psycho_status'],
            'psycho_feature': optional['psycho_feature'],
            'activity_geo': activity_list,
            'hashtag_dict': optional['hashtag_dict'],
        }
    return user_portrait_result
def submit_identify_in_uname(input_data):
    """Check uploaded user names and, on request, submit them for inclusion.

    ``input_data`` provides ``date``, ``user`` (the submitter),
    ``operation_type`` and ``upload_data`` (one nick name per line;
    anything after a ``'\\r'`` on a line is dropped).

    Return values:

    * ``(False, 'invalid user info', invalid_names)`` when some names have
      no profile;
    * ``(False, 'all user in')`` when every user is already in the portrait
      index or in the ``compute`` hash;
    * ``(True, invalid_names, uids_already_in, uids_to_submit)`` otherwise.

    The Redis hashes are only written when ``operation_type`` is
    ``'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # get uname list from upload data
    uname_list = [line.split('\r')[0] for line in upload_data.split('\n')]

    # step1: get uid list from uname
    hits = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False, fields=['nick_name'])['hits']['hits']
    uid_list = []
    valid_uname_list = []
    for hit in hits:
        uid_list.append(hit['_id'])
        valid_uname_list.append(hit['fields']['nick_name'][0])
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if len(invalid_user_list) != 0:
        return False, 'invalid user info', invalid_user_list

    # step2.1: split the users by presence in user_portrait
    portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                          doc_type=portrait_index_type,
                                          body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if doc['found'] == False]
    have_in_user_list = [doc['_id'] for doc in portrait_docs if doc['found'] == True]
    if not new_uid_list:
        return False, 'all user in'

    # step2.2: drop the users that are already being computed
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))

    # identify final submit user list
    final_submit_user_list = []
    should_write = operation_type == 'submit'
    for candidate in in_uid_list:
        if candidate not in auto_recomment_set:
            record = {'system': '0', 'operation': submit_user}
        else:
            # Add the submitter to the operators already on the record.
            record = json.loads(r.hget(hashname_submit, candidate))
            operators = record['operation'].split('&')
            operators.append(str(submit_user))
            record['operation'] = '&'.join(list(set(operators)))
        if should_write:
            r.hset(hashname_submit, candidate, json.dumps(record))
            r.hset(submit_user_recomment, candidate, '0')
        final_submit_user_list.append(candidate)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
def show_keywords_rank(task_id, sort_type, count):
    """Return the ranked users of a finished network keyword task.

    The task document's ``results`` field is a JSON object.  Its
    ``sort_type`` entry is a list of ``(uid, value)`` pairs.  Each pair
    becomes one row: ``[uid, nick_name, user_location, value,
    user_fansnum, weibo_month_sum, in_portrait]``.  Profile and history
    columns are ``''`` when the respective document is missing, and
    ``in_portrait`` is ``"1"`` or ``"0"``.

    Returns ``{}`` when the task document cannot be read.  ``count`` is
    accepted for interface compatibility; it does not limit the result.
    """
    try:
        task_found = es_network_task.get(index=network_keywords_index_name,
                                         doc_type=network_keywords_index_type,
                                         id=task_id)['_source']
    except Exception:
        return {}

    search_results = json.loads(task_found['results'])
    sort_results = search_results[sort_type]
    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)

    results = []
    if not uid_list:
        return results

    # background information from the profile index
    profile_result = es_user_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list})["docs"]
    # Position comes from enumerate: list.index() was quadratic and mapped
    # equal documents to the first one's sort value.
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['user_location']]
        else:
            row = [item['_id'], '', '']
        row.append(sort_list[position])
        results.append(row)

    history_result = es_bci_history.mget(index=bci_history_index_name,
                                         doc_type=bci_history_index_type,
                                         body={"ids": uid_list})["docs"]
    for position, item in enumerate(history_result):
        if item['found']:
            source = item['_source']
            results[position].extend([source['user_fansnum'],
                                      source['weibo_month_sum']])
        else:
            results[position].extend(['', ''])

    portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            body={"ids": uid_list})["docs"]
    for position, item in enumerate(portrait_result):
        results[position].append("1" if item['found'] else "0")
    return results
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Query the users ranked highest on ``sort_order``.

    With ``top`` true, only the ``sort_order`` value of the first hit is
    returned.  Otherwise a list of rows
    ``[rank, photo_url, uid, nick_name, ...]`` is returned; the trailing
    columns depend on ``sort_order``:

    * plain index fields: ``value, in_portrait``
    * top-number fields: ``value, weibo_url, in_portrait``
    * any other field: no extra columns.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    if top:
        return hits[0]['_source'][sort_order]

    uid_list = [hit['_id'] for hit in hits]
    profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                     body={"ids": uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                       body={"ids": uid_list}, _source=True)['docs']

    plain_fields = ["user_index",
                    "origin_weibo_retweeted_brust_average",
                    "origin_weibo_comment_brust_average"]
    # sort field -> field holding the id of the corresponding top weibo
    top_weibo_id_field = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    for position, hit in enumerate(hits):
        info = [position + 1, '', '', '']
        profile = profile_result[position]
        if profile['found']:
            info[1] = profile['_source'].get('photo_url', '')
            info[3] = profile['_source'].get('nick_name', '')
        info[2] = hit.get('_id', '')

        has_extra = True
        if sort_order in plain_fields:
            info.append(hit['_source'][sort_order])
        elif sort_order in top_weibo_id_field:
            info.append(hit['_source'][sort_order])
            mid = hit['_source'][top_weibo_id_field[sort_order]]
            info.append(weiboinfo2url(info[2], mid))
        else:
            has_extra = False
        if has_extra:
            # Flag telling whether the user is already in the portrait index.
            if portrait_result[position]['found']:
                info.append("1")
            else:
                info.append("0")
        result.append(info)
    return result
def show_daily_rank(period, sort_type, count):
    """Return the top ``count`` users of the daily network ranking.

    The sort field is ``rank_<sort_type>_<period>`` when ``sort_type``
    contains an underscore, else ``<sort_type>_<period>`` (e.g. ``pr_0``).
    Each row is ``[uid, nick_name, statusnum, user_location, fansnum,
    value, in_portrait]``; users without a profile get '' placeholders and
    ``in_portrait`` is "1" or "0".

    :return: list of rows; empty when the search fails or matches nothing.
    """
    index_name = 'user_portrait_network'
    index_type = 'network'
    if len(sort_type.split('_')) > 1:
        sort_field = 'rank_' + sort_type + '_' + str(period)
    else:
        sort_field = sort_type + '_' + str(period)  # pr_0
    query_body = {
        'sort': [{sort_field: {'order': 'desc'}}],
        'size': count,
    }
    try:
        search_results = es_network_task.search(index=index_name, doc_type=index_type,
                                                body=query_body)['hits']['hits']
    except Exception:
        # A failed search is treated as an empty ranking.
        search_results = []

    uid_list = []
    sort_list = []
    for hit in search_results:
        source = hit['_source']
        if sort_field in source:
            uid_list.append(source['uid'])
            sort_list.append(source[sort_field])

    results = []
    if not uid_list:
        return results

    # Background (profile) information.  Docs come back in request order, so
    # enumerate pairs each doc with its sort value without the quadratic,
    # duplicate-unsafe list.index lookup.
    profile_result = es_user_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list})["docs"]
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['statusnum'],
                   source['user_location'], source['fansnum']]
        else:
            row = [item['_id'], '', '', '', '']
        row.append(sort_list[position])
        results.append(row)

    portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            body={"ids": uid_list})["docs"]
    for row, item in zip(results, portrait_result):
        row.append("1" if item['found'] else "0")

    return results
def ajax_get_task_detail_info():
    """Return the sensing-task document named by ``task_name`` as JSON.

    The JSON-encoded fields of the stored document are decoded,
    ``history_status`` is reduced to its last entry plus every earlier entry
    whose last element is non-zero (sorted by first element, descending),
    and ``social_sensors_portrait`` is filled with one row per sensor found
    in the portrait index, following ``SOCIAL_SENSOR_INFO`` and sorted by
    the sixth column, descending.
    """
    task_name = request.args.get('task_name', '')  # task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type,
                         id=task_name)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    task_detail['keywords'] = json.loads(task_detail['keywords'])
    task_detail["sensitive_words"] = json.loads(task_detail["sensitive_words"])

    history_status = json.loads(task_detail['history_status'])
    if history_status:
        kept = [history_status[-1]]
        kept.extend(item for item in history_status[:-1] if int(item[-1]) != 0)
        task_detail['history_status'] = sorted(kept, key=lambda x: x[0], reverse=True)
    else:
        task_detail['history_status'] = history_status

    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    top_cache = {}

    def _score(field, raw_value):
        # The top value does not change between users: fetch it once, on
        # first use.  float() keeps the ratio from being truncated by
        # Python 2 integer division.
        if field not in top_cache:
            top_cache[field] = float(get_top_influence(field))
        score = math.log(raw_value / top_cache[field] * 9 + 1, 10) * 100
        return score if score else 0

    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                 body={"ids": task_detail["social_sensors"]})['docs']
        for item in search_results or []:
            if not item['found']:
                continue
            source = item["_source"]
            temp = []
            for iter_item in SOCIAL_SENSOR_INFO:
                if iter_item == "topic_string":
                    temp.append(source[iter_item].split('&'))
                elif iter_item in ("influence", "importance", "activeness"):
                    temp.append(_score(iter_item, source[iter_item]))
                else:
                    temp.append(source[iter_item])
            portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x: x[5], reverse=True)

    task_detail['social_sensors_portrait'] = portrait_detail
    return json.dumps(task_detail)
def submit_identify_in_url(input_data):
    """Submit the users named by uploaded weibo URLs for portrait computing.

    ``input_data`` must provide ``date``, ``user``, ``operation_type`` and
    ``upload_data`` (one URL per line).  The uid is taken as the last ten
    characters of the fifth '/'-separated URL component.

    :return: one of
        * ``(False, 'invalid user info', invalid_urls)`` when any line has
          fewer than five components,
        * ``(False, 'all user in')`` when no uid is left after dropping the
          ones already in the portrait index or in the ``compute`` hash,
        * ``(True, invalid_uid_list, have_in_uid_list, final_submit_user_list)``
          otherwise.  The Redis hashes are only written when
          ``operation_type == 'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # step1: get uid list from the uploaded urls
    url_list = [line.split('\r')[0] for line in upload_data.split('\n')]
    uid_list = []
    invalid_uid_list = []
    for url_item in url_list:
        # url_item = 'http://weibo.com/p/1002065727942146/album?.....'
        # (a separate name is used so the list being iterated is not rebound)
        url_parts = url_item.split('/')
        try:
            uid_list.append(url_parts[4][-10:])
        except IndexError:
            invalid_uid_list.append(url_item)
    if invalid_uid_list:
        return False, 'invalid user info', invalid_uid_list

    # step2: keep the uids that are neither in user_portrait nor in compute
    # step2.1: identify in user_portrait
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids': uid_list},
                                                  _source=True)['docs']
    new_uid_list = [doc['_id'] for doc in exist_portrait_result if doc['found'] == False]
    have_in_uid_list = [doc['_id'] for doc in exist_portrait_result if doc['found'] == True]
    # step2.2: identify in compute
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))

    # identify the final submit users
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Already recommended: add this user to its '&'-joined operators.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
def get_activity_weibo(task_name, start_ts):
    """List the weibo posted by a group's users in one four-hour segment.

    :param task_name: id of the group document holding ``uid_list``.
    :param start_ts: start timestamp; the segment is
        ``[start_ts, start_ts + FOUR_HOUR)``.
    :return: ``'task name invalid'`` / ``'task uid list null'`` when the
        group or its uid list cannot be read, else a list of dicts with the
        keys ``timestamp``, ``ip``, ``text`` and ``geo`` (tab-joined, or ''),
        in ascending timestamp order as returned by the search.
    """
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except:
        return 'task name invalid'
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        return 'task uid list null'
    if uid_list == []:
        return 'task uid list null'

    #step2: get uid2uname
    # NOTE(review): this mapping is built but not read again in this function.
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list}, _source=False,
                                                     fields=['uname'])['docs']
    except:
        user_portrait_result = []
    uid2uname = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uid2uname[uid] = portrait_item['fields']['uname'][0]

    #step3: search time_segment weibo
    end_ts = start_ts + FOUR_HOUR
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    must_clauses = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': must_clauses}}, 'sort': 'timestamp',
                  'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_es_result = []

    results = []
    for hit in flow_text_es_result:
        source = hit['_source']
        weibo = {
            'timestamp': ts2date(source['timestamp']),
            'ip': source['ip'],
            'text': source['text'],
        }
        weibo['geo'] = '\t'.join(source['geo']) if source['geo'] else ''
        results.append(weibo)
    return results
def get_activity_weibo(task_name, start_ts):
    """Return the weibo a group's users posted within one four-hour window.

    The window starts at ``start_ts`` and ends ``FOUR_HOUR`` later
    (exclusive); the daily flow-text index is chosen from ``start_ts``.

    :return: the string ``'task name invalid'`` or ``'task uid list null'``
        when the group document or its uid list is unavailable; otherwise a
        list of ``{'timestamp', 'ip', 'text', 'geo'}`` dicts, ``geo`` being
        the tab-joined location or ''.
    """
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'

    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'

    #step2: get uid2uname
    # NOTE(review): uid2uname is filled here but never used further down.
    uid2uname = {}
    try:
        docs = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                     body={'ids': uid_list}, _source=False,
                                     fields=['uname'])['docs']
    except:
        docs = []
    for doc in docs:
        doc_uid = doc['_id']
        if doc['found'] == True:
            uid2uname[doc_uid] = doc['fields']['uname'][0]

    #step3: search time_segment weibo
    window_end = start_ts + FOUR_HOUR
    day_index = flow_text_index_name_pre + ts2datetime(start_ts)
    uid_filter = {'terms': {'uid': uid_list}}
    time_filter = {'range': {'timestamp': {'gte': start_ts, 'lt': window_end}}}
    search_body = {'query': {'bool': {'must': [uid_filter, time_filter]}},
                   'sort': 'timestamp', 'size': MAX_VALUE}
    try:
        hits = es_flow_text.search(index=day_index, doc_type=flow_text_index_type,
                                   body=search_body)['hits']['hits']
    except:
        hits = []

    results = []
    for hit in hits:
        source = hit['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        weibo['geo'] = '\t'.join(source['geo']) if source['geo'] else ''
        results.append(weibo)
    return results
def show_all_out():
    """Return, as JSON, the portrait rows of all users queued for removal.

    The uids are the union of the JSON lists stored in the Redis hash
    ``decide_delete_list``.  Each row is ``[uid, uname, domain, topics,
    influence, importance, activeness]`` where topics is the comma-joined
    ``topic_string`` and the three scores are percentages of the current top
    value, rounded up.  Users missing from the portrait index are reported
    as ``[uid, [], [], [], [], [], []]``.
    """
    # hgetall already returns every value, so the fields are not fetched
    # again one by one.
    delete_dict = r_out.hgetall('decide_delete_list')
    recommend_out_list = []
    for raw_value in delete_dict.values():
        try:
            uids = json.loads(raw_value)
        except (TypeError, ValueError):
            # Undecodable entries are ignored.
            uids = []
        recommend_out_list.extend(uids)
    recommend_out_list = list(set(recommend_out_list))

    top_values = {
        "influence": get_top_influence("influence"),
        "activeness": get_top_influence("activeness"),
        "importance": get_top_influence("importance"),
    }
    fields = ['uid', 'uname', 'domain', 'topic_string',
              'influence', 'importance', 'activeness']

    return_list = []
    if recommend_out_list:
        detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                         body={"ids": recommend_out_list}, _source=True)['docs']
        for doc in detail:
            if doc['found']:
                source = doc['_source']
                detail_info = []
                for item in fields:
                    if item == "topic_string":
                        detail_info.append(','.join(source[item].split('&')))
                    elif item in top_values:
                        detail_info.append(
                            math.ceil(source[item] / float(top_values[item]) * 100))
                    else:
                        detail_info.append(source[item])
            else:
                detail_info = [doc['_id'], [], [], [], [], [], []]
            return_list.append(detail_info)
    return json.dumps(return_list)
def get_social_inter_content(uid1, uid2, type_mark):
    """Collect the weibo exchanged between two users over the last 7 days.

    Weibo posted by ``uid1`` and directed at ``uid2`` are always searched;
    with ``type_mark == 'out'`` the opposite direction is included too.  The
    reference day is today when ``RUN_TYPE == 1``, else ``RUN_TEST_TIME``.

    :return: list of dicts with the keys ``timestamp``, ``ip``, ``geo``,
        ``text`` ('&' replaced by tabs), ``uid``, ``uname``,
        ``directed_uid`` and ``directed_uname``, oldest day first.  A user
        whose name cannot be resolved is named 'unknown'.
    """
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)

    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                doc_type=portrait_index_type,
                                                body={'ids': [uid1, uid2]}, _source=False,
                                                fields=['uid', 'uname'])['docs']
    except Exception:
        portrait_result = []
    for item in portrait_result:
        if item['found']:
            uid2uname[item['_id']] = item['fields']['uname'][0]
        else:
            uid2uname[item['_id']] = 'unknown'

    # The clauses are the same for every day, so they are built once.
    query = [{'bool': {'must': [{'term': {'uid': uid1}},
                                {'term': {'directed_uid': int(uid2)}}]}}]
    if type_mark == 'out':
        query.append({'bool': {'must': [{'term': {'uid': uid2}},
                                        {'term': {'directed_uid': int(uid1)}}]}})
    search_body = {'query': {'bool': {'should': query}},
                   'sort': [{'timestamp': {'order': 'asc'}}],
                   'size': MAX_VALUE}

    #iter date to search weibo list
    weibo_list = []
    for i in range(7, 0, -1):
        iter_date = ts2datetime(now_date_ts - i * DAY)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name,
                                                   doc_type=flow_text_index_type,
                                                   body=search_body)['hits']['hits']
        except Exception:
            # Days whose index cannot be searched contribute nothing.
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            directed_uid = str(source['directed_uid'])
            weibo_list.append({
                'timestamp': source['timestamp'],
                'ip': source['ip'],
                'geo': source['geo'],
                'text': '\t'.join(source['text'].split('&')),
                'uid': source['uid'],
                # .get: the name map is empty when the portrait lookup failed
                'uname': uid2uname.get(str(source['uid']), 'unknown'),
                'directed_uid': directed_uid,
                'directed_uname': uid2uname.get(directed_uid, 'unknown'),
            })
    return weibo_list
def search_max_single_field(field, index_name, doctype, top_k=3):
    """Return the ``top_k`` best-ranked users that are in the portrait index.

    Users are read from ``search_k`` in pages of 100, ordered by ``field``
    ("origin_weibo_retweeted_top_number" or
    "origin_weibo_comment_top_number"); users absent from the portrait
    index are skipped.  Each row is
    ``[rank, photo_url, uid, nick_name, top_number, weibo_url, '1']``.

    :return: at most ``top_k`` rows (at least one when any user matches);
        fewer when the ranking is exhausted first.
    """
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    limit = int(top_k)
    return_list = []
    rank = 1
    start = 0
    while True:
        user_list = search_k(es, index_name, doctype, start, field, 100)
        if not user_list:
            # Assumes search_k yields an empty result past the last page;
            # without this exit the loop would never terminate.
            return return_list
        start += 100
        search_list = [item.get('user', '0') for item in user_list]  # uid list
        search_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                         body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                         body={"ids": search_list}, _source=True)["docs"]
        for position, portrait in enumerate(search_result):
            if not portrait['found']:
                continue
            info = [rank, '', portrait.get('_id', ''), '', '', '', '1']
            profile = profile_result[position]
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            ranked_user = user_list[position]
            info[5] = weiboinfo2url(info[2], ranked_user[mid_key])
            info[4] = ranked_user[number_key]
            rank += 1
            return_list.append(info)
            if rank >= limit + 1:
                return return_list
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, tag):
    """Return the top ``number`` active users whose domain contains ``tag``.

    Users are read from ``search_k`` in pages of 10000, ordered by
    ``user_index``, and kept when they exist in the portrait index and
    ``tag`` occurs in their ``domain``.  Each row is
    ``[rank, photo_url, uid, nick_name, user_index, activeness, importance]``.
    Scanning stops after more than 100000 users have been examined or when
    the ranking is exhausted.
    """
    #field_dict = {"domain":"art"}
    limit = int(number)
    return_list = []
    count_s = 0
    start = 0
    rank = 1
    while True:
        user_list = search_k(es, active_index, active_type, start, "user_index", 10000)
        if not user_list:
            # Assumes search_k yields an empty result past the last page;
            # otherwise count_s would stop growing and the loop never end.
            return return_list
        start += 10000
        search_list = [item.get('user', '0') for item in user_list]  # uid list
        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type,
                                         body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                         body={"ids": search_list}, _source=True)["docs"]
        # enumerate gives the doc position directly instead of a linear
        # search_result.index(item) scan for every match.
        for position, item in enumerate(search_result):
            count_s += 1
            if not (item['found'] and tag in item['_source']['domain']):
                continue
            info = [rank, '', '', '', '', '', '']
            profile = profile_result[position]
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[2] = item.get('_id', '')
            info[4] = user_list[position]['user_index']
            info[5] = item['_source'].get('activeness', '')
            info[6] = item['_source'].get('importance', '')
            rank += 1
            return_list.append(info)
            if rank >= limit + 1:
                return return_list
        if count_s > 100000:
            return return_list
def delete_group_results(task_name, submit_user):
    """Delete a group task and remove its tag from the members' portraits.

    The task id is ``<submit_user>-<task_name>``.  The id is removed from
    the '&'-separated ``group`` field of every member found in the portrait
    index, then the task document itself is deleted.

    :return: True on success; False when the group document cannot be read
        or the final delete fails.
    """
    task_id = submit_user + '-' + task_name
    #step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return False
    uid_list = group_result['uid_list']
    # Other readers of this document JSON-decode uid_list; accept both the
    # encoded string and an already decoded list.
    try:
        uid_list = json.loads(uid_list)
    except (TypeError, ValueError):
        pass

    #step2: update group_tag in user_portrait
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        # Best effort: the task is still deleted if the members are unreadable.
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        if not item['found']:
            continue
        group_tag = item.get('_source', {}).get('group', '')
        # NOTE(review): the second excluded literal looks like a scrubbed
        # placeholder - confirm the intended value.
        new_group_tag = '&'.join(
            tag for tag in group_tag.split('&')
            if tag not in (task_id, '[email protected]'))
        bulk_action.extend([{'update': {'_id': item['_id']}},
                            {'doc': {'group': new_group_tag}}])
    if bulk_action:
        print('bulk_action: %s' % (bulk_action,))
        es_user_portrait.bulk(bulk_action, index=portrait_index_name,
                              doc_type=portrait_index_type)

    #step3: delete group results in group_manage
    # NOTE(review): index_name / index_type are not defined in this function;
    # presumably module-level names - verify they point at the group index.
    try:
        print('yes delete')
        es.delete(index=index_name, doc_type=index_type, id=task_id)
    except Exception:
        return False
    return True
def show_detect_result(task_name, submit_user):
    """Return the evaluation scores of every user of a detection task.

    The task id is ``<submit_user>-<task_name>``; its ``uid_list`` field is
    JSON-decoded and looked up in the portrait index in batches of
    ``DETECT_ITER_COUNT``.

    :return: ``'task name is not exist'`` when the task cannot be read, else
        rows ``[uid, uname, activeness, importance, influence]`` sorted by
        influence, descending.  Scores are ``log10(value / max * 9 + 1) * 100``;
        users missing from the portrait index carry u'未知' in every column
        after the uid.
    """
    #step1:identify the task name id exist
    task_id = submit_user + '-' + task_name
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'

    #step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])

    #step3:get user evaluation information---uid/uname/activeness/importance/influence
    user_result = []
    evaluate_max = None  # fetched once, on the first user that needs it
    iter_count = 0
    uid_count = len(uid_list)
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count:iter_count + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                    doc_type=portrait_index_type,
                                                    body={'ids': iter_user_list},
                                                    _source=True)['docs']
        except Exception:
            # A failed batch is skipped.
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found']:
                source = item['_source']
                if evaluate_max is None:
                    evaluate_max = get_evaluate_max()
                uname = source['uname']
                # float() guards against Python 2 integer division.
                activeness, importance, influence = [
                    math.log(source[key] / float(evaluate_max[key]) * 9 + 1, 10) * 100
                    for key in ('activeness', 'importance', 'influence')]
            else:
                uname = activeness = importance = influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
        iter_count += DETECT_ITER_COUNT

    return sorted(user_result, key=lambda x: x[4], reverse=True)
def identify_user_out(input_uid_list):
    """Split uids into those inside and those outside the portrait index.

    Both lookups run in batches of ``DETECT_ITER_COUNT``; a batch whose
    request fails contributes nothing.

    :return: ``(in_user_list, out_rows)`` where ``out_rows`` holds
        ``[uid, nick_name, fansnum, statusnum, friendsnum]`` for every
        outside user (u'未知' when the profile is missing too), sorted by
        fansnum, descending.
    """
    in_user_list = []
    out_user_list = []
    total = len(input_uid_list)
    print('identify user out')
    #get user list who is out user_portrait
    offset = 0
    while offset < total:
        batch = input_uid_list[offset:offset + DETECT_ITER_COUNT]
        try:
            portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids': batch},
                                                  _source=False)['docs']
        except:
            portrait_docs = []
        for doc in portrait_docs:
            bucket = in_user_list if doc['found'] == True else out_user_list
            bucket.append(doc['_id'])
        offset += DETECT_ITER_COUNT

    print('get out user portrait information')
    #get user profile information for out user_portrait
    out_user_result = []
    out_total = len(out_user_list)
    offset = 0
    while offset < out_total:
        batch = out_user_list[offset:offset + DETECT_ITER_COUNT]
        try:
            profile_docs = es_user_profile.mget(index=profile_index_name,
                                                doc_type=profile_index_type,
                                                body={'ids': batch},
                                                _source=True)['docs']
        except:
            profile_docs = []
        for doc in profile_docs:
            if doc['found'] == True:
                profile = doc['_source']
                row = [doc['_id'], profile['nick_name'], profile['fansnum'],
                       profile['statusnum'], profile['friendsnum']]
            else:
                row = [doc['_id'], u'未知', u'未知', u'未知', u'未知']
            out_user_result.append(row)
        offset += DETECT_ITER_COUNT

    sort_out_user_result = sorted(out_user_result, key=lambda x: x[2], reverse=True)
    return in_user_list, sort_out_user_result
def get_sensor_detail(task_name, ts, user):
    """Return the portrait rows of a sensing task's social sensors.

    The task document ``<user>-<task_name>`` supplies the JSON-encoded
    sensor uid list.  One row per sensor found in the portrait index is
    built in ``SOCIAL_SENSOR_INFO`` order: ``topic_string`` is split on '&',
    activeness/importance/influence become
    ``log10(value / top * 9 + 1) * 100``, other fields are copied.  Rows are
    sorted by their sixth column, descending.  ``ts`` is not used.

    :return: list of rows; empty when the task has no sensors.
    """
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type,
                         id=_id)["_source"]
    social_sensors = json.loads(task_detail['social_sensors'])
    top_importance = get_top_influence("importance")
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    if not social_sensors:
        return []

    top_by_field = {
        "activeness": top_activeness,
        "importance": top_importance,
        "influence": top_influence,
    }
    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={"ids": social_sensors}, fields=SOCIAL_SENSOR_INFO)['docs']
    portrait_detail = []
    for doc in docs:
        if not doc['found']:
            continue
        row = []
        for field_name in SOCIAL_SENSOR_INFO:
            if field_name == "topic_string":
                row.append(doc["fields"][field_name][0].split('&'))
            elif field_name in top_by_field:
                ratio = doc['fields'][field_name][0] / float(top_by_field[field_name])
                row.append(math.log(ratio * 9 + 1, 10) * 100)
            else:
                row.append(doc["fields"][field_name][0])
        portrait_detail.append(row)
    return sorted(portrait_detail, key=lambda x: x[5], reverse=True)
def delete_group_results(task_name, submit_user):
    """Delete a group task and strip its tag from the members' portraits.

    The task id is ``<submit_user>-<task_name>``.  For every member found in
    the portrait index the id is dropped from the '&'-separated ``group``
    field, then the task document is deleted.

    :return: True on success; False when the group document cannot be read
        or the final delete fails.
    """
    task_id = submit_user + '-' + task_name
    #step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return False
    uid_list = group_result['uid_list']
    # Other readers of this document JSON-decode uid_list; accept both the
    # encoded string and an already decoded list.
    try:
        uid_list = json.loads(uid_list)
    except (TypeError, ValueError):
        pass

    #step2: update group_tag in user_portrait
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        # Best effort: the task is still deleted if the members are unreadable.
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        if not item['found']:
            continue
        group_tag = item.get('_source', {}).get('group', '')
        # NOTE(review): the second excluded literal looks like a scrubbed
        # placeholder - confirm the intended value.
        remaining = [tag for tag in group_tag.split('&')
                     if tag != task_id and tag != '[email protected]']
        bulk_action.append({'update': {'_id': item['_id']}})
        bulk_action.append({'doc': {'group': '&'.join(remaining)}})
    if bulk_action:
        print('bulk_action: %s' % (bulk_action,))
        es_user_portrait.bulk(bulk_action, index=portrait_index_name,
                              doc_type=portrait_index_type)

    #step3: delete group results in group_manage
    # NOTE(review): index_name / index_type are not defined in this function;
    # presumably module-level names - verify they point at the group index.
    try:
        print('yes delete')
        es.delete(index=index_name, doc_type=index_type, id=task_id)
    except Exception:
        return False
    return True
def show_vary_detail(task_name, submit_user, vary_pattern):
    """Return the users behind one geo-variation pattern of a group task.

    :param vary_pattern: '-'-separated pattern; it is looked up in the
        task's ``vary_detail_geo`` mapping under its '&'-joined form.
    :return: an error string (``'group task is not exist'``,
        ``'group task is not completed'`` or ``'vary detail geo none'``) or
        a list of ``[uid, uname, start_date, end_date]`` rows; ``uname``
        falls back to the uid when the user has no portrait.
    :raises KeyError: when the pattern is not present in the task.
    """
    task_id = submit_user + '-' + task_name
    #identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                     id=task_id)['_source']
    except Exception:
        return 'group task is not exist'
    #identify the task status=1
    if source['status'] != 1:
        return 'group task is not completed'
    #get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except (KeyError, TypeError, ValueError):
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'

    #get vary_detail
    vary_pattern_key = '&'.join(vary_pattern.split('-'))
    # The lookup previously used an undefined name instead of the key
    # computed on the line above.
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]

    #get user name
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found']:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid

    #get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uid = vary_item[0]
        # .get: the name map is empty when the portrait lookup failed.
        uname = uname_dict.get(uid, uid)
        new_detail.append([uid, uname,
                           ts2datetime(vary_item[1]), ts2datetime(vary_item[2])])
    return new_detail
def show_vary_detail(task_name, submit_user, vary_pattern):
    """List the users that match one geo-variation pattern of a group task.

    :param vary_pattern: '-'-separated pattern; its '&'-joined form is the
        key into the task's ``vary_detail_geo`` mapping.
    :return: one of the strings ``'group task is not exist'``,
        ``'group task is not completed'``, ``'vary detail geo none'``, or a
        list of ``[uid, uname, start_date, end_date]`` rows where ``uname``
        is the uid itself for users without a portrait.
    :raises KeyError: when the task holds no entry for the pattern.
    """
    task_id = submit_user + '-' + task_name
    #identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                     id=task_id)['_source']
    except Exception:
        return 'group task is not exist'
    #identify the task status=1
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except (KeyError, TypeError, ValueError):
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'

    #get vary_detail
    vary_pattern_list = vary_pattern.split('-')
    vary_pattern_key = '&'.join(vary_pattern_list)
    # Fixed: an undefined name was used here instead of vary_pattern_key.
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [entry[0] for entry in uid_ts_list]

    #get user name
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        uname_dict[uid] = portrait_item['_source']['uname'] if portrait_item['found'] else uid

    #get vary detail
    # .get keeps the uid as the name when the portrait lookup failed.
    return [[entry[0], uname_dict.get(entry[0], entry[0]),
             ts2datetime(entry[1]), ts2datetime(entry[2])]
            for entry in uid_ts_list]
def ajax_get_task_detail_info():
    """Return the sensing task ``<user>-<task_name>`` as a JSON string.

    ``task_name`` and ``user`` (default 'admin') come from the request
    arguments.  ``social_sensors`` and ``history_status`` are JSON-decoded;
    the history is sorted in descending order (or replaced by [] when
    empty).  ``social_sensors_portrait`` gets one row per sensor found in
    the portrait index, following ``SOCIAL_SENSOR_INFO`` with
    ``topic_string`` split on '&', sorted by the sixth column, descending.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type,
                         id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])

    history_status = json.loads(task_detail['history_status'])
    task_detail['history_status'] = (
        sorted(history_status, reverse=True) if history_status else [])

    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    sensors = task_detail["social_sensors"]
    if sensors:
        docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                       body={"ids": sensors})['docs']
        if docs:
            for doc in docs:
                if not doc['found']:
                    continue
                source = doc["_source"]
                row = []
                for field_name in SOCIAL_SENSOR_INFO:
                    value = source[field_name]
                    if field_name == "topic_string":
                        value = value.split('&')
                    row.append(value)
                portrait_detail.append(row)
        if portrait_detail:
            portrait_detail.sort(key=lambda x: x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail
    return json.dumps(task_detail)
def portrait_user_vary(es, number, active_index, active_type, portrait_index, portrait_type, field="vary"):
    """Return the top ``number`` users by ``field`` that are in the portrait.

    Users are read from ``search_k`` in pages of 100; those missing from
    the portrait index are skipped.  Each row is
    ``[rank, photo_url, uid, nick_name, vary, '1']``.  Scanning stops after
    more than 10000 users have been examined, when the ranking is
    exhausted, or on a ``RequestError`` (the rows gathered so far are
    returned).

    :return: the string "no active_index exist" when ``active_index`` does
        not exist, else the list of rows.
    """
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    return_list = []
    count_c = 0
    start = 0
    rank = 1
    try:
        while True:
            user_list = search_k(es, active_index, active_type, start, field, 100)
            if not user_list:
                # Assumes search_k yields an empty result past the last
                # page; otherwise the loop could never end.
                break
            start += 100
            # obtain uid, notice "uid" or "user"
            search_list = [item.get('uid', '0') for item in user_list]  # uid list
            search_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                             body={"ids": search_list}, _source=True)["docs"]
            profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                             body={"ids": search_list}, _source=True)["docs"]
            # enumerate replaces the linear search_result.index(item) lookup.
            for position, item in enumerate(search_result):
                count_c += 1
                if not item["found"]:
                    continue
                info = [rank, '', '', '', '', '1']
                profile = profile_result[position]
                if profile['found']:
                    info[1] = profile['_source'].get('photo_url', '')
                    info[3] = profile['_source'].get('nick_name', '')
                info[2] = item.get('_id', '')
                info[4] = user_list[position]['vary']
                return_list.append(info)
                rank += 1
                if rank == int(number) + 1:
                    return return_list
            if count_c > 10000:
                break
    except RequestError:
        print("timeout")
    return return_list
def get_user_tag(uid_list):
    """Map each uid to its custom portrait tags as ``'key:value'`` strings.

    Every field of the user's portrait document whose name is not in
    ``identify_attribute_list`` counts as a tag.  Users without a stored
    source get an empty list.
    """
    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={'ids': uid_list})['docs']
    result = {}
    for doc in docs:
        tags = result[doc['_id']] = []
        try:
            source = doc['_source']
        except:
            source = {}
        for key in source:
            if key in identify_attribute_list:
                continue
            tags.append(key + ':' + source[key])
    return result
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    """Return the ``top_k`` documents ordered by ``sort_index``, descending.

    Each row is ``[rank, photo_url, uid, nick_name, vary, in_portrait]``;
    photo and nickname are '' when the user has no profile and
    ``in_portrait`` is '1' or '0'.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    uid_list = [hit['_id'] for hit in hits]
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                       body={"ids": uid_list}, _source=True)['docs']
    profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                     body={"ids": uid_list}, _source=True)['docs']

    return_list = []
    for position, hit in enumerate(hits):
        photo_url = nick_name = ''
        profile = profile_result[position]
        if profile['found']:
            photo_url = profile['_source'].get('photo_url', '')
            nick_name = profile['_source'].get('nick_name', '')
        uid = hit.get('_id', '')
        vary = hit['_source']['vary']
        in_portrait = '1' if portrait_result[position]['found'] else '0'
        return_list.append([position + 1, photo_url, uid, nick_name, vary, in_portrait])
    return return_list
def submit_identify_in_uname(input_data):
    """Submit the users named in an upload for portrait computing.

    ``input_data`` must provide ``date``, ``user`` and ``upload_data`` (one
    nickname per line).  Nicknames are resolved to uids through the profile
    index; uids already in the portrait index or in the ``compute`` hash are
    dropped and the rest is recorded in the submit hashes.

    :return: ``'uname list valid'`` when no nickname resolves,
        ``'uname list all in'`` when nothing is left to submit, else True.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n')

    #step1: get uid list from uname
    profile_exist_result = es_user_profile.search(
        index=profile_index_name, doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False)['hits']['hits']
    uid_list = [profile_item['_id'] for profile_item in profile_exist_result]
    if not uid_list:
        # NOTE(review): the message reads "valid" although no name matched;
        # left unchanged because callers may compare against it.
        return 'uname list valid'

    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in exist_portrait_result if doc['found'] == False]
    if not new_uid_list:
        return 'uname list all in'
    #step2.2: identify in compute
    # hkeys returns a list; it must be a set for the difference below.
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'

    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        # Every read and write is keyed by the uid being processed; the
        # leftover ``uid`` of the step1 loop was used here by mistake.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def get_user_tag(uid_list, submit_user):
    """Map each uid to the tags that ``submit_user`` attached to it.

    The documents for ``uid_list`` are fetched with one mget. For every
    returned document the field ``<submit_user>-tag`` is read from its
    ``_source`` (a document without ``_source`` counts as having no tags),
    split on ``'&'``, and every ``'-'`` in a tag is replaced by ``':'``.

    Returns a dict ``{uid: [tag, ...]}``; uids without tags map to ``[]``.
    """
    docs = es.mget(index=user_index_name, doc_type=user_index_type,
                   body={'ids': uid_list})['docs']
    tags_by_uid = {}
    for doc in docs:
        doc_id = doc['_id']
        tags_by_uid[doc_id] = []
        tag_field = submit_user + '-tag'
        raw_tags = doc.get('_source', {}).get(tag_field, '')
        if not raw_tags:
            continue
        tags_by_uid[doc_id] = [tag.replace('-', ':')
                               for tag in raw_tags.split('&')]
    return tags_by_uid
def filter_in_uid(input_dict):
    """Return portrait info for the uids of ``input_dict`` found in the index.

    The keys of ``input_dict`` are looked up in the portrait index in batches
    of ``FILTER_ITER_COUNT`` ids. Every found document contributes one row
    ``[uid, uname, photo_url, input_dict[uid]]``.

    A batch whose lookup raises is skipped, so the result may be partial.
    """
    # Materialise the keys so they can be sliced (dict views cannot be).
    input_uid = list(input_dict)
    in_portrait_result = []
    for start in range(0, len(input_uid), FILTER_ITER_COUNT):
        iter_user_list = input_uid[start:start + FILTER_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=False,
                fields=['photo_url', 'uname'])['docs']
        except Exception:
            # Best effort: a failed batch is dropped, the others still count.
            continue
        in_portrait_result.extend(
            [item['_id'],
             item['fields']['uname'][0],
             item['fields']['photo_url'][0],
             input_dict[item['_id']]]
            for item in portrait_result if item['found']
        )
    return in_portrait_result
def submit_identify_in_uid(input_data):
    """Submit users, given by uid, as recommendation candidates.

    ``input_data`` is a dict read for three keys: ``'date'`` (a string that
    becomes part of the redis hash names), ``'user'`` (the submitting user)
    and ``'upload_data'`` (newline-separated lines; the first 10 characters
    of each line are taken as a uid, shorter lines are ignored).

    Uids that are missing from the portrait index and not in the ``compute``
    hash are written to the submit hash and to the submitter's own hash.
    Always returns ``True``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = (set(r.hkeys(hashname_influence))
                          | set(r.hkeys(hashname_sensitive)))

    upload_data = input_data['upload_data']
    uid_list = []
    for line in upload_data.split('\n'):
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)

    # identify the uid is not exist in user_portrait and compute
    # step1: filter in user_portrait
    exist_portrait_result = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list}, _source=False)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if not exist_item['found']]

    # step2: filter in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)

    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Merge the submitter into the existing '&'-joined operator list.
            # NOTE(review): assumes the uid already has an entry in
            # hashname_submit; hget returning None would make json.loads fail.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(set(recommentor_list))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def get_user_tag(uid_list, submit_user):
    """Map each uid to the tags that ``submit_user`` attached to it.

    The documents for ``uid_list`` are fetched with one mget. For every
    returned document the field ``<submit_user>-tag`` is read from its
    ``_source`` (a document without ``_source`` counts as having no tags),
    split on ``'&'``, and every ``'-'`` in a tag is replaced by ``':'``.

    Returns a dict ``{uid: [tag, ...]}``; uids without tags map to ``[]``.
    """
    docs = es.mget(index=user_index_name, doc_type=user_index_type,
                   body={'ids': uid_list})['docs']
    tags_by_uid = {}
    for doc in docs:
        doc_id = doc['_id']
        tags_by_uid[doc_id] = []
        tag_field = submit_user + '-tag'
        raw_tags = doc.get('_source', {}).get(tag_field, '')
        if not raw_tags:
            continue
        tags_by_uid[doc_id] = [tag.replace('-', ':')
                               for tag in raw_tags.split('&')]
    return tags_by_uid
def show_out_uid():
    """List portrait details of the users recommended for removal.

    The uid lists stored as JSON in the values of the redis hash
    ``recommend_delete_list`` are merged and de-duplicated, then fetched from
    the portrait index. Uids returned by ``all_delete_uid()`` and uids with
    no portrait document are left out.

    Returns a list of rows, one per user, holding in order: uid, uname,
    location, statusnum, fansnum, domain, topic_string (``'&'`` replaced by
    ``','``), importance, influence, activeness, sensitive. The last four are
    rescaled as ``log10(value / top * 9 + 1) * 100``, with ``top`` taken from
    ``get_top_influence``. Returns ``[]`` when nothing is recommended.
    """
    fields = ["uid", "uname", "location", "statusnum", "fansnum", "domain",
              "topic_string", "importance", "influence", "activeness",
              "sensitive"]
    out_list = []
    # hgetall already returns the values, so no per-key hget is needed.
    recommend_dict = r_out.hgetall("recommend_delete_list")
    for raw_uid_list in recommend_dict.values():
        out_list.extend(json.loads(raw_uid_list))
    if not out_list:
        return out_list  # no one is recommended to out

    top_values = {
        "influence": float(get_top_influence("influence")),
        "activeness": float(get_top_influence("activeness")),
        "importance": float(get_top_influence("importance")),
        "sensitive": float(get_top_influence("sensitive")),
    }
    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        if doc['_id'] in filter_uid:
            continue
        # A uid with no portrait document comes back without '_source'.
        if '_source' not in doc:
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            if item == "topic_string":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                scaled = source[item] / top_values[item] * 9 + 1
                detail_info.append(math.log(scaled, 10) * 100)
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)
    return return_list