def conclusion_on_influence(uid):
    """Rank one user's averaged activeness, influence and importance.

    For each of the three ``aver_*`` fields stored in the copy-portrait index
    this computes:

    * the user's value as an integer percentage of the largest value found in
      the index, and
    * the number of documents whose value is strictly greater than the user's.

    Args:
        uid: document id of the user in the copy-portrait index.

    Returns:
        A 7-element list::

            [activeness_pct, activeness_higher_count,
             influence_pct, influence_higher_count,
             importance_pct, importance_higher_count,
             total_number]

        where ``total_number`` is the document count of the index. If the
        user's document cannot be fetched, the first six slots are 0.
    """
    total_number = es.count(
        index=copy_portrait_index_name, doc_type=copy_portrait_index_type
    )["count"]

    try:
        influ_result = es.get(
            index=copy_portrait_index_name,
            doc_type=copy_portrait_index_type,
            id=uid,
        )["_source"]
    except Exception:
        # The lookup failed: report zeros for every metric slot.
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    # The output order is activeness, influence, importance.
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field, 0)

        # Largest value of this field across the index (sort desc, take one).
        top_query = {
            "query": {"match_all": {}},
            "sort": {field: {"order": "desc"}},
            "size": 1,
        }
        top_hits = es.search(
            index=copy_portrait_index_name,
            doc_type=copy_portrait_index_type,
            body=top_query,
        )["hits"]["hits"]
        top_value = top_hits[0]["sort"][0] if top_hits else 0

        # Number of documents strictly above this user's value.
        higher_query = {
            "query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}
        }
        higher_count = es.count(
            index=copy_portrait_index_name,
            doc_type=copy_portrait_index_type,
            body=higher_query,
        )["count"]

        # Guard against an empty index or an all-zero field, which would
        # otherwise raise ZeroDivisionError.
        percent = int(value * 100.0 / top_value) if top_value else 0
        result.extend([percent, higher_count])

    result.append(total_number)
    return result
def ajax_create_task():
    """Create a social-sensing task from the current request's query string.

    Query parameters read from ``request.args``:

    * ``task_number``    - maximum number of unfinished tasks allowed (default 1)
    * ``task_name``      - name of the task (required)
    * ``create_by``      - user creating the task (default ``'admin'``)
    * ``stop_time``      - stop timestamp, e.g. ``1234567890`` (default ``"default"``)
    * ``social_sensors`` - comma-separated uid list
    * ``remark``         - free-text remark

    The task document id is ``"<create_by>-<task_name>"``.

    Returns:
        * ``'["0"]'``  - a task with the same id already exists
        * ``"more than limit"`` (plain string) - too many unfinished tasks
        * ``'["-1"]'`` - ``social_sensors`` is empty
        * ``'["1"]'``  - otherwise
    """
    args = request.args
    task_number = args.get('task_number', 1)
    task_name = args.get('task_name', '')  # required
    create_by = args.get('create_by', 'admin')  # submitting user
    stop_time = args.get('stop_time', "default")  # timestamp, e.g. 1234567890
    social_sensors = args.get("social_sensors", "")  # uid list, split with ","
    remark = args.get("remark", "")

    # Plain concatenation instead of str(task_name): under Python 2, str() on
    # a unicode value with non-ASCII characters raises UnicodeEncodeError.
    _id = create_by + "-" + task_name

    # Task names must be unique per creator.
    if es.exists(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id):
        return json.dumps(["0"])

    # Count tasks that are still being processed and not finished.
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"processing_status": "1"}},
                            {"term": {"finish": "0"}},
                        ]
                    }
                }
            }
        }
    }
    unfinish_number = es.count(
        index=index_manage_sensing_task, doc_type=task_doc_type, body=query_body
    )['count']
    if unfinish_number > (int(task_number) - 1):
        return "more than limit"

    # NOTE(review): when task_name is empty nothing is stored but '["1"]' is
    # still returned below; this is unchanged from the original. Confirm
    # whether an error code should be returned instead.
    if task_name:
        if not social_sensors:
            return json.dumps(['-1'])

        task_detail = {
            "task_name": task_name,
            "create_by": create_by,  # user who created the task
            "stop_time": stop_time,
            "remark": remark,
            # De-duplicated uid list, stored as a JSON string.
            "social_sensors": json.dumps(list(set(social_sensors.split(',')))),
            "create_at": int(time.time()),
            "warning_status": '0',
            "finish": "0",  # task has not ended
            "history_status": json.dumps([]),  # ts, keywords, warning_status
            "burst_reason": '',
            "processing_status": "1",  # task is in progress
        }
        # Store the task detail in ES.
        es.index(
            index=index_manage_sensing_task,
            doc_type=task_doc_type,
            id=_id,
            body=task_detail,
        )

    return json.dumps(["1"])
def new_get_user_evaluate(uid):
    """Collect the evaluation history summary of one user.

    For each of influence, importance, activeness and sensitive, the user's
    history document is fetched from the matching copy-portrait index and
    summarised with ``get_evaluate_max_min_now``; the total number of
    documents in the portrait index is appended to that summary.

    Args:
        uid: document id of the user.

    Returns:
        dict with keys ``'influence'``, ``'importance'``, ``'activeness'`` and
        ``'sensitive'``. Each value is the list returned by
        ``get_evaluate_max_min_now`` with ``all_count`` appended, or
        ``['', '', '', '', all_count]`` when no history is available.
        ``all_count`` is ``''`` when the count query reports no successful
        shard.
    """
    results = {}

    # Total number of users in user_portrait.
    count_response = es_user_portrait.count(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'query': {'match_all': {}}},
    )
    if count_response['_shards']['successful'] != 0:
        all_count = count_response['count']
    else:
        all_count = ''

    # (result key, history index, history doc type, prefix passed to
    #  get_evaluate_max_min_now)
    history_sources = (
        ('influence', COPY_USER_PORTRAIT_INFLUENCE,
         COPY_USER_PORTRAIT_INFLUENCE_TYPE, 'bci'),
        ('importance', COPY_USER_PORTRAIT_IMPORTANCE,
         COPY_USER_PORTRAIT_IMPORTANCE_TYPE, 'importance'),
        ('activeness', COPY_USER_PORTRAIT_ACTIVENESS,
         COPY_USER_PORTRAIT_ACTIVENESS_TYPE, 'activeness'),
        ('sensitive', COPY_USER_PORTRAIT_SENSITIVE,
         COPY_USER_PORTRAIT_SENSITIVE_TYPE, 'sensitive'),
    )

    for result_key, history_index, history_type, value_prefix in history_sources:
        try:
            history = ES_COPY_USER_PORTRAIT.get(
                index=history_index, doc_type=history_type, id=uid
            )['_source']
        except Exception:
            # Treat a failed lookup as "no history" (best effort).
            history = None

        if not history:
            results[result_key] = ['', '', '', '', all_count]
            continue

        if result_key == 'influence':
            # NOTE(review): the original computed the weekly average and its
            # rank and built an ``influence_item`` list from them, but never
            # returned it. The lookup and call are kept so failures surface
            # as before; confirm whether results['influence'] was meant to
            # carry the week-average data.
            get_influence_week_ave_rank(history['bci_week_ave'])

        # max value / min value / now value summary, plus the total count.
        max_min_now = get_evaluate_max_min_now(history, value_prefix)
        max_min_now.append(all_count)
        results[result_key] = max_min_now

    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users whose portrait attributes resemble those of ``uid``.

    Args:
        uid: id of the reference user in the portrait index.
        query_fields_dict: maps portrait field names to a boost weight and
            must contain ``'size'`` (number of similar users wanted). The
            dict is modified in place: fields that are empty in the reference
            user's portrait, and ``'size'``, are popped.
        index_name: index searched for similar users.
        doctype: document type searched for similar users.

    Returns:
        ``[]`` when none of the requested fields is filled in for ``uid``.
        Otherwise ``[own_info, hit_1, ..., hit_n, number]`` where

        * ``own_info`` is ``[uid, uname, activeness, importance, influence]``
          with the three metrics log-scaled against the values from
          ``get_evaluate_max()``,
        * each ``hit_i`` has the same five entries plus the hit's score as a
          percentage of the best non-self score,
        * ``number`` is the document count of the searched index.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # Fields to correlate on: every key except the 'size' control entry.
    # list() keeps .remove() working where dict.keys() returns a view.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('size')

    # Filter the search keys: drop fields that are empty for this user and
    # split the remaining '&'-joined values into individual terms.
    search_dict = {}
    for iter_key in keys_list:
        own_value = personal_info[iter_key]
        if own_value:
            search_dict[iter_key] = own_value.split('&')
        else:
            query_fields_dict.pop(iter_key)
    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': must_clauses}}
            }
        }
    }
    # Counted while the must-list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of hits to fetch
    query_number = query_fields_dict.pop('size')  # required number

    # One must-clause per field; any of the user's terms may match (should).
    for field_name, boost in query_fields_dict.items():
        should_clauses = [
            {'wildcard': {field_name: {'wildcard': '*' + term + '*', 'boost': boost}}}
            for term in search_dict[field_name]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']

    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']
    # Maximum of each evaluation metric, used for normalisation.
    evaluate_max_dict = get_evaluate_max()

    def normalise(field, value):
        # float() avoids Python 2 integer division truncating the ratio to 0.
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    # Reference score: the first hit that is not the user themself. The
    # original indexed result[0]/result[1] directly, which failed when the
    # user was the only hit.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    return_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original tested ``uid in filter_uid``, which does not depend on the
        # hit and therefore skipped either every hit or none.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        source = item['_source']
        info = [
            normalise(field, source[field]) if field in evaluate_index_list else source[field]
            for field in field_list
        ]
        info.append(item['_score'] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    own_info = [
        normalise(field, personal_info[field]) if field in evaluate_index_list
        else personal_info[field]
        for field in field_list
    ]
    return [own_info] + return_list
def imagine(submit_user, uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users similar to ``uid``, including ``submit_user``'s custom tags.

    Args:
        submit_user: user issuing the query; their tags on ``uid`` are read
            from the portrait field ``"<submit_user>-tag"``, a string of
            ``key-value`` pairs joined by ``'&'``.
        uid: id of the reference user in the portrait index.
        query_fields_dict: maps field (or tag) names to a boost weight and
            contains ``'size'`` (number of similar users wanted). Entries
            with a falsy weight are ignored. The dict is modified in place
            and the same object is appended to the result.
        index_name: index searched for similar users.
        doctype: document type searched for similar users.

    Returns:
        ``[]`` when neither a portrait field nor a tag can be used.
        Otherwise ``[own_info, hit_1, ..., hit_n, number, query_fields_dict]``
        where ``own_info`` and each hit are
        ``[uid, uname, activeness, importance, influence]`` (metrics
        log-scaled against ``get_evaluate_max()``); each hit additionally
        ends with its score as a percentage of the best non-self score, and
        ``number`` is the document count of the searched index.
    """
    default_setting_dict = query_fields_dict
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # Parse "key-value&key-value" tags attached by submit_user.
    user_tag = submit_user + "-tag"
    tag_dict = {}
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        for pair in user_tag_string.split('&'):
            pieces = pair.split('-')
            # Skip malformed pairs instead of raising IndexError.
            if len(pieces) > 1:
                tag_dict[pieces[0]] = pieces[1]

    # Keys to correlate on: positively weighted entries, minus 'size'.
    keys_list = [k for k, v in query_fields_dict.items() if v and k != 'size']

    # Filter the search keys: portrait fields need a non-empty value on the
    # reference user; anything else is looked up among the custom tags.
    search_dict = {}
    tag_attri_vaule = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if personal_info[iter_key]:
                search_dict[iter_key] = personal_info[iter_key].split('&')
            else:
                query_fields_dict.pop(iter_key)
        else:
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])
    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': must_clauses}}
            }
        }
    }
    # Counted while the must-list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of hits to fetch
    query_number = query_fields_dict.pop('size')  # required number

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})
    for field_name, boost in query_fields_dict.items():
        # Only fields that survived the filtering above have search terms.
        if field_name in search_dict:
            should_clauses = [
                {'wildcard': {field_name: {'wildcard': '*' + term + '*', 'boost': boost}}}
                for term in search_dict[field_name]
            ]
            must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']

    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']
    # Maximum of each evaluation metric, used for normalisation.
    evaluate_max_dict = get_evaluate_max()

    def normalise(field, value):
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    # Reference score: first hit that is not the user themself. The original
    # left top_score undefined when exactly one hit came back.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    return_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original tested ``uid in filter_uid``, which does not depend on the
        # hit and therefore skipped either every hit or none.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        source = item['_source']
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                info.append(normalise(field, source[field]))
            else:
                # Fall back to the document id when uid/uname is empty.
                info.append(source[field] or item['_id'])
        info.append(item['_score'] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    own_info = [
        normalise(field, personal_info[field]) if field in evaluate_index_list
        else personal_info[field]
        for field in field_list
    ]
    results = [own_info]
    results.extend(return_list)
    results.append(default_setting_dict)
    return results
def conclusion_on_influence(uid):
    """Summarise where a user stands on the three averaged portrait metrics.

    Args:
        uid: document id of the user in the copy-portrait index.

    Returns:
        ``[activeness_pct, activeness_rank, influence_pct, influence_rank,
        importance_pct, importance_rank, total_number]`` where

        * ``*_pct`` is the user's ``aver_*`` value as an integer percentage of
          the largest value in the index (0 when that maximum is missing or
          zero),
        * ``*_rank`` is the number of documents with a strictly greater value,
        * ``total_number`` is the document count of the index.

        When the user's document cannot be fetched, the six metric entries
        are all 0.
    """
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=index_name, doc_type=index_type)["count"]

    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except Exception:
        return [0, 0, 0, 0, 0, 0, total_number]

    def top_value_of(field):
        # Highest value of ``field`` in the index: sort descending, keep one.
        body = {
            "query": {"match_all": {}},
            "sort": {field: {"order": "desc"}},
            "size": 1,
        }
        hits = es.search(index=index_name, doc_type=index_type, body=body)['hits']['hits']
        # An empty index has no hits; report 0 rather than raise IndexError.
        return hits[0]['sort'][0] if hits else 0

    def count_above(field, value):
        # Number of documents whose ``field`` is strictly greater than value.
        body = {"query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}}
        return es.count(index=index_name, doc_type=index_type, body=body)['count']

    def percent_of_top(value, top_value):
        # A zero or missing maximum would raise ZeroDivisionError/TypeError.
        if not top_value:
            return 0
        return int(value * 100.0 / top_value)

    aver_activeness = influ_result.get("aver_activeness", 0)
    aver_influence = influ_result.get("aver_influence", 0)
    aver_importance = influ_result.get('aver_importance', 0)

    return [
        percent_of_top(aver_activeness, top_value_of("aver_activeness")),
        count_above("aver_activeness", aver_activeness),
        percent_of_top(aver_influence, top_value_of("aver_influence")),
        count_above("aver_influence", aver_influence),
        percent_of_top(aver_importance, top_value_of("aver_importance")),
        count_above("aver_importance", aver_importance),
        total_number,
    ]
def imagine(uid, query_fields_dict, index_name="user_portrait", doctype='user'):
    """Search for users related to ``uid``.

    Args:
        uid: id of the reference user; their portrait is always read from
            index ``"user_portrait"``, type ``"user"``.
        query_fields_dict: search field weights, e.g. ``{"domain": 2}``.
            Weighted fields may be domain, topic, keywords, psycho_status,
            psycho_feature, activity_geo, hashtag. It must also contain
            ``'field'`` (the metric added to the score: activeness,
            importance or influence) and ``'size'`` (number of users wanted).
            The dict is modified in place: empty fields, ``'field'`` and
            ``'size'`` are popped.
        index_name: index searched for related users.
        doctype: document type searched for related users.

    Returns:
        ``[]`` when none of the requested fields is filled in for ``uid``.
        Otherwise ``[own_info, hit_1, ..., hit_n, number]`` where ``own_info``
        is ``[uid, uname, activeness, importance, influence]``, each hit has
        the same five entries plus its raw score, and ``number`` is the count
        returned by ES for the query before the field clauses are added.
    """
    personal_info = es.get(index="user_portrait", doc_type="user", id=uid,
                           _source=True)['_source']

    # list() keeps .remove() working where dict.keys() returns a view.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('field')
    keys_list.remove('size')

    # Drop fields that are empty for this user; split the rest on '&'.
    search_dict = {}
    for iter_key in keys_list:
        own_value = personal_info[iter_key]
        if own_value:
            search_dict[iter_key] = own_value.split('&')
        else:
            query_fields_dict.pop(iter_key)
    if not search_dict:
        return []

    # Factor applied to the chosen metric; anything else falls back to
    # influence with factor 0.
    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    sort_field = query_fields_dict.pop('field')
    if sort_field in factor_by_field:
        score_standard = {
            "modifier": "log1p",
            "field": sort_field,
            "factor": factor_by_field[sort_field],
        }
    else:
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': must_clauses}},
                'field_value_factor': score_standard,
                'boost_mode': "sum",
            }
        }
    }
    # Counted while the must-list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number of hits to fetch
    query_number = query_fields_dict.pop('size')  # required number

    for field_name, boost in query_fields_dict.items():
        should_clauses = [
            {'wildcard': {field_name: {'wildcard': '*' + term + '*', 'boost': boost}}}
            for term in search_dict[field_name]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']

    return_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original tested ``uid in filter_uid``, which does not depend on the
        # hit and therefore skipped either every hit or none.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        info = [item['_source'][field] for field in field_list]
        info.append(item['_score'])
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    own_info = [personal_info[field] for field in field_list]
    return [own_info] + return_list
def user_sort_interface(username, time, sort_scope, sort_norm, arg=None, st=None, et=None, isall=False, task_number=0, number=100):
    """Rank users online, or register an offline ranking task.

    Offline scopes (``all_limit_keyword`` when ``isall`` is true,
    ``in_limit_keyword`` / ``in_limit_hashtag`` otherwise) register a task via
    ``add_task``. Every other scope is ranked immediately.

    Args:
        username: user issuing the request.
        time: time window used for online ranking; recomputed from
            ``st``/``et`` for offline scopes.
        sort_scope: which scope to rank in (see above).
        sort_norm: ranking criterion, passed through to the helpers.
        arg: keyword/hashtag/topic argument of the scope.
        st, et: start/end of the period, converted with ``datetime2ts``.
        isall: true to rank over all users, false for in-portrait users.
        task_number: maximum number of concurrently running offline tasks.
        number: number of users wanted.

    Returns:
        * ``"more than limit"`` when the user already has ``task_number`` or
          more running offline tasks,
        * ``{"flag": True, "search_id": ...}`` when an offline task was added,
        * otherwise the result of ``make_up_user_info`` for the ranked users.
    """
    task_number = int(task_number)

    # (isall, scope) -> (task type, task range, window for short periods)
    offline_scopes = {
        (True, 'all_limit_keyword'): ("keyword", "all", 7),
        (False, 'in_limit_keyword'): ("keyword", "in", 1),
        (False, 'in_limit_hashtag'): ("hashtag", "in", 7),
    }
    offline = offline_scopes.get((bool(isall), sort_scope))

    if offline is not None:
        task_type, task_range, short_window = offline
        during = (datetime2ts(et) - datetime2ts(st)) / DAY + 1

        # Check the larger threshold first: the original tested ``> 3``
        # before ``> 16``, so the 30-day window could never be selected.
        if during > 16:
            time = 30
        elif during > 3:
            time = 7
        else:
            time = short_window

        running_number = es_user_portrait.count(
            index='user_rank_keyword_task',
            doc_type='user_rank_task',
            body=query_task_number(username))['count']
        if running_number > task_number - 1:
            return "more than limit"

        # Register the offline task.
        search_id = add_task(username, task_type, task_range, 'flow_text_',
                             during, st, et, arg, sort_norm, sort_scope,
                             time, isall, number)
        return {"flag": True, "search_id": search_id}

    # Online ranking.
    if isall:
        if sort_scope == 'all_nolimit':
            user_list = all_sort_filter(None, sort_norm, time, False, number)
        else:
            user_list = []
    else:
        # Remaining in-portrait scopes, e.g. in_limit_topic.
        user_list = in_sort_filter(time, sort_norm, sort_scope, arg, [], False, number)

    return make_up_user_info(user_list, isall, time, sort_norm)
def search_attribute_portrait(uid):
    """Load a user's portrait and expand it into display-ready attributes.

    The portrait document is read from index ``user_portrait`` (type
    ``user``). JSON-encoded ``item -> weight`` fields are decoded and sorted
    by weight descending, ``'_'``-joined fields are split, and ranks and
    conclusions are added.

    Args:
        uid: document id of the user.

    Returns:
        ``None`` when the portrait cannot be fetched. Otherwise the portrait
        ``_source`` dict with these keys rewritten or added:

        * ``keywords``, ``activity_geo``: all ``(item, weight)`` pairs
        * ``hashtag_dict``, ``topic``, ``emoticon``, ``online_pattern``,
          ``psycho_status``: top-5 ``(item, weight)`` pairs
        * ``hashtag_description``, ``emotion_words``, ``emotion_conclusion``
        * ``domain``, ``psycho_feature``: lists
        * ``description``: from the ``weibo_user`` profile, ``''`` if missing
        * ``importance_rank``, ``activeness_rank``, ``influence_rank``:
          counts from a range query starting at the user's own value
          (0 when unavailable)
        * ``all_count``, ``link_conclusion``
    """
    index_name = 'user_portrait'
    index_type = 'user'
    try:
        results = es_user_portrait.get(index=index_name, doc_type=index_type,
                                       id=uid)['_source']
    except Exception:
        return None

    def weighted_pairs(raw_json, limit=None):
        # Decode a JSON object and return its items sorted by weight, desc.
        if not raw_json:
            return []
        pairs = sorted(json.loads(raw_json).items(),
                       key=lambda pair: pair[1], reverse=True)
        return pairs if limit is None else pairs[:limit]

    results['keywords'] = weighted_pairs(results.get('keywords'))
    results['activity_geo'] = weighted_pairs(results.get('activity_geo_dict'))

    # Hashtags: top 5 plus a description built from the full mapping.
    raw_hashtags = results.get('hashtag_dict')
    if raw_hashtags:
        results['hashtag_dict'] = weighted_pairs(raw_hashtags, 5)
        results['hashtag_description'] = hashtag_description(json.loads(raw_hashtags))
    else:
        results['hashtag_dict'] = []
        results['hashtag_description'] = ''

    # Emotion words: top 5 per emotion type; types '126' and '127' also feed
    # the emotion conclusion.
    emotion_result = {}
    emotion_conclusion_dict = {}
    if results.get('emotion_words'):
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type in ('126', '127'):
                    emotion_conclusion_dict[word_type] = word_dict
                word_list = sorted(word_dict.items(),
                                   key=lambda pair: pair[1], reverse=True)[:5]
            except Exception:
                # Missing or malformed entry for this type: no words.
                word_list = []
            emotion_result[emotion_mark_dict[word_type]] = word_list
    results['emotion_words'] = emotion_result
    results['emotion_conclusion'] = get_emotion_conclusion(emotion_conclusion_dict)

    # JSON weight maps reduced to their top 5 entries.
    for field in ('topic', 'emoticon', 'online_pattern', 'psycho_status'):
        results[field] = weighted_pairs(results.get(field), 5)

    # '_'-joined string fields.
    for field in ('domain', 'psycho_feature'):
        raw_value = results.get(field)
        results[field] = raw_value.split('_') if raw_value else []

    # Self description from the user profile index.
    if results.get('uid'):
        try:
            profile_result = es_user_profile.get(index='weibo_user', doc_type='user',
                                                 id=results['uid'])
            results['description'] = profile_result['_source']['description']
        except Exception:
            results['description'] = ''
    else:
        results['uid'] = ''
        results['description'] = ''

    # Rank of each metric: number of users in the range [own value, 1000000].
    # The rank key is always set; the original left activeness_rank and
    # influence_rank undefined when the metric was empty.
    rank_fields = (
        ('importance', 'es_importance_rank error'),
        ('activeness', 'es_activess_rank error'),
        ('influence', 'es_influence_rank error'),
    )
    for field, error_message in rank_fields:
        rank = 0
        if results.get(field):
            query_body = {
                'query': {
                    'range': {
                        field: {'from': results[field], 'to': 1000000}
                    }
                }
            }
            rank_response = es_user_portrait.count(index=index_name, doc_type=index_type,
                                                   body=query_body)
            if rank_response['_shards']['successful'] != 0:
                rank = rank_response['count']
            else:
                print(error_message)
        results[field + '_rank'] = rank

    # Total count in user_portrait.
    all_count_results = es_user_portrait.count(index=index_name, doc_type=index_type,
                                               body={'query': {'match_all': {}}})
    if all_count_results['_shards']['successful'] != 0:
        results['all_count'] = all_count_results['count']
    else:
        print('es_user_portrait error')
        results['all_count'] = 0

    # Link conclusion.
    results['link_conclusion'] = get_link_conclusion(results['link'])
    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Return users whose portrait fields overlap with those of ``uid``.

    Args:
        uid: id of the reference user in the portrait index.
        query_fields_dict: ``{field_name: boost, ..., 'size': n}``. ``'size'``
            is the number of similar users wanted. The dict is modified in
            place: fields that are empty for the reference user, and
            ``'size'``, are popped.
        index_name: index searched for similar users.
        doctype: document type searched for similar users.

    Returns:
        ``[]`` if no requested field has a value for ``uid``; otherwise
        ``[own_info, hit_1, ..., hit_n, number]``. ``own_info`` and every hit
        are ``[uid, uname, activeness, importance, influence]`` with the
        three metrics log-scaled against ``get_evaluate_max()``; every hit
        additionally ends with its score as a percentage of the best non-self
        score. ``number`` is the document count of the searched index.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # Keys to correlate on (everything except the 'size' control entry).
    # list() keeps .remove() working where dict.keys() returns a view.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('size')

    # Filter the search keys and drop the ones with no value for this user.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
            continue
        search_dict[iter_key] = personal_info[iter_key].split('&')
    if not search_dict:
        return []

    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': []}}
            }
        }
    }
    must_clauses = query_body['query']['function_score']['query']['bool']['must']

    # Counted while the must-list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of hits to fetch
    query_number = query_fields_dict.pop('size')  # required number

    # Each field contributes one must-clause made of wildcard alternatives.
    for field_name, boost in query_fields_dict.items():
        alternatives = []
        for term in search_dict[field_name]:
            alternatives.append({
                'wildcard': {
                    field_name: {'wildcard': '*' + term + '*', 'boost': boost}
                }
            })
        must_clauses.append({'bool': {'should': alternatives}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']

    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']
    # Maximum of each evaluation metric, used for normalisation.
    evaluate_max_dict = get_evaluate_max()

    def describe(source):
        # Build the 5-entry row; float() avoids Python 2 integer division
        # truncating value/max to 0.
        row = []
        for field in field_list:
            if field in evaluate_index_list:
                ratio = source[field] / float(evaluate_max_dict[field])
                row.append(math.log(ratio * 9 + 1, 10) * 100)
            else:
                row.append(source[field])
        return row

    # Best score among hits other than the user themself. The original read
    # result[1] unconditionally when result[0] was the user, raising
    # IndexError for a single hit, and could leave top_score undefined.
    other_scores = [item['_score'] for item in result if item['_id'] != uid]
    top_score = other_scores[0] if other_scores else None

    return_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original tested ``uid in filter_uid``, which does not depend on the
        # hit and therefore skipped either every hit or none.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        info = describe(item['_source'])
        info.append(item['_score'] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    results = [describe(personal_info)]
    results.extend(return_list)
    return results
def imagine(submit_user, uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users similar to ``uid`` and return them as dicts.

    Args:
        submit_user: user issuing the query; their tags on ``uid`` are read
            from the portrait field ``"<submit_user>-tag"``, a string of
            ``key-value`` pairs joined by ``'&'``.
        uid: id of the reference user in the portrait index.
        query_fields_dict: maps field (or tag) names to a boost weight and
            contains ``'size'`` (number of similar users wanted). Entries
            with a falsy weight are ignored. The dict is modified in place.
        index_name: index searched for similar users.
        doctype: document type searched for similar users.

    Returns:
        * ``None`` when the portrait of ``uid`` cannot be fetched,
        * ``[]`` when neither a portrait field nor a tag can be used,
        * otherwise ``[own_info, hit_1, ..., hit_n]`` where ``own_info`` is
          the list ``[uid, uname, activeness, importance, influence]`` and
          each hit is a dict with keys ``uid``, ``uname``, ``activeness``,
          ``importance``, ``influence`` and ``similiar`` (score as a
          percentage of the best non-self score). The three metrics are
          log-scaled against ``get_evaluate_max()``.
    """
    try:
        personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                               id=uid, _source=True)['_source']
    except Exception:
        return None

    # Parse "key-value&key-value" tags attached by submit_user.
    user_tag = submit_user + "-tag"
    tag_dict = {}
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        for pair in user_tag_string.split('&'):
            pieces = pair.split('-')
            # Skip malformed pairs instead of raising IndexError.
            if len(pieces) > 1:
                tag_dict[pieces[0]] = pieces[1]

    # Keys to correlate on: positively weighted entries, minus 'size'.
    keys_list = [k for k, v in query_fields_dict.items() if v and k != 'size']

    # Filter the search keys: portrait fields need a non-empty value on the
    # reference user; anything else is looked up among the custom tags.
    search_dict = {}
    tag_attri_vaule = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if personal_info[iter_key]:
                search_dict[iter_key] = personal_info[iter_key].split('&')
            else:
                query_fields_dict.pop(iter_key)
        else:
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])
    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': must_clauses}}
            }
        }
    }
    # NOTE(review): the count is computed as in the original but is not part
    # of this variant's return value.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of hits to fetch
    query_number = query_fields_dict.pop('size')  # required number

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})
    for field_name, boost in query_fields_dict.items():
        # Only fields that survived the filtering above have search terms.
        if field_name in search_dict:
            should_clauses = [
                {'wildcard': {field_name: {'wildcard': '*' + term + '*', 'boost': boost}}}
                for term in search_dict[field_name]
            ]
            must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']

    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']
    # Maximum of each evaluation metric, used for normalisation.
    evaluate_max_dict = get_evaluate_max()

    def normalise(field, value):
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    # Reference score: first hit that is not the user themself. The original
    # left top_score undefined when exactly one hit came back.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    result_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original rebound ``uid`` to each hit's uid inside the field loop,
        # so this check compared against the previous hit after the first
        # iteration; the parameter is no longer overwritten.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        source = item['_source']
        return_dict = {}
        for field in field_list:
            if field in evaluate_index_list:
                return_dict[field] = normalise(field, source[field])
            else:
                return_dict[field] = source[field]
        return_dict['similiar'] = item['_score'] / float(top_score) * 100 if top_score else 0
        result_list.append(return_dict)
        if len(result_list) == query_number:
            break

    own_info = [
        normalise(field, personal_info[field]) if field in evaluate_index_list
        else personal_info[field]
        for field in field_list
    ]
    results = [own_info]
    results.extend(result_list)
    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users related to ``uid``.

    Args:
        uid: id of the reference user in the portrait index.
        query_fields_dict: search field weights, e.g. ``{"domain": 2}``.
            Weighted fields may be domain, topic_string, keywords,
            activity_geo, hashtag, character_sentiment, character_text. It
            must also contain ``'field'`` (the metric added to the score:
            activeness, importance or influence) and ``'size'`` (number of
            users wanted). The dict is modified in place: empty fields,
            ``'field'`` and ``'size'`` are popped.
        index_name: index searched for related users.
        doctype: document type searched for related users.

    Returns:
        ``[]`` when none of the requested fields is filled in for ``uid``.
        Otherwise ``[own_info, hit_1, ..., hit_n, number]`` where

        * ``own_info`` is the raw ``[uid, uname, activeness, importance,
          influence]`` of the reference user,
        * each hit has the same five entries (metrics log-scaled against
          ``get_evaluate_max()``) plus its score as a percentage of the best
          non-self score,
        * ``number`` is the count returned by ES for the query before the
          field clauses are added.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)["_source"]

    # Keys to correlate on; list() keeps .remove() working where
    # dict.keys() returns a view.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove("field")
    keys_list.remove("size")

    # Filter the search keys and drop the ones with no value for this user.
    search_dict = {}
    for iter_key in keys_list:
        own_value = personal_info[iter_key]
        if own_value:
            search_dict[iter_key] = own_value.split("&")
        else:
            query_fields_dict.pop(iter_key)
    if not search_dict:
        return []

    # Factor applied to the chosen metric; anything else falls back to
    # influence with factor 0.
    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    sort_field = query_fields_dict.pop("field")
    if sort_field in factor_by_field:
        score_standard = {
            "modifier": "log1p",
            "field": sort_field,
            "factor": factor_by_field[sort_field],
        }
    else:
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}

    must_clauses = []
    query_body = {
        "query": {
            "function_score": {
                "query": {"bool": {"must": must_clauses}},
                "field_value_factor": score_standard,
                "boost_mode": "sum",
            }
        }
    }
    # Counted while the must-list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)["count"]
    query_body["size"] = 150  # default number of hits to fetch
    query_number = query_fields_dict.pop("size")  # required number

    for field_name, boost in query_fields_dict.items():
        should_clauses = [
            {"wildcard": {field_name: {"wildcard": "*" + term + "*", "boost": boost}}}
            for term in search_dict[field_name]
        ]
        must_clauses.append({"bool": {"should": should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)["hits"]["hits"]

    field_list = ["uid", "uname", "activeness", "importance", "influence"]
    evaluate_index_list = ["activeness", "importance", "influence"]
    # Maximum of each evaluation metric, used for normalisation.
    evaluate_max_dict = get_evaluate_max()

    # Reference score: first hit that is not the user themself. The original
    # indexed result[0]/result[1] directly, which failed when the user was
    # the only hit and could leave top_score undefined.
    top_score = None
    for item in result:
        if item["_id"] != uid:
            top_score = item["_score"]
            break

    return_list = []
    for item in result:
        # Skip the user themself and users returned by all_delete_uid(). The
        # original tested ``uid in filter_uid``, which does not depend on the
        # hit and therefore skipped either every hit or none.
        if item["_id"] == uid or item["_id"] in filter_uid:
            continue
        source = item["_source"]
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                # float() avoids Python 2 integer division truncating to 0.
                ratio = source[field] / float(evaluate_max_dict[field])
                info.append(math.log(ratio * 9 + 1, 10) * 100)
            else:
                info.append(source[field])
        info.append(item["_score"] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    # The reference user's own values are returned unscaled in this variant.
    own_info = [personal_info[field] for field in field_list]
    return [own_info] + return_list