def search_user(item):
    """Fuzzy-search the portrait index by uid or uname.

    Args:
        item: unicode search term. It is UTF-8 encoded and wrapped in ``*``
            wildcards before being sent to Elasticsearch.

    Returns:
        A list of ``[uid, uname]`` pairs (the query asks for at most 100
        hits), or the string ``'does not exist'`` when the search call fails.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 100,
    }
    try:
        hits = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        # Keep the string sentinel callers already receive, but no longer
        # swallow KeyboardInterrupt / SystemExit as the bare except did.
        return 'does not exist'

    return [[hit['fields']['uid'][0], hit['fields']['uname'][0]]
            for hit in hits]
def group_tab_map(group_name, node_type, relation_type, layer):
    """Count the users of a group graph per location.

    The uids come from ``group_tab_graph(...)['map_uid']``; their
    ``location`` values are aggregated in Elasticsearch and then folded
    into a single place name per bucket.

    Args:
        group_name, node_type, relation_type, layer: forwarded unchanged to
            ``group_tab_graph``.

    Returns:
        A list of ``(place, count)`` tuples sorted by count, descending,
        truncated to 500 entries.
    """
    black_country = [u'美国', u'其他', u'法国', u'英国', u'中国', u'局域网']
    # Locations mentioning one of these are counted under their first token.
    municipalities = (u'北京', u'天津', u'上海', u'重庆', u'香港', u'澳门')

    tab_graph_result = group_tab_graph(group_name, node_type, relation_type,
                                       layer)
    uid_list = [i for i in tab_graph_result['map_uid'] if str(i) != 'null']
    print(uid_list)

    query_body = {
        'filter': {'terms': {'uid': uid_list}},
        'aggs': {
            'all_location': {'terms': {'field': 'location', 'size': 400}}
        },
    }
    buckets = es_user_portrait.search(
        index=portrait_name, doc_type=portrait_type,
        body=query_body)['aggregations']['all_location']['buckets']

    location_dict = {}
    for bucket in buckets:
        if bucket['key'] in ('', 'unknown', u'其他'):
            continue
        location_dict[bucket['key']] = bucket['doc_count']

    filter_location = {}
    for location, count in location_dict.items():
        parts = location.split(' ')
        if any(city in location for city in municipalities):
            place = parts[0]
        elif len(parts) == 1:
            # A single token that is not a municipality is skipped.
            continue
        elif parts[1] in black_country or u'国' in parts[1]:
            continue
        else:
            place = parts[1]
        # dict.get replaces the former bare try/except accumulation, which
        # would also have hidden unrelated errors.
        filter_location[place] = filter_location.get(place, 0) + count

    return_results = sorted(filter_location.items(), key=lambda x: x[1],
                            reverse=True)
    return return_results[:500]
def search_user_time_limit(item, field, start_ts, end_ts, editor):
    """Fuzzy-search users by uid/uname within a ``submit_ts`` window.

    Args:
        item: unicode search term, matched as ``*item*`` against ``uid`` and
            ``uname``.
        field: list of field names to return for every hit, in order.
        start_ts: inclusive lower bound for ``submit_ts``.
        end_ts: inclusive upper bound for ``submit_ts``.
        editor: passed to ``deal_editor_tag`` when resolving
            ``function_mark``.

    Returns:
        A list with one row per hit; each row holds the values of ``field``
        in order (``''`` for a missing field, the first five ``&``-separated
        entries for ``keywords_string``). Returns the string
        ``'does not exist'`` when the search call fails.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'submit_ts': {'gte': start_ts, 'lte': end_ts}
                    }
                }],
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ],
                # Next to a ``must`` clause the ``should`` clauses are
                # optional by default, so the term would only influence
                # scoring. Require at least one of them to match.
                'minimum_should_match': 1,
            }
        }
    }
    try:
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    result = []
    for hit in hits:
        # Presumably a hit carrying none of the requested fields has no
        # 'fields' key at all; treat that like every field being missing.
        hit_fields = hit.get('fields', {})
        event = []
        for name in field:
            if name not in hit_fields:
                event.append('')
            elif name == 'keywords_string':
                event.append(hit_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                event.append(deal_editor_tag(hit_fields[name][0], editor)[0])
            else:
                event.append(hit_fields[name][0])
        result.append(event)
    return result
def get_evaluate_max():
    """Return the largest stored value of every evaluation metric.

    For each of ``importance``, ``influence``, ``activeness`` and
    ``sensitive`` the portrait index is sorted descending on that field and
    the top document's value is taken.

    Returns:
        A dict mapping metric name to its maximum value.

    Raises:
        Whatever ``es.search`` raises; ``IndexError`` if a search returns no
        hits.
    """
    max_result = {}
    evaluate_index = ['importance', 'influence', 'activeness', 'sensitive']
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # The former ``except Exception, e: raise e`` wrapper only re-raised,
        # so the call is made directly and errors propagate unchanged.
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body)['hits']['hits']
        max_result[evaluate] = hits[0]['_source'][evaluate]
    # The result was previously built but never returned, so callers
    # received None.
    return max_result
def search_user(item, field, submit_user):
    """Fuzzy-search users by uid/uname and return the requested fields.

    Args:
        item: unicode search term, matched as ``*item*`` against ``uid`` and
            ``uname``.
        field: list of field names to return for every hit, in order.
        submit_user: passed to ``deal_editor_tag`` when resolving
            ``function_mark``.

    Returns:
        A list with one row per hit (the query asks for at most 10); each
        row holds the values of ``field`` in order (``''`` for a missing
        field, the first five ``&``-separated entries for
        ``keywords_string``). Returns the string ``'does not exist'`` when
        the search call fails.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10,
    }
    try:
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    result = []
    for hit in hits:
        # Presumably a hit carrying none of the requested fields has no
        # 'fields' key at all; treat that like every field being missing.
        hit_fields = hit.get('fields', {})
        event = []
        for name in field:
            if name not in hit_fields:
                event.append('')
            elif name == 'keywords_string':
                event.append(hit_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                # The original passed ``editor``, a name this function never
                # defines; the ``submit_user`` parameter is the value in
                # scope for it.
                event.append(
                    deal_editor_tag(hit_fields[name][0], submit_user)[0])
            else:
                event.append(hit_fields[name][0])
        result.append(event)
    return result
def search_user(item, field):
    """Fuzzy-search users by uid/uname and return the requested fields.

    Args:
        item: unicode search term, matched as ``*item*`` against ``uid`` and
            ``uname``.
        field: list of field names to return for every hit, in order.

    Returns:
        A list with one row per hit (the query asks for at most 10); each
        row holds the first value of every requested field, or ``''`` when
        the hit lacks it. Returns the string ``'does not exist'`` when the
        search call fails.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10,
    }
    try:
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    user_uid_list = []
    for hit in hits:
        row = []
        for key in field:
            try:
                value = hit['fields'][key][0]
            except (KeyError, IndexError, TypeError):
                # Field absent or empty on this hit.
                value = ''
            row.append(value)
        user_uid_list.append(row)
    return user_uid_list
def search_user_time_limit(item, field, start_ts, end_ts):
    """Return the requested fields of documents created in a time window.

    Args:
        item: accepted for interface compatibility; the query built here
            does not use it.
        field: list of field names to return for every hit, in order.
        start_ts: inclusive lower bound for ``create_time``.
        end_ts: inclusive upper bound for ``create_time``.

    Returns:
        A list with one row per hit; each row holds the first value of every
        requested field, or ``''`` when the hit lacks it. Returns the string
        ``'does not exist'`` when the search call fails.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        # The original referenced ``former_ts`` and
                        # ``current_ts``, which this function never defines;
                        # the bounds are the start_ts/end_ts parameters.
                        'create_time': {'gte': start_ts, 'lte': end_ts}
                    }
                }]
            }
        }
    }
    try:
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    user_uid_list = []
    for hit in hits:
        row = []
        for key in field:
            try:
                value = hit['fields'][key][0]
            except (KeyError, IndexError, TypeError):
                # Field absent or empty on this hit.
                value = ''
            row.append(value)
        user_uid_list.append(row)
    return user_uid_list
def search_advance_event():
    """Advanced event search: event cards plus graph data.

    Reads its filters from ``request.args``:
        term: wildcard-matched against ``en_name``, ``name``, ``keywords``
            (any one may match).
        event_type, e_location: wildcard-matched against the same-named
            fields.
        tag: comma-separated ``name:value`` pairs; entries without a ``:``
            are ignored.
        start_ts, end_ts: time window; only applied when ``start_ts`` is
            given. ``end_ts`` defaults to the current time.
        layer: forwarded to the card/graph helpers (default ``'1'``).
        rel_type: comma-separated relation names.

    Returns:
        The string ``'no filter'`` when no filter was supplied, otherwise a
        JSON string with keys ``card_info`` and ``graph_info``.
    """
    query = []
    condition_num = 0
    fuzz_item = ['event_type', 'e_location']
    simple_fuzz_item = ['en_name', 'name', 'keywords']

    term = request.args.get('term', '')
    if term:
        term_clauses = [{'wildcard': {name: '*' + term + '*'}}
                        for name in simple_fuzz_item]
        condition_num += len(term_clauses)
        query.append({'bool': {'should': term_clauses}})

    for name in fuzz_item:
        value = request.args.get(name, '')
        if value:
            query.append({'wildcard': {name: '*' + value + '*'}})
            condition_num += 1

    # custom_attribute
    tag_items = request.args.get('tag', '')
    if tag_items != '':
        for tag_item in tag_items.split(','):
            name_value = tag_item.split(':')
            if len(name_value) < 2:
                # Malformed entry without ':'; used to raise IndexError.
                continue
            attribute_name, attribute_value = name_value[0], name_value[1]
            if attribute_name and attribute_value:
                # NOTE(review): ``submit_user`` is not defined in this
                # function; confirm it exists at module level or should be
                # read from the request.
                field_key = submit_user + '-tag'
                query.append({
                    'wildcard': {
                        field_key:
                        '*' + attribute_name + '-' + attribute_value + '*'
                    }
                })
                condition_num += 1

    start_ts = request.args.get('start_ts', '')
    end_ts_o = int(time.time())
    print('%s %s' % (end_ts_o, 'end_ts_o'))
    end_ts = request.args.get('end_ts', end_ts_o)
    if start_ts:
        query.append({
            'bool': {
                'must': [
                    {'range': {'start_ts': {'gte': start_ts}}},
                    {'range': {'end_ts': {'lte': end_ts}}},
                ]
            }
        })
        condition_num += 1

    if condition_num <= 0:
        # The match_all search that used to follow this return could never
        # run and has been removed.
        return 'no filter'

    size = 10
    sort = '_score'
    hits = es_user_portrait.search(
        index=event_name, doc_type=event_type,
        body={
            'query': {'bool': {'must': query}},
            'sort': [{sort: {'order': 'desc'}}],
            'size': size,
        },
        fields=['en_name'])['hits']['hits']

    id_list = [hit['fields']['en_name'][0] for hit in hits]
    layer = request.args.get('layer', '1')  # 1, 2, all
    result = advance_search_card_e(id_list, layer)

    relation_list2 = []
    relation_list2.extend(relation_list)
    relation_list2.extend(user_event_relation)
    relation_list2.extend(event_relation_list)
    relation_list2.extend(['group', 'special_event'])
    rel_type_str = ','.join(relation_list2)
    # Relation names joined by commas into a single string.
    rel_type = request.args.get('rel_type', rel_type_str)
    relation_list2 = rel_type.split(',')
    # NOTE(review): the parsed ``relation_list2`` is not used below; the
    # module-level ``relation_list`` is passed instead, whereas
    # search_advance_user passes ``relation_list2``. Confirm which is
    # intended before changing it.
    graph_info = advance_search_graph_e(id_list, layer, relation_list)
    return json.dumps({'card_info': result, 'graph_info': graph_info})
def search_advance_user():
    """Advanced user search: user cards plus graph data.

    Reads its filters from ``request.args``:
        term: wildcard-matched against ``uid`` and ``uname`` (either may
            match).
        activity_geo: wildcard-matched against the same-named field.
        tag: comma-separated ``name:value`` pairs; entries without a ``:``
            are ignored.
        domain, topic_string: comma-separated terms; any term may match.
        layer: forwarded to the card/graph helpers (default ``'0'``).
        rel_type: comma-separated relation names passed to the graph helper.

    Returns:
        The string ``'no filter'`` when no filter was supplied, otherwise a
        JSON string with keys ``card_info`` and ``graph_info``.
    """
    query = []
    condition_num = 0
    fuzz_item = ['activity_geo']
    multi_item = ['domain', 'topic_string']
    simple_fuzz_item = ['uid', 'uname']

    term = request.args.get('term', '')
    if term:
        term_clauses = [{'wildcard': {name: '*' + term + '*'}}
                        for name in simple_fuzz_item]
        condition_num += len(term_clauses)
        query.append({'bool': {'should': term_clauses}})

    for name in fuzz_item:
        value = request.args.get(name, '')
        if value:
            query.append({'wildcard': {name: '*' + value + '*'}})
            condition_num += 1

    # custom_attribute
    tag_items = request.args.get('tag', '')
    if tag_items != '':
        for tag_item in tag_items.split(','):
            name_value = tag_item.split(':')
            if len(name_value) < 2:
                # Malformed entry without ':'; used to raise IndexError.
                continue
            attribute_name, attribute_value = name_value[0], name_value[1]
            if attribute_name and attribute_value:
                # NOTE(review): ``submit_user`` is not defined in this
                # function; confirm it exists at module level or should be
                # read from the request.
                field_key = submit_user + '-tag'
                query.append({
                    'wildcard': {
                        field_key:
                        '*' + attribute_name + '-' + attribute_value + '*'
                    }
                })
                condition_num += 1

    for name in multi_item:
        value = request.args.get(name, '')
        if value:
            any_term = [{'wildcard': {name: '*' + part + '*'}}
                        for part in value.split(',')]
            condition_num += 1
            query.append({'bool': {'should': any_term}})

    print('%s %s' % (query, '---------'))

    if condition_num <= 0:
        # The match_all search that used to follow this return could never
        # run and has been removed.
        return 'no filter'

    size = 10
    sort = '_score'
    hits = es_user_portrait.search(
        index=portrait_name, doc_type=portrait_type,
        body={
            'query': {'bool': {'must': query}},
            'sort': [{sort: {'order': 'desc'}}],
            'size': size,
        },
        fields=['uid'])['hits']['hits']

    id_list = [hit['fields']['uid'][0] for hit in hits]
    layer = request.args.get('layer', '0')  # 0, 1, 2
    result = advance_search_card(id_list, layer)  # cards

    relation_list2 = []
    relation_list2.extend(relation_list)
    relation_list2.extend(user_event_relation)
    relation_list2.extend(event_relation_list)
    relation_list2.extend(['group', 'special_event'])
    rel_type_str = ','.join(relation_list2)
    # Relation names joined by commas into a single string.
    rel_type = request.args.get('rel_type', rel_type_str)
    relation_list2 = rel_type.split(',')
    print('%s %s' % (relation_list2, '----'))
    graph_info = advance_search_graph(id_list, layer, relation_list2)
    return json.dumps({'card_info': result, 'graph_info': graph_info})
def get_node_id(start_node):
    """Resolve start-node descriptions to graph node ids.

    Args:
        start_node: iterable of dicts with keys
            ``node_type``: one of the module's node-type constants;
            ``ids``: explicit primary-key values (used when truthy);
            ``conditions``: an Elasticsearch ``bool`` body, used when
            ``ids`` is empty. For people/org nodes a ``verify_type`` terms
            clause is appended to its ``must`` list in place.

    Returns:
        A list of graph node ids as strings, in lookup order.

    Raises:
        ValueError: if a node has an unsupported ``node_type``.
    """
    input_id = []
    for node in start_node:
        node_type = node['node_type']
        if node_type == people_node:
            primary = people_primary
            neo_index = node_index_name
        elif node_type == org_node:
            primary = org_primary
            neo_index = org_index_name
        elif node_type == event_node:
            primary = event_primary
            neo_index = event_index_name
        elif node_type == special_event_node:
            primary = special_event_primary
            neo_index = special_event_index_name
        elif node_type == group_node:
            primary = group_primary
            neo_index = group_index_name
        else:
            # Previously an unknown type silently reused the settings of the
            # preceding node, or failed with UnboundLocalError on the first.
            raise ValueError('unsupported node_type: %r' % (node_type,))

        if node['ids']:
            # ids typed in or uploaded by the caller
            id_list = node['ids']
        else:
            # attribute search
            # condition={'must/should/must_not':{'key1':'value1','key2':'value2'}}
            condition = node['conditions']
            if node_type == people_node or node_type == org_node:
                # people and organisations share the portrait index and are
                # told apart by verify_type
                if node_type == people_node:
                    verify_types = peo_list
                else:
                    verify_types = org_list
                verify_clause = {'terms': {'verify_type': verify_types}}
                try:
                    condition['must'].append(verify_clause)
                except (KeyError, AttributeError, TypeError):
                    # No usable 'must' list yet: start one.
                    condition['must'] = [verify_clause]
                es = es_user_portrait
                es_index = portrait_index_name
                es_type = portrait_index_type
            elif node_type == event_node:  # events
                es = es_event
                es_index = event_analysis_name
                es_type = event_type
            elif node_type == group_node:  # groups
                es = es_group
                es_index = group_name
                es_type = group_type
            else:  # special events
                es = es_special_event
                es_index = special_event_name
                es_type = special_event_type

            query_body = {'query': {'bool': condition}}
            print(query_body)
            hits = es.search(index=es_index, doc_type=es_type,
                             body=query_body)['hits']['hits']
            id_list = [hit['_id'] for hit in hits]

        # 'node:node_type(primary=id_list)'
        print(id_list)
        for node_key in id_list:
            # NOTE(review): the id is concatenated straight into the Cypher
            # text; confirm ids are validated upstream.
            rows = graph.run('start n=node:' + neo_index + '("' + primary +
                             ':' + str(node_key) + '") return id(n)')
            for row in rows:
                input_id.append(str(dict(row)['id(n)']))
    return input_id
def search_related_u_card(item, submit_user, g_name):
    """Search users matching *item* that are not already in a group.

    Args:
        item: unicode search term, matched as ``*item*`` against
            ``keywords``, ``uid`` and ``uname``.
        submit_user: owner of the group; also passed to ``deal_user_tag``.
        g_name: group name. When non-empty, the members of the group stored
            under ``pinyin(g_name + '_' + submit_user)`` are excluded from
            the result.

    Returns:
        ``[]`` when nothing is left to show, otherwise one row per user with
        the values of uid, uname, location, influence, sensitive,
        activeness, keywords_string and function_mark in that order
        (``''`` for a missing field). Returns the string
        ``'node does not exist'`` when the search call fails.
    """
    evaluate_max = get_evaluate_max()

    if g_name:
        g_name = g_name + '_' + submit_user
        g_name_pinyin = p.get_pinyin(g_name).lower()
        group_doc = es_group.get(index=group_name, doc_type=group_type,
                                 id=g_name_pinyin, fields=['people'])
        uid_list = group_doc['fields']['people'][0].split('&')
    else:
        uid_list = []

    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'keywords': pattern}},
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 1000,
    }
    try:
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body, fields=['uid'])['hits']['hits']
    except Exception:
        return 'node does not exist'

    search_uid = [hit['fields']['uid'][0] for hit in hits]
    show_id = list(set(search_uid) - set(uid_list))
    if not show_id:
        return []

    fields_list = [
        'uid', 'uname', 'location', 'influence', 'sensitive', 'activeness',
        'keywords_string', 'function_mark'
    ]
    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={'ids': show_id}, fields=fields_list)['docs']

    result = []
    for doc in docs:
        doc_fields = doc['fields']
        user = []
        for name in fields_list:
            if name not in doc_fields:
                user.append('')
            elif name == 'keywords_string':
                # The original tested for 'keywords', which is not in
                # fields_list, so the top-five split never ran.
                user.append(doc_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                user.append(deal_user_tag(doc_fields[name][0],
                                          submit_user)[0])
            elif name in ['influence', 'sensitive', 'activeness']:
                # NOTE(review): formula kept exactly as written; confirm the
                # intended normalisation. Under Python 2 the division is
                # integer division when both operands are ints.
                user.append(
                    math.log(doc_fields[name][0] /
                             (evaluate_max[name] * 9 + 1) + 1, 10) * 100)
            else:
                user.append(doc_fields[name][0])
        result.append(user)
    return result
def search_related_user(item):
    """Build a two-layer relation graph around users matching *item*.

    Users whose uid or uname contains *item* are looked up in Elasticsearch
    (at most 10). For each, one neighbouring node is fetched from the graph;
    those neighbours are then expanded again towards ``User`` nodes.

    Args:
        item: unicode search term.

    Returns:
        The string ``'node does not exist'`` when the search fails or finds
        nothing; otherwise a dict with keys ``total_user``, ``user_nodes``
        (uid -> name), ``event_nodes`` (event id -> name), ``relation``
        (list of ``[start, relation_type, end]``) and
        ``draw_nodes_length``.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10,
    }
    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'

    user_uid_list = []
    u_nodes_list = {}
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        uname = hit['fields']['uname'][0]
        u_nodes_list[uid] = uname
        user_uid_list.append([uid, uname])
    print(len(user_uid_list))

    e_nodes_list = {}
    user_relation = []
    # First-layer neighbours; they are the starting points of the second
    # layer.
    mid_uid_list = []
    mid_eid_list = []
    for uid_value in user_uid_list:
        c_string = 'START s0 = node:node_index(uid="' + str(
            uid_value[0]) + '") '
        c_string += 'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 1'
        result = graph.run(c_string)
        for row in list(result):
            start_id = row['s0']['uid']
            relation1 = row['r1'].type()
            m_id = dict(row['s1'])
            if 'uid' in m_id:
                middle_id = m_id['uid']
                mid_uid_list.append(middle_id)
                u_nodes_list[str(middle_id)] = user_name_search(middle_id)
                user_relation.append([start_id, relation1, middle_id])
            if 'envent_id' in m_id:
                middle_id = m_id['envent_id']
                mid_eid_list.append(middle_id)
                e_nodes_list[str(middle_id)] = event_name_search(middle_id)
                user_relation.append([start_id, relation1, middle_id])
    print(len(mid_uid_list))
    print('%s %s' % (len(mid_eid_list), '++++++++++++++++'))

    for mid_uid in mid_uid_list:
        c_string = 'START s1 = node:node_index(uid="' + str(mid_uid) + '") '
        c_string += 'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 5'
        result = graph.run(c_string)
        for row in result:
            start_mid_id = row['s1']['uid']
            relation2 = row['r2'].type()
            end_id = dict(row['s2'])
            if 'uid' in end_id:
                u_nodes_list[end_id['uid']] = user_name_search(end_id['uid'])
                user_relation.append(
                    [start_mid_id, relation2, end_id['uid']])
            if 'envent_id' in end_id:
                # Read the same key that was just checked; the original
                # looked up 'event_id' here and would raise KeyError.
                end_event = end_id['envent_id']
                e_nodes_list[end_event] = event_name_search(end_event)
                user_relation.append([start_mid_id, relation2, end_event])

    for mid_eid in mid_eid_list:
        c_string = 'START s1 = node:event_index(event="' + str(mid_eid) + '") '
        c_string += 'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 3'
        event_result = graph.run(c_string)
        for row in event_result:
            relation2 = row['r2'].type()
            end_id = dict(row['s2'])
            if 'uid' in end_id:
                u_nodes_list[end_id['uid']] = user_name_search(end_id['uid'])
                user_relation.append([mid_eid, relation2, end_id['uid']])
            if 'envent_id' in end_id:
                # Same key fix as in the user-neighbour loop.
                end_event = end_id['envent_id']
                e_nodes_list[end_event] = event_name_search(end_event)
                user_relation.append([mid_eid, relation2, end_event])

    return {
        'total_user': len(user_uid_list),
        'user_nodes': u_nodes_list,
        'event_nodes': e_nodes_list,
        'relation': user_relation,
        'draw_nodes_length': len(u_nodes_list),
    }
def search_related_user_card(item, layer):
    """Return user cards for users matching *item* and their neighbours.

    Args:
        item: unicode search term, matched as ``*item*`` against ``uid`` and
            ``uname`` (at most 100 hits).
        layer: ``'1'`` adds directly connected ``User`` nodes, ``'2'`` adds
            ``User`` nodes two relations away, ``'all'`` uses the user nodes
            reported by ``search_related_user(item)``.

    Returns:
        The result of ``related_user_search(uids, 'activeness')``, or the
        string ``'node does not exist'`` when the search fails or finds
        nothing.

    Raises:
        ValueError: if ``layer`` is not ``'1'``, ``'2'`` or ``'all'`` and the
            search found at least one user.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 100,
    }
    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'

    only_uid = []
    user_uid_list = []
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        uname = hit['fields']['uname'][0]
        only_uid.append(uid)
        user_uid_list.append([uid, uname])
    print('%s %s' % (len(user_uid_list), '========='))

    # Layers '1' and '2' differ only in the MATCH pattern they run.
    layer_patterns = {
        '1': 'MATCH (s0)-[r1]-(s1:User) return s0,r1,s1 LIMIT 100',
        '2': 'MATCH (s0)-[r1]-()-[r]-(s1:User) return s1 LIMIT 100',
    }
    if layer in layer_patterns:
        for uid_value in user_uid_list:
            c_string = 'START s0 = node:node_index(uid="' + str(
                uid_value[0]) + '") '
            c_string += layer_patterns[layer]
            result = graph.run(c_string)
            for row in list(result):
                only_uid.append(dict(row['s1'])['uid'])
        return related_user_search(only_uid, 'activeness')

    if layer == 'all':
        result = search_related_user(item)
        if not isinstance(result, dict):
            # search_related_user reports failure with a string sentinel;
            # pass it on instead of indexing into it.
            return result
        uid_list_all = list(result['user_nodes'])
        return related_user_search(uid_list_all, 'activeness')

    # Previously an unknown layer ended in UnboundLocalError on result_card.
    raise ValueError('unsupported layer: %r' % (layer,))