Exemplo n.º 1
0
def search_user(item):
    """Look up portrait users whose uid or uname contains ``item``.

    Runs a ``bool``/``should`` wildcard query (at most 100 hits) through
    ``es_user_portrait`` against ``portrait_name``/``portrait_type`` and
    requests only the ``uid`` and ``uname`` fields.

    :param item: search term; it is UTF-8 encoded before being embedded
        in the ``*term*`` wildcard pattern.
    :return: a list of ``[uid, uname]`` pairs, one per hit, or the string
        ``'does not exist'`` when the search call raises.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 100
    }

    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        # Keep the historical "string on failure" contract, but stop
        # swallowing KeyboardInterrupt/SystemExit as the bare except did.
        return 'does not exist'

    return [[hit['fields']['uid'][0], hit['fields']['uname'][0]]
            for hit in name_results]
Exemplo n.º 2
0
def group_tab_map(group_name, node_type, relation_type, layer):
    """Count the users of a group graph per place name.

    The uids come from ``group_tab_graph(...)['map_uid']`` (entries whose
    string form is ``'null'`` are dropped).  A terms aggregation on the
    ``location`` field is then run over exactly those uids and the bucket
    keys are folded into place names:

    * keys that are empty, ``'unknown'`` or ``u'其他'`` are ignored;
    * keys containing one of the six city names listed below are counted
      under their first space-separated token;
    * other single-token keys are ignored;
    * otherwise the second token is used, unless it is black-listed or
      contains ``u'国'``.

    :return: list of ``(place, count)`` tuples sorted by count, largest
        first, truncated to 500 entries.
    """
    black_country = [u'美国', u'其他', u'法国', u'英国', u'中国', u'局域网']
    direct_cities = (u'北京', u'天津', u'上海', u'重庆', u'香港', u'澳门')
    ignored_keys = ('', 'unknown', u'其他')

    tab_graph_result = group_tab_graph(group_name, node_type, relation_type,
                                       layer)
    uid_list = [i for i in tab_graph_result['map_uid'] if str(i) != 'null']
    print(uid_list)

    # The uid restriction has to live inside the query: a top-level
    # ``filter`` is a post filter, which is applied to the hits only after
    # the aggregations have been computed, so the buckets would otherwise
    # cover every document in the index.
    query_body = {
        'query': {
            'constant_score': {
                'filter': {
                    'terms': {
                        'uid': uid_list
                    }
                }
            }
        },
        'aggs': {
            'all_location': {
                'terms': {
                    'field': 'location',
                    'size': 400
                }
            }
        }
    }
    buckets = es_user_portrait.search(
        index=portrait_name, doc_type=portrait_type,
        body=query_body)['aggregations']['all_location']['buckets']

    filter_location = {}
    for bucket in buckets:
        key = bucket['key']
        if key in ignored_keys:
            continue
        parts = key.split(' ')
        if any(city in key for city in direct_cities):
            place = parts[0]
        elif len(parts) == 1:
            continue
        else:
            place = parts[1]
            if place in black_country or u'国' in place:
                continue
        filter_location[place] = (filter_location.get(place, 0) +
                                  bucket['doc_count'])

    return_results = sorted(filter_location.items(),
                            key=lambda pair: pair[1],
                            reverse=True)
    return return_results[:500]
Exemplo n.º 3
0
def search_user_time_limit(item, field, start_ts, end_ts, editor):
    """Search portrait documents by uid/uname within a submit-time window.

    A document must have ``submit_ts`` in ``[start_ts, end_ts]`` and match
    at least one of the ``*item*`` wildcards on ``uid`` or ``uname``.

    :param item: search term, UTF-8 encoded before use in the wildcard.
    :param field: iterable of field names to request and to emit, in order.
    :param start_ts: lower bound (inclusive) for ``submit_ts``.
    :param end_ts: upper bound (inclusive) for ``submit_ts``.
    :param editor: passed through to ``deal_editor_tag`` when the
        ``function_mark`` field is rendered.
    :return: one list per hit with one entry per name in ``field``:
        ``''`` when the hit lacks the field, the first five ``&``-separated
        parts for ``keywords_string``, ``deal_editor_tag(value, editor)[0]``
        for ``function_mark`` and the first stored value otherwise.
        Returns the string ``'does not exist'`` when the search raises.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'submit_ts': {
                            'gte': start_ts,
                            'lte': end_ts
                        }
                    }
                }],
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ],
                # With a ``must`` clause present the ``should`` clauses are
                # optional by default and would only influence scoring;
                # require one of them so ``item`` really filters.
                'minimum_should_match': 1
            }
        }
    }
    try:
        name_results = es.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    result = []
    for hit in name_results:
        # A hit carrying none of the requested fields has no 'fields' key.
        hit_fields = hit.get('fields', {})
        event = []
        for name in field:
            if name not in hit_fields:
                event.append('')
            elif name == 'keywords_string':
                event.append(hit_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                event.append(deal_editor_tag(hit_fields[name][0], editor)[0])
            else:
                event.append(hit_fields[name][0])
        result.append(event)
    return result
Exemplo n.º 4
0
def get_evaluate_max():
    """Return the largest stored value of each evaluation index.

    For each of ``importance``, ``influence``, ``activeness`` and
    ``sensitive`` the portrait index is queried for the single document
    that sorts first in descending order on that field, and the value is
    read from its ``_source``.

    :return: dict mapping each index name to its maximum value.
    :raises IndexError: if a search returns no hits.
    """
    max_result = {}
    for evaluate in ('importance', 'influence', 'activeness', 'sensitive'):
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        # Search errors propagate unchanged; the former
        # ``except Exception, e: raise e`` only discarded the traceback.
        hits = es.search(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body=query_body)['hits']['hits']
        max_result[evaluate] = hits[0]['_source'][evaluate]
    # The original fell off the end and handed callers None.
    return max_result
Exemplo n.º 5
0
def search_user(item, field, submit_user):
    """Search portrait documents whose uid or uname contains ``item``.

    :param item: search term, UTF-8 encoded before use in the wildcard.
    :param field: iterable of field names to request and to emit, in order.
    :param submit_user: passed to ``deal_editor_tag`` when the
        ``function_mark`` field is rendered.
    :return: one list per hit (at most 10 hits) with one entry per name in
        ``field``: ``''`` when the hit lacks the field, the first five
        ``&``-separated parts for ``keywords_string``,
        ``deal_editor_tag(value, submit_user)[0]`` for ``function_mark``
        and the first stored value otherwise.  Returns the string
        ``'does not exist'`` when the search raises.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10
    }

    try:
        name_results = es.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    result = []
    for hit in name_results:
        # A hit carrying none of the requested fields has no 'fields' key.
        hit_fields = hit.get('fields', {})
        event = []
        for name in field:
            if name not in hit_fields:
                event.append('')
            elif name == 'keywords_string':
                event.append(hit_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                # The original passed ``editor``, a name that is not defined
                # in this function; the tag owner is the ``submit_user``
                # argument.
                event.append(
                    deal_editor_tag(hit_fields[name][0], submit_user)[0])
            else:
                event.append(hit_fields[name][0])
        result.append(event)
    return result
Exemplo n.º 6
0
def search_user(item, field):
    """Search portrait documents whose uid or uname contains ``item``.

    :param item: search term, UTF-8 encoded before use in the wildcard.
    :param field: iterable of field names to request and to emit, in order.
    :return: one list per hit (at most 10 hits) holding, for every name in
        ``field``, the first stored value or ``''`` when the hit does not
        provide it.  Returns the string ``'does not exist'`` when the
        search raises.
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10
    }

    try:
        name_results = es.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    user_uid_list = []
    for hit in name_results:
        row = []
        for key in field:
            try:
                value = hit['fields'][key][0]
            except (KeyError, IndexError, TypeError):
                # Missing 'fields', missing key or empty value list.
                value = ''
            row.append(value)
        user_uid_list.append(row)
    return user_uid_list
Exemplo n.º 7
0
def search_user_time_limit(item, field, start_ts, end_ts):
    """Fetch portrait documents created inside a time window.

    :param item: accepted for signature compatibility; the query built
        here does not use it.
    :param field: iterable of field names to request and to emit, in order.
    :param start_ts: lower bound (inclusive) for ``create_time``.
    :param end_ts: upper bound (inclusive) for ``create_time``.
    :return: one list per hit holding, for every name in ``field``, the
        first stored value or ``''`` when the hit does not provide it.
        Returns the string ``'does not exist'`` when the search raises.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'create_time': {
                            # The original referenced ``former_ts`` and
                            # ``current_ts``, which are not defined in this
                            # function; the bounds are the two arguments.
                            'gte': start_ts,
                            'lte': end_ts
                        }
                    }
                }]
            }
        }
    }

    try:
        name_results = es.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, fields=field)['hits']['hits']
    except Exception:
        return 'does not exist'

    user_uid_list = []
    for hit in name_results:
        row = []
        for key in field:
            try:
                value = hit['fields'][key][0]
            except (KeyError, IndexError, TypeError):
                # Missing 'fields', missing key or empty value list.
                value = ''
            row.append(value)
        user_uid_list.append(row)
    return user_uid_list
Exemplo n.º 8
0
def search_advance_event():  # advanced search: event cards and graph
    """Handle an advanced event search request.

    Reads these query-string arguments from ``request.args``:

    * ``term`` - wildcard-matched against ``en_name``, ``name`` or
      ``keywords`` (any one may match);
    * ``event_type``, ``e_location`` - each wildcard-matched on the field
      of the same name;
    * ``tag`` - comma-separated ``name:value`` pairs matched against the
      ``<submit_user>-tag`` field; entries without a ``:`` are skipped;
    * ``start_ts`` / ``end_ts`` - when ``start_ts`` is given, requires
      ``start_ts >= start_ts`` and ``end_ts <= end_ts`` (``end_ts``
      defaults to the current time);
    * ``layer`` (default ``'1'``) and ``rel_type`` (comma-separated
      relation names, default: all known relations).

    :return: the string ``'no filter'`` when no condition was supplied,
        otherwise a JSON string with ``card_info`` and ``graph_info``.
    """
    query = []

    # Free-text term: any one of the simple fuzzy fields may match.
    term = request.args.get('term', '')
    if term:
        query.append({'bool': {'should': [
            {'wildcard': {name: '*' + term + '*'}}
            for name in ('en_name', 'name', 'keywords')
        ]}})

    for name in ('event_type', 'e_location'):
        value = request.args.get(name, '')
        if value:
            query.append({'wildcard': {name: '*' + value + '*'}})

    # Custom attributes.
    tag_items = request.args.get('tag', '')
    if tag_items != '':
        # NOTE(review): ``submit_user`` is not assigned in this function;
        # presumably a module-level name - confirm.
        field_key = submit_user + '-tag'
        for tag_item in tag_items.split(','):
            name_value = tag_item.split(':')
            if len(name_value) < 2:
                # Malformed entry without ':' used to raise IndexError.
                continue
            attribute_name, attribute_value = name_value[0], name_value[1]
            if attribute_name and attribute_value:
                query.append({
                    'wildcard': {
                        field_key:
                        '*' + attribute_name + '-' + attribute_value + '*'
                    }
                })

    start_ts = request.args.get('start_ts', '')
    end_ts_o = int(time.time())
    print('%s %s' % (end_ts_o, 'end_ts_o'))
    end_ts = request.args.get('end_ts', end_ts_o)
    if start_ts:
        query.append({'bool': {'must': [
            {'range': {'start_ts': {'gte': start_ts}}},
            {'range': {'end_ts': {'lte': end_ts}}},
        ]}})

    if not query:
        return 'no filter'

    hits = es_user_portrait.search(
        index=event_name, doc_type=event_type,
        body={'query': {'bool': {'must': query}},
              'sort': [{'_score': {'order': 'desc'}}],
              'size': 10},
        fields=['en_name'])['hits']['hits']
    id_list = [hit['fields']['en_name'][0] for hit in hits]

    layer = request.args.get('layer', '1')  # 1, 2, all
    card_info = advance_search_card_e(id_list, layer)

    relation_list2 = []
    relation_list2.extend(relation_list)
    relation_list2.extend(user_event_relation)
    relation_list2.extend(event_relation_list)
    relation_list2.extend(['group', 'special_event'])
    # rel_type is a string of relation names joined with commas.
    rel_type = request.args.get('rel_type', ','.join(relation_list2))
    relation_list2 = rel_type.split(',')
    # Pass the requested relations; the original computed relation_list2
    # and then handed the unfiltered ``relation_list`` to the graph search.
    graph_info = advance_search_graph_e(id_list, layer, relation_list2)
    return json.dumps({'card_info': card_info, 'graph_info': graph_info})
Exemplo n.º 9
0
def search_advance_user():  # advanced search: user cards and graph
    """Handle an advanced user search request.

    Reads these query-string arguments from ``request.args``:

    * ``term`` - wildcard-matched against ``uid`` or ``uname``;
    * ``activity_geo`` - wildcard-matched on the field of the same name;
    * ``tag`` - comma-separated ``name:value`` pairs matched against the
      ``<submit_user>-tag`` field; entries without a ``:`` are skipped;
    * ``domain``, ``topic_string`` - comma-separated values, any of which
      may wildcard-match the field of the same name;
    * ``layer`` (default ``'0'``) and ``rel_type`` (comma-separated
      relation names, default: all known relations).

    :return: the string ``'no filter'`` when no condition was supplied,
        otherwise a JSON string with ``card_info`` and ``graph_info``.
    """
    query = []

    # Free-text term: either uid or uname may match.
    term = request.args.get('term', '')
    if term:
        query.append({'bool': {'should': [
            {'wildcard': {name: '*' + term + '*'}}
            for name in ('uid', 'uname')
        ]}})

    for name in ('activity_geo',):
        value = request.args.get(name, '')
        if value:
            query.append({'wildcard': {name: '*' + value + '*'}})

    # Custom attributes.
    tag_items = request.args.get('tag', '')
    if tag_items != '':
        # NOTE(review): ``submit_user`` is not assigned in this function;
        # presumably a module-level name - confirm.
        field_key = submit_user + '-tag'
        for tag_item in tag_items.split(','):
            name_value = tag_item.split(':')
            if len(name_value) < 2:
                # Malformed entry without ':' used to raise IndexError.
                continue
            attribute_name, attribute_value = name_value[0], name_value[1]
            if attribute_name and attribute_value:
                query.append({
                    'wildcard': {
                        field_key:
                        '*' + attribute_name + '-' + attribute_value + '*'
                    }
                })

    # Multi-valued fields: any of the comma-separated values may match.
    for name in ('domain', 'topic_string'):
        value = request.args.get(name, '')
        if value:
            query.append({'bool': {'should': [
                {'wildcard': {name: '*' + part + '*'}}
                for part in value.split(',')
            ]}})
    print('%s %s' % (query, '---------'))

    if not query:
        return 'no filter'

    hits = es_user_portrait.search(
        index=portrait_name, doc_type=portrait_type,
        body={'query': {'bool': {'must': query}},
              'sort': [{'_score': {'order': 'desc'}}],
              'size': 10},
        fields=['uid'])['hits']['hits']
    id_list = [hit['fields']['uid'][0] for hit in hits]

    layer = request.args.get('layer', '0')  # 0, 1, 2

    card_info = advance_search_card(id_list, layer)  # cards
    relation_list2 = []
    relation_list2.extend(relation_list)
    relation_list2.extend(user_event_relation)
    relation_list2.extend(event_relation_list)
    relation_list2.extend(['group', 'special_event'])
    # rel_type is a string of relation names joined with commas.
    rel_type = request.args.get('rel_type', ','.join(relation_list2))
    relation_list2 = rel_type.split(',')
    print('%s %s' % (relation_list2, '----'))
    graph_info = advance_search_graph(id_list, layer, relation_list2)

    return json.dumps({'card_info': card_info, 'graph_info': graph_info})
Exemplo n.º 10
0
def get_node_id(start_node):
    """Resolve start-node descriptions to graph node ids.

    Each entry of ``start_node`` is a dict with

    * ``node_type`` - one of ``people_node``, ``org_node``, ``event_node``,
      ``special_event_node`` or ``group_node``;
    * ``ids`` - explicit primary-key values; when empty,
    * ``conditions`` - the body of an Elasticsearch ``bool`` query whose
      hits' ``_id`` values are used instead.  For people and organisation
      nodes a ``verify_type`` terms clause is added to ``must`` (the
      passed dict is modified in place).

    Every primary key is then looked up through the node type's graph
    index and the matching ``id(n)`` values are collected.

    :return: list of graph node ids as strings.
    :raises ValueError: for a ``node_type`` that is none of the five
        supported ones.  (Previously such an entry silently reused the
        index/key of the preceding entry, or failed with an unbound local.)
    """
    input_id = []
    for node in start_node:
        node_type = node['node_type']
        if node_type == people_node:
            primary = people_primary
            neo_index = node_index_name
        elif node_type == org_node:
            primary = org_primary
            neo_index = org_index_name
        elif node_type == event_node:
            primary = event_primary
            neo_index = event_index_name
        elif node_type == special_event_node:
            primary = special_event_primary
            neo_index = special_event_index_name
        elif node_type == group_node:
            primary = group_primary
            neo_index = group_index_name
        else:
            raise ValueError('unsupported node_type: %r' % (node_type,))

        if node['ids']:  # ids typed in or uploaded
            id_list = node['ids']
        else:  # attribute search
            # condition={'must/should/must_not':{'key1':'value1','key2':'value2'}}
            condition = node['conditions']
            if node_type == people_node or node_type == org_node:
                # people / organisations share the portrait index and are
                # told apart by verify_type
                verify_filter = {
                    'terms': {
                        'verify_type':
                        peo_list if node_type == people_node else org_list
                    }
                }
                try:
                    condition['must'].append(verify_filter)
                except (KeyError, AttributeError, TypeError):
                    # No usable 'must' list yet: start one, as before.
                    condition['must'] = [verify_filter]
                es_client = es_user_portrait
                es_index = portrait_index_name
                es_type = portrait_index_type
            elif node_type == event_node:  # events
                es_client = es_event
                es_index = event_analysis_name
                es_type = event_type
            elif node_type == group_node:  # groups
                es_client = es_group
                es_index = group_name
                es_type = group_type
            else:  # special events - the only type left after validation
                es_client = es_special_event
                es_index = special_event_name
                es_type = special_event_type

            query_body = {'query': {'bool': condition}}
            print(query_body)
            result = es_client.search(index=es_index,
                                      doc_type=es_type,
                                      body=query_body)['hits']['hits']
            id_list = [hit['_id'] for hit in result]

        print(id_list)
        for i in id_list:
            # NOTE(review): the index query is assembled by string
            # concatenation and ``ids`` may come straight from user input;
            # consider validating/escaping the values.
            cypher = ('start n=node:' + neo_index + '("' + primary + ':' +
                      str(i) + '") return id(n)')
            for record in graph.run(cypher):
                input_id.append(str(dict(record)['id(n)']))
    return input_id
Exemplo n.º 11
0
def search_related_u_card(item, submit_user, g_name):
    """Build user cards for users matching ``item`` that are not in a group.

    Users are matched with ``*item*`` wildcards on ``keywords``, ``uid``
    or ``uname`` (at most 1000 hits).  When ``g_name`` is given, the group
    document with id ``pinyin(g_name + '_' + submit_user).lower()`` is
    fetched and the uids in its ``&``-separated ``people`` field are
    excluded from the result.

    :param item: search term, UTF-8 encoded before use in the wildcards.
    :param submit_user: owner name; used for the group id and passed to
        ``deal_user_tag``.
    :param g_name: group name, or a falsy value for "no exclusion".
    :return: ``'node does not exist'`` when the search raises, ``[]`` when
        nothing is left to show, otherwise one list per user with the
        values of uid, uname, location, influence, sensitive, activeness,
        keywords_string and function_mark.  A missing field yields ``''``;
        ``keywords_string`` becomes its first five ``&``-separated parts;
        ``function_mark`` becomes ``deal_user_tag(value, submit_user)[0]``;
        the three scores become
        ``log10(value / (max * 9 + 1) + 1) * 100``.
    """
    evaluate_max = get_evaluate_max()
    if g_name:
        g_name = g_name + '_' + submit_user
        g_name_pinyin = p.get_pinyin(g_name).lower()
        group_doc = es_group.get(index=group_name, doc_type=group_type,
                                 id=g_name_pinyin, fields=['people'])
        uid_list = group_doc['fields']['people'][0].split('&')
    else:
        uid_list = []

    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'keywords': pattern}},
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 1000
    }
    try:
        user_result = es.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, fields=['uid'])['hits']['hits']
    except Exception:
        return 'node does not exist'

    search_uid = [hit['fields']['uid'][0] for hit in user_result]
    show_id = list(set(search_uid) - set(uid_list))
    if not show_id:
        return []

    fields_list = [
        'uid', 'uname', 'location', 'influence', 'sensitive', 'activeness',
        'keywords_string', 'function_mark'
    ]
    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={'ids': show_id}, fields=fields_list)['docs']

    result = []
    for doc in docs:
        doc_fields = doc['fields']
        user = []
        for name in fields_list:
            if name not in doc_fields:
                user.append('')
            elif name == 'keywords_string':
                # The original tested for 'keywords', which is never in
                # fields_list, so the keyword string was never split.
                user.append(doc_fields[name][0].split('&')[:5])
            elif name == 'function_mark':
                user.append(deal_user_tag(doc_fields[name][0],
                                          submit_user)[0])
            elif name in ('influence', 'sensitive', 'activeness'):
                # float() keeps Python 2 from truncating int / int to 0.
                ratio = float(doc_fields[name][0]) / (evaluate_max[name] * 9 + 1)
                user.append(math.log(ratio + 1, 10) * 100)
            else:
                user.append(doc_fields[name][0])
        result.append(user)
    return result
Exemplo n.º 12
0
def search_related_user(item):
    """Collect a two-layer neighbourhood graph around users matching ``item``.

    1. Up to 10 portrait users are matched with ``*item*`` wildcards on
       ``uid`` or ``uname``.
    2. For every matched user one relationship (``LIMIT 1``) is read from
       the graph; the neighbour is recorded as a user when it has a
       ``uid`` property and as an event when it has an ``envent_id``
       property.
    3. Every first-layer user (``LIMIT 5``) and event (``LIMIT 3``) is
       expanded once more along outgoing relationships to ``User`` nodes.

    :param item: search term, UTF-8 encoded before use in the wildcards.
    :return: the string ``'node does not exist'`` when the search raises
        or finds nothing, otherwise a dict with ``total_user`` (number of
        matched users), ``user_nodes`` (id -> name), ``event_nodes``
        (id -> name), ``relation`` (list of ``[start, type, end]``) and
        ``draw_nodes_length`` (size of ``user_nodes``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 10
    }

    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        return 'node does not exist'
    if not name_results:
        return 'node does not exist'

    user_uid_list = []
    u_nodes_list = {}
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        uname = hit['fields']['uname'][0]
        u_nodes_list[uid] = uname
        user_uid_list.append([uid, uname])
    print(len(user_uid_list))

    e_nodes_list = {}
    user_relation = []
    # First-layer neighbours; they are the starting points of layer two.
    mid_uid_list = []
    mid_eid_list = []
    for uid_value in user_uid_list:
        c_string = ('START s0 = node:node_index(uid="' + str(uid_value[0]) +
                    '") '
                    'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 1')
        for record in list(graph.run(c_string)):
            start_id = record['s0']['uid']
            relation1 = record['r1'].type()
            neighbour = dict(record['s1'])
            if 'uid' in neighbour:
                middle_id = neighbour['uid']
                mid_uid_list.append(middle_id)
                u_nodes_list[str(middle_id)] = user_name_search(middle_id)
                user_relation.append([start_id, relation1, middle_id])
            if 'envent_id' in neighbour:
                middle_id = neighbour['envent_id']
                mid_eid_list.append(middle_id)
                e_nodes_list[str(middle_id)] = event_name_search(middle_id)
                user_relation.append([start_id, relation1, middle_id])
    print(len(mid_uid_list))
    print('%s %s' % (len(mid_eid_list), '++++++++++++++++'))

    for mid_uid in mid_uid_list:
        c_string = ('START s1 = node:node_index(uid="' + str(mid_uid) + '") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 5')
        for record in graph.run(c_string):
            start_mid_id = record['s1']['uid']
            relation2 = record['r2'].type()
            end_node = dict(record['s2'])
            if 'uid' in end_node:
                end_uid = end_node['uid']
                u_nodes_list[end_uid] = user_name_search(end_uid)
                user_relation.append([start_mid_id, relation2, end_uid])
            if 'envent_id' in end_node:
                # Read the same key that was tested; the original looked up
                # 'event_id' here and raised KeyError.
                end_eid = end_node['envent_id']
                e_nodes_list[end_eid] = event_name_search(end_eid)
                user_relation.append([start_mid_id, relation2, end_eid])

    for mid_eid in mid_eid_list:
        c_string = ('START s1 = node:event_index(event="' + str(mid_eid) +
                    '") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 3')
        for record in graph.run(c_string):
            relation2 = record['r2'].type()
            end_node = dict(record['s2'])
            if 'uid' in end_node:
                end_uid = end_node['uid']
                u_nodes_list[end_uid] = user_name_search(end_uid)
                user_relation.append([mid_eid, relation2, end_uid])
            if 'envent_id' in end_node:
                # Same key mismatch as above, fixed the same way.
                end_eid = end_node['envent_id']
                e_nodes_list[end_eid] = event_name_search(end_eid)
                user_relation.append([mid_eid, relation2, end_eid])

    return {
        'total_user': len(user_uid_list),
        'user_nodes': u_nodes_list,
        'event_nodes': e_nodes_list,
        'relation': user_relation,
        'draw_nodes_length': len(u_nodes_list)
    }
Exemplo n.º 13
0
def search_related_user_card(item, layer):
    """Return user cards for users related to those matching ``item``.

    Up to 100 portrait users are matched with ``*item*`` wildcards on
    ``uid`` or ``uname``.  Depending on ``layer``:

    * ``'1'`` - adds ``User`` nodes one relationship away (up to 100 per
      matched user);
    * ``'2'`` - adds ``User`` nodes two relationships away (up to 100 per
      matched user);
    * ``'all'`` - uses the ``user_nodes`` of ``search_related_user(item)``.

    The collected uids are handed to
    ``related_user_search(uids, 'activeness')``.

    :param item: search term, UTF-8 encoded before use in the wildcards.
    :param layer: ``'1'``, ``'2'`` or ``'all'``.
    :return: the value of ``related_user_search``, or the string
        ``'node does not exist'`` when the search raises or finds nothing.
    :raises ValueError: for any other ``layer`` value (this used to
        surface as an unbound ``result_card``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'uid': pattern}},
                    {'wildcard': {'uname': pattern}},
                ]
            }
        },
        'size': 100
    }

    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        return 'node does not exist'
    if not name_results:
        return 'node does not exist'

    matched_uids = []
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        hit['fields']['uname'][0]  # still require a uname, as before
        matched_uids.append(uid)
    print('%s %s' % (len(matched_uids), '========='))

    # Layers 1 and 2 differ only in the MATCH clause.
    layer_patterns = {
        '1': 'MATCH (s0)-[r1]-(s1:User) return s0,r1,s1 LIMIT 100',
        '2': 'MATCH (s0)-[r1]-()-[r]-(s1:User) return s1 LIMIT 100',
    }
    if layer in layer_patterns:
        only_uid = list(matched_uids)
        for uid in matched_uids:
            c_string = ('START s0 = node:node_index(uid="' + str(uid) +
                        '") ' + layer_patterns[layer])
            for record in list(graph.run(c_string)):
                only_uid.append(dict(record['s1'])['uid'])
        return related_user_search(only_uid, 'activeness')

    if layer == 'all':
        result = search_related_user(item)
        if not isinstance(result, dict):
            # search_related_user reports failure with a plain string;
            # pass it on instead of indexing into it.
            return result
        return related_user_search(list(result['user_nodes']), 'activeness')

    raise ValueError('unsupported layer: %r' % (layer,))