Пример #1
0
def bloom(root, path=None):
    """Build the graph payload (nodes, links, answer path) centred on ``root``.

    Neighbours returned by ``owlNeo4j.get_related_entities_by_id`` are expanded
    breadth-first. A node is expanded only while it is queued, fewer than 100
    nodes have been collected and its depth (stored under ``'value'``) is
    below 3. Every relation produces a link; a node is created only the first
    time its ``neoId`` is seen.

    Args:
        root: Neo4j id of the central entity. ``None`` yields an empty graph.
        path: optional list of ``[source, relation_name, target]`` triples
            whose ``source``/``target`` are dicts with ``'name'``, ``'label'``
            and ``'neoId'``. Each triple is added as a link, creating missing
            nodes, and its endpoints are recorded in ``'answerpath'``.

    Returns:
        dict with the keys ``'nodes'``, ``'links'``, ``'answerpath'`` and
        ``'answerlist'``.
    """
    graph = {'nodes': [], 'links': [], 'answerpath': [], 'answerlist': []}
    if root is None:
        return graph

    nodes = graph['nodes']
    links = graph['links']
    answer_ids = graph['answerpath']
    depth_limit = 3

    # Seed the graph with the root entity at depth 0.
    root_info = owlNeo4j.get_entity_info_by_id(root)
    nodes.append({'id': 0, 'name': root_info['name'], 'category': root_info['label'],
                  'neoId': root, 'value': 0})

    # ``nodes`` doubles as the breadth-first queue; ``cursor`` is its read position.
    cursor = 0
    while cursor < len(nodes) < 100 and nodes[cursor]['value'] < depth_limit:
        current = nodes[cursor]
        next_depth = current['value'] + 1
        for neighbour in owlNeo4j.get_related_entities_by_id(current['neoId']):
            relation_name = neighbour['name']
            neighbour_label = neighbour['target_label']
            neighbour_name = neighbour['target_name']
            neighbour_neo_id = neighbour['target_neoId']
            # Reuse the node if this neoId is already in the graph (first match wins).
            target_index = next(
                (idx for idx, known in enumerate(nodes)
                 if known['neoId'] == neighbour_neo_id),
                None)
            if target_index is None:
                target_index = len(nodes)
                nodes.append({'id': target_index, 'name': neighbour_name,
                              'category': neighbour_label, 'neoId': neighbour_neo_id,
                              'value': next_depth})
            # The relation is always recorded, whether or not the node was new.
            links.append({'id': len(links), 'source': cursor, 'target': target_index,
                          'level': next_depth, 'name': relation_name})
        cursor += 1

    # The root is always part of the answer path.
    answer_ids.append(0)
    if path is not None:
        graph['answerlist'] = path
        for triple in path:
            head, tail = triple[0], triple[2]
            # When several nodes share a neoId the last one is used.
            head_hits = [idx for idx, known in enumerate(nodes)
                         if head['neoId'] == known['neoId']]
            tail_hits = [idx for idx, known in enumerate(nodes)
                         if tail['neoId'] == known['neoId']]
            src = head_hits[-1] if head_hits else None
            dst = tail_hits[-1] if tail_hits else None
            if src is None:
                src = len(nodes)
                nodes.append({'id': src, 'name': head['name'], 'neoId': head['neoId'],
                              'value': 1, 'category': head['label']})
            if dst is None:
                dst = len(nodes)
                nodes.append({'id': dst, 'name': tail['name'], 'neoId': tail['neoId'],
                              'value': 1, 'category': tail['label']})
            links.append({'id': len(links), 'source': src, 'target': dst,
                          'name': triple[1]})
            for node_index in (src, dst):
                if node_index not in answer_ids:
                    answer_ids.append(node_index)
    return graph
Пример #2
0
def entity_search(name=None, neoid=None, autopick=False):
    """Look an entity up by Neo4j id or by name.

    Args:
        name: entity name to search for; only used when ``neoid`` is ``None``.
        neoid: Neo4j id; when given, the name is ignored.
        autopick: when true, return only the first entity of the sorted
            same-name candidates instead of the whole list.

    Returns:
        * by id: whatever ``owlNeo4j.get_entity_info_by_id`` returns;
        * by name: ``[]`` when nothing matches, the single first candidate
          when ``autopick`` is set or exactly one candidate exists, otherwise
          the full candidate list as ordered by ``eneities_sort``.
    """
    # Lookup by id short-circuits everything else.
    if neoid is not None:
        return owlNeo4j.get_entity_info_by_id(neoid)

    # Lookup by name: rank the same-name candidates first.
    candidates = eneities_sort(owlNeo4j.get_entity_list_by_name(name))
    count = len(candidates)
    if count == 0:
        return []
    return candidates[0] if (autopick or count == 1) else candidates
Пример #3
0
def entity(request):
    """Return the entity selected by the ``id`` query parameter as JSON.

    Args:
        request: the incoming HTTP request; ``request.GET['id']`` must hold
            an integer Neo4j id.

    Returns:
        An ``HttpResponse`` whose body is the JSON-encoded result of
        ``owlNeo4j.get_entity_info_by_id``. When ``id`` is missing or is not
        an integer, the response has status 400 and a JSON error body instead.
        Both responses carry ``Access-Control-Allow-Origin: *``.
    """
    raw_id = request.GET.get('id', None)
    try:
        neoid = int(raw_id)
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: not a number.
        # Answer with a client error instead of letting the view crash.
        response = HttpResponse(
            json.dumps({'error': 'query parameter "id" must be an integer'},
                       ensure_ascii=False),
            status=400)
    else:
        response = HttpResponse(
            json.dumps(owlNeo4j.get_entity_info_by_id(neoid),
                       ensure_ascii=False))
    # Set on both outcomes so browser callers on other origins can read errors too.
    response["Access-Control-Allow-Origin"] = "*"
    return response


# def test11(request):
#     s = request.GET.get('str')
#     res = {
#         "code": 0,
#         "len": len(s)
#     }
#
#     response = HttpResponse(json.dumps(res))
#
#     return response
Пример #4
0
def automata(seg_list):
    """Match a segmented question against the knowledge graph with a state machine.

    Each state is a dict holding the recognised head entity (``header``), the
    node the walked path currently ends at (``tailer``), the words consumed
    since the last transition (``available_words``), the triples walked so
    far (``path``) and a ``score``. For every word, each existing state
    either tries to recognise an entity name (START state, no header yet) or
    tries to follow a relation/attribute of its current node whose name is
    similar enough to the pending words.

    Args:
        seg_list: iterable of the question's word tokens, in order.

    Returns:
        ``{'ents': [...], 'path': [...]}``. When only entities were
        recognised, ``ents`` is the ranked list of candidate entities and
        ``path`` is empty. Otherwise ``ents`` holds the single best entity and
        ``path`` its list of ``[source, relation_name, target]`` triples.
        When nothing was recognised both lists are empty.
    """
    threshold_1 = 0.6  # minimum word-vector similarity for a state transition
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}  # neoId -> relations and attributes of that entity
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: try to read the accumulated words as an entity name.
            if state['header'] is None:
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
                continue

            # Non-START state: continue from the tail of the path, or from the
            # head entity when no step has been taken yet.
            if state['tailer'] is None:
                source = {
                    'name': state['header']['name'],
                    'label': state['header']['label'],
                    'neoId': state['header']['neoId']
                }
            else:
                source = state['tailer']
            if source['neoId'] is None:
                # A tail without a neoId cannot be expanded any further.
                continue
            if source['neoId'] not in caches:
                # Collect this entity's relations and attributes once.
                caches[source['neoId']] = []
                relations = owlNeo4j.get_related_entities_by_id(
                    source['neoId'])
                for relation in relations:
                    caches[source['neoId']].append(relation)
                props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                for prop in props:
                    # An attribute is skipped when a relation of the same
                    # name already exists.
                    if any(prop == relation['name']
                           for relation in caches[source['neoId']]):
                        continue
                    caches[source['neoId']].append({
                        'name': prop,
                        'target_label': '属性值',
                        'target_name': props[prop],
                        'target_neoId': None
                    })

            # Compare the pending words with every relation/attribute name
            # and transition when the similarity exceeds the threshold.
            link2state_map = {}  # relation name -> index into new_states
            for link in caches[source['neoId']]:
                score = serviceWord2vec.get_similarity(
                    state['available_words'],
                    list(jieba.cut(link['name'])))
                if score <= threshold_1:
                    continue
                target = {
                    'name': link['target_name'],
                    'label': link['target_label'],
                    'neoId': link['target_neoId']
                }
                if link['name'] not in link2state_map:
                    # First hit for this relation name: open a new state and
                    # record the step on a copy of the path.
                    new_path = list(state['path'])
                    new_path.append([source, link['name'], target])
                    new_states.append({
                        'header': state['header'],
                        'tailer': target,
                        'available_words': [],
                        'path': new_path,
                        'score': state['score'] * (1 + score - threshold_1)
                    })
                    link2state_map[link['name']] = len(new_states) - 1
                else:
                    # The relation is multi-valued: append this value to the
                    # state opened for the same name, and clear the tail's
                    # neoId so that state cannot transition again.
                    state_num = link2state_map[link['name']]
                    new_tailer = new_states[state_num]['tailer'].copy()
                    new_tailer['neoId'] = None
                    new_states[state_num]['tailer'] = new_tailer
                    new_states[state_num]['path'].append(
                        [source, link['name'], target])
        states += new_states

    # Keep the states that share the highest score.
    max_states = []
    for state in states:
        if state['header'] is None:
            continue
        if (not max_states) or (state['score'] > max_states[0]['score']):
            max_states = [state]
        elif state['score'] == max_states[0]['score']:
            if (state['score'] == 1) and (
                    len(state['available_words']) <
                    len(max_states[0]['available_words'])):
                # Among entity-only states, fewer leftover words means a
                # longer entity name was matched; prefer it.
                max_states = [state]
            else:
                max_states.append(state)

    if not max_states:
        # Nothing was recognised, so there is nothing to rank.
        return {'ents': [], 'path': []}

    # Rank the head entities of the best states.
    entities = serviceKG.eneities_sort(
        [state['header'] for state in max_states])
    # A score of exactly 1 is the score given to a freshly recognised entity;
    # every transition multiplies it by a factor greater than 1. So 1 means
    # "entity only": return the candidate list without a path.
    if max_states[0]['score'] == 1:
        return {'ents': entities, 'path': []}
    paths = [
        state['path'] for state in max_states
        if state['header'] == entities[0]
    ]
    return {'ents': [entities[0]], 'path': paths[0]}
Пример #5
0
def automata(seg_list):
    """Match a segmented question against the knowledge graph with a state machine.

    Each state is a dict holding the recognised head entity (``header``), the
    node the walked path currently ends at (``tailer``), the words consumed
    since the last transition (``available_words``), the triples walked so
    far (``path``) and a ``score``. Three strategies are tried in order:

    1. word-vector similarity between the pending words and the names of the
       current node's relations/attributes (``threshold_1``);
    2. if no state has a path, a relation predicted by
       ``owlSubServers.relation_predict`` (``threshold_2``) matched against
       the head entity's relations/attributes;
    3. if there is still no path, an answer picked from the head entity's
       ``'description'`` by ``owlSubServers.answer_selection``
       (``threshold_3``).

    Args:
        seg_list: the question's word tokens, in order.

    Returns:
        ``{'ents': [...], 'path': [...]}``. When no state exists or the best
        score is exactly 1, ``ents`` is the sorted entity list and ``path``
        is empty; otherwise ``ents`` holds the first sorted entity and
        ``path`` the path of the first best state with that header.
    """
    threshold_1 = 0.5  # state-transition threshold for word-vector similarity matching
    threshold_2 = 0.15  # state-transition threshold for relation-prediction matching
    threshold_3 = 0.4  # state-transition threshold for text answer selection
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: try to read the accumulated words as an entity name
            if (state['header'] is None):
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
            # Non-START state: continue from the tail, or from the head entity
            else:
                if state['tailer'] is None:
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                else:
                    source = state['tailer']
                if source['neoId'] is None:  # a None neoId marks a state that cannot transition further
                    continue
                if source['neoId'] not in caches:  # collect this entity's relations and attributes into the cache
                    caches[source['neoId']] = []
                    relations = owlNeo4j.get_related_entities_by_id(
                        source['neoId'])
                    for relation in relations:  # add relations
                        caches[source['neoId']].append(relation)
                    props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                    for prop in props:  # add attributes; skip one whose name already appears as a relation
                        if any(prop == relation['name']
                               for relation in caches[source['neoId']]):
                            continue
                        caches[source['neoId']].append({
                            'name':
                            prop,
                            'target_label':
                            '属性值',
                            'target_name':
                            props[prop],
                            'target_neoId':
                            None
                        })
                # Match every relation/attribute by similarity; transition when above the threshold
                link2state_map = {}
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        state['available_words'],
                        list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        # First hit for this relation name: transition and record the step
                        if link['name'] not in link2state_map:
                            new_path = [step for step in state['path']]
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_path.append([source, link['name'], target])
                            new_score = state['score'] * (1 + score -
                                                          threshold_1)
                            new_states.append({
                                'header': state['header'],
                                'tailer': target,
                                'available_words': [],
                                'path': new_path,
                                'score': new_score
                            })
                            link2state_map[link['name']] = len(new_states) - 1
                        # A relation of the same name was already added, so it is multi-valued
                        # (e.g. "notable alumni"): append this value to that state's path
                        else:
                            state_num = link2state_map[link['name']]
                            new_tailer = new_states[state_num]['tailer'].copy()
                            new_tailer[
                                'neoId'] = None  # a multi-valued relation cannot transition again, so the tail neoId is set to None
                            new_states[state_num]['tailer'] = new_tailer
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_states[state_num]['path'].append(
                                [source, link['name'], target])
        states += new_states

    # If no answer path was found, fall back to relation prediction.
    # NOTE: `states` is appended to while it is being iterated; the appended
    # states have empty `available_words`, so the guard below skips them.
    if all(state['path'] == [] for state in states):
        relation_p = None
        for state in states:
            if (state['header']
                    is not None) and (state['available_words'] != []):
                source = {
                    'name': state['header']['name'],
                    'label': state['header']['label'],
                    'neoId': state['header']['neoId']
                }
                # The prediction is requested once, for the first eligible state, and reused afterwards
                if relation_p is None:
                    question = '_' + ''.join(state['available_words'])
                    res = owlSubServers.relation_predict(question)
                    if res is None:
                        break
                    relation_p = res['answer']
                    # NOTE(review): compared without float(), unlike answer selection below - confirm 'point' is numeric
                    point_predicted = res['point']
                    if point_predicted < threshold_2:
                        break
                # Match every relation/attribute against the predicted relation; transition when above the threshold
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        list(jieba.cut(relation_p)),
                        list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        new_path = [step for step in state['path']]
                        target = {
                            'name': link['target_name'],
                            'label': link['target_label'],
                            'neoId': link['target_neoId']
                        }
                        new_path.append([source, link['name'], target])
                        new_score = state['score'] * (1 + score - threshold_1)
                        states.append({
                            'header': state['header'],
                            'tailer': target,
                            'available_words': [],
                            'path': new_path,
                            'score': new_score
                        })

    # Still no path: for states with a head entity, pick an answer from the entity's description text
    if all(state['path'] == [] for state in states):
        for state in states:
            if (state['header']
                    is not None) and (state['available_words'] != []):
                description = state['header']['description']
                res = owlSubServers.answer_selection(str(''.join(seg_list)),
                                                     str(description))
                if res is None:
                    break
                answer = res['answer']
                point = float(res['point'])
                if point > threshold_3:
                    # Node label: the answer itself when shorter than 10 characters, else its first 8 plus '...'
                    abstract = answer if len(
                        answer) < 10 else answer[:8] + '...'
                    new_path = [step for step in state['path']]
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                    target = {
                        'name': abstract,
                        'label': '实体描述文本',
                        'neoId': None,
                        'ans_from_desc': answer
                    }
                    new_path.append([source, 'description', target])
                    # Slightly above the source state's score, so it outranks that state
                    new_score = state['score'] + 0.00001
                    states.append({
                        'header': state['header'],
                        'tailer': target,
                        'available_words': [],
                        'path': new_path,
                        'score': new_score
                    })

    # Keep the states that share the highest score
    max_states = []
    for state in states:
        if (state['header'] is not None):
            if (max_states == []) or (state['score'] > max_states[0]['score']):
                max_states = [state]
            elif (state['score'] == max_states[0]['score']):
                if (state['score']
                        == 1) and (len(state['available_words']) < len(
                            max_states[0]['available_words'])
                                   ):  # among entity-only states, prefer the longest matched entity
                    max_states = [state]
                else:
                    max_states.append(state)
    # Sort the head entities of those states with serviceKG.eneities_sort
    entities = [
        state['header'] for state in max_states if state['header'] is not None
    ]
    entities = serviceKG.eneities_sort(entities)
    # If only entities were recognised return the entity list, otherwise return the best path
    if (max_states == []) or (max_states[0]['score'] == 1):
        return {'ents': entities, 'path': []}
    else:
        paths = [
            state['path'] for state in max_states
            if state['header'] == entities[0]
        ]
        return {'ents': [entities[0]], 'path': paths[0]}
Пример #6
0
def bloom(root, path=None, question=None):
    """Build the graph payload for an answer, or delegate special question types.

    ``root`` is either a Neo4j id or one of the string codes ``'1'``-``'6'``.
    The codes are compared as strings, so the number ``1`` is still treated
    as a node id:

    * ``'1'`` comparison, ``'2'`` by category, ``'4'`` induction and ``'5'``
      retrieval questions call ``serviceQA.autocom`` / ``autocate`` /
      ``autoinduce`` / ``autoret``; a result other than ``0`` is returned
      directly, otherwise the generic graph is built;
    * ``'3'`` (by relation) always returns ``serviceQA.autolink(question)``;
    * ``'6'`` (full-text retrieval) returns a single node built from
      ``path[0]``.

    Otherwise the two-way neighbours of ``root`` are expanded breadth-first,
    at most 9 per node, while fewer than 100 nodes exist and the expanded
    node's depth is below 2. The triples of ``path`` are then added as the
    answer path.

    Args:
        root: Neo4j id of the central entity, a question-type code, or
            ``None`` for an empty graph.
        path: list of ``[source, relation_name, target]`` triples; for
            ``'6'`` a sequence with at least three items.
        question: the question text passed on to ``serviceQA``.

    Returns:
        dict with the keys ``'nodes'``, ``'links'``, ``'answerpath'`` and
        ``'answerlist'``, or whatever the ``serviceQA`` routine returned.
    """
    # Question types with a dedicated routine; 0 means "no result, fall through".
    # The attribute is looked up lazily, only for the matching code.
    for code, handler_name in (('1', 'autocom'), ('2', 'autocate'),
                               ('4', 'autoinduce'), ('5', 'autoret')):
        if root == code:
            outcome = getattr(serviceQA, handler_name)(question)
            if outcome != 0:
                return outcome
    if root == '3':
        return serviceQA.autolink(question)
    if root == '6':
        # Only the first item becomes a node; the other two are read but unused.
        asker, _relation, _answer = path[0], path[1], path[2]
        return {
            'nodes': [{
                'id': 0,
                'name': asker['name'],
                'category': asker['category'],
                'neoId': None,
                'content': asker['问题'],
                'value': 0
            }],
            'links': [],
            'answerpath': [],
            'answerlist': []
        }

    graph = {'nodes': [], 'links': [], 'answerpath': [], 'answerlist': []}
    if root is None:
        return graph

    nodes = graph['nodes']
    links = graph['links']
    answer_ids = graph['answerpath']
    depth_limit = 2
    neighbour_cap = 9

    # Seed the graph with the root entity at depth 0.
    root_info = owlNeo4j.get_entity_info_by_id(root)
    nodes.append({
        'id': 0,
        'name': root_info['name'],
        'category': root_info['category'],
        'neoId': root,
        'value': 0
    })

    # ``nodes`` doubles as the breadth-first queue; ``cursor`` is its read position.
    cursor = 0
    while cursor < len(nodes) < 100 and nodes[cursor]['value'] < depth_limit:
        next_depth = nodes[cursor]['value'] + 1
        neighbours = owlNeo4j.get_twoway_related_entities_by_id(
            nodes[cursor]['neoId'], max_num=neighbour_cap)
        for neighbour in neighbours:
            relation_name = neighbour['name']
            neighbour_category = neighbour['target_category']
            neighbour_name = neighbour['target_name']
            neighbour_neo_id = neighbour['target_neoId']
            # Reuse the node if this neoId is already in the graph.
            target_id = next(
                (known['id'] for known in nodes
                 if known['neoId'] == neighbour_neo_id),
                None)
            if target_id is None:
                target_id = len(nodes)
                nodes.append({
                    'id': target_id,
                    'name': neighbour_name,
                    'category': neighbour_category,
                    'neoId': neighbour_neo_id,
                    'value': next_depth
                })
            # Only one link is kept per pair of nodes, whatever its direction.
            wanted = ((cursor, target_id), (target_id, cursor))
            if any((link['source'], link['target']) in wanted for link in links):
                continue
            edge = {'id': len(links), 'name': relation_name, 'level': next_depth}
            # 'positive' True: relation points away from the current node; False: towards it.
            if neighbour['positive']:
                endpoints = (cursor, target_id)
            else:
                endpoints = (target_id, cursor)
            edge['source'], edge['target'] = endpoints
            links.append(edge)
        cursor += 1

    # The root is always part of the answer path.
    answer_ids.append(0)
    if path is not None:
        graph['answerlist'] = path
        for triple in path:
            head, tail = triple[0], triple[2]
            # When several nodes share a neoId the last one is used.
            head_hits = [idx for idx, known in enumerate(nodes)
                         if head['neoId'] == known['neoId']]
            tail_hits = [idx for idx, known in enumerate(nodes)
                         if tail['neoId'] == known['neoId']]
            src = head_hits[-1] if head_hits else None
            dst = tail_hits[-1] if tail_hits else None
            if src is None:
                src = len(nodes)
                nodes.append({
                    'id': src,
                    'name': head['name'],
                    'neoId': head['neoId'],
                    'value': 1,
                    'category': head['category']
                })
            if dst is None:
                dst = len(nodes)
                nodes.append({
                    'id': dst,
                    'name': tail['name'],
                    'neoId': tail['neoId'],
                    'value': 1,
                    'category': tail['category']
                })
            links.append({
                'id': len(links),
                'source': src,
                'target': dst,
                'name': triple[1]
            })
            for node_index in (src, dst):
                if node_index not in answer_ids:
                    answer_ids.append(node_index)
    return graph