Пример #1
0
    def __find_paths(self, graph, entity_items, relation_items, edges, output_paths=None, used_edges=None):
        """Recursively extend ``output_paths`` with edges matching the remaining relations.

        For every URI of every remaining relation item, each edge returned by
        ``self.find_edges(edges, relation, used_edges)`` is appended to the
        current path and the search recurses with that edge removed from
        ``edges`` and the matched entity/relation items removed from the
        remaining work.

        :param graph: object exposing ``relation_items``; only its length is
            read here, as the minimum length of an accepted path.
        :param entity_items: entity items not yet covered by the path.
        :param relation_items: relation items not yet covered by the path.
        :param edges: candidate edges still available (must support ``- {edge}``).
        :param output_paths: paths built so far; a fresh ``Paths()`` when omitted.
        :param used_edges: edges already on the path; a fresh ``set()`` when omitted.
        :return: ``output_paths`` when both item lists are exhausted, an empty
            ``Paths()`` when relations are exhausted but entities remain,
            otherwise the ``Paths`` collected from all recursive calls.
        """
        # Defaults are created per call: default values in a ``def`` are
        # evaluated once, so ``Paths()`` / ``set()`` there would be shared by
        # every call that omits the argument.
        if output_paths is None:
            output_paths = Paths()
        if used_edges is None:
            used_edges = set()

        if len(relation_items) == 0:
            if len(entity_items) > 0:
                # Relations are used up but some entities were never matched.
                return Paths()
            return output_paths

        new_output_paths = Paths([])
        used_relations = []
        for relation_item in relation_items:
            for relation in relation_item.uris:
                # Rebind to a new list (instead of appending) so a list handed
                # to an earlier recursive call is never modified afterwards.
                # The list keeps growing across relation items.
                used_relations = used_relations + [relation]
                for edge in self.find_edges(edges, relation, used_edges):
                    # Collect the URIs of the edge's endpoints, skipping
                    # generic nodes and type edges.
                    entities = MyList()
                    if not (edge.source_node.are_all_uris_generic() or edge.uri.is_type()):
                        entities.extend(edge.source_node.uris)
                    if not (edge.dest_node.are_all_uris_generic() or edge.uri.is_type()):
                        entities.extend(edge.dest_node.uris)

                    remaining_entities = entity_items - LinkedItem.list_contains_uris(entity_items, entities)
                    remaining_relations = relation_items - LinkedItem.list_contains_uris(relation_items,
                                                                                         used_relations)
                    new_paths = self.__find_paths(graph,
                                                  remaining_entities,
                                                  remaining_relations,
                                                  edges - {edge},
                                                  output_paths=output_paths.extend(edge),
                                                  used_edges=used_edges | {edge})
                    # Keep only paths at least as long as the graph's relation list.
                    new_output_paths.add(new_paths, lambda path: len(path) >= len(graph.relation_items))

        return new_output_paths
Пример #2
0
    def __find_paths_start_with_entities(self,
                                         graph,
                                         entity_items,
                                         relation_items,
                                         edges,
                                         output_paths=None,
                                         used_edges=None):
        """Seed the path search from the edges attached to each entity URI.

        For every URI of every entity item, each edge returned by
        ``self.find_edges_by_entity(edges, entity, used_edges)`` becomes the
        first step of a path; the remainder of the path is delegated to
        ``self.__find_paths`` with the matched entity/relation items and the
        chosen edge removed.

        :param graph: object exposing ``relation_items``; only its length is
            read here, as the minimum length of an accepted path.
        :param entity_items: entity items to start from.
        :param relation_items: relation items that still have to be matched.
        :param edges: candidate edges (must support ``- {edge}``).
        :param output_paths: paths built so far; a fresh ``Paths()`` when omitted.
        :param used_edges: edges already on the path; a fresh ``set()`` when omitted.
        :return: ``Paths`` gathered from all seeds, filtered to those whose
            length is at least ``len(graph.relation_items)``.
        """
        # Defaults are created per call: default values in a ``def`` are
        # evaluated once, so ``Paths()`` / ``set()`` there would be shared by
        # every call that omits the argument.
        if output_paths is None:
            output_paths = Paths()
        if used_edges is None:
            used_edges = set()

        new_output_paths = Paths([])
        for entity_item in entity_items:
            for entity in entity_item.uris:
                for edge in self.find_edges_by_entity(edges, entity,
                                                      used_edges):
                    # A type edge carries its relation in the destination
                    # node's URIs; any other edge carries it in its own URI.
                    if not edge.uri.is_type():
                        used_relations = [edge.uri]
                    else:
                        used_relations = edge.dest_node.uris

                    # Collect the URIs of the edge's endpoints, skipping
                    # generic nodes and type edges.
                    entities = MyList()
                    if not (edge.source_node.are_all_uris_generic()
                            or edge.uri.is_type()):
                        entities.extend(edge.source_node.uris)
                    if not (edge.dest_node.are_all_uris_generic()
                            or edge.uri.is_type()):
                        entities.extend(edge.dest_node.uris)

                    entity_use = entity_items - LinkedItem.list_contains_uris(
                        entity_items, entities)
                    relation_use = relation_items - LinkedItem.list_contains_uris(
                        relation_items, used_relations)
                    edge_use = edges - {edge}

                    new_paths = self.__find_paths(
                        graph,
                        entity_use,
                        relation_use,
                        edge_use,
                        output_paths=output_paths.extend(edge),
                        used_edges=used_edges | {edge})
                    # Keep only paths at least as long as the graph's relation list.
                    new_output_paths.add(
                        new_paths,
                        lambda path: len(path) >= len(graph.relation_items))
        return new_output_paths
Пример #3
0
    def __find_paths_start_with_entities(self, graph, entity_items, relation_items, edges, output_paths=None,
                                         used_edges=None):
        """Seed the path search from each entity, trying one matched relation item at a time.

        For every URI of every entity item, each edge returned by
        ``self.find_edges_by_entity(edges, entity, used_edges)`` becomes the
        first step of a path. For each relation item that
        ``LinkedItem.list_contains_uris`` reports as matching the edge, the
        search continues through ``self.__find_paths`` with that single
        relation item, the current entity item and the chosen edge removed.

        :param graph: object exposing ``relation_items``; only its length is
            read here, as the minimum length of an accepted path.
        :param entity_items: entity items to start from.
        :param relation_items: relation items that still have to be matched.
        :param edges: candidate edges (must support ``- {edge}``).
        :param output_paths: paths built so far; a fresh ``Paths()`` when omitted.
        :param used_edges: edges already on the path; a fresh ``set()`` when omitted.
        :return: ``Paths`` gathered from all seeds, filtered to those whose
            length is at least ``len(graph.relation_items)``.
        """
        # Defaults are created per call: default values in a ``def`` are
        # evaluated once, so ``Paths()`` / ``set()`` there would be shared by
        # every call that omits the argument.
        if output_paths is None:
            output_paths = Paths()
        if used_edges is None:
            used_edges = set()

        new_output_paths = Paths([])
        for entity_item in entity_items:
            # NOTE(review): ``available_edges`` is computed but never used; the
            # recursive call below passes ``edges - {edge}`` instead. Kept
            # because find_edges_by_entities is opaque from here - confirm it
            # is side-effect free before deleting these two lines.
            unavailable_edges = self.find_edges_by_entities(edges, entity_item)
            available_edges = edges - unavailable_edges
            available_entity_items = entity_items - [entity_item]
            for entity in entity_item.uris:
                for edge in self.find_edges_by_entity(edges, entity, used_edges):
                    # A type edge carries its relation in the destination
                    # node's URIs; any other edge carries it in its own URI.
                    if not edge.uri.is_type():
                        used_relations = [edge.uri]
                    else:
                        used_relations = edge.dest_node.uris

                    unavailable_relations = LinkedItem.list_contains_uris(relation_items, used_relations)
                    for unavailable_relation in unavailable_relations:
                        # Remove exactly one matched relation item per branch.
                        available_relations = relation_items - MyList([unavailable_relation])
                        new_paths = self.__find_paths(graph,
                                                      available_entity_items,
                                                      available_relations,
                                                      edges - {edge},
                                                      output_paths=output_paths.extend(edge),
                                                      used_edges=used_edges | {edge})
                        # Keep only paths at least as long as the graph's relation list.
                        new_output_paths.add(new_paths, lambda path: len(path) >= len(graph.relation_items))
        return new_output_paths
Пример #4
0
def query():
    """Build candidate queries for the question in the JSON request body.

    Reads ``question``, ``entities`` and ``relations`` from ``request.json``,
    wraps every raw item in a ``LinkedItem`` of ``Uri`` objects, classifies
    the question with ``get_question_type`` and returns a JSON response with
    the post-processed queries, the question type text and its confidence.
    """
    payload = request.json
    question = payload['question']
    raw_entities = payload['entities']
    raw_relations = payload['relations']

    def to_linked_items(raw_items):
        # One LinkedItem per raw item, holding one Uri per candidate URI.
        linked = []
        for raw in raw_items:
            candidates = [Uri(cand["uri"], DBpedia.parse_uri, cand["confidence"]) for cand in raw["uris"]]
            linked.append(LinkedItem(raw["surface"], candidates))
        return linked

    entities = to_linked_items(raw_entities)
    relations = to_linked_items(raw_relations)

    question_type, type_confidence = get_question_type(question)

    # Type 2 is a count question, type 1 an ask question; anything else is neither.
    count_query = bool(question_type == 2)
    ask_query = (not count_query) and bool(question_type == 1)

    generated_queries = []
    combination_mode = True
    if not combination_mode:
        generated_queries = generate_query(question, question_type, entities, relations, count_query, ask_query)
    else:
        for comb in create_entity_relations_combinations(entities, relations):
            # Only combinations seen while nothing has been generated yet are
            # tried; once any query exists the remaining ones are skipped.
            if generated_queries:
                continue
            generated_queries.extend(
                generate_query(question, question_type, comb[0], comb[1], count_query, ask_query))

    queries = postprocess(generated_queries, count_query, ask_query)

    return jsonify({
        "queries": queries,
        "type": get_question_type_text(question_type),
        "type_confidence": type_confidence
    })
Пример #5
0
 def __parse(self, dataset, name, top):
     """Turn the raw items stored under ``dataset[name]`` into ``LinkedItem`` objects.

     Each raw item provides a ``surface`` pair ``(start_index, length)`` that
     is sliced out of ``dataset["question"]``, and a list of ``uris`` that are
     wrapped in ``Uri`` objects using ``self.parser``. Only the first ``top``
     URIs of each item are kept.
     """
     linked_items = []
     for entry in dataset[name]:
         # Build every Uri first; the cut to ``top`` happens afterwards.
         candidates = [Uri(cand["uri"], self.parser, cand["confidence"]) for cand in entry["uris"]]
         offset, span = entry["surface"]
         text = dataset["question"][offset:offset + span]
         linked_items.append(LinkedItem(text, candidates[:top]))
     return linked_items
Пример #6
0
    def do(self, qapair, force_gold=False, top=5):
        """Split the URIs of ``qapair.sparql`` into entity and relation ``LinkedItem`` lists.

        Every URI is wrapped in a ``LinkedItem`` whose surface is the text of
        the first mention found by ``find_mentions`` in the question, or an
        empty string when there is none. A URI is added to the entities when
        ``is_entity()`` holds and to the relations when ``is_ontology()``
        holds; the two checks are independent. ``force_gold`` and ``top`` are
        accepted but not used here.

        :return: tuple ``(entities, relations)``.
        """
        entity_items, relation_items = [], []
        for uri in qapair.sparql.uris:
            text = qapair.question.text
            found = find_mentions(text, [uri])
            if len(found) == 0:
                surface_form = ""
            else:
                first = found[0]
                surface_form = text[first["start"]:first["end"]]

            item = LinkedItem(surface_form, [uri])
            if uri.is_entity():
                entity_items.append(item)
            if uri.is_ontology():
                relation_items.append(item)

        return entity_items, relation_items
Пример #7
0
    o = Orchestrator(None, question_type_classifier, None, parser, True)
    raw_entities = [{
        "surface":
        "",
        "uris": [{
            "confidence": 1,
            "uri": "http://dbpedia.org/resource/Bill_Finger"
        }]
    }]
    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))

    raw_relations = [{
        "surface":
        "",
        "uris": [{
            "confidence": 1,
            "uri": "http://dbpedia.org/ontology/creator"
        }]
    }, {
        "surface":
        "",
        "uris": [{
            "confidence": 1,
            "uri": "http://dbpedia.org/ontology/ComicsCharacter"
        }]
Пример #8
0
def generate_query():
    """Flask view: build SPARQL queries for a question and its linked entities/relations.

    Request JSON keys: ``question``, ``entities``, ``relations`` (required);
    ``force_count``, ``force_bool``, ``force_list``, ``h1_threshold``,
    ``timeout``, ``use_cache`` (optional).

    Responses:
      * 400 when the request has no JSON body;
      * 201 with ``queries``, ``type`` and ``type_confidence`` on success
        (also used for cache hits);
      * 408 with an ``error`` message when a ``RuntimeError`` is raised
        (presumably by the ``timeout`` context manager - confirm);
      * 422 with an ``error`` message for any other exception, including an
        empty entity or relation list.
    """
    import hashlib  # local import so this block does not depend on file-level imports

    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    force_count_query = payload.get('force_count', False)
    force_bool_query = payload.get('force_bool', False)
    force_list_query = payload.get('force_list', False)
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload['h1_threshold']) if 'h1_threshold' in payload else 9999999
    timeout_threshold = int(payload['timeout']) if 'timeout' in payload else 9999999
    # NOTE(review): bool() of any non-empty string (e.g. "false") is True.
    use_cache = bool(payload['use_cache']) if 'use_cache' in payload else True

    # The cache is persisted with hash_list.save(), so its keys must be stable
    # across processes. Built-in hash() of str/bytes is salted per interpreter
    # run, so a digest of the same key material is used instead.
    key_material = (str(question) + str(raw_entities) + str(raw_relations) +
                    str(h1_threshold) + str(force_count_query) + str(force_bool_query) +
                    str(force_list_query))
    hash_key = hashlib.sha256(key_material.encode('utf-8')).hexdigest()

    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)

    def to_linked_items(raw_items):
        # One LinkedItem per raw item, holding one Uri per candidate URI.
        linked = []
        for raw in raw_items:
            uris = [
                Uri(uri["uri"], kb.parse_uri, uri["confidence"])
                for uri in raw["uris"]
            ]
            linked.append(LinkedItem(raw["surface"], uris))
        return linked

    entities = to_linked_items(raw_entities)
    relations = to_linked_items(raw_relations)

    try:
        if len(entities) == 0 or len(relations) == 0:
            # Not a RuntimeError, so it is reported as 422 by the handler below.
            raise ValueError('Wrong number of input entity/relation!')
        with timeout(timeout_threshold):
            # A forced type takes precedence in the order list > bool > count;
            # None leaves the decision to the query builder.
            question_type = None
            if force_list_query:
                question_type = 0
            elif force_bool_query:
                question_type = 1
            elif force_count_query:
                question_type = 2

            queries, question_type, type_confidence = queryBuilder.generate_query(
                question, entities, relations, h1_threshold, question_type)
            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True

            queries = [{
                "query":
                kb.sparql_query(item["where"],
                                "?u_" + str(item["suggested_id"]), count_query,
                                ask_query),
                "confidence":
                item["confidence"]
            } for item in queries]

            result = {
                'queries': queries,
                'type': question_type_str,
                'type_confidence': type_confidence
            }
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except RuntimeError as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422
Пример #9
0
def generate_query():
    """Flask view: build SPARQL queries for a question and its linked entities/relations.

    Request JSON keys: ``question``, ``entities``, ``relations`` (required);
    ``h1_threshold``, ``timeout``, ``use_cache`` (optional).

    Responses:
      * 400 when the request has no JSON body;
      * 201 with ``queries`` and ``type`` on success (also used for cache hits);
      * 408 with an empty JSON object when query generation raises any
        exception, including one raised inside the ``timeout`` block.
    """
    import hashlib  # local import so this block does not depend on file-level imports

    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload['h1_threshold']) if 'h1_threshold' in payload else 9999999
    timeout_threshold = int(payload['timeout']) if 'timeout' in payload else 9999999
    # NOTE(review): bool() of any non-empty string (e.g. "false") is True.
    use_cache = bool(payload['use_cache']) if 'use_cache' in payload else True

    # The cache is persisted with hash_list.save(), so its keys must be stable
    # across processes. Built-in hash() of str is salted per interpreter run,
    # so a digest of the same key material is used instead.
    key_material = (str(question) + str(raw_entities) + str(raw_relations) +
                    str(h1_threshold))
    hash_key = hashlib.sha256(key_material.encode('utf-8')).hexdigest()
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)

    def to_linked_items(raw_items):
        # One LinkedItem per raw item, holding one Uri per candidate URI.
        linked = []
        for raw in raw_items:
            uris = [
                Uri(uri["uri"], kb.parse_uri, uri["confidence"])
                for uri in raw["uris"]
            ]
            linked.append(LinkedItem(raw["surface"], uris))
        return linked

    entities = to_linked_items(raw_entities)
    relations = to_linked_items(raw_relations)

    try:
        with timeout(timeout_threshold):
            queries, question_type = queryBuilder.generate_query(
                question, entities, relations, h1_threshold)
            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True

            queries = [{
                "query":
                kb.sparql_query(item["where"],
                                "?u_" + str(item["suggested_id"]), count_query,
                                ask_query),
                "confidence":
                item["confidence"]
            } for item in queries]

            result = {'queries': queries, 'type': question_type_str}
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except Exception as expt:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # propagate, and the failure is logged instead of silently dropped.
        # The response is unchanged for callers.
        logger.error(expt)
        return flask.jsonify({}), 408