Пример #1
0
    def __call__(self, text, subject_begin, subject_end, object_begin, object_end, **kwargs):
        """Propose relation candidates from the tokens between subject and object.

        The text is analysed through the ``SPACY_PROCESS`` task, the tokens
        lying between the two mentions are collected (punctuation excluded),
        and the resulting token/POS sequences are looked up in
        ``self.pattern_trie``.

        Args:
            text: Sentence containing both mentions.
            subject_begin: Start offset of the subject mention.
            subject_end: End offset of the subject mention.
            object_begin: Start offset of the object mention.
            object_end: End offset of the object mention.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A list of ``(uri, confidence)`` tuples. Every name returned by the
            trie is emitted twice, once with the DBpedia ontology prefix and
            once with the property prefix. The list is empty when the trie
            yields no match.
        """
        # The span of interest runs from the end of whichever mention comes
        # first to the start of the other one.
        if subject_begin < object_begin:
            text_between_begin, text_between_end = subject_end, object_begin
        else:
            text_between_begin, text_between_end = object_end, subject_begin

        info = run_task(Task.SPACY_PROCESS, text)
        tokens = info['tokens']
        idx = info['idx']  # presumably per-token character offsets -- TODO confirm
        pos = info['pos']

        # Punctuation carries no relational meaning, so it is left out of the pattern.
        between_token_indexes = [
            i
            for i in self.__tokens_in_range(tokens, text_between_begin, text_between_end, idx)
            if pos[i] != 'PUNCT'
        ]

        between_tokens = [tokens[i] for i in between_token_indexes]
        between_pos = [pos[i] for i in between_token_indexes]

        candidates = self.pattern_trie.get_value(between_tokens, between_pos)
        ontology_candidates = [DBPEDIA_ONTOLOGY_PREFIX + candidate for candidate in candidates]
        property_candidates = [DBPEDIA_PROPERTY_PREFIX + candidate for candidate in candidates]
        candidates = ontology_candidates + property_candidates

        # Each matched name appears twice (ontology + property), so
        # 2 / len(candidates) equals 1 / number-of-matched-names. The division
        # stays inside the comprehension on purpose: with no candidates it is
        # never evaluated, so an empty result cannot raise ZeroDivisionError.
        return [(candidate, 2 / len(candidates)) for candidate in candidates]
Пример #2
0
 def __map_type(self, node: QueryTree.Node) -> None:
     """Fill ``node.kb_resources`` with the result of the ``MAP_TYPE`` task.

     The task receives the full question text together with the begin/end
     offsets that ``self.tree.offset_for_node`` reports for ``node``.
     """
     type_begin, type_end = self.tree.offset_for_node(node)
     payload = {
         'text': self.question_text,
         'type_begin': type_begin,
         'type_end': type_end,
     }
     node.kb_resources = run_task(Task.MAP_TYPE, payload)
Пример #3
0
    def __map_entity(self, node: QueryTree.Node) -> None:
        """Fill ``node.kb_resources`` with the result of the ``MAP_ENTITY`` task.

        Only ``TOKEN`` children are kept on the node before its offsets are
        computed. If the task returns nothing, the node is re-typed as
        ``LITERAL``.
        """
        # TODO: handle type constraints for entities
        node.children = [
            child for child in node.children if child.type == NodeType.TOKEN
        ]

        entity_begin, entity_end = self.tree.offset_for_node(node)
        request = {
            'text': self.question_text,
            'entity_begin': entity_begin,
            'entity_end': entity_end,
        }
        node.kb_resources = run_task(Task.MAP_ENTITY, request)

        # No knowledge-base match: treat the span as a plain literal.
        if not node.kb_resources:
            node.type = NodeType.LITERAL
Пример #4
0
    def __call__(self, query_text: str) -> List[dict]:
        """Return the grammar-valid candidate trees for ``query_text``.

        The text is tokenized, fed through the NCRF++ step, and the decoded
        candidate trees are validated against the grammar. Surviving trees are
        returned in ``HIERARCHICAL_DICT`` serialization.
        """
        tokens = run_task(Task.TOKENIZE, query_text)
        self.syntax_validator = SyntaxChecker(GRAMMAR_FILE_PATH)
        self.__prepare_input(tokens)
        self.__run_ncrfpp()
        decoded_trees = self.__decode_labels(tokens)

        # Statistically parsed trees might not validate the grammar, so the
        # invalid ones are discarded before anything is serialized.
        valid_trees = [
            tree for tree in decoded_trees
            if self.syntax_validator.validate(tree)
        ]

        return [
            tree.to_serializable(SerializationFormat.HIERARCHICAL_DICT)
            for tree in valid_trees
        ]
Пример #5
0
def init(index, text):
    """Set up the canvas for annotating one example.

    Tokenizes the stripped ``text``, records the tokens and ``index`` in the
    shared ``state``, wires mouse/keyboard handlers onto ``canvas``, and
    creates one ``TOKEN`` node per token plus a single ``ROOT`` node.
    """
    tokens = run_task(Task.TOKENIZE, text.strip())
    state['tokens'] = tokens
    state['example_index'] = index

    def on_left_click(event):
        # The node type is read at click time, so later changes to
        # state['node_type'] are honoured.
        return user_create_node(event.x, event.y, state['node_type'])

    canvas.bind("<Button-1>", on_left_click)
    canvas.bind("<Key>", on_key)

    # Spread the token nodes evenly along a row 100 px above the bottom edge.
    # Each node receives the token's position, not the token itself.
    for position, _token in enumerate(tokens):
        create_node(
            x=settings['window_width'] * (position / len(tokens)),
            y=settings['window_height'] - 100,
            node_type=NodeType.TOKEN,
            token=position,
        )

    # The root node sits horizontally centred, 100 px from the top.
    create_node(
        x=settings['window_width'] // 2,
        y=100,
        node_type=NodeType.ROOT,
    )
    canvas.focus_set()
Пример #6
0
        return node

    root = node_from_dict(tree_dict['tree'])

    # Aggregate unused tokens
    if len(used_nodes) < len(token_nodes):
        unused_container_node = QueryTree.Node(NodeType.UNUSED)
        root.children.append(unused_container_node)

        for node in token_nodes:
            if node not in used_nodes:
                unused_container_node.children.append(node)

    tree = QueryTree(root, tokens)
    return tree


# Serialize every annotated tree into 'jimmy.ask' in prefix-parentheses form.
# NOTE(review): nothing is written between consecutive trees; confirm that
# to_serializable() already terminates its output (e.g. with a newline).
with open('jimmy.ask', 'w') as output_file:
    for tree in trees:
        try:
            index = int(tree['id'])
            tokens = run_task(Task.TOKENIZE, questions[index][1])
            tokens_to_token(tree['tree'])
            query_tree = from_dict(tree, tokens)
            output_file.write(
                query_tree.to_serializable(
                    SerializationFormat.PREFIX_PARANTHESES))
        except Exception:
            # Best-effort export: a tree that cannot be converted is reported
            # and skipped. Catching Exception (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit still stop the run.
            print("failed!")
Пример #7
0
        def generate_prior_candidates(generator, node: QueryTree.Node):
            """Collect the relations that the constraints accumulated so far allow for ``node``.

            A SPARQL query is generated and executed for the node as-is and,
            where the node type permits it, for the reversed subject/object
            order. Blacklisted relations are removed, and so are relations
            whose reverse is already mapped to a descendant relation node.
            """

            def candidates_for_direction(reverse_relation):
                # Operate on copies so the live generator and node are untouched.
                node_copy = deepcopy(node)
                gen = deepcopy(generator)
                NODE_HANDLERS[node.type](gen=gen,
                                         node=node_copy,
                                         reverse_relation=reverse_relation)
                query = gen.generate_query_from_current_state(
                    constants.RELATION_EXTRACTION_VARIABLE)
                request = {
                    'query_body': query,
                    'return_variable':
                    constants.RELATION_EXTRACTION_VARIABLE.replace('?', ''),
                }
                found = run_task(Task.RUN_SPARQL_QUERY, request)
                return [
                    relation for relation in found
                    if relation not in constants.RELATION_MAPPING_BLACKLIST
                ]

            prior_candidates = candidates_for_direction(False)

            # The subject/object order of the triple is not known yet, so the
            # reverse direction is queried as well -- except for node types
            # that cannot be reversed.
            non_reversible_types = {
                NodeType.ARGMAX, NodeType.ARGMIN, NodeType.ARGNTH,
                NodeType.TOPN
            }
            if node.type not in non_reversible_types:
                reversed_candidates = [
                    EQUIVALENT_RELATION_RESOLVER.reverse_relation(candidate)
                    for candidate in candidates_for_direction(True)
                ]
                prior_candidates.extend(reversed_candidates)

            # Drop candidates already mapped in reverse to a child node, so as
            # to avoid cycles.
            child_relations = [
                relation
                for child in node.collect(RELATION_NODE_TYPES)
                for relation in child.kb_resources
            ]
            reversed_child_relations = {
                EQUIVALENT_RELATION_RESOLVER.reverse_relation(relation)
                for relation in child_relations
            }

            return [
                relation for relation in prior_candidates
                if relation not in reversed_child_relations
            ]
Пример #8
0
    def __map_relation(self, node: QueryTree.Node) -> bool:
        """Map a relation node to knowledge-base relations.

        Prior candidates are gathered with ``generate_prior_candidates`` (unless
        the node is in an existence check), attached to the relation-extraction
        input built by ``self.tree``, and passed to the ``MAP_RELATIONS`` task.
        The task result is stored in ``node.kb_resources``.

        NOTE(review): the signature is annotated ``-> bool`` but no ``return``
        statement is visible in this span -- confirm the intended return value.
        """
        def generate_prior_candidates(generator, node: QueryTree.Node):
            """Return the relations the current constraints allow for ``node``.

            Queries both the in-order and (where the node type permits) the
            reversed direction, removes blacklisted relations, and removes
            relations whose reverse is already mapped to a descendant
            relation node.
            """
            # We use the accumulated constraints so far to generate a query that retrieves all possible relations for this node.
            # Make copies so we don't break the current state
            node_copy = deepcopy(node)
            gen = deepcopy(generator)

            NODE_HANDLERS[node.type](gen=gen,
                                     node=node_copy,
                                     reverse_relation=False)
            in_order_query = gen.generate_query_from_current_state(
                constants.RELATION_EXTRACTION_VARIABLE)
            in_order_candidates = run_task(
                Task.RUN_SPARQL_QUERY, {
                    'query_body':
                    in_order_query,
                    'return_variable':
                    constants.RELATION_EXTRACTION_VARIABLE.replace('?', '')
                })
            in_order_candidates = list(
                filter(lambda x: x not in constants.RELATION_MAPPING_BLACKLIST,
                       in_order_candidates))

            # Same list object as in_order_candidates; the extend() below grows it in place.
            prior_candidates = in_order_candidates

            # We don't know the subject-object order of the triple yet, so we need the relation candidates for the reverse order as well.
            if node.type not in {
                    NodeType.ARGMAX, NodeType.ARGMIN, NodeType.ARGNTH,
                    NodeType.TOPN
            }:  # Can't reverse these
                node_copy = deepcopy(node)
                gen = deepcopy(generator)
                NODE_HANDLERS[node.type](gen=gen,
                                         node=node_copy,
                                         reverse_relation=True)
                reverse_order_query = gen.generate_query_from_current_state(
                    constants.RELATION_EXTRACTION_VARIABLE)
                reverse_order_candidates = run_task(
                    Task.RUN_SPARQL_QUERY, {
                        'query_body':
                        reverse_order_query,
                        'return_variable':
                        constants.RELATION_EXTRACTION_VARIABLE.replace(
                            '?', '')
                    })
                reverse_order_candidates = list(
                    filter(
                        lambda x: x not in constants.
                        RELATION_MAPPING_BLACKLIST, reverse_order_candidates))
                # Candidates found with the reversed query are passed through reverse_relation() before being merged.
                reverse_order_candidates = [
                    EQUIVALENT_RELATION_RESOLVER.reverse_relation(candidate)
                    for candidate in reverse_order_candidates
                ]
                prior_candidates.extend(reverse_order_candidates)

            # Remove any candidates that are already mapped in reverse to a child node, so as to avoid cycles
            child_relation_nodes = node.collect(RELATION_NODE_TYPES)
            child_relations = []
            for child in child_relation_nodes:
                child_relations.extend(child.kb_resources)
            reversed_child_relations = set([
                EQUIVALENT_RELATION_RESOLVER.reverse_relation(relation)
                for relation in child_relations
            ])
            prior_candidates = list(
                filter(
                    lambda relation: relation not in reversed_child_relations,
                    prior_candidates))

            return prior_candidates

        parent_node = self.tree.find_parent(node)
        if node.type == NodeType.EXISTSRELATION or parent_node.type == NodeType.EXISTS:
            # In case of EXISTS we can't consider prior candidates, because mapping implies picking the most probable of
            # them. In that case EXISTS would always yield true. Instead, the strategy is to get the most probable relation from the whole relation search space.
            prior_candidates = []
        else:
            # This object itself is passed as the generator whose state gets deep-copied.
            prior_candidates = generate_prior_candidates(self, node)
            if not prior_candidates:
                # Relax types in case they yield no results (KB might be inconsistent, answer might be a string etc.)
                # The TYPE children are removed from the node permanently, not just for the retry.
                node.children = list(
                    filter(lambda x: x.type != NodeType.TYPE, node.children))
                prior_candidates = generate_prior_candidates(self, node)

        relation_mapping_input = self.tree.generate_relation_extraction_sequence(
            node)
        relation_mapping_input['candidates'] = prior_candidates
        relations = run_task(Task.MAP_RELATIONS, relation_mapping_input)

        if node.type == NodeType.EXISTSRELATION or parent_node.type == NodeType.EXISTS:
            # In case of existence checking, consider both directions
            # The reversed list is fully built before extend() runs, so iterating `relations` here is safe.
            relations.extend([
                EQUIVALENT_RELATION_RESOLVER.reverse_relation(relation)
                for relation in relations
            ])

        node.kb_resources = relations