Example #1
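# Note: this snippet assumes Fact, KnowledgeBaseParser, parse_clause and
# materialize are imported from the surrounding project.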
def test_materialize():
    initial_facts = [
        Fact('q', ['{}'.format(idx), '{}'.format(idx + 1)])
        for idx in range(64)
    ]
    parser = KnowledgeBaseParser(initial_facts)
    parser.predicate_to_index['p'] = 2

    clauses = [
        parse_clause('q(X, Z) :- q(X, Y), q(Y, Z)'),
        parse_clause('p(X, Y) :- q(X, Y)')
    ]

    inferred_facts = materialize(initial_facts, clauses, parser)
    inferred_triples = [(f.argument_names[0], f.predicate_name, f.argument_names[1])
                        for f in inferred_facts]

    entities = ({s for (s, _, _) in inferred_triples} |
                {o for (_, _, o) in inferred_triples})

    for e1 in entities:
        for e2 in entities:
            if int(e1) < int(e2):
                assert (str(e1), 'q', str(e2)) in inferred_triples
                assert (str(e1), 'p', str(e2)) in inferred_triples
                print('+')
            else:
                assert (str(e1), 'q', str(e2)) not in inferred_triples
                assert (str(e1), 'p', str(e2)) not in inferred_triples
                print('-')
Example #2
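# Note: this snippet assumes numpy (np), TensorFlow 1.x (tf), Fact,
# KnowledgeBaseParser, parse_clause and clauses_to_equality_loss are imported
# from the surrounding project.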
def test_losses():
    triples = [
        ('e1', 'p', 'e2'),
        ('e2', 'q', 'e3'),
        ('e1', 'r', 'e2'),
        ('e2', 's', 'e3')
    ]

    def fact(s, p, o):
        return Fact(predicate_name=p, argument_names=[s, o])

    facts = [fact(s, p, o) for s, p, o in triples]
    parser = KnowledgeBaseParser(facts)

    nb_predicates = len(parser.predicate_vocabulary)
    predicate_embedding_size = 100
    predicate_embedding_layer = tf.get_variable('predicates',
                                                shape=[nb_predicates + 1, predicate_embedding_size],
                                                initializer=tf.contrib.layers.xavier_initializer())

    clauses = [parse_clause('p(X, Y) :- q(Y, X)'), parse_clause('r(X, Y) :- s(X, Y)')]
    loss = clauses_to_equality_loss('TransE', clauses, 'l2_sqr',
                                    predicate_embedding_layer, parser.predicate_to_index,
                                    entity_embedding_size=predicate_embedding_size)

    for i in range(32):
        optimizer = tf.train.AdagradOptimizer(0.1)
        minimization_step = optimizer.minimize(loss, var_list=[predicate_embedding_layer])

        init_op = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init_op)

            for j in range(32):
                session.run([minimization_step])

                loss_value = session.run([loss])[0]

                p_idx, q_idx = parser.predicate_to_index['p'], parser.predicate_to_index['q']
                r_idx, s_idx = parser.predicate_to_index['r'], parser.predicate_to_index['s']

                predicate_embedding_layer_value = session.run([predicate_embedding_layer])[0]

                p_value, q_value = predicate_embedding_layer_value[p_idx, :], predicate_embedding_layer_value[q_idx, :]
                r_value, s_value = predicate_embedding_layer_value[r_idx, :], predicate_embedding_layer_value[s_idx, :]

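                # For TransE, the clause p(X, Y) :- q(Y, X) holds in closed form when
                # p = -q, and r(X, Y) :- s(X, Y) when r = s, so with the 'l2_sqr' norm
                # the equality loss should equal ||p + q||^2 + ||r - s||^2.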
                estimated_loss_value = np.square(p_value + q_value).sum() + np.square(r_value - s_value).sum()

                assert loss_value > 0
                assert estimated_loss_value > 0
                np.testing.assert_allclose(loss_value, estimated_loss_value, rtol=1e-4)

    tf.reset_default_graph()
Example #3
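# Note: this snippet assumes numpy (np), TensorFlow 1.x (tf), Fact,
# KnowledgeBaseParser, parse_clause, Adversarial, models and similarities are
# imported from the surrounding project.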
def _test_adversarial():
    triples = [('john', 'friendOf', 'mark'), ('mark', 'friendOf', 'aleksi'),
               ('mark', 'friendOf', 'dazdrasmygda')]

    def fact(s, p, o):
        return Fact(predicate_name=p, argument_names=[s, o])

    facts = [fact(s, p, o) for s, p, o in triples]
    parser = KnowledgeBaseParser(facts)
    clauses = [parse_clause('friendOf(X, Y) :- friendOf(Y, X)')]

    nb_entities = len(parser.entity_vocabulary)
    nb_predicates = len(parser.predicate_vocabulary)

    entity_embedding_size = 100
    predicate_embedding_size = 100

    entity_embedding_layer = tf.get_variable(
        'entities',
        shape=[nb_entities + 1, entity_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())

    predicate_embedding_layer = tf.get_variable(
        'predicates',
        shape=[nb_predicates + 1, predicate_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())

    model_class = models.get_function('TransE')

    similarity_function = similarities.get_function('l1')
    model_parameters = dict(similarity_function=similarity_function)

    batch_size = 1000

    adversarial = Adversarial(
        clauses=clauses,
        parser=parser,
        entity_embedding_layer=entity_embedding_layer,
        predicate_embedding_layer=predicate_embedding_layer,
        model_class=model_class,
        model_parameters=model_parameters,
        batch_size=batch_size)

    init_op = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init_op)
        assert len(adversarial.parameters) == 2
        for violating_embeddings in adversarial.parameters:
            shape = session.run(tf.shape(violating_embeddings))
            assert (shape == (batch_size, entity_embedding_size)).all()

        loss_value = session.run(adversarial.loss)
        errors_value = session.run(adversarial.errors)

        var1 = adversarial.parameters[0]
        var2 = adversarial.parameters[1]

        X_values = session.run(var1 if "X" in var1.name else var2)
        Y_values = session.run(var2 if "Y" in var2.name else var1)

        p_value = session.run(
            tf.nn.embedding_lookup(predicate_embedding_layer, 1))

        assert X_values.shape == (batch_size, entity_embedding_size)
        assert Y_values.shape == (batch_size, entity_embedding_size)
        assert p_value.shape == (predicate_embedding_size,)

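        # TransE with L1 similarity scores a triple (s, p, o) as -||s + p - o||_1;
        # head_scores / body_scores below score the clause head friendOf(X, Y) and
        # body friendOf(Y, X) for each sampled pair of violating embeddings.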
        head_scores = -np.sum(np.abs((X_values + p_value) - Y_values), axis=1)
        body_scores = -np.sum(np.abs((Y_values + p_value) - X_values), axis=1)

        assert int(errors_value) == np.sum(
            (head_scores < body_scores).astype(int))

        linear_losses = body_scores - head_scores
        np_loss_values = np.sum(linear_losses * (linear_losses > 0))
        assert np.abs(loss_value - np_loss_values) < 1e-3

    tf.reset_default_graph()
Example #4
import logging

import pytest
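# Note: numpy (np), TensorFlow 1.x (tf), Fact, KnowledgeBaseParser,
# parse_clause, models and similarities are assumed to be imported from the
# surrounding project.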

logger = logging.getLogger(__name__)

triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(X, Y)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('TransE')
similarity_function = similarities.get_function('l2_sqr')

model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_transe_unit_cube():
    for seed in range(32):
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)
Example #5
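# Note: this snippet assumes argparse, logging (logger), Fact,
# KnowledgeBaseParser, parse_clause, read_triples and materialize are
# available from the surrounding project.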
def main(argv):
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    argparser = argparse.ArgumentParser(
        'Generate Test-I/Test-II/Test-ALL test sets',
        formatter_class=formatter)

    argparser.add_argument('train', action='store', type=str, default=None)
    argparser.add_argument('valid', action='store', type=str, default=None)
    argparser.add_argument('test', action='store', type=str, default=None)
    argparser.add_argument('clauses', action='store', type=str, default=None)

    argparser.add_argument('--test-I', '-1', type=str, default='./testI.tsv')
    argparser.add_argument('--test-II', '-2', type=str, default='./testII.tsv')

    args = argparser.parse_args(argv)

    train_path, valid_path, test_path = args.train, args.valid, args.test
    test_I_path, test_II_path = args.test_I, args.test_II

    train_triples, _ = read_triples(train_path)
    valid_triples, _ = read_triples(valid_path)
    test_triples, _ = read_triples(test_path)

    def fact(s, p, o):
        return Fact(predicate_name=p, argument_names=[s, o])

    train_facts = [fact(s, p, o) for s, p, o in train_triples]
    valid_facts = [fact(s, p, o) for s, p, o in valid_triples]
    test_facts = [fact(s, p, o) for s, p, o in test_triples]

    parser = KnowledgeBaseParser(train_facts + valid_facts + test_facts)

    clauses_path = args.clauses
    with open(clauses_path, 'r') as f:
        clauses = [parse_clause(line.strip()) for line in f.readlines()]

    for clause in clauses:
        logger.debug('Clause: {}'.format(clause))

    # Put all triples in the form of sets of tuples
    train_triples = {(fact.argument_names[0], fact.predicate_name,
                      fact.argument_names[1])
                     for fact in train_facts}
    valid_triples = {(fact.argument_names[0], fact.predicate_name,
                      fact.argument_names[1])
                     for fact in valid_facts}
    test_triples = {(fact.argument_names[0], fact.predicate_name,
                     fact.argument_names[1])
                    for fact in test_facts}

    m_train_facts = materialize(train_facts, clauses, parser)
    m_train_triples = {(fact.argument_names[0], fact.predicate_name,
                        fact.argument_names[1])
                       for fact in m_train_facts}

    # Check if the sets of triples are non-empty
    assert len(train_triples) > 0
    assert len(valid_triples) > 0
    assert len(test_triples) > 0
    assert len(m_train_triples) > len(train_triples)

    # Check that their intersections are empty (e.g. no test triple appears in the training set, etc.)
    assert len(train_triples & valid_triples) == 0
    assert len(train_triples & test_triples) == 0
    assert len(valid_triples & test_triples) == 0

    # Note that some of the test triples can be inferred by directly applying these rules on the training set
    # (pure logical inference). On each dataset, we further split the test set into two parts, test-I and test-II.
    # The former contains triples that cannot be directly inferred by pure logical inference, and the latter the
    # remaining test triples. Table 3 gives some statistics of the datasets, including the number of entities,
    # relations, triples in training/validation/test-I/test-II set, and ground rules.

    # Triples that cannot be directly inferred by pure logical inference
    test_1_triples = test_triples - m_train_triples

    # Triples that can be directly inferred by pure logical inference
    test_2_triples = test_triples & m_train_triples

    nb_1_triples, nb_2_triples, nb_all_triples = (
        len(test_1_triples), len(test_2_triples), len(test_triples))
    assert nb_1_triples + nb_2_triples == nb_all_triples
    assert len(test_1_triples | test_2_triples) == nb_all_triples

    logger.info('#Test-I: {}, #Test-II: {}, #Test-ALL: {}'.format(
        nb_1_triples, nb_2_triples, nb_all_triples))

    if test_I_path is not None:
        with open(test_I_path, 'w') as f:
            f.writelines(
                ['{}\t{}\t{}\n'.format(s, p, o) for s, p, o in test_1_triples])

    if test_II_path is not None:
        with open(test_II_path, 'w') as f:
            f.writelines(
                ['{}\t{}\t{}\n'.format(s, p, o) for s, p, o in test_2_triples])
Example #6
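# Note: this snippet assumes argparse, logging (logger), pyDatalog,
# read_triples and parse_clause are available from the surrounding project.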
def main(argv):
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    argparser = argparse.ArgumentParser('Populate a Knowledge Base',
                                        formatter_class=formatter)

    argparser.add_argument('triples', action='store', type=str, default=None)
    argparser.add_argument('clauses', action='store', type=str, default=None)
    argparser.add_argument('--output',
                           '-o',
                           action='store',
                           type=str,
                           default=None)

    args = argparser.parse_args(argv)

    triples_path = args.triples
    clauses_path = args.clauses
    output_path = args.output

    triples, _ = read_triples(triples_path)

    # Parse the clauses using Sebastian's parser
    with open(clauses_path, 'r') as f:
        clauses_str = [line.strip() for line in f.readlines()]
    clauses = [parse_clause(clause_str) for clause_str in clauses_str]

    # Create a set containing all the entities from the triples
    entity_names = {s for (s, _, _) in triples} | {o for (_, _, o) in triples}

    # Create a set containing all predicate names from the triples and clauses
    predicate_names = {p for (_, p, _) in triples}
    for clause in clauses:
        predicate_names |= {clause.head.predicate.name}
        for atom in clause.body:
            predicate_names |= {atom.predicate.name}

    # Associate each entity and predicate with a unique index
    entity_to_idx = {entity: idx for idx, entity in enumerate(entity_names)}
    idx_to_entity = {idx: entity for entity, idx in entity_to_idx.items()}

    predicate_to_idx = {
        predicate: idx
        for idx, predicate in enumerate(predicate_names)
    }
    idx_to_predicate = {
        idx: predicate
        for predicate, idx in predicate_to_idx.items()
    }

    logger.info('Asserting facts ..')

    # Asserting the facts
    for (s, p, o) in triples:
        pyDatalog.assert_fact('p', entity_to_idx[s], predicate_to_idx[p],
                              entity_to_idx[o])

    logger.info('Querying triples ..')

    ans = pyDatalog.ask('p(S, P, O)')
    print(len(ans.answers))

    logger.info('Loading rules ..')

    def atom_to_str(atom):
        atom_predicate_idx = predicate_to_idx[atom.predicate.name]
        atom_arg_0, atom_arg_1 = atom.arguments[0], atom.arguments[1]
        return 'p({}, {}, {})'.format(atom_arg_0, atom_predicate_idx,
                                      atom_arg_1)

    def clause_to_str(clause):
        head, body = clause.head, clause.body
        return '{} <= {}'.format(atom_to_str(head),
                                 ' & '.join([atom_to_str(a) for a in body]))

    rules_str = '\n'.join([clause_to_str(clause) for clause in clauses])

    pyDatalog.load(rules_str)

    logger.info('Querying triples ..')

    ans = pyDatalog.ask('p(S, P, O)')
    answers = sorted(ans.answers)
Example #7
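# Note: this snippet assumes argparse, pickle, numpy (np), matplotlib.pyplot
# (plt), sklearn.manifold.MDS, read_triples, parse_clause, GroundLoss,
# loss_clause, score_TransE_L1 and logger are available from the surrounding
# project.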
def main(argv):
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    argparser = argparse.ArgumentParser('Plot Embeddings',
                                        formatter_class=formatter)
    argparser.add_argument('model', action='store', type=str)
    argparser.add_argument('adversary', action='store', type=str)

    argparser.add_argument('--clauses',
                           '-c',
                           action='store',
                           type=str,
                           default=None)
    argparser.add_argument('--triples',
                           '-t',
                           action='store',
                           type=str,
                           default=None)

    args = argparser.parse_args(argv)

    model_path = args.model
    adversary_path = args.adversary
    clauses_path = args.clauses
    triples_path = args.triples

    with open(model_path, 'rb') as f:
        model_data = pickle.load(f)

    with open(adversary_path, 'rb') as f:
        adversary_data = pickle.load(f)

    entity_embeddings = model_data['entities'][1:, :]
    predicate_embeddings = model_data['predicates'][1:, :]

    variables = adversary_data['variables']

    entity_to_index = model_data['entity_to_index']
    predicate_to_index = model_data['predicate_to_index']

    entity_indices = sorted(set(entity_to_index.values()))
    predicate_indices = sorted(set(predicate_to_index.values()))

    triples = None
    if triples_path is not None:
        triples, _ = read_triples(triples_path)
        triples_idx = [(entity_to_index[s], predicate_to_index[p],
                        entity_to_index[o]) for (s, p, o) in triples]

    clauses = None
    clause_to_ground_mappings = None
    if clauses_path is not None:
        with open(clauses_path, 'r') as f:
            clauses = [parse_clause(line.strip()) for line in f.readlines()]
        clause_to_variable_names = {
            clause: GroundLoss.get_variable_names(clause)
            for clause in clauses
        }
        clause_to_mappings = {
            clause:
            GroundLoss.sample_mappings(clause_to_variable_names[clause],
                                       entity_indices)
            for clause in clauses
        }
        if triples is not None:
            clause_to_ground_mappings = {
                clauses[0]: [{
                    'X': s_idx,
                    'Y': o_idx
                } for (s_idx, _, o_idx) in triples_idx]
            }

    nb_entities = len(entity_to_index)

    for variable_name, embedding in variables.items():
        variable_name = variable_name.split('_')[2]
        entity_to_index[variable_name] = len(entity_to_index) + 1
        entity_embeddings = np.concatenate((entity_embeddings, embedding),
                                           axis=0)

    index_to_entity = {
        index: entity
        for entity, index in entity_to_index.items()
    }
    index_to_predicate = {
        index: predicate
        for predicate, index in predicate_to_index.items()
    }

    projector = MDS(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)

    entity_embeddings_proj = projector.fit_transform(entity_embeddings)

    plt.scatter(entity_embeddings_proj[:nb_entities, 0],
                entity_embeddings_proj[:nb_entities, 1],
                color='c')
    plt.scatter(entity_embeddings_proj[nb_entities:, 0],
                entity_embeddings_proj[nb_entities:, 1],
                color='r')

    # Finding the maximum violators
    if clauses is not None:
        kwargs = {
            'entity_to_index': entity_to_index,
            'entity_embeddings': entity_embeddings,
            'predicate_to_index': predicate_to_index,
            'predicate_embeddings': predicate_embeddings,
            'scoring_function': score_TransE_L1
        }

        for clause, mappings in clause_to_mappings.items():
            mapping_loss_lst = [(mapping, loss_clause(clause, mapping,
                                                      **kwargs))
                                for mapping in mappings]
            # Find the most violating variable assignment (i.e. the variable assignment with the highest loss)
            import operator
            most_violating_mapping = max(mapping_loss_lst,
                                         key=operator.itemgetter(1))[0]

            logger.info(
                'Most violating mapping: {}'.format(most_violating_mapping))
            for variable_name, entity_idx in most_violating_mapping.items():
                plt.scatter(entity_embeddings_proj[entity_idx - 1, 0],
                            entity_embeddings_proj[entity_idx - 1, 1],
                            color='b')

            if clause_to_ground_mappings is not None:
                for clause, mappings in clause_to_ground_mappings.items():
                    mapping_loss_lst = [(mapping,
                                         loss_clause(clause, mapping,
                                                     **kwargs))
                                        for mapping in mappings]
                    # Find the most violating variable assignment (i.e. the variable assignment with the highest loss)
                    import operator
                    most_violating_mapping = max(mapping_loss_lst,
                                                 key=operator.itemgetter(1))[0]

                    logger.info('Most violating ground mapping: {}'.format(
                        most_violating_mapping))
                    for variable_name, entity_idx in most_violating_mapping.items():
                        plt.scatter(entity_embeddings_proj[entity_idx - 1, 0],
                                    entity_embeddings_proj[entity_idx - 1, 1],
                                    color='g')

    for index, (x, y) in enumerate(
            zip(entity_embeddings_proj[:, 0], entity_embeddings_proj[:, 1]),
            1):
        label = index_to_entity[index]
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(0, 0),
                     textcoords='offset points')

    plt.show()
Example #8
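# Note: this snippet assumes numpy (np), TensorFlow 1.x (tf), parse_clause,
# models, similarities, constraints, ClosedForm, GroundLoss and
# cartesian_product are imported from the surrounding project, and that
# hyperparams, parser, nb_entities, nb_predicates, a_idx and b_idx are defined
# at module level.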
def test_losses():

    hyperparam_configurations = list(cartesian_product(hyperparams))

    for hyperparam_configuration in hyperparam_configurations:
        # Clauses
        clause = parse_clause(hyperparam_configuration['clause'])

        # Instantiating the model parameters
        model_class = models.get_function(hyperparam_configuration['model_name'])
        similarity_function = similarities.get_function('dot')

        unit_cube = hyperparam_configuration['unit_cube']

        for seed in range(4):
            print('Seed {}, Evaluating {}'.format(seed, str(hyperparam_configuration)))

            tf.reset_default_graph()

            np.random.seed(seed)
            tf.set_random_seed(seed)

            entity_embedding_size = np.random.randint(low=1, high=5) * 2
            predicate_embedding_size = entity_embedding_size

            # Instantiating entity and predicate embedding layers
            entity_embedding_layer = tf.get_variable('entities',
                                                     shape=[nb_entities + 1, entity_embedding_size],
                                                     initializer=tf.contrib.layers.xavier_initializer())

            predicate_embedding_layer = tf.get_variable('predicates',
                                                        shape=[nb_predicates + 1, predicate_embedding_size],
                                                        initializer=tf.contrib.layers.xavier_initializer())

            entity_projection = constraints.unit_sphere(entity_embedding_layer, norm=1.0)
            if unit_cube:
                entity_projection = constraints.unit_cube(entity_embedding_layer)

            entity_inputs = tf.placeholder(tf.int32, shape=[None, 2])
            walk_inputs = tf.placeholder(tf.int32, shape=[None, None])

            entity_embeddings = tf.nn.embedding_lookup(entity_embedding_layer, entity_inputs)
            predicate_embeddings = tf.nn.embedding_lookup(predicate_embedding_layer, walk_inputs)

            model_parameters = dict(entity_embeddings=entity_embeddings,
                                    predicate_embeddings=predicate_embeddings,
                                    similarity_function=similarity_function)

            model = model_class(**model_parameters)
            score = model()

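            # ClosedForm builds the lifted (closed-form) adversarial loss for the
            # clause; minimising it w.r.t. the predicate embeddings should also
            # reduce the ground clause error measured by GroundLoss below.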
            closed_form_lifted = ClosedForm(parser=parser,
                                            predicate_embedding_layer=predicate_embedding_layer,
                                            model_class=model_class,
                                            model_parameters=model_parameters,
                                            is_unit_cube=unit_cube)
            opt_adversarial_loss = closed_form_lifted(clause)

            v_optimizer = tf.train.AdagradOptimizer(learning_rate=1e-2)
            v_training_step = v_optimizer.minimize(opt_adversarial_loss, var_list=[predicate_embedding_layer])

            init_op = tf.global_variables_initializer()

            with tf.Session() as session:
                session.run(init_op)

                session.run([entity_projection])

                def scoring_function(args):
                    return session.run(score, feed_dict={walk_inputs: args[0], entity_inputs: args[1]})

                ground_loss = GroundLoss(clauses=[clause], parser=parser, scoring_function=scoring_function)
                feed_dict = {'X': a_idx, 'Y': b_idx}
                continuous_loss_0 = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                for epoch in range(1, 100 + 1):
                    _ = session.run([v_training_step])
                    print(ground_loss.continuous_error(clause, feed_dict=feed_dict))

                continuous_loss_final = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                assert continuous_loss_0 <= .0 or continuous_loss_final <= continuous_loss_0

        tf.reset_default_graph()
Example #9
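# Note: this snippet assumes argparse, os, tqdm, Pyke's knowledge_engine,
# read_triples, parse_clause, RULES_KRB_PATH and logger are available from the
# surrounding project.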
def main(argv):
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    argparser = argparse.ArgumentParser('Populate a Knowledge Base', formatter_class=formatter)

    argparser.add_argument('triples', action='store', type=str, default=None)
    argparser.add_argument('clauses', action='store', type=str, default=None)
    argparser.add_argument('--output', '-o', action='store', type=str, default=None)

    args = argparser.parse_args(argv)

    triples_path = args.triples
    clauses_path = args.clauses
    output_path = args.output

    triples, _ = read_triples(triples_path)

    # Parse the clauses using Sebastian's parser
    with open(clauses_path, 'r') as f:
        clauses_str = [line.strip() for line in f.readlines()]
    clauses = [parse_clause(clause_str) for clause_str in clauses_str]

    # Create a set containing all predicate names
    predicate_names = {p for (_, p, _) in triples}
    for clause in clauses:
        predicate_names |= {clause.head.predicate.name}
        for atom in clause.body:
            predicate_names |= {atom.predicate.name}

    # The original predicate names might not be handled well by Pyke (this is the case for e.g. Freebase identifiers).
    # Replace them with p0, p1, p2, etc.
    predicate_to_idx = {predicate: 'p{}'.format(idx) for idx, predicate in enumerate(predicate_names)}
    idx_to_predicate = {idx: predicate for predicate, idx in predicate_to_idx.items()}

    # Generate a Pyke rule base for reasoning via forward chaining
    rule_str_lst = []
    for idx, clause in enumerate(clauses):
        head, body = clause.head, clause.body
        head_str = '\t\tfacts.{}(${}, ${})'.format(predicate_to_idx[head.predicate.name], head.arguments[0].name, head.arguments[1].name)
        body_str = ''
        for atom in body:
            body_str += '\t\tfacts.{}(${}, ${})\n'.format(predicate_to_idx[atom.predicate.name], atom.arguments[0].name, atom.arguments[1].name)
        rule_str_lst += ['rule_{}\n\tforeach\n{}\n\tassert\n{}\n'.format(idx, body_str, head_str)]

    # Write the Pyke rule base to file
    with open(RULES_KRB_PATH, 'w') as f:
        f.writelines('{}\n'.format(rule_str) for rule_str in rule_str_lst)

    engine = knowledge_engine.engine('.')

    # Assert starting facts, corresponding to the triples already in the Knowledge Graph
    for (s, p, o) in tqdm(triples):
        engine.assert_('facts', predicate_to_idx[p], (s, o))

    engine.activate(os.path.splitext(os.path.basename(RULES_KRB_PATH))[0])

    # For each predicate p, query the reasoning engine ..
    materialized_triples = []
    for predicate_name in tqdm(predicate_names):
        # .. asking for all subject/object pairs (s, o) such that (s, p, o) is entailed by the knowledge base
        with engine.prove_goal('facts.{}($s, $o)'.format(predicate_to_idx[predicate_name])) as gen:
            for vs, plan in gen:
                materialized_triples += [(vs['s'], predicate_name, vs['o'])]

    if output_path is not None:
        # Write the materialized triples to file
        with open(output_path, 'w') as f:
            f.writelines('{}\t{}\t{}\n'.format(s, p, o) for s, p, o in materialized_triples)