def __call__(self):
    """Build the loss tensor: the negated similarity between ``x1`` and ``x2``.

    The similarity function is looked up by name (``self.similarity_name``);
    when ``self.is_inverse`` is set, ``x2`` is first passed through
    ``self.inverse`` before being compared against ``x1``.

    :return: Loss value, i.e. minus the similarity along the last axis.
    """
    score_fn = similarities.get_function(self.similarity_name)
    # Optionally map x2 through the inverse transformation first.
    rhs = self.inverse(self.x2) if self.is_inverse else self.x2
    return - score_fn(self.x1, rhs, axis=-1)
def _test_adversarial():
    """Check Adversarial's loss/error tensors against a NumPy re-implementation.

    Builds a tiny friendOf knowledge base, wires a TransE model (L1 similarity)
    with a symmetry clause friendOf(X, Y) :- friendOf(Y, X), and verifies that
    the graph-computed adversarial loss and error count match values recomputed
    by hand with NumPy from the violating embeddings.
    """
    triples = [('john', 'friendOf', 'mark'),
               ('mark', 'friendOf', 'aleksi'),
               ('mark', 'friendOf', 'dazdrasmygda')]

    def fact(s, p, o):
        return Fact(predicate_name=p, argument_names=[s, o])

    facts = [fact(s, p, o) for s, p, o in triples]
    parser = KnowledgeBaseParser(facts)

    # Symmetry clause over which adversarial violations are searched.
    clauses = [parse_clause('friendOf(X, Y) :- friendOf(Y, X)')]

    nb_entities = len(parser.entity_vocabulary)
    nb_predicates = len(parser.predicate_vocabulary)

    entity_embedding_size = 100
    predicate_embedding_size = 100

    # +1 row reserved — presumably index 0 is a padding/unknown slot; TODO confirm.
    entity_embedding_layer = tf.get_variable(
        'entities', shape=[nb_entities + 1, entity_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())
    predicate_embedding_layer = tf.get_variable(
        'predicates', shape=[nb_predicates + 1, predicate_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())

    model_class = models.get_function('TransE')
    similarity_function = similarities.get_function('l1')
    model_parameters = dict(similarity_function=similarity_function)

    batch_size = 1000

    adversarial = Adversarial(
        clauses=clauses, parser=parser,
        entity_embedding_layer=entity_embedding_layer,
        predicate_embedding_layer=predicate_embedding_layer,
        model_class=model_class, model_parameters=model_parameters,
        batch_size=batch_size)

    init_op = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init_op)

        # One adversarial variable per clause variable (X and Y).
        assert len(adversarial.parameters) == 2
        for violating_embeddings in adversarial.parameters:
            shape = session.run(tf.shape(violating_embeddings))
            assert (shape == (batch_size, entity_embedding_size)).all()

        loss_value = session.run(adversarial.loss)
        errors_value = session.run(adversarial.errors)

        # Identify which adversarial variable binds to X vs Y via its TF name.
        var1 = adversarial.parameters[0]
        var2 = adversarial.parameters[1]
        X_values = session.run(var1 if "X" in var1.name else var2)
        Y_values = session.run(var2 if "Y" in var2.name else var1)

        # Predicate index 1 — presumably the sole predicate 'friendOf'; TODO confirm
        # the parser assigns indices starting at 1.
        p_value = session.run(
            tf.nn.embedding_lookup(predicate_embedding_layer, 1))

        assert np.array(X_values.shape == (batch_size, entity_embedding_size)).all()
        assert np.array(Y_values.shape == (batch_size, entity_embedding_size)).all()
        assert np.array(p_value.shape == (predicate_embedding_size, ))

        # TransE/L1 scores recomputed in NumPy:
        # head is friendOf(X, Y), body is friendOf(Y, X).
        head_scores = -np.sum(np.abs((X_values + p_value) - Y_values), axis=1)
        body_scores = -np.sum(np.abs((Y_values + p_value) - X_values), axis=1)

        # An "error" is a sample where the clause is violated (head < body).
        assert int(errors_value) == np.sum(
            (head_scores < body_scores).astype(int))

        # Loss is the hinge of (body - head), summed over the batch.
        linear_losses = body_scores - head_scores
        np_loss_values = np.sum(linear_losses * (linear_losses > 0))
        assert np.abs(loss_value - np_loss_values) < 1e-3

    tf.reset_default_graph()
logger = logging.getLogger(__name__)

# Tiny fixture KB: two p-facts and one q-fact over entities a..d.
triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(X, Y)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('TransE')
similarity_function = similarities.get_function('l2_sqr')
model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_transe_unit_cube():
    """Closed-form adversarial test for TransE under unit-cube constraints.

    Repeats over 32 seeds; embedding sizes are drawn per seed so both NumPy
    and TF RNGs must be seeded before the draw.
    NOTE(review): the body appears truncated at this chunk boundary — the
    remainder of the test is not visible here.
    """
    for seed in range(32):
        # Fresh graph per seed so variable names/initializers do not collide.
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)

        entity_embedding_size = np.random.randint(low=1, high=5)
        predicate_embedding_size = entity_embedding_size
logger = logging.getLogger(__name__)

# Tiny fixture KB: two p-facts and one q-fact over entities a..d.
triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(X, Y)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('DistMult')
similarity_function = similarities.get_function('dot')
model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_distmult_unit_sphere():
    """Closed-form adversarial test for DistMult under unit-sphere constraints.

    Repeats over 32 seeds; embedding sizes are drawn per seed so both NumPy
    and TF RNGs must be seeded before the draw.
    NOTE(review): the body appears truncated at this chunk boundary — the
    remainder of the test is not visible here.
    """
    for seed in range(32):
        # Fresh graph per seed so variable names/initializers do not collide.
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)

        entity_embedding_size = np.random.randint(low=1, high=5)
        predicate_embedding_size = entity_embedding_size
def test_losses():
    """End-to-end check that minimising the closed-form lifted adversarial loss
    does not increase the ground continuous loss of the clause.

    For every hyper-parameter configuration (clause, model, cube/sphere
    constraint) and several seeds, builds the scoring graph, trains only the
    predicate embeddings against the ClosedForm lifted loss for 100 steps, and
    asserts the ground-level continuous error did not get worse.
    """
    hyperparam_configurations = list(cartesian_product(hyperparams))
    for hyperparam_configuration in hyperparam_configurations:
        # Clauses
        clause = parse_clause(hyperparam_configuration['clause'])

        # Instantiating the model parameters
        model_class = models.get_function(hyperparam_configuration['model_name'])
        similarity_function = similarities.get_function('dot')
        unit_cube = hyperparam_configuration['unit_cube']

        for seed in range(4):
            print('Seed {}, Evaluating {}'.format(seed, str(hyperparam_configuration)))

            # Fresh graph per run; seed both RNGs before any random draw.
            tf.reset_default_graph()
            np.random.seed(seed)
            tf.set_random_seed(seed)

            # Even embedding size in [2, 8] — presumably required by some
            # model (e.g. complex-valued embeddings); TODO confirm.
            entity_embedding_size = np.random.randint(low=1, high=5) * 2
            predicate_embedding_size = entity_embedding_size

            # Instantiating entity and predicate embedding layers
            # (+1 row reserved — presumably a padding/unknown slot; TODO confirm).
            entity_embedding_layer = tf.get_variable('entities',
                                                     shape=[nb_entities + 1, entity_embedding_size],
                                                     initializer=tf.contrib.layers.xavier_initializer())
            predicate_embedding_layer = tf.get_variable('predicates',
                                                        shape=[nb_predicates + 1, predicate_embedding_size],
                                                        initializer=tf.contrib.layers.xavier_initializer())

            # Project entity embeddings onto the unit sphere, or the unit cube
            # when the configuration asks for it.
            entity_projection = constraints.unit_sphere(entity_embedding_layer, norm=1.0)
            if unit_cube:
                entity_projection = constraints.unit_cube(entity_embedding_layer)

            entity_inputs = tf.placeholder(tf.int32, shape=[None, 2])
            walk_inputs = tf.placeholder(tf.int32, shape=[None, None])

            entity_embeddings = tf.nn.embedding_lookup(entity_embedding_layer, entity_inputs)
            predicate_embeddings = tf.nn.embedding_lookup(predicate_embedding_layer, walk_inputs)

            model_parameters = dict(entity_embeddings=entity_embeddings,
                                    predicate_embeddings=predicate_embeddings,
                                    similarity_function=similarity_function)
            model = model_class(**model_parameters)

            score = model()

            closed_form_lifted = ClosedForm(parser=parser,
                                            predicate_embedding_layer=predicate_embedding_layer,
                                            model_class=model_class,
                                            model_parameters=model_parameters,
                                            is_unit_cube=unit_cube)
            opt_adversarial_loss = closed_form_lifted(clause)

            # Only the predicate embeddings are trained; entities stay fixed.
            v_optimizer = tf.train.AdagradOptimizer(learning_rate=1e-2)
            v_training_step = v_optimizer.minimize(opt_adversarial_loss,
                                                   var_list=[predicate_embedding_layer])

            init_op = tf.global_variables_initializer()

            with tf.Session() as session:
                session.run(init_op)
                # Apply the constraint projection once after initialisation.
                session.run([entity_projection])

                def scoring_function(args):
                    # args[0]: predicate index walks, args[1]: (subject, object) pairs.
                    return session.run(score, feed_dict={walk_inputs: args[0],
                                                         entity_inputs: args[1]})

                ground_loss = GroundLoss(clauses=[clause], parser=parser,
                                         scoring_function=scoring_function)

                # a_idx / b_idx are module-level entity indices defined elsewhere
                # in this file (not visible in this chunk).
                feed_dict = {'X': a_idx, 'Y': b_idx}
                continuous_loss_0 = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                for epoch in range(1, 100 + 1):
                    _ = session.run([v_training_step])
                    print(ground_loss.continuous_error(clause, feed_dict=feed_dict))

                continuous_loss_final = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                # Either the clause was already satisfied (loss <= 0), or
                # training must not have increased the continuous loss.
                assert continuous_loss_0 <= .0 or continuous_loss_final <= continuous_loss_0

            tf.reset_default_graph()