def lambda_batch_log_1minus_prob(nume_min, nume_max, domi_min, domi_max, a, b,
                                 c, d, e, f):
  # this function returns -log(1 - p(a, b)); we want to minimize this value
  joint_log = batch_log_prob(nume_min, nume_max)
  domi_log = batch_log_prob(domi_min, domi_max)  # batch_size
  cond_log = joint_log - domi_log  # (batch_size)
  neg_smooth_log_prob = -smooth_prob(cond_log)  # because input to log1mexp is positive
  onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
  return onemp_neg_smooth_log_prob
def lambda_batch_log_1minus_prob(join_min, join_max, meet_min, meet_max, a, b,
                                 c, d):
  # this function returns -log(1 - p(a, b)); we want to minimize this value
  joint_log = batch_log_prob(meet_min, meet_max)
  domi_log = batch_log_prob(a, b)  # batch_size
  cond_log = joint_log - domi_log  # (batch_size)
  neg_smooth_log_prob = -smooth_prob(cond_log)  # because input to log1mexp is positive
  # neg_smooth_log_prob = tf.Print(neg_smooth_log_prob,
  #     [tf.reduce_sum(neg_smooth_log_prob)], 'neg debug')
  onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
  # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob,
  #     [tf.reduce_sum(onemp_neg_smooth_log_prob)], 'onem')
  return onemp_neg_smooth_log_prob
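# Illustrative sketch (not part of the model): the two lambda_batch_log_1minus_prob
# variants above rely on the identity -log(1 - p) = -log1mexp(-log p) for p in
# (0, 1), where log1mexp(x) = log(1 - exp(-x)) for x > 0. The NumPy helper
# np_log1mexp below is a hypothetical stand-in for tf_utils.log1mexp, assuming it
# follows the standard numerically stable recipe.
import numpy as np


def np_log1mexp(x):
  # stable log(1 - exp(-x)) for x > 0: expm1 for small x, log1p for large x
  return np.where(x < np.log(2.0), np.log(-np.expm1(-x)), np.log1p(-np.exp(-x)))


log_p = np.log(np.array([0.9, 0.5, 1e-4]))   # toy conditional log-probabilities
neg_log_one_minus_p = -np_log1mexp(-log_p)   # stable -log(1 - p)
assert np.allclose(neg_log_one_minus_p, -np.log(1.0 - np.exp(log_p)))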
def batch_log_upper_bound(join_min, join_max, a, b, c, d):
  # join_min: batchsize * embed_size
  # join_max: batchsize * embed_size
  # log_prob: batch_size
  join_log_prob = batch_log_prob(join_min, join_max)
  join_log_prob_new = tf.reduce_logsumexp(
      tf.stack(
          [tf.fill([tf.shape(join_log_prob)[0]], tf.log(0.1)), join_log_prob],
          axis=1),
      axis=1)
  x_log_prob = batch_log_prob(a, b)  # batchsize
  y_log_prob = batch_log_prob(c, d)  # batchsize
  log_xy = tf.reduce_logsumexp(
      tf.stack([x_log_prob, y_log_prob], axis=1), axis=1)
  log_upper_bound = join_log_prob_new + tf_utils.log1mexp(
      join_log_prob_new - log_xy)
  return log_upper_bound
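# Illustrative sketch (not part of the model): in batch_log_upper_bound above,
# a + tf_utils.log1mexp(a - b) computes log(exp(a) - exp(b)) stably (valid when
# a > b), so the returned value is log((0.1 + p_join) - (p_x + p_y)) given the
# preceding logsumexp terms. The toy check below uses plain NumPy and assumes
# nothing beyond that identity.
import numpy as np

a, b = np.log(0.8), np.log(0.3)                           # toy log-quantities, a > b
stable = a + np.log1p(-np.exp(b - a))                     # a + log(1 - exp(b - a))
assert np.isclose(stable, np.log(np.exp(a) - np.exp(b)))  # log(0.8 - 0.3)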
def lambda_batch_disjoint_box(join_min, join_max, meet_min, meet_max, a, b, c, d):
  # return log of disjoint-dimension errors
  # <a, b> is the min and max embedding of the specific term,
  # <c, d> is the min and max embedding of the general term
  cond = tf.less_equal(meet_max, meet_min)  # batchsize * embed_size
  # choose all those dimensions with true conditions
  temp_zero = tf.zeros_like(meet_min)
  meet_min_cond = tf.where(cond, meet_min, temp_zero)  # batchsize * embed_size
  meet_max_cond = tf.where(cond, meet_max, temp_zero)  # batchsize * embed_size
  disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
  # neg_smooth_log_prob = -smooth_prob(disjoint_box_log)  # because input to log1mexp is positive
  onemp_neg_smooth_log_prob = -tf_utils.log1mexp(-disjoint_box_log)
  return onemp_neg_smooth_log_prob
def batch_log_prob(min_embed, max_embed):
  # min_embed: batchsize * embed_size
  # max_embed: batchsize * embed_size
  # log_prob: batch_size
  # numerically stable log probability (box volume)
  if FLAGS.measure == 'uniform':
    if FLAGS.model == 'poe':
      log_prob = tf.reduce_sum(
          tf.log(tf.ones_like(max_embed) - min_embed + 1e-8), axis=1)
    elif FLAGS.model == 'cube':
      log_prob = tf.reduce_sum(tf.log((max_embed - min_embed) + 1e-8), axis=1)
    else:
      raise ValueError('Expected poe or cube, but received', FLAGS.model)
  elif FLAGS.measure == 'exp':
    log_prob = tf.reduce_sum(
        -min_embed + tf_utils.log1mexp(max_embed - min_embed), axis=1)
  else:
    raise ValueError('Expected uniform or exp, but received', FLAGS.measure)
  return log_prob
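# Illustrative sketch (not part of the model): a NumPy version of the two measures
# handled by batch_log_prob above for the 'cube' case. Under the uniform measure
# the box volume is prod(max - min); under the exponential measure it is
# prod(exp(-min) - exp(-max)), which the code evaluates in log space as
# sum(-min + log1mexp(max - min)). Values below are arbitrary toy inputs.
import numpy as np

min_embed = np.array([[0.1, 0.2]])
max_embed = np.array([[0.6, 0.9]])

log_vol_uniform = np.sum(np.log(max_embed - min_embed + 1e-8), axis=1)
print(np.exp(log_vol_uniform))   # ~ 0.5 * 0.7 = 0.35

log_vol_exp = np.sum(
    -min_embed + np.log1p(-np.exp(-(max_embed - min_embed))), axis=1)
print(np.exp(log_vol_exp))       # ~ (e^-0.1 - e^-0.6) * (e^-0.2 - e^-0.9)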
def lambda_batch_disjoint_box(nume_min, nume_max, domi_min, domi_max, a, b, c,
                              d, e, f):
  # return log of disjoint-dimension errors
  # <a, b> is the min and max embedding of the specific term,
  # <c, d> is the min and max embedding of the general term,
  # <e, f> is the min and max embedding of the relation term.
  cond = tf.less_equal(nume_max, nume_min)  # batchsize * embed_size
  # choose all those dimensions with true conditions
  meet_min_cond = tf.where(cond, nume_min, tf.ones_like(nume_min))  # batchsize * embed_size
  meet_max_cond = tf.where(cond, nume_max, tf.zeros_like(nume_min))  # batchsize * embed_size
  neg_distance = meet_min_cond - meet_max_cond
  disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
  neg_smooth_log_prob = -smooth_prob(disjoint_box_log)  # because input to log1mexp is positive
  cond = tf.squeeze(cond)
  cond.set_shape([None])
  nume_min = tf.squeeze(nume_min)
  nume_max = tf.squeeze(nume_max)
  onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
  # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob,
  #     [neg_smooth_log_prob, onemp_neg_smooth_log_prob,
  #      tf.boolean_mask(tf.squeeze(nume_min), cond),
  #      tf.boolean_mask(tf.squeeze(nume_max), cond)], 'debug')
  return onemp_neg_smooth_log_prob
def lambda_batch_disjoint_box(join_min, join_max, meet_min, meet_max, a, b, c, d):
  # return log of disjoint-dimension errors
  # <a, b> is the min and max embedding of the specific term,
  # <c, d> is the min and max embedding of the general term
  cond = tf.less_equal(meet_max, meet_min)  # batchsize * embed_size
  # choose all those dimensions with true conditions
  temp_zero = tf.zeros_like(meet_min)
  temp_one = tf.ones_like(meet_min)
  meet_min_cond = tf.where(cond, meet_min, temp_one)  # batchsize * embed_size
  meet_max_cond = tf.where(cond, meet_max, temp_zero)  # batchsize * embed_size
  disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
  smooth_disjoint = smooth_prob(disjoint_box_log)
  baseline_meet_min = tf.where(cond, temp_zero, meet_min)
  baseline_meet_max = tf.where(cond, temp_zero, meet_max)
  # neg_smooth_log_prob = -smooth_prob(disjoint_box_log)  # because input to log1mexp is positive
  # disjoint_box_log = tf.Print(disjoint_box_log,
  #     [disjoint_box_log, smooth_disjoint], 'disjoint box log')
  onemp_neg_smooth_log_prob = -tf_utils.log1mexp(-smooth_disjoint)
  # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob,
  #     [onemp_neg_smooth_log_prob], 'oneemp_neg_smooth_log_prob')
  onemp_neg_smooth_log_prob += lambda_batch_log_prob_with_meet(
      baseline_meet_min, baseline_meet_max, a, b, c, d)
  return onemp_neg_smooth_log_prob
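# Illustrative sketch (not part of the model): how the lambda_batch_disjoint_box
# variants above that fill non-disjoint dimensions with ones and zeros isolate the
# disjoint dimensions. Where the meet box is empty (meet_max <= meet_min) the
# positive gap meet_min - meet_max is kept; every other dimension is replaced by a
# neutral (1, 0) pair so it contributes a factor of 1 to the product. Plain NumPy
# with toy values; batch_log_prob is mimicked with the 'cube' uniform measure.
import numpy as np

meet_min = np.array([[0.4, 0.2, 0.7]])
meet_max = np.array([[0.3, 0.5, 0.6]])   # dimensions 0 and 2 are disjoint

cond = meet_max <= meet_min
meet_min_cond = np.where(cond, meet_min, np.ones_like(meet_min))
meet_max_cond = np.where(cond, meet_max, np.zeros_like(meet_max))
disjoint_box_log = np.sum(np.log(meet_min_cond - meet_max_cond + 1e-8), axis=1)
print(np.exp(disjoint_box_log))          # ~ 0.1 * 1.0 * 0.1 = 0.01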
def __init__(self, data, placeholder, FLAGS):
  self.optimizer = FLAGS.optimizer
  self.opti_epsilon = FLAGS.epsilon
  self.lr = FLAGS.learning_rate
  self.vocab_size = data.vocab_size
  self.measure = FLAGS.measure
  self.embed_dim = FLAGS.embed_dim
  self.batch_size = FLAGS.batch_size
  self.rel_size = FLAGS.rel_size
  self.tuple_model = FLAGS.tuple_model
  self.init_embedding = FLAGS.init_embedding
  self.rang = tf.range(0, FLAGS.batch_size, 1)
  self.temperature = tf.Variable(FLAGS.temperature, trainable=False)
  self.decay_rate = FLAGS.decay_rate
  self.log_space = FLAGS.log_space
  # LSTM params
  self.term = FLAGS.term
  self.hidden_dim = FLAGS.hidden_dim
  self.peephole = FLAGS.peephole
  self.freeze_grad = FLAGS.freeze_grad
  self.regularization_method = FLAGS.regularization_method
  self.marginal_method = FLAGS.marginal_method
  self.t1x = placeholder['t1_idx_placeholder']
  self.t1mask = placeholder['t1_msk_placeholder']
  self.t1length = placeholder['t1_length_placeholder']
  self.t2x = placeholder['t2_idx_placeholder']
  self.t2mask = placeholder['t2_msk_placeholder']
  self.t2length = placeholder['t2_length_placeholder']
  self.rel = placeholder['rel_placeholder']
  self.relmsk = placeholder['rel_msk_placeholder']
  self.label = placeholder['label_placeholder']

  """initialize box embeddings"""
  # initialized to random values at the start
  self.min_embed, self.delta_embed = self.init_word_embedding(data)
  # project the initial embeddings so they are constrained within the unit hypercube
  self.projector = unit_cube.MinMaxHyperCubeProjectorDeltaParam(
      self.min_embed, self.delta_embed, 0.0, 1e-10)
  self.project_op = self.projector.project_op

  """get unit-box representations for both terms, whether they are phrases or words"""
  # for term 1 and term 2, set the min and max embeddings
  self.t1_min_embed, self.t1_max_embed, self.t2_min_embed, self.t2_max_embed = self.get_word_embedding(
      self.t1x, self.t2x)

  """get negative-example unit-box representations, if they are randomly generated during training"""
  if FLAGS.neg == 'uniform':
    neg_num = 1
    self.nt1x = tf.random_uniform([self.batch_size * neg_num, 1], 0,
                                  self.vocab_size, dtype=tf.int32)
    self.nt2x = tf.random_uniform([self.batch_size * neg_num, 1], 0,
                                  self.vocab_size, dtype=tf.int32)
    self.nt1_min_embed, self.nt1_max_embed, self.nt2_min_embed, self.nt2_max_embed = self.get_word_embedding(
        self.nt1x, self.nt2x)
    # combine the original word embeddings with the new embeddings
    self.nt1_min_embed = tf.concat(
        [tf.tile(self.t1_min_embed, [neg_num, 1]), self.nt1_min_embed], axis=0)
    self.nt1_max_embed = tf.concat(
        [tf.tile(self.t1_max_embed, [neg_num, 1]), self.nt1_max_embed], axis=0)
    self.nt2_min_embed = tf.concat(
        [self.nt2_min_embed, tf.tile(self.t2_min_embed, [neg_num, 1])], axis=0)
    self.nt2_max_embed = tf.concat(
        [self.nt2_max_embed, tf.tile(self.t2_max_embed, [neg_num, 1])], axis=0)
    self.label = tf.concat(
        [self.label, tf.zeros([self.batch_size * neg_num * 2])], 0)
    self.t1_uniform_min_embed = tf.concat(
        [self.t1_min_embed, self.nt1_min_embed], axis=0)
    self.t1_uniform_max_embed = tf.concat(
        [self.t1_max_embed, self.nt1_max_embed], axis=0)
    self.t2_uniform_min_embed = tf.concat(
        [self.t2_min_embed, self.nt2_min_embed], axis=0)
    self.t2_uniform_max_embed = tf.concat(
        [self.t2_max_embed, self.nt2_max_embed], axis=0)
    conditional_logits, self.meet_min, self.meet_max, self.disjoint, self.nested, self.overlap_volume, self.rhs_volume = self.get_conditional_probability(
        self.t1_uniform_min_embed, self.t1_uniform_max_embed,
        self.t2_uniform_min_embed, self.t2_uniform_max_embed)
  else:
    conditional_logits, self.meet_min, self.meet_max, self.disjoint, self.nested, self.overlap_volume, self.rhs_volume = self.get_conditional_probability(
        self.t1_min_embed, self.t1_max_embed, self.t2_min_embed,
        self.t2_max_embed)

  evaluation_logits, _, _, _, _, _, _ = self.get_conditional_probability(
      self.t1_min_embed, self.t1_max_embed, self.t2_min_embed,
      self.t2_max_embed)
  self.eval_prob = -evaluation_logits

  """get conditional probability loss"""
  # self.cond_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
  #     labels=self.label, logits=conditional_logits))
  self.cond_loss = -tf.reduce_mean(
      tf.multiply(conditional_logits, self.label) +
      tf.multiply(tf.log(1 - tf.exp(conditional_logits) + 1e-10),
                  1 - self.label))
  if FLAGS.useLossKL:
    # subtract the entropy of the labels to make the loss equivalent to KL
    # (a standalone NumPy check of this identity follows after this method)
    self.cond_entropy = -tf.reduce_mean(
        tf.multiply(tf.log(self.label), self.label) +
        tf.multiply(tf.log(1 - self.label + 1e-10), (1 - self.label + 1e-10)))
    self.cond_loss -= self.cond_entropy  # turn the BCE loss into KL
  self.cond_loss = FLAGS.w1 * self.cond_loss

  """model marginal probability loss"""
  if FLAGS.w2 > 0.0:
    if self.log_space:
      self.max_embed = self.min_embed + tf.exp(self.delta_embed)
    else:
      self.max_embed = self.min_embed + self.delta_embed
    if self.marginal_method == 'universe':
      self.universe_min = tf.reduce_min(self.min_embed, axis=0, keep_dims=True)
      self.universe_max = tf.reduce_max(self.max_embed, axis=0, keep_dims=True)
      self.universe_volume = tf.reduce_prod(
          tf.nn.softplus((self.universe_max - self.universe_min) /
                         self.temperature) * self.temperature,
          axis=-1)
      self.box_volume = tf.reduce_prod(
          tf.nn.softplus((self.max_embed - self.min_embed) /
                         self.temperature) * self.temperature,
          axis=-1)
      self.predicted_marginal_logits = tf.log(self.box_volume) - tf.log(
          self.universe_volume)
    elif self.marginal_method == 'softplus':
      self.box_volume = tf.reduce_prod(
          unit_cube.normalized_softplus(self.delta_embed, self.temperature),
          axis=-1)
      self.predicted_marginal_logits = tf.log(self.box_volume)
    elif self.marginal_method == 'sigmoid':
      self.box_volume = tf.reduce_prod(
          unit_cube.sigmoid_normalized_softplus(self.delta_embed,
                                                self.temperature),
          axis=-1)
      self.predicted_marginal_logits = tf.log(self.box_volume)
    else:
      raise ValueError('Expected either softplus or universe but received',
                       self.marginal_method)
    self.marginal_probability = tf.constant(data.margina_prob)
    self.marginal_probability = tf.reshape(self.marginal_probability,
                                           [self.vocab_size])
    self.marg_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.marginal_probability,
            logits=self.predicted_marginal_logits))
    # self.marg_loss = -tf.reduce_mean(
    #     tf.multiply(self.predicted_marginal_logits, self.marginal_probability) +
    #     tf.multiply(tf.log(1 - tf.exp(self.predicted_marginal_logits) + 1e-10),
    #                 1 - self.marginal_probability))
    if FLAGS.useLossKL:
      # subtract the entropy of the labels to make the loss equivalent to KL
      self.marg_entropy = -tf.reduce_mean(
          tf.multiply(tf.log(self.marginal_probability),
                      self.marginal_probability) +
          tf.multiply(tf.log(1 - self.marginal_probability + 1e-10),
                      (1 - self.marginal_probability + 1e-10)))
      self.marg_loss -= self.marg_entropy  # turn the BCE loss into KL
    self.marg_loss = FLAGS.w2 * self.marg_loss
  else:
    self.marg_loss = tf.constant(0.0)

  self.debug = tf.constant(0.0)
  self.temperature_update = tf.assign_sub(self.temperature, FLAGS.decay_rate)

  if FLAGS.debug:
    # """model cond prob loss"""
    self.pos_disjoint = tf.logical_and(tf.cast(self.label, tf.bool),
                                       self.disjoint)
    self.pos_overlap = tf.logical_and(tf.cast(self.label, tf.bool),
                                      tf.logical_not(self.disjoint))
    self.neg_disjoint = tf.logical_and(
        tf.logical_not(tf.cast(self.label, tf.bool)), self.disjoint)
    self.neg_overlap = tf.logical_and(
        tf.logical_not(tf.cast(self.label, tf.bool)),
        tf.logical_not(self.disjoint))
    self.pos_nested = tf.logical_and(tf.cast(self.label, tf.bool), self.nested)
    self.neg_nested = tf.logical_and(
        tf.logical_not(tf.cast(self.label, tf.bool)), self.nested)
    self.pos_disjoint.set_shape([None])
    self.neg_disjoint.set_shape([None])
    self.pos_overlap.set_shape([None])
    self.neg_overlap.set_shape([None])
    self.pos_nested.set_shape([None])
    self.neg_nested.set_shape([None])
    if self.marginal_method == 'universe':
      lhs_volume = tf.reduce_prod(
          tf.nn.softplus((self.t2_max_embed - self.t2_min_embed) /
                         self.temperature) * self.temperature,
          axis=-1)
      logx = tf.log(self.rhs_volume) - tf.log(self.universe_volume)
      logy = tf.log(lhs_volume) - tf.log(self.universe_volume)
      logxy = tf.log(self.overlap_volume) - tf.log(self.universe_volume)
    elif self.marginal_method == 'softplus':
      logx = tf.log(
          tf.reduce_prod(
              unit_cube.normalized_softplus(
                  (self.t1_max_embed - self.t1_min_embed), self.temperature),
              axis=-1))
      logy = tf.log(
          tf.reduce_prod(
              unit_cube.normalized_softplus(
                  (self.t2_max_embed - self.t2_min_embed), self.temperature),
              axis=-1))
      logxy = tf.log(
          tf.reduce_prod(
              unit_cube.normalized_softplus((self.meet_max - self.meet_min),
                                            self.temperature),
              axis=-1))
    elif self.marginal_method == 'sigmoid':
      logx = tf.log(
          tf.reduce_prod(
              unit_cube.sigmoid_normalized_softplus(
                  (self.t1_max_embed - self.t1_min_embed), self.temperature),
              axis=-1))
      logy = tf.log(
          tf.reduce_prod(
              unit_cube.sigmoid_normalized_softplus(
                  (self.t2_max_embed - self.t2_min_embed), self.temperature),
              axis=-1))
      logxy = tf.log(
          tf.reduce_prod(
              unit_cube.sigmoid_normalized_softplus(
                  (self.meet_max - self.meet_min), self.temperature),
              axis=-1))
    else:
      raise ValueError('Expected either softplus or universe but received',
                       self.marginal_method)
    lognume1 = logxy
    lognume2 = logx + logy
    logdomi = 0.5 * (logx + logy + tf_utils.log1mexp(-logx) +
                     tf_utils.log1mexp(-logy))
    correlation = tf.exp(lognume1 - logdomi) - tf.exp(lognume2 - logdomi)
    self.marg_loss = tf.Print(self.marg_loss, [
        tf.exp(self.predicted_marginal_logits), self.marginal_probability,
        self.box_volume
    ], 'marginal prediction and label')
    self.cond_loss = tf.Print(self.cond_loss,
                              [tf.exp(conditional_logits), self.label],
                              'conditional prediction and label')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_sum(tf.cast(self.pos_nested, tf.int32)),
        tf.boolean_mask(tf.exp(conditional_logits), self.pos_nested)
    ], 'pos nested number')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_sum(tf.cast(self.neg_nested, tf.int32)),
        tf.boolean_mask(tf.exp(conditional_logits), self.neg_nested)
    ], 'neg nested number')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_mean(
            tf.boolean_mask(tf.exp(conditional_logits), self.pos_disjoint)),
        tf.reduce_sum(tf.cast(self.pos_disjoint, tf.int32)),
        tf.count_nonzero(
            tf.less_equal(tf.boolean_mask(correlation, self.pos_disjoint), 0)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logxy), self.pos_disjoint)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.pos_disjoint)),
        tf.boolean_mask(self.t2_max_embed, self.pos_disjoint),
        tf.boolean_mask(self.t2_min_embed, self.pos_disjoint)
    ], 'pos disjoint loss')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_mean(
            tf.boolean_mask(tf.exp(conditional_logits), self.pos_overlap)),
        tf.reduce_sum(tf.cast(self.pos_overlap, tf.int32)),
        tf.count_nonzero(
            tf.less_equal(tf.boolean_mask(correlation, self.pos_overlap), 0)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logxy), self.pos_overlap)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.pos_overlap))
    ], 'pos overlap loss')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_mean(
            tf.boolean_mask(tf.exp(conditional_logits), self.neg_disjoint)),
        tf.reduce_sum(tf.cast(self.neg_disjoint, tf.int32)),
        tf.count_nonzero(
            tf.less_equal(tf.boolean_mask(correlation, self.neg_disjoint), 0)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logxy), self.neg_disjoint)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.neg_disjoint))
    ], 'neg disjoint loss')
    self.cond_loss = tf.Print(self.cond_loss, [
        tf.reduce_mean(
            tf.boolean_mask(tf.exp(conditional_logits), self.neg_overlap)),
        tf.reduce_sum(tf.cast(self.neg_overlap, tf.int32)),
        tf.count_nonzero(
            tf.less_equal(tf.boolean_mask(correlation, self.neg_overlap), 0)),
        tf.boolean_mask(self.t1x, self.neg_overlap),
        tf.boolean_mask(self.t2x, self.neg_overlap),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logxy), self.neg_overlap)),
        tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.neg_overlap))
    ], 'neg overlap loss')

  """model regularization"""
  if self.regularization_method == 'universe_edge' and FLAGS.r1 > 0.0:
    self.regularization = FLAGS.r1 * tf.reduce_mean(
        tf.nn.softplus(self.universe_max - self.universe_min))
  elif self.regularization_method == 'delta' and FLAGS.r1 > 0.0:
    if self.log_space:
      self.regularization = FLAGS.r1 * tf.reduce_mean(
          tf.square(tf.exp(self.delta_embed)))
    else:
      self.regularization = FLAGS.r1 * tf.reduce_mean(
          tf.square(self.delta_embed))
  else:
    self.regularization = tf.constant(0.0)

  """model final loss"""
  self.loss = self.cond_loss + self.marg_loss + self.regularization

  """loss gradient"""
  grads = tf.gradients(self.loss, tf.trainable_variables())
  grad_norm = 0.0
  for g in grads:
    grad_norm += tf.reduce_sum(g.values * g.values)
  grad_norm = tf.sqrt(grad_norm)
  self.grad_norm = grad_norm
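# Illustrative sketch (standalone, not part of the training graph): the
# FLAGS.useLossKL branches above subtract the entropy of the soft labels from the
# binary cross-entropy, which turns BCE into the KL divergence, since
# H(y, p) - H(y) = KL(y || p). Plain NumPy check with toy values (epsilon terms
# omitted).
import numpy as np

y = np.array([0.2, 0.7])   # soft labels
p = np.array([0.3, 0.6])   # predicted probabilities
bce = -(y * np.log(p) + (1 - y) * np.log(1 - p))
ent = -(y * np.log(y) + (1 - y) * np.log(1 - y))
kl = y * np.log(y / p) + (1 - y) * np.log((1 - y) / (1 - p))
assert np.allclose(bce - ent, kl)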