Example #1
def lambda_batch_log_1minus_prob(nume_min, nume_max, domi_min, domi_max, a, b,
                                 c, d, e, f):
    # this function returns -log(1 - p(a, b))
    # we want to minimize this value
    joint_log = batch_log_prob(nume_min, nume_max)
    domi_log = batch_log_prob(domi_min, domi_max)  # batch_size
    cond_log = joint_log - domi_log  # (batch_size)
    neg_smooth_log_prob = -smooth_prob(cond_log)
    # because the input to log1mexp must be positive
    onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
    return onemp_neg_smooth_log_prob
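For reference: setting aside the smoothing applied by smooth_prob, and assuming tf_utils.log1mexp follows the standard convention $\mathrm{log1mexp}(x) = \log(1 - e^{-x})$ for $x > 0$, the value returned above is

$$-\log\bigl(1 - P(a \mid b)\bigr), \qquad \log P(a \mid b) = \log P_{\text{nume}} - \log P_{\text{domi}},$$

where $\log P_{\text{nume}}$ and $\log P_{\text{domi}}$ are the box log-volumes computed by batch_log_prob; negating the conditional log-probability is what makes the argument of log1mexp positive.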
Example #2
def lambda_batch_log_1minus_prob(join_min, join_max, meet_min, meet_max, a, b,
                                 c, d):
    # this function returns -log(1 - p(a, b))
    # we want to minimize this value
    joint_log = batch_log_prob(meet_min, meet_max)
    domi_log = batch_log_prob(a, b)  # batch_size
    cond_log = joint_log - domi_log  # (batch_size)
    neg_smooth_log_prob = -smooth_prob(cond_log)
    # because the input to log1mexp must be positive
    # neg_smooth_log_prob = tf.Print(neg_smooth_log_prob, [tf.reduce_sum(neg_smooth_log_prob)], 'neg debug')
    onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
    # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob, [tf.reduce_sum(onemp_neg_smooth_log_prob)], 'onem')
    return onemp_neg_smooth_log_prob
Example #3
def batch_log_upper_bound(join_min, join_max, a, b, c, d):
    # join_min: batchsize * embed_size
    # join_max: batchsize * embed_size
    # log_prob: batch_size
    join_log_prob = batch_log_prob(join_min, join_max)
    join_log_prob_new = tf.reduce_logsumexp(tf.stack(
        [tf.fill([tf.shape(join_log_prob)[0]], tf.log(0.1)), join_log_prob],
        axis=1),
                                            axis=1)
    x_log_prob = batch_log_prob(a, b)  # batchsize
    y_log_prob = batch_log_prob(c, d)  # batchsize
    log_xy = tf.reduce_logsumexp(tf.stack([x_log_prob, y_log_prob], axis=1),
                                 axis=1)
    log_upper_bound = join_log_prob_new + tf_utils.log1mexp(join_log_prob_new -
                                                            log_xy)
    return log_upper_bound
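Read directly off the ops (under the same assumption about tf_utils.log1mexp as above), the two stack/logsumexp pairs compute $\log(0.1 + P_{\text{join}})$ and $\log(P_x + P_y)$, and the last line combines them, entirely in log space, into

$$\text{log\_upper\_bound} = \log\bigl((0.1 + P_{\text{join}}) - (P_x + P_y)\bigr) = \log(0.1 + P_{\text{join}}) + \log\Bigl(1 - \frac{P_x + P_y}{0.1 + P_{\text{join}}}\Bigr).$$

This is only a restatement of what the code evaluates, not a derivation of the bound itself.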
Example #4
def lambda_batch_disjoint_box(join_min, join_max, meet_min, meet_max, a, b, c,
                              d):
    # return log of disjoint dimension errors
    # <a, b> are the min and max embeddings of the specific term; <c, d> are the min and max embeddings of the general term
    cond = tf.less_equal(meet_max, meet_min)  # batchsize * embed_size
    # choose all dimensions where the condition is true
    temp_zero = tf.zeros_like(meet_min)
    meet_min_cond = tf.where(cond, meet_min,
                             temp_zero)  #batchsize * embed_size
    meet_max_cond = tf.where(cond, meet_max,
                             temp_zero)  #batchsize * embed_size
    disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
    # neg_smooth_log_prob = -smooth_prob(disjoint_box_log)
    # because the input to log1mexp must be positive
    onemp_neg_smooth_log_prob = -tf_utils.log1mexp(-disjoint_box_log)
    return onemp_neg_smooth_log_prob
Example #5
def batch_log_prob(min_embed, max_embed):
    # min_embed: batchsize * embed_size
    # max_embed: batchsize * embed_size
    # log_prob: batch_size
    # numerically stable log probability (volume) of a box
    if FLAGS.measure == 'uniform':
        if FLAGS.model == 'poe':
            log_prob = tf.reduce_sum(
                tf.log(tf.ones_like(max_embed) - min_embed + 1e-8), axis=1)
        elif FLAGS.model == 'cube':
            log_prob = tf.reduce_sum(tf.log((max_embed - min_embed) + 1e-8),
                                     axis=1)
        else:
            raise ValueError('Expected poe or cube, but received', FLAGS.model)
    elif FLAGS.measure == 'exp':
        log_prob = tf.reduce_sum(-min_embed +
                                 tf_utils.log1mexp(max_embed - min_embed),
                                 axis=1)
    else:
        raise ValueError('Expected uniform or exp, but received', FLAGS.measure)
    return log_prob
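Every snippet above depends on tf_utils.log1mexp, whose definition is not shown. Under the 'exp' measure each dimension contributes -min + log1mexp(max - min) = log(exp(-min) - exp(-max)), i.e. the mass of the interval [min, max] under a unit exponential. Below is a minimal sketch of a numerically stable log1mexp, assuming the standard convention log1mexp(x) = log(1 - exp(-x)) for x > 0; it is a hypothetical stand-in, not necessarily the project's own implementation.

import math

import tensorflow as tf


def log1mexp(x):
    # Numerically stable log(1 - exp(-x)) for x > 0 (Maechler's recipe):
    # use log(-expm1(-x)) when x is small and log1p(-exp(-x)) when x is
    # large, to avoid catastrophic cancellation in either regime.
    x = tf.convert_to_tensor(x)
    return tf.where(x < math.log(2.0),
                    tf.log(-tf.expm1(-x)),
                    tf.log1p(-tf.exp(-x)))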
Example #6
def lambda_batch_disjoint_box(nume_min, nume_max, domi_min, domi_max, a, b, c,
                              d, e, f):
    # return log of disjoint dimension errors
    # <a, b> are the min and max embeddings of the specific term; <c, d> are the min and max embeddings of the general term
    # <e, f> are the min and max embeddings of the relation term.
    cond = tf.less_equal(nume_max, nume_min)  # batchsize * embed_size
    # choose all dimensions where the condition is true
    meet_min_cond = tf.where(cond, nume_min,
                             tf.ones_like(nume_min))  #batchsize * embed_size
    meet_max_cond = tf.where(cond, nume_max,
                             tf.zeros_like(nume_min))  #batchsize * embed_size
    neg_distance = meet_min_cond - meet_max_cond
    disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
    neg_smooth_log_prob = -smooth_prob(disjoint_box_log)
    # because the input to log1mexp must be positive
    cond = tf.squeeze(cond)
    cond.set_shape([None])
    nume_min = tf.squeeze(nume_min)
    nume_max = tf.squeeze(nume_max)
    onemp_neg_smooth_log_prob = -tf_utils.log1mexp(neg_smooth_log_prob)
    # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob, [neg_smooth_log_prob, onemp_neg_smooth_log_prob, tf.boolean_mask(tf.squeeze(nume_min), cond),  tf.boolean_mask(tf.squeeze(nume_max), cond)], 'debug')

    return onemp_neg_smooth_log_prob
Example #7
def lambda_batch_disjoint_box(join_min, join_max, meet_min, meet_max, a, b, c,
                              d):
    # return log of disjoint dimension errors
    # <a, b> are the min and max embeddings of the specific term; <c, d> are the min and max embeddings of the general term
    cond = tf.less_equal(meet_max, meet_min)  # batchsize * embed_size
    # choose all dimensions where the condition is true
    temp_zero = tf.zeros_like(meet_min)
    temp_one = tf.ones_like(meet_min)
    meet_min_cond = tf.where(cond, meet_min, temp_one)  #batchsize * embed_size
    meet_max_cond = tf.where(cond, meet_max,
                             temp_zero)  #batchsize * embed_size
    disjoint_box_log = batch_log_prob(meet_max_cond, meet_min_cond)
    smooth_disjoint = smooth_prob(disjoint_box_log)
    baseline_meet_min = tf.where(cond, temp_zero, meet_min)
    baseline_meet_max = tf.where(cond, temp_zero, meet_max)
    # neg_smooth_log_prob = -smooth_prob(disjoint_box_log)
    # because the input to log1mexp must be positive
    # disjoint_box_log = tf.Print(disjoint_box_log, [disjoint_box_log, smooth_disjoint], 'disjoint box log')
    onemp_neg_smooth_log_prob = -tf_utils.log1mexp(-smooth_disjoint)
    # onemp_neg_smooth_log_prob = tf.Print(onemp_neg_smooth_log_prob, [onemp_neg_smooth_log_prob], 'oneemp_neg_smooth_log_prob')
    onemp_neg_smooth_log_prob += lambda_batch_log_prob_with_meet(
        baseline_meet_min, baseline_meet_max, a, b, c, d)
    return onemp_neg_smooth_log_prob
Example #8
    def __init__(self, data, placeholder, FLAGS):
        self.optimizer = FLAGS.optimizer
        self.opti_epsilon = FLAGS.epsilon
        self.lr = FLAGS.learning_rate
        self.vocab_size = data.vocab_size
        self.measure = FLAGS.measure
        self.embed_dim = FLAGS.embed_dim
        self.batch_size = FLAGS.batch_size
        self.rel_size = FLAGS.rel_size
        self.tuple_model = FLAGS.tuple_model
        self.init_embedding = FLAGS.init_embedding
        self.rang = tf.range(0, FLAGS.batch_size, 1)
        self.temperature = tf.Variable(FLAGS.temperature, trainable=False)
        self.decay_rate = FLAGS.decay_rate
        self.log_space = FLAGS.log_space
        # LSTM Params
        self.term = FLAGS.term
        self.hidden_dim = FLAGS.hidden_dim
        self.peephole = FLAGS.peephole
        self.freeze_grad = FLAGS.freeze_grad
        self.regularization_method = FLAGS.regularization_method
        self.marginal_method = FLAGS.marginal_method

        self.t1x = placeholder['t1_idx_placeholder']
        self.t1mask = placeholder['t1_msk_placeholder']
        self.t1length = placeholder['t1_length_placeholder']
        self.t2x = placeholder['t2_idx_placeholder']
        self.t2mask = placeholder['t2_msk_placeholder']
        self.t2length = placeholder['t2_length_placeholder']
        self.rel = placeholder['rel_placeholder']
        self.relmsk = placeholder['rel_msk_placeholder']
        self.label = placeholder['label_placeholder']
        """Initiate box embeddings"""
        # init to random values at the start
        self.min_embed, self.delta_embed = self.init_word_embedding(data)

        # project the initial embeddings so that they stay within the unit hypercube
        self.projector = unit_cube.MinMaxHyperCubeProjectorDeltaParam(
            self.min_embed, self.delta_embed, 0.0, 1e-10)
        self.project_op = self.projector.project_op
        """get unit box representation for both term, no matter they are phrases or words"""
        # For the terms-1 and terms-2 set the min and max embeds
        self.t1_min_embed, self.t1_max_embed, self.t2_min_embed, self.t2_max_embed = self.get_word_embedding(
            self.t1x, self.t2x)
        """get negative example unit box representation, if it's randomly generated during training."""
        if FLAGS.neg == 'uniform':
            neg_num = 1
            self.nt1x = tf.random_uniform([self.batch_size * neg_num, 1],
                                          0,
                                          self.vocab_size,
                                          dtype=tf.int32)
            self.nt2x = tf.random_uniform([self.batch_size * neg_num, 1],
                                          0,
                                          self.vocab_size,
                                          dtype=tf.int32)
            self.nt1_min_embed, self.nt1_max_embed, self.nt2_min_embed, self.nt2_max_embed = self.get_word_embedding(
                self.nt1x, self.nt2x)
            # combine the original word embedding with the new embeddings.
            self.nt1_min_embed = tf.concat(
                [tf.tile(self.t1_min_embed, [neg_num, 1]), self.nt1_min_embed],
                axis=0)
            self.nt1_max_embed = tf.concat(
                [tf.tile(self.t1_max_embed, [neg_num, 1]), self.nt1_max_embed],
                axis=0)
            self.nt2_min_embed = tf.concat(
                [self.nt2_min_embed,
                 tf.tile(self.t2_min_embed, [neg_num, 1])],
                axis=0)
            self.nt2_max_embed = tf.concat(
                [self.nt2_max_embed,
                 tf.tile(self.t2_max_embed, [neg_num, 1])],
                axis=0)
            self.label = tf.concat(
                [self.label,
                 tf.zeros([self.batch_size * neg_num * 2])], 0)
            self.t1_uniform_min_embed = tf.concat(
                [self.t1_min_embed, self.nt1_min_embed], axis=0)
            self.t1_uniform_max_embed = tf.concat(
                [self.t1_max_embed, self.nt1_max_embed], axis=0)
            self.t2_uniform_min_embed = tf.concat(
                [self.t2_min_embed, self.nt2_min_embed], axis=0)
            self.t2_uniform_max_embed = tf.concat(
                [self.t2_max_embed, self.nt2_max_embed], axis=0)
            conditional_logits, self.meet_min, self.meet_max, self.disjoint, self.nested, self.overlap_volume, self.rhs_volume = self.get_conditional_probability(
                self.t1_uniform_min_embed, self.t1_uniform_max_embed,
                self.t2_uniform_min_embed, self.t2_uniform_max_embed)
        else:
            conditional_logits, self.meet_min, self.meet_max, self.disjoint, self.nested, self.overlap_volume, self.rhs_volume = self.get_conditional_probability(
                self.t1_min_embed, self.t1_max_embed, self.t2_min_embed,
                self.t2_max_embed)

        evaluation_logits, _, _, _, _, _, _ = self.get_conditional_probability(
            self.t1_min_embed, self.t1_max_embed, self.t2_min_embed,
            self.t2_max_embed)
        self.eval_prob = -evaluation_logits
        """get conditional probability loss"""
        # self.cond_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = self.label, logits=conditional_logits))
        self.cond_loss = -tf.reduce_mean(
            tf.multiply(conditional_logits, self.label) +
            tf.multiply(tf.log(1 - tf.exp(conditional_logits) + 1e-10), 1 -
                        self.label))

        if FLAGS.useLossKL:  # Subtract the entropy of labels to make it equivalent to KL
            self.cond_entropy = -tf.reduce_mean(
                tf.multiply(tf.log(self.label), self.label) +
                tf.multiply(tf.log(1 - self.label + 1e-10),
                            (1 - self.label + 1e-10)))
            self.cond_loss -= self.cond_entropy  # turn the BCE loss into a KL divergence

        self.cond_loss = FLAGS.w1 * self.cond_loss
        """model marg prob loss"""
        if FLAGS.w2 > 0.0:
            if self.log_space:
                self.max_embed = self.min_embed + tf.exp(self.delta_embed)
            else:
                self.max_embed = self.min_embed + self.delta_embed

            if self.marginal_method == 'universe':
                self.universe_min = tf.reduce_min(self.min_embed,
                                                  axis=0,
                                                  keep_dims=True)
                self.universe_max = tf.reduce_max(self.max_embed,
                                                  axis=0,
                                                  keep_dims=True)
                self.universe_volume = tf.reduce_prod(tf.nn.softplus(
                    (self.universe_max - self.universe_min) / self.temperature)
                                                      * self.temperature,
                                                      axis=-1)
                self.box_volume = tf.reduce_prod(tf.nn.softplus(
                    (self.max_embed - self.min_embed) / self.temperature) *
                                                 self.temperature,
                                                 axis=-1)
                self.predicted_marginal_logits = tf.log(
                    self.box_volume) - tf.log(self.universe_volume)
            elif self.marginal_method == 'softplus':
                self.box_volume = tf.reduce_prod(unit_cube.normalized_softplus(
                    self.delta_embed, self.temperature),
                                                 axis=-1)
                self.predicted_marginal_logits = tf.log(self.box_volume)
            elif self.marginal_method == 'sigmoid':
                self.box_volume = tf.reduce_prod(
                    unit_cube.sigmoid_normalized_softplus(
                        self.delta_embed, self.temperature),
                    axis=-1)
                self.predicted_marginal_logits = tf.log(self.box_volume)
            else:
                raise ValueError(
                    "Expected universe, softplus, or sigmoid but received",
                    self.marginal_method)

            self.marginal_probability = tf.constant(data.margina_prob)
            self.marginal_probability = tf.reshape(self.marginal_probability,
                                                   [self.vocab_size])

            self.marg_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.marginal_probability,
                    logits=self.predicted_marginal_logits))

            # self.marg_loss = -tf.reduce_mean(tf.multiply(self.predicted_marginal_logits, self.marginal_probability) +
            #                                  tf.multiply(tf.log(1-tf.exp(self.predicted_marginal_logits)+1e-10), 1-self.marginal_probability))

            if FLAGS.useLossKL:  # Subtract the entropy of labels to make it equivalent to KL
                self.marg_entropy = -tf.reduce_mean(
                    tf.multiply(tf.log(self.marginal_probability),
                                self.marginal_probability) +
                    tf.multiply(tf.log(1 - self.marginal_probability + 1e-10),
                                (1 - self.marginal_probability + 1e-10)))
                self.marg_loss -= self.marg_entropy  # turn the BCE loss into a KL divergence

            self.marg_loss = FLAGS.w2 * self.marg_loss

        else:
            self.marg_loss = tf.constant(0.0)
        self.debug = tf.constant(0.0)
        self.temperature_update = tf.assign_sub(self.temperature,
                                                FLAGS.decay_rate)

        if FLAGS.debug:
            # """model cond prob loss"""
            self.pos_disjoint = tf.logical_and(tf.cast(self.label, tf.bool),
                                               self.disjoint)
            self.pos_overlap = tf.logical_and(tf.cast(self.label, tf.bool),
                                              tf.logical_not(self.disjoint))
            self.neg_disjoint = tf.logical_and(
                tf.logical_not(tf.cast(self.label, tf.bool)), self.disjoint)
            self.neg_overlap = tf.logical_and(
                tf.logical_not(tf.cast(self.label, tf.bool)),
                tf.logical_not(self.disjoint))
            self.pos_nested = tf.logical_and(tf.cast(self.label, tf.bool),
                                             self.nested)
            self.neg_nested = tf.logical_and(
                tf.logical_not(tf.cast(self.label, tf.bool)), self.nested)
            self.pos_disjoint.set_shape([None])
            self.neg_disjoint.set_shape([None])
            self.pos_overlap.set_shape([None])
            self.neg_overlap.set_shape([None])
            self.pos_nested.set_shape([None])
            self.neg_nested.set_shape([None])
            if self.marginal_method == 'universe':
                lhs_volume = tf.reduce_prod(tf.nn.softplus(
                    (self.t2_max_embed - self.t2_min_embed) / self.temperature)
                                            * self.temperature,
                                            axis=-1)
                logx = tf.log(self.rhs_volume) - tf.log(self.universe_volume)
                logy = tf.log(lhs_volume) - tf.log(self.universe_volume)
                logxy = tf.log(
                    self.overlap_volume) - tf.log(self.universe_volume)
            elif self.marginal_method == 'softplus':
                logx = tf.log(
                    tf.reduce_prod(unit_cube.normalized_softplus(
                        (self.t1_max_embed - self.t1_min_embed),
                        self.temperature),
                                   axis=-1))
                logy = tf.log(
                    tf.reduce_prod(unit_cube.normalized_softplus(
                        (self.t2_max_embed - self.t2_min_embed),
                        self.temperature),
                                   axis=-1))
                logxy = tf.log(
                    tf.reduce_prod(unit_cube.normalized_softplus(
                        (self.meet_max - self.meet_min), self.temperature),
                                   axis=-1))
            elif self.marginal_method == 'sigmoid':
                logx = tf.log(
                    tf.reduce_prod(unit_cube.sigmoid_normalized_softplus(
                        (self.t1_max_embed - self.t1_min_embed),
                        self.temperature),
                                   axis=-1))
                logy = tf.log(
                    tf.reduce_prod(unit_cube.sigmoid_normalized_softplus(
                        (self.t2_max_embed - self.t2_min_embed),
                        self.temperature),
                                   axis=-1))
                logxy = tf.log(
                    tf.reduce_prod(unit_cube.sigmoid_normalized_softplus(
                        (self.meet_max - self.meet_min), self.temperature),
                                   axis=-1))
            else:
                raise ValueError(
                    "Expected universe, softplus, or sigmoid but received",
                    self.marginal_method)
            lognume1 = logxy
            lognume2 = logx + logy
            logdomi = 0.5 * (logx + logy + tf_utils.log1mexp(-logx) +
                             tf_utils.log1mexp(-logy))
            correlation = tf.exp(lognume1 - logdomi) - tf.exp(lognume2 -
                                                              logdomi)
            self.marg_loss = tf.Print(self.marg_loss, [
                tf.exp(self.predicted_marginal_logits),
                self.marginal_probability, self.box_volume
            ], 'marginal prediction and label')
            self.cond_loss = tf.Print(self.cond_loss,
                                      [tf.exp(conditional_logits), self.label],
                                      'conditional prediction and label')
            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_sum(tf.cast(self.pos_nested, tf.int32)),
                tf.boolean_mask(tf.exp(conditional_logits), self.pos_nested)
            ], 'pos nested number')
            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_sum(tf.cast(self.neg_nested, tf.int32)),
                tf.boolean_mask(tf.exp(conditional_logits), self.neg_nested)
            ], 'neg nested number')
            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(conditional_logits),
                                    self.pos_disjoint)),
                tf.reduce_sum(tf.cast(self.pos_disjoint, tf.int32)),
                tf.count_nonzero(
                    tf.less_equal(
                        tf.boolean_mask(correlation, self.pos_disjoint), 0)),
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(logxy), self.pos_disjoint)),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logx),
                                               self.pos_disjoint)),
                tf.boolean_mask(self.t2_max_embed, self.pos_disjoint),
                tf.boolean_mask(self.t2_min_embed, self.pos_disjoint)
            ], 'pos disjoint loss')

            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(conditional_logits),
                                    self.pos_overlap)),
                tf.reduce_sum(tf.cast(self.pos_overlap, tf.int32)),
                tf.count_nonzero(
                    tf.less_equal(
                        tf.boolean_mask(correlation, self.pos_overlap), 0)),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logxy),
                                               self.pos_overlap)),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.pos_overlap))
            ], 'pos overlap loss')

            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(conditional_logits),
                                    self.neg_disjoint)),
                tf.reduce_sum(tf.cast(self.neg_disjoint, tf.int32)),
                tf.count_nonzero(
                    tf.less_equal(
                        tf.boolean_mask(correlation, self.neg_disjoint), 0)),
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(logxy), self.neg_disjoint)),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logx),
                                               self.neg_disjoint))
            ], 'neg disjoint loss')

            self.cond_loss = tf.Print(self.cond_loss, [
                tf.reduce_mean(
                    tf.boolean_mask(tf.exp(conditional_logits),
                                    self.neg_overlap)),
                tf.reduce_sum(tf.cast(self.neg_overlap, tf.int32)),
                tf.count_nonzero(
                    tf.less_equal(
                        tf.boolean_mask(correlation, self.neg_overlap), 0)),
                tf.boolean_mask(self.t1x, self.neg_overlap),
                tf.boolean_mask(self.t2x, self.neg_overlap),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logxy),
                                               self.neg_overlap)),
                tf.reduce_mean(tf.boolean_mask(tf.exp(logx), self.neg_overlap))
            ], 'neg overlap loss')
        """model regurlization"""
        if self.regularization_method == 'universe_edge' and FLAGS.r1 > 0.0:
            self.regularization = FLAGS.r1 * tf.reduce_mean(
                tf.nn.softplus(self.universe_max - self.universe_min))
        elif self.regularization_method == 'delta' and FLAGS.r1 > 0.0:
            if self.log_space:
                self.regularization = FLAGS.r1 * tf.reduce_mean(
                    tf.square(tf.exp(self.delta_embed)))
            else:
                self.regularization = FLAGS.r1 * tf.reduce_mean(
                    tf.square(self.delta_embed))
        else:
            self.regularization = tf.constant(0.0)
        """model final loss"""

        self.loss = self.cond_loss + self.marg_loss + self.regularization
        """loss gradient"""
        grads = tf.gradients(self.loss, tf.trainable_variables())
        grad_norm = 0.0
        for g in grads:
            grad_norm += tf.reduce_sum(g.values * g.values)
        grad_norm = tf.sqrt(grad_norm)
        self.grad_norm = grad_norm
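The hand-rolled norm above assumes every gradient is a tf.IndexedSlices (the sparse gradients produced by embedding lookups); a dense tf.Tensor gradient has no .values attribute. For comparison, tf.global_norm accepts both dense and sparse gradients and reduces to the same quantity when all gradients are sparse. A small, self-contained illustration follows; it is hypothetical and not part of the original model code.

import tensorflow as tf

# A variable read through tf.gather yields IndexedSlices gradients,
# mirroring how the embedding tables are used in the model above.
w = tf.Variable(tf.random_uniform([10, 4]))
idx = tf.constant([0, 3, 3])
loss = tf.reduce_sum(tf.square(tf.gather(w, idx)))
grads = tf.gradients(loss, [w])    # a list containing IndexedSlices
grad_norm = tf.global_norm(grads)  # sqrt of the summed squared values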