Example #1
    def _get_train_op(self, loss):
        """
        for model that gradient can be computed with respect to loss, e.g., LogisticRegression and RankNet
        """
        with tf.name_scope("optimization"):
            if self.params["optimizer_type"] == "nadam":
                optimizer = NadamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8,
                    schedule_decay=self.params["schedule_decay"])
            elif self.params["optimizer_type"] == "adam":
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss,
                                              global_step=self.global_step)

        return train_op
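
The UPDATE_OPS control dependency in _get_train_op matters when the score function
contains layers (e.g., batch normalization) that register moving-average updates in
that collection. Below is a minimal standalone sketch of the same pattern in TF 1.x
graph mode via tf.compat.v1; the toy loss, variable names, and the manually
registered update op are illustrative assumptions, not part of the original class:

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    x = tf.compat.v1.placeholder(tf.float32, [None, 4])
    y = tf.compat.v1.placeholder(tf.float32, [None, 1])
    w = tf.compat.v1.get_variable("w", shape=[4, 1])
    loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

    # stand-in for the updates a batch-norm layer would register automatically
    running_mean = tf.compat.v1.get_variable("running_mean", shape=[4], trainable=False)
    tf.compat.v1.add_to_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS,
        tf.compat.v1.assign(running_mean,
                            0.99 * running_mean + 0.01 * tf.reduce_mean(x, axis=0)))

    global_step = tf.compat.v1.train.get_or_create_global_step()
    optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-8)

    # the control dependency forces the collected update ops to run on every step
    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step=global_step)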
Example #2
    def _build_factorized_model(self):
        # score
        score = self._score_fn(self.feature)

        # pairwise targets: S_ij in {-1, 0, 1} encodes whether document i is less,
        # equally, or more relevant than document j; P_ij = (1 + S_ij) / 2 is the
        # target probability that i should rank above j
        S_ij = self.label - tf.transpose(self.label)
        S_ij = tf.maximum(tf.minimum(1., S_ij), -1.)
        P_ij = (1 / 2) * (1 + S_ij)
        s_i_minus_s_j = logits = score - tf.transpose(score)
        sigma = self.params["sigma"]
        # lambda_ij: derivative of the RankNet pairwise cost w.r.t. s_i - s_j
        lambda_ij = sigma * (
            (1 / 2) * (1 - S_ij) - tf.nn.sigmoid(-sigma * s_i_minus_s_j))

        logloss = tf.nn.sigmoid_cross_entropy_with_logits(logits=s_i_minus_s_j,
                                                          labels=P_ij)

        # only keep the loss over pairs of documents from the same query group
        mask1 = tf.equal(self.qid - tf.transpose(self.qid), 0)
        mask1 = tf.cast(mask1, tf.float32)
        # exclude the pair formed by a sample and itself
        n = tf.shape(self.feature)[0]
        mask2 = tf.ones([n, n]) - tf.diag(tf.ones([n]))
        mask = mask1 * mask2
        num_pairs = tf.reduce_sum(mask)

        loss = tf.cond(tf.equal(num_pairs, 0), lambda: 0.,
                       lambda: tf.reduce_sum(logloss * mask) / num_pairs)

        lambda_ij = lambda_ij * mask

        vars = tf.trainable_variables()
        grads = [
            self._get_derivative(score, Wk, lambda_ij, self.feature)
            for Wk in vars
        ]

        with tf.name_scope("optimization"):
            if self.params["optimizer_type"] == "nadam":
                optimizer = NadamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8,
                    schedule_decay=self.params["schedule_decay"])
            elif self.params["optimizer_type"] == "adam":
                optimizer = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8)

            update_ops = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(zip(grads, vars))

        return loss, num_pairs, score, train_op
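
The _get_derivative helper used above is not shown in this example. A hypothetical
sketch of one plausible implementation, assuming the usual RankNet factorization in
which the per-document multiplier lambda_i is the row sum minus the column sum of
lambda_ij and is then back-propagated through the score (the real helper may differ):

    def _get_derivative(self, score, Wk, lambda_ij, feature):
        # dC/ds_i = sum_j lambda_ij - sum_j lambda_ji  (row sum minus column sum)
        lambda_i = (tf.reduce_sum(lambda_ij, axis=1, keepdims=True)
                    - tf.reduce_sum(tf.transpose(lambda_ij), axis=1, keepdims=True))
        # chain rule: dC/dWk = sum_i lambda_i * ds_i/dWk; feature is unused here
        return tf.gradients(ys=score, xs=Wk, grad_ys=lambda_i)[0]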
Example #3
    def _get_train_op(self):
        with tf.name_scope("optimization"):
            if self.params["optimizer_type"] == "nadam":
                optimizer = NadamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8,
                    schedule_decay=self.params["schedule_decay"])
            elif self.params["optimizer_type"] == "adam":
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.params["beta1"],
                    beta2=self.params["beta2"],
                    epsilon=1e-8)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(self.loss,
                                              global_step=self.global_step)

        return train_op
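
Note that both branches above assume optimizer_type is either "nadam" or "adam"; any
other value leaves optimizer unbound and raises a NameError at graph-build time. A
small defensive addition (a suggestion, not part of the original code) would be an
explicit else branch:

            else:
                raise ValueError(
                    "unsupported optimizer_type: %s" % self.params["optimizer_type"])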
Example #4
    def _build_model(self):
        # score
        score = self._score_fn(self.feature)

        # pairwise targets: S_ij in {-1, 0, 1} encodes whether document i is less,
        # equally, or more relevant than document j; P_ij = (1 + S_ij) / 2 is the
        # target probability that i should rank above j
        S_ij = self.label - tf.transpose(self.label)
        S_ij = tf.maximum(tf.minimum(1., S_ij), -1.)
        P_ij = (1 / 2) * (1 + S_ij)
        s_i_minus_s_j = logits = score - tf.transpose(score)
        sigma = self.params["sigma"]
        # lambda_ij: derivative of the RankNet pairwise cost w.r.t. s_i - s_j
        lambda_ij = sigma * ((1 / 2) * (1 - S_ij) - tf.nn.sigmoid(-sigma*s_i_minus_s_j))
        # lambda_ij = -sigma * tf.nn.sigmoid(-sigma*s_i_minus_s_j)

        logloss = tf.nn.sigmoid_cross_entropy_with_logits(logits=s_i_minus_s_j, labels=P_ij)

        # only keep the loss over pairs of documents from the same query group
        mask1 = tf.equal(self.qid - tf.transpose(self.qid), 0)
        mask1 = tf.cast(mask1, tf.float32)
        # exclude the pair formed by a sample and itself
        n = tf.shape(self.feature)[0]
        mask2 = tf.ones([n, n]) - tf.diag(tf.ones([n]))
        mask = mask1 * mask2
        num_pairs = tf.reduce_sum(mask)

        loss = tf.cond(tf.equal(num_pairs, 0), lambda: 0., lambda: tf.reduce_sum(logloss * mask) / num_pairs)

        lambda_ij = lambda_ij * mask

        # multiply by delta ndcg
        # current dcg
        index = tf.reshape(tf.range(1., tf.cast(self.batch_size, dtype=tf.float32) + 1), tf.shape(self.label))
        cg_discount = tf.log(1. + index)
        rel = 2 ** self.label - 1
        sorted_rel = 2 ** self.sorted_label - 1
        dcg_m = rel / cg_discount
        dcg = tf.reduce_sum(dcg_m)
        # every possible swapped dcg
        stale_ij = tf.tile(dcg_m, [1, self.batch_size])
        new_ij = rel / tf.transpose(cg_discount)
        stale_ji = tf.transpose(stale_ij)
        new_ji = tf.transpose(new_ij)
        # new dcg
        dcg_new = dcg - stale_ij + new_ij - stale_ji + new_ji
        # delta ndcg
        # sorted_label = tf.contrib.framework.sort(self.label, direction="DESCENDING")
        dcg_max = tf.reduce_sum(sorted_rel / cg_discount)
        ndcg_delta = tf.abs(dcg_new - dcg) / dcg_max
        lambda_ij = lambda_ij * ndcg_delta

        vars = tf.trainable_variables()
        grads = [self._get_derivative(score, Wk, lambda_ij, self.feature) for Wk in vars]

        with tf.name_scope("optimization"):
            if self.params["optimizer_type"] == "nadam":
                optimizer = NadamOptimizer(learning_rate=self.learning_rate, beta1=self.params["beta1"],
                                           beta2=self.params["beta2"], epsilon=1e-8,
                                           schedule_decay=self.params["schedule_decay"])
            elif self.params["optimizer_type"] == "adam":
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.params["beta1"],
                                                   beta2=self.params["beta2"], epsilon=1e-8)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(zip(grads, vars))

        return loss, num_pairs, score, train_op
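
The delta-NDCG weighting above can be checked on a tiny concrete example. Here is a
minimal numeric sketch, assuming eager TF 2.x and toy relevance grades; the tensor
names are stand-ins for self.label and self.sorted_label, and tf.sort replaces the
deprecated tf.contrib.framework.sort hinted at in the commented-out line:

    import tensorflow as tf

    label = tf.constant([[3.], [1.], [2.]])   # relevance grades, shape [n, 1]
    sorted_label = tf.sort(label, axis=0, direction="DESCENDING")
    n = 3

    index = tf.reshape(tf.range(1., float(n) + 1.), tf.shape(label))
    cg_discount = tf.math.log(1. + index)
    rel = 2. ** label - 1.
    dcg_m = rel / cg_discount                 # per-position DCG contributions
    dcg = tf.reduce_sum(dcg_m)
    dcg_max = tf.reduce_sum((2. ** sorted_label - 1.) / cg_discount)

    # DCG obtained by swapping positions i and j, computed for all pairs at once
    stale_ij = tf.tile(dcg_m, [1, n])
    new_ij = rel / tf.transpose(cg_discount)
    dcg_new = dcg - stale_ij + new_ij - tf.transpose(stale_ij) + tf.transpose(new_ij)

    ndcg_delta = tf.abs(dcg_new - dcg) / dcg_max   # |delta NDCG| weighting lambda_ij
    print(ndcg_delta.numpy())                      # zero on the diagonal, symmetric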