def _get_train_op(self, loss):
    """For models whose gradient can be computed directly with respect to the
    loss, e.g., LogisticRegression and RankNet."""
    with tf.name_scope("optimization"):
        if self.params["optimizer_type"] == "nadam":
            optimizer = NadamOptimizer(learning_rate=self.learning_rate,
                                       beta1=self.params["beta1"],
                                       beta2=self.params["beta2"],
                                       epsilon=1e-8,
                                       schedule_decay=self.params["schedule_decay"])
        elif self.params["optimizer_type"] == "adam":
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                               beta1=self.params["beta1"],
                                               beta2=self.params["beta2"],
                                               epsilon=1e-8)
        # run any pending update ops (e.g. batch-norm moving averages) before the step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=self.global_step)
        return train_op
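# NOTE (illustrative, not part of the original model): the UPDATE_OPS dependency
# above only has an effect when _score_fn builds layers with internal state
# updates, e.g. batch normalization; without it those moving-average ops would
# not run as part of the training step. A minimal, self-contained sketch of the
# same pattern, assuming the module-level `import tensorflow as tf` (TF1 graph
# mode); the function name is hypothetical:
def _update_ops_pattern_demo():
    x = tf.random_normal([32, 8])
    h = tf.layers.batch_normalization(x, training=True)
    loss = tf.reduce_mean(tf.square(h))
    # batch_normalization registers its moving mean/variance updates in UPDATE_OPS
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # the update ops now run as a side effect of every optimizer step
        train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
    return train_op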
def _build_factorized_model(self):
    # score
    score = self._score_fn(self.feature)

    # pairwise label difference, clipped to S_ij in {-1, 0, 1}
    S_ij = self.label - tf.transpose(self.label)
    S_ij = tf.maximum(tf.minimum(1., S_ij), -1.)
    P_ij = 0.5 * (1 + S_ij)
    s_i_minus_s_j = logits = score - tf.transpose(score)
    sigma = self.params["sigma"]
    lambda_ij = sigma * (0.5 * (1 - S_ij) - tf.nn.sigmoid(-sigma * s_i_minus_s_j))
    logloss = tf.nn.sigmoid_cross_entropy_with_logits(logits=s_i_minus_s_j, labels=P_ij)
    # only keep the loss for pairs within the same query group
    mask1 = tf.equal(self.qid - tf.transpose(self.qid), 0)
    mask1 = tf.cast(mask1, tf.float32)
    # exclude pairs of a sample with itself
    n = tf.shape(self.feature)[0]
    mask2 = tf.ones([n, n]) - tf.diag(tf.ones([n]))
    mask = mask1 * mask2
    num_pairs = tf.reduce_sum(mask)
    loss = tf.cond(tf.equal(num_pairs, 0),
                   lambda: 0.,
                   lambda: tf.reduce_sum(logloss * mask) / num_pairs)
    lambda_ij = lambda_ij * mask

    vars = tf.trainable_variables()
    grads = [self._get_derivative(score, Wk, lambda_ij, self.feature) for Wk in vars]

    with tf.name_scope("optimization"):
        if self.params["optimizer_type"] == "nadam":
            optimizer = NadamOptimizer(learning_rate=self.learning_rate,
                                       beta1=self.params["beta1"],
                                       beta2=self.params["beta2"],
                                       epsilon=1e-8,
                                       schedule_decay=self.params["schedule_decay"])
        elif self.params["optimizer_type"] == "adam":
            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.learning_rate,
                                                         beta1=self.params["beta1"],
                                                         beta2=self.params["beta2"],
                                                         epsilon=1e-8)
        update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(zip(grads, vars))

    return loss, num_pairs, score, train_op
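# NOTE: _get_derivative is defined elsewhere in the class; the sketch below is
# only one plausible way to compute the lambda-weighted gradients it is used
# for here, not the author's implementation. Since lambda_ij is antisymmetric
# (lambda_ji = -lambda_ij) and the mask is symmetric, dC/ds_i = sum_j lambda_ij,
# and the chain rule gives dC/dWk = sum_i (dC/ds_i) * (ds_i/dWk), which
# tf.gradients can compute directly through grad_ys. The function name is
# hypothetical:
def _get_derivative_sketch(self, score, Wk, lambda_ij, feature):
    # feature is unused in this sketch; kept only for signature parity with the call above
    dC_ds = tf.reduce_sum(lambda_ij, axis=1, keepdims=True)  # [n, 1], same shape as score
    return tf.gradients(score, Wk, grad_ys=dC_ds)[0]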
def _get_train_op(self):
    with tf.name_scope("optimization"):
        if self.params["optimizer_type"] == "nadam":
            optimizer = NadamOptimizer(learning_rate=self.learning_rate,
                                       beta1=self.params["beta1"],
                                       beta2=self.params["beta2"],
                                       epsilon=1e-8,
                                       schedule_decay=self.params["schedule_decay"])
        elif self.params["optimizer_type"] == "adam":
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                               beta1=self.params["beta1"],
                                               beta2=self.params["beta2"],
                                               epsilon=1e-8)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(self.loss, global_step=self.global_step)
        return train_op
def _build_model(self):
    # score
    score = self._score_fn(self.feature)

    # pairwise label difference, clipped to S_ij in {-1, 0, 1}
    S_ij = self.label - tf.transpose(self.label)
    S_ij = tf.maximum(tf.minimum(1., S_ij), -1.)
    P_ij = 0.5 * (1 + S_ij)
    s_i_minus_s_j = logits = score - tf.transpose(score)
    sigma = self.params["sigma"]
    lambda_ij = sigma * (0.5 * (1 - S_ij) - tf.nn.sigmoid(-sigma * s_i_minus_s_j))
    # lambda_ij = -sigma * tf.nn.sigmoid(-sigma * s_i_minus_s_j)
    logloss = tf.nn.sigmoid_cross_entropy_with_logits(logits=s_i_minus_s_j, labels=P_ij)
    # only keep the loss for pairs within the same query group
    mask1 = tf.equal(self.qid - tf.transpose(self.qid), 0)
    mask1 = tf.cast(mask1, tf.float32)
    # exclude pairs of a sample with itself
    n = tf.shape(self.feature)[0]
    mask2 = tf.ones([n, n]) - tf.diag(tf.ones([n]))
    mask = mask1 * mask2
    num_pairs = tf.reduce_sum(mask)
    loss = tf.cond(tf.equal(num_pairs, 0),
                   lambda: 0.,
                   lambda: tf.reduce_sum(logloss * mask) / num_pairs)
    lambda_ij = lambda_ij * mask

    # multiply by delta NDCG
    # current dcg
    index = tf.reshape(tf.range(1., tf.cast(self.batch_size, dtype=tf.float32) + 1),
                       tf.shape(self.label))
    cg_discount = tf.log(1. + index)
    rel = 2 ** self.label - 1
    sorted_rel = 2 ** self.sorted_label - 1
    dcg_m = rel / cg_discount
    dcg = tf.reduce_sum(dcg_m)
    # dcg contributions for every possible pairwise swap
    stale_ij = tf.tile(dcg_m, [1, self.batch_size])
    new_ij = rel / tf.transpose(cg_discount)
    stale_ji = tf.transpose(stale_ij)
    new_ji = tf.transpose(new_ij)
    # new dcg after swapping documents i and j
    dcg_new = dcg - stale_ij + new_ij - stale_ji + new_ji
    # delta ndcg
    # sorted_label = tf.contrib.framework.sort(self.label, direction="DESCENDING")
    dcg_max = tf.reduce_sum(sorted_rel / cg_discount)
    ndcg_delta = tf.abs(dcg_new - dcg) / dcg_max
    lambda_ij = lambda_ij * ndcg_delta

    vars = tf.trainable_variables()
    grads = [self._get_derivative(score, Wk, lambda_ij, self.feature) for Wk in vars]

    with tf.name_scope("optimization"):
        if self.params["optimizer_type"] == "nadam":
            optimizer = NadamOptimizer(learning_rate=self.learning_rate,
                                       beta1=self.params["beta1"],
                                       beta2=self.params["beta2"],
                                       epsilon=1e-8,
                                       schedule_decay=self.params["schedule_decay"])
        elif self.params["optimizer_type"] == "adam":
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                               beta1=self.params["beta1"],
                                               beta2=self.params["beta2"],
                                               epsilon=1e-8)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(zip(grads, vars))

    return loss, num_pairs, score, train_op
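# NOTE (illustrative, not part of the original model): dcg_new above relies on a
# closed-form update for swapping the rank positions of documents i and j:
# subtract both stale rel/discount contributions and add them back with the
# discounts exchanged. The identity can be checked in plain NumPy for a single
# query; this helper is hypothetical and imports numpy locally since the module
# may not import it:
def _check_swapped_dcg_identity():
    import numpy as np
    labels = np.array([3., 1., 2., 0.])
    rel = 2. ** labels - 1.
    disc = np.log(1. + np.arange(1, len(rel) + 1))  # same discount as cg_discount
    base_dcg = np.sum(rel / disc)
    i, j = 0, 2
    # closed-form swap, mirroring dcg - stale_ij + new_ij - stale_ji + new_ji
    swapped = (base_dcg - rel[i] / disc[i] + rel[i] / disc[j]
                        - rel[j] / disc[j] + rel[j] / disc[i])
    # brute force: actually exchange the two relevances and recompute DCG
    brute = rel.copy()
    brute[[i, j]] = brute[[j, i]]
    assert np.isclose(swapped, np.sum(brute / disc))
    return swapped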