Example #1
        def custom_loss(model, x, y_perf, y_rank, sample_weight):
            """Compute loss for i-th label

            Arguments:
                model {[type]} -- [Neural network]
                x {[type]} -- [Feature vector]
                y_perf {[type]} -- [Performances]
                y_rank {[type]} -- [Rankings]
                i {[type]} -- [Label]

            Returns:
                [float64] -- [Loss]
            """
            output = model(x)
            row_indices = tf.range(tf.shape(y_rank)[0])
            y_ind = y_rank - 1
            added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
            added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
            y_hat_0 = tf.gather_nd(output, added_indices_0)
            y_hat_1 = tf.gather_nd(output, added_indices_1)
            reg_loss = tf.reduce_mean(
                tf.multiply(sample_weight,
                            (tf.square(tf.subtract(y_hat_0, y_perf[:, 0])))))
            reg_loss += tf.reduce_mean(
                (tf.square(tf.subtract(y_hat_1, y_perf[:, 1]))))
            rank_loss = tf.reduce_mean(
                tf.multiply(
                    sample_weight,
                    tf.square(
                        tf.maximum(0.0, epsilon_value - (y_hat_0 - y_hat_1)))))
            return (
                1 - lambda_value
            ) * reg_loss + lambda_value * rank_loss, reg_loss, rank_loss
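The loss above is a convex combination of a weighted squared regression error and a squared hinge on the margin between the two ranked outputs; epsilon_value and lambda_value come from the enclosing scope. A minimal sketch (assuming TensorFlow 2.x and made-up tensors) of how the tf.gather_nd indexing selects the outputs of the first- and second-ranked labels:

import tensorflow as tf

# Dummy batch: 3 samples, 4 labels; rankings are 1-based label indices.
output = tf.constant([[0.1, 0.4, 0.2, 0.9],
                      [0.7, 0.3, 0.5, 0.1],
                      [0.2, 0.8, 0.6, 0.4]])
y_rank = tf.constant([[2, 4], [1, 3], [2, 3]])

row_indices = tf.range(tf.shape(y_rank)[0])
y_ind = y_rank - 1                                    # convert to 0-based indices
idx_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)  # (row, first-ranked label)
idx_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)  # (row, second-ranked label)
y_hat_0 = tf.gather_nd(output, idx_0)                 # prediction for first-ranked label
y_hat_1 = tf.gather_nd(output, idx_1)                 # prediction for second-ranked label
print(y_hat_0.numpy(), y_hat_1.numpy())               # [0.4 0.7 0.8] [0.9 0.5 0.6]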
Example #2
def discriminative_loss(prediction, correct_label, feature_dim, delta_v,
                        delta_d, param_var, param_dist, param_reg):
    ''' Iterate over a batch of predictions/labels and accumulate the loss
    :return: discriminative loss and its three components
    '''

    # i: index of the i-th element in the batch; the loop stops once i >= batch size B
    def cond(label, batch, out_loss, out_var, out_dist, out_reg, i):
        return tf.less(i, tf.shape(batch)[0])

    def body(label, batch, out_loss, out_var, out_dist, out_reg, i):
        disc_loss, l_var, l_dist, l_reg = discriminative_loss_single(
            prediction[i], correct_label[i], feature_dim, delta_v, delta_d,
            param_var, param_dist, param_reg)
        # write the values below at index i
        out_loss = out_loss.write(i, disc_loss)
        out_var = out_var.write(i, l_var)
        out_dist = out_dist.write(i, l_dist)
        out_reg = out_reg.write(i, l_reg)

        return label, batch, out_loss, out_var, out_dist, out_reg, i + 1

    # TensorArray is a data structure that supports dynamic writing
    output_ta_loss = tf.TensorArray(dtype=tf.float32,
                                    size=0,
                                    dynamic_size=True)
    output_ta_var = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    output_ta_dist = tf.TensorArray(dtype=tf.float32,
                                    size=0,
                                    dynamic_size=True)
    output_ta_reg = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    _, _, out_loss_op, out_var_op, out_dist_op, out_reg_op, _ = tf.while_loop(
        cond, body, [
            correct_label, prediction, output_ta_loss, output_ta_var,
            output_ta_dist, output_ta_reg, 0
        ])
    # stack the array elements into a tensor
    out_loss_op = out_loss_op.stack()
    out_var_op = out_var_op.stack()
    out_dist_op = out_dist_op.stack()
    out_reg_op = out_reg_op.stack()

    disc_loss = tf.reduce_mean(out_loss_op)
    l_var = tf.reduce_mean(out_var_op)
    l_dist = tf.reduce_mean(out_dist_op)
    l_reg = tf.reduce_mean(out_reg_op)

    return disc_loss, l_var, l_dist, l_reg
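The batch loop above uses the tf.while_loop plus TensorArray pattern to collect one scalar per sample when the batch size is only known at graph time. A stripped-down sketch of that pattern on its own; per_sample_fn is a hypothetical stand-in for discriminative_loss_single:

import tensorflow as tf

def per_sample_fn(x):
    # stand-in for discriminative_loss_single: any scalar computed per sample
    return tf.reduce_sum(tf.square(x))

batch = tf.random.normal([8, 16])
ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

def cond(ta, i):
    return tf.less(i, tf.shape(batch)[0])

def body(ta, i):
    ta = ta.write(i, per_sample_fn(batch[i]))
    return ta, i + 1

ta, _ = tf.while_loop(cond, body, [ta, 0])
per_sample = ta.stack()               # shape [8], one value per sample
mean_loss = tf.reduce_mean(per_sample)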
Example #3
        def custom_loss(model, x, y_perf, y_rank, i, sample_weight):
            """Compute loss for i-th label

            Arguments:
                model {[type]} -- [Neural network]
                x {[type]} -- [Feature vector]
                y_perf {[type]} -- [Performances]
                y_rank {[type]} -- [Rankings]
                i {[type]} -- [Label]
                sample_weight {[type]} -- [Sample weights]

            Returns:
                [float64] -- [Loss]
            """
            output = model(x)
            row_indices = tf.range(tf.shape(y_rank)[0])
            y_ind = y_rank - 1
            added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
            added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
            y_hat_0 = tf.gather_nd(output, added_indices_0)
            y_hat_1 = tf.gather_nd(output, added_indices_1)
            y_hat = tf.gather_nd(output,
                                 tf.stack([row_indices, y_ind[:, i]], axis=1))

            reg_loss = tf.reduce_mean(
                tf.multiply(sample_weight,
                            tf.square(tf.subtract(y_hat, y_perf[:, i]))))
            # exp_utils = tf.exp(output)
            exp_utils_ordered = tf.exp(tf.stack([y_hat_1, y_hat_0], axis=1))
            exp_utils = tf.exp(output)
            # exp_utils_ordered = exp_utils[
            #     np.arange(exp_utils.shape[0])[:, np.newaxis], y_ind]
            inv_rank = tf.argsort(y_rank)
            rank_loss = 0.0
            for k in range(0, 2):
                # print("i", i, "k", k)
                # indicator = (1 - y_ind[:, i]) >= k
                indicator = inv_rank[:, i] >= k
                indicator = tf.keras.backend.repeat_elements(indicator[:,
                                                                       None],
                                                             num_labels,
                                                             axis=1)
                denominator = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
                rank_loss = tf.add(
                    rank_loss, tf.divide(exp_utils_ordered[:, i], denominator))
            if i < 2:
                rank_loss = tf.subtract(rank_loss, 1)
            rank_loss = tf.reduce_mean(tf.multiply(sample_weight, rank_loss))
            return lambda_value * rank_loss + (1 - lambda_value) * reg_loss, reg_loss, rank_loss
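For each of the first two rank positions k, the rank term above adds the ratio of one exponentiated utility to the sum of the exponentiated utilities still in contention at position k, i.e. the softmax-style denominators of the negative log-likelihood referenced in Example #4. A toy sketch of just that denominator slicing, with made-up utilities:

import tensorflow as tf

# Hypothetical ordered utilities for 3 instances and the top-2 labels
# (column order matches the [y_hat_1, y_hat_0] stacking above).
utils_ordered = tf.constant([[0.2, 1.0],
                             [0.5, 0.7],
                             [1.5, 0.1]])
exp_utils_ordered = tf.exp(utils_ordered)

for k in range(2):
    # denominator of the softmax over the labels still "in the race" at position k
    denominator = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
    print(k, denominator.numpy())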
Example #4
    def fit(self,
            num_labels: int,
            rankings: np.ndarray,
            features: np.ndarray,
            performances: np.ndarray,
            sample_weights=None,
            lambda_value=0.5,
            num_epochs=1000,
            learning_rate=0.001,
            batch_size=32,
            seed=1,
            patience=16,
            es_val_ratio=0.3,
            regression_loss="Squared",
            reshuffle_buffer_size=1000,
            early_stop_interval=5,
            log_losses=True,
            hidden_layer_sizes=None,
            activation_function="relu"):
        """Fit the network to the given data.

        Arguments:
            num_labels {int} -- Number of labels in the ranking
            rankings {np.ndarray} -- Ranking of performances
            features {np.ndarray} -- Features
            performances {np.ndarray} -- Performances
            lambda_value {float} -- Lambda
            regression_loss {String} -- Which regression loss
            should be applied, "Squared" and "Absolute" are
            supported
        """
        tf.random.set_seed(seed)

        if sample_weights is None:
            sample_weights = np.ones(features.shape[0])

        # add one column for bias
        np.random.seed(seed)
        num_features = features.shape[1] + 1
        self.network = self.build_network(
            num_labels,
            num_features,
            hidden_layer_sizes=hidden_layer_sizes,
            activation_function=activation_function)

        self.network._make_predict_function()
        self.network.summary()
        self.loss_history = []
        self.es_val_history = []

        # add constant 1 for bias and create tf dataset
        feature_values = np.hstack((features, np.ones((features.shape[0], 1))))
        # print(feature_values.shape)
        # print(performances.shape)

        # split feature and performance data
        feature_values, performances, rankings, sample_weights = shuffle(
            feature_values,
            performances,
            rankings,
            sample_weights,
            random_state=seed)
        val_data = Dataset.from_tensor_slices(
            (feature_values[:int(es_val_ratio * feature_values.shape[0])],
             performances[:int(es_val_ratio * performances.shape[0])],
             rankings[:int(es_val_ratio * rankings.shape[0])],
             sample_weights[:int(es_val_ratio * sample_weights.shape[0])]))
        train_data = Dataset.from_tensor_slices(
            (feature_values[int(es_val_ratio * feature_values.shape[0]):],
             performances[int(es_val_ratio * performances.shape[0]):],
             rankings[int(es_val_ratio * rankings.shape[0]):],
             sample_weights[int(es_val_ratio * sample_weights.shape[0]):]))
        # print(val_data)
        # print(train_data)
        train_data = train_data.batch(batch_size)
        val_data = val_data.batch(1)

        # define custom loss function, i.e. a convex combination of the i-th partial
        # derivative of the negative log-likelihood and the squared regression error
        def custom_loss(model, x, y_perf, y_rank, i, sample_weight):
            """Compute loss for i-th label

            Arguments:
                model {[type]} -- [Neural network]
                x {[type]} -- [Feature vector]
                y_perf {[type]} -- [Performances]
                y_rank {[type]} -- [Rankings]
                i {[type]} -- [Label]
                sample_weight {[type]} -- [Sample weights]

            Returns:
                [float64] -- [Loss]
            """
            output = model(x)
            row_indices = tf.range(tf.shape(y_rank)[0])
            y_ind = y_rank - 1
            added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
            added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
            y_hat_0 = tf.gather_nd(output, added_indices_0)
            y_hat_1 = tf.gather_nd(output, added_indices_1)
            y_hat = tf.gather_nd(output,
                                 tf.stack([row_indices, y_ind[:, i]], axis=1))

            reg_loss = tf.reduce_mean(
                tf.multiply(sample_weight,
                            tf.square(tf.subtract(y_hat, y_perf[:, i]))))
            # exp_utils = tf.exp(output)
            exp_utils_ordered = tf.exp(tf.stack([y_hat_1, y_hat_0], axis=1))
            exp_utils = tf.exp(output)
            # exp_utils_ordered = exp_utils[
            #     np.arange(exp_utils.shape[0])[:, np.newaxis], y_ind]
            inv_rank = tf.argsort(y_rank)
            rank_loss = 0.0
            for k in range(0, 2):
                # print("i", i, "k", k)
                # indicator = (1 - y_ind[:, i]) >= k
                indicator = inv_rank[:, i] >= k
                indicator = tf.keras.backend.repeat_elements(indicator[:,
                                                                       None],
                                                             num_labels,
                                                             axis=1)
                denominator = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
                rank_loss = tf.add(
                    rank_loss, tf.divide(exp_utils_ordered[:, i], denominator))
            if i < 2:
                rank_loss = tf.subtract(rank_loss, 1)
            rank_loss = tf.reduce_mean(tf.multiply(sample_weight, rank_loss))
            return lambda_value * rank_loss + (1 - lambda_value) * reg_loss, reg_loss, rank_loss

        # define gradient of custom loss function
        def grad(model, x, y_perf, y_rank, i, sample_weight):
            with tf.GradientTape() as tape:
                loss_value, reg_loss, rank_loss = custom_loss(
                    model, x, y_perf, y_rank, i, sample_weight)
            return loss_value, tape.gradient(loss_value,
                                             model.trainable_weights), reg_loss, rank_loss

        # # define objective, i.e. convex combination of nll and mse
        # def custom_objective(model, x, y_perf, y_rank, sample_weights):
        #     """Compute loss for i-th label

        #     Arguments:
        #         model {[type]} -- [Neural network]
        #         x {[type]} -- [Feature vector]
        #         y_perf {[type]} -- [Performances]
        #         y_rank {[type]} -- [Rankings]
        #         i {[type]} -- [Label]

        #     Returns:
        #         [float64] -- [Loss]
        #     """
        #     output = model(x)
        #     row_indices = tf.range(tf.shape(y_rank)[0])
        #     y_ind = y_rank - 1
        #     added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
        #     added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
        #     y_hat_0 = tf.gather_nd(output, added_indices_0)
        #     y_hat_1 = tf.gather_nd(output, added_indices_1)
        #     reg_loss = tf.reduce_mean(
        #         tf.multiply(sample_weight,
        #                     (tf.square(tf.subtract(y_hat_0, y_perf[:, 0])))))
        #     reg_loss += tf.reduce_mean(
        #         tf.multiply(sample_weight,
        #                     (tf.square(tf.subtract(y_hat_1, y_perf[:, 1])))))
        #     utils_ordered = tf.stack([y_hat_0, y_hat_1], axis=1)
        #     exp_utils_ordered = tf.exp(utils_ordered)
        #     exp_utils = tf.exp(output)
        #     rank_loss = 0.0
        #     for k in range(0, 2):
        #         logsum = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
        #         rank_loss += tf.math.log(logsum)
        #     #     print("rank loss", rank_loss)
        #     # print("rank loss after", tf.reduce_sum(rank_loss))
        #     rank_loss = tf.reduce_sum(tf.multiply(
        #         sample_weight, rank_loss)) - tf.reduce_sum(
        #             tf.multiply(sample_weight, utils_ordered))
        #     return lambda_value * rank_loss + (1 - lambda_value) * reg_loss

        # define objective, i.e. convex combination of nll and mse
        def custom_objective(model, x, y_perf, y_rank, sample_weight):
            obj_val = 0
            for i in range(2):
                obj_val = obj_val + \
                    custom_loss(model, x, y_perf, y_rank, i, sample_weight)[0]
            return obj_val

        # optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        best_val_loss = float("inf")
        current_best_weights = self.network.get_weights()
        patience_cnt = 0

        for epoch in range(num_epochs):
            epoch_reg_loss_avg = tf.keras.metrics.Mean()
            epoch_rank_loss_avg = tf.keras.metrics.Mean()

            for x, y_perf, y_rank, sample_weight in train_data:
                tvs = self.network.trainable_weights
                accum_tvs = [
                    tf.Variable(tf.zeros_like(tv), trainable=False)
                    for tv in tvs
                ]
                zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_tvs]

                reg_loss_sum = 0
                rank_loss_sum = 0

                for i in range(2):
                    loss_value, grads, reg_loss, rank_loss = grad(self.network, x, y_perf, y_rank,
                                                                  i, sample_weight)

                    reg_loss_sum += reg_loss
                    rank_loss_sum += rank_loss

                    for j in range(len(accum_tvs)):
                        accum_tvs[j].assign_add(grads[j])

                # print(loss_value)
                optimizer.apply_gradients(
                    zip(accum_tvs, self.network.trainable_weights))

                epoch_reg_loss_avg(reg_loss_sum)
                epoch_rank_loss_avg(rank_loss_sum)

            if log_losses:
                self.loss_history.append(
                    [float(epoch_reg_loss_avg.result()), float(epoch_rank_loss_avg.result())])

            if epoch % early_stop_interval == 0:
                print("early stopping check")
                losses = []
                for x, y_perf, y_rank, sample_weight in val_data:
                    losses.append(
                        custom_objective(self.network, x, y_perf, y_rank,
                                         sample_weight))
                loss_tensor = np.average(losses)
                current_val_loss = tf.reduce_mean(loss_tensor)
                print("cur val loss", current_val_loss)
                self.es_val_history.append(current_val_loss)
                if current_val_loss < best_val_loss:
                    best_val_loss = current_val_loss
                    current_best_weights = self.network.get_weights()
                    print("new best validation loss", best_val_loss)
                    patience_cnt = 0
                else:
                    patience_cnt += 1
                    print("patience counter", patience_cnt)
                if patience_cnt >= patience:
                    print("early stopping")
                    break
        self.network.set_weights(current_best_weights)
        print("best weights", current_best_weights)
Example #5
def discriminative_loss_single(prediction, correct_label, feature_dim, delta_v,
                               delta_d, param_var, param_dist, param_reg):
    ''' Discriminative loss for a single prediction/label pair.
    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label (unused in this version)
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    '''

    ### Reshape so pixels are aligned along a vector
    #correct_label = tf.reshape(correct_label, [label_shape[1] * label_shape[0]])
    reshaped_pred = tf.reshape(prediction, [-1, feature_dim])

    ### Count instances
    unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)

    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)

    segmented_sum = tf.unsorted_segment_sum(reshaped_pred, unique_id,
                                            num_instances)

    mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    ### Calculate l_var
    #distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1)
    #tmp_distance = tf.subtract(reshaped_pred, mu_expand)
    tmp_distance = reshaped_pred - mu_expand
    distance = tf.norm(tmp_distance, ord=1, axis=1)

    distance = tf.subtract(distance, delta_v)
    distance = tf.clip_by_value(distance, 0., distance)
    distance = tf.square(distance)

    l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.div(l_var, counts)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    ### Calculate l_dist

    # Get distance for each pair of clusters like this:
    #   mu_1 - mu_1
    #   mu_2 - mu_1
    #   mu_3 - mu_1
    #   mu_1 - mu_2
    #   mu_2 - mu_2
    #   mu_3 - mu_2
    #   mu_1 - mu_3
    #   mu_2 - mu_3
    #   mu_3 - mu_3

    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(mu_band_rep,
                             (num_instances * num_instances, feature_dim))

    mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

    # Filter out zeros from same cluster subtraction
    eye = tf.eye(num_instances)
    zero = tf.zeros(1, dtype=tf.float32)
    diff_cluster_mask = tf.equal(eye, zero)
    diff_cluster_mask = tf.reshape(diff_cluster_mask, [-1])
    mu_diff_bool = tf.boolean_mask(mu_diff, diff_cluster_mask)

    #intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff),axis=1)
    #zero_vector = tf.zeros(1, dtype=tf.float32)
    #bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
    #mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf.norm(mu_diff_bool, ord=1, axis=1)
    mu_norm = tf.subtract(2. * delta_d, mu_norm)
    mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
    mu_norm = tf.square(mu_norm)

    l_dist = tf.reduce_mean(mu_norm)

    def rt_0():
        return 0.

    def rt_l_dist():
        return l_dist

    l_dist = tf.cond(tf.equal(1, num_instances), rt_0, rt_l_dist)

    ### Calculate l_reg
    l_reg = tf.reduce_mean(tf.norm(mu, ord=1, axis=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
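The l_dist term needs the difference between every ordered pair of cluster means; the tile/reshape construction above builds all pairs without a Python loop and then masks out the i == j rows. A small sketch of just that pairing step with made-up means:

import tensorflow as tf

mu = tf.constant([[0., 0.],
                  [1., 0.],
                  [0., 2.]])                            # 3 cluster means, feature_dim = 2
n = tf.shape(mu)[0]
d = tf.shape(mu)[1]

mu_interleaved = tf.tile(mu, [n, 1])                    # mu_1, mu_2, mu_3, mu_1, ...
mu_band = tf.reshape(tf.tile(mu, [1, n]), (n * n, d))   # mu_1, mu_1, mu_1, mu_2, ...
mu_diff = mu_band - mu_interleaved                      # all ordered pairs mu_i - mu_j

# drop the zero rows coming from i == j
mask = tf.reshape(tf.equal(tf.eye(n), 0.), [-1])
mu_diff = tf.boolean_mask(mu_diff, mask)
print(tf.norm(mu_diff, ord=1, axis=1).numpy())          # L1 distances between distinct means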
Example #6
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# start the computation graph
sess = tf.InteractiveSession()
# placeholders
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
# weights
W = tf.Variable(tf.zeros([784, 10]))
# bias
b = tf.Variable(tf.zeros([10]))
# initialize variables
sess.run(tf.global_variables_initializer())
# prediction
y = tf.nn.softmax(tf.matmul(x, W) + b)
# cross entropy as the loss function
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
# training: minimize the loss function
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# number of training iterations
for i in range(1000):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
# check whether the predictions are correct
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# print the accuracy
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
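The example above is written against the TF1 graph/session API (placeholders, InteractiveSession). For comparison only, a rough TF2/Keras sketch of the same softmax-regression model; it uses sparse integer labels instead of the one-hot encoding above:

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
x_test = x_test.reshape(-1, 784).astype("float32") / 255.0

# a single dense layer with softmax corresponds to the W, b model above
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation="softmax", input_shape=(784,))
])
model.compile(optimizer=tf.keras.optimizers.SGD(0.01),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=50, epochs=1)
print(model.evaluate(x_test, y_test))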
Example #7
 def compute_loss(self, true, prediction):
     loss = tf.keras.losses.sparse_categorical_crossentropy(
         true, prediction)
     avg_loss = tf.reduce_mean(loss)
     return avg_loss
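A quick standalone check of the call wrapped by compute_loss, using made-up labels and per-class probabilities:

import tensorflow as tf

true = tf.constant([0, 2])                           # integer class ids
prediction = tf.constant([[0.8, 0.1, 0.1],
                          [0.2, 0.2, 0.6]])          # per-class probabilities
loss = tf.keras.losses.sparse_categorical_crossentropy(true, prediction)
print(tf.reduce_mean(loss).numpy())                  # mean loss over the batch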
Example #8
    def fit(self,
            num_labels: int,
            rankings: np.ndarray,
            features: np.ndarray,
            performances: np.ndarray,
            sample_weights=None,
            lambda_value=0.5,
            epsilon_value=1,
            num_epochs=1000,
            learning_rate=0.001,
            batch_size=32,
            seed=1,
            patience=16,
            es_val_ratio=0.3,
            regression_loss="Squared",
            reshuffle_buffer_size=1000,
            early_stop_interval=5,
            log_losses=True,
            hidden_layer_sizes=None,
            activation_function="relu"):
        """Fit the network to the given data.

        Arguments:
            num_labels {int} -- Number of labels in the ranking
            rankings {np.ndarray} -- Ranking of performances
            features {np.ndarray} -- Features
            performances {np.ndarray} -- Performances
            lambda_value {float} -- Lambda
            regression_loss {String} -- Which regression loss
            should be applied, "Squared" and "Absolute" are
            supported
        """
        tf.random.set_seed(seed)

        if sample_weights is None:
            sample_weights = np.ones(features.shape[0])

        # add one column for bias
        np.random.seed(seed)
        num_features = features.shape[1] + 1
        self.network = self.build_network(
            num_labels,
            num_features,
            hidden_layer_sizes=hidden_layer_sizes,
            activation_function=activation_function)

        self.network._make_predict_function()
        self.network.summary()

        self.loss_history = []
        self.es_val_history = []
        # add constant 1 for bias and create tf dataset
        feature_values = np.hstack((features, np.ones((features.shape[0], 1))))
        # print(feature_values.shape)
        # print(performances.shape)

        # split feature and performance data
        feature_values, performances, rankings, sample_weights = shuffle(
            feature_values,
            performances,
            rankings,
            sample_weights,
            random_state=seed,
        )
        val_data = Dataset.from_tensor_slices(
            (feature_values[:int(es_val_ratio * feature_values.shape[0])],
             performances[:int(es_val_ratio * performances.shape[0])],
             rankings[:int(es_val_ratio * rankings.shape[0])],
             sample_weights[:int(es_val_ratio * sample_weights.shape[0])]))
        train_data = Dataset.from_tensor_slices(
            (feature_values[int(es_val_ratio * feature_values.shape[0]):],
             performances[int(es_val_ratio * performances.shape[0]):],
             rankings[int(es_val_ratio * rankings.shape[0]):],
             sample_weights[int(es_val_ratio * sample_weights.shape[0]):]))
        # print(val_data)
        # print("train data", train_data)
        train_data = train_data.batch(batch_size)
        val_data = val_data.batch(1)

        # define custom loss function

        def custom_loss(model, x, y_perf, y_rank, sample_weight):
            """Compute loss for i-th label

            Arguments:
                model {[type]} -- [Neural network]
                x {[type]} -- [Feature vector]
                y_perf {[type]} -- [Performances]
                y_rank {[type]} -- [Rankings]
                i {[type]} -- [Label]

            Returns:
                [float64] -- [Loss]
            """
            output = model(x)
            row_indices = tf.range(tf.shape(y_rank)[0])
            y_ind = y_rank - 1
            added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
            added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
            y_hat_0 = tf.gather_nd(output, added_indices_0)
            y_hat_1 = tf.gather_nd(output, added_indices_1)
            reg_loss = tf.reduce_mean(
                tf.multiply(sample_weight,
                            (tf.square(tf.subtract(y_hat_0, y_perf[:, 0])))))
            reg_loss += tf.reduce_mean(
                (tf.square(tf.subtract(y_hat_1, y_perf[:, 1]))))
            rank_loss = tf.reduce_mean(
                tf.multiply(
                    sample_weight,
                    tf.square(
                        tf.maximum(0.0, epsilon_value - (y_hat_0 - y_hat_1)))))
            return (
                1 - lambda_value
            ) * reg_loss + lambda_value * rank_loss, reg_loss, rank_loss

        # define gradient of custom loss function

        def grad(model, x, y_perf, y_rank, sample_weight):
            with tf.GradientTape() as tape:
                loss_value, reg_loss, rank_loss = custom_loss(
                    model, x, y_perf, y_rank, sample_weight)
            return loss_value, tape.gradient(
                loss_value, model.trainable_weights), reg_loss, rank_loss

        # optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        best_val_loss = float("inf")
        current_best_weights = self.network.get_weights()
        patience_cnt = 0

        for epoch in range(num_epochs):
            epoch_reg_loss_avg = tf.keras.metrics.Mean()
            epoch_rank_loss_avg = tf.keras.metrics.Mean()
            for x, y_perf, y_rank, sample_weight in train_data:
                loss_value, grads, reg_loss, rank_loss = grad(
                    self.network, x, y_perf, y_rank, sample_weight)
                optimizer.apply_gradients(
                    zip(grads, self.network.trainable_weights))
                epoch_reg_loss_avg(reg_loss)
                epoch_rank_loss_avg(rank_loss)
            if log_losses:
                self.loss_history.append([
                    float(epoch_reg_loss_avg.result()),
                    float(epoch_rank_loss_avg.result())
                ])

            if epoch % early_stop_interval == 0:
                losses = []
                for x, y_perf, y_rank, sample_weight in val_data:
                    # keep only the combined loss (custom_loss returns a 3-tuple)
                    losses.append(
                        custom_loss(self.network, x, y_perf, y_rank,
                                    sample_weight)[0])
                loss_tensor = np.average(losses)
                current_val_loss = tf.reduce_mean(loss_tensor)
                print("cur val loss", current_val_loss)
                self.es_val_history.append(current_val_loss)
                if current_val_loss < best_val_loss:
                    best_val_loss = current_val_loss
                    current_best_weights = self.network.get_weights()
                    patience_cnt = 0
                else:
                    patience_cnt += 1
                    print("patience counter", patience_cnt)
                if patience_cnt >= patience:
                    print("early stopping")
                    break
        self.network.set_weights(current_best_weights)
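Example #8 differs from Example #4 mainly in its rank term: a squared hinge that penalizes the first-ranked output for not exceeding the runner-up by at least epsilon_value. A toy check of that hinge on made-up values:

import tensorflow as tf

epsilon_value = 1.0
y_hat_0 = tf.constant([2.0, 0.5, 1.2])   # predicted performance of the first-ranked label
y_hat_1 = tf.constant([0.5, 0.4, 1.4])   # predicted performance of the runner-up
hinge = tf.square(tf.maximum(0.0, epsilon_value - (y_hat_0 - y_hat_1)))
print(hinge.numpy())                     # 0 where the margin >= epsilon, positive otherwise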