Example #1
    def _finish(self, caches):
        """ """

        if self.clip > 0:
            S_t = [cache['s_t'] for cache in caches]
            S_t, _ = tf.clip_by_global_norm(S_t, self.clip)
            for cache, s_t in zip(caches, S_t):
                cache['s_t'] = s_t

        for cache in caches:
            x_tm1 = cache['x_tm1']
            s_t = cache['s_t']
            updates = cache['updates']
            with tf.name_scope('update_' + x_tm1.op.name), tf.device(
                    x_tm1.device):
                if 'idxs' in cache:
                    idxs = cache['idxs']
                    x_t = tf.scatter_sub(x_tm1, idxs, s_t)
                    if self.chi > 0:
                        x_t_ = tf.gather(x_t, idxs)
                        x_bar_t, t_x_bar = self._sparse_moving_average(
                            x_tm1, idxs, x_t_, 'x', beta=self.chi)
                else:
                    x_t = tf.assign_sub(x_tm1, s_t)
                    if self.chi > 0:
                        x_bar_t, t_x_bar = self._dense_moving_average(
                            x_tm1, x_t, 'x', beta=self.chi)
            updates.append(x_t)
            if self.chi > 0:
                updates.extend([x_bar_t, t_x_bar])

        update_ops = [tf.group(*cache['updates']) for cache in caches]
        return tf.group(*update_ops, name='update')
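A minimal standalone sketch (TF 1.x, not part of the snippet above) of the tf.clip_by_global_norm call used in _finish: it rescales a list of tensors so that their joint L2 norm does not exceed the clip threshold.

import tensorflow as tf

# Two step tensors with a combined global norm of sqrt(9 + 16 + 144) = 13.
steps = [tf.constant([3.0, 4.0]), tf.constant([0.0, 12.0])]
clipped, global_norm = tf.clip_by_global_norm(steps, clip_norm=6.5)

with tf.Session() as sess:
    # Each tensor is scaled by 6.5 / 13 = 0.5: [[1.5, 2.0], [0.0, 6.0]].
    print(sess.run(clipped), sess.run(global_norm))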
Example #2
def _center_loss_func(labels, features, alpha, num_classes, centers,
                      feature_dim):
    """Keras-backend variant of the center loss; `centers` is an existing
    variable of shape [num_classes, feature_dim] passed in by the caller."""
    assert feature_dim == features.get_shape()[1]
    labels = K.reshape(labels, [-1])
    #labels = K.argmax(labels, axis=1)
    labels = tf.to_int32(labels)
    centers_batch = K.gather(centers, labels)
    diff = (1 - alpha) * (centers_batch - features)
    centers = tf.scatter_sub(centers, labels, diff)
    centers_batch = K.gather(centers, labels)
    loss = K.mean(K.square(features - centers_batch))
    return loss
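One plausible way to use _center_loss_func as a Keras loss is to close over a shared centers variable. The wrapper below is an illustrative sketch only: the name get_center_loss, the K.zeros initialization, and the import path are assumptions, not part of the snippet above.

from keras import backend as K  # or tensorflow.keras backend, depending on the setup

def get_center_loss(alpha, num_classes, feature_dim):
    # Shared variable holding one center per class (assumed zero-initialized).
    centers = K.zeros([num_classes, feature_dim])
    def center_loss(y_true, y_pred):
        # y_true carries the integer labels, y_pred the feature embeddings.
        return _center_loss_func(y_true, y_pred, alpha, num_classes, centers, feature_dim)
    return center_loss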
Example #3
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
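A hedged usage sketch for the function above: as in the referenced paper, the center loss is typically added to the softmax cross-entropy with a small weight. All tensor names, the 0.003 weight, and the alfa value below are illustrative assumptions.

import tensorflow as tf

labels = tf.placeholder(tf.int32, shape=[None])
embeddings = tf.placeholder(tf.float32, shape=[None, 128])  # feature layer output
logits = tf.placeholder(tf.float32, shape=[None, 10])

c_loss, centers = center_loss(embeddings, labels, alfa=0.95, nrof_classes=10)
xent = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
total_loss = xent + 0.003 * c_loss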
Example #4
def center_loss(features, label, alfa, nrof_classes):
    """Center loss as above, but the loss is computed under an explicit control
    dependency so the centers update runs before the loss."""
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
Example #5
    def _apply_sparse_shared(self, grad_values, grad_indices, var):
        shape = np.array(var.get_shape())
        var_rank = len(shape)
        # For the sparse case, we only update the accumulator representing the
        # sparse dimension. In this case SM3 is similar to isotropic Adagrad,
        # but with a better bound (due to the max operator).
        #
        # We do not use the column accumulator because it would be updated on
        # every gradient step and would significantly overestimate the gradient
        # square, whereas the row accumulator can take advantage of the sparsity
        # in the gradients. Even if the column accumulator were implemented, it
        # would result in a no-op because the row accumulators will have lower
        # values.
        #
        # Note that we do not run this code path for the experiments in our
        # paper, since on TPU all sparse gradients are densified.
        if var_rank > 1:
            accumulator_var = self.get_slot(var, "accumulator_" + str(0))
            accumulator = tf.gather(accumulator_var, grad_indices)
            shape_for_broadcasting = tf.concat(
                [[tf.shape(accumulator)[0]], [1] * (var_rank - 1)], 0)
            accumulator = tf.reshape(accumulator, shape_for_broadcasting)
            accumulator += grad_values * grad_values
        else:
            accumulator_var = self.get_slot(var, "accumulator")
            accumulator = tf.scatter_add(accumulator_var, grad_indices,
                                         grad_values * grad_values)

        accumulator_inv_sqrt = tf.rsqrt(accumulator + 1e-30)
        scaled_g = (grad_values * accumulator_inv_sqrt)
        updates = []
        with tf.control_dependencies([scaled_g]):
            if var_rank > 1:
                axes = list(range(1, var_rank))
                new_accumulator = tf.reduce_max(accumulator, axis=axes)
                updates = [
                    tf.scatter_update(accumulator_var, grad_indices,
                                      new_accumulator)
                ]
        with tf.control_dependencies(updates):
            return tf.scatter_sub(var, grad_indices,
                                  self._learning_rate_tensor * scaled_g)
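For reference, a minimal standalone sketch (TF 1.x, assumed values) of tf.scatter_sub itself, the op every example on this page builds on: it subtracts `updates` from the rows of a variable selected by `indices`, in place.

import tensorflow as tf

var = tf.Variable([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
update = tf.scatter_sub(var, indices=[0, 2], updates=[[0.5, 0.5], [1.0, 1.0]])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(update))  # [[0.5, 0.5], [2.0, 2.0], [2.0, 2.0]]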
Example #6
def center_loss(labels, features, alpha=ALPHA, num_classes=NUM_CLASSES):
    """
    获取center loss及更新样本的center
    :param labels: Tensor,表征样本label,非one-hot编码,shape应为(batch_size,).
    :param features: Tensor,表征样本特征,最后一个fc层的输出,shape应该为(batch_size, num_classes).
    :param alpha: 0-1之间的数字,控制样本类别中心的学习率,细节参考原文.
    :param num_classes: 整数,表明总共有多少个类别,网络分类输出有多少个神经元这里就取多少.
    :return: Tensor, center-loss, shape因为(batch_size,)
    """
    # 获取特征的维数,例如256维
    len_features = features.get_shape()[1]
    # 建立一个Variable,shape为[num_classes, len_features],用于存储整个网络的样本中心,
    # 设置trainable=False是因为样本中心不是由梯度进行更新的
    centers = tf.get_variable('centers', [num_classes, len_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    # 将label展开为一维的,如果labels已经是一维的,则该动作其实无必要
    labels = tf.reshape(labels, [-1])

    # 根据样本label,获取mini-batch中每一个样本对应的中心值
    centers_batch = tf.gather(centers, labels)

    # 当前mini-batch的特征值与它们对应的中心值之间的差
    diff = centers_batch - features

    # 获取mini-batch中同一类别样本出现的次数,了解原理请参考原文公式(4)
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    # 更新centers
    centers_update_op = tf.scatter_sub(centers, labels, diff)

    # 这里使用tf.control_dependencies更新centers
    with tf.control_dependencies([centers_update_op]):
        # 计算center-loss
        c_loss = tf.nn.l2_loss(features - centers_batch)

    return c_loss
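A small standalone sketch (TF 1.x, assumed values) of the per-class count used above: tf.unique_with_counts followed by tf.gather gives, for each sample, how many samples of its class are in the mini-batch, which is the denominator from Eq. (4) of the paper.

import tensorflow as tf

labels = tf.constant([3, 1, 3, 3])
unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
appear_times = tf.gather(unique_count, unique_idx)

with tf.Session() as sess:
    print(sess.run(appear_times))  # [3 1 3 3]; each diff row is divided by (1 + count)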
Example #7
    def __init__(self, n_sample, minibatch_sz, m1_inp_shape, m2_inp_shape,
                 m1_layers, m2_layers, msi_layers, m1_cause_init,
                 m2_cause_init, msi_cause_init, reg_m1_causes, reg_m2_causes,
                 reg_msi_causes, lr_m1_causes, lr_m2_causes, lr_msi_causes,
                 reg_m1_filters, reg_m2_filters, reg_msi_filters,
                 lr_m1_filters, lr_m2_filters, lr_msi_filters):

        self.m1_inp_shape = m1_inp_shape
        self.m2_inp_shape = m2_inp_shape
        self.m1_layers = m1_layers
        self.m2_layers = m2_layers
        self.msi_layers = msi_layers

        # create placeholders
        self.x_m1 = tf.placeholder(tf.float32,
                                   shape=[minibatch_sz, m1_inp_shape])
        self.x_m2 = tf.placeholder(tf.float32,
                                   shape=[minibatch_sz, m2_inp_shape])
        self.batch = tf.placeholder(tf.int32, shape=[])

        # create filters and cause for m1
        self.m1_filters = []
        self.m1_causes = []
        for i in range(len(self.m1_layers)):
            filter_name = 'm1_filter_%d' % i
            cause_name = 'm1_cause_%d' % i

            if i == 0:
                self.m1_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.m1_layers[i], self.m1_inp_shape])
                ]
            else:
                self.m1_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.m1_layers[i], self.m1_layers[i - 1]])
                ]

            init = tf.constant_initializer(m1_cause_init[i])
            self.m1_causes += [
                tf.get_variable(cause_name,
                                shape=[n_sample, self.m1_layers[i]],
                                initializer=init)
            ]

        # create filters and cause for m2
        self.m2_filters = []
        self.m2_causes = []
        for i in range(len(self.m2_layers)):
            filter_name = 'm2_filter_%d' % i
            cause_name = 'm2_cause_%d' % i

            if i == 0:
                self.m2_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.m2_layers[i], self.m2_inp_shape])
                ]
            else:
                self.m2_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.m2_layers[i], self.m2_layers[i - 1]])
                ]

            init = tf.constant_initializer(m2_cause_init[i])
            self.m2_causes += [
                tf.get_variable(cause_name,
                                shape=[n_sample, self.m2_layers[i]],
                                initializer=init)
            ]

        # create filters and cause for msi
        self.msi_filters = []
        self.msi_causes = []
        for i in range(len(self.msi_layers)):
            if i == 0:
                # add filters for m1
                filter_name = 'msi_m1_filter'
                self.msi_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.msi_layers[i], self.m1_layers[-1]])
                ]
                # add filters for m2
                filter_name = 'msi_m2_filter'
                self.msi_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.msi_layers[i], self.m2_layers[-1]])
                ]
            else:
                filter_name = 'msi_filter_%d' % i
                self.msi_filters += [
                    tf.get_variable(
                        filter_name,
                        shape=[self.msi_layers[i], self.msi_layers[i - 1]])
                ]

            cause_name = 'msi_cause_%d' % i
            init = tf.constant_initializer(msi_cause_init[i])
            self.msi_causes += [
                tf.get_variable(cause_name,
                                shape=[n_sample, self.msi_layers[i]],
                                initializer=init)
            ]

        # compute predictions
        current_batch = tf.range(self.batch * minibatch_sz,
                                 (self.batch + 1) * minibatch_sz)
        # m1 predictions
        self.m1_minibatch = []
        self.m1_predictions = []
        for i in range(len(self.m1_layers)):
            self.m1_minibatch += [
                tf.gather(self.m1_causes[i], indices=current_batch, axis=0)
            ]
            self.m1_predictions += [
                tf.nn.leaky_relu(
                    tf.matmul(self.m1_minibatch[i], self.m1_filters[i]))
            ]

        # m2 predictions
        self.m2_minibatch = []
        self.m2_predictions = []
        for i in range(len(self.m2_layers)):
            self.m2_minibatch += [
                tf.gather(self.m2_causes[i], indices=current_batch, axis=0)
            ]
            self.m2_predictions += [
                tf.nn.leaky_relu(
                    tf.matmul(self.m2_minibatch[i], self.m2_filters[i]))
            ]

        # msi predictions
        self.msi_minibatch = []
        self.msi_predictions = []
        for i in range(len(self.msi_layers)):
            self.msi_minibatch += [
                tf.gather(self.msi_causes[i], indices=current_batch, axis=0)
            ]
            if i == 0:
                self.msi_predictions += [
                    tf.nn.leaky_relu(
                        tf.matmul(self.msi_minibatch[i], self.msi_filters[i]))
                ]  # m1 prediction
                self.msi_predictions += [
                    tf.nn.leaky_relu(
                        tf.matmul(self.msi_minibatch[i],
                                  self.msi_filters[i + 1]))
                ]  # m2 prediction
            else:
                self.msi_predictions += [
                    tf.nn.leaky_relu(
                        tf.matmul(self.msi_minibatch[i],
                                  self.msi_filters[i + 1]))
                ]

        # add ops for computing gradients for m1 causes and for updating weights
        self.m1_bu_error = []
        self.m1_update_filter = []
        self.m1_cause_grad = []
        for i in range(len(self.m1_layers)):
            if i == 0:
                self.m1_bu_error += [
                    tf.losses.mean_squared_error(
                        self.x_m1,
                        self.m1_predictions[i],
                        reduction=tf.losses.Reduction.NONE)
                ]
            else:
                self.m1_bu_error += [
                    tf.losses.mean_squared_error(
                        tf.stop_gradient(self.m1_minibatch[i - 1]),
                        self.m1_predictions[i],
                        reduction=tf.losses.Reduction.NONE)
                ]

            # compute top-down prediction error
            if len(self.m1_layers) > (i + 1):
                # there are more layers in this modality
                td_error = tf.losses.mean_squared_error(
                    tf.stop_gradient(self.m1_predictions[i + 1]),
                    self.m1_minibatch[i],
                    reduction=tf.losses.Reduction.NONE)
            else:
                # this is the only layer in this modality
                td_error = tf.losses.mean_squared_error(
                    tf.stop_gradient(self.msi_predictions[0]),
                    self.m1_minibatch[i],
                    reduction=tf.losses.Reduction.NONE)

            reg_error = reg_m1_causes[i] * (self.m1_minibatch[i]**2)
            # reg_error = tf.keras.regularizers.l2(reg_m1_causes[i])(self.m1_minibatch[i])
            self.m1_cause_grad += [
                tf.gradients([self.m1_bu_error[i], td_error, reg_error],
                             self.m1_minibatch[i])[0]
            ]

            # ops for updating weights
            reg_error = reg_m1_filters[i] * (self.m1_filters[i]**2)
            m1_filter_grad = tf.gradients([self.m1_bu_error[i], reg_error],
                                          self.m1_filters[i])[0]
            self.m1_update_filter += [
                tf.assign_sub(self.m1_filters[i],
                              lr_m1_filters[i] * m1_filter_grad)
            ]

        # add ops for computing gradients for m2 causes and for updating weights
        self.m2_bu_error = []
        self.m2_update_filter = []
        self.m2_cause_grad = []
        for i in range(len(self.m2_layers)):
            if i == 0:
                self.m2_bu_error += [
                    tf.losses.mean_squared_error(
                        self.x_m2,
                        self.m2_predictions[i],
                        reduction=tf.losses.Reduction.NONE)
                ]
            else:
                self.m2_bu_error += [
                    tf.losses.mean_squared_error(
                        tf.stop_gradient(self.m2_minibatch[i - 1]),
                        self.m2_predictions[i],
                        reduction=tf.losses.Reduction.NONE)
                ]

            # compute top-down prediction error
            if len(self.m2_layers) > (i + 1):
                # there are more layers in this modality
                td_error = tf.losses.mean_squared_error(
                    tf.stop_gradient(self.m2_predictions[i + 1]),
                    self.m2_minibatch[i],
                    reduction=tf.losses.Reduction.NONE)
            else:
                # this is the only layer in this modality
                td_error = tf.losses.mean_squared_error(
                    tf.stop_gradient(self.msi_predictions[1]),
                    self.m2_minibatch[i],
                    reduction=tf.losses.Reduction.NONE)

            reg_error = reg_m2_causes[i] * (self.m2_minibatch[i]**2)
            # reg_error = tf.keras.regularizers.l2(reg_m2_causes[i])(self.m2_minibatch[i])
            self.m2_cause_grad += [
                tf.gradients([self.m2_bu_error[i], td_error, reg_error],
                             self.m2_minibatch[i])[0]
            ]

            # add ops for updating weights
            reg_error = reg_m2_filters[i] * (self.m2_filters[i]**2)
            m2_filter_grad = tf.gradients([self.m2_bu_error[i], reg_error],
                                          self.m2_filters[i])[0]
            self.m2_update_filter += [
                tf.assign_sub(self.m2_filters[i],
                              lr_m2_filters[i] * m2_filter_grad)
            ]
            #else:
            #raise NotImplementedError

        # add ops for computing gradients for msi causes
        self.msi_bu_error = []
        self.msi_reg_error = []
        self.msi_update_filter = []
        self.msi_cause_grad = []
        for i in range(len(self.msi_layers)):
            if i == 0:
                self.msi_bu_error += [
                    tf.losses.mean_squared_error(
                        tf.stop_gradient(self.m1_minibatch[-1]),
                        self.msi_predictions[i],
                        reduction=tf.losses.Reduction.NONE)
                ]
                self.msi_bu_error += [
                    tf.losses.mean_squared_error(
                        tf.stop_gradient(self.m2_minibatch[-1]),
                        self.msi_predictions[i + 1],
                        reduction=tf.losses.Reduction.NONE)
                ]

                self.msi_reg_error += [
                    reg_msi_causes[i] * (self.msi_minibatch[i]**2)
                ]
                # self.msi_reg_error += [tf.keras.regularizers.l2(reg_msi_causes[i])(self.msi_minibatch[i])]
                if len(self.msi_layers) > 1:
                    raise NotImplementedError
                else:
                    self.msi_cause_grad += [
                        tf.gradients([
                            self.msi_bu_error[i], self.msi_bu_error[i + 1],
                            self.msi_reg_error[i]
                        ], self.msi_minibatch[i])[0]
                    ]

                # add ops for updating weights
                reg_error = reg_msi_filters[i] * (self.msi_filters[i]**2)
                msi_filter_grad = tf.gradients(
                    [self.msi_bu_error[i], reg_error], self.msi_filters[i])[0]
                self.msi_update_filter += [
                    tf.assign_sub(self.msi_filters[i],
                                  lr_msi_filters[i] * msi_filter_grad)
                ]
                reg_error = reg_msi_filters[i + 1] * (self.msi_filters[i + 1]**
                                                      2)
                msi_filter_grad = tf.gradients(
                    [self.msi_bu_error[i + 1], reg_error],
                    self.msi_filters[i + 1])[0]
                self.msi_update_filter += [
                    tf.assign_sub(self.msi_filters[i + 1],
                                  lr_msi_filters[i + 1] * msi_filter_grad)
                ]
            else:
                raise NotImplementedError

        # add ops for updating causes
        self.m1_update_cause = []
        self.m2_update_cause = []
        self.msi_update_cause = []
        with tf.control_dependencies(self.m1_cause_grad + self.m2_cause_grad +
                                     self.msi_cause_grad):
            # m1 modality
            for i in range(len(self.m1_layers)):
                self.m1_update_cause += [
                    tf.scatter_sub(self.m1_causes[i],
                                   indices=current_batch,
                                   updates=(lr_m1_causes[i] *
                                            self.m1_cause_grad[i]))
                ]

            # m2 modality
            for i in range(len(self.m2_layers)):
                self.m2_update_cause += [
                    tf.scatter_sub(self.m2_causes[i],
                                   indices=current_batch,
                                   updates=(lr_m2_causes[i] *
                                            self.m2_cause_grad[i]))
                ]

            # msi modality
            for i in range(len(self.msi_layers)):
                self.msi_update_cause += [
                    tf.scatter_sub(self.msi_causes[i],
                                   indices=current_batch,
                                   updates=(lr_msi_causes[i] *
                                            self.msi_cause_grad[i]))
                ]