Example No. 1
 def __call__(self, x):
     regularization = 0.
     sig_x = x  #K.sigmoid(x*5)
     # debug check: using a tensor in a Python `if` only works when executing eagerly
     if tf.math.is_nan(x)[0, 0]:
         tf.print('x is nan', x)
     #l1 regularization for individual paths
     #incentivize paths to be sparse
     if self.l1:
         regularization += self.l1 * K.sum(K.abs(x))
     if self.l2:
         regularization += self.l2 * K.sum(K.square(sig_x))
     #incentivize paths to be different
     if self.kc:
         norm_sig_x = K.l2_normalize(sig_x)
         corr = K.dot(norm_sig_x, K.transpose(norm_sig_x))
         for i in range(K.int_shape(corr)[0]):
             for j in range(K.int_shape(corr)[1]):
                 if (j < i):
                     regularization += self.kc * corr[i, j]
     #incentivize paths to be similar in sparsity
     if self.ks:
         sig_x_std = K.var(sig_x, axis=-1)
         std = K.var(sig_x_std)
         regularization += std
     if self.kv:
         sig_x_std = K.var(sig_x, axis=-1)
         regularization -= K.sum(sig_x_std)
     return regularization
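The flags used above (`l1`, `l2`, `kc`, `ks`, `kv`) come from the instance, so the snippet is presumably the `__call__` of a custom Keras `Regularizer` subclass. Below is a minimal, hedged sketch of how such a callable regularizer can be attached to a layer; the class name `PathRegularizer`, its simplified constructor, and the coefficient values are illustrative assumptions, not taken from the original code.

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import layers, regularizers

class PathRegularizer(regularizers.Regularizer):
    # Hypothetical, simplified version keeping only the L1/L2 terms.
    def __init__(self, l1=0.0, l2=0.0):
        self.l1, self.l2 = l1, l2

    def __call__(self, x):
        reg = 0.
        if self.l1:
            reg += self.l1 * K.sum(K.abs(x))     # sparsity penalty
        if self.l2:
            reg += self.l2 * K.sum(K.square(x))  # weight-decay-style penalty
        return reg

# Attach it to a layer so the penalty is added to the training loss.
layer = layers.Dense(16, kernel_regularizer=PathRegularizer(l1=1e-4, l2=1e-4))
print(float(PathRegularizer(l1=1e-4, l2=1e-4)(tf.ones((4, 4)))))  # 0.0032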
Example No. 2
    def __call__(self, y_true, y_pred):
        # There are additional parameters for this function.
        # Note: some of the 'modes' for edge behavior do not yet have a
        # gradient definition in the Theano tree and cannot be used for learning.

        kernel = [self.kernel_size, self.kernel_size]
        y_true = K.reshape(y_true, [-1] + list(self.__int_shape(y_pred)[1:]))
        y_pred = K.reshape(y_pred, [-1] + list(self.__int_shape(y_pred)[1:]))

        patches_pred = extract_image_patches(y_pred, kernel, kernel, 'valid',
                                             self.dim_ordering)
        patches_true = extract_image_patches(y_true, kernel, kernel, 'valid',
                                             self.dim_ordering)

        # Reshape so each patch is flattened into the last axis
        bs, w, h, c1, c2, c3 = self.__int_shape(patches_pred)
        patches_pred = K.reshape(patches_pred, [-1, w, h, c1 * c2 * c3])
        patches_true = K.reshape(patches_true, [-1, w, h, c1 * c2 * c3])
        # Get mean
        u_true = K.mean(patches_true, axis=-1)
        u_pred = K.mean(patches_pred, axis=-1)
        # Get variance
        var_true = K.var(patches_true, axis=-1)
        var_pred = K.var(patches_pred, axis=-1)
        # Get covariance
        covar_true_pred = K.mean(patches_true * patches_pred,
                                 axis=-1) - u_true * u_pred

        ssim = (2 * u_true * u_pred + self.c1) * (2 * covar_true_pred +
                                                  self.c2)
        denom = ((K.square(u_true) + K.square(u_pred) + self.c1) *
                 (var_pred + var_true + self.c2))
        ssim /= denom  # no need for clipping, c1 and c2 make the denom non-zero
        return K.mean((1.0 - ssim) / 2.0)
Example No. 3
def cc_coef(y_true, y_pred):
    mu_y_true = K.mean(y_true)
    mu_y_pred = K.mean(y_pred)
    covariance = K.mean((y_true - mu_y_true) * (y_pred - mu_y_pred))
    return 1 - 2 * covariance / (K.var(y_true) + K.var(y_pred) +
                                 K.mean(K.square(mu_y_pred - mu_y_true)))
Example No. 4
def calculate_style_loss(x, epsilon=1e-5):
    y_trues, y_preds = x
    loss = [
        mse_loss(K.mean(y_true, axis=(1, 2)), K.mean(y_pred, axis=(1, 2))) +
        mse_loss(K.sqrt(K.var(y_true, axis=(1, 2)) + epsilon),
                 K.sqrt(K.var(y_pred, axis=(1, 2)) + epsilon))
        for y_true, y_pred in zip(y_trues, y_preds)
    ]
    return K.sum(loss)
Example No. 5
 def call(self, x):
     content_features, style_features = x
     content_mean = K.mean(content_features, axis=[1, 2], keepdims=True)
     content_std = K.sqrt(
         K.var(content_features, axis=[1, 2], keepdims=True) + self.epsilon)
     style_mean = K.mean(style_features, axis=[1, 2], keepdims=True)
     style_std = K.sqrt(
         K.var(style_features, axis=[1, 2], keepdims=True) + self.epsilon)
     normalized_content_features = (content_features - content_mean) / (
         content_std + self.epsilon) * style_std + style_mean
     return self.alpha * normalized_content_features + (
         1 - self.alpha) * content_features
Example No. 6
    def call(self, inputs, training=None):
        input_shape = K.int_shape(inputs)
        reduction_axes = list(range(0, len(input_shape)))

        if (self.axis is not None):
            del reduction_axes[self.axis]

        del reduction_axes[0]

        mean = K.mean(inputs, reduction_axes, keepdims=True)
        # K.std(inputs, reduction_axes, keepdims=True) can yield NaNs; the paper
        # puts a `+ epsilon` under the square root, so derive the std from the
        # variance instead (epsilon is added once, under the sqrt):
        variance = K.var(inputs, reduction_axes, keepdims=True)
        stddev = K.sqrt(variance + self.epsilon)
        normed = (inputs - mean) / stddev

        broadcast_shape = [1] * len(input_shape)
        if self.axis is not None:
            broadcast_shape[self.axis] = input_shape[self.axis]

        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            normed = tf.multiply(normed, broadcast_gamma)
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            normed = tf.math.add(normed, broadcast_beta)
        return normed
Example No. 7
def ccc_error(y_true, y_pred):
    true_mean = K.mean(y_true)
    pred_mean = K.mean(y_pred)

    x = y_true - true_mean
    y = y_pred - pred_mean
    rho = K.sum(x * y) / (K.sqrt(K.sum(x**2) * K.sum(y**2)) + K.epsilon())

    std_predictions = K.std(y_pred)
    std_gt = K.std(y_true)

    ccc = 2 * rho * std_gt * std_predictions / (std_predictions**2 +
                                                std_gt**2 +
                                                (pred_mean - true_mean)**2)
    return 1 - ccc
Example No. 8
    def __call__(self, y_true, y_pred):
        """ Call the DSSIM Loss Function.

        Parameters
        ----------
        y_true: tensor or variable
            The ground truth value
        y_pred: tensor or variable
            The predicted value

        Returns
        -------
        tensor
            The DSSIM Loss value

        Notes
        -----
        There are additional parameters for this function. Some of the 'modes' for edge behavior
        do not yet have a gradient definition in the Theano tree and cannot be used for learning.
        """

        kernel = [self.kernel_size, self.kernel_size]
        y_true = K.reshape(y_true, [-1] + list(self.__int_shape(y_pred)[1:]))
        y_pred = K.reshape(y_pred, [-1] + list(self.__int_shape(y_pred)[1:]))
        patches_pred = self.extract_image_patches(y_pred, kernel, kernel,
                                                  'valid', self.dim_ordering)
        patches_true = self.extract_image_patches(y_true, kernel, kernel,
                                                  'valid', self.dim_ordering)

        # Get mean
        u_true = K.mean(patches_true, axis=-1)
        u_pred = K.mean(patches_pred, axis=-1)
        # Get variance
        var_true = K.var(patches_true, axis=-1)
        var_pred = K.var(patches_pred, axis=-1)
        # Get covariance
        covar_true_pred = K.mean(patches_true * patches_pred,
                                 axis=-1) - u_true * u_pred

        ssim = (2 * u_true * u_pred + self.c_1) * (2 * covar_true_pred +
                                                   self.c_2)
        denom = (K.square(u_true) + K.square(u_pred) +
                 self.c_1) * (var_pred + var_true + self.c_2)
        ssim /= denom  # no need for clipping: c_1 and c_2 keep the denominator non-zero
        return K.mean((1.0 - ssim) / 2.0)
Example No. 9
def ssim_loss(y_true, y_pred):
    k1 = 0.01
    k2 = 0.03
    kernel_size = 3
    max_value = 1.0
    cc1 = (k1 * max_value)**2
    cc2 = (k2 * max_value)**2
    y_true = K.reshape(y_true, [-1] + list(K.int_shape(y_pred)[1:]))
    y_pred = K.reshape(y_pred, [-1] + list(K.int_shape(y_pred)[1:]))

    # tf.image.extract_patches expects 4-element sizes/strides/rates vectors and
    # returns the patches flattened into the last dimension:
    # (batch, rows, cols, kernel_size * kernel_size * channels)
    sizes = [1, kernel_size, kernel_size, 1]
    strides = [1, kernel_size, kernel_size, 1]
    rates = [1, 1, 1, 1]
    patches_pred = tf.image.extract_patches(y_pred, sizes, strides, rates,
                                            padding='VALID')
    patches_true = tf.image.extract_patches(y_true, sizes, strides, rates,
                                            padding='VALID')
    # patches_pred = KC.extract_image_patches(
    #     y_pred, kernel, kernel, 'valid', K.image_data_format())
    # patches_true = KC.extract_image_patches(
    #     y_true, kernel, kernel, 'valid', K.image_data_format())
    # Get mean
    u_true = K.mean(patches_true, axis=-1)
    u_pred = K.mean(patches_pred, axis=-1)
    # Get variance
    var_true = K.var(patches_true, axis=-1)
    var_pred = K.var(patches_pred, axis=-1)
    # Get covariance
    covar_true_pred = K.mean(patches_true * patches_pred,
                             axis=-1) - u_true * u_pred

    ssim = (2 * u_true * u_pred + cc1) * (2 * covar_true_pred + cc2)
    denom = (K.square(u_true) + K.square(u_pred) + cc1) * \
        (var_pred + var_true + cc2)
    ssim /= denom

    return K.mean((1.0 - ssim) / 2.0)
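As the docstring of the class-based version notes, the returned value is (1 - SSIM) / 2, so a perfect reconstruction scores 0. A brief, hedged usage sketch with the standalone `ssim_loss` above; the model, input shape, and optimizer are illustrative assumptions.

import tensorflow as tf

# Toy image-to-image model compiled with the DSSIM-style loss defined above.
# Architecture and input size are placeholders, not from the original source.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu',
                           input_shape=(64, 64, 3)),
    tf.keras.layers.Conv2D(3, 3, padding='same', activation='sigmoid'),
])
model.compile(optimizer='adam', loss=ssim_loss)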
Example No. 10
    def _moments(self, x):
        axes = range(len(K.int_shape(x)) - 1)
        if K.backend() == "tensorflow":
            return tf.nn.moments(x=x, axes=axes)
        else:
            # TODO: Maybe the following can be optimized a bit?
            mean = K.mean(K.reshape(x, (-1, self.dim)), axis=0)
            var = K.var(K.reshape(x, (-1, self.dim)), axis=0)

            return mean, var
Example No. 11
 def call(self, inputs):
     
     mu_raw = K.mean(inputs, axis=[1,2])
     mu = tf.expand_dims(tf.expand_dims(mu_raw, axis=1), axis=1)
     
     sig_sq_raw = K.var(inputs, axis=[1,2])
     sig_sq = tf.expand_dims(tf.expand_dims(sig_sq_raw, axis=1), 
                             axis=1)
     
     return (inputs - mu)/K.sqrt(sig_sq + EPSILON)
Example No. 12
def CCC(y_true, y_pred):
    '''
    Lin's Concordance correlation coefficient: https://en.wikipedia.org/wiki/Concordance_correlation_coefficient
    Accepting tensors as input

    '''
    # covariance between y_true and y_pred
    N = K.int_shape(y_pred)[-1]
    s_xy = 1.0 / (N - 1.0 + K.epsilon()) * K.sum(
        (y_true - K.mean(y_true)) * (y_pred - K.mean(y_pred)))
    # means
    x_m = K.mean(y_true)
    y_m = K.mean(y_pred)
    # variances
    s_x_sq = K.var(y_true)
    s_y_sq = K.var(y_pred)

    # concordance correlation coefficient
    ccc = (2.0 * s_xy) / (s_x_sq + s_y_sq + (x_m - y_m)**2)

    return ccc
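Since `CCC` returns the coefficient itself (higher is better, 1 meaning perfect agreement), it is more natural as a monitoring metric than as a loss. A short, hedged sketch; the regression model is an illustrative assumption, and note that the last output axis is what the function treats as the sample dimension `N`.

import tensorflow as tf

# Toy regression model; CCC from above is tracked as a metric while an
# ordinary MSE loss drives the optimization.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(20,)),
    tf.keras.layers.Dense(50),  # last axis is used as N inside CCC
])
model.compile(optimizer='adam', loss='mse', metrics=[CCC])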
Example No. 13
    def std(content, style, loss_weight):
        content_nc = K.int_shape(content)[-1]
        style_nc = K.int_shape(style)[-1]
        if content_nc != style_nc:
            raise Exception("style_loss() content_nc != style_nc")

        axes = [1, 2]
        c_mean = K.mean(content, axis=axes, keepdims=True)
        c_var = K.var(content, axis=axes, keepdims=True)
        s_mean = K.mean(style, axis=axes, keepdims=True)
        s_var = K.var(style, axis=axes, keepdims=True)
        c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5)

        mean_loss = K.sum(K.square(c_mean - s_mean))
        std_loss = K.sum(K.square(c_std - s_std))

        return (mean_loss + std_loss) * (loss_weight / float(content_nc))
Example No. 14
def neg_ccc(y_true, y_pred):
    """Lin's Concordance correlation coefficient.

    The concordance correlation coefficient is the correlation between two
    variables that fall on the 45 degree line through the origin.
    It is a product of
    - precision (Pearson correlation coefficient) and
    - accuracy (closeness to 45 degree line)

    Interpretation:
    - `rho_c =  1` : perfect agreement
    - `rho_c =  0` : no agreement
    - `rho_c = -1` : perfect disagreement

    Args:
    - y_true: ground truth
    - y_pred: predicted values

    Returns:
    - concordance correlation coefficient (float)
    """

    y_pred = K.flatten(y_pred)
    y_true = K.flatten(y_true)
    sample_means_y_true = K.mean(y_true, axis=0)  # (, m_instances)
    sample_means_y_pred = K.mean(y_pred, axis=0)

    sample_vars_y_true = K.var(y_true, axis=0)
    sample_vars_y_pred = K.var(y_pred, axis=0)

    sample_covs = K.mean(
        (y_true - sample_means_y_true) * (y_pred - sample_means_y_pred),
        axis=0)

    ccc = 2 * sample_covs / (
        sample_vars_y_true + sample_vars_y_pred + (
            (sample_means_y_true - sample_means_y_pred) ** 2
        ))
    # ccc = K.mean(ccc)
    return 1 - ccc
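A quick sanity check of the interpretation given in the docstring: with identical inputs the (biased) covariance equals the variance, so `rho_c` is 1 and the returned loss is 0. The small example below assumes eager execution with the TensorFlow Keras backend.

import tensorflow as tf
from tensorflow.keras import backend as K

y = K.constant([0.1, 0.4, 0.35, 0.8])
print(float(neg_ccc(y, y)))   # perfect agreement: ccc = 1, loss ~ 0.0
print(float(neg_ccc(y, -y)))  # anti-correlated predictions: ccc < 0, loss > 1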
Example No. 15
def BN_loss(inputs, t_layer, rescale):
    # `b` is assumed to be the Keras backend alias (e.g. `import keras.backend as b`)
    total = 0
    for i in range(len(t_layer)):
        # get the running mean and variance stored in the batch normalization layer
        running_mean = t_layer[i].moving_mean
        running_var = t_layer[i].moving_variance

        # batch statistics of the corresponding activations
        in_mean = b.mean(inputs[i], [0, 1, 2])
        in_var = b.var(inputs[i], [0, 1, 2])

        total += rescale[i] * (tf.norm(running_var - in_var) +
                               tf.norm(running_mean - in_mean))
    return total
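A hedged sketch of how `BN_loss` might be wired up, assuming `b` is the Keras backend alias; the toy model and the way the pre-normalization activations are gathered are illustrative assumptions.

import tensorflow as tf

# Toy model containing a BatchNormalization layer whose running statistics
# the loss compares against the current activations.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(8, 3, input_shape=(32, 32, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
])
bn_layers = [l for l in model.layers
             if isinstance(l, tf.keras.layers.BatchNormalization)]

# `inputs` would be the list of activations feeding each BN layer (one tensor
# per layer), e.g. collected with an intermediate-output model.
# loss = BN_loss(inputs, bn_layers, rescale=[1.0] * len(bn_layers))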
Example No. 16
    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)
        tensor_input_shape = K.shape(inputs)

        # Prepare broadcasting shape.
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
        broadcast_shape.insert(1, self.groups)

        reshape_group_shape = K.shape(inputs)
        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
        group_axes[self.axis] = input_shape[self.axis] // self.groups
        group_axes.insert(1, self.groups)

        # reshape inputs to new group shape
        group_shape = [group_axes[0], self.groups] + group_axes[2:]
        group_shape = K.stack(group_shape)
        inputs = K.reshape(inputs, group_shape)

        group_reduction_axes = list(range(len(group_axes)))

        # normalize within each group: reduce over every axis except the
        # batch axis (0) and the group axis (1)
        mean = K.mean(inputs, axis=group_reduction_axes[2:], keepdims=True)
        variance = K.var(inputs, axis=group_reduction_axes[2:], keepdims=True)

        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

        # prepare broadcast shape
        inputs = K.reshape(inputs, group_shape)

        outputs = inputs

        # In this case we must explicitly broadcast all parameters.
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            outputs = outputs * broadcast_gamma

        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            outputs = outputs + broadcast_beta

        # finally we reshape the output back to the input shape
        outputs = K.reshape(outputs, tensor_input_shape)

        return outputs
Example No. 17
def calculate_binary_weights(conv_layer, M):
    '''
    conv_layer: original layer's W
    '''
    mean = BK.mean(BK.reshape(conv_layer, shape=(-1, )), axis=0)
    variance = BK.var(BK.reshape(conv_layer, shape=(-1, )), axis=0)

    shifted_stddev = -1 + np.array(range(M)) * (2 / (M - 1))
    shifted_stddev = BK.constant(shifted_stddev,
                                 dtype="float32",
                                 name="shifted_stddev")
    shifted_stddev *= BK.sqrt(variance)
    shifted_stddev = BK.reshape(shifted_stddev,
                                shape=[M] + [1] * len(conv_layer.get_shape()))
    binary_weights = conv_layer - mean
    binary_weights = BK.tile(BK.expand_dims(binary_weights, 0),
                             n=[M] + [1] * len(conv_layer.get_shape()))
    binary_weights = BK.sign(binary_weights + shifted_stddev)
    return binary_weights
Example No. 18
    def build(self):

        inputs = Input(self.input_shape)
        mc_samples = Lambda(lambda x: K.repeat_elements(
            x, self.mc_iteration, axis=_batch_axis))(inputs)

        logits = self.predictor(mc_samples)
        probs = self.activation(logits)

        ret_shape = self.predictor.layers[-1].output_shape
        ret_shape = (-1, self.mc_iteration, *ret_shape[1:])

        probs = Lambda(lambda x: K.reshape(x, ret_shape))(probs)

        mean = Lambda(lambda x: K.mean(x, axis=1))(probs)
        mean = Lambda(lambda x: self.reduce_mean(x))(mean)

        variance = Lambda(lambda x: K.var(x, axis=1))(probs)
        variance = Lambda(lambda x: self.reduce_var(x))(variance)

        return Model(inputs=inputs, outputs=[mean, variance])
Example No. 19
 def _moments(self, x, axes):
     return (K.mean(x, axis=axes,
                    keepdims=True), K.var(x, axis=axes, keepdims=True))
Example No. 20
def var_pred(y_true, y_pred): return K.mean(K.var(y_pred, axis=(0,1)))


def var_ratio(y_true, y_pred):
Example No. 21
    def call(self, inputs, mask=None):
        input_shape = K.int_shape(inputs)
        if len(input_shape) != 4 and len(input_shape) != 2:
            raise ValueError(
                'Inputs should have rank ' + str(4) + " or " + str(2) +
                '; Received input shape:', str(input_shape))

        if len(input_shape) == 4:
            if self.data_format == 'channels_last':
                batch_size, height, width, channels = input_shape
                if batch_size is None:
                    batch_size = -1

                if channels < self.group:
                    raise ValueError(
                        'Input channels should be larger than group size' +
                        '; Received input channels: ' + str(channels) +
                        '; Group size: ' + str(self.group))

                var_x = K.reshape(inputs, (batch_size, height, width,
                                           self.group, channels // self.group))
                mean = K.mean(var_x, axis=[1, 2, 4], keepdims=True)
                std = K.sqrt(
                    K.var(var_x, axis=[1, 2, 4], keepdims=True) + self.epsilon)
                var_x = (var_x - mean) / std

                var_x = K.reshape(var_x, (batch_size, height, width, channels))
                retval = self.gamma * var_x + self.beta
            elif self.data_format == 'channels_first':
                batch_size, channels, height, width = input_shape
                if batch_size is None:
                    batch_size = -1

                if channels < self.group:
                    raise ValueError(
                        'Input channels should be larger than group size' +
                        '; Received input channels: ' + str(channels) +
                        '; Group size: ' + str(self.group))

                var_x = K.reshape(inputs,
                                  (batch_size, self.group,
                                   channels // self.group, height, width))
                mean = K.mean(var_x, axis=[2, 3, 4], keepdims=True)
                std = K.sqrt(
                    K.var(var_x, axis=[2, 3, 4], keepdims=True) + self.epsilon)
                var_x = (var_x - mean) / std

                var_x = K.reshape(var_x, (batch_size, channels, height, width))
                retval = self.gamma * var_x + self.beta

        elif len(input_shape) == 2:
            reduction_axes = list(range(0, len(input_shape)))
            del reduction_axes[0]
            batch_size, _ = input_shape
            if batch_size is None:
                batch_size = -1

            mean = K.mean(inputs, keepdims=True)
            std = K.sqrt(K.var(inputs, keepdims=True) + self.epsilon)
            var_x = (inputs - mean) / std

            retval = self.gamma * var_x + self.beta
        return retval
Example No. 22
def _tf_var(x, axis=None, keepdims=False):
    return K.var(x, axis=axis, keepdims=keepdims)
Example No. 23
    def call(self, inputs, training=None):
        input_shape = K.int_shape(inputs)

        # Prepare broadcasting shape.
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]

        if self.axis != 0:
            del reduction_axes[0]

        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        mean_instance = K.mean(inputs, reduction_axes, keepdims=True)
        variance_instance = K.var(inputs, reduction_axes, keepdims=True)

        mean_layer = K.mean(mean_instance, self.axis, keepdims=True)
        temp = variance_instance + K.square(mean_instance)
        variance_layer = K.mean(temp, self.axis,
                                keepdims=True) - K.square(mean_layer)

        def training_phase():
            mean_batch = K.mean(mean_instance, axis=0, keepdims=True)
            variance_batch = K.mean(temp, axis=0,
                                    keepdims=True) - K.square(mean_batch)

            mean_batch_reshaped = K.flatten(mean_batch)
            variance_batch_reshaped = K.flatten(variance_batch)

            if K.backend() != 'cntk':
                sample_size = K.prod(
                    [K.shape(inputs)[axis] for axis in reduction_axes])
                sample_size = K.cast(sample_size, dtype=K.dtype(inputs))

                # sample variance - unbiased estimator of population variance
                variance_batch_reshaped *= sample_size / (sample_size -
                                                          (1.0 + self.epsilon))

            self.add_update([
                K.moving_average_update(self.moving_mean, mean_batch_reshaped,
                                        self.momentum),
                K.moving_average_update(self.moving_variance,
                                        variance_batch_reshaped, self.momentum)
            ], )

            return normalize_func(mean_batch, variance_batch)

        def inference_phase():
            mean_batch = self.moving_mean
            variance_batch = self.moving_variance

            return normalize_func(mean_batch, variance_batch)

        def normalize_func(mean_batch, variance_batch):
            mean_batch = K.reshape(mean_batch, broadcast_shape)
            variance_batch = K.reshape(variance_batch, broadcast_shape)

            mean_weights = K.softmax(self.mean_weights, axis=0)
            variance_weights = K.softmax(self.variance_weights, axis=0)

            mean = (mean_weights[0] * mean_instance +
                    mean_weights[1] * mean_layer +
                    mean_weights[2] * mean_batch)

            variance = (variance_weights[0] * variance_instance +
                        variance_weights[1] * variance_layer +
                        variance_weights[2] * variance_batch)

            outputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                outputs = outputs * broadcast_gamma

            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                outputs = outputs + broadcast_beta

            return outputs

        if training in {0, False}:
            return inference_phase()

        return K.in_train_phase(training_phase,
                                inference_phase,
                                training=training)
Example No. 24
    def call(self, x, mask=None):
        if self.mode == 0 or self.mode == 2:
            assert self.built, 'Layer must be built before being called'
            input_shape = K.int_shape(x)

            reduction_axes = list(range(len(input_shape)))
            del reduction_axes[self.axis]
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self.axis] = input_shape[self.axis]

            # mean_batch, var_batch = K.moments(x, reduction_axes, shift=None, keep_dims=False)
            normed, mean_batch, var_batch = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)

            std_batch = (K.sqrt(var_batch + self.epsilon))

            r_max_value = K.get_value(self.r_max)
            r = std_batch / (K.sqrt(self.running_std + self.epsilon))
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            d_max_value = K.get_value(self.d_max)
            d = (mean_batch - self.running_mean) / K.sqrt(self.running_std +
                                                          self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_batch = (x - mean_batch) / std_batch
                x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
            else:
                # need broadcasting
                broadcast_mean = K.reshape(mean_batch, broadcast_shape)
                broadcast_std = K.reshape(std_batch, broadcast_shape)
                broadcast_r = K.reshape(r, broadcast_shape)
                broadcast_d = K.reshape(d, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

                x_normed_batch = (x - broadcast_mean) / broadcast_std
                x_normed = (x_normed_batch * broadcast_r +
                            broadcast_d) * broadcast_gamma + broadcast_beta

            # explicit update to moving mean and standard deviation
            self.add_update([
                K.moving_average_update(self.running_mean, mean_batch,
                                        self.momentum),
                K.moving_average_update(self.running_std, std_batch**2,
                                        self.momentum)
            ], x)

            # update r_max and d_max
            t_val = K.get_value(self.t)
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * np.exp(-t_val))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
            t_val += float(self.t_delta)

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update(self.t, t_val)
            ], x)

            if self.mode == 0:
                if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                    x_normed_running = K.batch_normalization(
                        x,
                        self.running_mean,
                        self.running_std,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_std,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        x,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                # pick the normalized form of x corresponding to the training phase
                # for batch renormalization, inference time remains same as batchnorm
                x_normed = K.in_train_phase(x_normed, x_normed_running)

        elif self.mode == 1:
            # sample-wise normalization
            m = K.mean(x, axis=self.axis, keepdims=True)
            std = K.sqrt(
                K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
            x_normed_batch = (x - m) / (std + self.epsilon)

            r_max_value = K.get_value(self.r_max)
            r = std / (self.running_std + self.epsilon)
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            d_max_value = K.get_value(self.d_max)
            d = (m - self.running_mean) / (self.running_std + self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

            # update r_max and d_max
            t_val = K.get_value(self.t)
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * np.exp(-t_val))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
            t_val += float(self.t_delta)

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update(self.t, t_val)
            ], x)

        return x_normed
Example No. 25
def nmse(y_test, y_pred):
    # Normalized MSE: the mean squared error scaled by the variance of the targets.
    # `K_mean_squared_error` is presumably an alias for the Keras MSE loss.
    return K_mean_squared_error(y_test, y_pred) / K.var(y_test)