Example #1
def neural_style_loss(style, combination, num_channels, img_width, img_height):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (num_channels ** 2) * (size ** 2))
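
A minimal usage sketch (not part of the original example), assuming the gram_matrix helper shown in Example #8 and channels-first 3D feature tensors; all shapes are illustrative:

from keras import backend as K

# hypothetical feature maps (channels, height, width) from a style layer
style_features = K.placeholder((64, 32, 32))
combination_features = K.placeholder((64, 32, 32))
loss = neural_style_loss(style_features, combination_features,
                         num_channels=64, img_width=32, img_height=32)
# loss is a scalar tensor: K.ndim(loss) == 0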
Example #2
    def test_binary_accuracy_with_threshold_(self):
        y_true = Input((2,))
        y_pred = Input((2,))
        threshold = K.placeholder((2,))
        acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
        self.assertEqual(K.ndim(acc), 0)
        binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
        acc_val = binary_accuracy_with_threshold_func(
            [np.array([[0, 1], [1, 0]]), np.array([[0.2, 0.6], [0.3, 0.1]]), np.array([0.25, 0.4])])[0]
        self.assertEqual(round(acc_val, 2), 1.00, "acc_val")

        # works on a single threshold
        threshold = K.placeholder(ndim=0)
        acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
        binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
        acc_val = binary_accuracy_with_threshold_func(
            [np.array([[0, 1], [1, 0]]), np.array([[0.2, 0.6], [0.3, 0.1]]), 0.5])[0]
        self.assertEqual(round(acc_val, 2), 0.75, "acc_val")

        # works on 3-dimensional inputs
        y_true = Input((None, 2))
        y_pred = Input((None, 2))
        threshold = K.placeholder((2,))
        acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
        self.assertEqual(K.ndim(acc), 0)
        binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
        acc_val = binary_accuracy_with_threshold_func(
            [np.array([[[0, 1]], [[1, 0]]]), np.array([[[0.2, 0.6]], [[0.3, 0.1]]]), np.array([0.25, 0.4])])[0]
        self.assertEqual(round(acc_val, 2), 1.00, "acc_val")
Example #3
    def buildDecomposition(self):
        q_embedding = self.tensors['q-embedding']
        a_embedding = self.tensors['a-embedding']
        q_match = self.tensors['q-match']
        a_match = self.tensors['a-match']
        # compute q+, q-, a+, a-
        # Note: why do the other layers manage without BATCH_SIZE while this
        # one suddenly needs it? The culprit is Lambda: its output_shape does
        # not take BATCH_SIZE and is inferred automatically. When the upstream
        # output carries a BATCH_SIZE, that value is reused for this layer;
        # when it does not, the inference is anybody's guess. Passing
        # BATCH_SIZE to this Merge therefore works around the Lambda quirk below.
        q_channels = Merge(
            mode=lambda x: decomposite(*x),
            output_shape=(self.params['batch_size'], 2, self.q_length, self.wdim),
            name='q-channels'
        )([q_embedding, q_match])

        a_channels = Merge(
            mode=lambda x: decomposite(*x),
            output_shape=(self.params['batch_size'], 2, self.a_length, self.wdim),
            name='a-channels',
        )([a_embedding, a_match])
        print('q_channels', q_channels._keras_shape, K.ndim(q_channels))
        print('a_channels', a_channels._keras_shape, K.ndim(a_channels))
        self.tensors['q-channels'] = q_channels
        self.tensors['a-channels'] = a_channels
Example #4
        def A_network_output(x):
            # The input of this layer is [L, mu, a] in concatenated form. We first split
            # those up.
            idx = 0
            # use integer division so the slice index stays an int on Python 3
            L_flat = x[:, idx:idx + (self.nb_actions * self.nb_actions + self.nb_actions) // 2]
            idx += (self.nb_actions * self.nb_actions + self.nb_actions) // 2
            mu = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions
            a = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions

            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            Ls = []
            LTs = []
            for idx in range(self.batch_size):
                L = K.zeros((self.nb_actions, self.nb_actions))
                L = T.set_subtensor(L[np.tril_indices(self.nb_actions)], L_flat[idx, :])
                diag = K.exp(T.diag(L))
                L = T.set_subtensor(L[np.diag_indices(self.nb_actions)], diag)
                Ls.append(L)
                LTs.append(K.transpose(L))
                # TODO: diagonal elements exp
            L = K.pack(Ls)
            LT = K.pack(LTs)
            P = K.batch_dot(L, LT, axes=(1, 2))
            assert K.ndim(P) == 3

            # Combine a, mu and P into a scalar (over the batches).
            A = -.5 * K.batch_dot(K.batch_dot(a - mu, P, axes=(1, 2)), a - mu, axes=1)
            assert K.ndim(A) == 2
            return A
Example #5
def style_loss(style1, style2):
    assert K.ndim(style1) == 3
    assert K.ndim(style2) == 3
    S = gram_matrix(style1)
    C = gram_matrix(style2)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))
Example #6
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))
Example #7
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))
Example #8
def gram_matrix(x):
    """
    the gram matrix of an image tensor (feature-wise outer product)
    :param x: The tensor contains image features
    :return: A gram matrix
    """
    if K.ndim(x) == 4:
        x = x[0, :, :, :]
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
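
For a channels-first tensor of shape (channels, height, width), K.batch_flatten gives (channels, height * width), so the Gram matrix is channels-by-channels. A quick numeric check (an illustration, assuming a TensorFlow backend):

from keras import backend as K
import numpy as np

x = K.constant(np.random.rand(3, 8, 8))  # (channels, height, width)
g = gram_matrix(x)
# K.eval(g).shape == (3, 3): one correlation value per pair of channels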
Example #9
    def compute_mask(self, x, mask=None):
        if mask is None:
            return None
        target_dim = K.ndim(x) - 2
        num_reducing = K.ndim(mask) - target_dim
        if num_reducing:
            axes = tuple([-i for i in range(1, num_reducing + 1)])
            mask = K.any(mask, axes)

        return mask
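
A worked shape example for the reduction above (illustrative, not from the original source):

# x:    (batch, time, features) -> K.ndim(x) == 3, so target_dim == 1
# mask: (batch, time, features) -> K.ndim(mask) == 3, so num_reducing == 2
# axes == (-1, -2), and K.any collapses the mask to shape (batch,);
# a mask that already has the target rank is passed through unchanged.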
Example #10
def softmax(x, axis, mask=None):
    if mask is None:
        mask = K.constant(True)
    mask = K.cast(mask, K.floatx())
    if K.ndim(x) == K.ndim(mask) + 1:
        mask = K.expand_dims(mask)

    m = K.max(x, axis=axis, keepdims=True)
    e = K.exp(x - m) * mask
    s = K.sum(e, axis=axis, keepdims=True)
    s += K.cast(K.cast(s < K.epsilon(), K.floatx()) * K.epsilon(), K.floatx())
    return e / s
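
A quick numeric check of the masked softmax (illustrative values, assuming a TensorFlow backend):

from keras import backend as K

x = K.constant([[1.0, 2.0, 3.0]])
mask = K.constant([[1.0, 1.0, 0.0]])  # suppress the last position
probs = softmax(x, axis=-1, mask=mask)
# K.eval(probs) ~ [[0.269, 0.731, 0.0]]: masked entries get zero probability
# and the rest renormalizes over the visible positions.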
Example #11
      def weighted(y_true, y_pred, weights, mask=None):
        assert mask is None
        assert weights is not None
        score_array = fn(y_true, y_pred)

        # reduce score_array to same ndim as weight array
        ndim = K.ndim(score_array)
        weight_ndim = K.ndim(weights)
        score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))

        # apply sample weighting
        score_array *= weights
        word_scores = K.sum(score_array, axis=-1)
        return K.mean(word_scores)
Example #12
 def buildFeatures(self, type='shared'):
     assert self.checkTensor('q-channels')
     assert self.checkTensor('a-channels')
     srelu = lambda name: SReLU(name=name)
     features = []
     if type == 'shared':
         q_features = self.linkFeature('q-channels', 'shared-convolution', activation='tanh')
         a_features = self.linkFeature('a-channels', 'shared-convolution', activation='tanh')
     else:
         raise NotImplementedError('Not supported')
     print('q-features', q_features._keras_shape, K.ndim(q_features))
     print('a-features', a_features._keras_shape, K.ndim(a_features))
     self.tensors['q-features'] = q_features
     self.tensors['a-features'] = a_features
Example #13
    def build_loss(self):
        r"""Implements the N-dim version of function
        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
        to return total variation for all images in the batch.
        """
        image_dims = K.ndim(self.img) - 2

        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
        start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
        samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

        # Compute pixel diffs by rolling slices to the right per image dim.
        tv = None
        for i in range(image_dims):
            ss = tuple(samples_channels_slice + start_slice)
            es = tuple(samples_channels_slice + end_slice)
            diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
            tv = diff_square if tv is None else tv + diff_square

            # Roll over to next image dim
            start_slice = np.roll(start_slice, 1).tolist()
            end_slice = np.roll(end_slice, 1).tolist()

        tv = K.sum(K.pow(tv, self.beta / 2.))
        return normalize(self.img, tv)
Example #14
    def get_output(self, train=False):
        def format_shape(shape):
            if K._BACKEND == 'tensorflow':
                def trf(x):
                    try:
                        return int(x)
                    except TypeError:
                        return x

                return list(map(trf, shape))
            return shape

        X = self.get_input(train)

        in_shape = format_shape(K.shape(X))
        batch_flatten_len = K.prod(in_shape[:2])
        cast_in_shape = (batch_flatten_len, ) + tuple(in_shape[i] for i in range(2, K.ndim(X)))
        
        pre_outs = self.layer(K.reshape(X, cast_in_shape))
        
        out_shape = format_shape(K.shape(pre_outs))
        cast_out_shape = (in_shape[0], in_shape[1]) + tuple(out_shape[i] for i in range(1, K.ndim(pre_outs)))
        
        outputs = K.reshape(pre_outs, cast_out_shape)
        return outputs
Example #15
    def get_output(self, train=False, get_tuple=False):
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        
        mask = self.get_output_mask(train)
        if mask is not None:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        last_output, outputs, other_outputs, states = LX.rnn(self.attention_step, X, initial_states, self.contexts,
                                              truncate_gradient=self.truncate_gradient,
                                              go_backwards=self.go_backwards,
                                              masking=masking)
        self.other_outputs = other_outputs
        
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return outputs
        else:
            return last_output
Example #16
    def _step(time, output_ta_t, *states):
        """RNN step function.

        # Arguments
            time: Current timestep value.
            output_ta_t: TensorArray.
            *states: List of states.

        # Returns
            Tuple: `(time + 1,output_ta_t) + tuple(new_states)`
        """
        current_input = input_ta.read(time)
        random_cutoff_prob = tf.random_uniform(
            (num_samples,), minval=0, maxval=1)

        output, new_states = step_function(current_input,
                                           {'initial_states': states,
                                            'random_cutoff_prob': random_cutoff_prob,
                                            'rec_dp_mask': rec_dp_constants})
        # returned output is (raw/sampled, batch, output_dim)
        axes = [1, 0] + list(range(2, K.ndim(output)))
        output = tf.transpose(output, (axes))
        for state, new_state in zip(states, new_states):
            new_state.set_shape(state.get_shape())
        output_ta_t = output_ta_t.write(time, output)
        return (time + 1, output_ta_t) + tuple(new_states)
Example #17
    def get_output(self, train=False):
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        # mask = self.get_input_mask(train)

        assert K.ndim(X) == 3
        assert K._BACKEND == 'theano'

        # TODO: handle statefulness properly, e.g.:
        # if self.stateful:
        #     initial_states = self.states
        # else:
        #     initial_states = self.get_initial_states(X)
        initial_states = self.states

        last_output, outputs, states = K.renn(self.step, X,
                                             initial_states,
                                             go_backwards=self.go_backwards)

        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        return outputs
Example #18
 def call(self, position):
     inputDim = K.ndim(position)
     positionShape = K.shape(position)
     targetDim = positionShape[-1]
     position = K.reshape(position, (-1, targetDim))
     samples = K.shape(position)[0]
     theta = THT.zeros((samples, 3, 3))
     
     chw = self.toChw(position)
     chw = K.reshape(chw, (samples, targetDim))
     dx = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,)) 
     dy = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
     cX = chw[:, 0] + dx
     cY = chw[:, 1] + dy
     h = K.maximum(chw[:, 2] * (1.0 + self.context), self.minSide)
     w = K.maximum(chw[:, 3] * (1.0 + self.context), self.minSide)
     
     # Calculating the parameters of the transformation
     tx = cX
     ty = cY
     sx = w / 2.0 # Scale x
     sy = h / 2.0 # Scale y
     
     # Setting transformation
     theta = THT.set_subtensor(theta[:, 0, 0], sx)
     theta = THT.set_subtensor(theta[:, 1, 1], sy)
     theta = THT.set_subtensor(theta[:, 0, 2], tx)
     theta = THT.set_subtensor(theta[:, 1, 2], ty)
     theta = THT.set_subtensor(theta[:, 2, 2], 1.0)
     
     thetaShape = K.concatenate([positionShape[:-1], K.shape(theta)[-2:]])
     theta = THT.reshape(theta, thetaShape, ndim=inputDim + 1)
     
     return theta
Example #19
    def call(self, x, mask=None):
        constants = self.get_constants(x)

        assert K.ndim(x) == 5
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)

        last_output, outputs, states = K.rnn(self.step, x,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             constants=constants)
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return outputs
        else:
            return last_output
Example #20
def gram_matrix(x):
    assert Kr.ndim(x) == 3

    features = Kr.batch_flatten(x)
    gram = Kr.dot(features, Kr.transpose(features))

    return gram
Example #21
 def __call__(self, loss):
     x = self.layer.get_output(True)
     assert K.ndim(x) == 4
     a = K.square(x[:, :, 1:, :-1] - x[:, :, :-1, :-1])
     b = K.square(x[:, :, :-1, 1:] - x[:, :, :-1, :-1])
     loss += self.weight * K.mean(K.sum(K.pow(a + b, 1.25), axis=(1,2,3)))
     return loss
Example #22
    def __call__(self, loss):
        from . import patches

        output = self.layer.get_output(True)
        assert K.ndim(output) == 4
        batch_size = K.shape(output)[0] // 2
        patch_size = self.patch_size
        patch_stride = 1
        generated = output[:batch_size, :, :, :]
        content = output[batch_size:, :, :, :]
        # extract patches from feature maps
        generated_patches, generated_patches_norm = \
            patches.make_patches(generated, patch_size, patch_stride)
        content_patches, content_patches_norm = \
            patches.make_patches(content, patch_size, patch_stride)
        a_patches, a_patches_norm = \
            patches.make_patches(K.variable(self.features_a), patch_size, patch_stride)
        ap_patches, ap_patches_norm = \
            patches.make_patches(K.variable(self.features_ap), patch_size, patch_stride)
        # find best patches and calculate loss
        patch_ids = patches.find_patch_matches(
            content_patches, content_patches_norm,
            a_patches / a_patches_norm)
        best_analogy_patches = K.reshape(
            ap_patches[patch_ids], K.shape(generated_patches))
        loss += self.weight * K.sum(K.square(best_analogy_patches - generated_patches)) / patch_size ** 2
        return loss
Example #23
    def call(self, inputs, **kwargs):
        assert isinstance(inputs, list) and len(inputs) == 3
        first, second, features = inputs[0], inputs[1], inputs[2]
        if not self.from_logits:
            first = kb.clip(first, 1e-10, 1.0)
            second = kb.clip(second, 1e-10, 1.0)
            first_, second_ = kb.log(first), kb.log(second)
        else:
            first_, second_ = first, second
        # embedded_features.shape = (M, T, 1)
        if self.use_intermediate_layer:
            features = kb.dot(features, self.first_kernel)
            features = kb.bias_add(features, self.first_bias, data_format="channels_last")
            features = self.intermediate_activation(features)
        embedded_features = kb.dot(features, self.features_kernel)
        embedded_features = kb.bias_add(
            embedded_features, self.features_bias, data_format="channels_last")
        if self.use_dimension_bias:
            tiling_shape = [1] * (kb.ndim(first)-1) + [kb.shape(first)[-1]]
            embedded_features = kb.tile(embedded_features, tiling_shape)
            embedded_features = kb.bias_add(
                embedded_features, self.dimensions_bias, data_format="channels_last")
        sigma = kb.sigmoid(embedded_features)

        result = weighted_sum(first_, second_, sigma,
                              self.first_threshold, self.second_threshold)
        probs = kb.softmax(result)
        if self.return_logits:
            return [probs, result]
        return probs
Example #24
def criterion_GAN(output, target, use_lsgan=True):
    if use_lsgan:
        diff = output - target
        dims = list(range(1, K.ndim(diff)))
        return K.expand_dims((K.mean(diff ** 2, dims)), 0)
    else:
        return K.mean(K.log(output + 1e-12) * target + K.log(1 - output + 1e-12) * (1 - target))
Example #25
 def call(self, x, mask=None):
     # x: (batch_size, input_length, input_dim)
     if mask is None:
         return K.mean(x, axis=1)  # (batch_size, input_dim)
     else:
         # This is to remove padding from the computational graph.
         if K.ndim(mask) > K.ndim(x):
             # This is due to the bug in Bidirectional that is passing the input mask
             # instead of computing output mask.
             # TODO: Fix the implementation of Bidirectional.
             mask = K.any(mask, axis=(-2, -1))
         if K.ndim(mask) < K.ndim(x):
             mask = K.expand_dims(mask)
         masked_input = switch(mask, x, K.zeros_like(x))
         weights = K.cast(mask / (K.sum(mask) + K.epsilon()), 'float32')
         return K.sum(masked_input * weights, axis=1)  # (batch_size, input_dim)
Example #26
def style_loss(style_image, target_image, style_masks, target_masks):
    '''Calculate style loss between style_image and target_image,
    in all regions.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)
    loss = K.variable(0)
    for i in range(nb_labels):
        if K.image_dim_ordering() == 'th':
            style_mask = style_masks[i, :, :]
            target_mask = target_masks[i, :, :]
        else:
            style_mask = style_masks[:, :, i]
            target_mask = target_masks[:, :, i]
        loss += region_style_weight * region_style_loss(style_image, target_image, style_mask, target_mask)
    return loss
Example #27
def total_variation_loss(x):
    assert K.ndim(x) == 4
    # squared differences between vertically (a) and horizontally (b)
    # neighbouring pixels, channels-first layout: (batch, channels, height, width)
    a = K.square(x[:, :, 1:, :img_width - 1] - x[:, :, :img_height - 1, :img_width - 1])
    b = K.square(x[:, :, :img_height - 1, 1:] - x[:, :, :img_height - 1, :img_width - 1])
    return K.sum(K.pow(a + b, 1.25))
Example #28
        def teacher_forced(h, states):
            # switching from (batch_size, previous_layer_input|true_input, output_dim)
            #    to ( previous_layer_input|true_input, batch_size, output_dim)
            axes = [1, 0] + list(range(2, K.ndim(h)))
            h = K.permute_dimensions(h, axes)

            prev_layer_input = h[0:1, :, :]
            true_input = h[1:, :, :self.units]

            # this should correspond  to true input
            prev_sampled_output = true_input

            if self.implementation == 0:
                x_z = prev_layer_input[0, :, :self.units]
                x_r = prev_layer_input[0, :, self.units: 2 * self.units]
                x_h = prev_layer_input[0, :, 2 * self.units:]
            else:
                raise ValueError('Implementation type ' + str(self.implementation) + ' is invalid')

            z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                                      self.recurrent_kernel_z))
            r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                                      self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1 * rec_dp_mask[2],
                                       self.recurrent_kernel_h) +
                                 K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

            output = z * h_tm1 + (1. - z) * hh

            return K.stack([output, output])
Example #29
        def free_running(h, states):

            prev_generated_output = initial_states[0][1:, :, :]
            prev_sampled_output = prev_generated_output

            # switching from (batch_size, previous_layer_input|true_input, output_dim)
            #    to ( previous_layer_input|true_input, batch_size, output_dim)
            axes = [1, 0] + list(range(2, K.ndim(h)))
            h = K.permute_dimensions(h, axes)

            prev_layer_input = h[0:1, :, :]

            if self.implementation == 0:
                x_z = prev_layer_input[0, :, :self.units]
                x_r = prev_layer_input[0, :, self.units: 2 * self.units]
                x_h = prev_layer_input[0, :, 2 * self.units:]
            else:
                # mirror the guard in Example #28; without it x_z is undefined
                raise ValueError('Implementation type ' + str(self.implementation) + ' is invalid')

            z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                                      self.recurrent_kernel_z))
            r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                                      self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1 * rec_dp_mask[2],
                                       self.recurrent_kernel_h) +
                                 K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

            output = z * h_tm1 + (1. - z) * hh

            final_output = self.output_sampling(output, random_cutoff_vec)

            return K.stack([output, final_output])
Example #30
    def call(self, x, mask=None):

        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
            x_normed = K.batch_normalization(
                x, self.running_mean, self.running_std,
                self.beta, self.gamma,
                epsilon=self.epsilon)
        else:
            # need broadcasting
            broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
            broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            x_normed = K.batch_normalization(
                x, broadcast_running_mean, broadcast_running_std,
                broadcast_beta, broadcast_gamma,
                epsilon=self.epsilon)

        return x_normed
Example #31
def normalize_vector(x):
    z = K.sum(K.batch_flatten(K.square(x)), axis=1)
    while K.ndim(z) < K.ndim(x):
        z = K.expand_dims(z, axis=-1)
    return x / (K.sqrt(z))
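
The while-loop broadcasts the per-sample squared norm back up to the input rank, so each sample is scaled to unit L2 norm. A minimal check (illustrative, assuming a TensorFlow backend):

from keras import backend as K

x = K.constant([[3.0, 4.0], [0.0, 5.0]])
x_unit = normalize_vector(x)
# K.eval(x_unit) ~ [[0.6, 0.8], [0.0, 1.0]]: every row now has L2 norm 1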
Example #32
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
Example #33
    def call(self, inputs):
        input_shape = K.shape(inputs)
        ndim = K.ndim(inputs)
        reduction_axes = list(range(ndim))
        del reduction_axes[self.axis]
        del reduction_axes[0]
        input_dim = input_shape[self.axis] // 2
        mu = K.mean(inputs, axis=reduction_axes)
        broadcast_mu_shape = [1] * ndim
        broadcast_mu_shape[self.axis] = input_shape[self.axis]
        broadcast_mu_shape[0] = K.shape(inputs)[0]
        broadcast_mu = K.reshape(mu, broadcast_mu_shape)
        if self.center:
            input_centred = inputs - broadcast_mu
        else:
            input_centred = inputs
        centred_squared = input_centred**2
        if (self.axis == 1 and ndim != 3) or ndim == 2:
            centred_squared_real = centred_squared[:, :input_dim]
            centred_squared_imag = centred_squared[:, input_dim:]
            centred_real = input_centred[:, :input_dim]
            centred_imag = input_centred[:, input_dim:]
        elif ndim == 3:
            centred_squared_real = centred_squared[:, :, :input_dim]
            centred_squared_imag = centred_squared[:, :, input_dim:]
            centred_real = input_centred[:, :, :input_dim]
            centred_imag = input_centred[:, :, input_dim:]
        elif self.axis == -1 and ndim == 4:
            centred_squared_real = centred_squared[:, :, :, :input_dim]
            centred_squared_imag = centred_squared[:, :, :, input_dim:]
            centred_real = input_centred[:, :, :, :input_dim]
            centred_imag = input_centred[:, :, :, input_dim:]
        elif self.axis == -1 and ndim == 5:
            centred_squared_real = centred_squared[:, :, :, :, :input_dim]
            centred_squared_imag = centred_squared[:, :, :, :, input_dim:]
            centred_real = input_centred[:, :, :, :, :input_dim]
            centred_imag = input_centred[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Layernorm combination of axis and dimensions. axis should be either 1 or -1. '
                'axis: ' + str(self.axis) + '; ndim: ' + str(ndim) + '.')
        if self.scale:
            Vrr = K.mean(centred_squared_real,
                         axis=reduction_axes) + self.epsilon
            Vii = K.mean(centred_squared_imag,
                         axis=reduction_axes) + self.epsilon
            # Vri contains the real and imaginary covariance for each feature map.
            Vri = K.mean(
                centred_real * centred_imag,
                axis=reduction_axes,
            )
        elif self.center:
            Vrr = None
            Vii = None
            Vri = None
        else:
            raise ValueError(
                'Error. Both scale and center in batchnorm are set to False.')

        return complex_normalization(input_centred,
                                     Vrr,
                                     Vii,
                                     Vri,
                                     self.beta,
                                     self.gamma_rr,
                                     self.gamma_ri,
                                     self.gamma_ii,
                                     self.scale,
                                     self.center,
                                     layernorm=True,
                                     axis=self.axis)
Example #34
def total_variation(y):
    assert K.ndim(y) == 4
    a = K.square(y[:, :res - 1, :res - 1, :] - y[:, 1:, :res - 1, :])
    b = K.square(y[:, :res - 1, :res - 1, :] - y[:, :res - 1, 1:, :])
    return K.mean(K.pow(a + b, 2))
Example #35
def ComplexBN(input_centred,
              Vrr,
              Vii,
              Vri,
              beta,
              gamma_rr,
              gamma_ri,
              gamma_ii,
              scale=True,
              center=True,
              layernorm=False,
              axis=-1):

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    if scale:
        gamma_broadcast_shape = [1] * ndim
        gamma_broadcast_shape[axis] = input_dim
    if center:
        broadcast_beta_shape = [1] * ndim
        broadcast_beta_shape[axis] = input_dim * 2

    if scale:
        standardized_output = complex_standardization(input_centred,
                                                      Vrr,
                                                      Vii,
                                                      Vri,
                                                      layernorm,
                                                      axis=axis)

        # Now we perform the scaling and shifting of the normalized x using
        # the scaling parameter
        #           [  gamma_rr gamma_ri  ]
        #   Gamma = [  gamma_ri gamma_ii  ]
        # and the shifting parameter
        #    Beta = [beta_real beta_imag].T
        # where:
        # x_real_BN = gamma_rr * x_real_normed + gamma_ri * x_imag_normed + beta_real
        # x_imag_BN = gamma_ri * x_real_normed + gamma_ii * x_imag_normed + beta_imag

        broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape)
        broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape)
        broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape)

        cat_gamma_4_real = K.concatenate(
            [broadcast_gamma_rr, broadcast_gamma_ii], axis=axis)
        cat_gamma_4_imag = K.concatenate(
            [broadcast_gamma_ri, broadcast_gamma_ri], axis=axis)
        if (axis == 1 and ndim != 3) or ndim == 2:
            centred_real = standardized_output[:, :input_dim]
            centred_imag = standardized_output[:, input_dim:]
        elif ndim == 3:
            centred_real = standardized_output[:, :, :input_dim]
            centred_imag = standardized_output[:, :, input_dim:]
        elif axis == -1 and ndim == 4:
            centred_real = standardized_output[:, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, input_dim:]
        elif axis == -1 and ndim == 5:
            centred_real = standardized_output[:, :, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
                'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
        rolled_standardized_output = K.concatenate(
            [centred_imag, centred_real], axis=axis)
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output + broadcast_beta
        else:
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output
    else:
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return input_centred + broadcast_beta
        else:
            return input_centred
Example #36
    def call(self, x, mask=None):
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indices = [0]
                for row in range(1, self.nb_actions):
                    diag_indices.append(diag_indices[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indices) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
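
The closing block computes the quadratic form -0.5 * (a - mu)^T P (a - mu) per batch element. A NumPy cross-check of that arithmetic (illustrative values, independent of the layer):

import numpy as np

a_mu = np.array([[1.0, 2.0]])             # (batch, nb_actions)
P = np.array([[[2.0, 0.0], [0.0, 1.0]]])  # (batch, nb_actions, nb_actions)
A = -0.5 * np.einsum('bi,bij,bj->b', a_mu, P, a_mu)
# A ~ [-3.0]; the Keras code above yields the same value with shape (batch, 1)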
Example #37
def complex_standardization(input_centred,
                            Vrr,
                            Vii,
                            Vri,
                            layernorm=False,
                            axis=-1):
    """Complex Standardization of input
    
    Arguments:
        input_centred -- Input Tensor
        Vrr -- Real component of covariance matrix V
        Vii -- Imaginary component of covariance matrix V
        Vri -- Non-diagonal component of covariance matrix V
    
    Keyword Arguments:
        layernorm {bool} -- Normalization (default: {False})
        axis {int} -- Axis for Standardization (default: {-1})
    
    Raises:
        ValueError: Mismatched dimensions
    
    Returns:
        Complex standardized input
    """

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    variances_broadcast = [1] * ndim
    variances_broadcast[axis] = input_dim
    if layernorm:
        variances_broadcast[0] = K.shape(input_centred)[0]

    # We require the covariance matrix's inverse square root. That first
    # requires square rooting, followed by inversion (I do this in that order
    # because during the computation of square root we compute the determinant
    # we'll need for inversion as well).

    # tau = Vrr + Vii = Trace. Guaranteed >= 0 because SPD
    tau = Vrr + Vii
    # delta = (Vrr * Vii) - (Vri ** 2) = Determinant. Guaranteed >= 0 because
    # SPD
    delta = (Vrr * Vii) - (Vri**2)

    s = K.sqrt(delta)  # Determinant of square root matrix
    t = K.sqrt(tau + 2 * s)

    # The square root matrix could now be explicitly formed as
    #       [ Vrr+s Vri   ]
    # (1/t) [ Vir   Vii+s ]
    # https://en.wikipedia.org/wiki/Square_root_of_a_2_by_2_matrix
    # but we don't need to do this immediately since we can also simultaneously
    # invert. We can do this because we've already computed the determinant of
    # the square root matrix, and can thus invert it using the analytical
    # solution for 2x2 matrices
    #      [ A B ]             [  D  -B ]
    # inv( [ C D ] ) = (1/det) [ -C   A ]
    # http://mathworld.wolfram.com/MatrixInverse.html
    # Thus giving us
    #           [  Vii+s  -Vri   ]
    # (1/s)(1/t)[ -Vir     Vrr+s ]
    # So we proceed as follows:

    inverse_st = 1.0 / (s * t)
    Wrr = (Vii + s) * inverse_st
    Wii = (Vrr + s) * inverse_st
    Wri = -Vri * inverse_st

    # And we have computed the inverse square root matrix W = sqrt(V)!
    # Normalization. We multiply, x_normalized = W.x.

    # The returned result will be a complex standardized input
    # where the real and imaginary parts are obtained as follows:
    # x_real_normed = Wrr * x_real_centred + Wri * x_imag_centred
    # x_imag_normed = Wri * x_real_centred + Wii * x_imag_centred

    broadcast_Wrr = K.reshape(Wrr, variances_broadcast)
    broadcast_Wri = K.reshape(Wri, variances_broadcast)
    broadcast_Wii = K.reshape(Wii, variances_broadcast)

    cat_W_4_real = K.concatenate([broadcast_Wrr, broadcast_Wii], axis=axis)
    cat_W_4_imag = K.concatenate([broadcast_Wri, broadcast_Wri], axis=axis)

    if (axis == 1 and ndim != 3) or ndim == 2:
        centred_real = input_centred[:, :input_dim]
        centred_imag = input_centred[:, input_dim:]
    elif ndim == 3:
        centred_real = input_centred[:, :, :input_dim]
        centred_imag = input_centred[:, :, input_dim:]
    elif axis == -1 and ndim == 4:
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]
    elif axis == -1 and ndim == 5:
        centred_real = input_centred[:, :, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, :, input_dim:]
    else:
        raise ValueError(
            'Incorrect Batchnorm combination of axis and dimensions. axis '
            'should be either 1 or -1. '
            'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
    rolled_input = K.concatenate([centred_imag, centred_real], axis=axis)

    output = cat_W_4_real * input_centred + cat_W_4_imag * rolled_input

    #   Wrr * x_real_centered | Wii * x_imag_centered
    # + Wri * x_imag_centered | Wri * x_real_centered
    # -----------------------------------------------
    # = output

    return output
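
The comments above derive the inverse square root of the 2x2 covariance matrix analytically. A pure-NumPy sanity check of those formulas (illustrative numbers, not from the original source):

import numpy as np

Vrr, Vii, Vri = 2.0, 1.0, 0.5
s = np.sqrt(Vrr * Vii - Vri ** 2)          # sqrt of the determinant delta
t = np.sqrt(Vrr + Vii + 2 * s)
W = np.array([[Vii + s, -Vri],
              [-Vri, Vrr + s]]) / (s * t)  # the claimed inverse square root
V = np.array([[Vrr, Vri], [Vri, Vii]])
assert np.allclose(W @ V @ W, np.eye(2))   # W V W == I, hence W == V**(-1/2)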
Example #38
def get_probs_from_logits(logits):
    logits_shape = K.shape(logits)
    logits_flat = K.reshape(logits, shape=(-1, logits_shape[K.ndim(logits) - 1]))
    probs_flat = K.softmax(logits_flat)
    return K.reshape(probs_flat, shape=logits_shape)
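
A small usage sketch (illustrative, assuming a TensorFlow backend): softmax is applied over the last axis and the original shape is restored:

from keras import backend as K

logits = K.constant([[[1.0, 2.0], [0.0, 0.0]]])  # shape (1, 2, 2)
probs = get_probs_from_logits(logits)
# K.eval(probs) ~ [[[0.269, 0.731], [0.5, 0.5]]], same shape as the input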
Example #39
    def call(self, inputs):
        # Note that I is useless, because the layer cannot be used in graph
        # batch mode.
        if len(inputs) == 3:
            X, A, I = inputs
        else:
            X, A = inputs
            I = None

        # Check if the layer is operating in batch mode (X and A have rank 3)
        batch_mode = K.ndim(A) == 3

        # Optionally compute hidden layer
        if self.h is None:
            Hid = X
        else:
            Hid = K.dot(X, self.kernel_in)
            if self.use_bias:
                Hid = K.bias_add(Hid, self.bias_in)
            if self.activation is not None:
                Hid = self.activation(Hid)

        # Compute cluster assignment matrix
        S = K.dot(Hid, self.kernel_out)
        if self.use_bias:
            S = K.bias_add(S, self.bias_out)
        S = activations.softmax(
            S, axis=-1)  # Apply softmax to get cluster assignments

        # MinCut regularization
        A_pooled = ops.matmul_AT_B_A(S, A)
        num = tf.trace(A_pooled)

        D = ops.degree_matrix(A)
        den = tf.trace(ops.matmul_AT_B_A(S, D))
        cut_loss = -(num / den)
        if batch_mode:
            cut_loss = K.mean(cut_loss)
        self.add_loss(cut_loss)

        # Orthogonality regularization
        SS = ops.matmul_AT_B(S, S)
        I_S = tf.eye(self.k)
        ortho_loss = tf.norm(SS / tf.norm(SS, axis=(-1, -2)) -
                             I_S / tf.norm(I_S),
                             axis=(-1, -2))
        if batch_mode:
            ortho_loss = K.mean(ortho_loss)
        self.add_loss(ortho_loss)

        # Pooling
        X_pooled = ops.matmul_AT_B(S, X)
        A_pooled = tf.linalg.set_diag(A_pooled, tf.zeros(
            K.shape(A_pooled)[:-1]))  # Remove diagonal
        A_pooled = ops.normalize_A(A_pooled)

        output = [X_pooled, A_pooled]

        if I is not None:
            I_mean = tf.segment_mean(I, I)
            I_pooled = ops.tf_repeat_1d(I_mean, tf.ones_like(I_mean) * self.k)
            output.append(I_pooled)

        if self.return_mask:
            output.append(S)

        return output
Example #40
 def squash_mask(self, mask):
     if K.ndim(mask) == 2:
         return mask
     elif K.ndim(mask) == 3:
         return K.any(mask, axis=-1)
Example #41
 def call(self, inputs):
     num_axis = K.ndim(inputs)
     inputs = K.permute_dimensions(inputs, range(num_axis)[::-1])
     x_outs = K.gather(inputs, self.idxs)
     x_outs = K.permute_dimensions(x_outs, range(num_axis)[::-1])
     return x_outs
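
Because K.gather indexes the first axis, the two axis reversals make this layer pick entries along the last axis of inputs. A minimal sketch of the same steps (illustrative, assuming self.idxs holds integer indices such as [2, 0]):

from keras import backend as K

inputs = K.constant([[10.0, 20.0, 30.0]])       # shape (1, 3)
idxs = K.constant([2, 0], dtype='int32')
flipped = K.permute_dimensions(inputs, (1, 0))  # (3, 1)
picked = K.gather(flipped, idxs)                # rows 2 and 0 -> (2, 1)
outputs = K.permute_dimensions(picked, (1, 0))  # (1, 2) ~ [[30.0, 10.0]]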
Example #42
    def sampled_softmax_loss(weights,
                             biases,
                             num_sampled,
                             num_classes,
                             labels,
                             inputs,
                             mask=None,
                             num_true=1,
                             sampled_values=None,
                             remove_accidental_hits=True):
        """Computes and returns the sampled softmax training loss.
        This is a faster way to train a softmax classifier over a huge number of
        classes.
        This operation is for training only.  It is generally an underestimate of
        the full softmax loss.
        At inference time, you can compute full softmax probabilities with the
        expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
        See our [Candidate Sampling Algorithms Reference]
        (../../extras/candidate_sampling.pdf)
        Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
        ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
        Args:
          weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
              objects whose concatenation along dimension 0 has shape
              [num_classes, dim].  The (possibly-sharded) class embeddings.
          biases: A `Tensor` of shape `[num_classes]`.  The class biases.
          inputs: A `Tensor` of shape `[time steps, batch_size, dim]`.  The forward
              activations of the input network.
          mask: A tensor of shape [time_steps, batch_size,1].
          labels: A `Tensor` of type `int64` and shape `[time_steps,batch_size,
              num_true]`. The target classes.  Note that this format differs from
              the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
          num_sampled: An `int`.  The number of classes to randomly sample per batch.
          num_classes: An `int`. The number of possible classes.
          num_true: An `int`.  The number of target classes per training example.
          sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
              `sampled_expected_count`) returned by a `*_candidate_sampler` function.
              (if None, we default to `log_uniform_candidate_sampler`)
          remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
              where a sampled class equals one of the target classes.  Default is
              True.
        Returns:
          A `batch_size` 1-D tensor of per-example sampled softmax losses.
        """
        assert K.ndim(inputs) == 3  # time_steps, number_samples, input_dim
        nb_samples = K.cast(K.shape(inputs)[1], K.dtype(weights))

        inputs = K.reshape(inputs, (-1, K.shape(inputs)[2]))
        labels = K.reshape(labels, (-1, 1))
        labels = K.cast(labels, 'int64')

        ce = tf.nn.sampled_softmax_loss(weights=weights,
                                        biases=biases,
                                        inputs=inputs,
                                        labels=labels,
                                        num_sampled=num_sampled,
                                        num_classes=num_classes,
                                        num_true=num_true,
                                        sampled_values=sampled_values,
                                        remove_accidental_hits=remove_accidental_hits)
        if mask is not None:
            mask_flat = K.flatten(mask)  # time_steps*nb_samples
            ce *= mask_flat
        print "--sum--sampled_softmax_loss"
        tmp = sum_op.Sum_op(keepdim=True, dimension=0)(ce)
        tmp = K.squeeze(tmp, 0)
        #return K.sum(ce) / nb_samples
        return tmp / nb_samples
Example #43
def ComplexBN(input_centred,
              Vrr,
              Vii,
              Vri,
              beta,
              gamma_rr,
              gamma_ri,
              gamma_ii,
              scale=True,
              center=True,
              layernorm=False,
              axis=-1):
    """Complex Batch Normalization
    
    Arguments:
        input_centred -- input data
        Vrr -- Real component of covariance matrix V
        Vii -- Imaginary component of covariance matrix V
        Vri -- Non-diagonal component of covariance matrix V
        beta -- Learnable shift parameter beta
        gamma_rr -- Scaling parameter gamma - rr component of 2x2 matrix
        gamma_ri -- Scaling parameter gamma - ri component of 2x2 matrix
        gamma_ii -- Scaling parameter gamma - ii component of 2x2 matrix
    
    Keyword Arguments:
        scale {bool} -- Standardization of input (default: {True})
        center {bool} -- Mean-shift correction (default: {True})
        layernorm {bool} -- Normalization (default: {False})
        axis {int} -- Axis for Standardization (default: {-1})
    
    Raises:
        ValueError: Dimensional mismatch
    
    Returns:
        Batch-Normalized Input
    """

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    if scale:
        gamma_broadcast_shape = [1] * ndim
        gamma_broadcast_shape[axis] = input_dim
    if center:
        broadcast_beta_shape = [1] * ndim
        broadcast_beta_shape[axis] = input_dim * 2

    if scale:
        standardized_output = complex_standardization(input_centred,
                                                      Vrr,
                                                      Vii,
                                                      Vri,
                                                      layernorm,
                                                      axis=axis)

        # Now we perform the scaling and shifting of the normalized x using
        # the scaling parameter
        #           [  gamma_rr gamma_ri  ]
        #   Gamma = [  gamma_ri gamma_ii  ]
        # and the shifting parameter
        #    Beta = [beta_real beta_imag].T
        # where:
        # x_real_BN = gamma_rr * x_real_normed +
        #             gamma_ri * x_imag_normed + beta_real
        # x_imag_BN = gamma_ri * x_real_normed +
        #             gamma_ii * x_imag_normed + beta_imag

        broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape)
        broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape)
        broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape)

        cat_gamma_4_real = K.concatenate(
            [broadcast_gamma_rr, broadcast_gamma_ii], axis=axis)
        cat_gamma_4_imag = K.concatenate(
            [broadcast_gamma_ri, broadcast_gamma_ri], axis=axis)
        if (axis == 1 and ndim != 3) or ndim == 2:
            centred_real = standardized_output[:, :input_dim]
            centred_imag = standardized_output[:, input_dim:]
        elif ndim == 3:
            centred_real = standardized_output[:, :, :input_dim]
            centred_imag = standardized_output[:, :, input_dim:]
        elif axis == -1 and ndim == 4:
            centred_real = standardized_output[:, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, input_dim:]
        elif axis == -1 and ndim == 5:
            centred_real = standardized_output[:, :, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Batchnorm combination of axis and dimensions. axis'
                ' should be either 1 or -1. '
                'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
        rolled_standardized_output = K.concatenate(
            [centred_imag, centred_real], axis=axis)
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output + broadcast_beta
        else:
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output
    else:
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return input_centred + broadcast_beta
        else:
            return input_centred
Example #44
def region_style_loss(style_image, target_image, style_mask, target_mask):
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)
    s = gram_matrix(style_image * style_mask) * K.sum(style_mask)
    c = gram_matrix(target_image * target_mask) * K.sum(target_mask)
    return K.sum(K.square(s - c))
Example #45
z = z_in
z = Dense(z_dim, activation='relu')(z)
z = Dense(z_dim, activation='relu')(z)
z = Dense(z_dim, activation='relu')(z)
z = Dense(1, activation='sigmoid')(z)
print('z.shape', z.shape)

LocalDiscriminator = Model(z_in, z)
z_f_1_score = LocalDiscriminator(z_f_1)
z_f_2_score = LocalDiscriminator(z_f_2)
print('z_f_2_score.shape', z_f_2_score.shape)
print('z_f_2.shape', z_f_2.shape)
# local_info_loss = -K.mean(K.log(z_f_1_score+1e-6)+K.log(1-z_f_2_score+1e-6))
local_info_loss = -K.sum(
    K.log(z_f_1_score + 1e-6) + K.log(1 - z_f_2_score + 1e-6),
    axis=list(range(1, K.ndim(z_f_1_score))))
print('local_info_loss.shape', local_info_loss.shape)
local_info_loss = K.mean(local_info_loss)

print('local_info_loss', local_info_loss)

# model used for training
model_train = Model(x_in, [z_z_1_score, z_z_2_score, z_f_1_score, z_f_2_score])
model_train.add_loss(alpha * global_info_loss + beta * local_info_loss +
                     gamma * prior_kl_loss)
# model_train.add_loss(alpha*global_info_loss+gamma*prior_kl_loss)
model_train.compile(optimizer=Adam(1e-3))
model_train.metrics_names.append('global_info_loss')
model_train.metrics_tensors.append(global_info_loss)
print('===============', model_train.metrics_names)
Example #46
    def update_state(self, values, sample_weight=None):
        """Accumulates statistics for computing the metric.

        Args:
          values: Per-example value.
          sample_weight: Optional weighting of each example. Defaults to 1.

        Returns:
          Update op.
        """
        [
            values
        ], sample_weight = metrics_utils.ragged_assert_compatible_and_get_flat_values(  # noqa: E501
            [values], sample_weight)
        try:
            values = tf.cast(values, self._dtype)
        except (ValueError, TypeError):
            msg = (
                "The output of a metric function can only be a single Tensor. "
                f"Received: {values}. ")
            if isinstance(values, dict):
                msg += (
                    "To return a dict of values, implement a custom Metric "
                    "subclass.")
            raise RuntimeError(msg)
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, self._dtype)
            # Update dimensions of weights to match with values if possible.
            (
                values,
                _,
                sample_weight,
            ) = losses_utils.squeeze_or_expand_dimensions(
                values, sample_weight=sample_weight)
            try:
                # Broadcast weights if possible.
                sample_weight = tf.__internal__.ops.broadcast_weights(
                    sample_weight, values)
            except ValueError:
                # Reduce values to same ndim as weight array
                ndim = backend.ndim(values)
                weight_ndim = backend.ndim(sample_weight)
                if self.reduction == metrics_utils.Reduction.SUM:
                    values = tf.reduce_sum(values,
                                           axis=list(range(weight_ndim, ndim)))
                else:
                    values = tf.reduce_mean(values,
                                            axis=list(range(weight_ndim,
                                                            ndim)))
            values = tf.multiply(values, sample_weight)

        value_sum = tf.reduce_sum(values)
        with tf.control_dependencies([value_sum]):
            update_total_op = self.total.assign_add(value_sum)

        # Exit early if the reduction doesn't have a denominator.
        if self.reduction == metrics_utils.Reduction.SUM:
            return update_total_op

        # Update `count` for reductions that require a denominator.
        if self.reduction == metrics_utils.Reduction.SUM_OVER_BATCH_SIZE:
            num_values = tf.cast(tf.size(values), self._dtype)
        elif self.reduction == metrics_utils.Reduction.WEIGHTED_MEAN:
            if sample_weight is None:
                num_values = tf.cast(tf.size(values), self._dtype)
            else:
                num_values = tf.reduce_sum(sample_weight)
        else:
            raise NotImplementedError(
                f'Reduction "{self.reduction}" not implemented. Expected '
                '"sum", "weighted_mean", or "sum_over_batch_size".')

        with tf.control_dependencies([update_total_op]):
            return self.count.assign_add(num_values)
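
A short usage sketch of the WEIGHTED_MEAN path above, via tf.keras.metrics.Mean (a subclass of this Reduce-style base class):

import tensorflow as tf

m = tf.keras.metrics.Mean()
m.update_state([1.0, 3.0], sample_weight=[1.0, 0.5])
# total accumulates sum(values * weights) = 1.0*1.0 + 3.0*0.5 = 2.5
# count accumulates sum(weights) = 1.5, so result() = 2.5 / 1.5
print(float(m.result()))  # ~1.6667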
Example #47
def gram_matrix(x):
    assert 3 == K.ndim(x)
    feats = K.batch_flatten(x)
    gram = K.dot(feats, K.transpose(feats))
    return gram
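
Note that this variant flattens without permuting, so it implicitly treats the first axis as channels; a quick shape sketch with the TensorFlow backend (Example #55 below permutes first, which handles channels-last inputs):

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.rand(64, 14, 14))  # channels-first features (C, H, W)
feats = K.batch_flatten(x)                  # -> (C, H*W) = (64, 196)
gram = K.dot(feats, K.transpose(feats))     # -> (C, C) Gram matrix = (64, 64)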
Example #48
def total_variation_loss(x):
    assert K.ndim(x) == 4
    a = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, 1:, :img_height-1])
    b = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, :img_width-1, 1:])
    return K.sum(K.pow(a + b, 1.25))
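
For orientation, a shape sketch of the two terms (channels-first layout, given the indexing on axes 2 and 3):

# Assuming x has shape (B, C, W, H):
#   a = (x[b, c, i, j] - x[b, c, i+1, j])**2  -> (B, C, W-1, H-1)
#   b = (x[b, c, i, j] - x[b, c, i, j+1])**2  -> (B, C, W-1, H-1)
# K.sum(K.pow(a + b, 1.25)) then reduces everything to a scalar penalty;
# the 1.25 exponent penalizes variation more gently than a pure quadratic.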
Example #49
def continuity_loss(x, img_shape):
    h, w = img_shape
    assert K.ndim(x) == 4
    a = K.square(x[:, :, :h - 1, :w - 1] - x[:, :, 1:, :w - 1])
    b = K.square(x[:, :, :h - 1, :w - 1] - x[:, :, :h - 1, 1:])
    return K.sum(K.pow(a + b, 1.25))
Example #50
    def __init__(self,
                 inputs,
                 depth=None,
                 nb_dense_block=4,
                 growth_rate=12,
                 nb_filter=-1,
                 nb_layers_per_block=-1,
                 bottleneck=False,
                 reduction=0.0,
                 dropout_rate=0.0,
                 weight_decay=1e-4,
                 subsample_initial_block=False,
                 activation_conv='crelu',
                 pooling_func=['max', 'global_average'],
                 include_top=False,
                 classes=None,
                 output_activation=None,
                 *args,
                 **kwargs):
        concat_axis = -1 if K.image_data_format() == "channels_last" else 1

        if reduction != 0.0:
            assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'

        # layers in each dense block
        if isinstance(nb_layers_per_block, (list, tuple)):
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \
                'Note that list size must be (nb_dense_block)'
            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (
                    depth - 4
                ) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept the user's initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = 7
            initial_strides = 2
        else:
            initial_kernel = 3
            initial_strides = 1

        x_complex = ComplexConv1D(
            nb_filter,
            initial_kernel,
            strides=initial_strides,
            padding='same',
            use_bias=False,
            spectral_parametrization=False,
            kernel_regularizer=keras.regularizers.l2(weight_decay))(inputs)

        if subsample_initial_block:
            x_complex = ComplexBatchNormalization(axis=concat_axis,
                                                  epsilon=1.1e-5)(x_complex)
            x_complex = layer_activation(x_complex, activation_conv)
            if pooling_func[0] == 'max':
                x_complex = ComplexMaxPooling1D(pool_size=3,
                                                strides=2,
                                                padding='same')(x_complex)
            elif pooling_func[0] == 'average':
                x_complex = ComplexAveragePooling1D(pool_size=3,
                                                    strides=2,
                                                    padding='same')(x_complex)

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            x_complex, nb_filter = dense1d_block(x_complex,
                                                 nb_layers[block_idx],
                                                 nb_filter,
                                                 growth_rate,
                                                 activation=activation_conv,
                                                 bottleneck=bottleneck,
                                                 dropout_rate=dropout_rate,
                                                 weight_decay=weight_decay)

            # add transition_block
            x_complex = transition1d_block(x_complex,
                                           nb_filter,
                                           activation=activation_conv,
                                           compression=compression,
                                           weight_decay=weight_decay)
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_block
        x_complex, nb_filter = dense1d_block(x_complex,
                                             final_nb_layer,
                                             nb_filter,
                                             growth_rate,
                                             activation=activation_conv,
                                             bottleneck=bottleneck,
                                             dropout_rate=dropout_rate,
                                             weight_decay=weight_decay)

        x_complex = ComplexBatchNormalization(axis=concat_axis,
                                              epsilon=1.1e-5)(x_complex)
        x_complex = layer_activation(x_complex, activation_conv)

        if include_top:
            assert classes > 0
            if pooling_func[1] == 'global_average':
                x_complex = keras.layers.GlobalAveragePooling1D(
                    name="pool5")(x_complex)
            elif pooling_func[1] == 'complex_average':
                x_complex = ComplexAveragePooling1D(name='pool5')(x_complex)
            elif pooling_func[1] == 'complex_max':
                x_complex = ComplexMaxPooling1D(name='pool5')(x_complex)
            elif pooling_func[1] == 'spectral_average':
                x_complex = SpectralPooling1D(gamma=[0.25, 0.25],
                                              name='pool5')(x_complex)

            if output_activation is None:
                output_activation = 'softmax'

            if K.ndim(x_complex) > 2:
                x_complex = keras.layers.Flatten()(x_complex)

            if output_activation.startswith('complex_'):
                output_activation = output_activation[len('complex_'):]
                x = ComplexDense(classes,
                                 activation=output_activation)(x_complex)
            else:
                x = keras.layers.Dense(classes,
                                       activation=output_activation)(x_complex)
        else:
            x = x_complex

        super(DenseNet1D, self).__init__(inputs=inputs,
                                         outputs=x,
                                         *args,
                                         **kwargs)
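
The nb_filter bookkeeping above follows the usual DenseNet recurrence; a worked sketch, assuming dense1d_block grows the filter count by nb_layers * growth_rate per block (hypothetical settings, not values taken from this code):

growth_rate = 12
compression = 0.5             # i.e. reduction = 0.5
nb_filter = 2 * growth_rate   # the default when nb_filter == -1
for n_layers in [4, 4]:       # every dense block except the last
    nb_filter += n_layers * growth_rate       # dense block
    nb_filter = int(nb_filter * compression)  # transition block
nb_filter += 4 * growth_rate  # the final dense block has no transition
print(nb_filter)  # 90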
Example #51
    def embed_input(self,
                    input_layer: 'keras.layers.Layer',
                    text_trainer: 'TextTrainer',
                    embedding_name: str="embedding"):
        """
        A combined word-and-characters representation requires some fancy footwork to do the
        embedding properly.

        This method assumes the input shape is (..., sentence_length, word_length + 1), where the
        first integer for each word in the tensor is the word index, and the remaining word_length
        entries are the character sequence.  We'll first split this into two tensors, one of shape
        (..., sentence_length), and one of shape (..., sentence_length, word_length), where the
        first is the word sequence, and the second is the character sequence for each word.  We'll
        pass the word sequence through an embedding layer, as normal, and pass the character
        sequence through a _separate_ embedding layer, then an encoder, to get a word vector out.
        We'll then concatenate the two word vectors, returning a tensor of shape
        (..., sentence_length, embedding_dim * 2).
        """
        # pylint: disable=protected-access
        # So that we end up with even embeddings across different inputs, we'll use half the
        # `embedding_size` in the given `TextTrainer`.
        embedding_size = int(text_trainer.embedding_size / 2)
        # This is happening before any masking is done, so we don't need to worry about the
        # mask_split_axis argument to VectorMatrixSplit.
        words, characters = VectorMatrixSplit(split_axis=-1)(input_layer)
        word_embedding = text_trainer._get_embedded_input(words,
                                                          embedding_size=embedding_size,
                                                          embedding_name='word_' + embedding_name,
                                                          vocab_name='words')
        character_embedding = text_trainer._get_embedded_input(characters,
                                                               embedding_size=embedding_size,
                                                               embedding_name='character_' + embedding_name,
                                                               vocab_name='characters')

        # A note about masking here: we care about the character masks when encoding a character
        # sequence, so we need the mask to be passed to the character encoder correctly.  However,
        # we _don't_ care here about whether the whole word will be masked, as the word_embedding
        # will carry that information, so the output mask returned by the TimeDistributed layer
        # here will be ignored.
        word_encoder = TimeDistributed(
                text_trainer._get_encoder(name="word", fallback_behavior="use default params"))
        # We might need to TimeDistribute this again, if our input has ndim higher than 3.
        for _ in range(3, K.ndim(characters)):
            word_encoder = TimeDistributed(word_encoder, name="timedist_" + word_encoder.name)
        word_encoding = word_encoder(character_embedding)

        merge_mode = lambda inputs: K.concatenate(inputs, axis=-1)
        def merge_shape(input_shapes):
            output_shape = list(input_shapes[0])
            output_shape[-1] += input_shapes[1][-1]
            return tuple(output_shape)
        merge_mask = lambda masks: masks[0]

        # If you're embedding multiple inputs in your model, we need the final merge layer here to
        # have a unique name each time.  In order to get a unique name, we use the name of the
        # input layer.  Except sometimes Keras adds funny things to the end of the input layer, so
        # we'll strip those off.
        input_name = input_layer.name
        if ':' in input_name:
            input_name = input_name.split(':')[0]
        if input_name.split('_')[-1].isdigit():
            input_name = '_'.join(input_name.split('_')[:-1])
        final_embedded_input = merge([word_embedding, word_encoding],
                                     mode=merge_mode,
                                     output_shape=merge_shape,
                                     output_mask=merge_mask,
                                     name='combined_word_embedding_for_' + input_name)
        return final_embedded_input
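
A toy illustration of the word/character split described in the docstring above (NumPy stand-ins, not the actual VectorMatrixSplit layer):

import numpy as np

# (batch, sentence_length, word_length + 1): index 0 holds the word id,
# the remaining entries hold the character ids for that word.
batch = np.arange(2 * 3 * 5).reshape(2, 3, 5)
words = batch[..., 0]        # -> (2, 3): one word id per token
characters = batch[..., 1:]  # -> (2, 3, 4): character sequence per token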
Example #52
#define the step for gradient ascent
step = 0.5

#create a dictionary with each layer's name for convenience
layer_dict = dict([(layer.name, layer) for layer in model.layers])


#start with initial input, obviously
#print(model.layers[0].input)
input_img = model.layers[0].input
# Note: the layer index would be -1 instead of 0, but this network uses the first
# convolutional layer as the input instead of a fully-connected layer

layer_output = layer_dict[layer_name].output
print(backend.int_shape(layer_output))
print(backend.ndim(layer_output))
print(backend.shape(layer_output))
#layer_output = model.layers[0].output
for n in range(0, 0):  # NB: this range is empty as written; widen it to sweep filter indices
	filter_index = n
	print('Processing filter %d' % filter_index)
	start_time = time.time()
	#build a loss function that maximizes the activation of n filters of the layer considered

	#TODO: select one of these loss function depending on which layer you're using
	#loss = backend.mean(layer_output[:, filter_index]) # loss function for dense layers
	loss = backend.mean(layer_output[:,filter_index,:,:]) # for non-dense layers

	#compute gradient of input picture wrt this loss
	grads = backend.gradients(loss, input_img)[0]
	#normalize the gradient to avoid extreme values
Example #53
    def call(self, x, mask=None):
        if mask is not None:
            if K.ndim(x) != K.ndim(mask):
Example #54
def softmax_over_time(x):
    assert (K.ndim(x) > 2)
    e = K.exp(x - K.max(x, axis=1, keepdims=True))
    s = K.sum(e, axis=1, keepdims=True)
    return e / s
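
softmax_over_time is a numerically stabilized softmax along the time axis; a quick equivalence check against K.softmax, which accepts an axis argument on the TensorFlow backend:

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.rand(2, 5, 3))   # (batch, time, features)
a = softmax_over_time(x)                  # the function defined above
b = K.softmax(x, axis=1)
print(np.allclose(K.eval(a), K.eval(b)))  # True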
Example #55
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example #56
def getResidualBlock(I, filter_size, featmaps, stage, block, shortcut,
                     convArgs, bnArgs, d):
    """Get residual block."""

    activation = d.act
    drop_prob = d.dropout
    nb_fmaps1, nb_fmaps2 = featmaps
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    if K.image_data_format() == 'channels_first' and K.ndim(I) != 3:
        channel_axis = 1
    else:
        channel_axis = -1

    if d.model == "real":
        O = BatchNormalization(name=bn_name_base + '_2a', **bnArgs)(I)
    elif d.model == "complex":
        O = ComplexBN(name=bn_name_base + '_2a', **bnArgs)(I)
    O = Activation(activation)(O)

    if shortcut == 'regular' or d.spectral_pool_scheme == "nodownsample":
        if d.model == "real":
            O = Conv2D(nb_fmaps1,
                       filter_size,
                       name=conv_name_base + '2a',
                       **convArgs)(O)
        elif d.model == "complex":
            O = ComplexConv2D(nb_fmaps1,
                              filter_size,
                              name=conv_name_base + '2a',
                              **convArgs)(O)
    elif shortcut == 'projection':
        if d.spectral_pool_scheme == "proj":
            O = applySpectralPooling(O, d)
        if d.model == "real":
            O = Conv2D(nb_fmaps1,
                       filter_size,
                       name=conv_name_base + '2a',
                       strides=(2, 2),
                       **convArgs)(O)
        elif d.model == "complex":
            O = ComplexConv2D(nb_fmaps1,
                              filter_size,
                              name=conv_name_base + '2a',
                              strides=(2, 2),
                              **convArgs)(O)

    if d.model == "real":
        O = BatchNormalization(name=bn_name_base + '_2b', **bnArgs)(O)
        O = Activation(activation)(O)
        O = Conv2D(nb_fmaps2,
                   filter_size,
                   name=conv_name_base + '2b',
                   **convArgs)(O)
    elif d.model == "complex":
        O = ComplexBN(name=bn_name_base + '_2b', **bnArgs)(O)
        O = Activation(activation)(O)
        O = ComplexConv2D(nb_fmaps2,
                          filter_size,
                          name=conv_name_base + '2b',
                          **convArgs)(O)

    if shortcut == 'regular':
        O = Add()([O, I])
    elif shortcut == 'projection':
        if d.spectral_pool_scheme == "proj":
            I = applySpectralPooling(I, d)
        if d.model == "real":
            X = Conv2D(
                nb_fmaps2, (1, 1),
                name=conv_name_base + '1',
                strides=(2, 2) if d.spectral_pool_scheme != "nodownsample" else
                (1, 1),
                **convArgs)(I)
            O = Concatenate(channel_axis)([X, O])
        elif d.model == "complex":
            X = ComplexConv2D(
                nb_fmaps2, (1, 1),
                name=conv_name_base + '1',
                strides=(2, 2) if d.spectral_pool_scheme != "nodownsample" else
                (1, 1),
                **convArgs)(I)

            O_real = Concatenate(channel_axis)([GetReal()(X), GetReal()(O)])
            O_imag = Concatenate(channel_axis)([GetImag()(X), GetImag()(O)])
            O = Concatenate(1)([O_real, O_imag])

    return O
Example #57
def total_variation_loss(yTrue, yPred):
    assert K.ndim(yTrue) == 4
    diff = yTrue - yPred
    a = K.square(diff[:, :res - 1, :res - 1, :] - diff[:, 1:, :res - 1, :])
    b = K.square(diff[:, :res - 1, :res - 1, :] - diff[:, :res - 1, 1:, :])
    return K.mean(K.pow(a + b, 2)) # tweak the power?
Example #58
    def call(self, inputs, training=None):
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(inputs)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        mean_batch, var_batch = _moments(inputs,
                                         reduction_axes,
                                         shift=None,
                                         keep_dims=False)
        std_batch = (K.sqrt(var_batch + self.epsilon))

        r = std_batch / (K.sqrt(self.running_variance + self.epsilon))
        r = K.stop_gradient(K.clip(r, 1 / self.r_max, self.r_max))

        d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance +
                                                      self.epsilon)
        d = K.stop_gradient(K.clip(d, -self.d_max, self.d_max))

        if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
            x_normed_batch = (inputs - mean_batch) / std_batch
            x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
        else:
            # need broadcasting
            broadcast_mean = K.reshape(mean_batch, broadcast_shape)
            broadcast_std = K.reshape(std_batch, broadcast_shape)
            broadcast_r = K.reshape(r, broadcast_shape)
            broadcast_d = K.reshape(d, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

            x_normed_batch = (inputs - broadcast_mean) / broadcast_std
            x_normed = (x_normed_batch * broadcast_r +
                        broadcast_d) * broadcast_gamma + broadcast_beta

        # explicit update to moving mean and standard deviation
        mean_update = K.moving_average_update(self.running_mean, mean_batch,
                                              self.momentum)
        variance_update = K.moving_average_update(self.running_variance,
                                                  std_batch**2, self.momentum)
        self.add_update([mean_update, variance_update], inputs)

        # update r_max and d_max
        r_val = self.r_max_value / (1 +
                                    (self.r_max_value - 1) * K.exp(-self.t))
        d_val = (self.d_max_value /
                 (1 + ((self.d_max_value / 1e-3) - 1) * K.exp(-(2 * self.t))))

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update_add(self.t, self.t_delta_tensor)
        ], inputs)

        if training in {0, False}:
            return x_normed
        else:

            def normalize_inference():
                if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
                    x_normed_running = K.batch_normalization(
                        inputs,
                        self.running_mean,
                        self.running_variance,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)

                    return x_normed_running
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_variance,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        inputs,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                    return x_normed_running

            # pick the normalized form of inputs corresponding to the training phase
            # for batch renormalization, inference time remains same as batchnorm
            x_normed = K.in_train_phase(x_normed,
                                        normalize_inference,
                                        training=training)

            return x_normed
Example #59
    def call(self, inputs, mask=None):
        if type(inputs) is not list or len(inputs) != 2:
            raise Exception('terminal gru runs on list of length 2')

        X = inputs[0]
        true_seq = inputs[1]

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        # preprocessing makes input into right form for gpu/cpu settings
        # from original GRU code
        recurrent_dropout_constants = self.get_constants(X)[0]
        preprocessed_input = self.preprocess_input(X)

        #################
        ## Section for index matching of true inputs
        #################
        #  Basically, we need to add an extra timestep of just 0s for predicting the first timestep output

        axes = [1, 0] + list(range(2, K.ndim(true_seq)))

        true_seq = K.permute_dimensions(true_seq, axes)
        zeros = K.zeros_like(true_seq[:1, :, :])

        # add a column of zeros, remove last element
        true_seq = K.concatenate(
            [zeros, true_seq[:K.int_shape(true_seq)[0] - 1, :, :]], axis=0)
        shifted_raw_inputs = K.permute_dimensions(true_seq, axes)

        ## concatenate to have same dimension as preprocessed inputs 3xoutput_dim
        # only for self.implementation = 0?
        shifted_raw_inputs = K.concatenate(
            [shifted_raw_inputs, shifted_raw_inputs, shifted_raw_inputs],
            axis=2)

        all_inputs = K.stack([preprocessed_input, shifted_raw_inputs])
        num_dim = K.ndim(all_inputs)
        axes = [1, 2, 0] + list(range(3, num_dim))
        all_inputs = K.permute_dimensions(all_inputs, axes)

        # If not using true sequence, want to feed in a tensor of zeros instead.
        zeros_input_seq = K.zeros_like(preprocessed_input)
        test_phase_all_inputs = K.stack([preprocessed_input, zeros_input_seq])
        test_phase_all_inputs = K.permute_dimensions(test_phase_all_inputs,
                                                     axes)

        all_inputs = K.in_train_phase(all_inputs, test_phase_all_inputs)

        last_output, outputs, states = sampled_rnn(
            self.step,
            all_inputs,
            initial_states,
            self.units,
            self.rnd_seed,
            go_backwards=self.go_backwards,
            rec_dp_constants=recurrent_dropout_constants,
            mask=None)

        if self.return_sequences:
            return outputs
        else:
            return last_output
Example #60
def repeat_(x, k):
    tile_factor = [1, k] + [1] * (kb.ndim(x) - 1)
    return kb.tile(x[:, None, :], tile_factor)
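
Usage sketch: repeat_ inserts a new axis 1 and tiles k copies along it, so a (batch, dim) input becomes (batch, k, dim):

import numpy as np
from tensorflow.keras import backend as kb

x = kb.constant(np.ones((2, 8)))
y = repeat_(x, 4)        # the helper defined above
print(kb.int_shape(y))   # (2, 4, 8)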