def call(self, inputs, mask=None):
    """Apply the drop-path branch, or mix it with the averaging branch.

    When `self.force_path` is truthy the result of `self._drop_path` is
    returned directly. Otherwise `self._drop_path(inputs)` is used in the
    training phase and `self._ave(inputs)` in the test phase.
    """
    if self.force_path:
        return self._drop_path(inputs)
    return K.in_train_phase(self._drop_path(inputs), self._ave(inputs))
Пример #2
0
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None, activation='linear'):
    '''Apply `activation(y.w + b)` for every temporal slice y of x.

    # Arguments
        x: input tensor, indexed here as (samples, timesteps, input_dim).
        w: weight matrix.
        b: optional bias; added to the projection whenever it is not None.
        dropout: optional dropout rate; when strictly between 0 and 1 the
            same dropout mask is applied at every timestep (training
            phase only).
        input_dim, output_dim, timesteps: optional sizes; when falsy they
            are read from the symbolic shapes of `x` and `w`.
        activation: identifier resolved through `activations.get`.

    # Returns
        The activated projection reshaped to (-1, timesteps, output_dim).
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    # Compare against None explicitly: taking the truth value of a
    # tensor/array bias is ambiguous or an error, depending on the backend.
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
Пример #3
0
    def call(self, x, mask=None):
        """Normalise `x` along `self.axis`.

        Statistics of the current batch are used in the training phase and
        the stored running statistics in the test phase. The normalised
        tensor is scaled by `self.gamma` and shifted by `self.beta`.
        Moving-average updates for the running statistics are stored in
        `self.updates`.
        """
        input_shape = self.input_spec[0].shape
        rank = len(input_shape)

        # every axis except the normalised one is reduced over
        reduction_axes = list(range(rank))
        del reduction_axes[self.axis]
        # shape that lets per-feature vectors broadcast against x
        broadcast_shape = [1] * rank
        broadcast_shape[self.axis] = input_shape[self.axis]

        # training phase: statistics of the current batch
        batch_mean = K.mean(x, axis=reduction_axes)
        broadcast_mean = K.reshape(batch_mean, broadcast_shape)
        batch_std = K.sqrt(
            K.mean(K.square(x - broadcast_mean) + self.epsilon,
                   axis=reduction_axes))
        broadcast_std = K.reshape(batch_std, broadcast_shape)

        keep = self.momentum
        self.updates = [
            (self.running_mean,
             keep * self.running_mean + (1 - keep) * batch_mean),
            (self.running_std,
             keep * self.running_std + (1 - keep) * batch_std),
        ]
        normed_with_batch = (x - broadcast_mean) / (broadcast_std + self.epsilon)

        # test phase: running averages
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        normed_with_running = ((x - broadcast_running_mean) /
                               (broadcast_running_std + self.epsilon))

        # select the variant matching the learning phase, then scale and shift
        normed = K.in_train_phase(normed_with_batch, normed_with_running)
        scale = K.reshape(self.gamma, broadcast_shape)
        shift = K.reshape(self.beta, broadcast_shape)
        return scale * normed + shift
Пример #4
0
 def call(self, inputs, training=None):
     """Run the wrapped layer, with concrete dropout on its inputs.

     With `self.is_mc_dropout` set, dropout is applied unconditionally.
     Otherwise the dropped variant is used in the training phase and the
     wrapped layer is called on the raw inputs in the test phase.
     """
     def relaxed_dropped_inputs():
         return self.layer.call(self.concrete_dropout(inputs))

     if self.is_mc_dropout:
         return relaxed_dropped_inputs()
     return K.in_train_phase(relaxed_dropped_inputs,
                             self.layer.call(inputs),
                             training=training)
Пример #5
0
    def sampling(args):
        """Sample a latent vector from `(z_mean, z_log_var)`.

        # Arguments
            args: pair `(z_mean, z_log_var)`.

        # Returns
            In the training phase, `z_mean` plus Gaussian noise scaled by
            `exp(z_log_var / 2)` and by `kl_loss_var`; in the test phase,
            `z_mean` itself.
        """
        z_mean, z_log_var = args

        # Draw fresh noise each time the graph is evaluated.
        # `K.random_normal_variable` would create a variable whose values
        # are sampled once at creation, so every batch would reuse the
        # same noise.
        epsilon = K.random_normal((params['batch_size'], params['hidden_dim']),
                                  0., 1.)
        # insert kl loss here

        z_rand = z_mean + K.exp(z_log_var / 2) * kl_loss_var * epsilon
        return K.in_train_phase(z_rand, z_mean)
Пример #6
0
    def call(self, inputs, training=None):
        """Apply dropout to `inputs` in the training phase.

        The inputs are returned untouched when `self.rate` is not strictly
        between 0 and 1.
        """
        if not (0. < self.rate < 1.):
            return inputs

        noise_shape = self._get_noise_shape(inputs)

        def dropped_inputs():
            return K.dropout(inputs, self.rate, noise_shape,
                             seed=self.seed)

        return K.in_train_phase(dropped_inputs, inputs, training=training)
Пример #7
0
 def get_constants(self, x):
     """Return `[B_U, B_W]`, the recurrent and input dropout masks.

     A mask is a tensor of ones with dropout applied in the training
     phase; when the corresponding dropout is disabled the entry is the
     scalar 1 cast to floatx. The input mask is only built when
     `self.consume_less == 'cpu'`.
     """
     def make_mask(width, rate):
         # one column of ones per sample, repeated `width` times
         column = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
         ones = K.concatenate([column] * width, 1)
         return K.in_train_phase(K.dropout(ones, rate), ones)

     if 0 < self.dropout_U < 1:
         recurrent_mask = make_mask(self.output_dim, self.dropout_U)
     else:
         recurrent_mask = K.cast_to_floatx(1.)

     if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
         input_dim = self.input_spec[0].shape[-1]
         input_mask = make_mask(input_dim, self.dropout_W)
     else:
         input_mask = K.cast_to_floatx(1.)

     return [recurrent_mask, input_mask]
 def call(self, x, mask=None):
     """Gather rows of `self.W` at the indices `x`.

     When `self.dropout` is strictly between 0 and 1, a random binomial
     mask of length `self.input_dim`, rescaled by `1 / retain_p`, multiplies
     `self.W` in the training phase.
     """
     if not (0. < self.dropout < 1.):
         return K.gather(self.W, x)

     retain_p = 1. - self.dropout
     row_mask = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
     row_mask = K.expand_dims(row_mask)
     table = K.in_train_phase(self.W * row_mask, self.W)
     return K.gather(table, x)
 def dot_product_attention(self, x, seq_len=None, dropout=0.1, training=None):
     """Compute dot-product attention over a `(q, k, v)` triple.

     The logits `q . k^T` optionally get `self.b` added (when `self.bias`
     is truthy) and are masked with `self.mask_logits` when `seq_len` is
     given. The softmax weights receive dropout in the training phase and
     are then multiplied with `v`.
     """
     query, key, value = x
     scores = tf.matmul(query, key, transpose_b=True)
     if self.bias:
         scores += self.b
     if seq_len is not None:
         scores = self.mask_logits(scores, seq_len)

     attention = tf.nn.softmax(scores, name="attention_weights")
     attention = K.in_train_phase(K.dropout(attention, dropout),
                                  attention,
                                  training=training)
     return tf.matmul(attention, value)
Пример #10
0
 def call(self, x, mask=None):
     """Standardise `x` in the training phase when `self.normalize` is set.

     The mean and std come from `self._get_mean_and_std` and are reshaped
     to broadcast along `self.axis`. In the test phase, or when
     normalisation is disabled, the input passes through (multiplied by
     1. in the disabled case).
     """
     if not self.normalize:
         return x * 1.

     mean, std = self._get_mean_and_std(x)
     broadcast_shape = [1] * K.ndim(x)
     broadcast_shape[self.axis] = K.shape(x)[self.axis]
     mean_b = K.reshape(mean, broadcast_shape)
     std_b = K.reshape(std, broadcast_shape)
     standardised = (x - mean_b) / (std_b + K.epsilon())
     return K.in_train_phase(standardised, x)
Пример #11
0
    def get_constants(self, x):
        """Return `[B_U, B_W]`, each a list of three dropout masks.

        Each mask is a tensor of ones with dropout applied in the training
        phase. When the corresponding dropout is disabled, the entry is a
        list of three scalar 1s cast to floatx.
        """
        def three_masks(width, rate):
            # one column of ones per sample, repeated `width` times
            column = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.concatenate([column] * width, 1)
            return [K.in_train_phase(K.dropout(ones, rate), ones)
                    for _ in range(3)]

        def three_ones():
            return [K.cast_to_floatx(1.) for _ in range(3)]

        if 0 < self.dropout_U < 1:
            recurrent_masks = three_masks(self.output_dim, self.dropout_U)
        else:
            recurrent_masks = three_ones()

        if 0 < self.dropout_W < 1:
            input_dim = self.input_spec[0].shape[-1]
            input_masks = three_masks(input_dim, self.dropout_W)
        else:
            input_masks = three_ones()

        return [recurrent_masks, input_masks]
Пример #12
0
 def call(self, x, mask=None):
     """Gather embeddings from `self.W` with `self.zeros_vector` prepended.

     When `self.dropout` is strictly between 0 and 1, a rescaled random
     binomial row mask multiplies `self.W` in the training phase. The
     zeros vector is concatenated in front (axis 0) before gathering.
     """
     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         row_mask = K.expand_dims(
             K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p))
         table = K.in_train_phase(self.W * row_mask, self.W)
     else:
         table = self.W
     padded_table = T.concatenate([self.zeros_vector, table], axis=0)
     return K.gather(padded_table, x)
Пример #13
0
	def __call__(self, loss):
		"""Add a KL-divergence activity penalty to `loss` in the training phase.

		For every inbound node of the attached layer, the mean absolute
		value of `sigmoid(0.1 * output)` is compared with `self.p` through
		`kl_divergence`, weighted by `self.l`. The unmodified loss is used
		in the test phase.

		Raises an Exception when no layer has been attached.
		"""
		if not hasattr(self, 'layer'):
			raise Exception('Need to call `set_layer` on '
							'ActivityRegularizer instance '
							'before calling the instance.')
		regularized_loss = loss
		for node_index, _ in enumerate(self.layer.inbound_nodes):
			squashed = K.sigmoid(0.1 * self.layer.get_output_at(node_index))
			mean_activity = K.mean(K.abs(squashed))
			regularized_loss += self.l * kl_divergence(self.p, mean_activity)
		return K.in_train_phase(regularized_loss, loss)
Пример #14
0
    def call(self, x, mask=None):
        """Gather embeddings from `self.W` at the indices `x`.

        If `x` (or `mask`) is a list, only its first of two elements is used.
        When `self.dropout` is strictly between 0 and 1, a rescaled random
        binomial mask multiplies `self.W` in the training phase.

        `self.mode` selects the lookup:
          * 'matrix': a plain `K.gather(W, x)`.
          * 'tensor': `x` must be 3-dimensional; W is permuted, gathered and
            then indexed with a pair of aranges (see the inline comments).

        Raises an Exception for any other mode.
        """
        if isinstance(x, list): 
            x,_ = x
        if mask is not None and isinstance(mask, list):
            mask,_ = mask
        if 0. < self.dropout < 1.:
            retain_p = 1. - self.dropout
            # mask covers every dimension of W except the last
            dims = self.W._keras_shape[:-1]
            # rescale kept entries by 1/retain_p
            B = K.random_binomial(dims, p=retain_p) * (1. / retain_p)
            B = K.expand_dims(B)
            W = K.in_train_phase(self.W * B, self.W)
        else:
            W = self.W
        
        if self.mode == 'matrix':
            return K.gather(W,x)
        elif self.mode == 'tensor':
            # quick and dirty: only allowing for 3dim inputs when it's tensor mode
            assert K.ndim(x) == 3
            # put sequence on first; gather; take diagonal across shared batch dimension
            # in other words, W is (B, S, F)
            # incoming x is (B, S, A)
            inds = K.arange(self.W._keras_shape[0])
            #out = K.gather(K.permute_dimensions(W, (1,0,2)), x).diagonal(axis1=0, axis2=3)
            #return K.permute_dimensions(out, (3,0,1,2))
            ### method above doesn't do grads =.=
            # tensor abc goes to bac, indexed onto with xyz, goes to xyzac, 
            # x == a, so shape to xayzc == xxyzc
            # take diagonal on first two: xyzc 
            #out = K.colgather()
            out = K.gather(K.permute_dimensions(W, (1,0,2)), x) 
            out = K.permute_dimensions(out, (0,3,1,2,4))
            # NOTE(review): indexing with the (inds, inds) pair is meant to take
            # the diagonal described above -- confirm the backend's gather
            # accepts a tuple of index tensors.
            out = K.gather(out, (inds, inds))
            return out
        else:
            raise Exception('sanity check. should not be here.')

        #all_dims = T.arange(len(self.W._keras_shape))
        #first_shuffle = [all_dims[self.embed_dim]] + all_dims[:self.embed_dim] + all_dims[self.embed_dim+1:]
        ## 1. take diagonal from 0th to
        ## chang eof tactics
        ## embed on time or embed on batch. that's all I'm supporting.  
        ## if it's embed on time, then, x.ndim+1 is where batch will be, and is what
        ## i need to take the diagonal over. 
        ## now dim shuffle the xdims + 1 to the front.
        #todo: get second shuffle or maybe find diagonal calculations
        #out = K.gather(W, x)
        #return out

        ### reference
        #A = S(np.arange(60).reshape(3,4,5))
        #x = S(np.random.randint(0, 4, (3,4,10)))
        #x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[T.arange(A.shape[0]), T.arange(A.shape[0])]
Пример #15
0
 def __call__(self, loss):
     """Penalise layer outputs whose sum falls below a minimum size.

     The layer output is summed over axes (1, 2, 3). Where that sum is at
     most `mask_size**2 * min_covered`, a squared shortfall scaled by
     `min_tag_size / max_loss` is applied. The mean penalty is added to
     `loss` in the training phase only.

     Raises an Exception when no layer has been attached.
     """
     if not hasattr(self, 'layer'):
         raise Exception('Need to call `set_layer` on '
                         'MaskRegularizer instance '
                         'before calling the instance.')
     min_tag_size = self.mask_size**2 * self.min_covered
     factor = min_tag_size / self.max_loss
     out_sum = self.layer.output.sum(axis=(1, 2, 3))
     too_small = out_sum <= min_tag_size
     shortfall_penalty = factor*(out_sum - min_tag_size)**2
     reg_loss = K.switch(too_small, shortfall_penalty, 0)
     return K.in_train_phase(loss + reg_loss.mean(), loss)
    def __call__(self, loss):
        """Add `self.k * sqrt(dominant eigenvalue of W^T W)` to `loss`.

        The dominant eigenvector of `W^T W` (with `W = self.p`) is
        approximated with nine power-method iterations starting from a
        vector of ones. The eigenvalue is the Rayleigh quotient of that
        vector. The penalty only applies in the training phase.
        """
        weights = self.p
        gram = T.dot(weights.T, weights)
        n_units, _n_cols = gram.shape.eval()  # the number of neurons in the layer
        gain = self.k

        # power method: repeatedly multiply by the Gram matrix (9 iterations)
        eigvec = np.ones(n_units)
        for _step in range(9):
            eigvec = T.dot(gram, eigvec)

        # Rayleigh quotient gives the corresponding dominant eigenvalue
        gram_eigvec = T.dot(gram, eigvec)
        eigval = T.dot(gram_eigvec, eigvec) / T.dot(eigvec, eigvec)
        regularized_loss = loss + (eigval ** 0.5) * gain
        return K.in_train_phase(regularized_loss, loss)
Пример #17
0
 def call(self, x, mask=None):
     """Gather rows of a column-normalised `self.W` at the indices `x`.

     `x` is cast to int32 when needed. With `self.dropout` strictly between
     0 and 1, a rescaled random binomial row mask multiplies `self.W` in
     the training phase. The table is divided by its sum over axis 0
     before gathering.
     """
     if K.dtype(x) != 'int32':
         x = K.cast(x, 'int32')

     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         row_mask = K.expand_dims(
             K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p))
         table = K.in_train_phase(self.W * row_mask, self.W)
     else:
         table = self.W

     column_sums = K.sum(table, axis=0)
     return K.gather(table / column_sums, x)
Пример #18
0
 def __call__(self, loss):
     """Add a low-frequency activity penalty to `loss` in the training phase.

     For every inbound node of the attached layer, the output is passed
     through `gaussian_filter_2d` with `self.sigma`, and the sum of absolute
     values, scaled by `self.factor`, is accumulated. The unmodified loss
     is used in the test phase.

     Raises an Exception when no layer has been attached.
     """
     if not hasattr(self, 'layer'):
         raise Exception('Need to call `set_layer` on '
                         'LowFrequenciesRegularizer instance '
                         'before calling the instance. ')
     # Leftover debug prints were removed: building the loss should not
     # write to stdout.
     regularized_loss = K.zeros_like(loss)
     for i in range(len(self.layer.inbound_nodes)):
         out = self.layer.get_output_at(i)
         low_freq = gaussian_filter_2d(out, self.sigma)
         regularized_loss += K.sum(K.abs(low_freq)) * self.factor
     return K.in_train_phase(loss + regularized_loss, loss)
Пример #19
0
    def get_constants(self, inputs, training=None):
        """Return a one-element list holding three recurrent dropout masks.

        With `self.recurrent_dropout` strictly between 0 and 1, each mask is
        a tensor of ones tiled to `self.units` columns with dropout applied
        in the training phase. Otherwise each mask is the scalar 1 cast to
        floatx.
        """
        if not (0. < self.recurrent_dropout < 1.):
            return [[K.cast_to_floatx(1.) for _ in range(3)]]

        column = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
        ones = K.tile(column, (1, self.units))

        def dropped_inputs():
            return K.dropout(ones, self.recurrent_dropout)

        rec_dp_mask = [
            K.in_train_phase(dropped_inputs, ones, training=training)
            for _ in range(3)
        ]
        return [rec_dp_mask]
    def __call__(self, loss):
        """Add `self.k * sqrt(dominant eigenvalue of W^T W)` to `loss`.

        The dominant eigenvector of `W^T W` (with `W = self.p`) is
        approximated with nine power-method iterations starting from a
        vector of ones. The eigenvalue is the Rayleigh quotient of that
        vector. The penalty only applies in the training phase.
        """
        power = 9  # number of iterations of the power method
        weights = self.p
        gram = K.dot(K.transpose(weights), weights)
        n_units, _n_cols = K.eval(K.shape(gram))
        gain = self.k

        # power method: start from ones and multiply by the Gram matrix
        eigvec = np.ones(n_units)
        for _step in range(power):
            eigvec = K.dot(gram, eigvec)

        # Rayleigh quotient gives the corresponding dominant eigenvalue
        gram_eigvec = K.dot(gram, eigvec)
        eigval = K.dot(gram_eigvec, eigvec) / K.dot(eigvec, eigvec)
        regularized_loss = loss + (eigval ** 0.5) * gain
        return K.in_train_phase(regularized_loss, loss)
Пример #21
0
 def __call__(self, loss):
     """Add L1/L2 weight penalties to `loss` in the training phase.

     The parameter `self.p` is multiplied by `self.p_mask` when a mask is
     set. The mean absolute value and mean square of the result are added,
     scaled by `self.l1` and `self.l2`, whenever those are truthy.

     Raises an Exception when no parameter has been attached.
     """
     if not hasattr(self, 'p'):
         raise Exception('Need to call `set_param` on '
                         'WeightRegularizer instance '
                         'before calling the instance. '
                         'Check that you are not passing '
                         'a WeightRegularizer instead of an '
                         'ActivityRegularizer '
                         '(i.e. activity_regularizer="l2" instead '
                         'of activity_regularizer="activity_l2".')
     regularized_loss = loss
     weights = self.p if self.p_mask is None else self.p * self.p_mask
     if self.l1:
         regularized_loss += K.mean(K.abs(weights)) * self.l1
     if self.l2:
         regularized_loss += K.mean(K.square(weights)) * self.l2
     return K.in_train_phase(regularized_loss, loss)
Пример #22
0
def time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` to every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: optional dropout rate; when strictly between 0 and 1,
            the same dropout mask is applied to every temporal slice
            of the input in the training phase.
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # fall back to symbolic shapes for any size that was not supplied
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # one mask per sample, repeated across all timesteps
        first_step_ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        mask = K.repeat(K.dropout(first_step_ones, dropout), timesteps)
        x = K.in_train_phase(x * mask, x, training=training)

    # merge batch and time axes, project, then add the bias
    x = K.dot(K.reshape(x, (-1, input_dim)), w)
    if b is not None:
        x = K.bias_add(x, b)

    # restore the 3D layout
    if K.backend() != 'tensorflow':
        return K.reshape(x, (-1, timesteps, output_dim))
    x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
    x.set_shape([None, None, output_dim])
    return x
    def __call__(self, loss):
        """Add L1, L2 and adjacent-column difference penalties to `loss`.

        Besides the summed L1/L2 terms on `self.p` (scaled by `self.l1`
        and `self.l2`), `self.p` is multiplied by a matrix built as
        `eye - eye(k=1)` with its last diagonal entry zeroed. The sum of
        squares of that product is added, scaled by `self.m`. The penalties
        only apply in the training phase.

        Raises an Exception when no parameter has been attached.
        """
        if not hasattr(self, 'p'):
            raise Exception('Need to call `set_param` on '
                            'WeightRegularizer instance '
                            'before calling the instance. '
                            'Check that you are not passing '
                            'a WeightRegularizer instead of an '
                            'ActivityRegularizer '
                            '(i.e. activity_regularizer="l2" instead '
                            'of activity_regularizer="activity_l2".')

        l1_term = K.sum(K.abs(self.p)) * self.l1
        regularized_loss = loss + l1_term
        l2_term = K.sum(K.square(self.p)) * self.l2
        regularized_loss += l2_term

        # difference operator over the last axis of the parameter
        n_out = self.p.shape.eval()[-1]
        difference = np.eye(n_out) - np.eye(n_out, k=1)
        difference[-1, -1] = 0
        difference_var = K.variable(difference)
        smooth_term = K.sum(K.square(K.dot(self.p, difference_var))) * self.m
        regularized_loss += smooth_term
        return K.in_train_phase(regularized_loss, loss)
Пример #24
0
    def call(self, x):
        """Apply a sigmoid to `x`, then run it through `self.hmm`.

        With `self.viterbi_inference` set, `forward_backward` is used in the
        training phase and `viterbi_decode_batched` (one-hot) in the test
        phase, selected through `tf.cond` on the learning phase. Otherwise
        `forward_backward` is always used. In every case only element `[0]`
        of the HMM call's result is returned.
        """
        # todo: only optionally apply sigmoid
        # todo: apply viterbi during inference
        x = Activation(K.sigmoid)(x)

        # using K.in_train_phase results in both if and else conditions being
        # computed, which in this case is very expensive. instead, tf.cond
        # is used. Even so, if and else conditions must be wrapped in a lambda
        # to ensure that they are not computed unless that path is chosen.
        if self.viterbi_inference:
            # include this in the graph so that keras knows that the learning phase
            # variable needs to be passed into tensorflows session run.
            x = K.in_train_phase(x, x)

            return Lambda(lambda x: tf.cond(
                K.learning_phase(),
                lambda: self.hmm.forward_backward(x)[0],
                lambda: self.hmm.viterbi_decode_batched(x, onehot=True)[0],
            ))(x)
        else:
            return Lambda(lambda x: self.hmm.forward_backward(x)[0])(x)
Пример #25
0
 def call(self, x):
     """Call the wrapped layer after applying dropout to its weights.

     When `self.prob` is strictly between 0 and 1, the wrapped layer's
     `kernel` and `bias` attributes are reassigned to tensors that have
     dropout applied in the training phase. The wrapped layer's `call` is
     then invoked on `x`.
     """
     if 0. < self.prob < 1.:
         dropped_kernel = K.dropout(self.layer.kernel, self.prob)
         self.layer.kernel = K.in_train_phase(dropped_kernel, self.layer.kernel)
         dropped_bias = K.dropout(self.layer.bias, self.prob)
         self.layer.bias = K.in_train_phase(dropped_bias, self.layer.bias)
     return self.layer.call(x)
 def call(self, x, mask=None, training=None):
     """Combine a branch with its residual, randomly dropping the branch.

     `x` is a pair `(branch, residual)`. In the training phase a uniform
     draw below `self.dropout` returns the residual alone; otherwise the
     branch gets dropout with keep probability `1 - self.dropout` and is
     added to the residual. In the test phase the plain sum is returned.
     """
     branch, residual = x
     skip_branch = tf.random_uniform([]) < self.dropout
     train_output = tf.cond(
         skip_branch,
         lambda: residual,
         lambda: tf.nn.dropout(branch, 1.0 - self.dropout) + residual)
     test_output = branch + residual
     return K.in_train_phase(train_output, test_output, training=training)
Пример #27
0
 def get_yj_means(self):
     """Return the transpose of `self.mj` (training) or `self.mjr` (test)."""
     phase_means = K.in_train_phase(self.mj, self.mjr)
     return K.transpose(phase_means)
Пример #28
0
 def call(self, inputs, training=None):
     """Pass `inputs` through in training; return Gaussian noise at test time.

     # Arguments
         inputs: input tensor.
         training: training phase tensor or boolean; when None the backend
             learning phase decides.

     # Returns
         `inputs` in the training phase, otherwise a tensor of standard
         normal noise with the same shape as `inputs`.
     """
     nx = K.random_normal(K.shape(inputs))
     # Forward the explicit `training` flag; it was accepted but ignored.
     return K.in_train_phase(inputs, nx, training=training)
Пример #29
0
    def call(self, x, mask=None):
        """Normalise `x` with batch statistics corrected by clipped `r` and `d`.

        Modes (from `self.mode`):
          * 0 or 2: feature-wise normalisation along `self.axis` using the
            batch moments, corrected with `r` (ratio of batch std to running
            std) and `d` (shift of batch mean from running mean), both
            clipped and excluded from the gradient. Moving-average updates
            and updates of `r_max`, `d_max` and `t` are registered. In mode
            0 the test phase uses `K.batch_normalization` with the running
            statistics instead.
          * 1: sample-wise normalisation along `self.axis`, with the same
            `r`/`d` correction.

        NOTE(review): for any other mode `x_normed` is never assigned, so
        the final `return` would raise -- confirm that `self.mode` is
        validated elsewhere.
        """
        if self.mode == 0 or self.mode == 2:
            assert self.built, 'Layer must be built before being called'
            input_shape = K.int_shape(x)

            # reduce over every axis except the normalised one
            reduction_axes = list(range(len(input_shape)))
            del reduction_axes[self.axis]
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self.axis] = input_shape[self.axis]

            mean_batch, var_batch = _moments(x,
                                             reduction_axes,
                                             shift=None,
                                             keep_dims=False)
            std_batch = (K.sqrt(var_batch + self.epsilon))

            # r: batch std relative to the running std, clipped to
            # [1 / r_max, r_max]; no gradient flows through it
            r_max_value = K.get_value(self.r_max)
            r = std_batch / (K.sqrt(self.running_std + self.epsilon))
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            # d: batch mean offset from the running mean, clipped to
            # [-d_max, d_max]; no gradient flows through it
            d_max_value = K.get_value(self.d_max)
            d = (mean_batch - self.running_mean) / K.sqrt(self.running_std +
                                                          self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            # NOTE(review): on Python 3 a list never compares equal to a
            # range object, so this condition looks to be always False there
            # and the broadcasting branch below is always taken -- confirm.
            if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
                x_normed_batch = (x - mean_batch) / std_batch
                x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
            else:
                # need broadcasting
                broadcast_mean = K.reshape(mean_batch, broadcast_shape)
                broadcast_std = K.reshape(std_batch, broadcast_shape)
                broadcast_r = K.reshape(r, broadcast_shape)
                broadcast_d = K.reshape(d, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

                x_normed_batch = (x - broadcast_mean) / broadcast_std
                x_normed = (x_normed_batch * broadcast_r +
                            broadcast_d) * broadcast_gamma + broadcast_beta

            # explicit update to moving mean and standard deviation
            # (the second update is fed std_batch**2, i.e. a variance)
            self.add_update([
                K.moving_average_update(self.running_mean, mean_batch,
                                        self.momentum),
                K.moving_average_update(self.running_std, std_batch**2,
                                        self.momentum)
            ], x)

            # update r_max and d_max
            # both follow a sigmoid-shaped schedule in self.t
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * K.exp(-self.t))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * K.exp(-(2 * self.t)))

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update_add(self.t, K.variable(np.array([self.t_delta])))
            ], x)

            if self.mode == 0:
                # NOTE(review): same list-vs-range comparison as above.
                if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
                    x_normed_running = K.batch_normalization(
                        x,
                        self.running_mean,
                        self.running_std,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_std,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        x,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                # pick the normalized form of x corresponding to the training phase
                # for batch renormalization, inference time remains same as batchnorm
                x_normed = K.in_train_phase(x_normed, x_normed_running)

        elif self.mode == 1:
            # sample-wise normalization
            m = K.mean(x, axis=self.axis, keepdims=True)
            std = K.sqrt(
                K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
            x_normed_batch = (x - m) / (std + self.epsilon)

            r_max_value = K.get_value(self.r_max)
            r = std / (self.running_std + self.epsilon)
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            d_max_value = K.get_value(self.d_max)
            d = (m - self.running_mean) / (self.running_std + self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

            # update r_max and d_max
            # here the schedule is evaluated eagerly with NumPy from the
            # current value of self.t, not symbolically as in modes 0/2
            t_val = K.get_value(self.t)
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * np.exp(-t_val))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
            t_val += float(self.t_delta)

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update(self.t, t_val)
            ], x)

        return x_normed
Пример #30
0
    def step(self, h, states):
        '''Compute one recurrent step, teacher-forced in training.

        @inp : h - [previous_layer_input, true_input_for_previous_timestep] at train time
               or  [previous_layer_input, zeros] at test time
        @inp : states - a dictionary, contains the following
            - 'initial_states' - state vector; element 0 is sliced into the
                 previous hidden state (first row) and the previously
                 generated output (remaining rows)
            - 'random_cutoff_prob' - random cutoff matrix used for sampling at test time
            - 'rec_dp_mask' - for use with dropout (not tested - may break)

        @return: a pair `(output_2d_tensor, [output_2d_tensor])`, where the
                 tensor stacks the raw output with the final output. The
                 final output equals the raw output in the training phase
                 and is sampled with `self.output_sampling` in the test
                 phase.

        Raises ValueError when `self.implementation` is not 0.
        '''

        ################
        # Parsing the states vector
        ################
        initial_states = states['initial_states']
        random_cutoff_vec = states['random_cutoff_prob']

        if self.recurrent_dropout > 0:
            rec_dp_mask = states['rec_dp_mask']
        else:
            rec_dp_mask = np.array([1., 1., 1., 1.], dtype='float32')

        h_tm1 = initial_states[0][:1, :, :]

        def to_time_major(h):
            # switching from (batch_size, previous_layer_input|true_input, output_dim)
            #    to ( previous_layer_input|true_input, batch_size, output_dim)
            axes = [1, 0] + list(range(2, K.ndim(h)))
            return K.permute_dimensions(h, axes)

        def gru_update(prev_layer_input, prev_sampled_output):
            # Shared gate arithmetic for both phases.
            if self.implementation == 0:
                x_z = prev_layer_input[0, :, :self.units]
                x_r = prev_layer_input[0, :, self.units: 2 * self.units]
                x_h = prev_layer_input[0, :, 2 * self.units:]
            else:
                # str() is required: concatenating a non-str implementation
                # value raised TypeError instead of this ValueError.
                raise ValueError('Implementation type ' +
                                 str(self.implementation) + ' is invalid')

            z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                                      self.recurrent_kernel_z))
            r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                                      self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1 * rec_dp_mask[2],
                                       self.recurrent_kernel_h) +
                                 K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

            return z * h_tm1 + (1. - z) * hh

        def teacher_forced(h, states):
            h = to_time_major(h)

            prev_layer_input = h[0:1, :, :]
            # the true input stands in for the previously sampled output
            true_input = h[1:, :, :self.units]

            output = gru_update(prev_layer_input, true_input)

            return K.stack([output, output])

        def free_running(h, states):
            # feed back what the layer generated at the previous step
            prev_generated_output = initial_states[0][1:, :, :]

            h = to_time_major(h)
            prev_layer_input = h[0:1, :, :]

            output = gru_update(prev_layer_input, prev_generated_output)

            final_output = self.output_sampling(output, random_cutoff_vec)

            return K.stack([output, final_output])

        # both branches are built; the learning phase selects between them
        output_2d_tensor = K.in_train_phase(teacher_forced(h, states),
                                            free_running(h, states))

        output_2d_tensor = K.squeeze(output_2d_tensor, 1)

        return output_2d_tensor, [output_2d_tensor]
Пример #31
0
    def call(self, inputs, mask=None, training=None, initial_state=None):
        """Run the recurrent layer over a list of input tensors.

        `inputs` is indexed as a list. Which elements are used, and whether
        the first two are concatenated, depends on the flags
        `self.semantic_condition`, `self.condition_on_ptm1` and
        `self.generation_only`. With semantic conditioning the last list
        element is taken as the dialogue act and appended to the initial
        state.

        Returns the last output or the full sequence (per
        `self.return_sequences`), optionally followed by the states, or
        `[output, last_da, da_outputs]` when `self.semantic_condition` and
        `self.return_da` are both set.

        NOTE(review): the `training` argument is never forwarded (every
        helper call below passes `training=None`), the `initial_state`
        argument is overwritten in every branch, and `input_length` is
        unused -- confirm whether this is intentional.
        """
        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.

        # input for training [aux_softmax, ground thruth, dialogue act vector]
        input_length = K.int_shape(inputs[0])[1]
        input_list = inputs

        if self.semantic_condition and self.condition_on_ptm1 and self.generation_only:
            #takes orig_input while training and dialogue act for conditioning
            inputs = input_list[0]
            initial_state = self.get_initial_state(inputs)
            constants = self.get_constants(inputs, training=None)
        elif not self.generation_only and self.semantic_condition and self.condition_on_ptm1:
            #takes the aux the orig input -1 and the dialogue act while training, while testing the o-1 is replaced by ptm1
            aux_inputs = concatenate(inputs=input_list[:2])
            initial_state = self.get_initial_state(aux_inputs)
            constants = self.get_constants(aux_inputs, training=None)
            inputs = K.in_train_phase(aux_inputs, input_list[0])
        elif not self.generation_only and not self.semantic_condition and self.condition_on_ptm1:
            # takes the aux the orig input -1 and the dialogue act while training, while testing the o-1 is replaced by ptm1
            aux_inputs = concatenate(inputs=input_list[:2])
            initial_state = self.get_initial_state(aux_inputs)
            constants = self.get_constants(aux_inputs, training=None)
            inputs = K.in_train_phase(aux_inputs, input_list[0])
        elif not self.generation_only and not self.semantic_condition and not self.condition_on_ptm1:
            #takes aux input for train and testing (vanilla lstm)
            inputs = input_list
            initial_state = self.get_initial_state(inputs)
            constants = self.get_constants(inputs, training=None)
        else:
            # every remaining flag combination: use the first input only
            inputs = input_list[0]
            initial_state = self.get_initial_state(inputs)
            constants = self.get_constants(inputs, training=None)

        if self.semantic_condition:
            # the dialogue act travels as an extra state with its own constants
            dialogue_act = input_list[-1]
            initial_state = initial_state + [dialogue_act]
            sc_constants = self.get_sc_constants(dialogue_act, training=None)
            constants = constants + sc_constants

        if self.condition_on_ptm1:
            # append the initial value returned by get_initial_p to the states
            p0 = self.get_initial_p(inputs)
            initial_state += p0

        if isinstance(mask, list):
            # only the first mask is used
            mask = mask[0]

        preprocessed_input = self.preprocess_input(inputs, training=None)
        rnn_output = sc_tf_rnn(self.step,
                               preprocessed_input,
                               initial_state,
                               semantic_conditioning=self.semantic_condition,
                               go_backwards=self.go_backwards,
                               mask=mask,
                               constants=constants)

        # with semantic conditioning the result is unpacked into five values,
        # otherwise into three
        if self.semantic_condition:
            last_output, outputs, last_da, da_outputs, states = rnn_output
        else:
            last_output, outputs, states = rnn_output

        # Properly set learning phase
        if 0.0 < self.dropout + self.recurrent_dropout + self.sc_dropout:
            last_output._uses_learning_phase = True
            outputs._uses_learning_phase = True

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        if self.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            output = [output] + states

        if self.semantic_condition and self.return_da:
            # NOTE(review): when return_state is also set, `output` is already
            # a list here and gets nested as the first element -- confirm.
            output = [output, last_da, da_outputs]

        return output
 def one_zero(x):
     """Return zeros shaped like `x` in training and ones in the test phase."""
     train_value = K.zeros_like(x)
     test_value = K.ones_like(x)
     return K.in_train_phase(train_value, test_value)
Пример #33
0
 def f(t):
     """Project `t` with `ortho_weights` (training) or `ortho_weights_store` (test)."""
     train_projection = K.dot(t, ortho_weights)
     test_projection = K.dot(t, ortho_weights_store)
     return K.in_train_phase(train_projection, test_projection)
Пример #34
0
  def call(self, inputs):
    """Dispatch `(x, loga)` to the training or inference implementation.

    `inputs[0]` is `x` and `inputs[1]` is `loga`. Both `call_training` and
    `call_inference` are built, and the learning phase selects the result.
    """
    x, loga = inputs[0], inputs[1]
    train_output = self.call_training(loga, x)
    inference_output = self.call_inference(loga, x)
    return K.in_train_phase(train_output, inference_output)
Пример #35
0
 def call(self, x, mask=None):
     """Apply `hybo_tf` to `x` in the training phase.

     The input is returned unchanged when `self.p` is outside `(0, 1]`, and
     also in the test phase.
     """
     if not (0. < self.p <= 1.):
         return x
     perturbed = hybo_tf(x, p = self.p, shift = self.shift,
                         unif = self.unif,
                         just_dropout = self.just_dropout)
     return K.in_train_phase(perturbed, x)
Пример #36
0
def PadSymmetricInTestPhase():
    """Build a Lambda layer: identity in training, symmetric padding otherwise.

    Outside the training phase, axes 1 and 2 of the input are padded by two
    elements on each side using 'SYMMETRIC' mode; axes 0 and 3 are untouched.
    """
    layer = Lambda(
        lambda x: K.in_train_phase(
            x,
            tf.pad(x,
                   tf.constant([[0, 0], [2, 2], [2, 2], [0, 0]]),
                   'SYMMETRIC')))
    # The output differs between phases, so flag the layer accordingly.
    layer.uses_learning_phase = True
    return layer
    def apply_dropout_if_needed(self, _input, training=None):
        """Return `_input` with dropout (level 0.5) in training, unchanged otherwise.

        `training` is forwarded to `K.in_train_phase` to select the branch.
        """
        return K.in_train_phase(lambda: K.dropout(_input, 0.5),
                                _input,
                                training=training)
Пример #38
0
 def build(self, input_shape):
     """Create the layer weights and the kernels used in the forward pass.

     Weights are added in a fixed order: kernel, recurrent kernel, cell
     kernel, twelve `(1, dk)` attention vectors, the aggregation matrix and
     the bias. When `self.dropconnect` is truthy, `_kernel`,
     `_recurrent_kernel` and `_cell_kernel` are dropout-masked copies in the
     training phase and the raw weights otherwise; if it is falsy they are
     simply aliases of the raw weights.
     """
     input_dim = input_shape[-1]
     stacked_units = self.units * 5

     self.kernel = self.add_weight(shape=(input_dim, stacked_units),
                                   name='kernel',
                                   initializer='glorot_uniform')
     self.recurrent_kernel = self.add_weight(shape=(self.units, stacked_units),
                                             name='recurrent_kernel',
                                             initializer='orthogonal')
     self.cell_kernel = self.add_weight(shape=(self.units, stacked_units),
                                        name='cell_kernel',
                                        initializer='orthogonal')

     # Attention vectors; the order below fixes their position in the
     # layer's weight list, so it must not be changed.
     attention_names = (
         'up_att_downl', 'up_att_downr',
         'up_att_randl', 'up_att_randr',
         'down_att_upl', 'down_att_upr',
         'down_att_randl', 'down_att_randr',
         'rand_att_upl', 'rand_att_upr',
         'rand_att_downl', 'rand_att_downr',
     )
     for weight_name in attention_names:
         setattr(self, weight_name,
                 self.add_weight(shape=(1, self.dk),
                                 name=weight_name,
                                 initializer='glorot_uniform'))

     self.aggregation = self.add_weight(shape=(self.units * 3, self.units),
                                        name='aggregation',
                                        initializer='glorot_uniform')
     self.bias = self.add_weight(shape=(stacked_units, ),
                                 name='bias',
                                 initializer='zeros')
     self.built = True

     if not self.dropconnect:
         self._kernel = self.kernel
         self._recurrent_kernel = self.recurrent_kernel
         self._cell_kernel = self.cell_kernel
     else:
         def _train_only_dropout(weight):
             # Masked weight while training, raw weight otherwise.
             return K.in_train_phase(K.dropout(weight, self.dropconnect),
                                     weight)

         self._kernel = _train_only_dropout(self.kernel)
         self._recurrent_kernel = _train_only_dropout(self.recurrent_kernel)
         self._cell_kernel = _train_only_dropout(self.cell_kernel)
Пример #39
0
def rounded_sigmoid(name="rounded_sigmoid"):
    """Build a Lambda layer: sigmoid in training, rounded sigmoid otherwise.

    Each call increments the module-level `rounded_sigmoid_counter` and
    appends it to `name`, so every created layer gets a distinct name.
    """
    global rounded_sigmoid_counter
    rounded_sigmoid_counter += 1
    layer_name = "{}_{}".format(name, rounded_sigmoid_counter)
    return Lambda(
        lambda x: K.in_train_phase(K.sigmoid(x), K.round(K.sigmoid(x))),
        name=layer_name)
Пример #40
0
 def call(self, x, training=None):
     """Select one slice along axis 1 of `x`, depending on the learning phase.

     In training the index is `round((len - 0.5) * self.pos)` where `len` is
     the dynamic size of axis 1; otherwise the last slice (`-1`) is used.
     """
     axis_len = tf.cast(tf.shape(x)[1], tf.float32)
     train_index = tf.cast(tf.round((axis_len - .5) * self.pos), tf.int32)
     train_slice = x[:, train_index, :]
     last_slice = x[:, -1, :]
     return K.in_train_phase(train_slice, last_slice, training=training)
Пример #41
0
    def step(self, inputs, states):
        """Compute one step from `inputs` and the previous `states`.

        The layout of `states` depends on `self.semantic_condition` and
        `self.condition_on_ptm1`:

        * both set:      [h, c, d, p, dp_mask, rec_dp_mask, sc_dp_mask]
        * only ptm1:     [h, c, p, dp_mask, rec_dp_mask]
        * neither:       [h, c, dp_mask, rec_dp_mask]
        * only semantic: [h, c, d, dp_mask, rec_dp_mask, sc_dp_mask]

        Returns a pair `(p_softmax, new_states)` where `new_states` holds
        `h`, `c` and, depending on the same two flags, `d` and/or `p_ret`.
        """

        # Unpack the previous states according to the active configuration.
        if self.semantic_condition and self.condition_on_ptm1:
            h_tm1 = states[0]
            c_tm1 = states[1]
            d_tm1 = states[2]
            p_tm1 = states[3]
            # Outside the training phase the previous output distribution is
            # either concatenated to the inputs or replaces them entirely.
            if self.condition_on_ptm1 and not self.generation_only:
                inputs = K.in_train_phase(
                    inputs, K.concatenate([inputs, p_tm1], axis=1))
            elif self.condition_on_ptm1 and self.generation_only:
                inputs = K.in_train_phase(inputs, p_tm1)
            dp_mask = states[4]
            rec_dp_mask = states[5]
            sc_dp_mask = states[6]
        elif not self.semantic_condition and self.condition_on_ptm1:
            h_tm1 = states[0]
            c_tm1 = states[1]
            p_tm1 = states[2]
            if self.condition_on_ptm1:
                inputs = K.in_train_phase(
                    inputs, K.concatenate([inputs, p_tm1], axis=1))
            dp_mask = states[3]
            rec_dp_mask = states[4]
        elif not self.semantic_condition and not self.condition_on_ptm1:
            h_tm1 = states[0]
            c_tm1 = states[1]
            dp_mask = states[2]
            rec_dp_mask = states[3]
        else:  # self.semantic_condition and not self.condition_on_ptm1
            h_tm1 = states[0]
            c_tm1 = states[1]
            d_tm1 = states[2]
            dp_mask = states[3]
            rec_dp_mask = states[4]
            sc_dp_mask = states[5]

        # Joint pre-activation for the four gates; only the first mask of
        # each dropout mask list is applied.
        z = K.dot(inputs * dp_mask[0], self.kernel)
        z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        # Split the pre-activation into four slices of width `self.units`.
        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        z2 = z[:, 2 * self.units:3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)

        if self.semantic_condition:
            # Gate `r` scales the previous `d` state; the resulting `d` adds
            # an extra term to the cell state.
            # NOTE(review): the bias is added to `r` after the activation has
            # already been applied -- confirm this is intended.
            r = self.recurrent_activation(
                K.dot(inputs * dp_mask[0], self.kernel_r) + self.alpha *
                K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_r))
            if self.use_bias:
                r = K.bias_add(r, self.bias_r)
            d = r * d_tm1
            c = f * c_tm1 + i * self.activation(z2) + self.activation(
                K.dot(d * sc_dp_mask[0], self.kernel_d))
        else:
            c = f * c_tm1 + i * self.activation(z2)

        o = self.recurrent_activation(z3)

        h = o * self.activation(c)

        # Output distribution of target word prob: p in (batch_size, nclasses).
        # With a temperature the softmax itself is fed back; without one, the
        # fed-back value is the softmax in training and a one-hot argmax
        # otherwise.
        if self.softmax_temperature is not None:
            p_softmax = K.softmax(
                K.dot(h, self.out_kernel) / self.softmax_temperature)
            p_ret = p_softmax
        else:
            p_softmax = K.softmax(K.dot(h, self.out_kernel))
            p_ret = K.in_train_phase(
                p_softmax,
                K.one_hot(K.argmax(p_softmax, axis=1), self.out_units))

        # Flag the hidden state as phase-dependent when any dropout is active.
        if 0.0 < self.dropout + self.recurrent_dropout + self.sc_dropout:
            h._uses_learning_phase = True

        # The returned state list mirrors the non-mask part of the input layout.
        if self.semantic_condition and self.condition_on_ptm1:
            return p_softmax, [h, c, d, p_ret]
        elif not self.semantic_condition and self.condition_on_ptm1:
            return p_softmax, [h, c, p_ret]
        elif not self.semantic_condition and not self.condition_on_ptm1:
            return p_softmax, [h, c]
        else:
            return p_softmax, [h, c, d]
Пример #42
0
 def call(self, x):
     """Return `x` multiplied by `self.Q` in training, and `x` itself otherwise."""
     transformed = K.dot(x, self.Q)
     return K.in_train_phase(transformed, x)
Пример #43
0
 def call(self, x, mask=None):
     """Apply a relu whose second argument is random in training and fixed otherwise.

     In training that argument is drawn uniformly from `[self.l, self.u)` with
     the shape of `x`; outside training `self.average` is used instead.
     """
     random_arg = K.random_uniform(K.shape(x), self.l, self.u)
     train_output = K.relu(x, random_arg)
     test_output = K.relu(x, self.average)
     return K.in_train_phase(train_output, test_output)
Пример #44
0
    def call(self, x, mask=None):
        """Normalize `x` according to `self.mode`.

        * mode 0: normalize with batch statistics in training and with the
          running statistics otherwise; also registers the moving-average
          updates of the running mean/std on `self.updates`.
        * mode 2: always normalize with the statistics of the current batch.
        * mode 1: sample-wise normalization over the last axis.

        `mask` is accepted for interface compatibility and is not used.
        """
        if self.mode == 0 or self.mode == 2:
            assert self.built, 'Layer must be built before being called'
            input_shape = self.input_spec[0].shape

            # Reduce over every axis except the normalized one.
            reduction_axes = list(range(len(input_shape)))
            del reduction_axes[self.axis]
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self.axis] = input_shape[self.axis]

            if self.mode == 2:
                x_normed, mean, std = K.normalize_batch_in_training(
                    x,
                    self.gamma,
                    self.beta,
                    reduction_axes,
                    epsilon=self.epsilon)
            else:
                # mode 0
                # The trailing `and False` deliberately disables this
                # sharing check; it is kept as found.
                if self.called_with not in {None, x} and False:
                    raise Exception('You are attempting to share a '
                                    'same `BatchNormalization` layer across '
                                    'different data flows. '
                                    'This is not possible. '
                                    'You should use `mode=2` in '
                                    '`BatchNormalization`, which has '
                                    'a similar behavior but is shareable '
                                    '(see docs for a description of '
                                    'the behavior).')
                self.called_with = x
                x_normed, mean, std = K.normalize_batch_in_training(
                    x,
                    self.gamma,
                    self.beta,
                    reduction_axes,
                    epsilon=self.epsilon)

                self.updates = [
                    K.moving_average_update(self.running_mean, mean,
                                            self.momentum),
                    K.moving_average_update(self.running_std, std,
                                            self.momentum)
                ]

                # `range` must be materialized: on Python 3 a list never
                # compares equal to a range object, which would force the
                # broadcasting branch even when it is not needed.
                if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                    x_normed_running = K.batch_normalization(
                        x,
                        self.running_mean,
                        self.running_std,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_std,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        x,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                # pick the normalized form of x corresponding to the training phase
                x_normed = K.in_train_phase(x_normed, x_normed_running)

        elif self.mode == 1:
            # sample-wise normalization
            m = K.mean(x, axis=-1, keepdims=True)
            std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
            x_normed = (x - m) / (std + self.epsilon)
            x_normed = self.gamma * x_normed + self.beta
        return x_normed
Пример #45
0
 def compute_loss(self, input, output, input_mask=None, output_mask=None):
     """Penalize values of `input` outside `[self.low, self.high]`, in training only.

     The penalty is `self.weight` times the mean absolute distance to the
     violated bound; outside the training phase the loss is 0. `output` and
     both masks are not used.
     """
     below = K.switch(input < self.low, K.abs(input - self.low), 0)
     above = K.switch(input > self.high, K.abs(input - self.high), 0)
     penalty = self.weight*K.mean(above + below)
     return K.in_train_phase(penalty, 0)
Пример #46
0
def group_ksparse(x,
                  groups,
                  k,
                  axis_group,
                  axis_sparse,
                  norm=2,
                  alpha=1,
                  epsilon=None):
    """Return `x` multiplied by a 0/1 mask built from a top-k selection.

    The mask is derived from `group_norms` computed over the `axis_group`
    axes and a `tf.nn.top_k` selection; `axis_group` and `axis_sparse` may be
    given as an int, list or tuple and must be disjoint. The number of
    selected entries is `k` in the training phase and `alpha * k` otherwise,
    capped at the product of the `axis_sparse` sizes. `epsilon` defaults to
    `K.epsilon()`.
    """
    # Normalize both axis arguments to tuples.
    if isinstance(axis_group, int):
        axis_group = (axis_group, )
    elif isinstance(axis_group, list):
        axis_group = tuple(axis_group)
    if isinstance(axis_sparse, int):
        axis_sparse = (axis_sparse, )
    elif isinstance(axis_sparse, list):
        axis_sparse = tuple(axis_sparse)
    # The group axes and the sparse axes must not overlap.
    assert (1 - bool(set(axis_group) & set(axis_sparse)))
    if epsilon is None:
        epsilon = K.epsilon()
    # All remaining axes.
    # NOTE(review): the order comes from iterating a set -- confirm it is
    # the intended (ascending) order for every input rank.
    axis_complement = tuple(
        set(range(K.ndim(x))) - set(axis_group) - set(axis_sparse))
    shape_reduce_group = K.prod([K.shape(x)[j] for j in axis_group])
    shape_reduce_sparse = K.prod([K.shape(x)[j] for j in axis_sparse])
    # Phase-dependent number of entries to keep, never more than available.
    _k = K.minimum(K.in_train_phase(k, alpha * k), shape_reduce_sparse)
    # Move axes to (complement, sparse, group) order and flatten each family.
    inputs_permute_dimensions = K.permute_dimensions(
        x, axis_complement + axis_sparse + axis_group)
    inputs_permute_dimensions_reshape = K.reshape(
        inputs_permute_dimensions,
        (-1, shape_reduce_sparse, shape_reduce_group))
    # presumably one norm per group along the last axis -- `group_norms` is
    # defined elsewhere; verify its output shape.
    norm_group_permute_dimensions_reshape = group_norms(
        inputs=inputs_permute_dimensions_reshape,
        groups=groups,
        axis=-1,
        norm=norm,
        epsilon=epsilon)
    norm_group_permute_dimensions_reshape = K.permute_dimensions(
        norm_group_permute_dimensions_reshape, (0, 2, 1))
    norm_group_permute_dimensions_reshape = K.reshape(
        norm_group_permute_dimensions_reshape, (-1, shape_reduce_sparse))
    # Indices of the `_k` largest norms in each row.
    _, indices = tf.nn.top_k(norm_group_permute_dimensions_reshape, _k)
    # Pair every selected column index with its row index for scatter_nd.
    scatter_indices = K.concatenate([
        (K.arange(K.shape(norm_group_permute_dimensions_reshape)[0])[:, None] *
         K.ones((1, _k), dtype='int32'))[:, :, None], indices[:, :, None]
    ])
    scatter_updates = K.ones(
        (K.shape(norm_group_permute_dimensions_reshape)[0], _k))
    # Ones at the selected positions, zeros elsewhere.
    mask_group_permute_dimensions_reshape = K.cast(
        tf.scatter_nd(scatter_indices, scatter_updates,
                      K.shape(norm_group_permute_dimensions_reshape)),
        K.floatx())
    mask_group_permute_dimensions_reshape = K.reshape(
        mask_group_permute_dimensions_reshape,
        (-1, groups, shape_reduce_sparse))
    mask_group_permute_dimensions_reshape = K.permute_dimensions(
        mask_group_permute_dimensions_reshape, (0, 2, 1))
    # Repeat each group's mask value across that group's members.
    mask_permute_dimensions_reshape = (
        mask_group_permute_dimensions_reshape[:, :, :, None] * K.ones(
            (1, 1, 1, floor_div(shape_reduce_group, groups))))
    mask_permute_dimensions = K.reshape(mask_permute_dimensions_reshape,
                                        K.shape(inputs_permute_dimensions))
    # Undo the initial permutation so the mask lines up with `x`.
    mask = K.permute_dimensions(
        mask_permute_dimensions,
        tuple(np.argsort(axis_complement + axis_sparse + axis_group)))
    return mask * x
Пример #47
0
 def call(self, x, mask=None):
     """Apply dropout at level `self.p` in training when `0 < self.p < 1`.

     For any other `self.p`, or outside training, `x` is returned unchanged.
     """
     if not (0. < self.p < 1.):
         return x
     dropped = K.dropout(x, level=self.p)
     return K.in_train_phase(dropped, x)
Пример #48
0
 def call(self, x, training=None):
     """Multiply `x` by `rand_rotate_matrix_symbol()` in training; pass `x` through otherwise."""
     rotated = K.dot(x, rand_rotate_matrix_symbol())
     return K.in_train_phase(rotated, x, training=training)
Пример #49
0
 def call(self, inputs, mask=None):
     """Return `_drop_path(inputs)` in training and `_ave(inputs)` otherwise."""
     dropped = self._drop_path(inputs)
     averaged = self._ave(inputs)
     return K.in_train_phase(dropped, averaged)
Пример #50
0
####################
# Initialise model #
####################

# Restrict GPU memory usage
# When a GPU is configured, create a TF session that allocates GPU memory
# on demand and only sees that device, then register it as the Keras session.
if config.set_gpu is not None:
    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True
    conf.gpu_options.visible_device_list = str(config.set_gpu)
    sess = tf.Session(config=conf)
    set_session(sess)
# The attribute is removed from `config` whether or not a session was made.
del config.set_gpu

# `eps` is a backend variable (with gradients stopped) in the training
# phase and the constant `config.eval_epsilon` otherwise.
eps_train_var = K.variable(config.train_epsilon)
eps = K.in_train_phase(K.stop_gradient(eps_train_var),
                       K.constant(config.eval_epsilon))
# `k` follows the same pattern: a variable initialised to 1 in training,
# the constant `config.min_k` otherwise.
k_train_var = K.variable(1)
k = K.in_train_phase(K.stop_gradient(k_train_var), K.constant(config.min_k))

# Standardize train and validation data with statistics of the training
# set taken over axes (0, 1, 2); 1e-6 guards against division by zero.
# presumably the remaining axis is the channel axis -- verify data layout.
if config.augmentation:
    mean, std = x_train.mean(axis=(0, 1,
                                   2)), x_train.std(axis=(0, 1, 2)) + 1e-6
    x_train = (x_train - mean) / std
    x_valid = (x_valid - mean) / std
    print("Normalising channels with values", mean, std)
else:
    mean, std = None, None

if config.model_name == "SmallCNN":
    model = SmallCNN(input_shape=input_shape)
elif config.model_name == "MediumCNN":
Пример #51
0
 def get_yj_vars(self):
     """Return the transpose of `self.vj` in training and of `self.vjr` otherwise."""
     phase_value = K.in_train_phase(self.vj, self.vjr)
     return K.transpose(phase_value)
Пример #52
0
    def call(self,
             inputs,
             initial_state=None,
             initial_readout=None,
             ground_truth=None,
             mask=None,
             training=None):
        """Run the recurrent model over `inputs` and return its output(s).

        `inputs` is either a single tensor or a list. As a list, its first
        element is the input sequence, followed by the initial states, then
        (when `self.readout` is set) the initial readout and (when
        `self.teacher_force` is set) the ground truth as the last element.

        Returns the full output sequence when `self.return_sequences` is
        set, otherwise the last output; when `self.return_states` is set the
        result is a list of that value followed by the final states.

        Raises:
            Exception: if `self.model` is None, or if teacher forcing is
                enabled and no ground truth is provided.
            ValueError: if the number of initial states does not match what
                the layer requires, or if unrolling is requested while the
                time dimension is undefined.
        """
        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if type(mask) is list:
            mask = mask[0]
        if self.model is None:
            raise Exception('Empty RecurrentModel.')
        num_req_states = self.num_states
        # With readout enabled, one of the required states is the readout
        # itself, which is supplied separately from the ordinary states.
        if self.readout:
            num_actual_states = num_req_states - 1
        else:
            num_actual_states = num_req_states
        if type(inputs) is list:
            # Split the list into sequence, states, readout and ground truth.
            inputs_list = inputs[:]
            inputs = inputs_list.pop(0)
            initial_states = inputs_list[:num_actual_states]
            if len(initial_states) > 0:
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)
            inputs_list = inputs_list[num_actual_states:]
            if self.readout:
                initial_readout = inputs_list.pop(0)
                if self.teacher_force:
                    ground_truth = inputs_list.pop()
        else:
            if initial_state is not None:
                if not isinstance(initial_state, (list, tuple)):
                    initial_states = [initial_state]
                else:
                    initial_states = list(initial_state)
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)

            elif self.stateful:
                initial_states = self.states
            else:
                initial_states = self.get_initial_state(inputs)
        if self.readout:
            if initial_readout is None or self._is_optional_input_placeholder(
                    initial_readout):
                # Build an all-zero readout: take one zero per sample from
                # the inputs, then reshape and tile it to the static shape
                # of the model's first output.
                output_shape = K.int_shape(_to_list((self.model.output))[0])
                output_ndim = len(output_shape)
                input_ndim = K.ndim(inputs)
                initial_readout = K.zeros_like(inputs)
                slices = [slice(None)] + [0] * (input_ndim - 1)
                initial_readout = initial_readout[slices]  # (batch_size,)
                initial_readout = K.reshape(initial_readout,
                                            (-1, ) + (1, ) * (output_ndim - 1))
                initial_readout = K.tile(initial_readout,
                                         (1, ) + tuple(output_shape[1:]))
            initial_states.append(initial_readout)
            if self.teacher_force:
                if ground_truth is None or self._is_optional_input_placeholder(
                        ground_truth):
                    raise Exception(
                        'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                    )
                # An int32 counter and the ground truth are inserted just
                # before the readout, adding two required states.
                if K.backend() == 'tensorflow':
                    with tf.control_dependencies(None):
                        counter = K.zeros((1, ))
                else:
                    counter = K.zeros((1, ))
                counter = K.cast(counter, 'int32')
                initial_states.insert(-1, counter)
                # NOTE(review): the next line is a bare expression with no
                # effect -- looks like leftover code; confirm and remove.
                initial_states[-2]
                initial_states.insert(-1, ground_truth)
                num_req_states += 2
        if len(initial_states) != num_req_states:
            raise ValueError('Layer requires ' + str(num_req_states) +
                             ' states but was passed ' +
                             str(len(initial_states)) + ' initial states.')
        input_shape = K.int_shape(inputs)
        if self.unroll and input_shape[1] is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')
        # NOTE(review): `training=None` is passed here instead of the
        # `training` argument of this method -- confirm this is intended.
        preprocessed_input = self.preprocess_input(inputs, training=None)
        constants = self.get_constants(inputs, training=None)
        if self.decode:
            # In decode mode the inputs become the first state and the loop
            # runs over a zero sequence of `self.output_length` steps.
            initial_states.insert(0, inputs)
            preprocessed_input = K.zeros((1, self.output_length, 1))
            input_length = self.output_length
        else:
            input_length = input_shape[1]
        if self.uses_learning_phase:
            # Build the recurrence once per learning phase and select
            # between the two results with `K.in_train_phase`.
            # NOTE(review): `updates` from the test-phase run is overwritten
            # by the train-phase run below.
            with learning_phase_scope(0):
                last_output_test, outputs_test, states_test, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)
            with learning_phase_scope(1):
                last_output_train, outputs_train, states_train, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)

            last_output = K.in_train_phase(last_output_train,
                                           last_output_test,
                                           training=training)
            outputs = K.in_train_phase(outputs_train,
                                       outputs_test,
                                       training=training)
            states = []
            for state_train, state_test in zip(states_train, states_test):
                states.append(
                    K.in_train_phase(state_train,
                                     state_test,
                                     training=training))

        else:
            last_output, outputs, states, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        states = list(states)
        # Strip the bookkeeping states added above (inputs in decode mode,
        # readout, and counter + ground truth for teacher forcing).
        if self.decode:
            states.pop(0)
        if self.readout:
            states.pop()
            if self.teacher_force:
                states.pop()
                states.pop()
        if len(updates) > 0:
            self.add_update(updates)
        if self.stateful:
            # Write the final states back into the layer's state variables.
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))
            self.add_update(updates, inputs)

        # Properly set learning phase
        if 0 < self.dropout + self.recurrent_dropout:
            last_output._uses_learning_phase = True
            outputs._uses_learning_phase = True

        if self.return_sequences:
            y = outputs
        else:
            y = last_output
        if self.return_states:
            return [y] + states
        else:
            return y
Пример #53
0
def smooth_min(*args):
    """Return a log-sum-exp based soft minimum in training, `K.minimum` otherwise.

    The training value is `-logsumexp(-stack(args)) + log(2)` along the
    stacking axis.
    """
    stacked_negated = -K.stack(args, axis=0)
    soft_value = -K.logsumexp(stacked_negated, axis=0) + K.log(2.0)
    hard_value = K.minimum(*args)
    return K.in_train_phase(soft_value, hard_value)
Пример #54
0
def smooth_max(*args):
    """Return a log-sum-exp based soft maximum in training, `K.maximum` otherwise.

    The training value is `logsumexp(stack(args)) - log(2)` along the
    stacking axis.
    """
    stacked = K.stack(args, axis=0)
    soft_value = K.logsumexp(stacked, axis=0) - K.log(2.0)
    hard_value = K.maximum(*args)
    return K.in_train_phase(soft_value, hard_value)
Пример #55
0
 def call(self, x, mask=None):
     """Return `x` in training and the one-hot Viterbi decoding of `x` otherwise.

     The one-hot depth is read from axis 2 of the layer's first input spec.
     """
     decoded = viterbi_decode(x, self.U, self.b_start, self.b_end, mask)
     depth = self.input_spec[0].shape[2]
     decoded_one_hot = K.one_hot(decoded, depth)
     return K.in_train_phase(x, decoded_one_hot)
Пример #56
0
    def call(self, inputs, training=None):
        """Add zero-mean normal noise with stddev `self.stddev` to `inputs`.

        The same callable is given to `K.in_train_phase` for both phases, so
        the noise is added in training and outside training alike.
        """
        def _add_noise():
            noise = K.random_normal(
                shape=K.shape(inputs), mean=0., stddev=self.stddev)
            return inputs + noise

        return K.in_train_phase(_add_noise, _add_noise, training=training)
        def call(self, inputs, training=None):
            """Quaternion batch normalization of `inputs`.

            The normalized axis (`self.axis`) is split into four equal parts
            (r, i, j, k). Batch mean, per-part variances and pairwise
            covariances are computed over all other axes and passed to
            `QuaternionBN`. When `training` is 0/False the batch-normalized
            tensor is returned directly; otherwise moving-average updates are
            registered and `K.in_train_phase` selects between the batch
            statistics and the moving statistics.

            Raises:
                ValueError: for an unsupported axis/ndim combination, or when
                    both `self.scale` and `self.center` are False.
            """
            input_shape = K.int_shape(inputs)
            ndim = len(input_shape)
            reduction_axes = list(range(ndim))
            del reduction_axes[self.axis]
            # Width of each of the four quaternion components.
            input_dim = input_shape[self.axis] // 4
            mu = K.mean(inputs, axis=reduction_axes)
            broadcast_mu_shape = [1] * len(input_shape)
            broadcast_mu_shape[self.axis] = input_shape[self.axis]
            broadcast_mu = K.reshape(mu, broadcast_mu_shape)
            if self.center:
                input_centred = inputs - broadcast_mu
            else:
                input_centred = inputs
            centred_squared = input_centred ** 2
            # Slice the four components out of the centred tensor and its square.
            if (self.axis == 1 and ndim != 3) or ndim == 2:
                centred_squared_r = centred_squared[:, :input_dim]
                centred_squared_i = centred_squared[:, input_dim:input_dim*2]
                centred_squared_j = centred_squared[:, input_dim*2:input_dim*3]
                centred_squared_k = centred_squared[:, input_dim*3:]
                centred_r = input_centred[:, :input_dim]
                centred_i = input_centred[:, input_dim:input_dim*2]
                centred_j = input_centred[:, input_dim*2:input_dim*3]
                centred_k = input_centred[:, input_dim*3:]
            elif ndim == 3:
                centred_squared_r = centred_squared[:, :, :input_dim]
                centred_squared_i = centred_squared[:, :, input_dim:input_dim*2]
                centred_squared_j = centred_squared[:, :, input_dim*2:input_dim*3]
                centred_squared_k = centred_squared[:, :, input_dim*3:]
                centred_r = input_centred[:, :, :input_dim]
                centred_i = input_centred[:, :, input_dim:input_dim*2]
                centred_j = input_centred[:, :, input_dim*2:input_dim*3]
                centred_k = input_centred[:, :, input_dim*3:]
            elif self.axis == -1 and ndim == 4:
                centred_squared_r = centred_squared[:, :, :, :input_dim]
                centred_squared_i = centred_squared[:, :, :, input_dim:input_dim*2]
                centred_squared_j = centred_squared[:, :, :, input_dim*2:input_dim*3]
                centred_squared_k = centred_squared[:, :, :, input_dim*3:]
                centred_r = input_centred[:, :, :, :input_dim]
                centred_i = input_centred[:, :, :, input_dim:input_dim*2]
                centred_j = input_centred[:, :, :, input_dim*2:input_dim*3]
                centred_k = input_centred[:, :, :, input_dim*3:]
            elif self.axis == -1 and ndim == 5:
                centred_squared_r = centred_squared[:, :, :, :, :input_dim]
                centred_squared_i = centred_squared[:, :, :, :, input_dim:input_dim*2]
                centred_squared_j = centred_squared[:, :, :, :, input_dim*2:input_dim*3]
                centred_squared_k = centred_squared[:, :, :, :, input_dim*3:]
                centred_r = input_centred[:, :, :, :, :input_dim]
                centred_i = input_centred[:, :, :, :, input_dim:input_dim*2]
                centred_j = input_centred[:, :, :, :, input_dim*2:input_dim*3]
                centred_k = input_centred[:, :, :, :, input_dim*3:]
            else:
                raise ValueError(
                    'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
                    'axis: ' + str(self.axis) + '; ndim: ' + str(ndim) + '.'
                )
            if self.scale:
                # Variances (epsilon keeps them strictly positive).
                Vrr = K.mean(
                    centred_squared_r,
                    axis=reduction_axes
                ) + self.epsilon
                Vii = K.mean(
                    centred_squared_i,
                    axis=reduction_axes
                ) + self.epsilon
                Vjj = K.mean(
                    centred_squared_j,
                    axis=reduction_axes
                ) + self.epsilon
                Vkk = K.mean(
                    centred_squared_k,
                    axis=reduction_axes
                ) + self.epsilon

                # Co-variances between each pair of components.
                Vri = K.mean(
                    centred_r * centred_i,
                    axis=reduction_axes,
                )
                Vrj = K.mean(
                    centred_r * centred_j,
                    axis=reduction_axes,
                )
                Vrk = K.mean(
                    centred_r * centred_k,
                    axis=reduction_axes,
                )
                Vij = K.mean(
                    centred_i * centred_j,
                    axis=reduction_axes,
                )
                Vik = K.mean(
                    centred_i * centred_k,
                    axis=reduction_axes,
                )
                Vjk = K.mean(
                    centred_j * centred_k,
                    axis=reduction_axes,
                )
            elif self.center:
                Vrr = None
                Vri = None
                Vrj = None
                Vrk = None
                Vii = None
                Vij = None
                Vik = None
                Vjj = None
                Vjk = None
                Vkk = None
            else:
                raise ValueError('Error. Both scale and center in batchnorm are set to False.')

            input_bn = QuaternionBN(
                input_centred, Vrr, Vri, Vrj, Vrk, Vii, Vij, Vik, Vjj, Vjk, Vkk,
                self.beta, self.gamma_rr, self.gamma_ri, self.gamma_rj, self.gamma_rk,
                self.gamma_ii, self.gamma_ij, self.gamma_ik, self.gamma_jj, self.gamma_jk, self.gamma_kk,
                self.scale, self.center, axis=self.axis)
            if training in {0, False}:
                return input_bn

            # Register the moving-average updates of the batch statistics.
            update_list = []
            if self.center:
                update_list.append(K.moving_average_update(self.moving_mean, mu, self.momentum))
            if self.scale:
                update_list.append(K.moving_average_update(self.moving_Vrr, Vrr, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vri, Vri, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vrk, Vrk, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vrj, Vrj, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vii, Vii, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vij, Vij, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vik, Vik, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vjj, Vjj, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vjk, Vjk, self.momentum))
                update_list.append(K.moving_average_update(self.moving_Vkk, Vkk, self.momentum))
            self.add_update(update_list, inputs)

            def normalize_inference():
                # Same normalization, driven by the moving statistics.
                if self.center:
                    inference_centred = inputs - K.reshape(self.moving_mean, broadcast_mu_shape)
                else:
                    inference_centred = inputs
                return QuaternionBN(
                    inference_centred, self.moving_Vrr, self.moving_Vri, self.moving_Vrj, self.moving_Vrk,
                    self.moving_Vii, self.moving_Vij, self.moving_Vik, self.moving_Vjj, self.moving_Vjk, self.moving_Vkk,
                    self.beta, self.gamma_rr, self.gamma_ri, self.gamma_rj, self.gamma_rk, self.gamma_ii, self.gamma_ij,
                    self.gamma_ik, self.gamma_jj, self.gamma_jk, self.gamma_kk, self.scale, self.center, axis=self.axis)

            # Pick the normalized form corresponding to the training phase.
            return K.in_train_phase(input_bn, normalize_inference, training=training)
	def call(self, inputs, training=None):
		"""Complex batch normalization of `inputs`.

		The normalized axis (`self.axis`) is split into a real half and an
		imaginary half. Batch mean, the two variances and the real/imaginary
		covariance are computed over all other axes and passed to
		`ComplexBN`. When `training` is 0/False the batch-normalized tensor
		is returned directly; otherwise moving-average updates are registered
		and `K.in_train_phase` selects between the batch statistics and the
		moving statistics.

		Raises:
			ValueError: for an unsupported axis/ndim combination, or when
				both `self.scale` and `self.center` are False.
		"""
		input_shape = K.int_shape(inputs)
		ndim = len(input_shape)
		reduction_axes = list(range(ndim))
		del reduction_axes[self.axis]
		# Width of the real (and of the imaginary) half.
		input_dim = input_shape[self.axis] // 2
		mu = K.mean(inputs, axis=reduction_axes)
		broadcast_mu_shape = [1] * len(input_shape)
		broadcast_mu_shape[self.axis] = input_shape[self.axis]
		broadcast_mu = K.reshape(mu, broadcast_mu_shape)
		if self.center:
			input_centred = inputs - broadcast_mu
		else:
			input_centred = inputs
		centred_squared = input_centred ** 2
		# Slice the real and imaginary halves out of the centred tensor and its square.
		if (self.axis == 1 and ndim != 3) or ndim == 2:
			centred_squared_real = centred_squared[:, :input_dim]
			centred_squared_imag = centred_squared[:, input_dim:]
			centred_real = input_centred[:, :input_dim]
			centred_imag = input_centred[:, input_dim:]
		elif ndim == 3:
			centred_squared_real = centred_squared[:, :, :input_dim]
			centred_squared_imag = centred_squared[:, :, input_dim:]
			centred_real = input_centred[:, :, :input_dim]
			centred_imag = input_centred[:, :, input_dim:]
		elif self.axis == -1 and ndim == 4:
			centred_squared_real = centred_squared[:, :, :, :input_dim]
			centred_squared_imag = centred_squared[:, :, :, input_dim:]
			centred_real = input_centred[:, :, :, :input_dim]
			centred_imag = input_centred[:, :, :, input_dim:]
		elif self.axis == -1 and ndim == 5:
			centred_squared_real = centred_squared[:, :, :, :, :input_dim]
			centred_squared_imag = centred_squared[:, :, :, :, input_dim:]
			centred_real = input_centred[:, :, :, :, :input_dim]
			centred_imag = input_centred[:, :, :, :, input_dim:]
		else:
			raise ValueError(
				'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
				'axis: ' + str(self.axis) + '; ndim: ' + str(ndim) + '.'
			)
		if self.scale:
			# Variances (epsilon keeps them strictly positive).
			Vrr = K.mean(
				centred_squared_real,
				axis=reduction_axes
			) + self.epsilon
			Vii = K.mean(
				centred_squared_imag,
				axis=reduction_axes
			) + self.epsilon
			# Vri contains the real and imaginary covariance for each feature map.
			Vri = K.mean(
				centred_real * centred_imag,
				axis=reduction_axes,
			)
		elif self.center:
			Vrr = None
			Vii = None
			Vri = None
		else:
			raise ValueError('Error. Both scale and center in batchnorm are set to False.')

		input_bn = ComplexBN(
			input_centred, Vrr, Vii, Vri,
			self.beta, self.gamma_rr, self.gamma_ri,
			self.gamma_ii, self.scale, self.center,
			axis=self.axis
		)

		if training in {0, False}:
			return input_bn
		else:
			# Register the moving-average updates of the batch statistics.
			update_list = []
			if self.center:
				update_list.append(K.moving_average_update(self.moving_mean, mu, self.momentum))
			if self.scale:
				update_list.append(K.moving_average_update(self.moving_Vrr, Vrr, self.momentum))
				update_list.append(K.moving_average_update(self.moving_Vii, Vii, self.momentum))
				update_list.append(K.moving_average_update(self.moving_Vri, Vri, self.momentum))
			self.add_update(update_list, inputs)

			def normalize_inference():
				# Same normalization, driven by the moving statistics.
				if self.center:
					inference_centred = inputs - K.reshape(self.moving_mean, broadcast_mu_shape)
				else:
					inference_centred = inputs

				return ComplexBN(
					inference_centred, self.moving_Vrr, self.moving_Vii,
					self.moving_Vri, self.beta, self.gamma_rr, self.gamma_ri,
					self.gamma_ii, self.scale, self.center, axis=self.axis
				)

		# Pick the normalized form corresponding to the training phase.
		return K.in_train_phase(input_bn,
								normalize_inference,
								training=training)
def assister_loss(y_true, y_pred):
    """Categorical cross-entropy in the training phase, zeros of the same shape otherwise.

    The cross-entropy tensor is built once and reused for the zero-valued
    branch instead of being computed a second time.
    """
    loss = KL.categorical_crossentropy(y_true, y_pred)
    return KB.in_train_phase(loss, KB.zeros_like(loss))
 def call(self, inputs):
     """Add uniform noise drawn from `[self.minval, self.maxval)` to `inputs`.

     The same callable is given to `K.in_train_phase` for both phases, so
     the noise is added in training and outside training alike.
     """
     def _add_noise():
         noise = K.random_uniform(shape=K.shape(inputs),
                                  minval=self.minval,
                                  maxval=self.maxval)
         return inputs + noise
     return K.in_train_phase(_add_noise, _add_noise)