Example No. 1
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - self.get_param_learning_rate_t(p, t, lr_t) * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
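A minimal NumPy sketch of the bias-corrected Adam step these lines build, assuming scalar hyperparameters and a fixed learning rate in place of the project-specific get_param_learning_rate_t helper:

import numpy as np

def adam_step(p, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    # Bias-corrected step size, matching lr_t above.
    lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)
    m = beta_1 * m + (1. - beta_1) * g       # first-moment (mean) estimate
    v = beta_2 * v + (1. - beta_2) * g ** 2  # second-moment (uncentered variance) estimate
    p = p - lr_t * m / (np.sqrt(v) + eps)    # parameter update
    return p, m, v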
Example No. 2
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - self.get_param_learning_rate_t(p, t, lr_t) * m_t / (u_t + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
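The same structure with an exponentially weighted infinity norm in place of the second moment; a minimal NumPy sketch of the Adamax-style step above, assuming scalar hyperparameters:

import numpy as np

def adamax_step(p, g, m, u, t, lr=0.002, beta_1=0.9, beta_2=0.999, eps=1e-8):
    lr_t = lr / (1. - beta_1 ** t)           # bias correction for the first moment only
    m = beta_1 * m + (1. - beta_1) * g       # first-moment estimate
    u = np.maximum(beta_2 * u, np.abs(g))    # exponentially weighted infinity norm
    p = p - lr_t * m / (u + eps)
    return p, m, u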
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            if p.name in self.lr_mult:
                multiplied_lr = lr * self.lr_mult[p.name]
            else:
                multiplied_lr = lr

            v = self.momentum * m - multiplied_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - multiplied_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 4
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        lr = self.lr * (1. / (1. + self.decay * self.iterations))
        self.updates = [K.update_add(self.iterations, 1)]

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - self.get_param_learning_rate(p, lr) * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v

            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
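A minimal NumPy sketch of the momentum update above, showing the plain and Nesterov variants with a single learning rate (the per-parameter get_param_learning_rate helper is project-specific and omitted here):

import numpy as np

def sgd_momentum_step(p, g, m, lr=0.01, momentum=0.9, nesterov=False):
    v = momentum * m - lr * g              # velocity
    if nesterov:
        p_new = p + momentum * v - lr * g  # look-ahead correction
    else:
        p_new = p + v
    return p_new, v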
Example No. 5
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators + delta_accumulators
        self.updates = []

        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

            new_p = p - get_learing_rate(p, self.lr) * update
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
            self.updates.append(K.update(d_a, new_d_a))
        return self.updates
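A minimal NumPy sketch of the Adadelta recursion above (note that the step uses the old delta accumulator, as the comment points out); get_learing_rate is project-specific and replaced by a plain lr:

import numpy as np

def adadelta_step(p, g, a, d_a, lr=1.0, rho=0.95, eps=1e-7):
    a = rho * a + (1. - rho) * g ** 2                    # squared-gradient accumulator
    update = g * np.sqrt(d_a + eps) / np.sqrt(a + eps)   # uses the *old* delta accumulator
    p = p - lr * update
    d_a = rho * d_a + (1. - rho) * update ** 2           # squared-update accumulator
    return p, a, d_a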
Example No. 6
    def call(self, inputs):
        kernel_shape = K.int_shape(self.kernel)
        if not self.renormalize:
            w = K.reshape(self.kernel,
                          (kernel_shape[0], kernel_shape[1] * kernel_shape[2] *
                           kernel_shape[3], kernel_shape[-1]))
            sigma, u_bar = max_singular_val(
                w,
                self.u,
                transpose=lambda x: ktf.transpose(x, [0, 2, 1]),
                fully_differentiable=self.fully_diff_spectral,
                ip=self.spectral_iterations)
            sigma = K.reshape(sigma, (self.number_of_classes, 1, 1, 1, 1))
        else:
            w = K.reshape(self.kernel, (-1, kernel_shape[-1]))
            sigma, u_bar = max_singular_val(
                w,
                self.u,
                fully_differentiable=self.fully_diff_spectral,
                ip=self.spectral_iterations)

        self.add_update(K.update(self.u, u_bar))

        kernel = self.kernel
        self.kernel = self.kernel / sigma
        outputs = super(SNConditionalConv2D, self).call(inputs)
        self.kernel = kernel

        return outputs
Example No. 7
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            if p.name in self.clips.keys():
                c = K.eval(self.clips[p.name])
                if self.verbose > 0:
                    print("Clipping variable", p.name, " to ", c)
                new_p = K.clip(new_p, c[0], c[1])

            self.updates.append(K.update(p, new_p))
        return self.updates
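A minimal NumPy sketch of the AMSGrad branch above (the per-variable clipping via self.clips is project-specific and omitted):

import numpy as np

def amsgrad_step(p, g, m, v, vhat, lr_t, beta_1=0.9, beta_2=0.999, eps=1e-8):
    m = beta_1 * m + (1. - beta_1) * g
    v = beta_2 * v + (1. - beta_2) * g ** 2
    vhat = np.maximum(vhat, v)               # running max keeps the denominator non-decreasing
    p = p - lr_t * m / (np.sqrt(vhat) + eps)
    return p, m, v, vhat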
Example No. 8
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            if self.lr_multipliers is not None:
                if p.name in self.lr_multipliers:
                    new_lr = lr * self.lr_multipliers[p.name]
                else:
                    new_lr = lr
            else:
                new_lr = lr

            if self.momentum_multipliers is not None:
                if p.name in self.momentum_multipliers:
                    new_momentum = self.momentum * \
                                   self.momentum_multipliers[p.name]
                else:
                    new_momentum = self.momentum
            else:
                new_momentum = self.momentum

            v = new_momentum * m - new_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + new_momentum * v - new_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 9
    def call(self, inputs, training=None):
        if training is None:
            training = bk.learning_phase()
        training = bk.get_value(training)

        if training:
            dtype = self.embedding.dtype
            bk.update(self.call_cnt, self.call_cnt + 1)
            if self.period is not None and self.call_cnt % self.period == 0:
                self.adjust()

            @tf.custom_gradient
            def __delegate(_x, _y):
                x = bk.cast(_x, dtype)
                if 1 == self._target_dim:
                    y = bk.cast(_y, dtype) * bk.ones_like(x)
                else:
                    y = bk.reshape(
                        bk.expand_dims(bk.cast(_y, dtype), 1) *
                        bk.ones_like(bk.expand_dims(x, -1)),
                        (-1, self.input_dim * self._target_dim))

                def _grad(dy):
                    seg_indices = self._calc_seg_indices(
                        x, self.cur_min, self.cur_max)
                    seg_embeddings = bk.gather(self.embedding, seg_indices)
                    self._update_embedding(x, y, seg_indices, seg_embeddings)

                    dys = diff_by_col_num(self.embedding,
                                          col_num=self.seg_num,
                                          direction='both')
                    cur_dy = bk.gather((dys[0] + dys[1]) / 2, seg_indices)
                    if 1 == self._target_dim:
                        cur_dy *= dy
                    else:
                        cur_dy = bk.reshape(
                            cur_dy,
                            (-1, self.input_dim,
                             self._target_dim)) * bk.expand_dims(dy, 1)
                        cur_dy = bk.sum(cur_dy, axis=-1) / self.input_dim

                    return cur_dy * self.grad_ease, dy

                return _y, _grad

            return __delegate(inputs[0], inputs[1])
        return inputs[-1]
Example No. 10
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        print(lr)

        t = K.cast(self.iterations, K.floatx()) + 1
        '''Bias corrections according to the Adam paper
        '''
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            '''Schedule multiplier eta_t = 1 for simple AdamW
            According to the AdamW paper, eta_t can be fixed, decay, or 
            also be used for warm restarts (AdamWR to come). 
            '''
            eta_t = 1.
            p_t = p - eta_t * (lr_t * m_t / (K.sqrt(v_t) + self.epsilon))
            if self.weight_decay != 0:
                '''Normalized weight decay according to the AdamW paper
                '''
                w_d = self.weight_decay * K.sqrt(
                    self.batch_size / (self.samples_per_epoch * self.epochs))
                p_t = p_t - eta_t * (w_d * p)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [
            K.zeros(shape, name='accumulator_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        delta_accumulators = [
            K.zeros(shape, name='delta_accumulator_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        self.weights = [self.iterations] + accumulators + delta_accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        shapes = [K.int_shape(p) for p in params]
        moments = [
            K.zeros(shape, name='moment_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]

        for p, g, a, d_a, m in zip(params, grads, accumulators,
                                   delta_accumulators, moments):
            v = self.momentum * m - lr * g
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a +
                                                             self.epsilon)
            new_p = p - lr * update + v  # Add Stochastic Gradient Step Here?

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
            self.updates.append(K.update(d_a, new_d_a))
        return self.updates
Example No. 12
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.state_counter, 1))
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        shapes = [K.int_shape(p) for p in params]
        grad_mean = [K.zeros(shape) for shape in shapes]
        prev_weights = [p for p in params]
        self.weights = [self.iterations] + grad_mean + prev_weights
        old_grads = self.get_gradients(loss, prev_weights)
        for p, g, g_mean, prev, old_g in zip(params, grads, grad_mean,
                                             prev_weights, old_grads):
            #update part
            grad = g + g_mean - old_g
            v = -lr * grad
            new_p = p + v
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            new_p = K.switch(self.state_counter > self.mean_calculation_step,
                             new_p, p)
            self.updates.append(K.update(p, new_p))

            #statistics part
            grad_stat = K.switch(
                self.state_counter <= self.mean_calculation_step,
                g * (1.0 / self.mean_calculation_step), K.zeros_like(g))
            self.updates.append(K.update_add(g_mean, grad_stat))
            #switch statistics --> update
            temp_params = K.switch(
                self.state_counter <= self.mean_calculation_step, p, prev)
            self.updates.append(K.update(prev, temp_params))
            #switch update --> statistics
            temp_g_mean = K.switch(
                K.equal(self.state_counter,
                        self.mean_calculation_step + self.update_step),
                K.zeros_like(g_mean), g_mean)
            self.updates.append(K.update(g_mean, temp_g_mean))

        counter = K.switch(
            self.state_counter > self.mean_calculation_step + self.update_step,
            K.constant(0, dtype='int64'), self.state_counter)
        self.updates.append(K.update(self.state_counter, counter))
        return self.updates
Example No. 13
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))
        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1.0 - 1.0 / (self.gamma * t + 1.0))
        upper_bound = final_lr * (1.0 + 1.0 / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                step = lr_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                step = lr_t / (K.sqrt(v_t) + self.epsilon)
            p_t = p - K.minimum(K.maximum(step, lower_bound),
                                upper_bound) * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
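A minimal NumPy sketch of the bounded step applied above, assuming lower_bound and upper_bound have already been computed from final_lr and gamma as in the example:

import numpy as np

def bounded_step(p, m_t, v_t, lr_t, lower_bound, upper_bound, eps=1e-8):
    step = lr_t / (np.sqrt(v_t) + eps)                             # raw adaptive step size
    step = np.minimum(np.maximum(step, lower_bound), upper_bound)  # clamp into [lower, upper]
    return p - step * m_t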
Example No. 14
 def get_updates(self, params, loss, contraints=None):
     self.updates = [K.update_add(self.iterations, 1)]
     grads        = self.get_gradients(loss, params)
     shapes       = [K.int_shape(p) for p in params]
     old_grads    = [K.zeros(shape) for shape in shapes]
     weights      = [K.zeros(shape) for shape in shapes]
     
     # Learning Rate
     learning_rate = self.learning_rate
     if self.initial_decay > 0:
         learning_rate *= (1. / (1. + self.decay * self.iterations))
         
     t = self.iterations + 1
     
     # Line 2 - initialise current weights
     
     zeta      = [K.ones(shape) for shape in shapes]
     Z         = [K.zeros(shape) for shape in shapes]
     theta     = [K.zeros(shape) for shape in shapes]
     
     for p, g, w, expMA, prevZ, prevTheta, old_g in zip(params, grads, weights, zeta, Z, theta, old_grads):
         change      = g * old_g
         pos_change  = K.greater(change,0.)
         neg_change  = K.less(change,0.)
         
         # Line 3-8: For all t in [1..t] do the following
         
         zeta_t      = K.switch(pos_change,
                                K.minimum(expMA * self.eta_plus, self.zeta_max),
                                K.switch(neg_change, K.maximum(expMA * self.eta_minus, self.zeta_min), expMA))
         zeta_clip   = K.clip(zeta_t, self.zeta_min, self.zeta_max)
         
         # Lines 9-12: Update weights for t with amendments as proposed for line 11
         
         Z_t         = (self.alpha * prevZ) + ((1 - self.alpha) * zeta_t)
         theta_t     = (self.alpha * prevTheta) + ((1 - self.alpha) * K.square(g))
         wChange     = - (learning_rate * (zeta_clip /zeta_t) * g) / K.sqrt(theta_t + self.epsilon)
         new_weight = w + wChange 
         p_update    = p - w + new_weight
     
         self.updates.append(K.update(p,p_update))
         self.updates.append(K.update(w,new_weight))
         self.updates.append(K.update(expMA,zeta_t))
         self.updates.append(K.update(prevZ,Z_t))
         self.updates.append(K.update(prevTheta,theta_t))
     
     return self.updates
Example No. 15
def set_weights_for_training(model, fine_tune, layer_num=[81, 174]):
    """
    Takes a model and a training state i.e. fine_tune = True
    and sets weights accordingly. Fine-tuning unlocks
    from layer 81 - res4a_branch2a


    Input:

        model - ResNet_UNet model by default, can be any model

        fine_tune - bool to signify training state

        layer_num - layers to lock/unlock from. Default is
                    [81, 174]: layer 173 is add_16 and 174 is up_sampling2d_1

    Output:

        None
    """
    if not fine_tune:
        print("[INFO] base model...")
        # ResNet layers
        for layer in model.layers[0:layer_num[1]]:
            # Opens up mean and variance for training
            if hasattr(layer, 'moving_mean') and hasattr(layer, 'moving_variance'):
                layer.trainable = True
                K.eval(K.update(layer.moving_mean, K.zeros_like(layer.moving_mean)))
                K.eval(K.update(layer.moving_variance, K.zeros_like(layer.moving_variance)))
            else:
                layer.trainable = False

        # UNet layers
        for layer in model.layers[layer_num[1]::]:
            layer.trainable = True
    else:
        print("[INFO] fine tuning model...")
        # ResNet layers
        for layer in model.layers[layer_num[0]:layer_num[1]]:
            layer.trainable = True
            # Opens up mean and variance for training
            if hasattr(layer, 'moving_mean') and hasattr(layer, 'moving_variance'):
                K.eval(K.update(layer.moving_mean, K.zeros_like(layer.moving_mean)))
                K.eval(K.update(layer.moving_variance, K.zeros_like(layer.moving_variance)))
        # UNet layers
        for layer in model.layers[layer_num[1]::]:
            layer.trainable = True
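A hypothetical usage sketch of the two-phase schedule the docstring describes; the constructor, loss, and data names below are placeholders, not from the original source:

# model = ResNet_UNet()                               # hypothetical constructor
# set_weights_for_training(model, fine_tune=False)    # phase 1: train only the UNet head
# model.compile(optimizer='adam', loss='binary_crossentropy')
# model.fit(train_data, epochs=10)
# set_weights_for_training(model, fine_tune=True)     # phase 2: unlock ResNet from layer 81
# model.compile(optimizer='adam', loss='binary_crossentropy')  # recompile after changing trainability
# model.fit(train_data, epochs=10)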
Example No. 16
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr * self.eta_t  # for external tracking

        for p, g, m in zip(params, grads, moments):
            # Learning rate multipliers
            lr_t = self.lr
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t, p)

            v = self.momentum * m - self.eta_t * lr_t * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                p_t = p + self.momentum * v - self.eta_t * lr_t * g
            else:
                p_t = p + v

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates
Example No. 17
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        vhats = [K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g + (
                (self.prior_prec * p) / self.train_set_size)
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            #m_t = m_t  / (1. - self.beta_1) # bias correction
            #v_t = v_t / (1. - self.beta_2) # bias correction
            p_t = (p + self.epsilon) - lr_t * m_t / (
                K.sqrt(v_t) + self.prior_prec / self.train_set_size)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        return self.updates
Example No. 18
    def get_updates(self, loss, params):

        grads = K.gradients(loss, params)
        flattenedgrads = [K.flatten(x) for x in grads]
        G = K.concatenate(flattenedgrads)
        self.updates = []

        dP = self.dP
        xi = self.xi

        if self.initial_decay > 0:
            dP *= (1. / (1. + self.decay * self.iterations))
            self.updates.append(K.update_add(self.iterations, 1))

        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]

        flattenedmoments = [K.flatten(x) for x in moments]
        F = K.concatenate(flattenedmoments)

        self.weights = [self.iterations] + moments

        IGG = K.sum(G * G)
        IFF = K.sum(F * F)
        IGF = K.sum(G * F)
        dQ = -xi * dP * K.sqrt(IGG)
        lamda2 = 0.5 * K.sqrt((IFF * IGG - IGF * IGF) / (IGG * dP * dP - dQ * dQ))
        lamda1 = (-2 * lamda2 * dQ + IGF) / IGG

        for p, g, m in zip(params, grads, moments):

            cond=K.greater(IFF,0.0)

            v = K.switch(cond, -(lamda1 / (2 * lamda2)) * g + (1 / (2 * lamda2)) * m, -dP * g)

            self.updates.append(K.update(m, v))

            new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        return self.updates
    def get_updates_by_grads(self, grads, params):
        updates = []

        for g, p in zip(grads, params):
            new_p = p - self.lr * g
            updates.append(K.update(p, new_p))

        return updates
Example No. 20
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators
        self.updates = []

        for p, g, a in zip(params, grads, accumulators):
            new_a = a + K.square(g)  # update accumulator
            self.updates.append(K.update(a, new_a))
            new_p = p - get_learing_rate(p, self.lr) * g / (K.sqrt(new_a) + self.epsilon)
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 21
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_t_hat = m_t / (1. - K.pow(self.beta_1, t))
            v_t_hat = v_t / (1. - K.pow(self.beta_2, t))

            p_dash = m_t_hat / (K.sqrt(v_t_hat + self.epsilon))

            if self.weight_decay > 0.:
                wd = self.weight_decay * p
                p_dash = p_dash + wd

            r1 = K.sqrt(K.sum(K.square(p)))
            r2 = K.sqrt(K.sum(K.square(p_dash)))

            r = r1 / r2
            eta = r * lr

            p_t = p - eta * p_dash

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
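A minimal NumPy sketch of the layer-wise trust ratio computed above, assuming both norms are nonzero:

import numpy as np

def trust_ratio_lr(p, p_dash, lr):
    r1 = np.sqrt(np.sum(np.square(p)))       # norm of the layer's weights
    r2 = np.sqrt(np.sum(np.square(p_dash)))  # norm of the adaptive update direction
    return (r1 / r2) * lr                    # eta = r * lr, as above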
Example No. 22
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.minThres:
                pred = v_t > self.beta_3
                v_t = array_ops.where(pred, array_ops.zeros_like(v_t), v_t)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 23
    def inject(self, model):
        """Inject the Lookahead algorithm for the given model.
        The following code is modified from keras's _make_train_function method.
        See: https://github.com/keras-team/keras/blob/master/keras/engine/training.py#L497
        """
        if not hasattr(model, 'train_function'):
            raise RuntimeError('You must compile your model before using it.')

        model._check_trainable_weights_consistency()

        if model.train_function is None:
            inputs = (model._feed_inputs + model._feed_targets +
                      model._feed_sample_weights)
            if model._uses_dynamic_learning_phase():
                inputs += [K.learning_phase()]
            fast_params = model._collected_trainable_weights

            with K.name_scope('training'):
                with K.name_scope(model.optimizer.__class__.__name__):
                    training_updates = model.optimizer.get_updates(
                        params=fast_params, loss=model.total_loss)
                    slow_params = [K.variable(p) for p in fast_params]
                fast_updates = (model.updates + training_updates +
                                model.metrics_updates)

                slow_updates, copy_updates = [], []
                for p, q in zip(fast_params, slow_params):
                    slow_updates.append(K.update(q, q + self.alpha * (p - q)))
                    copy_updates.append(K.update(p, q))

                # Gets loss and metrics. Updates weights at each call.
                fast_train_function = K.function(inputs, [model.total_loss] +
                                                 model.metrics_tensors,
                                                 updates=fast_updates,
                                                 name='fast_train_function',
                                                 **model._function_kwargs)

                def F(inputs):
                    self.count += 1
                    R = fast_train_function(inputs)
                    if self.count % self.k == 0:
                        K.batch_get_value(slow_updates)
                        K.batch_get_value(copy_updates)
                    return R

                model.train_function = F
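A minimal NumPy sketch of the slow/fast weight synchronization performed by slow_updates and copy_updates above, assuming the weights are NumPy arrays:

import numpy as np

def lookahead_sync(fast_weights, slow_weights, alpha=0.5):
    # Every k calls: move the slow weights toward the fast weights,
    # then reset the fast weights to the new slow weights.
    new_slow = [q + alpha * (p - q) for p, q in zip(fast_weights, slow_weights)]
    new_fast = [s.copy() for s in new_slow]
    return new_fast, new_slow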
Example No. 24
    def get_updates(self, params, gparams):
        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1.
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
            1. - K.pow(self.beta_1, t))

        for p, g, m, v in zip(params, gparams, self.ms, self.vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 25
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)  # change
        # var_list = tf.trainable_variables()
        # grads = self.compute_sanitized_gradients(loss, var_list)
        # sanitized_grads = []
        # for px_grad, v in zip(px_grads, var_list):

        #   # tensor_name = GetTensorOpName(v)
        #   #tensorname
        #   #tensor_name=tensor_name,
        #     sanitized_grad = self._sanitizer.sanitize(
        #         px_grad, self._eps_delta, sigma=self._sigma,
        #         add_noise=add_noise,
        #         num_examples=self._batches_per_lot * tf.slice(
        #           tf.shape(px_grad), [0], [1]))
        #     sanitized_grads.append(sanitized_grad)

        self.updates = [K.update_add(self.iterations, 1)]
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            # g = sanitize(g,self._eps_delta, sigma=self._sigma,
            #     add_noise=add_noise,
            #     num_examples=self._batches_per_lot * tf.slice(
            #       tf.shape(px_grad), [0], [1]))

            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 26
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (
            1. - 0.5 *
            (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (
            1. - 0.5 * (K.pow(K.cast_to_floatx(0.96),
                              (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + (momentum_cache_t_1 *
                                                           m_t_prime)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 27
    def call(self, inputs, training=None):
        x = inputs
        assert not isinstance(x, list)

        # Compute the minibatch statistics
        mean, var = self._moments(x)
        sigma = K.sqrt(var + self.epsilon)

        # If in training phase set rmax, dmax large so that we use the moving
        # averages to do the normalization
        rmax = K.in_train_phase(self.rmax, K.constant(1e5), training)
        dmax = K.in_train_phase(self.dmax, K.constant(1e5), training)

        # Compute the corrections based on rmax, dmax
        r = K.stop_gradient(
            self._clip(sigma / self.moving_sigma, 1. / rmax, rmax))
        d = K.stop_gradient(
            self._clip((mean - self.moving_mean) / self.moving_sigma, -dmax,
                       dmax))

        # Actually do the normalization and the rescaling
        xnorm = ((x - mean) / sigma) * r + d
        y = self.gamma * xnorm + self.beta

        # Add the moving average updates
        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_sigma, sigma, self.momentum)
        ], x)

        # Add the r, d updates
        rmax_prog = K.minimum(1., self.steps / self.rmax_dur)
        dmax_prog = K.minimum(1., self.steps / self.dmax_dur)
        self.add_update([
            K.update_add(self.steps, 1),
            K.update(self.rmax,
                     self.rmax_0 + rmax_prog * (self.rmax_inf - self.rmax_0)),
            K.update(self.dmax,
                     self.dmax_0 + dmax_prog * (self.dmax_inf - self.dmax_0))
        ])

        # Fix the output's uses learning phase
        y._uses_learning_phase = rmax._uses_learning_phase

        return y
Example No. 28
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

        self.updates = [K.update_add(self.iterations, 1)]

        completed_updates = K.cast(
            tf.floordiv(self.iterations, self.accum_iters), K.floatx())
        t = completed_updates + 1

        update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0)
        update_switch = K.cast(update_switch, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v, tg in zip(params, grads, ms, vs, gs):
            sum_grad = tg + g
            avg_grad = sum_grad / self.accum_iters_float

            m_t = m * self.beta_1 + (1. - self.beta_1) * avg_grad
            v_t = v * self.beta_2 + (1. - self.beta_2) * K.square(avg_grad)

            m_hat = m_t / (1. - K.pow(self.beta_1, t))
            v_hat = v_t / (1. - K.pow(self.beta_2, t))

            u = m_hat / (K.sqrt(v_hat) + self.epsilon) + self.weight_decay * p

            r = K.sqrt(K.sum(K.square(p))) / K.sqrt(K.sum(K.square(u)))

            p_t = p - self.lr * r * u

            self.updates.append(
                K.update(m, (1 - update_switch) * m + update_switch * m_t))
            self.updates.append(
                K.update(v, (1 - update_switch) * v + update_switch * v_t))
            self.updates.append(K.update(tg, (1 - update_switch) * sum_grad))

            self.updates.append(
                K.update(p, (1 - update_switch) * p + update_switch * p_t))

        return self.updates
Example No. 29
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption
            new_p = p - (lr_t * (m_t / (denom**(self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        return self.updates
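A minimal NumPy sketch of the partially adaptive step above; partial = 0.5 recovers the ordinary Adam denominator, while smaller values move the update toward SGD with momentum:

import numpy as np

def padam_step(p, m_t, v_t, lr_t, partial=0.25, eps=1e-8):
    denom = np.sqrt(v_t) + eps
    return p - lr_t * m_t / denom ** (partial * 2)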
Example No. 30
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):
            # Update lr
            new_lr_t = lr_t
            if self.lr_multipliers is not None:
                matched_layer = [
                    x for x in self.lr_multipliers.keys() if x in p.name]
                if matched_layer:
                    new_lr_t = lr_t * self.lr_multipliers[matched_layer[0]]

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - new_lr_t * m_t / (u_t + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))
            new_p = p_t

            # Weight_decay
            new_p -= new_lr_t * self.wd * p

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 31
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        new_iter_op = tf.assign_add(self.iterations, 1)
        self.updates = []

        lr = self.lr
        with tf.control_dependencies([new_iter_op]):
            if self.initial_decay > 0:
                lr *= (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

            accum_switch = K.cast(K.equal(self.iterations % self.accum_iters,
                                          0),
                                  dtype=K.floatx())
            t = K.cast(self.iterations // self.accum_iters, K.floatx()) + 1
        accum_iters = K.cast(self.accum_iters, dtype=K.floatx())

        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, gp, m, v, ga in zip(params, grads, ms, vs, gs):
            g = (ga + gp) / accum_iters
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(
                K.update(m, (1 - accum_switch) * m + accum_switch * m_t))
            self.updates.append(
                K.update(v, (1 - accum_switch) * v + accum_switch * v_t))
            self.updates.append(K.update(ga, (1 - accum_switch) * (ga + gp)))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(
                K.update(p, (1 - accum_switch) * p + accum_switch * new_p))
        return self.updates
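A minimal NumPy sketch of the accumulation gating used above: accum_switch is 1.0 only on iterations where a real optimizer step happens, otherwise the moments and parameters are held and the raw gradient keeps accumulating:

import numpy as np

def gated_update(p, p_t, m, m_t, v, v_t, ga, gp, accum_switch):
    new_m = (1 - accum_switch) * m + accum_switch * m_t
    new_v = (1 - accum_switch) * v + accum_switch * v_t
    new_ga = (1 - accum_switch) * (ga + gp)  # keep summing gradients between real steps
    new_p = (1 - accum_switch) * p + accum_switch * p_t
    return new_p, new_m, new_v, new_ga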
Example No. 32
    def call(self, inputs, **kwargs):
        # inputs.shape=[None, input_num_capsule, input_dim_vector]
        # Expand dims to [None, input_num_capsule, 1, 1, input_dim_vector]
        inputs_expand = K.expand_dims(K.expand_dims(inputs, 2), 2)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # Now it has shape = [None, input_num_capsule, num_capsule, 1, input_dim_vector]
        inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsule, 1, 1])
        """  
        # Compute inputs * W by expanding the first dim of W. More time-consuming and need batch_size.
        # Prepare the dimension of W
        # Now W has shape  = [batch_size, input_num_capsule, num_capsule, input_dim_vector, dim_vector]
        w_tiled = K.tile(K.expand_dims(self.W, 0), [self.batch_size, 1, 1, 1, 1])
        
        # Transformed vectors, shape = [batch_size, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = K.batch_dot(inputs_tiled, w_tiled, [4, 3])
        """
        # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
        # This is faster but requires Tensorflow.
        # shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = tf.scan(lambda ac, x: K.batch_dot(x, self.W, [3, 2]),
                             elems=inputs_tiled,
                             initializer=K.zeros([
                                 self.input_num_capsule, self.num_capsule, 1,
                                 self.dim_vector
                             ]))
        """
        # Routing algorithm V1. Use tf.while_loop in a dynamic way.
        def body(i, b, outputs):
            c = K.softmax(b)
            c_expand = K.expand_dims(K.expand_dims(K.expand_dims(c, 2), 2), 0)
            outputs = K.sum(c_expand * inputs_hat, 1, keepdims=True)
            outputs = squash(outputs)
            b = b + K.sum(inputs_hat * outputs, [0, -2, -1])
            return [i-1, b, outputs]

        cond = lambda i, b, inputs_hat: i > 0
        loop_vars = [K.constant(self.num_routing), self.bias, K.sum(inputs_hat, 1, keepdims=True)]
        _, self.bias, outputs = tf.while_loop(cond, body, loop_vars)"""

        # Routing algorithm V2. Use for iteration. V2 and V1 both work without much difference on performance
        for _ in range(self.num_routing):
            c = K.softmax(self.bias)
            c_expand = K.expand_dims(K.expand_dims(K.expand_dims(c, 2), 2), 0)
            outputs = K.sum(c_expand * inputs_hat, 1, keepdims=True)
            outputs = squash(outputs)
            self.bias = K.update(
                self.bias,
                self.bias + K.sum(inputs_hat * outputs, [0, -2, -1]))

        # Handling with no routing scenario. Prior bias will always be zero.
        if self.num_routing == 0:
            c = K.softmax(self.bias)
            c_expand = K.expand_dims(K.expand_dims(K.expand_dims(c, 2), 2), 0)
            outputs = squash(K.sum(c_expand * inputs_hat, 1, keepdims=True))

        return K.reshape(outputs, [-1, self.num_capsule, self.dim_vector])
Example No. 33
 def call(self, x, mask=None):
     batch_count = K.cast(K.prod(K.shape(x)[:2]), K.floatx())
     batch_mean = K.mean(x, axis=(0, 1))
     batch_var = K.var(x, axis=(0, 1))
     total_count = self._count + batch_count
     # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
     delta = batch_mean - self._mean
     m_a = self._var * self._count
     m_b = batch_var * batch_count
     M2 = m_a + m_b + K.square(delta) * self._count * batch_count / total_count
     # add updates to the graph
     self.add_update([
         K.update(self._mean, self._mean + delta * batch_count / total_count),
         K.update(self._var, M2 / total_count),
         K.update(self._count, total_count)
     ])
     # dummy addition to suppress Keras warning
     return x + 0
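A minimal NumPy sketch of the parallel mean/variance merge implemented above (Chan et al., as linked in the comment), assuming batch is a 2-D array of samples:

import numpy as np

def merge_running_stats(mean, var, count, batch):
    batch_count = batch.shape[0]
    batch_mean = batch.mean(axis=0)
    batch_var = batch.var(axis=0)
    total = count + batch_count
    delta = batch_mean - mean
    m2 = var * count + batch_var * batch_count + np.square(delta) * count * batch_count / total
    return mean + delta * batch_count / total, m2 / total, total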
        def conditional_update(cond, variable, new_value):
            '''Helper function to create updates that only happen when cond is True. Writes to
            self.updates and returns the new variable.

            Note: K.update(x, x) is cheap, but K.update_add(x, K.zeros_like(x)) can be expensive.
            '''
            maybe_new_value = K.switch(cond, new_value, variable)
            self.updates.append(K.update(variable, maybe_new_value))
            return maybe_new_value
Example No. 35
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        accumulators = [
            K.variable(value=K.get_value(p), dtype='float32') for p in params
        ]
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        mu = self.mu
        c = self.c
        l1 = c * K.pow(lr, 0.5 + mu) * K.pow(
            K.cast(self.iterations, K.floatx()) + 1, mu)
        for p, g, a in zip(params, grads, accumulators):
            new_a = a - lr * g
            self.updates.append(K.update(a, new_a))
            new_p = K.softthreshold(new_a, l1)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 36
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        print("lr", K.get_value(lr))
        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            #v = self.momentum * m - lr * g  # velocity
            v = -1. * lr * g
            #print (K.get_value(g))
            self.updates.append(K.update(m, v))
            new_p = p + v
            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 37
def merge_updates(updates):
    """Average repeated updates of the same variable"""
    merged_updates = {}
    for update in updates:
        variable, value = unpack_assignment(update)
        key = variable_key(variable)
        if key not in merged_updates:
            merged_updates[key] = [variable, []]
        merged_updates[key][1].append(value)
    ret = []
    for k, v in iteritems(merged_updates):
        variable = v[0]
        values = v[1]
        n = len(values)
        if n == 1:
            ret.append(K.update(variable, values[0]))
        else:
            ret.append(K.update(variable, sum(values) / n))
    return ret
Example No. 38
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(0.96, t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(0.96, (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - get_learing_rate(p, self.lr) * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example No. 39
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)

        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1

        loss_prev = K.variable(0)
        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        ch_fact_lbound = K.switch(K.greater(loss, loss_prev), 1+self.thl, 1/(1+self.thu))
        ch_fact_ubound = K.switch(K.greater(loss, loss_prev), 1+self.thu, 1/(1+self.thl))
        loss_ch_fact = loss / loss_prev
        loss_ch_fact = K.switch(K.lesser(loss_ch_fact, ch_fact_lbound), ch_fact_lbound, loss_ch_fact)
        loss_ch_fact = K.switch(K.greater(loss_ch_fact, ch_fact_ubound), ch_fact_ubound, loss_ch_fact)
        loss_hat = K.switch(K.greater(t, 1), loss_prev * loss_ch_fact, loss)

        d_den = K.switch(K.greater(loss_hat, loss_prev), loss_prev, loss_hat)
        d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * K.abs((loss_hat - loss_prev) / d_den)
        d_t = K.switch(K.greater(t, 1), d_t, 1.)
        self.updates.append(K.update(self.d, d_t))

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            self.updates.append(K.update(m, m_t))

            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))
            self.updates.append(K.update(v, v_t))

            p_t = p - (self.lr / (1. + (self.iterations * self.decay))) * mhat_t / ((K.sqrt(vhat_t) * d_t) + self.epsilon)
            self.updates.append(K.update(p, p_t))

        self.updates.append(K.update(loss_prev, loss_hat))
        return self.updates
Example No. 40
    def WGAN_train(self, loss_function, D_lr, G_lr, clamp, lamda):
        assert loss_function == 'gradient_penalty' or loss_function == 'clip'

        x_real = Input(shape=self.image_size)
        fake_vectors = Input(shape=(self.random_vector_size,))
        x_fake = self.generator(fake_vectors)
        loss_real = K.mean(self.discriminator(x_real))
        loss_fake = K.mean(self.discriminator(x_fake))

        # loss for generator
        loss = -loss_fake
        training_updates = RMSprop(lr=G_lr).get_updates(loss, self.generator.trainable_weights)
        G_train = K.function([fake_vectors], [loss], training_updates)

        # clip step
        if loss_function == 'clip':
            # loss for discriminator
            loss = loss_fake - loss_real
            training_updates = RMSprop(lr=D_lr).get_updates(loss, self.discriminator.trainable_weights)
            D_train = K.function([x_real, fake_vectors], [loss_real, loss_fake], training_updates)

            # clip
            clamp_lower, clamp_upper = clamp * -1., clamp
            weights_clip = [K.update(x, K.clip(x, clamp_lower, clamp_upper)) for x in self.discriminator.trainable_weights]
            D_clamp = K.function([], [], weights_clip)

            return D_train, G_train, D_clamp

        # gradient penalty step
        else:
            # loss for discriminator
            e = K.placeholder(shape=(None, 1, 1, 1))
            x_mixed = Input(shape=self.image_size, tensor=e * x_real + (1 - e) * x_fake)
            x_mixed_gradient = K.gradients(self.discriminator(x_mixed), [x_mixed])[0]
            x_mixed_gradient_norm = K.sqrt(K.sum(K.square(x_mixed_gradient), axis=[1, 2, 3]))  # not norm in batch_size
            gradient_penalty = K.mean(K.square(x_mixed_gradient_norm - 1))
            loss = loss_fake - loss_real + lamda * gradient_penalty
            training_updates = RMSprop(lr=D_lr).get_updates(loss, self.discriminator.trainable_weights)
            D_train = K.function([x_real, fake_vectors, e], [loss_real, loss_fake], training_updates)

            return D_train, G_train, None
def unroll(updates, uupdates, depth):
    replace = {k: v for k, v in unpack_assignments(uupdates)}
    updates_t = unpack_assignments(updates)
    for i in range(depth):
        updates_t = [(k, clone_replace(v, replace)) for k, v in updates_t]
    return [K.update(a, b) for a, b in updates_t]