Example #1
    def call(self, inputs, training=None):
        if not self.trainable:
            training = False
        else:
            # The learning phase flag is a bool tensor (0 = test, 1 = train)
            training = K.learning_phase()

        if training is not False:
            K.update_add(self.iterations, 1)
            # compute the current batch mean & variance
            mini_mean, mini_variance = tf.nn.moments(inputs, axes=[0, 1, 2])
            # normalize with the short-term statistics, then apply the affine transform
            x = (inputs - self.steps_mean) / K.sqrt(self.steps_variance + self.epsilon)
            x = self.gamma * x + self.beta
            # update the moving params
            K.moving_average_update(self.moving_mean, mini_mean, self.momentum)
            K.moving_average_update(self.moving_variance, mini_variance, self.momentum)
            # reset the short-term stats every steps_per_update iterations,
            # otherwise keep accumulating the batch statistics
            cond = K.equal(self.iterations % self.steps_per_update, 0)
            K.switch(cond, lambda: self.steps_mean*0, K.update_add(self.steps_mean, mini_mean))
            K.switch(cond, lambda: self.steps_variance*0, K.update_add(self.steps_variance, mini_variance))
        else:
            # affine
            scale = self.gamma / K.sqrt(self.moving_variance + self.epsilon)
            x = inputs * scale + (self.beta - self.moving_mean * scale)
        return x
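A note on the moving statistics above: K.moving_average_update is assumed here to follow the classic exponential-moving-average rule, moving = moving * momentum + value * (1 - momentum). A minimal standalone sketch of that rule in plain TF2 (an illustration, not the layer's actual code):

    import tensorflow as tf

    moving_mean = tf.Variable(0.0)
    momentum = 0.99

    def moving_average_update(variable, value, momentum):
        # moving <- moving * momentum + value * (1 - momentum)
        variable.assign(variable * momentum + value * (1.0 - momentum))

    moving_average_update(moving_mean, 1.0, momentum)
    print(moving_mean.numpy())  # ~0.01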
Example #2
    def __call__(self, y_true, y_pred):
        """Computes the running precision over a batch.

        # Arguments
            y_true: Tensor, batch_wise labels
            y_pred: Tensor, batch_wise predictions

        # Returns
            The precision seen this epoch at the completion of the batch.
        """
        y_true = K.cast(y_true, 'int32')
        y_pred = K.cast(K.round(y_pred), 'int32')

        # False positive calculations (predicted 1, actual 0)
        false_pos = K.cast(K.sum(K.cast(K.greater(y_pred, y_true), 'int32')), 'int32')
        self.add_update(K.update_add(self.false_positives,
                                     false_pos),
                        inputs=[y_true, y_pred])

        # True positive calculations (predicted 1, actual 1)
        correct_preds = K.cast(K.equal(y_pred, y_true), 'int32')
        true_pos = K.cast(K.sum(correct_preds * y_true), 'int32')
        self.add_update(K.update_add(self.true_positives,
                                     true_pos),
                        inputs=[y_true, y_pred])
        # Combine
        true_pos_f = K.cast(self.true_positives, 'float32')
        false_pos_f = K.cast(self.false_positives, 'float32')
        precision = true_pos_f / (true_pos_f + false_pos_f + K.epsilon())
        self.add_update(K.update(self.precision, precision),
                        inputs=[y_true, y_pred])

        return precision
Example #3
    def __call__(self, y_true, y_pred):
        """Computes the kappa in a batch.
        # Arguments
            y_true: Tensor, batch_wise labels
            y_pred: Tensor, batch_wise predictions
        # Returns
            The kappa seen this epoch at the completion of the batch.
        """
        y_true = K.cast(y_true, 'int32')
        y_pred = K.cast(K.round(y_pred), 'int32')
        true_pos = K.cast(K.sum(y_pred * y_true), 'int32')
        true_neg = K.cast(K.sum((1 - y_pred) * (1 - y_true)), 'int32')
        false_pos = K.cast(K.sum(y_pred * (1 - y_true)), 'int32')
        false_neg = K.cast(K.sum((1 - y_pred) * y_true), 'int32')

        self.add_update(K.update_add(self.true_positives, true_pos),
                        inputs=[y_true, y_pred])
        self.add_update(K.update_add(self.true_negative, true_neg),
                        inputs=[y_true, y_pred])
        self.add_update(K.update_add(self.false_positives, false_pos),
                        inputs=[y_true, y_pred])
        self.add_update(K.update_add(self.false_negative, false_neg),
                        inputs=[y_true, y_pred])

        true_pos = K.cast(self.true_positives * 1, "float32")
        true_neg = K.cast(self.true_negative * 1, "float32")
        false_pos = K.cast(self.false_positives * 1, "float32")
        false_neg = K.cast(self.false_negative * 1, "float32")

        sm = true_pos + true_neg + false_pos + false_neg
        obs_agree = (true_pos + true_neg) / sm
        poss_pos = (true_pos + false_neg) * (true_pos + false_pos) / (sm**2)
        poss_neg = (true_neg + false_neg) * (true_neg + false_pos) / (sm**2)
        poss_agree = poss_pos + poss_neg
        return (obs_agree - poss_agree) / (1 - poss_agree + K.epsilon())
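For reference, the returned value is Cohen's kappa assembled from the accumulated confusion-matrix counts. With N = TP + TN + FP + FN:

    p_o   = (TP + TN) / N
    p_e   = [(TP + FN)(TP + FP) + (TN + FN)(TN + FP)] / N^2
    kappa = (p_o - p_e) / (1 - p_e)

The K.epsilon() term only guards the degenerate case p_e = 1.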
Example #4
    def get_updates(self, loss, params):
        with K.name_scope(self.__class__.__name__):
            self.slow_weights = [K.variable(p) for p in params]

        opt_updates = self.opt.get_updates(loss, params)
        update_dict = {
            t.op.inputs[0].name: t.op.inputs[1]
            for t in opt_updates
        }
        p_names = [p.name for p in params]
        other_ops = [
            t for t in opt_updates if t.op.inputs[0].name not in p_names
        ]

        self.updates = [K.update_add(self.iterations, 1)]
        self.updates += other_ops

        with tf.control_dependencies([self.updates[0]]):
            condition = K.equal(self.iterations % self.k, 0)
            for fast_w, slow_w in zip(params, self.slow_weights):
                self.updates.append(
                    K.switch(
                        condition,
                        lambda: K.update(
                            fast_w,
                            K.update_add(
                                slow_w,
                                (K.update(fast_w, update_dict[fast_w.name]) -
                                 slow_w) * self.alpha,
                            ),
                        ),
                        lambda: K.update(fast_w, update_dict[fast_w.name]),
                    ))
        self.weights = self.opt.weights + self.slow_weights
        return self.updates
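This reads as a Lookahead-style wrapper: the inner optimizer drives the fast weights, and every self.k iterations the slow weights take an alpha-step toward them, after which the fast weights are reset. A minimal numeric sketch of that rule (standard Lookahead as in Zhang et al. 2019, assumed rather than taken from this class):

    import numpy as np

    k, alpha = 5, 0.5
    fast = np.array([1.0])
    slow = fast.copy()
    for step in range(1, 11):
        fast = fast - 0.1                        # stand-in for the inner optimizer's step
        if step % k == 0:
            slow = slow + alpha * (fast - slow)  # slow interpolates toward fast
            fast = slow.copy()                   # fast restarts from the new slow weights
    print(slow, fast)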
Example #5
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr_t * self.eta_t  # for external tracking
        lr_t_premult = lr_t

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Learning rate multipliers
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t_premult, p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates
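Setting aside the cosine-annealing and weight-decay hooks, the inner loop is the standard Adam update; in the snippet's notation:

    m_t  = beta_1 * m + (1 - beta_1) * g
    v_t  = beta_2 * v + (1 - beta_2) * g^2
    lr_t = lr * sqrt(1 - beta_2^t) / (1 - beta_1^t)
    p   <- p - lr_t * m_t / (sqrt(v_t) + epsilon)

with v_t replaced by max(vhat, v_t) on the AMSGrad branch.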
Example #6
    def adjust(self):
        dtype = self.embedding.dtype

        unchanged_mask = bk.cast(0 == self.update_cnt, dtype)
        paved_embedding = self._pave_embedding(self.embedding) * unchanged_mask
        unpaved_embedding = self.embedding * unchanged_mask
        bk.update_add(self.embedding, (1 - self.pave_momentum) *
                      (paved_embedding - unpaved_embedding))

        inv_seg_scale = (self.cur_max - self.cur_min) / self.seg_num
        x = bk.expand_dims(bk.arange(0, self.seg_num, dtype=dtype),
                           axis=-1) * inv_seg_scale + (
                               self.cur_min + self.val_epsilon * inv_seg_scale)
        x = bk.concatenate(
            [x, x + (1. - 2 * self.val_epsilon) * inv_seg_scale], axis=0)
        y = bk.transpose(
            bk.reshape(
                self.embedding,
                (self.input_dim * self._target_dim, -1)))[int(self.mask_zero):]
        y = bk.concatenate([y, y], axis=0)
        seg_indices = self._calc_seg_indices(x, self.moving_min,
                                             self.moving_max)
        tmp_embedding, tmp_cnt = self._sum_seg_embeddings(seg_indices, y)

        bk.update(
            self.embedding,
            tmp_embedding / (tmp_cnt + bk.cast(0 == tmp_cnt, dtype=dtype)))
        unmatched_mask = bk.cast(0 == self.embedding, dtype)
        bk.update_add(self.embedding,
                      self._pave_embedding(self.embedding) * unmatched_mask)

        bk.update(self.update_cnt, bk.zeros_like(self.update_cnt))
        bk.update(self.cur_min, self.moving_min)
        bk.update(self.cur_max, self.moving_max)
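The denominator tmp_cnt + bk.cast(0 == tmp_cnt, dtype) is a divide-safely idiom: empty segments get a count of 1 instead of 0, so their rows come out as 0 rather than NaN. A small NumPy illustration of the same idiom (an assumption about the intent, not code from this class):

    import numpy as np

    tmp_cnt = np.array([0.0, 2.0, 4.0])
    tmp_sum = np.array([0.0, 6.0, 4.0])
    mean = tmp_sum / (tmp_cnt + (tmp_cnt == 0))
    print(mean)  # [0. 3. 1.] -- the empty bin yields 0, not NaN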
Example #7
    def call(self, inputs, training=None):
        if training is None:
            training = bk.learning_phase()
        training = bk.get_value(training)

        indices, outputs = [[] for i in range(self.phase)], []
        for i in range(self.phase):
            trainable = self.embed_trainable_list[i]
            cur_outputs = []
            for j, feats_idx in enumerate(self.feat_idx_list[i]):
                inds = self._calc_indices(
                    bk.transpose(bk.gather(bk.transpose(inputs), feats_idx)),
                    self.min_val_list[i][j], self.max_val_list[i][j],
                    self.seg_num_list[i][j], self.seg_num_mul_list[i][j])
                if trainable:
                    indices[i].append(inds)
                if self.index_learnable:
                    cur_outputs.append(make_gather_in_flow()(
                        self.embedding_list[i][j], inds))
                else:
                    cur_outputs.append(
                        bk.gather(self.embedding_list[i][j],
                                  bk.cast(inds, 'int32')))
            outputs.append(bk.concatenate(cur_outputs)
                           if len(cur_outputs) > 1 else cur_outputs[0])

        if training:
            bk.update(self.call_cnt, self.call_cnt + 1)

            for i in range(self.phase):
                for j, inds in enumerate(indices[i]):
                    update_cnt = self.update_cnt_list[i][j]
                    inds = bk.cast(inds, 'int64')
                    if self.unique_supported:
                        ind, _, cnts = tf.unique_with_counts(bk.flatten(inds))
                        tmp_cnt = tf.sparse.to_dense(
                            tf.sparse.reorder(
                                tf.SparseTensor(
                                    bk.expand_dims(ind, -1),
                                    bk.cast(cnts, update_cnt.dtype),
                                    update_cnt.shape)))
                    else:
                        tmp_cnt = bk.map_fn(lambda ele: bk.sum(
                            bk.cast(ele == inds, inputs.dtype)),
                                            bk.arange(0,
                                                      update_cnt.shape[0],
                                                      dtype=inds.dtype),
                                            dtype=inputs.dtype)
                    bk.update_add(update_cnt, tmp_cnt)

            if self.period is not None and self.call_cnt % self.period == 0:
                self.adjust()

        return outputs
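The unique_supported branch is a histogram of index occurrences built from tf.unique_with_counts plus a SparseTensor. A standalone TF2 sketch of the same computation (the dense shape of 5 stands in for update_cnt.shape):

    import tensorflow as tf

    inds = tf.constant([0, 2, 2, 3], dtype=tf.int64)
    ind, _, cnts = tf.unique_with_counts(inds)
    hist = tf.sparse.to_dense(tf.sparse.reorder(
        tf.SparseTensor(tf.expand_dims(ind, -1),
                        tf.cast(cnts, tf.float32),
                        dense_shape=[5])))
    print(hist.numpy())  # [1. 0. 2. 1. 0.]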
Example #8
    def __call__(self, y_true, y_pred):
        y_true, y_pred = _sanitize(y_true, y_pred, self.threshold)
        true_pos = _tp(y_true, y_pred)
        false_neg = _fn(y_true, y_pred)

        self.add_update(K.update_add(self.tp, true_pos),
                        inputs=[y_true, y_pred])
        self.add_update(K.update_add(self.fn, false_neg),
                        inputs=[y_true, y_pred])

        return self.fn / (self.fn + self.tp + self.eps)
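Note the returned ratio is the false-negative rate (miss rate), not a score to maximize:

    FNR = FN / (FN + TP) = 1 - recall

with self.eps guarding the empty-batch case.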
Example #9
        def __call__(self, y_true, y_pred):
            batch_size = K.shape(y_true)[0]
            # K.equal returns a bool tensor; cast before summing
            batch_correct = K.sum(K.cast(K.equal(y_true, K.round(y_pred)), 'int32'))
            total = self.total + batch_size
            correct = self.correct + batch_correct

            self.add_update(K.update_add(self.total, batch_size),
                            inputs=[y_true, y_pred])
            self.add_update(K.update_add(self.correct, batch_correct),
                            inputs=[y_true, y_pred])

            return K.cast(correct, K.floatx()) / K.cast(total, K.floatx())
Example #10
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [
            K.zeros(shape, name='moment_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        self.weights = [self.iterations] + moments

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr * self.eta_t  # for external tracking

        for p, g, m in zip(params, grads, moments):
            # Learning rate multipliers (start from the decayed learning rate)
            lr_t = lr
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t, p)

            v = self.momentum * m - self.eta_t * lr_t * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                p_t = p + self.momentum * v - self.eta_t * lr_t * g
            else:
                p_t = p + v

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates
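Assuming _compute_eta_t implements the usual SGDR cosine-annealing schedule (Loshchilov & Hutter, 2017), the multiplier tracked in self.eta_t would be

    eta_t = eta_min + (eta_max - eta_min) * (1 + cos(pi * t_cur / total_iterations)) / 2

so self.lr_t = lr * eta_t exposes the effective learning rate for external tracking.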
Example #11
    def get_updates(self, loss, params):
        self.updates = [
            K.update_add(self.iterations, 1),
            K.update_add(self.optimizer.iterations, K.cast(self.cond, 'int64')),
        ]
        # gradient accumulation
        self.accum_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        grads = self.get_gradients(loss, params)
        for g, ag in zip(grads, self.accum_grads):
            self.updates.append(K.update(ag, K.switch(self.cond, g, ag + g)))
        # inherit the wrapped optimizer's updates (minus its iteration counter)
        self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
        self.weights.extend(self.optimizer.weights)
        return self.updates
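Here self.cond presumably flags the iterations on which the wrapped optimizer actually applies an update; K.switch(self.cond, g, ag + g) then restarts the accumulator from the fresh gradient on apply steps and keeps summing otherwise. A plain-Python sketch of that rule (steps_per_update is an assumed name):

    steps_per_update = 3
    ag = 0.0
    for it, g in enumerate([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], start=1):
        cond = (it % steps_per_update == 0)
        ag = g if cond else ag + g  # restart on apply steps, accumulate otherwise
        print(it, ag)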
Example #12
    def adjust(self):
        for i in range(self.phase):
            if self.embed_trainable_list[i]:
                for j, embedding in enumerate(self.embedding_list[i]):
                    update_cnt = self.update_cnt_list[i][j]
                    unchanged_mask = bk.expand_dims(
                        bk.cast(0 == update_cnt, embedding.dtype))
                    paved_embedding = self._pave_embedding(
                        embedding,
                        self.seg_num_list[i][j][-1]) * unchanged_mask
                    unpaved_embedding = embedding * unchanged_mask

                    bk.update_add(embedding, (1 - self.pave_momentum) *
                                  (paved_embedding - unpaved_embedding))
                    bk.update(update_cnt, bk.zeros_like(update_cnt))
Example #13
    def call(self, inputs):

        S = K.dot(inputs, self.kernel)
        if self.use_bias:
            S = K.bias_add(S, self.bias)
        if self.activation is not None:
            output = self.activation(S)
        else:
            output = S

        delta = (output - self.gamma * S)
        K.update_add(self.kernel,
                     2 * self.lr * K.dot(K.transpose(inputs), delta))
        if self.use_bias:
            K.update_add(self.bias, 2 * self.lr * K.mean(delta, axis=0))

        return output
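The in-call weight change is a delta-rule (Hebbian-style) step with error signal delta = f(S) - gamma * S:

    W <- W + 2 * lr * X^T (f(S) - gamma * S)
    b <- b + 2 * lr * mean(f(S) - gamma * S, axis=0)

i.e. the layer trains itself during the forward pass instead of through an optimizer.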
Example #14
    def get_updates(self, loss, params):
        tower_gradvars = []
        gdev_list = self._gdev_list

        global_scope = tf.get_variable_scope()
        for idev, device in enumerate(gdev_list):
            with tf.device(device), \
                    tf.variable_scope(global_scope, reuse=idev > 0), \
                    tf.name_scope('tower_%i' % idev):
                grads = self.optimizer.compute_gradients(loss, params)

            # compute_gradients already returns (gradient, variable) pairs
            gradvars = grads
            tower_gradvars.append(gradvars)

        tower_gradvars = all_avg_gradients(tower_gradvars,
                                           gdev_list,
                                           usenccl=False)

        self.updates = [K.update_add(self.iterations, 1)]

        for device_num, device in enumerate(gdev_list):
            with tf.device(device):
                gradvars = tower_gradvars[device_num]
                opt_update = self.optimizer.apply_gradients(
                    gradvars, global_step=self.iterations)
            self.updates.append(opt_update)

        return self.updates
Example #15
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))
            noise = K.random_normal(shape=K.shape(p),
                                    stddev=self.gradient_noise_coefficient)
            if self.nesterov:
                new_p = p + self.momentum * v - lr * g + lr * noise
            else:
                new_p = p + v + lr * noise

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
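With zero-mean Gaussian noise added at the learning-rate scale, the update resembles stochastic gradient Langevin dynamics layered on momentum SGD:

    p <- p + v + lr * xi,   xi ~ N(0, gradient_noise_coefficient^2)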
Example #16
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms

        for p, g, m in zip(params, grads, ms):
            m_t = (self.beta * m) + (1. - self.beta) * g

            p_t = p - lr * m_t

            self.updates.append(K.update(m, m_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #17
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - lr_t * m_t / (u_t + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #18
    def get_updates(self, loss, params):
        grads = self.optimizer.compute_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        opt_update = self.optimizer.apply_gradients(
            grads, global_step=self.iterations)
        self.updates.append(opt_update)
        return self.updates
Example #19
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        accumulators = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        self.weights = accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        for p, g, a in zip(params, grads, accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))
            new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #20
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = (p - self.get_param_learning_rate_t(p, t, lr_t) * m_t /
                   (K.sqrt(v_t) + self.epsilon))

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example #21
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            if p.name in self.lr_mult:
                multiplied_lr = lr * self.lr_mult[p.name]
            else:
                multiplied_lr = lr

            v = self.momentum * m - multiplied_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - multiplied_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #22
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example #23
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        mems = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs + mems

        for p, g, m, v, mem in zip(params, grads, ms, vs, mems):
            r = 1. / (1. + mem)
            m_t = (1. - r) * m + r * g
            v_t = (1. - r) * v + r * K.square(g)
            denoise = K.square(m_t) / (v_t + self.epsilon)
            p_t = p - g * K.minimum(lr, denoise) / (K.sqrt(v_t) + self.epsilon)
            mem_t = 1. + mem * (1. - denoise)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            self.updates.append(K.update(mem, mem_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
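This appears to be SMORMS3 (Simon Funk's "RMSprop loses to SMORMS3"): mem tracks an effective memory length, r = 1/(1 + mem) is the adaptive mixing rate, and the step size is capped by a signal-to-noise estimate. In the snippet's notation:

    r     = 1 / (1 + mem)
    m_t   = (1 - r) * m + r * g
    v_t   = (1 - r) * v + r * g^2
    p    <- p - g * min(lr, m_t^2 / v_t) / (sqrt(v_t) + epsilon)
    mem_t = 1 + mem * (1 - m_t^2 / v_t)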
Example #24
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        lr = self.lr * (1. / (1. + self.decay * self.iterations))
        self.updates = [K.update_add(self.iterations, 1)]

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - self.get_param_learning_rate(p, lr) * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                # use the same per-parameter learning rate as the velocity term
                new_p = p + self.momentum * v - self.get_param_learning_rate(p, lr) * g
            else:
                new_p = p + v

            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #25
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = (p - self.get_param_learning_rate_t(p, t, lr_t) * m_t /
                   (u_t + self.epsilon))

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example #26
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
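The bounds here are the AdaBound/AMSBound clamp: the per-element step size is clipped into a band that tightens around final_lr as t grows, interpolating from Adam-like behavior toward SGD:

    lower(t) = final_lr * (1 - 1 / (gamma * t + 1))
    upper(t) = final_lr * (1 + 1 / (gamma * t))
    p <- p - m_t * clip(step_size / (sqrt(v_t) + epsilon), lower(t), upper(t))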
Example #27
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators + delta_accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        for p, g, a, d_a in zip(params, grads, accumulators,
                                delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a +
                                                             self.epsilon)
            new_p = p - lr * update

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
            self.updates.append(K.update(d_a, new_d_a))
        return self.updates
Example #28
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        wd = self.wd  # decoupled weight decay (3/4)

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g - lr * wd * p  # decoupled weight decay (4/4)
            else:
                new_p = p + v - lr * wd * p  # decoupled weight decay (4/4)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
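The "(3/4)", "(4/4)" markers suggest an SGDW-style port of decoupled weight decay (Loshchilov & Hutter): the decay term lr * wd * p is subtracted from the parameter directly rather than being folded into the gradient:

    v  = momentum * v - lr * g
    p <- p + v - lr * wd * p        (decoupled)

versus L2 regularization, which would replace g with g + wd * p and let the decay pass through the momentum buffer.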
Example #29
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        wd = self.wd  # decoupled weight decay (3/4)

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            # decoupled weight decay (4/4)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - lr * wd * p

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #30
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        accumulators = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        self.weights = accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        for p, g, a in zip(params, grads, accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))
            new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            if p.name in self.clips.keys():
                if self.verbose > 0:
                    print("CLpping variable", p.name, " to ",
                          self.clips[p.name])
                c = K.eval(self.clips[p.name])
                new_p = K.clip(new_p, c[0], c[1])

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #31
        def get_updates(self, params, constraints, loss):
            grads = self.get_gradients(loss, params)
            self.updates = []

            lr = self.lr
            if self.inital_decay > 0:
                lr *= (1. / (1. + self.decay * self.iterations))**0.75
                self.updates.append(K.update_add(self.iterations, 1))

            # momentum
            shapes = [K.get_variable_shape(p) for p in params]
            moments = [K.zeros(shape) for shape in shapes]
            self.weights = [self.iterations] + moments
            for p, g, m in zip(params, grads, moments):
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
            return self.updates
Example #32
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        t = K.cast(self.iterations, K.floatx()) + 1
        epoch = (t // 98) + 1  # CIFAR10, CIFAR100
        # epoch = (t//118) + 1    #  MNIST
        beta2 = 1 - (self.gamma / epoch)
        alpha_t = lr / ((epoch + 2) * (epoch**(1 / 2)))
        momentum = epoch / (epoch + 2)

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            v_t = (beta2 * v) + (1 - beta2) * K.square(g)
            v_hat = (K.sqrt(v_t)) + (self.delta / (epoch**(1 / 2)))
            m_t = momentum * m + alpha_t * g / v_hat

            p_t = p - m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #33
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        adam_lr = self.adam_lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
            adam_lr = adam_lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        adam_lr_t = adam_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                               (1. - K.pow(self.beta_1, t)))

        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.ms = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
        self.vs = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
        self.weights = ([self.iterations] + moments + vhats +
                        [self.ms] + [self.vs])
        for i, (p, g, m, vhat) in enumerate(zip(params, grads, moments,
                                                vhats)):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v

            if i == 0 and self.e2efs_layer is not None:
                nnz = K.sum(K.cast(K.greater(p, 0.), K.floatx()))
                m_t = (self.beta_1 * self.ms) + (1. - self.beta_1) * g
                v_t = (self.beta_2 *
                       self.vs) + (1. - self.beta_2) * K.square(g)
                if self.amsgrad:
                    vhat_t = K.maximum(vhat, v_t)
                    p_t = p - adam_lr_t * m_t / (K.sqrt(vhat_t) + K.epsilon())
                    self.updates.append(K.update(vhat, vhat_t))
                else:
                    p_t = p - adam_lr_t * m_t / (K.sqrt(v_t) + K.epsilon())

                self.updates.append(K.update(self.ms, m_t))
                self.updates.append(K.update(self.vs, v_t))
                new_p = K.switch(K.less_equal(nnz, self.e2efs_layer.units),
                                 new_p, p_t)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #34
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [
            K.zeros(shape, name='moment_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #35
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            if p.name in self.lr_mult:
                multiplied_lr = lr * self.lr_mult[p.name]
            else:
                multiplied_lr = lr

            v = self.momentum * m - multiplied_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - multiplied_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #36
        def __call__(self, y_true, y_pred):
            """Computes the number of true positives in a batch.

            # Arguments
                y_true: Tensor, batch_wise labels
                y_pred: Tensor, batch_wise predictions

            # Returns
                The total number of true positives seen this epoch at the
                    completion of the batch.
            """
            y_true = K.cast(y_true, 'int32')
            y_pred = K.cast(K.round(y_pred), 'int32')
            correct_preds = K.cast(K.equal(y_pred, y_true), 'int32')
            true_pos = K.cast(K.sum(correct_preds * y_true), 'int32')
            current_true_pos = self.true_positives * 1
            self.add_update(K.update_add(self.true_positives,
                                         true_pos),
                            inputs=[y_true, y_pred])
            return current_true_pos + true_pos
Example #37
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(0.96, t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(0.96, (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - get_learing_rate(p, self.lr) * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
Example #38
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)

        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1

        loss_prev = K.variable(0)
        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        ch_fact_lbound = K.switch(K.greater(loss, loss_prev), 1+self.thl, 1/(1+self.thu))
        ch_fact_ubound = K.switch(K.greater(loss, loss_prev), 1+self.thu, 1/(1+self.thl))
        loss_ch_fact = loss / loss_prev
        loss_ch_fact = K.switch(K.lesser(loss_ch_fact, ch_fact_lbound), ch_fact_lbound, loss_ch_fact)
        loss_ch_fact = K.switch(K.greater(loss_ch_fact, ch_fact_ubound), ch_fact_ubound, loss_ch_fact)
        loss_hat = K.switch(K.greater(t, 1), loss_prev * loss_ch_fact, loss)

        d_den = K.switch(K.greater(loss_hat, loss_prev), loss_prev, loss_hat)
        d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * K.abs((loss_hat - loss_prev) / d_den)
        d_t = K.switch(K.greater(t, 1), d_t, 1.)
        self.updates.append(K.update(self.d, d_t))

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            self.updates.append(K.update(m, m_t))

            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))
            self.updates.append(K.update(v, v_t))

            p_t = p - (self.lr / (1. + (self.iterations * self.decay))) * mhat_t / ((K.sqrt(vhat_t) * d_t) + self.epsilon)
            self.updates.append(K.update(p, p_t))

        self.updates.append(K.update(loss_prev, loss_hat))
        return self.updates
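This matches the Eve optimizer (Koushik & Hayashi, 2016): d_t smooths the relative change of the loss between steps and rescales Adam's denominator, shrinking the step while the loss is changing rapidly:

    d_t = beta_3 * d + (1 - beta_3) * |loss_hat - loss_prev| / min(loss_hat, loss_prev)
    p  <- p - (lr / (1 + iterations * decay)) * m_hat_t / (sqrt(v_hat_t) * d_t + epsilon)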