Example #1
 def call(self, inputs, reverse=False, ddi=False, **kwargs):
     logscale_factor = 3.
     x = inputs
     reduce_axis = list(range(K.ndim(inputs)))[:-1]
     if not reverse:
         log_scale = self.log_scale
         bias = self.bias
         if ddi:
             x_var = tf.reduce_mean(x**2, reduce_axis, keepdims=True)
             init_scale = tf.log(1. /
                                 (tf.sqrt(x_var) + 1e-6)) / logscale_factor
             init_bias = tf.reduce_mean(x, reduce_axis, keepdims=True)
             log_scale = K.switch(K.all(K.equal(self.log_scale, 0.)),
                                  init_scale, self.log_scale)
             bias = K.switch(K.all(K.equal(self.bias, 0.)), -init_bias,
                             self.bias)
             self.add_update(K.update_add(
                 self.log_scale,
                 K.switch(K.all(K.equal(self.log_scale, 0.)), init_scale,
                          K.zeros_like(init_scale))),
                             inputs=x)
             self.add_update(K.update_add(
                 self.bias,
                 K.switch(K.all(K.equal(self.bias, 0.)), -init_bias,
                          K.zeros_like(init_bias))),
                             inputs=x)
         return (x + bias) * K.exp(log_scale)
     else:
         return x / K.exp(self.log_scale) - self.bias
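A minimal NumPy sketch of the data-dependent initialization (DDI) branch above, on hypothetical stand-in activations: the bias is initialized to -mean(x) and log_scale to log(1 / (sqrt(mean(x**2)) + 1e-6)) / logscale_factor, after which the forward pass applies (x + bias) * exp(log_scale).

import numpy as np

# Hypothetical first batch of activations standing in for `inputs`.
logscale_factor = 3.0
x = np.random.default_rng(0).normal(2.0, 5.0, size=(64, 16))
x_var = np.mean(x ** 2, axis=0, keepdims=True)
init_scale = np.log(1.0 / (np.sqrt(x_var) + 1e-6)) / logscale_factor
init_bias = np.mean(x, axis=0, keepdims=True)
y = (x - init_bias) * np.exp(init_scale)  # first-batch output after DDI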
Example #2
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            # Learning rate multipliers
            if self.multipliers:
                multiplier = [
                    mult for mult in self.multipliers if mult in p.name
                ]
            else:
                multiplier = None
            if multiplier:
                new_lr_t = lr_t * self.multipliers[multiplier[0]]
                if self.debug_verbose:
                    print('Setting {} to learning rate {}'.format(
                        multiplier[0], new_lr_t))
                    print(K.get_value(new_lr_t))
            else:
                new_lr_t = lr_t
                if self.debug_verbose:
                    print('No change in learning rate {}'.format(p.name))
                    print(K.get_value(new_lr_t))
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
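The multiplier lookup in the loop keys on substrings of each parameter's name. A minimal sketch of that lookup outside Keras, with hypothetical names and factors:

# `multipliers` maps name fragments to learning-rate factors (hypothetical values).
multipliers = {'dense_1': 0.1, 'conv2d': 0.5}
param_name = 'dense_1/kernel:0'
matches = [frag for frag in multipliers if frag in param_name]
lr_scale = multipliers[matches[0]] if matches else 1.0  # -> 0.1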
Example #3
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        shapes = [K.int_shape(p) for p in params]
        prev_grads = [
            K.zeros(shape, name='prev_grad_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        ds = [
            K.zeros(shape, name='d_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        vs = [
            K.zeros(shape, name='v_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        self.weights = [self.iterations] + ds + vs + prev_grads

        for p, g, pg, v, d in zip(params, grads, prev_grads, vs, ds):
            v_t = self.momentum * v - self.lr * g
            self.updates.append(K.update(v, v_t))

            d_t = self.momentum * d + (1 - self.momentum) * (g - pg)
            self.updates.append(K.update(d, d_t))
            self.updates.append(K.update(pg, g))

            new_p = p + v_t + self.kd * d_t
            self.updates.append(K.update(p, new_p))

        return self.updates
Example #4
    def get_updates(self, loss, params):
        self.updates = [
            K.update_add(self.iterations, 1),
            K.update_add(self.optimizer.iterations, K.cast(self.cond, 'int64'))
        ]

        # accumulate gradients
        self.accum_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        grads = self.get_gradients(loss, params)
        for g, ag in zip(grads, self.accum_grads):
            self.updates.append(K.update(ag, K.switch(self.cond, ag * 0, ag + g)))

        self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
        self.weights.extend(self.optimizer.weights)

        return self.updates
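The wrapper above keeps summing gradients into `accum_grads` and only resets the accumulator (and advances the wrapped optimizer's iteration counter) when `self.cond` fires; its definition is not shown, but it is presumably a boolean tensor marking the step on which the accumulated gradient is applied. A plain NumPy sketch of the same accumulation pattern, with hypothetical step counts:

import numpy as np

steps_per_update = 4                       # hypothetical accumulation window
accum = np.zeros(3)
for step, g in enumerate([np.ones(3)] * 8, start=1):
    accum += g                             # accumulate the mini-batch gradient
    if step % steps_per_update == 0:       # the role played by `self.cond`
        effective_grad = accum / steps_per_update
        accum[:] = 0.0                     # reset, as in K.switch(self.cond, ag * 0, ag + g)
        # ... the wrapped optimizer would apply effective_grad here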
Example #5
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(
                K.maximum(step_size_p_bound, lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
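In AdaBound the raw Adam step size is clamped into [lower_bound, upper_bound], and both bounds converge towards final_lr as training progresses, so the update gradually behaves like SGD with that learning rate. A numeric sketch with hypothetical hyper-parameters:

import numpy as np

final_lr, gamma, t = 0.1, 1e-3, 1000.0                # hypothetical values
lower = final_lr * (1.0 - 1.0 / (gamma * t + 1.0))    # -> 0.05
upper = final_lr * (1.0 + 1.0 / (gamma * t))          # -> 0.2
clipped_step = np.clip(0.5, lower, upper)             # a raw step of 0.5 is clamped to 0.2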
Example #6
 def get_updates(self, loss, params):
     assert params == self.predictions_keras_model.weights
     wave_function_jacobian_minus_mean = None
     if not (self.iterative_solver and self.compute_jvp_instead_of_full_jacobian):
         wave_function_jacobian_minus_mean = self.get_wave_function_jacobian_minus_mean()
     energy_grad = self.get_energy_grad(loss, wave_function_jacobian_minus_mean)
     flat_gradient = self.compute_wave_function_gradient_covariance_inverse_multiplication(
         energy_grad, wave_function_jacobian_minus_mean)
     self.updates = [K.update_add(self.iterations, 1)]
     self.updates += self.apply_complex_gradient(flat_gradient * (-1.0 + 0j))
     return self.updates
Example #7
File: optimizers.py  Project: lmc00/TFG
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        '''Bias corrections according to the Adam paper
        '''
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            ####################################################
            # Add a lr multiplier for vars outside excluded_vars
            if p.name in self.excluded_vars:
                multiplied_lr_t = lr_t
            else:
                multiplied_lr_t = lr_t * self.lr_mult
            ###################################################

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            '''Schedule multiplier eta_t = 1 for simple AdamW
            According to the AdamW paper, eta_t can be fixed, decay, or 
            also be used for warm restarts (AdamWR to come). 
            '''
            eta_t = 1.
            p_t = p - eta_t * (multiplied_lr_t * m_t / (K.sqrt(v_t) + self.epsilon))
            if self.weight_decay != 0:
                '''Normalized weight decay according to the AdamW paper
                '''
                w_d = self.weight_decay * K.sqrt(self.batch_size / (self.samples_per_epoch * self.epochs))
                p_t = p_t - eta_t * (w_d * p)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
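The snippet uses the normalized weight decay from the AdamW paper: the configured decay is rescaled by sqrt(b / (B * T)), where b is the batch size, B the samples per epoch, and T the number of epochs. A short sketch with hypothetical values:

import math

weight_decay, batch_size, samples_per_epoch, epochs = 0.025, 32, 50000, 10  # hypothetical
w_d = weight_decay * math.sqrt(batch_size / (samples_per_epoch * epochs))
# each step then subtracts eta_t * w_d * p from the parameter, as in the loop above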
Example #8
    def get_updates_Padam(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        base_lr = self._optimizer.learning_rate
        if self.initial_decay > 0:
            base_lr = base_lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = base_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            if self._get_multiplier(p) is None:
                multiplier = 1.0
            else:
                multiplier = self._get_multiplier(p)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption.
            new_p = p - (lr_t * multiplier * (m_t /
                                              (denom**(self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
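The exponent 2 * partial on the denominator is what makes this "partially adaptive": partial approaching 0 recovers SGD with momentum, while partial = 0.5 recovers Adam/AMSGrad. A numeric sketch with hypothetical moment values:

import numpy as np

m_t, v_t, eps, lr = 0.1, 0.04, 1e-8, 1e-3        # hypothetical values
for partial in (0.0, 0.25, 0.5):
    denom = np.sqrt(v_t) + eps
    step = lr * m_t / denom ** (2 * partial)
    print(partial, step)                          # step grows with adaptivity here, since denom < 1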
Example #9
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        shapes = [K.int_shape(p) for p in params]
        prev_grads = [
            K.zeros(shape, name='prev_grad_' + str(i))
            for (i, shape) in enumerate(shapes)
        ]
        self.weights = [self.iterations] + prev_grads

        for p, g, pg in zip(params, grads, prev_grads):
            new_p = p - self.lr * g + self.kd * (g - pg)
            self.updates.append(K.update(pg, g))
            self.updates.append(K.update(p, new_p))

        return self.updates
Example #10
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)
        rho = 2 / (1 - self.beta_2) - 1
        rho_t = rho - 2 * t * beta_2_t / (1 - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4) * K.relu(rho_t - 2) * rho / ((rho - 4) *
                                                           (rho - 2) * rho_t))
        flag = K.cast(rho_t > 4, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            mhat_t = m_t / (1 - beta_1_t)
            vhat_t = K.sqrt(v_t / (1 - beta_2_t))
            p_t = p - lr * mhat_t * (flag * r_t / (vhat_t + self.epsilon) +
                                     (1 - flag))

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
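The rho, r_t, and flag terms implement the rectification from the RAdam paper: rho_t approximates the length of the simple moving average, and only once it exceeds 4 is the variance-adapted step (rescaled by r_t) used; otherwise the update falls back to plain momentum. A NumPy sketch with hypothetical beta_2 and step count:

import numpy as np

beta_2, t = 0.999, 100.0                                   # hypothetical values
rho_inf = 2.0 / (1.0 - beta_2) - 1.0
rho_t = rho_inf - 2.0 * t * beta_2 ** t / (1.0 - beta_2 ** t)
# The snippet guards the radicand with K.relu and a flag; here rho_t > 4 already holds.
r_t = np.sqrt(((rho_t - 4) * (rho_t - 2) * rho_inf) /
              ((rho_inf - 4) * (rho_inf - 2) * rho_t))
flag = float(rho_t > 4)                                    # 1.0 -> use the rectified adaptive step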
Example #11
 def __call__(self, *args, **kwargs):
     gs = tf.train.get_global_step()
     if gs is None:
         # if not set - create a variable
         self.global_step = K.variable(tf.zeros(shape=(), dtype=tf.int64),
                                       dtype=tf.int64,
                                       name="lr_global_step")
         tf.train.global_step(K.get_session(), self.global_step)
         gs = K.update_add(self.global_step,
                           1)  ###tf.train.get_global_step()
     else:
         self.global_step = gs
     assert (gs is not None)
     gstep = tf.cast(gs, dtype=tf.float32)
     lr_up = K.exp(self.step_accelerate_log * gstep) * self.min_lr
     lr_down = K.exp(self.step_deccelerate_log *
                     (gstep - self.step_max_lr)) * self.max_lr
     lr = K.switch(K.less(gs, self.step_max_lr), lr_up, lr_down)
     if self.tensorboardimage and not self.added_scalar_to_tensorboard:
         self.tensorboardimage.add_scalar("learning_rate", lr)
         self.added_scalar_to_tensorboard = True  # add once
     return lr
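The schedule ramps the learning rate exponentially from min_lr up to max_lr over step_max_lr steps, then decays it exponentially. A sketch of the same shape in plain Python; the hyper-parameters and the choice of deceleration rate are assumptions, since only the forward formula is shown above:

import math

min_lr, max_lr, step_max_lr = 1e-5, 1e-2, 1000.0              # hypothetical values
step_accelerate_log = math.log(max_lr / min_lr) / step_max_lr  # reaches max_lr at step_max_lr
step_decelerate_log = -step_accelerate_log                     # assumed symmetric decay rate

def lr_at(step):
    if step < step_max_lr:
        return min_lr * math.exp(step_accelerate_log * step)                   # ramp up
    return max_lr * math.exp(step_decelerate_log * (step - step_max_lr))       # decay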
Example #12
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        wd = self.wd * self.wd_normalizer  # decoupled weight decay (4/6)

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. /
                       (1. + self.decay *
                        math_ops.cast(self.iterations, K.dtype(self.decay))))
        eta_t = lr / self.init_lr  # decoupled weight decay (5/6)

        with ops.control_dependencies(
            [state_ops.assign_add(self.iterations, 1)]):
            t = math_ops.cast(self.iterations, K.floatx())
        """Bias corrections according to the Adam paper."""
        lr_t = lr * (K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
                     (1. - math_ops.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
            p_t -= eta_t * wd * p  # decoupled weight decay (6/6)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #13
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        # decoupled weight decay (4/6)
        wd = self.wd

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # decoupled weight decay (5/6)
        eta_t = lr / self.init_lr

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            # decoupled weight decay (6/6)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - eta_t * wd * p

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
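Decoupling means the weight decay term is subtracted from the parameter directly, scheduled by eta_t, instead of being folded into the gradient. A sketch of a single update with hypothetical numbers:

lr_t, m_t, v_t, epsilon = 1e-3, 0.1, 0.04, 1e-8   # hypothetical values
eta_t, wd, p = 1.0, 0.01, 0.5
adam_step = lr_t * m_t / (v_t ** 0.5 + epsilon)
p_new = p - adam_step - eta_t * wd * p             # decay applied outside the Adam step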
Example #14
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        lr = K.switch(
            t <= self.warmup_steps,
            self.lr * (t / self.warmup_steps),
            self.min_lr + (self.lr - self.min_lr) *
            (1.0 - K.minimum(t, self.decay_steps) / self.decay_steps),
        )

        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_{}'.format(i))
            for i, p in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_{}'.format(i))
            for i, p in enumerate(params)
        ]
        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vh_{}'.format(i)) for i, p in enumerate(params)
            ]
        else:
            vhats = [
                K.zeros(1, dtype=K.dtype(p), name='vh_{}'.format(i))
                for i, p in enumerate(params)
            ]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = m_t / (K.sqrt(v_t) + self.epsilon)

            if self.initial_weight_decay > 0.0:
                if self.weight_decay_pattern is None:
                    p_t += self.weight_decay * p
                else:
                    for pattern in self.weight_decay_pattern:
                        if pattern in p.name:
                            p_t += self.weight_decay * p
                            break
            p_t = p - lr_t * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
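The K.switch above implements linear warmup followed by linear decay of the learning rate. The same schedule in plain Python, with hypothetical hyper-parameters:

lr, min_lr, warmup_steps, decay_steps = 1e-3, 1e-5, 1000.0, 10000.0  # hypothetical

def schedule(t):
    if t <= warmup_steps:
        return lr * (t / warmup_steps)                                # linear warmup
    return min_lr + (lr - min_lr) * (1.0 - min(t, decay_steps) / decay_steps)  # linear decay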
Example #15
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            beta2_t = self.beta_2 ** t
            N_sma_max = 2 / (1 - self.beta_2) - 1
            N_sma = N_sma_max - 2 * t * beta2_t / (1 - beta2_t)

            # apply weight decay
            if self.weight_decay != 0.:
                p_wd = p - self.weight_decay * lr * p
            else:
                p_wd = None

            if p_wd is None:
                p_ = p
            else:
                p_ = p_wd

            def gt_path():
                step_size = lr * K.sqrt(
                    (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max /
                    (N_sma_max - 2)) / (1 - self.beta_1 ** t)

                denom = K.sqrt(v_t) + self.epsilon
                p_t = p_ - step_size * (m_t / denom)

                return p_t

            def lt_path():
                step_size = lr / (1 - self.beta_1 ** t)
                p_t = p_ - step_size * m_t

                return p_t

            p_t = K.switch(N_sma > 5, gt_path, lt_path)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #16
    def fit(self, V, verbose=1):
        """Train RBM with the data V.
        
        Parameters
        ----------
        V : 2d numpy array
            Visible data (batch size x input_dim).
        verbose : integer
            Verbosity mode (default: 1).
        """
        num_step = V.shape[0] // self.hps['batch_size'] \
            if V.shape[0] % self.hps['batch_size'] == 0 else V.shape[0] // self.hps['batch_size'] + 1 # Exception processing?

        for k in range(self.hps['epochs']):
            if verbose == 1:
                print(k + 1, '/', self.hps['epochs'], ' epochs', end='\r')

            if self.mode == MODE_VISIBLE_BERNOULLI:
                # Contrastive divergence.
                v_pos = self.input_visible
                h_pos = self.transform
                v_neg = K.cast(K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            V.shape[1])),
                    K.sigmoid(
                        K.dot(h_pos, K.transpose(self.rbm_weight)) +
                        self.visible_bias)),
                               dtype=np.float32)
                h_neg = K.sigmoid(
                    K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                     - K.dot(K.transpose(h_neg), v_neg))
                self.rbm_weight_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.rbm_weight, self.hps['lr'] * update)])
                self.hidden_bias_update_func = K.function([self.input_visible]
                                                , [K.update_add(self.hidden_bias, self.hps['lr'] \
                                                * (K.sum(h_pos, axis=0) - K.sum(h_neg, axis=0)))])
                self.visible_bias_update_func = K.function([self.input_visible]
                                                , [K.update_add(self.visible_bias, self.hps['lr'] \
                                                * (K.sum(v_pos, axis=0) - K.sum(v_neg, axis=0)))])

                # Create the first visible nodes sampling object.
                self.sample_first_visible = K.function([self.input_visible],
                                                       [v_neg])
            elif self.mode == MODE_VISIBLE_GAUSSIAN:
                # Contrastive divergence.
                v_pos = self.input_visible
                h_pos = self.transform
                v_neg = Ke.multivariate_normal_diag(
                    loc=(K.dot(h_pos, K.transpose(self.rbm_weight)) +
                         self.visible_bias),
                    scale_diag=np.ones(shape=(self.hps['batch_size'],
                                              V.shape[1]))).sample()
                h_neg = K.sigmoid(
                    K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                     - K.dot(K.transpose(h_neg), v_neg))
                self.rbm_weight_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.rbm_weight, self.hps['lr'] * update)])
                self.hidden_bias_update_func = K.function([self.input_visible]
                                                , [K.update_add(self.hidden_bias, self.hps['lr'] \
                                                * (K.sum(h_pos, axis=0) - K.sum(h_neg, axis=0)))])
                self.visible_bias_update_func = K.function([self.input_visible]
                                                , [K.update_add(self.visible_bias, self.hps['lr'] \
                                                * (K.sum(v_pos, axis=0) - K.sum(v_neg, axis=0)))])

                # Create the first visible nodes sampling object.
                self.sample_first_visible = K.function([self.input_visible],
                                                       [v_neg])
            else:
                pass

            for i in range(num_step):
                if i == (num_step - 1):
                    if self.mode == MODE_VISIBLE_BERNOULLI:
                        # Contrastive divergence.
                        v_pos = self.input_visible
                        h_pos = self.transform
                        v_neg = K.cast(K.less(
                            K.random_uniform(shape=(
                                V.shape[0] -
                                int(i * self.hps['batch_size']), V.shape[1])),
                            K.sigmoid(
                                K.dot(h_pos, K.transpose(self.rbm_weight)) +
                                self.visible_bias)),
                                       dtype=np.float32)
                        h_neg = K.sigmoid(
                            K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                        update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                             - K.dot(K.transpose(h_neg), v_neg))
                        self.rbm_weight_update_func = K.function(
                            [self.input_visible], [
                                K.update_add(self.rbm_weight,
                                             self.hps['lr'] * update)
                            ])
                        self.hidden_bias_update_func = K.function([self.input_visible]
                                                        , [K.update_add(self.hidden_bias, self.hps['lr'] \
                                                        * (K.sum(h_pos, axis=0) - K.sum(h_neg, axis=0)))])
                        self.visible_bias_update_func = K.function([self.input_visible]
                                                        , [K.update_add(self.visible_bias, self.hps['lr'] \
                                                        * (K.sum(v_pos, axis=0) - K.sum(v_neg, axis=0)))])

                        # Create the first visible nodes sampling object.
                        self.sample_first_visible = K.function(
                            [self.input_visible], [v_neg])
                    elif self.mode == MODE_VISIBLE_GAUSSIAN:
                        # Contrastive divergence.
                        v_pos = self.input_visible
                        h_pos = self.transform
                        v_neg = Ke.multivariate_normal_diag(
                            loc=(K.dot(h_pos, K.transpose(self.rbm_weight)) +
                                 self.visible_bias),
                            scale_diag=np.ones(shape=(
                                V.shape[0] -
                                int(i * self.hps['batch_size']), V.shape[1]
                            ))).sample()
                        h_neg = K.sigmoid(
                            K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                        update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                             - K.dot(K.transpose(h_neg), v_neg))
                        self.rbm_weight_update_func = K.function(
                            [self.input_visible], [
                                K.update_add(self.rbm_weight,
                                             self.hps['lr'] * update)
                            ])
                        self.hidden_bias_update_func = K.function([self.input_visible]
                                                        , [K.update_add(self.hidden_bias, self.hps['lr'] \
                                                        * (K.sum(h_pos, axis=0) - K.sum(h_neg, axis=0)))])
                        self.visible_bias_update_func = K.function([self.input_visible]
                                                        , [K.update_add(self.visible_bias, self.hps['lr'] \
                                                        * (K.sum(v_pos, axis=0) - K.sum(v_neg, axis=0)))])

                        # Create the first visible nodes sampling object.
                        self.sample_first_visible = K.function(
                            [self.input_visible], [v_neg])
                    else:
                        pass

                    V_batch = [V[int(i * self.hps['batch_size']):V.shape[0]]]

                    # Train.
                    self.rbm_weight_update_func(V_batch)
                    self.hidden_bias_update_func(V_batch)
                    self.visible_bias_update_func(V_batch)
                else:
                    V_batch = [
                        V[int(i * self.hps['batch_size']):int(
                            (i + 1) * self.hps['batch_size'])]
                    ]

                    # Train.
                    self.rbm_weight_update_func(V_batch)
                    self.hidden_bias_update_func(V_batch)
                    self.visible_bias_update_func(V_batch)

                # Calculate a training score by each step.
                # Free energy of the input visible nodes.
                fe = self.cal_free_energy(V_batch)

                # Free energy of the first sampled visible nodes.
                V_p_batch = self.sample_first_visible(V_batch)
                fe_p = self.cal_free_energy(V_p_batch)

                score = np.mean(np.abs(fe[0] - fe_p[0]))  # Scale?
                print('\n{0:d}/{1:d}, score: {2:f}'.format(
                    i + 1, num_step, score))
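The `update` tensor built in fit() is the CD-1 weight gradient v_pos^T h_pos - v_neg^T h_neg. A NumPy sketch of the same quantity with hypothetical shapes and learning rate:

import numpy as np

rng = np.random.default_rng(0)
lr = 0.01                                     # hypothetical learning rate
v_pos = rng.random((8, 6))                    # batch of visible states
h_pos = rng.random((8, 4))                    # hidden activations
v_neg = rng.random((8, 6))                    # reconstructed visible states
h_neg = rng.random((8, 4))
delta_W = lr * (v_pos.T @ h_pos - v_neg.T @ h_neg)   # shape (6, 4), matches `update`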