Пример #1
0
    def init_param_updates(self, layer, parameter):
        """Build the symbolic update rules for a single parameter.

        Keeps exponential moving averages of the gradient (first moment)
        and of the squared gradient (second moment), bias-corrects both
        using the current epoch, and moves the parameter along the ratio
        of the corrected first moment to the root of the corrected second
        moment.  An update that stores the Hessian of the error function
        in ``self.variables.hessian`` is returned as well.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``.  Not used by this method.
        parameter
            Shared variable being trained.  It must expose the
            ``prev_first_moment`` and ``prev_second_moment`` attributes.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        epoch = self.variables.epoch
        prev_first_moment = parameter.prev_first_moment
        prev_second_moment = parameter.prev_second_moment

        # NOTE(review): the step is hard-coded here and the value stored in
        # ``self.variables.step`` is ignored - confirm this is intentional.
        step = 0.001
        beta1 = asfloat(self.beta1)
        beta2 = asfloat(self.beta2)
        epsilon = asfloat(self.epsilon)

        gradient = T.grad(self.variables.error_func, wrt=parameter)

        # A fresh shared matrix is created on every call, so after the last
        # call ``self.variables.hessian`` refers to the most recent one.
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse',
        )
        parameters = list(iter_parameters(self))
        # Only the Hessian is needed; the full gradient vector is discarded.
        hessian_matrix, _ = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        first_moment = (beta1 * prev_first_moment +
                        asfloat(1. - beta1) * gradient)
        second_moment = (beta2 * prev_second_moment +
                         asfloat(1. - beta2) * gradient**2)

        first_moment_bias_corrected = first_moment / (1. - beta1**epoch)
        second_moment_bias_corrected = second_moment / (1. - beta2**epoch)

        # The first moment is *divided* by the root of the second moment;
        # epsilon keeps the denominator away from zero.  Multiplying here
        # would scale the step up with the gradient magnitude instead of
        # normalising it.
        parameter_delta = first_moment_bias_corrected / (
            T.sqrt(second_moment_bias_corrected) + epsilon)

        return [
            (prev_first_moment, first_moment),
            (prev_second_moment, second_moment),
            (parameter, parameter - step * parameter_delta),
            (self.variables.hessian, hessian_matrix),
        ]
Пример #2
0
    def init_train_updates(self):
        """Assemble the update list for one training step.

        The search direction is the negative full gradient plus ``beta``
        times the previous direction, where ``beta`` comes from
        ``self.update_function``.  When ``epoch mod n_parameters`` equals
        one the previous direction is dropped and the plain negative
        gradient is used.

        Returns
        -------
        list of tuple
            Updates for the stored gradient, the stored direction and every
            network parameter, in that order.
        """
        variables = self.variables
        learning_rate = variables.step
        last_direction = variables.prev_delta
        last_gradient = variables.prev_gradient

        total_params = count_parameters(self)
        network_params = list(iter_parameters(self))
        flat_params = parameters2vector(self)

        # Flatten the per-parameter gradients into a single vector.
        per_param_grads = T.grad(variables.error_func, wrt=network_params)
        flat_gradient = T.concatenate(
            [grad.flatten() for grad in per_param_grads])

        beta = self.update_function(
            last_gradient, flat_gradient, last_direction)

        is_restart_epoch = T.eq(T.mod(variables.epoch, total_params), 1)
        direction = ifelse(
            is_restart_epoch,
            -flat_gradient,
            -flat_gradient + beta * last_direction,
        )
        new_flat_params = flat_params + learning_rate * direction

        updates = [
            (last_gradient, flat_gradient),
            (last_direction, direction),
        ]
        updates += setup_parameter_updates(network_params, new_flat_params)
        return updates
Пример #3
0
    def init_param_updates(self, layer, parameter):
        """Build the symbolic update rules for a single parameter.

        Tracks an exponential moving average of the gradient and a decayed
        running maximum of its absolute value, then steps the parameter
        along their ratio scaled by ``1 / (1 - beta1 ** epoch)``.  An update
        that stores the Hessian of the error function in
        ``self.variables.hessian`` is returned as well.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``.  Not used by this method.
        parameter
            Shared variable being trained.  It must expose the
            ``prev_first_moment`` and ``prev_weighted_inf_norm`` attributes.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        variables = self.variables
        old_moment = parameter.prev_first_moment
        old_inf_norm = parameter.prev_weighted_inf_norm

        # A fresh shared matrix is created on every call.
        total_params = count_parameters(self)
        zero_matrix = asfloat(np.zeros((total_params, total_params)))
        self.variables.hessian = theano.shared(
            value=zero_matrix,
            name='hessian_inverse',
        )
        network_params = list(iter_parameters(self))
        # Only the Hessian is used; the full gradient vector is ignored.
        hessian_matrix, _ = find_hessian_and_gradient(
            variables.error_func, network_params)

        grad = T.grad(variables.error_func, wrt=parameter)

        new_moment = self.beta1 * old_moment + (1 - self.beta1) * grad
        new_inf_norm = T.maximum(self.beta2 * old_inf_norm, T.abs_(grad))

        bias_correction = 1 / (1 - self.beta1 ** variables.epoch)
        normalised_moment = new_moment / (new_inf_norm + self.epsilon)
        delta = bias_correction * normalised_moment

        return [
            (old_moment, new_moment),
            (old_inf_norm, new_inf_norm),
            (parameter, parameter - variables.step * delta),
            (self.variables.hessian, hessian_matrix),
        ]
Пример #4
0
    def init_train_updates(self):
        """Return the updates that perform one training step.

        A coefficient ``beta`` is obtained from ``self.update_function`` and
        used to mix the previous direction into the negative full gradient.
        On epochs where ``epoch mod n_parameters == 1`` the direction is
        just the negative gradient.

        Returns
        -------
        list of tuple
            The stored-gradient update, the stored-direction update, then
            the updates produced by ``setup_parameter_updates``.
        """
        state = self.variables
        old_direction = state.prev_delta
        old_gradient = state.prev_gradient

        param_count = count_parameters(self)
        param_list = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        # One gradient vector covering every parameter of the network.
        flattened = [
            g.flatten() for g in T.grad(state.error_func, wrt=param_list)
        ]
        gradient_vector = T.concatenate(flattened)

        beta = self.update_function(
            old_gradient, gradient_vector, old_direction)

        restart = T.eq(T.mod(state.epoch, param_count), 1)
        without_memory = -gradient_vector
        with_memory = -gradient_vector + beta * old_direction
        new_direction = ifelse(restart, without_memory, with_memory)

        stepped_vector = param_vector + state.step * new_direction

        bookkeeping = [
            (old_gradient, gradient_vector),
            (old_direction, new_direction),
        ]
        return bookkeeping + list(
            setup_parameter_updates(param_list, stepped_vector))
Пример #5
0
 def init_variables(self):
     """Initialise the inherited variables and add ``leak_average``.

     ``leak_average`` is a shared vector of zeros with one entry per
     network parameter.
     """
     super(LeakStepAdaptation, self).init_variables()

     zero_vector = asfloat(np.zeros(count_parameters(self)))
     self.variables.leak_average = theano.shared(
         name='leak_average',
         value=zero_vector,
     )
Пример #6
0
    def init_variables(self):
        """Initialise the inherited variables and add two state vectors.

        ``prev_delta`` and ``prev_gradient`` are shared vectors of zeros
        with one entry per network parameter.
        """
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self)

        def zero_shared(name):
            # Each call allocates its own zero vector.
            return theano.shared(
                name=name,
                value=asfloat(np.zeros(n_parameters)),
            )

        self.variables.update(
            prev_delta=zero_shared("prev_delta"),
            prev_gradient=zero_shared("prev_gradient"),
        )
Пример #7
0
    def init_variables(self):
        """Register zero-initialised ``prev_delta`` and ``prev_gradient``.

        Runs the parent initialisation first, then stores two shared
        vectors whose length equals the number of network parameters.
        """
        super(ConjugateGradient, self).init_variables()
        vector_length = count_parameters(self)

        # Created in the same order as they are registered.
        delta_store = theano.shared(
            value=asfloat(np.zeros(vector_length)),
            name="prev_delta",
        )
        gradient_store = theano.shared(
            value=asfloat(np.zeros(vector_length)),
            name="prev_gradient",
        )

        self.variables.update(
            prev_delta=delta_store,
            prev_gradient=gradient_store,
        )
Пример #8
0
    def init_train_updates(self):
        """Return parameter updates for one Hessian-based step.

        The flattened parameter vector is moved by
        ``inverse(H + penalty_const * I) . g``, where ``H`` and ``g`` are
        the Hessian and full gradient of the error function.

        Returns
        -------
        The value produced by ``setup_parameter_updates`` for the new
        parameter vector.
        """
        total_params = count_parameters(self)
        network_params = list(iter_parameters(self))
        flat_params = parameters2vector(self)
        penalty = asfloat(self.penalty_const)

        hessian, gradient = find_hessian_and_gradient(
            self.variables.error_func, network_params)

        # The scaled identity is added to the Hessian before inverting.
        regularised = hessian + penalty * T.eye(total_params)
        step_vector = T.nlinalg.matrix_inverse(regularised).dot(gradient)

        return setup_parameter_updates(
            network_params, flat_params - step_vector)
Пример #9
0
    def init_train_updates(self):
        """Build the updates that apply a penalised Hessian step.

        Computes the Hessian and full gradient of the error function, adds
        ``penalty_const`` times the identity to the Hessian, inverts the
        sum and subtracts its product with the gradient from the flattened
        parameters.

        Returns
        -------
        The result of ``setup_parameter_updates`` for the stepped
        parameter vector.
        """
        param_count = count_parameters(self)
        param_list = list(iter_parameters(self))
        current_vector = parameters2vector(self)
        damping = asfloat(self.penalty_const)

        hessian_and_gradient = find_hessian_and_gradient(
            self.variables.error_func, param_list
        )
        hessian, grad_vector = hessian_and_gradient

        identity = T.eye(param_count)
        inverse = T.nlinalg.matrix_inverse(hessian + damping * identity)

        next_vector = current_vector - inverse.dot(grad_vector)
        return setup_parameter_updates(param_list, next_vector)
Пример #10
0
    def init_train_updates(self):
        """Build the updates for one damped least-squares training step.

        The damping factor ``mu`` is multiplied by ``mu_update_factor``
        when the last error is lower than the current error expression and
        divided by it otherwise.  The parameters are then moved by
        ``inverse(J^T J + new_mu * I) . J^T . e``, where ``J`` is the
        Jacobian of the per-sample mean squared error ``e`` with respect to
        the parameters.  The Hessian of the error function is stored in
        ``self.variables.hessian`` as part of the same update list.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs: the ``mu`` update,
            the Hessian update, then the parameter updates.
        """
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        # Shared storage for the Hessian of the error function.
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse',
        )
        # Only the Hessian is needed; the full gradient vector is discarded.
        hessian_matrix, _ = find_hessian_and_gradient(error_func, params)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        # Every entry must be a flat (shared variable, expression) pair;
        # wrapping the Hessian pair in its own list makes the entry invalid.
        updates = [
            (mu, new_mu),
            (self.variables.hessian, hessian_matrix),
        ]
        updates.extend(setup_parameter_updates(params, updated_params))

        return updates
Пример #11
0
    def init_param_updates(self, layer, parameter):
        """Build the symbolic update rules for a single parameter.

        Keeps a decayed average of the squared gradient and steps the
        parameter along the gradient divided by the square root of that
        average plus ``epsilon``.  A zero-initialised shared matrix is also
        stored in ``self.variables.hessian``.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``.  Not used by this method.
        parameter
            Shared variable being trained.  It must expose the
            ``prev_mean_squred_grad`` attribute.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        total_params = count_parameters(self)
        zero_matrix = asfloat(np.zeros((total_params, total_params)))
        self.variables.hessian = theano.shared(
            value=zero_matrix,
            name='hessian_inverse',
        )

        # NOTE(review): the Hessian expression is built here but is not part
        # of the returned updates, so the shared matrix above is never
        # written by this method - confirm whether that is intended.
        network_params = list(iter_parameters(self))
        _hessian, _gradient = find_hessian_and_gradient(
            self.variables.error_func, network_params)

        old_average = parameter.prev_mean_squred_grad
        learning_rate = self.variables.step
        grad = T.grad(self.variables.error_func, wrt=parameter)

        decay = self.decay
        new_average = decay * old_average + (1 - decay) * grad**2
        scaled_grad = grad / T.sqrt(new_average + self.epsilon)

        return [
            (old_average, new_average),
            (parameter, parameter - learning_rate * scaled_grad),
        ]
Пример #12
0
 def init_variables(self):
     """Run the parent initialisation, then create ``leak_average``.

     The new variable is a shared zero vector whose length equals the
     number of network parameters.
     """
     super(LeakStepAdaptation, self).init_variables()
     vector_length = count_parameters(self)
     initial_value = asfloat(np.zeros(vector_length))
     leak_average = theano.shared(value=initial_value, name='leak_average')
     self.variables.leak_average = leak_average
Пример #13
0
 def init_variables(self):
     """Run the parent initialisation, then create ``hessian``.

     The new variable is a shared square matrix of zeros with one row and
     one column per network parameter.
     """
     super(Hessian, self).init_variables()

     size = count_parameters(self)
     zero_matrix = asfloat(np.zeros((size, size)))
     self.variables.hessian = theano.shared(
         name='hessian_inverse',
         value=zero_matrix,
     )