Exemplo n.º 1
0
    def init_param_updates(self, layer, parameter):
        """Assemble the symbolic update pairs for a single parameter.

        The parameter step is built from an exponentially decayed first
        moment of the gradient, scaled by a decayed infinity norm of the
        gradient (the Adamax-style rule). As a side effect a new
        ``self.variables.hessian`` shared matrix is created and an update
        that stores the Hessian of the error function in it is appended.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``; not used by this method.
        parameter
            Shared variable being trained. It must expose the
            ``prev_first_moment`` and ``prev_weighted_inf_norm`` attributes.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        # Square zero matrix sized by the network's parameter count; it is
        # the target of the Hessian update returned below.
        size = count_parameters(self)
        self.variables.hessian = theano.shared(
            name='hessian_inverse',
            value=asfloat(np.zeros((size, size))),
        )
        # Only the Hessian is used; the gradient returned by the helper is
        # ignored.
        hessian_expr, _ignored_gradient = find_hessian_and_gradient(
            self.variables.error_func,
            list(iter_parameters(self)),
        )

        old_moment = parameter.prev_first_moment
        old_inf_norm = parameter.prev_weighted_inf_norm
        grad = T.grad(self.variables.error_func, wrt=parameter)

        new_moment = self.beta1 * old_moment + (1 - self.beta1) * grad
        new_inf_norm = T.maximum(self.beta2 * old_inf_norm, T.abs_(grad))

        # Bias correction for the first moment, driven by the epoch counter.
        correction = 1 / (1 - self.beta1 ** self.variables.epoch)
        scaled_moment = new_moment / (new_inf_norm + self.epsilon)
        delta = correction * scaled_moment

        updates = [
            (old_moment, new_moment),
            (old_inf_norm, new_inf_norm),
            (parameter, parameter - self.variables.step * delta),
            (self.variables.hessian, hessian_expr),
        ]
        return updates
Exemplo n.º 2
0
    def init_param_updates(self, layer, parameter):
        """Build the Adam-style update pairs for a single parameter.

        The step is the bias-corrected first moment of the gradient divided
        by the square root of the bias-corrected second moment (plus
        ``epsilon`` for numerical stability). As a side effect a new
        ``self.variables.hessian`` shared matrix is created and an update
        that stores the Hessian of the error function in it is appended.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``; not used by this method.
        parameter
            Shared variable being trained. It must expose the
            ``prev_first_moment`` and ``prev_second_moment`` attributes.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        epoch = self.variables.epoch
        prev_first_moment = parameter.prev_first_moment
        prev_second_moment = parameter.prev_second_moment
        # NOTE(review): the step is hard-coded here instead of being read
        # from ``self.variables.step`` as the sibling optimizers do --
        # confirm whether this is intentional.
        step = 0.001
        beta1 = asfloat(self.beta1)
        beta2 = asfloat(self.beta2)
        epsilon = asfloat(self.epsilon)

        gradient = T.grad(self.variables.error_func, wrt=parameter)

        # Square zero matrix sized by the network's parameter count; it is
        # the target of the Hessian update returned below.
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse')
        parameters = list(iter_parameters(self))
        # Only the Hessian is needed; the full gradient is discarded.
        hessian_matrix, _ = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        first_moment = (beta1 * prev_first_moment +
                        asfloat(1. - beta1) * gradient)
        second_moment = (beta2 * prev_second_moment +
                         asfloat(1. - beta2) * gradient**2)

        first_moment_bias_corrected = first_moment / (1. - beta1**epoch)
        second_moment_bias_corrected = second_moment / (1. - beta2**epoch)

        # The first moment is *divided* by the root of the second moment;
        # multiplying would make the step grow with the gradient variance
        # instead of being normalised by it.
        parameter_delta = first_moment_bias_corrected / (
            T.sqrt(second_moment_bias_corrected) + epsilon)

        return [(prev_first_moment, first_moment),
                (prev_second_moment, second_moment),
                (parameter, parameter - step * parameter_delta),
                (self.variables.hessian, hessian_matrix)]
Exemplo n.º 3
0
    def init_train_updates(self):
        """Build the training updates for a damped Gauss-Newton step.

        The damping factor ``mu`` is multiplied by ``mu_update_factor`` when
        the last error is lower than the current error expression and
        divided by it otherwise. The new parameter vector is obtained from
        the Jacobian ``J`` of the per-sample mean squared error as
        ``params - inv(J.T J + mu * I) J.T e`` (the Levenberg-Marquardt
        form). As a side effect a new ``self.variables.hessian`` shared
        matrix is created and an update that stores the Hessian of the
        error function in it is included.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        # Square zero matrix sized by the network's parameter count; it is
        # the target of the Hessian update registered below.
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse')
        # Only the Hessian is needed; the full gradient is discarded.
        hessian_matrix, _ = find_hessian_and_gradient(error_func, params)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        # Every entry must be a flat (shared_variable, expression) pair;
        # wrapping the Hessian pair in its own list breaks pair unpacking.
        updates = [
            (mu, new_mu),
            (self.variables.hessian, hessian_matrix),
        ]
        updates.extend(setup_parameter_updates(params, updated_params))

        return updates
Exemplo n.º 4
0
    def init_param_updates(self, layer, parameter):
        """Build the RMSProp-style update pairs for a single parameter.

        The gradient is divided by the square root of an exponentially
        decayed mean of its squares (plus ``epsilon``). As a side effect a
        new ``self.variables.hessian`` shared matrix is created and an
        update that stores the Hessian of the error function in it is
        appended, matching the sibling optimizers.

        Parameters
        ----------
        layer
            Layer that owns ``parameter``; not used by this method.
        parameter
            Shared variable being trained. It must expose the
            ``prev_mean_squred_grad`` attribute.

        Returns
        -------
        list of tuple
            ``(shared_variable, new_expression)`` pairs.
        """
        # Square zero matrix sized by the network's parameter count; it is
        # the target of the Hessian update returned below.
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse')

        parameters = list(iter_parameters(self))
        # Only the Hessian is needed; the full gradient is discarded.
        hessian_matrix, _ = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        prev_mean_squred_grad = parameter.prev_mean_squred_grad
        step = self.variables.step
        gradient = T.grad(self.variables.error_func, wrt=parameter)

        mean_squred_grad = (self.decay * prev_mean_squred_grad +
                            (1 - self.decay) * gradient**2)
        parameter_delta = gradient / T.sqrt(mean_squred_grad + self.epsilon)

        return [
            (prev_mean_squred_grad, mean_squred_grad),
            (parameter, parameter - step * parameter_delta),
            # Without this pair the shared Hessian created above would never
            # be written and would stay all zeros.
            (self.variables.hessian, hessian_matrix),
        ]