Example #1
    def test_line_search_exceptions(self):
        testcases = [
            # Invalid c1 values
            dict(c1=-1, c2=0.5, maxiter=1),
            dict(c1=0, c2=0.5, maxiter=1),
            dict(c1=1, c2=0.5, maxiter=1),

            # Invalid c2 values
            dict(c2=-1, c1=0.5, maxiter=1),
            dict(c2=0, c1=0.5, maxiter=1),
            dict(c2=1, c1=0.5, maxiter=1),

            # c1 > c2
            dict(c1=0.5, c2=0.1, maxiter=1),

            # Invalid `maxiter` values
            dict(c1=0.05, c2=0.1, maxiter=-10),
            dict(c1=0.05, c2=0.1, maxiter=0),
        ]

        for testcase in testcases:
            error_desc = "Line search for {}".format(testcase)
            with self.assertRaises(ValueError, msg=error_desc):
                func = lambda x: x
                wolfe.line_search(f=func, f_deriv=func, **testcase)
Example #2
    def test_line_search_exceptions(self):
        testcases = [
            # Invalid c1 values
            dict(c1=-1, c2=0.5, maxiter=1),
            dict(c1=0, c2=0.5, maxiter=1),
            dict(c1=1, c2=0.5, maxiter=1),

            # Invalid c2 values
            dict(c2=-1, c1=0.5, maxiter=1),
            dict(c2=0, c1=0.5, maxiter=1),
            dict(c2=1, c1=0.5, maxiter=1),

            # c1 > c2
            dict(c1=0.5, c2=0.1, maxiter=1),

            # Invalid `maxiter` values
            dict(c1=0.05, c2=0.1, maxiter=-10),
            dict(c1=0.05, c2=0.1, maxiter=0),
        ]

        def func(x):
            return x

        for testcase in testcases:
            error_desc = "Line search for {}".format(testcase)
            with self.assertRaises(ValueError, msg=error_desc):
                wolfe.line_search(f=func, f_deriv=func, **testcase)
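The two tests above encode the standard constraints on the Wolfe parameters: both constants must lie strictly between 0 and 1 with c1 < c2 (c1 drives the sufficient-decrease condition, c2 the curvature condition), and maxiter must be positive. For contrast, here is a minimal sketch of a valid call; the f/f_deriv keyword signature is copied from the tests above, while the concrete phi/derphi functions and parameter values are illustrative assumptions.

    def phi(step):
        # One-dimensional function of the step size along the search direction
        return (step - 0.5) ** 2

    def derphi(step):
        # Derivative of phi with respect to the step
        return 2 * (step - 0.5)

    # Valid configuration: 0 < c1 < c2 < 1 and maxiter > 0
    step = wolfe.line_search(f=phi, f_deriv=derphi, c1=1e-4, c2=0.9, maxiter=20)
    # As in the examples below, the returned step is typically a symbolic
    # expression that still has to be evaluated (e.g. wrapped in asfloat()).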
Example #3
    def init_train_updates(self):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_hessian, param_vector - prev_params,
                                 full_gradient - prev_full_gradient))
        param_delta = -new_inv_hessian.dot(full_gradient)
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            updated_params = param_vector + step * param_delta

            # This trick allows us to replace shared variables
            # with Theano variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + param.size
                updated_param_value = T.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
Example #4
    def init_train_updates(self):
        network_input = self.variables.network_input
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1),
            inv_hessian,
            self.update_function(inv_hessian,
                                 param_vector - prev_params,
                                 full_gradient - prev_full_gradient)
        )
        param_delta = -new_inv_hessian.dot(full_gradient)

        def prediction(step):
            # TODO: I need to update this ugly solution later
            updated_params = param_vector + step * param_delta

            layer_input = network_input
            start_pos = 0
            for layer in self.layers:
                for param in layer.parameters:
                    end_pos = start_pos + param.size
                    parameter_name, parameter_id = param.name.split('_')
                    setattr(layer, parameter_name, T.reshape(
                        updated_params[start_pos:end_pos],
                        param.shape
                    ))
                    start_pos = end_pos
                layer_input = layer.output(layer_input)
            return layer_input

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
Example #5
    def test_wolfe_linear_search(self):
        x_current = 3
        grad = 2 * (x_current - 5.5)

        def square(step):
            x_new = x_current - step * grad
            return (x_new - 5.5)**2

        def square_deriv(step):
            # Derivative with respect to the step
            return -grad * 2 * ((x_current - step * grad) - 5.5)

        x_star = wolfe.line_search(square, square_deriv)
        x_star = self.eval(x_star)

        self.assertEqual(square(0), 6.25)
        self.assertAlmostEqual(square(x_star), 0, places=2)
        self.assertAlmostEqual(x_star, 0.5, places=2)
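A quick hand derivation of the expected values, to show why the assertions hold exactly: with x_current = 3, the gradient of (x - 5.5)**2 is grad = 2 * (3 - 5.5) = -5, so x_new = x_current - step * grad = 3 + 5 * step. The minimum x_new = 5.5 is reached exactly at step = 0.5, and at step = 0 the error is (3 - 5.5)**2 = 6.25.

    # Plain-Python check of the numbers asserted in the test above
    x_current = 3
    grad = 2 * (x_current - 5.5)           # -5.0
    step = (5.5 - x_current) / -grad       # 0.5: the step that lands on the minimum
    assert (x_current - 0 * grad - 5.5) ** 2 == 6.25
    assert (x_current - step * grad - 5.5) ** 2 == 0.0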
Example #6
    def find_optimal_step(self, parameter_vector, parameter_update):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            step = asfloat(step)
            updated_params = parameter_vector + step * parameter_update

            # This trick allows us to replace shared variables
            # with TensorFlow variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + get_variable_size(param)
                updated_param_value = tf.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            gradient, = tf.gradients(error_func, step)
            return gradient

        return line_search(phi, derphi, self.wolfe_maxiter, self.wolfe_c1,
                           self.wolfe_c2)
Example #7
    def init_train_updates(self):
        network_input = self.variables.network_input
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1),
            inv_hessian,
            self.update_function(inv_hessian,
                                 param_vector - prev_params,
                                 full_gradient - prev_full_gradient)
        )
        param_delta = -new_inv_hessian.dot(full_gradient)
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            updated_params = param_vector + step * param_delta

            # This trick allows us to replace shared variables
            # with Theano variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + param.size
                updated_param_value = T.reshape(
                    updated_params[start_pos:end_pos],
                    param.shape
                )
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(network_input)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
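In the quasi-Newton examples above, the concrete algorithm is hidden inside self.update_function(inv_hessian, param_vector - prev_params, full_gradient - prev_full_gradient). As one illustrative choice (an assumption here, not necessarily the exact formula these trainers use), the classic BFGS inverse-Hessian update looks like this in plain NumPy:

    import numpy as np

    def bfgs_inverse_hessian_update(inv_hessian, weight_delta, gradient_delta):
        # BFGS update of the inverse Hessian approximation H:
        #   H_new = (I - rho * s * y^T) * H * (I - rho * y * s^T) + rho * s * s^T
        # with s = weight_delta, y = gradient_delta and rho = 1 / (y^T s)
        ident = np.eye(len(weight_delta))
        rho = 1.0 / gradient_delta.dot(weight_delta)
        left = ident - rho * np.outer(weight_delta, gradient_delta)
        right = ident - rho * np.outer(gradient_delta, weight_delta)
        return left.dot(inv_hessian).dot(right) + rho * np.outer(weight_delta, weight_delta)

The resulting approximation is then consumed exactly as in the examples above: the search direction is -inv_hessian.dot(gradient), and line_search picks the step length along it.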