Example #1
File: adadelta.py Project: itdxer/neupy
    def init_param_updates(self, layer, parameter):
        step = self.variables.step
        epsilon = self.epsilon
        parameter_shape = parameter.get_value().shape

        prev_mean_squred_grad = theano.shared(
            name="{}/prev-mean-squred-grad".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_mean_squred_dx = theano.shared(
            name="{}/prev-mean-squred-dx".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)

        mean_squred_grad = (
            self.decay * prev_mean_squred_grad +
            (1 - self.decay) * gradient ** 2
        )
        parameter_delta = gradient * (
            T.sqrt(prev_mean_squred_dx + epsilon) /
            T.sqrt(mean_squred_grad + epsilon)
        )
        mean_squred_dx = (
            self.decay * prev_mean_squred_dx +
            (1 - self.decay) * parameter_delta ** 2
        )

        return [
            (prev_mean_squred_grad, mean_squred_grad),
            (prev_mean_squred_dx, mean_squred_dx),
            (parameter, parameter - step * parameter_delta),
        ]
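
For reference, the three updates returned above are the standard Adadelta rule. A minimal NumPy sketch of a single update (a hypothetical helper, not part of neupy, assuming scalar ``decay``, ``epsilon`` and ``step`` as on the class):

import numpy as np

def adadelta_update(param, grad, mean_sq_grad, mean_sq_dx,
                    decay=0.95, epsilon=1e-5, step=1.0):
    # Decaying average of squared gradients.
    mean_sq_grad = decay * mean_sq_grad + (1 - decay) * grad ** 2
    # Scale the gradient by the ratio of the two RMS estimates:
    # previous deltas over current gradients.
    delta = grad * (np.sqrt(mean_sq_dx + epsilon) /
                    np.sqrt(mean_sq_grad + epsilon))
    # Decaying average of squared parameter deltas.
    mean_sq_dx = decay * mean_sq_dx + (1 - decay) * delta ** 2
    return param - step * delta, mean_sq_grad, mean_sq_dx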
Example #2
File: quickprop.py Project: itdxer/neupy
    def init_param_updates(self, layer, parameter):
        step = self.variables.step

        parameter_shape = T.shape(parameter).eval()
        prev_delta = theano.shared(
            name="{}/prev-delta".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_gradient = theano.shared(
            name="{}/prev-grad".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)
        grad_delta = T.abs_(prev_gradient - gradient)

        parameter_delta = ifelse(
            T.eq(self.variables.epoch, 1),
            gradient,
            T.clip(
                T.abs_(prev_delta) * gradient / grad_delta,
                -self.upper_bound,
                self.upper_bound
            )
        )
        return [
            (parameter, parameter - step * parameter_delta),
            (prev_gradient, gradient),
            (prev_delta, parameter_delta),
        ]
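
The delta above is the classic quickprop secant step: the previous delta scaled by ``gradient / |prev_gradient - gradient|``, clipped to ``±upper_bound``, with a plain gradient step on the first epoch. A rough NumPy sketch under the same assumptions (hypothetical helper, not the library's API):

import numpy as np

def quickprop_delta(grad, prev_grad, prev_delta, upper_bound, first_epoch):
    if first_epoch:
        # No history yet, so fall back to the plain gradient.
        return grad
    grad_delta = np.abs(prev_grad - grad)
    # Secant-style step, clipped so that an almost unchanged gradient
    # cannot produce an arbitrarily large jump.
    return np.clip(np.abs(prev_delta) * grad / grad_delta,
                   -upper_bound, upper_bound)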
Example #3
File: wolfe.py Project: mayblue9/neupy
def quadratic_minimizer(x_a, y_a, y_prime_a, x_b, y_b, bound_size_ratio=0.1):
    """
    Finds the minimizer for a quadratic polynomial that
    goes through the points (x_a, y_a), (x_b, y_b) with derivative
    at x_a of y_prime_a.

    Parameters
    ----------
    x_a : float or theano variable
        Left point ``a`` in the ``x`` axis.
    y_a : float or theano variable
        Output from function ``y`` at point ``a``.
    y_prime_a : float or theano variable
        Output from function ``y'`` (``y`` derivative) at
        point ``a``.
    x_b : float or theano variable
        Right point ``b`` in the ``x`` axis.
    y_b : float or theano variable
        Output from function ``y`` at point ``b``.
    bound_size_ratio : float
        Value that controls acceptable bounds for interpolation. If the
        minimizer is too close to one of the points, the interpolation
        result will be ignored. The bigger the ratio, the more likely
        the interpolation is to be rejected. Value needs to be between
        ``0`` and ``1``. Defaults to ``0.1``.

    Returns
    -------
    object
        Theano variable that after evaluation is equal to
        the point ``x`` which is the minimizer of the quadratic function.
    """

    if not 0 <= bound_size_ratio < 1:
        raise ValueError("Value ``bound_size_ratio`` needs to be a float "
                         "between 0 and 1, got {}".format(bound_size_ratio))

    # The main formula works for the region [0, b - a], so we need
    # to shift the function to the left and put point ``a`` at
    # position ``0``.
    x_range = x_b - x_a
    coef = (y_b - y_a - y_prime_a * x_range) / (x_range ** 2)
    minimizer = -y_prime_a / (asfloat(2) * coef) + x_a
    bound_size_ratio = asfloat(bound_size_ratio)

    return T.switch(
        sequential_or(
            # Handle bad cases
            T.eq(x_range, zero),
            coef <= zero,

            T.gt(minimizer, x_b - bound_size_ratio * x_range),
            T.lt(minimizer, x_a + bound_size_ratio * x_range),
        ),
        x_a + asfloat(0.5) * x_range,
        # Since we shifted the function to the left, we need to shift
        # the result back to the right to make it correct for
        # the specified region. That's why we are adding ``x_a``
        # at the end.
        -y_prime_a / (asfloat(2) * coef) + x_a
    )
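
For reference, the ``coef`` and ``minimizer`` expressions above come from fitting the shifted quadratic ``q(t) = y_a + y_prime_a * t + coef * t ** 2`` with ``t = x - x_a`` and requiring ``q(x_b - x_a) == y_b``:

    coef = (y_b - y_a - y_prime_a * (x_b - x_a)) / (x_b - x_a) ** 2

Setting ``q'(t) = 0`` gives ``t = -y_prime_a / (2 * coef)``, which is then shifted back to the original region by adding ``x_a``.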
Example #4
    def test_upscale_layer(self):
        input_value = np.array([
            [1, 2, 3, 4],
            [5, 6, 7, 8],
        ]).reshape((1, 1, 2, 4))
        expected_output = np.array([
            [1, 1, 2, 2, 3, 3, 4, 4],
            [1, 1, 2, 2, 3, 3, 4, 4],
            [1, 1, 2, 2, 3, 3, 4, 4],
            [5, 5, 6, 6, 7, 7, 8, 8],
            [5, 5, 6, 6, 7, 7, 8, 8],
            [5, 5, 6, 6, 7, 7, 8, 8],
        ]).reshape((1, 1, 6, 8))

        upscale_layer = layers.Upscale((3, 2))
        connection = layers.Input((1, 2, 4)) > upscale_layer

        x = T.tensor4('x')
        actual_output = upscale_layer.output(x)
        actual_output = actual_output.eval({x: asfloat(input_value)})

        np.testing.assert_array_almost_equal(
            asfloat(expected_output),
            actual_output
        )
Example #5
File: adamax.py Project: itdxer/neupy
    def init_param_updates(self, layer, parameter):
        epoch = self.variables.epoch
        step = self.variables.step
        beta1 = self.beta1
        beta2 = self.beta2

        parameter_shape = T.shape(parameter).eval()
        prev_first_moment = theano.shared(
            name="{}/prev-first-moment".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_weighted_inf_norm = theano.shared(
            name="{}/prev-weighted-inf-norm".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)

        first_moment = beta1 * prev_first_moment + (1 - beta1) * gradient
        weighted_inf_norm = T.maximum(beta2 * prev_weighted_inf_norm,
                                      T.abs_(gradient))

        parameter_delta = (
            (1 / (1 - beta1 ** epoch)) *
            (first_moment / (weighted_inf_norm + self.epsilon))
        )

        return [
            (prev_first_moment, first_moment),
            (prev_weighted_inf_norm, weighted_inf_norm),
            (parameter, parameter - step * parameter_delta),
        ]
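
The expressions above are the AdaMax variant of Adam: a bias-corrected first moment divided by an exponentially weighted infinity norm. A minimal NumPy sketch of one update (hypothetical helper, assuming the same hyperparameters as on the class):

import numpy as np

def adamax_update(param, grad, first_moment, inf_norm, epoch,
                  step=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Exponential moving average of the gradient.
    first_moment = beta1 * first_moment + (1 - beta1) * grad
    # Infinity-norm analogue of Adam's second moment.
    inf_norm = np.maximum(beta2 * inf_norm, np.abs(grad))
    # Bias-correct the first moment only; the infinity norm
    # does not need a correction term.
    delta = first_moment / ((1 - beta1 ** epoch) * (inf_norm + epsilon))
    return param - step * delta, first_moment, inf_norm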
Example #6
    def test_mixture_of_experts(self):
        dataset = datasets.load_diabetes()
        data, target = asfloat(dataset.data), asfloat(dataset.target)
        insize, outsize = data.shape[1], 1

        input_scaler = preprocessing.MinMaxScaler((-1, 1))
        output_scaler = preprocessing.MinMaxScaler()
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            input_scaler.fit_transform(data),
            output_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.8
        )

        n_epochs = 10
        scaled_y_test = output_scaler.inverse_transform(y_test)
        scaled_y_test = scaled_y_test.reshape((y_test.size, 1))

        # -------------- Train single GradientDescent -------------- #

        bpnet = algorithms.GradientDescent(
            (insize, 20, outsize),
            step=0.1,
            verbose=False
        )
        bpnet.train(x_train, y_train, epochs=n_epochs)
        network_output = bpnet.predict(x_test)
        network_error = rmsle(output_scaler.inverse_transform(network_output),
                              scaled_y_test)

        # -------------- Train ensemble -------------- #

        moe = algorithms.MixtureOfExperts(
            networks=[
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
            ],
            gating_network=algorithms.Momentum(
                layers.Softmax(insize) > layers.Output(2),
                step=0.1,
                verbose=False
            )
        )
        moe.train(x_train, y_train, epochs=n_epochs)
        ensemble_output = moe.predict(x_test)
        ensemble_error = rmsle(
            output_scaler.inverse_transform(ensemble_output),
            scaled_y_test
        )

        self.assertGreater(network_error, ensemble_error)
Example #7
File: conjgrad.py Project: itdxer/neupy
    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            prev_delta=theano.shared(
                name="conj-grad/prev-delta",
                value=asfloat(np.zeros(n_parameters)),
            ),
            prev_gradient=theano.shared(
                name="conj-grad/prev-gradient",
                value=asfloat(np.zeros(n_parameters)),
            ),
        )
Example #8
    def test_batch_norm_as_shared_variable(self):
        gamma = theano.shared(value=asfloat(np.ones(2)))
        beta = theano.shared(value=asfloat(2 * np.ones(2)))

        batch_norm = layers.BatchNorm(gamma=gamma, beta=beta)
        layers.Input(10) > batch_norm

        self.assertIs(gamma, batch_norm.gamma)
        self.assertIs(beta, batch_norm.beta)
Example #9
    def test_concatenate_basic(self):
        concat_layer = layers.Concatenate(axis=1)

        x1 = T.tensor4()
        x2 = T.tensor4()
        y = theano.function([x1, x2], concat_layer.output(x1, x2))

        x1_tensor4 = asfloat(np.random.random((1, 2, 3, 4)))
        x2_tensor4 = asfloat(np.random.random((1, 8, 3, 4)))
        output = y(x1_tensor4, x2_tensor4)

        self.assertEqual((1, 10, 3, 4), output.shape)
Example #10
    def test_elementwise_basic(self):
        elem_layer = layers.Elementwise(merge_function=T.add)

        x1 = T.matrix()
        x2 = T.matrix()
        y = theano.function([x1, x2], elem_layer.output(x1, x2))

        x1_matrix = asfloat(np.random.random((10, 2)))
        x2_matrix = asfloat(np.random.random((10, 2)))

        expected_output = x1_matrix + x2_matrix
        actual_output = y(x1_matrix, x2_matrix)
        np.testing.assert_array_almost_equal(expected_output, actual_output)
Example #11
 def init_layers(self):
     super(Adamax, self).init_layers()
     for layer in self.layers:
         for parameter in layer.parameters:
             parameter_shape = T.shape(parameter).eval()
             parameter.prev_first_moment = theano.shared(
                 name="prev_first_moment_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
             parameter.prev_weighted_inf_norm = theano.shared(
                 name="prev_weighted_inf_norm_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
Example #12
 def init_layers(self):
     super(Quickprop, self).init_layers()
     for layer in self.layers:
         for parameter in layer.parameters:
             parameter_shape = T.shape(parameter).eval()
             parameter.prev_delta = theano.shared(
                 name="prev_delta_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
             parameter.prev_gradient = theano.shared(
                 name="prev_grad_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
Example #13
 def init_layers(self):
     super(Adadelta, self).init_layers()
     for layer in self.layers:
         for parameter in layer.parameters:
             parameter_shape = T.shape(parameter).eval()
             parameter.prev_mean_squred_grad = theano.shared(
                 name="prev_mean_squred_grad_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
             parameter.prev_mean_squred_dx = theano.shared(
                 name="prev_mean_squred_dx_" + parameter.name,
                 value=asfloat(np.zeros(parameter_shape)),
             )
Example #14
    def test_jacobian_for_levenberg_marquardt(self):
        w1 = theano.shared(name='w1', value=asfloat(np.array([[1]])))
        b1 = theano.shared(name='b1', value=asfloat(np.array([0])))
        w2 = theano.shared(name='w2', value=asfloat(np.array([[2]])))
        b2 = theano.shared(name='b2', value=asfloat(np.array([1])))

        x = T.matrix('x')
        y = T.matrix('y')
        output = ((x.dot(w1.T) + b1) ** 2).dot(w2.T) + b2
        error_func = T.mean((y - output), axis=1)

        x_train = asfloat(np.array([[1, 2, 3]]).T)
        y_train = asfloat(np.array([[1, 2, 3]]).T)
        output_expected = asfloat(np.array([[3, 9, 19]]).T)

        np.testing.assert_array_almost_equal(
            output.eval({x: x_train}),
            output_expected
        )

        jacobian_expected = asfloat(np.array([
            [-4, -4, -1, -1],
            [-16, -8, -4, -1],
            [-36, -12, -9, -1],
        ]))
        jacobian_actual = compute_jacobian(error_func, [w1, b1, w2, b2])
        np.testing.assert_array_almost_equal(
            jacobian_expected,
            jacobian_actual.eval({x: x_train, y: y_train})
        )
Example #15
    def test_categorical_hinge_without_one_hot_encoding(self):
        targets = asfloat(np.array([2, 0]))
        predictions = asfloat(np.array([
            [0.1, 0.2, 0.7],
            [0.0, 0.9, 0.1],
        ]))
        expected = asfloat(np.array([0.5, 1.9]).mean())

        prediction_var = T.matrix()
        target_var = T.vector()

        error_output = errors.categorical_hinge(target_var, prediction_var)
        actual = error_output.eval({prediction_var: predictions,
                                    target_var: targets})
        self.assertAlmostEqual(expected, actual)
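
The expected values can be verified by hand: the categorical hinge for one sample is ``max(0, 1 - p_correct + max(p_incorrect))``, so the first row gives ``1 - 0.7 + 0.2 = 0.5``, the second gives ``1 - 0.0 + 0.9 = 1.9``, and their mean is ``1.2``.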
Example #16
def golden_search(f, maxstep=50, maxiter=1024, tol=1e-5):
    """ Identify best step for function in specific direction.

    Parameters
    ----------
    f : func
        Function that takes a step value and returns a scalar
        to minimize.
    maxstep : float
        Defaults to ``50``.
    maxiter : int
        Defaults to ``1024``.
    tol : float
        Defaults to ``1e-5``.

    Returns
    -------
    float
        Identified optimal step.
    """

    golden_ratio = asfloat((math.sqrt(5) - 1) / 2)

    def interval_reduction(a, b, c, d, tol):
        fc = f(c)
        fd = f(d)

        a, b, c, d = ifelse(
            T.lt(fc, fd),
            [a, d, d - golden_ratio * (d - a), c],
            [c, b, d, c + golden_ratio * (b - c)]
        )

        stoprule = theano.scan_module.until(
            T.lt(T.abs_(c - d), tol)
        )
        return [a, b, c, d], stoprule

    a = T.constant(asfloat(0))
    b = maxstep
    c = b - golden_ratio * (b - a)
    d = a + golden_ratio * (b - a)

    (a, b, c, d), _ = theano.scan(
        interval_reduction,
        outputs_info=[a, b, c, d],
        non_sequences=[asfloat(tol)],
        n_steps=maxiter
    )
    return (a[-1] + b[-1]) / 2
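
The scan above is the Theano form of an ordinary golden-section search. A plain-Python sketch of the same interval reduction (hypothetical helper, assuming a unimodal ``f`` on ``[0, maxstep]``):

import math

def golden_search_py(f, maxstep=50, maxiter=1024, tol=1e-5):
    ratio = (math.sqrt(5) - 1) / 2
    a, b = 0.0, float(maxstep)
    c, d = b - ratio * (b - a), a + ratio * (b - a)
    for _ in range(maxiter):
        if abs(c - d) < tol:
            break
        if f(c) < f(d):
            # The minimum is in [a, d]; the old c becomes the new d.
            a, b, c, d = a, d, d - ratio * (d - a), c
        else:
            # The minimum is in [c, b]; the old d becomes the new c.
            a, b, c, d = c, b, d, c + ratio * (b - c)
    return (a + b) / 2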
Example #17
    def initialize(self):
        super(BatchNorm, self).initialize()

        input_shape = as_tuple(None, self.input_shape)
        ndim = len(input_shape)

        if self.axes is None:
            # If ndim == 4 then axes = (0, 2, 3)
            # If ndim == 2 then axes = (0,)
            self.axes = tuple(axis for axis in range(ndim) if axis != 1)

        if any(axis >= ndim for axis in self.axes):
            raise ValueError("Cannot apply batch normalization on the axis "
                             "that doesn't exist.")

        opposite_axes = find_opposite_axes(self.axes, ndim)
        parameter_shape = [input_shape[axis] for axis in opposite_axes]

        if any(parameter is None for parameter in parameter_shape):
            unknown_dim_index = parameter_shape.index(None)
            raise ValueError("Cannot apply batch normalization on the axis "
                             "with unknown size over the dimension #{} "
                             "(0-based indeces).".format(unknown_dim_index))

        self.running_mean = theano.shared(
            name='running_mean_{}'.format(self.layer_id),
            value=asfloat(np.zeros(parameter_shape))
        )
        self.running_inv_std = theano.shared(
            name='running_inv_std_{}'.format(self.layer_id),
            value=asfloat(np.ones(parameter_shape))
        )

        if isinstance(self.gamma, number_type):
            self.gamma = np.ones(parameter_shape) * self.gamma

        if isinstance(self.beta, number_type):
            self.beta = np.ones(parameter_shape) * self.beta

        self.gamma = theano.shared(
            name='gamma_{}'.format(self.layer_id),
            value=asfloat(self.gamma),
        )
        self.beta = theano.shared(
            name='beta_{}'.format(self.layer_id),
            value=asfloat(self.beta),
        )
        self.parameters = [self.gamma, self.beta]
Example #18
 def test_rmsle(self):
     actual = np.e ** (np.array([1, 2, 3, 4])) - 1
     predicted = np.e ** (np.array([4, 3, 2, 1])) - 1
     self.assertEqual(
         asfloat(np.sqrt(5)),
         estimators.rmsle(actual, predicted)
     )
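
The ``sqrt(5)`` follows from the definition ``rmsle = sqrt(mean((log(actual + 1) - log(predicted + 1)) ** 2))``: the logs of the two arrays are ``[1, 2, 3, 4]`` and ``[4, 3, 2, 1]``, so the squared differences ``[9, 1, 1, 9]`` average to ``5``. The ``rmse`` test below works out to the same value, with raw differences instead of log differences.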
Example #19
 def test_rmse(self):
     actual = np.array([0, 1, 2, 3])
     predicted = np.array([3, 2, 1, 0])
     self.assertEqual(
         asfloat(np.sqrt(5)),
         estimators.rmse(actual, predicted)
     )
Example #20
    def test_save_link_to_assigned_connections(self):
        # Tree structure:
        #
        #                       Sigmoid(10)
        #                      /
        # Input(10) - Sigmoid(5)
        #                      \
        #                       Softmax(20)
        #
        input_layer = layers.Input(10)
        minimized = input_layer > layers.Sigmoid(5)
        reconstructed = minimized > layers.Sigmoid(10)
        classifier = minimized > layers.Softmax(20)

        x = T.matrix()
        y_minimized = theano.function([x], minimized.output(x))
        y_reconstructed = theano.function([x], reconstructed.output(x))
        y_classifier = theano.function([x], classifier.output(x))

        x_matrix = asfloat(np.random.random((3, 10)))
        minimized_output = y_minimized(x_matrix)
        self.assertEqual((3, 5), minimized_output.shape)

        reconstructed_output = y_reconstructed(x_matrix)
        self.assertEqual((3, 10), reconstructed_output.shape)

        classifier_output = y_classifier(x_matrix)
        self.assertEqual((3, 20), classifier_output.shape)
Example #21
    def test_dict_based_inputs_into_connection(self):
        # Tree structure:
        #
        # Input(10) - Sigmoid(5) - Sigmoid(10)
        #
        input_layer = layers.Input(10)
        hidden_layer = layers.Sigmoid(5)
        output_layer = layers.Sigmoid(10)

        minimized = input_layer > hidden_layer
        reconstructed = minimized > output_layer

        x = T.matrix()
        y_minimized = theano.function([x], minimized.output(x))

        x_matrix = asfloat(np.random.random((3, 10)))
        minimized_output = y_minimized(x_matrix)
        self.assertEqual((3, 5), minimized_output.shape)

        h_output = T.matrix()
        y_reconstructed = theano.function(
            [h_output],
            reconstructed.output({output_layer: h_output})
        )
        reconstructed_output = y_reconstructed(minimized_output)
        self.assertEqual((3, 10), reconstructed_output.shape)
Example #22
File: rprop.py Project: itdxer/neupy
 def init_prev_delta(self, parameter):
     parameter_shape = T.shape(parameter).eval()
     self.prev_delta = theano.shared(
         name="{}/prev-delta".format(parameter.name),
         value=asfloat(np.zeros(parameter_shape)),
     )
     return self.prev_delta
Example #23
    def test_parallel_layer(self):
        input_layer = layers.Input((3, 8, 8))
        parallel_layer = layers.join(
            [[
                layers.Convolution((11, 5, 5)),
            ], [
                layers.Convolution((10, 3, 3)),
                layers.Convolution((5, 3, 3)),
            ]],
            layers.Concatenate(),
        )
        output_layer = layers.MaxPooling((2, 2))

        conn = layers.join(input_layer, parallel_layer)
        output_connection = layers.join(conn, output_layer)

        x = T.tensor4()
        y = theano.function([x], conn.output(x))

        x_tensor4 = asfloat(np.random.random((10, 3, 8, 8)))
        output = y(x_tensor4)
        self.assertEqual(output.shape, (10, 11 + 5, 4, 4))

        output_function = theano.function([x], output_connection.output(x))
        final_output = output_function(x_tensor4)
        self.assertEqual(final_output.shape, (10, 11 + 5, 2, 2))
Example #24
 def init_variables(self):
     super(LeakStepAdaptation, self).init_variables()
     n_parameters = count_parameters(self)
     self.variables.leak_average = theano.shared(
         value=asfloat(np.zeros(n_parameters)),
         name='leak_average'
     )
Example #25
File: base.py Project: itdxer/neupy
def create_shared_parameter(value, name, shape):
    """
    Creates NN parameter as Theano shared variable.

    Parameters
    ----------
    value : array-like, Theano variable, scalar or Initializer
        Default value for the parameter.

    name : str
        Shared variable name.

    shape : tuple
        Parameter's shape.

    Returns
    -------
    Theano shared variable.
    """
    if isinstance(value, (T.sharedvar.SharedVariable, T.Variable)):
        return value

    if isinstance(value, init.Initializer):
        value = value.sample(shape)

    return theano.shared(value=asfloat(value), name=name, borrow=True)
Example #26
File: leak_step.py Project: itdxer/neupy
 def init_variables(self):
     super(LeakStepAdaptation, self).init_variables()
     n_parameters = count_parameters(self.connection)
     self.variables.leak_average = theano.shared(
         name='leak-step-adapt/leak-average',
         value=asfloat(np.zeros(n_parameters)),
     )
Example #27
    def test_connection_output(self):
        input_value = asfloat(np.random.random((10, 2)))

        connection = layers.Input(2) > layers.Relu(10) > layers.Relu(1)
        output_value = connection.output(input_value).eval()

        self.assertEqual(output_value.shape, (10, 1))
Example #28
    def output(self, input_value):
        if not self.input_shape:
            raise LayerConnectionError("Layer `{}` doesn't have defined "
                                       "input shape. Probably it doesn't "
                                       "have an input layer.".format(self))

        half = self.n // 2
        squared_value = input_value ** 2

        n_samples = input_value.shape[0]
        channel = input_value.shape[1]
        height = input_value.shape[2]
        width = input_value.shape[3]

        zero = asfloat(0)
        extra_channels = T.alloc(zero, n_samples, channel + 2 * half,
                                 height, width)
        squared_value = T.set_subtensor(
            extra_channels[:, half:half + channel, :, :],
            squared_value
        )
        scale = self.k

        for i in range(self.n):
            scale += self.alpha * squared_value[:, i:i + channel, :, :]

        scale = scale ** self.beta
        return input_value / scale
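
In formula form, the loop above computes local response normalization across channels, where the sum runs over the window of ``n`` neighbouring channels around channel ``c`` (zero-padded at the edges) and ``k``, ``alpha`` and ``beta`` are the layer's options:

    b_c = a_c / (k + alpha * sum(a_i ** 2 for i in window(c))) ** beta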
Example #29
    def test_elementwise_in_connections(self):
        input_layer = layers.Input(2)
        hidden_layer_1 = layers.Relu(1, weight=init.Constant(1),
                                     bias=init.Constant(0))
        hidden_layer_2 = layers.Relu(1, weight=init.Constant(2),
                                     bias=init.Constant(0))
        elem_layer = layers.Elementwise(merge_function=T.add)

        connection = layers.join(input_layer, hidden_layer_1, elem_layer)
        connection = layers.join(input_layer, hidden_layer_2, elem_layer)
        connection.initialize()

        self.assertEqual(elem_layer.output_shape, (1,))

        x = T.matrix()
        y = theano.function([x], connection.output(x))

        test_input = asfloat(np.array([
            [0, 1],
            [-1, -1],
        ]))
        actual_output = y(test_input)
        expected_output = np.array([
            [3],
            [0],
        ])
        np.testing.assert_array_almost_equal(expected_output, actual_output)
Example #30
File: base.py Project: EdwardBetts/neupy
def create_shared_parameter(value, name, shape, init_method, bounds):
    """ Creates NN parameter as Theano shared variable.

    Parameters
    ----------
    value : array-like, theano shared variable or None
        Default value for the parameter. If the value is equal to
        ``None``, the parameter will be created based on the
        ``init_method`` value.
    name : str
        Shared variable name.
    shape : tuple
        Parameter shape.
    init_method : str
        Weight initialization procedure name.
    bounds : tuple
        Parameter that is specific to the chosen ``init_method``
        argument.

    Returns
    -------
    Theano shared variable.
    """
    if isinstance(value, T.sharedvar.TensorSharedVariable):
        return value

    if value is None:
        value = generate_weight(shape, bounds, init_method)

    return theano.shared(value=asfloat(value), name=name, borrow=True)
Example #31
    def test_select_network_branch(self):
        network = layers.join(layers.Input(10, name='input-1'), [[
            layers.Relu(1, name='relu-1'),
        ], [
            layers.Relu(2, name='relu-2'),
        ]])

        self.assertEqual(network.input_shape, (10, ))
        self.assertEqual(network.output_shape, [(1, ), (2, )])
        self.assertEqual(len(network), 3)

        relu_1_network = network.end('relu-1')
        self.assertEqual(relu_1_network.input_shape, (10, ))
        self.assertEqual(relu_1_network.output_shape, (1, ))
        self.assertEqual(len(relu_1_network), 2)

        x_test = asfloat(np.ones((7, 10)))
        y_predicted = self.eval(relu_1_network.output(x_test))
        self.assertEqual(y_predicted.shape, (7, 1))

        relu_2_network = network.end('relu-2')
        self.assertEqual(relu_2_network.input_shape, (10, ))
        self.assertEqual(relu_2_network.output_shape, (2, ))
        self.assertEqual(len(relu_2_network), 2)
Example #32
 def activation_function(self, input_value):
     alpha = asfloat(self.alpha)
     return T.nnet.elu(input_value, alpha)
Example #33
def save_dict(network):
    """
    Save network into the dictionary.

    Parameters
    ----------
    network : network or list of layers

    Returns
    -------
    dict
        Saved parameters and information about network in dictionary
        using specific format. Learn more about the NeuPy's storage
        format in the official documentation.

    Examples
    --------
    >>> from neupy import layers, storage
    >>>
    >>> network = layers.Input(10) >> layers.Softmax(3)
    >>> layers_data = storage.save_dict(network)
    >>>
    >>> layers_data.keys()
    ['layers', 'graph', 'metadata']
    """
    network = extract_network(network)
    network.create_variables()

    session = tf_utils.tensorflow_session()
    tf_utils.initialize_uninitialized_variables()

    data = {
        'metadata': {
            'language': 'python',
            'library': 'neupy',
            'version': neupy.__version__,
            'created': strftime("%a, %d %b %Y %H:%M:%S %Z", gmtime()),
        },
        # Make it a list in order to preserve the right order
        # of parameters; otherwise it could be converted to a dictionary.
        'graph': network.layer_names_only(),
        'layers': [],
    }

    for layer in network:
        parameters = {}
        configs = {}

        for attrname, parameter in layer.variables.items():
            parameters[attrname] = {
                'value': asfloat(session.run(parameter)),
                'trainable': parameter.trainable,
            }

        for option_name in layer.options:
            if option_name not in parameters:
                configs[option_name] = getattr(layer, option_name)

        data['layers'].append({
            'class_name': layer.__class__.__name__,
            'name': layer.name,
            'parameters': parameters,
            'configs': configs,
        })

    return data
Example #34
 def test_rmsle(self):
     actual = np.e**(np.array([1, 2, 3, 4])) - 1
     predicted = np.e**(np.array([4, 3, 2, 1])) - 1
     self.assertEqual(asfloat(np.sqrt(5)),
                      estimators.rmsle(actual, predicted))
Example #35
    def test_binary_crossentropy(self):
        predicted = asfloat(np.array([0.1, 0.9, 0.2, 0.5]))
        actual = asfloat(np.array([0, 1, 0, 1]))

        error = errors.binary_crossentropy(actual, predicted)
        self.assertAlmostEqual(0.28, self.eval(error), places=2)
Example #36
 def init_variables(self):
     super(LevenbergMarquardt, self).init_variables()
     self.variables.update(
         mu=theano.shared(name='mu', value=asfloat(self.mu)),
         last_error=theano.shared(name='last_error', value=np.nan),
     )
Example #37
def step_decay(initial_value, reduction_freq, start_iter=0, name='step'):
    """
    The algorithm monotonically decreases the learning step after
    each iteration.

    .. math::
        \\alpha_{t + 1} = \\frac{\\alpha_{0}}{1 + \\frac{t}{m}}

    where :math:`\\alpha` is a step, :math:`t` is an iteration number
    and :math:`m` is a ``reduction_freq`` parameter.

    .. code-block:: python

        step = initial_value / (1 + current_iteration / reduction_freq)

    Notes
    -----
    Step will be reduced faster when you have smaller training batches.

    Parameters
    ----------
    initial_value : float
        Initial value for the learning rate. It's the learning rate
        returned during the first iteration.

    reduction_freq : int
        Parameter controls step reduction frequency. The larger the
        value the slower step parameter decreases.

        For instance, if ``reduction_freq=100``
        and ``step=0.12`` then after ``100`` iterations ``step`` is
        going to be equal to ``0.06`` (which is ``0.12 / 2``),
        after ``200`` iterations ``step`` is going to be equal to
        ``0.04`` (which is ``0.12 / 3``) and so on.

    start_iter : int
        Start iteration. It has to be equal to ``0`` when the network
        has just started training. Defaults to ``0``.

    name : str
        Learning rate's variable name. Defaults to ``step``.

    Examples
    --------
    >>> from neupy import algorithms
    >>> from neupy.layers import *
    >>>
    >>> optimizer = algorithms.Momentum(
    ...     Input(5) >> Relu(10) >> Sigmoid(1),
    ...     step=algorithms.step_decay(
    ...         initial_value=0.1,
    ...         reduction_freq=100,
    ...     )
    ... )
    """
    step, iteration = init_variables(initial_value, start_iter, name)
    reduction_freq = asfloat(reduction_freq)

    step_update = initial_value / (1 + iteration / reduction_freq)
    updated_step = step.assign(step_update)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, updated_step)

    with tf.control_dependencies([updated_step]):
        next_iteration = iteration.assign(iteration + 1)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, next_iteration)

    return step
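
A quick sanity check of the docstring's numbers, using the same formula in plain Python:

initial_value, reduction_freq = 0.12, 100
for t in (0, 100, 200):
    print(initial_value / (1 + t / reduction_freq))
# prints values close to 0.12, 0.06 and 0.04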
Example #38
    def test_functions(self):
        Case = namedtuple("Case", "func X answer")

        testcases = [
            Case(func=cg.fletcher_reeves,
                 X=(
                     asfloat(np.array([1.35, 0.3])),
                     asfloat(np.array([0.11, -0.5])),
                     asfloat(np.array([0, 0])),
                 ),
                 answer=0.137),
            Case(func=cg.polak_ribiere,
                 X=(
                     asfloat(np.array([1., -0.5])),
                     asfloat(np.array([1.2, -0.45])),
                     asfloat(np.array([0, 0])),
                 ),
                 answer=0.174),
            Case(func=cg.hentenes_stiefel,
                 X=(
                     asfloat(np.array([1., -0.5])),
                     asfloat(np.array([1.2, -0.45])),
                     asfloat(np.array([0.2, 0.05])),
                 ),
                 answer=5.118),
            Case(func=cg.liu_storey,
                 X=(
                     asfloat(np.array([1., -0.5])),
                     asfloat(np.array([1.2, -0.45])),
                     asfloat(np.array([0.2, 0.05])),
                 ),
                 answer=-1.243),
            Case(func=cg.dai_yuan,
                 X=(
                     asfloat(np.array([1., -0.5])),
                     asfloat(np.array([1.2, -0.45])),
                     asfloat(np.array([0.2, 0.05])),
                 ),
                 answer=38.647),
        ]

        for testcase in testcases:
            result = self.eval(testcase.func(*testcase.X))
            self.assertAlmostEqual(result, testcase.answer, places=1)
Example #39
 def free_energy(visible_sample):
     wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
     visible_bias_term = T.dot(visible_sample, self.visible_bias)
     hidden_term = T.log(asfloat(1) + T.exp(wx_b)).sum(axis=1)
     return -visible_bias_term - hidden_term
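
This is the standard RBM free energy. With visible sample ``v``, weight matrix ``W``, hidden bias ``b_h`` and visible bias ``b_v``, the snippet computes

    F(v) = -dot(v, b_v) - sum(log(1 + exp(dot(v, W) + b_h)))

where the sum runs over the hidden units.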
Example #40
def target_function(network, x, y):
    weight = network.layers[1].weight
    new_weight = np.array([[x], [y]])
    weight.set_value(asfloat(new_weight))
    return network.prediction_error(input_data, target_data)
Example #41
    plt.figure(figsize=(10, 10))
    plt.suptitle('RBM components', size=16)

    for index, image in enumerate(weight.T, start=1):
        plt.subplot(10, 10, index)
        plt.imshow(image.reshape((28, 28)), cmap=plt.cm.gray)

        plt.xticks([])
        plt.yticks([])

    plt.show()


utils.reproducible()

X, _ = datasets.fetch_openml('mnist_784', version=1, return_X_y=True)
X = asfloat(X > 130)

rbm = algorithms.RBM(
    n_visible=784,
    n_hidden=100,
    step=0.01,
    batch_size=20,

    verbose=True,
    shuffle_data=True,
)
rbm.train(X, X, epochs=10)
plot_rbm_components(rbm)
Example #42
 def scorer(network, X, y):
     y = asfloat(y)
     result = asfloat(network.predict(X))
     return self.eval(errors.rmsle(result[:, 0], y))
Example #43
    def test_functions(self):
        Case = namedtuple("Case", "func input_data answer")

        testcases = [
            Case(func=cg.fletcher_reeves,
                 input_data=(
                     np.array([1.35, 0.3]),
                     np.array([0.11, -0.5]),
                     np.array([0, 0]),
                 ),
                 answer=0.137),
            Case(func=cg.polak_ribiere,
                 input_data=(
                     np.array([1., -0.5]),
                     np.array([1.2, -0.45]),
                     np.array([0, 0]),
                 ),
                 answer=0.174),
            Case(func=cg.hentenes_stiefel,
                 input_data=(
                     np.array([1., -0.5]),
                     np.array([1.2, -0.45]),
                     np.array([0.2, 0.05]),
                 ),
                 answer=5.118),
            Case(func=cg.conjugate_descent,
                 input_data=(
                     np.array([1., -0.5]),
                     np.array([1.2, -0.45]),
                     np.array([0.2, 0.05]),
                 ),
                 answer=-7.323),
            Case(func=cg.liu_storey,
                 input_data=(
                     np.array([1., -0.5]),
                     np.array([1.2, -0.45]),
                     np.array([0.2, 0.05]),
                 ),
                 answer=1.243),
            Case(func=cg.dai_yuan,
                 input_data=(
                     np.array([1., -0.5]),
                     np.array([1.2, -0.45]),
                     np.array([0.2, 0.05]),
                 ),
                 answer=38.647),
        ]

        for testcase in testcases:
            input_data = asfloat(np.array(testcase.input_data))
            variables = T.vectors(3)
            # For some functions certain input variables are optional
            # and are ignored during the computation. This causes errors
            # in the Theano computational graph, because we do not use
            # all of the defined variables. That's why we need a simple
            # hack that fixes the issue without changing the output
            # result.
            hack = asfloat(0) * variables[-1][0]
            output_func = theano.function(variables,
                                          testcase.func(*variables) + hack)
            result = output_func(*input_data)
            self.assertAlmostEqual(result, testcase.answer, places=1)
Example #44
File: wolfe.py Project: degerli/neupy
def quadratic_minimizer(x_a, y_a, y_prime_a, x_b, y_b, bound_size_ratio=0.1):
    """
    Finds the minimizer for a quadratic polynomial that
    goes through the points (x_a, y_a), (x_b, y_b) with derivative
    at x_a of y_prime_a.

    Parameters
    ----------
    x_a : float or tensorflow variable
        Left point ``a`` in the ``x`` axis.
    y_a : float or tensorflow variable
        Output from function ``y`` at point ``a``.
    y_prime_a : float or tensorflow variable
        Output from function ``y'`` (``y`` derivative) at
        point ``a``.
    x_b : float or tensorflow variable
        Right point ``b`` in the ``x`` axis.
    y_b : float or tensorflow variable
        Output from function ``y`` at point ``b``.
    bound_size_ratio : float
        Value that controls acceptable bounds for interpolation. If the
        minimizer is too close to one of the points, the interpolation
        result will be ignored. The bigger the ratio, the more likely
        the interpolation is to be rejected. Value needs to be between
        ``0`` and ``1``. Defaults to ``0.1``.

    Returns
    -------
    object
        TensorFlow variable that after evaluation is equal to
        the point ``x`` which is the minimizer of the quadratic function.
    """

    if not 0 <= bound_size_ratio < 1:
        raise ValueError("Value ``bound_size_ratio`` needs to be a float "
                         "between 0 and 1, got {}".format(bound_size_ratio))

    # The main formula works for the region [0, b - a], so we need
    # to shift the function to the left and put point ``a`` at
    # position ``0``.
    x_range = x_b - x_a
    coef = (y_b - y_a - y_prime_a * x_range) / (x_range ** asfloat(2))
    minimizer = -y_prime_a / (asfloat(2) * coef) + x_a
    bound_size_ratio = asfloat(bound_size_ratio)

    return tf.where(
        sequential_or(
            # Handle bad cases
            tf.equal(x_range, 0),
            coef <= 0,

            tf.is_nan(minimizer),
            tf.greater(minimizer, x_b - bound_size_ratio * x_range),
            tf.less(minimizer, x_a + bound_size_ratio * x_range),
        ),
        x_a + asfloat(0.5) * x_range,
        # Since we shifted the function to the left, we need to shift
        # the result back to the right to make it correct for
        # the specified region. That's why we are adding ``x_a``
        # at the end.
        -y_prime_a / (asfloat(2) * coef) + x_a
    )
Example #45
File: wolfe.py Project: degerli/neupy
def cubic_minimizer(x_a, y_a, y_prime_a, x_b, y_b, x_c, y_c,
                    bound_size_ratio=0.2):
    """
    Finds the minimizer for a cubic polynomial that goes
    through the points (x_a, y_a), (x_b, y_b), and (x_c, y_c)
    with derivative at ``x_a`` of y_prime_a.

    Parameters
    ----------
    x_a : float or tensorflow variable
        First point ``a`` in the ``x`` axis.
    y_a : float or tensorflow variable
        Output from function ``y`` at point ``a``.
    y_prime_a : float or tensorflow variable
        Output from function ``y'`` (``y`` derivative) at
        point ``a``.
    x_b : float or tensorflow variable
        Second point ``b`` in the ``x`` axis.
    y_b : float or tensorflow variable
        Output from function ``y`` at point ``b``.
    x_c : float or tensorflow variable
        Third point ``c`` in the ``x`` axis.
    y_c : float or tensorflow variable
        Output from function ``y`` at point ``c``.
    bound_size_ratio : float
        Value that controls acceptable bounds for interpolation. If
        the minimizer is too close to one of the points, the
        interpolation result will be ignored. The bigger the ratio,
        the more likely the interpolation is to be rejected. Value
        needs to be between ``0`` and ``1``. Defaults to ``0.2``.

    Returns
    -------
    object
        TensorFlow variable that after evaluation is equal to
        the point ``x`` which is a minimizer for the cubic function.
    """

    if not 0 <= bound_size_ratio < 1:
        raise ValueError("The `bound_size_ratio` value should be a float "
                         "number between 0 and 1, got {}"
                         "".format(bound_size_ratio))

    bound_size_ratio = asfloat(bound_size_ratio)

    from_a2b_dist = x_b - x_a
    from_a2c_dist = x_c - x_a

    denominator = (
        (from_a2b_dist * from_a2c_dist) ** asfloat(2) *
        (from_a2b_dist - from_a2c_dist)
    )
    tau_ab = y_b - y_a - y_prime_a * from_a2b_dist
    tau_ac = y_c - y_a - y_prime_a * from_a2c_dist

    alpha = (
        from_a2c_dist ** asfloat(2) * tau_ab -
        from_a2b_dist ** asfloat(2) * tau_ac
    ) / denominator
    beta = (
        from_a2b_dist ** asfloat(3) * tau_ac -
        from_a2c_dist ** asfloat(3) * tau_ab
    ) / denominator
    radical = beta ** asfloat(2) - asfloat(3) * alpha * y_prime_a

    minimizer = x_a + (-beta + tf.sqrt(radical)) / (asfloat(3) * alpha)

    return tf.where(
        sequential_or(
            # Handle bad cases
            radical < 0,

            tf.equal(x_a, x_b),
            tf.equal(x_a, x_c),
            tf.equal(x_b, x_c),
            tf.equal(alpha, 0),

            tf.is_nan(minimizer),
            tf.greater(minimizer, x_b - bound_size_ratio * from_a2b_dist),
            tf.less(minimizer, x_a + bound_size_ratio * from_a2b_dist),
        ),
        quadratic_minimizer(x_a, y_a, y_prime_a, x_b, y_b),
        minimizer,
    )
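
For reference, ``alpha``, ``beta`` and ``radical`` above come from fitting the shifted cubic ``q(t) = y_a + y_prime_a * t + beta * t ** 2 + alpha * t ** 3`` through the three points with ``t = x - x_a``, and solving ``q'(t) = 0`` for the root that is a local minimum:

    minimizer = x_a + (-beta + sqrt(beta ** 2 - 3 * alpha * y_prime_a)) / (3 * alpha)

When the radical is negative or the configuration degenerates, the code falls back to the quadratic minimizer.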
Example #46
File: wolfe.py Project: degerli/neupy
def zoom(x_low, x_high, y_low, y_high, y_deriv_low,
         f, f_deriv, y0, y_deriv_0, c1, c2, maxiter=10):
    """
    Notes
    -----
    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Parameters
    ----------
    x_low : float
        Step size

    x_high : float
        Step size

    y_low : float
        Value of f at x_low

    y_high : float
        Value of f at x_high

    y_deriv_low : float
        Value of derivative at x_low

    f : callable f(x)
        Generates computational graph

    f_deriv : callable f'(x)
        Generates computational graph

    y0 : float
        Value of f for ``x = 0``

    y_deriv_0 : float
        Value of the derivative for ``x = 0``

    c1 : float
        Parameter for Armijo condition rule.

    c2 : float
        Parameter for curvature condition rule.

    maxiter : int
        Maximum number of iterations. Defaults to ``10``.
    """

    def zoom_iteration_step(_, x_low, y_low, y_deriv_low, x_high, y_high,
                           x_recent, y_recent, x_star):

        x_new = cubic_minimizer(
            x_low, y_low, y_deriv_low,
            x_high, y_high,
            x_recent, y_recent)

        y_new = f(x_new)
        y_deriv_new = f_deriv(x_new)

        continue_searching_condition = sequential_or(
            y_new > (y0 + c1 * x_new * y_deriv_0),
            y_new >= y_low,
            tf.abs(y_deriv_new) > (-c2 * y_deriv_0),
        )

        condition1 = tf.logical_or(
            y_new > (y0 + c1 * x_new * y_deriv_0),
            y_new >= y_low
        )
        condition2 = y_deriv_new * (x_high - x_low) >= 0

        x_recent = tf.where(
            tf.logical_or(condition1, condition2), x_high, x_low)
        y_recent = tf.where(
            tf.logical_or(condition1, condition2), y_high, y_low)
        x_high = tf.where(
            condition1, x_new, tf.where(condition2, x_low, x_high))
        y_high = tf.where(
            condition1, y_new, tf.where(condition2, y_low, y_high))

        x_low = tf.where(condition1, x_low, x_new)
        y_low = tf.where(condition1, y_low, y_new)
        y_deriv_low = tf.where(condition1, y_deriv_low, y_deriv_new)

        x_star = x_new

        return [
            continue_searching_condition,
            x_low, y_low, y_deriv_low,
            x_high, y_high,
            x_recent, y_recent,
            x_star
        ]

    zero = tf.constant(asfloat(0))
    x_recent = zero
    y_recent = y0

    outs = tf.while_loop(
        cond=lambda condition, *args: condition,
        body=zoom_iteration_step,
        loop_vars=[
            True,
            x_low, y_low, y_deriv_low,
            x_high, y_high,
            x_recent, y_recent,
            zero,
        ],
        back_prop=False,
        maximum_iterations=maxiter,
    )
    return outs[-1]
Example #47
    def test_mae(self):
        predicted = asfloat(np.array([1, 2, 3]))
        target = asfloat(np.array([3, 2, 1]))

        actual = errors.mae(target, predicted)
        self.assertAlmostEqual(self.eval(actual), 4 / 3., places=3)
Example #48
File: test_utils.py Project: disc5/neupy
 def test_smallest_positive_number(self):
     epsilon = smallest_positive_number()
     self.assertNotEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon)))
     self.assertEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon / 10)))
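
In other words, ``smallest_positive_number()`` behaves like the machine epsilon for the configured float type: subtracting ``1 - epsilon`` from ``1`` is still distinguishable from zero, while ``epsilon / 10`` is already absorbed by rounding.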
Example #49
 def test_rmse(self):
     actual = np.array([0, 1, 2, 3])
     predicted = np.array([3, 2, 1, 0])
     self.assertEqual(asfloat(np.sqrt(5)),
                      estimators.rmse(actual, predicted))
Example #50
 def output(self, value):
     if not self.training_state:
         return 2 * asfloat(value < 0) - 1
     return value
Example #51
File: wolfe.py Project: degerli/neupy
def line_search(f, f_deriv, maxiter=20, c1=1e-4, c2=0.9):
    """
    Find ``x`` that satisfies strong Wolfe conditions.
    ``x > 0`` is assumed to be a descent direction.

    Parameters
    ----------
    f : callable f(x)
        Objective scalar function.

    f_deriv : callable f'(x)
        Objective function derivative.

    maxiter : int
        Maximum number of iterations. Defaults to ``20``.

    c1 : float
        Parameter for Armijo condition rule. Defaults to ``1e-4``.

    c2 : float
        Parameter for curvature condition rule. Defaults to ``0.9``.

    Returns
    -------
    Variable
        Value ``x`` that satisfies the strong Wolfe conditions and
        minimizes function ``f``.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions.  See Wright and Nocedal, 'Numerical Optimization',
    1999, pg. 59-60.
    For the zoom phase it uses an algorithm by [...].
    """

    if not 0 < c1 < 1:
        raise ValueError("c1 should be a float between 0 and 1")

    if not 0 < c2 < 1:
        raise ValueError("c2 should be a float between 0 and 1")

    if c2 < c1:
        raise ValueError("c2 needs to be greater than c1")

    if maxiter <= 0:
        raise ValueError("maxiter needs to be greater than 0")

    c1, c2 = asfloat(c1), asfloat(c2)

    def search_iteration_step(condition, x_previous, x_current, y_previous,
                              y_current, y_deriv_previous, iteration, x_star):

        y_deriv_current = f_deriv(x_current)
        x_new = x_current * asfloat(2)
        y_new = f(x_new)

        condition1 = tf.logical_or(
            y_current > (y0 + c1 * x_current * y_deriv_0),
            tf.logical_and(
                y_current >= y_previous,
                tf.not_equal(iteration, 1),
            )
        )
        condition2 = tf.abs(y_deriv_current) <= -c2 * y_deriv_0
        condition3 = y_deriv_current >= 0

        x_star = tf.where(
            condition1,
            zoom(
                x_previous, x_current, y_previous,
                y_current, y_deriv_previous,
                f, f_deriv, y0, y_deriv_0, c1, c2
            ),
            tf.where(
                condition2,
                x_current,
                tf.where(
                    condition3,
                    zoom(
                        x_current, x_previous, y_current,
                        y_previous, y_deriv_current,
                        f, f_deriv, y0, y_deriv_0, c1, c2
                    ),
                    x_new,
                ),
            ),
        )
        y_deriv_previous_new = tf.where(
            condition1,
            y_deriv_previous,
            y_deriv_current
        )

        is_any_condition_satisfied = sequential_or(
            condition1, condition2, condition3)

        y_current_new = tf.where(
            is_any_condition_satisfied,
            y_current,
            y_new
        )
        continue_searching_condition = tf.logical_and(
            tf.not_equal(x_new, 0),
            tf.logical_not(is_any_condition_satisfied),
        )

        return [
            continue_searching_condition,
            x_current, x_new, y_current, y_current_new,
            y_deriv_previous_new, iteration + 1, x_star
        ]

    one = tf.constant(asfloat(1))
    zero = tf.constant(asfloat(0))

    x0, x1 = zero, one
    y0, y1 = f(x0), f(x1)
    y_deriv_0 = f_deriv(x0)

    outs = tf.while_loop(
        cond=lambda condition, *args: condition,
        body=search_iteration_step,
        loop_vars=[True, x0, x1, y0, y1, y_deriv_0, 1, zero],
        back_prop=False,
        maximum_iterations=maxiter,
    )
    return outs[-1]
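
For reference, the Armijo and curvature rules named in the parameters are the strong Wolfe conditions that the loop enforces for the scalar function ``f`` along the search direction:

    f(x) <= f(0) + c1 * x * f'(0)    # Armijo / sufficient decrease
    abs(f'(x)) <= c2 * abs(f'(0))    # curvature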
Example #52
    def init_methods(self):
        def free_energy(visible_sample):
            with tf.name_scope('free-energy'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias

                visible_bias_term = dot(visible_sample, self.visible_bias)

                # We can get infinity when wx_b is a relatively large
                # number (say 100). Taking the exponent makes it even
                # larger, and with float32 it can turn into infinity.
                # But because the number is so large, the +1 term makes
                # no difference before taking the logarithm, so we can
                # just use the value as it is.
                hidden_terms = tf.where(
                    # exp(30) is such a big number that +1 won't
                    # make any difference in the outcome.
                    tf.greater(wx_b, 30),
                    wx_b,
                    tf.log1p(tf.exp(wx_b)),
                )

                hidden_term = tf.reduce_sum(hidden_terms, axis=1)
                return -(visible_bias_term + hidden_term)

        def visible_to_hidden(visible_sample):
            with tf.name_scope('visible-to-hidden'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias
                return tf.nn.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            with tf.name_scope('hidden-to-visible'):
                wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
                wx_b = wx + self.visible_bias
                return tf.nn.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            with tf.name_scope('sample-hidden-to-visible'):
                hidden_prob = visible_to_hidden(visible_sample)
                hidden_sample = random_binomial(hidden_prob)
                return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            with tf.name_scope('sample-visible-to-hidden'):
                visible_prob = hidden_to_visible(hidden_sample)
                visible_sample = random_binomial(visible_prob)
                return visible_sample

        network_input = self.variables.network_input
        network_hidden_input = self.variables.network_hidden_input
        input_shape = tf.shape(network_input)
        n_samples = input_shape[0]

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        with tf.name_scope('positive-values'):
            # We have to use `cond` instead of `where`, because
            # different if-else cases might have different shapes
            # and it triggers exception in tensorflow.
            v_pos = tf.cond(
                tf.equal(n_samples, self.batch_size), lambda: network_input,
                lambda: random_sample(network_input, self.batch_size))
            h_pos = visible_to_hidden(v_pos)

        with tf.name_scope('negative-values'):
            v_neg = sample_visible_from_hidden(h_samples)
            h_neg = visible_to_hidden(v_neg)

        with tf.name_scope('weight-update'):
            weight_update = (
                tf.matmul(v_pos, h_pos, transpose_a=True) -
                tf.matmul(v_neg, h_neg, transpose_a=True)) / asfloat(n_samples)

        with tf.name_scope('hidden-bias-update'):
            h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

        with tf.name_scope('visible-bias-update'):
            v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

        with tf.name_scope('flipped-input-features'):
            # Each row will have random feature marked with number 1
            # Other values will be equal to 0
            possible_feature_corruptions = tf.eye(self.n_visible)
            corrupted_features = random_sample(possible_feature_corruptions,
                                               n_samples)

            rounded_input = tf.round(network_input)
            # If we scale the input values from the [0, 1] range to
            # [-1, 1], then it will be easier to flip feature values
            # with a simple multiplication.
            scaled_rounded_input = 2 * rounded_input - 1
            scaled_flipped_rounded_input = (
                # For corrupted_features we convert 0 to 1 and 1 to -1;
                # this way, after the multiplication, we flip the sign
                # wherever the transformed corrupted_features equals -1.
                (-2 * corrupted_features + 1) * scaled_rounded_input)
            # Scale it back to the [0, 1] range
            flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

        with tf.name_scope('pseudo-likelihood-loss'):
            # Stochastic pseudo-likelihood
            error = tf.reduce_mean(self.n_visible * tf.log_sigmoid(
                free_energy(flipped_rounded_input) -
                free_energy(rounded_input)))

        with tf.name_scope('gibbs-sampling'):
            gibbs_sampling = sample_visible_from_hidden(
                sample_hidden_from_visible(network_input))

        initialize_uninitialized_variables()
        self.methods.update(
            train_epoch=function(
                [network_input],
                error,
                name='rbm/train-epoch',
                updates=[
                    (weight, weight + step * weight_update),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, random_binomial(p=h_neg)),
                ],
            ),
            prediction_error=function(
                [network_input],
                error,
                name='rbm/prediction-error',
            ),
            diff1=function(
                [network_input],
                free_energy(flipped_rounded_input),
                name='rbm/diff1-error',
            ),
            diff2=function(
                [network_input],
                free_energy(rounded_input),
                name='rbm/diff2-error',
            ),
            visible_to_hidden=function(
                [network_input],
                visible_to_hidden(network_input),
                name='rbm/visible-to-hidden',
            ),
            hidden_to_visible=function(
                [network_hidden_input],
                hidden_to_visible(network_hidden_input),
                name='rbm/hidden-to-visible',
            ),
            gibbs_sampling=function(
                [network_input],
                gibbs_sampling,
                name='rbm/gibbs-sampling',
            ),
        )
Example #53
File: wolfe.py Project: degerli/neupy
    def search_iteration_step(condition, x_previous, x_current, y_previous,
                              y_current, y_deriv_previous, iteration, x_star):

        y_deriv_current = f_deriv(x_current)
        x_new = x_current * asfloat(2)
        y_new = f(x_new)

        condition1 = tf.logical_or(
            y_current > (y0 + c1 * x_current * y_deriv_0),
            tf.logical_and(
                y_current >= y_previous,
                tf.not_equal(iteration, 1),
            )
        )
        condition2 = tf.abs(y_deriv_current) <= -c2 * y_deriv_0
        condition3 = y_deriv_current >= 0
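
        # Interpretation (standard Wolfe line search, cf. Nocedal & Wright,
        # Algorithm 3.5): condition1 means the sufficient-decrease (Armijo)
        # test failed or the function stopped decreasing, so zoom() narrows
        # the bracket [x_previous, x_current]; condition2 means the strong
        # curvature condition holds and x_current is acceptable; condition3
        # means the derivative turned non-negative, so the minimizer was
        # overshot and zoom() searches [x_current, x_previous] instead.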

        x_star = tf.where(
            condition1,
            zoom(
                x_previous, x_current, y_previous,
                y_current, y_deriv_previous,
                f, f_deriv, y0, y_deriv_0, c1, c2
            ),
            tf.where(
                condition2,
                x_current,
                tf.where(
                    condition3,
                    zoom(
                        x_current, x_previous, y_current,
                        y_previous, y_deriv_current,
                        f, f_deriv, y0, y_deriv_0, c1, c2
                    ),
                    x_new,
                ),
            ),
        )
        y_deriv_previous_new = tf.where(
            condition1,
            y_deriv_previous,
            y_deriv_current
        )

        is_any_condition_satisfied = sequential_or(
            condition1, condition2, condition3)

        y_current_new = tf.where(
            is_any_condition_satisfied,
            y_current,
            y_new
        )
        continue_searching_condition = tf.logical_and(
            tf.not_equal(x_new, 0),
            tf.logical_not(is_any_condition_satisfied),
        )

        return [
            continue_searching_condition,
            x_current, x_new, y_current, y_current_new,
            y_deriv_previous_new, iteration + 1, x_star
        ]
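
The fragment above is only the loop body; the enclosing line search
presumably drives it with something like ``tf.while_loop``. A minimal,
hypothetical sketch (assuming the wrapper defines x0, x1, y0, y1,
y_deriv_0 and maxiter analogously to the Theano version in Example #58):

        # Hypothetical driver, not part of the original snippet. The body
        # returns the continuation flag as its first element, so the loop
        # condition simply reads it back.
        result = tf.while_loop(
            cond=lambda condition, *args: condition,
            body=search_iteration_step,
            loop_vars=[
                tf.constant(True),          # keep searching
                x0, x1,                     # x_previous, x_current
                y0, y1,                     # y_previous, y_current
                y_deriv_0,                  # derivative at x_previous
                tf.constant(1),             # iteration counter
                tf.zeros([]),               # placeholder for x_star
            ],
            maximum_iterations=maxiter,
        )
        x_star = result[-1]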
Example #54
0
    def test_mixture_of_experts(self):
        dataset = datasets.load_diabetes()
        data, target = asfloat(dataset.data), asfloat(dataset.target)
        insize, outsize = data.shape[1], 1

        input_scaler = preprocessing.MinMaxScaler((-1, 1))
        output_scaler = preprocessing.MinMaxScaler()
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            input_scaler.fit_transform(data),
            output_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.8
        )

        n_epochs = 10
        scaled_y_test = output_scaler.inverse_transform(y_test)
        scaled_y_test = scaled_y_test.reshape((y_test.size, 1))

        # -------------- Train single GradientDescent -------------- #

        bpnet = algorithms.GradientDescent(
            (insize, 20, outsize),
            step=0.1,
            verbose=False
        )
        bpnet.train(x_train, y_train, epochs=n_epochs)
        network_output = bpnet.predict(x_test)
        network_error = rmsle(output_scaler.inverse_transform(network_output),
                              scaled_y_test)

        # -------------- Train ensemble -------------- #

        moe = algorithms.MixtureOfExperts(
            networks=[
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
            ],
            gating_network=algorithms.Momentum(
                layers.Input(insize) > layers.Softmax(2),
                step=0.1,
                verbose=False
            )
        )
        moe.train(x_train, y_train, epochs=n_epochs)
        ensemble_output = moe.predict(x_test)

        ensemble_error = rmsle(
            output_scaler.inverse_transform(ensemble_output),
            scaled_y_test
        )

        self.assertGreater(network_error, ensemble_error)
Example #55
0
    def init_methods(self):
        def free_energy(visible_sample):
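            # Standard Bernoulli-RBM free energy (comment added for clarity):
            # F(v) = -v . b_visible - sum_j log(1 + exp((v . W + b_hidden)_j))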
            wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
            visible_bias_term = T.dot(visible_sample, self.visible_bias)
            hidden_term = T.log(asfloat(1) + T.exp(wx_b)).sum(axis=1)
            return -visible_bias_term - hidden_term

        def visible_to_hidden(visible_sample):
            wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
            return T.nnet.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            wx_b = T.dot(hidden_sample, self.weight.T) + self.visible_bias
            return T.nnet.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            theano_random = self.theano_random
            hidden_prob = visible_to_hidden(visible_sample)
            hidden_sample = theano_random.binomial(n=1,
                                                   p=hidden_prob,
                                                   dtype=theano.config.floatX)
            return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            theano_random = self.theano_random
            visible_prob = hidden_to_visible(hidden_sample)
            visible_sample = theano_random.binomial(n=1,
                                                    p=visible_prob,
                                                    dtype=theano.config.floatX)
            return visible_sample

        network_input = self.variables.network_input
        n_samples = asfloat(network_input.shape[0])
        theano_random = self.theano_random

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        sample_indices = theano_random.random_integers(
            low=0, high=n_samples - 1, size=(self.batch_size, ))
        v_pos = ifelse(
            T.eq(n_samples, self.batch_size),
            network_input,
            # In case the final batch has fewer samples than expected
            network_input[sample_indices])
        h_pos = visible_to_hidden(v_pos)

        v_neg = sample_visible_from_hidden(h_samples)
        h_neg = visible_to_hidden(v_neg)

        weight_update = v_pos.T.dot(h_pos) - v_neg.T.dot(h_neg)
        h_bias_update = (h_pos - h_neg).mean(axis=0)
        v_bias_update = (v_pos - v_neg).mean(axis=0)
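        # These are persistent contrastive divergence updates: the negative
        # phase restarts from h_samples, a Gibbs-chain state carried over
        # between batches, and each update is the positive-phase statistic
        # minus its negative-phase counterpart.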

        # Stochastic pseudo-likelihood
        feature_index_to_flip = theano_random.random_integers(
            low=0,
            high=self.n_visible - 1,
        )
        rounded_input = T.round(network_input)
        rounded_input_flip = T.set_subtensor(
            rounded_input[:, feature_index_to_flip],
            1 - rounded_input[:, feature_index_to_flip])
        error = T.mean(self.n_visible * T.log(
            T.nnet.sigmoid(
                free_energy(rounded_input_flip) - free_energy(rounded_input))))

        self.methods.update(
            train_epoch=theano.function(
                [network_input],
                error,
                name='algo:rbm/func:train-epoch',
                updates=[
                    (weight, weight + step * weight_update / n_samples),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, asint(theano_random.binomial(n=1, p=h_neg))),
                ],
            ),
            prediction_error=theano.function(
                [network_input],
                error,
                name='algo:rbm/func:prediction-error',
            ),
            visible_to_hidden=theano.function(
                [network_input],
                visible_to_hidden(network_input),
                name='algo:rbm/func:visible-to-hidden',
            ),
            hidden_to_visible=theano.function(
                [network_input],
                hidden_to_visible(network_input),
                name='algo:rbm/func:hidden-to-visible',
            ),
            gibbs_sampling=theano.function(
                [network_input],
                sample_visible_from_hidden(
                    sample_hidden_from_visible(network_input)),
                name='algo:rbm/func:gibbs-sampling',
            ),
        )
Example #56
0
"""
Main source code from Pylearn2 library:
https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/\
optimization/linesearch.py
"""

import theano
import theano.tensor as T
from theano.ifelse import ifelse

from neupy.utils import asfloat

one = T.constant(asfloat(1))
zero = T.constant(asfloat(0))

theano_true = T.constant(1)
theano_false = T.constant(0)


def sequential_or(*conditions):
    """
    Use ``or`` operator between all conditions. Function is just
    a syntax sugar that make long Theano logical conditions looks
    less ugly.

    Parameters
    ----------
    *conditions
        Conditions that returns ``True`` or ``False``
    """
    first_condition, other_conditions = conditions[0], conditions[1:]
Example #57
0
def random_weight(shape):
    initializer = init.Normal()
    weight = initializer.sample(shape)
    return tf.Variable(asfloat(weight), dtype=tf.float32)
Example #58
0
def line_search(f, f_deriv, maxiter=20, c1=1e-4, c2=0.9):
    """
    Find ``x`` that satisfies strong Wolfe conditions.
    ``x > 0`` is assumed to be a descent direction.

    Parameters
    ----------
    f : callable f(x)
        Objective scalar function.
    f_deriv : callable f'(x)
        Objective function derivative.
    maxiter : int
        Maximum number of iterations.
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.

    Returns
    -------
    Theano object
        Value ``x`` that satisfies strong Wolfe conditions and
        minimizes function ``f``.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions.  See Wright and Nocedal, 'Numerical Optimization',
    1999, pg. 59-60.
    For the zoom phase it uses an algorithm by [...].
    """

    if not 0 < c1 < 1:
        raise ValueError("c1 should be a float between 0 and 1")

    if not 0 < c2 < 1:
        raise ValueError("c2 should be a float between 0 and 1")

    if c2 < c1:
        raise ValueError("c2 needs to be greater than c1")

    if maxiter <= 0:
        raise ValueError("maxiter needs to be greater than 0")

    c1, c2 = asfloat(c1), asfloat(c2)

    def search_iteration_step(x_previous, x_current, y_previous, y_current,
                              y_deriv_previous, is_first_iteration, x_star):

        y_deriv_current = f_deriv(x_current)

        x_new = x_current * asfloat(2)
        y_new = f(x_new)

        condition1 = T.or_(
            y_current > (y0 + c1 * x_current * y_deriv_0),
            T.and_(y_current >= y_previous, T.bitwise_not(is_first_iteration)))
        condition2 = T.abs_(y_deriv_current) <= -c2 * y_deriv_0
        condition3 = y_deriv_current >= zero

        x_star = ifelse(
            condition1,
            zoom(x_previous, x_current, y_previous, y_current,
                 y_deriv_previous, f, f_deriv, y0, y_deriv_0, c1, c2),
            ifelse(
                condition2,
                x_current,
                ifelse(
                    condition3,
                    zoom(x_current, x_previous, y_current, y_previous,
                         y_deriv_current, f, f_deriv, y0, y_deriv_0, c1, c2),
                    x_new,
                ),
            ),
        )
        y_deriv_previous_new = ifelse(condition1, y_deriv_previous,
                                      y_deriv_current)

        is_any_condition_satisfied = sequential_or(condition1, condition2,
                                                   condition3)
        y_current_new = ifelse(is_any_condition_satisfied, y_current, y_new)
        return ([
            x_current, x_new, y_current, y_current_new, y_deriv_previous_new,
            theano_false, x_star
        ],
                theano.scan_module.scan_utils.until(
                    sequential_or(
                        T.eq(x_new, zero),
                        is_any_condition_satisfied,
                    )))

    x0, x1 = zero, one
    y0, y1 = f(x0), f(x1)
    y_deriv_0 = f_deriv(x0)

    c1 = T.as_tensor_variable(c1)
    c2 = T.as_tensor_variable(c2)

    outs, _ = theano.scan(
        search_iteration_step,
        outputs_info=[x0, x1, y0, y1, y_deriv_0, theano_true, zero],
        n_steps=maxiter)
    x_star = outs[-1][-1]

    return x_star
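
A minimal, hypothetical usage sketch (not from the original source); note
that the result is a step satisfying the strong Wolfe conditions, not
necessarily the exact minimizer:

def f(x):
    return (x - asfloat(2)) ** 2

def f_deriv(x):
    return asfloat(2) * (x - asfloat(2))

x_star = line_search(f, f_deriv)
print(x_star.eval())  # for this f the first trial step x = 1 already
                      # satisfies both Wolfe conditions, so it prints 1.0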
Example #59
0
File: leak_step.py Project: disc5/neupy
    def init_variables(self):
        super(LeakStepAdaptation, self).init_variables()
        n_parameters = count_parameters(self)
        self.variables.leak_average = theano.shared(
            value=asfloat(np.zeros(n_parameters)),
            name='leak_average',
        )
Example #60
0
import copy
from functools import partial

import numpy as np

from neupy import algorithms, init, layers
from neupy.layers import Input, Sigmoid
from neupy.utils import asfloat

from helpers import compare_networks
from base import BaseTestCase

simple_x_train = asfloat(
    np.array([
        [0.1, 0.1, 0.2],
        [0.2, 0.3, 0.4],
        [0.1, 0.7, 0.2],
    ]))
simple_y_train = asfloat(np.array([
    [0.2, 0.2],
    [0.3, 0.3],
    [0.5, 0.5],
]))


class RPROPTestCase(BaseTestCase):
    def setUp(self):
        super(RPROPTestCase, self).setUp()
        self.network = Input(3) > Sigmoid(10) > Sigmoid(2)

    def test_rprop(self):