Example #1
class GRNN(LazyLearning, BaseNetwork):
    """ Generalized Regression Neural Network.

    Parameters
    ----------
    std : float
        Standard deviation for the PDF function. Defaults to ``0.1``.
    {Verbose.verbose}

    Methods
    -------
    {LazyLearning.train}
    {BaseSkeleton.predict}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> from sklearn import datasets
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     dataset.data, dataset.target, train_size=0.7,
    ...     random_state=0
    ... )
    >>>
    >>> nw = algorithms.GRNN(std=0.1, verbose=False)
    >>> nw.train(x_train, y_train)
    >>> result = nw.predict(x_test)
    >>> estimators.rmsle(result, y_test)
    0.4245120142774001
    """
    std = BoundedProperty(default=0.1, minval=0)

    def train(self, input_train, target_train, copy=True):
        input_train = format_data(input_train, copy=copy)
        target_train = format_data(target_train, copy=copy)

        if target_train.shape[1] != 1:
            raise ValueError("Target value must be one dimensional array")

        LazyLearning.train(self, input_train, target_train)

    def predict(self, input_data):
        super(GRNN, self).predict(input_data)

        input_data = format_data(input_data)

        input_data_size = input_data.shape[1]
        train_data_size = self.input_train.shape[1]

        if input_data_size != train_data_size:
            raise ValueError("Input data must contains {0} features, got "
                             "{1}".format(train_data_size, input_data_size))

        ratios = pdf_between_data(self.input_train, input_data, self.std)
        return (dot(self.target_train.T, ratios) / ratios.sum(axis=0)).T
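
Under the hood GRNN prediction is plain Nadaraya-Watson kernel regression: a weighted average of the stored targets, where the weights come from a Gaussian kernel. Below is a minimal NumPy sketch of the same rule, where `gaussian_ratios` is a hypothetical stand-in for neupy's `pdf_between_data` helper:

import numpy as np

def gaussian_ratios(x_train, x_test, std):
    # Pairwise Gaussian kernel values, shape (n_train, n_test)
    distances = ((x_train[:, None, :] - x_test[None, :, :]) ** 2).sum(axis=2)
    return np.exp(-distances / (2 * std ** 2))

x_train = np.array([[0.1], [0.5], [0.9]])
y_train = np.array([[1.0], [2.0], [3.0]])
x_test = np.array([[0.3], [0.7]])

ratios = gaussian_ratios(x_train, x_test, std=0.1)
# Weighted average of the stored targets, same formula as GRNN.predict
prediction = (y_train.T.dot(ratios) / ratios.sum(axis=0)).T
print(prediction)
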
Example #2
class Hessian(NoStepSelection, GradientDescent):
    """
    Hessian gradient descent optimization. This variation of gradient
    descent uses second-derivative information to choose a better
    gradient direction and, as a consequence, a better weight update
    after each epoch.

    Parameters
    ----------
    penalty_const : float
        The inverse of the Hessian may not exist, since the Hessian can
        be a singular matrix. For this reason the algorithm adds a penalty:
        the identity matrix multiplied by this constant is added to the
        Hessian before inversion. Defaults to ``1``.
    {GradientDescent.Parameters}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    See Also
    --------
    :network:`HessianDiagonal` : Hessian diagonal approximation.
    """
    penalty_const = BoundedProperty(default=1, minval=0)

    def init_variables(self):
        super(Hessian, self).init_variables()
        n_parameters = count_parameters(self)
        self.variables.hessian = theano.shared(value=asfloat(
            np.zeros((n_parameters, n_parameters))),
                                               name='hessian_inverse')

    def init_train_updates(self):
        n_parameters = count_parameters(self)
        parameters = list(iter_parameters(self))
        param_vector = parameters2vector(self)
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)
        hessian_inverse = T.nlinalg.matrix_inverse(hessian_matrix +
                                                   penalty_const *
                                                   T.eye(n_parameters))

        # The identity matrix scaled by penalty_const keeps the matrix
        # invertible even when the Hessian itself is singular.
        updated_parameters = param_vector - hessian_inverse.dot(full_gradient)
        updates = setup_parameter_updates(parameters, updated_parameters)
        updates.append((self.variables.hessian, hessian_matrix))

        return updates
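
The update above is a regularized Newton step: new parameters = old parameters - inverse(H + penalty_const * I) * gradient. Below is a minimal NumPy sketch of that step on a small quadratic loss, assuming the Hessian and gradient are already available; it solves the linear system instead of forming the inverse explicitly:

import numpy as np

def newton_step(params, gradient, hessian, penalty_const=1.0):
    # Solve (H + c*I) d = g rather than inverting the matrix
    n = len(params)
    delta = np.linalg.solve(hessian + penalty_const * np.eye(n), gradient)
    return params - delta

# Quadratic loss 0.5 * w.T A w - b.T w: gradient = A w - b, hessian = A
A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, 1.0])
w = np.zeros(2)

for _ in range(5):
    w = newton_step(w, A.dot(w) - b, A, penalty_const=1.0)
print(w)  # approaches the minimum A^{-1} b = [0.2, 0.4]
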
Example #3
class Hessian(NoStepSelection, GradientDescent):
    """ Hessian gradient decent optimization. This GD algorithm
    variation using second derivative information helps choose better
    gradient direction and as a consequence better weight update
    parameter after eqch epoch.

    Parameters
    ----------
    penalty_const : float
        The inverse of the Hessian may not exist, since the Hessian can
        be a singular matrix. For this reason the algorithm adds a penalty:
        the identity matrix multiplied by this constant is added to the
        Hessian before inversion. Defaults to ``1``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {ConstructableNetwork.error}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    See Also
    --------
    :network:`HessianDiagonal` : Hessian diagonal approximation.
    """
    penalty_const = BoundedProperty(default=1, minval=0)

    def init_train_updates(self):
        n_parameters = count_parameters(self)
        parameters = list(iter_parameters(self))
        param_vector = parameters2vector(self)
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)
        hessian_inverse = T.nlinalg.matrix_inverse(hessian_matrix +
                                                   penalty_const *
                                                   T.eye(n_parameters))

        updated_parameters = param_vector - hessian_inverse.dot(full_gradient)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example #4
class WeightDecay(WeightUpdateConfigurable):
    """
    Weight decay algorithm penalizes large weights. Also known as
    L2-regularization.

    Parameters
    ----------
    decay_rate : float
        Controls the penalty applied during the parameter updates.
        The larger the value, the stronger the regularization effect
        during training. Defaults to ``0.1``.

    Warns
    -----
    {WeightUpdateConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     decay_rate=0.1,
    ...     addons=[algorithms.WeightDecay]
    ... )

    See Also
    --------
    :network:`WeightElimination`
    """
    decay_rate = BoundedProperty(default=0.1, minval=0)

    def init_train_updates(self):
        original_updates = super(WeightDecay, self).init_train_updates()
        parameters = [param for _, _, param in iter_parameters(self.layers)]
        modified_updates = []

        step = self.variables.step
        decay_rate = asfloat(self.decay_rate)

        for parameter, updated in original_updates:
            if parameter in parameters:
                updated -= step * decay_rate * parameter
            modified_updates.append((parameter, updated))

        return modified_updates
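
The loop above only subtracts an extra step * decay_rate * parameter term from each update, which is equivalent to adding an L2 penalty to the loss. A minimal NumPy sketch with made-up numbers comparing a plain update to the decayed one:

import numpy as np

step = 0.1
decay_rate = 0.1

weights = np.array([1.0, -2.0, 3.0])
gradient = np.array([0.5, 0.5, 0.5])  # stand-in gradient of the loss

plain_update = weights - step * gradient
decayed_update = weights - step * gradient - step * decay_rate * weights

print(plain_update)
print(decayed_update)  # every weight is additionally pulled towards zero
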
Example #5
class WeightDecay(WeightUpdateConfigurable):
    """
    Weight decay algorithm penalizes large weights and
    limits the network's freedom. It can help mitigate
    overfitting.

    Parameters
    ----------
    decay_rate : float
        Controls the effect of the penalty on the weight
        updates. Defaults to ``0.1``.

    Warns
    -----
    {WeightUpdateConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     decay_rate=0.1,
    ...     addons=[algorithms.WeightDecay]
    ... )

    See Also
    --------
    :network:`WeightElimination`
    """
    decay_rate = BoundedProperty(default=0.1, minval=0)

    def init_param_updates(self, layer, parameter):
        updates = super(WeightDecay, self).init_param_updates(
            layer, parameter
        )
        step = self.variables.step
        updates_mapper = dict(updates)
        updates_mapper[parameter] -= step * self.decay_rate * parameter
        return list(updates_mapper.items())
Example #6
class Quickprop(GradientDescent):
    """
    Quickprop :network:`GradientDescent` algorithm optimization.

    Parameters
    ----------
    upper_bound : float
        Maximum possible value for weight update.
        Defaults to ``1``.

    {GradientDescent.Parameters}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qpnet = algorithms.Quickprop((2, 3, 1))
    >>> qpnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    upper_bound = BoundedProperty(default=1, minval=0)

    def init_param_updates(self, layer, parameter):
        step = self.variables.step

        parameter_shape = T.shape(parameter).eval()
        prev_delta = theano.shared(
            name="{}/prev-delta".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_gradient = theano.shared(
            name="{}/prev-grad".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)
        grad_delta = T.abs_(prev_gradient - gradient)

        parameter_delta = ifelse(
            T.eq(self.variables.epoch, 1),
            gradient,
            T.clip(
                T.abs_(prev_delta) * gradient / grad_delta,
                -self.upper_bound,
                self.upper_bound
            )
        )
        return [
            (parameter, parameter - step * parameter_delta),
            (prev_gradient, gradient),
            (prev_delta, parameter_delta),
        ]
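
The core of the update above is a secant-style delta: the previous delta is rescaled by gradient / |prev_gradient - gradient| and clipped to +/- upper_bound. A minimal NumPy sketch with made-up gradients:

import numpy as np

upper_bound = 1.0

prev_delta = np.array([0.4, -0.2])
prev_gradient = np.array([0.5, -0.3])
gradient = np.array([0.3, -0.1])      # stand-in gradient at the new point

grad_delta = np.abs(prev_gradient - gradient)
parameter_delta = np.clip(
    np.abs(prev_delta) * gradient / grad_delta,
    -upper_bound,
    upper_bound,
)
print(parameter_delta)  # would be scaled by `step` before the weight update
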
Example #7
class LeakStepAdaptation(SingleStepConfigurable):
    """
    Leak Learning Rate Adaptation algorithm is a step
    adaptation procedure for the backpropagation algorithm.

    Parameters
    ----------
    leak_size : float
        Defaults to ``0.01``. This value is a proportion,
        so it is always between 0 and 1. Typically it is small.

    alpha : float
        Controls the overall step update ratio.
        Defaults to ``0.001``. Typically this value is small.

    beta : float
        Similar to ``alpha``, but it controls the ratio only
        for the norm of the update matrix. Defaults to ``20``.
        Typically this value is bigger than ``1``.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     addons=[algorithms.LeakStepAdaptation]
    ... )

    References
    ----------
    [1] Noboru M. "Adaptive on-line learning in changing
        environments", 1997

    [2] LeCun, "Efficient BackProp", 1998
    """
    leak_size = ProperFractionProperty(default=0.01)
    alpha = BoundedProperty(default=0.001, minval=0)
    beta = BoundedProperty(default=20, minval=0)

    def init_variables(self):
        super(LeakStepAdaptation, self).init_variables()

        n_parameters = count_parameters(self.connection)
        self.variables.leak_average = tf.Variable(
            tf.zeros(n_parameters),
            name="leak-step-adapt/leak-average",
            dtype=tf.float32,
        )

    def init_train_updates(self):
        updates = super(LeakStepAdaptation, self).init_train_updates()

        alpha = asfloat(self.alpha)
        beta = asfloat(self.beta)
        leak_size = asfloat(self.leak_size)

        step = self.variables.step
        leak_average = self.variables.leak_average

        parameters = parameter_values(self.connection)
        gradients = tf.gradients(self.variables.error_func, parameters)
        full_gradient = tf.concat([flatten(grad) for grad in gradients],
                                  axis=0)

        leak_average_update = (
            (1 - leak_size) * leak_average + leak_size * full_gradient)
        new_step = step + alpha * step * (
            beta * tf.norm(leak_average_update) - step)

        updates.extend([
            (leak_average, leak_average_update),
            (step, new_step),
        ])

        return updates
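
The two updates appended above boil down to an exponential moving average of the full gradient (the "leak average") and a step that follows the norm of that average. A minimal NumPy sketch with a made-up gradient vector:

import numpy as np

alpha, beta, leak_size = 0.001, 20.0, 0.01

step = 0.1
leak_average = np.zeros(3)
gradient = np.array([0.2, -0.1, 0.3])  # stand-in full gradient vector

leak_average = (1 - leak_size) * leak_average + leak_size * gradient
step = step + alpha * step * (beta * np.linalg.norm(leak_average) - step)
print(step)  # the step grows when beta * norm(leak_average) exceeds it
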
Example #8
class LinearSearch(SingleStepConfigurable):
    """ Linear search for the step selection. Basicly this algorithms
    try different steps and compute your predicted error, after few
    iteration it will chose one which was better.

    Parameters
    ----------
    tol : float
        Tolerance for termination, defaults to ``0.1``. Can be any number
        greater than zero.
    search_method : 'golden', 'brent'
        Linear search method. Can be ``golden`` for the golden section
        search or ``brent`` for Brent's search, defaults to ``golden``.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.Input(13),
    ...         layers.Sigmoid(50),
    ...         layers.Sigmoid(1),
    ...     ],
    ...     search_method='golden',
    ...     addons=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test).round(1)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = estimators.rmsle(real, predicted)
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`ConjugateGradient`
    """

    tol = BoundedProperty(default=0.1, minval=0)
    maxiter = BoundedProperty(default=10, minval=1)
    search_method = ChoiceProperty(choices=['golden', 'brent'],
                                   default='golden')

    def train_epoch(self, input_train, target_train):
        train_epoch = self.methods.train_epoch
        prediction_error = self.methods.prediction_error

        params = [param for param, _ in self.init_train_updates()]
        param_defaults = [param.get_value() for param in params]

        def setup_new_step(new_step):
            for param_default, param in zip(param_defaults, params):
                param.set_value(param_default)

            self.variables.step.set_value(asfloat(new_step))
            train_epoch(input_train, target_train)
            # train_epoch returns the network error measured before the
            # weight update, that's why we need to compute the error a
            # second time after the update.
            error = prediction_error(input_train, target_train)

            return np.where(np.isnan(error), np.inf, error)

        options = {'xtol': self.tol}
        if self.search_method == 'brent':
            options['maxiter'] = self.maxiter

        res = minimize_scalar(
            setup_new_step,
            tol=self.tol,
            method=self.search_method,
            options=options,
        )

        return setup_new_step(res.x)
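
train_epoch above wraps a whole training epoch into a one-dimensional function of the step and hands it to scipy.optimize.minimize_scalar. A minimal SciPy sketch of the same idea, where a simple quadratic stands in for the real "reset parameters, train, measure error" callback:

from scipy.optimize import minimize_scalar

def error_for_step(step):
    # Stand-in for: reset parameters, train one epoch with `step`,
    # return the prediction error measured after the update.
    return (step - 0.3) ** 2 + 0.05

result = minimize_scalar(error_for_step, method='golden',
                         options={'xtol': 0.1})
print(result.x)  # best step found, roughly 0.3
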
Example #9
class GRNN(BaseSkeleton):
    """
    Generalized Regression Neural Network (GRNN). Network applies
    only to the regression problems.

    Parameters
    ----------
    std : float
        Standard deviation for the PDF function.
        If your input features have large values then the standard
        deviation should also be large. For instance, if the input
        features are in the range ``[0, 20]`` then the standard deviation
        should be a big value as well, like ``10`` or ``15``. Values that
        are too small will lead to bad predictions.

    {Verbose.verbose}

    Notes
    -----
    - GRNN is sensitive to cases where one input feature
      has much larger values than another. Input data has to be
      normalized before training.

    - The standard deviation has to match the range of the input features.
      Check the ``std`` parameter description for more information.

    - The bigger the training dataset, the slower the prediction.
      The algorithm is much more efficient for small datasets.

    - The network uses lazy learning, which means that it doesn't
      need iterative training. It just stores the training data
      and uses it to make predictions.

    Methods
    -------
    train(X_train, y_train, copy=True)
        The network just stores all the information about the data and
        uses it for prediction. The ``copy`` parameter makes a copy of
        the input data before saving it inside the network.

    predict(X)
        Returns a prediction for each sample in ``X``.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     preprocessing.minmax_scale(dataset.data),
    ...     preprocessing.minmax_scale(dataset.target.reshape(-1, 1)),
    ...     test_size=0.3,
    ... )
    >>>
    >>> nw = algorithms.GRNN(std=0.1, verbose=False)
    >>> nw.train(x_train, y_train)
    >>>
    >>> y_predicted = nw.predict(x_test)
    >>> mse = np.mean((y_predicted - y_test) ** 2)
    >>> mse
    0.05280970704568171
    """
    std = BoundedProperty(minval=0)

    def __init__(self, std, verbose=False):
        self.std = std
        self.X_train = None
        self.y_train = None
        super(GRNN, self).__init__(verbose=verbose)

    def train(self, X_train, y_train, copy=True):
        """
        Trains the network. GRNN doesn't actually train, it just stores
        the input data and uses it for prediction.

        Parameters
        ----------
        X_train : array-like (n_samples, n_features)

        y_train : array-like (n_samples,)
            Target variable should be vector or matrix
            with one feature column.

        copy : bool
            If ``True``, the input matrices will be copied.
            Defaults to ``True``.

        Raises
        ------
        ValueError
            If something is wrong with the input data.
        """
        X_train = format_data(X_train, copy=copy)
        y_train = format_data(y_train, copy=copy)

        if y_train.shape[1] != 1:
            raise ValueError("Target value must be one dimensional array")

        self.X_train = X_train
        self.y_train = y_train

        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError("Number of samples in the input and target "
                             "datasets are different")

    def predict(self, X):
        """
        Make a prediction from the input data.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Raises
        ------
        ValueError
            If something is wrong with the input data.

        Returns
        -------
        array-like (n_samples,)
        """
        if self.X_train is None:
            raise NotTrained(
                "Cannot make a prediction. Network hasn't been trained yet")

        X = format_data(X)

        if X.shape[1] != self.X_train.shape[1]:
            raise ValueError("Input data must contain {0} features, got {1}"
                             "".format(self.X_train.shape[1], X.shape[1]))

        ratios = pdf_between_data(self.X_train, X, self.std)
        return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
Example #10
class LevenbergMarquardt(NoStepSelection, GradientDescent):
    """ Levenberg-Marquardt algorithm.

    Notes
    -----
    * Network minimizes only Mean Squared Error function.

    Parameters
    ----------
    mu : float
        Controls the inversion of the ``J.T * J`` matrix,
        defaults to ``0.01``.
    mu_update_factor : float
        Factor used to decrease ``mu`` when an update decreases the error;
        otherwise ``mu`` is increased by the same factor. Defaults to ``5``.
    error: {{'mse'}}
        Levenberg-Marquardt works only for quadratic functions.
        Defaults to ``mse``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train)

    Diabetes dataset example

    >>> import numpy as np
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers
    >>> from neupy.estimators import rmsle
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> # Network
    ... lmnet = algorithms.LevenbergMarquardt(
    ...     connection=[
    ...         layers.Sigmoid(10),
    ...         layers.Sigmoid(40),
    ...         layers.Output(1),
    ...     ],
    ...     mu_update_factor=2,
    ...     mu=0.1,
    ...     step=0.25,
    ...     show_epoch=10,
    ...     use_bias=False,
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train, epochs=100)
    >>> y_predict = lmnet.predict(x_test)
    >>>
    >>> error = rmsle(target_scaler.inverse_transform(y_test),
    ...               target_scaler.inverse_transform(y_predict).round())
    >>> error
    0.47548200957888398

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=5, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='last_error', value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
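
The parameter update above is the classic Levenberg-Marquardt step: approximate the Hessian with J.T * J, damp it with mu * I and move along the resulting direction. A minimal NumPy sketch with a made-up Jacobian and per-sample errors:

import numpy as np

mu = 0.1

J = np.array([[1.0, 2.0],     # stand-in Jacobian of per-sample errors,
              [3.0, 4.0],     # shape (n_samples, n_parameters)
              [5.0, 6.0]])
errors = np.array([0.5, -0.2, 0.1])
params = np.array([0.3, -0.4])

n_params = J.shape[1]
delta = np.linalg.solve(J.T.dot(J) + mu * np.eye(n_params), J.T.dot(errors))
updated_params = params - delta
print(updated_params)
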
Example #11
class WeightElimination(WeightUpdateConfigurable):
    """ Weight Elimination algorithm penalizes large weights and limits the
    freedom in network. The algorithm is able to solve one of the possible
    problems of network overfitting.

    Parameters
    ----------
    decay_rate : float
        Controls the effect of penalties on the update network weights.
        Defaults to ``0.1``.
    zero_weight : float
        The second important parameter for the weight penalization.
        Defaults to ``1``. A small value can push all weights close to
        zero, while a big value makes the penalty contribution to the
        weight update less significant. That means that with a big
        ``zero_weight`` the network allows larger weight values.

    Warns
    -----
    {WeightUpdateConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     verbose=False,
    ...     addons=[algorithms.WeightElimination]
    ... )

    See Also
    --------
    :network:`WeightDecay` : Weight Decay penalty.

    Notes
    -----
    Before adding this regularization, carefully choose the
    ``decay_rate`` and ``zero_weight`` parameters for your problem.
    Badly chosen parameters can significantly shrink the weights
    and drive their norm close to zero.

    .. [1] Weigend, A. S.; Rumelhart, D. E. & Huberman, B. A. (1991), \
        Generalization by Weight-Elimination with Application to Forecasting, \
        in Richard P. Lippmann; John E. Moody & David S. Touretzky, ed., \
        Advances in Neural Information Processing Systems, San Francisco, \
        CA: Morgan Kaufmann, pp. 875--882 .
    """
    decay_rate = BoundedProperty(default=0.1, minval=0)
    zero_weight = BoundedProperty(default=1, minval=0)

    def init_param_updates(self, layer, parameter):
        updates = super(WeightElimination,
                        self).init_param_updates(layer, parameter)

        step = self.variables.step
        decay_koef = self.decay_rate * step
        zero_weight_square = self.zero_weight**2

        updates_mapper = dict(updates)
        updates_mapper[parameter] -= decay_koef * (
            (2 * parameter / zero_weight_square) /
            (1 + (parameter**2) / zero_weight_square)**2)

        return list(updates_mapper.items())
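
The extra term subtracted above is the gradient of the weight-elimination penalty, (2*w/w0^2) / (1 + w^2/w0^2)^2, scaled by decay_rate * step. A minimal NumPy sketch showing how the penalty behaves for small, medium and large weights:

import numpy as np

step, decay_rate, zero_weight = 0.1, 0.1, 1.0

weights = np.array([0.1, 1.0, 10.0])
scaled = weights ** 2 / zero_weight ** 2
penalty_gradient = (2 * weights / zero_weight ** 2) / (1 + scaled) ** 2

print(step * decay_rate * penalty_gradient)
# unlike plain weight decay, the penalty almost vanishes for weights
# that are much larger than zero_weight
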
Example #12
class A(Configurable):
    bounded_property = BoundedProperty(minval=-1, maxval=1)
Example #13
class RPROP(StepSelectionBuiltIn, GradientDescent):
    """ RPROP :network:`GradientDescent` algorithm optimization.

    Parameters
    ----------
    minstep : float
        Minimum possible value for step. Defaults to ``0.1``.
    maxstep : float
        Maximum possible value for step. Defaults to ``50``.
    increase_factor : float
        Increase factor for the step when the gradient doesn't change
        sign compared to the previous epoch.
    decrease_factor : float
        Decrease factor for the step when the gradient changes sign
        compared to the previous epoch.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {ConstructableNetwork.error}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> rpropnet = algorithms.RPROP(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> rpropnet.train(x_train, y_train)

    See Also
    --------
    :network:`IRPROPPlus` : iRPROP+ algorithm.
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    # These properties set the upper and lower bounds for the steps.
    minstep = BoundedProperty(default=0.1, minval=0)
    maxstep = BoundedProperty(default=50, minval=0)

    # These properties increase/decrease the step by scaling it
    # by some coefficient.
    increase_factor = BoundedProperty(minval=1, default=1.2)
    decrease_factor = ProperFractionProperty(default=0.5)

    def init_layers(self):
        super(RPROP, self).init_layers()
        for layer in self.layers:
            for parameter in layer.parameters:
                parameter_shape = T.shape(parameter).eval()
                parameter.prev_delta = theano.shared(
                    name="prev_delta_" + parameter.name,
                    value=asfloat(np.zeros(parameter_shape)),
                )
                parameter.prev_gradient = theano.shared(
                    name="prev_grad_" + parameter.name,
                    value=asfloat(np.zeros(parameter_shape)),
                )
                parameter.steps = theano.shared(
                    name="steps_{}" + parameter.name,
                    value=asfloat(np.ones(parameter_shape) * self.step),
                )

    def init_prev_delta(self, parameter):
        return parameter.prev_delta

    def init_param_updates(self, layer, parameter):
        gradient = T.grad(self.variables.error_func, wrt=parameter)

        steps = parameter.steps
        prev_delta = self.init_prev_delta(parameter)
        prev_gradient = parameter.prev_gradient

        grad_product = prev_gradient * gradient
        negative_gradients = T.lt(grad_product, 0)

        updated_steps = T.clip(
            T.switch(
                T.gt(grad_product, 0),
                steps * self.increase_factor,
                T.switch(
                    negative_gradients,
                    steps * self.decrease_factor,
                    steps
                )
            ),
            self.minstep,
            self.maxstep,
        )
        gradient_signs = T.switch(T.lt(gradient, 0), -1, 1)
        parameter_delta = T.switch(
            negative_gradients,
            prev_delta,
            gradient_signs * updated_steps
        )
        updated_prev_gradient = T.switch(negative_gradients, 0, gradient)

        return [
            (parameter, parameter - parameter_delta),
            (steps, updated_steps),
            (prev_gradient, updated_prev_gradient),
            (parameter.prev_delta, -parameter_delta),
        ]
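
The step update above depends only on the sign of the product between the current and the previous gradient: grow the step while the sign stays the same, shrink it when the sign flips, and always keep it inside [minstep, maxstep]. A minimal NumPy sketch with made-up gradients:

import numpy as np

minstep, maxstep = 0.1, 50.0
increase_factor, decrease_factor = 1.2, 0.5

steps = np.array([1.0, 1.0, 1.0])
prev_gradient = np.array([0.5, -0.3, 0.0])
gradient = np.array([0.4, 0.2, 0.1])     # stand-in gradient at this epoch

grad_product = prev_gradient * gradient
updated_steps = np.clip(
    np.where(grad_product > 0, steps * increase_factor,
             np.where(grad_product < 0, steps * decrease_factor, steps)),
    minstep, maxstep)
print(updated_steps)  # [1.2, 0.5, 1.0]
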
Example #14
class LevenbergMarquardt(NoStepSelection, GradientDescent):
    """ Levenberg-Marquardt algorithm.

    Notes
    -----
    * Network minimizes only Mean Squared Error function.

    Parameters
    ----------
    mu : float
        Controls the inversion of the ``J.T * J`` matrix,
        defaults to ``0.01``.
    mu_update_factor : float
        Factor used to decrease ``mu`` when an update decreases the error;
        otherwise ``mu`` is increased by the same factor. Defaults to ``1.2``.
    error: {{'mse'}}
        Levenberg-Marquardt works only for quadratic functions.
        Defaults to ``mse``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='last_error', value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
Example #15
class Quickprop(GradientDescent):
    """ Quickprop :network:`GradientDescent` algorithm optimization.

    Parameters
    ----------
    upper_bound : float
        Maximum possible value for weight update. Defaults to ``1``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {ConstructableNetwork.error}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qpnet = algorithms.Quickprop(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> qpnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    upper_bound = BoundedProperty(default=1, minval=0)

    def init_layers(self):
        super(Quickprop, self).init_layers()
        for layer in self.layers:
            for parameter in layer.parameters:
                parameter_shape = T.shape(parameter).eval()
                parameter.prev_delta = theano.shared(
                    name="prev_delta_" + parameter.name,
                    value=asfloat(np.zeros(parameter_shape)),
                )
                parameter.prev_gradient = theano.shared(
                    name="prev_grad_" + parameter.name,
                    value=asfloat(np.zeros(parameter_shape)),
                )

    def init_param_updates(self, layer, parameter):
        step = self.variables.step
        gradient = T.grad(self.variables.error_func, wrt=parameter)

        prev_delta = parameter.prev_delta
        prev_gradient = parameter.prev_gradient
        grad_delta = T.abs_(prev_gradient - gradient)

        parameter_delta = ifelse(
            T.eq(self.variables.epoch, 1), gradient,
            T.clip(
                T.abs_(prev_delta) * gradient / grad_delta, -self.upper_bound,
                self.upper_bound))
        return [
            (parameter, parameter - step * parameter_delta),
            (prev_gradient, gradient),
            (prev_delta, parameter_delta),
        ]
Example #16
class HebbRule(BaseStepAssociative):
    """ Hebbian Learning Unsupervised Neural Network.
    Network can learn associations from data and emulate similar behaviour
    as dog in Pavlov experiment.

    Notes
    -----
    * The network always generates weights that contain a ``0`` weight for \
    the conditioned stimulus and ``1`` otherwise. This setup helps you \
    control the default state for the learned features. A different weight \
    can be set via the optional ``weight`` parameter of the input layer.
    * No bias.

    Parameters
    ----------
    decay_rate : float
        The decay rate controls the network weights. It helps the network
        'forget' information and keeps the weight sizes under control.
        Without it the network weights would keep growing. Defaults to ``0.2``.
    {BaseAssociative.n_inputs}
    {BaseAssociative.n_outputs}
    {BaseStepAssociative.n_unconditioned}
    {BaseAssociative.weight}
    {BaseStepAssociative.bias}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, layers
    >>>
    >>> pavlov_dog_data = np.array([
    ...     [1, 0],  # food, no bell
    ...     [1, 1],  # food, bell
    ... ])
    >>> dog_test_cases = np.array([
    ...     [0, 0],  # no food, no bell
    ...     [0, 1],  # no food, bell
    ...     [1, 0],  # food, no bell
    ...     [1, 1],  # food, bell
    ... ])
    >>>
    >>> hebbnet = algorithms.HebbRule(
    ...     layers.Step(2) > layers.Output(1),
    ...     n_unconditioned=1,
    ...     step=0.1,
    ...     decay_rate=0.8,
    ...     verbose=False
    ... )
    >>> hebbnet.train(pavlov_dog_data, epochs=2)
    >>> hebbnet.predict(dog_test_cases)
    array([[-1],
           [ 1],
           [ 1],
           [ 1]])
    """

    decay_rate = BoundedProperty(default=0.2, minval=0)

    def weight_delta(self, input_row, layer_output):
        n_unconditioned = self.n_unconditioned
        weight = self.weight[n_unconditioned:, :]
        delta = input_row[:, n_unconditioned:].T.dot(layer_output)
        return -self.decay_rate * weight + self.step * delta
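
weight_delta above is the Hebbian rule with decay: the conditioned weights grow when stimulus and output are active together and slowly decay otherwise. A minimal NumPy sketch for a single conditioned stimulus, assuming the base class simply adds the returned delta to the weights:

import numpy as np

step, decay_rate = 0.1, 0.2

weight = np.array([[0.0]])          # weight of the conditioned stimulus
input_row = np.array([[1.0]])       # conditioned stimulus is present
layer_output = np.array([[1.0]])    # neuron fired

delta = -decay_rate * weight + step * input_row.T.dot(layer_output)
weight = weight + delta
print(weight)  # [[0.1]] -- the association got stronger
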
Example #17
class Hessian(BaseOptimizer):
    """
    Hessian gradient descent optimization, also known as Newton's method.
    This algorithm uses the second-order derivative (the Hessian matrix)
    in order to choose the correct step during a training iteration.
    Because of this, the method doesn't have a ``step`` parameter.

    Parameters
    ----------
    penalty_const : float
        The inverse of the Hessian may not exist, since the Hessian can
        be a singular matrix. For this reason the algorithm adds a penalty:
        the identity matrix multiplied by this constant is added to the
        Hessian before inversion. Defaults to ``1``.

    {BaseOptimizer.network}

    {BaseOptimizer.loss}

    {BaseOptimizer.regularizer}

    {BaseOptimizer.show_epoch}

    {BaseOptimizer.shuffle_data}

    {BaseOptimizer.signals}

    {BaseOptimizer.verbose}

    Attributes
    ----------
    {BaseOptimizer.Attributes}

    Methods
    -------
    {BaseOptimizer.Methods}

    Notes
    -----
    - Method requires all training data during propagation, which means
      it cannot be trained with mini-batches.

    - This method calculates the full Hessian matrix, which means it will
      compute a matrix with NxN entries, where N = number of parameters
      in the network.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>> from neupy.layers import *
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> network = Input(2) >> Sigmoid(3) >> Sigmoid(1)
    >>> optimizer = algorithms.Hessian(network)
    >>> optimizer.train(x_train, y_train)

    See Also
    --------
    :network:`HessianDiagonal` : Hessian diagonal approximation.
    """
    penalty_const = BoundedProperty(default=1, minval=0)
    step = WithdrawProperty()

    def init_train_updates(self):
        penalty_const = asfloat(self.penalty_const)

        n_parameters = self.network.n_parameters
        variables = self.network.variables
        parameters = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(parameters)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.loss, parameters
        )
        parameter_update = tf.matrix_solve(
            hessian_matrix + penalty_const * tf.eye(n_parameters),
            tf.reshape(full_gradient, [-1, 1])
        )
        updated_parameters = param_vector - flatten(parameter_update)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example #18
class RPROP(StepSelectionBuiltIn, BaseGradientDescent):
    """
    Resilient backpropagation (RPROP) is an optimization
    algorithm for supervised learning.

    The RPROP algorithm takes into account only the direction of the
    gradient and completely ignores its magnitude. Every weight value has
    a unique step size associated with it (by default all of them are
    equal to ``step``).

    The rule is the following: when the gradient direction (sign of the
    gradient) changes, we decrease the step size for that specific weight,
    multiplying it by ``decrease_factor``; if the sign stays the same, we
    increase the step size for that weight, multiplying it by
    ``increase_factor``.

    The step size is always bounded by ``minstep`` and ``maxstep``.

    Notes
    -----
    Algorithm doesn't work with mini-batches.

    Parameters
    ----------
    minstep : float
        Minimum possible value for step. Defaults to ``0.001``.

    maxstep : float
        Maximum possible value for step. Defaults to ``10``.

    increase_factor : float
        Increase factor for the step when the gradient doesn't change
        sign compared to the previous epoch.

    decrease_factor : float
        Decrease factor for the step when the gradient changes sign
        compared to the previous epoch.

    {BaseGradientDescent.Parameters}

    Attributes
    ----------
    {BaseGradientDescent.Attributes}

    Methods
    -------
    {BaseGradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> rpropnet = algorithms.RPROP((2, 3, 1))
    >>> rpropnet.train(x_train, y_train)

    See Also
    --------
    :network:`IRPROPPlus` : iRPROP+ algorithm.
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    # These properties set the upper and lower bounds for the steps.
    minstep = BoundedProperty(default=0.001, minval=0)
    maxstep = BoundedProperty(default=10, minval=0)

    # These properties increase/decrease the step by scaling it
    # by some coefficient.
    increase_factor = BoundedProperty(minval=1, default=1.2)
    decrease_factor = ProperFractionProperty(default=0.5)

    def update_prev_delta(self, prev_delta):
        return prev_delta

    def init_train_updates(self):
        updates = []

        for layer, parameter, gradient in self.iter_params_and_grads():
            with tf.variable_scope(parameter.op.name):
                steps = tf.Variable(
                    # Steps will be decreased after the first iteration,
                    # because all previous gradients are equal to zero.
                    # In order to make sure that network will use the same
                    # step per every weight we re-scale step and after the
                    # first iteration it will be multiplied by
                    # ``decrease_factor`` and scaled back to the default
                    # step value.
                    tf.ones_like(parameter) * self.step,
                    name="steps",
                    dtype=tf.float32,
                )
                prev_delta = tf.Variable(
                    tf.zeros(parameter.shape),
                    name="prev-delta",
                    dtype=tf.float32,
                )
                # We collect only signs since it ensures numerical stability
                # after multiplication when we deal with small numbers.
                prev_gradient_sign = tf.Variable(
                    tf.zeros(parameter.shape),
                    name="prev-grad-sign",
                    dtype=tf.float32,
                )

            updated_prev_delta = self.update_prev_delta(prev_delta)
            gradient_sign = tf.sign(gradient)

            grad_sign_product = gradient_sign * prev_gradient_sign
            gradient_changed_sign = tf.equal(grad_sign_product, -1)

            updated_steps = tf.clip_by_value(
                tf.where(
                    tf.equal(grad_sign_product, 1),
                    steps * self.increase_factor,
                    tf.where(
                        gradient_changed_sign,
                        steps * self.decrease_factor,
                        steps,
                    )
                ),
                self.minstep,
                self.maxstep,
            )
            parameter_delta = tf.where(
                gradient_changed_sign,
                # Subtracting the previous weight update means that we
                # revert the weight update that was applied in the
                # previous iteration.
                -updated_prev_delta,
                updated_steps * gradient_sign,
            )
            # Making sure that during the next iteration sign, after
            # we multiplied by the new gradient, won't be negative.
            # Otherwise, the same roll back using previous delta
            # won't make much sense.
            clipped_gradient_sign = tf.where(
                gradient_changed_sign,
                tf.zeros_like(gradient_sign),
                gradient_sign,
            )

            updates.extend([
                (parameter, parameter - parameter_delta),
                (steps, updated_steps),
                (prev_gradient_sign, clipped_gradient_sign),
                (prev_delta, parameter_delta),
            ])

        return updates
Example #19
class ModifiedRelaxation(BaseLinearNetwork):
    """ Modified Relaxation Neural Network. Simple linear network. If the
    output value of the network received more than the set limit, the
    weight is updated in the same way as the :network:`LMS`, if less
    than the set value - the update will be in proportion to the
    expected result.

    Parameters
    ----------
    dead_zone_radius : float
        Threshold that separates strong network outputs from weak ones;
        a different update rule is applied depending on which side of
        it the output falls. Defaults to ``0.1``.
    {BaseLinearNetwork.connection}
    {ConstructableNetwork.error}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> input_data = np.array([[1, 0], [2, 2], [3, 3], [0, 0]])
    >>> target_data = np.array([[1], [0], [0], [1]])
    >>>
    >>> mrnet = algorithms.ModifiedRelaxation((2, 1), step=1, verbose=False)
    >>> mrnet.train(input_data, target_data, epochs=100)
    >>> mrnet.predict(np.array([[4, 4], [0, 0]]))
    array([[0],
           [1]])

    See Also
    --------
    :network:`LMS` : LMS Neural Network.
    """

    dead_zone_radius = BoundedProperty(default=0.1, minval=0)

    def init_layer_updates(self, layer):
        prediction_func = self.variables.train_prediction_func
        network_output = self.variables.network_output
        network_input = self.variables.network_input
        step = self.variables.step

        normalized_input = network_input / network_input.norm(L=2)
        summated_output = network_input.dot(layer.weight) + layer.bias
        linear_error = prediction_func - network_output
        update = T.where(
            T.abs_(summated_output) >= self.dead_zone_radius, linear_error,
            network_output)

        weight_delta = normalized_input.T.dot(update)
        bias_delta = linear_error.sum(axis=0)

        return [
            (layer.weight, layer.weight - step * weight_delta),
            (layer.bias, layer.bias - step * bias_delta),
        ]
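
The update above switches between two rules: outside the dead zone it follows the linear error, exactly as LMS does, and inside the dead zone it is driven by the expected output instead. A minimal NumPy sketch of that switch with made-up values:

import numpy as np

dead_zone_radius = 0.1

summed_output = np.array([[0.05], [0.8]])   # raw outputs before threshold
prediction = np.array([[0.0], [0.6]])
target = np.array([[1.0], [0.0]])

linear_error = prediction - target
update = np.where(np.abs(summed_output) >= dead_zone_radius,
                  linear_error,   # confident output: LMS-style update
                  target)         # weak output: follow the expected result
print(update)
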
Example #20
class PNN(LazyLearning, BaseNetwork):
    """ Probabilistic Neural Network for classification.

    Parameters
    ----------
    std : float
        Standard deviation for the PDF function. Defaults to ``0.1``.
    {Verbose.verbose}

    Methods
    -------
    {LazyLearning.train}
    {BaseSkeleton.predict}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>>
    >>> from sklearn import datasets
    >>> from sklearn import metrics
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_digits()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     dataset.data, dataset.target, train_size=0.7
    ... )
    >>>
    >>> nw = algorithms.PNN(std=10, verbose=False)
    >>> nw.train(x_train, y_train)
    >>> result = nw.predict(x_test)
    >>> metrics.accuracy_score(y_test, result)
    0.98888888888888893
    """
    std = BoundedProperty(default=0.1, minval=0)

    def __init__(self, **options):
        super(PNN, self).__init__(**options)
        self.classes = None

    def train(self, input_train, target_train, copy=True):
        input_train = format_data(input_train, copy=copy)
        target_train = format_data(target_train, copy=copy)

        LazyLearning.train(self, input_train, target_train)

        if target_train.shape[1] != 1:
            raise ValueError("Target value must be in 1 dimention")

        classes = self.classes = unique(target_train)
        number_of_classes = classes.size
        row_comb_matrix = self.row_comb_matrix = zeros(
            (number_of_classes, input_train.shape[0]))
        class_ratios = self.class_ratios = zeros(number_of_classes)

        for i, class_name in enumerate(classes):
            class_val_positions = (target_train == i)
            row_comb_matrix[i, class_val_positions.ravel()] = 1
            class_ratios[i] = np_sum(class_val_positions)

    def predict_proba(self, input_data):
        raw_output = self.predict_raw(input_data)

        total_output_sum = raw_output.sum(axis=0).reshape(
            (raw_output.shape[1], 1))
        return raw_output.T / total_output_sum

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        super(PNN, self).predict(input_data)

        if self.classes is None:
            raise ValueError("Train network before predict data")

        input_data_size = input_data.shape[1]
        train_data_size = self.input_train.shape[1]

        if input_data_size != train_data_size:
            raise ValueError("Input data must contains {0} features, got "
                             "{1}".format(train_data_size, input_data_size))

        class_ratios = self.class_ratios
        pdf_outputs = pdf_between_data(self.input_train, input_data, self.std)
        return dot(self.row_comb_matrix, pdf_outputs) / class_ratios.reshape(
            (class_ratios.size, 1))

    def predict(self, input_data):
        raw_output = self.predict_raw(input_data)
        return self.classes[raw_output.argmax(axis=0)]
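
predict_raw above sums the kernel responses of the stored samples per class and normalizes by the class sizes; predict then picks the class with the largest response. A minimal NumPy sketch of the same decision rule, with a hypothetical gaussian_ratios helper standing in for pdf_between_data:

import numpy as np

def gaussian_ratios(x_train, x_test, std):
    # Pairwise Gaussian kernel values, shape (n_train, n_test)
    distances = ((x_train[:, None, :] - x_test[None, :, :]) ** 2).sum(axis=2)
    return np.exp(-distances / (2 * std ** 2))

x_train = np.array([[0.0], [0.2], [1.0], [1.2]])
y_train = np.array([0, 0, 1, 1])
x_test = np.array([[0.1], [1.1]])

pdf_outputs = gaussian_ratios(x_train, x_test, std=0.5)
row_comb = np.array([(y_train == cls).astype(float) for cls in (0, 1)])
class_ratios = row_comb.sum(axis=1)

scores = row_comb.dot(pdf_outputs) / class_ratios[:, None]
print(scores.argmax(axis=0))   # predicted classes: [0 1]
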
Example #21
class GRNN(LazyLearningMixin, BaseNetwork):
    """
    Generalized Regression Neural Network (GRNN). Network applies
    only to the regression problems.

    Parameters
    ----------
    std : float
        Standard deviation for the PDF function, defaults to ``0.1``.
        If your input features have large values then the standard
        deviation should also be large. For instance, if the input
        features are in the range ``[0, 20]`` then the standard deviation
        should be a big value as well, like ``10`` or ``15``. Values that
        are too small will lead to bad predictions.

    {Verbose.verbose}

    Notes
    -----
    - GRNN is sensitive to cases where one input feature has
      much larger values than another. Before using it make sure that
      the input values are normalized and have similar scales.

    - Make sure that the standard deviation is in the same range as
      the input features. Check the ``std`` parameter description for
      more information.

    - The bigger the training dataset, the slower the prediction.
      The method is much more efficient for small datasets.

    {LazyLearningMixin.Notes}

    Methods
    -------
    {LazyLearningMixin.train}

    {BaseSkeleton.predict}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     preprocessing.minmax_scale(dataset.data),
    ...     preprocessing.minmax_scale(dataset.target.reshape((-1, 1))),
    ...     test_size=0.3,
    ... )
    >>>
    >>> nw = algorithms.GRNN(std=0.1, verbose=False)
    >>> nw.train(x_train, y_train)
    >>>
    >>> y_predicted = nw.predict(x_test)
    >>> estimators.rmse(y_predicted, y_test)
    0.2381013391408185
    """
    std = BoundedProperty(default=0.1, minval=0)

    def train(self, input_train, target_train, copy=True):
        """
        Trains the network. GRNN doesn't actually train, it just stores
        the input data and uses it for prediction.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        target_train : array-like (n_samples,)
            Target variable should be vector or matrix
            with one feature column.

        copy : bool
            If ``True``, the input matrices will be copied.
            Defaults to ``True``.

        Raises
        ------
        ValueError
            If something is wrong with the input data.
        """
        input_train = format_data(input_train, copy=copy)
        target_train = format_data(target_train, copy=copy)

        n_target_features = target_train.shape[1]
        if n_target_features != 1:
            raise ValueError("Target value must be one dimensional array")

        LazyLearningMixin.train(self, input_train, target_train)

    def predict(self, input_data):
        """
        Make a prediction from the input data.

        Parameters
        ----------
        input_data : array-like (n_samples, n_features)

        Raises
        ------
        ValueError
            If something is wrong with the input data.

        Returns
        -------
        array-like (n_samples,)
        """
        if self.input_train is None:
            raise NotTrained("Cannot make a prediction. Network "
                             "hasn't been trained yet")

        input_data = format_data(input_data)

        input_data_size = input_data.shape[1]
        train_data_size = self.input_train.shape[1]

        if input_data_size != train_data_size:
            raise ValueError("Input data must contain {0} features, got "
                             "{1}".format(train_data_size, input_data_size))

        ratios = pdf_between_data(self.input_train, input_data, self.std)
        return (dot(self.target_train.T, ratios) / ratios.sum(axis=0)).T
Example #22
class Hessian(StepSelectionBuiltIn, GradientDescent):
    """
    Hessian gradient descent optimization. This variation of gradient
    descent uses second-derivative information, which helps to choose
    a better gradient direction and, as a consequence, a better weight
    update at each epoch.

    Parameters
    ----------
    penalty_const : float
        The inverse hessian can be a singular matrix. For this reason
        the algorithm adds a penalty to the hessian: the identity
        matrix multiplied by this constant. Defaults to ``1``.

    {GradientDescent.connection}

    {GradientDescent.error}

    {GradientDescent.show_epoch}

    {GradientDescent.shuffle_data}

    {GradientDescent.epoch_end_signal}

    {GradientDescent.train_end_signal}

    {GradientDescent.verbose}

    {GradientDescent.addons}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> mnet = algorithms.Hessian((2, 3, 1))
    >>> mnet.train(x_train, y_train)

    See Also
    --------
    :network:`HessianDiagonal` : Hessian diagonal approximation.
    """
    penalty_const = BoundedProperty(default=1, minval=0)

    step = WithdrawProperty()

    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])
        penalty_const = asfloat(self.penalty_const)

        # Shared variable that keeps the most recent inverse hessian.
        self.variables.hessian = theano.shared(
            value=asfloat(np.zeros((n_parameters, n_parameters))),
            name='hessian_inverse')

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        # Regularize the hessian with the penalty constant so that it
        # stays invertible, then apply a Newton step to the parameters.
        hessian_inverse = T.nlinalg.matrix_inverse(
            hessian_matrix + penalty_const * T.eye(n_parameters))
        updated_parameters = param_vector - hessian_inverse.dot(full_gradient)

        updates = setup_parameter_updates(parameters, updated_parameters)
        updates.append((self.variables.hessian, hessian_inverse))

        return updates
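
The regularized Newton step assembled above is easier to see with concrete numbers. The function below is a self-contained NumPy sketch of the same idea; the name ``newton_step`` and the toy quadratic are invented for the illustration, not part of the library.

import numpy as np

def newton_step(params, gradient, hessian, penalty_const=1.0):
    # Add `penalty_const * I` to the hessian so that the matrix
    # stays invertible even when the raw hessian is singular.
    n = params.size
    regularized = hessian + penalty_const * np.eye(n)

    # Solve the linear system instead of forming an explicit inverse;
    # this applies the inverse hessian to the gradient.
    return params - np.linalg.solve(regularized, gradient)

# Example: one step on f(x) = x0**2 + 10 * x1**2 starting from (1, 1).
params = np.array([1.0, 1.0])
gradient = np.array([2.0, 20.0])   # gradient of f at (1, 1)
hessian = np.diag([2.0, 20.0])     # constant hessian of the quadratic
print(newton_step(params, gradient, hessian, penalty_const=0.0))
# -> [0. 0.], the exact minimum, since the penalty is disabled here
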
Example #23
class LevenbergMarquardt(StepSelectionBuiltIn, BaseGradientDescent):
    """
    Levenberg-Marquardt algorithm is a variation of the Newton's method.
    It minimizes MSE error. The algorithm approximates Hessian matrix using
    dot product between two jacobian matrices.

    Notes
    -----
    - Method requires all training data during propagation, which means
      it's not allowed to use mini-batches.

    - Network minimizes only Mean Squared Error (MSE) loss function.

    - Efficient for small training datasets, because it
      computes the gradient for each sample separately.

    - Efficient for small-sized networks.

    Parameters
    ----------
    {BaseGradientDescent.connection}

    mu : float
        Controls the inversion of the ``J.T * J`` matrix.
        Defaults to ``0.01``.

    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases
        the error; otherwise ``mu`` is increased by the same factor.
        Defaults to ``1.2``.

    error : {{``mse``}}
        Levenberg-Marquardt works only for quadratic functions.
        Defaults to ``mse``.

    {BaseGradientDescent.show_epoch}

    {BaseGradientDescent.shuffle_data}

    {BaseGradientDescent.epoch_end_signal}

    {BaseGradientDescent.train_end_signal}

    {BaseGradientDescent.verbose}

    {BaseGradientDescent.addons}

    Attributes
    ----------
    {BaseGradientDescent.Attributes}

    Methods
    -------
    {BaseGradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt((2, 3, 1))
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`BaseGradientDescent` : BaseGradientDescent algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    step = WithdrawProperty()

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=tf.Variable(self.mu, name='lev-marq/mu'),
            last_error=tf.Variable(np.nan, name='lev-marq/last-error'),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((network_output - prediction_func) ** 2)

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1))
        )
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.load(last_error, tensorflow_session())
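
The core of the update above is a damped Gauss-Newton step: the hessian is approximated by ``J.T * J``, damped with ``mu`` and applied to the per-sample errors, while ``mu`` itself shrinks after epochs that reduce the error and grows otherwise. The NumPy sketch below shows the same arithmetic outside of TensorFlow; the names ``levenberg_marquardt_step`` and ``update_mu`` are invented for the illustration.

import numpy as np

def levenberg_marquardt_step(params, jacobian, residuals, mu):
    # Approximate the hessian with J.T @ J and damp it with mu so the
    # system is always solvable: a large mu pushes the update towards
    # plain gradient descent, a small mu towards the Gauss-Newton step.
    n = params.size
    approx_hessian = jacobian.T.dot(jacobian) + mu * np.eye(n)
    gradient_like = jacobian.T.dot(residuals)
    return params - np.linalg.solve(approx_hessian, gradient_like)

def update_mu(mu, last_error, current_error, factor=1.2):
    # Same rule as the `tf.where` above: grow mu when the error got
    # worse, shrink it when the error improved.
    return mu * factor if current_error > last_error else mu / factor

# One illustrative step for the linear residuals e(p) = A @ p - b.
A = np.array([[1.0, 0.0], [0.0, 2.0], [1.0, 1.0]])
b = np.array([1.0, 2.0, 2.0])
params = np.zeros(2)
residuals = A.dot(params) - b
print(levenberg_marquardt_step(params, A, residuals, mu=0.01))
# -> close to the least-squares solution [1, 1]
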
Example #24
class PNN(BaseSkeleton):
    """
    Probabilistic Neural Network (PNN). Network applies only to
    the classification problems.

    Notes
    -----
    - PNN is sensitive to cases where one input feature has much
      larger values than another. Input data has to be normalized
      before training.

    - The standard deviation has to match the range of the input
      features. Check the ``std`` parameter description for more
      information.

    - The bigger the training dataset, the slower the prediction.
      The algorithm is much more efficient for small datasets.

    - The network uses lazy learning, which means that it doesn't
      need iterative training. It just stores the parameters and
      uses them to make predictions.

    Parameters
    ----------
    std : float
        Standard deviation for the Probability Density Function (PDF).
        If your input features have high values, then the standard
        deviation should also be high. For instance, if the input
        features are in the range ``[0, 20]``, then the standard
        deviation should also be a big value, such as ``10`` or ``15``.
        Small values will lead to poor predictions.

    batch_size : int or None
        Set up the mini-batch size. The ``None`` value will ensure that
        all data samples will be propagated through the network at
        once. Defaults to ``128``.

    {Verbose.verbose}

    Methods
    -------
    train(X_train, y_train, copy=True)
        The network just stores all the information about the data and
        uses it for the prediction. The ``copy`` parameter copies the
        input data before saving it inside the network.

        The ``y_train`` argument should be a vector or
        matrix with one feature column.

    predict(X)
        Return classes associated with each sample in the ``X``.

    predict_proba(X)
        Predict probabilities for each class.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>>
    >>> from sklearn import datasets, metrics
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms
    >>>
    >>> dataset = datasets.load_digits()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     dataset.data, dataset.target, test_size=0.3
    ... )
    >>>
    >>> pnn = algorithms.PNN(std=10, verbose=False)
    >>> pnn.train(x_train, y_train)
    >>>
    >>> y_predicted = pnn.predict(x_test)
    >>> metrics.accuracy_score(y_test, y_predicted)
    0.98888888888888893
    """
    std = BoundedProperty(minval=0)
    batch_size = IntProperty(default=128, minval=0, allow_none=True)

    def __init__(self, std, batch_size=128, verbose=False):
        self.std = std
        self.batch_size = batch_size

        self.classes = None
        self.X_train = None
        self.y_train = None

        super(PNN, self).__init__(batch_size=batch_size, verbose=verbose)

    def train(self, X_train, y_train, copy=True):
        """
        Trains the network. PNN doesn't actually train, it just stores
        the input data and uses it for prediction.

        Parameters
        ----------
        X_train : array-like (n_samples, n_features)

        y_train : array-like (n_samples,)
            Target variable should be a vector or a matrix
            with one feature column.

        copy : bool
            If ``True``, input matrices will be copied.
            Defaults to ``True``.

        Raises
        ------
        ValueError
            If something is wrong with the input data.
        """
        X_train = format_data(X_train, copy=copy)
        y_train = format_data(y_train, copy=copy, make_float=False)

        self.X_train = X_train
        self.y_train = y_train

        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "Number of samples in the input and "
                "target datasets are different")

        if y_train.shape[1] != 1:
            raise ValueError(
                "Target value should be vector or "
                "matrix with only one column")

        classes = self.classes = np.unique(y_train)
        n_classes = classes.size
        n_samples = X_train.shape[0]

        class_ratios = self.class_ratios = np.zeros(n_classes)
        row_comb_matrix = self.row_comb_matrix = np.zeros(
            (n_classes, n_samples))

        for i, class_name in enumerate(classes):
            class_val_positions = (y_train == class_name)
            row_comb_matrix[i, class_val_positions.ravel()] = 1
            class_ratios[i] = np.sum(class_val_positions)

    def predict_proba(self, X):
        """
        Predict probabilities for each class.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        outputs = iters.apply_batches(
            function=self.predict_raw,
            inputs=format_data(X),
            batch_size=self.batch_size,
            show_progressbar=self.logs.enable,
        )
        raw_output = np.concatenate(outputs, axis=1)

        total_output_sum = raw_output.sum(axis=0).reshape((-1, 1))
        return raw_output.T / total_output_sum

    def predict_raw(self, X):
        """
        Raw prediction.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Raises
        ------
        NotTrained
            If network hasn't been trained.

        ValueError
            If something is wrong with the input data.

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        if self.classes is None:
            raise NotTrained(
                "Cannot make a prediction. Network hasn't been trained yet")

        if X.shape[1] != self.X_train.shape[1]:
            raise ValueError(
                "Input data must contain {0} features, got {1}"
                "".format(self.X_train.shape[1],  X.shape[1]))

        class_ratios = self.class_ratios.reshape((-1, 1))
        pdf_outputs = pdf_between_data(self.X_train, X, self.std)

        return np.dot(self.row_comb_matrix, pdf_outputs) / class_ratios

    def predict(self, X):
        """
        Predicts class from the input data.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples,)
        """
        outputs = iters.apply_batches(
            function=self.predict_raw,
            inputs=format_data(X),
            batch_size=self.batch_size,
            show_progressbar=self.logs.enable,
        )

        raw_output = np.concatenate(outputs, axis=1)
        return self.classes[raw_output.argmax(axis=0)]
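
``predict_raw`` computes, for every class, the average kernel response of that class's stored samples; normalizing those scores per test sample gives class probabilities, and the argmax gives the predicted class. The compact NumPy sketch below illustrates that idea, again assuming a plain Gaussian kernel in place of ``pdf_between_data`` and using the invented helper name ``pnn_predict_proba``.

import numpy as np

def pnn_predict_proba(x_train, y_train, x_test, std=10.0):
    classes = np.unique(y_train)

    # Gaussian kernel between every training and test sample,
    # shape (n_train, n_test).
    diff = x_train[:, np.newaxis, :] - x_test[np.newaxis, :, :]
    kernel = np.exp(-np.sum(diff ** 2, axis=2) / (2 * std ** 2))

    # Average kernel response per class, shape (n_classes, n_test).
    scores = np.vstack([
        kernel[y_train == label].mean(axis=0) for label in classes
    ])

    # Normalize per test sample so every row of the result sums to 1.
    return classes, (scores / scores.sum(axis=0)).T

# Usage: the predicted class is the argmax over the probabilities.
x_train = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
y_train = np.array([0, 0, 1])
classes, proba = pnn_predict_proba(x_train, y_train, np.array([[0.2, 0.1]]))
print(classes[proba.argmax(axis=1)])  # -> [0]
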
Example #25
class WeightElimination(WeightUpdateConfigurable):
    """
    Weight Elimination algorithm penalizes large weights
    and limits the network's freedom. In this way the
    algorithm helps to reduce network overfitting.

    Parameters
    ----------
    decay_rate : float
        Controls the effect of the penalty on the network
        weight updates. Defaults to ``0.1``.

    zero_weight : float
        Second important parameter for weight penalization.
        Defaults to ``1``. A small value can push all weights
        close to zero, while a big value makes the penalty's
        contribution to the weight update less significant.
        This means that with a bigger ``zero_weight`` the
        network allows larger weight values.

    Warns
    -----
    {WeightUpdateConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     decay_rate=0.1,
    ...     addons=[algorithms.WeightElimination]
    ... )

    See Also
    --------
    :network:`WeightDecay` : Weight Decay penalty.

    Notes
    -----
    Before adding this regularization, carefully choose the
    ``decay_rate`` and ``zero_weight`` parameters for the
    problem. Invalid parameters can push the weights very
    close to the origin (all values become close to zero).

    References
    ----------
    [1] Weigend, A. S.; Rumelhart, D. E. & Huberman, B. A. (1991),
        Generalization by Weight-Elimination with Application to
        Forecasting, in Richard P. Lippmann; John E. Moody & David S.
        Touretzky, ed., Advances in Neural Information Processing
        Systems, San Francisco, CA: Morgan Kaufmann, pp. 875--882.
    """
    decay_rate = BoundedProperty(default=0.1, minval=0)
    zero_weight = BoundedProperty(default=1, minval=0)

    def init_train_updates(self):
        original_updates = super(WeightElimination, self).init_train_updates()
        parameters = [param for _, _, param in iter_parameters(self.layers)]
        modified_updates = []

        step = self.variables.step
        decay_koef = asfloat(self.decay_rate * step)
        zero_weight_square = asfloat(self.zero_weight**2)

        for parameter, updated in original_updates:
            if parameter in parameters:
                updated -= decay_koef * (
                    (2 * parameter / zero_weight_square) /
                    tf.square(1 + tf.square(parameter) / zero_weight_square))
            modified_updates.append((parameter, updated))

        return modified_updates
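
The term subtracted above is the derivative of the weight-elimination penalty ``(w / w0)**2 / (1 + (w / w0)**2)`` scaled by ``decay_rate * step``, where ``w0`` is ``zero_weight``. The short NumPy sketch below evaluates the penalty and its derivative so the effect of ``zero_weight`` is easy to inspect; it is an illustration of the formula with invented helper names, not library code.

import numpy as np

def weight_elimination_penalty(w, zero_weight=1.0):
    # Penalty per weight: (w / w0)^2 / (1 + (w / w0)^2).
    ratio_sq = (w / zero_weight) ** 2
    return ratio_sq / (1.0 + ratio_sq)

def weight_elimination_gradient(w, zero_weight=1.0):
    # Derivative of the penalty with respect to w; this is the same
    # expression that gets subtracted from the update above.
    w0_sq = zero_weight ** 2
    return (2.0 * w / w0_sq) / (1.0 + w ** 2 / w0_sq) ** 2

weights = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
print(weight_elimination_penalty(weights))   # approaches 1 for large |w|
print(weight_elimination_gradient(weights))  # largest for mid-sized weights
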
Example #26
class HebbRule(BaseStepAssociative):
    """
    Neural Network with Hebbian Learning. It's an unsupervised
    algorithm that can learn associations from the data.

    Notes
    -----
    - The network always generates weights that contain a ``0``
      weight for the conditioned stimulus and ``1`` for the other one.
      Such initialization helps to control the default state
      of the feature learning.

    Parameters
    ----------
    decay_rate : float
        Decay rate controls network's weights. It helps network to
        'forget' information and control weight's size. Without this
        parameter network's weights will increase fast.
        Defaults to ``0.2``.

    {BaseStepAssociative.Parameters}

    Methods
    -------
    {BaseStepAssociative.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> pavlov_dog_data = np.array([
    ...     [1, 0],  # food, no bell
    ...     [1, 1],  # food, bell
    ... ])
    >>> dog_test_cases = np.array([
    ...     [0, 0],  # no food, no bell
    ...     [0, 1],  # no food, bell
    ...     [1, 0],  # food, no bell
    ...     [1, 1],  # food, bell
    ... ])
    >>>
    >>> hebbnet = algorithms.HebbRule(
    ...     n_inputs=2,
    ...     n_outputs=1,
    ...     n_unconditioned=1,
    ...     step=0.1,
    ...     decay_rate=0.8,
    ...     verbose=False
    ... )
    >>> hebbnet.train(pavlov_dog_data, epochs=2)
    >>> hebbnet.predict(dog_test_cases)
    array([[0],
           [1],
           [1],
           [1]])
    """
    decay_rate = BoundedProperty(default=0.2, minval=0)

    def weight_delta(self, input_row, layer_output):
        n_unconditioned = self.n_unconditioned
        weight = self.weight[n_unconditioned:, :]
        delta = input_row[:, n_unconditioned:].T.dot(layer_output)
        return -self.decay_rate * weight + self.step * delta
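
``weight_delta`` is the classic Hebbian rule with decay: a conditioned weight grows in proportion to the product of its input and the layer output, while the decay term keeps the weights from growing without bound. A minimal NumPy sketch of one such update, with made-up values and an invented helper name:

import numpy as np

def hebb_weight_update(weight, input_row, layer_output,
                       step=0.1, decay_rate=0.2):
    # Hebbian term: strengthen the connection when the input and the
    # output are active together; decay term: slowly forget.
    delta = input_row.T.dot(layer_output)
    return weight + (-decay_rate * weight + step * delta)

weight = np.array([[0.0]])        # conditioned stimulus (bell) weight
input_row = np.array([[1.0]])     # the bell is present
layer_output = np.array([[1.0]])  # the neuron fired
print(hebb_weight_update(weight, input_row, layer_output))  # -> [[0.1]]
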
Example #27
class ErrDiffStepUpdate(SingleStepConfigurable):
    """
    This algorithm updates the step based on the error difference
    between epochs.

    Parameters
    ----------
    update_for_smaller_error : float
        The ``step`` is multiplied by this value when the error
        is smaller than in the previous epoch. Defaults to ``1.05``.
        The value can't be less than ``1``.

    update_for_bigger_error : float
        The ``step`` is multiplied by this value when the error
        is bigger than in the previous epoch. Defaults to ``0.7``.

    error_difference : float
        Indicates by how much the error has to grow compared to
        the previous epoch in order to trigger a step reduction.
        Defaults to ``1.04``. The value can't be less than ``1``.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>>
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     verbose=False,
    ...     addons=[algorithms.ErrDiffStepUpdate]
    ... )
    """
    update_for_smaller_error = BoundedProperty(default=1.05, minval=1)
    update_for_bigger_error = ProperFractionProperty(default=0.7)
    error_difference = BoundedProperty(default=1.04, minval=1)

    def init_variables(self):
        self.variables.update(
            last_error=tf.Variable(
                np.nan,
                name='err-diff-step-update/last-error',
            ),
            previous_error=tf.Variable(
                np.nan,
                name='err-diff-step-update/previous-error',
            ),
        )
        super(ErrDiffStepUpdate, self).init_variables()

    def init_train_updates(self):
        updates = super(ErrDiffStepUpdate, self).init_train_updates()

        step = self.variables.step
        last_error = self.variables.last_error
        previous_error = self.variables.previous_error

        step_update_condition = tf.where(
            last_error < previous_error,
            self.update_for_smaller_error * step,
            tf.where(
                last_error > self.error_difference * previous_error,
                self.update_for_bigger_error * step,
                step
            )
        )
        updates.append((step, step_update_condition))
        return updates

    def on_epoch_start_update(self, epoch):
        super(ErrDiffStepUpdate, self).on_epoch_start_update(epoch)

        previous_error = self.errors.previous()
        if previous_error:
            session = tensorflow_session()
            last_error = self.errors.last()

            self.variables.last_error.load(last_error, session)
            self.variables.previous_error.load(previous_error, session)
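
The nested ``tf.where`` above encodes a simple three-way rule. Written as plain Python (a sketch of the rule with an invented function name, not library code) it reads:

def adapt_step(step, last_error, previous_error,
               update_for_smaller_error=1.05,
               update_for_bigger_error=0.7,
               error_difference=1.04):
    # Error went down: gently increase the step.
    if last_error < previous_error:
        return update_for_smaller_error * step
    # Error grew by more than the allowed margin: shrink the step.
    if last_error > error_difference * previous_error:
        return update_for_bigger_error * step
    # Error grew, but within the tolerated margin: keep the step.
    return step

print(adapt_step(step=0.1, last_error=0.50, previous_error=0.60))  # ~0.105
print(adapt_step(step=0.1, last_error=0.70, previous_error=0.60))  # ~0.07
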
Example #28
class LevenbergMarquardt(BaseOptimizer):
    """
    Levenberg-Marquardt algorithm is a variation of the Newton's method.
    It minimizes MSE error. The algorithm approximates Hessian matrix using
    dot product between two jacobian matrices.

    Notes
    -----
    - Method requires all training data during propagation, which means
      it's not allowed to use mini-batches.

    - Network minimizes only Mean Squared Error (MSE) loss function.

    - Efficient for small training datasets, because it
      computes the gradient for each sample separately.

    - Efficient for small-sized networks.

    Parameters
    ----------
    {BaseOptimizer.network}

    mu : float
        Controls the inversion of the ``J.T * J`` matrix.
        Defaults to ``0.01``.

    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases
        the error; otherwise ``mu`` is increased by the same factor.
        Defaults to ``1.2``.

    loss : {{``mse``}}
        Levenberg-Marquardt works only with the quadratic loss
        function. Defaults to ``mse``.

    {BaseOptimizer.show_epoch}

    {BaseOptimizer.shuffle_data}

    {BaseOptimizer.signals}

    {BaseOptimizer.verbose}

    Attributes
    ----------
    {BaseOptimizer.Attributes}

    Methods
    -------
    {BaseOptimizer.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>> from neupy.layers import *
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> network = Input(2) >> Sigmoid(3) >> Sigmoid(1)
    >>> optimizer = algorithms.LevenbergMarquardt(network)
    >>> optimizer.train(x_train, y_train)

    See Also
    --------
    :network:`BaseOptimizer` : BaseOptimizer algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    loss = ChoiceProperty(default='mse', choices={'mse': objectives.mse})

    step = WithdrawProperty()
    regularizer = WithdrawProperty()

    def init_functions(self):
        self.variables.update(
            mu=tf.Variable(self.mu, name='lev-marq/mu'),
            last_error=tf.Variable(np.nan, name='lev-marq/last-error'),
        )
        super(LevenbergMarquardt, self).init_functions()

    def init_train_updates(self):
        training_outputs = self.network.training_outputs
        last_error = self.variables.last_error
        error_func = self.variables.loss
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((self.target - training_outputs)**2)

        variables = self.network.variables
        params = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)))
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def one_training_update(self, X_train, y_train):
        if self.errors.train:
            last_error = self.errors.train[-1]
            self.variables.last_error.load(last_error, tensorflow_session())

        return super(LevenbergMarquardt,
                     self).one_training_update(X_train, y_train)
Example #29
class LeakStepAdaptation(SingleStepConfigurable):
    """ Leak Learning Rate Adaptation algorithm for step adaptation procedure
    in backpropagation algortihm. By default every layer has the same value
    as ``step`` parameter in network, but after first training epoch they
    must be different.

    Parameters
    ----------
    leak_size : float
        Defaults to ``0.01``. This variable identifies a proportion, so
        it's always between 0 and 1. Usually this value is small.
    alpha : float
        The ``alpha`` controls the total step update ratio (it plays a
        role similar to the step in the weight update procedure).
        Defaults to ``0.001``. Typically this value is small.
    beta : float
        Similar to ``alpha``, but it controls the ratio only for the
        update matrix norms. Defaults to ``20``.
        Typically this value is > 1.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from neupy import algorithms
    >>>
    >>> bpnet = algorithms.GradientDescent(
    ...     (2, 4, 1),
    ...     addons=[algorithms.LeakStepAdaptation]
    ... )

    References
    ----------
    .. [1] Noboru M. "Adaptive on-line learning in changing
        environments", 1997
    .. [2] LeCun, "Efficient BackProp", 1998
    """
    leak_size = ProperFractionProperty(default=0.01)
    alpha = BoundedProperty(default=0.001, minval=0)
    beta = BoundedProperty(default=20, minval=0)

    def init_variables(self):
        super(LeakStepAdaptation, self).init_variables()
        n_parameters = count_parameters(self)
        self.variables.leak_average = theano.shared(value=asfloat(
            np.zeros(n_parameters)),
                                                    name='leak_average')

    def init_train_updates(self):
        updates = super(LeakStepAdaptation, self).init_train_updates()

        alpha = self.alpha
        beta = self.beta
        leak_size = self.leak_size

        step = self.variables.step
        leak_average = self.variables.leak_average

        parameters = list(iter_parameters(self))
        gradients = T.grad(self.variables.error_func, wrt=parameters)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        leak_average_update = ((1 - leak_size) * leak_average +
                               leak_size * full_gradient)
        new_step = step + alpha * step * (
            beta * leak_average_update.norm(L=2) - step)

        updates.extend([
            (leak_average, leak_average_update),
            (step, new_step),
        ])

        return updates
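
The adaptation keeps a leaky (exponentially decaying) average of the flattened gradient and nudges the step towards ``beta`` times the norm of that average. A NumPy sketch of one adaptation step, with made-up gradient values and an invented function name:

import numpy as np

def leak_step_adaptation(step, leak_average, gradient,
                         leak_size=0.01, alpha=0.001, beta=20.0):
    # Leaky average of the gradient vector.
    new_leak_average = (1 - leak_size) * leak_average + leak_size * gradient

    # Move the step towards beta * ||leak_average||: the step grows
    # while the gradients stay large and consistent, shrinks otherwise.
    new_step = step + alpha * step * (
        beta * np.linalg.norm(new_leak_average) - step)
    return new_step, new_leak_average

step, leak_average = 0.1, np.zeros(3)
gradient = np.array([0.5, -0.2, 0.1])
print(leak_step_adaptation(step, leak_average, gradient)[0])
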
Example #30
class PNN(BaseNetwork, LazyLearningMixin, MinibatchTrainingMixin):
    """
    Probabilistic Neural Network (PNN). Network applies only to
    the classification problems.

    Notes
    -----
    - PNN is sensitive to cases where one input feature has much
      larger values than another. Before using it, make sure that the
      input values are normalized and have similar scales.

    - Make sure that the standard deviation is in the same range as
      the input features. Check the ``std`` parameter description for
      more information.

    - The bigger the training dataset, the slower the prediction.
      The algorithm is much more efficient for small datasets.

    {LazyLearningMixin.Notes}

    Parameters
    ----------
    std : float
        Standard deviation for the Probability Density Function (PDF).
        Defaults to ``0.1``. If your input features have high values,
        then the standard deviation should also be high. For instance,
        if the input features are in the range ``[0, 20]``, then the
        standard deviation should also be a big value, such as ``10``
        or ``15``. Small values will lead to poor predictions.

    {MinibatchTrainingMixin.batch_size}

    {BaseNetwork.verbose}

    Methods
    -------
    {LazyLearningMixin.train}

        The ``target_train`` argument should be a vector or
        matrix with one feature column.

    {BaseSkeleton.predict}

    predict_proba(input_data)
        Predict probabilities for each class.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>>
    >>> from sklearn import datasets, metrics
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_digits()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     dataset.data, dataset.target, train_size=0.7
    ... )
    >>>
    >>> pnn = algorithms.PNN(std=10, verbose=False)
    >>> pnn.train(x_train, y_train)
    >>>
    >>> y_predicted = pnn.predict(x_test)
    >>> metrics.accuracy_score(y_test, y_predicted)
    0.98888888888888893
    """
    std = BoundedProperty(default=0.1, minval=0)

    def __init__(self, **options):
        super(PNN, self).__init__(**options)
        self.classes = None

    def train(self, input_train, target_train, copy=True):
        """
        Trains the network. PNN doesn't actually train, it just stores
        the input data and uses it for prediction.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        target_train : array-like (n_samples,)
            Target variable should be a vector or a matrix
            with one feature column.

        copy : bool
            If ``True``, input matrices will be copied.
            Defaults to ``True``.

        Raises
        ------
        ValueError
            If something is wrong with the input data.
        """
        input_train = format_data(input_train, copy=copy)
        target_train = format_data(target_train, copy=copy, make_float=False)

        LazyLearningMixin.train(self, input_train, target_train)

        n_target_features = target_train.shape[1]
        if n_target_features != 1:
            raise ValueError("Target value should be a vector or a "
                             "matrix with one column")

        classes = self.classes = np.unique(target_train)
        n_classes = classes.size
        n_samples = input_train.shape[0]

        class_ratios = self.class_ratios = np.zeros(n_classes)
        row_comb_matrix = self.row_comb_matrix = np.zeros(
            (n_classes, n_samples))

        for i, class_name in enumerate(classes):
            class_val_positions = (target_train == class_name)
            row_comb_matrix[i, class_val_positions.ravel()] = 1
            class_ratios[i] = np.sum(class_val_positions)

    def predict_proba(self, input_data):
        """
        Predict probabilities for each class.

        Parameters
        ----------
        input_data : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        outputs = self.apply_batches(
            function=self.predict_raw,
            input_data=format_data(input_data),
            description='Prediction batches',
            show_progressbar=True,
            show_error_output=False,
        )
        raw_output = np.concatenate(outputs, axis=1)

        total_output_sum = raw_output.sum(axis=0).reshape((-1, 1))
        return raw_output.T / total_output_sum

    def predict_raw(self, input_data):
        """
        Raw prediction.

        Parameters
        ----------
        input_data : array-like (n_samples, n_features)

        Raises
        ------
        NotTrained
            If network hasn't been trained.

        ValueError
            If something is wrong with the input data.

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        if self.classes is None:
            raise NotTrained("Cannot make a prediction. Network "
                             "hasn't been trained yet")

        input_data_size = input_data.shape[1]
        train_data_size = self.input_train.shape[1]

        if input_data_size != train_data_size:
            raise ValueError("Input data must contain {0} features, got "
                             "{1}".format(train_data_size, input_data_size))

        class_ratios = self.class_ratios.reshape((-1, 1))
        pdf_outputs = pdf_between_data(self.input_train, input_data, self.std)

        return np.dot(self.row_comb_matrix, pdf_outputs) / class_ratios

    def predict(self, input_data):
        """
        Predicts class from the input data.

        Parameters
        ----------
        input_data : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples,)
        """
        outputs = self.apply_batches(
            function=self.predict_raw,
            input_data=format_data(input_data),
            description='Prediction batches',
            show_progressbar=True,
            show_error_output=False,
        )
        raw_output = np.concatenate(outputs, axis=1)
        return self.classes[raw_output.argmax(axis=0)]