Example #1
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    with a hand-crafted AdaDelta implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t**2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
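
The manual loop above applies Zeiler's AdaDelta rule with the gradient equal to the
current parameter value (the gradient of SumOfOneHalfParamsSquared) and with
`scale * learning_rate` playing the role of the epsilon smoothing constant. Below is a
minimal standalone NumPy sketch of the same update, using a hypothetical
`adadelta_step` helper and an explicit `epsilon` argument:

import numpy as np

def adadelta_step(param, grad, g2, dx2, decay=0.95, epsilon=1e-6):
    """One AdaDelta update (Zeiler, 2012)."""
    g2 = decay * g2 + (1 - decay) * grad ** 2   # accumulate E[g^2]
    rms_g = np.sqrt(g2 + epsilon)               # RMS[g]_t
    rms_dx = np.sqrt(dx2 + epsilon)             # RMS[dx]_{t-1}
    dx = -rms_dx / rms_g * grad                 # parameter increment
    dx2 = decay * dx2 + (1 - decay) * dx ** 2   # accumulate E[dx^2]
    return param + dx, g2, dx2

Starting from zero accumulators and setting `epsilon` to `scale * learning_rate`, two
calls with `grad` equal to the current parameter value reproduce the two manual steps
checked against SGD above.
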
Example #2
    def __init__(self,
                 layers,
                 random_state=None,
                 learning_rule='sgd',
                 learning_rate=0.01,
                 learning_momentum=0.9,
                 dropout=False,
                 batch_size=1,
                 n_iter=None,
                 n_stable=50,
                 f_stable=0.001,
                 valid_set=None,
                 valid_size=0.0,
                 verbose=False,
                 **params):

        self.layers = []
        for i, layer in enumerate(layers):
            assert isinstance(layer, Layer),\
                "Specify each layer as an instance of a `sknn.mlp.Layer` object."

            # Layer names are optional; if one is not specified, generate it.
            if layer.name is None:
                label = "hidden" if i < len(layers) - 1 else "output"
                layer.name = "%s%i" % (label, i)

            # sklearn may pass layers in as additional named parameters; remove them.
            if layer.name in params:
                del params[layer.name]

            self.layers.append(layer)

        # Don't support any additional parameters that are not in the constructor.
        # These are specified only so `get_params()` can return named layers,
        # allowing the double-underscore syntax to work.
        assert len(params) == 0,\
            "The specified additional parameters are unknown."

        self.random_state = random_state
        self.learning_rule = learning_rule
        self.learning_rate = learning_rate
        self.learning_momentum = learning_momentum
        self.dropout = dropout if type(dropout) is float else (
            0.5 if dropout else 0.0)
        self.batch_size = batch_size
        self.n_iter = n_iter
        self.n_stable = n_stable
        self.f_stable = f_stable
        self.valid_set = valid_set
        self.valid_size = valid_size
        self.verbose = verbose

        self.unit_counts = None
        self.input_space = None
        self.mlp = None
        self.weights = None
        self.vs = None
        self.ds = None
        self.trainer = None
        self.f = None
        self.train_set = None
        self.best_valid_error = float("inf")

        self.cost = "Dropout" if dropout else None
        if learning_rule == 'sgd':
            self._learning_rule = None
        # elif learning_rule == 'adagrad':
        #     self._learning_rule = AdaGrad()
        elif learning_rule == 'adadelta':
            self._learning_rule = AdaDelta()
        elif learning_rule == 'momentum':
            self._learning_rule = Momentum(learning_momentum)
        elif learning_rule == 'nesterov':
            self._learning_rule = Momentum(learning_momentum,
                                           nesterov_momentum=True)
        elif learning_rule == 'rmsprop':
            self._learning_rule = RMSProp()
        else:
            raise NotImplementedError(
                "Learning rule type `%s` is not supported." % learning_rule)

        self._setup()
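
Assuming this constructor belongs to the MLP estimator base in scikit-neuralnetwork
(so that `sknn.mlp.Regressor` and `sknn.mlp.Layer` expose the same parameters), a
hypothetical usage sketch selecting the 'nesterov' learning rule might look like this:

from sknn.mlp import Regressor, Layer

# Hypothetical usage sketch: two layers, Nesterov momentum, a float dropout rate.
nn = Regressor(
    layers=[
        Layer("Rectifier", units=64),   # auto-named "hidden0" by the constructor
        Layer("Linear"),                # auto-named "output1" by the constructor
    ],
    learning_rule='nesterov',
    learning_momentum=0.9,
    learning_rate=0.01,
    dropout=0.25,       # a float is used as-is; True would fall back to 0.5
    batch_size=16,
    n_iter=10,
)

Every keyword above appears in the constructor signature, so the trailing
`assert len(params) == 0` check passes; an unrecognised keyword would trip it.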