def run_algorithm():
    unsupported_modes = ['random_slice', 'random_uniform']
    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    train_iteration_mode=mode,
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)
    algorithm.setup(dataset=dataset, model=model)
    raised = False
    try:
        algorithm.train(dataset)
    except ValueError:
        print(mode)
        assert mode in unsupported_modes
        raised = True
    if mode in unsupported_modes:
        assert raised
        return True
    return False
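# run_algorithm() above reads its configuration from an enclosing scope in
# the original test file.  The assignments below are a minimal sketch of the
# names it needs (the concrete values, and the use of module scope, are
# assumptions, not the original harness); the loop then exercises both
# supported and unsupported iteration modes.
learning_rate = 1e-3
batch_size = 5
cost = DummyCost()
termination_criterion = EpochCounter(5)
model = SoftmaxModel(1)
X = np.zeros((25, 1))
X[:, 0] = np.arange(25)
dataset = DenseDesignMatrix(X=X)

for mode in ['sequential', 'shuffled_sequential',
             'random_slice', 'random_uniform']:
    run_algorithm()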
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    with a hand-crafted AdaDelta implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val ** 2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t ** 2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
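# test_adadelta() above and several tests below use `scales`, `shapes` and
# `learning_rate` without defining them; in the original test module they are
# shared module-level fixtures.  A sketch of that shared setup, using the
# values that the self-contained tests (test_lr_scalers, test_rmsprop,
# test_momentum) define locally:
scales = [.01, .02, .05, 1., 5.]
shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
learning_rate = .001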
def test_lr_scalers():
    """
    Tests that SGD respects Model.get_lr_scalers
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def __call__(self, X):
            # Implemented only so that DummyCost would work
            return X

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(.0),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    manual = [param - learning_rate * scale
              for param, scale in zip(manual, scales)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale
              for param, scale in zip(manual, scales)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
def test_sgd_unspec_num_mon_batch():
    # tests that if you don't specify a number of
    # monitoring batches, SGD configures the monitor
    # to run on all the data

    m = 25

    visited = [False] * m
    rng = np.random.RandomState([25, 9, 2012])
    X = np.zeros((m, 1))
    X[:, 0] = np.arange(m)
    dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(1)

    learning_rate = 1e-3
    batch_size = 5

    cost = DummyCost()

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=None,
                    monitoring_dataset=dataset,
                    termination_criterion=None,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    monitor = Monitor.get_monitor(model)

    X = T.matrix()

    def tracker(*data):
        X, = data
        assert X.shape[1] == 1
        for i in xrange(X.shape[0]):
            visited[int(X[i, 0])] = True

    monitor.add_channel(name='tracker',
                        ipt=X,
                        val=0.,
                        prereqs=[tracker],
                        data_specs=(model.get_input_space(),
                                    model.get_input_source()))

    monitor()

    if False in visited:
        print(visited)
        assert False
def test_adagrad():
    """
    Make sure that learning_rule.AdaGrad obtains the same parameter values as
    with a hand-crafted AdaGrad implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "Adaptive subgradient methods for online learning and stochastic
    optimization", Duchi J, Hazan E, Singer Y.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    def adagrad_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adagrad
            pstate['sg2'] += param_val ** 2
            dx_t = - (scale * learning_rate
                      / np.sqrt(pstate['sg2'])
                      * param_val)
            rval += [param_val + dx_t]
        return rval

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
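# The learning-rule tests above and below construct
# DummyModel(shapes, lr_scalers=scales) without it being defined in this
# excerpt.  The class below is a minimal sketch of such a model, inferred
# from how the tests use it and from ModelWithScalers in test_lr_scalers();
# it is an assumption, not the original helper.
class DummyModel(Model):
    def __init__(self, shapes, lr_scalers=None, init_type='random'):
        super(DummyModel, self).__init__()
        rng = np.random.RandomState(25252)
        if init_type == 'random':
            values = [rng.random_sample(shape) for shape in shapes]
        elif init_type == 'zeros':
            values = [np.zeros(shape) for shape in shapes]
        else:
            raise ValueError('Unknown value for init_type: %s' % init_type)
        self._params = [sharedX(value) for value in values]
        self.input_space = VectorSpace(1)
        self.lr_scalers = lr_scalers

    def __call__(self, X):
        # Implemented only so that DummyCost would work
        return X

    def get_lr_scalers(self):
        if self.lr_scalers:
            return dict(izip(self._params, self.lr_scalers))
        return {}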
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    cost = SumOfParams()

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in zip(manual, scales, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
def test_rmsprop():
    """
    Make sure that learning_rule.RMSProp obtains the same parameter values as
    with a hand-crafted RMSProp implementation, given a dummy model and
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)

    def rmsprop_manual(model, state):
        rval = []
        epsilon = 1. / max_scaling
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin rmsprop
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val ** 2
            rms_g_t = np.maximum(np.sqrt(pstate['g2']), epsilon)
            dx_t = - scale * learning_rate / rms_g_t * param_val
            rval += [param_val + dx_t]
        return rval

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
def test_nesterov_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model
    and learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum, nesterov_momentum=True),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    vel = [-learning_rate * scale for scale in scales]
    updates = [-learning_rate * scale + v * momentum
               for scale, v in izip(scales, vel)]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    vel = [-learning_rate * scale + i * momentum
           for scale, i in izip(scales, vel)]
    updates = [-learning_rate * scale + v * momentum
               for scale, v in izip(scales, vel)]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
def test_sgd_sequential():
    # tests that requesting train_iteration_mode = 'sequential'
    # works

    dim = 1
    batch_size = 3
    m = 5 * batch_size

    dataset = ArangeDataset(m)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    visited = [False] * m

    def visit(X):
        assert X.shape[1] == 1
        assert np.all(X[1:] == X[0:-1] + 1)
        start = int(X[0, 0])
        if start > 0:
            assert visited[start - 1]
        for i in xrange(batch_size):
            assert not visited[start + i]
            visited[start + i] = 1

    data_specs = (model.get_input_space(), model.get_input_source())
    cost = CallbackCost(visit, data_specs)

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    train_iteration_mode='sequential',
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    algorithm.train(dataset)

    assert all(visited)
def test_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model
    and learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in zip(manual, scales, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in zip(manual, scales, inc)]

    sgd.train(dataset=dataset)

    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
def prepare_adagrad_test(dataset_type='arange', model_type='random'):
    """
    Factor out common code for AdaGrad tests.

    Parameters
    ----------
    dataset_type : string, optional
        Can use either `arange` to use an ArangeDataset instance or
        `zeros` to create an all-zeros DenseDesignMatrix.
    model_type : string, optional
        How to initialize the model; `random` will initialize parameters
        to random values, `zeros` to zero.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales, init_type=model_type)

    if dataset_type == 'arange':
        dataset = ArangeDataset(1)
    elif dataset_type == 'zeros':
        X = np.zeros((1, 1))
        X[:, 0] = np.arange(1)
        dataset = DenseDesignMatrix(X)
    else:
        raise ValueError('Unknown value for dataset_type: %s' % dataset_type)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    return (cost, model, dataset, sgd, state)
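# A hypothetical example of how prepare_adagrad_test() could be used,
# assuming a module-level adagrad_manual() helper like the nested one in
# test_adagrad() above; this is a sketch, not an original test.
def test_adagrad_with_zeros_dataset():
    cost, model, dataset, sgd, state = prepare_adagrad_test(
        dataset_type='zeros')
    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))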
class SequenceTaggerNetwork(Model):
    def __init__(self, dataset, w2i, t2i, featurizer,
                 edim=None, hdims=None, fedim=None,
                 max_epochs=100, use_momentum=False, lr=.01,
                 lr_lin_decay=None, lr_scale=False, lr_monitor_decay=False,
                 valid_stop=False, reg_factors=None,
                 dropout=False, dropout_params=None,
                 embedding_init=None, embedded_model=None,
                 monitor_train=True, plot_monitor=None, num=False):
        super(SequenceTaggerNetwork, self).__init__()
        self.vocab_size = dataset.vocab_size
        self.window_size = dataset.window_size
        self.total_feats = dataset.total_feats
        self.feat_num = dataset.feat_num
        self.n_classes = dataset.n_classes
        self.max_epochs = max_epochs
        if edim is None:
            edim = 50
        if hdims is None:
            hdims = [100]
        if fedim is None:
            fedim = 5
        self.edim = edim
        self.fedim = fedim
        self.hdims = hdims
        self.w2i = w2i
        self.t2i = t2i
        self.featurizer = featurizer
        self._create_tagger()

        A_value = numpy.random.uniform(low=-.1, high=.1,
                                       size=(self.n_classes + 2,
                                             self.n_classes))
        self.A = sharedX(A_value, name='A')

        self.use_momentum = use_momentum
        self.lr = lr
        self.lr_lin_decay = lr_lin_decay
        self.lr_monitor_decay = lr_monitor_decay
        self.lr_scale = lr_scale
        self.valid_stop = valid_stop
        self.reg_factors = reg_factors
        self.close_cache = {}
        self.dropout_params = dropout_params
        self.dropout = dropout or self.dropout_params is not None
        self.hdims = hdims
        self.monitor_train = monitor_train
        self.num = num
        self.plot_monitor = plot_monitor
        if embedding_init is not None:
            self.set_embedding_weights(embedding_init)

    def _create_tagger(self):
        self.tagger = WordTaggerNetwork(self.vocab_size, self.window_size,
                                        self.total_feats, self.feat_num,
                                        self.hdims, self.edim, self.fedim,
                                        self.n_classes)

    def _create_data_specs(self, dataset):
        self.input_space = CompositeSpace([
            dataset.data_specs[0].components[i]
            for i in xrange(len(dataset.data_specs[0].components) - 1)])
        self.output_space = dataset.data_specs[0].components[-1]

        self.input_source = dataset.data_specs[1][:-1]
        self.target_source = dataset.data_specs[1][-1]

    def __getstate__(self):
        d = {}
        d['vocab_size'] = self.vocab_size
        d['window_size'] = self.window_size
        d['feat_num'] = self.feat_num
        d['total_feats'] = self.total_feats
        d['n_classes'] = self.n_classes
        d['input_space'] = self.input_space
        d['output_space'] = self.output_space
        d['input_source'] = self.input_source
        d['target_source'] = self.target_source
        d['A'] = self.A
        d['tagger'] = self.tagger
        d['w2i'] = self.w2i
        d['t2i'] = self.t2i
        d['featurizer'] = self.featurizer
        d['max_epochs'] = self.max_epochs
        d['use_momentum'] = self.use_momentum
        d['lr'] = self.lr
        d['lr_lin_decay'] = self.lr_lin_decay
        d['lr_monitor_decay'] = self.lr_monitor_decay
        d['lr_scale'] = self.lr_scale
        d['valid_stop'] = self.valid_stop
        d['reg_factors'] = self.reg_factors
        d['dropout'] = self.dropout
        d['dropout_params'] = self.dropout_params
        d['monitor_train'] = self.monitor_train
        d['num'] = self.num
        d['plot_monitor'] = self.plot_monitor
        return d

    def fprop(self, data):
        tagger_out = self.tagger.fprop(data)
        probs = T.concatenate([self.A, tagger_out])
        return probs

    def dropout_fprop(self, data, default_input_include_prob=0.5,
                      input_include_probs=None, default_input_scale=2.0,
                      input_scales=None, per_example=True):
        if input_scales is None:
            input_scales = {'input': 1.0}
        if input_include_probs is None:
            input_include_probs = {'input': 1.0}
        if self.dropout_params is not None:
            if len(self.dropout_params) == len(self.tagger.layers) - 1:
                input_include_probs['tagger_out'] = self.dropout_params[-1]
                input_scales['tagger_out'] = 1.0 / self.dropout_params[-1]
                for i, p in enumerate(self.dropout_params[:-1]):
                    input_include_probs['h{0}'.format(i)] = p
                    input_scales['h{0}'.format(i)] = 1.0 / p
        tagger_out = self.tagger.dropout_fprop(data,
                                               default_input_include_prob,
                                               input_include_probs,
                                               default_input_scale,
                                               input_scales,
                                               per_example)
        probs = T.concatenate([self.A, tagger_out])
        return probs

    @functools.wraps(Model.get_lr_scalers)
    def get_lr_scalers(self):
        if not self.lr_scale:
            return {}
        d = self.tagger.get_lr_scalers()
        d[self.A] = 1. / self.n_classes
        return d

    @functools.wraps(Model.get_params)
    def get_params(self):
        return self.tagger.get_params() + [self.A]

    def create_adjustors(self):
        initial_momentum = .5
        final_momentum = .99
        start = 1
        saturate = self.max_epochs
        self.momentum_adjustor = learning_rule.MomentumAdjustor(
            final_momentum, start, saturate)
        self.momentum_rule = learning_rule.Momentum(
            initial_momentum, nesterov_momentum=True)

        if self.lr_monitor_decay:
            self.learning_rate_adjustor = MonitorBasedLRAdjuster(
                high_trigger=1., shrink_amt=0.9, low_trigger=.95,
                grow_amt=1.1, channel_name='train_objective')
        elif self.lr_lin_decay:
            self.learning_rate_adjustor = LinearDecayOverEpoch(
                start, saturate, self.lr_lin_decay)

    def compute_used_inputs(self):
        seen = {'words': set(), 'feats': set()}
        for sen_w in self.dataset['train'].X1:
            seen['words'] |= reduce(lambda x, y: set(x) | set(y),
                                    sen_w, set())
        for sen_f in self.dataset['train'].X2:
            seen['feats'] |= reduce(lambda x, y: set(x) | set(y),
                                    sen_f, set())
        words = set(xrange(len(self.w2i)))
        feats = set(xrange(self.total_feats))
        self.notseen = {
            'words': numpy.array(sorted(words - seen['words'])),
            'feats': numpy.array(sorted(feats - seen['feats']))
        }

    def set_dataset(self, data):
        self._create_data_specs(data['train'])
        self.dataset = data
        self.compute_used_inputs()
        self.tagger.notseen = self.notseen

    def create_algorithm(self, data, save_best_path=None):
        self.set_dataset(data)
        self.create_adjustors()
        term = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            cost_crit = MonitorBased(channel_name='valid_objective',
                                     prop_decrease=.0, N=3)
            term = And(criteria=[cost_crit, term])

        #(layers, A_weight_decay)
        coeffs = None
        if self.reg_factors:
            rf = self.reg_factors
            lhdims = len(self.tagger.hdims)
            l_inputlayer = len(self.tagger.layers[0].layers)
            coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
        cost = SeqTaggerCost(coeffs, self.dropout)
        self.cost = cost

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)

        mon_dataset = dict(self.dataset)
        if not self.monitor_train:
            del mon_dataset['train']

        _learning_rule = (self.momentum_rule if self.use_momentum else None)
        self.algorithm = SGD(batch_size=1,
                             learning_rate=self.lr,
                             termination_criterion=term,
                             monitoring_dataset=mon_dataset,
                             cost=cost,
                             learning_rule=_learning_rule,
                             )
        self.algorithm.setup(self, self.dataset['train'])

        if self.plot_monitor:
            cn = ["valid_objective", "test_objective"]
            if self.monitor_train:
                cn.append("train_objective")
            plots = Plots(channel_names=cn, save_path=self.plot_monitor)
            self.pm = PlotManager([plots], freq=1)
            self.pm.setup(self, None, self.algorithm)

    def train(self):
        while True:
            if not self.algorithm.continue_learning(self):
                break
            self.algorithm.train(dataset=self.dataset['train'])
            self.monitor.report_epoch()
            self.monitor()
            self.mbsb.on_monitor(self, self.dataset['valid'], self.algorithm)
            if self.use_momentum:
                self.momentum_adjustor.on_monitor(self, self.dataset['valid'],
                                                  self.algorithm)
            if hasattr(self, 'learning_rate_adjustor'):
                self.learning_rate_adjustor.on_monitor(
                    self, self.dataset['valid'], self.algorithm)
            if hasattr(self, 'pm'):
                self.pm.on_monitor(self, self.dataset['valid'],
                                   self.algorithm)

    def prepare_tagging(self):
        X = self.get_input_space().make_theano_batch(batch_size=1)
        Y = self.fprop(X)
        self.f = theano.function([X[0], X[1]], Y)
        self.start = self.A.get_value()[0]
        self.end = self.A.get_value()[1]
        self.A_value = self.A.get_value()[2:]

    def process_input(self, words, feats):
        return self.f(words, feats)

    def tag_sen(self, words, feats, debug=False, return_probs=False):
        if not hasattr(self, 'f'):
            self.prepare_tagging()
        y = self.process_input(words, feats)
        tagger_out = y[2 + self.n_classes:]
        res = viterbi(self.start, self.A_value, self.end, tagger_out,
                      self.n_classes, return_probs)
        if return_probs:
            return res / res.sum(axis=1)[:, numpy.newaxis]
            #return res.reshape((1, len(res)))
        if debug:
            return numpy.array([[e] for e in res[1]]), tagger_out
        return numpy.array([[e] for e in res[1]])

    def get_score(self, dataset, mode='pwp'):
        self.prepare_tagging()
        tagged = (self.tag_sen(w, f)
                  for w, f in izip(dataset.X1, dataset.X2))
        gold = dataset.y
        good, bad = 0., 0.
        if mode == 'pwp':
            for t, g in izip(tagged, gold):
                g = g.argmax(axis=1)
                t = t.flatten()
                good += sum(t == g)
                bad += sum(t != g)
            return [good / (good + bad)]
        elif mode == 'f1':
            i2t = [t for t, i in sorted(self.t2i.items(),
                                        key=lambda x: x[1])]
            f1c = FScCounter(i2t, binary_input=False)
            gold = map(lambda x: x.argmax(axis=1), gold)
            tagged = map(lambda x: x.flatten(), tagged)
            return f1c.count_score(gold, tagged)

    def set_embedding_weights(self, embedding_init):
        # load embedding with gensim
        from gensim.models import Word2Vec
        try:
            m = Word2Vec.load_word2vec_format(embedding_init, binary=False)
            edim = m.layer1_size
        except UnicodeDecodeError:
            try:
                m = Word2Vec.load_word2vec_format(embedding_init, binary=True)
                edim = m.layer1_size
            except UnicodeDecodeError:
                # not in word2vec format
                m = Word2Vec.load(embedding_init)
                edim = m.layer1_size
        except ValueError:
            # glove model
            m = {}
            if embedding_init.endswith('gz'):
                fp = gzip.open(embedding_init)
            else:
                fp = open(embedding_init)
            for l in fp:
                le = l.split()
                m[le[0].decode('utf-8')] = numpy.array(
                    [float(e) for e in le[1:]], dtype=theano.config.floatX)
            edim = len(le) - 1

        if edim != self.edim:
            raise Exception("Embedding dim and edim doesn't match")
        m_lower = {}
        vocab = (m.vocab if hasattr(m, 'vocab') else m)
        for k in vocab:
            if k in ['UNKNOWN', 'PADDING']:
                continue
            if self.num:
                m_lower[replace_numerals(k.lower())] = m[k]
            else:
                m_lower[k.lower()] = m[k]
        # transform weight matrix with using self.w2i
        params = numpy.zeros(
            self.tagger.layers[0].layers[0].get_param_vector().shape,
            dtype=theano.config.floatX)
        e = self.edim
        for w in self.w2i:
            if w in m_lower:
                v = m_lower[w]
                i = self.w2i[w]
                params[i * e:(i + 1) * e] = v
        if 'UNKNOWN' in vocab:
            params[-1 * e:] = vocab['UNKNOWN']
        if 'PADDING' in vocab:
            params[-2 * e:-1 * e] = vocab['PADDING']
        self.tagger.layers[0].layers[0].set_param_vector(params)
layer3 = Softmax(max_col_norm=1.9365,
                 layer_name='y',
                 n_classes=7,
                 istdev=.05)

layers = [layer0, layer1, layer3]
#layers = [layer0, layer2, layer3]

ann = MLP(layers, input_space=ishape)

t_algo = SGD(learning_rate=1e-1,
             batch_size=100,
             batches_per_iter=1,
             termination_criterion=EpochCounter(2))

ds = DataPylearn2([train_set_x, train_set_y], [48, 48, 1], 7)
t_algo.setup(ann, ds)

while True:
    t_algo.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not t_algo.continue_learning(ann):
        break

# test: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/make_submission.py
ds2 = DataPylearn2([test_set_x, test_set_y], [48, 48, 1], -1)
m = ds2.X.shape[0]
batch_size = 100
extra = (batch_size - m % batch_size) % batch_size
assert (m + extra) % batch_size == 0
if extra > 0:
import theano
import numpy as np

# pylearn2 imports (not in the original snippet) for the classes used below
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, Sigmoid, Linear
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.termination_criteria import EpochCounter

n = 200
p = 2
X = np.random.normal(0, 1, (n, p))
y = X[:, 0] * X[:, 1] + np.random.normal(0, .1, n)
y.shape = (n, 1)

ds = DenseDesignMatrix(X=X, y=y)

hidden_layer = Sigmoid(layer_name='hidden', dim=10, irange=.1, init_bias=1.)
output_layer = Linear(dim=1, layer_name='y', irange=.1)
trainer = SGD(learning_rate=.05,
              batch_size=10,
              termination_criterion=EpochCounter(200))
layers = [hidden_layer, output_layer]
ann = MLP(layers, nvis=2)
trainer.setup(ann, ds)

while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

inputs = X
y_est = ann.fprop(theano.shared(inputs, name='inputs')).eval()
print(y_est.shape)
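# A small follow-up check (not part of the original snippet): report how
# closely the trained network fits the noisy product target.
mse = np.mean((y_est - y) ** 2)
print('training MSE: %f' % mse)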
def create_algorithm(mlp, train_set):
    rng = RandomState(hash('tobipuma') % 4294967295)
    algorithm = SGD(batch_size=20, learning_rate=0.1)
    algorithm.rng = rng  # try to always have same results for algorithm
    algorithm.setup(mlp, train_set)
    return algorithm
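# Hypothetical usage of create_algorithm(); `mlp` and `train_set` are
# placeholders for a pylearn2 Model and Dataset, not objects defined in the
# original snippet, and the epoch cap is an assumption.
def train_with_fixed_seed(mlp, train_set, max_epochs=10):
    algorithm = create_algorithm(mlp, train_set)
    for _ in range(max_epochs):
        algorithm.train(dataset=train_set)
        mlp.monitor.report_epoch()
        mlp.monitor()
        if not algorithm.continue_learning(mlp):
            break
    return mlp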
def runDeepLearning2():
    ### Loading training set and separating it into training set and testing set
    myDataset = Dataset("/home/Stephen/Desktop/Bird/DLearn/Data/Emotion_small/")
    preprocess = 0
    datasets = myDataset.loadTrain(preprocessFLAG=preprocess, flipFLAG=3)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    dataset_test = myDataset.loadTest(preprocess)
    test_set_x, test_set_y, test_set_y_array = dataset_test[0]
    # temporary solution to get the ground truth of sample out to
    # test_set_y_array.  the reason is that after T.cast, test_set_y becomes
    # TensorVariable, which I do not find way to output its value...
    # anyone can help?

    ### Model parameters
    """
    learning_rate = 0.02
    n_epochs = 3000
    nkerns = [30, 40, 40]  # number of kernal at each layer, current best
                           # performance is 50.0% on testing set, kernal
                           # number is [30,40,40]
    batch_size = 500

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (48, 48)  # size of input images
    nClass = 7
    """

    rng = np.random.RandomState(23455)

    # Import yaml file that specifies the model to train
    # conv layer
    layer0 = ConvRectifiedLinear(layer_name="h2",
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[8, 8],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=0.9)
    # mlp
    layer2 = RectifiedLinear(layer_name="h1", dim=1000, sparse_init=15)
    # softmax
    layer3 = Softmax(max_col_norm=1.9365,
                     layer_name="y",
                     n_classes=7,
                     istdev=0.05)

    ds = Dataset2(train_set_x, train_set_y)
    layers = [layer0, layer2, layer3]
    ann = mlp.MLP(layers, nvis=3)

    t_algo = SGD(learning_rate=1e-1,
                 batch_size=500,
                 termination_criterion=EpochCounter(400))
    t_algo.setup(ann, ds)

    while True:
        t_algo.train(dataset=ds)
        ann.monitor.report_epoch()
        ann.monitor()
        if not t_algo.continue_learning(ann):
            break