Example #1
    def test_take_along_axis_grad(self, shape, axis, samples):
        if axis < 0:
            _axis = len(shape) + axis
        else:
            _axis = axis
        # Set up the Theano function
        t_arr, t_indices = self.get_input_tensors(shape)
        t_out2 = theano.grad(
            tt.sum(self._output_tensor(t_arr**2, t_indices, axis)),
            t_arr,
        )
        func = theano.function([t_arr, t_indices], [t_out2])

        # Test that the gradient matches the expected value: each element's
        # gradient is 2 * arr times the number of times `indices` selects it
        arr, indices = self.get_input_values(shape, axis, samples)
        expected_grad = np.zeros_like(arr)
        slicer = [slice(None)] * len(shape)
        for i in range(indices.shape[axis]):
            slicer[axis] = i
            # index with a tuple: list-of-slices indexing is an error in
            # modern numpy
            inds = indices[tuple(slicer)].reshape(
                shape[:_axis] + (1,) + shape[_axis + 1:]
            )
            inds = _make_along_axis_idx(shape, inds, _axis)  # numpy private helper
            expected_grad[inds] += 1
        expected_grad *= 2 * arr
        out = func(arr, indices)[0]
        assert np.allclose(out, expected_grad)
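
For reference, a minimal sketch of the identity this test builds on: theano.grad of sum(x ** 2) with respect to x is 2 * x. This assumes only Theano and NumPy are installed; the names are illustrative.

import numpy as np
import theano
import theano.tensor as tt

x = tt.matrix("x")
g = theano.grad(tt.sum(x ** 2), x)  # symbolic gradient, equals 2 * x
f = theano.function([x], g)
arr = np.random.rand(3, 4).astype(theano.config.floatX)
assert np.allclose(f(arr), 2 * arr)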
Example #2
File: operators.py  Project: taku-y/pymc3
def apply(self, f):
    # f: kernel function for KSD, f(histogram) -> (k(x, .), \nabla_x k(x, .))
    X = self.approx.histogram
    t = self.approx.normalizing_constant
    dlogpdx = theano.scan(
        fn=lambda zg: theano.grad(self.logp_norm(zg), zg),
        sequences=[X]
    )[0]  # bottleneck
    Kxy, dxkxy = f(X)
    # scaling factor; not needed for Kxy because dlogpdx is already scaled
    dxkxy /= t
    n = X.shape[0].astype('float32') / t
    svgd_grad = (tt.dot(Kxy, dlogpdx) + dxkxy) / n
    return -1 * svgd_grad  # gradient
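
For intuition, a minimal NumPy sketch of the Stein variational gradient that apply() assembles symbolically above. rbf_kernel, score, and the fixed bandwidth h are illustrative assumptions, not pymc3 API; real SVGD implementations typically pick h with a median heuristic.

import numpy as np

def rbf_kernel(X, h=1.0):
    # k(x, y) = exp(-||x - y||^2 / h) and sum_j grad_{x_j} k(x_j, x_i)
    diff = X[:, None, :] - X[None, :, :]           # diff[i, j] = x_i - x_j
    Kxy = np.exp(-np.sum(diff ** 2, axis=-1) / h)  # (n, n) kernel matrix
    # grad_{x_j} k(x_j, x_i) = 2 (x_i - x_j) / h * k(x_i, x_j), summed over j
    dxkxy = (Kxy[:, :, None] * diff * 2.0 / h).sum(axis=1)
    return Kxy, dxkxy

def svgd_grad(X, score, h=1.0):
    # phi(x_i) = (1/n) sum_j [k(x_j, x_i) * score(x_j) + grad_{x_j} k(x_j, x_i)]
    Kxy, dxkxy = rbf_kernel(X, h)
    return (Kxy @ score(X) + dxkxy) / X.shape[0]

# usage: push particles toward a standard normal, whose score is -x
X = np.random.randn(50, 2)
for _ in range(100):
    X += 0.1 * svgd_grad(X, lambda X: -X)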
Example #3
def dlogp(self):
    return theano.scan(
        fn=lambda zg: theano.grad(self.approx.logp_norm(zg), zg),
        sequences=[self.input_matrix]
    )[0]
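
The pattern above (scan over the rows of a matrix, calling theano.grad inside the step) can be exercised on its own. A minimal sketch with a stand-in log-density; the lambda below is illustrative, not pymc3's logp_norm:

import theano
import theano.tensor as tt

X = tt.matrix("X")
logp = lambda z: -0.5 * tt.sum(z ** 2)  # stand-in: standard normal, up to a constant
grads, _ = theano.scan(fn=lambda zg: theano.grad(logp(zg), zg), sequences=[X])
f = theano.function([X], grads)  # row i of the output is d logp / d X[i], i.e. -X[i]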
Example #4
import numpy
import theano
import theano.tensor as T


def train_model(learning_rate=0.1,
                L1_reg=0.00,
                L2_reg=0.0001,
                n_epochs=1000,
                batch_size=20,
                n_hidden=500):

    # BUILD MODEL #
    print("...building model")

    # allocate symbolic variables for the data (defined elsewhere in the
    # original excerpt; shown here so the function is self-contained)
    index = T.lscalar()  # index to a minibatch
    x = T.matrix('x')    # input features
    y = T.ivector('y')   # labels, as a vector of ints

    rng = numpy.random.RandomState(1234)

    classifier = DeepVS(
        rng=rng,
        input=x,
        n_in=42,
        n_hidden=n_hidden,
        n_out=3
    )

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # test_set_x/_y (and the valid/train counterparts below) are assumed to
    # be Theano shared variables holding the datasets
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
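
A usage sketch to round the excerpt off: the driver loop below continues the body of train_model and is modeled on the DeepLearning tutorials this code follows; n_train_batches and n_valid_batches are derived here from the assumed shared dataset variables.

    # hypothetical driver loop (sketch, continuing the function body above)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    print("...training")
    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
        validation_losses = [validate_model(i) for i in range(n_valid_batches)]
        print("epoch %i, validation error %f %%" %
              (epoch + 1, numpy.mean(validation_losses) * 100.0))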