def test_eigvalsh_grad():
    rng = numpy.random.RandomState(utt.fetch_seed())
    a = rng.randn(5, 5)
    a = a + a.T
    b = 10 * numpy.eye(5, 5) + rng.randn(5, 5)
    tensor.verify_grad(lambda a, b: eigvalsh(a, b).dot([1, 2, 3, 4, 5]),
                       [a, b], rng=numpy.random)
def test_maxpool():
    # generate flattened images
    maxpoolshps = ((2, 2), (3, 3), (4, 4), (5, 5), (6, 6))
    imval = N.random.rand(4, 5, 10, 10)
    images = T.dmatrix()

    for maxpoolshp in maxpoolshps:
        # symbolic stuff
        output, outshp = sp.max_pool(images, imval.shape[1:], maxpoolshp)
        f = function([images, ], [output, ])
        output_val = f(imval.reshape(imval.shape[0], -1))

        # numeric verification
        my_output_val = N.zeros((imval.shape[0], imval.shape[1],
                                 imval.shape[2] / maxpoolshp[0],
                                 imval.shape[3] / maxpoolshp[1]))
        assert N.prod(my_output_val.shape[1:]) == N.prod(
            N.r_[imval.shape[1], outshp])

        for n in range(imval.shape[0]):
            for k in range(imval.shape[1]):
                for i in range(imval.shape[2] / maxpoolshp[0]):
                    for j in range(imval.shape[3] / maxpoolshp[1]):
                        ii, jj = i * maxpoolshp[0], j * maxpoolshp[1]
                        patch = imval[n, k, ii:ii + maxpoolshp[0],
                                      jj:jj + maxpoolshp[1]]
                        my_output_val[n, k, i, j] = N.max(patch)
        my_output_val = my_output_val.reshape(imval.shape[0], -1)
        assert N.all(output_val == my_output_val)

        def mp(input):
            output, outshp = sp.max_pool(input, imval.shape[1:], maxpoolshp)
            return output

        T.verify_grad(None, mp, [imval.reshape(imval.shape[0], -1)])
def test_col_scale():
    x = theano.sparse.csc_dmatrix()
    s = theano.tensor.dvector()

    def d(x, s):
        return sp.sp_sum(sp.col_scale(x, s), sparse_grad=True)

    rng = numpy.random.RandomState(8723)
    R = 5
    C = 8

    x_val_dense = numpy.zeros((R, C), dtype='d')
    for idx in [(0, 0), (4, 1), (2, 1), (3, 3), (4, 4), (3, 7), (2, 7)]:
        x_val_dense[idx] = rng.randn()
    x_val = scipy.sparse.csc_matrix(x_val_dense)

    s_val = rng.randn(C)

    f = theano.function([x, s], sp.col_scale(x, s))

    print "A", f(x_val, s_val).toarray()
    print "B", (x_val_dense * s_val)

    assert numpy.all(f(x_val, s_val).toarray() == (x_val_dense * s_val))

    if 0:
        tensor.verify_grad(None, d, [x_val, s_val],
                           mode=theano.Mode(linker='py',
                                            optimizer='fast_compile'))
    else:
        print >> sys.stderr, ("WARNING: skipping gradient test because"
                              " verify_grad doesn't support sparse arguments")
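Since verify_grad cannot take sparse inputs directly, one workaround is to gradient-check through a dense input and convert to sparse inside the function under test. The sketch below is not part of the original test; it assumes `theano.sparse.csc_from_dense` (which exists in `theano.sparse`) and reuses `x_val_dense` and `s_val` from above:

def d_dense(x_dense, s):
    # Sketch of a workaround, not the test's actual code: verify_grad sees a
    # dense matrix; we convert inside so col_scale still gets a CSC matrix.
    x_sp = theano.sparse.csc_from_dense(x_dense)
    # sp_sum with sparse_grad=False returns a dense scalar verify_grad can use
    return sp.sp_sum(sp.col_scale(x_sp, s), sparse_grad=False)

tensor.verify_grad(d_dense, [x_val_dense, s_val], rng=numpy.random)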
def test_det_grad():
    # If scipy is not available, this test will fail, thus we skip it.
    if not use_scipy:
        raise SkipTest('Scipy is not available')
    rng = numpy.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5)
    tensor.verify_grad(det, [r], rng=numpy.random)
def test_expm_grad_3():
    # with non-symmetric matrix (complex eigenvectors)
    if not imported_scipy:
        raise SkipTest("Scipy needed for the expm op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    A = rng.randn(5, 5).astype(config.floatX)

    tensor.verify_grad(expm, [A], rng=rng)
def test_expm_grad_3():
    # with non-symmetric matrix (complex eigenvectors)
    if not imported_scipy:
        raise SkipTest("Scipy needed for the expm op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Always test in float64 for better numerical stability.
    A = rng.randn(5, 5)

    tensor.verify_grad(expm, [A], rng=rng)
def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs):
    """
    Wrapper for tensor/basic.py:verify_grad
    Takes care of seeding the random number generator if None is given
    """
    if rng is None:
        seed_rng()
        rng = numpy.random
    T.verify_grad(op, pt, n_tests, rng, *args, **kwargs)
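A minimal usage sketch of this wrapper (a hypothetical example, not from the test suite; it relies on the module-level `seed_rng` the wrapper already assumes):

import numpy
import theano.tensor as T

x_val = numpy.random.rand(3, 4)
# rng is left as None, so the wrapper calls seed_rng() and uses numpy.random
verify_grad(T.tanh, [x_val])
# any callable building a symbolic expression works, not just an Op
verify_grad(lambda x: (x ** 2).sum(), [x_val], n_tests=1)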
def test_inverse_grad():
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(4, 4)
    tensor.verify_grad(matrix_inverse, [r], rng=np.random)
def test_fractional_max_pooling_numeric_gradient():
    def fun(x):
        return fmp.DisjointPseudorandomFractionalMaxPooling2DOp(
            alpha=1.414,
            u=0.5
        )(x)

    T.verify_grad(fun,
                  [np.arange(25).reshape(1, 1, 5, 5).astype(fX)],
                  rng=np.random)
def test_eigvalsh_grad():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the geigvalsh op.")
    import scipy.linalg

    rng = numpy.random.RandomState(utt.fetch_seed())
    a = rng.randn(5, 5)
    a = a + a.T
    b = 10 * numpy.eye(5, 5) + rng.randn(5, 5)
    tensor.verify_grad(lambda a, b: eigvalsh(a, b).dot([1, 2, 3, 4, 5]),
                       [a, b], rng=numpy.random)
def test_expm_grad_2():
    # with non-symmetric matrix with real eigenspectra
    if not imported_scipy:
        raise SkipTest("Scipy needed for the expm op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    A = rng.randn(5, 5).astype(config.floatX)
    w = (rng.randn(5).astype(config.floatX))**2
    A = (numpy.diag(w**0.5)).dot(A + A.T).dot(numpy.diag(w**(-0.5)))
    assert not numpy.allclose(A, A.T)

    tensor.verify_grad(expm, [A], rng=rng)
def test_expm_grad_2():
    # with non-symmetric matrix with real eigenspectra
    if not imported_scipy:
        raise SkipTest("Scipy needed for the expm op.")
    rng = np.random.RandomState(utt.fetch_seed())
    # Always test in float64 for better numerical stability.
    A = rng.randn(5, 5)
    w = rng.randn(5)**2
    A = (np.diag(w**0.5)).dot(A + A.T).dot(np.diag(w**(-0.5)))
    assert not np.allclose(A, A.T)

    tensor.verify_grad(expm, [A], rng=rng)
def test_expm_grad_2():
    # with non-symmetric matrix with real eigenspectra
    if not imported_scipy:
        raise SkipTest("Scipy needed for the expm op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Always test in float64 for better numerical stability.
    A = rng.randn(5, 5)
    w = rng.randn(5)**2
    A = (numpy.diag(w**0.5)).dot(A + A.T).dot(numpy.diag(w**(-0.5)))
    assert not numpy.allclose(A, A.T)

    tensor.verify_grad(expm, [A], rng=rng)
def test_softmax_grad(self):
    def cmp(n, m, f, f_gpu):
        data = numpy.arange(n * m, dtype="float32").reshape(n, m)
        gdata = numpy.asarray(data)[:, :, None, None]

        out = f(data)
        gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
        utt.assert_allclose(out, gout)

    x = T.matrix("x", "float32")
    x_gpu = T.tensor4("x_gpu", "float32")
    f_z = T.nnet.softmax_op
    f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")

    # Verify the grad operation
    dims = (2, 3, 4, 5)
    gdata = numpy.arange(numpy.product(dims),
                         dtype="float32").reshape(dims)
    T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

    # Verify that the CPU and GPU implementations return the same results
    # up to a tolerance.
    self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)
    self._test_softmax(x, x, f_z, f_z, self._cmp)

    # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad optimization is
    # applied when cudnn is required
    y = T.fvector("y")
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    assert len([i for i in sorted_f
                if isinstance(i.op, self.gpu_grad_op)]) == 1
    assert len([i for i in sorted_f
                if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0

    # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad optimization is
    # not applied when cudnn is excluded or not available
    mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
    y = T.fvector("y")
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_wo_cudnn)
    sorted_f = f.maker.fgraph.toposort()
    assert len([i for i in sorted_f
                if isinstance(i.op, self.gpu_grad_op)]) == 0
    assert len([i for i in sorted_f
                if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 1

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization does
    # not crash with a manually built graph
    y = T.fvector("y")
    o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
    f = theano.function([y], o, mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    assert len([i for i in sorted_f
                if isinstance(i.op, self.gpu_grad_op)]) == 1
    assert len([i for i in sorted_f
                if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0
def test_verify_exprgrad():
    from theano import tensor
    import numpy

    # verify_grad expects a callable that builds a symbolic expression from
    # its inputs, not a compiled theano.function.
    def f(x):
        return x

    x_val = numpy.asarray([0.1, 0.2])

    rng = numpy.random.RandomState(42)
    print 'going'
    print tensor.verify_grad(f, [x_val], rng=rng)
def test_grad(self):
    x = tensor.matrix('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    xe = op(x, one_of_n)

    f = theano.function([x, one_of_n], xe)
    x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
                          dtype=config.floatX)
    xe_val = f(x_val, [0, 1])
    assert numpy.allclose(xe_val, -numpy.log([.4, .8]))

    def oplike(x):
        return op(x, [0, 1])

    tensor.verify_grad(oplike, [x_val], rng=numpy.random)
def test_other_grad_tests(self):
    x = theano.tensor.dmatrix()
    x_val1 = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]],
                         dtype='float32')
    x_val2 = numpy.array([[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]],
                         dtype='float32')
    rng = numpy.random.RandomState(43)

    p = Prod(axis=1)
    grad_p = theano.tensor.grad(p(x).sum(), x)
    grad_fn = theano.function([x], grad_p, mode=self.mode)
    assert numpy.allclose(grad_fn(x_val1),
                          [[6., 3., 2.], [30., 0., 0.], [0., 0., 0.]])
    assert numpy.allclose(grad_fn(x_val2),
                          [[0., 0., 2.], [30., 0., 0.],
                           [72., 63., 56.], [0., 0., 90.]])

    p_axis0 = Prod(axis=0)
    grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x)
    grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
    assert numpy.allclose(grad_fn_axis0(x_val2),
                          [[0., 400., 0.], [63., 160., 0.],
                           [0., 100., 0.], [0., 80., 0.]])

    tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
def verify_layers(batch_size, layers, train_set_x, train_set_y):
    index = 0
    range_start = index * batch_size
    range_end = (index + 1) * batch_size

    sample = train_set_x[range_start:range_end]
    layer_0_activation = layers[0].output(sample).eval()
    layer_1_activation = layers[1].output(layer_0_activation)

    layer_1_cost = layers[1].cost(
        T.nnet.softmax(T.mean(layer_1_activation, axis=2)),
        train_set_y[range_start:range_end])
    layer_0_cost = layers[1].cost(
        T.nnet.softmax(T.mean(layers[1].output(layers[0].output(sample)),
                              axis=2)),
        train_set_y[range_start:range_end])

    temp = verify_layer(layer_1_cost, layers[1].W)
    T.verify_grad(temp, [layers[1].W.get_value()],
                  rng=np.random.RandomState())

    temp = verify_layer(layer_0_cost, layers[0].W)
    T.verify_grad(temp, [layers[0].W.get_value()],
                  rng=np.random.RandomState())
def test_grad(self):
    np.random.seed(1234)
    M_val = np.concatenate((
        np.linspace(-10, 10, 100),
        [
            0.0,
            -np.pi + 1e-3,
            np.pi - 1e-3,
            0.5 * np.pi,
            -0.5 * np.pi,
            1.5 * np.pi,
            2 * np.pi + 1e-3,
        ],
    ))
    e_val = np.random.uniform(0, 0.9, len(M_val))

    a = lambda *args: tt.arctan2(*self.op(*args))  # NOQA
    tt.verify_grad(a, [M_val, e_val], eps=1e-8, rng=np.random)
def test_maxpool():
    # generate flattened images
    maxpoolshps = ((2, 2), (3, 3), (4, 4), (5, 5), (6, 6))
    imval = N.random.rand(4, 5, 10, 10)
    images = T.dmatrix()

    for maxpoolshp in maxpoolshps:
        # symbolic stuff
        output, outshp = sp.max_pool(images, imval.shape[1:], maxpoolshp)
        f = function([
            images,
        ], [
            output,
        ])
        output_val = f(imval.reshape(imval.shape[0], -1))

        # numeric verification
        my_output_val = N.zeros((imval.shape[0], imval.shape[1],
                                 imval.shape[2] / maxpoolshp[0],
                                 imval.shape[3] / maxpoolshp[1]))
        assert N.prod(my_output_val.shape[1:]) == N.prod(
            N.r_[imval.shape[1], outshp])

        for n in range(imval.shape[0]):
            for k in range(imval.shape[1]):
                for i in range(imval.shape[2] / maxpoolshp[0]):
                    for j in range(imval.shape[3] / maxpoolshp[1]):
                        ii, jj = i * maxpoolshp[0], j * maxpoolshp[1]
                        patch = imval[n, k, ii:ii + maxpoolshp[0],
                                      jj:jj + maxpoolshp[1]]
                        my_output_val[n, k, i, j] = N.max(patch)
        my_output_val = my_output_val.reshape(imval.shape[0], -1)
        assert N.all(output_val == my_output_val)

        def mp(input):
            output, outshp = sp.max_pool(input, imval.shape[1:], maxpoolshp)
            return output

        T.verify_grad(None, mp, [imval.reshape(imval.shape[0], -1)])
        thunk.outputs = outputs
        thunk.lazy = False
        return thunk


if __name__ == "__main__":
    import theano
    import theano.tensor as T
    fX = theano.config.floatX
    inp = T.constant(np.random.randn(10, 10, 10, 10).astype(fX))
    foo = DisjointPseudorandomFractionalMaxPooling2DOp(1.414, 0.5)(inp)
    bar = foo.eval()
    print np.array(bar)
    print np.array(bar[0, 0, :2, :2])
    print np.array(inp.eval()[0, 0, :4, :4])
    g = T.grad(foo.sum(), inp)
    choo = np.array(g.eval())
    # print choo
    print np.array(bar[0, 0, :3, :3])
    print np.array(inp.eval()[0, 0, :4, :4])
    print choo[0, 0, :4, :4]

    def fun(x):
        return DisjointPseudorandomFractionalMaxPooling2DOp(1.414, 0.5)(x)

    T.verify_grad(fun,
                  [np.arange(25).reshape(1, 1, 5, 5).astype(fX)],
                  rng=np.random)
def test_grad(self):
    t, _, _, in_args = self.get_args()
    func = lambda *args: self.op(*(list(args) + [t]))[0]  # NOQA
    tt.verify_grad(func, in_args, n_tests=1, rng=np.random)
def test_cudnn_softmax_grad(self):
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    def cmp(n, m, f, f_gpu):
        data = numpy.arange(n * m, dtype='float32').reshape(n, m)
        gdata = numpy.asarray(data)[:, :, None, None]

        out = f(data)
        gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
        assert numpy.allclose(out, gout), numpy.absolute(out - gout)

    x = T.matrix('x', 'float32')
    x_gpu = T.tensor4('x_gpu', 'float32')
    f_z = T.nnet.softmax
    f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax('bc01', 'accurate',
                                                  'channel')

    # Verify the grad operation
    dims = (2, 3, 4, 5)
    gdata = numpy.arange(numpy.product(dims),
                         dtype='float32').reshape(dims)
    T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

    def check_types(graph, graph_gpu):
        self._check_types(graph, graph_gpu, -1, type(f_z),
                          theano.sandbox.cuda.dnn.GpuDnnSoftmax)

    def check_types_opt(graph, graph_gpu):
        assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
        assert len([
            n for n in graph_gpu.maker.fgraph.toposort()
            if isinstance(n.op, theano.sandbox.cuda.dnn.GpuDnnSoftmax)
        ]) == 1

    # Verify that the CPU and GPU implementations return the same results
    # up to a tolerance.
    self._test_softmax(x, x_gpu, f_z, f_gpu, cmp, mode_with_gpu,
                       check_types)

    mode_w_cudnn = mode_with_gpu.including("cudnn")
    self._test_softmax(x, x, f_z, f_z, self._cmp, mode_w_cudnn,
                       check_types_opt)

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
    # applied when cudnn is required
    y = T.fvector('y')
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
    ]) == 1)
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
    ]) == 0)

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
    # applied when cudnn is excluded or not available
    mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
    y = T.fvector('y')
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_wo_cudnn)
    sorted_f = f.maker.fgraph.toposort()
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
    ]) == 0)
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
    ]) == 1)

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization does
    # not crash with a manually built graph
    y = T.fvector('y')
    o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
    f = theano.function([y], o, mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
    ]) == 1)
    assert (len([
        i for i in sorted_f
        if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
    ]) == 0)
def test_grad(self):
    _, _, in_args = self.get_args()
    func = lambda *args: self.op(*args)[0]  # NOQA
    tt.verify_grad(func, in_args, rng=np.random)
def test_sp_sum(self):
    from theano.sparse.sandbox.sp import SpSum
    # TODO: test both grad.
    rng = numpy.random.RandomState(42)
    from theano.sparse.basic import SparseFromDense, DenseFromSparse
    cases = [("csc", scipy.sparse.csc_matrix),
             ("csr", scipy.sparse.csr_matrix)]

    for format, cast in cases:
        # print 'format: %(format)s' % locals()
        x = theano.sparse.SparseType(format=format,
                                     dtype=theano.config.floatX)()
        x_data = numpy.arange(20).reshape(5, 4).astype(theano.config.floatX)

        # Sum on all axes
        z = theano.sparse.sandbox.sp.sp_sum(x)
        assert z.type.broadcastable == ()
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = x_val.sum()
        assert out == expected

        # Sum on axis 0
        z = theano.sparse.sandbox.sp.sp_sum(x, axis=0)
        assert z.type.broadcastable == (False,)
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = x_val.sum(axis=0)
        assert (out == expected).all()

        # Sum on axis 1
        z = theano.sparse.sandbox.sp.sp_sum(x, axis=1)
        assert z.type.broadcastable == (False,)
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = numpy.asarray(x_val.sum(axis=1)).reshape(x_val.shape[0])
        assert (out == expected).all()

        # Sparse gradient of the sum on all axes:
        # unfinished, and suspended until verify_grad gets fixed
        if False:
            def fun(x):
                # verify_grad does not handle sparse data, so here's some
                # casting as a workaround.
                # x is a dense matrix: make it sparse
                sparse_var = SparseFromDense(format)(x)
                # apply op; summing over all axes yields a dense scalar,
                # which verify_grad can work with directly
                dense_sum = SpSum(axis=None, sparse_grad=False)(sparse_var)
                return dense_sum

            x_val = x_data.copy()
            # print type(x_val)
            import pdb
            pdb.set_trace()
            tensor.verify_grad(fun, [x_val], rng=rng)
def test_alloc_diag_grad(self):
    rng = np.random.RandomState(utt.fetch_seed())
    x = rng.rand(5)
    tensor.verify_grad(alloc_diag, [x], rng=rng)
def test_inverse_grad():
    rng = numpy.random.RandomState(1234)
    r = rng.randn(4, 4)
    tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
def test_cudnn_softmax(self):
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    def cmp(n, m, f, f_gpu):
        data = numpy.arange(n * m, dtype='float32').reshape(n, m)
        gdata = numpy.asarray(data)[:, :, None, None]

        out = f(data)
        gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
        assert numpy.allclose(out, gout), numpy.absolute(out - gout)

    x = T.matrix('x', 'float32')
    x_gpu = T.tensor4('x_gpu', 'float32')
    f_z = T.nnet.softmax
    f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax(
        'bc01', 'accurate', 'channel'
    )

    # Verify the grad operation
    dims = (2, 3, 4, 5)
    gdata = numpy.arange(
        numpy.product(dims), dtype='float32'
    ).reshape(dims)
    T.verify_grad(f_gpu, [gdata], rng=numpy.random)

    def check_types(graph, graph_gpu):
        self._check_types(
            graph, graph_gpu, -1, type(f_z),
            theano.sandbox.cuda.dnn.GpuDnnSoftmax
        )

    def check_types_opt(graph, graph_gpu):
        assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
        assert len([n for n in graph_gpu.maker.fgraph.toposort()
                    if isinstance(
                        n.op, theano.sandbox.cuda.dnn.GpuDnnSoftmax
                    )]) == 1

    # Verify that the CPU and GPU implementations return the same results
    # up to a tolerance.
    self._test_softmax(
        x, x_gpu, f_z, f_gpu, cmp, mode_with_gpu, check_types
    )

    mode_w_cudnn = mode_with_gpu.including("cudnn")
    self._test_softmax(
        x, x, f_z, f_z, self._cmp, mode_w_cudnn, check_types_opt
    )

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
    # applied when cudnn is required
    y = T.fvector('y')
    f = theano.function(
        [y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu
    )
    sorted_f = f.maker.fgraph.toposort()
    assert(len([i for i in sorted_f if isinstance(
        i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
    )]) == 1)
    assert(len([i for i in sorted_f if isinstance(
        i.op, theano.tensor.nnet.SoftmaxGrad
    )]) == 0)

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
    # applied when cudnn is excluded or not available
    mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
    y = T.vector('y')
    f = theano.function(
        [y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn
    )
    sorted_f = f.maker.fgraph.toposort()
    assert(len([i for i in sorted_f if isinstance(
        i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
    )]) == 0)
    assert(len([i for i in sorted_f if isinstance(
        i.op, theano.tensor.nnet.SoftmaxGrad
    )]) == 1)
def test_extract_diag_grad():
    rng = numpy.random.RandomState(utt.fetch_seed())
    x = rng.rand(5, 4)
    tensor.verify_grad(extract_diag, [x], rng=rng)
def test_ungroup_irregular_length_tensors_numeric_gradient():
    lengths = np.array([2, 3, 4, 5, 7, 2], dtype=fX)
    T.verify_grad(lambda x: ungroup_irregular_length_tensors(x, lengths),
                  [np.random.randn(23, 10).astype(fX)],
                  rng=np.random)
def test_verify_grad():
    def fun(h_0, V_re, V_im, hidden_bias, theta, reflection, scale,
            U, out_bias):
        return costs[0]

    T.verify_grad(fun, [p.get_value() for p in parameters], rng=rng)
def test_softmax_grad(self):
    def cmp(n, m, f, f_gpu):
        data = numpy.arange(n * m, dtype='float32').reshape(n, m)
        gdata = numpy.asarray(data)[:, :, None, None]

        out = f(data)
        gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
        utt.assert_allclose(out, gout)

    x = T.matrix('x', 'float32')
    x_gpu = T.tensor4('x_gpu', 'float32')
    f_z = T.nnet.softmax_op
    f_gpu = dnn.GpuDnnSoftmax('accurate', 'channel')

    # Verify the grad operation
    dims = (2, 3, 4, 5)
    gdata = numpy.arange(numpy.product(dims),
                         dtype='float32').reshape(dims)
    T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

    # Verify that the CPU and GPU implementations return the same results
    # up to a tolerance.
    self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)
    self._test_softmax(x, x, f_z, f_z, self._cmp)

    # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad optimization is
    # applied when cudnn is required
    y = T.fvector('y')
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    val = numpy.random.rand(5).astype('float32')
    out_dnn = f(val)
    assert (len([i for i in sorted_f
                 if isinstance(i.op, self.gpu_grad_op)]) == 1)
    assert (len([i for i in sorted_f
                 if isinstance(i.op,
                               theano.tensor.nnet.SoftmaxGrad)]) == 0)

    # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad optimization is
    # not applied when cudnn is excluded or not available
    mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
    y = T.fvector('y')
    f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y),
                        mode=mode_wo_cudnn)
    sorted_f = f.maker.fgraph.toposort()
    out_cpu = f(val)
    utt.assert_allclose(out_dnn, out_cpu)
    assert (len([i for i in sorted_f
                 if isinstance(i.op, self.gpu_grad_op)]) == 0)
    assert (len([i for i in sorted_f
                 if isinstance(i.op,
                               theano.tensor.nnet.SoftmaxGrad)]) == 1)

    # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization does
    # not crash with a manually built graph
    y = T.fvector('y')
    o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
    f = theano.function([y], o, mode=mode_with_gpu)
    sorted_f = f.maker.fgraph.toposort()
    assert (len([i for i in sorted_f
                 if isinstance(i.op, self.gpu_grad_op)]) == 1)
    assert (len([i for i in sorted_f
                 if isinstance(i.op,
                               theano.tensor.nnet.SoftmaxGrad)]) == 0)
def test_sp_sum(self):
    from theano.sparse.sandbox.sp import SpSum
    # TODO: test both grad.
    rng = numpy.random.RandomState(42)
    from theano.sparse.basic import SparseFromDense, DenseFromSparse
    cases = [("csc", scipy.sparse.csc_matrix),
             ("csr", scipy.sparse.csr_matrix)]

    for format, cast in cases:
        #print 'format: %(format)s' % locals()
        x = theano.sparse.SparseType(format=format,
                                     dtype=theano.config.floatX)()
        x_data = numpy.arange(20).reshape(5, 4).astype(theano.config.floatX)

        # Sum on all axes
        z = theano.sparse.sandbox.sp.sp_sum(x)
        assert z.type.broadcastable == ()
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = x_val.sum()
        assert out == expected

        # Sum on axis 0
        z = theano.sparse.sandbox.sp.sp_sum(x, axis=0)
        assert z.type.broadcastable == (False, )
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = x_val.sum(axis=0)
        assert (out == expected).all()

        # Sum on axis 1
        z = theano.sparse.sandbox.sp.sp_sum(x, axis=1)
        assert z.type.broadcastable == (False, )
        f = theano.function([x], z)
        x_val = cast(x_data)
        out = f(x_val)
        expected = numpy.asarray(x_val.sum(axis=1)).reshape(x_val.shape[0])
        assert (out == expected).all()

        # Sparse gradient of the sum on all axes:
        # unfinished, and suspended until verify_grad gets fixed
        if False:
            print 'grad on sum on all axis...'

            def fun(x):
                # verify_grad does not handle sparse data, so here's some
                # casting as a workaround.
                # x is a dense matrix: make it sparse
                sparse_var = SparseFromDense(format)(x)
                # apply op; summing over all axes yields a dense scalar,
                # which verify_grad can work with directly
                dense_sum = SpSum(axis=None, sparse_grad=False)(sparse_var)
                return dense_sum

            x_val = x_data.copy()
            print type(x_val)
            import pdb
            pdb.set_trace()
            tensor.verify_grad(fun, [x_val], rng=rng)
            #utt.verify_grad(SpSum(axis=None), [x_val])
            print 'ok'
def func(x, a, b, env):
    value = x + env * np.exp(x) - a * b**2
    return value


def x_from_ab(a, b, env):
    value = optimize.newton(func, 4, args=(a, b, env))
    return value


class Xf(tt.Op):
    itypes = [tt.dscalar, tt.dscalar]
    otypes = [tt.dscalar]

    def perform(self, node, inputs, outputs):
        a, b = inputs
        x = x_from_ab(a, b, env)
        outputs[0][0] = np.array(x)

    def grad(self, inputs, g):
        a, b = inputs
        x = self(a, b)
        # implicit function theorem on f(x; a, b) = x + env*exp(x) - a*b**2
        dx = 1 + env * tt.exp(x)
        da = -b**2
        db = -2 * a * b
        return [-g[0] * da / dx, -g[0] * db / dx]


for i in range(len(envod)):
    env = envod[i]
    tt.verify_grad(Xf(), [3.0, 4.0], rng=np.random)
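For reference, the gradient returned above is the implicit function theorem applied to the root condition that `optimize.newton` solves; a sketch of the derivation (with env treated as a constant):

\[
f(x; a, b) = x + \mathrm{env}\,e^{x} - a b^{2} = 0
\;\Longrightarrow\;
\frac{\partial x}{\partial a} = -\frac{\partial f/\partial a}{\partial f/\partial x} = \frac{b^{2}}{1 + \mathrm{env}\,e^{x}},
\qquad
\frac{\partial x}{\partial b} = \frac{2 a b}{1 + \mathrm{env}\,e^{x}}.
\]

This is what `verify_grad` checks here: the symbolic derivatives from `grad` against finite differences of the Newton solve in `perform`.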
def test_grad(self):
    tt.verify_grad(self.op, [np.array([-1, 0.3, 0.2, 0.5])], rng=np.random)
def test_det_grad():
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    tensor.verify_grad(det, [r], rng=np.random)
                      (ca - 4 * tau) * J + J**2)) * dJdI
        dAdI = ((1 + (Ac * (-1 + 2 * z2) - 1 * Aj) /
                 (-4 * Ac * z2 * Aj + (Ac + Aj)**2)**0.5) * dAjdI) / (2 * z2)
        dfdI = -((g1 * dAdI) / (tt.exp((px / c)**bs) * (ca - tau)))
        # dD
        dgsdD = -((kx * (ps - px)) / (1000 * a * D**2))
        dfdD = (1 - (g1 * dAdgs) /
                (tt.exp((px / c)**bs) * (ca - tau))) * dgsdD
        # ds
        dpsds = -beta * pe * s**(-beta - 1)
        dgsds = (kx * dpsds) / (1000 * a * D)
        dfds = (1 - (g1 * dAdgs) /
                (tt.exp((px / c)**bs) * (ca - tau))) * dgsds
        # Implicit function theorem
        dpxdbs = -dfdbs / dfdpx
        dpxdc = -dfdc / dfdpx
        dpxdg1 = -dfdg1 / dfdpx
        dpxdkxmax = -dfdkxmax / dfdpx
        dpxdp50 = -dfdp50 / dfdpx
        dpxdT = -dfdT / dfdpx
        dpxdI = -dfdI / dfdpx
        dpxdD = -dfdD / dfdpx
        dpxds = -dfds / dfdpx
        return [g[0] * dpxdbs, g[0] * dpxdc, g[0] * dpxdg1,
                g[0] * dpxdkxmax, g[0] * dpxdp50, g[0] * dpxdT,
                g[0] * dpxdI, g[0] * dpxdD, g[0] * dpxds]


tt.verify_grad(Pxf(), [0.6, -1., 30., 4.5, -3., 20., 300., 0.01, 1.0],
               rng=np.random)
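The same pattern as in the previous snippet: each `dpxd*` term is one application of the implicit function theorem to the root condition \(f(p_x; \theta) = 0\),

\[
\frac{\partial p_x}{\partial \theta_i} = -\,\frac{\partial f/\partial \theta_i}{\partial f/\partial p_x},
\qquad \theta_i \in \{b_s,\, c,\, g_1,\, k_{x\max},\, p_{50},\, T,\, I,\, D,\, s\},
\]

which `verify_grad` then checks numerically at the given point.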
def test_extract_diag_grad(self):
    rng = np.random.RandomState(utt.fetch_seed())
    x = rng.rand(5, 4).astype(self.floatX)
    tensor.verify_grad(extract_diag, [x], rng=rng)
        plt.hist(np_dynew[1].ravel())
        plt.title("np_dynew[1]")
        plt.matshow(np_dynew[1])
        plt.title("np_dynew[1]")
        plt.show()
        import pdb
        pdb.set_trace()
    else:
        print "grads match"

    if False:
        print "verifying grad"
        T.verify_grad(
            lambda l, s: crop(
                T.constant(data["x"]),
                T.constant(data["a"].astype(theano.config.floatX)),
                T.constant(data["b"].astype(theano.config.floatX)),
                l, s),
            [data["l"], data["s"]],
            rng=np.random)
        print "grad verified"

    if False:
        for image, patch, oldpatch, location, scale in itertools.izip(
                data["x"], np_ynew, np_yold, data["l"], data["s"]):
            import matplotlib.pyplot as plt
            plt.figure()
            plt.imshow(np.rollaxis(image, 0, image.ndim),
                       interpolation="nearest")
            plt.figure()
            plt.imshow(np.rollaxis(patch, 0, patch.ndim),
                       interpolation="nearest")
            plt.title("l %s s %s" % (location, scale))
        thunk.inputs = inputs
        thunk.outputs = outputs
        thunk.lazy = False
        return thunk


if __name__ == "__main__":
    import theano
    import theano.tensor as T
    fX = theano.config.floatX
    inp = T.constant(np.random.randn(10, 10, 10, 10).astype(fX))
    foo = DisjointPseudorandomFractionalMaxPooling2DOp(1.414, 0.5)(inp)
    bar = foo.eval()
    print np.array(bar)
    print np.array(bar[0, 0, :2, :2])
    print np.array(inp.eval()[0, 0, :4, :4])
    g = T.grad(foo.sum(), inp)
    choo = np.array(g.eval())
    # print choo
    print np.array(bar[0, 0, :3, :3])
    print np.array(inp.eval()[0, 0, :4, :4])
    print choo[0, 0, :4, :4]

    def fun(x):
        return DisjointPseudorandomFractionalMaxPooling2DOp(1.414, 0.5)(x)

    T.verify_grad(fun,
                  [np.arange(25).reshape(1, 1, 5, 5).astype(fX)],
                  rng=np.random)