def test_local_sigm_times_exp(self):
    # Test the `local_sigm_times_exp` optimization.
    # exp(x) * sigm(-x) -> sigm(x)
    # exp(-x) * sigm(x) -> sigm(-x)

    def match(func, ops):
        # print([node.op.scalar_op for node in func.maker.fgraph.toposort()])
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = tt.vectors("x", "y")

    f = theano.function([x], sigmoid(-x) * tt.exp(x), mode=m)
    match(f, [sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function([x], sigmoid(x) * tt.exp(-x), mode=m)
    match(f, [tt.neg, sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=m)
    match(f, [tt.neg, sigmoid, tt.neg])
    # assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) * tt.exp(y)),
        mode=m,
    )
    topo = f.maker.fgraph.toposort()
    for op, nb in [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]:
        assert sum(n.op == op for n in topo) == nb
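# The rewrites above follow from the identity sigm(v) = 1 / (1 + exp(-v)):
# exp(v) * sigm(-v) = exp(v) / (1 + exp(v)) = sigm(v), and symmetrically for
# the negated form. A minimal NumPy check of the algebra (not part of the
# test suite):
import numpy as np

def sigm(v):
    return 1.0 / (1.0 + np.exp(-v))

v = np.linspace(-5.0, 5.0, 11)
assert np.allclose(np.exp(v) * sigm(-v), sigm(v))   # exp(x) * sigm(-x) -> sigm(x)
assert np.allclose(np.exp(-v) * sigm(v), sigm(-v))  # exp(-x) * sigm(x) -> sigm(-x)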
def test_connection_pattern_override(self, cls_ofg):
    x, y = T.vectors('xy')

    def f1(x, y):
        del x
        # x is unused in the output, but we still know how to backpropagate
        # through it, and we don't care about the gradient w.r.t. y.
        return y + T.round(y)

    def f1_back(inputs, output_gradients):
        return [output_gradients[0],
                theano.gradient.disconnected_type()]

    op = cls_ofg(
        inputs=[x, y],
        outputs=[f1(x, y)],
        grad_overrides=f1_back,
        connection_pattern=[[True], [False]],  # This is new
        on_unused_input='ignore')              # This is new

    c = op(x, y)

    g1 = theano.grad(c.sum(), x)

    out = g1.eval({
        x: np.ones((5,), dtype=np.float32),
        y: np.ones((5,), dtype=np.float32)})

    assert np.allclose(out, [1.] * 5)
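# Hedged follow-up (not in the original test): since connection_pattern marks
# the output as independent of y, asking for that gradient explicitly should
# produce a disconnected result, mirroring the DisconnectedType checks used
# elsewhere in this suite:
from theano.gradient import DisconnectedType

g2 = theano.grad(c.sum(), y,
                 disconnected_inputs='ignore',
                 return_disconnected='Disconnected')
assert isinstance(g2.type, DisconnectedType)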
def test_input_dimensions_overflow(self): # Elemwise.perform used to compute the product # of input shapes to check if there was a zero in them, # it overflowed in this case. a, b, c, d, e, f = tensor.vectors('abcdef') s = a + b + c + d + e + f g = theano.function([a, b, c, d, e, f], s, mode=theano.compile.Mode(linker='py')) g(*[numpy.zeros(2 ** 11, config.floatX) for i in range(6)])
def test_parse_mul_tree(self): x, y, z = tt.vectors("x", "y", "z") assert parse_mul_tree(x * y) == [False, [[False, x], [False, y]]] assert parse_mul_tree(-(x * y)) == [True, [[False, x], [False, y]]] assert parse_mul_tree(-x * y) == [False, [[True, x], [False, y]]] assert parse_mul_tree(-x) == [True, x] assert parse_mul_tree((x * y) * -z) == [ False, [[False, [[False, x], [False, y]]], [True, z]], ]
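# The encoding asserted above is [negated, content], where content is either a
# leaf variable or a two-element [left, right] list, and a negation is pushed
# onto the node it wraps. Reading -x * y back out of its tree, with the same
# helpers and variables as the test (a sketch):
tree = parse_mul_tree(-x * y)
neg, (left, right) = tree
assert neg is False        # the multiplication itself is not negated
assert left == [True, x]   # the minus sign sits on the x leaf
assert right == [False, y]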
def test_single_var(self): """ Test `is_same_graph` with some trivial graphs (one Variable). """ x, y, z = tensor.vectors("x", "y", "z") self.check( [ (x, x, (({}, True),)), (x, y, (({}, False), ({y: x}, True))), (x, tensor.neg(x), (({}, False),)), (x, tensor.neg(y), (({}, False),)), ] )
def test_full_graph(self): # Test `is_same_graph` with more complex graphs. x, y, z = tensor.vectors('x', 'y', 'z') t = x * y self.check([ (x * 2, x * 2, (({}, True), )), (x * 2, y * 2, (({}, False), ({y: x}, True), )), (x * 2, y * 2, (({}, False), ({x: y}, True), )), (x * 2, y * 3, (({}, False), ({y: x}, False), )), (t * 2, z * 2, (({}, False), ({t: z}, True), )), (t * 2, z * 2, (({}, False), ({z: t}, True), )), (x * (y * z), (x * y) * z, (({}, False), )), ])
def test_rop_override(self, cls_ofg):
    x, y = tt.vectors("xy")

    def ro(inps, epts):
        x, y = inps
        u, v = epts
        return [u * y * 2.0 + x * v * 1.5]

    u, v = tt.vectors("uv")
    op_mul_rop = cls_ofg([x, y, u, v], ro([x, y], [u, v]))
    op_mul = cls_ofg([x, y], [x * y], rop_overrides=ro)
    op_mul2 = cls_ofg([x, y], [x * y], rop_overrides=op_mul_rop)

    # single override case
    xx, yy = tt.vector("xx"), tt.vector("yy")
    du, dv = tt.vector("du"), tt.vector("dv")
    for op in [op_mul, op_mul2]:
        zz = op(xx, yy)
        dw = tt.Rop(zz, [xx, yy], [du, dv])
        fn = function([xx, yy, du, dv], dw)
        vals = np.random.rand(4, 32).astype(config.floatX)
        dwval = fn(*vals)
        assert np.allclose(dwval, vals[0] * vals[3] * 1.5 + vals[1] * vals[2] * 2.0)
def test_merge_only(self):
    # Test `is_same_graph` when `equal_computations` cannot be used.
    x, y, z = tensor.vectors('x', 'y', 'z')
    t = x * y
    self.check([
        (x, t, (({}, False), ({t: x}, True))),
        (t * 2, x * 2, (({}, False), ({t: x}, True), )),
        (x * x, x * y, (({}, False), ({y: x}, True), )),
        (x * x, x * y, (({}, False), ({x: y}, True), )),
        (x * x + z, x * y + t,
         (({}, False), ({y: x}, False), ({y: x, t: z}, True))),
    ], debug=False)
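# Standalone, the `givens` mapping names the substitutions `is_same_graph`
# applies before comparing the two graphs (a sketch, assuming the
# theano.gof.graph import path):
import theano.tensor as tensor
from theano.gof.graph import is_same_graph

x, y = tensor.vectors('x', 'y')
t = x * y
assert not is_same_graph(t * 2, x * 2)             # different graphs as-is
assert is_same_graph(t * 2, x * 2, givens={t: x})  # identical once t -> x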
def test_nested(self, cls_ofg): x, y = T.vectors('xy') u, v = x + y, x - y op_ft = cls_ofg([x, y], [u, v]) op_ift = cls_ofg([x, y], [u / 2, v / 2]) xx, yy = T.vector('xx'), T.vector('yy') xx2, yy2 = op_ift(*op_ft(xx, yy)) fn = function([xx, yy], [xx2, yy2]) xv = np.random.rand(16).astype(config.floatX) yv = np.random.rand(16).astype(config.floatX) xv2, yv2 = fn(xv, yv) assert np.allclose(xv, xv2) assert np.allclose(yv, yv2)
def test_c_thunks(): a = tensor.scalars("a") b, c = tensor.vectors("bc") cases = [False] if theano.config.cxx: cases.append(True) for c_thunks in cases: f = function( [a, b, c], ifelse(a, a * b, b * c), mode=Mode(optimizer=None, linker=vm.VM_Linker(c_thunks=c_thunks, use_cloop=False)), ) f(1, [2], [3, 2]) with pytest.raises(ValueError): f(0, [2], [3, 4]) assert any([hasattr(t, "cthunk") for t in f.fn.thunks]) == c_thunks
def ucs_to_srgb_helper(X, Jab, Y_w, L_A, Y_b, F, c, N_c): """Loss and gradient at point X (sRGB space) of the distance between the corresponding Jab color and a target Jab color. Descending this gradient will approximately invert srgb_to_ucs().""" global _ucs_to_srgb_helper if _ucs_to_srgb_helper is None: print('Building ucs_to_srgb_helper()...', file=sys.stderr) conditions = T.scalars('Y_w', 'L_A', 'Y_b', 'F', 'c', 'N_c') x, jab = T.vectors('x', 'jab') jab_x = symbolic.srgb_to_ucs(x, *conditions) loss = symbolic.delta_e(jab_x, jab)**2 grad = T.grad(loss, x) _ucs_to_srgb_helper = theano.function([x, jab] + conditions, [loss, grad], allow_input_downcast=True, on_unused_input='ignore') return _ucs_to_srgb_helper(np.squeeze(X), np.squeeze(Jab), Y_w, L_A, Y_b, F, c, N_c)
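# A minimal sketch of the descent the docstring describes, assuming scipy is
# available: because the helper returns (loss, grad), it can be handed to a
# quasi-Newton optimizer with jac=True. The wrapper name and starting point
# are illustrative, not part of the original module:
import numpy as np
from scipy.optimize import minimize

def ucs_to_srgb(jab, Y_w, L_A, Y_b, F, c, N_c):
    """Hypothetical inverse: find the sRGB triple whose UCS coords are jab."""
    x0 = np.full(3, 0.5)  # start from mid-gray
    res = minimize(ucs_to_srgb_helper, x0,
                   args=(jab, Y_w, L_A, Y_b, F, c, N_c),
                   jac=True, method='L-BFGS-B', bounds=[(0, 1)] * 3)
    return res.x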
def test_perform_sigm_times_exp(self): # Test the core function doing the `sigm_times_exp` optimization. # # It is easier to test different graph scenarios this way than by # compiling a theano function. x, y, z, t = tt.vectors("x", "y", "z", "t") exp = tt.exp def ok(expr1, expr2): trees = [parse_mul_tree(e) for e in (expr1, expr2)] perform_sigm_times_exp(trees[0]) trees[0] = simplify_mul(trees[0]) good = is_same_graph(compute_mul(trees[0]), compute_mul(trees[1])) if not good: print(trees[0]) print(trees[1]) print("***") theano.printing.debugprint(compute_mul(trees[0])) print("***") theano.printing.debugprint(compute_mul(trees[1])) assert good ok(sigmoid(x) * exp(-x), sigmoid(-x)) ok( -x * sigmoid(x) * (y * (-1 * z) * exp(-x)), -x * sigmoid(-x) * (y * (-1 * z)), ) ok( -sigmoid(-x) * ( exp(y) * (-exp(-z) * 3 * -exp(x)) * (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z)) ) * -sigmoid(x), sigmoid(x) * (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z)))) * (-sigmoid(x)), ) ok(exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)), -sigmoid(-x) * sigmoid(-x)) ok(-exp(x) * -sigmoid(-x) * -exp(-x), -sigmoid(-x))
def test_ops3(self): # __div__, __rdiv__ l1, l2, u1, u2 = T.vectors('l1', 'l2', 'u1', 'u2') i1 = TheanoInterval(l1, u1) i2 = TheanoInterval(l2, u2) r1 = i1 / i2 v1l = A([3.0, -4.0, 3.0, -4.0, -4.0, -4.0]) v1u = A([4.0, -3.0, 4.0, -3.0, 3.0, 3.0]) v2l = A([5.0, 5.0, -6.0, -6.0, 5.0, -6.0]) v2u = A([6.0, 6.0, -5.0, -5.0, 6.0, -5.0]) d12 = {l1: v1l, l2: v2l, u1: v1u, u2: v2u} res1 = r1.eval(d12) ll = 3.0 / 5.0 lu = 3.0 / 6.0 ul = 4.0 / 5.0 ans1l = A([lu, -ul, -ul, lu, -ul, -ll]) ans1u = A([ul, -lu, -lu, ul, ll, ul]) vl = A([-4.0, -4.0, 3.0]) vu = A([-3.0, 3.0, 4.0]) v = 7.0 d1 = {l1: vl, u1: vu} r2 = i1 / v res2 = r2.eval(d1) l = 3.0 / 7.0 u = 4.0 / 7.0 ans2l = A([-u, -u, l]) ans2u = A([-l, l, u]) vl = A([-4.0, 3.0]) vu = A([-3.0, 4.0]) d1 = {l1: vl, u1: vu} r3 = v / i1 res3 = r3.eval(d1) l = 7.0 / 3.0 u = 7.0 / 4.0 ans3l = A([-l, u]) ans3u = A([-u, l]) array_almost_equal(res1[0], ans1l) array_almost_equal(res1[1], ans1u) array_almost_equal(res2[0], ans2l) array_almost_equal(res2[1], ans2u) array_almost_equal(res3[0], ans3l) array_almost_equal(res3[1], ans3u)
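# The expected bounds are the standard interval rule: when 0 is outside
# [bl, bu], [al, au] / [bl, bu] spans the min and max of the four endpoint
# quotients. A NumPy cross-check of the i1 / i2 case, independent of
# TheanoInterval (a sketch):
import numpy as np

def interval_div(al, au, bl, bu):
    # assumes 0 is not inside [bl, bu]
    q = np.array([al / bl, al / bu, au / bl, au / bu])
    return q.min(axis=0), q.max(axis=0)

lo, hi = interval_div(np.array([3., -4., 3., -4., -4., -4.]),
                      np.array([4., -3., 4., -3., 3., 3.]),
                      np.array([5., 5., -6., -6., 5., -6.]),
                      np.array([6., 6., -5., -5., 6., -5.]))
# lo and hi agree with ans1l and ans1u computed in the test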
def compile(self, X, n_negative_samples=None):
    if n_negative_samples is None:
        n_negative_samples = 1000
    pos_samples = X.loc[:, list(self.column_ranges.keys())].values.astype(floatX)
    pos_data, neg_data = T.matrices('SigData', 'BckData')
    pos_w, neg_w, parameters = T.vectors('SigW', 'BckW', 'parameters')
    neg_samples, neg_weight = self.generate_negative_samples(
        n_negative_samples=n_negative_samples, strategy=self.sampling_strategy)
    givens = {pos_data: pos_samples, neg_data: neg_samples, neg_w: neg_weight}
    pdf = self.prepare_pdf()
    pdfs, summands = pdf(pos_data, neg_data, neg_weights=neg_w, weights=parameters)
    result = -T.mean(pos_w * T.log(pdfs))
    self.Tfunction = theano.function([parameters, pos_w], result, givens=givens)
    self.Tderivative = theano.function([parameters, pos_w],
                                       T.grad(result, parameters), givens=givens)
    self.X = X
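# Once compiled, `Tfunction` and `Tderivative` form a (loss, gradient) pair
# for an external optimizer. A hedged sketch with scipy, where `model` is an
# instance on which `compile` has been called, and `initial_parameters` /
# `sample_weights` are hypothetical arrays of the right shapes:
from scipy.optimize import minimize

fit = minimize(model.Tfunction, x0=initial_parameters,
               args=(sample_weights,), jac=model.Tderivative,
               method='L-BFGS-B')
print(fit.x)  # fitted pdf parameters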
#
# if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
#         train.maker.fgraph.toposort()]):
#     print('Used the cpu')
# elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
#           train.maker.fgraph.toposort()]):
#     print('Used the gpu')
# else:
#     print('ERROR, not able to tell if theano used the cpu or the gpu')
#     print(train.maker.fgraph.toposort())
#
# for i in range(training_step):
#     pred, err = train(D[0], D[1])
#
# print("target values for D")
# print(D[1])
#
# print("prediction on D")
# print(predict(D[0]))

# x = T.dvector('x')
# f = theano.function(inputs=[x], outputs=10 * x, mode='DebugMode')
# f([5])
# f([0])
# f([7])

from theano import ProfileMode
profmode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())

v1, v2 = T.vectors(2)
o = v1 + v2
f = theano.function([v1, v2], [o], mode=profmode)
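# Note: ProfileMode was deprecated in later Theano releases. A sketch of the
# equivalent per-function profiling with the newer `profile` flag (assumes a
# reasonably recent Theano and that numpy is importable):
import numpy as np

f = theano.function([v1, v2], [o], profile=True)
f(np.ones(3, dtype=theano.config.floatX), np.ones(3, dtype=theano.config.floatX))
f.profile.summary()  # print the collected per-op timings to stderr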
def test_compute_mul(self): x, y, z = tt.vectors("x", "y", "z") tree = (x * y) * -z mul_tree = parse_mul_tree(tree) assert parse_mul_tree(compute_mul(mul_tree)) == mul_tree assert is_same_graph(compute_mul(parse_mul_tree(tree)), tree)
def test_functions(self):
    Case = namedtuple("Case", "func input_data answer")

    testcases = [
        Case(
            func=cg.fletcher_reeves,
            input_data=(
                np.array([1.35, 0.3]),
                np.array([0.11, -0.5]),
                np.array([0, 0]),
            ),
            answer=0.137
        ),
        Case(
            func=cg.polak_ribiere,
            input_data=(
                np.array([1., -0.5]),
                np.array([1.2, -0.45]),
                np.array([0, 0]),
            ),
            answer=0.174
        ),
        Case(
            func=cg.hentenes_stiefel,
            input_data=(
                np.array([1., -0.5]),
                np.array([1.2, -0.45]),
                np.array([0.2, 0.05]),
            ),
            answer=5.118
        ),
        Case(
            func=cg.conjugate_descent,
            input_data=(
                np.array([1., -0.5]),
                np.array([1.2, -0.45]),
                np.array([0.2, 0.05]),
            ),
            answer=-7.323
        ),
        Case(
            func=cg.liu_storey,
            input_data=(
                np.array([1., -0.5]),
                np.array([1.2, -0.45]),
                np.array([0.2, 0.05]),
            ),
            answer=1.243
        ),
        Case(
            func=cg.dai_yuan,
            input_data=(
                np.array([1., -0.5]),
                np.array([1.2, -0.45]),
                np.array([0.2, 0.05]),
            ),
            answer=38.647
        ),
    ]

    for testcase in testcases:
        input_data = asfloat(np.array(testcase.input_data))
        variables = T.vectors(3)

        # Some of these functions treat certain inputs as optional and
        # ignore them during the computation. Theano raises an error when
        # a declared input is unused in the graph, so we add a zero-valued
        # term that touches the last variable without changing the result.
        hack = asfloat(0) * variables[-1][0]
        output_func = theano.function(
            variables,
            testcase.func(*variables) + hack
        )
        result = output_func(*input_data)
        self.assertAlmostEqual(result, testcase.answer, places=3)
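# The expected answers are reproducible by hand from the standard
# conjugate-gradient beta formulas, reading the first two arrays as the
# previous and current gradients (an interpretation, cross-checked against
# the numbers above). A quick NumPy verification of the first two cases:
import numpy as np

# Fletcher-Reeves: beta = (g_new . g_new) / (g_old . g_old)
g_old, g_new = np.array([1.35, 0.3]), np.array([0.11, -0.5])
assert round(g_new.dot(g_new) / g_old.dot(g_old), 3) == 0.137

# Polak-Ribiere: beta = g_new . (g_new - g_old) / (g_old . g_old)
g_old, g_new = np.array([1., -0.5]), np.array([1.2, -0.45])
assert round(g_new.dot(g_new - g_old) / g_old.dot(g_old), 3) == 0.174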
from theano import tensor as T
from theano.ifelse import ifelse
import theano, time
import numpy as np

if __name__ == "__main__":
    a, b = T.scalars('a', 'b')
    x, y = T.vectors('x', 'y')

    z_lazy = ifelse(
        T.eq(a, b),   # condition
        T.mean(x),    # then branch
        T.mean(y))    # else branch

    var_1 = np.array([1, 2])
    var_2 = np.array([3, 4])

    condition_1 = 1
    condition_2 = 1

    iffunction = theano.function([a, b, x, y], [z_lazy])
    result = iffunction(condition_1, condition_2, var_1, var_2)
    print(result)
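# For contrast, a sketch of the eager equivalent under the same definitions:
# `ifelse` is lazy and evaluates only the branch picked by the condition,
# while `T.switch` computes both branches before selecting.
z_eager = T.switch(T.eq(a, b), T.mean(x), T.mean(y))  # both means are computed
switchfunction = theano.function([a, b, x, y], [z_eager])
print(switchfunction(condition_1, condition_2, var_1, var_2))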
def test_grad_override(self, cls_ofg): x, y = tt.vectors("xy") def go(inps, gs): x, y = inps (g, ) = gs return [g * y * 2, g * x * 1.5] dedz = tt.vector("dedz") op_mul_grad = cls_ofg([x, y, dedz], go([x, y], [dedz])) op_mul = cls_ofg([x, y], [x * y], grad_overrides=go) op_mul2 = cls_ofg([x, y], [x * y], grad_overrides=op_mul_grad) # single override case (function or OfG instance) xx, yy = tt.vector("xx"), tt.vector("yy") for op in [op_mul, op_mul2]: zz = tt.sum(op(xx, yy)) dx, dy = tt.grad(zz, [xx, yy]) fn = function([xx, yy], [dx, dy]) xv = np.random.rand(16).astype(config.floatX) yv = np.random.rand(16).astype(config.floatX) dxv, dyv = fn(xv, yv) assert np.allclose(yv * 2, dxv) assert np.allclose(xv * 1.5, dyv) # list override case def go1(inps, gs): x, w, b = inps g = gs[0] return g * w * 2 def go2(inps, gs): x, w, b = inps g = gs[0] return g * x * 1.5 w, b = tt.vectors("wb") # we make the 3rd gradient default (no override) op_linear = cls_ofg([x, w, b], [x * w + b], grad_overrides=[go1, go2, "default"]) xx, ww, bb = tt.vector("xx"), tt.vector("yy"), tt.vector("bb") zz = tt.sum(op_linear(xx, ww, bb)) dx, dw, db = tt.grad(zz, [xx, ww, bb]) fn = function([xx, ww, bb], [dx, dw, db]) xv = np.random.rand(16).astype(config.floatX) wv = np.random.rand(16).astype(config.floatX) bv = np.random.rand(16).astype(config.floatX) dxv, dwv, dbv = fn(xv, wv, bv) assert np.allclose(wv * 2, dxv) assert np.allclose(xv * 1.5, dwv) assert np.allclose(np.ones(16, dtype=config.floatX), dbv) # NullType and DisconnectedType op_linear2 = cls_ofg( [x, w, b], [x * w + b], grad_overrides=[go1, NullType()(), DisconnectedType()()], ) zz2 = tt.sum(op_linear2(xx, ww, bb)) dx2, dw2, db2 = tt.grad( zz2, [xx, ww, bb], return_disconnected="Disconnected", disconnected_inputs="ignore", null_gradients="return", ) assert isinstance(dx2.type, tt.TensorType) assert dx2.ndim == 1 assert isinstance(dw2.type, NullType) assert isinstance(db2.type, DisconnectedType)
def test_ops4(self):
    # power
    x, y = T.vectors('x', 'y')
    itv = TheanoInterval(x, y)
    v1l = A([-3, -2, -1, -2, 0.5, 0.5, 1, 2])
    v1u = A([-2, -1, -0.5, -0.5, 2, 1, 2, 3])
    v2l = A([1, 2])
    v2u = A([3, 4])
    v3l = A([-2., -2., -2., -1., -1., -1., -0.5, -0.5, -0.5])
    v3u = A([0.5, 1., 2., 0.5, 1., 2., 0.5, 1., 2.])
    v1 = (v1l, v1u)
    v2 = (v2l, v2u)
    v3 = (v3l, v3u)
    exponents1 = [-3, -2, 2, 3]
    exponents2 = [-2.5, -2., 2., 2.5]
    exponents3 = [2, 3]

    powers1 = [itv.power(exp) for exp in exponents1]
    powers2 = [itv.power(exp) for exp in exponents2]
    powers3 = [itv.power(exp) for exp in exponents3]

    functions1 = [function([x, y], [p.lower, p.upper]) for p in powers1]
    functions2 = [function([x, y], [p.lower, p.upper]) for p in powers2]
    functions3 = [function([x, y], [p.lower, p.upper]) for p in powers3]

    res1 = [f(*v1) for f in functions1]
    res2 = [f(*v2) for f in functions2]
    res3 = [f(*v3) for f in functions3]

    ans1l = [A([4., 1., 0.25, 0.25, 0.25, 0.25, 1., 4.]),
             A([-27., -8., -1., -8., 0.125, 0.125, 1., 8.])]
    ans1u = [A([9., 4., 1., 4., 4., 1., 4., 9.]),
             A([-8., -1., -0.125, -0.125, 8., 1., 8., 27.])]
    ans1l = [np.reciprocal(ans1u[1]), np.reciprocal(ans1u[0])] + ans1l
    ans1u = [np.reciprocal(ans1l[3]), np.reciprocal(ans1l[2])] + ans1u
    ans2l = [A([1., 4.]), A([1., 2. ** 2.5])]
    ans2u = [A([9., 16.]), A([3. ** 2.5, 4. ** 2.5])]
    ans2l = [np.reciprocal(ans2u[1]), np.reciprocal(ans2u[0])] + ans2l
    ans2u = [np.reciprocal(ans2l[3]), np.reciprocal(ans2l[2])] + ans2u
    ans3l = [A([0.] * 9),
             A([-8., -8., -8., -1., -1., -1., -0.125, -0.125, -0.125])]
    ans3u = [A([4., 4., 4., 1., 1., 4., 0.25, 1., 4.]),
             A([0.125, 1., 8., 0.125, 1., 8., 0.125, 1., 8.])]
    for i in range(4):
        array_almost_equal(res1[i][0], ans1l[i])
        array_almost_equal(res1[i][1], ans1u[i])
        array_almost_equal(res2[i][0], ans2l[i])
        array_almost_equal(res2[i][1], ans2u[i])
    for i in range(2):
        array_almost_equal(res3[i][0], ans3l[i])
        array_almost_equal(res3[i][1], ans3u[i])
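# The expected bounds follow the interval power rules: odd exponents are
# monotone, so [l, u]**n = [l**n, u**n]; for even n the lower bound collapses
# to 0 whenever the interval straddles 0; negative exponents are reciprocals
# of the positive ones. A NumPy cross-check of the n = 2 case on the v1
# intervals (a sketch, independent of TheanoInterval):
import numpy as np

def interval_pow(l, u, n):
    if n % 2 == 1:
        return l ** n, u ** n                 # odd powers are monotone
    lo = np.where((l <= 0) & (u >= 0), 0.0,   # even power, 0 inside [l, u]
                  np.minimum(l ** n, u ** n))
    return lo, np.maximum(l ** n, u ** n)

lo, hi = interval_pow(np.array([-3, -2, -1, -2, 0.5, 0.5, 1, 2.]),
                      np.array([-2, -1, -0.5, -0.5, 2, 1, 2, 3.]), 2)
assert np.allclose(lo, [4., 1., 0.25, 0.25, 0.25, 0.25, 1., 4.])
assert np.allclose(hi, [9., 4., 1., 4., 4., 1., 4., 9.])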
def _compile_model(self): """ theano implementation of 3C model """ ### GC90 atmospheric model implementation theta_sun, beta, alpha, am, rh, pressure = T.scalars( 'theta_sun', 'beta', 'alpha', 'am', 'rh', 'pressure') wl = T.vector('wl') wl_a = 550 theta_sun_ = theta_sun * np.pi / 180. z3 = -0.1417 * alpha + 0.82 z2 = ifelse(T.gt(alpha, 1.2), 0.65, z3) z1 = ifelse(T.lt(alpha, 0), 0.82, z2) theta_sun_mean = z1 B3 = T.log(1 - theta_sun_mean) B2 = B3 * (0.0783 + B3 * (-0.3824 - 0.5874 * B3)) B1 = B3 * (1.459 + B3 * (0.1595 + 0.4129 * B3)) Fa = 1 - 0.5 * T.exp((B1 + B2 * T.cos(theta_sun_)) * T.cos(theta_sun_)) omega_a = (-0.0032 * am + 0.972) * T.exp(3.06 * 1e-4 * rh) tau_a = beta * (wl / wl_a)**(-alpha) # fixed a bug in M, thanks Jaime! [brackets added] M = 1 / (T.cos(theta_sun_) + 0.50572 * (90 + 6.07995 - theta_sun)**(-1.6364)) M_ = M * pressure / 1013.25 Tr = T.exp(-M_ / (115.6406 * (wl / 1000)**4 - 1.335 * (wl / 1000)**2)) Tas = T.exp(-omega_a * tau_a * M) Edd = Tr * Tas Edsr = 0.5 * (1 - Tr**0.95) Edsa = Tr**1.5 * (1 - Tas) * Fa Ed = Edd + Edsr + Edsa Edd_Ed = Edd / Ed Edsr_Ed = Edsr / Ed Edsa_Ed = Edsa / Ed Eds_Ed = Edsr_Ed + Edsa_Ed ### Albert and Mobley bio-optical model implementation a_w, daw_dT, astar_ph, astar_y, Ls_Ed = T.vectors( 'a_w', 'daw_dT', 'astar_ph', 'astar_y', 'Ls_Ed') C_chl, C_sm, C_mie, n_mie, C_y, S_y, T_w, theta_view, n_w, rho_s, rho_dd, rho_ds, delta = T.scalars( 'C_chl', 'C_sm', 'C_mie', 'n_mie', 'C_y', 'S_y', 'T_w', 'theta_view', 'n_w', 'rho_s', 'rho_dd', 'rho_ds', 'delta') # calc_a_ph a_ph = C_chl * astar_ph # calc_a_y wl_ref_y = 440 a_y = ifelse(T.eq(S_y, -1), C_y * astar_y, C_y * T.exp(-S_y * (wl - wl_ref_y))) # calc_a T_w_ref = 20. a_w_corr = a_w + (T_w - T_w_ref) * daw_dT a = a_w_corr + a_ph + a_y # calc_bb_sm bbstar_sm = 0.0086 bbstar_mie = 0.0042 wl_ref_mie = 500 bb_sm = C_sm * bbstar_sm + C_mie * bbstar_mie * (wl / wl_ref_mie)**n_mie # calc_bb b1 = ifelse(T.eq(n_w, 1.34), 0.00144, 0.00111) wl_ref_water = 500 S_water = -4.32 bb_water = b1 * (wl / wl_ref_water)**S_water bb = bb_water + bb_sm # calc omega_b omega_b = bb / (bb + a) # calc sun and viewing zenith angles under water theta_sun_ = theta_sun * np.pi / 180. theta_sun_ss = T.arcsin(T.sin(theta_sun_) / n_w) theta_view_ = theta_view * np.pi / 180. theta_view_ss = T.arcsin(T.sin(theta_view_) / n_w) p_f = [0.1034, 1, 3.3586, -6.5358, 4.6638, 2.4121] p_frs = [0.0512, 1, 4.6659, -7.8387, 5.4571, 0.1098, 0.4021] # calc subsurface reflectance f = p_f[0] * (p_f[1] + p_f[2] * omega_b + p_f[3] * omega_b**2 + p_f[4] * omega_b**3) * (1 + p_f[5] / T.cos(theta_sun_ss)) R0minus = f * omega_b # calc subsurface remote sensing reflectance frs = p_frs[0] * (p_frs[1] + p_frs[2] * omega_b + p_frs[3] * omega_b**2 + p_frs[4] * omega_b**3) * ( 1 + p_frs[5] / T.cos(theta_sun_ss)) * ( 1 + p_frs[6] / T.cos(theta_view_ss)) Rrs0minus = frs * omega_b # calc water surface reflected reflectance Rrs_refl = rho_s * Ls_Ed + rho_dd * Edd_Ed / np.pi + rho_ds * Eds_Ed / np.pi + delta # calc_Rrs0plus (Lee1998, eq22), R=Q*Rrs gamma = 0.48 zeta = 0.518 Rrs = zeta * Rrs0minus / (1 - gamma * R0minus) Lu_Ed = Rrs + Rrs_refl f = th.function([ beta, alpha, am, rh, pressure, C_chl, C_sm, C_mie, n_mie, C_y, S_y, T_w, theta_sun, theta_view, n_w, rho_s, rho_dd, rho_ds, delta, wl, a_w, daw_dT, astar_ph, astar_y, Ls_Ed ], [Rrs, Rrs_refl, Lu_Ed, Ed], on_unused_input='warn') return f
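# A hedged usage sketch of the compiled model: the returned function takes the
# 25 inputs in the order listed in `th.function` above. Every value below is
# an illustrative placeholder, and `model` stands for a hypothetical instance
# of the class defining `_compile_model`:
import numpy as np

fwd = model._compile_model()
wl = np.linspace(400., 800., 101).astype(th.config.floatX)
ones = np.ones_like(wl)
Rrs, Rrs_refl, Lu_Ed, Ed = fwd(
    0.1, 1.2, 1.0, 60.0, 1013.25,      # beta, alpha, am, rh, pressure
    5.0, 1.0, 1.0, -1.0, 0.3, 0.014,   # C_chl, C_sm, C_mie, n_mie, C_y, S_y
    20.0, 30.0, 40.0, 1.34,            # T_w, theta_sun, theta_view, n_w
    0.02, 0.0, 0.0, 0.0,               # rho_s, rho_dd, rho_ds, delta
    wl, 0.005 * ones, 1e-4 * ones,     # wl, a_w, daw_dT
    0.03 * ones, ones, 0.02 * ones)    # astar_ph, astar_y, Ls_Ed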