def check_grad(func, mod=None):
    """Assert that reverse-mode and finite-difference directional gradients agree.

    ``func`` is registered in ``mod`` together with four helper globals:
    its ``gradient`` transform, a function summing ``grad_i * direction_i``
    over every parameter, a function computing
    ``sum((func(x + eps * d) - func(x)) / eps)`` with ``eps = 0.01``, and a
    function returning the absolute difference of those two scalars.  The
    last one is evaluated on random arguments and must be below ``0.001``.

    Parameters
    ----------
    func : the function to check; stored in ``mod`` under a fresh global.
    mod : module receiving the globals; a new ``relay.Module()`` is created
        when omitted.

    Side effects
    ------------
    Adds five globals to ``mod`` and increments the module-level
    ``CHECK_GRAD_COUNTER`` that keeps their names unique across calls.
    """
    global CHECK_GRAD_COUNTER
    if mod is None:
        mod = relay.Module()

    # Every global created by this call carries the same numeric suffix.
    suffix = str(CHECK_GRAD_COUNTER)
    func_name = GlobalVar("func_" + suffix)
    back_func_name = GlobalVar("back_func_" + suffix)
    finite_difference_func_name = GlobalVar("finite_difference_" + suffix)
    reverse_mode_func_name = GlobalVar("reverse_mode_" + suffix)
    check_func_name = GlobalVar("check_func_" + suffix)
    CHECK_GRAD_COUNTER += 1

    step = relay.const(0.01)
    mod[func_name] = func
    mod[back_func_name] = gradient(mod[func_name], mod=mod)
    params = mod[func_name].params
    directions = [rand_from_type(p.checked_type) for p in params]
    scalar_type = TensorType(())

    def register(name, body):
        # Helper functions reuse the checked function's parameters,
        # type parameters and attributes; only body and name differ.
        target = mod[func_name]
        mod[name] = Function(params, body, scalar_type,
                             target.type_params, target.attrs)

    # Reverse mode: field 1 of the back function's result is indexed once
    # per parameter and weighted by that parameter's random direction.
    builder = ScopeBuilder()
    grads = builder.let("bf", TupleGetItem(back_func_name(*params), 1))
    total = relay.const(0.0)
    for index, (param, direction) in enumerate(zip(params, directions)):
        assert isinstance(param.checked_type, TensorType)
        weighted = op.cast(TupleGetItem(grads, index) * direction, 'float32')
        total = total + op.reduce.sum(weighted)
    builder.ret(total)
    register(reverse_mode_func_name, builder.get())

    # Finite difference along the same directions.
    perturbed = [p + step * d for p, d in zip(params, directions)]
    slope = (func_name(*perturbed) - func_name(*params)) / step
    register(finite_difference_func_name, op.reduce.sum(slope))

    gap = op.abs(
        reverse_mode_func_name(*params) - finite_difference_func_name(*params))
    register(check_func_name, gap)

    executor = create_executor(mod=mod)
    sample_args = [rand_from_type(p.checked_type) for p in params]
    res = executor.evaluate(check_func_name(*sample_args))
    assert res.data.asnumpy() < 0.001
def build_impl(self, input_size, memory_size, dtype="float32"):
    """Build the cell body from one input tensor and a list of child state pairs.

    Two inputs are declared through ``self.input``: ``lstmcell_input`` of
    shape ``(1, input_size)`` and ``lstmcell_children``, a ``self.p.l`` of
    2-tuples of ``(1, memory_size)`` tensors.  Field 1 of every child is
    summed and fed, together with the input, through ``Linear`` layers whose
    combined output is split into three activations.  Field 0 of every child
    is scaled by a per-child sigmoid gate and accumulated onto ``i * u``.

    Returns
    -------
    ``Tuple([c, o * tanh(c)])`` where ``c`` is the accumulated value.
    """
    state_type = TensorType(shape=(1, memory_size), dtype=dtype)
    cell_input = self.input(
        var("lstmcell_input", shape=(1, input_size), dtype=dtype))
    child_states = self.input(
        Var("lstmcell_children",
            self.p.l(TupleType([state_type, state_type]))))

    # Binary addition as a Relay function; reused by both folds below.
    add = lam(["x", "y"], lambda lhs, rhs: lhs + rhs)

    field_one_of_children = self.p.map(
        lam(["z"], lambda child: TupleGetItem(child, 1)), child_states)
    child_h_sum = self.p.foldl(
        add, op.zeros(shape=(1, memory_size), dtype=dtype),
        field_one_of_children)

    # Layer construction order is kept as in the original.
    from_input = Linear(input_size=input_size,
                        output_size=memory_size * 3)(cell_input)
    from_children = Linear(input_size=memory_size,
                           output_size=memory_size * 3)(child_h_sum)
    fx = Linear(input_size=input_size, output_size=memory_size)(cell_input)
    # fh stays unapplied here: it is applied once per child below.
    fh = Linear(input_size=memory_size, output_size=memory_size)

    raw_i, raw_o, raw_u = op.split(from_input + from_children, 3, axis=1)
    act_i = op.sigmoid(raw_i)
    act_o = op.sigmoid(raw_o)
    act_u = op.tanh(raw_u)

    def gated_child(child):
        gate = op.sigmoid(fh(TupleGetItem(child, 1)) + fx)
        return gate * TupleGetItem(child, 0)

    accumulated = self.p.foldl(
        add, act_i * act_u,
        self.p.map(lam(["z"], gated_child), child_states))
    return Tuple([accumulated, act_o * op.tanh(accumulated)])
def test_tuple():
    """dcpe must turn ``(4.0, x)[1]`` into ``x`` inside a polymorphic function.

    The expected identity function is passed through ``InferType`` before
    being compared with ``tvm.ir.structural_equal``.
    """
    type_var = TypeVar("t")
    param = Var("x", type_var)

    pair = relay.Tuple([relay.const(4.0), param])
    projecting = Function([param], TupleGetItem(pair, 1), None, [type_var])

    identity = relay.Function([param], param, None, [type_var])
    identity = run_opt_pass(identity, transform.InferType())

    simplified = dcpe(projecting)
    assert tvm.ir.structural_equal(simplified, identity)
def test_tuple():
    """dcpe must turn ``(4.0, x)[1]`` into ``x`` inside a polymorphic function.

    The expected identity function is run through
    ``transform.OptimizeOnExpr`` with ``InferType`` before the
    ``alpha_equal`` comparison.
    """
    type_var = TypeVar("t")
    param = Var("x", type_var)

    pair = relay.Tuple([relay.const(4.0), param])
    projecting = Function([param], TupleGetItem(pair, 1), None, [type_var])

    identity = relay.Function([param], param, None, [type_var])
    identity = transform.OptimizeOnExpr(identity, transform.InferType())

    simplified = dcpe(projecting)
    assert alpha_equal(simplified, identity)
def build_impl(self, input_size, memory_size, dtype="float32"):
    """Thread an ``LSTMCell`` over a list input with ``self.p.map_accuml``.

    The single input ``l`` is a ``self.p.l`` of ``(1, input_size)`` tensors.
    The accumulator starts as a pair of zero tensors of shape
    ``(1, memory_size)``; at each step the cell receives the element and a
    one-element list holding the accumulator.

    Returns
    -------
    ``Tuple([field 1 of the final accumulator, per-step outputs])``, where
    each per-step output is field 1 of that step's cell result.
    """
    sequence = self.input(
        Var("l", self.p.l(TensorType(shape=(1, input_size), dtype=dtype))))

    def step(c, x):
        lstm = LSTMCell(input_size=input_size,
                        memory_size=memory_size,
                        dtype=dtype)
        out = lstm(x, self.p.cons(c, self.p.nil()))
        return Tuple([out, TupleGetItem(out, 1)])

    def zero_state():
        # Called twice on purpose: the original builds two separate nodes.
        return op.zeros(shape=(1, memory_size), dtype=dtype)

    step_fn = lam(["c", "x"], step)
    initial = Tuple([zero_state(), zero_state()])
    scanned = self.p.map_accuml(step_fn, initial, sequence)

    final_acc = TupleGetItem(scanned, 0)
    return Tuple([TupleGetItem(final_acc, 1), TupleGetItem(scanned, 1)])
def build_impl(self, input_size, memory_size, dtype="float32"):
    """Run ``LSTMTransformer`` over the input list and over its reverse.

    The single input ``l`` is a ``self.p.l`` of ``(1, input_size)`` tensors.
    Field 0 of both results is concatenated along axis 1; field 1 of both
    results is zipped and each pair concatenated along axis 1.

    Returns
    -------
    ``Tuple([concatenated field 0, mapped list of concatenated pairs])``.
    """
    sequence = self.input(
        Var("l", self.p.l(TensorType(shape=(1, input_size), dtype=dtype))))

    def run_lstm(items):
        network = LSTMTransformer(input_size=input_size,
                                  memory_size=memory_size,
                                  dtype=dtype)
        return network(items)

    def join(first, second):
        return op.concatenate([first, second], axis=1)

    forward = run_lstm(sequence)
    backward = run_lstm(self.p.rev(sequence))

    lhs = join(TupleGetItem(forward, 0), TupleGetItem(backward, 0))

    state_type = TensorType(shape=(1, memory_size), dtype=dtype)
    # The parameter is annotated explicitly; the original notes that the
    # type cannot be inferred at this point.
    pair = Var("x", TupleType([state_type, state_type]))
    join_pair = Function(
        [pair], join(TupleGetItem(pair, 0), TupleGetItem(pair, 1)))
    zipped = self.p.zip(TupleGetItem(forward, 1), TupleGetItem(backward, 1))
    rhs = self.p.map(join_pair, zipped)

    return Tuple([lhs, rhs])
def f(c, x):
    """Apply a fresh ``LSTMCell`` to ``x`` with ``c`` as its only list element.

    ``input_size``, ``memory_size``, ``dtype`` and ``self`` are free
    variables taken from the enclosing scope.

    Returns
    -------
    ``Tuple([out, out[1]])`` where ``out`` is the cell's result.
    """
    lstm = LSTMCell(input_size=input_size,
                    memory_size=memory_size,
                    dtype=dtype)
    single_child = self.p.cons(c, self.p.nil())
    out = lstm(x, single_child)
    return Tuple([out, TupleGetItem(out, 1)])
def foreach_children(children):
    """Scale field 0 of ``children`` by a sigmoid gate built from field 1.

    The gate is ``sigmoid(fh(children[1]) + fx)``; ``fh`` and ``fx`` are
    free variables taken from the enclosing scope.
    """
    field_zero = TupleGetItem(children, 0)
    field_one = TupleGetItem(children, 1)
    gate = op.sigmoid(fh(field_one) + fx)
    return gate * field_zero
def test_tuple():
    """dcpe must turn ``(4.0, x)[1]`` into ``x`` inside a polymorphic function.

    The result is compared against the identity function with
    ``alpha_equal``.
    """
    type_var = TypeVar("t")
    param = Var("x", type_var)

    pair = relay.Tuple([relay.const(4.0), param])
    projecting = Function([param], TupleGetItem(pair, 1), None, [type_var])

    simplified = dcpe(projecting)
    identity = relay.Function([param], param, None, [type_var])
    assert alpha_equal(simplified, identity)