def test_reduce():
    x = sym.Variable("x", shape=(1, 16, 512, 512))
    y = sym.sum(x, name="y", axis=1)
    g, ldict = correct_layout(y, "NCHW")
    assert ldict["x"][0] == "NCHW"
    assert ldict["y"][0] == "__undef__"
    # second pass will insert layout transform
    g, ldict = correct_layout(g, "NCHW16c")
    assert ldict["x"][0] == "NCHW16c"
    assert ldict["x_NCHW"][0] == "NCHW"
    assert ldict["y"][0] == "__undef__"

def test_multi_loss_graph_gradients():
    # input data
    shape1 = (1000, 100)
    data1 = sym.Variable('data1', shape=(1000, 100), dtype=0)

    # fake non-sparse label
    label = sym.full(fill_value=3)

    # square loss
    sub1 = sym.elemwise_sub(data1, label, name="sub1")
    square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss")

    # fake loss1
    shape2 = (1000,)
    data2 = sym.Variable('data2', shape=shape2, dtype=0)
    loss1 = sym.sqrt(data2, name="loss1")

    # fake loss2
    loss2 = sym.relu(data1, name='loss2')

    # block loss1
    total_loss = sym.elemwise_sum(
        sym.block_grad(loss1), square_loss, num_args=2, name="total_loss")

    # grad_g.symbol.list_output_names()
    # >> ['loss1_grad_0_output', 'grad_sum_output']
    grad_g = graph_util.get_gradient_graph(
        [total_loss, loss2], total_loss.list_input_variables())

    # infer shape
    in_shapes, out_shapes = graph_util.infer_shape(grad_g)
    assert out_shapes == [list(shape2), list(shape1)]

    # grad_data1 is elemwise_sum of grad_loss2, grad_square_loss
    grad_data1 = grad_g.symbol[1]
    assert grad_data1.list_attr()['num_args'] == '2'

    # block grad should return zero grad
    grad_data2 = grad_g.symbol[0]
    assert 'zeros_like' in grad_g.ir()

    # test reverse infer shape for label
    assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0

    # infer type
    in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
    assert out_dtypes == ['float32', 'float32']

    # test reverse infer type for label
    assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0

def nn(m: Model):
    v_images = sym.Variable("images", shape=(BATCH_SIZE, 1, 28, 28), dtype=0)
    v_true_labels = sym.Variable("true_labels", shape=(BATCH_SIZE, 10),
                                 dtype=0)

    x = v_images
    x = sym.reshape(data=x, shape=(BATCH_SIZE, 28 * 28))
    x = sym.dense(data=x, units=10)
    logits = x
    x = -sym.elemwise_mul(v_true_labels, sym.log_softmax(x))
    loss = sym.sum(x) / BATCH_SIZE

    # This is not really accuracy, because we use softmax instead of hardmax
    accuracy = sym.sum(v_true_labels * sym.softmax(logits)) / BATCH_SIZE

    # We have to somehow list all weights (the corresponding variables are
    # generated automatically)
    weight_vars = [
        v for v in loss.list_input_variables()
        if v.attr('name') not in ['images', 'true_labels']
    ]

    optimizer = SGD(learning_rate=1e-4)
    update_step = optimizer.minimize(loss, var=weight_vars)

    tgraph = nnvm.graph.create(sym.Group(
        [loss, update_step])).apply("InferShape").apply("InferType")
    fgraph = nnvm.graph.create(sym.Group(
        [loss, accuracy])).apply("InferShape").apply("InferType")

    m.tgraph = tgraph
    m.fgraph = fgraph
    m.optimizer = optimizer
    m.loss = loss
    return m

def test_injective_reduce_injective():
    x = sym.Variable("x")
    x = sym.flatten(x) + 1
    y = sym.sum(x, axis=1)
    dtype = "float32"
    dshape = (32, 1, 18, 18)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        assert graph.index.num_nodes == 2
        data = np.random.uniform(size=dshape).astype(dtype)
        m.run(x=data)
        c_np = np.sum(data.reshape(32, 18 * 18) + 1, axis=1)
        # get output
        out = m.get_output(0, tvm.nd.empty(c_np.shape, dtype))
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

def nnvm_array_reduce(c, fn, array, shape):
    """Implementation of array_reduce.

    The target shape decides which axes to sum over: every axis whose
    target extent is 1 (after left-padding the target shape with 1s to
    the input's rank) is reduced with keepdims, then the result is
    reshaped to the target shape if the ranks differ. For example,
    ashp=(2, 3, 4) and tshp=(3, 1) give ts=(1, 3, 1), axis=[0, 2], and
    a final reshape to (3, 1).
    """
    assert fn.is_constant(Primitive)
    assert shape.is_constant(tuple)
    fn = fn.value
    tshp = shape.value
    ary = c.ref(array)
    if fn == P.scalar_add:
        ashp = ashape(array)
        if len(tshp) < len(ashp):
            # left-pad the target shape with 1s to match the input rank
            ts = (1,) * (len(ashp) - len(tshp)) + tshp
        else:
            ts = tshp
        axis = list(i for i, t in enumerate(ts) if t == 1)
        if len(axis) == 1:
            axis = axis[0]
        res = sym.sum(ary, axis=axis, keepdims=1)
        if len(tshp) < len(ashp):
            res = sym.reshape(res, shape=tshp)
        return res
    else:
        raise NotImplementedError(f"reduce with {fn}")

def check(in_shape, out_shape, **kwargs):
    x = sym.Variable("x", shape=in_shape)
    y = sym.sum(x, name="y", **kwargs)
    sdict = infer_shape(y)
    assert tuple(sdict["y"][0]) == tuple(out_shape)

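# A possible driver for the helper above (hypothetical cases; assumes
# sym.sum shape inference follows numpy-style reduction semantics):
def test_reduce_shape():
    check((4, 5), (4,), axis=1)
    check((4, 5), (4, 1), axis=1, keepdims=True)
    check((2, 3, 4), (4,), axis=(0, 1))
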
import nnvm.compiler
import nnvm.symbol as sym
import tvm

x = sym.Variable("x", shape=(100,), dtype=1)
z = sym.sum(x, axis=(0,))
graph = nnvm.graph.create(z)
print(graph.ir())
with nnvm.compiler.build_config(opt_level=2):
    deploy_graph, lib, params = nnvm.compiler.build(graph, target="llvm")

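# A minimal sketch of running the compiled result, mirroring the
# graph_runtime pattern used in test_injective_reduce_injective above.
# Assumptions: NNVM's mshadow-style dtype codes make dtype=1 float64,
# and the fully reduced output is fetched into a rank-0 buffer; adjust
# both if your build reports a different output shape or dtype.
import numpy as np
from tvm.contrib import graph_runtime

module = graph_runtime.create(deploy_graph, lib, tvm.cpu(0))
data = np.random.uniform(size=(100,)).astype("float64")
module.run(x=data)
out = module.get_output(0, tvm.nd.empty((), "float64"))
print(out.asnumpy())  # should match np.sum(data)
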
def test_check_function():
    # test the testing function

    x = sym.Variable("x")
    y = sym.Variable("y")

    # different styles of returning gradients from the backward function
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [head_grads, 2 * head_grads],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: (head_grads, 2 * head_grads),
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {'x': head_grads,
                                             'y': 2 * head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {'y': 2 * head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [2 * head_grads],
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float64')

    # test just numerical gradients
    # different styles of shape and dtype passing
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   numerical_grads=True)
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   dtype='float32', numerical_grads=True)
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   dtype={x: 'float32', 'y': 'float32'},
                   numerical_grads=True)
    check_function(x + 2 * y, shape=(1, 2), dtype='float32',
                   numerical_grads=True)

    # specifying variable attributes on variable creation
    # (in this case type codes must be used)
    x = sym.Variable("x", dtype=0, shape=(1, 2))
    check_function(x + 2 * y, shape={y: (1, 2)}, dtype={'y': 'float32'},
                   numerical_grads=True)
    y = sym.Variable("y", dtype=0, shape=(1, 2))

    # shape overriding
    def _fwd1(x, y):
        assert x.shape == (1, 1)
        assert y.shape == (1, 2)
        return x + 2 * y
    check_function(x + 2 * y, _fwd1, shape={x: (1, 1)})

    # in_range
    def _fwd2(x, y):
        assert x.shape == (100,)
        assert (x <= 0.9).all()
        assert (x >= 0.8).all()
        return x + 2 * y
    check_function(x + 2 * y, _fwd2, shape=(100,), in_range=(0.8, 0.9),
                   numerical_grads=False)
    check_function(x + 2 * y, _fwd2, shape=(100,),
                   in_range={'x': (0.8, 0.9)}, numerical_grads=False)
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   in_range={'head_grads_0': (1.0, 1.0)})
    # explicit passing of values
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   values={'head_grads_0': np.full((1, 2), 1.0)})

    # check that the function reports errors
    def _check_function_must_fail(*args, **kwargs):
        error = AssertionError
        if 'error' in kwargs:
            error = kwargs['error']
            del kwargs['error']
        try:
            check_function(*args, quiet=True, **kwargs)
        except error:
            pass
        else:
            raise AssertionError("check_function didn't raise an exception")

    _check_function_must_fail(x + 2 * y, error=ValueError)
    _check_function_must_fail(x + 2 * y, lambda x, y: x + y)
    _check_function_must_fail(x + 2 * y,
                              backward=lambda x, y, head_grads: [1.0, 2.0])
    _check_function_must_fail(sym.block_grad(x + 2 * y),
                              numerical_grads=True)
    _check_function_must_fail(x * x, numerical_grads=True,
                              numerical_grads_params={'atol': 0.0,
                                                      'rtol': 0.0})
    _check_function_must_fail(sym.log(-x * x), numerical_grads=True,
                              error=ValueError)

    # different styles of returning results from the forward function
    check_function(x + 2 * y, lambda x, y: [x + 2 * y],
                   numerical_grads=False)
    _check_function_must_fail(x + 2 * y, lambda x, y: [x + 2 * y, x],
                              numerical_grads=False, error=ValueError)
    _check_function_must_fail(x + 2 * y, lambda x, y: [],
                              numerical_grads=False, error=ValueError)

    # multiple outputs
    z = sym.Group([2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: (2 * x + y, x + 2 * y))
    check_function(z,
                   backward=lambda x, y, head_grads:
                   [2 * head_grads[0] + head_grads[1],
                    head_grads[0] + 2 * head_grads[1]])
    _check_function_must_fail(z,
                              backward=lambda x, y, head_grads:
                              [2 * head_grads[0], 2 * head_grads[1]])
    check_function(z,
                   backward=lambda x, y, head_grads:
                   [head_grads[1], 2 * head_grads[1]],
                   in_range={'head_grads_0': (0, 0)})
    check_function(z, numerical_grads=True)

    z = sym.Group([sym.block_grad(2 * x + y), x + 2 * y])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y],
                   numerical_grads=False)
    _check_function_must_fail(z, lambda x, y: [2 * x + y, x + 2 * y])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, sym.block_grad(x + 2 * y)])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, x + 2 * y, x, y, sym.sum(x)])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y, x, y, np.sum(x)])

    # passing additional parameters to forward and backward
    def _fwd3(x, p):
        assert p == 'v'
        return x + 1

    def _bwd3(x, p, head_grads):
        assert p == 'v'
        return head_grads
    check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})

    # implicitly created variables and shape/dtype inference for inputs
    x = sym.Variable("x", shape=(2, 3), dtype=0)
    b = sym.Variable("b")
    y = sym.dense(data=x, bias=b, units=4)
    # Don't check gradients on cuda because it doesn't yet support
    # ewise after reduce
    check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'},
                   numerical_grads=True)
    check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'},
                   numerical_grads=True)

    x = sym.Variable("x")
    b = sym.Variable("b")
    w = sym.Variable("w")
    y = sym.dense(data=x, bias=b, weight=w, units=4)

    def _fwd_dense(x, w, b):
        return np.dot(x, w.T) + b
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   dtype={'x': 'float32'}, numerical_grads=False)
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   dtype={'w': 'float64'}, numerical_grads=False)
    _check_function_must_fail(y, _fwd_dense, shape={'x': (1, 2)},
                              dtype={'w': 'float64', 'b': 'float32'},
                              numerical_grads=False,
                              error=nnvm._base.NNVMError)
    # fails because no shape
    _check_function_must_fail(y, _fwd_dense, numerical_grads=False,
                              error=ValueError)
    # ok because type is float32 by default
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   numerical_grads=False)

def test_cnn_gradients():
    # input data
    h = 128
    w = 128
    data_shape = (1000, 3, h, w)
    data = sym.Variable('data', shape=data_shape, dtype=0)

    # conv2d
    num_channels = 64
    kernel_size = 32
    conv_w_shape = (num_channels, 3, kernel_size, kernel_size)
    conv_b_shape = (num_channels,)
    conv_w = sym.Variable('conv_w', shape=conv_w_shape)
    conv_b = sym.Variable('conv_b', shape=conv_b_shape)
    conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b,
                       channels=num_channels,
                       kernel_size=(kernel_size, kernel_size),
                       name='conv1')
    # relu1
    relu1 = sym.relu(data=conv1, name='relu1')
    # max pooling
    max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2),
                                  name='max_pooling1')
    # flatten
    flatten1 = sym.flatten(data=max_pooling1)
    # shape after flatten
    flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels
    # dense1
    dense1_hidden_units = 100
    dense1 = sym.dense(data=flatten1, name='dense1',
                       units=dense1_hidden_units)
    # relu2
    relu2 = sym.relu(data=dense1, name='relu2')
    # dense2
    dense2_hidden_units = 10
    dense2 = sym.dense(data=relu2, name='dense2',
                       units=dense2_hidden_units)
    # softmax
    mlp = sym.softmax(data=dense2, name='softmax')
    # fake non-sparse label
    label = sym.full_like(mlp, fill_value=1)
    # cross entropy loss
    ce_loss = sym.sum(
        sym.elemwise_mul(sym.log_softmax(dense2), label),
        axis=1,
        keepdims=True,
        name="ce_loss")

    # input variables:
    # print grad_g.symbol.list_input_names()
    # >> ['data', 'conv_w', 'conv_b',
    #     'dense1_weight', 'dense1_bias',
    #     'dense2_weight', 'dense2_bias']

    # output gradient variables:
    # print grad_g.symbol.list_output_names()
    # >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias',
    #     'dense1_grad_weight', 'dense1_grad_bias',
    #     'dense2_grad_weight', 'dense2_grad_bias']
    grad_g = graph_util.get_gradient_graph(ce_loss,
                                           ce_loss.list_input_variables())

    # infer shape
    in_shapes, out_shapes = graph_util.infer_shape(grad_g)

    # forward graph shape
    assert in_shapes == [list(data_shape), list(conv_w_shape),
                         list(conv_b_shape),
                         [dense1_hidden_units, flatten_out_shape],
                         [dense1_hidden_units],
                         [dense2_hidden_units, dense1_hidden_units],
                         [dense2_hidden_units]]
    # input grad shapes should equal the input shapes
    assert in_shapes == out_shapes

    # output grads w.r.t. input variables
    grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables())

    # the number of gradients should equal the number of grad inputs
    assert len(grads) == len(ce_loss.list_input_variables())

    # infer type
    in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
    assert out_dtypes == ['float32', 'float32', 'float32', 'float32',
                          'float32', 'float32', 'float32']