def add_loader(expr):
    # Loader hook for the tensor "__add__" method: rewrites the serialized
    # expression graph while it is being loaded. When the right-hand side of
    # the add is a plain value (not a TensorNode), it is materialized as a
    # Constant node, cast to the lhs dtype, and doubled (const + const), so
    # an original `x + c` effectively becomes `x + 2*c` after loading.
    args = list(expr.args)
    if not isinstance(args[1], TensorNode):
        # wrap the raw value as a graph Constant
        args[1] = Tensor(args[1])
        node = Constant(args[1], "const").outputs[0]
        # insert: const.astype(lhs.dtype)
        astype_expr = CallMethod(node, "astype")
        oup = TensorNode(
            astype_expr,
            shape=node.shape,
            dtype=node.dtype,
            qparams=node.qparams,
        )
        astype_expr.set_args_kwargs(node, expr.inputs[0].dtype)
        astype_expr.return_val = (oup, )
        # insert: casted + casted
        add_expr = CallMethod(oup, "__add__")
        add_expr.set_args_kwargs(oup, oup)
        oup1 = TensorNode(
            add_expr,
            shape=oup.shape,
            dtype=oup.dtype,
            qparams=node.qparams,
        )
        add_expr.return_val = oup1
        # rebind the original add's rhs to the doubled constant
        args[1] = oup1
        expr.set_args_kwargs(*args)
def test_training_converge_with_swap_and_drop():
    """XOR training must converge with the swap and drop flags enabled."""
    _set_swap_flag(True)
    _set_drop_flag(True)
    net = XORNet()
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
            return loss

    def infer(data):
        return net(data)

    batches = minibatch_generator()
    history = []
    for data, label in itertools.islice(batches, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        opt.step()
        history.append(loss.numpy())

    assert np.mean(history[-100:]) < 0.1, "Final training Loss must be low enough"

    # evaluate on a regular grid over [-1, 1]^2
    ngrid = 10
    axis = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(axis, axis)
    grid = np.concatenate(
        (xx.reshape((ngrid * ngrid, 1)), yy.reshape((ngrid * ngrid, 1))), axis=1
    ).astype(np.float32)

    pred = infer(Tensor(grid)).numpy()
    precision = calculate_precision(grid, pred)
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision
    )

    _set_swap_flag(False)
    _set_drop_flag(False)
def test_zero_dim():
    """A scalar Tensor must behave like a 0-dim numpy array."""
    scalar = Tensor(1)
    expected = np.array(1, dtype=np.int32)
    np.testing.assert_equal(scalar, expected)
    if use_symbolic_shape():
        # symbolic shapes come back as a tensor; compare against an array
        np.testing.assert_equal(scalar.shape, np.array(expected.shape))
    else:
        np.testing.assert_equal(scalar.shape, expected.shape)
def test_training_converge(test_traced_module):
    """XOR training must converge, optionally through a traced module."""
    net = XORNet()
    if test_traced_module:
        inp = Tensor(np.random.random((14, 2)))
        net = trace_module(net, inp)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    @trace(symbolic=False)
    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
            optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
            return loss

    def infer(data):
        return net(data)

    train_dataset = minibatch_generator()
    losses = []
    for data, label in itertools.islice(train_dataset, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
        opt.step()
        losses.append(loss.numpy())

    final_loss = np.mean(losses[-100:])
    assert final_loss < 0.1, "Final training Loss must be low enough, get {}".format(
        np.mean(losses[-100:]))

    # evaluate on a regular grid over [-1, 1]^2
    ngrid = 10
    axis = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(axis, axis)
    grid = np.concatenate(
        (xx.reshape((ngrid * ngrid, 1)), yy.reshape((ngrid * ngrid, 1))), axis=1
    ).astype(np.float32)
    data = mge.tensor(grid)
    pred = infer(data)
    precision = calculate_precision(data.numpy(), pred.numpy())
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision)
def test_syncbn1d():
    # SyncBatchNorm over a (N, C, L) input must match a hand-computed
    # batch-norm: normalized output plus exponentially-averaged running stats.
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
    for i in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # per-channel mean over batch (axis 0) and length (axis 2)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        xv_transposed = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan))
        # biased variance normalizes the output; unbiased (ddof=1) feeds the
        # running statistics, mirroring the standard BN convention
        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var_biased + bn.eps)
        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape(
            (1, nr_chan, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)
        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd
        _assert_allclose(yv.numpy(), yv_expect)
        _assert_allclose(bn.running_mean.numpy().reshape(-1), running_mean.reshape(-1))
        _assert_allclose(bn.running_var.numpy().reshape(-1), running_var.reshape(-1))
    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False
    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    yv1 = bn(data)
    yv2 = bn(data)
    # inference mode must be deterministic and leave running stats untouched
    np.testing.assert_equal(yv1.numpy(), yv2.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())
    # in eval mode normalization uses the accumulated running statistics
    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(yv1.numpy(), yv_expect)
def worker(data, yv_expect, running_mean, running_var):
    """Feed this rank's shard through SyncBatchNorm and verify the last
    output and the final running statistics (closure vars: nr_chan,
    momentum, eps, steps)."""
    rank = dist.get_rank()
    bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
    for step in range(steps):
        yv = bn(Tensor(data[rank][step]))
    _assert_allclose(yv.numpy(), yv_expect[rank])
    _assert_allclose(bn.running_mean.numpy(), running_mean)
    _assert_allclose(bn.running_var.numpy(), running_var)
def worker():
    """Rank 0 sends `val` to rank 1; rank 1 receives and verifies it."""
    rank = dist.get_rank()
    if rank == 0:  # remote send
        payload = Tensor(val, device="gpu0")
        remote_send(payload, 1)
        sync()
    else:  # remote recv
        received = remote_recv(0, val.shape, val.dtype)
        assert received.device == "gpu1"
        np.testing.assert_almost_equal(val, received.numpy())
def worker(rank, data, yv_expect, running_mean, running_var):
    """Per-process SyncBatchNorm check; skips when too few GPUs are present
    (closure vars: nr_ranks, port, nr_chan, momentum, eps, steps)."""
    if mge.get_device_count("gpu") < nr_ranks:
        return
    dist.init_process_group("localhost", port, nr_ranks, rank, rank)
    bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
    for step in range(steps):
        yv = bn(Tensor(data[step]))
    _assert_allclose(yv.numpy(), yv_expect)
    _assert_allclose(bn.running_mean.numpy(), running_mean)
    _assert_allclose(bn.running_var.numpy(), running_var)
def test_apply_easy_quant():
    """apply_easy_quant must leave PassiveObservers on every quantized stage
    and no observer on the dequant stage."""
    qat_net = init_qat_net()
    data = Tensor(np.random.rand(2, 3, 3, 3), dtype=np.float32)
    eq_net = reset_qconfig(qat_net, passive_qconfig, inplace=False)
    apply_easy_quant(eq_net, data, 0.9, 1.1, 10)
    passive_observers = (
        eq_net.quant.act_observer,
        eq_net.linear[0].weight_observer,
        eq_net.linear[0].act_observer,
        eq_net.linear[1].weight_observer,
        eq_net.linear[1].act_observer,
    )
    for observer in passive_observers:
        assert isinstance(observer, PassiveObserver)
    assert eq_net.dequant.act_observer is None
def test_trace_module_2():
    """Tracing a module that mixes shape extraction with a raw opdef apply
    must produce GetVarShape / Constant / Elemwise exprs in order."""
    class Model(M.Module):
        def __init__(self):
            super().__init__()

        def forward(self, x):
            out = x.shape
            out = apply(builtin.Elemwise(mode="ADD"), out, Tensor(1))
            return out

    traced_model = trace_module(Model(), Tensor([1,]))
    exprs = traced_model.graph._exprs
    assert isinstance(exprs[0], Apply) and isinstance(
        exprs[0].opdef, builtin.GetVarShape
    )
    assert isinstance(exprs[1], Constant)
    assert isinstance(exprs[2], Apply) and isinstance(
        exprs[2].opdef, builtin.Elemwise
    )
    # shape of a 2-element tensor is 2; 2 + 1 == 3
    assert int(traced_model(Tensor([1, 2]))[0]) == 3
def test_functional_loader():
    # Verify that a functional loader registered in S.FUNCTIONAL_LOADER can
    # rewrite a conv2d call expression while a traced module is unpickled.
    class MyModule2(Module):
        def forward(self, x, y):
            return F.conv2d(x, y)

    m = MyModule2()
    x = Tensor(np.random.random((1, 3, 32, 32)))
    y = Tensor(np.random.random((3, 3, 3, 3)))
    traced_module = trace_module(m, x, y)
    # swap in an empty loader table so only our loader is active; restored at
    # the end of the test
    orig_loader_dict = S.FUNCTIONAL_LOADER
    S.FUNCTIONAL_LOADER = {}

    @register_functional_loader(("megengine.functional.nn", "conv2d"))
    def conv2df_loader(expr):
        # expr.func = ("megengine.functional.nn","conv2d")
        kwargs = expr.kwargs
        # insert an astype expression casting the weight to the input dtype
        orig_weight = expr.named_args["weight"]
        astype_expr = CallMethod(orig_weight, "astype")
        oup = TensorNode(
            astype_expr,
            shape=orig_weight.shape,
            dtype=orig_weight.dtype,
            qparams=orig_weight.qparams,
        )
        astype_expr.set_args_kwargs(orig_weight, expr.named_args["inp"].dtype)
        astype_expr.return_val = (oup, )
        expr.set_arg("weight", oup)

    obj = pickle.dumps(traced_module)
    new_module = pickle.loads(obj)
    _check_expr_users(new_module)
    _check_id(new_module)
    result = new_module(x, y)
    gt = m(x, y)
    # loaded graph holds the injected astype plus the conv2d call
    assert (isinstance(new_module.graph._exprs[0], CallMethod)
            and len(new_module.graph._exprs) == 2)
    # the cast is a no-op dtype-wise, so results must match the original
    np.testing.assert_equal(result.numpy(), gt.numpy())
    S.FUNCTIONAL_LOADER = orig_loader_dict
def build_observered_net(net: M.Module, observer_cls):
    """Convert `net` to QAT with the given observer class, run a single
    calibration pass in eval mode, then freeze the observers."""
    qat_net = Q.quantize_qat(
        net,
        qconfig=get_observer_config(observer_cls),
        mapping={MyConvBnRelu2d: MyQATConvBnRelu2d},
    )
    Q.enable_observer(qat_net)
    calibration_input = Tensor(np.random.random(size=(5, 3, 32, 32)))
    qat_net.eval()
    qat_net(calibration_input)
    Q.disable_observer(qat_net)
    return qat_net
def test_dump_model():
    """mge.dump of a model's output must succeed to a temp file path."""
    data = Tensor(np.random.random((2, 28)))
    mlp = MLP()
    pred = mlp(data)
    # delete=False so the path can be re-opened by mge.dump on all platforms
    tmp = tempfile.NamedTemporaryFile(delete=False)
    path = tmp.name
    try:
        mge.dump(pred, path)
    finally:
        tmp.close()
        os.unlink(path)
def test_opdef_loader():
    # Verify that an opdef loader registered in S.OPDEF_LOADER can change an
    # opdef's state (ADD -> MUL) and rewrite its inputs during unpickling.
    class MyModule1(Module):
        def forward(self, x, y):
            op = Elemwise("ADD")
            return apply(op, x, y)[0]

    m = MyModule1()
    x = Tensor(np.ones((20)))
    y = Tensor(np.ones((20)))
    traced_module = trace_module(m, x, y)
    # swap in an empty loader table so only our loader is active; restored at
    # the end of the test
    orig_loader_dict = S.OPDEF_LOADER
    S.OPDEF_LOADER = {}

    @register_opdef_loader(Elemwise)
    def add_opdef_loader(expr):
        if expr.opdef_state["mode"] == "ADD":
            # turn the add into a multiply and cast its second input to the
            # first input's dtype via an injected astype expression
            expr.opdef_state["mode"] = "MUL"
            node = expr.inputs[1]
            astype_expr = CallMethod(node, "astype")
            oup = TensorNode(
                astype_expr,
                shape=node.shape,
                dtype=expr.inputs[0].dtype,
                qparams=node.qparams,
            )
            astype_expr.set_args_kwargs(node, expr.inputs[0].dtype)
            astype_expr.return_val = (oup, )
            expr.inputs[1] = oup

    obj = pickle.dumps(traced_module)
    new_module = pickle.loads(obj)
    _check_id(new_module)
    _check_expr_users(new_module)
    _check_name(new_module.flatten())
    # loaded graph: injected astype (CallMethod) followed by the MUL apply
    assert (isinstance(new_module.graph._exprs[0], CallMethod)
            and new_module.graph._exprs[1].opdef.mode == "MUL"
            and len(new_module.graph._exprs) == 2)
    result = new_module(x, y)
    # ones * ones == ones (the original would have returned twos)
    np.testing.assert_equal(result.numpy(), x.numpy())
    S.OPDEF_LOADER = orig_loader_dict
def worker(rank):
    """Rank 0 sends `val` to rank 1; rank 1 receives and verifies it.
    Skips when too few GPUs are present (closure vars: world_size, port, val)."""
    if mge.get_device_count("gpu") < world_size:
        return
    dist.init_process_group("localhost", port, world_size, rank, rank)
    if rank == 0:  # remote send
        payload = Tensor(val, device="gpu0")
        out = remote_send(payload, 1)
        assert out.numpy()[0] == 0
    else:  # remote recv
        received = remote_recv(0, val.shape, val.dtype)
        assert received.device == "gpu1"
        np.testing.assert_almost_equal(val, received.numpy())
def test_shared_module():
    """Two attributes aliasing one submodule must still alias each other
    after trace + pickle round-trip."""
    class MyModule(M.Module):
        def __init__(self):
            super().__init__()
            self.a = M.Elemwise("ADD")
            self.b = self.a  # deliberate alias of the same submodule

        def forward(self, x, y):
            z = self.a(x, y)
            z = self.b(z, y)
            return z

    x = Tensor(1)
    y = Tensor(2)
    traced = trace_module(MyModule(), x, y)
    restored = pickle.loads(pickle.dumps(traced))
    _check_expr_users(restored)
    _check_name(restored.flatten())
    _check_id(restored)
    # identity (not equality): aliasing must survive serialization
    assert restored.a is restored.b
def test_catch_input_name(tensor_name, var_name):
    """A dumped graph must keep the input variable name derived from the
    Tensor's `name` attribute."""
    def double(x):
        return 2 * x

    traced = trace(double, symbolic=True, capture_as_const=True)
    inp = Tensor(np.ones(shape=(2, 3)), name=tensor_name)
    traced(inp).numpy()
    buffer = io.BytesIO()
    traced.dump(
        buffer, optimize_for_inference=False, keep_opr_name=True, keep_var_name=2
    )
    buffer.seek(0)
    *_, outputs = G.load_graph(buffer)
    last_op = cgtools.get_oprs_seq(outputs)[-1]
    assert last_op.inputs[0].name == var_name
def test_tensor_serialization():
    """Round-trip Tensors/Parameters through mge.save/mge.load, covering
    type preservation, device mapping and qparams."""
    with TemporaryFile() as f:
        data = np.random.randint(low=0, high=7, size=[233])
        a = Tensor(data, device="cpu0", dtype=np.int32)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        # BUG FIX: verify the LOADED tensor's content. The original asserted
        # `a.numpy() == data`, which is trivially true and never exercised
        # deserialization of the values.
        np.testing.assert_equal(b.numpy(), data)
        assert b.device.logical_name == "cpu0:0"
        assert b.dtype == np.int32

    with TemporaryFile() as f:
        # Parameter subclass must survive the round trip
        a = Parameter(np.random.random(size=(233, 2)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b, Parameter)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # plain Tensor stays exactly Tensor (not a subclass)
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert type(b) is Tensor
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a device string
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f, map_location="cpux")
        assert type(b) is Tensor
        assert "cpu" in str(b.device)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a device-rename dict (gpu0 -> cpu0); only runs when
        # a CUDA device is available to save from
        if mge.is_cuda_available():
            device_org = mge.get_default_device()
            mge.set_default_device("gpu0")
            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
            mge.save(a, f)
            f.seek(0)
            mge.set_default_device("cpux")
            b = mge.load(f, map_location={"gpu0": "cpu0"})
            assert type(b) is Tensor
            assert "cpu0" in str(b.device)
            np.testing.assert_equal(a.numpy(), b.numpy())
            mge.set_default_device(device_org)

    with TemporaryFile() as f:
        # quantization params attached to the tensor must survive as Tensors
        a = Tensor(0)
        a.qparams.scale = Tensor(1.0)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b.qparams.scale, Tensor)
        np.testing.assert_equal(b.qparams.scale.numpy(), 1.0)
def test_tensor_method_loader():
    # Verify that a custom "__add__" loader registered in
    # S.TENSORMETHOD_LOADER rewrites the expression graph during unpickling,
    # turning `x + 1` into `x + 2`.
    class MyModule3(Module):
        def forward(self, x):
            return x + 1

    m = MyModule3()
    x = Tensor(np.ones((20)))
    traced_module = trace_module(m, x)
    # swap in an empty loader table so only our loader is active; restored at
    # the end of the test
    orig_loader_dict = S.TENSORMETHOD_LOADER
    S.TENSORMETHOD_LOADER = {}

    @register_tensor_method_loader("__add__")
    def add_loader(expr):
        args = list(expr.args)
        if not isinstance(args[1], TensorNode):
            # rhs is a plain constant: materialize it as a Constant node,
            # cast it to the lhs dtype, then double it (const + const)
            args[1] = Tensor(args[1])
            node = Constant(args[1], "const").outputs[0]
            astype_expr = CallMethod(node, "astype")
            oup = TensorNode(
                astype_expr,
                shape=node.shape,
                dtype=node.dtype,
                qparams=node.qparams,
            )
            astype_expr.set_args_kwargs(node, expr.inputs[0].dtype)
            astype_expr.return_val = (oup, )
            add_expr = CallMethod(oup, "__add__")
            add_expr.set_args_kwargs(oup, oup)
            oup1 = TensorNode(
                add_expr,
                shape=oup.shape,
                dtype=oup.dtype,
                qparams=node.qparams,
            )
            add_expr.return_val = oup1
            # rebind the original add's rhs to the doubled constant
            args[1] = oup1
            expr.set_args_kwargs(*args)

    obj = pickle.dumps(traced_module)
    new_module = pickle.loads(obj)
    _check_expr_users(new_module)
    _check_id(new_module)
    result = new_module(x)
    gt = m(x)
    # graph now starts with the injected Constant and holds 4 exprs in total
    assert (isinstance(new_module.graph._exprs[0], Constant)
            and len(new_module.graph._exprs) == 4)
    # loader doubled the constant 1, so the loaded module computes x + 2
    np.testing.assert_equal(result.numpy(), (x + 2).numpy())
    S.TENSORMETHOD_LOADER = orig_loader_dict
def test_cambricon_module():
    """Load a prebuilt Cambricon model blob and run one inference pass."""
    model = "CambriconRuntimeOprTest.MutableBatchSize.mlu"
    model = os.path.join(os.path.dirname(__file__), model)
    with open(model, "rb") as f:
        data = f.read()
    m = MyModule(data)
    # BUG FIX: np.random.normal's first positional argument is `loc`, not
    # `size`. The original call np.random.normal((1, 64, 32, 32)) broadcast
    # the tuple as means and produced a shape-(4,) array instead of the
    # intended (1, 64, 32, 32) input tensor.
    inp = Tensor(
        np.random.normal(size=(1, 64, 32, 32)).astype(np.float16),
        device="cambricon0",
    )

    def inference(inps):
        pred = m(inps)
        return pred

    pred = inference([inp])
def worker(data, yv_expect, running_mean, running_var):
    """Run this rank's shard through SyncBatchNorm under amp.autocast and
    check the last output (looser tolerance when amp is on) and the final
    running statistics (closure vars: enable_amp, nr_chan, momentum, eps,
    steps)."""
    with amp.autocast(enabled=enable_amp):
        rank = dist.get_rank()
        bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
        for step in range(steps):
            yv = bn(Tensor(data[rank][step]))
        if enable_amp:
            # fp16 accumulation needs a looser tolerance
            np.testing.assert_allclose(
                yv.numpy(), yv_expect[rank], atol=5e-4, rtol=5e-4
            )
        else:
            _assert_allclose(yv.numpy(), yv_expect[rank])
        _assert_allclose(bn.running_mean.numpy(), running_mean)
        _assert_allclose(bn.running_var.numpy(), running_var)
def _check_qualname(net):
    """Every node qualname in the traced graph must resolve to an attribute
    on both the traced module and the original network."""
    inp = Tensor(np.random.random(size=(5, 3, 32, 32)))
    net.eval()
    traced_net = trace_module(net, inp)
    base = traced_net.graph.qualname
    for node in traced_net.graph.nodes():
        # strip the graph's own qualname prefix (plus the joining dot)
        rel = node.qualname[len(base) + 1:]
        if rel.endswith("]"):
            # drop a trailing "[...]" index segment
            rel = rel.rsplit(".", 1)[0]
        if rel.startswith("["):
            # a bare index refers to the module itself
            rel = ""
        assert get_subattr(traced_net, rel) is not None
        assert get_subattr(net, rel) is not None
def test_enable_and_disable_all():
    """Disabling fake-quant must restore float behavior; re-enabling must
    restore quantized behavior."""
    inp = Tensor(np.random.randint(1, 10, size=(3, 3)).astype(np.float32))
    net = FloatNet()
    y_float = net(inp).numpy()
    net = quantize_qat(net, min_max_fakequant_qconfig)
    init_observer(net, inp)
    y_quant = net(inp).numpy()
    disable_fake_quant(net)
    y_disabled = net(inp).numpy()
    enable_fake_quant(net)
    y_enabled = net(inp).numpy()
    # disabled fake-quant == float net; enabled == quantized output
    np.testing.assert_allclose(y_float, y_disabled)
    np.testing.assert_allclose(y_quant, y_enabled)
    # quantized and float outputs must actually differ
    with pytest.raises(AssertionError):
        np.testing.assert_allclose(y_quant, y_disabled)
def _dump_and_load(func, symbolic, keep_opr_name=True):
    """Trace `func`, dump it to an in-memory graph, reload it, and return
    the operator sequence of the loaded graph."""
    AutoNaming.clear()
    traced = trace(func, symbolic=symbolic, capture_as_const=True)
    inp = Tensor(np.ones(shape=(2, 3)))
    traced(inp).numpy()
    buffer = io.BytesIO()
    traced.dump(
        buffer,
        optimize_for_inference=False,
        arg_names=("x", ),
        keep_opr_name=keep_opr_name,
        keep_var_name=2,
    )
    buffer.seek(0)
    outputs = G.load_graph(buffer).output_vars_list
    return cgtools.get_oprs_seq(outputs)
def _check_module(build_func: Callable):
    """A state dict saved from a plain net must load into both a traced
    module and its flattened form, yielding identical parameters."""
    reference = build_func()
    reference.eval()
    buffer = io.BytesIO()
    mge.save(reference.state_dict(), buffer)
    buffer.seek(0)
    inp = Tensor(np.random.random(size=(5, 3, 32, 32)))

    traced = trace_module(build_func(), inp)
    traced.load_state_dict(mge.load(buffer))
    _check_param(reference, traced)

    buffer.seek(0)
    flattened = trace_module(build_func(), inp).flatten()
    flattened.load_state_dict(mge.load(buffer))
    _check_param(reference, flattened)
def test_syncbn2d_no_stats():
    """Without running stats, SyncBatchNorm must normalize with the current
    batch statistics in both training and eval mode."""
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    bn = SyncBatchNorm(8, track_running_stats=False)
    for step in range(4):
        if step == 2:
            # eval mode still uses batch stats when tracking is off
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        flat = np.transpose(xv, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )
        mean = flat.mean(axis=0).reshape(1, nr_chan, 1, 1)
        var = flat.var(axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var + bn.eps)
        out = bn(Tensor(xv))
        _assert_allclose(out.numpy(), (xv - mean) / sd)
def test_batchnorm_no_stats():
    """Without running stats, BatchNorm1d must normalize with the current
    batch statistics in both training and eval mode."""
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    bn = BatchNorm1d(8, track_running_stats=False)
    for step in range(4):
        if step == 2:
            # eval mode still uses batch stats when tracking is off
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        flat = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )
        var = np.var(flat, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var + bn.eps)
        out = bn(Tensor(xv))
        _assert_allclose(out.numpy(), (xv - mean) / sd)
def test_module_loader():
    # Verify that a module loader registered in S.MODULE_LOADER can rewrite
    # a Conv2d call expression while a traced module is unpickled.
    class MyModule4(Module):
        def __init__(self):
            super().__init__()
            self.conv = M.Conv2d(3, 3, 3)

        def forward(self, x):
            return self.conv(x)

    m = MyModule4()
    x = Tensor(np.random.random((1, 3, 32, 32)))
    traced_module = trace_module(m, x)
    # swap in an empty loader table so only our loader is active; restored at
    # the end of the test
    orig_loader_dict = S.MODULE_LOADER
    S.MODULE_LOADER = {}

    @register_module_loader(("megengine.module.conv", "Conv2d"))
    def conv2dm_loader(expr):
        # insert an astype expression casting the conv input to the weight
        # dtype in front of the call
        module = expr.inputs[0].owner
        args = list(expr.args)
        orig_inp = args[1]
        astype_expr = CallMethod(orig_inp, "astype")
        oup = TensorNode(
            astype_expr,
            shape=orig_inp.shape,
            dtype=orig_inp.dtype,
            qparams=orig_inp.qparams,
        )
        astype_expr.set_args_kwargs(orig_inp, module.weight.dtype)
        astype_expr.return_val = (oup, )
        args[1] = oup
        expr.set_args_kwargs(*args)

    obj = pickle.dumps(traced_module)
    new_module = pickle.loads(obj)
    result = new_module(x)
    gt = m(x)
    # with the injected astype the loaded graph holds 3 exprs, the second
    # (index 1) being a CallMethod
    assert (isinstance(new_module.graph._exprs[1], CallMethod)
            and len(new_module.graph._exprs) == 3)
    # the cast is a no-op dtype-wise, so results must match the original
    np.testing.assert_equal(result.numpy(), gt.numpy())
    S.MODULE_LOADER = orig_loader_dict
def _check_qat_module(qat_net: QATModule):
    """Tracing a QAT module must preserve all weight/activation qparams and
    keep named nodes resolvable in the flattened graph."""
    inp = Tensor(np.random.random(size=(5, 3, 32, 32)))
    traced_net = trace_module(qat_net, inp)
    for name, qat_module in qat_net.named_modules():
        if not isinstance(qat_module, QATModule):
            continue
        traced_qat_module = get_subattr(traced_net, name)
        weight_qparams, act_qparams = get_qparams(qat_module)
        traced_weight_qparams, traced_act_qparams = get_qparams(traced_qat_module)
        if weight_qparams:
            check_qparams(weight_qparams, traced_weight_qparams)
        if act_qparams:
            check_qparams(act_qparams, traced_act_qparams)
    flat_net = traced_net.flatten()
    conv0_node = flat_net.graph.get_node_by_name(
        "MyModule_block0_conv0").as_unique()
    conv0_out_node = flat_net.graph.get_node_by_name(
        "MyModule_block0_conv0_out").as_unique()
    assert isinstance(conv0_node.owner, TracedModule)
    assert conv0_out_node.expr.inputs[0] is conv0_node
def test_permutation_op_dtype(dtype):
    """PermutationRNG must produce a genuine permutation of range(n) with
    the requested dtype, on both the default and an explicit device
    (closure var: n)."""
    def fixed_points(res, transform):
        # count positions where the (transformed) output equals its index
        return sum(
            1 if i == v else 0 for i, v in enumerate(transform(res.numpy()))
        )

    shape = Tensor((n, ), dtype="int32")
    op = PermutationRNG(seed=get_global_rng_seed(), dtype=dtype)
    (output, ) = apply(op, shape)
    # a random permutation leaves few fixed points; sorting recovers range(n)
    assert fixed_points(output, lambda x: x) < 500
    assert fixed_points(output, np.sort) == n
    assert str(output.device) == str(CompNode("xpux"))
    assert output.dtype == dtype

    # same checks with an explicit handle on a second device
    cn = CompNode("xpu2")
    seed = 233333
    handle = new_rng_handle(cn, seed)
    op = PermutationRNG(seed=seed, handle=handle, dtype=dtype)
    (output, ) = apply(op, shape)
    delete_rng_handle(handle)
    assert fixed_points(output, lambda x: x) < 500
    assert fixed_points(output, np.sort) == n
    assert str(output.device) == str(cn)
    assert output.dtype == dtype