def test_GammaRNG():
    """Check ``RNG.gamma`` for seed reproducibility, device placement and statistics.

    Generators built with the same seed must agree across devices, while a
    different seed or a different distribution must produce different samples.
    With tensor-valued ``shape``/``scale`` the empirical mean and standard
    deviation are compared against ``shape * scale`` and ``sqrt(shape) * scale``.
    """
    rng_a = RNG(seed=111, device="xpu0")
    rng_b = RNG(seed=111, device="xpu1")
    rng_c = RNG(seed=222, device="xpu0")

    gamma_a = rng_a.gamma(2, size=(100, ))
    uniform_a = rng_a.uniform(size=(100, ))
    gamma_b = rng_b.gamma(2, size=(100, ))
    gamma_c = rng_c.gamma(2, size=(100, ))

    np.testing.assert_allclose(gamma_a.numpy(), gamma_b.numpy(), atol=1e-6)
    assert gamma_a.device == "xpu0"
    assert gamma_b.device == "xpu1"
    assert (gamma_a.numpy() != gamma_c.numpy()).any()
    assert (gamma_a.numpy() != uniform_a.numpy()).any()

    shape = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    scale = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    expected_mean = (shape * scale).numpy()
    expected_std = (F.sqrt(shape) * scale).numpy()

    out = rng_a.gamma(shape=shape, scale=scale, size=(20, 30, 40))
    expected_dims = (20, 30, 40, 2, 3)
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == expected_dims
    else:
        # A non-tuple shape is compared element-wise through its numpy value.
        assert all(out.shape.numpy() == np.array(expected_dims))

    mean_err = np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) / expected_std
    assert mean_err.mean() < 0.1
    std_err = np.abs(np.std(out.numpy(), axis=(0, 1)) - expected_std)
    assert std_err.mean() < 0.1
def get_plane_anchors(self, anchor_scales: np.ndarray):
    """Build the anchors placed at a single feature-map location.

    One anchor is produced for every (ratio, scale) combination, i.e.
    ``len(self.anchor_ratios) * len(anchor_scales)`` rows in total.

    Args:
        anchor_scales: scales applied to the ratio-adjusted base anchor.

    Returns:
        A float32 tensor with four columns ``(x1, y1, x2, y2)``.
    """
    last = self.base_size - 1
    base_anchor = Tensor([0, 0, last, last]).reshape(1, -1)
    w, h, x_ctr, y_ctr = self._whctrs(base_anchor)

    # Enumerate aspect ratios while keeping the base area fixed.
    area = w * h
    ratio_ws = F.sqrt(area / self.anchor_ratios)
    ratio_hs = ratio_ws * self.anchor_ratios

    # Enumerate scales: every ratio is combined with every scale.
    scales = anchor_scales.reshape(1, -1).astype(np.float32)
    ws = (F.expand_dims(ratio_ws, 1) * scales).reshape(-1, 1)
    hs = (F.expand_dims(ratio_hs, 1) * scales).reshape(-1, 1)

    # Convert (center, size) into corner coordinates.
    half_w = 0.5 * (ws - 1)
    half_h = 0.5 * (hs - 1)
    corners = [x_ctr - half_w, y_ctr - half_h, x_ctr + half_w, y_ctr + half_h]
    return F.concat(corners, axis=1).astype(np.float32)
def test_BetaRNG():
    """Check ``RNG.beta`` for seed reproducibility, device placement and statistics.

    Same-seed generators must agree across devices; a different seed or a
    different distribution must give different samples.  With tensor-valued
    ``alpha``/``beta`` the empirical mean and standard deviation are compared
    against the closed-form values computed from those parameters.
    """
    rng_a = RNG(seed=111, device="xpu0")
    rng_b = RNG(seed=111, device="xpu1")
    rng_c = RNG(seed=222, device="xpu0")

    beta_a = rng_a.beta(2, 1, size=(100, ))
    uniform_a = rng_a.uniform(size=(100, ))
    beta_b = rng_b.beta(2, 1, size=(100, ))
    beta_c = rng_c.beta(2, 1, size=(100, ))

    np.testing.assert_allclose(beta_a.numpy(), beta_b.numpy(), atol=1e-6)
    assert beta_a.device == "xpu0"
    assert beta_b.device == "xpu1"
    assert (beta_a.numpy() != beta_c.numpy()).any()
    assert (beta_a.numpy() != uniform_a.numpy()).any()

    alpha = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    beta = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    expected_mean = (alpha / (alpha + beta)).numpy()
    expected_std = (F.sqrt(alpha * beta / (F.pow(alpha + beta, 2) *
                                           (alpha + beta + 1)))).numpy()

    out = rng_a.beta(alpha=alpha, beta=beta, size=(20, 30))
    expected_dims = (20, 30, 2, 3)
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == expected_dims
    else:
        # A non-tuple shape is compared element-wise through its numpy value.
        assert all(out.shape.numpy() == np.array(expected_dims))

    mean_err = np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) / expected_std
    assert mean_err.mean() < 0.1
    std_err = np.abs(np.std(out.numpy(), axis=(0, 1)) - expected_std)
    assert std_err.mean() < 0.1
def _qparams_resolve_dtype_meta(input_data_type):
    """Translate ``input_data_type`` into a quantized dtype meta description."""
    if input_data_type in dtype._builtin_quant_dtypes:
        return dtype._builtin_quant_dtypes[input_data_type]
    if isinstance(input_data_type, dtype.QuantDtypeMeta):
        return input_data_type
    # Anything else must be the name of a plain integer numpy dtype.
    assert isinstance(input_data_type, str)
    dt = np.dtype(input_data_type)
    assert np.issubdtype(dt, np.integer)
    limits = np.iinfo(dt)
    return dtype.QuantDtypeMeta(input_data_type, "", input_data_type,
                                limits.min, limits.max)


def _qparams_parse_floats(text, error_template):
    """Parse a comma separated string into floats, raising ``ValueError`` on bad input."""
    fields = text.split(",")
    try:
        return [float(field) for field in fields]
    except ValueError as err:
        raise ValueError(error_template.format(fields)) from err


def _qparams_broadcast(values, count):
    """Return ``count`` values, repeating the last entry when ``values`` is shorter."""
    if not isinstance(values, Sequence):
        values = (values, )
    return [values[i] if i < len(values) else values[-1] for i in range(count)]


def _update_inputs_qparams(
    traced_module,
    input_data_type: Union[str, QuantDtypeMeta],
    input_scales,
    input_zero_points,
):
    """Attach quantization parameters to the inputs of a traced module's graph.

    The first entry of ``traced_module.graph.inputs`` is always skipped
    (presumably the module itself -- confirm); every remaining input gets a
    dtype meta, a scale and, optionally, a zero point.

    Args:
        traced_module: object exposing ``graph.inputs``.
        input_data_type: a builtin quantized dtype key, a ``QuantDtypeMeta``
            or the name of an integer numpy dtype.  ``None`` disables the update.
        input_scales: a number, a sequence of numbers or a comma separated
            string.  ``None`` disables the update.  When fewer scales than
            inputs are given, the last scale is reused.
        input_zero_points: same accepted forms as ``input_scales``; values are
            truncated with ``int``.  ``None`` leaves zero points untouched.

    Raises:
        ValueError: when a scale or zero-point string cannot be parsed.
    """
    if input_data_type is None or input_scales is None:
        return

    data_inputs = traced_module.graph.inputs[1:]
    input_count = len(data_inputs)

    if data_inputs:
        # The meta only depends on input_data_type, so resolve it once.
        q_dtype_meta = _qparams_resolve_dtype_meta(input_data_type)
        for node in data_inputs:
            if node.qparams is None:
                node.qparams = create_qparams()
            node.qparams.dtype_meta = q_dtype_meta

    if isinstance(input_scales, str):
        input_scales = _qparams_parse_floats(
            input_scales, "input scales({}) do not in correct format.")
    for node, scale in zip(data_inputs,
                           _qparams_broadcast(input_scales, input_count)):
        node.qparams.scale = Tensor(float(scale))

    if input_zero_points is not None:
        if isinstance(input_zero_points, str):
            input_zero_points = _qparams_parse_floats(
                input_zero_points,
                "input zero points({}) do not in correct format.")
        for node, zero_point in zip(
                data_inputs, _qparams_broadcast(input_zero_points, input_count)):
            node.qparams.zero_point = Tensor(int(zero_point))
def test_syncbn2d_grad():
    """Compare ``SyncBatchNorm`` outputs and input gradients with ``BatchNorm2d``.

    Four rounds are run on fresh random data; from the third round on both
    layers are switched out of training mode.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    syncbn = SyncBatchNorm(8, track_running_stats=False)
    bn = BatchNorm2d(8, track_running_stats=False)

    def forward_backward(layer, inp, diff):
        # Run one forward/backward pass and hand back (output, input gradient).
        with GradManager().attach(inp) as gm:
            oup = layer(inp)
            gm.backward(oup, diff)
            grad = inp.grad
            inp.grad = None
        return oup, grad

    for step in range(4):
        if step == 2:
            syncbn.training = False
            bn.training = False
        inp = Tensor(np.random.normal(loc=2.3, size=data_shape).astype(np.float32))
        diff = Tensor(np.random.normal(size=data_shape).astype(np.float32))

        oup, grad = forward_backward(syncbn, inp, diff)
        oup_expect, grad_expect = forward_backward(bn, inp, diff)

        _assert_allclose(oup.numpy(), oup_expect.numpy())
        _assert_allclose(grad.numpy(), grad_expect.numpy())
def init_qat_net(net):
    """Overwrite the observer ranges of ``net`` in place.

    Index 0 of the enclosing ``min_val``/``max_val`` feeds the weight observer
    and index 1 feeds the activation observer; each observer is only touched
    when the matching ``with_weight``/``with_act`` flag is set.
    """
    if net.with_weight:
        weight_obs = net.weight_observer
        weight_obs.min_val[...] = Tensor(min_val[0])
        weight_obs.max_val[...] = Tensor(max_val[0])
    if net.with_act:
        act_obs = net.act_observer
        act_obs.min_val[...] = Tensor(min_val[1])
        act_obs.max_val[...] = Tensor(max_val[1])
def batched_nms(boxes: Tensor,
                scores: Tensor,
                idxs: Tensor,
                iou_thresh: float,
                max_output: Optional[int] = None) -> Tensor:
    r"""
    Performs class-aware non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    Every box is shifted by an offset derived from its class index before NMS
    is run once on the whole set.

    :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format.
    :param scores: tensor of shape `(N,)`, the score of boxes.
    :param idxs: tensor of shape `(N,)`, the class indices of boxes in the batch.
    :param iou_thresh: ``IoU`` threshold for overlapping.
    :param max_output: forwarded unchanged to ``F.nn.nms`` (presumably an upper
        bound on the number of kept boxes; confirm against ``nms``).
    :return: indices of the elements that have been kept by NMS.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor

        x = np.zeros((100,4))
        np.random.seed(42)
        x[:,:2] = np.random.rand(100,2) * 20
        x[:,2:] = np.random.rand(100,2) * 20 + 100
        scores = tensor(np.random.rand(100))
        idxs = tensor(np.random.randint(0, 10, 100))
        inp = tensor(x)
        result = batched_nms(inp, scores, idxs, iou_thresh=0.6)
        print(result.numpy())

    Outputs:

    .. testoutput::

        [75 41 99 98 69 64 11 27 35 18]

    """
    assert (boxes.ndim == 2
            and boxes.shape[1] == 4), "the expected shape of boxes is (N, 4)"
    assert scores.ndim == 1, "the expected shape of scores is (N,)"
    assert idxs.ndim == 1, "the expected shape of idxs is (N,)"
    assert (boxes.shape[0] == scores.shape[0] ==
            idxs.shape[0]), "number of boxes, scores and idxs are not matched"

    # Give each class its own offset, larger than the largest coordinate.
    class_ids = idxs.detach()
    stride = boxes.max() + 1
    per_box_offset = (class_ids.astype("float32") * stride).reshape(-1, 1)
    shifted_boxes = boxes + per_box_offset
    return F.nn.nms(shifted_boxes, scores, iou_thresh, max_output)
def test_set_warp_perspective_config():
    """Check that ``warp_perspective`` honours the configured conv format.

    The result obtained with the global ``config._conv_format`` set to
    ``"NHWC"`` and the result obtained inside ``config._override`` must both
    match an explicit ``format="NHWC"`` call.
    """
    config._conv_format = "NHWC"
    try:
        inp_shape = (1, 1, 4, 4)
        inp = Tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
        M_shape = (1, 3, 3)
        M = Tensor(np.random.randn(3, 3), dtype=np.float32).reshape(M_shape)
        config_out = F.vision.warp_perspective(inp, M, (2, 2))
    finally:
        # Always undo the global setting so a failure here cannot leak the
        # "NHWC" format into other tests.
        config._conv_format = "default"

    with config._override(conv_format="NHWC"):
        context_out = F.vision.warp_perspective(inp, M, (2, 2))
    expected = F.vision.warp_perspective(inp, M, (2, 2), format="NHWC")
    np.testing.assert_allclose(config_out.numpy(), expected.numpy())
    np.testing.assert_allclose(context_out.numpy(), expected.numpy())
def test_fill():
    """``Tensor.fill`` must overwrite every element with the given scalar."""
    dims = (2, 3)
    a = Tensor(np.zeros(dims, dtype=np.float32))
    # An integer first, then a float, applied to the same tensor.
    for value in (3, 124.568):
        a.fill(value)
        np.testing.assert_allclose(a.numpy(),
                                   np.full(dims, value, dtype=np.float32))
def truncated_normal_(tensor: Tensor, mean=0.0, std=1.0):
    """Initialise ``tensor`` in place from a truncated normal distribution.

    Samples are drawn with ``truncnorm.rvs`` on the interval ``[-2, 2]`` and
    then mapped to ``mean + std * sample`` before being written into
    ``tensor``.

    Background on the approach:
    https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/18

    Args:
        tensor (meg.Tensor): parameter to overwrite.
        mean (float, optional): Defaults to 0.0.
        std (float, optional): Defaults to 1.0.
    """
    samples = truncnorm.rvs(-2, 2, size=tensor.shape)
    scaled = mean + std * samples
    with Graph(eager_evaluation=True):
        tensor.set_value(scaled)
def func():
    """Check the output shape of ``m1.permutation`` for scalar and 2-D tensor inputs."""

    def check_shape(result, expected):
        # Shapes may come back as a tuple or as an object exposing numpy().
        result_shape = result.shape
        if isinstance(result_shape, tuple):
            assert result_shape == tuple(expected)
        else:
            assert all(result.shape.numpy() == np.array(expected))

    check_shape(m1.permutation(Tensor(7)), [1])

    n, m = 6, 3
    matrix = Tensor(np.arange(n * m), dtype="float32").reshape(n, m)
    check_shape(m1.permutation(matrix), [n, m])
def test_training_converge_with_swap_and_drop():
    """Train ``XORNet`` with the swap and drop flags enabled and check convergence.

    The mean loss of the last 100 of 2000 mini-batches must be below 0.1 and
    the precision on a 10x10 grid over ``[-1, 1]^2`` must be exactly 1.0.
    The swap/drop flags and the ``buffer_length`` option are restored even
    when the test fails, so the settings cannot leak into other tests.
    """
    _set_swap_flag(True)
    _set_drop_flag(True)
    old_buffer_length = get_option("buffer_length")
    set_option("buffer_length", 0)
    try:
        net = XORNet()
        opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
        gm = ad.GradManager().attach(net.parameters())

        def train(data, label):
            with gm:
                pred = net(data)
                loss = F.nn.cross_entropy(pred, label)
                gm.backward(loss)
            return loss

        def infer(data):
            return net(data)

        train_dataset = minibatch_generator()
        losses = []

        for data, label in itertools.islice(train_dataset, 2000):
            data = Tensor(data, dtype=np.float32)
            label = Tensor(label, dtype=np.int32)
            opt.clear_grad()
            loss = train(data, label)
            opt.step()
            losses.append(loss.numpy())

        assert np.mean(
            losses[-100:]) < 0.1, "Final training Loss must be low enough"

        ngrid = 10
        x = np.linspace(-1.0, 1.0, ngrid)
        xx, yy = np.meshgrid(x, x)
        xx = xx.reshape((ngrid * ngrid, 1))
        yy = yy.reshape((ngrid * ngrid, 1))
        data = np.concatenate((xx, yy), axis=1).astype(np.float32)

        pred = infer(Tensor(data)).numpy()
        precision = calculate_precision(data, pred)
        assert precision == 1.0, "Test precision must be high enough, get {}".format(
            precision)
    finally:
        # Undo the global settings regardless of the test outcome.
        _set_swap_flag(False)
        _set_drop_flag(False)
        set_option("buffer_length", old_buffer_length)
def test_dump_and_load():
    """Pickle round-trip a traced module and compare graph nodes and exprs.

    After resetting the ids of the original graph, every node must match its
    unpickled counterpart in name, qualified name and id, every expr must
    match in id, and the original traced module must still reproduce the
    expected output.
    """
    module = MyModule()
    x = Tensor(np.ones((1, 8, 14, 14)))
    expect = module(x)
    traced_module = trace_module(module, x)
    np.testing.assert_array_equal(expect, traced_module(x))

    new_tm = pickle.loads(pickle.dumps(traced_module))
    _check_id(new_tm)
    _check_expr_users(new_tm)
    traced_module.graph._reset_ids()

    old_nodes = traced_module.graph.nodes().as_list()
    new_nodes = new_tm.graph.nodes().as_list()
    old_exprs = traced_module.graph.exprs().as_list()
    new_exprs = new_tm.graph.exprs().as_list()

    assert len(old_nodes) == len(new_nodes)
    for old_node, new_node in zip(old_nodes, new_nodes):
        assert old_node._name == new_node._name
        assert old_node._qualname == new_node._qualname
        assert old_node._id == new_node._id

    assert len(old_exprs) == len(new_exprs)
    for old_expr, new_expr in zip(old_exprs, new_exprs):
        assert old_expr._id == new_expr._id

    np.testing.assert_array_equal(expect, traced_module(x))
def test_gaussian_op():
    """Check mean, std, device and dtype of the ``GaussianRNG`` op.

    The op is applied once with the global seed on the default device and
    once with an explicit RNG handle created on ``xpu2``.
    """
    # FIXME: remove this sync
    mge.core.set_option("async_level", 0)
    set_global_seed(1024)
    dims = (
        8,
        9,
        11,
        12,
    )
    shape = Tensor(dims, dtype="int32")

    def check(result, mean, std, device):
        # Empirical moments must be within 0.1 of the requested parameters.
        assert np.fabs(result.numpy().mean() - mean) < 1e-1
        assert np.fabs(np.sqrt(result.numpy().var()) - std) < 1e-1
        assert str(result.device) == str(device)
        assert result.dtype == np.float32

    default_op = GaussianRNG(seed=get_global_rng_seed(),
                             mean=1.0,
                             std=3.0,
                             dtype="float32")
    (output, ) = apply(default_op, shape)
    check(output, 1.0, 3.0, CompNode("xpux"))

    cn = CompNode("xpu2")
    seed = 233333
    h = new_rng_handle(cn, seed)
    handle_op = GaussianRNG(seed=seed,
                            mean=3.0,
                            std=1.0,
                            dtype="float32",
                            handle=h)
    (output, ) = apply(handle_op, shape)
    delete_rng_handle(h)
    check(output, 3.0, 1.0, cn)
def add_loader(expr):
    """Rewrite the second argument of ``expr`` while it is being loaded.

    The second argument is wrapped in a ``Constant`` expr, passed through an
    ``astype`` call targeting the dtype of ``expr.inputs[0]``, added to itself
    through ``__add__``, and the resulting node is installed as the new second
    argument of ``expr``.
    """
    args = list(expr.args)
    # Promote a raw (non-node) second argument to a Tensor before wrapping it.
    if not isinstance(args[1], TensorNode):
        args[1] = Tensor(args[1])
    node = Constant(args[1], "const").outputs[0]

    # astype(node, <dtype of the first input>) -> oup
    astype_expr = CallMethod(node, "astype")
    oup = TensorNode(
        astype_expr,
        shape=node.shape,
        dtype=node.dtype,
        qparams=node.qparams,
    )
    astype_expr.set_args_kwargs(node, expr.inputs[0].dtype)
    astype_expr.return_val = (oup, )

    # oup + oup -> oup1
    add_expr = CallMethod(oup, "__add__")
    add_expr.set_args_kwargs(oup, oup)
    oup1 = TensorNode(
        add_expr,
        shape=oup.shape,
        dtype=oup.dtype,
        qparams=node.qparams,
    )
    # NOTE(review): return_val is a bare node here but a 1-tuple for
    # astype_expr above -- confirm which form is expected.
    add_expr.return_val = oup1
    args[1] = oup1
    expr.set_args_kwargs(*args)
def test_zero_dim():
    """A tensor built from a Python scalar must equal a zero-dim numpy array.

    Both the value and the shape are compared; the expected shape is wrapped
    in ``np.array`` when symbolic shapes are in use.
    """
    a = Tensor(1)
    a_np = np.array(1, dtype=np.int32)
    np.testing.assert_equal(a, a_np)

    expected_shape = np.array(a_np.shape) if use_symbolic_shape() else a_np.shape
    np.testing.assert_equal(a.shape, expected_shape)
def test_syncbn1d():
    """Check ``SyncBatchNorm`` on 3-D input against a numpy reference.

    In training mode the normalised output and the running statistics are
    compared for three random batches.  Afterwards the layer is switched out
    of training mode and must give deterministic output from the unchanged
    running statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    stat_shape = (1, nr_chan, 1)
    running_mean = np.zeros(stat_shape, dtype=np.float32)
    running_var = np.ones(stat_shape, dtype=np.float32)

    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)

        # Flatten batch and length so each column holds one channel's samples.
        per_channel = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan))

        var_biased = np.var(per_channel, axis=0).reshape(stat_shape)
        sd = np.sqrt(var_biased + bn.eps)
        var_unbiased = np.var(per_channel, axis=0, ddof=1).reshape(stat_shape)

        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
        _assert_allclose(bn.running_mean.numpy().reshape(-1),
                         running_mean.reshape(-1))
        _assert_allclose(bn.running_var.numpy().reshape(-1),
                         running_var.reshape(-1))

    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False

    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    first = bn(data)
    second = bn(data)
    np.testing.assert_equal(first.numpy(), second.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())

    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(first.numpy(), yv_expect)
def test_training_converge(test_traced_module):
    """Train ``XORNet`` with gradient clipping and check that it converges.

    Args:
        test_traced_module: when truthy, the network is first converted with
            ``trace_module`` and the traced module is trained instead.
    """
    net = XORNet()
    if test_traced_module:
        sample = Tensor(np.random.random((14, 2)))
        net = trace_module(net, sample)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    @trace(symbolic=False)
    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
            optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
        return loss

    def infer(data):
        return net(data)

    losses = []
    for batch, target in itertools.islice(minibatch_generator(), 2000):
        batch = Tensor(batch, dtype=np.float32)
        target = Tensor(target, dtype=np.int32)
        opt.clear_grad()
        loss = train(batch, target)
        optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
        opt.step()
        losses.append(loss.numpy())

    assert (np.mean(losses[-100:]) <
            0.1), "Final training Loss must be low enough, get {}".format(
                np.mean(losses[-100:]))

    # Evaluate on a regular ngrid x ngrid lattice over [-1, 1]^2.
    ngrid = 10
    axis_points = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(axis_points, axis_points)
    xx = xx.reshape((ngrid * ngrid, 1))
    yy = yy.reshape((ngrid * ngrid, 1))
    grid = mge.tensor(np.concatenate((xx, yy), axis=1).astype(np.float32))

    pred = infer(grid)
    precision = calculate_precision(grid.numpy(), pred.numpy())
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision)
def worker(data, yv_expect, running_mean, running_var):
    """Feed this rank's batches through ``SyncBatchNorm`` and check the results.

    ``data`` and ``yv_expect`` are indexed by the rank reported by
    ``dist.get_rank()``.  Only the output of the final step is compared with
    the expectation, together with the layer's running statistics.
    """
    rank = dist.get_rank()
    bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
    rank_batches = data[rank]
    for step in range(steps):
        yv = bn(Tensor(rank_batches[step]))

    _assert_allclose(yv.numpy(), yv_expect[rank])
    _assert_allclose(bn.running_mean.numpy(), running_mean)
    _assert_allclose(bn.running_var.numpy(), running_var)
def worker():
    """Send ``val`` from rank 0 and verify it on the receiving rank.

    Rank 0 places ``val`` on ``gpu0``, sends it to rank 1 and synchronises;
    any other rank receives from rank 0 and checks device and contents.
    """
    if dist.get_rank() == 0:
        # remote send
        payload = Tensor(val, device="gpu0")
        remote_send(payload, 1)
        sync()
        return

    # remote recv
    received = remote_recv(0, val.shape, val.dtype)
    assert received.device == "gpu1"
    np.testing.assert_almost_equal(val, received.numpy())
def worker(rank, data, yv_expect, running_mean, running_var):
    """Run ``SyncBatchNorm`` on one rank of a locally initialised process group.

    Does nothing when fewer GPUs than ``nr_ranks`` are available.  Only the
    output of the final step is compared with ``yv_expect``, together with the
    layer's running statistics.
    """
    enough_gpus = mge.get_device_count("gpu") >= nr_ranks
    if not enough_gpus:
        return

    dist.init_process_group("localhost", port, nr_ranks, rank, rank)
    bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
    for step in range(steps):
        yv = bn(Tensor(data[step]))

    _assert_allclose(yv.numpy(), yv_expect)
    _assert_allclose(bn.running_mean.numpy(), running_mean)
    _assert_allclose(bn.running_var.numpy(), running_var)
def test_trace_module_2():
    """Trace a module that adds 1 to its input's shape and inspect the exprs.

    The traced graph must consist of a ``GetVarShape`` apply, a ``Constant``
    and an ``Elemwise`` apply, in that order, and calling the traced module on
    a two-element tensor must give 3 as the first output.
    """

    class Model(M.Module):
        def __init__(self):
            super().__init__()

        def forward(self, x):
            shape = x.shape
            return apply(builtin.Elemwise(mode="ADD"), shape, Tensor(1))

    traced_model = trace_module(Model(), Tensor(([1,])))

    shape_expr = traced_model.graph._exprs[0]
    assert isinstance(shape_expr, Apply)
    assert isinstance(shape_expr.opdef, builtin.GetVarShape)

    assert isinstance(traced_model.graph._exprs[1], Constant)

    add_expr = traced_model.graph._exprs[2]
    assert isinstance(add_expr, Apply)
    assert isinstance(add_expr.opdef, builtin.Elemwise)

    result = traced_model(Tensor([1, 2]))
    assert int(result[0]) == 3
def test_apply_easy_quant():
    """After ``apply_easy_quant`` the passive observers must still be in place.

    The quant stub and both linear layers must carry ``PassiveObserver``
    instances, while the dequant stub must have no activation observer.
    """
    qat_net = init_qat_net()
    data = Tensor(np.random.rand(2, 3, 3, 3), dtype=np.float32)
    eq_net = reset_qconfig(qat_net, passive_qconfig, inplace=False)
    apply_easy_quant(eq_net, data, 0.9, 1.1, 10)

    assert isinstance(eq_net.quant.act_observer, PassiveObserver)
    for layer_idx in (0, 1):
        assert isinstance(eq_net.linear[layer_idx].weight_observer, PassiveObserver)
        assert isinstance(eq_net.linear[layer_idx].act_observer, PassiveObserver)
    assert eq_net.dequant.act_observer is None
def test_functional_loader():
    """Check that a registered functional loader rewrites exprs on unpickling.

    A loader for ``megengine.functional.nn.conv2d`` inserts an ``astype`` call
    on the weight.  After a pickle round trip the graph must start with that
    ``CallMethod``, contain two exprs and still give the original result.
    ``S.FUNCTIONAL_LOADER`` is restored even when the test fails, so the
    temporary loader cannot leak into other tests.
    """

    class MyModule2(Module):
        def forward(self, x, y):
            return F.conv2d(x, y)

    m = MyModule2()
    x = Tensor(np.random.random((1, 3, 32, 32)))
    y = Tensor(np.random.random((3, 3, 3, 3)))
    traced_module = trace_module(m, x, y)

    orig_loader_dict = S.FUNCTIONAL_LOADER
    S.FUNCTIONAL_LOADER = {}
    try:

        @register_functional_loader(("megengine.functional.nn", "conv2d"))
        def conv2df_loader(expr):
            # expr.func = ("megengine.functional.nn","conv2d")
            orig_weight = expr.named_args["weight"]

            astype_expr = CallMethod(orig_weight, "astype")
            oup = TensorNode(
                astype_expr,
                shape=orig_weight.shape,
                dtype=orig_weight.dtype,
                qparams=orig_weight.qparams,
            )

            astype_expr.set_args_kwargs(orig_weight,
                                        expr.named_args["inp"].dtype)
            astype_expr.return_val = (oup, )

            expr.set_arg("weight", oup)

        obj = pickle.dumps(traced_module)
        new_module = pickle.loads(obj)
        _check_expr_users(new_module)
        _check_id(new_module)
        result = new_module(x, y)
        gt = m(x, y)
        assert (isinstance(new_module.graph._exprs[0], CallMethod)
                and len(new_module.graph._exprs) == 2)
        np.testing.assert_equal(result.numpy(), gt.numpy())
    finally:
        # Put the original loader registry back regardless of the outcome.
        S.FUNCTIONAL_LOADER = orig_loader_dict
def build_observered_net(net: M.Module, observer_cls):
    """Convert ``net`` to a QAT network and run its observers on one random batch.

    Args:
        net: module to convert with ``Q.quantize_qat``.
        observer_cls: observer class handed to ``get_observer_config``.

    Returns:
        The QAT network in eval mode, with observers disabled again after a
        single forward pass.
    """
    qconfig = get_observer_config(observer_cls)
    qat_net = Q.quantize_qat(
        net,
        qconfig=qconfig,
        mapping={MyConvBnRelu2d: MyQATConvBnRelu2d},
    )
    Q.enable_observer(qat_net)
    sample = Tensor(np.random.random(size=(5, 3, 32, 32)))
    qat_net.eval()
    qat_net(sample)
    Q.disable_observer(qat_net)
    return qat_net
def test_dump_model():
    """``mge.dump`` must be able to write an ``MLP`` prediction to a file.

    The temporary file is closed and removed whether or not the dump succeeds.
    """
    data = Tensor(np.random.random((2, 28)))
    pred = MLP()(data)

    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp_path = tmp.name
    try:
        mge.dump(pred, tmp_path)
    finally:
        tmp.close()
        os.unlink(tmp_path)
def test_opdef_loader():
    """Check that a registered opdef loader rewrites exprs on unpickling.

    The loader turns an ``Elemwise`` ``ADD`` into ``MUL`` and casts the second
    input with ``astype``.  After a pickle round trip the graph must hold the
    ``astype`` ``CallMethod`` followed by the ``MUL`` op, and ``ones * ones``
    must equal the input.  ``S.OPDEF_LOADER`` is restored even when the test
    fails, so the temporary loader cannot leak into other tests.
    """

    class MyModule1(Module):
        def forward(self, x, y):
            op = Elemwise("ADD")
            return apply(op, x, y)[0]

    m = MyModule1()
    x = Tensor(np.ones((20)))
    y = Tensor(np.ones((20)))
    traced_module = trace_module(m, x, y)

    orig_loader_dict = S.OPDEF_LOADER
    S.OPDEF_LOADER = {}
    try:

        @register_opdef_loader(Elemwise)
        def add_opdef_loader(expr):
            if expr.opdef_state["mode"] == "ADD":
                expr.opdef_state["mode"] = "MUL"
                node = expr.inputs[1]
                astype_expr = CallMethod(node, "astype")
                oup = TensorNode(
                    astype_expr,
                    shape=node.shape,
                    dtype=expr.inputs[0].dtype,
                    qparams=node.qparams,
                )
                astype_expr.set_args_kwargs(node, expr.inputs[0].dtype)
                astype_expr.return_val = (oup, )
                expr.inputs[1] = oup

        obj = pickle.dumps(traced_module)
        new_module = pickle.loads(obj)
        _check_id(new_module)
        _check_expr_users(new_module)
        _check_name(new_module.flatten())
        assert (isinstance(new_module.graph._exprs[0], CallMethod)
                and new_module.graph._exprs[1].opdef.mode == "MUL"
                and len(new_module.graph._exprs) == 2)
        result = new_module(x, y)
        np.testing.assert_equal(result.numpy(), x.numpy())
    finally:
        # Put the original loader registry back regardless of the outcome.
        S.OPDEF_LOADER = orig_loader_dict
def test_PoissonRNG():
    """Check ``RNG.poisson`` for seed reproducibility, device placement and statistics.

    Same-seed generators must agree across devices and a different seed must
    give different samples.  The empirical mean and standard deviation are
    compared against ``lam`` and ``sqrt(lam)``.
    """
    rng_a = RNG(seed=111, device="xpu0")
    rng_b = RNG(seed=111, device="xpu1")
    rng_c = RNG(seed=222, device="xpu0")
    lam = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32)

    sample_a = rng_a.poisson(lam.to("xpu0"), size=(100, ))
    sample_b = rng_b.poisson(lam.to("xpu1"), size=(100, ))
    sample_c = rng_c.poisson(lam.to("xpu0"), size=(100, ))

    np.testing.assert_allclose(sample_a.numpy(), sample_b.numpy(), atol=1e-6)
    assert sample_a.device == "xpu0"
    assert sample_b.device == "xpu1"
    assert (sample_a.numpy() != sample_c.numpy()).any()

    out = rng_a.poisson(lam.to("xpu0"), size=(20, 30))
    out_shp = out.shape
    expected_shape = (20, 30) + lam._tuple_shape
    if isinstance(out_shp, tuple):
        assert out_shp == expected_shape
    else:
        # A non-tuple shape is compared element-wise through its numpy value.
        assert all(out.shape.numpy() == np.array(expected_shape))

    lam = lam.numpy()
    lam_std = np.sqrt(lam)
    mean_err = np.abs(out.mean(axis=(0, 1)).numpy() - lam) / lam_std
    assert mean_err.mean() < 0.1
    std_err = np.abs(np.std(out.numpy(), axis=(0, 1)) - lam_std)
    assert std_err.mean() < 0.1
def worker(rank):
    """Exchange ``val`` between two ranks of a locally initialised process group.

    Does nothing when fewer GPUs than ``world_size`` are available.  Rank 0
    sends ``val`` from ``gpu0`` and checks the value returned by
    ``remote_send``; any other rank receives it and checks device and contents.
    """
    if mge.get_device_count("gpu") < world_size:
        return

    # Both roles join the group with identical arguments before communicating.
    dist.init_process_group("localhost", port, world_size, rank, rank)

    if rank == 0:
        # remote send
        payload = Tensor(val, device="gpu0")
        ack = remote_send(payload, 1)
        assert ack.numpy()[0] == 0
        return

    # remote recv
    received = remote_recv(0, val.shape, val.dtype)
    assert received.device == "gpu1"
    np.testing.assert_almost_equal(val, received.numpy())
def test_shared_module():
    """A submodule shared under two attribute names must stay shared after pickling.

    ``MyModule`` stores one ``Elemwise`` module as both ``a`` and ``b``; after
    tracing and a pickle round trip the two attributes must still refer to
    the same object.
    """

    class MyModule(M.Module):
        def __init__(self):
            super().__init__()
            self.a = M.Elemwise("ADD")
            self.b = self.a

        def forward(self, x, y):
            z = self.a(x, y)
            z = self.b(z, y)
            return z

    lhs = Tensor(1)
    rhs = Tensor(2)
    tm = trace_module(MyModule(), lhs, rhs)

    load_tm = pickle.loads(pickle.dumps(tm))
    _check_expr_users(load_tm)
    _check_name(load_tm.flatten())
    _check_id(load_tm)
    assert load_tm.a is load_tm.b