def test_single_input():
    data_shape = (9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)

    class MulFunc(Function):
        def forward(self, a):
            self.a = a
            return a * 10

        def backward(self, grad_o):
            return grad_o * 10

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer1 = MulFunc()

        def forward(self):
            x = self.layer1(self.a)
            return x

    net = Simple(av)
    gm = ad.GradManager().attach(net.parameters())
    opt = optimizer.SGD(net.parameters(), lr=1.0)

    opt.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    opt.step()

    np.testing.assert_almost_equal(loss.numpy(), (av * 10))
    np.testing.assert_almost_equal(net.a.numpy(), (av - 10))
def worker(max_err):
    net = MnistNet(has_bn=True)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    gm = ad.GradManager().attach(
        net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
    )

    # use the same data and label on all GPUs so that the result
    # does not depend on the number of GPUs
    data_train = Tensor(data)
    label_train = Tensor(label)

    loss = train(data_train, label_train, net, opt, gm)
    np.testing.assert_allclose(loss.numpy(), checkpoint["loss"], atol=max_err)

    if dist.get_rank():
        return
    for param, param_ref in zip(
        net.state_dict().items(), checkpoint["net_updated"].items()
    ):
        assert param[0] == param_ref[0]
        if "bn" in param[0]:
            ref = param_ref[1].reshape(param[1].shape)
            np.testing.assert_allclose(param[1], ref, atol=max_err)
        else:
            np.testing.assert_allclose(param[1], param_ref[1], atol=max_err)
def update_model(model_path):
    """
    Update the dumped model (which carries its test cases) with new reference values.

    The model with pre-trained weights is trained for one iteration on the attached
    test data, and the loss and the updated net state dict are dumped.

    .. code-block:: python

        from test_correctness import update_model

        update_model('mnist_model_with_test.mge')      # for gpu
        update_model('mnist_model_with_test_cpu.mge')  # for cpu

    """
    net = MnistNet(has_bn=True)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    gm = ad.GradManager().attach(net.parameters())

    data = Tensor(checkpoint["data"], dtype=np.float32)
    label = Tensor(checkpoint["label"], dtype=np.int32)

    opt.clear_grad()
    loss = train(data, label, net, opt, gm)
    opt.step()

    xpu_name = get_xpu_name()

    checkpoint.update(
        {"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name}
    )
    mge.save(checkpoint, model_path)
def test_none_in_out_grad():
    class Test(Function):
        def forward(self, a, b):
            return a, b

        def backward(self, grad_a, grad_b):
            assert grad_b is None
            return (grad_a, None)

    class Simple(Module):
        def __init__(self, a, b):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.b = Parameter(b, dtype=np.float32)
            self.layer = Test()

        def forward(self):
            aa, bb = self.layer(self.a, self.b)
            return aa, bb

    a = tensor(np.array([1.0], dtype=np.float32))
    b = tensor(np.array([2.0], dtype=np.float32))
    net = Simple(a, b)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())

    optim.clear_grad()
    with gm:
        loss, _ = net()
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(), np.array([1.0 - 1.0], dtype=np.float32)
    )
    np.testing.assert_almost_equal(
        net.b.numpy(), np.array([2.0 - 0.0], dtype=np.float32)
    )
def run_syncbn(trace_mode):
    x = F.ones([2, 16, 4, 4], dtype="float32")

    net = Sequential(
        Conv2d(16, 16, 1), SyncBatchNorm(16), Conv2d(16, 16, 1), SyncBatchNorm(16),
    )

    gm = ad.GradManager().attach(
        net.parameters(), callbacks=dist.make_allreduce_cb("MEAN")
    )
    opt = optimizer.SGD(net.parameters(), 1e-3)

    def train_func(x):
        with gm:
            y = net(x)
            loss = y.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    if trace_mode is not None:
        train_func = trace(train_func, symbolic=trace_mode)

    for _ in range(3):
        loss = train_func(x)
        loss.numpy()
def test_sgd_momentum():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    # do a step of train
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34
    )

    # do a step of infer
    loss = net(data)
    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34
    )

    # do a step of train
    optim.clear_grad()
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34
    )
def test_clear_grad():
    class StopGradient(Function):
        def forward(self, a):
            return a

        def backward(self, *_):
            return None

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer = StopGradient()

        def forward(self):
            b = self.a * 3.0
            c = self.a * 4.0
            return self.layer(b) + c

    a = tensor(np.array([1.0], dtype=np.float32))
    net = Simple(a)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())

    optim.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(), np.array([1.0 - 4.0], dtype=np.float32),
    )
def run_train(
    model_path,
    use_jit,
    use_symbolic,
    sublinear_memory_config=None,
    max_err=None,
    use_adaptive_pooling=False,
):
    """
    Load the model with test cases and run the training for one iteration.
    The loss and the updated weights are compared with the reference values
    to verify correctness.

    Dump a new file with updated results by calling update_model if you believe
    the test fails due to numerical rounding errors rather than bugs.
    Please think twice before you do so.
    """
    net = MnistNet(has_bn=True, use_adaptive_pooling=use_adaptive_pooling)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    gm = ad.GradManager().attach(net.parameters())

    data = Tensor(checkpoint["data"], dtype=np.float32)
    label = Tensor(checkpoint["label"], dtype=np.int32)

    if max_err is None:
        max_err = 1e-5

    train_func = train
    if use_jit:
        train_func = jit.trace(
            train_func,
            symbolic=use_symbolic,
            sublinear_memory_config=sublinear_memory_config,
        )

    opt.clear_grad()
    loss = train_func(data, label, net, opt, gm)
    opt.step()

    np.testing.assert_allclose(loss.numpy(), checkpoint["loss"], atol=max_err)

    for param, param_ref in zip(
        net.state_dict().items(), checkpoint["net_updated"].items()
    ):
        assert param[0] == param_ref[0]
        if "bn" in param[0]:
            ref = param_ref[1].reshape(param[1].shape)
            np.testing.assert_allclose(param[1], ref, atol=max_err)
        else:
            np.testing.assert_allclose(param[1], param_ref[1], atol=max_err)
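# The two drivers below are only an illustrative sketch of how run_train above could be
# invoked; they are not part of the original test file. The model file name is taken from
# update_model's docstring, while the test names and jit/symbolic combinations are assumptions.
def test_correctness_sketch():
    run_train("mnist_model_with_test.mge", use_jit=False, use_symbolic=False)


def test_correctness_jit_sketch():
    run_train("mnist_model_with_test.mge", use_jit=True, use_symbolic=True)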
def test_bn_no_track_stat():
    nchannel = 3
    m = BatchNorm2d(nchannel, track_running_stats=False)

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")
    with gm:
        loss = m(data).sum()
        gm.backward(loss)
    optim.step()
def test_hello_world():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(), np.array([1.23 - 2.34]).astype(np.float32)
    )
def test_clip_grad_norm():
    net = Net()
    x = mge.tensor(np.random.randn(10, 3, 224, 224))
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)

    with gm:
        loss = net(x).sum()
        gm.backward(loss)
    save_grad_value(net)

    max_norm = 1.0
    original_norm = optim.clip_grad_norm(net.parameters(), max_norm=max_norm, ord=2)
    scale = max_norm / original_norm
    for param in net.parameters():
        np.testing.assert_almost_equal(param.grad.numpy(), param.grad_backup * scale)
    opt.step().clear_grad()
def train_pipeline():
    m = ResNet18Pipeline()
    x = F.ones([32, 3, 224, 224])
    label = F.zeros([32,], dtype="int32")

    gm = ad.GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, 0.9, 1e-4)

    for _ in range(2):
        m(x)
        loss = m.backward(label, gm)
        opt.step().clear_grad()
        print(loss)
def test_training_converge_with_swap_and_drop():
    _set_swap_flag(True)
    _set_drop_flag(True)
    old_buffer_length = get_option("buffer_length")
    set_option("buffer_length", 0)

    net = XORNet()
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
        return loss

    def infer(data):
        return net(data)

    train_dataset = minibatch_generator()
    losses = []

    for data, label in itertools.islice(train_dataset, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        opt.step()
        losses.append(loss.numpy())

    assert np.mean(losses[-100:]) < 0.1, "Final training loss must be low enough"

    ngrid = 10
    x = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(x, x)
    xx = xx.reshape((ngrid * ngrid, 1))
    yy = yy.reshape((ngrid * ngrid, 1))
    data = np.concatenate((xx, yy), axis=1).astype(np.float32)
    pred = infer(Tensor(data)).numpy()
    precision = calculate_precision(data, pred)
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision
    )

    _set_swap_flag(False)
    _set_drop_flag(False)
    set_option("buffer_length", old_buffer_length)
def test_detach():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    dshape = (10, 10)
    data = tensor(np.ones(dshape).astype(np.float32))
    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()

    np.testing.assert_equal(net.a.numpy(), np.array([1.0]).astype(np.float32))
    np.testing.assert_equal(
        net.b.numpy(), np.array([1.0 - 10.0 * 10.0]).astype(np.float32)
    )
def worker():
    net = Simple()
    opt = SGD(net.parameters(), lr=0.1)

    gm = ad.GradManager().attach(
        net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
    )

    opt.clear_grad()
    with gm:
        x = tensor(data)
        loss = net(x)
        loss = loss.sum()
        gm.backward(loss)

    for p in net.params:
        np.testing.assert_equal(p.grad.numpy(), 1)
def test_sgd_momentum_trace():
    origin_inplace = os.getenv("MEGENGINE_INPLACE_UPDATE")
    symbolic = (True, False)
    inplace = (0, 1)
    for symbolic, inplace in itertools.product(symbolic, inplace):
        os.environ["MEGENGINE_INPLACE_UPDATE"] = str(inplace)

        @trace(symbolic=symbolic)
        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        @trace(symbolic=symbolic)
        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])

        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34
        )

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34
            )

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34, 5
        )

    if origin_inplace:
        os.environ["MEGENGINE_INPLACE_UPDATE"] = origin_inplace
    else:
        del os.environ["MEGENGINE_INPLACE_UPDATE"]
def test_advance_indexing_with_subtensor():
    net = Simple2()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (2, 3, 4, 3, 4, 2)
    raw_data = np.arange(576).reshape(dshape).astype(np.float32)
    data = tensor(raw_data)
    answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()

    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(), np.array([answer]).astype(np.float32)
    )
def test_save_load():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    with gm:
        loss = net(data)
        gm.backward(loss)

    optim.step()

    model_name = "simple.pkl"
    print("save to {}".format(model_name))

    mge.save(
        {
            "name": "simple",
            "state_dict": net.state_dict(),
            "opt_state": optim.state_dict(),
        },
        model_name,
    )

    # Load params to cpu
    checkpoint = mge.load(model_name, map_location="cpu0")
    device_save = mge.get_default_device()
    mge.set_default_device("cpu0")
    net = Simple()
    net.load_state_dict(checkpoint["state_dict"])
    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.load_state_dict(checkpoint["opt_state"])
    print("load done")
    os.remove("simple.pkl")

    # re-attach the GradManager to the reloaded parameters so that
    # gradients are actually computed for them in the step below
    gm = ad.GradManager().attach(net.parameters())

    with gm:
        loss = net([1.23])
        gm.backward(loss)

    optim.step()
    # Restore device
    mge.set_default_device(device_save)
def run_frozen_bn(BNModule, is_training, use_trace, use_symbolic):
    nchannel = 3
    m = BNModule(nchannel, freeze=True)
    if is_training:
        m.train()
    else:
        m.eval()

    var = 4.0
    bias = 1.0
    shape = (1, nchannel, 1, 1)
    m.running_var[...] = var * F.ones(shape)
    m.running_mean[...] = bias * F.ones(shape)

    saved_var = m.running_var.numpy()
    saved_mean = m.running_mean.numpy()
    saved_wt = m.weight.numpy()
    saved_bias = m.bias.numpy()

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")

    def train_fn(d):
        for _ in range(3):
            with gm:
                loss = m(d).mean()
                gm.backward(loss)
            optim.step()
        return loss

    if use_trace:
        train_fn = trace(train_fn, symbolic=use_symbolic)

    for _ in range(3):
        loss = train_fn(megengine.tensor(data))
        if not is_training:
            np.testing.assert_equal(m.running_var.numpy(), saved_var)
            np.testing.assert_equal(m.running_mean.numpy(), saved_mean)
            np.testing.assert_almost_equal(
                loss.numpy(), ((data - bias) / np.sqrt(var)).mean(), 5
            )
        np.testing.assert_equal(m.weight.numpy(), saved_wt)
        np.testing.assert_equal(m.bias.numpy(), saved_bias)
def test_training_converge(test_traced_module):
    net = XORNet()
    if test_traced_module:
        inp = Tensor(np.random.random((14, 2)))
        net = trace_module(net, inp)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    @trace(symbolic=False)
    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
        optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
        return loss

    def infer(data):
        return net(data)

    train_dataset = minibatch_generator()
    losses = []

    for data, label in itertools.islice(train_dataset, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
        opt.step()
        losses.append(loss.numpy())

    assert np.mean(losses[-100:]) < 0.1, "Final training loss must be low enough, get {}".format(
        np.mean(losses[-100:])
    )

    ngrid = 10
    x = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(x, x)
    xx = xx.reshape((ngrid * ngrid, 1))
    yy = yy.reshape((ngrid * ngrid, 1))
    data = mge.tensor(np.concatenate((xx, yy), axis=1).astype(np.float32))
    pred = infer(data)
    precision = calculate_precision(data.numpy(), pred.numpy())
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision
    )
def test_load_state_dict_no_cache(monkeypatch):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", "1")
        net = Net()

        optim = optimizer.SGD(net.parameters(), lr=0.1)
        gm = ad.GradManager().attach(net.parameters())
        state = {
            "fc.weight": np.array([[0]], dtype=np.float32),
            "fc.bias": np.array([0.0], dtype=np.float32),
        }
        net.load_state_dict(state)
        images = mge.tensor([[0]], dtype=np.float32)
        with gm:
            loss = net(images)
            gm.backward(loss)
            optim.step()
            optim.clear_grad()
def test_sgd_momentum(monkeypatch, trace_mode, inplace_mode):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", str(int(inplace_mode)))

        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        if trace_mode is not None:
            train_func = trace(symbolic=trace_mode)(train_func)

        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        if trace_mode is not None:
            eval_func = trace(symbolic=trace_mode)(eval_func)

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])

        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34
        )

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34
            )

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34, 5
        )
def test_clip_grad_value():
    net = Net()
    x = np.random.randn(10, 3, 224, 224).astype("float32")
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)

    with gm:
        y = net(mge.tensor(x))
        y = y.mean()
        gm.backward(y)
    save_grad_value(net)

    max_val = 5
    min_val = -2
    optim.clip_grad_value(net.parameters(), lower=min_val, upper=max_val)
    for param in net.parameters():
        np.testing.assert_almost_equal(
            param.grad.numpy(),
            np.maximum(np.minimum(param.grad_backup, max_val), min_val),
        )
    opt.step().clear_grad()
def test_advance_indexing():
    net = Simple()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (10, 10)
    raw_data = np.arange(100).reshape(dshape).astype(np.float32)
    raw_mask = (np.random.random_sample(dshape) > 0.5).astype(np.bool_)
    data = tensor(raw_data)
    mask = tensor(raw_mask)
    answer = 1.0 - raw_data[raw_mask].sum()

    with gm:
        loss = net(data, mask).sum()
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(), np.array([answer]).astype(np.float32)
    )
def train():
    m = ResNet18MP()
    x = F.ones([32, 3, 224, 224])
    label = F.zeros([32,], dtype="int32")

    gm = ad.GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, 0.9, 1e-4)

    for _ in range(2):
        with gm:
            y = m(x)
            if dist.get_rank() == 3:
                loss = F.nn.cross_entropy(y, label)
            else:
                loss = None
            gm.backward(loss)
        opt.step().clear_grad()
        print(loss)
def worker():
    net = Simple(param_shape)
    opt = SGD(net.parameters(), lr=0.1)

    allreduce_cb = dist.make_allreduce_cb("MEAN", dist.WORLD)
    if threshold is not None:
        allreduce_cb._param_pack_thd = threshold
    gm = ad.GradManager().attach(net.parameters(), callbacks=[allreduce_cb])

    def run():
        opt.clear_grad()
        with gm:
            x = tensor(data)
            loss = net(x)
            loss = loss.sum()
            gm.backward(loss)

    for i in range(n_iters):
        run()

    for p in net.params:
        np.testing.assert_equal(p.grad.numpy(), np.ones_like(p.grad.numpy()))
def test_sgd_momentum_trace():
    for symbolic in (True, False):

        @trace(symbolic=symbolic)
        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        @trace(symbolic=symbolic)
        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])

        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34
        )

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34
            )

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34
        )
def test_multi_output():
    data_shape = (9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    bv = np.random.random(data_shape).astype(np.float32)

    class MulFunc(Function):
        def forward(self, a, b):
            self.a = a
            self.b = b
            return a * b, a + b

        def backward(self, grad_1, grad_2):
            return grad_1 * (self.b + 1), grad_2 * (self.a + 1)

    class Simple(Module):
        def __init__(self, a, b):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.b = Parameter(b, dtype=np.float32)
            self.layer1 = MulFunc()

        def forward(self):
            x, y = self.layer1(self.a, self.b)
            return x + y

    net = Simple(av, bv)
    gm = ad.GradManager().attach(net.parameters())
    opt = optimizer.SGD(net.parameters(), lr=1.0)

    opt.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    opt.step()

    np.testing.assert_almost_equal(loss.numpy(), (av * bv + av + bv), decimal=6)
    np.testing.assert_almost_equal(net.a.numpy(), (av - bv - 1), decimal=6)
    np.testing.assert_almost_equal(net.b.numpy(), (bv - av - 1), decimal=6)
def test_bn_no_track_stat2():
    nchannel = 3
    m = BatchNorm2d(nchannel)  # init with track_running_stats=True
    m.track_running_stats = False

    # m.running_var and m.running_mean were created at init time
    saved_var = m.running_var.numpy()
    assert saved_var is not None
    saved_mean = m.running_mean.numpy()
    assert saved_mean is not None

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")
    with gm:
        loss = m(data).sum()
        gm.backward(loss)
    optim.step()

    np.testing.assert_equal(m.running_var.numpy(), saved_var)
    np.testing.assert_equal(m.running_mean.numpy(), saved_mean)
def test_ste():
    class STE(Function):
        def forward(self, x):
            maxv, minv = x.max(), x.min()
            scale = F.maximum(maxv, -minv) / 127
            return F.round(x / scale) * scale

        def backward(self, grad_y):
            return grad_y

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer1 = STE()

        def forward(self):
            x = self.layer1(self.a)
            x = (x * 2.0).sum()
            return x

    data_shape = (1, 9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    net = Simple(av)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())

    optim.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(),
        av - np.broadcast_to(np.array([2.0], dtype=np.float32), data_shape),
    )