Example #1
def test_single_input():
    data_shape = (9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)

    class MulFunc(Function):
        def forward(self, a):
            self.a = a
            return a * 10

        def backward(self, grad_o):
            return grad_o * 10

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer1 = MulFunc()

        def forward(self):
            x = self.layer1(self.a)
            return x

    net = Simple(av)
    gm = ad.GradManager().attach(net.parameters())
    opt = optimizer.SGD(net.parameters(), lr=1.0)

    opt.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    opt.step()

    np.testing.assert_almost_equal(loss.numpy(), (av * 10))
    np.testing.assert_almost_equal(net.a.numpy(), (av - 10))
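
The updated value asserted above follows directly from the custom backward: MulFunc scales the incoming gradient (all ones, from loss.sum()) by 10, and one SGD step with lr=1.0 subtracts it. A plain NumPy re-computation of that arithmetic (a sanity-check sketch, not MegEngine code):

import numpy as np

av = np.random.random((9, 2, 6)).astype(np.float32)
grad_a = np.full_like(av, 10.0)   # MulFunc.backward: incoming ones scaled by 10
updated = av - 1.0 * grad_a       # one SGD step with lr=1.0
np.testing.assert_almost_equal(updated, av - 10)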
Example #2
    def worker(max_err):
        net = MnistNet(has_bn=True)
        net.load_state_dict(checkpoint["net_init"])
        lr = checkpoint["sgd_lr"]
        opt = SGD(net.parameters(), lr=lr)

        gm = ad.GradManager().attach(
            net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
        )

        # use the same data and label on all GPUs
        # so that the result does not depend on the number of GPUs
        data_train = Tensor(data)
        label_train = Tensor(label)

        loss = train(data_train, label_train, net, opt, gm)

        np.testing.assert_allclose(loss.numpy(), checkpoint["loss"], atol=max_err)

        if dist.get_rank():
            return
        for param, param_ref in zip(
            net.state_dict().items(), checkpoint["net_updated"].items()
        ):
            assert param[0] == param_ref[0]
            if "bn" in param[0]:
                ref = param_ref[1].reshape(param[1].shape)
                np.testing.assert_allclose(param[1], ref, atol=max_err)
            else:
                np.testing.assert_allclose(param[1], param_ref[1], atol=max_err)
Example #3
def update_model(model_path):
    """
    Update the dumped model with test cases for new reference values.

    The model with pre-trained weights is trained for one iteration on the test data attached to the file.
    The loss and the updated net state dict are then dumped.

    .. code-block:: python

        from test_correctness import update_model
        update_model('mnist_model_with_test.mge') # for gpu
        update_model('mnist_model_with_test_cpu.mge') # for cpu

    """
    net = MnistNet(has_bn=True)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)
    gm = ad.GradManager().attach(net.parameters())

    data = Tensor(checkpoint["data"], dtype=np.float32)
    label = Tensor(checkpoint["label"], dtype=np.int32)

    opt.clear_grad()
    loss = train(data, label, net, opt, gm)
    opt.step()

    xpu_name = get_xpu_name()

    checkpoint.update(
        {"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name}
    )
    mge.save(checkpoint, model_path)
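
For reference, judging only from the keys accessed above, the dumped checkpoint is assumed to be a plain dict with the layout sketched below; the descriptions are placeholders, not values from an actual file:

checkpoint_layout = {
    "net_init": "state dict used to initialize MnistNet",
    "sgd_lr": "learning rate passed to SGD",
    "data": "training batch (float32 array)",
    "label": "labels for the batch (int32 array)",
    # written back by update_model:
    "net_updated": "state dict after one training step",
    "loss": "loss.numpy(), the new reference value",
    "xpu": "device name returned by get_xpu_name()",
}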
Example #4
def test_none_in_out_grad():
    class Test(Function):
        def forward(self, a, b):
            return a, b

        def backward(self, grad_a, grad_b):
            assert grad_b is None
            return (grad_a, None)

    class Simple(Module):
        def __init__(self, a, b):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.b = Parameter(b, dtype=np.float32)
            self.layer = Test()

        def forward(self):
            aa, bb = self.layer(self.a, self.b)
            return aa, bb

    a = tensor(np.array([1.0], dtype=np.float32))
    b = tensor(np.array([2.0], dtype=np.float32))
    net = Simple(a, b)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())
    optim.clear_grad()
    with gm:
        loss, _ = net()
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([1.0 - 1.0], dtype=np.float32))
    np.testing.assert_almost_equal(net.b.numpy(),
                                   np.array([2.0 - 0.0], dtype=np.float32))
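
The expected parameter values can be checked by hand: the loss is aa (i.e. a) itself, so its gradient w.r.t. a is 1, while b receives no gradient at all. A plain NumPy sketch of that update, assuming lr=1.0 as in the test:

import numpy as np

a, b = np.float32(1.0), np.float32(2.0)
grad_a = np.float32(1.0)   # d(loss)/da == 1; grad for b is None, so b stays untouched
a = a - 1.0 * grad_a       # one SGD step with lr=1.0
np.testing.assert_almost_equal(a, 1.0 - 1.0)
np.testing.assert_almost_equal(b, 2.0 - 0.0)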
Example #5
def run_syncbn(trace_mode):
    x = F.ones([2, 16, 4, 4], dtype="float32")

    net = Sequential(
        Conv2d(16, 16, 1), SyncBatchNorm(16), Conv2d(16, 16, 1), SyncBatchNorm(16),
    )

    gm = ad.GradManager().attach(
        net.parameters(), callbacks=dist.make_allreduce_cb("MEAN")
    )
    opt = optimizer.SGD(net.parameters(), 1e-3)

    def train_func(x):
        with gm:
            y = net(x)
            loss = y.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    if trace_mode is not None:
        train_func = trace(train_func, symbolic=trace_mode)

    for _ in range(3):
        loss = train_func(x)
        loss.numpy()
Example #6
def test_sgd_momentum():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    # do a step of train
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

    # do a step of infer
    loss = net(data)
    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

    # do a step of train
    optim.clear_grad()
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34)
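
The asserted buffer values match the classic momentum rule buf = momentum * buf + grad with param -= lr * buf, assuming the gradient of the loss w.r.t. net.a equals the input 2.34 at every training step (the inference step in between does not touch the optimizer state). A minimal arithmetic sketch under that assumption:

momentum, lr, grad = 0.9, 1.0, 2.34
buf, a = 0.0, 1.23
buf = momentum * buf + grad      # after the first training step: 2.34
a -= lr * buf                    # a becomes 1.23 - 2.34
assert abs(a - (1.23 - 2.34)) < 1e-6
buf = momentum * buf + grad      # after the second training step: 0.9 * 2.34 + 2.34
assert abs(buf - (0.9 * 2.34 + 2.34)) < 1e-6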
Example #7
def test_clear_grad():
    class StopGradient(Function):
        def forward(self, a):
            return a

        def backward(self, *_):
            return None

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer = StopGradient()

        def forward(self):
            b = self.a * 3.0
            c = self.a * 4.0
            return self.layer(b) + c

    a = tensor(np.array([1.0], dtype=np.float32))
    net = Simple(a)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())
    optim.clear_grad()

    with gm:
        loss = net()
        gm.backward(loss.sum())
    optim.step()
    np.testing.assert_almost_equal(
        net.a.numpy(),
        np.array([1.0 - 4.0], dtype=np.float32),
    )
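
The expected value follows because StopGradient returns None in backward: the a * 3.0 branch contributes nothing, so the total gradient w.r.t. a is 4. A plain NumPy check of that arithmetic:

import numpy as np

a = np.float32(1.0)
grad_a = np.float32(4.0)   # only the (a * 4.0) branch contributes a gradient
a = a - 1.0 * grad_a       # one SGD step with lr=1.0
np.testing.assert_almost_equal(a, 1.0 - 4.0)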
Example #8
def run_train(
    model_path,
    use_jit,
    use_symbolic,
    sublinear_memory_config=None,
    max_err=None,
    use_adaptive_pooling=False,
):

    """
    Load the model with test cases and run the training for one iteration.
    The loss and updated weights are compared against the reference values to verify correctness.

    If you think a failure is caused by numerical rounding errors rather than a bug,
    dump a new file with updated results by calling update_model.
    Please think twice before doing so.

    """
    net = MnistNet(has_bn=True, use_adaptive_pooling=use_adaptive_pooling)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)
    gm = ad.GradManager().attach(net.parameters())

    data = Tensor(checkpoint["data"], dtype=np.float32)
    label = Tensor(checkpoint["label"], dtype=np.int32)

    if max_err is None:
        max_err = 1e-5

    train_func = train
    if use_jit:
        train_func = jit.trace(
            train_func,
            symbolic=use_symbolic,
            sublinear_memory_config=sublinear_memory_config,
        )

    opt.clear_grad()
    loss = train_func(data, label, net, opt, gm)
    opt.step()

    np.testing.assert_allclose(loss.numpy(), checkpoint["loss"], atol=max_err)

    for param, param_ref in zip(
        net.state_dict().items(), checkpoint["net_updated"].items()
    ):
        assert param[0] == param_ref[0]
        if "bn" in param[0]:
            ref = param_ref[1].reshape(param[1].shape)
            np.testing.assert_allclose(param[1], ref, atol=max_err)
        else:
            np.testing.assert_allclose(param[1], param_ref[1], atol=max_err)
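
A hypothetical invocation, for illustration only; the .mge file names follow the update_model docstring earlier on this page and should be replaced with the actual test assets:

run_train("mnist_model_with_test.mge", use_jit=False, use_symbolic=False)
run_train("mnist_model_with_test.mge", use_jit=True, use_symbolic=True, max_err=1e-5)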
Example #9
def test_bn_no_track_stat():
    nchannel = 3
    m = BatchNorm2d(nchannel, track_running_stats=False)

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")
    with gm:
        loss = m(data).sum()
        gm.backward(loss)
    optim.step()
Example #10
def test_hello_world():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([1.23 - 2.34]).astype(np.float32))
Example #11
def test_clip_grad_norm():
    net = Net()
    x = mge.tensor(np.random.randn(10, 3, 224, 224))
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        loss = net(x).sum()
        gm.backward(loss)
    save_grad_value(net)
    max_norm = 1.0
    original_norm = optim.clip_grad_norm(net.parameters(), max_norm=max_norm, ord=2)
    scale = max_norm / original_norm
    for param in net.parameters():
        np.testing.assert_almost_equal(param.grad.numpy(), param.grad_backup * scale)
    opt.step().clear_grad()
Example #12
def train_pipeline():
    m = ResNet18Pipeline()
    x = F.ones([32, 3, 224, 224])
    label = F.zeros([
        32,
    ], dtype="int32")

    gm = ad.GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, 0.9, 1e-4)

    for _ in range(2):
        m(x)
        loss = m.backward(label, gm)
        opt.step().clear_grad()
        print(loss)
Example #13
def test_training_converge_with_swap_and_drop():
    _set_swap_flag(True)
    _set_drop_flag(True)
    old_buffer_length = get_option("buffer_length")
    set_option("buffer_length", 0)
    net = XORNet()
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
        return loss

    def infer(data):
        return net(data)

    train_dataset = minibatch_generator()
    losses = []

    for data, label in itertools.islice(train_dataset, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        opt.step()
        losses.append(loss.numpy())

    assert np.mean(
        losses[-100:]) < 0.1, "Final training Loss must be low enough"

    ngrid = 10
    x = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(x, x)
    xx = xx.reshape((ngrid * ngrid, 1))
    yy = yy.reshape((ngrid * ngrid, 1))
    data = np.concatenate((xx, yy), axis=1).astype(np.float32)

    pred = infer(Tensor(data)).numpy()
    precision = calculate_precision(data, pred)
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision)

    _set_swap_flag(False)
    _set_drop_flag(False)
    set_option("buffer_length", old_buffer_length)
Example #14
def test_detach():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    dshape = (10, 10)
    data = tensor(np.ones(dshape).astype(np.float32))
    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_equal(net.a.numpy(), np.array([1.0]).astype(np.float32))
    np.testing.assert_equal(net.b.numpy(),
                            np.array([1.0 - 10.0 * 10.0]).astype(np.float32))
Example #15
    def worker():
        net = Simple()
        opt = SGD(net.parameters(), lr=0.1)

        gm = ad.GradManager().attach(
            net.parameters(),
            callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)])

        opt.clear_grad()
        with gm:
            x = tensor(data)
            loss = net(x)
            loss = loss.sum()
            gm.backward(loss)
        for p in net.params:
            np.testing.assert_equal(p.grad.numpy(), 1)
Example #16
def test_sgd_momentum_trace():
    origin_inplace = os.getenv("MEGENGINE_INPLACE_UPDATE")
    symbolic = (True, False)
    inplace = (0, 1)
    for symbolic, inplace in itertools.product(symbolic, inplace):
        os.environ["MEGENGINE_INPLACE_UPDATE"] = str(inplace)

        @trace(symbolic=symbolic)
        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        @trace(symbolic=symbolic)
        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34),
                                           5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34,
            5)
    if origin_inplace:
        os.environ["MEGENGINE_INPLACE_UPDATE"] = origin_inplace
    else:
        del os.environ["MEGENGINE_INPLACE_UPDATE"]
Example #17
def test_advance_indexing_with_subtensor():
    net = Simple2()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (2, 3, 4, 3, 4, 2)
    raw_data = np.arange(576).reshape(dshape).astype(np.float32)
    data = tensor(raw_data)
    answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()
    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([answer]).astype(np.float32))
Example #18
def test_save_load():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    with gm:
        loss = net(data)
        gm.backward(loss)

    optim.step()

    model_name = "simple.pkl"
    print("save to {}".format(model_name))

    mge.save(
        {
            "name": "simple",
            "state_dict": net.state_dict(),
            "opt_state": optim.state_dict(),
        },
        model_name,
    )

    # Load param to cpu
    checkpoint = mge.load(model_name, map_location="cpu0")
    device_save = mge.get_default_device()
    mge.set_default_device("cpu0")
    net = Simple()
    net.load_state_dict(checkpoint["state_dict"])
    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.load_state_dict(checkpoint["opt_state"])
    print("load done")
    os.remove("simple.pkl")

    with gm:
        loss = net([1.23])
        gm.backward(loss)

    optim.step()
    # Restore device
    mge.set_default_device(device_save)
Example #19
def run_frozen_bn(BNModule, is_training, use_trace, use_symbolic):
    nchannel = 3
    m = BNModule(nchannel, freeze=True)
    if is_training:
        m.train()
    else:
        m.eval()
    var = 4.0
    bias = 1.0
    shape = (1, nchannel, 1, 1)
    m.running_var[...] = var * F.ones(shape)
    m.running_mean[...] = bias * F.ones(shape)

    saved_var = m.running_var.numpy()
    saved_mean = m.running_mean.numpy()
    saved_wt = m.weight.numpy()
    saved_bias = m.bias.numpy()

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")

    def train_fn(d):
        for _ in range(3):
            with gm:
                loss = m(d).mean()
                gm.backward(loss)
            optim.step()
        return loss

    if use_trace:
        train_fn = trace(train_fn, symbolic=use_symbolic)

    for _ in range(3):
        loss = train_fn(megengine.tensor(data))
        if not is_training:
            np.testing.assert_equal(m.running_var.numpy(), saved_var)
            np.testing.assert_equal(m.running_mean.numpy(), saved_mean)
            np.testing.assert_almost_equal(
                loss.numpy(), ((data - bias) / np.sqrt(var)).mean(), 5
            )
        np.testing.assert_equal(m.weight.numpy(), saved_wt)
        np.testing.assert_equal(m.bias.numpy(), saved_bias)
Example #20
def test_training_converge(test_traced_module):
    net = XORNet()
    if test_traced_module:
        inp = Tensor(np.random.random((14, 2)))
        net = trace_module(net, inp)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    gm = ad.GradManager().attach(net.parameters())

    @trace(symbolic=False)
    def train(data, label):
        with gm:
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
            optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
        return loss

    def infer(data):
        return net(data)

    train_dataset = minibatch_generator()
    losses = []

    for data, label in itertools.islice(train_dataset, 2000):
        data = Tensor(data, dtype=np.float32)
        label = Tensor(label, dtype=np.int32)
        opt.clear_grad()
        loss = train(data, label)
        optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
        opt.step()
        losses.append(loss.numpy())
    assert (np.mean(losses[-100:]) <
            0.1), "Final training Loss must be low enough, get {}".format(
                np.mean(losses[-100:]))

    ngrid = 10
    x = np.linspace(-1.0, 1.0, ngrid)
    xx, yy = np.meshgrid(x, x)
    xx = xx.reshape((ngrid * ngrid, 1))
    yy = yy.reshape((ngrid * ngrid, 1))
    data = mge.tensor(np.concatenate((xx, yy), axis=1).astype(np.float32))
    pred = infer(data)
    precision = calculate_precision(data.numpy(), pred.numpy())
    assert precision == 1.0, "Test precision must be high enough, get {}".format(
        precision)
Example #21
def test_load_state_dict_no_cache(monkeypatch):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", "1")
        net = Net()

        optim = optimizer.SGD(net.parameters(), lr=0.1)
        gm = ad.GradManager().attach(net.parameters())
        state = {
            "fc.weight": np.array([[0]], dtype=np.float32),
            "fc.bias": np.array([0.0], dtype=np.float32),
        }
        net.load_state_dict(state)
        images = mge.tensor([[0]], dtype=np.float32)
        with gm:
            loss = net(images)
            gm.backward(loss)
            optim.step()
            optim.clear_grad()
Example #22
def test_sgd_momentum(monkeypatch, trace_mode, inplace_mode):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", str(int(inplace_mode)))

        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        if trace_mode is not None:
            train_func = trace(symbolic=trace_mode)(train_func)

        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        if trace_mode is not None:
            eval_func = trace(symbolic=trace_mode)(eval_func)

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34),
                                           5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34,
            5)
Example #23
def test_clip_grad_value():
    net = Net()
    x = np.random.randn(10, 3, 224, 224).astype("float32")
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        y = net(mge.tensor(x))
        y = y.mean()
        gm.backward(y)
    save_grad_value(net)
    max_val = 5
    min_val = -2
    optim.clip_grad_value(net.parameters(), lower=min_val, upper=max_val)
    for param in net.parameters():
        np.testing.assert_almost_equal(
            param.grad.numpy(),
            np.maximum(np.minimum(param.grad_backup, max_val), min_val),
        )
    opt.step().clear_grad()
Example #24
def test_advance_indexing():
    net = Simple()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (10, 10)
    raw_data = np.arange(100).reshape(dshape).astype(np.float32)
    raw_mask = (np.random.random_sample(dshape) > 0.5).astype(np.bool_)
    data = tensor(raw_data)
    mask = tensor(raw_mask)
    answer = 1.0 - raw_data[raw_mask].sum()
    with gm:
        loss = net(data, mask).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([answer]).astype(np.float32))
Example #25
def train():
    m = ResNet18MP()
    x = F.ones([32, 3, 224, 224])
    label = F.zeros([
        32,
    ], dtype="int32")

    gm = ad.GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, 0.9, 1e-4)

    for _ in range(2):
        with gm:
            y = m(x)
            if dist.get_rank() == 3:
                loss = F.nn.cross_entropy(y, label)
            else:
                loss = None
            gm.backward(loss)
        opt.step().clear_grad()
        print(loss)
Example #26
    def worker():
        net = Simple(param_shape)
        opt = SGD(net.parameters(), lr=0.1)

        allreduce_cb = dist.make_allreduce_cb("MEAN", dist.WORLD)
        if threshold is not None:
            allreduce_cb._param_pack_thd = threshold
        gm = ad.GradManager().attach(net.parameters(), callbacks=[allreduce_cb])

        def run():
            opt.clear_grad()
            with gm:
                x = tensor(data)
                loss = net(x)
                loss = loss.sum()
                gm.backward(loss)

        for i in range(n_iters):
            run()

        for p in net.params:
            np.testing.assert_equal(p.grad.numpy(), np.ones_like(p.grad.numpy()))
Example #27
def test_sgd_momentum_trace():

    for symbolic in (True, False):

        @trace(symbolic=symbolic)
        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        @trace(symbolic=symbolic)
        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do 3 steps of infer
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34),
                                           5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do a step of train
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34)
Example #28
def test_multi_output():
    data_shape = (9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    bv = np.random.random(data_shape).astype(np.float32)

    class MulFunc(Function):
        def forward(self, a, b):
            self.a = a
            self.b = b
            return a * b, a + b

        def backward(self, grad_1, grad_2):
            return grad_1 * (self.b + 1), grad_2 * (self.a + 1)

    class Simple(Module):
        def __init__(self, a, b):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.b = Parameter(b, dtype=np.float32)
            self.layer1 = MulFunc()

        def forward(self):
            x, y = self.layer1(self.a, self.b)
            return x + y

    net = Simple(av, bv)
    gm = ad.GradManager().attach(net.parameters())
    opt = optimizer.SGD(net.parameters(), lr=1.0)

    opt.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    opt.step()

    np.testing.assert_almost_equal(loss.numpy(), (av * bv + av + bv),
                                   decimal=6)
    np.testing.assert_almost_equal(net.a.numpy(), (av - bv - 1), decimal=6)
    np.testing.assert_almost_equal(net.b.numpy(), (bv - av - 1), decimal=6)
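
With loss = (a*b + a + b).sum(), both incoming gradients in the custom backward are all ones, so grad_a = b + 1 and grad_b = a + 1, which is exactly what the assertions encode. A plain NumPy re-computation of that arithmetic:

import numpy as np

av = np.random.random((9, 2, 6)).astype(np.float32)
bv = np.random.random((9, 2, 6)).astype(np.float32)
grad_a = bv + 1     # backward: grad_1 * (b + 1), with grad_1 == 1
grad_b = av + 1     # backward: grad_2 * (a + 1), with grad_2 == 1
np.testing.assert_almost_equal(av - 1.0 * grad_a, av - bv - 1, decimal=6)
np.testing.assert_almost_equal(bv - 1.0 * grad_b, bv - av - 1, decimal=6)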
Example #29
def test_bn_no_track_stat2():
    nchannel = 3
    m = BatchNorm2d(nchannel)  # init with track_running_stats=True
    m.track_running_stats = False

    # m.running_var and m.running_mean are created at init time
    saved_var = m.running_var.numpy()
    assert saved_var is not None
    saved_mean = m.running_mean.numpy()
    assert saved_mean is not None

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")
    with gm:
        loss = m(data).sum()
        gm.backward(loss)
    optim.step()

    np.testing.assert_equal(m.running_var.numpy(), saved_var)
    np.testing.assert_equal(m.running_mean.numpy(), saved_mean)
Example #30
def test_ste():
    class STE(Function):
        def forward(self, x):
            maxv, minv = x.max(), x.min()
            scale = F.maximum(maxv, -minv) / 127
            return F.round(x / scale) * scale

        def backward(self, grad_y):
            return grad_y

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer1 = STE()

        def forward(self):
            x = self.layer1(self.a)
            x = (x * 2.0).sum()
            return x

    data_shape = (1, 9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    net = Simple(av)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())
    optim.clear_grad()

    with gm:
        loss = net()
        gm.backward(loss.sum())
    optim.step()

    np.testing.assert_almost_equal(
        net.a.numpy(),
        av - np.broadcast_to(np.array([2.0], dtype=np.float32), data_shape),
    )
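
Because the straight-through estimator passes grad_y through unchanged, the rounding in forward is invisible to backward, and d/da of (STE(a) * 2.0).sum() is simply 2 for every element, hence the expected av - 2. A plain NumPy check of that arithmetic:

import numpy as np

av = np.random.random((1, 9, 2, 6)).astype(np.float32)
grad_a = np.full_like(av, 2.0)    # straight-through gradient of (x * 2.0).sum()
updated = av - 1.0 * grad_a       # one SGD step with lr=1.0
np.testing.assert_almost_equal(updated, av - 2.0)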