Example #1
import torch as t
from torch.autograd import Variable, gradcheck

# Note: the import path for MSDModule is assumed here; adjust it to match
# the package under test.
from msd_pytorch.msd_module import MSDModule


def test_msd_gradients():
    t.manual_seed(1)

    dtype = t.double
    size = (11, 13)
    batch_sz = 2

    for depth in [9]:
        print(f"Depth: {depth}")
        width = c_in = c_out = batch_sz
        x = Variable(t.randn(batch_sz, c_in, *size, dtype=dtype)).cuda()
        x.requires_grad = True

        net = MSDModule(c_in, c_out, depth, width).cuda()
        net.double()

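        # The final 1x1 layer is zero-initialized by default, which would let
        # gradcheck pass trivially, so reinitialize all weights randomly.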
        for p in net.parameters():
            p.data = t.randn_like(p.data)

        assert net is not None

        gradcheck(net, [x], raise_exception=True)
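
A note on the gradcheck call above: gradcheck compares the gradients computed by autograd against finite-difference estimates, which is why the input is built in double precision and kept small. A minimal sketch of the same pattern, using a plain Conv2d as a stand-in for the module under test:

import torch as t
import torch.nn as nn
from torch.autograd import gradcheck

t.manual_seed(0)

# gradcheck differentiates with respect to the inputs that require grad,
# so the input is double precision and has requires_grad=True.
net = nn.Conv2d(2, 2, kernel_size=3, padding=1).double()
x = t.randn(2, 2, 5, 7, dtype=t.double, requires_grad=True)

# Raises if the analytical and numerical Jacobians disagree.
assert gradcheck(net, (x,), raise_exception=True)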
Example #2
import torch as t
import torch.nn as nn
import torch.optim as optim
from pytest import approx
from torch.autograd import Variable, gradcheck

# Note: the import path for MSDModule is assumed here; adjust it to match
# the package under test.
from msd_pytorch.msd_module import MSDModule


def test_parameters_change():
    # This test ensures that all parameters are updated after an
    # update step.
    t.manual_seed(1)

    size = (30, 30)
    for batch_sz in [1]:
        for depth in range(0, 20, 6):
            width = c_in = c_out = batch_sz
            x = Variable(t.randn(batch_sz, c_in, *size)).cuda()
            target = Variable(t.randn(batch_sz, c_out, *size)).cuda()
            assert x.data.is_cuda

            net = MSDModule(c_in, c_out, depth, width).cuda()

            assert net is not None

            params0 = dict((n, p.data.clone()) for n, p in net.named_parameters())
            # Train for two iterations. The convolution weights in
            # the MSD layers are not updated after the first
            # training step because the final 1x1 convolution
            # weights are zero.
            optimizer = optim.Adam(net.parameters())
            for _ in range(2):
                optimizer.zero_grad()
                y = net(x)
                assert y is not None
                criterion = nn.L1Loss()
                loss = criterion(y, target)
                loss.backward()
                optimizer.step()

            params1 = dict(net.named_parameters())

            for name in params1.keys():
                p0, p1 = params0[name], params1[name]
                d = abs(p0 - p1.data.clone()).sum().item()
                assert 0.0 < d, (
                    f"Parameter {name} left unchanged: \n"
                    f"Initial value: {p0}\n"
                    f"Current value: {p1}\n"
                    f"Gradient: {p1.grad}\n"
                )

            # Check that the loss is not zero
            assert loss.abs().item() != approx(0.0)
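
The comment in test_parameters_change relies on a simple fact: while the final 1x1 convolution weights are zero, the gradient reaching the earlier convolution weights is zero as well, so those weights only start moving after the final layer itself has been updated once. A minimal sketch of that effect, with two plain Conv2d layers standing in for the MSD layers and the zero-initialized final layer:

import torch as t
import torch.nn as nn
import torch.optim as optim

t.manual_seed(0)

# 'hidden' stands in for the MSD layers; 'final' stands in for the
# zero-initialized final 1x1 convolution.
hidden = nn.Conv2d(1, 1, kernel_size=3, padding=1)
final = nn.Conv2d(1, 1, kernel_size=1)
nn.init.zeros_(final.weight)
nn.init.zeros_(final.bias)
net = nn.Sequential(hidden, final)

x = t.randn(1, 1, 8, 8)
target = t.randn(1, 1, 8, 8)
optimizer = optim.Adam(net.parameters())
w0 = hidden.weight.detach().clone()

for step in range(2):
    optimizer.zero_grad()
    loss = nn.functional.l1_loss(net(x), target)
    loss.backward()
    optimizer.step()
    print(f"step {step}: hidden weights changed:",
          not t.allclose(hidden.weight, w0))

# step 0: the gradient of the hidden weights is zero because it passes
# through the zero final weights, so they stay put.
# step 1: the final weights are nonzero after the first update, so the
# hidden weights now receive a nonzero gradient and change.
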
def test_msd_gradients():
    t.manual_seed(1)

    dtype = t.double
    size = (11, 13)
    batch_sz = 2

    for depth in [9]:
        print(f"Depth: {depth}")
        width = c_in = c_out = batch_sz
        x = Variable(t.randn(batch_sz, c_in, *size, dtype=dtype)).cuda()
        x.requires_grad = True

        net = MSDModule(c_in, c_out, depth, width).cuda()
        net.double()

        # The weights of the final layer are initialized to zero by
        # default. This makes it trivial to pass gradcheck. Therefore,
        # we reinitialize all weights randomly.
        for p in net.parameters():
            p.data = t.randn_like(p.data)

        gradcheck(net, [x], raise_exception=True, atol=1e-4, rtol=1e-3)
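
The reinitialization above matters: with the final layer still at zero, the network's output does not depend on the input at all, so the analytical and numerical Jacobians are both exactly zero and gradcheck passes even when an intermediate backward pass is wrong. A small illustration, where BuggySquare is a hypothetical stand-in (not part of MSDModule) with a deliberately broken backward:

import torch as t
from torch.autograd import gradcheck


class BuggySquare(t.autograd.Function):
    # Hypothetical op whose backward pass is deliberately wrong.
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * x

    @staticmethod
    def backward(ctx, grad_out):
        (x,) = ctx.saved_tensors
        return grad_out * x  # wrong on purpose: should be 2 * x * grad_out


def scaled(x, w):
    # w plays the role of the final layer's weight.
    return w * BuggySquare.apply(x)


x = t.randn(5, dtype=t.double, requires_grad=True)

# Zero "final weight": the output is identically zero, both Jacobians are
# zero, and the broken backward goes unnoticed.
print(gradcheck(lambda v: scaled(v, 0.0), (x,), raise_exception=False))  # True

# Nonzero weight: the broken backward is detected.
print(gradcheck(lambda v: scaled(v, 1.0), (x,), raise_exception=False))  # False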