Example #1
def test_minimize_methods(dtype, device, method):
    torch.manual_seed(400)
    random.seed(100)

    nr = 3
    nbatch = 2
    default_fwd_options = {
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -1.0,
    }
    linearmixing_fwd_options = {
        "max_niter": 50,
        "f_tol": 3e-6,
        "alpha": -0.3,
    }
    gd_fwd_options = {
        "maxiter": 5000,
        "f_rtol": 1e-10,
        "x_rtol": 1e-10,
        "step": 1e-2,
    }
    # list the methods and the options here
    options = {
        "broyden1": default_fwd_options,
        "broyden2": default_fwd_options,
        "linearmixing": linearmixing_fwd_options,
        "gd": gd_fwd_options,
        "adam": gd_fwd_options,
    }[method]

    # use a higher atol for the less accurate method
    atol = defaultdict(lambda: 1e-8)
    atol["linearmixing"] = 3e-6

    A = torch.nn.Parameter((torch.randn(
        (nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization path, so keep it non-differentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    activation = "square"  # square activation makes it easy to optimize

    fwd_options = {**options, "method": method}
    model = DummyModule(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward, y0, **fwd_options)

    # check the grad (must be close to 0)
    with torch.enable_grad():
        y1 = y.clone().requires_grad_()
        f = model.forward(y1)
    grady, = torch.autograd.grad(f, (y1, ))
    assert torch.allclose(grady, grady * 0, atol=atol[method])

    # check the hessian (must be posdef)
    h = hess(model.forward, (y1, ), idxs=0).fullmatrix()
    eigval, _ = torch.symeig(h)
    assert torch.all(eigval >= 0)
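
The test above relies on scaffolding that is not shown in the snippet. Below is a minimal sketch of the assumed imports and parametrization: the import paths for minimize and hess (taken here to be xitorch's), the parameter lists, and DummyModule being a helper defined elsewhere in the suite are all assumptions, not the suite's actual fixtures. Note also that torch.symeig, used in the assertions, is deprecated and has been removed in recent PyTorch releases; torch.linalg.eigvalsh is the modern equivalent.

# Sketch of the assumed scaffolding; import paths and parameter lists are assumptions.
import random
from collections import defaultdict

import pytest
import torch
from xitorch.optimize import minimize   # assumed import path
from xitorch.grad import hess           # assumed import path

@pytest.mark.parametrize("dtype", [torch.float64])
@pytest.mark.parametrize("device", ["cpu"])
@pytest.mark.parametrize(
    "method", ["broyden1", "broyden2", "linearmixing", "gd", "adam"])
def test_minimize_methods(dtype, device, method):
    ...  # body as in Example #1; DummyModule is a helper defined in the test suite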
Example #2
def getloss(A, y0, diag, bias):
    model = clss(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward,
                 y0,
                 method=method,
                 bck_options=bck_options,
                 **fwd_options)
    return y
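
On its own this getloss closure relies on clss, activation, method, bck_options, and fwd_options captured from the enclosing test. As in Examples #4 and #7, it is presumably handed to PyTorch's gradient checkers; a minimal usage sketch, assuming A, y0, diag, and bias are double-precision tensors:

from torch.autograd import gradcheck, gradgradcheck

# verify first- and second-order derivatives of the minimizer's output
# with respect to the parameters and the initial guess
gradcheck(getloss, (A, y0, diag, bias))
gradgradcheck(getloss, (A, y0, diag, bias))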
Example #3
def test_minimizer_warnings():
    # test that the minimizer warns when it fails to converge
    def fcn(a):
        return (a * a).sum()

    with pytest.warns(UserWarning, match="converge"):
        # zero tolerances and a small maxiter guarantee that it will never converge
        a = torch.tensor(1.0, dtype=torch.float64)
        amin = minimize(fcn,
                        a,
                        method="gd",
                        step=0.1,
                        f_rtol=0,
                        x_rtol=0,
                        maxiter=10,
                        verbose=True)
Example #4
def test_minimize(dtype, device, clss):
    torch.manual_seed(400)
    random.seed(100)

    nr = 3
    nbatch = 2

    A = torch.nn.Parameter((torch.randn(
        (nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization path, so keep it non-differentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    fwd_options = {
        "method": "broyden1",
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -0.5,
    }
    activation = "square"  # square activation makes it easy to optimize

    model = clss(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward, y0, **fwd_options)

    # check the grad (must be close to 0)
    with torch.enable_grad():
        y1 = y.clone().requires_grad_()
        f = model.forward(y1)
    grady, = torch.autograd.grad(f, (y1, ))
    assert torch.allclose(grady, grady * 0)

    # check the hessian (must be posdef)
    h = hess(model.forward, (y1, ), idxs=0).fullmatrix()
    eigval, _ = torch.symeig(h)
    assert torch.all(eigval >= 0)

    def getloss(A, y0, diag, bias):
        model = clss(A, addx=False, activation=activation, sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward, y0, **fwd_options)
        return y

    gradcheck(getloss, (A, y0, diag, bias))
    gradgradcheck(getloss, (A, y0, diag, bias))
Example #5
def test_minimize_methods(dtype, device):
    torch.manual_seed(400)
    random.seed(100)
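    # note: the dtype argument is overridden here; this variant runs only in double precision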
    dtype = torch.float64

    nr = 3
    nbatch = 2
    default_fwd_options = {
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -0.5,
    }
    # list the methods and the options here
    methods_and_options = {
        "broyden1": default_fwd_options,
    }

    A = torch.nn.Parameter((torch.randn(
        (nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization path, so keep it non-differentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    activation = "square"  # square activation makes it easy to optimize

    for method in methods_and_options:
        fwd_options = {**methods_and_options[method], "method": method}
        model = DummyModule(A,
                            addx=False,
                            activation=activation,
                            sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward, y0, **fwd_options)

        # check the grad (must be close to 0)
        with torch.enable_grad():
            y1 = y.clone().requires_grad_()
            f = model.forward(y1)
        grady, = torch.autograd.grad(f, (y1, ))
        assert torch.allclose(grady, grady * 0)

        # check the hessian (must be posdef)
        h = hess(model.forward, (y1, ), idxs=0).fullmatrix()
        eigval, _ = torch.symeig(h)
        assert torch.all(eigval >= 0)
Example #6
def test_min_not_stop_for_negative_value():
    # there was a bug where the minimizer stopped right away if the function value was negative
    # this test is written to make sure it does not happen again
    def fcn(a):
        return (a * a).sum() - 100.

    # the method must be a non-rootfinder method
    with pytest.warns(UserWarning, match="converge"):
        method = "gd"
        a = torch.tensor(1.0, dtype=torch.float64)
        amin = minimize(fcn,
                        a,
                        method=method,
                        step=0.2,
                        f_rtol=0,
                        x_rtol=0,
                        verbose=True)
        amin_true = torch.zeros_like(amin)
    assert torch.allclose(amin, amin_true)
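
The final assertion passes because this plain gradient-descent update is a contraction. A small illustrative check of the arithmetic (standalone Python, not part of the test): the gradient of f(a) = a*a - 100 is 2*a, so each step maps a to (1 - 2*step)*a = 0.6*a, which decays toward zero even though the zero tolerances keep the convergence check from ever being satisfied (hence the warning).

# illustrative only: simulate the gradient-descent update used above
a = 1.0
for _ in range(100):       # iteration count chosen arbitrarily for illustration
    a = a - 0.2 * (2 * a)  # a <- (1 - 2 * step) * a = 0.6 * a
print(a)                   # ~6.5e-23, effectively zero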
Example #7
def test_minimize(dtype, device, clss, method):
    torch.manual_seed(400)
    random.seed(100)

    method_fwd_options = {
        "broyden1": {
            "max_niter": 50,
            "f_tol": 1e-9,
            "alpha": -0.5,
        },
        "gd": {
            "maxiter": 10000,
            "f_rtol": 1e-14,
            "x_rtol": 1e-14,
            "step": 2e-2,
        },
    }

    nr = 2
    nbatch = 2

    A = torch.nn.Parameter((torch.randn(
        (nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    bias = torch.nn.Parameter(
        torch.zeros((nbatch, nr)).to(dtype).requires_grad_())
    y0 = torch.randn((nbatch, nr)).to(dtype)
    fwd_options = method_fwd_options[method]
    bck_options = {
        "rtol": 1e-9,
        "atol": 1e-9,
    }
    activation = "square"  # square activation makes it easy to optimize

    model = clss(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward, y0, method=method, **fwd_options)

    # check the grad (must be close to 0)
    with torch.enable_grad():
        y1 = y.clone().requires_grad_()
        f = model.forward(y1)
    grady, = torch.autograd.grad(f, (y1, ))
    assert torch.allclose(grady, grady * 0)

    # check the hessian (must be posdef)
    h = hess(model.forward, (y1, ), idxs=0).fullmatrix()
    eigval, _ = torch.symeig(h)
    assert torch.all(eigval >= 0)

    def getloss(A, y0, diag, bias):
        model = clss(A, addx=False, activation=activation, sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward,
                     y0,
                     method=method,
                     bck_options=bck_options,
                     **fwd_options)
        return y

    gradcheck(getloss, (A, y0, diag, bias))
    # pytorch 1.8's gradgradcheck fails if there are unrelated variables;
    # I have made a PR to fix this and the fix will be in 1.9
    gradgradcheck(getloss, (A, y0, diag, bias.detach()))
Example #8
def getloss(a):
    model = clss(a, sumoutput=True)
    y = minimize(model.forward, y0, **fwd_options)
    return y
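
This fragment relies on clss, y0, and fwd_options defined in the enclosing test. A minimal sketch of the assumed context, patterned after the options in the earlier examples; every name and value below is illustrative rather than taken from the real test:

# illustrative context only; the real test defines these differently
import torch

y0 = torch.randn((2, 3), dtype=torch.float64)  # initial guess for the minimizer
fwd_options = {
    "method": "broyden1",
    "max_niter": 50,
    "f_tol": 1e-9,
    "alpha": -0.5,
}
# clss is assumed to be one of the dummy model classes from the test suite,
# taking the parameter tensor `a` and a `sumoutput` flag in its constructor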