def test_upfirdn2d(self):
    from mmcv.ops import upfirdn2d
    if _USING_PARROTS:
        gradcheck(
            upfirdn2d,
            (self.input_tensor.cuda(),
             self.kernel.type_as(self.input_tensor).cuda(), self.factor, 1,
             self.pad),
            delta=1e-4,
            pt_atol=1e-3)
    else:
        gradcheck(
            upfirdn2d,
            (self.input_tensor.cuda(),
             self.kernel.type_as(self.input_tensor).cuda(), self.factor, 1,
             self.pad),
            eps=1e-4,
            atol=1e-3)

        gradgradcheck(
            upfirdn2d,
            (self.input_tensor.cuda(),
             self.kernel.type_as(self.input_tensor).cuda(), self.factor, 1,
             self.pad),
            eps=1e-4,
            atol=1e-3)
def test_tacotron2_loss_gradcheck(self):
    """Performing gradient check on Tacotron2Loss."""
    (
        mel_specgram,
        mel_specgram_postnet,
        gate_out,
        truth_mel_specgram,
        truth_gate_out,
    ) = self._get_inputs()

    mel_specgram.requires_grad_(True)
    mel_specgram_postnet.requires_grad_(True)
    gate_out.requires_grad_(True)

    def _fn(mel_specgram, mel_specgram_postnet, gate_out,
            truth_mel_specgram, truth_gate_out):
        loss_fn = Tacotron2Loss()
        return loss_fn(
            (mel_specgram, mel_specgram_postnet, gate_out),
            (truth_mel_specgram, truth_gate_out),
        )

    gradcheck(
        _fn,
        (mel_specgram, mel_specgram_postnet, gate_out,
         truth_mel_specgram, truth_gate_out),
        fast_mode=True,
    )
    gradgradcheck(
        _fn,
        (mel_specgram, mel_specgram_postnet, gate_out,
         truth_mel_specgram, truth_gate_out),
        fast_mode=True,
    )
def test_rootfinder_with_params(dtype, device, bias_is_tensor):
    torch.manual_seed(100)
    random.seed(100)

    nr = 3
    nbatch = 2
    fwd_options = {
        "method": "broyden1",
        "f_tol": 1e-9,
        "alpha": -0.5,
    }

    clss = DummyModuleExplicit
    A = (torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_()
    diag = torch.randn((nbatch, nr)).to(dtype).requires_grad_()
    if bias_is_tensor:
        bias = torch.zeros((nbatch, nr)).to(dtype).requires_grad_()
    else:
        bias = 0.0
    y0 = torch.randn((nbatch, nr)).to(dtype)

    model = clss(addx=True)
    y = rootfinder(model.forward, y0, (A, diag, bias), **fwd_options)
    f = model.forward(y, A, diag, bias)
    assert torch.allclose(f * 0, f)

    def getloss(y0, A, diag, bias):
        model = clss(addx=True)
        y = rootfinder(model.forward, y0, (A, diag, bias), **fwd_options)
        return y

    gradcheck(getloss, (y0, A, diag, bias))
    gradgradcheck(getloss, (y0, A, diag, bias))
def test_jac_method_grad():
    na = 3
    params = getfnparams(na)
    nnparams = getnnparams(na)
    num_nnparams = len(nnparams)
    jacs = jac(func2(*nnparams), params)
    nout = jacs[0].shape[-2]

    def fcnr(i, v, *allparams):
        nnparams = allparams[:num_nnparams]
        params = allparams[num_nnparams:]
        jacs = jac(func2(*nnparams), params)
        return jacs[i].rmv(v.view(-1))

    def fcnl(i, v, *allparams):
        nnparams = allparams[:num_nnparams]
        params = allparams[num_nnparams:]
        jacs = jac(func2(*nnparams), params)
        return jacs[i].mv(v.view(-1))

    v = torch.rand((na, ), dtype=dtype, requires_grad=True)
    w = [torch.rand_like(p).requires_grad_() for p in params]
    for i in range(len(jacs)):
        gradcheck(fcnr, (i, v, *nnparams, *params))
        gradgradcheck(fcnr, (i, v, *nnparams, *params))
        gradcheck(fcnl, (i, w[i], *nnparams, *params))
        gradgradcheck(fcnl, (i, w[i], *nnparams, *params))
def test_equil(dtype, device):
    torch.manual_seed(100)
    random.seed(100)

    nr = 3
    nbatch = 2
    fwd_options = {
        "f_tol": 1e-9,
        "alpha": -0.5,
    }

    for clss in [DummyModule, DummyNNModule]:
        A = torch.nn.Parameter(
            (torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_())
        diag = torch.nn.Parameter(
            torch.randn((nbatch, nr)).to(dtype).requires_grad_())
        bias = torch.nn.Parameter(
            torch.zeros((nbatch, nr)).to(dtype).requires_grad_())
        y0 = torch.randn((nbatch, nr)).to(dtype)

        model = clss(A, addx=False)
        model.set_diag_bias(diag, bias)
        y = equilibrium(model.forward, y0, **fwd_options)
        f = model.forward(y)
        assert torch.allclose(y, f)

        def getloss(A, y0, diag, bias):
            model = clss(A, addx=False)
            model.set_diag_bias(diag, bias)
            y = equilibrium(model.forward, y0, **fwd_options)
            return y

        gradcheck(getloss, (A, y0, diag, bias))
        gradgradcheck(getloss, (A, y0, diag, bias))
def test_gradgradient(self):
    from mmcv.ops import FusedBiasLeakyReLU
    gradgradcheck(
        FusedBiasLeakyReLU(2).cuda(),
        self.input_tensor,
        eps=1e-4,
        atol=1e-3)
def test_conv2d_transposed_cuda(self):
    x = self.input.cuda()
    weight = self.weight.cuda()
    res = conv_transpose2d(x, weight, None, 1, 1)
    assert res.shape == (1, 1, 32, 32)
    gradgradcheck(
        partial(conv_transpose2d, weight=weight, padding=1, stride=1), x)
def stest_return_duplicate() -> None:
    """
    :rtype: None
    """
    from torch.autograd import Function, gradcheck, gradgradcheck

    class DoubleDuplicate(Function):
        @staticmethod
        def forward(ctx, x):
            """ """
            output = x * 2
            return output, output

        @staticmethod
        def backward(ctx, grad1, grad2):
            """ """
            return grad1 * 2 + grad2 * 2

    def fn(x):
        """ """
        a, b = DoubleDuplicate.apply(x)
        return a + b

    x = torch.randn(5, 5, requires_grad=True, dtype=torch.double)
    gradcheck(fn, [x], eps=1e-6)
    gradgradcheck(fn, [x])
def test_quad_inf(dtype, device):
    torch.manual_seed(100)
    random.seed(100)

    nr = 4
    fwd_options = {
        "method": "leggauss",
        "n": 100,
    }
    w = torch.nn.Parameter(
        torch.abs(torch.randn((nr,), dtype=dtype, device=device)).requires_grad_())

    i = 0
    for totensor in [True, False]:
        if totensor:
            xl = torch.tensor(-float("inf"), dtype=dtype, device=device)
            xu = torch.tensor(float("inf"), dtype=dtype, device=device)
        else:
            xl = -float("inf")
            xu = float("inf")

        def get_loss(w):
            module = IntegrationInfModule(w)
            y = quad(module.forward, xl, xu, params=[], **fwd_options)
            return y

        y = get_loss(w)
        ytrue = w * np.sqrt(2 * np.pi)
        assert torch.allclose(y, ytrue)

        if i == 0:
            gradcheck(get_loss, (w,))
            gradgradcheck(get_loss, (w,))
        i += 1
def test_ivp(dtype, device):
    torch.manual_seed(100)
    random.seed(100)

    nr = 2
    nt = 5
    t0 = 0.0
    t1 = 0.2
    fwd_options = {
        "method": "rk4",
    }

    a = torch.nn.Parameter(torch.rand((nr,), dtype=dtype, device=device).requires_grad_())
    b = torch.nn.Parameter(torch.randn((nr,), dtype=dtype, device=device).requires_grad_())
    c = torch.randn((nr,), dtype=dtype, device=device).requires_grad_()
    ts = torch.linspace(t0, t1, nt, dtype=dtype, device=device).requires_grad_()
    y0 = torch.rand((nr,), dtype=dtype, device=device).requires_grad_()
    ts1 = ts.unsqueeze(-1)

    for clss in [IVPModule, IVPNNModule]:
        def getoutput(a, b, c, ts, y0):
            module = clss(a, b)
            yt = solve_ivp(module.forward, ts, y0, params=(c,), **fwd_options)
            return yt

        yt = getoutput(a, b, c, ts, y0)
        yt_true = y0 * torch.exp(-(0.5 * a * (ts1 + t0) + b + c) * (ts1 - t0))
        assert torch.allclose(yt, yt_true)

        gradcheck(getoutput, (a, b, c, ts, y0))
        gradgradcheck(getoutput, (a, b, c, ts, y0))
def test_quad(dtype, device):
    torch.manual_seed(100)
    random.seed(100)

    nr = 2
    fwd_options = {
        "method": "leggauss",
        "n": 100,
    }

    a = torch.nn.Parameter(torch.rand((nr,), dtype=dtype, device=device).requires_grad_())
    b = torch.nn.Parameter(torch.randn((nr,), dtype=dtype, device=device).requires_grad_())
    c = torch.randn((nr,), dtype=dtype, device=device).requires_grad_()
    xl = torch.zeros((1,), dtype=dtype, device=device).requires_grad_()
    xu = (torch.ones((1,), dtype=dtype, device=device) * 0.5).requires_grad_()

    for clss in [IntegrationModule, IntegrationNNModule]:
        module = clss(a, b)
        y = quad(module.forward, xl, xu, params=(c,), **fwd_options)
        ytrue = (torch.sin(a * xu + b * c) - torch.sin(a * xl + b * c)) / a
        assert torch.allclose(y, ytrue)

        def getloss(a, b, c, xl, xu):
            module = clss(a, b)
            y = quad(module.forward, xl, xu, params=(c,), **fwd_options)
            return y

        gradcheck(getloss, (a, b, c, xl, xu))
        gradgradcheck(getloss, (a, b, c, xl, xu))
        # check if not all parameters require grad
        gradcheck(getloss, (a, b.detach(), c, xl, xu))
def test_solve_A_gmres():
    torch.manual_seed(seed)
    na = 3
    dtype = torch.float64
    ashape = (na, na)
    bshape = (2, na, na)
    fwd_options = {"method": "gmres"}

    ncols = bshape[-1] - 1
    bshape = [*bshape[:-1], ncols]
    xshape = list(get_bcasted_dims(ashape[:-2], bshape[:-2])) + [na, ncols]

    amat = torch.rand(ashape, dtype=dtype) + torch.eye(ashape[-1], dtype=dtype)
    bmat = torch.rand(bshape, dtype=dtype)
    amat = amat + amat.transpose(-2, -1)

    amat = amat.requires_grad_()
    bmat = bmat.requires_grad_()

    def solvefcn(amat, bmat):
        alinop = LinearOperator.m(amat)
        x = solve(A=alinop, B=bmat, fwd_options=fwd_options)
        return x

    x = solvefcn(amat, bmat)
    assert list(x.shape) == xshape

    ax = LinearOperator.m(amat).mm(x)
    assert torch.allclose(ax, bmat)

    gradcheck(solvefcn, (amat, bmat))
    gradgradcheck(solvefcn, (amat, bmat))
def test_svd_A(dtype, device, shape, method):
    torch.manual_seed(seed)
    mat1 = torch.rand(shape, dtype=dtype, device=device)
    mat1 = mat1.requires_grad_()
    linop1 = LinearOperator.m(mat1, is_hermitian=False)
    fwd_options = {"method": method}

    min_mn = min(shape[-1], shape[-2])
    for k in [min_mn]:
        u, s, vh = svd(linop1, k=k, **fwd_options)  # u: (..., m, k), s: (..., k), vh: (..., k, n)

        assert list(u.shape) == list([*linop1.shape[:-1], k])
        assert list(s.shape) == list([*linop1.shape[:-2], k])
        assert list(vh.shape) == list([*linop1.shape[:-2], k, linop1.shape[-1]])

        keye = torch.zeros((*shape[:-2], k, k), dtype=dtype, device=device) + \
            torch.eye(k, dtype=dtype, device=device)
        assert torch.allclose(u.transpose(-2, -1) @ u, keye)
        assert torch.allclose(vh @ vh.transpose(-2, -1), keye)
        if k == min_mn:
            assert torch.allclose(mat1, u @ torch.diag_embed(s) @ vh)

        def svd_fcn(amat, only_s=False):
            alinop = LinearOperator.m(amat, is_hermitian=False)
            u_, s_, vh_ = svd(alinop, k=k, **fwd_options)
            if only_s:
                return s_
            else:
                return u_, s_, vh_

        gradcheck(svd_fcn, (mat1, ))
        gradgradcheck(svd_fcn, (mat1, True))
def test_lsymeig_A(dtype, device, shape, method):
    torch.manual_seed(seed)
    mat1 = torch.rand(shape, dtype=dtype, device=device)
    mat1 = mat1 + mat1.transpose(-2, -1)
    mat1 = mat1.requires_grad_()
    linop1 = LinearOperator.m(mat1, True)
    fwd_options = {"method": method}

    for neig in [2, shape[-1]]:
        eigvals, eigvecs = lsymeig(linop1, neig=neig, **fwd_options)  # eigvals: (..., neig), eigvecs: (..., na, neig)

        assert list(eigvecs.shape) == list([*linop1.shape[:-1], neig])
        assert list(eigvals.shape) == list([*linop1.shape[:-2], neig])

        ax = linop1.mm(eigvecs)
        xe = torch.matmul(eigvecs, torch.diag_embed(eigvals, dim1=-2, dim2=-1))
        assert torch.allclose(ax, xe)

        # only perform gradcheck if neig is full, to reduce the computational cost
        if neig == shape[-1]:
            def lsymeig_fcn(amat):
                amat = (amat + amat.transpose(-2, -1)) * 0.5  # symmetrize
                alinop = LinearOperator.m(amat, is_hermitian=True)
                eigvals_, eigvecs_ = lsymeig(alinop, neig=neig, **fwd_options)
                return eigvals_, eigvecs_

            gradcheck(lsymeig_fcn, (mat1, ))
            gradgradcheck(lsymeig_fcn, (mat1, ))
def test_rootfinder(dtype, device, clss):
    torch.manual_seed(100)
    random.seed(100)

    nr = 3
    nbatch = 2
    fwd_options = {
        "method": "broyden1",
        "f_tol": 1e-9,
        "alpha": -0.5,
    }

    A = torch.nn.Parameter(
        (torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    bias = torch.nn.Parameter(
        torch.zeros((nbatch, nr)).to(dtype).requires_grad_())
    y0 = torch.randn((nbatch, nr)).to(dtype)

    model = clss(A, addx=True)
    model.set_diag_bias(diag, bias)
    y = rootfinder(model.forward, y0, **fwd_options)
    f = model.forward(y)
    assert torch.allclose(f * 0, f)

    def getloss(A, y0, diag, bias):
        model = clss(A, addx=True)
        model.set_diag_bias(diag, bias)
        y = rootfinder(model.forward, y0, **fwd_options)
        return y

    gradcheck(getloss, (A, y0, diag, bias))
    gradgradcheck(getloss, (A, y0, diag, bias))
def test_grad_y_of_inverse():
    phi_net = MixExpPhi()
    phi_inv = PhiInv(phi_net)
    query = torch.tensor(
        [[0.1, 0.2], [0.2, 0.3], [0.25, 0.7]]).requires_grad_(True)
    gradcheck(phi_inv, (query, ), eps=1e-10)
    gradgradcheck(phi_inv, (query, ), eps=1e-10)
def test_grad_of_phi():
    phi_net = MixExpPhi()
    phi_inv = PhiInv(phi_net)
    query = torch.tensor(
        [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [1., 1., 1.]]).requires_grad_(True)
    gradcheck(phi_net, (query, ), eps=1e-9)
    gradgradcheck(phi_net, (query, ), eps=1e-9)
def test_grad_y_of_pdf():
    phi_net = MixExpPhi()
    query = torch.tensor(
        [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.99, 0.99, 0.99]]).requires_grad_(True)
    cop = Copula(phi_net)

    def f(y):
        return cop(y, mode='pdf')

    gradcheck(f, (query, ), eps=1e-8)
    # This fails sometimes if rtol is too low..?
    gradgradcheck(f, (query, ), eps=1e-8, atol=1e-6, rtol=1e-2)
def test_solve_AEM(dtype, device, abeshape, mshape, method):
    torch.manual_seed(seed)
    na = abeshape[-1]
    ashape = abeshape
    bshape = abeshape
    eshape = abeshape
    checkgrad = method.endswith("exactsolve")

    ncols = bshape[-1] - 1
    bshape = [*bshape[:-1], ncols]
    eshape = [*eshape[:-2], ncols]
    xshape = list(
        get_bcasted_dims(ashape[:-2], bshape[:-2], eshape[:-1], mshape[:-2])) + [na, ncols]
    fwd_options = {"method": method, "min_eps": 1e-9}
    bck_options = {
        "method": method
    }  # exactsolve at backward just to test the forward solve

    amat = torch.rand(ashape, dtype=dtype, device=device) * 0.1 + \
        torch.eye(ashape[-1], dtype=dtype, device=device)
    mmat = torch.rand(mshape, dtype=dtype, device=device) * 0.1 + \
        torch.eye(mshape[-1], dtype=dtype, device=device) * 0.5
    bmat = torch.rand(bshape, dtype=dtype, device=device)
    emat = torch.rand(eshape, dtype=dtype, device=device)
    mmat = (mmat + mmat.transpose(-2, -1)) * 0.5

    amat = amat.requires_grad_()
    mmat = mmat.requires_grad_()
    bmat = bmat.requires_grad_()
    emat = emat.requires_grad_()

    def solvefcn(amat, mmat, bmat, emat):
        mmat = (mmat + mmat.transpose(-2, -1)) * 0.5
        alinop = LinearOperator.m(amat)
        mlinop = LinearOperator.m(mmat)
        x = solve(A=alinop, B=bmat, E=emat, M=mlinop,
                  **fwd_options, bck_options=bck_options)
        return x

    x = solvefcn(amat, mmat, bmat, emat)
    assert list(x.shape) == xshape

    ax = LinearOperator.m(amat).mm(x)
    mxe = LinearOperator.m(mmat).mm(
        torch.matmul(x, torch.diag_embed(emat, dim2=-1, dim1=-2)))
    y = ax - mxe
    assert torch.allclose(y, bmat)

    # gradient checker
    if checkgrad:
        gradcheck(solvefcn, (amat, mmat, bmat, emat))
        gradgradcheck(solvefcn, (amat, mmat, bmat, emat))
def test_squad(dtype, device, imethod):
    x = torch.tensor([0.0, 1.0, 2.0, 4.0, 5.0, 7.0],
                     dtype=dtype, device=device).requires_grad_()
    y = torch.tensor(
        [[1.0, 2.0, 2.0, 1.5, 1.2, 4.0],
         [0.0, 0.8, 1.0, 1.5, 2.0, 1.4]],
        dtype=dtype, device=device).requires_grad_()

    # true values
    ycumsum_trapz = torch.tensor(  # obtained by calculating manually
        [[0.0, 1.5, 3.5, 7.0, 8.35, 13.55],
         [0.0, 0.4, 1.3, 3.8, 5.55, 8.95]],
        dtype=dtype, device=device)
    ycspline_natural = torch.tensor(  # obtained using scipy's CubicSpline and quad
        [[0.0, 1.5639104372355428, 3.6221791255289135, 7.2068053596614945,
          8.4994887166897, 13.11119534565217],
         [0.0, 0.43834626234132584, 1.3733074753173484, 3.724083215796897,
          5.494693230049832, 9.181717209378409]],
        dtype=dtype, device=device)

    i, method = imethod
    option = [{}, {"bc_type": "natural"}][i]
    ytrue = [ycumsum_trapz, ycspline_natural][i]

    def getval(x, y, tpe):
        quad = SQuad(x, method=method, **option)
        if tpe == "cumsum":
            return quad.cumsum(y, dim=-1)
        else:
            return quad.integrate(y, dim=-1)

    # getparamnames
    quad = SQuad(x, method=method, **option)
    quad.assertparams(quad.cumsum, y, dim=-1)
    quad.assertparams(quad.integrate, y, dim=-1)

    # cumsum
    ycumsum = getval(x, y, "cumsum")
    assert torch.allclose(ycumsum, ytrue)

    # integrate
    yintegrate = getval(x, y, "integrate")
    assert torch.allclose(yintegrate, ytrue[..., -1])

    gradcheck(getval, (x, y, "cumsum"))
    gradgradcheck(getval, (x, y, "cumsum"))

    gradcheck(getval, (x, y, "integrate"))
    gradgradcheck(getval, (x, y, "integrate"))
def test_lsymeig_AM():
    torch.manual_seed(seed)
    shapes = [(3, 3), (2, 3, 3), (2, 1, 3, 3)]
    # only 2 of methods, because both gradient implementations are covered
    methods = ["exacteig", "custom_exacteig"]
    dtype = torch.float64
    for ashape, mshape, method in itertools.product(shapes, shapes, methods):
        mata = torch.rand(ashape, dtype=dtype)
        matm = torch.rand(mshape, dtype=dtype) + \
            torch.eye(mshape[-1], dtype=dtype)  # make sure it's not singular
        mata = mata + mata.transpose(-2, -1)
        matm = matm + matm.transpose(-2, -1)
        mata = mata.requires_grad_()
        matm = matm.requires_grad_()
        linopa = LinearOperator.m(mata, True)
        linopm = LinearOperator.m(matm, True)
        fwd_options = {"method": method}

        na = ashape[-1]
        bshape = get_bcasted_dims(ashape[:-2], mshape[:-2])
        for neig in [2, ashape[-1]]:
            eigvals, eigvecs = lsymeig(linopa, M=linopm, neig=neig, **fwd_options)  # eigvals: (..., neig)

            assert list(eigvals.shape) == list([*bshape, neig])
            assert list(eigvecs.shape) == list([*bshape, na, neig])

            ax = linopa.mm(eigvecs)
            mxe = linopm.mm(
                torch.matmul(eigvecs, torch.diag_embed(eigvals, dim1=-2, dim2=-1)))
            assert torch.allclose(ax, mxe)

            # only perform gradcheck if neig is full, to reduce the computational cost
            if neig == ashape[-1]:
                def lsymeig_fcn(amat, mmat):
                    # symmetrize
                    amat = (amat + amat.transpose(-2, -1)) * 0.5
                    mmat = (mmat + mmat.transpose(-2, -1)) * 0.5
                    alinop = LinearOperator.m(amat, is_hermitian=True)
                    mlinop = LinearOperator.m(mmat, is_hermitian=True)
                    eigvals_, eigvecs_ = lsymeig(alinop, M=mlinop, neig=neig, **fwd_options)
                    return eigvals_, eigvecs_

                gradcheck(lsymeig_fcn, (mata, matm))
                gradgradcheck(lsymeig_fcn, (mata, matm))
def test_symeig_AM_degenerate(dtype, device):
    # same as test_symeig_A_degenerate, but now with the overlap matrix
    torch.manual_seed(126)
    n = 5
    neig = 3
    kwargs = {
        "dtype": dtype,
        "device": device,
    }

    # random matrix to be orthogonalized for the eigenvectors
    matA = torch.randn((n, n), **kwargs)
    matM = torch.rand((n, n), **kwargs)

    # matrix for the loss function
    P2 = torch.randn((n, n), **kwargs).requires_grad_()

    # the degenerate eigenvalues
    a = torch.tensor([1.0, 2.0, 3.0], **kwargs).requires_grad_()

    bck_options = {
        "method": "exactsolve",
    }

    def get_loss(a, matA, matM, P2):
        # get the orthogonal vector for the eigenvectors
        P, _ = torch.qr(matA)
        PM, _ = torch.qr(matM)

        # line up the eigenvalues
        b = torch.cat((a[:2], a[1:2], a[2:], a[2:]))

        # construct the matrix
        diag = torch.diag_embed(b)
        A = torch.matmul(torch.matmul(P.T, diag), P)
        M = torch.matmul(PM.T, PM)
        Alinop = LinearOperator.m(A)
        Mlinop = LinearOperator.m(M)
        eivals, eivecs = symeig(Alinop, M=Mlinop, neig=neig,
                                method="custom_exacteig",
                                bck_options=bck_options)

        U = eivecs[:, 1:3]  # the degenerate eigenvectors
        loss = torch.einsum("rc,rc->", torch.matmul(P2, U), U)
        return loss

    gradcheck(get_loss, (a, matA, matM, P2))
    gradgradcheck(get_loss, (a, matA, matM, P2))
def test_symeig_A_degenerate(dtype, device, eivaloffset):
    # test if the gradient can be stably propagated if the loss function
    # does not depend on which degenerate eigenvectors are used
    # (note: the variable is changed in a certain way so that the degeneracy
    # is kept)
    torch.manual_seed(126)
    n = 5
    neig = 3
    kwargs = {
        "dtype": dtype,
        "device": device,
    }

    # random matrix to be orthogonalized for the eigenvectors
    mat = torch.randn((n, n), **kwargs).requires_grad_()

    # matrix for the loss function
    P2 = torch.randn((n, n), **kwargs).requires_grad_()

    # the degenerate eigenvalues
    a = (torch.tensor([1.0, 2.0, 3.0], **kwargs) + eivaloffset).requires_grad_()

    bck_options = {
        "method": "exactsolve",
    }

    def get_loss(a, mat, P2):
        # get the orthogonal vector for the eigenvectors
        P, _ = torch.qr(mat)

        # line up the eigenvalues
        b = torch.cat((a[:2], a[1:2], a[2:], a[2:]))

        # construct the matrix
        diag = torch.diag_embed(b)
        A = torch.matmul(torch.matmul(P.T, diag), P)
        Alinop = LinearOperator.m(A)
        eivals, eivecs = symeig(Alinop, neig=neig,
                                method="custom_exacteig",
                                bck_options=bck_options)

        U = eivecs[:, 1:3]  # the degenerate eigenvectors
        loss = torch.einsum("rc,rc->", torch.matmul(P2, U), U)
        return loss

    gradcheck(get_loss, (a, mat, P2))
    gradgradcheck(get_loss, (a, mat, P2))
def test_gradcheck(self):
    from torch.autograd import gradcheck, gradgradcheck

    a = wrap(torch.tensor(5.0, dtype=torch.double))
    b = wrap(torch.tensor(6.0, dtype=torch.double))

    a.requires_grad = True
    b.requires_grad = True

    gradcheck(torch.add, (a, b), raise_exception=False)
    gradgradcheck(torch.add, (a, b), raise_exception=False)

    total_used_attrs = a.used_attrs.union(b.used_attrs)
    total_used_calls = a.used_calls.union(b.used_calls)

    # These attributes (and the functions below) may change
    # if the gradcheck implementation changes. It's best to
    # aim for attributes that may be commonly present on other
    # Tensor-likes.
    self.assertEqual(total_used_attrs, {
        'data',
        'device',
        'dtype',
        'is_complex',
        'is_floating_point',
        'is_sparse',
        'layout',
        'nelement',
        'new_zeros',
        'requires_grad',
        'retain_grad',
        'size',
        'stride',
    })

    self.assertEqual(total_used_calls, {
        torch.Tensor.new_zeros,
        torch.Tensor.size,
        torch.Tensor.is_complex,
        torch.Tensor.is_floating_point,
        torch.Tensor.nelement,
        torch.Tensor.retain_grad,
        torch.Tensor.stride,
        torch.autograd.grad,
        torch.add,
    })
def test_upfirdn2d(self):
    from mmcv.ops import upfirdn2d
    gradcheck(
        upfirdn2d,
        (self.input_tensor.cuda(),
         self.kernel.type_as(self.input_tensor).cuda(), self.factor, 1,
         self.pad),
        eps=1e-4,
        atol=1e-3)

    gradgradcheck(
        upfirdn2d,
        (self.input_tensor.cuda(),
         self.kernel.type_as(self.input_tensor).cuda(), self.factor, 1,
         self.pad),
        eps=1e-4,
        atol=1e-3)
def test_grads(self, func_with_args):
    func, args = func_with_args
    if func in THIRD_TYPE:
        args[0] = args[0][1:-1]  # n in (0, 1), n != m
        # derivatives are undefined when n == m
        # could be smaller (1e-6), but spoils gradgradcheck in fast_mode
        args[0] = args[0] + 1e-2
    args[-1] = args[-1][:-1]  # m in [0, 1)
    if func in INCOMPLETE:
        args[-2] = args[-2][1:-1]  # phi in (0, pi/2)
    args = [arg.clone().requires_grad_(True) for arg in args]

    assert gradcheck(func, args, fast_mode=True, check_batched_grad=True)

    if func in INCOMPLETE:
        # Check only other gradients at phi in {0, pi/2}
        _args = args[:]
        _args[-2] = torch.tensor((0, pi / 2))[:, None]
        assert gradcheck(func, _args, fast_mode=True)

    _args = args[:]
    _args[-1] = args[-1][1:]
    assert gradgradcheck(func, _args, fast_mode=True, check_batched_grad=True)
def test_solve_A():
    torch.manual_seed(seed)
    na = 2
    shapes = [(na, na), (2, na, na), (2, 1, na, na)]
    dtype = torch.float64
    # custom exactsolve to check the backward implementation
    methods = ["exactsolve", "custom_exactsolve", "lbfgs"]
    # hermitian check here to make sure the gradient propagated symmetrically
    hermits = [False, True]
    for ashape, bshape, method, hermit in itertools.product(
            shapes, shapes, methods, hermits):
        print(ashape, bshape, method, hermit)
        checkgrad = method.endswith("exactsolve")

        ncols = bshape[-1] - 1
        bshape = [*bshape[:-1], ncols]
        xshape = list(get_bcasted_dims(ashape[:-2], bshape[:-2])) + [na, ncols]
        fwd_options = {"method": method, "min_eps": 1e-9}
        bck_options = {"method": method}

        amat = torch.rand(ashape, dtype=dtype) * 0.1 + \
            torch.eye(ashape[-1], dtype=dtype)
        bmat = torch.rand(bshape, dtype=dtype)
        if hermit:
            amat = (amat + amat.transpose(-2, -1)) * 0.5
        amat = amat.requires_grad_()
        bmat = bmat.requires_grad_()

        def solvefcn(amat, bmat):
            # is_hermitian=hermit is required to force the hermitian status in numerical gradient
            alinop = LinearOperator.m(amat, is_hermitian=hermit)
            x = solve(A=alinop, B=bmat,
                      fwd_options=fwd_options,
                      bck_options=bck_options)
            return x

        x = solvefcn(amat, bmat)
        assert list(x.shape) == xshape

        ax = LinearOperator.m(amat).mm(x)
        assert torch.allclose(ax, bmat)

        if checkgrad:
            gradcheck(solvefcn, (amat, bmat))
            gradgradcheck(solvefcn, (amat, bmat))
def test_ellipr_grads(func, nargs, _, x, y, z):
    args = (x, y, z)[:nargs]
    for _1, _2 in combinations(args, 2):
        assume(abs(_1 - _2) > 1e-2)
    args = (*tensor(args, dtype=get_default_complex_dtype(), requires_grad=True),)
    assert gradcheck(func, args, raise_exception=False)
    assert gradgradcheck(func, args, raise_exception=False)
def test_minimize(dtype, device, clss):
    torch.manual_seed(400)
    random.seed(100)

    nr = 3
    nbatch = 2

    A = torch.nn.Parameter(
        (torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(
        torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization line, so set it undifferentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    fwd_options = {
        "method": "broyden1",
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -0.5,
    }
    activation = "square"  # square activation makes it easy to optimize

    model = clss(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward, y0, **fwd_options)

    # check the grad (must be close to 0)
    with torch.enable_grad():
        y1 = y.clone().requires_grad_()
        f = model.forward(y1)
        grady, = torch.autograd.grad(f, (y1, ))
    assert torch.allclose(grady, grady * 0)

    # check the hessian (must be posdef)
    h = hess(model.forward, (y1, ), idxs=0).fullmatrix()
    eigval, _ = torch.symeig(h)
    assert torch.all(eigval >= 0)

    def getloss(A, y0, diag, bias):
        model = clss(A, addx=False, activation=activation, sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward, y0, **fwd_options)
        return y

    gradcheck(getloss, (A, y0, diag, bias))
    gradgradcheck(getloss, (A, y0, diag, bias))
def test_elliprj_grad(x, y, z, p):
    args = x, y, z, p
    for _1, _2 in combinations(args, 2):
        assume(abs(_1 - _2) > 1e-2)
    args = (*tensor(args, dtype=get_default_complex_dtype(), requires_grad=True),)
    assert gradcheck(elliprj, args, raise_exception=False)
    assert gradgradcheck(elliprj, args, raise_exception=False)