def testAtanh2ndDerivative(self):
    w_torch = self.w1_torch.atanh()
    y_torch = self.x_torch * w_torch
    dy_dw1_torch = grad_torch(
        y_torch, self.w1_torch,
        grad_outputs=ones_like_torch(y_torch),
        create_graph=True, retain_graph=True)[0]
    d2y_dw12_torch = grad_torch(
        dy_dw1_torch, self.w1_torch,
        grad_outputs=ones_like_torch(dy_dw1_torch))[0]

    cxt = tc.Context()
    w_tc = self.w1_tc.atanh()
    y_tc = self.x_tc * w_tc
    _dy_dw1_tc = grad_tc(y_tc, ones_like_tc(y_tc), self.w1_tc)
    _d2y_dw2_tc = grad_tc(_dy_dw1_tc, ones_like_tc(_dy_dw1_tc), self.w1_tc)
    cxt.map = tc.Map({
        'the_first_derivative': _dy_dw1_tc,
        'the_second_derivative': _d2y_dw2_tc,
    })

    result = HOST.post(ENDPOINT, cxt)
    dy_dw1_tc = result['the_first_derivative']
    d2y_dw2_tc = result['the_second_derivative']
    self.assertAllClose(dy_dw1_torch, dy_dw1_tc, 0.01)
    self.assertAllClose(d2y_dw12_torch, d2y_dw2_tc, 0.01)
def testAcosh2ndDerivative(self):
    # shift the operands so that x * w1 > 1, keeping acosh in its real domain
    w1 = np.random.rand(2, 2) * 10 + 1.1
    x = np.random.rand(2, 2) + 1

    self.w1_torch = torch.tensor(w1, dtype=torch.float, requires_grad=True)
    self.x_torch = torch.tensor(x, dtype=torch.float)
    y_torch = (self.x_torch * self.w1_torch).acosh()
    dy_dw1_torch = grad_torch(
        y_torch, self.w1_torch,
        grad_outputs=ones_like_torch(y_torch),
        create_graph=True, retain_graph=True)[0]
    d2y_dw12_torch = grad_torch(
        dy_dw1_torch, self.w1_torch,
        grad_outputs=ones_like_torch(dy_dw1_torch))[0]

    cxt = tc.Context()
    self.w1_tc = tc.ml.optimizer.Variable.load(w1.shape, w1.flatten().tolist(), tc.F32)
    self.x_tc = tc.tensor.Dense.load(x.shape, x.flatten().tolist(), tc.F32)
    y_tc = (self.x_tc * self.w1_tc).acosh()
    _dy_dw1_tc = grad_tc(y_tc, ones_like_tc(y_tc), self.w1_tc)
    _d2y_dw2_tc = grad_tc(_dy_dw1_tc, ones_like_tc(_dy_dw1_tc), self.w1_tc)
    cxt.map = tc.Map({
        'the_first_derivative': _dy_dw1_tc,
        'the_second_derivative': _d2y_dw2_tc,
    })

    result = HOST.post(ENDPOINT, cxt)
    dy_dw1_tc = result['the_first_derivative']
    d2y_dw2_tc = result['the_second_derivative']
    self.assertAllClose(dy_dw1_torch, dy_dw1_tc)
    self.assertAllClose(d2y_dw12_torch, d2y_dw2_tc)
def testMatMul2ndDerivative(self):
    y_torch = self.x_torch @ self.w1_torch**2 + self.b1_torch
    y2_torch = (y_torch @ self.w2_torch + self.b2_torch)**2
    dy_dw1_torch = grad_torch(
        y2_torch, self.w1_torch,
        grad_outputs=ones_like_torch(y2_torch),
        create_graph=True, retain_graph=True)[0]
    d2y_dw12_torch = grad_torch(
        dy_dw1_torch, self.w1_torch,
        grad_outputs=ones_like_torch(dy_dw1_torch))[0]

    cxt = tc.Context()
    y_tc = self.x_tc @ self.w1_tc**2 + self.b1_tc
    y_2tc = (y_tc @ self.w2_tc + self.b2_tc)**2
    _dy_dw1_tc = grad_tc(y_2tc, ones_like_tc(y_2tc), self.w1_tc)
    _d2y_dw2_tc = grad_tc(_dy_dw1_tc, ones_like_tc(_dy_dw1_tc), self.w1_tc)
    cxt.map = tc.Map({
        'the_first_derivative': _dy_dw1_tc,
        'the_second_derivative': _d2y_dw2_tc,
    })

    result = HOST.post(ENDPOINT, cxt)
    dy_dw1_tc = result['the_first_derivative']
    d2y_dw2_tc = result['the_second_derivative']
    self.assertAllClose(dy_dw1_torch, dy_dw1_tc)
    self.assertAllClose(d2y_dw12_torch, d2y_dw2_tc)
def testSum_gradient(self):
    y_torch = (self.x_torch @ torch.exp(self.w1_torch) + self.b1_torch)**2
    y2_torch = torch.sum(y_torch, 0)**0.5
    w1_torch_grad = grad_torch(y2_torch, self.w1_torch, grad_outputs=torch.ones_like(y2_torch))

    cxt = tc.Context()
    cxt.y_tc = (self.x_tc @ self.w1_tc.exp() + self.b1_tc)**2
    cxt.y_2tc = cxt.y_tc.sum(0)**0.5
    cxt.result = grad_tc(cxt.y_2tc, ones_like_tc(cxt.y_2tc), self.w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
def testMultipleFunctions(self):
    y_torch = self.x_torch @ self.w1_torch + self.w1_torch
    y2_torch = y_torch @ self.w2_torch + self.b2_torch + torch.exp(y_torch)
    w1_torch_grad = grad_torch(y2_torch, self.w1_torch, grad_outputs=ones_like_torch(y2_torch))

    cxt = tc.Context()
    cxt.y_tc = self.x_tc @ self.w1_tc + self.w1_tc
    cxt.y_2tc = cxt.y_tc @ self.w2_tc + self.b2_tc + cxt.y_tc.exp()
    cxt.result = grad_tc(cxt.y_2tc, ones_like_tc(cxt.y_2tc), self.w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
def testAtanh(self):
    w_torch = self.w1_torch.atanh()
    y_torch = self.x_torch * w_torch
    w1_torch_grad = grad_torch(y_torch, self.w1_torch, grad_outputs=ones_like_torch(y_torch))

    cxt = tc.Context()
    cxt.w_tc = self.w1_tc.atanh()
    cxt.y_tc = self.x_tc * cxt.w_tc
    cxt.result = grad_tc(cxt.y_tc, ones_like_tc(cxt.y_tc), self.w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
def testPow1stDerivative(self):
    y_torch = self.x_torch**self.w1_torch + self.b1_torch
    y2_torch = y_torch**self.w2_torch + self.b2_torch
    w1_torch_grad = grad_torch(y2_torch, self.w1_torch, grad_outputs=ones_like_torch(y2_torch))

    cxt = tc.Context()
    cxt.y_tc = self.x_tc**self.w1_tc + self.b1_tc
    cxt.y_2tc = cxt.y_tc**self.w2_tc + self.b2_tc
    cxt.result = grad_tc(cxt.y_2tc, ones_like_tc(cxt.y_2tc), self.w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
def testDerivative(self):
    y_torch = self.x_torch @ self.w1_torch + self.b1_torch + torch.exp(self.w1_torch)
    dy_dw1_torch = grad_torch(
        y_torch, self.w1_torch,
        grad_outputs=ones_like_torch(y_torch),
        create_graph=True, retain_graph=True)[0]
    d2y_dw12_torch = grad_torch(
        dy_dw1_torch, self.w1_torch,
        grad_outputs=ones_like_torch(dy_dw1_torch))[0]

    cxt = tc.Context()
    cxt.y_tc = self.x_tc @ self.w1_tc + self.b1_tc + self.w1_tc.exp()
    cxt._dy_dw1_tc = grad_tc(cxt.y_tc, ones_like_tc(cxt.y_tc), self.w1_tc)
    cxt._d2y_dw2_tc = grad_tc(cxt._dy_dw1_tc, ones_like_tc(cxt._dy_dw1_tc), self.w1_tc)
    cxt.result = {
        'the_first_derivative': cxt._dy_dw1_tc,
        'the_second_derivative': cxt._d2y_dw2_tc,
    }

    result = HOST.post(ENDPOINT, cxt)
    dy_dw1_tc = result['the_first_derivative']
    d2y_dw2_tc = result['the_second_derivative']
    self.assertAllClose(dy_dw1_torch, dy_dw1_tc)
    self.assertAllClose(d2y_dw12_torch, d2y_dw2_tc)
def testAcosh(self):
    # shift the operands so that x * w1 > 1, keeping acosh in its real domain
    w1 = np.random.rand(2, 2) * 10 + 1.1
    x = np.random.rand(2, 2) + 1

    w1_torch = torch.tensor(w1, dtype=torch.float, requires_grad=True)
    x_torch = torch.tensor(x, dtype=torch.float)
    y_torch = (x_torch * w1_torch).acosh()
    w1_torch_grad = grad_torch(y_torch, w1_torch, grad_outputs=torch.ones_like(y_torch))

    cxt = tc.Context()
    cxt.w1_tc = tc.ml.optimizer.Variable.load(w1.shape, w1.flatten().tolist(), tc.F32)
    cxt.x_tc = tc.tensor.Dense.load(x.shape, x.flatten().tolist(), tc.F32)
    cxt.y_tc = (cxt.x_tc * cxt.w1_tc).acosh()
    cxt.result = grad_tc(cxt.y_tc, ones_like_tc(cxt.y_tc), cxt.w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
def testDiv(self):
    w1 = np.random.rand(2, 2) + 1
    w1_torch = torch.tensor(w1, dtype=torch.float, requires_grad=True)
    w2 = np.random.rand(2, 2) + 1
    w2_torch = torch.tensor(w2, dtype=torch.float, requires_grad=True)
    y_torch = self.x_torch / w1_torch + self.b1_torch
    y2_torch = y_torch / w2_torch + self.b2_torch
    w1_torch_grad = grad_torch(y2_torch, w1_torch, grad_outputs=ones_like_torch(y2_torch))

    w1_tc = tc.ml.optimizer.Variable.load(w1.shape, w1.flatten().tolist(), tc.F32)
    w2_tc = tc.ml.optimizer.Variable.load(w2.shape, w2.flatten().tolist(), tc.F32)
    cxt = tc.Context()
    cxt.y_tc = self.x_tc / w1_tc + self.b1_tc
    cxt.y_2tc = cxt.y_tc / w2_tc + self.b2_tc
    cxt.result = grad_tc(cxt.y_2tc, ones_like_tc(cxt.y_2tc), w1_tc)

    w1_tc_grad = HOST.post(ENDPOINT, cxt)
    self.assertAllClose(w1_torch_grad, w1_tc_grad)
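# NOTE: these tests rely on module-level fixtures defined outside this
# section. From usage alone, the setup is presumably along these lines
# (the host address below is an assumption, and the exact import path of
# the TinyChain gradient helpers is not shown here):
#
#     import numpy as np
#     import torch
#     import tinychain as tc
#
#     from torch.autograd import grad as grad_torch
#     ones_like_torch = torch.ones_like
#
#     # grad_tc(y, grad_outputs, variable) and ones_like_tc(tensor) are the
#     # TinyChain client's counterparts of torch.autograd.grad and
#     # torch.ones_like, imported under these aliases.
#
#     HOST = tc.host.Host('http://127.0.0.1:8702')  # assumed local test host
#     ENDPOINT = '/transact/hypothetical'
#
# self.x_*, self.w1_*, self.w2_*, self.b1_*, and self.b2_* are matching
# PyTorch tensors and TinyChain Variable/Dense tensors built in setUp() from
# the same NumPy arrays, in the same way testAcosh2ndDerivative builds its
# operands inline.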