print('forward gradient g(x) = \n', grad1)

# Now we want to compute the same gradient in the reverse mode of AD.
# Before we do that, we have a look at what the computational graph looks like:
# print('Computational graph is', cg)

# The reverse mode is called by cg.pullback([ybar]).
# It is a little hard to explain what is going on here. Suffice it to say that
# we now compute one row of the Jacobian instead of one column as in the
# forward mode.

zbar = z.x.zeros_like()

# compute the gradient in the reverse mode
zbar.data[0, :, 0, 0] = 1
cg.pullback([zbar])

grad2_x = x.xbar.data[0, 0]
grad2_y = y.xbar.data[0, 0]
grad2 = numpy.concatenate([grad2_x, grad2_y])
print('reverse gradient g(x) = \n', grad2)

# check that the forward-computed gradient equals the reverse gradient
print('difference forward/reverse gradient =\n', grad1 - grad2)

# one can also easily extract the Hessian
H = numpy.zeros((2 * N, 2 * N))
H[:, :N] = x.xbar.data[1, :, :, 0]
H[:, N:] = y.xbar.data[1, :, :, 0]
print('Hessian = \n', H)
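# A quick sanity check for an AD Hessian like H above is a central
# finite-difference approximation built from gradient evaluations. The sketch
# below is self-contained and, as an assumption for illustration, uses its own
# simple function with an analytic gradient (grad_f); it is not the function
# traced above.

import numpy

def grad_f(v):
    # analytic gradient of f(v) = 0.5 * v.dot(v) + log(1 + v[0]**2)
    g = v.copy()
    g[0] += 2 * v[0] / (1 + v[0]**2)
    return g

def fd_hessian(grad, v, h=1e-6):
    # column i holds (grad(v + h e_i) - grad(v - h e_i)) / (2 h)
    n = v.size
    H = numpy.zeros((n, n))
    for i in range(n):
        e = numpy.zeros(n)
        e[i] = h
        H[:, i] = (grad(v + e) - grad(v - e)) / (2 * h)
    return H

v = numpy.array([0.3, -1.2, 0.7])
H_fd = fd_hessian(grad_f, v)
# for this f the exact Hessian is the identity plus a correction in (0, 0)
H_exact = numpy.eye(3)
H_exact[0, 0] += 2 * (1 - v[0]**2) / (1 + v[0]**2)**2
print('max |H_fd - H_exact| =', numpy.abs(H_fd - H_exact).max())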
# Now we want to compute the Taylor series of
#
#     Jx( 1. + 2.*t + 3.*t**2 + 4.*t**3 +  5.*t**4,
#         6. + 7.*t + 8.*t**2 + 9.*t**3 + 10.*t**4 )
#     Jy( 1. + 2.*t + 3.*t**2 + 4.*t**3 +  5.*t**4,
#         6. + 7.*t + 8.*t**2 + 9.*t**3 + 10.*t**4 )
#
# where
#
#     Jx = dg/dx
#     Jy = dg/dy

# setup input Taylor polynomials
D, P = 5, 3  # D: number of Taylor coefficients, P: number of directions
ax = UTPM(numpy.zeros((D, P)))
ay = UTPM(numpy.zeros((D, P)))
ax.data[:, :] = numpy.array([1., 2., 3., 4., 5.]).reshape((5, 1))   # input Taylor polynomial
ay.data[:, :] = numpy.array([6., 7., 8., 9., 10.]).reshape((5, 1))  # input Taylor polynomial

# forward sweep
cg.pushforward([ax, ay])
azbar = UTPM(numpy.zeros((D, P, 3)))
azbar.data[0, ...] = numpy.eye(3)  # one adjoint direction per output component

# reverse sweep
cg.pullback([azbar])

# get results
Jx = cg.independentFunctionList[0].xbar
Jy = cg.independentFunctionList[1].xbar

print('Taylor series of Jx =\n', Jx)
print('Taylor series of Jy =\n', Jy)
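# The same univariate Taylor mechanism can be seen on a scalar function in a
# self-contained snippet. As an assumption for illustration, the input
# polynomial 1 + 2*t is chosen arbitrarily; the Taylor coefficients of
# sin(1 + 2*t) can be checked by hand.

import numpy, algopy

D, P = 4, 1
x = algopy.UTPM(numpy.zeros((D, P)))
x.data[0, 0] = 1.0  # x(t) = 1 + 2*t
x.data[1, 0] = 2.0
y = algopy.sin(x)

print('UTPM coefficients of sin(1 + 2*t):', y.data[:, 0])
# hand-computed coefficients: sin(1), 2*cos(1), -2*sin(1), -4*cos(1)/3
print('hand-computed coefficients      :', numpy.array(
    [numpy.sin(1.), 2*numpy.cos(1.), -2*numpy.sin(1.), -4*numpy.cos(1.)/3]))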
# check against the analytical result
print('J - A =\n', J - A.x.data[0, 0])

# Now we want to compute the same Jacobian in the reverse mode of AD.
# Before we do that, we have a look at what the computational graph looks like:
# print('Computational graph is', cg)

# The reverse mode is called by cg.pullback([ybar]).
# It is a little hard to explain what is going on here. Suffice it to say that
# we now compute one row of the Jacobian instead of one column as in the
# forward mode.

ybar = y.x.zeros_like()

# compute the first row of J
ybar.data[0, 0, 0, 0] = 1
cg.pullback([ybar])
J_row1 = x.xbar.data[0, 0]

# compute the second row of J
ybar.data[...] = 0
ybar.data[0, 0, 1, 0] = 1
cg.pullback([ybar])
J_row2 = x.xbar.data[0, 0]

# build the Jacobian
J2 = numpy.vstack([J_row1.T, J_row2.T])
print('J - J2 =\n', J - J2)

# One can also easily extract the Hessian, which is here an (M, N, N)-tensor.
# E.g. the Hessian of y[1] is zero since y[1] is linear in x.
print('Hessian of y[1] w.r.t. x = \n', x.xbar.data[1, :, :, 0])
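# The two pullbacks above generalize to a loop: one pullback per output
# component yields the full Jacobian row by row. The following sketch is
# self-contained; the traced map g: R^3 -> R^2 is an illustrative stand-in,
# not the function from the example above.

import numpy, algopy
from algopy import CGraph, Function, UTPM

cg_demo = CGraph()
x_data = numpy.zeros((2, 1, 3))  # D=2 Taylor coefficients, P=1 direction
x_data[0, 0, :] = [1., 1., 1.]   # evaluation point x = (1, 1, 1)
x_demo = Function(UTPM(x_data))
y_demo = algopy.zeros(2, dtype=x_demo)
y_demo[0] = x_demo[0] * x_demo[1]
y_demo[1] = x_demo[1] + x_demo[2]
cg_demo.trace_off()
cg_demo.independentFunctionList = [x_demo]
cg_demo.dependentFunctionList = [y_demo]

J_demo = numpy.zeros((2, 3))
ybar_demo = y_demo.x.zeros_like()
for m in range(2):
    ybar_demo.data[...] = 0
    ybar_demo.data[0, 0, m] = 1  # select output component m
    cg_demo.pullback([ybar_demo])
    J_demo[m, :] = x_demo.xbar.data[0, 0]

print('row-by-row Jacobian =\n', J_demo)
# at x = (1, 1, 1) the exact Jacobian is [[1, 1, 0], [0, 1, 1]]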
# METHOD 2: image space method (potentially numerically unstable)
cg2 = CGraph()
J1 = Function(J1.x)
J2 = Function(J2.x)
M = Function(UTPM(numpy.zeros((D, P, N + K, N + K))))
M[:N, :N] = dot(J1.T, J1)
M[:N, N:] = J2.T
M[N:, :N] = J2
C2 = inv(M)[:N, :N]
cg2.trace_off()
cg2.independentFunctionList = [J1, J2]
cg2.dependentFunctionList = [C2]

print('covariance matrix: C =\n', C2)
print('difference between image and nullspace method:\n', C - C2)

# compare the pullbacks of both methods
Cbar = UTPM(numpy.random.rand(D, P, N, N))
cg1.pullback([Cbar])
cg2.pullback([Cbar])

print('J1\n',
      cg2.independentFunctionList[0].xbar - cg1.independentFunctionList[0].xbar)
print('J2\n',
      cg2.independentFunctionList[1].xbar - cg1.independentFunctionList[1].xbar)
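# The agreement of the two methods can also be checked in plain NumPy without
# any tracing. A minimal sketch, assuming random J1 (residual Jacobian) and J2
# (constraint Jacobian) of arbitrary sizes: the (1,1) block of the inverse KKT
# matrix must equal Q (Q^T J1^T J1 Q)^{-1} Q^T, where the columns of Q form an
# orthonormal basis of the nullspace of J2.

import numpy

numpy.random.seed(0)
M_, N_, K_ = 10, 5, 2
J1_ = numpy.random.rand(M_, N_)
J2_ = numpy.random.rand(K_, N_)

# image space method: (1,1) block of the inverse KKT matrix
KKT = numpy.zeros((N_ + K_, N_ + K_))
KKT[:N_, :N_] = J1_.T.dot(J1_)
KKT[:N_, N_:] = J2_.T
KKT[N_:, :N_] = J2_
C_image = numpy.linalg.inv(KKT)[:N_, :N_]

# nullspace method: nullspace basis of J2 from the SVD
U, s, Vt = numpy.linalg.svd(J2_)
Q = Vt[K_:].T
C_null = Q.dot(numpy.linalg.inv(Q.T.dot(J1_.T).dot(J1_).dot(Q))).dot(Q.T)

print('max |C_image - C_null| =', numpy.abs(C_image - C_null).max())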
print('function evaluation =\n', logp(x, 3.5, sigma))

# forward mode with ALGOPY
utp = logp(x, mu, sigma).data[:, 0]
print('function evaluation = %f\n'
      '1st directional derivative = %f\n'
      '2nd directional derivative = %f' % (utp[0], 1. * utp[1], 2. * utp[2]))

# finite differences solution:
print('finite differences derivative =\n',
      (logp(x, 3.5 + 10**-8, sigma) - logp(x, 3.5, sigma)) / 10**-8)

# trace the function evaluation
cg = CGraph()
mu = Function(UTPM([[3.5], [1], [0]]))  # unknown variable
out = logp(x, mu, sigma)
cg.trace_off()
cg.independentFunctionList = [mu]
cg.dependentFunctionList = [out]
cg.plot(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'posterior_log_probability_cgraph.png'))

# reverse mode with ALGOPY
outbar = UTPM([[1.], [0], [0]])
cg.pullback([outbar])

gradient = mu.xbar.data[0, 0]
Hess_vec = mu.xbar.data[1, 0]

print('gradient = ', gradient)
print('Hessian vector product = ', Hess_vec)
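# The forward-over-reverse pattern above can be checked end to end on a small
# self-contained example. The scalar function f below is an illustrative
# stand-in (not the logp above); pushing the polynomial u(t) = 4 + t through
# the trace and pulling back a constant adjoint yields the gradient in
# xbar.data[0] and the Hessian vector product in xbar.data[1].

import numpy, algopy
from algopy import CGraph, Function, UTPM

def f(u):
    return -0.5 * (u - 3.5)**2 + algopy.log(u)

cg_f = CGraph()
u = Function(UTPM(numpy.array([[4.0], [1.0], [0.0]])))  # u(t) = 4 + t, D=3, P=1
out_f = f(u)
cg_f.trace_off()
cg_f.independentFunctionList = [u]
cg_f.dependentFunctionList = [out_f]

outbar_f = UTPM(numpy.array([[1.0], [0.0], [0.0]]))
cg_f.pullback([outbar_f])

print('gradient               =', u.xbar.data[0, 0])
print('Hessian vector product =', u.xbar.data[1, 0])
# analytic check: f'(u) = -(u - 3.5) + 1/u = -0.25, f''(u) = -1 - 1/u**2 = -1.0625
print('analytic               =', -(4.0 - 3.5) + 1/4.0, -1 - 1/4.0**2)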