def gradient_and_hessian_of_Phi(x): H = zeros((2,2)) # Hessian g = zeros(2) # gradient V = zeros((2,1)) F = zeros((2,2)) Fdot = zeros((2,2)) D = 1 keep = D+1 for n in range(2): # 1: hos_forward, propagate two directions V[n,0] = 1. (y,W) = adolc.hos_forward(1,x,V,keep) V[n,0] = 0. F[0,:] = y[:2] F[1,:] = y[2:] Fdot[0,:] = W[:2,0] Fdot[1,:] = W[2:,0] # 2: matrix forward cg.forward([Mtc(F,Fdot)]) # 3: matrix reverse Phibar = array([[1.]]) Phibardot = array([[0.]]) cg.reverse([Mtc(Phibar, Phibardot)]) # 4: hov_reverse U = zeros((1,4,2)) U[0,:,0] = cg.independentFunctionList[0].xbar.X.flatten() U[0,:,1] = cg.independentFunctionList[0].xbar.Xdot.flatten() res = adolc.hov_ti_reverse(1,U)[0].copy() g[:] = res[0,:,0] H[n,:] = res[0,:,1] return (g,H)
def gradient_of_E_PHI(v,DM):
    """Compute the gradient of the expectation of PHI, i.e. grad( E[PHI] ).

    E[PHI] is approximated by the method of moments up to moment degree DM.
    This gradient is then used in the steepest descent optimization below.

    Relies on module-level state: problem sizes Nq, Np, Nm, Nv, the matrix
    computational graph ``cg`` and its independent ``FJ``, the PyADOL-C tape
    with id 1, and the helpers generate_multi_indices, multi_index_abs,
    multi_index_binomial, prod and gamma.

    NOTE(review): J is accumulated with += across the nq loop without being
    reset, and scale_factor is re-applied on every nq iteration -- confirm
    that this repeated scaling/accumulation is intended.
    """
    # multi indices for the full tensor of order DM-1 to compute d^(DM-1) PHI
    # the remaining order to compute d^DM PHI is achieved by a reverse sweep later
    Jq = generate_multi_indices(Nq,DM)
    NJq = shape(Jq)[0]
    # since Nq is usually much larger than Np
    # we want to be able to get one higher order derivative
    # w.r.t. q by reverse mode
    # that means we have to evaluate the mixed partial derivatives
    # d^2 F/dpdq in the forward mode
    # since UTPS cannot do those partial derivatives straightforward
    # we need to use interpolation
    J = zeros((DM+1,1,Nm,Np)) # Taylor series of degree DM has DM+1 taylor coefficients (i.e. count also the zero-derivative)
    qbar_rays = zeros((NJq,Nq,DM+1))
    for nq in range(NJq):
        # 1: evaluation of J
        # here, we have to use nested interpolation
        # we need to compute
        # d^d F(x + s_1 z_1 + s_2 z_2) |
        # ---------------------------- |
        # d z_1 d^(d-1) z_2            |_z=0
        # for s_1 = [1,0,0] resp. s_1 = [0,1,0] (one direction for each parameter p)
        # we use here formula (13) of the paper "Evaluating higher derivative tensors by forward propagation of univariate Taylor series"
        for np in range(Np): # each column of J
            for dm in range(DM+1):
                I = array([1,dm])             # target mixed multi-index: order 1 in p, order dm in q
                K = zeros((dm+1,2),dtype=int) # summation indices k <= I of formula (13)
                K[:,0] = 1
                K[:,1] = list(range(dm+1))
                V = zeros((Nv,dm+1))
                for k in K:
                    # direction: s1 in the p-part, s2 = k[1]*Jq[nq] in the q-part
                    s1 = zeros(Nv)
                    s1[np] = k[0]
                    s2 = zeros(Nv)
                    s2[Np:] = k[1]*Jq[nq,:]
                    V[:,0] = s1+s2
                    tmp = adolc.hos_forward(1,v,V,0)[1]
                    # signed binomial combination of formula (13)
                    J[dm,0,:,np] += (-1)**multi_index_abs( I - k) * multi_index_binomial(I,k) * tmp[:,dm]
        # convert the interpolated derivatives to Taylor coefficients (divide by d!)
        scale_factor = array([1./prod(list(range(1,d+1))) for d in range(DM+1)])
        for dm in range(DM+1):
            J[dm,:,:,:] *= scale_factor[dm]
        # 2: forward evaluation of Phi
        Jtc=Mtc(J[:,:,:,:])
        cg.forward([Jtc])
        # 3: reverse evaluation of Phi
        Phibar = zeros((DM+1,1,1,1))
        Phibar[0,0,0,0]=1.  # seed dPhi/dPhi = 1 in the zeroth Taylor coefficient
        cg.reverse([Mtc(Phibar)])
        Jbar = FJ.xbar.TC[:,0,:,:]  # adjoint Taylor coefficients of J
        #print Jbar
        #print shape(Jbar)
        ## 4: reverse evaluation of J
        vbar = zeros(Nv)
        dJ = zeros((Nq,DM+1)) # taylor coefficients for one direction in q
        for np in range(Np): # each column of J
            for dm in range(DM+1):
                I = array([1,dm])
                K = zeros((dm+1,2),dtype=int)
                K[:,0] = 1
                K[:,1] = list(range(dm+1))
                V = zeros((Nv,dm+1))
                for k in K:
                    s1 = zeros(Nv)
                    s1[np] = k[0]
                    s2 = zeros(Nv)
                    s2[Np:] = k[1]*Jq[nq,:]
                    V[:,0] = s1+s2
                    keep = dm + 2  # keep one extra coefficient so the reverse sweep yields degree dm+1
                    adolc.hos_forward(1,v,V,keep)[1]  # re-run forward to prime the tape; value unused
                    # U is a (Q,M,D) array
                    # Jbar is a (D,M,Np) array
                    U = zeros((1,Nm,keep))
                    for d in range(dm+1):
                        U[0,:,d] = Jbar[d,:,np]
                    #print 'U=',U
                    Z = adolc.hov_ti_reverse(1,U)[0]
                    #print 'Z=',Z
                    #J[dm,0,:,np] += (-1)**multi_index_abs( I - k) * multi_index_binomial(I,k) * tmp[:,dm]
                    #print shape(Z[0,:,:])
                    #exit()
                    #print Z[0,:,dm+1]
                    # same interpolation weights as step 1, one degree higher via reverse mode;
                    # Z[0,Np:,dm+1] restricts to the q-part of the adjoint
                    tmp = 1./prod(list(range(1,dm+2))) * (-1)**multi_index_abs( I - k) * multi_index_binomial(I,k) * Z[0,Np:,dm+1]
                    dJ[:,dm] += tmp
                    #vbar += tmp
        qbar_rays[nq,:,:] = dJ
    # use interpolation to build all PHI^(dm) 0<= dm <= DM
    derivative_tensor_list = []
    for dm in range(DM+1):
        derivative_tensor = numpy.zeros([Nq for i in range(dm+1)])
        I = generate_multi_indices(Nq,dm)
        d = sum(I[0,:])
        NI = shape(I)[0]
        NJq = shape(Jq)[0]
        for ni in range(NI):
            for nj in range(NJq):
                # NOTE(review): the index is always the all-zeros tuple, so every
                # (ni, nj) contribution lands in the same tensor entry -- confirm
                # whether tuple(I[ni,:]) (or similar) was intended.
                derivative_tensor[tuple([0 for i in range(dm+1)])] += gamma(I[ni,:], Jq[nj,:]) * qbar_rays[nj,:,d]
        derivative_tensor_list.append(derivative_tensor)
    print(derivative_tensor_list)
    #print qbar_rays
    vbar[Np:] = derivative_tensor_list[0]
    #vbar[Np:] = dJ[:,0]
    return vbar
def gradient_of_E_PHI(v, DM):
    """Compute the gradient of the expectation of PHI, i.e. grad( E[PHI] ).

    E[PHI] is approximated by the method of moments up to moment degree DM.
    This gradient is then used in the steepest descent optimization below.

    NOTE(review): this appears to be an (autoformatted) duplicate of an
    earlier, identical definition of gradient_of_E_PHI in this file; at import
    time the later definition wins. Consider removing one copy.

    Relies on module-level state: problem sizes Nq, Np, Nm, Nv, the matrix
    computational graph ``cg`` and its independent ``FJ``, the PyADOL-C tape
    with id 1, and the helpers generate_multi_indices, multi_index_abs,
    multi_index_binomial, prod and gamma.
    """
    # multi indices for the full tensor of order DM-1 to compute d^(DM-1) PHI
    # the remaining order to compute d^DM PHI is achieved by a reverse sweep later
    Jq = generate_multi_indices(Nq, DM)
    NJq = shape(Jq)[0]
    # since Nq is usually much larger than Np
    # we want to be able to get one higher order derivative
    # w.r.t. q by reverse mode
    # that means we have to evaluate the mixed partial derivatives
    # d^2 F/dpdq in the forward mode
    # since UTPS cannot do those partial derivatives straightforward
    # we need to use interpolation
    J = zeros( (DM + 1, 1, Nm, Np) )  # Taylor series of degree DM has DM+1 taylor coefficients (i.e. count also the zero-derivative)
    qbar_rays = zeros((NJq, Nq, DM + 1))
    for nq in range(NJq):
        # 1: evaluation of J
        # here, we have to use nested interpolation
        # we need to compute
        # d^d F(x + s_1 z_1 + s_2 z_2) |
        # ---------------------------- |
        # d z_1 d^(d-1) z_2            |_z=0
        # for s_1 = [1,0,0] resp. s_1 = [0,1,0] (one direction for each parameter p)
        # we use here formula (13) of the paper "Evaluating higher derivative tensors by forward propagation of univariate Taylor series"
        for np in range(Np):  # each column of J
            for dm in range(DM + 1):
                I = array([1, dm])                 # target mixed multi-index: order 1 in p, order dm in q
                K = zeros((dm + 1, 2), dtype=int)  # summation indices k <= I of formula (13)
                K[:, 0] = 1
                K[:, 1] = list(range(dm + 1))
                V = zeros((Nv, dm + 1))
                for k in K:
                    # direction: s1 in the p-part, s2 = k[1]*Jq[nq] in the q-part
                    s1 = zeros(Nv)
                    s1[np] = k[0]
                    s2 = zeros(Nv)
                    s2[Np:] = k[1] * Jq[nq, :]
                    V[:, 0] = s1 + s2
                    tmp = adolc.hos_forward(1, v, V, 0)[1]
                    # signed binomial combination of formula (13)
                    J[dm, 0, :, np] += (-1)**multi_index_abs( I - k) * multi_index_binomial(I, k) * tmp[:, dm]
        # convert the interpolated derivatives to Taylor coefficients (divide by d!)
        scale_factor = array( [1. / prod(list(range(1, d + 1))) for d in range(DM + 1)])
        for dm in range(DM + 1):
            J[dm, :, :, :] *= scale_factor[dm]
        # 2: forward evaluation of Phi
        Jtc = Mtc(J[:, :, :, :])
        cg.forward([Jtc])
        # 3: reverse evaluation of Phi
        Phibar = zeros((DM + 1, 1, 1, 1))
        Phibar[0, 0, 0, 0] = 1.  # seed dPhi/dPhi = 1 in the zeroth Taylor coefficient
        cg.reverse([Mtc(Phibar)])
        Jbar = FJ.xbar.TC[:, 0, :, :]  # adjoint Taylor coefficients of J
        #print Jbar
        #print shape(Jbar)
        ## 4: reverse evaluation of J
        vbar = zeros(Nv)
        dJ = zeros( (Nq, DM + 1))  # taylor coefficients for one direction in q
        for np in range(Np):  # each column of J
            for dm in range(DM + 1):
                I = array([1, dm])
                K = zeros((dm + 1, 2), dtype=int)
                K[:, 0] = 1
                K[:, 1] = list(range(dm + 1))
                V = zeros((Nv, dm + 1))
                for k in K:
                    s1 = zeros(Nv)
                    s1[np] = k[0]
                    s2 = zeros(Nv)
                    s2[Np:] = k[1] * Jq[nq, :]
                    V[:, 0] = s1 + s2
                    keep = dm + 2  # keep one extra coefficient so the reverse sweep yields degree dm+1
                    adolc.hos_forward(1, v, V, keep)[1]  # re-run forward to prime the tape; value unused
                    # U is a (Q,M,D) array
                    # Jbar is a (D,M,Np) array
                    U = zeros((1, Nm, keep))
                    for d in range(dm + 1):
                        U[0, :, d] = Jbar[d, :, np]
                    #print 'U=',U
                    Z = adolc.hov_ti_reverse(1, U)[0]
                    #print 'Z=',Z
                    #J[dm,0,:,np] += (-1)**multi_index_abs( I - k) * multi_index_binomial(I,k) * tmp[:,dm]
                    #print shape(Z[0,:,:])
                    #exit()
                    #print Z[0,:,dm+1]
                    # same interpolation weights as step 1, one degree higher via
                    # reverse mode; Z[0, Np:, dm+1] restricts to the q-part
                    tmp = 1. / prod(list(range(1, dm + 2))) * ( -1)**multi_index_abs(I - k) * multi_index_binomial( I, k) * Z[0, Np:, dm + 1]
                    dJ[:, dm] += tmp
                    #vbar += tmp
        qbar_rays[nq, :, :] = dJ
    # use interpolation to build all PHI^(dm) 0<= dm <= DM
    derivative_tensor_list = []
    for dm in range(DM + 1):
        derivative_tensor = numpy.zeros([Nq for i in range(dm + 1)])
        I = generate_multi_indices(Nq, dm)
        d = sum(I[0, :])
        NI = shape(I)[0]
        NJq = shape(Jq)[0]
        for ni in range(NI):
            for nj in range(NJq):
                # NOTE(review): the index is always the all-zeros tuple, so every
                # (ni, nj) contribution lands in the same tensor entry -- confirm
                # whether tuple(I[ni,:]) (or similar) was intended.
                derivative_tensor[tuple([ 0 for i in range(dm + 1) ])] += gamma(I[ni, :], Jq[nj, :]) * qbar_rays[nj, :, d]
        derivative_tensor_list.append(derivative_tensor)
    print(derivative_tensor_list)
    #print qbar_rays
    vbar[Np:] = derivative_tensor_list[0]
    #vbar[Np:] = dJ[:,0]
    return vbar
# Benchmark the hov_reverse (higher-order vector reverse) sweep in three
# backends: ALGOPY UTPM pullback, raw PYADOLC drivers, and PYADOLC.cgraph.
# Uses module-level fixtures: cg, x, ap, N, P, D, reps, time_trace_*.

# time ALGOPY hov_reverse
ybar = UTPM(numpy.random.rand(D, P))
start_time = time.time()
for rep in range(reps):
    cg.pullback([ybar])
end_time = time.time()
time_hov_reverse_algopy = end_time - start_time

# time PYADOLC hov_reverse
W = numpy.random.rand(1, 1, D)
# FIX: generate the random directions before starting the clock so the
# RNG cost is not attributed to the ADOL-C sweep.
V = numpy.random.rand(N, P, D - 1)
start_time = time.time()
for rep in range(reps):
    for p in range(P):
        # hos_forward with keep=D primes the tape for the following reverse sweep
        adolc.hos_forward(1, x, V[:, p, :], keep=D)
        adolc.hov_ti_reverse(1, W)
end_time = time.time()
time_hov_reverse_adolc = end_time - start_time

# time PYADOLC.cgraph hov_reverse
W = numpy.random.rand(1, 1, P, D)
# FIX: start_time was never reset here, so time_hov_reverse_cgraph silently
# included the whole PYADOLC section's elapsed time.
start_time = time.time()
for rep in range(reps):
    ap.reverse([W])
end_time = time.time()
time_hov_reverse_cgraph = end_time - start_time

# FIX: Python 2 print statements converted to print() calls for consistency
# with the rest of the file.
print('----------------')
print(time_trace_algopy)
print(time_trace_adolc)
print(time_trace_cgraph)
print('----------------')
# Benchmark the hov_reverse (higher-order vector reverse) sweep in three
# backends: ALGOPY UTPM pullback, raw PYADOLC drivers, and PYADOLC.cgraph.
# Uses module-level fixtures: cg, x, ap, N, P, D, reps, time_trace_*.

# time ALGOPY hov_reverse
ybar = UTPM(numpy.random.rand(D, P))
start_time = time.time()
for rep in range(reps):
    cg.pullback([ybar])
end_time = time.time()
time_hov_reverse_algopy = end_time - start_time

# time PYADOLC hov_reverse
W = numpy.random.rand(1, 1, D)
# FIX: generate the random directions before starting the clock so the
# RNG cost is not attributed to the ADOL-C sweep.
V = numpy.random.rand(N, P, D - 1)
start_time = time.time()
for rep in range(reps):
    for p in range(P):
        # hos_forward with keep=D primes the tape for the following reverse sweep
        adolc.hos_forward(1, x, V[:, p, :], keep=D)
        adolc.hov_ti_reverse(1, W)
end_time = time.time()
time_hov_reverse_adolc = end_time - start_time

# time PYADOLC.cgraph hov_reverse
W = numpy.random.rand(1, 1, P, D)
# FIX: start_time was never reset here, so time_hov_reverse_cgraph silently
# included the whole PYADOLC section's elapsed time.
start_time = time.time()
for rep in range(reps):
    ap.reverse([W])
end_time = time.time()
time_hov_reverse_cgraph = end_time - start_time

print('----------------')
print(time_trace_algopy)
print(time_trace_adolc)
print(time_trace_cgraph)
print('----------------')