def predict_u(self, X_star):
    X_u = self.X_u
    y_u = self.y_u
    X_f = self.X_f
    y_f = self.y_f
    y = np.vstack((y_u, y_f))
    L = self.L
    theta = self.hyp[:-1]
    K_uu = self.k_uu(X_star, X_u[0:1, :], theta)
    K_uu1 = self.k_uu1(X_star, X_u[1:2, :], theta)
    K_uu2 = self.k_uu2(X_star, X_u[2:3, :], theta)
    K_uu3 = self.k_uu3(X_star, X_u[3:4, :], theta)
    K_uf = self.k_uf(X_star, X_f, theta)
    psi = np.hstack((K_uu, K_uu1, K_uu2, K_uu3, K_uf))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.k_uu(X_star, X_star, theta) - np.matmul(psi, beta)
    return pred_u_star, var_u_star
def calculate_Fi_ci_si(self):
    '''
    Simple calculation of Fi, ci, si: does not include any CPV effects
    or time dependence.
    '''
    bin_num = self.binning.get_number_of_bins()
    Fi = np.array([])
    ci = np.array([])
    si = np.array([])
    # Just make a simple calculation in this class
    A_mag = abs(self.amplitude.get_A(0))
    A_ph = np.angle(self.amplitude.get_A(0))
    A_mag_inv = np.transpose(A_mag)
    A_ph_inv = np.transpose(A_ph)
    avg_eff_over_phsp = self.efficiency.get_time_averaged_eff()
    for i in range(-bin_num, bin_num + 1):
        if i == 0:
            continue
        bin_idx = self.binning.get_bin_indices(i)
        inv_bin_idx = self.binning.get_bin_indices(-i)
        avg_eff = avg_eff_over_phsp[bin_idx]
        Fi = np.append(Fi, np.sum(avg_eff * A_mag[bin_idx]**2))
        ci = np.append(
            ci,
            np.sum(avg_eff * A_mag[bin_idx] * A_mag_inv[bin_idx] *
                   np.cos(A_ph[bin_idx] - A_ph_inv[bin_idx])))
        si = np.append(
            si,
            np.sum(avg_eff * A_mag[bin_idx] * A_mag_inv[bin_idx] *
                   np.sin(A_ph[bin_idx] - A_ph_inv[bin_idx])))
    Fi_inv = np.flip(Fi, 0)
    ci = ci / np.sqrt(Fi * np.flip(Fi, 0))
    si = si / np.sqrt(Fi * np.flip(Fi, 0))
    Fi = Fi / sum(Fi)
    return Fi, ci, si
def predict_f(self, X_star):
    X_u = self.X_u
    y_u = self.y_u
    X_f = self.X_f
    y_f = self.y_f
    y = np.vstack((y_u, y_f))
    L = self.L
    theta = self.hyp[:-1]
    K_uf = self.k_uf(X_u[0:1, :], X_star, theta)
    K_u1f = self.k_u1f(X_u[1:2, :], X_star, theta)
    K_u2f = self.k_u2f(X_u[2:3, :], X_star, theta)
    K_u3f = self.k_u3f(X_u[3:4, :], X_star, theta)
    K_ff = self.k_ff(X_star, X_f, theta)
    psi = np.hstack((K_uf.T, K_u1f.T, K_u2f.T, K_u3f.T, K_ff))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.k_ff(X_star, X_star, theta) - np.matmul(psi, beta)
    return pred_u_star, var_u_star
def likelihood(self, hyp):
    X_L = self.X_L
    y_L = self.y_L
    X_H = self.X_H
    y_H = self.y_H
    y = np.vstack((y_L, y_H))
    NL = y_L.shape[0]
    NH = y_H.shape[0]
    N = y.shape[0]
    rho = hyp[-3]
    logsigma_n_L = hyp[-2]
    logsigma_n_H = hyp[-1]
    sigma_n_L = np.exp(logsigma_n_L)
    sigma_n_H = np.exp(logsigma_n_H)
    theta_L = hyp[self.idx_theta_L]
    theta_H = hyp[self.idx_theta_H]
    K_LL = self.kernel(X_L, X_L, theta_L) + np.eye(NL) * sigma_n_L
    K_LH = rho * self.kernel(X_L, X_H, theta_L)
    K_HH = rho**2 * self.kernel(X_H, X_H, theta_L) + \
           self.kernel(X_H, X_H, theta_H) + np.eye(NH) * sigma_n_H
    K = np.vstack((np.hstack((K_LL, K_LH)),
                   np.hstack((K_LH.T, K_HH))))
    L = np.linalg.cholesky(K + np.eye(N) * self.jitter)
    self.L = L
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    NLML = 0.5 * np.matmul(np.transpose(y), alpha) + \
           np.sum(np.log(np.diag(L))) + 0.5 * np.log(2. * np.pi) * N
    return NLML[0, 0]
def partial_derivatives(x, y, W, V, b, c):
    # Forward pass of the one-hidden-layer network
    s = b + W @ x
    h = np.tanh(s)
    f = c + V @ h
    # One-hot vector for the true class y
    eHat = np.zeros(c.shape)
    eHat[y] = 1
    # dLdf: -e + g(f(x)), i.e. softmax(f) minus the one-hot target
    dLdf = -eHat + np.exp(f) / np.sum(np.exp(f))
    # dLdc = dL/df * df/dc
    dLdc = -eHat + (np.exp(f) / np.sum(np.exp(f)))
    # dLdV = dL/df * h^T (outer product via broadcasting)
    h_transpose = np.transpose(h)
    dLdV = dLdf * h_transpose
    # dLdb = sig'(b + Wx) elementwise mult (V^T @ dLdf)
    sigp = lambda x: 1 - np.tanh(x)**2
    V_transpose = np.transpose(V)
    dLh = V_transpose @ dLdf
    dLdb = sigp(s) * dLh
    # dLdW = dLdb * x^T
    x_transpose = np.transpose(x)
    dLdW = dLdb * x_transpose
    return dLdW, dLdV, dLdb, dLdc
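# A finite-difference spot check of the derivatives above (a sketch; it assumes
# column-vector inputs and that the differentiated loss is the softmax
# cross-entropy -log softmax(f(x))[y], which is what the formulas correspond to).
import numpy as np

def _xent_loss(x, y, W, V, b, c):
    f = c + V @ np.tanh(b + W @ x)
    return float(-f[y, 0] + np.log(np.sum(np.exp(f))))

rng = np.random.default_rng(0)
x = rng.normal(size=(3, 1)); y = 1
W = rng.normal(size=(4, 3)); V = rng.normal(size=(2, 4))
b = rng.normal(size=(4, 1)); c = rng.normal(size=(2, 1))
dLdW, dLdV, dLdb, dLdc = partial_derivatives(x, y, W, V, b, c)
eps = 1e-6
e0 = np.zeros_like(c); e0[0] = eps
num_dLdc0 = (_xent_loss(x, y, W, V, b, c + e0) -
             _xent_loss(x, y, W, V, b, c - e0)) / (2 * eps)
print(float(dLdc[0]), num_dLdc0)  # the two numbers should agree closely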
def log_py_zM_bin_j(lambda_bin_j, y_bin_j, zM, k, nj_bin_j):
    ''' Compute log p(y_j | zM, s1 = k1) of the jth binary/count variable

    lambda_bin_j ((r + 1) 1darray): Coefficients of the binomial distributions
        in the GLLVM layer
    y_bin_j (numobs 1darray): The subset containing only the binary/count
        variables in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1
        of the mixture
    k (int): The number of components of the mixture
    nj_bin_j (int): The number of possible values/maximum values of the jth
        binary/count variable
    --------------------------------------------------------------
    returns (ndarray): log p(y_j | zM, s1 = k1)
    '''
    M = zM.shape[0]
    r = zM.shape[1]
    numobs = len(y_bin_j)

    yg = np.repeat(y_bin_j[np.newaxis], axis=0, repeats=M)
    yg = yg.astype(float)
    nj_bin_j = float(nj_bin_j)

    coeff_binom = binom(nj_bin_j, yg).reshape(M, 1, numobs)

    eta = np.transpose(zM, (0, 2, 1)) @ lambda_bin_j[1:].reshape(1, r, 1)
    eta = eta + lambda_bin_j[0].reshape(1, 1, 1)  # Add the constant

    den = nj_bin_j * log_1plusexp(eta)
    num = eta @ y_bin_j[np.newaxis, np.newaxis]

    log_p_y_z = num - den + np.log(coeff_binom)
    return np.transpose(log_p_y_z, (0, 2, 1)).astype(float)
def vjp(g):
    vjps = []
    q_vjp = solve_sylvester(anp.transpose(a), anp.transpose(b), g)
    if 0 in argnums:
        vjps.append(-anp.dot(q_vjp, anp.transpose(ans)))
    if 1 in argnums:
        vjps.append(-anp.dot(anp.transpose(ans), q_vjp))
    if 2 in argnums:
        vjps.append(q_vjp)
    return tuple(vjps)
def predict(self, X_star):
    X_L = self.X_L
    y_L = self.y_L
    X_H = self.X_H
    y_H = self.y_H
    L = self.L
    y = np.vstack((y_L, y_H))
    rho = self.hyp[-3]
    theta_L = self.hyp[self.idx_theta_L]
    theta_H = self.hyp[self.idx_theta_H]
    psi1 = rho * self.kernel(X_star, X_L, theta_L)
    psi2 = rho**2 * self.kernel(X_star, X_H, theta_L) + \
           self.kernel(X_star, X_H, theta_H)
    psi = np.hstack((psi1, psi2))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                 self.kernel(X_star, X_star, theta_H) - np.matmul(psi, beta)
    return pred_u_star, var_u_star
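# A minimal usage sketch for the multi-fidelity GP above (hypothetical: the
# instance name `model`, its constructor, the layout of `model.hyp`, and the
# test inputs `X_star` are assumptions; only `likelihood` and `predict` come
# from the code above). The hyperparameters are fit by minimizing the negative
# log marginal likelihood, which also caches the Cholesky factor self.L that
# `predict` reuses.
from scipy.optimize import minimize

res = minimize(model.likelihood, model.hyp, method='Nelder-Mead')
model.hyp = res.x
model.likelihood(model.hyp)          # refresh model.L at the optimum
mu_star, cov_star = model.predict(X_star)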
def predict_u(self, X_star):
    X_u = self.X_u
    y_u = self.y_u
    X_f = self.X_f
    y_f = self.y_f
    y = np.vstack((y_u, y_f))
    L = self.L
    theta = self.hyp[:-1]
    K_uu = self.k_uu(X_star, X_u, theta)
    K_uf = self.k_uf(X_star, X_f, theta)
    psi = np.hstack((K_uu, K_uf))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.k_uu(X_star, X_star, theta) - np.matmul(psi, beta)
    if not isinstance(pred_u_star, np.ndarray):
        pred_u_star = pred_u_star._value
    if not isinstance(var_u_star, np.ndarray):
        var_u_star = var_u_star._value
    return pred_u_star, var_u_star
def likelihood(self, hyp):
    X_u = self.X_u
    y_u = self.y_u
    X_f = self.X_f
    y_f = self.y_f
    y = np.vstack((y_u, y_f))
    N = y.shape[0]
    N_f = y_f.shape[0]
    theta = hyp[:-1]
    sigma_n = np.exp(hyp[-1])
    K_uu = self.k_uu(X_u, X_u, theta)
    K_uf = self.k_uf(X_u, X_f, theta)
    K_ff = self.k_ff(X_f, X_f, theta) + np.eye(N_f) * sigma_n
    K = np.vstack((np.hstack((K_uu, K_uf)),
                   np.hstack((K_uf.T, K_ff))))
    L = np.linalg.cholesky(K + np.eye(N) * self.jitter)
    self.L = L
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    NLML = 0.5 * np.matmul(np.transpose(y), alpha) + \
           np.sum(np.log(np.diag(L))) + 0.5 * np.log(2. * np.pi) * N
    return NLML[0, 0]
def partial_derivatives(x, y, W, V, b, c):
    # Forward pass of the one-hidden-layer network
    s = b + W @ x
    h = np.tanh(s)
    f = c + np.matmul(V, h)
    # One-hot vector for the true class y
    eHat = np.zeros(c.shape)
    eHat[y] = 1
    # dLdf: -e + g(f(x)), i.e. softmax(f) minus the one-hot target
    dLdf = -eHat + np.exp(f) / np.sum(np.exp(f))
    # dLdc = dL/df * df/dc = dL/df
    dLdc = dLdf
    # dLdV = dL/df * h^T
    h_transpose = np.transpose(h)
    dLdV = dLdf * h_transpose
    # dLdb = sig'(b + Wx) elementwise mult (V^T @ dLdf)
    sigp = lambda x: 1 - np.tanh(x)**2
    V_transpose = np.transpose(V)
    dLh = np.matmul(V_transpose, dLdf)
    dLdb = sigp(s) * dLh
    # dLdW = dLdb * x^T
    x_transpose = np.transpose(x)
    dLdW = dLdb * x_transpose
    return dLdW, dLdV, dLdb, dLdc
def predict(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L
    theta = self.hyp[:-1]
    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    # De-normalize
    pred_u_star = pred_u_star * self.Ystd + self.Ymean
    var_u_star = var_u_star * self.Ystd**2
    if not isinstance(pred_u_star, np.ndarray):
        pred_u_star = pred_u_star._value
    if not isinstance(var_u_star, np.ndarray):
        var_u_star = var_u_star._value
    return pred_u_star, var_u_star
def Local2Global_Coord(rot_mat, trans_vector, points_in_local):
    '''
    function Local2Global_Coord(rot_mat, trans_vector, points_in_local)

    - Takes a "rotation matrix" whose columns form an orthonormal basis.
      The "rotation matrix" should describe the axes of the new coordinate
      system in terms of the global coordinate system. The matrix should be
      3x3 and invertible.
          [ e_1 e_2 e_3 ]
    - Takes a translation vector of size 3, which describes the translation
      from the global origin to the new local origin
      (global origin ----> local origin).
    - Takes points defined in the local coordinate frame.
    - Returns positions (which were originally defined in the local coordinate
      frame) in the global coordinate frame.
    '''
    if rot_mat.shape[0] != rot_mat.shape[1]:
        raise ValueError('Rotation Matrix should be square')
    elif trans_vector.shape != (3, ) and trans_vector.shape != (1, 3):
        raise ValueError(
            'Translation Matrix should be an array of size 3 or 1x3 matrix')
    rotated_points = np.transpose(
        np.matmul(rot_mat, np.transpose(points_in_local)))
    points_in_global = rotated_points + trans_vector
    return points_in_global
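# Worked example for the function above: a local frame rotated 90 degrees about
# the global z-axis and shifted by (1, 0, 0); the columns of R are the local
# axes expressed in global coordinates.
import numpy as np

R = np.array([[0., -1., 0.],
              [1.,  0., 0.],
              [0.,  0., 1.]])
t = np.array([1., 0., 0.])
pts_local = np.array([[1., 0., 0.],
                      [0., 1., 0.]])
print(Local2Global_Coord(R, t, pts_local))
# [[1. 1. 0.]
#  [0. 0. 0.]]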
def debug_scp_iteration_plot(tx_next, u_next, xbar, ubar, x0, T, i_iter):
    unl = u_next
    x_curr = x0
    Xnl = []
    Vnl_nlx = []
    Vnl_lx = []
    tV_nlx = []
    tV_lx = []
    for k, t in enumerate(T):
        x_next = x_curr + dynamics.get_dxdt(x_curr, unl[:, k], t) * param.get('dt')
        R_k, w_k = dynamics.get_linear_lyapunov(xbar[:, k], ubar[:, k], t)
        Vnl_nlx.append(dynamics.get_V(x_curr, t))
        Vnl_lx.append(dynamics.get_V(tx_next[:, k], t))
        tV_nlx.append(np.matmul(R_k, x_curr) + w_k)
        tV_lx.append(np.matmul(R_k, tx_next[:, k]) + w_k)
        Xnl.append(x_curr)
        x_curr = x_next
    Xnl = np.asarray(Xnl)
    Vnl_nlx = np.asarray(Vnl_nlx)
    Vnl_lx = np.asarray(Vnl_lx)
    tV_nlx = np.asarray(tV_nlx)
    tV_lx = np.asarray(tV_lx)
    plot_scp_iteration_state(
        Xnl, np.transpose(tx_next, (1, 0, 2)), np.transpose(xbar, (1, 0, 2)), T,
        title=str(param.get('controller')) + ' State' +
              '\nIteration: ' + str(i_iter) + '\nTime: ' + str(T[0]))
    plot_scp_iteration_lyapunov(
        np.squeeze(Vnl_nlx), np.squeeze(Vnl_lx), np.squeeze(tV_nlx),
        np.squeeze(tV_lx), T,
        title=str(param.get('controller')) + ' Lyapunov' +
              '\nIteration: ' + str(i_iter) + '\nTime: ' + str(T[0]))
def log_py_zM_categ_j(lambda_categ_j, y_categ_j, zM, k, nj_categ_j):
    ''' Compute log p(y_j | zM, s1 = k1) of each categorical variable

    lambda_categ_j (nj_categ x (r + 1) ndarray): Coefficients of the
        categorical distributions in the GLLVM layer
    y_categ_j (numobs 1darray): The jth categorical variable in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1
        of the mixture
    k (int): The number of components of the mixture
    nj_categ_j (int): The number of possible values of the jth categorical variable
    --------------------------------------------------------------
    returns (ndarray): log p(y_j | zM, s1 = k1) for the jth categorical variable
    '''
    epsilon = 1E-10

    r = zM.shape[1]
    nj = y_categ_j.shape[1]

    zM_broad = np.expand_dims(np.expand_dims(np.transpose(zM, (0, 2, 1)), 2), 3)
    lambda_categ_j_ = lambda_categ_j.reshape(nj, r + 1, order='C')

    eta = zM_broad @ lambda_categ_j_[:, 1:][n_axis, n_axis, ..., n_axis]  # Check that we use r and not k?
    eta = eta + lambda_categ_j_[:, 0].reshape(1, 1, nj_categ_j, 1, 1)  # Add the constant

    pi = softmax_(eta.astype(float), axis=2)

    # Numeric stability
    pi = np.where(pi <= 0, epsilon, pi)
    pi = np.where(pi >= 1, 1 - epsilon, pi)

    yg = np.expand_dims(np.expand_dims(y_categ_j, 1), 1)[..., np.newaxis, np.newaxis]
    log_p_y_z = yg * np.log(pi[n_axis])

    # Reshaping output
    log_p_y_z = log_p_y_z.sum((3))        # Summing over the modalities nj
    log_p_y_z = log_p_y_z[:, :, :, 0, 0]  # Deleting useless axes

    return np.transpose(log_p_y_z, (1, 0, 2))
def batched_dot(a, b):
    if len(a.shape) != 3 or len(b.shape) != 3 or a.shape[0] != b.shape[0]:
        raise ValueError(
            "a,b must be 3-dimensional arrays, with a.shape[0]==b.shape[0] "
            "and a.shape[2]==b.shape[1]"
        )
    elif a.shape[0] == 1:
        ## use numpy.dot for blas
        a = np.reshape(a, a.shape[1:])
        b = np.reshape(b, b.shape[1:])
        c = np.dot(a, b)
        return np.reshape(c, [1] + list(c.shape))
    elif a.shape[2] == 1:
        ## the main cost is simply allocating space for the array,
        ## so we are better off doing things in serial
        a = np.reshape(a, a.shape[:-1])
        b = np.reshape(b, (b.shape[0], b.shape[2]))
        if a.shape[-1] > 1 and b.shape[-1] > 1:
            ## batch outer product
            return np.einsum("ij,ik->ijk", a, b)
        else:
            ## broadcasted element-wise multiplication
            outshape = (a.shape[0], a.shape[1], b.shape[1])
            a = np.transpose(a)
            b = np.transpose(b)
            if a.shape[0] == 1:
                a = np.reshape(a, [-1])
            if b.shape[0] == 1:
                b = np.reshape(b, [-1])
            return np.transpose(np.reshape(a * b, outshape[::-1]))
    else:
        ## parallel batched matrix multiply
        return _par_matmul(a, b)
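# Sanity check on the a.shape[2] == 1 branch above: the batched outer product
# should match numpy's batched matmul (this branch does not need _par_matmul).
import numpy as np

a = np.random.rand(4, 3, 1)
b = np.random.rand(4, 1, 5)
assert np.allclose(batched_dot(a, b), np.matmul(a, b))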
def _grad_L2loss(self, beta, reg_lambda, X, y):
    n_samples = float(X.shape[0])
    z = np.dot(X, beta)
    # grad_beta = 1. / n_samples * np.transpose(np.dot(np.transpose(z - y), X))
    grad_beta = np.transpose(np.dot(np.transpose(z - y), X))
    print('grad_beta 0,1', grad_beta[0:2])
    return grad_beta
def draw_posterior_samples(self, X_star, N_samples=1):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L
    theta = self.hyp[:-1]
    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    samples = np.random.multivariate_normal(pred_u_star.flatten(),
                                            var_u_star, N_samples).T
    # De-normalize
    samples = samples * self.Ystd + self.Ymean
    return samples
def ExpectedImprovement(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L
    theta = self.hyp[:-1]
    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    var_u_star = np.abs(np.diag(var_u_star))[:, None]
    std_u_star = np.sqrt(var_u_star)
    # Expected Improvement (uses the predictive standard deviation, not the variance)
    best = np.min(y)
    Z = (best - pred_u_star) / std_u_star
    EI_acq = (best - pred_u_star) * norm.cdf(Z) + std_u_star * norm.pdf(Z)
    return EI_acq
def predict_H(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X_L = self.X_L
    y_L = self.y_L
    X_H = self.X_H
    y_H = self.y_H
    L = self.L
    y = np.vstack((y_L, y_H))
    rho = self.hyp[-3]
    theta_L = self.hyp[self.idx_theta_L]
    theta_H = self.hyp[self.idx_theta_H]
    psi1 = rho * self.kernel(X_star, X_L, theta_L)
    psi2 = rho**2 * self.kernel(X_star, X_H, theta_L) + \
           self.kernel(X_star, X_H, theta_H)
    psi = np.hstack((psi1, psi2))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                 self.kernel(X_star, X_star, theta_H) - np.matmul(psi, beta)
    # De-normalize
    pred_u_star = pred_u_star * self.Ystd + self.Ymean
    var_u_star = var_u_star * self.Ystd**2
    return pred_u_star, var_u_star
def nn_predict_GCN(params, x):
    # x: NSAMPLES x NFEATURES
    U = hyper['U']
    xf = np.matmul(x, U)
    xf = np.expand_dims(xf, 1)        # NSAMPLES x 1 x NFEATURES
    xf = np.transpose(xf)             # NFEATURES x 1 x NSAMPLES

    # Filter
    yf = np.matmul(params['W1'], xf)  # for each feature
    yf = np.transpose(yf)             # NSAMPLES x NFILTERS x NFEATURES
    yf = np.reshape(yf, [-1, hyper['NFEATURES']])

    # Transform back to graph domain
    Ut = np.transpose(U)
    y = np.matmul(yf, Ut)
    y = np.reshape(y, [-1, hyper['F'], hyper['NFEATURES']])
    y += params['b1']                 # NSAMPLES x NFILTERS x NFEATURES

    # nonlinear layer
    y = ReLU(y)
    # y = np.tanh(y)

    # dense layer
    y = np.reshape(y, [-1, hyper['F']*hyper['NFEATURES']])
    y = np.matmul(y, params['W2']) + params['b2']

    outputs = y
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
def predict(self, X_star):
    hyp = self.hyp
    theta_L = hyp[self.idx_theta_L]
    theta_H = hyp[self.idx_theta_H]
    rho = np.exp(hyp[-3])
    mean_L = theta_L[0]
    mean_H = rho * mean_L + theta_H[0]
    X_L = self.X_L
    y_L = self.y_L - mean_L
    X_H = self.X_H
    y_H = self.y_H - mean_H
    L = self.L
    y = np.vstack((y_L, y_H))
    psi1 = rho * self.kernel(X_star, X_L, theta_L)
    psi2 = rho**2 * self.kernel(X_star, X_H, theta_L) + \
           self.kernel(X_star, X_H, theta_H)
    psi = np.hstack((psi1, psi2))
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = mean_H + np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                 self.kernel(X_star, X_star, theta_H) - np.matmul(psi, beta)
    return pred_u_star, var_u_star
def forward(self, X1, X2):
    """
    Actual computation of the matrix of squared distances (see details above)

    :param X1: input data of size (n1, d)
    :param X2: input data of size (n2, d)
    :param inverse_bandwidths_internal: self.inverse_bandwidths_internal
    """
    # In case inverse_bandwidths is of size (1, dimension), dimension > 1,
    # ARD is handled by broadcasting
    inverse_bandwidths = anp.reshape(self._inverse_bandwidths(), (1, -1))

    if X2 is X1:
        X1_scaled = anp.multiply(X1, inverse_bandwidths)
        D = -2.0 * anp.dot(X1_scaled, anp.transpose(X1_scaled))
        X1_squared_norm = anp.sum(anp.square(X1_scaled), axis=1)
        D = D + anp.reshape(X1_squared_norm, (1, -1))
        D = D + anp.reshape(X1_squared_norm, (-1, 1))
    else:
        X1_scaled = anp.multiply(X1, inverse_bandwidths)
        X2_scaled = anp.multiply(X2, inverse_bandwidths)
        X1_squared_norm = anp.sum(anp.square(X1_scaled), axis=1)
        X2_squared_norm = anp.sum(anp.square(X2_scaled), axis=1)
        D = -2.0 * anp.matmul(X1_scaled, anp.transpose(X2_scaled))
        D = D + anp.reshape(X1_squared_norm, (-1, 1))
        D = D + anp.reshape(X2_squared_norm, (1, -1))
    return anp.abs(D)
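# A plain-numpy reference for the same scaled squared-distance matrix (a sketch
# for cross-checking; `inverse_bandwidths` here is an explicit (1, d) array
# rather than the kernel's internal parameter).
import numpy as np

def scaled_sq_dists(X1, X2, inverse_bandwidths):
    A = X1 * inverse_bandwidths
    B = X2 * inverse_bandwidths
    D = np.sum(A**2, axis=1)[:, None] - 2.0 * A @ B.T + np.sum(B**2, axis=1)[None, :]
    return np.abs(D)  # guard against small negative values from round-off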
def ExpectedImprovement(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L
    theta = self.hyp[:-1]
    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)
    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    var_u_star = np.abs(np.diag(var_u_star))[:, None]
    # Expected Improvement
    # from https://people.orie.cornell.edu/pfrazier/Presentations/2011.11.INFORMS.Tutorial.pdf
    best = np.min(y)
    delta = -(pred_u_star - best)
    deltap = -(pred_u_star - best)
    deltap[delta < 0] = 0
    Z = delta / np.sqrt(var_u_star)
    EI_acq = deltap - np.abs(deltap) * norm.cdf(-Z) + np.sqrt(var_u_star) * norm.pdf(Z)
    if not isinstance(EI_acq, np.ndarray):
        EI_acq = EI_acq._value
    return EI_acq
def estimateC_weighted(W, R, B, D, lam):
    '''
    :param W   : the heatmap
    :param R   : the rotation matrix
    :param B   : the base matrix
    :param D   : the weight
    :param lam : lam value used to simplify some results
    :return    : C0
    '''
    p = len(W[0])
    k = int(B.shape[0] / 3)
    d = np.diag(D)
    D = np.zeros((2 * p, 2 * p))
    eps = sys.float_info.epsilon
    for i in range(p):
        D[2 * i, 2 * i] = d[i]
        D[2 * i + 1, 2 * i + 1] = d[i]
    # next we work on the linear system y = X*C
    y = W.flatten()              # vectorized W
    X = np.zeros((2 * p, k))     # each column is a rotated Bk
    for i in range(k):
        RBi = np.dot(R, B[3 * i:3 * (i + 1), :])
        X[:, i] = RBi.flatten()
    # we want to calculate C = pinv(X'*D*X + lam*eye(size(X,2)))*X'*D*y and then C = C'
    A = np.dot(np.dot(np.transpose(X), D), X) + lam * np.eye(X.shape[1])
    tol = max(A.shape) * np.linalg.norm(A, np.inf) * eps
    C = np.dot(np.dot(np.linalg.pinv(A), np.dot(np.transpose(X), D)), y)
    return np.transpose(C)
def avg_pred_log(w, images):
    log_pc_x = 0
    for i in range(0, images.shape[0]):
        current_log_pc_x = np.dot(np.transpose(w), images[i, :]) - \
            logsumexp(np.dot(np.transpose(w), images[i, :]))
        log_pc_x = log_pc_x + current_log_pc_x
    return np.sum(log_pc_x) / float(images.shape[0])
def grad_power_noise(x):
    """
    Compute the gradient of the power criterion with respect to the width of
    the Gaussian RBF kernel and the noise vector.

    Args:
        x: 1 + 2J*d_n vector

    Returns:
        the gradient of the power criterion with respect to kernel width/latent vector
    """
    with util.ContextTimer() as t:
        width, z = unflatten(x)
        zp = z[:J]
        zq = z[J:]

        # Compute the Jacobian of the generators with respect to the noise vector
        torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1), requires_grad=True)
        torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1), requires_grad=True)
        gp_grad = compute_jacobian(torch_zp, gen_p(torch_zp).view(J, -1))  # J x d_pix x d_noise x 1 x 1
        gq_grad = compute_jacobian(torch_zq, gen_q(torch_zq).view(J, -1))  # J x d_pix x d_noise x 1 x 1
        v_grad_z = np.vstack([gp_grad, gq_grad])
        v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise

        # Compute the Jacobian of the feature extractor with respect to the noise vector
        vp_flatten = to_torch_variable(
            gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
            shape=(J, 3, image_size, image_size),
            requires_grad=True
        )
        vq_flatten = to_torch_variable(
            gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
            shape=(J, 3, image_size, image_size),
            requires_grad=True
        )
        size = (model_input_size, model_input_size)
        upsample = nn.Upsample(size=size, mode='bilinear')
        fp = model(upsample(vp_flatten))
        fq = model(upsample(vq_flatten))
        fp_grad = compute_jacobian(vp_flatten, fp.view(J, -1))  # J x d_nn x C x H x W
        fq_grad = compute_jacobian(vq_flatten, fq.view(J, -1))  # J x d_nn x C x H x W
        f_grad_v = np.vstack([fp_grad, fq_grad])
        f_grad_v = f_grad_v.reshape((2*J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

        # Compute the gradient of the objective function with respect to
        # the gaussian width and test locations
        F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
        F = np.reshape(F, (2*J, -1))
        grad_obj = autograd.elementwise_grad(flat_obj_feat)  # 1+(2J)*d_nn input
        obj_grad_f = grad_obj(flatten(width, F))
        obj_grad_width = obj_grad_f[0]
        obj_grad_f = np.reshape(obj_grad_f[1:], [(2*J), -1])  # 2J x d_nn array

        obj_grad_v = inner1d(obj_grad_f, np.transpose(f_grad_v, (2, 0, 1)))  # 2J x d_pix
        obj_grad_z = inner1d(obj_grad_v.T, np.transpose(v_grad_z, (2, 0, 1))).flatten()

    return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z])
def Predict(self, X):
    kstar = self.covariance(X, self.X, self.scales)
    predictive_mean = np.matmul(np.transpose(kstar), self.alpha)
    v = solve_triangular(self.cholesky, kstar, lower=True)
    predictive_variance = self.covariance(X, X, self.scales) - np.matmul(np.transpose(v), v)
    return predictive_mean.reshape(-1, 1), np.diag(predictive_variance).reshape(-1, 1, 1)
def get_direction(self, x):
    self.jacob_func = jacobian(self.evaluate)
    self.jacobian = self.jacob_func(x)
    self.fx = self.evaluate(x)
    self.gradient = 2 * np.transpose(self.jacobian) @ self.fx
    self.hessian = np.transpose(self.jacobian) @ self.jacobian
    # NOTE: placeholder return kept from the original; a Gauss-Newton step
    # would instead be -np.linalg.solve(self.hessian, self.gradient).
    return np.zeros(4)
def get_S(self):
    # Rank-two quasi-Newton (DFP-type) update of the matrix S,
    # with step deltaK and gradient difference gamma.
    arg1 = (self.deltaK @ np.transpose(self.deltaK)) / \
        (np.transpose(self.deltaK) @ self.gamma)
    arg2 = (self.S[self.k] @ self.gamma) @ (self.gamma.T @ self.S[self.k]) / \
        (self.gamma.T @ self.S[self.k] @ self.gamma)
    self.S[self.k + 1] = self.S[self.k] + arg1 - arg2
    return self.S[self.k + 1]
def compute_stats(Ex, ExxT, ExnxT, inhomog):
    T = Ex.shape[-1]
    E_init_stats = ExxT[:, :, 0], Ex[:, 0], 1., 1.
    E_pair_stats = np.transpose(ExxT, (2, 0, 1))[:-1], \
        ExnxT.T, np.transpose(ExxT, (2, 0, 1))[1:], np.ones(T-1)
    E_node_stats = np.diagonal(ExxT.T, axis1=-1, axis2=-2), Ex.T, np.ones(T)
    if not inhomog:
        # materialize the map so the result is a tuple of arrays (Python 3)
        E_pair_stats = tuple(map(lambda x: np.sum(x, axis=0), E_pair_stats))
    return E_init_stats, E_pair_stats, E_node_stats
def vjp(g):
    result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                      axes=[axes['out']['conv'], axes[_Y_]['conv']],
                      dot_axes=[axes['out'][ignore_Y], axes[_Y_]['ignore']],
                      mode=new_mode)
    new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
    return np.transpose(result, new_order)
def get_marginal(self, u, V, R, x_test):
    '''
    Current convergence metric: log-space predictive marginal likelihood.
    '''
    I = self.sigx * np.identity(self.dimx)
    mu = np.zeros(self.dimx,)
    n_samples = 200
    ll = 0
    test_size = x_test.shape[0]
    for i in range(test_size):
        x = x_test[i]
        mc = 0
        for j in range(n_samples):
            w = self.sample_w(u, V)
            var = np.dot(w, np.transpose(w))
            var = np.add(var, I)
            px = gaussian.Gaussian_full(mu, var)
            px = px.eval(x)  # eval_log_properly(x)
            mc = mc + px
        mc = mc / float(n_samples)
        mc = np.log(mc)
        ll += mc
    return ll / float(test_size)
def grad_convolve(argnum, g, ans, vs, gvs, A, B, axes=None, dot_axes=[(), ()], mode='full'):
    assert mode in ['valid', 'full'], "Grad for mode {0} not yet implemented".format(mode)
    axes, shapes = parse_axes(A.shape, B.shape, axes, dot_axes, mode)
    if argnum == 0:
        X, Y = A, B
        _X_, _Y_ = 'A', 'B'
        ignore_Y = 'ignore_B'
    elif argnum == 1:
        X, Y = B, A
        _X_, _Y_ = 'B', 'A'
        ignore_Y = 'ignore_A'
    else:
        raise NotImplementedError("Can't take grad of convolve w.r.t. arg {0}".format(argnum))

    if mode == 'full':
        new_mode = 'valid'
    else:
        if any([x_size > y_size for x_size, y_size in
                zip(shapes[_X_]['conv'], shapes[_Y_]['conv'])]):
            new_mode = 'full'
        else:
            new_mode = 'valid'

    result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                      axes=[axes['out']['conv'], axes[_Y_]['conv']],
                      dot_axes=[axes['out'][ignore_Y], axes[_Y_]['ignore']],
                      mode=new_mode)
    new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
    return np.transpose(result, new_order)
def eval_log_properly(self, x):
    det = np.linalg.det(self.Sigma)
    const = (self.size / 2.0) * np.log(2 * np.pi)
    const = -0.5 * np.log(det) - const
    prec = np.linalg.inv(self.Sigma)
    t = np.subtract(x, self.Mu)
    v = np.dot(np.transpose(t), prec)
    v = -0.5 * np.dot(v, t)
    return const + v
def generate_data(beta, tau, n, num_times):
    num_features = len(beta) - 1
    X = np.random.uniform(-2, 2, (n, num_times, num_features))
    alpha = np.random.normal(0, tau, n)
    alpha = np.reshape(np.tile(alpha, num_times), (num_times, n))
    alpha = np.transpose(alpha)
    P = logistic(beta[0] + np.dot(X, beta[1:]))  # +alpha)
    y = np.random.binomial(1, P)
    return X, y
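# Example: simulate a small panel of longitudinal binary outcomes (a sketch;
# `logistic` is assumed to be the sigmoid defined elsewhere in this module).
import numpy as np

beta = np.array([0.5, 1.0, -2.0])   # intercept plus two feature coefficients
X, y = generate_data(beta, tau=0.1, n=100, num_times=5)
print(X.shape, y.shape)             # (100, 5, 2) (100, 5)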
def eval(self, x):
    # x and mu must have the same dimensions
    det = np.linalg.det(self.Sigma)**(-0.5)
    const = (2 * np.pi)**(-self.size / 2.0)
    const = det * const
    prec = np.linalg.inv(self.Sigma)
    t = np.subtract(x, self.Mu)
    v = np.dot(np.transpose(t), prec)
    v = np.exp(-0.5 * np.dot(v, t))
    return const * v
def KL_two_gaussians(params):
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    # print(Sigma)
    # print(np.linalg.det(Sigma))
    return 1/2 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
def _init_params(self, data, lengths=None, params='stmp'):
    X = data['obs']

    if 's' in params:
        self.startprob_.fill(1.0 / self.n_components)

    if 't' in params or 'm' in params or 'p' in params:
        kmmod = cluster.KMeans(n_clusters=self.n_unique,
                               random_state=self.random_state).fit(X)
        kmeans = kmmod.cluster_centers_

    if 't' in params:
        # TODO: estimate transitions from data (!) / consider n_tied=1
        if self.n_tied == 0:
            transmat = np.ones([self.n_components, self.n_components])
            np.fill_diagonal(transmat, 10.0)
            self.transmat_ = transmat  # .90 for self-transition
        else:
            transmat = np.zeros((self.n_components, self.n_components))
            transmat[range(self.n_components),
                     range(self.n_components)] = 100.0  # diagonal
            transmat[range(self.n_components-1),
                     range(1, self.n_components)] = 1.0  # diagonal + 1
            transmat[[r * (self.n_chain) - 1 for r in range(1, self.n_unique+1)
                      for c in range(self.n_unique-1)],
                     [c * (self.n_chain) for r in range(self.n_unique)
                      for c in range(self.n_unique) if c != r]] = 1.0
            self.transmat_ = np.copy(transmat)

    if 'm' in params:
        mu_init = np.zeros((self.n_unique, self.n_features))
        for u in range(self.n_unique):
            for f in range(self.n_features):
                mu_init[u][f] = kmeans[u, f]
        self.mu_ = np.copy(mu_init)

    if 'p' in params:
        precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
        for u in range(self.n_unique):
            if self.n_features == 1:
                precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias=1))
            else:
                precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))
        self.precision_ = np.copy(precision_init)
def loss(weights):
    mu1 = parser.get(weights, 'mu1')
    mu2 = parser.get(weights, 'mu2')
    sig1 = parser.get(weights, 'sig1') * np.eye(mu1.size)
    sig2 = parser.get(weights, 'sig2') * np.eye(mu1.size)
    return 0.5 * (
        np.log(np.linalg.det(sig2) / np.linalg.det(sig1))
        - mu1.size
        + np.trace(np.dot(np.linalg.inv(sig2), sig1))
        # + np.dot(np.dot(np.transpose(mu2 - mu1), np.linalg.inv(sig2)), mu2 - mu1)
        + np.dot(np.dot(mu2 - mu1, np.linalg.inv(sig2)), np.transpose(mu2 - mu1))
    )
def pair_mean_to_natural(A, sigma):
    assert 2 <= A.ndim == sigma.ndim <= 3
    ndim = A.ndim
    einstring = 'tji,tjk->tik' if ndim == 3 else 'ji,jk->ik'
    trans = (0, 2, 1) if ndim == 3 else (1, 0)

    temp = np.linalg.solve(sigma, A)
    Jxx = -1./2 * np.einsum(einstring, A, temp)
    Jxy = np.transpose(temp, trans)
    Jyy = -1./2 * np.linalg.inv(sigma)
    logZ = -1./2 * np.linalg.slogdet(sigma)[1]

    return Jxx, Jxy, Jyy, logZ
def G(self):
    full_W = np.array([node.w for node in self.nodes])
    WB = full_W[:, 1:].reshape((self.K, self.K, self.B))

    # Weight matrix is summed over impulse response functions
    WT = WB.sum(axis=2)

    # Impulse response weights are normalized weights
    GT = WB / WT[:, :, None]

    # Then we transpose so that the impulse matrix is (outgoing x incoming x basis)
    G = np.transpose(GT, [1, 0, 2])

    # TODO: Decide if this is still necessary
    for k1 in range(self.K):
        for k2 in range(self.K):
            if G[k1, k2, :].sum() < 1e-2:
                G[k1, k2, :] = 1.0 / self.B
    return G
def pylds_E_step_inhomog(lds, data):
    T = data.shape[0]
    mu_init, sigma_init, A, sigma_states, C, sigma_obs = lds
    normalizer, smoothed_mus, smoothed_sigmas, E_xtp1_xtT = \
        _E_step(mu_init, sigma_init, A, sigma_states, C, sigma_obs, data)

    EyyT = np.einsum('ti,tj->tij', data, data)
    EyxT = np.einsum('ti,tj->tij', data, smoothed_mus)
    ExxT = smoothed_sigmas + np.einsum('ti,tj->tij', smoothed_mus, smoothed_mus)

    E_xt_xtT = ExxT[:-1]
    E_xtp1_xtp1T = ExxT[1:]
    E_xtp1_xtT = E_xtp1_xtT

    E_x1_x1T = smoothed_sigmas[0] + np.outer(smoothed_mus[0], smoothed_mus[0])
    E_x1 = smoothed_mus[0]

    E_init_stats = E_x1_x1T, E_x1, 1.
    E_pairwise_stats = E_xt_xtT.sum(0), E_xtp1_xtT.sum(0).T, E_xtp1_xtp1T.sum(0), T-1
    E_node_stats = ExxT, np.transpose(EyxT, (0, 2, 1)), EyyT, np.ones(T)

    return E_init_stats, E_pairwise_stats, E_node_stats
def expectation(params, y, X, eps, N, u):
    # For each sample of theta, calculate the likelihood.
    # The likelihood runs over participants; for each participant we have N particles.
    # With L samples, n participants and N particles per participant and sample,
    # we have L*n*N particles.
    # Get the first column to be mu
    d = np.shape(X)[-1] + 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    # Iterate over the number of samples of theta
    for j in range(np.shape(eps)[0]):
        beta = mu + np.dot(Sigma, eps[j, :])
        # This log likelihood iterates over both the participants and the particles
        E += log_likelihood(beta, y, X, u[j*(n*N):(j+1)*(n*N)])
    return E / len(beta)
def trance_quad(W, A): return np.trace(np.dot(np.dot(np.transpose(W),A), W))
def eval_log_prec(Mu, prec, x):
    t = np.subtract(x, Mu)
    v = np.dot(np.transpose(t), prec)
    v = -0.5 * np.dot(v, t)
    return v
def normal_pdf(theta, mu, Sigma):
    d = len(mu)
    # return np.exp(-(theta-mu)**2/(2*sigma**2))/np.sqrt(2*sigma**2*np.pi)
    return (2*np.pi)**(-d/2) * np.linalg.det(Sigma)**(-1/2) * \
        np.exp(-np.dot(np.transpose(theta - mu),
                       np.dot(np.linalg.inv(Sigma), theta - mu)) / 2)
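# Cross-check of normal_pdf against scipy's multivariate normal density (a sketch).
import numpy as np
from scipy.stats import multivariate_normal

mu = np.zeros(2)
Sigma = np.array([[2.0, 0.3],
                  [0.3, 1.0]])
theta = np.array([0.5, -1.0])
print(normal_pdf(theta, mu, Sigma))
print(multivariate_normal(mean=mu, cov=Sigma).pdf(theta))  # should agree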
def _vjp_sqrtm(ans, A, disp=True, blocksize=64):
    assert disp, "sqrtm vjp not implemented for disp=False"
    ans_transp = anp.transpose(ans)

    def vjp(g):
        return anp.real(solve_sylvester(ans_transp, ans_transp, g))

    return vjp
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                count = 0
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == u, f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[count].params[1:])
                        ar_resid.append(ar_mod[count].resid)
                        count += 1
            else:
                # run one AR model on the part of the time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                for f in range(self.n_features):
                    ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == mf, f]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[f].params[1:])
                    ar_resid.append(ar_mod[f].resid)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u, f] = kmeans[u, f] - np.dot(
                        np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = 1.0 / (np.var(X[kmmod.labels_ == u]))
                else:
                    precision_init[u] = np.linalg.inv(
                        np.cov(np.transpose(X[kmmod.labels_ == u])))
                    # Alternative: Initialization using ar_resid
                    # for f in range(self.n_features):
                    #     if not self.shared_alpha:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[count])
                    #         count += 1
                    #     else:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[f])
            self.precision_ = np.copy(precision_init)

        if 'a' in params:
            if self.shared_alpha:
                alpha_init = np.zeros((1, self.n_lags))
                alpha_init = ar_alpha[0].reshape((1, self.n_lags))
            else:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = 0
                    alpha_init[u] = ar_alpha[ar_idx]
                    ar_idx += self.n_features
            self.alpha_ = np.copy(alpha_init)
def minConf_PQN(funObj, x, funProj, options=None):
    """
    The problems are of the form
            min funObj(x) s.t. x in C
    The projected quasi-Newton sub-problems are solved using the spectral
    projected gradient algorithm.

    Parameters
    ----------
    funObj: function to minimize, returns the objective value as the first
        argument and the gradient as the second argument
    funProj: function that returns the projection of x onto C
    options:
        1) verbose: level of verbosity (0: no output, 1: final, 2: iter (default), 3: debug)
        2) optTol: tolerance used to check for optimality (default: 1e-5)
        3) progTol: tolerance used to check for progress (default: 1e-9)
        4) maxIter: maximum number of calls to funObj (default: 500)
        5) maxProject: maximum number of calls to funProj (default: 100000)
        6) numDiff: compute derivatives numerically (0: use user-supplied
           derivatives (default), 1: use finite differences, 2: use complex differentials)
        7) suffDec: sufficient decrease parameter in Armijo condition (default: 1e-4)
        8) corrections: number of lbfgs corrections to store (default: 10)
        9) adjustStep: use quadratic initialization of line search (default: 0)
        10) bbInit: initialize sub-problem with Barzilai-Borwein step (default: 0)
        11) SPGoptTol: optimality tolerance for SPG direction finding (default: 1e-6)
        12) SPGiters: maximum number of iterations for SPG direction finding (default: 10)

    Returns
    -------
    x: optimal parameter values
    f: optimal objective value
    funEvals: number of function evaluations
    """
    # number of variables/parameters
    nVars = len(x)

    # set default optimization settings
    options_default = {'verbose': 2, 'numDiff': 0, 'optTol': 1e-5, 'progTol': 1e-9,
                       'maxIter': 500, 'maxProject': 100000, 'suffDec': 1e-4,
                       'corrections': 10, 'adjustStep': 0, 'bbInit': 0,
                       'SPGoptTol': 1e-6, 'SPGprogTol': 1e-10, 'SPGiters': 10,
                       'SPGtestOpt': 0}
    options = setDefaultOptions(options, options_default)

    if options['verbose'] == 3:
        print('Running PQN...')
        print('Number of L-BFGS Corrections to store: ' + str(options['corrections']))
        print('Spectral initialization of SPG: ' + str(options['bbInit']))
        print('Maximum number of SPG iterations: ' + str(options['SPGiters']))
        print('SPG optimality tolerance: ' + str(options['SPGoptTol']))
        print('SPG progress tolerance: ' + str(options['SPGprogTol']))
        print('PQN optimality tolerance: ' + str(options['optTol']))
        print('PQN progress tolerance: ' + str(options['progTol']))
        print('Quadratic initialization of line search: ' + str(options['adjustStep']))
        print('Maximum number of function evaluations: ' + str(options['maxIter']))
        print('Maximum number of projections: ' + str(options['maxProject']))

    if options['verbose'] >= 2:
        print('{:10s}'.format('Iteration') +
              '{:10s}'.format('FunEvals') +
              '{:10s}'.format('Projections') +
              '{:15s}'.format('StepLength') +
              '{:15s}'.format('FunctionVal') +
              '{:15s}'.format('OptCond'))

    funEvalMultiplier = 1

    # project initial parameter vector
    # translate this function (Done!)
    x = funProj(x)
    projects = 1

    # evaluate initial parameters
    # translate this function (Done!)
    [f, g] = funObj(x)
    funEvals = 1

    # check optimality of initial point
    projects = projects + 1
    if np.max(np.abs(funProj(x - g) - x)) < options['optTol']:
        if options['verbose'] >= 1:
            print("First-Order Optimality Conditions Below optTol at Initial Point")
        return (x, f, funEvals)

    i = 1
    while funEvals <= options['maxIter']:
        # compute step direction
        # this is for initialization
        if i == 1:
            p = funProj(x - g)
            projects = projects + 1
            S = np.zeros((nVars, 0))
            Y = np.zeros((nVars, 0))
            Hdiag = 1
        else:
            y = g - g_old
            s = x - x_old
            # translate this function (Done!)
            [S, Y, Hdiag] = lbfgsUpdate(y, s, options['corrections'],
                                        options['verbose'] == 3, S, Y, Hdiag)
            # make compact representation
            k = Y.shape[1]
            L = np.zeros((k, k))
            for j in range(k):
                L[j+1:, j] = np.dot(np.transpose(S[:, j+1:]), Y[:, j])
            N = np.hstack((S/Hdiag, Y.reshape(Y.shape[0], Y.size // Y.shape[0])))
            M1 = np.hstack((np.dot(S.T, S)/Hdiag, L))
            M2 = np.hstack((L.T, -np.diag(np.diag(np.dot(S.T, Y)))))
            M = np.vstack((M1, M2))
            # translate this function (Done!)
            HvFunc = lambda v: v/Hdiag - np.dot(N, np.linalg.solve(M, np.dot(N.T, v)))

            if options['bbInit'] == True:
                # use Barzilai-Borwein step to initialize sub-problem
                alpha = np.dot(s, s)/np.dot(s, y)
                if alpha <= 1e-10 or alpha > 1e10:
                    alpha = min(1., 1./np.sum(np.abs(g)))
                # solve sub-problem
                xSubInit = x - alpha*g
                feasibleInit = 0
            else:
                xSubInit = x
                feasibleInit = 1

            # solve sub-problem
            # translate this function (Done!)
            [p, subProjects] = solveSubProblem(x, g, HvFunc, funProj,
                                               options['SPGoptTol'], options['SPGprogTol'],
                                               options['SPGiters'], options['SPGtestOpt'],
                                               feasibleInit, xSubInit)
            projects = projects + subProjects

        d = p - x
        g_old = g
        x_old = x

        # check that progress can be made along the direction
        gtd = np.dot(g, d)
        if gtd > -options['progTol']:
            if options['verbose'] >= 1:
                print("Directional Derivative below progTol")
            break

        # select initial guess for the step length
        if i == 1 or options['adjustStep'] == 0:
            t = 1.
        else:
            t = min(1., 2.*(f - f_old)/gtd)

        # bound step length on first iteration
        if i == 1:
            t = min(1., 1./np.sum(np.abs(g)))

        # evaluate the objective and gradient at the initial step
        if t == 1:
            x_new = p
        else:
            x_new = x + t*d
        [f_new, g_new] = funObj(x_new)
        funEvals = funEvals + 1

        # backtracking line search
        f_old = f
        # translate isLegal (Done!)
        while f_new > f + options['suffDec']*np.dot(g, x_new - x) or not isLegal(f_new):
            temp = t
            # backtrack to next trial value
            if not isLegal(f_new) or not isLegal(g_new):
                if options['verbose'] == 3:
                    print("Halving step size")
                t = t/2.
            else:
                if options['verbose'] == 3:
                    print("Cubic backtracking")
                # translate polyinterp (Done!)
                t = polyinterp(np.array([[0., f, gtd],
                                         [t, f_new, np.dot(g_new, d)]]))[0]

            # adjust if change is too small/large
            if t < temp*1e-3:
                if options['verbose'] == 3:
                    print("Interpolated value too small, Adjusting")
                t = temp*1e-3
            elif t > temp*0.6:
                if options['verbose'] == 3:
                    print("Interpolated value too large, Adjusting")
                t = temp*0.6

            # check whether step has become too small
            if np.sum(np.abs(t*d)) < options['progTol'] or t == 0:
                if options['verbose'] == 3:
                    print("Line search failed")
                t = 0
                f_new = f
                g_new = g
                break

            # evaluate new point
            f_prev = f_new
            t_prev = temp
            x_new = x + t*d
            [f_new, g_new] = funObj(x_new)
            funEvals = funEvals + 1

        # take step
        x = x_new
        f = f_new
        g = g_new

        optCond = np.max(np.abs(funProj(x - g) - x))
        projects = projects + 1

        # output log
        if options['verbose'] >= 2:
            print('{:10d}'.format(i) +
                  '{:10d}'.format(funEvals*funEvalMultiplier) +
                  '{:10d}'.format(projects) +
                  '{:15.5e}'.format(t) +
                  '{:15.5e}'.format(f) +
                  '{:15.5e}'.format(optCond))

        # check optimality
        if optCond < options['optTol']:
            print("First-order optimality conditions below optTol")
            break

        if np.max(np.abs(t*d)) < options['progTol']:
            if options['verbose'] >= 1:
                print("Step size below progTol")
            break

        if np.abs(f - f_old) < options['progTol']:
            if options['verbose'] >= 1:
                print("Function value changing by less than progTol")
            break

        if funEvals > options['maxIter']:
            if options['verbose'] >= 1:
                print("Function evaluation exceeds maxIter")
            break

        if projects > options['maxProject']:
            if options['verbose'] >= 1:
                print("Number of projections exceeds maxProject")
            break

        i = i + 1

    return (x, f, funEvals)
#!/usr/bin/python
import autograd.numpy as np
from autograd import grad

# Automatically find the gradient of a function.
# Define a function Tr(W.T*A*W); we know that its gradient is (A+A')*W.
def trance_quad(W, A):
    return np.trace(np.dot(np.dot(np.transpose(W), A), W))

# Initial setup
n = 5
A = np.random.random((n, n))
W = np.random.random((n, 1))
grad_foo = grad(trance_quad)  # Obtain its gradient function
print('Autogen Gradient : \n', grad_foo(W, A))
print('Theoretical Gradient : \n', np.dot((A + np.transpose(A)), W))
import pdb; pdb.set_trace()
def KL_two_gaussians(params):
    mu = params[0:len(params)//2]
    Sigma = np.diag(np.exp(params[len(params)//2:]))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    return 1/2 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
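# Quick sanity check (a sketch): with zero mean and unit variances the KL to the
# standard-normal prior is zero. `d` is the module-level dimension the function
# reads, as above.
import numpy as np

d = 3
params = np.concatenate([np.zeros(d), np.zeros(d)])  # zero mean, log-variances of 0
print(KL_two_gaussians(params))  # ~0.0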