def balreal(sys):
    """Computes the balanced realization of sys and returns its eigenvalues.

    References:
        [1] http://www.mathworks.com/help/control/ref/balreal.html

        [2] Laub, A.J., M.T. Heath, C.C. Paige, and R.C. Ward, "Computation of
            System Balancing Transformations and Other Applications of
            Simultaneous Diagonalization Algorithms," *IEEE Trans. Automatic
            Control*, AC-32 (1987), pp. 115-122.
    """
    sys = LinearSystem(sys)  # cast first to memoize sys2ss
    if not sys.analog:
        raise NotImplementedError("balanced digital filters not supported")

    R = control_gram(sys)
    O = observe_gram(sys)

    LR = cholesky(R, lower=True)
    LO = cholesky(O, lower=True)

    U, S, V = svd(np.dot(LO.T, LR))

    T = np.dot(LR, V.T) * S ** (-1. / 2)
    Tinv = (S ** (-1. / 2))[:, None] * np.dot(U.T, LO.T)

    return similarity_transform(sys, T, Tinv), S
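# A minimal standalone sketch (not from the library above, whose LinearSystem,
# control_gram, etc. are assumed) of the same Laub balancing transform using
# only numpy/scipy: the Gramians are obtained from Lyapunov equations and the
# transform should render both equal to diag(S).
import numpy as np
from scipy.linalg import cholesky, svd, solve_continuous_lyapunov

A = np.array([[-1., 1.], [0., -2.]])
B = np.array([[1.], [0.5]])
C = np.array([[1., 0.]])

P = solve_continuous_lyapunov(A, -B @ B.T)    # controllability Gramian
Q = solve_continuous_lyapunov(A.T, -C.T @ C)  # observability Gramian

LR = cholesky(P, lower=True)
LO = cholesky(Q, lower=True)
U, S, Vh = svd(LO.T @ LR)
T = LR @ Vh.T * S ** -0.5
Tinv = (S ** -0.5)[:, None] * (U.T @ LO.T)

# both transformed Gramians equal diag(S) (the Hankel singular values)
assert np.allclose(Tinv @ P @ Tinv.T, np.diag(S))
assert np.allclose(T.T @ Q @ T, np.diag(S))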
def log_mvnpdf(X, means, covars, min_covar=1.e-7):
    """Log probability for full covariance matrices."""
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations, we need to reinitialize this component
            try:
                cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                          lower=True)
            except linalg.LinAlgError:
                raise ValueError("'covars' must be symmetric, "
                                 "positive-definite")

        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                 n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
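# A quick hedged cross-check (assumes `linalg` above is scipy.linalg and the
# usual `import numpy as np`): for a well-conditioned covariance, the
# Cholesky-based log-density agrees with scipy.stats.multivariate_normal.
import numpy as np
from scipy import linalg
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
X = rng.normal(size=(5, 3))
mu = np.zeros(3)
A = rng.normal(size=(3, 3))
cv = A @ A.T + np.eye(3)  # symmetric positive definite

expected = multivariate_normal(mean=mu, cov=cv).logpdf(X)
got = log_mvnpdf(X, [mu], [cv])[:, 0]
assert np.allclose(got, expected)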
def contour_2d(mu, cov=None, prec=None, n=100, radius=[1, np.sqrt(6)]):
    """
    Assuming a bivariate normal distribution, draw contours,
    given 'radius' information.

    Note: sqrt(6) covers roughly 95% of probability mass (in 2d),
    given the fact that the squared Mahalanobis distance is
    chi-squared distributed.
    """
    mu = mu.reshape(2, 1)
    t = np.linspace(0, 2 * np.pi, n)
    circle = np.array([np.cos(t), np.sin(t)])
    if prec is None:
        L = la.cholesky(cov)
        ellipse = np.dot(L, circle)
    else:
        L = la.cholesky(prec)
        ellipse = la.solve_triangular(L, circle)
        # FIXME: not correct yet
    plots = {}
    for r in radius:
        plots[r] = (r * ellipse[0, :] + mu[0], r * ellipse[1, :] + mu[1])
    return plots
def LMLdebug(self):
    """
    LML function for debug
    """
    assert self.N * self.P < 5000, 'gp2kronSum:: N*P>=5000'

    y = SP.reshape(self.Y, (self.N * self.P), order='F')
    V = SP.kron(SP.eye(self.P), self.F)

    XX = SP.dot(self.Xr, self.Xr.T)
    K = SP.kron(self.Cr.K(), XX)
    K += SP.kron(self.Cn.K() + self.offset * SP.eye(self.P), SP.eye(self.N))

    # inverse of K
    cholK = LA.cholesky(K)
    Ki = LA.cho_solve((cholK, False), SP.eye(self.N * self.P))

    # Areml and inverse
    Areml = SP.dot(V.T, SP.dot(Ki, V))
    cholAreml = LA.cholesky(Areml)
    Areml_i = LA.cho_solve((cholAreml, False), SP.eye(self.K * self.P))

    # effect sizes and z
    b = SP.dot(Areml_i, SP.dot(V.T, SP.dot(Ki, y)))
    z = y - SP.dot(V, b)
    Kiz = SP.dot(Ki, z)

    # lml
    lml = y.shape[0] * SP.log(2 * SP.pi)
    lml += 2 * SP.log(SP.diag(cholK)).sum()
    lml += 2 * SP.log(SP.diag(cholAreml)).sum()
    lml += SP.dot(z, Kiz)
    lml *= 0.5

    return lml
def genBrownianMotion(n, tMax=10.0):
    tSeq = np.arange(tMax / float(500),
                     tMax * (1 + 1 / float(500)), tMax / float(500))
    sig = np.zeros((500, 500), dtype='float64')
    for i in range(500):
        sig[i, 0:i] = tSeq[0:i]
        sig[i, i:] = tSeq[i]
    sigSqrt = LA.cholesky(sig, lower=True)
    for j in xrange(n / 500):
        z = np.dot(sigSqrt, nr.randn(500))
        if j == 0:
            zTot = np.insert(z, 0, 0)
        else:
            z = z + zTot[-1]
            zTot = np.append(zTot, z)
    m = n % 500 - 1
    tSeq = np.arange(tMax / float(m),
                     tMax * (1 + 1 / float(m)), tMax / float(m))
    sig = np.zeros((m, m), dtype='float64')
    for i in range(m):
        sig[i, 0:i] = tSeq[0:i]
        sig[i, i:] = tSeq[i]
    print(sig)
    sigSqrt = LA.cholesky(sig, lower=True)
    z = np.dot(sigSqrt, nr.randn(m))
    z = z + zTot[-1]
    zTot = np.append(zTot, z)
    return zTot
def choleskyjitter(A):
    try:
        return(la.cholesky(A, lower=True))
    except Exception:
        pass

    n = len(A)
    maxscale = 10 * np.sum(A.diagonal())
    minscale = min(1 / 64, maxscale / 1024)
    scale = minscale

    if VERBOSE:
        print('\t', 'Jittering...')

    while scale < maxscale:
        try:
            jitA = scale * np.diag(np.random.rand(n))
            L = la.cholesky(A + jitA, lower=True)
            return(L)
        except Exception as e:
            scale += minscale

    raise ValueError("Jittering failed")
def test_check_finite(self):
    a = [[8, 2, 3], [2, 9, 3], [3, 3, 6]]
    c = cholesky(a, check_finite=False)
    assert_array_almost_equal(dot(transpose(c), c), a)
    c = transpose(c)
    a = dot(c, transpose(c))
    assert_array_almost_equal(cholesky(a, lower=1, check_finite=False), c)
def _update(self):
    sn2 = self._likelihood.s2
    su2 = sn2 / 1e6

    # kernel wrt the inducing points.
    Kuu = self._kernel.get(self._U)
    p = self._U.shape[0]

    # cholesky for the information gain. note that we only need to compute
    # this once as it is independent from the data.
    self._L = sla.cholesky(Kuu + su2 * np.eye(p))

    # evaluate the kernel and residuals at the new points
    Kux = self._kernel.get(self._U, self._X)
    kxx = self._kernel.dget(self._X)
    r = self._y - self._mean

    # the cholesky of Q.
    V = sla.solve_triangular(self._L, Kux, trans=True)

    # rescale everything by the diagonal matrix ell.
    ell = np.sqrt(kxx + sn2 - np.sum(V**2, axis=0))
    Kux /= ell
    V /= ell
    r /= ell

    # NOTE: to update things incrementally all we need to do is store these
    # components. A just needs to be initialized at the identity and then
    # we just accumulate here.
    self._A = np.eye(p) + np.dot(V, V.T)
    self._a = np.dot(Kux, r)

    # update the posterior.
    self._R = np.dot(sla.cholesky(self._A), self._L)
    self._b = sla.solve_triangular(self._R, self._a, trans=True)
def geigen(Amat, Bmat, Cmat):
    """
    generalized eigenvalue problem of the form

    max tr L'AM / sqrt(tr L'BL tr M'CM) w.r.t. L and M

    :param Amat numpy ndarray of shape (M,N)
    :param Bmat numpy ndarray of shape (M,N)
    :param Cmat numpy ndarray of shape (M,N)

    :rtype: numpy ndarray
    :return values: eigenvalues
    :return Lmat: left eigenvectors
    :return Mmat: right eigenvectors

    """
    if Bmat.shape[0] != Bmat.shape[1]:
        print("BMAT is not square.\n")
        sys.exit(1)

    if Cmat.shape[0] != Cmat.shape[1]:
        print("CMAT is not square.\n")
        sys.exit(1)

    p = Bmat.shape[0]
    q = Cmat.shape[0]

    s = min(p, q)
    tmp = fabs(Bmat - Bmat.transpose())
    tmp1 = fabs(Bmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("BMAT not symmetric..\n")
        sys.exit(1)

    tmp = fabs(Cmat - Cmat.transpose())
    tmp1 = fabs(Cmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("CMAT not symmetric..\n")
        sys.exit(1)

    Bmat = (Bmat + Bmat.transpose()) / 2.
    Cmat = (Cmat + Cmat.transpose()) / 2.
    Bfac = cholesky(Bmat)
    Cfac = cholesky(Cmat)
    Bfacinv = inv(Bfac)
    Bfacinvt = Bfacinv.transpose()
    Cfacinv = inv(Cfac)
    Dmat = Bfacinvt.dot(Amat).dot(Cfacinv)
    if p >= q:
        u, d, v = svd(Dmat)
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())
    else:
        u, d, v = svd(Dmat.transpose())
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())

    return values, Lmat, Mmat
def choleskyjitter(A, overwrite_a=False, check_finite=True):
    """Add jitter stochastically until a positive definite matrix occurs"""
    # Avoid preparing for jittering if we can already find the cholesky
    # with no problem
    try:
        return la.cholesky(A, lower=True, overwrite_a=overwrite_a,
                           check_finite=check_finite)
    except Exception:
        pass

    # Prepare for jittering (all the magic numbers here are arbitrary...)
    n = A.shape[0]
    maxscale = 1e10
    minscale = 1e-4
    scale = minscale

    # Keep jittering stochastically, increasing the jitter magnitude along
    # the way, until it's all good
    while scale < maxscale:
        try:
            jitA = scale * np.diag(np.random.rand(n))
            L = la.cholesky(A + jitA, lower=True, overwrite_a=overwrite_a,
                            check_finite=check_finite)
            return L
        except la.LinAlgError:
            scale *= 1.01
            log.warning('Jitter added stochastically. Scale: %f!' % scale)

    raise la.LinAlgError("Max value of jitter reached")
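# A hedged usage sketch for the jittered Cholesky above (assumes
# `import scipy.linalg as la`, `import numpy as np`, and a module-level
# `log`): a rank-deficient Gram matrix may fail a plain Cholesky, but succeeds
# once a small random diagonal jitter is added.
import logging
import numpy as np
import scipy.linalg as la

log = logging.getLogger(__name__)

X = np.random.randn(6, 3)
K = X @ X.T                   # PSD but singular (rank 3 < 6)
L = choleskyjitter(K)         # plain la.cholesky(K) may raise LinAlgError here
assert np.allclose(np.tril(L), L)  # returned factor is lower triangular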
def _initialize(self, X, resp):
    """Initialization of the Gaussian mixture parameters.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    resp : array-like, shape (n_samples, n_components)
    """
    n_samples, _ = X.shape

    weights, means, covariances = _estimate_gaussian_parameters(
        X, resp, self.reg_covar, self.covariance_type)
    weights /= n_samples

    self.weights_ = (weights if self.weights_init is None
                     else self.weights_init)
    self.means_ = means if self.means_init is None else self.means_init

    if self.precisions_init is None:
        self.covariances_ = covariances
        self.precisions_cholesky_ = _compute_precision_cholesky(
            covariances, self.covariance_type)
    elif self.covariance_type == 'full':
        self.precisions_cholesky_ = np.array(
            [linalg.cholesky(prec_init, lower=True)
             for prec_init in self.precisions_init])
    elif self.covariance_type == 'tied':
        self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                    lower=True)
    else:
        self.precisions_cholesky_ = self.precisions_init
def log_multivariate_normal_density(X, means, covars, min_covar=1.e-7):
    """Log probability for full covariance matrices. """
    if hasattr(linalg, 'solve_triangular'):
        # only in scipy since 0.9
        solve_triangular = linalg.solve_triangular
    else:
        # slower, but works
        solve_triangular = linalg.solve
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(itertools.izip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations, we need to reinitialize this component
            cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                      lower=True)
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                 n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
def isposdef(X):
    "Return if matrix is positive definite. Relies on cholesky decomp"
    try:
        la.cholesky(X)  # will raise LinAlgError if not positive def
        return True
    except la.LinAlgError:
        return False
def test_simple(self):
    a = [[8, 2, 3], [2, 9, 3], [3, 3, 6]]
    c = cholesky(a)
    assert_array_almost_equal(dot(transpose(c), c), a)
    c = transpose(c)
    a = dot(c, transpose(c))
    assert_array_almost_equal(cholesky(a, lower=1), c)
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    N, P = Y.shape
    """ initializes parameters """
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    conv2 = SP.ones((P, P), dtype=bool)
    rho_g = SP.ones((P, P))
    rho_n = SP.ones((P, P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print '.. fitting correlation (%d,%d)' % (p1, p2)
            gp.setY(Y[:, [p1, p2]])
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1, 0]), 1e-6 * SP.randn(), SP.sqrt(RV['varST'][p2, 0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1, 1]), 1e-6 * SP.randn(), SP.sqrt(RV['varST'][p2, 1])])
            params0 = {'Cg': Cg_params0, 'Cn': Cn_params0}
            conv2[p1, p2], info = OPT.opt_hyper(gp, params0, factr=1e3)
            rho_g[p1, p2] = Cg.K()[0, 1] / SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1, p2] = Cn.K()[0, 1] / SP.sqrt(Cn.K().diagonal().prod())
            conv2[p2, p1] = conv2[p1, p2]
            rho_g[p2, p1] = rho_g[p1, p2]
            rho_n[p2, p1] = rho_n[p1, p2]
    RV['Cg0'] = rho_g * SP.dot(SP.sqrt(RV['varST'][:, 0:1]), SP.sqrt(RV['varST'][:, 0:1].T))
    RV['Cn0'] = rho_n * SP.dot(SP.sqrt(RV['varST'][:, 1:2]), SP.sqrt(RV['varST'][:, 1:2].T))
    RV['conv2'] = conv2
    # 3. regularizes covariance matrices
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(), 0)) + 1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(), 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + offset_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + offset_n * SP.eye(P)
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def simulate_lu_decom(sim_locations, sample_locations, vmodel):
    c11 = fill_cova(sample_locations, None, vmodel)
    c21 = fill_cova(sim_locations, sample_locations, vmodel)
    c22 = fill_cova(sim_locations, None, vmodel)

    u11 = cholesky(c11)
    l11 = u11.T
    u11_inv = inv(u11)
    l21 = c21 @ u11_inv
    u12 = l21.T
    l22 = cholesky(c22 - l21 @ u12, lower=True)
    return u11_inv.T, l21, l22

    # NOTE: the code below is unreachable (an alternative LU-based variant
    # left in place after the return above).
    l11, u11 = lu(c11, permute_l=True)
    l11_inv = inv(l11)
    a21t = l11_inv @ c21.T
    a21 = a21t.T
    b12 = a21t
    l22, u22 = lu(c22 - l21 @ u12, permute_l=True)
    return a21, l11_inv, l22
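# A hedged sketch (not from the original source) of how the three factors
# returned above are typically used in LU conditional simulation: with
# zero-mean data values y at the sample locations, a conditional realization
# at the simulation locations is L21 @ (L11^-1 y) + L22 @ w, whose residual
# covariance is L22 @ L22.T.
import numpy as np

def draw_conditional(l11_inv, l21, l22, y, rng=None):
    if rng is None:
        rng = np.random.default_rng()
    w = rng.standard_normal(l22.shape[0])   # independent standard normals
    return l21 @ (l11_inv @ y) + l22 @ w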
def update(self, ncov):
    """updates the covariance matrix and recalculates internals

    :Parameters:
        ncov : ndarray
            symmetric matrix, noise covariance
    """
    # checks
    if ncov.ndim != 2 or ncov.shape[0] != ncov.shape[1]:
        raise ValueError('noise covariance is not a symmetric, '
                         'pos. definite matrix')

    # inits
    self.input_dim = ncov.shape[0]
    self._ncov = ncov
    self._chol_ncov = None
    self._inv_chol_ncov = None

    # compute cholesky decomposition
    try:
        self._chol_ncov = sp_la.cholesky(self._ncov)
    except:
        self._ncov = coloured_loading(self._ncov, 50)
        self._chol_ncov = sp_la.cholesky(self._ncov)
    # invert
    self._inv_chol_ncov = sp_la.inv(self._chol_ncov)

    # set ready flag
    self._is_ready = True
def jitchol_old(A, maxtries=5):
    """
    :param A: An almost pd square matrix

    :rval L: the Cholesky decomposition of A

    .. note:

      Adds jitter to K, to enforce positive-definiteness
      if stuff breaks, please check:
      np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)

    """
    try:
        return linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        diagA = np.diag(A)
        if np.any(diagA < 0.):
            raise linalg.LinAlgError, "not pd: negative diagonal elements"
        jitter = diagA.mean() * 1e-6
        for i in range(1, maxtries + 1):
            print '\rWarning: adding jitter of {:.10e}  '.format(jitter),
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter,
                                       lower=True)
            except:
                jitter *= 10

        raise linalg.LinAlgError, "not positive definite, even with jitter."
def compute(self, x, yerr):
    """
    Compute and factorize the covariance matrix.

    :param x: ``(nsamples, ndim)``
        The independent coordinates of the data points.

    :param yerr: (optional) ``(nsamples,)`` or scalar
        The Gaussian uncertainties on the data points at coordinates
        ``x``. These values will be added in quadrature to the diagonal of
        the covariance matrix.
    """
    # Compute the kernel matrix.
    K = self.kernel.value(x)
    K[np.diag_indices_from(K)] += yerr ** 2

    # Factor the matrix and compute the log-determinant.
    # TODO CPD 25.01.15: added check on cholesky ala sklearn to account for
    # linalg error... Also removed overwrite_a
    try:
        self._factor = (cholesky(K), False)
    except LinAlgError:
        # `min_covar` is assumed to be defined at module level in the
        # original source.
        self._factor = (cholesky(K + min_covar * np.eye(K.shape[0])), False)
    ##self._factor = (cholesky(K, overwrite_a=True, lower=False), False)
    self.log_determinant = 2 * np.sum(np.log(np.diag(self._factor[0])))
    self.computed = True
def get_sigma_points_mean_cov(pts, weights):
    '''Find mean and covariance of a set of weighted sigma points pts

    Returns
    -------
    mean, cov
        np arrays (n,1) and (n,n) respectively
    '''
    n = pts.shape[0]
    n_pts = pts.shape[1]
    mean = np.sum(pts * weights, axis=1)[:, np.newaxis]
    cov = np.dot(weights * (pts - mean), (pts - mean).T)
    # Sometimes if kappa < 0, cov may become non positive semi-definite. If so,
    # approximate 'covariance' matrix according to UKF paper Julier 1997
    try:
        # check positive semi-definiteness
        la.cholesky(cov)
    except la.LinAlgError:
        print 'Covariance matrix is not positive semi-definite, approximating...'
        # take 'covariance' about propagated 0th sigma point instead of new mean
        X0 = pts[:, 0, np.newaxis]  # first sigma point
        cov = np.dot(weights * (pts - X0), (pts - X0).T)
        # Check positive semi-definiteness again (should always be)
        la.cholesky(cov)  # will throw LinAlgError if not
    return mean, cov
def __init__(self, mean, sigma):
    self.mean = mean
    self.sigma = sigma
    # NOTE: the original snippet referenced an undefined `sigmainv`;
    # computing it here so the attribute assignments below are well defined.
    sigmainv = linalg.inv(sigma)
    self.sigmainv = sigmainv
    self.cholsigma = linalg.cholesky(sigma)
    # the following makes it lower triangular with increasing time
    self.cholsigmainv = linalg.cholesky(sigmainv)[::-1, ::-1]
def jitChol(A, maxTries=10, warning=True):
    """Do a Cholesky decomposition with jitter.

    Description:

    U, jitter = jitChol(A, maxTries, warning) attempts a Cholesky
     decomposition on the given matrix, if matrix isn't positive
     definite the function adds 'jitter' and tries again. Thereafter
     the amount of jitter is multiplied by 10 each time it is added
     again. This is continued for a maximum of 10 times. The amount of
     jitter added is returned.
     Returns:
      U - the Cholesky decomposition for the matrix.
      jitter - the amount of jitter that was added to the matrix.
     Arguments:
      A - the matrix for which the Cholesky decomposition is required.
      maxTries - the maximum number of times that jitter is added
       before giving up (default 10).
      warning - whether to give a warning for adding jitter (default
       is True)

    See also
    CHOL, PDINV, LOGDET

    Copyright (c) 2005, 2006 Neil D. Lawrence
    """
    warning = True
    jitter = 0
    i = 0

    while(True):
        try:
            # Try --- need to check A is positive definite
            if jitter == 0:
                jitter = abs(SP.trace(A)) / A.shape[0] * 1e-6
                LC = linalg.cholesky(A, lower=True)
                return LC.T, 0.0
            else:
                if warning:
                    # pdb.set_trace()
                    # plt.figure()
                    # plt.imshow(A, interpolation="nearest")
                    # plt.colorbar()
                    # plt.show()
                    logging.error("Adding jitter of %f in jitChol()." % jitter)
                LC = linalg.cholesky(A + jitter * SP.eye(A.shape[0]), lower=True)
                return LC.T, jitter
        except linalg.LinAlgError:
            # Seems to have been non-positive definite.
            if i < maxTries:
                jitter = jitter * 10
            else:
                raise linalg.LinAlgError, "Matrix non positive definite, jitter of " + str(jitter) + " added but failed after " + str(i) + " trials."
        i += 1
    return LC
def posterior_y(self, predict_times=None, predict_periods=None):
    K_train = self.s_y * self.K_y
    K_gold = K_train[self.goldidxs, :][:, self.goldidxs]

    if not np.any(predict_times) or not np.any(predict_periods):
        K_predict = K_train
        silveridxs = self.silveridxs
        testidxs = self.silveridxs
    else:
        predict_times = np.concatenate((self.times[self.silveridxs], predict_times), axis=0)
        distances = predict_times - predict_times.T  # Ntest x N
        nonmatchingperiods = (predict_periods - predict_periods.T) != 0
        distances[nonmatchingperiods] = np.inf
        K_predict = self.sqexpkernel(distances, self.l_y) + 1e-6 * np.eye(self.N)
        silveridxs = np.arange(1, np.sum(self.silveridxs))
        testidxs = np.arange(np.sum(self.silveridxs), len(predict_times))

    # update the prior mean
    v_obs_y = np.var(self.y)
    self.mu0_y = (self.m_mu0_y * v_obs_y + np.mean(self.y) * self.v_mu0_y) / (self.v_mu0_y + v_obs_y)
    print "mu0_y = %.3f" % self.mu0_y

    # learn from the training labels
    innovation = self.y[self.goldidxs, :] - self.mu0_y
    L_y = cholesky(K_gold, lower=True, check_finite=False)
    B = solve_triangular(L_y, innovation, lower=True, overwrite_b=True, check_finite=False)
    A = solve_triangular(L_y.T, B, overwrite_b=True, check_finite=False)
    V = solve_triangular(L_y, K_predict[:, self.goldidxs].T, lower=True, check_finite=False)
    mu = self.mu0_y + K_predict[testidxs][:, self.goldidxs].dot(A)
    cov = K_predict - V.T.dot(V)

    # now update the test indexes from the x observations
    for f in range(self.F):
        mu_fminus1 = mu
        cov_f = cov[silveridxs][:, silveridxs]  # + 1e-6 * np.eye(len(mu)) # jitter

        # observation minus prior over forecasters' predictions
        innovation = self.x[self.silveridxs, f:f+1] - (mu_fminus1 * self.a[self.silveridxs, f:f+1]
                     + self.c[self.silveridxs, f:f+1] + self.e[self.silveridxs, f:f+1])
        print np.min(innovation)

        a_diag = np.diag(self.a[self.silveridxs, f])

        var_a = np.diag(np.diag(self.cov_a[f, self.silveridxs][:, self.silveridxs]))
        var_a = np.diag(mu_fminus1.reshape(-1)).dot(var_a).dot(np.diag(mu_fminus1.reshape(-1)).T)
        var_e = np.diag(np.diag(self.cov_e[f][self.silveridxs][:, self.silveridxs]))
        var_c = np.diag(np.diag(self.cov_c[f, self.silveridxs][:, self.silveridxs]))
        S_y = cov_f + var_a + var_e + var_c

        L_y = cholesky(S_y, lower=True, check_finite=False)
        B = solve_triangular(L_y, innovation, lower=True, overwrite_b=True, check_finite=False)
        A = solve_triangular(L_y.T, B, overwrite_b=True, check_finite=False)
        V = solve_triangular(L_y, a_diag.dot(cov[silveridxs, :]), lower=True, overwrite_b=True, check_finite=False)

        mu = mu_fminus1 + cov[silveridxs][:, silveridxs].dot(a_diag).dot(A)
        cov = cov - V.T.dot(V)

    return mu, cov[testidxs][:, testidxs]
def test_simple_complex(self):
    m = array([[3+1j, 3+4j, 5], [0, 2+2j, 2+7j], [0, 0, 7+4j]])
    a = dot(transpose(conjugate(m)), m)
    c = cholesky(a)
    a1 = dot(transpose(conjugate(c)), c)
    assert_array_almost_equal(a, a1)
    c = transpose(c)
    a = dot(c, transpose(conjugate(c)))
    assert_array_almost_equal(cholesky(a, lower=1), c)
def factor(X, rho, mu=0.0):
    n, d = X.shape
    if n >= d:
        L = la.cholesky((2. / n) * np.dot(X.T, X) +
                        (2. * mu + rho) * np.eye(d), lower=True)
    else:
        L = la.cholesky(np.eye(n) + (2. / (rho * n)) * np.dot(X, X.T),
                        lower=True)
    return L, L.T  # L, U
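# A hedged usage sketch (assumes `import numpy as np` and
# `import scipy.linalg as la`): in ADMM-style solvers the factor above is
# computed once and reused with two triangular solves per iteration. For the
# n >= d branch this solves ((2/n) X^T X + (2*mu + rho) I) z = b.
import numpy as np
import scipy.linalg as la

X = np.random.randn(50, 5)
rho, mu = 1.0, 0.1
L, U = factor(X, rho, mu)
b = np.random.randn(5)
z = la.solve_triangular(U, la.solve_triangular(L, b, lower=True))
assert np.allclose(((2. / 50) * X.T @ X + (2. * mu + rho) * np.eye(5)) @ z, b)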
def pseudo_determinant(S, thres=1e-3, min_covar=1.e-7):
    n_dim = S.shape[0]
    try:
        S_chol = linalg.cholesky(S, lower=True)
    except linalg.LinAlgError:
        # The model is most probably stuck in a component with too
        # few observations, we need to reinitialize this component
        S_chol = linalg.cholesky(S + min_covar * np.eye(n_dim), lower=True)
    S_chol_diag = np.diag(S_chol)

    return np.prod(S_chol_diag[S_chol_diag > thres]) ** 2
def mvnrewards(nstates, nactions, mu, covmat):
    """Create a random reward structure for an (nstates, nactions) MDP
    where the rewards for each pair of tasks are correlated according to
    the specified covariance matrix."""
    # make sure covmat is positive definite; raise an exception if it
    # isn't. Note that the multivariate_normal call succeeds either way
    # but the results aren't physically meaningful if the matrix isn't
    # semi-positive definite, and we'd rather bail than generate data
    # that doesn't match what the user asked for.
    sla.cholesky(covmat)
    ntasks = covmat.shape[0]
    rewards = npr.multivariate_normal(mu, covmat, (nstates, nactions))
    return rewards
def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                      lower=True)
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                 n_dim * np.log(2 * np.pi) + cv_log_det)
    return log_prob
def test_random_complex(self):
    n = 20
    for k in range(2):
        m = random([n, n]) + 1j * random([n, n])
        for i in range(n):
            m[i, i] = 20 * (.1 + abs(m[i, i]))
        a = dot(transpose(conjugate(m)), m)
        c = cholesky(a)
        a1 = dot(transpose(conjugate(c)), c)
        assert_array_almost_equal(a, a1)
        c = transpose(c)
        a = dot(c, transpose(conjugate(c)))
        assert_array_almost_equal(cholesky(a, lower=1), c)
def test_random(self):
    n = 20
    for k in range(2):
        m = random([n, n])
        for i in range(n):
            m[i, i] = 20 * (.1 + m[i, i])
        a = dot(transpose(m), m)
        c = cholesky(a)
        a1 = dot(transpose(c), c)
        assert_array_almost_equal(a, a1)
        c = transpose(c)
        a = dot(c, transpose(c))
        assert_array_almost_equal(cholesky(a, lower=1), c)
def grad_optimize_ei(self, cand, comp, pend, vals, compute_grad=True):
    if pend.shape[0] == 0:
        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # The primary covariances for prediction.
        comp_cov = self.cov(comp)
        cand_cross = self.cov(comp, cand)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)

        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
        func_s = np.sqrt(func_v)
        u = (best - func_m) / func_s
        ncdf = sps.norm.cdf(u)
        npdf = sps.norm.pdf(u)
        ei = func_s * (u * ncdf + npdf)

        if not compute_grad:
            return ei

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5 * npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = np.dot(
            -2 * spla.cho_solve((obsv_chol, True), cand_cross).transpose(),
            grad_cross)

        grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m + grad_xp_v * g_ei_s2)
        ei = -np.sum(ei)

        return ei, grad_xp.flatten()

    else:
        # If there are pending experiments, fantasize their outcomes.
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # Create a composite vector of complete and pending.
        comp_pend = np.concatenate((comp, pend))

        # Compute the covariance and Cholesky decomposition.
        comp_pend_cov = (self.cov(comp_pend) +
                         self.noise * np.eye(comp_pend.shape[0]))
        comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

        # Compute submatrices.
        pend_cross = self.cov(comp, pend)
        pend_kappa = self.cov(pend)

        # Use the sub-Cholesky.
        obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta = spla.cho_solve((obsv_chol, True), pend_cross)

        # Finding predictive means and variances.
        pend_m = np.dot(pend_cross.T, alpha) + self.mean
        pend_K = pend_kappa - np.dot(pend_cross.T, beta)

        # Take the Cholesky of the predictive covariance.
        pend_chol = spla.cholesky(pend_K, lower=True)

        # Make predictions.
        npr.set_state(self.randomstate)
        pend_fant = np.dot(
            pend_chol,
            npr.randn(pend.shape[0], self.pending_samples)) + pend_m[:, None]

        # Include the fantasies.
        fant_vals = np.concatenate(
            (np.tile(vals[:, np.newaxis], (1, self.pending_samples)), pend_fant))

        # Compute bests over the fantasies.
        bests = np.min(fant_vals, axis=0)

        # Now generalize from these fantasies.
        cand_cross = self.cov(comp_pend, cand)
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

        # Solve the linear systems.
        alpha = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
        beta = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
        func_s = np.sqrt(func_v[:, np.newaxis])
        u = (bests[np.newaxis, :] - func_m) / func_s
        ncdf = sps.norm.cdf(u)
        npdf = sps.norm.pdf(u)
        ei = func_s * (u * ncdf + npdf)

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5 * npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = np.dot(
            -2 * spla.cho_solve(
                (comp_pend_chol, True), cand_cross).transpose(), grad_cross)

        grad_xp = 0.5 * self.amp2 * (
            grad_xp_m * np.tile(g_ei_m, (comp.shape[1], 1)).T +
            (grad_xp_v.T * g_ei_s2).T)
        ei = -np.mean(ei, axis=1)
        grad_xp = np.mean(grad_xp, axis=0)

        return ei, grad_xp.flatten()
def chol_metric(self, state):
    return sla.cholesky(self.metric(state), True)
def sim_frac_gaussian_noise(n_seconds, fs, chi=0, hurst=None):
    """Simulate a timeseries as fractional gaussian noise.

    Parameters
    ----------
    n_seconds : float
        Simulation time, in seconds.
    fs : float
        Sampling rate of simulated signal, in Hz.
    chi: float, optional, default: 0
        Desired power law exponent of the spectrum of the signal.
        Must be in the range (-1, 1).
    hurst : float, optional, default: None
        Desired Hurst parameter, which must be in the range (0, 1).
        If provided, this value overwrites the `chi` parameter.

    Returns
    -------
    sig: 1d array
        Simulated fractional gaussian noise time series.

    Notes
    -----
    The time series can be specified with either a desired power law exponent,
    or alternatively with a specified Hurst parameter.

    The Hurst parameter is not the Hurst exponent as defined in rescaled range analysis.
    The Hurst parameter is defined for self-similar processes such that Y(at) = a^H Y(t)
    for all a > 0, where this equality holds in distribution.

    The relationship between the power law exponent chi and the Hurst parameter
    for fractional gaussian noise is chi = 2 * hurst - 1.

    For more information, consult [1]_.

    References
    ----------
    .. [1] Eke, A., Herman, P., Kocsis, L., & Kozak, L. R. (2002). Fractal characterization of
           complexity in temporal physiological signals. Physiological Measurement, 23(1), R1–R38.
           DOI: https://doi.org/10.1088/0967-3334/23/1/201

    Examples
    --------
    Simulate fractional gaussian noise with a power law decay of 0 (white noise):

    >>> sig = sim_frac_gaussian_noise(n_seconds=1, fs=500, chi=0)

    Simulate fractional gaussian noise with a Hurst parameter of 0.5 (also white noise):

    >>> sig = sim_frac_gaussian_noise(n_seconds=1, fs=500, hurst=0.5)
    """

    if hurst is not None:
        check_param_range(hurst, 'hurst', (0, 1))
    else:
        check_param_range(chi, 'chi', (-1, 1))

        # Infer the hurst parameter from chi
        hurst = (-chi + 1.) / 2

    # Compute the number of samples for the simulated time series
    n_samples = compute_nsamples(n_seconds, fs)

    # Define helper function for computing the auto-covariance
    def autocov(hurst):
        return lambda k: 0.5 * (np.abs(k - 1) ** (2 * hurst) - 2 *
                                k ** (2 * hurst) + (k + 1) ** (2 * hurst))

    # Build the autocovariance matrix
    gamma = np.arange(0, n_samples)
    gamma = np.apply_along_axis(autocov(hurst), 0, gamma)
    autocov_matrix = toeplitz(gamma)

    # Use the Cholesky factor to transform white noise to get the desired time series
    white_noise = np.random.randn(n_samples)
    cholesky_factor = cholesky(autocov_matrix, lower=True)
    sig = cholesky_factor @ white_noise

    return sig
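# A small hedged check of the construction used above (pure numpy/scipy, not
# part of the original library): the lower Cholesky factor L of the fGn
# Toeplitz autocovariance satisfies L @ L.T == autocov_matrix, so L applied to
# white noise has exactly the target autocovariance in expectation.
import numpy as np
from scipy.linalg import cholesky, toeplitz

hurst = 0.7
k = np.arange(8)
gamma = 0.5 * (np.abs(k - 1) ** (2 * hurst) - 2 * k ** (2 * hurst)
               + (k + 1) ** (2 * hurst))
acov = toeplitz(gamma)
L = cholesky(acov, lower=True)
assert np.allclose(L @ L.T, acov)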
def reduced_likelihood_function(self, theta=None):
    """
    This function determines the BLUP parameters and evaluates the reduced
    likelihood function for the given autocorrelation parameters theta.

    Maximizing this function wrt the autocorrelation parameters theta is
    equivalent to maximizing the likelihood of the assumed joint Gaussian
    distribution of the observations y evaluated onto the design of
    experiments X.

    Parameters
    ----------
    theta : array_like, optional
        An array containing the autocorrelation parameters at which the
        Gaussian Process model parameters should be determined.
        Default uses the built-in autocorrelation parameters
        (ie theta = self.theta).

    Returns
    -------
    reduced_likelihood_function_value : double
        The value of the reduced likelihood function associated to the
        given autocorrelation parameters theta.

    par : dict
        A dictionary containing the requested Gaussian Process model
        parameters:

            sigma2
                    Gaussian Process variance.
            beta
                    Generalized least-squares regression weights for
                    Universal Kriging or given beta0 for Ordinary
                    Kriging.
            gamma
                    Gaussian Process weights.
            C
                    Cholesky decomposition of the correlation matrix [R].
            Ft
                    Solution of the linear equation system : [R] x Ft = F
            G
                    QR decomposition of the matrix Ft.
    """
    if theta is None:
        # Use built-in autocorrelation parameters
        theta = self.theta

    # Initialize output
    reduced_likelihood_function_value = -np.inf
    par = {}

    # Retrieve data
    n_samples = self.X.shape[0]
    D = self.D
    ij = self.ij
    F = self.F

    if D is None:
        # Light storage mode (need to recompute D, ij and F)
        D, ij = l1_cross_distances(self.X)
        if np.min(np.sum(D, axis=1)) == 0. \
                and self.corr != correlation.pure_nugget:
            raise Exception("Multiple X are not allowed")
        F = self.regr(self.X)

    # Set up R
    r = self.corr(theta, D)
    R = np.eye(n_samples) * (1. + self.nugget)
    R[ij[:, 0], ij[:, 1]] = r
    R[ij[:, 1], ij[:, 0]] = r

    # Cholesky decomposition of R
    try:
        C = linalg.cholesky(R, lower=True)
    except linalg.LinAlgError:
        return reduced_likelihood_function_value, par

    # Get generalized least squares solution
    Ft = solve_triangular(C, F, lower=True)
    try:
        Q, G = linalg.qr(Ft, econ=True)
    except:
        #/usr/lib/python2.6/dist-packages/scipy/linalg/decomp.py:1177:
        # DeprecationWarning: qr econ argument will be removed after scipy
        # 0.7. The economy transform will then be available through the
        # mode='economic' argument.
        Q, G = linalg.qr(Ft, mode='economic')
        pass

    sv = linalg.svd(G, compute_uv=False)
    rcondG = sv[-1] / sv[0]
    if rcondG < 1e-10:
        # Check F
        sv = linalg.svd(F, compute_uv=False)
        condF = sv[0] / sv[-1]
        if condF > 1e15:
            raise Exception("F is too ill conditioned. Poor combination "
                            "of regression model and observations.")
        else:
            # Ft is too ill conditioned, get out (try different theta)
            return reduced_likelihood_function_value, par

    Yt = solve_triangular(C, self.y, lower=True)
    if self.beta0 is None:
        # Universal Kriging
        beta = solve_triangular(G, np.dot(Q.T, Yt))
    else:
        # Ordinary Kriging
        beta = np.array(self.beta0)

    rho = Yt - np.dot(Ft, beta)
    sigma2 = (rho ** 2.).sum(axis=0) / n_samples
    # The determinant of R is equal to the squared product of the diagonal
    # elements of its Cholesky decomposition C
    detR = (np.diag(C) ** (2. / n_samples)).prod()

    # Compute/Organize output
    reduced_likelihood_function_value = -sigma2.sum() * detR
    par['sigma2'] = sigma2 * self.y_std ** 2.
    par['beta'] = beta
    par['gamma'] = solve_triangular(C.T, rho)
    par['C'] = C
    par['Ft'] = Ft
    par['G'] = G

    return reduced_likelihood_function_value, par
def fit(self, X, y, ncores=1):
    """Fit a Gaussian process regression model on training forces

    Args:
        X (list): training configurations
        y (np.ndarray): training forces
        ncores (int): number of CPU workers to use, default is 1
    """
    self.kernel_ = self.kernel
    self.X_train_ = X
    self.y_train_ = np.reshape(y, (y.shape[0] * 3, 1))

    # if self.optimizer is not None:
    #     # Choose hyperparameters based on maximizing the log-marginal
    #     # likelihood (potentially starting from several initial values)
    #     def obj_func(theta, eval_gradient=True):
    #         if eval_gradient:
    #             lml, grad = self.log_marginal_likelihood(
    #                 theta, eval_gradient=True)
    #             return -lml, -grad
    #         else:
    #             return -self.log_marginal_likelihood(theta)
    #
    #     # First optimize starting from theta specified in kernel_
    #     optima = [(self._constrained_optimization(obj_func,
    #                                               self.kernel_.theta,
    #                                               self.kernel_.bounds))]
    #
    #     # Additional runs are performed from log-uniform chosen initial
    #     # theta
    #     if self.n_restarts_optimizer > 0:
    #         if not np.isfinite(self.kernel_.bounds).all():
    #             raise ValueError(
    #                 "Multiple optimizer restarts (n_restarts_optimizer>0) "
    #                 "requires that all bounds are finite.")
    #         bounds = self.kernel_.bounds
    #         for iteration in range(self.n_restarts_optimizer):
    #             theta_initial = \
    #                 self._rng.uniform(bounds[:, 0], bounds[:, 1])
    #             optima.append(
    #                 self._constrained_optimization(obj_func, theta_initial,
    #                                                bounds))
    #     # Select result from run with minimal (negative) log-marginal
    #     # likelihood
    #     lml_values = list(map(itemgetter(1), optima))
    #     self.kernel_.theta = optima[np.argmin(lml_values)][0]
    #     self.log_marginal_likelihood_value_ = -np.min(lml_values)
    # else:
    #     self.log_marginal_likelihood_value_ = \
    #         self.log_marginal_likelihood(self.kernel_.theta)

    # Precompute quantities required for predictions which are independent
    # of actual query points
    K = self.kernel_.calc_gram(self.X_train_, ncores)
    K[np.diag_indices_from(K)] += self.noise

    try:
        # Use Cholesky decomposition to build the lower triangular matrix
        self.L_ = cholesky(K, lower=True)
    except np.linalg.LinAlgError as exc:
        exc.args = ("The kernel, %s, is not returning a "
                    "positive definite matrix. Try gradually "
                    "increasing the 'noise' parameter of your "
                    "GaussianProcessRegressor estimator."
                    % self.kernel_,) + exc.args
        raise

    # Calculate the alpha weights using the Cholesky method
    self.alpha_ = cho_solve((self.L_, True), self.y_train_)
    self.K = K
    self.energy_alpha_ = None
    self.energy_K = None
    self.X_glob_train_ = None
    self.fitted[0] = 'force'
    self.n_train = len(self.y_train_) // 3

    return self
def balreal_iter_old(A, B, C, lowrank=True, tolSmith=1e-10, tolSVD=1e-6,
                     kmax=None, tolAbs=False):
    """
    Find balanced realisation of DLTI system.

    Notes:
    Lyapunov equations are solved using the iterative squared Smith
    algorithm, in its low or full rank version. These implementations are
    as per the low_rank_smith and smith_iter functions respectively but,
    for computational efficiency, the iterations are rewritten here so as
    to solve for the observability and controllability Gramians
    simultaneously.
    """

    ### Solve Lyapunov equations
    # Notation reminder:
    # scipy: A X A.T - X = -Q
    # contr: A W A.T - W = - B B.T
    # obser: A.T W A - W = - C.T C
    # low-rank smith: A.T X A - X = -Q Q.T

    if lowrank:  # low-rank square-Smith iteration (with SVD)

        # matrices size
        N = A.shape[0]
        rB = B.shape[1]
        rC = C.shape[0]

        # initialise smith iteration
        DeltaNorm = 1e6
        print('Iter\tMaxZhere')
        kk = 0
        Apow = A
        Qck = B
        Qok = C.T

        while DeltaNorm > tolSmith:
            ### compute products Ak^2 * Zk
            ### (use block Arnoldi)
            Qcright = np.dot(Apow, Qck)
            Qoright = np.dot(Apow.T, Qok)
            Apow = np.dot(Apow, Apow)

            ### enlarge Z matrices
            Qck = np.concatenate((Qck, Qcright), axis=1)
            Qok = np.concatenate((Qok, Qoright), axis=1)

            ### check convergence without reconstructing the added term
            MaxZhere = max(np.max(np.abs(Qoright)), np.max(np.abs(Qcright)))
            print('%.4d\t%.3e' % (kk, MaxZhere))
            DeltaNorm = MaxZhere

            # fixed columns chopping
            if kmax is None:
                # cheap SVD truncation
                if Qck.shape[1] > .4 * N or Qok.shape[1] > .4 * N:
                    Uc, svc = scalg.svd(Qck, full_matrices=False)[:2]
                    Uo, svo = scalg.svd(Qok, full_matrices=False)[:2]
                    if tolAbs:
                        rcmax = np.sum(svc > tolSVD)
                        romax = np.sum(svo > tolSVD)
                    else:
                        rcmax = np.sum(svc > tolSVD * svc[0])
                        romax = np.sum(svo > tolSVD * svo[0])
                    pmax = max(rcmax, romax)
                    Qck = Uc[:, :pmax] * svc[:pmax]
                    Qok = Uo[:, :pmax] * svo[:pmax]
                    # Qck_old=np.dot(Uc[:,:pmax],np.diag(svc[:pmax]))
                    # Qok_old=np.dot(Uo[:,:pmax],np.diag(svo[:pmax]))
                    # Qck=np.dot(Uc[:,:rcmax],np.diag(svc[:rcmax]))
                    # Qok=np.dot(Uo[:,:romax],np.diag(svo[:romax]))
            else:
                if Qck.shape[1] > kmax:
                    Uc, svc = scalg.svd(Qck, full_matrices=False)[:2]
                    Qck = Uc[:, :kmax] * svc[:kmax]
                if Qok.shape[1] > kmax:
                    Uo, svo = scalg.svd(Qok, full_matrices=False)[:2]
                    Qok = Uo[:, :kmax] * svo[:kmax]

            ### update
            kk = kk + 1

        del Apow
        Qc, Qo = Qck, Qok

    else:  # full-rank squared Smith iteration (with Cholesky)

        # first iteration
        Wc = np.dot(B, B.T)
        Wo = np.dot(C.T, C)
        Apow = A
        AXAobs = np.dot(np.dot(A.T, Wo), A)
        AXActrl = np.dot(np.dot(A, Wc), A.T)
        DeltaNorm = max(np.max(np.abs(AXAobs)), np.max(np.abs(AXActrl)))

        kk = 1
        print('Iter\tRes')
        while DeltaNorm > tolSmith:
            kk = kk + 1

            # update
            Wo = Wo + AXAobs
            Wc = Wc + AXActrl

            # incremental
            Apow = np.dot(Apow, Apow)
            AXAobs = np.dot(np.dot(Apow.T, Wo), Apow)
            AXActrl = np.dot(np.dot(Apow, Wc), Apow.T)
            DeltaNorm = max(np.max(np.abs(AXAobs)), np.max(np.abs(AXActrl)))
            print('%.4d\t%.3e' % (kk, DeltaNorm))

        # final update (useless in very low tolerance)
        Wo = Wo + AXAobs
        Wc = Wc + AXActrl

        # Cholesky factorisation: W = Q Q.T. If unsuccessful, directly solve
        # eigenvalue problem
        Qc = scalg.cholesky(Wc).T
        Qo = scalg.cholesky(Wo).T
        # # eigenvalues are normalised by one, hence Tinv and T matrices
        # # here are not scaled
        # ssq,Tinv,T=scalg.eig(np.dot(Wc,Wo),left=True,right=True)
        # Tinv=Tinv.T
        # #Tinv02=Tinv02.T
        # S=np.diag(np.sqrt(ssq))
        # return S,T,Tinv

    # find min size (only if iter used)
    cc, co = Qc.shape[1], Qo.shape[1]
    cmin = min(cc, co)
    print('cc=%.2d, co=%.2d' % (cc, co))

    # build M matrix and SVD
    M = np.dot(Qo.T, Qc)

    # ### not optimised
    # U,s,Vh=scalg.svd(M,full_matrices=True)
    # U,Vh,s=U[:,:cmin],Vh[:cmin,:],s[:cmin]
    # S=np.diag(s)
    # Sinv=np.diag(1./s)
    # V=Vh.T
    # # Build transformation matrices
    # T=np.dot(Qc,np.dot(V,np.sqrt(Sinv)))
    # Tinv=np.dot(np.sqrt(Sinv),np.dot(U.T,Qo.T))

    ### optimised
    U, s, Vh = scalg.svd(M, full_matrices=True)  # as M is square, full_matrices has no effect
    sinv = s ** (-0.5)
    T = np.dot(Qc, Vh.T * sinv)
    Tinv = np.dot((U * sinv).T, Qo.T)

    return s, T, Tinv
bk[0::DOF] = check
bk[1::DOF] = check
bk[2::DOF] = check
bu = ~bk  # defining unknown DOFs

# sub-matrices corresponding to unknown DOFs
Kuu = K[bu, :][:, bu]
Muu = M[bu, :][:, bu]

# solving generalized eigenvalue problem
# NOTE: extracting ALL eigenvectors
eigvals_g, U = eigh(a=Kuu, b=Muu)
wn_g = eigvals_g**0.5

# solving symmetric eigenvalue problem
L = cholesky(Muu, lower=True)
Linv = np.linalg.inv(L)
Kuutilde = (Linv @ Kuu) @ Linv.T

#NOTE checking if Kuutilde is symmetric
assert np.allclose(Kuutilde, Kuutilde.T)

eigvals_s, V = eigh(Kuutilde)
wn_s = eigvals_s**0.5

print('eigenvalues (wn_generalized**2)', wn_g[:3]**2)
print('eigenvalues (wn_symmetric**2)  ', wn_s[:3]**2)
print()
print('checks for U')
for I, J in [[0, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 2]]:
    print('I =', I, 'J =', J,
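# A self-contained hedged sketch (not part of the snippet above) of the same
# Cholesky reduction on a tiny 2-DOF system: K x = lam M x is turned into the
# standard symmetric problem (L^-1 K L^-T) y = lam y with M = L L^T, and the
# eigenvalues agree with the generalized solver.
import numpy as np
from scipy.linalg import cholesky, eigh

K = np.array([[4., -1.], [-1., 3.]])   # stiffness (SPD)
M = np.array([[2., 0.3], [0.3, 1.]])   # mass (SPD)

lam_g = eigh(K, M, eigvals_only=True)  # generalized eigenvalue problem

L = cholesky(M, lower=True)
Linv = np.linalg.inv(L)
Ktilde = Linv @ K @ Linv.T
lam_s = eigh(Ktilde, eigvals_only=True)

assert np.allclose(lam_g, lam_s)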
def log_marginal_likelihood(self, theta=None, eval_gradient=False):
    """Returns log-marginal likelihood of theta for training data.

    Parameters
    ----------
    theta : array-like, shape = (n_kernel_params,) or None
        Kernel hyperparameters for which the log-marginal likelihood is
        evaluated. If None, the precomputed log_marginal_likelihood
        of ``self.kernel_.theta`` is returned.

    eval_gradient : bool, default: False
        If True, the gradient of the log-marginal likelihood with respect
        to the kernel hyperparameters at position theta is returned
        additionally. If True, theta must not be None.

    Returns
    -------
    log_likelihood : float
        Log-marginal likelihood of theta for training data.

    log_likelihood_gradient : array, shape = (n_kernel_params,), optional
        Gradient of the log-marginal likelihood with respect to the kernel
        hyperparameters at position theta.
        Only returned when eval_gradient is True.
    """
    if theta is None:
        if eval_gradient:
            raise ValueError(
                "Gradient can only be evaluated for theta!=None")
        return self.log_marginal_likelihood_value_

    kernel = self.kernel_.clone_with_theta(theta)

    if eval_gradient:
        K, K_gradient = kernel(self.X_train_, eval_gradient=True)
    else:
        K = kernel(self.X_train_)

    K[np.diag_indices_from(K)] += self.alpha
    try:
        L = cholesky(K, lower=True)  # Line 2
    except np.linalg.LinAlgError:
        return (-np.inf, np.zeros_like(theta)) \
            if eval_gradient else -np.inf

    # Support multi-dimensional output of self.y_train_
    y_train = self.y_train_
    if y_train.ndim == 1:
        y_train = y_train[:, np.newaxis]

    alpha = cho_solve((L, True), y_train)  # Line 3

    # Compute log-likelihood (compare line 7)
    log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
    log_likelihood_dims -= np.log(np.diag(L)).sum()
    log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
    log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

    if eval_gradient:  # compare Equation 5.9 from GPML
        tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
        tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
        # Compute "0.5 * trace(tmp.dot(K_gradient))" without
        # constructing the full matrix tmp.dot(K_gradient) since only
        # its diagonal is required
        log_likelihood_gradient_dims = \
            0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
        log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

    if eval_gradient:
        return log_likelihood, log_likelihood_gradient
    else:
        return log_likelihood
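# A minimal hedged check (pure numpy/scipy, independent of the estimator
# above) of the two Cholesky identities the method relies on:
# log|K| = 2 * sum(log(diag(L))) and y^T K^-1 y = y^T cho_solve((L, True), y).
import numpy as np
from scipy.linalg import cholesky, cho_solve

rng = np.random.default_rng(1)
X = rng.normal(size=(6, 2))
K = np.exp(-0.5 * ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1))  # RBF Gram
K[np.diag_indices_from(K)] += 1e-2                                  # noise term
y = rng.normal(size=6)

L = cholesky(K, lower=True)
assert np.allclose(2 * np.log(np.diag(L)).sum(), np.linalg.slogdet(K)[1])
assert np.allclose(y @ cho_solve((L, True), y), y @ np.linalg.solve(K, y))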
def fit(self, X, y):
    """Fit Gaussian process regression model

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Training data

    y : array-like, shape = (n_samples, [n_output_dims])
        Target values

    Returns
    -------
    self : returns an instance of self.
    """
    if self.kernel is None:  # Use an RBF kernel as default
        self.kernel_ = C(1.0, constant_value_bounds="fixed") \
            * RBF(1.0, length_scale_bounds="fixed")
    else:
        self.kernel_ = clone(self.kernel)

    self.rng = check_random_state(self.random_state)

    X, y = check_X_y(X, y, multi_output=True, y_numeric=True)

    # Normalize target value
    if self.normalize_y:
        self.y_train_mean = np.mean(y, axis=0)
        # demean y
        y = y - self.y_train_mean
    else:
        self.y_train_mean = np.zeros(1)

    if np.iterable(self.alpha) \
            and self.alpha.shape[0] != y.shape[0]:
        if self.alpha.shape[0] == 1:
            self.alpha = self.alpha[0]
        else:
            raise ValueError(
                "alpha must be a scalar or an array"
                " with same number of entries as y.(%d != %d)"
                % (self.alpha.shape[0], y.shape[0]))

    self.X_train_ = np.copy(X) if self.copy_X_train else X
    self.y_train_ = np.copy(y) if self.copy_X_train else y

    if self.optimizer is not None and self.kernel_.n_dims > 0:
        # Choose hyperparameters based on maximizing the log-marginal
        # likelihood (potentially starting from several initial values)
        def obj_func(theta, eval_gradient=True):
            if eval_gradient:
                lml, grad = self.log_marginal_likelihood(
                    theta, eval_gradient=True)
                return -lml, -grad
            else:
                return -self.log_marginal_likelihood(theta)

        # First optimize starting from theta specified in kernel
        optima = [(self._constrained_optimization(obj_func,
                                                  self.kernel_.theta,
                                                  self.kernel_.bounds))]

        # Additional runs are performed from log-uniform chosen initial
        # theta
        if self.n_restarts_optimizer > 0:
            if not np.isfinite(self.kernel_.bounds).all():
                raise ValueError(
                    "Multiple optimizer restarts (n_restarts_optimizer>0) "
                    "requires that all bounds are finite.")
            bounds = self.kernel_.bounds
            for iteration in range(self.n_restarts_optimizer):
                theta_initial = \
                    self.rng.uniform(bounds[:, 0], bounds[:, 1])
                optima.append(
                    self._constrained_optimization(obj_func, theta_initial,
                                                   bounds))
        # Select result from run with minimal (negative) log-marginal
        # likelihood
        lml_values = list(map(itemgetter(1), optima))
        self.kernel_.theta = optima[np.argmin(lml_values)][0]
        self.log_marginal_likelihood_value_ = -np.min(lml_values)
    else:
        self.log_marginal_likelihood_value_ = \
            self.log_marginal_likelihood(self.kernel_.theta)

    # Precompute quantities required for predictions which are independent
    # of actual query points
    K = self.kernel_(self.X_train_)
    K[np.diag_indices_from(K)] += self.alpha
    self.L_ = cholesky(K, lower=True)  # Line 2
    self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3

    return self
def loglkl(self, cosmo, data): # class' Omega_m includes Omega_nu etc! # but Omega0_m doesn't! # ATTENTION: definition of Omega_m in CLASS has changed again: Omega_m = self.ba.Omega0_cdm+self.ba.Omega0_b # But I think Omega_m should also contain densities of other species!!! #Omega_m = cosmo.Omega_m() # this is now a copy of what is returned as Omega_m to MontePython: # that didn't work, because ".ba" is not available... #Omega_m = cosmo.ba.Omega0_b + cosmo.ba.Omega0_cdm + cosmo.ba.Omega0_ncdm_tot + cosmo.ba.Omega0_dcdm # Next try: # Omega_m() = self.ba.Omega0_cdm+self.ba.Omega0_b # Omega_nu = self.ba.Omega0_ncdm_tot # only contributions from decaying DM missing... # be careful though, if at some point Omega_m is defined again to contain every species' contribution # it does contain all species again in CLASS 2.5.0! #+ cosmo.Omega_nu # TODO: Always check definition of cosmo.Omega_m() in classy.pyx!!! self.Omega_m = cosmo.Omega_m() self.small_h = cosmo.h() # m-correction: # Errors on m-corrections for different z-bins are correlated, thus one free nuisance "m_corr" is enough, # We fix the amplitude to the 2\sigma range around the fiducial m-correction value from the lowest redshift-bin # for that and add the delta_m to all fiducial m-corrections, hence: param_name = 'm_corr' if param_name in data.mcmc_parameters: m_corr = data.mcmc_parameters[param_name][ 'current'] * data.mcmc_parameters[param_name]['scale'] #ATTENTION: sign matters and this order is the correct one for correlation if delta_m_corr is added! delta_m_corr = m_corr - self.m_corr_fiducial_per_zbin[0] # this is wrong! #m_corr_per_zbin = [m_corr_z1] #m_corr_per_zbin = [self.m_corr_fiducial_per_zbin[0] + delta_m_corr] m_corr_per_zbin = np.zeros(self.nzbins) for zbin in xrange(0, self.nzbins): m_corr_per_zbin[ zbin] = self.m_corr_fiducial_per_zbin[zbin] + delta_m_corr else: # if "m_corr" is not specified in input parameter script we just apply the fiducial m-correction values # if these could not be loaded, this vector contains only zeros! m_corr_per_zbin = self.m_corr_fiducial_per_zbin # draw m-correction now instead from a multivariate Gaussian taking the fully correlated errors into account: # this does not yield converging chains in reasonable runtimes (e.g. 3 z-bins > 1000 CPUh...) ''' if self.marginalize_over_multiplicative_bias: if self.nzbins > 1: m_corr_per_zbin = np.random.multivariate_normal(self.m_corr_fiducial_per_zbin, self.cov_m_corr) #print 'm-correction' #print self.m_corr_fiducial_per_zbin, self.cov_m_corr #print m_corr_per_zbin else: m_corr_per_zbin = np.random.normal(self.m_corr_fiducial_per_zbin, self.err_multiplicative_bias) else: m_corr_per_zbin = self.m_corr_fiducial_per_zbin ''' # needed for IA modelling: if ('A_IA' in data.mcmc_parameters) and ('exp_IA' in data.mcmc_parameters): amp_IA = data.mcmc_parameters['A_IA'][ 'current'] * data.mcmc_parameters['A_IA']['scale'] exp_IA = data.mcmc_parameters['exp_IA'][ 'current'] * data.mcmc_parameters['exp_IA']['scale'] intrinsic_alignment = True elif ('A_IA' in data.mcmc_parameters) and ('exp_IA' not in data.mcmc_parameters): amp_IA = data.mcmc_parameters['A_IA'][ 'current'] * data.mcmc_parameters['A_IA']['scale'] # redshift-scaling is turned off: exp_IA = 0. 
intrinsic_alignment = True else: intrinsic_alignment = False if intrinsic_alignment: self.rho_crit = self.get_critical_density() # derive the linear growth factor D(z) linear_growth_rate = np.zeros_like(self.redshifts) #print self.redshifts for index_z, z in enumerate(self.redshifts): try: # for CLASS ver >= 2.6: linear_growth_rate[ index_z] = cosmo.scale_independent_growth_factor(z) except: # my own function from private CLASS modification: linear_growth_rate[index_z] = cosmo.growth_factor_at_z(z) # normalize to unity at z=0: try: # for CLASS ver >= 2.6: linear_growth_rate /= cosmo.scale_independent_growth_factor(0.) except: # my own function from private CLASS modification: linear_growth_rate /= cosmo.growth_factor_at_z(0.) #residual noise correction amplitude: #param_name = 'A_noise' # zeros == False! A_noise = np.zeros(self.nzbins) add_noise_power = np.zeros(self.nzbins, dtype=bool) param_name = 'A_noise_corr' if param_name in data.mcmc_parameters: # assume correlated apmlitudes for the noise-power (i.e. same amplitude for all autocorrelations): A_noise[:] = data.mcmc_parameters[param_name][ 'current'] * data.mcmc_parameters[param_name]['scale'] add_noise_power[:] = True else: # assume uncorrelated amplitudes for the noise-power: for zbin in xrange(self.nzbins): param_name = 'A_noise_z{:}'.format(zbin + 1) if param_name in data.mcmc_parameters: A_noise[zbin] = data.mcmc_parameters[param_name][ 'current'] * data.mcmc_parameters[param_name]['scale'] add_noise_power[zbin] = True # this is not correct, if this is considered to be a calibration! ''' # this is all for B-mode power-law model: param_name1 = 'A_B_modes' param_name2 = 'exp_B_modes' use_B_mode_model = False if param_name1 in data.mcmc_parameters and param_name2 in data.mcmc_parameters: amp_BB = data.mcmc_parameters[param_name1]['current'] * data.mcmc_parameters[param_name1]['scale'] exp_BB = data.mcmc_parameters[param_name2]['current'] * data.mcmc_parameters[param_name2]['scale'] use_B_mode_model = True ''' # this was the fiducial approach for the first submission # the one above might be faster (and more consistent) if self.correct_resetting_bias: #A_B_modes = np.random.normal(self.best_fit_A_B_modes, self.best_fit_err_A_B_modes) #exp_B_modes = np.random.normal(self.best_fit_exp_B_modes, self.best_fit_err_exp_B_modes) amp_BB, exp_BB = np.random.multivariate_normal( self.params_resetting_bias, self.cov_resetting_bias) #print 'resetting bias' #print self.params_resetting_bias, self.cov_resetting_bias #print amp_BB, exp_BB # get distances from cosmo-module: r, dzdr = cosmo.z_of_r(self.redshifts) # 1) determine l-range for taking the sum, #l = l_high-l_min at least!!!: # this is the correct calculation! # for real data, I should start sum from physical scales, i.e., currently l>= 80! # TODO: Set this automatically!!! --> not automatically yet, but controllable via "myCFHTLenS_tomography.data"!!! # these are integer l-values over which we will take the sum used in the convolution with the band window matrix ells_min = self.ells_intp[0] ''' if self.key == 'data_XinPi': ells_sum = self.ell_bin_centers # TODO: This might cause trouble!!! ells_max = 5150. 
else: ells_max = self.ells_intp[-1] nells = int(ells_max - ells_min + 1) ells_sum = np.linspace(ells_min, ells_max, nells) ''' ells_max = self.ells_intp[-1] nells = int(ells_max - ells_min + 1) ells_sum = np.linspace(ells_min, ells_max, nells) # these are the l-nodes for the derivation of the theoretical Cl: ells = np.logspace(np.log10(ells_min), np.log10(ells_max), self.nellsmax) # After long and extensive testing: # Don't put calls to Class (i.e. cosmo...) into a loop... # before "pk" and the constants were just called at demand below in the code (due to convenience an copy & paste) # which seemed to have been the source for the memory leak... # Get power spectrum P(k=l/r,z(r)) from cosmological module # this doesn't really have to go into the loop over fields! pk = np.zeros((self.nellsmax, self.nzmax), 'float64') k_max_in_inv_Mpc = self.k_max_h_by_Mpc * self.small_h for index_ells in xrange(self.nellsmax): for index_z in xrange(1, self.nzmax): # standard Limber approximation: #k = ells[index_ells] / r[index_z] # extended Limber approximation (cf. LoVerde & Afshordi 2008): k_in_inv_Mpc = (ells[index_ells] + 0.5) / r[index_z] if k_in_inv_Mpc > k_max_in_inv_Mpc: pk_dm = 0. else: pk_dm = cosmo.pk(k_in_inv_Mpc, self.redshifts[index_z]) #pk[index_ells,index_z] = cosmo.pk(ells[index_ells]/r[index_z], self.redshifts[index_z]) if self.baryon_feedback: if 'A_bary' in data.mcmc_parameters: A_bary = data.mcmc_parameters['A_bary'][ 'current'] * data.mcmc_parameters['A_bary']['scale'] #print 'A_bary={:.4f}'.format(A_bary) pk[index_ells, index_z] = pk_dm * self.baryon_feedback_bias_sqr( k_in_inv_Mpc / self.small_h, self.redshifts[index_z], A_bary=A_bary) else: pk[index_ells, index_z] = pk_dm * self.baryon_feedback_bias_sqr( k_in_inv_Mpc / self.small_h, self.redshifts[index_z]) else: pk[index_ells, index_z] = pk_dm # for KiDS-450 constant biases in photo-z are not sufficient: if self.bootstrap_photoz_errors: # draw a random bootstrap n(z); borders are inclusive! random_index_bootstrap = np.random.randint( int(self.index_bootstrap_low), int(self.index_bootstrap_high) + 1) #print 'Bootstrap index:', random_index_bootstrap pz = np.zeros((self.nzmax, self.nzbins), 'float64') pz_norm = np.zeros(self.nzbins, 'float64') for zbin in xrange(self.nzbins): redshift_bin = self.redshift_bins[zbin] #ATTENTION: hard-coded subfolder! #index can be recycled since bootstraps for tomographic bins are independent! fname = os.path.join( self.data_directory, '{:}/bootstraps/{:}/n_z_avg_bootstrap{:}.hist'.format( self.photoz_method, redshift_bin, random_index_bootstrap)) z_hist, n_z_hist = np.loadtxt(fname, unpack=True) param_name = 'D_z{:}'.format(zbin + 1) if param_name in data.mcmc_parameters: z_mod = self.redshifts + data.mcmc_parameters[param_name][ 'current'] * data.mcmc_parameters[param_name]['scale'] else: z_mod = self.redshifts shift_to_midpoint = np.diff(z_hist)[0] / 2. spline_pz = itp.splrep(z_hist + shift_to_midpoint, n_z_hist) mask_min = z_mod >= z_hist.min() + shift_to_midpoint mask_max = z_mod <= z_hist.max() + shift_to_midpoint mask = mask_min & mask_max # points outside the z-range of the histograms are set to 0! 
pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz) dz = self.redshifts[1:] - self.redshifts[:-1] pz_norm[zbin] = np.sum(0.5 * (pz[1:, zbin] + pz[:-1, zbin]) * dz) pr = pz * (dzdr[:, np.newaxis] / pz_norm) elif (not self.bootstrap_photoz_errors) and ( self.shift_n_z_by_D_z.any()): pz = np.zeros((self.nzmax, self.nzbins), 'float64') pz_norm = np.zeros(self.nzbins, 'float64') for zbin in xrange(self.nzbins): param_name = 'D_z{:}'.format(zbin + 1) if param_name in data.mcmc_parameters: z_mod = self.redshifts + data.mcmc_parameters[param_name][ 'current'] * data.mcmc_parameters[param_name]['scale'] else: z_mod = self.redshifts # Load n(z) again: redshift_bin = self.redshift_bins[zbin] fname = os.path.join( self.data_directory, '{:}/n_z_avg_{:}.hist'.format(self.photoz_method, redshift_bin)) z_hist, n_z_hist = np.loadtxt(fname, usecols=(0, 1), unpack=True) shift_to_midpoint = np.diff(z_hist)[0] / 2. spline_pz = itp.splrep(z_hist + shift_to_midpoint, n_z_hist) mask_min = z_mod >= z_hist.min() + shift_to_midpoint mask_max = z_mod <= z_hist.max() + shift_to_midpoint mask = mask_min & mask_max # points outside the z-range of the histograms are set to 0! pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz) # Normalize selection functions dz = self.redshifts[1:] - self.redshifts[:-1] pz_norm[zbin] = np.sum(0.5 * (pz[1:, zbin] + pz[:-1, zbin]) * dz) pr = pz * (dzdr[:, np.newaxis] / pz_norm) else: pr = self.pz * (dzdr[:, np.newaxis] / self.pz_norm) # Compute function g_i(r), that depends on r and the bin # g_i(r) = 2r(1+z(r)) int_r^+\infty drs eta_r(rs) (rs-r)/rs g = np.zeros((self.nzmax, self.nzbins), 'float64') for zbin in xrange(self.nzbins): # assumes that z[0] = 0 for nr in xrange(1, self.nzmax - 1): #for nr in xrange(self.nzmax - 1): fun = pr[nr:, zbin] * (r[nr:] - r[nr]) / r[nr:] g[nr, zbin] = np.sum(0.5 * (fun[1:] + fun[:-1]) * (r[nr + 1:] - r[nr:-1])) g[nr, zbin] *= 2. * r[nr] * (1. + self.redshifts[nr]) # Start loop over l for computation of C_l^shear Cl_GG_integrand = np.zeros((self.nzmax, self.nzbins, self.nzbins), 'float64') Cl_GG = np.zeros((self.nellsmax, self.nzbins, self.nzbins), 'float64') if intrinsic_alignment: Cl_II_integrand = np.zeros_like(Cl_GG_integrand) Cl_II = np.zeros_like(Cl_GG) Cl_GI_integrand = np.zeros_like(Cl_GG_integrand) Cl_GI = np.zeros_like(Cl_GG) dr = r[1:] - r[:-1] # removing shifts like array[1:, ...] which assume that z[0] = 0: for index_ell in xrange(self.nellsmax): # find Cl_integrand = (g(r) / r)**2 * P(l/r,z(r)) for zbin1 in xrange(self.nzbins): for zbin2 in xrange(zbin1 + 1): #self.nzbins): Cl_GG_integrand[1:, zbin1, zbin2] = g[1:, zbin1] * g[ 1:, zbin2] / r[1:]**2 * pk[index_ell, 1:] if intrinsic_alignment: factor_IA = self.get_factor_IA( self.redshifts[1:], linear_growth_rate[1:], amp_IA, exp_IA) #/ self.dzdr[1:] #print F_of_x #print self.eta_r[1:, zbin1].shape Cl_II_integrand[1:, zbin1, zbin2] = pr[1:, zbin1] * pr[ 1:, zbin2] * factor_IA**2 / r[1:]**2 * pk[index_ell, 1:] Cl_GI_integrand[1:, zbin1, zbin2] = ( g[1:, zbin1] * pr[1:, zbin2] + g[1:, zbin2] * pr[1:, zbin1] ) * factor_IA / r[1:]**2 * pk[index_ell, 1:] # Integrate over r to get C_l^shear_ij = P_ij(l) # C_l^shear_ii = 9/4 Omega0_m^2 H_0^4 \sum_0^rmax dr (g_i(r) g_j(r) /r**2) P(k=l/r,z(r)) for zbin1 in xrange(self.nzbins): for zbin2 in xrange(zbin1 + 1): #self.nzbins): Cl_GG[index_ell, zbin1, zbin2] = np.sum( 0.5 * (Cl_GG_integrand[1:, zbin1, zbin2] + Cl_GG_integrand[:-1, zbin1, zbin2]) * dr) # here we divide by 16, because we get a 2^2 from g(z)! Cl_GG[ index_ell, zbin1, zbin2] *= 9. / 16. 
* self.Omega_m**2 # in units of Mpc**4 Cl_GG[index_ell, zbin1, zbin2] *= (self.small_h / 2997.9)**4 # dimensionless if intrinsic_alignment: Cl_II[index_ell, zbin1, zbin2] = np.sum( 0.5 * (Cl_II_integrand[1:, zbin1, zbin2] + Cl_II_integrand[:-1, zbin1, zbin2]) * dr) Cl_GI[index_ell, zbin1, zbin2] = np.sum( 0.5 * (Cl_GI_integrand[1:, zbin1, zbin2] + Cl_GI_integrand[:-1, zbin1, zbin2]) * dr) # here we divide by 4, because we get a 2 from g(r)! Cl_GI[index_ell, zbin1, zbin2] *= 3. / 4. * self.Omega_m Cl_GI[index_ell, zbin1, zbin2] *= (self.small_h / 2997.9)**2 if intrinsic_alignment: Cl = Cl_GG + Cl_GI + Cl_II else: Cl = Cl_GG # ordering of redshift bins is correct in definition of theory below! theory_EE = np.zeros((self.nzcorrs, self.band_offset_EE), 'float64') theory_BB = np.zeros((self.nzcorrs, self.band_offset_BB), 'float64') theory_noise_EE = np.zeros((self.nzcorrs, self.band_offset_EE), 'float64') theory_noise_BB = np.zeros((self.nzcorrs, self.band_offset_BB), 'float64') #print theory.shape index_corr = 0 #A_noise_corr = np.zeros(self.nzcorrs) for zbin1 in xrange(self.nzbins): for zbin2 in xrange(zbin1 + 1): #self.nzbins): #correlation = 'z{:}z{:}'.format(zbin1 + 1, zbin2 + 1) ell_norm = ells_sum * (ells_sum + 1) / (2. * np.pi) # calculate m-correction vector here: # this loop goes over bands per z-corr; m-correction is the same for all bands in one tomographic bin!!! val_m_corr_EE = (1. + m_corr_per_zbin[zbin1]) * ( 1. + m_corr_per_zbin[zbin2]) * np.ones( len(self.bands_EE_to_use)) val_m_corr_BB = (1. + m_corr_per_zbin[zbin1]) * ( 1. + m_corr_per_zbin[zbin2]) * np.ones( len(self.bands_BB_to_use)) ''' arg_a = (1. + A_noise[zbin1]) arg_b = (1. + A_noise[zbin2]) if np.sign(arg_a) < 0 and np.sign(arg_b) < 0: sign = -1. elif np.sign(arg_a) < 0 or np.sign(arg_b) < 0: sign = -1. else: sign = 1. A_noise_corr[index_corr] = sign * self.sigma_e[zbin1] * self.sigma_e[zbin2] * np.sqrt(np.abs(arg_a)) * np.sqrt(np.abs(arg_b)) / (np.sqrt(self.n_eff[zbin1]) * np.sqrt(self.n_eff[zbin2])) ''' # alternative definition, makes more sense than the one above: # I should add noise only to auto-correlations! if zbin1 == zbin2: #A_noise_corr = self.sigma_e[zbin1] * self.sigma_e[zbin2] * (1. + A_noise[zbin1] + A_noise[zbin2]) / (np.sqrt(self.n_eff[zbin1]) * np.sqrt(self.n_eff[zbin2])) # now the very simple definition should be sufficient! A_noise_corr = A_noise[zbin1] * self.sigma_e[ zbin1]**2 / self.n_eff[zbin1] else: A_noise_corr = 0. Cl_sample = Cl[:, zbin1, zbin2] spline_Cl = itp.splrep(ells, Cl_sample) D_l_EE = ell_norm * itp.splev(ells_sum, spline_Cl) # TODO: 1e-9 can either become an adjustable constant or a parameter! # taking out ell_norm now (a constant times ell_norm is just another noise-power component) if self.correct_resetting_bias: # TODO: get ell_centers... #x_BB = ell_center * (ell_center + 1.) / (2. * np.pi) * self.sigma_e[zbin1] * self.sigma_e[zbin2] / np.sqrt(self.n_eff[zbin1] * self.n_eff[zbin2]) # try to pull the model through the BWM first, that's more consistent with the code and doesn't require x_BB = ell_norm * self.sigma_e[zbin1] * self.sigma_e[ zbin2] / np.sqrt(self.n_eff[zbin1] * self.n_eff[zbin2]) D_l_BB = self.get_B_mode_model(x_BB, amp_BB, exp_BB) #else: # D_l_BB = self.scale_B_modes # * ell_norm D_l_noise = ell_norm * A_noise_corr #theory[zbin1, zbin2, :] = get_theory(ells_sum, D_l, self.ells_intp, band_window_matrix, self.band_offset, correlation, bwm_style=self.bwm_style) ''' if self.key == 'data_XinPi': theory_EE[index_corr, :] = D_l_EE theory_BB[index_corr, :] = 0. 
if add_noise_power.all(): theory_noise_EE[index_corr, :] = D_l_noise theory_noise_BB[index_corr, :] = 0. else: theory_EE[index_corr, :] = self.get_theory(ells_sum, D_l_EE, self.band_window_matrix, index_corr, band_type_is_EE=True) if self.correct_resetting_bias: theory_BB[index_corr, :] = self.get_theory(ells_sum, D_l_BB, self.band_window_matrix, index_corr, band_type_is_EE=False) else: theory_BB[index_corr, :] = 0. if add_noise_power.all(): theory_noise_EE[index_corr, :] = self.get_theory(ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=True) theory_noise_BB[index_corr, :] = self.get_theory(ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=False) ''' theory_EE[index_corr, :] = self.get_theory( ells_sum, D_l_EE, self.band_window_matrix, index_corr, band_type_is_EE=True) if self.correct_resetting_bias: theory_BB[index_corr, :] = self.get_theory( ells_sum, D_l_BB, self.band_window_matrix, index_corr, band_type_is_EE=False) else: theory_BB[index_corr, :] = 0. if add_noise_power.all(): theory_noise_EE[index_corr, :] = self.get_theory( ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=True) theory_noise_BB[index_corr, :] = self.get_theory( ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=False) if index_corr == 0: m_corr_EE = val_m_corr_EE m_corr_BB = val_m_corr_BB else: m_corr_EE = np.concatenate((m_corr_EE, val_m_corr_EE)) m_corr_BB = np.concatenate((m_corr_BB, val_m_corr_BB)) index_corr += 1 # take care of m-correction: m_corr = np.concatenate((m_corr_EE, m_corr_BB)) # this is required for scaling of covariance matrix: m_corr_matrix = np.matrix(m_corr).T * np.matrix(m_corr) theory_BB = theory_BB.flatten() + theory_noise_BB.flatten() theory_EE = theory_EE.flatten() + theory_noise_EE.flatten() band_powers_theory = np.concatenate((theory_EE, theory_BB)) #apply m-corrections also to covariance: # we want elementwise division!!! covariance = self.covariance / np.asarray(m_corr_matrix) # some numpy-magic for slicing: cov_sliced = covariance[np.ix_(self.indices_for_bands_to_use, self.indices_for_bands_to_use)] # invert covariance matrix: #inv_cov_sliced = np.linalg.inv(cov_sliced) # Eq. 16 from Heymans et al. 2013 (arxiv:1303.1808v1) # not necessary for analytical covariance! ''' if self.use_debias_factor: params = len(self.indices_for_bands_to_use) debias_factor = (self.nmocks - params - 2.) / (self.nmocks - 1.) else: debias_factor = 1. inverse_covariance_debiased = debias_factor * inv_cov_sliced ''' # m-correction is applied to DATA! Can also be marginalized over! difference_vector = (self.band_powers / m_corr) - band_powers_theory difference_vector = difference_vector[self.indices_for_bands_to_use] # Don't invert that matrix! #chi2 = difference_vector.T.dot(inv_cov_sliced.dot(difference_vector)) # this is for running smoothly with MultiNest # (in initial checking of prior space, there might occur weird solutions) if np.isinf(band_powers_theory).any() or np.isnan( band_powers_theory).any(): chi2 = 2e12 else: # use a Cholesky decomposition instead: cholesky_transform = cholesky(cov_sliced, lower=True) yt = solve_triangular(cholesky_transform, difference_vector, lower=True) chi2 = yt.dot(yt) return -0.5 * chi2
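The chi-squared at the end of this likelihood is evaluated through a Cholesky factor and a triangular solve rather than an explicit matrix inverse. A minimal self-contained check of that identity, on made-up data:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(1)
d = rng.standard_normal(4)                   # stands in for the difference vector
A = rng.standard_normal((4, 4))
cov = A @ A.T + 4.0 * np.eye(4)              # made-up positive-definite covariance

L = cholesky(cov, lower=True)
y = solve_triangular(L, d, lower=True)
chi2_chol = y @ y                            # d^T cov^{-1} d without forming cov^{-1}
chi2_inv = d @ np.linalg.inv(cov) @ d
print(np.allclose(chi2_chol, chi2_inv))      # True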
def update_variance(self, variance):
    # store the upper Cholesky factor (self.L.T @ self.L == variance) next to the covariance
    self.L = cholesky(variance, lower=False)
    self.variance = variance
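A small sketch of what the stored upper factor can be used for, e.g. drawing samples with the given covariance; the 2x2 variance matrix below is made up.

import numpy as np
from scipy.linalg import cholesky

variance = np.array([[2.0, 0.3],
                     [0.3, 1.0]])
U = cholesky(variance, lower=False)             # upper factor, as stored in self.L above
print(np.allclose(U.T @ U, variance))           # True: the factor reconstructs the covariance
rng = np.random.default_rng(0)
samples = rng.standard_normal((10000, 2)) @ U   # rows ~ N(0, variance), since cov = U.T @ U
print(np.cov(samples.T))                        # close to `variance`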
z_test = np.tanh(np.dot(x_test, w1.data.numpy()) + b1.data.numpy()) z_test = np.tanh(np.dot(z_test, w2.data.numpy()) + b2.data.numpy()) z_test = np.dot(z_test, w3.data.numpy()) + b3.data.numpy() #z_test=10.0*z_test znp = np.tanh( np.dot(xnp[j * batch_size:(j + 1) * batch_size], w1.data.numpy()) + b1.data.numpy()) znp = np.tanh(np.dot(znp, w2.data.numpy()) + b2.data.numpy()) znp = np.dot(znp, w3.data.numpy()) + b3.data.numpy() #znp=10.0*znp for k in range(0, xnp[j * batch_size:(j + 1) * batch_size].shape[0]): K1[:, k] = np.exp(-0.5 * np.sum((z_test - znp[[k], :])**2, 1)) K2[:, k] = np.exp(-0.5 * np.sum((znp - znp[[k], :])**2, 1)) K2[k, k] += (1.0 / (np.exp(-nug) + 1.0) + 1e-8) L = cholesky(K2, lower=True) L_inv = solve_triangular(L.T, np.eye(L.shape[0])) K_inv = L_inv.dot(L_inv.T) yp = np.dot( K1, np.dot(K_inv, ynp[j * batch_size:(j + 1) * batch_size])) yp2 = np.rint(yp) print(np.average(yp2 == y_test), np.sqrt(np.mean( (yp - y_test)**2))) #print(np.average(np.argmax(yp,1)==np.argmax(y_test,1))) #print(yp) #print(y_test)
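Building K_inv from the Cholesky factor and a triangular solve against the identity, as done above, is equivalent to a direct inverse. A self-contained check with a made-up 2x2 matrix:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

A = np.array([[2.0, 0.5],
              [0.5, 1.0]])
L = cholesky(A, lower=True)
L_inv = solve_triangular(L.T, np.eye(2))     # upper-triangular solve, same pattern as above
A_inv = L_inv.dot(L_inv.T)
print(np.allclose(A_inv, np.linalg.inv(A)))  # True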
def log_marginal_likelihood(self, theta=None, eval_gradient=False): """Return log-marginal likelihood of theta for training data. Parameters ---------- theta : array-like, shape = (n_kernel_params,) or None Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned. eval_gradient : bool, default: False If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None. Returns ------- log_likelihood : float Log-marginal likelihood of theta for training data. log_likelihood_gradient : array, shape = (n_kernel_params,), optional Gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta. Only returned when eval_gradient is True. """ if theta is None: if eval_gradient: raise ValueError( "Gradient can only be evaluated for theta!=None") return self.log_marginal_likelihood_value_ kernel_l = self.kernel_l_.clone_with_theta( theta[1:1 + len(self.kernel_l_.theta)]) kernel_d = self.kernel_d_.clone_with_theta( theta[-len(self.kernel_d_.theta):]) rho = theta[0] if eval_gradient: raise Warning("eval_gradient = True mode is not implemented yet!") else: K = np.vstack((np.hstack( (kernel_l(self.X_train_[:self.n_l_]), rho * kernel_l( self.X_train_[:self.n_l_], self.X_train_[self.n_l_:]))), np.hstack( (rho * kernel_l(self.X_train_[self.n_l_:], self.X_train_[:self.n_l_]), rho**2 * kernel_l(self.X_train_[self.n_l_:]) + kernel_d(self.X_train_[self.n_l_:]))))) K[np.diag_indices_from(K)] += self.alpha try: L = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError: return (-np.inf, np.zeros_like(theta)) \ if eval_gradient else -np.inf # Support multi-dimensional output of self.y_train_ y_train = self.y_train_ if y_train.ndim == 1: y_train = y_train[:, np.newaxis] alpha = cho_solve((L, True), y_train) # Line 3 # Compute log-likelihood (compare line 7) log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha) log_likelihood_dims -= np.log( np.diag(L)).sum() # -0.5 log det (K) = log(L) (since K = LL^T) log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi) log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions if eval_gradient: # compare Equation 5.9 from GPML raise Warning("eval_gradient = True mode is not implemented yet!") if eval_gradient: raise Warning("eval_gradient = True mode is not implemented yet!") else: return log_likelihood
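The Cholesky-based log-marginal likelihood used here (factorize, cho_solve for alpha, log-determinant from the factor's diagonal) can be cross-checked on a toy Gram matrix against the direct inverse/determinant formula; the RBF-style kernel below is made up.

import numpy as np
from scipy.linalg import cholesky, cho_solve

rng = np.random.default_rng(0)
X = rng.standard_normal((6, 1))
K = np.exp(-0.5 * (X - X.T) ** 2) + 1e-6 * np.eye(6)   # toy Gram matrix plus jitter
y = rng.standard_normal(6)

L = cholesky(K, lower=True)
alpha = cho_solve((L, True), y)
lml = -0.5 * y @ alpha - np.log(np.diag(L)).sum() - 0.5 * len(y) * np.log(2 * np.pi)

lml_direct = (-0.5 * y @ np.linalg.inv(K) @ y
              - 0.5 * np.linalg.slogdet(K)[1]
              - 0.5 * len(y) * np.log(2 * np.pi))
print(np.isclose(lml, lml_direct))   # True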
import numpy as np
import scipy as sp
import scipy.linalg as la

# The matrix must be symmetric positive definite for a Cholesky factorization to exist;
# the original [[1, 2, 3], [2, 1, 2], [3, 2, 1]] is indefinite and makes la.cholesky raise LinAlgError.
A = np.array([[2, 1, 0], [1, 2, 1], [0, 1, 2]])
L = la.cholesky(A)
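A quick check of the convention: scipy returns the upper factor by default, and lower=True gives the lower one.

import numpy as np
import scipy.linalg as la

A = np.array([[2, 1, 0], [1, 2, 1], [0, 1, 2]])
U = la.cholesky(A)                 # upper triangular (default)
L = la.cholesky(A, lower=True)     # lower triangular
print(np.allclose(U.T @ U, A), np.allclose(L @ L.T, A))   # True True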
def _reduced_likelihood_function(self, theta): """ This function determines the BLUP parameters and evaluates the reduced likelihood function for the given autocorrelation parameters theta. Maximizing this function wrt the autocorrelation parameters theta is equivalent to maximizing the likelihood of the assumed joint Gaussian distribution of the observations y evaluated onto the design of experiments X. Parameters ---------- theta: list(n_comp), optional - An array containing the autocorrelation parameters at which the Gaussian Process model parameters should be determined. Returns ------- reduced_likelihood_function_value: real - The value of the reduced likelihood function associated to the given autocorrelation parameters theta. par: dict() - A dictionary containing the requested Gaussian Process model parameters: sigma2 Gaussian Process variance. beta Generalized least-squares regression weights for Universal Kriging or for Ordinary Kriging. gamma Gaussian Process weights. C Cholesky decomposition of the correlation matrix [R]. Ft Solution of the linear equation system : [R] x Ft = F Q, G QR decomposition of the matrix Ft. """ # Initialize output reduced_likelihood_function_value = -np.inf par = {} # Set up R MACHINE_EPSILON = np.finfo(np.double).eps nugget = 10.0 * MACHINE_EPSILON if self.name == "MFK": if self._lvl != self.nlvl: # in the case of multi-fidelity optimization # it is very probable that lower-fidelity correlation matrix # becomes ill-conditionned nugget = 10.0 * nugget elif self.name in ["MGP"]: nugget = 100.0 * nugget noise = self.options["noise"] tmp_var = theta if self.name in ["MFK", "MFKPLS", "MFKPLSK"]: if self.options["eval_noise"]: theta = tmp_var[:-1] noise = tmp_var[-1] r = self._correlation_types[self.options["corr"]](theta, self.D).reshape( -1, 1) R = np.eye(self.nt) * (1.0 + nugget + noise) R[self.ij[:, 0], self.ij[:, 1]] = r[:, 0] R[self.ij[:, 1], self.ij[:, 0]] = r[:, 0] # Cholesky decomposition of R try: C = linalg.cholesky(R, lower=True) except (linalg.LinAlgError, ValueError) as e: print("exception : ", e) # raise e return reduced_likelihood_function_value, par # Get generalized least squares solution Ft = linalg.solve_triangular(C, self.F, lower=True) Q, G = linalg.qr(Ft, mode="economic") sv = linalg.svd(G, compute_uv=False) rcondG = sv[-1] / sv[0] if rcondG < 1e-10: # Check F sv = linalg.svd(self.F, compute_uv=False) condF = sv[0] / sv[-1] if condF > 1e15: raise Exception("F is too ill conditioned. 
Poor combination " "of regression model and observations.") else: # Ft is too ill conditioned, get out (try different theta) return reduced_likelihood_function_value, par Yt = linalg.solve_triangular(C, self.y_norma, lower=True) beta = linalg.solve_triangular(G, np.dot(Q.T, Yt)) rho = Yt - np.dot(Ft, beta) # The determinant of R is equal to the squared product of the diagonal # elements of its Cholesky decomposition C detR = (np.diag(C)**(2.0 / self.nt)).prod() # Compute/Organize output if self.name in ["MFK", "MFKPLS", "MFKPLSK"]: n_samples = self.nt p = self.p q = self.q sigma2 = (rho**2.0).sum(axis=0) / (n_samples - p - q) reduced_likelihood_function_value = -( n_samples - p - q) * np.log10(sigma2) - n_samples * np.log10(detR) else: sigma2 = (rho**2.0).sum(axis=0) / (self.nt) reduced_likelihood_function_value = -np.log( sigma2.sum()) - np.log(detR) par["sigma2"] = sigma2 * self.y_std**2.0 par["beta"] = beta par["gamma"] = linalg.solve_triangular(C.T, rho) par["C"] = C par["Ft"] = Ft par["G"] = G par["Q"] = Q if self.name in ["MGP"]: reduced_likelihood_function_value += self._reduced_log_prior(theta) # A particular case when f_min_cobyla fail if (self.best_iteration_fail is not None) and ( not np.isinf(reduced_likelihood_function_value)): if reduced_likelihood_function_value > self.best_iteration_fail: self.best_iteration_fail = reduced_likelihood_function_value self._thetaMemory = np.array(tmp_var) elif (self.best_iteration_fail is None) and (not np.isinf(reduced_likelihood_function_value)): self.best_iteration_fail = reduced_likelihood_function_value self._thetaMemory = np.array(tmp_var) return reduced_likelihood_function_value, par
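The detR expression above relies on det(R) being the squared product of the diagonal of its Cholesky factor. A standalone check of that identity, and of the nt-th-root form used in the code, on a made-up 3x3 correlation matrix (nt = 3):

import numpy as np
from scipy import linalg

R = np.array([[1.0, 0.2, 0.1],
              [0.2, 1.0, 0.3],
              [0.1, 0.3, 1.0]])
C = linalg.cholesky(R, lower=True)
print(np.isclose(np.linalg.det(R), np.prod(np.diag(C)) ** 2))                 # True
print(np.isclose(np.linalg.det(R), (np.diag(C) ** (2.0 / 3)).prod() ** 3))    # True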
def logdet(A):
    """Log-determinant of a symmetric positive-definite matrix via its Cholesky factor."""
    return 2 * np.sum(np.log(np.diag(spl.cholesky(A))))
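A quick cross-check of this logdet against numpy's slogdet on a made-up positive-definite matrix:

import numpy as np
import scipy.linalg as spl

rng = np.random.default_rng(0)
M = rng.standard_normal((5, 5))
A = M @ M.T + 5.0 * np.eye(5)            # symmetric positive definite
print(np.isclose(2 * np.sum(np.log(np.diag(spl.cholesky(A)))),   # logdet(A) as above
                 np.linalg.slogdet(A)[1]))                       # True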
import numpy as np
import matplotlib.pyplot as plt
import GPy
from scipy import linalg
import scipy.spatial.distance as spdist

# Dataset according to the matlab code:
n = 20
np.random.seed(0)
x = 15 * (np.random.uniform(low=0, high=1, size=20) - 0.5).reshape((-1, 1))
sigma_y = 0.1
sigma_f = 1.0
l = 1.0
q = spdist.cdist(x / l, x / l, 'sqeuclidean')
A = (sigma_y**2) * np.eye(n) + (sigma_f**2) * np.exp(-0.5 * q)
# MATLAB's chol(A)' is the lower-triangular factor; sampling with scipy's default
# upper factor would not reproduce the covariance A.
B = linalg.cholesky(A, lower=True)
y = B.dot(np.random.randn(n, 1))

xstar = np.linspace(-7.5, 7.5, 201)
xstar = xstar.reshape(-1, 1)
l = np.array([1.0, 0.3, 3.0])
sigma_f = np.array([1, 1.08, 1.16])
sigma_y = np.array([0.1, 0.00005, 0.89])


def generate_plots(sigma_f, l, sigma_y):
    kernel = GPy.kern.RBF(1, sigma_f, l)
    model = GPy.models.GPRegression(x, y, kernel)
    model.Gaussian_noise.variance = sigma_y**2
def modes_system_undamped(M, K): r"""Return eigensolution of multiple DOF system. Returns the natural frequencies (w), eigenvectors (P), mode shapes (S) and the modal transformation matrix S for an undamped system. See Notes for explanation of the underlying math. Parameters ---------- M: float array Mass matrix K: float array Stiffness matrix Returns ------- w: float array The natural frequencies of the system P: float array The eigenvectors of the system. S: float array The mass-normalized mode shapes of the system. Sinv: float array The modal transformation matrix S^-1(takes x -> r(modal coordinates)) Notes ----- Given :math:`M\ddot{x}(t)+Kx(t)=0`, with mode shapes :math:`u`, the matrix of mode shapes :math:`S=[u_1 u_1 \ldots]` can be created. If the modal coordinates are the vector :math:`r(t)`. The modal transformation separates space and time from :math:`x(t)` such that :math:`x(t)=S r(t)`. Substituting into the governing equation: :math:`MS\ddot{r}(t)+KSr(t)=0` Premultiplying by :math:`S^T` :math:`S^TMS\ddot{r}(t)+S^TKSr(t)=0` The matrices :math:`S^TMS` and :math:`S^TKS` will be diagonalized by this process (:math:`u_i` are the eigenvectors of :math:`M^{-1}K`). If scaled properly (mass normalized so :math:`u_i^TMu_i=1`) then :math:`S^TMS=I` and :math:`S^TKS=\Omega^2` where :math:`\Omega^2` is a diagonal matrix of the natural frequencies squared in radians per second. Further, inverses are unstable so the better way to solve linear equations is with Gauss elimination. :math:`AB=C` given known :math:`A` and :math:`C` is solved using `la.solve(A, C, assume_a='pos')`. :math:`BA=C` given known :math:`A` and :math:`C` is solved by first transposing the equation to :math:`A^TB^T=C^T`, then solving for :math:`C^T`. The resulting command is `la.solve(A.T, C.T, assume_a='pos').T` Examples -------- >>> M = np.array([[4, 0, 0], ... [0, 4, 0], ... [0, 0, 4]]) >>> K = np.array([[8, -4, 0], ... [-4, 8, -4], ... [0, -4, 4]]) >>> w, P, S, Sinv = modes_system_undamped(M, K) >>> w # doctest: +SKIP array([0.45, 1.25, 1.8 ]) >>> S array([[ 0.16, -0.37, -0.3 ], [ 0.3 , -0.16, 0.37], [ 0.37, 0.3 , -0.16]]) """ L = la.cholesky(M) lam, P = _eigen( la.solve(L, la.solve(L, K, assume_a='pos').T, assume_a='pos').T) w = np.real(np.sqrt(lam)) S = la.solve(L, P, assume_a="pos") Sinv = la.solve(L.T, P, assume_a="pos").T return w, P, S, Sinv
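A minimal numerical check of the mass-normalization property described in the docstring, written here with the explicit lower Cholesky factor for clarity (the function above uses scipy's default upper factor and general solves); the 2-DOF matrices are made up.

import numpy as np
import scipy.linalg as la

M = np.array([[4.0, 0.0], [0.0, 2.0]])
K = np.array([[8.0, -4.0], [-4.0, 4.0]])

L = la.cholesky(M, lower=True)                              # M = L @ L.T
Ktil = la.solve_triangular(L, la.solve_triangular(L, K, lower=True).T, lower=True)
lam, P = la.eigh(Ktil)                                      # symmetric eigenproblem
S = la.solve_triangular(L.T, P, lower=False)                # mass-normalized mode shapes
print(np.allclose(S.T @ M @ S, np.eye(2)))                  # True: S^T M S = I
print(np.allclose(S.T @ K @ S, np.diag(lam)))               # True: S^T K S = Omega^2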
def fit_energy(self, X_glob, y, ncores=1): """Fit a Gaussian process regression model using local energies. Args: X_glob (list of lists of arrays): list of grouped training configurations y (np.ndarray): training total energies ncores (int): number of CPU workers to use, default is 1 """ self.kernel_ = self.kernel self.X_glob_train_ = X_glob self.y_train_energy_ = np.reshape(y, (y.shape[0], 1)) if self.optimizer is not None: # TODO Debug logger.warning("Optimizer not yet implemented for energy training") ''' # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=True): if eval_gradient: lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True) return -lml, -grad else: return -self.log_marginal_likelihood(theta) # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, self.kernel_.theta, self.kernel_.bounds))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: if not np.isfinite(self.kernel_.bounds).all(): raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = self.kernel_.bounds for iteration in range(self.n_restarts_optimizer): theta_initial = \ self._rng.uniform(bounds[:, 0], bounds[:, 1]) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) self.kernel_.theta = optima[np.argmin(lml_values)][0] self.log_marginal_likelihood_value_ = -np.min(lml_values) ''' else: pass ''' self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(self.kernel_.theta) ''' # Precompute quantities required for predictions which are independent # of actual query points self.energy_K = self.kernel_.calc_gram_e(self.X_glob_train_, ncores) self.energy_K[np.diag_indices_from(self.energy_K)] += self.noise try: # Use Cholesky decomposition to build the lower triangular matrix self.L_ = cholesky(self.energy_K, lower=True) except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'noise' parameter of your " "GaussianProcessRegressor estimator." % self.kernel_, ) + exc.args raise # Calculate the alpha weights using the Cholesky method self.energy_alpha_ = cho_solve((self.L_, True), self.y_train_energy_) self.K = None self.alpha_ = None self.fitted[1] = 'energy' self.n_train = len(self.y_train_energy_) self.X_train_ = None return self
def AddJitterOp(inputs: np.ndarray, initial_jitter_factor=INITIAL_JITTER_FACTOR, jitter_growth=JITTER_GROWTH, debug_log='false'): """ Finds smaller jitter to add to diagonal of square matrix to render the matrix positive definite (in that linalg.potrf works). Given input x (positive semi-definite matrix) and sigsq_init (nonneg scalar), find sigsq_final (nonneg scalar), so that: sigsq_final = sigsq_init + jitter, jitter >= 0, x + sigsq_final * Id positive definite (so that potrf call works) We return the matrix x + sigsq_final * Id, for which potrf has not failed. For the gradient, the dependence of jitter on the inputs is ignored. The values tried for sigsq_final are: sigsq_init, sigsq_init + initial_jitter * (jitter_growth ** k), k = 0, 1, 2, ..., initial_jitter = initial_jitter_factor * max(mean(diag(x)), 1) Note: The scaling of initial_jitter with mean(diag(x)) is taken from GPy. The rationale is that the largest eigenvalue of x is >= mean(diag(x)), and likely of this magnitude. There is no guarantee that the Cholesky factor returned is well-conditioned enough for subsequent computations to be reliable. A better solution would be to estimate the condition number of the Cholesky factor, and to add jitter until this is bounded below a threshold we tolerate. See Higham, N. A Survey of Condition Number Estimation for Triangular Matrices MIMS EPrint: 2007.10 Algorithm 4.1 could work for us. """ assert initial_jitter_factor > 0. and jitter_growth > 1. n_square = inputs.shape[0] - 1 n = int(math.sqrt(n_square)) assert n_square % n == 0 and n_square // n == n, "x must be square matrix, shape (n, n)" x, sigsq_init = np.reshape(inputs[:-1], (n, -1)), inputs[-1] def _get_constant_identity(x, constant): n, _ = x.shape return np.diag(np.ones((n,)) * constant) def _get_jitter_upperbound(x): # To define a safeguard in the while-loop of the forward, # we define an upperbound on the jitter we can reasonably add # the bound is quite generous, and is dependent on the scale of the input x # (the scale is captured via the trace of x) # the primary goal is avoid any infinite while-loop. return JITTER_UPPERBOUND_FACTOR * max(1., np.mean(np.diag(x))) jitter = 0. jitter_upperbound = _get_jitter_upperbound(x) must_increase_jitter = True x_plus_constant = None while must_increase_jitter and jitter <= jitter_upperbound: try: x_plus_constant = x + _get_constant_identity( x, sigsq_init + jitter) # Note: Do not use np.linalg.cholesky here, this can cause # locking issues L = spl.cholesky(x_plus_constant, lower=True) must_increase_jitter = False except spl.LinAlgError: if debug_log == 'true': logger.info("sigsq = {} does not work".format( sigsq_init + jitter)) if jitter == 0.0: jitter = initial_jitter_factor * max(1., np.mean(np.diag(x))) else: jitter = jitter * jitter_growth assert not must_increase_jitter, "The jitter ({}) has reached its upperbound ({}) while the Cholesky of the input matrix still cannot be computed.".format(jitter, jitter_upperbound) if debug_log == 'true': logger.info("sigsq_final = {}".format(sigsq_init + jitter)) return x_plus_constant
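A stripped-down, framework-free sketch of the same retry idea (chol_with_jitter is a hypothetical helper, not part of the code above), assuming scipy only:

import numpy as np
import scipy.linalg as spl

def chol_with_jitter(x, initial_jitter_factor=1e-9, jitter_growth=10.0, max_tries=20):
    # Grow a diagonal jitter until the Cholesky factorization succeeds.
    jitter = 0.0
    for _ in range(max_tries):
        try:
            return spl.cholesky(x + jitter * np.eye(x.shape[0]), lower=True), jitter
        except spl.LinAlgError:
            jitter = (initial_jitter_factor * max(1.0, np.mean(np.diag(x)))
                      if jitter == 0.0 else jitter * jitter_growth)
    raise np.linalg.LinAlgError("matrix could not be made positive definite")

L, jitter = chol_with_jitter(np.ones((3, 3)))   # rank-1 PSD matrix needs a small jitter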
def evalObjCon(self, x): ''' Evaluate the objective (compliance) and constraint (mass) ''' # Add the number of function evaluations self.fevals += 1 # Convert the design variables with the scaling A = self.Area_scale * x[:] # Evaluate compliance objective self.assembleMat(A, self.K) self.assembleLoadVec(self.f) self.applyBCs(self.K, self.f) # Copy the values self.u[:] = self.f[:] # Perform the Cholesky factorization try: self.L = linalg.cholesky(self.K, lower=True) except Exception as excpt: print('Exception in cholesky factorization ', excpt) # Solve the resulting linear system of equations linalg.solve_triangular(self.L, self.u, lower=True, trans='N', overwrite_b=True) linalg.solve_triangular(self.L, self.u, lower=True, trans='T', overwrite_b=True) # Compute the compliance objective obj = np.dot(self.u, self.f) if self.obj_scale is None: self.obj_scale = obj / 10.0 # Scale the compliance objective obj = obj / self.obj_scale # Compute the mass of the entire truss mass = 0.0 index = 0 for bar in self.conn: # Get the first and second node numbers from the bar n1 = bar[0] n2 = bar[1] # Compute the nodal locations xd = self.xpos[2 * n2] - self.xpos[2 * n1] yd = self.xpos[2 * n2 + 1] - self.xpos[2 * n1 + 1] Le = np.sqrt(xd**2 + yd**2) mass += self.rho * Le * A[index] index += 1 # Create the array of constraints >= 0.0 con = np.array([self.m_fixed - mass]) / self.mass_scale fail = 0 return fail, obj, con
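The factor-once, solve-twice pattern used above for K u = f can be checked against a direct solve; the stiffness matrix and load vector below are made up.

import numpy as np
from scipy import linalg

K = np.array([[4.0, 1.0, 0.0],
              [1.0, 3.0, 1.0],
              [0.0, 1.0, 2.0]])
f = np.array([1.0, 2.0, 3.0])

L = linalg.cholesky(K, lower=True)
u = linalg.solve_triangular(L, f, lower=True, trans='N')   # forward substitution
u = linalg.solve_triangular(L, u, lower=True, trans='T')   # back substitution
print(np.allclose(u, np.linalg.solve(K, f)))               # True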
def fit(self, X, y): """Fit Gaussian process regression model. Parameters ---------- X : array-like of shape (n_samples, n_features) or list of object Feature vectors or other representations of training data. y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values Returns ------- self : returns an instance of self. """ if self.kernel is None: # Use an RBF kernel as default self.kernel_ = C(1.0, constant_value_bounds="fixed") \ * RBF(1.0, length_scale_bounds="fixed") else: self.kernel_ = clone(self.kernel) self._rng = check_random_state(self.random_state) if self.kernel_.requires_vector_input: X, y = self._validate_data(X, y, multi_output=True, y_numeric=True, ensure_2d=True, dtype="numeric") else: X, y = self._validate_data(X, y, multi_output=True, y_numeric=True, ensure_2d=False, dtype=None) # Normalize target value if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) self._y_train_std = np.std(y, axis=0) # Remove mean and make unit variance y = (y - self._y_train_mean) / self._y_train_std else: self._y_train_mean = np.zeros(1) self._y_train_std = 1 if np.iterable(self.alpha) \ and self.alpha.shape[0] != y.shape[0]: if self.alpha.shape[0] == 1: self.alpha = self.alpha[0] else: raise ValueError("alpha must be a scalar or an array" " with same number of entries as y.(%d != %d)" % (self.alpha.shape[0], y.shape[0])) self.X_train_ = np.copy(X) if self.copy_X_train else X self.y_train_ = np.copy(y) if self.copy_X_train else y if self.optimizer is not None and self.kernel_.n_dims > 0: # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=True): if eval_gradient: lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True, clone_kernel=False) return -lml, -grad else: return -self.log_marginal_likelihood(theta, clone_kernel=False) # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, self.kernel_.theta, self.kernel_.bounds))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: if not np.isfinite(self.kernel_.bounds).all(): raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = self.kernel_.bounds for iteration in range(self.n_restarts_optimizer): theta_initial = \ self._rng.uniform(bounds[:, 0], bounds[:, 1]) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) self.kernel_.theta = optima[np.argmin(lml_values)][0] self.kernel_._check_bounds_params() self.log_marginal_likelihood_value_ = -np.min(lml_values) else: self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(self.kernel_.theta, clone_kernel=False) # Precompute quantities required for predictions which are independent # of actual query points K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) # Line 2 # self.L_ changed, self._K_inv needs to be recomputed self._K_inv = None except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator." % self.kernel_,) + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 return self
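Assuming this is the scikit-learn style GaussianProcessRegressor.fit (which it closely mirrors), a typical call from user code looks like the following sketch.

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

X = np.linspace(0.0, 5.0, 20)[:, None]
y = np.sin(X).ravel()
gpr = GaussianProcessRegressor(kernel=C(1.0) * RBF(1.0), alpha=1e-6,
                               normalize_y=True, random_state=0).fit(X, y)
mean, std = gpr.predict(X, return_std=True)
print(mean.shape, std.shape)   # (20,) (20,)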
def chol_gram(self, state): jacob_constr = self.jacob_constr(state) gram = jacob_constr @ self.mult_inv_metric(jacob_constr.T) return sla.cholesky(gram, lower=True)
def getcov(self, around): if rank==0: N=self.N if (self.fixedcov): cov=zeros((N,N)) for i in range(N): cov[i,i]=self.sigreg[i]**2 if(self.verbose>0): print cov G=Gaussian(around,cov) return G icov=zeros((N,N)) delta=self.sigreg/1000.0 toget=[] toget.append(around) ### This is a kinda ugly hack ### We repeat the exactly the same loop twice. ### first populating where to evaluate like ### and the popping hoping for perfect sync if rank==0: for i in range(N): parspi=around*1.0 parsmi=around*1.0 parspi[i]+=delta[i] parsmi[i]-=delta[i] for j in range(N): if (i==j): toget.append(parspi) toget.append(parsmi) else: parspp=parspi*1.0 parspm=parspi*1.0 parsmp=parsmi*1.0 parsmm=parsmi*1.0 parspp[j]+=delta[j] parspm[j]-=delta[j] parsmp[j]+=delta[j] parsmm[j]-=delta[j] toget.append(parspp) toget.append(parsmm) toget.append(parspm) toget.append(parsmp) likes=self.like(toget) if rank==0: like0=likes.pop(0) for i in range(N): for j in range(N): if (i==j): der=(likes.pop(0)+likes.pop(0)-2*like0)/(delta[i]**2) else: der=(likes.pop(0)+likes.pop(0)-likes.pop(0)-likes.pop(0))/(4*delta[i]*delta[j]) icov[i,j]=-der icov[j,i]=-der while True: if(self.verbose>0): print "Regularizing cholesky" for i in range(N): icov[i,i]+=1/self.sigreg[i]**2 try: ch=la.cholesky(icov) break except: pass cov=la.inv(icov) if(self.verbose>0): print cov G=Gaussian(around,self.blow*cov) return G
def __init__(self, pot_energy, metric, grad_pot_energy=None): super().__init__(pot_energy, metric, grad_pot_energy) self.chol_metric = sla.cholesky(metric, lower=True)
def chol(x):
    """Return the lower-triangular Cholesky factor of x.

    scipy.linalg.cholesky returns the upper factor by default, so the transpose
    gives L with L @ L.T == x.
    """
    return linalg.cholesky(x).T
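A quick check that transposing scipy's default upper factor matches requesting the lower factor directly:

import numpy as np
from scipy import linalg

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
print(np.allclose(linalg.cholesky(A).T, linalg.cholesky(A, lower=True)))  # True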
def compute_ei(self, comp, pend, cand, vals): if pend.shape[0] == 0: # If there are no pending, don't do anything fancy. # Current best. best = np.min(vals) # The primary covariances for prediction. comp_cov = self.cov(comp) cand_cross = self.cov(comp, cand) # Compute the required Cholesky. obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0]) obsv_chol = spla.cholesky(obsv_cov, lower=True) # Solve the linear systems. alpha = spla.cho_solve((obsv_chol, True), vals - self.mean) beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True) # Predict the marginal means and variances at candidates. func_m = np.dot(cand_cross.T, alpha) + self.mean func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0) # Expected improvement func_s = np.sqrt(func_v) u = (best - func_m) / func_s ncdf = sps.norm.cdf(u) npdf = sps.norm.pdf(u) ei = func_s * (u * ncdf + npdf) return ei else: # If there are pending experiments, fantasize their outcomes. # Create a composite vector of complete and pending. comp_pend = np.concatenate((comp, pend)) # Compute the covariance and Cholesky decomposition. comp_pend_cov = self.cov(comp_pend) + self.noise * np.eye( comp_pend.shape[0]) comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True) # Compute submatrices. pend_cross = self.cov(comp, pend) pend_kappa = self.cov(pend) # Use the sub-Cholesky. obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]] # Solve the linear systems. alpha = spla.cho_solve((obsv_chol, True), vals - self.mean) beta = spla.cho_solve((obsv_chol, True), pend_cross) # Finding predictive means and variances. pend_m = np.dot(pend_cross.T, alpha) + self.mean pend_K = pend_kappa - np.dot(pend_cross.T, beta) # Take the Cholesky of the predictive covariance. pend_chol = spla.cholesky(pend_K, lower=True) # Make predictions. pend_fant = (np.dot( pend_chol, npr.randn(pend.shape[0], self.pending_samples)) + pend_m[:, None]) # Include the fantasies. fant_vals = np.concatenate( (np.tile(vals[:, np.newaxis], (1, self.pending_samples)), pend_fant)) # Compute bests over the fantasies. bests = np.min(fant_vals, axis=0) # Now generalize from these fantasies. cand_cross = self.cov(comp_pend, cand) # Solve the linear systems. alpha = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean) beta = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True) # Predict the marginal means and variances at candidates. func_m = np.dot(cand_cross.T, alpha) + self.mean func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0) # Expected improvement func_s = np.sqrt(func_v[:, np.newaxis]) u = (bests[np.newaxis, :] - func_m) / func_s ncdf = sps.norm.cdf(u) npdf = sps.norm.pdf(u) ei = func_s * (u * ncdf + npdf) return np.mean(ei, axis=1)
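The closed-form expected-improvement expression used twice above can be exercised in isolation; expected_improvement here is a hypothetical standalone helper for a minimization setting.

import numpy as np
import scipy.stats as sps

def expected_improvement(mu, var, best):
    # EI of a Gaussian predictive N(mu, var) relative to the current best (minimization)
    s = np.sqrt(var)
    u = (best - mu) / s
    return s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))

print(expected_improvement(mu=0.0, var=1.0, best=0.0))   # ~0.3989 = 1/sqrt(2*pi)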
synapse_0_masuk = np.reshape(synapse_0_update, (1, -1))
synapse_h_masuk = np.reshape(synapse_h_update, (1, -1))
synapse_1_masuk = np.reshape(synapse_1_update, (1, -1))
# concatenate into a single row vector
masuk = np.concatenate(
    (synapse_0_masuk, synapse_h_masuk, synapse_1_masuk), axis=1)
#%% UKF initialisation without filterpy
X_ = masuk  # the mean (mu, or W-hat in de Lima's formulation)
n = X_.size  # Julier's "dimension of the problem"
lambda_ = alpha**2 * (n + kappa) - n
#%% SIGMA POINTS around mean
mean = np.sum(w_concat) / n  # overall mean
U = cholesky((n + lambda_) * P)  # matrix square root of (n+lambda_)*P, not an elementwise np.sqrt
# U = np.sqrt(n+lambda_)*P
sigmas = np.zeros((2 * n + 1, n))
sigmas[0] = X_  # the filterpy version has shape (121, 60) because it is multiplied by P!
# then...
for k in range(n):
    # stacked downwards, giving (121, 60)
    sigmas[k + 1] = np.subtract(X_, -U[k])
    sigmas[n + k + 1] = np.subtract(X_, U[k])
#%% SIGMA WEIGHTS following Van der Merwe
c_ = .5 / (n + lambda_)
Wm = np.full(2 * n + 1, c_)
Wc = Wm.copy()  # size (2*n+1,); copy so that setting Wc[0] does not also overwrite Wm[0]
Wm[0] = lambda_ / (n + lambda_)  # Merwe mean weight for the central sigma point
Wc[0] = lambda_ / (n + lambda_) + (1 - alpha**2 + beta)
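A self-contained check, with made-up dimensions and tuning constants, that sigma points and Merwe weights built this way recover the mean and covariance exactly:

import numpy as np
from scipy.linalg import cholesky

n, alpha, beta, kappa = 2, 0.1, 2.0, 0.0
lambda_ = alpha**2 * (n + kappa) - n
x = np.array([1.0, 2.0])
P = np.array([[1.0, 0.3],
              [0.3, 0.5]])

U = cholesky((n + lambda_) * P)          # upper factor; its rows spread the sigma points
sigmas = np.zeros((2 * n + 1, n))
sigmas[0] = x
for k in range(n):
    sigmas[k + 1] = x + U[k]
    sigmas[n + k + 1] = x - U[k]

Wm = np.full(2 * n + 1, 0.5 / (n + lambda_))
Wc = Wm.copy()
Wm[0] = lambda_ / (n + lambda_)
Wc[0] = lambda_ / (n + lambda_) + (1 - alpha**2 + beta)

diff = sigmas - x
print(np.allclose(Wm @ sigmas, x))                        # mean is recovered
print(np.allclose((Wc[:, None] * diff).T @ diff, P))      # covariance is recovered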
def curve_fit(f, xdata, ydata, p0=None, sigma=None, absolute_sigma=False, check_finite=True, bounds=(-np.inf, np.inf), method=None, jac=None, **kwargs): """ Use non-linear least squares to fit a function, f, to data. Assumes ``ydata = f(xdata, *params) + eps`` Parameters ---------- f : callable The model function, f(x, ...). It must take the independent variable as the first argument and the parameters to fit as separate remaining arguments. xdata : array_like or object The independent variable where the data is measured. Should usually be an M-length sequence or an (k,M)-shaped array for functions with k predictors, but can actually be any object. ydata : array_like The dependent data, a length M array - nominally ``f(xdata, ...)``. p0 : array_like, optional Initial guess for the parameters (length N). If None, then the initial values will all be 1 (if the number of parameters for the function can be determined using introspection, otherwise a ValueError is raised). sigma : None or M-length sequence or MxM array, optional Determines the uncertainty in `ydata`. If we define residuals as ``r = ydata - f(xdata, *popt)``, then the interpretation of `sigma` depends on its number of dimensions: - A 1-d `sigma` should contain values of standard deviations of errors in `ydata`. In this case, the optimized function is ``chisq = sum((r / sigma) ** 2)``. - A 2-d `sigma` should contain the covariance matrix of errors in `ydata`. In this case, the optimized function is ``chisq = r.T @ inv(sigma) @ r``. .. versionadded:: 0.19 None (default) is equivalent of 1-d `sigma` filled with ones. absolute_sigma : bool, optional If True, `sigma` is used in an absolute sense and the estimated parameter covariance `pcov` reflects these absolute values. If False, only the relative magnitudes of the `sigma` values matter. The returned parameter covariance matrix `pcov` is based on scaling `sigma` by a constant factor. This constant is set by demanding that the reduced `chisq` for the optimal parameters `popt` when using the *scaled* `sigma` equals unity. In other words, `sigma` is scaled to match the sample variance of the residuals after the fit. Mathematically, ``pcov(absolute_sigma=False) = pcov(absolute_sigma=True) * chisq(popt)/(M-N)`` check_finite : bool, optional If True, check that the input arrays do not contain nans of infs, and raise a ValueError if they do. Setting this parameter to False may silently produce nonsensical results if the input arrays do contain nans. Default is True. bounds : 2-tuple of array_like, optional Lower and upper bounds on parameters. Defaults to no bounds. Each element of the tuple must be either an array with the length equal to the number of parameters, or a scalar (in which case the bound is taken to be the same for all parameters.) Use ``np.inf`` with an appropriate sign to disable bounds on all or some parameters. .. versionadded:: 0.17 method : {'lm', 'trf', 'dogbox'}, optional Method to use for optimization. See `least_squares` for more details. Default is 'lm' for unconstrained problems and 'trf' if `bounds` are provided. The method 'lm' won't work when the number of observations is less than the number of variables, use 'trf' or 'dogbox' in this case. .. versionadded:: 0.17 jac : callable, string or None, optional Function with signature ``jac(x, ...)`` which computes the Jacobian matrix of the model function with respect to parameters as a dense array_like structure. It will be scaled according to provided `sigma`. 
If None (default), the Jacobian will be estimated numerically. String keywords for 'trf' and 'dogbox' methods can be used to select a finite difference scheme, see `least_squares`. .. versionadded:: 0.18 kwargs Keyword arguments passed to `leastsq` for ``method='lm'`` or `least_squares` otherwise. Returns ------- popt : array Optimal values for the parameters so that the sum of the squared residuals of ``f(xdata, *popt) - ydata`` is minimized pcov : 2d array The estimated covariance of popt. The diagonals provide the variance of the parameter estimate. To compute one standard deviation errors on the parameters use ``perr = np.sqrt(np.diag(pcov))``. How the `sigma` parameter affects the estimated covariance depends on `absolute_sigma` argument, as described above. If the Jacobian matrix at the solution doesn't have a full rank, then 'lm' method returns a matrix filled with ``np.inf``, on the other hand 'trf' and 'dogbox' methods use Moore-Penrose pseudoinverse to compute the covariance matrix. Raises ------ ValueError if either `ydata` or `xdata` contain NaNs, or if incompatible options are used. RuntimeError if the least-squares minimization fails. OptimizeWarning if covariance of the parameters can not be estimated. See Also -------- least_squares : Minimize the sum of squares of nonlinear functions. scipy.stats.linregress : Calculate a linear least squares regression for two sets of measurements. Notes ----- With ``method='lm'``, the algorithm uses the Levenberg-Marquardt algorithm through `leastsq`. Note that this algorithm can only deal with unconstrained problems. Box constraints can be handled by methods 'trf' and 'dogbox'. Refer to the docstring of `least_squares` for more information. Examples -------- >>> import matplotlib.pyplot as plt >>> from scipy.optimize import curve_fit >>> def func(x, a, b, c): ... return a * np.exp(-b * x) + c Define the data to be fit with some noise: >>> xdata = np.linspace(0, 4, 50) >>> y = func(xdata, 2.5, 1.3, 0.5) >>> np.random.seed(1729) >>> y_noise = 0.2 * np.random.normal(size=xdata.size) >>> ydata = y + y_noise >>> plt.plot(xdata, ydata, 'b-', label='data') Fit for the parameters a, b, c of the function `func`: >>> popt, pcov = curve_fit(func, xdata, ydata) >>> popt array([ 2.55423706, 1.35190947, 0.47450618]) >>> plt.plot(xdata, func(xdata, *popt), 'r-', ... label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) Constrain the optimization to the region of ``0 <= a <= 3``, ``0 <= b <= 1`` and ``0 <= c <= 0.5``: >>> popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 1., 0.5])) >>> popt array([ 2.43708906, 1. , 0.35015434]) >>> plt.plot(xdata, func(xdata, *popt), 'g--', ... label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) >>> plt.xlabel('x') >>> plt.ylabel('y') >>> plt.legend() >>> plt.show() """ if p0 is None: # determine number of parameters by inspecting the function from scipy._lib._util import getargspec_no_self as _getargspec args, varargs, varkw, defaults = _getargspec(f) if len(args) < 2: raise ValueError("Unable to determine number of fit parameters.") n = len(args) - 1 else: p0 = np.atleast_1d(p0) n = p0.size lb, ub = prepare_bounds(bounds, n) if p0 is None: p0 = _initialize_feasible(lb, ub) bounded_problem = np.any((lb > -np.inf) | (ub < np.inf)) if method is None: if bounded_problem: method = 'trf' else: method = 'lm' if method == 'lm' and bounded_problem: raise ValueError("Method 'lm' only works for unconstrained problems. 
" "Use 'trf' or 'dogbox' instead.") # optimization may produce garbage for float32 inputs, cast them to float64 # NaNs can not be handled if check_finite: ydata = np.asarray_chkfinite(ydata, float) else: ydata = np.asarray(ydata, float) if isinstance(xdata, (list, tuple, np.ndarray)): # `xdata` is passed straight to the user-defined `f`, so allow # non-array_like `xdata`. if check_finite: xdata = np.asarray_chkfinite(xdata, float) else: xdata = np.asarray(xdata, float) if ydata.size == 0: raise ValueError("`ydata` must not be empty!") # Determine type of sigma if sigma is not None: sigma = np.asarray(sigma) # if 1-d, sigma are errors, define transform = 1/sigma if sigma.shape == (ydata.size, ): transform = 1.0 / sigma # if 2-d, sigma is the covariance matrix, # define transform = L such that L L^T = C elif sigma.shape == (ydata.size, ydata.size): try: # scipy.linalg.cholesky requires lower=True to return L L^T = A transform = cholesky(sigma, lower=True) except LinAlgError: raise ValueError("`sigma` must be positive definite.") else: raise ValueError("`sigma` has incorrect shape.") else: transform = None func = _wrap_func(f, xdata, ydata, transform) if callable(jac): jac = _wrap_jac(jac, xdata, transform) elif jac is None and method != 'lm': jac = '2-point' if method == 'lm': # Remove full_output from kwargs, otherwise we're passing it in twice. return_full = kwargs.pop('full_output', False) res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs) popt, pcov, infodict, errmsg, ier = res cost = np.sum(infodict['fvec']**2) if ier not in [1, 2, 3, 4]: raise RuntimeError("Optimal parameters not found: " + errmsg) else: # Rename maxfev (leastsq) to max_nfev (least_squares), if specified. if 'max_nfev' not in kwargs: kwargs['max_nfev'] = kwargs.pop('maxfev', None) res = least_squares(func, p0, jac=jac, bounds=bounds, method=method, **kwargs) if not res.success: raise RuntimeError("Optimal parameters not found: " + res.message) cost = 2 * res.cost # res.cost is half sum of squares! popt = res.x # Do Moore-Penrose inverse discarding zero singular values. _, s, VT = svd(res.jac, full_matrices=False) threshold = np.finfo(float).eps * max(res.jac.shape) * s[0] s = s[s > threshold] VT = VT[:s.size] pcov = np.dot(VT.T / s**2, VT) return_full = False warn_cov = False if pcov is None: # indeterminate covariance pcov = zeros((len(popt), len(popt)), dtype=float) pcov.fill(inf) warn_cov = True elif not absolute_sigma: if ydata.size > p0.size: s_sq = cost / (ydata.size - p0.size) pcov = pcov * s_sq else: pcov.fill(inf) warn_cov = True if warn_cov: warnings.warn('Covariance of the parameters could not be estimated', category=OptimizeWarning) if return_full: return popt, pcov, infodict, errmsg, ier else: return popt, pcov
def fit(self, X_l, y_l, X_h, y_h): """Fit Gaussian process regression model. Parameters ---------- X_l : array-like, shape = (n_l_samples, n_features) Training data y_l : array-like, shape = (n_l_samples, [n_output_dims]) Target values X_h : array-like, shape = (n_h_samples, n_features) Training data y_h : array-like, shape = (n_h_samples, [n_output_dims]) Target values Returns ------- self : returns an instance of self. """ if self.kernel is None: # Use an RBF kernel as default self.kernel_l_ = C(1.0, constant_value_bounds="fixed") \ * RBF(1.0, length_scale_bounds="fixed") else: self.kernel_l_ = clone(self.kernel) self.kernel_d_ = clone(self.kernel_l_) self.rng = check_random_state(self.random_state) X_l, y_l = check_X_y(X_l, y_l, multi_output=True, y_numeric=True) X_h, y_h = check_X_y(X_h, y_h, multi_output=True, y_numeric=True) self.n_l_ = len(X_l) # Normalize target value if self.normalize_y: self._y_l_train_mean = np.mean(y_l, axis=0) self._y_h_train_mean = np.mean(y_h, axis=0) # demean y y_l = y_l - self._y_l_train_mean y_h = y_h - self._y_h_train_mean else: self._y_l_train_mean = np.zeros(1) self._y_h_train_mean = np.zeros(1) self.X_train_ = np.vstack((X_l, X_h)) self.y_train_ = np.hstack((y_l, y_h)) theta_initial = np.hstack( (np.array([self.rho]), self.kernel_l_.theta, self.kernel_d_.theta)) if self.optimizer is not None and self.kernel_l_.n_dims > 0: # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=self.eval_gradient): if eval_gradient: raise Warning( "eval_gradient = True mode is not implemented yet!") lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True) return -lml, -grad else: return -self.log_marginal_likelihood(theta) theta_bounds = np.r_[np.array(self.rho_bounds)[np.newaxis], self.kernel_l_.bounds, self.kernel_d_.bounds] # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, theta_initial, theta_bounds, self.eval_gradient))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: flag = np.isfinite(self.kernel_l_.bounds).all() and \ np.isfinite(self.kernel_d_.bounds).all() and \ np.isfinite(self.rho_bounds).all() if not flag: raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = np.vstack( (np.array(self.rho_bounds).reshape(1, -1), self.kernel_l_.bounds, self.kernel_d_.bounds)) for iteration in range(self.n_restarts_optimizer): theta_initial = np.hstack( (self.rng.uniform(bounds[0, 0], bounds[0, 1]), np.exp(self.rng.uniform(bounds[1:, 0], bounds[1:, 1])))) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds, self.eval_gradient)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) best_hyperparams = optima[np.argmin(lml_values)][0] self.rho = best_hyperparams[0] self.kernel_l_.theta = best_hyperparams[1:1 + len(self.kernel_l_.theta)] self.kernel_d_.theta = best_hyperparams[1 + len(self.kernel_l_.theta):] self.log_marginal_likelihood_value_ = -np.min(lml_values) else: self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(theta_initial) # Precompute quantities required for predictions which are independent # of actual query points K_lf = self.kernel_l_(self.X_train_[:self.n_l_]) K = np.vstack(( np.hstack((self.kernel_l_(self.X_train_[:self.n_l_]), self.rho * 
self.kernel_l_(self.X_train_[:self.n_l_], self.X_train_[self.n_l_:]))), np.hstack(( self.rho * self.kernel_l_(self.X_train_[self.n_l_:], self.X_train_[:self.n_l_]), self.rho**2 * self.kernel_l_(self.X_train_[self.n_l_:]) + # noqa W504 self.kernel_d_(self.X_train_[self.n_l_:]))))) K_lf[np.diag_indices_from(K_lf)] += self.alpha K[np.diag_indices_from(K)] += self.alpha try: self.L_lf_ = cholesky(K_lf, lower=True) # Line 2 (lf) self.L_ = cholesky(K, lower=True) # Line 2 # self.L_ changed, self._K_inv needs to be recomputed self._K_inv = None self._K_lf_inv = None except np.linalg.LinAlgError as exc: exc.args = ("The kernel is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator.", ) + exc.args raise self.alpha_lf_ = cho_solve((self.L_lf_, True), self.y_train_[:self.n_l_]) # Line 3 (Lf) self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 return self
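A toy sketch of the block covariance assembled above for the two-fidelity case; rbf is a made-up stand-in kernel and the inputs are random, so this only illustrates the layout [[K_lf, rho*K_lh], [rho*K_hl, rho^2*K_hh + K_dd]] and the Cholesky/cho_solve steps.

import numpy as np
from scipy.linalg import cholesky, cho_solve

def rbf(a, b):
    # stand-in squared-exponential kernel on 1-d inputs
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2)

rng = np.random.default_rng(3)
x_l, x_h = rng.uniform(-2, 2, 6), rng.uniform(-2, 2, 3)
rho = 0.7
K = np.block([[rbf(x_l, x_l),              rho * rbf(x_l, x_h)],
              [rho * rbf(x_h, x_l), rho**2 * rbf(x_h, x_h) + rbf(x_h, x_h)]])
K[np.diag_indices_from(K)] += 1e-6                       # plays the role of `alpha` above
L = cholesky(K, lower=True)                              # Line 2
alpha_ = cho_solve((L, True), rng.standard_normal(9))    # Line 3
print(np.allclose(K, K.T), alpha_.shape)                 # True (9,)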