def test_multivariate_normal(session_tf, x, mu, cov_sqrt):
    """Compare ``logdensities.multivariate_normal`` with SciPy's ``mvn.logpdf``.

    The covariance is built as ``cov_sqrt @ cov_sqrt.T`` and passed to the
    function under test as its Cholesky factor.  When ``x`` or ``mu`` is not
    2-D the call is only required to raise; otherwise the evaluated tensor
    must match the SciPy reference.
    """
    cov = np.dot(cov_sqrt, cov_sqrt.T)
    L = np.linalg.cholesky(cov)

    both_two_dimensional = len(x.shape) == 2 and len(mu.shape) == 2
    if not both_two_dimensional:
        # Malformed ranks: building the density must fail.
        with pytest.raises(Exception):
            logdensities.multivariate_normal(
                tf.convert_to_tensor(x),
                tf.convert_to_tensor(mu),
                tf.convert_to_tensor(L))
        return

    x_tf = tf.placeholder(settings.float_type)
    mu_tf = tf.placeholder(settings.float_type)
    density = logdensities.multivariate_normal(
        x_tf, mu_tf, tf.convert_to_tensor(L))
    gp_result = session_tf.run(density, feed_dict={x_tf: x, mu_tf: mu})

    n_columns = mu.shape[1]
    if n_columns <= 1:
        # Single mean column: every column of x is scored against it.
        sp_result = mvn.logpdf(x.T, mu.ravel(), cov)
    elif x.shape[1] > 1:
        # Column-by-column pairing of x and mu.
        sp_result = [mvn.logpdf(x[:, col], mu[:, col], cov)
                     for col in range(n_columns)]
    else:
        # One x column scored against every mean column.
        flat_x = x.ravel()
        sp_result = [mvn.logpdf(flat_x, mu[:, col], cov)
                     for col in range(n_columns)]

    assert_allclose(gp_result, sp_result)
def recluster_DUP(df):
    """Re-score genotype clusters 1-3 of ``df`` with 2-D (AB, CN) Gaussians.

    Per-genotype means/covariances/counts are taken from ``df`` grouped by
    ``gtn`` and combined with the fixed priors below through ``get_mu_map``
    and ``get_sigma_map``.  The frame is modified in place (columns
    ``gt_adj``, ``lld1``..``lld3``, ``gt_new``, ``gq``, ``med_gq``,
    ``q10_gq``); nothing is returned.
    """
    # priors
    mu_0 = {
        1: np.array([0.03, 2]),
        2: np.array([0.27, 3]),
        3: np.array([0.45, 4]),
    }
    psi = {
        1: np.matrix('0.00128 -0.00075; -0.00075 1.1367'),
        2: np.matrix('0.013 -0.0196; -0.0196 0.4626'),
        3: np.matrix('0.0046 -0.0112; -0.0112 0.07556'),
    }
    lambda_0 = 1
    nu_0 = 1  # not referenced below

    grouped = df.loc[:, ['gtn', 'CN', 'AB']].groupby(['gtn'])
    covs = grouped[['AB', 'CN']].cov()
    mns = grouped[['AB', 'CN']].mean()
    cts = grouped.size()

    df.loc[:, 'gt_adj'] = df.loc[:, 'gtn'].copy()

    genotypes = (1, 2, 3)
    mu_map = {gt: get_mu_map(gt, cts, lambda_0, mu_0, mns)
              for gt in genotypes}
    sigma_map = {gt: get_sigma_map(gt, cts, lambda_0, psi, covs, mns, mu_0)
                 for gt in genotypes}

    # Log density of every row under each genotype's Gaussian.
    lld_columns = ['lld1', 'lld2', 'lld3']
    for gt, column in zip(genotypes, lld_columns):
        df.loc[:, column] = multivariate_normal.logpdf(
            df.loc[:, ['AB', 'CN']], mean=mu_map[gt], cov=sigma_map[gt])

    lld_code = {'lld1': 1, 'lld2': 2, 'lld3': 3}
    scores = df.loc[:, lld_columns]
    df.loc[:, 'gt_new'] = scores.idxmax(1).map(lld_code)
    # Quality = best log density minus the median (i.e. second best of three).
    df.loc[:, 'gq'] = scores.max(axis=1) - scores.median(axis=1)
    df.loc[:, 'med_gq'] = df.loc[:, 'gq'].median()
    df.loc[:, 'q10_gq'] = df.loc[:, 'gq'].quantile(0.1)
    return
def recluster_INV_BND(df):
    """Re-score genotype clusters 1-3 of ``df`` with 1-D Gaussians over AB.

    Per-genotype mean/variance/count of ``AB`` are taken from ``df`` grouped
    by ``gtn`` and combined with the scalar priors below through
    ``get_mu_map`` and ``get_sigma_map``.  The frame is modified in place
    (columns ``gt_adj``, ``lld1``..``lld3``, ``gt_new``, ``gq``, ``med_gq``,
    ``q10_gq``); nothing is returned.

    The densities are evaluated on the ``AB`` column only.  The priors and
    the grouped statistics are one-dimensional, so scoring the two-column
    ``['AB', 'CN']`` selection against them (as this function previously did)
    is a dimension mismatch.
    """
    # priors
    mu_0 = {1: 0.03, 2: 0.46, 3: 0.94}
    psi = {1: 0.00128, 2: 0.013, 3: 0.0046}
    lambda_0 = 1

    gpd = df.loc[:, ['gtn', 'AB']].groupby(['gtn'])
    covs = gpd[['AB']].cov()
    mns = gpd[['AB']].mean()
    cts = gpd.size()

    df.loc[:, 'gt_adj'] = df.loc[:, 'gtn'].copy()

    genotypes = (1, 2, 3)
    mu_map = {gt: get_mu_map(gt, cts, lambda_0, mu_0, mns)
              for gt in genotypes}
    sigma_map = {gt: get_sigma_map(gt, cts, lambda_0, psi, covs, mns, mu_0)
                 for gt in genotypes}

    # Keep AB as an (n, 1) frame so each row is one 1-D observation.
    allele_balance = df.loc[:, ['AB']]
    lld_columns = ['lld1', 'lld2', 'lld3']
    for gt, column in zip(genotypes, lld_columns):
        df.loc[:, column] = multivariate_normal.logpdf(
            allele_balance, mean=mu_map[gt], cov=sigma_map[gt])

    lld_code = {'lld1': 1, 'lld2': 2, 'lld3': 3}
    scores = df.loc[:, lld_columns]
    df.loc[:, 'gt_new'] = scores.idxmax(1).map(lld_code)
    # Quality = best log density minus the median (i.e. second best of three).
    df.loc[:, 'gq'] = scores.max(axis=1) - scores.median(axis=1)
    df.loc[:, 'med_gq'] = df.loc[:, 'gq'].median()
    df.loc[:, 'q10_gq'] = df.loc[:, 'gq'].quantile(0.1)
    return
def recluster_DEL(df):
    """Re-score genotype clusters 1-3 of ``df`` with 2-D (AB, CN) Gaussians.

    Works like the other ``recluster_*`` functions, with one extra step: an
    ordinary-least-squares fit of ``CN ~ AB`` is used to decide whether the
    initial genotype labels (``gtn``) should be replaced by labels obtained
    from cutting ``AB`` at rescaled boundaries before the cluster statistics
    are computed.

    The frame is modified in place (columns ``gt_adj``, ``lld1``..``lld3``,
    ``gt_new``, ``gq``, ``med_gq``, ``q10_gq``); nothing is returned.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm which statements belong inside the
    ``with`` / ``if`` blocks against the original file.
    """
    #priors
    mu_0={1: np.array([0.03, 2]), 2:np.array([0.46,1.1]), 3:np.array([0.94,0.1])}
    psi={1:np.matrix('0.00128 -0.00075; -0.00075 1.1367'), 2:np.matrix('0.013 -0.0196; -0.0196 0.4626'), 3:np.matrix('0.0046 -0.0112; -0.0112 0.07556')}
    lambda_0=1
    # nu_0 is assigned but not referenced anywhere in this function.
    nu_0=1
    # Per-genotype statistics based on the initial labels.
    gpd=df.loc[:, ['gtn', 'CN', 'AB']].groupby(['gtn'])
    covs=gpd[['AB','CN']].cov()
    mns=gpd[['AB', 'CN']].mean()
    cts=gpd.size()
    # All warnings raised inside this block are suppressed.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        lin_fit=smf.ols('CN~AB',df).fit()
        # Default: adjusted genotype equals the initial genotype.
        df.loc[:, 'gt_adj']=df.loc[:, 'gtn'].copy()
        #check that CN, AB are correlated, and in the right direction
        if (lin_fit.rsquared>0.5) and (-1*lin_fit.params[1]>0.5):
            # AB value at which the fitted line reaches CN == 0.
            x_int=-lin_fit.params[0]/lin_fit.params[1]
            #adjust init GT calls if AB shifted toward 0
            if x_int<1:
                #find mdpts between neighboring GT
                mins=gpd['AB'].min()
                maxes=gpd['AB'].max()
                # Fallback boundaries when a neighbouring genotype is absent.
                bound1=0.2
                bound2=0.7
                if (2 in mins) and (1 in maxes):
                    bound1=0.5*(mins[2]+maxes[1])
                if (3 in mins) and (2 in maxes):
                    bound2=0.5*(mins[3]+maxes[2])
                # Rescale the boundaries by the x-intercept.
                newbound1=bound1*x_int
                newbound2=bound2*x_int
                # Relabel by binning AB, then recompute the cluster statistics
                # from the adjusted labels.
                df.loc[:, 'gt_adj']=pd.to_numeric(pd.cut(df['AB'], bins=[-1, newbound1, newbound2, 1], labels=['1', '2', '3']))
                gpd=df.loc[:,['gt_adj', 'CN', 'AB']].groupby(['gt_adj'])
                covs=gpd[['AB', 'CN']].cov()
                mns=gpd[['AB', 'CN']].mean()
                cts=gpd.size()
    # Combine priors with the cluster statistics for each genotype.
    mu_map={1: get_mu_map(1, cts, lambda_0, mu_0, mns), 2: get_mu_map(2, cts, lambda_0, mu_0, mns), 3: get_mu_map(3, cts, lambda_0, mu_0, mns)}
    sigma_map={1: get_sigma_map(1, cts, lambda_0, psi, covs, mns, mu_0), 2: get_sigma_map(2, cts, lambda_0, psi, covs, mns, mu_0), 3: get_sigma_map(3, cts, lambda_0, psi, covs, mns, mu_0)}
    # Log density of every row under each genotype's Gaussian.
    df.loc[:, 'lld1']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[1], cov=sigma_map[1])
    df.loc[:, 'lld2']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[2], cov=sigma_map[2])
    df.loc[:, 'lld3']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[3], cov=sigma_map[3])
    lld_code={'lld1':1, 'lld2':2, 'lld3':3}
    # New genotype = the genotype whose Gaussian gives the highest log density.
    df.loc[:,'gt_new']=df.loc[:, ['lld1', 'lld2', 'lld3']].idxmax(1).map(lld_code)
    # Quality = best log density minus the median of the three.
    df.loc[:, 'gq']=df.loc[:, ['lld1', 'lld2', 'lld3']].max(axis=1)-df.loc[:, ['lld1', 'lld2', 'lld3']].median(axis=1)
    df.loc[:, 'med_gq']=df.loc[:, 'gq'].median()
    df.loc[:, 'q10_gq']=df.loc[:, 'gq'].quantile(0.1)
    return
def log_joint(y, z, lam, prior_cov, likelihood_cov):
    '''
    The log joint distribution of the model.

    The prior term is the log density of ``z`` under N(lam, prior_cov); the
    likelihood term is the sum over the rows of ``y`` of their log density
    under N(z, likelihood_cov).

    :param y: 2-D array; ``y[ii, :]`` is one observation
    :param z: point at which the prior is evaluated and mean of the likelihood
    :param lam: prior mean (must support ``.T``)
    :param prior_cov: covariance of the prior
    :param likelihood_cov: covariance of the likelihood
    :return: tuple ``(log joint, log prior, log likelihood)``
    '''
    plog_prior = mvn.logpdf(z, lam.T, prior_cov)

    plog_likelihood = 0
    # range() instead of the Python-2-only xrange(), so this runs on both.
    for ii in range(len(y)):
        plog_likelihood += mvn.logpdf(y[ii, :], z.T, likelihood_cov)

    plog_joint = plog_prior + plog_likelihood
    return plog_joint, plog_prior, plog_likelihood
def test_logpdf_default_values(self): # Check that the log of the pdf is in fact the logpdf # with default parameters Mean=None and cov = 1 np.random.seed(1234) x = np.random.randn(5) d1 = multivariate_normal.logpdf(x) d2 = multivariate_normal.pdf(x) # check whether default values are being used d3 = multivariate_normal.logpdf(x, None, 1) d4 = multivariate_normal.pdf(x, None, 1) assert_allclose(d1, np.log(d2)) assert_allclose(d3, np.log(d4))
def condition_on_2(mu_x, sigma_x, A, y, sigma_obs):
    """Condition the Gaussian N(mu_x, sigma_x) on an observation ``y``.

    The observation is modelled with mean ``A.dot(mu_x)`` and covariance
    ``A sigma_x A^T + sigma_obs``.

    Returns ``((mu, sigma), ll)``: the conditioned mean and covariance, and
    the log density of ``y`` under the observation model.
    """
    predicted_obs = A.dot(mu_x)
    cross_cov = sigma_x.dot(A.T)
    obs_cov = A.dot(sigma_x).dot(A.T) + sigma_obs

    # Linear systems are solved instead of inverting obs_cov explicitly.
    mean_shift = cross_cov.dot(np.linalg.solve(obs_cov, y - predicted_obs))
    cov_reduction = cross_cov.dot(np.linalg.solve(obs_cov, cross_cov.T))

    posterior = (mu_x + mean_shift, sigma_x - cov_reduction)
    log_evidence = mvn.logpdf(y, predicted_obs, obs_cov)
    return posterior, log_evidence
def nll(self, x, y, z, theta_x, theta_y, theta_z, sigma_x, sigma_y, sigma_z):
    '''
    Negative log-likelihood of ``self.events`` under a 3-D Gaussian.

    The Gaussian has mean ``(x, y, z)`` and the covariance returned by
    ``self._compute_covariance_matrix`` for the given angles and widths.

    Returns the summed negative log density over all events, or NaN (after
    printing a warning) when the covariance determinant is not positive.

    Output uses single-argument ``print(...)`` calls so the function runs
    under both Python 2 and Python 3.
    '''
    if self.verbose:
        print('\n==========')
        print('x :\t%s [cm]' % (x,))
        print('y :\t%s [cm]' % (y,))
        print('z :\t%s [cm]' % (z,))
        print('theta_x:\t%s [rad]' % (theta_x,))
        print('theta_y:\t%s [rad]' % (theta_y,))
        print('theta_z:\t%s [rad]' % (theta_z,))
        print('sigma x:\t%s [cm]' % (sigma_x,))
        print('sigma y:\t%s [cm]' % (sigma_y,))
        print('sigma z:\t%s [cm]' % (sigma_z,))

    cov = self._compute_covariance_matrix(theta_x, theta_y, theta_z,
                                          sigma_x, sigma_y, sigma_z)
    # Computed once; used for both the verbose report and the validity check.
    determinant = np.linalg.det(cov)

    if self.verbose:
        print('covariance matrix %s' % (cov,))
        print('determinant:  %s' % (determinant,))

    # check singularity / invertibility (a NaN determinant also lands here)
    if not determinant > 0.:
        print('WARNING! Singular covariance matrix, cannot invert!')
        return float('nan')

    nll = -multivariate_normal.logpdf(self.events,
                                      mean=np.array([x, y, z]),
                                      cov=cov).sum()

    if self.verbose:
        print('nLL:  %s' % (nll,))

    return nll
def _compute_sum_nll_vtx(self, cov_beam, x, y, z):
    """Sum the negative log-likelihoods of all vertices in ``self.rnevents``.

    For each index ``i`` a per-vertex covariance is built from
    ``self.uncertainties[i]`` (scaled by ``self.errorscale``) and
    ``self.correlations[i]`` (xy, xz, yz), added to ``cov_beam``, and used as
    the covariance of a Gaussian with mean ``(x, y, z)`` evaluated at
    ``self.events[i]``.

    Values are collected in a list and summed once, rather than growing an
    array with ``np.append`` on every iteration.  Progress output uses a
    single-argument ``print(...)`` so it runs on Python 2 and 3.
    """
    mean = np.array([x, y, z])  # identical for every vertex
    scale = self.errorscale
    nlls = []

    for i in self.rnevents:
        unc = self.uncertainties[i]
        corr = self.correlations[i]

        vtx_xx = np.power(scale * unc[0], 2)
        vtx_yy = np.power(scale * unc[1], 2)
        vtx_zz = np.power(scale * unc[2], 2)
        vtx_xy = corr[0] * scale * unc[0] * scale * unc[1]
        vtx_xz = corr[1] * scale * unc[0] * scale * unc[2]
        vtx_yz = corr[2] * scale * unc[1] * scale * unc[2]

        cov_vtx = np.array([
            [vtx_xx, vtx_xy, vtx_xz],
            [vtx_xy, vtx_yy, vtx_yz],
            [vtx_xz, vtx_yz, vtx_zz],
        ], dtype=np.float64)

        cov_tot = cov_vtx + cov_beam

        # NOTE(review): the original author flagged allow_singular=True as
        # unexplained ("this was needed because?"); kept as is.
        nll = -multivariate_normal.logpdf(self.events[i], mean=mean,
                                          cov=cov_tot, allow_singular=True)

        if self.verbose and i % 20 == 0:
            # "sum nll" is the total before the current vertex is added.
            print('\t====> evaluated %d/%d vertex, nll = %f, sum nll = %f'
                  % (i, self.nevents, nll, np.sum(nlls)))

        nlls.append(nll)

    return np.sum(np.asarray(nlls, dtype=np.float64))
def simple_model_prior_logpdf(values):
    """Log prior density of a parameter vector laid out in three slices.

    With ``c = components`` (module level), ``values[:c]`` are scored under
    a zero-mean Gaussian with covariance ``COV_ALPHA * I``, ``values[c:2c]``
    with ``uniform_logpdf`` on ``(START, END)``, and ``values[2c:3c]`` under
    ``invgamma`` with ``(A, LOC, SCALE)``.  The three parts are added.
    """
    first_cut = components
    second_cut = 2 * components
    third_cut = 3 * components

    mean_terms = [uniform_logpdf(val, START, END)
                  for val in values[first_cut:second_cut]]
    means = np.sum(mean_terms)

    scale_terms = [invgamma.logpdf(val, A, LOC, SCALE)
                   for val in values[second_cut:third_cut]]
    scales = np.sum(scale_terms)

    weights = multivariate_normal.logpdf(
        values[:first_cut],
        mean=np.zeros(components),
        cov=COV_ALPHA * np.eye(components))

    return weights + means + scales
def loglikelihood(A):
    """Compute the log likelihood of a hard-assignment GMM.

    ``A = [A_1, A_2, ...]`` holds one array of points per cluster.  Each
    cluster is scored under a Gaussian fitted to itself (sample mean and
    ``np.cov`` covariance) and the per-point log densities are summed over
    all clusters.
    """
    total = 0
    for cluster in A:
        centre = cluster.mean(axis=0)
        spread = np.cov(cluster.T)
        point_scores = multivariate_normal.logpdf(cluster, mean=centre,
                                                  cov=spread)
        total = total + point_scores.sum()
    return total
def test(test_img, test_label, m_array, mu_array, sigma_array): error = 0 abstain = 0 thre = 0 pi_array = np.log(m_array / np.sum(m_array)) test_n = len(test_label) print "Run ", test_n, " Tests" test_img_array = np.array(test_img) p_x = np.zeros((10, test_n)) for k in range(10): p_x[k] = multivariate_normal.logpdf(test_img_array, mean=mu_array[k], cov=sigma_array[k]) # px_T = p_x.T + pi_array px_T = p_x.T # np.save("p_x.npy", px_T) for i in range(test_n): log_sum = logsumexp(px_T[i]) py_x = np.exp(px_T[i] - log_sum) print i, py_x class_id = np.argmax(px_T[i]) print "Predict: ", class_id, "Accurate: ", test_label[i] m = np.sort(px_T[i]) if m[9] - m[8] < thre: abstain += 1 continue if class_id != test_label[i]: # print i error += 1 print "Abstain ", abstain return error * 100.0 / (test_n - abstain)
def predict_log_probs(self, X, bias_term=None):
    '''
    Log posterior class probabilities for the rows of X.

    Parameters:
    -----------
    X: numpy array of size 'unknown x m'
       Explanatory variables

    bias_term: passed through to ``self._bias_term_pre_processing_X``
       together with X before any density is evaluated

    Returns:
    --------
    numpy array of size 'unknown x k'
       Entry (r, c) is the log of the normalised posterior probability of
       class c for row r; exponentiated rows sum to one.
    '''
    X = self._bias_term_pre_processing_X(X, bias_term)
    n_rows, _ = np.shape(X)

    # Unnormalised log posterior: class log density plus class log prior.
    # Class means are the columns of self.means; self.cov is shared.
    joint = np.zeros([n_rows, self.k])
    for cls in range(self.k):
        class_density = mvn.logpdf(X, self.means[:, cls], cov=self.cov)
        joint[:, cls] = class_density + self.log_priors[cls]

    row_norm = logsumexp(joint, axis=1)
    return joint - row_norm[:, np.newaxis]
def loglike(rho, x, y):
    """Negative log-likelihood of the residuals ``y - x.dot(beta)``.

    ``rho[0:K+1]`` are the coefficients ``beta`` and ``rho[K+1]`` is the log
    of the third argument handed to ``mn.logpdf`` (``K`` and ``mn`` come from
    module level; ``mn`` is presumably a SciPy normal distribution, confirm
    at the import site).
    """
    n_coefficients = K + 1
    beta = rho[0:n_coefficients]
    sigma2 = np.exp(rho[n_coefficients])

    residual = y - np.dot(x, beta)
    # Negated so that a minimiser maximises the likelihood.
    return -np.sum(mn.logpdf(residual, 0, sigma2))
def test_miner_init_smoke(miner_df):
    """Smoke test: ``init_models`` runs and sets the expected attributes."""

    def logcf(row, x):
        # Standard-normal log density; `row` is ignored.
        return mvn.logpdf(x, np.zeros(2), np.eye(len(x)))

    miner = MInER(miner_df, logcf, ['x_2', 'x_3'], n_models=2, use_mp=False)
    miner.init_models()

    for attribute in ('_logcf', '_miner_cols', '_miner_col_idxs'):
        assert hasattr(miner, attribute)
    # Guard against hasattr() being trivially true.
    assert not hasattr(miner, 'combat_wombat')
def test_fit_smoke(miner_df):
    """Smoke test: ``fit`` runs and leaves no NaNs in the mined columns."""

    def logcf(row, x):
        # Standard-normal log density; `row` is ignored.
        return mvn.logpdf(x, np.zeros(2), np.eye(len(x)))

    mined_columns = ['x_2', 'x_3']
    miner = MInER(miner_df, logcf, mined_columns, n_models=2, use_mp=False)
    miner.init_models()
    miner.fit(1, 5)

    for column in ('x_2', 'x_3'):
        assert(not np.any(np.isnan(miner._df[column].values)))
def loglikelihood(X, Z, W):
    """Log joint of the data ``X`` given ``Z`` and ``W``, plus a prior on W.

    Row ``i`` of ``X`` (for the first ``N`` rows, ``N`` from module level) is
    scored under a Gaussian with mean ``(Z W^T)[i]`` and covariance
    ``sigmaI``.  Every entry of ``W`` is scored under a zero-mean normal with
    standard deviation ``1 / lam``.  All terms are added.
    """
    ZW = Z.dot(W.T)

    LL = 0
    # range() instead of the Python-2-only xrange().
    for i in range(N):
        LL += mvn.logpdf(X[i], mean=ZW[i], cov=sigmaI)

    # 1.0 / lam: a float numerator avoids integer truncation under Python 2
    # when lam is an int.
    LL += norm.logpdf(W.flatten(), scale=1.0 / lam).sum()
    return LL
def mvn_likelihood(x, mu, Sigma):
    """Return the log density of ``x`` under N(mu, Sigma).

    ``multivariate_normal.logpdf`` requires a 1-D mean, so when ``mu`` has
    two or more dimensions (a column or row vector) both ``x`` and ``mu`` are
    flattened first.  Scalars and 1-D inputs are passed through untouched.

    ``np.ndim`` is used for the rank check so that plain Python floats (which
    have no ``.ndim``) and 0-d arrays are handled, and flattening does not
    depend on which axis of ``mu`` holds the data.

    Singular covariance matrices are allowed.
    """
    if np.ndim(mu) > 1:
        # np.asarray first so matrix subclasses also become true 1-D arrays.
        x = np.asarray(x).ravel()
        mu = np.asarray(mu).ravel()
    return multivariate_normal.logpdf(x=x, mean=mu, cov=Sigma,
                                      allow_singular=True)
def predict(self, X):
    """Return, for each row of X, the class with the highest log posterior.

    ``self.gaussians`` maps a class index ``c`` to a dict with keys
    ``'mean'`` and ``'cov'``; ``self.priors[c]`` is that class's prior.
    The class index is also used as the column index of the score matrix.
    """
    N, _ = X.shape
    K = len(self.gaussians)
    P = np.zeros((N, K))
    # .items() instead of the Python-2-only .iteritems().
    for c, g in self.gaussians.items():
        mean, cov = g['mean'], g['cov']
        P[:, c] = mvn.logpdf(X, mean=mean, cov=cov) + np.log(self.priors[c])
    return np.argmax(P, axis=1)
def kalman_filter(self, observ):
    """Run a Kalman filter over ``observ`` using this model's matrices.

    Returns ``(flt, prd, lhood)``: the filtered densities, the predicted
    densities, and the accumulated log-likelihood of the observed values.
    Missing observations are marked with NaN.
    """
    # System matrices
    F = self.transition_matrix()
    Q = self.transition_covariance()
    H = self.observation_matrix()
    R = self.observation_covariance()

    # Output containers and the running log-likelihood
    num_time_instants = len(observ)
    flt = GaussianDensityTimeSeries(num_time_instants, self.ds)
    prd = GaussianDensityTimeSeries(num_time_instants, self.ds)
    lhood = 0

    for kk in range(num_time_instants):

        # Prediction: the first instant starts from the prior.
        if kk == 0:
            prd_kk = self.initial_state_prior
        else:
            prd_kk = kal.predict(flt.get_instant(kk - 1), F, Q)
        prd.set_instant(kk, prd_kk)

        # Correction, with three cases depending on what is missing.
        y = observ[kk]
        nan_mask = np.isnan(y)

        if not np.any(nan_mask):
            # Nothing missing: full update.
            flt_kk, innov = kal.correct(prd.get_instant(kk), y, H, R)
            lhood = lhood + mvn.logpdf(y, innov.mn, innov.vr)
        elif np.all(nan_mask):
            # Everything missing: the prediction is carried forward.
            flt_kk = prd_kk
        else:
            # Partially missing: drop the missing elements from y and the
            # matching rows/columns from H and R.
            missing = np.where(nan_mask)
            yp = np.delete(y, missing, axis=0)
            Hp = np.delete(H, missing, axis=0)
            R_rows_removed = np.delete(R, missing, axis=0)
            Rp = np.delete(R_rows_removed, missing, axis=1)
            flt_kk, innov = kal.correct(prd.get_instant(kk), yp, Hp, Rp)
            lhood = lhood + mvn.logpdf(yp, innov.mn, innov.vr)

        flt.set_instant(kk, flt_kk)

    return flt, prd, lhood
def compute_likelihood(self, data):
    """Log-likelihood of ``data`` under a mixture of Gaussians at ``origin``.

    ``self.get_weights_and_covariances()`` yields ``(weight, covariance)``
    pairs.  Each point's mixture log density is the log-sum-exp over the
    components of ``logpdf + log(weight)``; the per-point values are summed.
    """
    ps, covs = zip(*self.get_weights_and_covariances())

    # Weighted log density of the data under each component.
    weighted_lps = []
    for p, c in zip(ps, covs):
        lp = multivariate_normal.logpdf(data, mean=origin, cov=c)
        weighted_lps.append(lp + log(p))

    # TODO (carried over from the original author): double check this.
    return sum(logsumexp(weighted_lps, axis=0))
def test_logpdf():
    """logpdf must equal the log of pdf for the same point, mean and cov."""
    np.random.seed(1234)
    # Draw order (x, mean, cov) is fixed so the seeded values stay the same.
    point = np.random.randn(5)
    centre = np.random.randn(5)
    variances = np.abs(np.random.randn(5))

    log_density = multivariate_normal.logpdf(point, centre, variances)
    density = multivariate_normal.pdf(point, centre, variances)
    assert_allclose(log_density, np.log(density))
def test_frozen():
    """A frozen distribution must agree with the unfrozen call forms."""
    np.random.seed(1234)
    # Draw order (x, mean, cov) is fixed so the seeded values stay the same.
    point = np.random.randn(5)
    centre = np.random.randn(5)
    variances = np.abs(np.random.randn(5))

    frozen = multivariate_normal(centre, variances)

    expected_pdf = multivariate_normal.pdf(point, centre, variances)
    assert_allclose(frozen.pdf(point), expected_pdf)

    expected_logpdf = multivariate_normal.logpdf(point, centre, variances)
    assert_allclose(frozen.logpdf(point), expected_logpdf)
def loglikelihood(X, parameters):
    """Log-likelihood of the rows of X under a Gaussian mixture.

    ``parameters`` is ``(pis, mus, Sigmas)``: mixing weights, component means
    (one per row of ``mus``) and component covariances (``Sigmas[k]``).
    """
    pis, mus, Sigmas = parameters
    N = X.shape[0]
    K = len(pis)

    # log_probs[n, k] = log(pi_k) + log N(x_n | mu_k, Sigma_k)
    log_probs = np.empty((N, K))
    for k in range(K):
        log_weight = np.log(pis[k])
        for n in range(N):
            log_probs[n, k] = log_weight + mvn.logpdf(
                X[n, :], mus[k, :], Sigmas[k, :, :])

    per_point = logsumexp(log_probs, axis=1)
    return np.sum(per_point)
def get_supernovae(n, data=True):
    """Simulate ``n`` supernovae and return one dict of results per object.

    Redshifts come from ``RedshiftSampler``; population parameters come from
    ``get_truths_labels_significance()``.  For each object, ``(MB, x1, c)``
    is drawn from the population Gaussian, adjusted, and handed to
    ``get_ia_summary_stats``.  An object whose processing raises
    ``RuntimeError`` is reported on stdout and left out of the result.

    When ``data`` is true the ``"lc"`` entry of every dict is ``None``.
    """
    redshifts = RedshiftSampler()

    # Redshift distribution
    zs = redshifts.sample(size=n)

    # Population stats: (name, value, ...) entries turned into a lookup.
    vals = get_truths_labels_significance()
    mapping = dict((entry[0], entry[1]) for entry in vals)

    cosmology = FlatwCDM(70.0, mapping["Om"])
    mus = cosmology.distmod(zs).value

    alpha = mapping["alpha"]
    beta = mapping["beta"]
    dscale = mapping["dscale"]
    dratio = mapping["dratio"]

    p_high_masses = np.random.uniform(low=0.0, high=1.0, size=n)

    means = np.array([mapping[key] for key in ("mean_MB", "mean_x1", "mean_c")])
    sigmas = np.array([mapping[key] for key in ("sigma_MB", "sigma_x1", "sigma_c")])

    # Population covariance = correlation matrix scaled by sigma_i * sigma_j.
    sigmas_mat = sigmas[:, None] * sigmas[None, :]
    intrinsic = mapping["intrinsic_correlation"]
    correlations = np.dot(intrinsic, intrinsic.T)
    pop_cov = correlations * sigmas_mat

    results = []
    for z, p, mu in zip(zs, p_high_masses, mus):
        try:
            MB, x1, c = np.random.multivariate_normal(means, pop_cov)

            redshift_term = 1.9 * (1 - dratio) / (0.9 + np.power(10, 0.95 * z))
            mass_correction = dscale * (redshift_term + dratio)
            adjustment = - alpha * x1 + beta * c - mass_correction * p
            MB_adj = MB + adjustment
            mb = MB_adj + mu

            result = get_ia_summary_stats(z, MB_adj, x1, c,
                                          cosmo=cosmology, data=data)
            log_prob = multivariate_normal.logpdf([MB, x1, c], means, pop_cov)
            light_curve = None if data else result.get("lc")

            results.append({
                "MB": MB,
                "mB": mb,
                "x1": x1,
                "c": c,
                "m": p,
                "z": z,
                "pc": result["passed_cut"],
                "lp": log_prob,
                "dp": result.get("delta_p"),
                "parameters": result.get("params"),
                "covariance": result.get("cov"),
                "lc": light_curve
            })
        except RuntimeError:
            print("Error on nova: %0.2f %0.2f %0.2f %0.3f" % (MB, x1, c, z))
    return results
def _e_step(self):
    '''
    Recompute the latent-variable posterior (responsibilities) per class.

    For class ``i`` and component ``j`` the log density of ``self.X`` under
    the Gaussian with mean ``self.mu[i][:, j]`` and shared covariance
    ``self.covar`` is added to the log of ``self.latent_var_prior[i][j]``.
    Rows are then normalised so that each sums to one.
    '''
    log_mixing = np.log(self.latent_var_prior)

    for cls, resp in enumerate(self.responsibilities):
        # The existing array is overwritten in place with log values first.
        for comp in range(self.clusters[cls]):
            component_density = mvn.logpdf(self.X, self.mu[cls][:, comp],
                                           self.covar)
            resp[:, comp] = component_density + log_mixing[cls][comp]

        row_norm = logsumexp(resp, axis=1)
        self.responsibilities[cls] = np.exp(resp - row_norm[:, np.newaxis])
def logpdf(x, mean, cov, allow_singular=True):
    """Log of the probability density of N(mean, cov) evaluated at x.

    The normal may be univariate or multivariate.

    Wrapper around scipy's ``multivariate_normal.logpdf`` for SciPy releases
    older than 0.15.0, which do not accept the ``allow_singular`` argument.
    When it is not supported (module flag ``_support_singular`` is false)
    the argument is dropped, and a singular or non-PSD ``cov`` may raise.

    `x` and `mean` may be column vectors, row vectors, or lists; both are
    flattened to 1-D before the call.
    """
    flat_x = np.asarray(x).flatten()
    flat_mean = np.asarray(mean).flatten()

    if not _support_singular:
        return multivariate_normal.logpdf(flat_x, flat_mean, cov)
    return multivariate_normal.logpdf(flat_x, flat_mean, cov, allow_singular)
def predict_score(self, X):
    """Return the class scores ``log density + log prior`` for each row of X.

    ``self.mu[k]`` and ``self.sigma[k]`` are the mean and covariance of
    class ``k``; ``self.prior`` holds the class priors.  The result has one
    row per sample and one column per class.
    """
    num_classes = self.mu.shape[0]
    num_samples, _ = X.shape

    scores = np.zeros((num_samples, num_classes))
    for k in range(num_classes):
        class_mean = self.mu[k, :]
        class_cov = self.sigma[k, :, :]
        scores[:, k] = multivariate_normal.logpdf(X, class_mean, class_cov)

    # One log prior per column, broadcast over the rows.
    scores += [log(prob) for prob in self.prior]
    return scores
def addTraces(self, traces, plaintexts, ciphertexts, knownkeys=None, progressBar=None, pointRange=None):
    """Score every trace against the stored template and update statistics.

    For each trace and each of 16 byte positions, the trace samples at the
    template's points of interest are scored under one diagonal-covariance
    Gaussian per template partition.  Depending on the template's
    ``"partitiontype"`` the per-partition scores are mapped to 256 key
    guesses via ``HypHW`` / ``HypHD`` or used as they are.  The scores are
    accumulated per byte and passed to ``self.stats.updateSubkey``.

    ``knownkeys`` and ``pointRange`` are accepted but not read in this body.

    Raises ``KeyboardInterrupt`` when ``progressBar.wasCanceled()`` is true.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm the indentation of the progress-bar
    and plotting steps against the original file.
    """
    # Hack for now - just use last template found
    template = self.loadTemplatesFromProject()[-1]
    pois = template["poi"]
    # Number of partitions, taken from the first byte's list of means.
    numparts = len(template['mean'][0])
    # Accumulated scores: one row per byte position, one column per entry.
    results = np.zeros((16, 256))
    tdiff = self._reportinginterval
    if progressBar:
        progressBar.setMinimum(0)
        progressBar.setMaximum(16 * len(traces))
    pcnt = 0
    for tnum in range(0, len(traces)):
        for bnum in range(0, 16):
            # Log density of this trace's points of interest under each
            # partition's Gaussian (covariance built with np.diag).
            newresultsint = [multivariate_normal.logpdf(traces[tnum][pois[bnum]], mean=template['mean'][bnum][i], cov=np.diag(template['cov'][bnum][i])) for i in range(0, numparts)]
            ptype = template["partitiontype"]
            if ptype == "PartitionHWIntermediate":
                newresults = []
                # Map to key guess format
                for i in range(0, 256):
                    # Get hypothetical hamming weight
                    hypint = HypHW(plaintexts[tnum], None, i, bnum)
                    newresults.append(newresultsint[ hypint ])
            elif ptype == "PartitionHDLastRound":
                newresults = []
                # Map to key guess format
                for i in range(0, 256):
                    # Get hypothetical hamming distance
                    # hypint = HypHD(plaintexts[tnum], None, i, bnum)
                    hypint = HypHD(None, ciphertexts[tnum], i, bnum)
                    newresults.append(newresultsint[ hypint ])
            else:
                # Any other partition type: scores are used unmapped.
                newresults = newresultsint
            results[bnum] += newresults
            self.stats.updateSubkey(bnum, results[bnum], tnum=(tnum + 1))
            if progressBar:
                progressBar.setValue(pcnt)
                progressBar.updateStatus((tnum, len(traces)), bnum)
                pcnt += 1
                if progressBar.wasCanceled():
                    raise KeyboardInterrupt
        # Do plotting if required
        if (tnum % tdiff) == 0 and self.sr:
            self.sr()
def condition_on(mu_x, sigma_x, C, sigma_obs, y):
    """Condition the Gaussian N(mu_x, sigma_x) on an observation ``y``.

    The observation is modelled with mean ``C.dot(mu_x)`` and covariance
    ``C sigma_x C^T + diag(sigma_obs)``.

    Returns ``(ll, mu, sigma)``: the log density of ``y`` under the
    observation model, then the conditioned mean and covariance.
    """
    _n_obs, _n_state = C.shape  # C must be two-dimensional

    predicted_obs = C.dot(mu_x)
    cross_cov = sigma_x.dot(C.T)
    obs_cov = C.dot(sigma_x).dot(C.T) + np.diag(sigma_obs)

    # Linear systems are solved instead of inverting obs_cov explicitly.
    mean_shift = cross_cov.dot(np.linalg.solve(obs_cov, y - predicted_obs))
    cov_reduction = cross_cov.dot(np.linalg.solve(obs_cov, cross_cov.T))

    posterior_mean = mu_x + mean_shift
    posterior_cov = sigma_x - cov_reduction
    log_evidence = multivariate_normal.logpdf(y, predicted_obs, obs_cov)
    return log_evidence, posterior_mean, posterior_cov
def log_pdf_ppca(X, W_k, mu_k, sigma_k):
    """Log density of every data point under one local PPCA model.

    The model is N(x_n | mu_k, W_k W_k^T + sigma_k I).

    Args:
        X: (D, N), one data point per column
        W_k: (D, M)
        mu_k: (D)
        sigma_k: (scalar)

    Returns:
        log_density: (N)
        T_inv: (M, M), inverse of W_k^T W_k + sigma_k I
    """
    _, N = X.shape  # X must be two-dimensional
    D, M = W_k.shape

    model_cov = W_k @ W_k.T + sigma_k * np.identity(D)   # (D, D)
    latent_mat = W_k.T @ W_k + sigma_k * np.identity(M)  # (M, M)
    T_inv = np.linalg.inv(latent_mat)                    # (M, M)

    # SciPy expects one point per row, hence the transpose.
    log_density = multivariate_normal.logpdf(X.T, mean=mu_k, cov=model_cov)
    return log_density, T_inv
def get_state_sequence(self, x):
    """Return the most likely state sequence for ``x`` (Viterbi algorithm).

    Emissions of state ``j`` are a Gaussian mixture with weights
    ``self.R[j]``, means ``self.mu[j, k]`` and covariances
    ``self.sigma[j, k]``.  ``self.pi`` is the initial state distribution and
    ``self.A`` the transition matrix.

    The emission log-probability is the log of the weighted *sum* of the
    component densities, so the per-component terms are combined with
    log-sum-exp.  (Adding them, as this method did before, gives the log of
    their product.)

    Returns an int32 array with one state per observation; an empty array
    for an empty sequence.
    """
    T = len(x)
    if T == 0:
        return np.zeros(0, dtype=np.int32)

    # make the emission matrix B (in log space)
    logB = np.zeros((self.M, T))
    for j in range(self.M):
        for t in range(T):
            component_terms = [
                np.log(self.R[j, k])
                + mvn.logpdf(x[t], self.mu[j, k], self.sigma[j, k])
                for k in range(self.K)
            ]
            # log sum_k R[j, k] * N(x_t | mu_jk, sigma_jk)
            logB[j, t] = np.logaddexp.reduce(component_terms)
    print("logB:", logB)

    # perform Viterbi as usual
    delta = np.zeros((T, self.M))
    psi = np.zeros((T, self.M), dtype=np.int32)

    # smooth pi in case it is 0
    pi = self.pi + 1e-10
    pi /= pi.sum()

    delta[0] = np.log(pi) + logB[:, 0]
    for t in range(1, T):
        for j in range(self.M):
            next_delta = delta[t - 1] + np.log(self.A[:, j])
            delta[t, j] = np.max(next_delta) + logB[j, t]
            psi[t, j] = np.argmax(next_delta)

    # backtrack
    states = np.zeros(T, dtype=np.int32)
    states[T - 1] = np.argmax(delta[T - 1])
    for t in range(T - 2, -1, -1):
        states[t] = psi[t + 1, states[t + 1]]
    return states
def compute_vlb(self, observations, pi, mu, sigma, gamma):
    """
    Each input is numpy array:
    observations: (N x d), data points
    gamma: (N x C), distribution q(T)
    pi: (C)
    mu: (C x d)
    sigma: (C x d x d)

    Returns value of variational lower bound

    Terms of the form ``g * log(v)`` are evaluated with ``xlogy`` so that
    they are exactly 0 when ``g == 0``.  A responsibility of zero therefore
    no longer turns the whole bound into NaN through ``0 * log(0)``.
    """
    from scipy.special import xlogy

    number_of_observations = observations.shape[0]
    number_of_clusters = gamma.shape[1]

    loss_per_observation = np.zeros(number_of_observations)
    for k in range(number_of_clusters):
        resp = gamma[:, k]
        log_density = multivariate_normal.logpdf(
            observations, mean=mu[k, :], cov=sigma[k, ...])
        # Expected complete-data log-likelihood: g * (log pi_k + log N(...))
        loss_per_observation += xlogy(resp, pi[k]) + resp * log_density
        # Entropy of q: -g * log g
        loss_per_observation -= xlogy(resp, resp)

    total_loss = np.sum(loss_per_observation)
    return total_loss
def recalPKX(pk, means, vars, XX):
    """
    E-step: recompute the class posterior probability matrix P(Y|X).

    :param pk: class prior probabilities, one per class
    :param means: per-class Gaussian means
    :param vars: per-class Gaussian covariances
    :param XX: data points, one per entry
    :return: pkx_array of shape (len(XX), len(pk)); each row sums to one

    If a density cannot be evaluated (SciPy raises ``LinAlgError`` or
    ``ValueError``, e.g. for a singular covariance) a message is printed and
    that entry's unnormalised log posterior is left at 0.  Only those two
    exception types are caught; anything else propagates.

    The normalisation subtracts the row maximum before exponentiating, so
    large log-density gaps do not overflow.
    """
    n_points, n_classes = len(XX), len(pk)
    logpxi_array = np.zeros((n_points, n_classes))
    if n_classes == 0:
        return logpxi_array

    for j in range(n_points):
        for i in range(n_classes):
            try:
                logpxi_array[j, i] = multivariate_normal.logpdf(
                    XX[j], mean=means[i], cov=vars[i]) + np.log(pk[i])
            except (np.linalg.LinAlgError, ValueError):
                # Best effort: keep going with the entry left at 0.
                print("Singular Matrix!!")

    # Softmax over the classes, computed in a numerically stable way.
    shifted = logpxi_array - logpxi_array.max(axis=1, keepdims=True)
    weights = np.exp(shifted)
    pkx_array = weights / weights.sum(axis=1, keepdims=True)
    return pkx_array
def nll(self, x, y, z, theta_x, theta_y, theta_z, sigma_x, sigma_y, sigma_z):
    '''
    Negative log-likelihood of ``self.events`` under a 3-D Gaussian.

    The Gaussian has mean ``(x, y, z)`` and the covariance returned by
    ``self._compute_covariance_matrix`` for the given angles and widths.

    Returns the summed negative log density over all events, or NaN (after
    printing a warning) when the covariance determinant is not positive.

    The determinant is computed once with ``np.linalg.det`` and reused for
    the verbose report; the report previously called ``cov.det()``, which
    NumPy arrays do not provide.  Output uses single-argument ``print(...)``
    calls so the function runs under both Python 2 and Python 3.
    '''
    if self.verbose:
        print('\n==========')
        print('x :\t%s [cm]' % (x,))
        print('y :\t%s [cm]' % (y,))
        print('z :\t%s [cm]' % (z,))
        print('theta_x:\t%s [rad]' % (theta_x,))
        print('theta_y:\t%s [rad]' % (theta_y,))
        print('theta_z:\t%s [rad]' % (theta_z,))
        print('sigma x:\t%s [cm]' % (sigma_x,))
        print('sigma y:\t%s [cm]' % (sigma_y,))
        print('sigma z:\t%s [cm]' % (sigma_z,))

    cov = self._compute_covariance_matrix(theta_x, theta_y, theta_z,
                                          sigma_x, sigma_y, sigma_z)
    determinant = np.linalg.det(cov)

    if self.verbose:
        print('covariance matrix %s' % (np.matrix(cov),))
        print('determinant:  %s' % (determinant,))

    # check singularity / invertibility (a NaN determinant also lands here)
    if not determinant > 0.:
        print('WARNING! Singular covariance matrix, cannot invert!')
        return float('nan')

    nll = -multivariate_normal.logpdf(
        self.events, mean=np.array([x, y, z]), cov=cov).sum()

    if self.verbose:
        print('nLL:  %s' % (nll,))

    return nll
def _log_likelihood(self, features, k_idx):
    """
    Compute the weighted log likelihood of the features under one Gaussian
    of the mixture model.

    The result is ``log(mixing_weight) + logpdf``, where ``logpdf`` is
    ``scipy.stats.multivariate_normal.logpdf`` evaluated with
    ``self.means[k_idx]`` and ``self.covariances[k_idx]``.

    All rows are scored in a single ``logpdf`` call, so the covariance is
    factorised once instead of once per row.

    Arguments:
        features {np.ndarray} -- Features to score, one sample per row.
        k_idx {int} -- Which Gaussian to use (selects self.means[k_idx],
            self.covariances[k_idx], self.mixing_weights[k_idx]).

    Returns:
        np.ndarray -- one weighted log likelihood per row of ``features``,
            shape ``(features.shape[0],)``.
    """
    log_density = multivariate_normal.logpdf(
        features, mean=self.means[k_idx], cov=self.covariances[k_idx])
    weighted = log_density + np.log(self.mixing_weights[k_idx])
    # SciPy squeezes a single-row result to a scalar; restore the 1-D shape.
    return np.asarray(weighted, dtype=float).reshape(features.shape[0])
def critical_values(self, sims):
    """Store the sorted log likelihood ratios of ``sims`` in ``self.sets``.

    With ``self.model == 'config'`` every covariance in ``self.alt_cov`` is
    one alternative configuration: the alternatives' log densities (plus
    their ``self.altprior`` entries) are combined with
    ``Likelihood_Ratio.sumlog``.  Any other model compares the single
    ``self.alt_cov`` against ``self.null_cov`` directly.
    """
    if self.model != 'config':
        # Only the full model (the SNP affects all traits) is considered.
        null = multivariate_normal.logpdf(sims, self.mean, self.null_cov)
        alt = multivariate_normal.logpdf(sims, self.mean, self.alt_cov)
        self.sets = np.sort(alt - null)
        return

    # All configurations of how the SNP may affect the traits.
    # Priors are already in terms of log.
    def alternative(index):
        density = multivariate_normal.logpdf(sims, self.mean,
                                             self.alt_cov[index])
        return density + self.altprior[index]

    null = multivariate_normal.logpdf(sims, self.mean, self.null_cov)
    null = null - self.nullprior

    first = alternative(0)
    second = alternative(1)
    alt = Likelihood_Ratio.sumlog(first, second)
    for index in range(2, len(self.alt_cov)):
        alt = Likelihood_Ratio.sumlog(alternative(index), alt)
    alt = math.log(len(self.alt_cov)) + alt

    self.sets = np.sort(alt - null)
def _log_likelihood(self, features, k_idx):
    """Weighted log likelihood of ``features`` under Gaussian ``k_idx``.

    Returns ``log(self.mixing_weights[k_idx])`` plus the log density of
    ``features`` under the Gaussian with mean ``self.means[k_idx]`` and
    covariance ``self.covariances[k_idx]``.
    """
    log_weight = np.log(self.mixing_weights[k_idx])
    component_mean = self.means[k_idx]
    component_cov = self.covariances[k_idx]
    log_density = multivariate_normal.logpdf(features, component_mean,
                                             component_cov)
    return log_weight + log_density
def tseriescm(data, maxiter=400, burnin=sentinel, thinning=5, level=False,
              trend=True, seasonality=True, deg=2, c0eps=2, c1eps=1,
              c0beta=2, c1beta=1, c0alpha=2, c1alpha=1, priora=False,
              pia=0.5, q0a=1, q1a=1, priorb=False, q0b=1, q1b=1, a=0.25,
              b=0, indlpml=False, **kwargs):
    """Cluster time series with a Gibbs sampler and report the chosen partition.

    The data are passed through ``scaleandperiods`` and the design matrices
    come from ``designmatrices``.  Each iteration updates, in order: alpha,
    the per-series vectors gamma = (beta, theta) together with the cluster
    configuration (via ``comp``), an acceleration step that redraws the
    cluster-level values, the variances ``sig2eps``/``sig2alpha``/``sig2beta``
    /``sig2the``, the correlation ``rho`` (Metropolis-Hastings) and, when
    requested, the parameters ``a`` and ``b`` (Metropolis-Hastings).
    After sampling, the saved configuration closest to the averaged
    similarity matrix is printed together with an HM measure, and one plot
    per group is shown.

    Args:
        data: Input forwarded to ``scaleandperiods``.
        maxiter: Number of sampler iterations (positive integer).
        burnin: Iterations discarded before saving; defaults to
            ``floor(0.1 * maxiter)`` when left at ``sentinel``.
        thinning: Save every ``thinning``-th iteration after burn-in.
        level, trend, seasonality, deg: Forwarded to ``designmatrices``.  The
            sum ``level + trend + seasonality`` selects the model variant:
            0 (alpha and theta only), 3 (beta and theta only) or anything
            else (alpha, beta and theta).
        c0eps, c1eps, c0beta, c1beta, c0alpha, c1alpha: Positive
            hyper-parameters used in the variance draws.
        priora: When truthy, ``a`` is updated by Metropolis-Hastings.
        pia: Validated to lie in (0, 1); not used otherwise in this function.
        q0a, q1a: Parameters of the Beta density used in the update of ``a``.
        priorb: When truthy, ``b`` is updated by Metropolis-Hastings.
        q0b, q1b: Shape and scale of the Gamma density used in the update of
            ``b``.
        a, b: Initial values; ``a`` must be in [0, 1) and ``b > -a``.
        indlpml: When truthy, CPO/LPML quantities are accumulated every 10th
            iteration after burn-in.
        **kwargs: Accepted and ignored.

    Returns:
        None.  Results are printed and plotted.

    Raises:
        ValueError: If any argument fails the range checks below.
    """
    if burnin == sentinel:
        burnin = math.floor(0.1 * maxiter)

    if deg % 1 != 0 or deg <= 0:
        raise ValueError("deg must be a positive integer number.")
    if maxiter % 1 != 0 or maxiter <= 0:
        raise ValueError("maxiter must be a positive (large) integer number.")
    if burnin % 1 != 0 or burnin <= 0:
        raise ValueError("burnin must be a non-negative integer number.")
    if thinning % 1 != 0 or thinning <= 0:
        raise ValueError("thinning must be a non-negative integer number.")
    if maxiter <= burnin:
        raise ValueError("maxiter cannot be less than or equal to burnin.")
    if (c0eps <= 0 or c1eps <= 0 or c0beta <= 0 or c1beta <= 0
            or c0alpha <= 0 or c1alpha <= 0):
        raise ValueError(
            "c0eps,c1eps,c0beta,c1beta,c0alpha and c1alpha must be positive numbers."
        )
    if pia <= 0 or pia >= 1:
        raise ValueError(
            "The mixing proportion pia must be a number in (0,1).")
    if q0a <= 0 or q1a <= 0:
        raise ValueError("q0a and q1a must be positive numbers.")
    if a < 0 or a >= 1:
        raise ValueError("'a' must be a number in [0,1).")
    if q0b <= 0 or q1b <= 0:
        raise ValueError("q0b and q1b must be positive numbers.")
    if b <= -a:
        raise ValueError("'b' must be greater than '-a'.")

    periods, mydata, cts = scaleandperiods(data)

    ##### Construction of the design matrices #####
    T = mydata.shape[0]  # Number of periods of the time series
    n = mydata.shape[1]  # Number of time series present in the data
    p, d, X, Z = designmatrices(level, trend, seasonality, deg, T)

    # 0 -> alpha/theta only, 3 -> beta/theta only, otherwise all three.
    mode = level + trend + seasonality

    def base_mean(i):
        # Mean of series i when it opens a new cluster.
        if mode == 3:
            return np.zeros(T)
        return Z.dot(alpha[:, i])

    def cluster_mean(i, j, betastar, thetastar):
        # Mean of series i when it joins existing cluster j.
        if mode == 0:
            return Z.dot(alpha[:, i]) + thetastar[:, j]
        if mode == 3:
            return X.dot(betastar[:, j]) + thetastar[:, j]
        return Z.dot(alpha[:, i]) + X.dot(betastar[:, j]) + thetastar[:, j]

    ##### Initial values for the parameters of the Gibbs sampler #####
    sig2eps = np.ones(n)  # Diagonal variance of every epsilon_i.
    sig2the = 1
    rho = 0
    # |j - k| for every pair of periods; P[j, k] = rho ** |j - k|.
    lags = np.abs(np.subtract.outer(np.arange(T), np.arange(T)))
    P = np.power(float(rho), lags)
    R = sig2the * P

    if mode == 0:
        sig2alpha = np.ones(p)
        sigmaalpha = np.diag(sig2alpha)
        invsigmaalpha = np.diag(1 / sig2alpha)
        # One column per time series.
        alpha = np.random.multivariate_normal(np.zeros(p), sigmaalpha,
                                              size=n).T
        theta = np.random.multivariate_normal(np.zeros(T), R, size=n).T
        gamma = theta
    elif mode == 3:
        sig2beta = np.ones(d)
        sigmabeta = np.diag(sig2beta)
        invsigmabeta = np.diag(1 / sig2beta)
        beta = np.random.multivariate_normal(np.zeros(d), sigmabeta, size=n).T
        theta = np.random.multivariate_normal(np.zeros(T), R, size=n).T
        gamma = np.concatenate((beta, theta))
    else:
        sig2beta = np.ones(d)
        sigmabeta = np.diag(sig2beta)
        invsigmabeta = np.diag(1 / sig2beta)
        sig2alpha = np.ones(p)
        sigmaalpha = np.diag(sig2alpha)
        invsigmaalpha = np.diag(1 / sig2alpha)
        alpha = np.random.multivariate_normal(np.zeros(p), sigmaalpha,
                                              size=n).T
        beta = np.random.multivariate_normal(np.zeros(d), sigmabeta, size=n).T
        theta = np.random.multivariate_normal(np.zeros(T), R, size=n).T
        gamma = np.concatenate((beta, theta))

    iter0 = 0
    iter1 = 0  # Number of iterations saved so far.
    arrho = 0  # Acceptance counters for the Metropolis-Hastings steps.
    ara = 0
    arb = 0
    sim = np.zeros((n, n))  # Similarities matrix.

    if thinning == 0:
        CL = math.floor(maxiter - burnin)
    else:
        CL = math.floor((maxiter - burnin) / thinning)

    memory = np.zeros((CL * n, n))  # Cluster configuration per saved iteration.
    memorygn = np.zeros((CL, n))  # Group label of each series per saved iteration.
    sig2epssample = np.zeros((CL, n))
    sig2thesample = np.zeros((CL, 1))
    rhosample = np.zeros((CL, 1))
    asample = np.zeros((CL, 1))
    bsample = np.zeros((CL, 1))
    msample = np.zeros((CL, 1))  # Number of groups per saved iteration.
    if mode == 0:
        sig2alphasample = np.zeros((CL, p))
    elif mode == 3:
        sig2betasample = np.zeros((CL, d))
    else:
        sig2alphasample = np.zeros((CL, p))
        sig2betasample = np.zeros((CL, d))

    if indlpml != 0:
        iter2 = 0
        auxlpml = np.zeros((math.floor((maxiter - burnin) / 10), n))

    ##### BEGINNING OF GIBBS SAMPLING #####
    while iter0 < maxiter:
        # R only changes at the end of an iteration, so invert it once.
        Rinv = inv(R)

        ##### 1) SIMULATION OF ALPHA'S POSTERIOR DISTRIBUTION #####
        if mode != 3:
            for i in range(0, n):
                sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                Q = sigmaeps + R
                Qinv = inv(Q)
                if mode == 0:
                    Winv = Qinv
                else:
                    Vinv = (np.transpose(X).dot(Qinv).dot(X)) + invsigmabeta
                    V = inv(Vinv)
                    # NOTE(review): a Woodbury-style update would subtract
                    # this term; the sign is kept as found -- confirm.
                    Winv = Qinv + Qinv.dot(X).dot(V).dot(
                        np.transpose(X)).dot(Qinv)
                Valphainv = (np.transpose(Z).dot(Winv).dot(Z)) + invsigmaalpha
                Valpha = inv(Valphainv)
                mualpha = Valpha.dot(np.transpose(Z)).dot(Winv).dot(
                    mydata[:, i])
                alpha[:, i] = np.random.multivariate_normal(mualpha, Valpha,
                                                            size=1)

        ##### 2) SIMULATION OF GAMMA'S = (BETA,THETA) POSTERIOR DISTRIBUTION #####
        for i in range(0, n):
            # Only the first row of gamma[:, -i] is compared to determine the
            # cluster configuration of the remaining series.
            jstar, nstar, mi, gn = comp(np.delete(gamma[0, :], i))
            gmi = np.delete(gamma, i, axis=1)
            gammastar = gmi[:, jstar]  # Unique vectors without the i-th one.
            if mode == 0:
                betastar = None
                thetastar = gammastar[d:(T + d), :]
            else:
                betastar = gammastar[0:d, :]
                thetastar = gammastar[d:(T + d), :]

            sigmaeps = sig2eps[i] * np.diag(np.repeat(1, T))
            invsigmaeps = (1 / sig2eps[i]) * np.diag(np.repeat(1, T))
            Q = sigmaeps + R
            Qinv = inv(Q)
            if mode == 0:
                Winv = Qinv  # was the no-op "Winv = Winv"
                W = Q
            else:
                Vinv = (np.transpose(X).dot(Qinv).dot(X)) + invsigmabeta
                V = inv(Vinv)
                Winv = Qinv + (Qinv.dot(X).dot(V).dot(
                    np.transpose(X)).dot(Qinv))
                W = inv(Winv)

            # Weights of gamma(i)'s posterior: one per existing cluster plus a
            # final one for opening a new cluster.
            y_i = mydata[:, i]
            new_mean = base_mean(i)
            means = [cluster_mean(i, j, betastar, thetastar)
                     for j in range(0, mi)]
            dj = np.zeros((mi))
            d0 = (b + a * mi) * multivariate_normal.pdf(y_i, new_mean, W)
            for j in range(0, mi):
                dj[j] = (nstar[j] - a) * multivariate_normal.pdf(
                    y_i, means[j], sigmaeps)
            den = d0 + sum(dj)
            if den == 0:
                # All densities underflowed: fall back to log densities and a
                # second-order expansion around the smallest one.
                # NOTE(review): the weights are added to the log densities
                # without taking their log; kept as found -- confirm.
                d0 = (b + a * mi) + multivariate_normal.logpdf(
                    y_i, new_mean, W)
                for j in range(0, mi):
                    dj[j] = (nstar[j] - a) + multivariate_normal.logpdf(
                        y_i, means[j], sigmaeps)
                dj = np.append(dj, d0)
                aa = min(dj)
                q = (1 + (dj - aa) + (dj - aa)**2 / 2) / sum(
                    1 + (dj - aa) + (dj - aa)**2 / 2)
            else:
                q = dj / den
                q = np.append(q, (d0 / den))

            # Sample a label in 1..(mi+1) with the weights computed above.
            y = np.random.choice(np.arange(1, (mi + 2)),
                                 size=1,
                                 replace=False,
                                 p=q)

            # Label (mi+1): draw a brand-new vector for gamma(i).
            if y == (mi + 1):
                Sthetai = inv(invsigmaeps + Rinv)
                if mode == 0:
                    muthetai = Sthetai.dot(invsigmaeps).dot(
                        mydata[:, i] - (Z.dot(alpha[:, i])))
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = theta0
                elif mode == 3:
                    muthetai = Sthetai.dot(invsigmaeps).dot(
                        mydata[:, i] - (X.dot(beta[:, i])))
                    mubetai = V.dot(np.transpose(X)).dot(Qinv).dot(
                        mydata[:, i])
                    beta0 = np.random.multivariate_normal(mubetai, V)
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = np.concatenate((beta0, theta0))
                else:
                    muthetai = Sthetai.dot(invsigmaeps).dot(
                        mydata[:, i] - (Z.dot(alpha[:, i])) -
                        (X.dot(beta[:, i])))
                    mubetai = V.dot(np.transpose(X)).dot(Qinv).dot(
                        mydata[:, i] - (Z.dot(alpha[:, i])))
                    beta0 = np.random.multivariate_normal(mubetai, V)
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = np.concatenate((beta0, theta0))
            else:
                # Otherwise copy column y of gammastar into gamma(i).
                gamma[:, i] = gammastar[:, y - 1].reshape(len(gammastar))

        ##### 2.1) ACCELERATION STEP AND CONSTRUCTION OF SIMILARITIES MATRIX #####
        jstar, nstar, m, gn = comp(gamma[0, :])
        gammastar = gamma[:, jstar]
        if mode == 0:
            theta = (gamma[d:(T + d), :])
            thetastar = gammastar[d:(T + d), :]
        else:
            beta = gamma[0:d, :]
            theta = gamma[d:(T + d), :]
            betastar = gammastar[0:d, :]
            thetastar = gammastar[d:(T + d), :]

        save_now = (iter0 % thinning == 0) and (iter0 >= burnin)

        for j in range(0, m):
            cc = np.where(gn == j)  # Series that belong to group j.
            members = cc[0]
            aux = np.zeros((T, T))
            aux1 = np.zeros((T, 1))
            aux2 = np.zeros((T, 1))
            for i in range(0, nstar[j]):
                # idx is the series index of the i-th member of the group; the
                # data and alpha must be read at idx, not at i.
                idx = members[i]
                prec = np.diag(np.repeat(1 / sig2eps[idx], T))
                aux = aux + prec
                if mode == 0:
                    aux1 = aux1 + (prec.dot(
                        mydata[:, idx] - Z.dot(alpha[:, idx]))).reshape((T, 1))
                elif mode == 3:
                    aux1 = aux1 + (prec.dot(
                        mydata[:, idx] - X.dot(betastar[:, j]))).reshape(
                            (T, 1))
                    aux2 = aux2 + (prec.dot(
                        mydata[:, idx] - thetastar[:, j])).reshape((T, 1))
                else:
                    aux1 = aux1 + (prec.dot(
                        mydata[:, idx] - Z.dot(alpha[:, idx]) -
                        X.dot(betastar[:, j]))).reshape((T, 1))
                    aux2 = aux2 + (prec.dot(
                        mydata[:, idx] - Z.dot(alpha[:, idx]) -
                        thetastar[:, j])).reshape((T, 1))

            Sthetastar = inv(aux + Rinv)
            muthetastar = Sthetastar.dot(aux1)
            if mode != 0:
                Sbetastar = inv(
                    np.transpose(X).dot(aux).dot(X) + invsigmabeta)
                mubetastar = Sbetastar.dot(np.transpose(X)).dot(aux2)
                beta[:, members] = np.random.multivariate_normal(
                    mubetastar.flatten(), Sbetastar).reshape(
                        (len(mubetastar), 1))
            theta[:, members] = np.random.multivariate_normal(
                muthetastar.flatten(), Sthetastar).reshape(
                    (len(muthetastar), 1))

            if save_now:
                for i1 in range(0, nstar[j]):
                    for i2 in range(i1, nstar[j]):
                        r1 = members[i1]
                        r2 = members[i2]
                        sim[r1, r2] = sim[r1, r2] + 1
                        sim[r2, r1] = sim[r2, r1] + 1
                        memory[r1 + (n * iter1), r2] = memory[
                            r1 + (n * iter1), r2] + 1
                        memory[r2 + (n * iter1), r1] = memory[
                            r2 + (n * iter1), r1] + 1

        # Rebuild gamma after the acceleration step.
        if mode == 0:
            gamma = theta
        else:
            gamma = np.concatenate((beta, theta), axis=0)

        # First row again (row index 1 was a 1-based leftover).
        jstar, nstar, m, gn = comp(gamma[0, :])
        gammastar = gamma[:, jstar]
        if mode == 0:
            theta = gamma[d:(T + d), :]
            thetastar = gammastar[d:(T + d), :]
        else:
            beta = gamma[0:d, :]
            theta = gamma[d:(T + d), :]
            betastar = gammastar[0:d, :]
            thetastar = gammastar[d:(T + d), :]

        ##### 3) SIMULATION OF SIG2EPS' POSTERIOR DISTRIBUTION #####
        if mode == 0:
            resid = mydata - Z.dot(alpha) - theta
        elif mode == 3:
            resid = mydata - X.dot(beta) - theta
        else:
            resid = mydata - Z.dot(alpha) - X.dot(beta) - theta
        M = np.transpose(resid).dot(resid)
        sig2eps = scipy.stats.invgamma.rvs((c0eps + T / 2),
                                           scale=(c1eps + M.diagonal() / 2),
                                           size=n)

        ##### 4) SIMULATION OF SIGMAALPHA'S POSTERIOR DISTRIBUTION #####
        if mode != 3:
            sig2alpha = scipy.stats.invgamma.rvs(
                (c0alpha + n / 2),
                scale=(c1alpha + (alpha**2).sum(axis=1)),
                size=p)
            sigmaalpha = np.diag(sig2alpha)
            invsigmaalpha = np.diag(1 / sig2alpha)

        ##### 5) SIMULATION OF SIGMABETA'S POSTERIOR DISTRIBUTION #####
        if mode != 0:
            # NOTE(review): the scale has one entry per cluster (sum over
            # axis 0) and the draws are padded/truncated to length d, then
            # inverted; kept exactly as found -- confirm the intended
            # distribution.
            n_groups = betastar.shape[1]
            beta_scale = c1beta + ((betastar**2).sum(axis=0) / 2)

            def draw_block():
                return scipy.stats.invgamma.rvs((c0beta + m / 2),
                                                scale=beta_scale,
                                                size=n_groups)

            diff_in_shape = d - n_groups
            if diff_in_shape < 0:
                sig2beta = 1 / draw_block()[0:d]
            elif diff_in_shape <= (n_groups / 2):
                sig2beta = 1 / np.concatenate(
                    (draw_block(), draw_block()[:diff_in_shape]))
            else:
                beta_vector = []
                for v in range(0, (math.floor(d / n_groups))):
                    beta_vector = np.concatenate((beta_vector, draw_block()))
                sig2beta = 1 / np.concatenate(
                    (beta_vector, draw_block()[:(d % n_groups)]))
            sigmabeta = np.diag(sig2beta)
            invsigmabeta = np.diag(1 / sig2beta)

        ##### 6) SIMULATION OF SIG2THE'S POSTERIOR DISTRIBUTION #####
        cholP = np.linalg.cholesky(P)
        # The quadratic forms need the inverse of P itself, not the inverse
        # of its Cholesky factor.
        Pinv = inv(P)
        s1 = 0  # Sum entering the scale of the posterior distribution.
        for j in range(0, m):
            s1 = s1 + np.transpose(thetastar[:, j]).dot(Pinv).dot(
                thetastar[:, j])
        sig2the = scipy.stats.invgamma.rvs((m * T / 2),
                                           scale=(s1 / 2),
                                           size=1)

        ##### 7) SIMULATION OF RHO'S POSTERIOR DISTRIBUTION (Metropolis-Hastings step) #####
        rhomh = float(np.random.uniform(low=-1, high=1, size=1)[0])
        Pmh = np.power(rhomh, lags)  # Matrix P for the proposed value.
        cholPmh = scipy.linalg.cholesky(Pmh)
        Pmhinv = inv(Pmh)
        s = 0  # Sum entering the acceptance probability.
        for j in range(0, m):
            s = s + thetastar[:, j].dot(Pmhinv - Pinv).dot(thetastar[:, j])

        # Log acceptance ratio.
        q = (-m) * (np.log(np.prod(np.diag(cholPmh))) - np.log(
            np.prod(np.diag(cholP)))) - ((1 / (2 * sig2the)) * s) + (
                1 / 2) * (np.log(1 + rhomh * rhomh) - np.log(1 + rho * rho)
                          ) - np.log(1 - rhomh * rhomh) + np.log(1 - rho * rho)
        quot = min(0, q)
        unif1 = np.random.uniform(low=0, high=1, size=1)
        if np.log(unif1) <= quot:
            rho = rhomh
            arrho = arrho + 1
            P = Pmh
        R = sig2the * P

        ##### 8) SIMULATION OF A'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH UNIFORM PROPOSALS) #####
        if priora == 1:
            if b < 0:
                amh = np.random.uniform(low=-b, high=1, size=1)
            else:
                unif2 = np.random.uniform(low=0, high=1, size=1)
                if unif2 <= 0.5:
                    amh = 0
                else:
                    amh = np.random.uniform(low=0, high=1, size=1)

            # If b is not greater than -a, accept the proposal directly.
            if a + b <= 0:
                a = amh
                print("a + b < 0")
            else:
                quot1 = 0
                if (m > 1):
                    for j in range(0, m - 1):
                        quot1 = quot1 + np.log(b + (j + 1) * amh) + np.log(
                            scipy.special.gamma(nstar[j] - amh)) - np.log(
                                scipy.special.gamma(1 - amh)) - np.log(
                                    b + (j + 1) * a) - np.log(
                                        scipy.special.gamma(nstar[j] - a)
                                    ) + np.log(scipy.special.gamma(1 - a))
                quot1 = quot1 + np.log(
                    scipy.special.gamma(nstar[m - 1] - amh)) - np.log(
                        scipy.special.gamma(1 - amh)) - np.log(
                            scipy.special.gamma(nstar[m - 1] - a)) + np.log(
                                scipy.special.gamma(1 - a))

                if a == 0:
                    fa = 0.5
                else:
                    fa = 0.5 * scipy.stats.beta.pdf(a, q0a, q1a)
                if amh == 0:
                    famh = 0.5
                else:
                    famh = 0.5 * scipy.stats.beta.pdf(amh, q0a, q1a)

                # Log quotient of the Metropolis-Hastings step.
                quot1 = quot1 + np.log(famh) - np.log(fa)
                alphamh1 = min(quot1, 0)
                unif3 = np.random.uniform(low=0, high=1, size=1)
                # Accept when log(u) <= log acceptance probability (an
                # equality test here would practically never accept).
                if np.log(unif3) <= alphamh1:
                    a = amh
                    ara = ara + 1

        ##### 9) SIMULATION OF B'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH GAMMA PROPOSALS) #####
        if priorb == 1:
            y1 = scipy.stats.gamma.rvs(1, 1, scale=10)
            bmh = y1 - a

            # If b is not greater than -a, accept the proposal directly.
            if a + b <= 0:
                b = bmh
                print("a+b < 0")
            else:
                quot2 = 0
                if m > 1:
                    for j in range(0, m - 1):
                        quot2 = quot2 + np.log(bmh + (j + 1) * a) - np.log(
                            b + (j + 1) * a)
                fb = scipy.stats.gamma.pdf(a + b, q0b, scale=q1b)
                fbmh = scipy.stats.gamma.pdf(y1, q0b, scale=q1b)

                # Log quotient of the Metropolis-Hastings step.
                quot2 = quot2 + (np.log(scipy.special.gamma(bmh + 1)) -
                                 np.log(scipy.special.gamma(bmh + n)) -
                                 np.log(scipy.special.gamma(b + 1)) +
                                 np.log(scipy.special.gamma(b + n))) + (
                                     np.log(fbmh) -
                                     np.log(fb)) - 0.1 * (b - bmh)
                alphamh2 = min(quot2, 0)
                unif4 = np.random.uniform(low=0, high=1, size=1)
                if np.log(unif4) <= alphamh2:
                    b = bmh
                    arb = arb + 1

        ##### Saving the current state #####
        if (iter0 % thinning == 0) & (iter0 >= burnin):
            iter1 = iter1 + 1
            sig2epssample[iter1 - 1, :] = sig2eps
            sig2thesample[iter1 - 1] = sig2the
            rhosample[iter1 - 1] = rho
            asample[iter1 - 1] = a
            bsample[iter1 - 1] = b
            msample[iter1 - 1, :] = m
            memorygn[iter1 - 1, :] = gn
            if mode == 0:
                sig2alphasample[iter1 - 1, :] = sig2alpha
            elif mode == 3:
                sig2betasample[iter1 - 1, :] = sig2beta
            else:
                sig2alphasample[iter1 - 1, :] = sig2alpha
                sig2betasample[iter1 - 1, :] = sig2beta

        ##### Accumulating the quantities needed for CPO / LPML #####
        if indlpml != 0:
            if (iter0 % 10 == 0) & (iter0 >= burnin):
                iter2 = iter2 + 1
                for i in range(0, n):
                    y_i = mydata[:, i]
                    sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                    for j in range(0, m):
                        auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                            (nstar[j] - a) /
                            (b + n)) * scipy.stats.multivariate_normal.pdf(
                                y_i, cluster_mean(i, j, betastar, thetastar),
                                sigmaeps)
                    Q = sigmaeps + R
                    if mode == 0:
                        W = Q
                    else:
                        Qinv = inv(Q)
                        Vinv = np.transpose(X).dot(Qinv).dot(
                            X) + invsigmabeta
                        V = inv(Vinv)
                        Winv = Qinv + (Qinv.dot(X).dot(V).dot(
                            np.transpose(X)).dot(Qinv))
                        W = inv(Winv)
                    auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                        (b + (a * m)) /
                        (b + n)) * scipy.stats.multivariate_normal.pdf(
                            y_i, base_mean(i), W)

        iter0 = iter0 + 1
        if iter0 % 50 == 0:
            print("Iteration Number: ", iter0, "Progress: ",
                  round((iter0 / maxiter), 2) * 100, "% \n")
    ##### END OF GIBBS SAMPLING #####

    # Acceptance rates and averaged similarities matrix.
    arrho = arrho / iter0
    ara = ara / iter0
    arb = arb / iter0
    sim = sim / iter1

    # Distance between each saved configuration and the similarities matrix.
    dist = np.zeros(CL)
    for i in range(0, CL):
        aux4 = memory[(i * n):((i + 1) * n), :] - sim
        dist[i] = np.linalg.norm(aux4)

    # Configuration that minimises that distance.
    mstar = msample[np.argmin(dist)]
    gnstar = memorygn[np.argmin(dist), :]

    ##### HM MEASURE CALCULATION #####
    HM = 0
    for j in range(0, mstar[0].astype(int)):
        cc = np.where(gnstar == j)[0]
        HM1 = 0
        if len(cc) > 1:
            for i1 in range(0, len(cc)):
                for i2 in range(0, i1):
                    HM1 = HM1 + sum(
                        (mydata[:, cc[i2]] - mydata[:, cc[i1]])**2)
            HM = HM + (2 / (len(cc) - 1)) * HM1

    ##### PRINT FINAL CLUSTER ASSIGNMENTS AND HM MEASURE #####
    print("Number of groups of the chosen cluster configuration: ",
          mstar[0].astype(int))
    for i in range(0, mstar[0].astype(int)):
        print("Time series in group ", i,
              np.where(gnstar == i)[0].astype(int), "\n")
    print("HM Measure: ", HM)

    if indlpml != 0:
        auxlpml = 1 / auxlpml
        cpo = auxlpml.mean(axis=0)
        cpo = 1 / cpo
        lpml = sum(np.log(cpo))

    ##### PLOT FINAL CLUSTER ASSIGNMENTS (ONE PLOT PER CLUSTER) #####
    for j in range(0, mstar[0].astype(int)):
        plt.figure()
        plt.axes([0, 0, 1, 1])
        cc_plot = np.where(gnstar == j)[0]
        plt.xlabel('Time Period')
        plt.ylabel('Scaled Value')
        title = "Group " + str(j)
        plt.title(title)
        plt.plot(mydata[:, cc_plot], c=np.random.rand(3))
        plt.show()
def lr_real(self, data):
    """Log likelihood ratio of ``data``: alternative minus null.

    The null uses covariance ``self.sigmaE``; the alternative uses
    ``self.sigmaE + self.sigmaG``.  Both share ``self.mean``.  The result is
    returned as a NumPy array.
    """
    log_null = multivariate_normal.logpdf(data, self.mean, self.sigmaE)
    alt_cov = np.add(self.sigmaE, self.sigmaG)
    log_alt = multivariate_normal.logpdf(data, self.mean, alt_cov)
    log_ratio = np.subtract(log_alt, log_null)
    return np.asarray(log_ratio)
delta_2 = np.tanh(theta_draw[4]) sigma_R = np.exp(theta_draw[5]) sigma_Q = np.exp(theta_draw[6]) #print(delta_2, sigma_R, sigma_Q) #print(theta_draw) params_m = [theta_draw[0], theta_draw[1], sigma_R] params_t = [theta_draw[2], theta_draw[3], delta_2, sigma_Q] z_t = z(params_m, params_t, x_t, y_t, M, c_init, r_star) # Prior for gamma_0, gamma_1, delta_0 and delta_1 prior = multivariate_normal.logpdf(theta_draw[:4], mean=np.array([2.74, -1.19, 0.5, 0.8]), cov=2 * np.eye(4)) prior -= multivariate_normal.logpdf(theta[i][:4], mean=np.array([2.74, -1.19, 0.5, 0.8]), cov=2 * np.eye(4)) # Prior for delta_1 prior += uniform.logpdf(delta_2, -1, 2) prior -= uniform.logpdf(np.tanh(theta[i][4]), -1, 2) # Prior Sigma_R prior += gamma.logpdf(sigma_R, a=5, scale=1 / 5) prior -= gamma.logpdf(np.exp(theta[i][5]), a=5, scale=1 / 5) # Prior Sigma_Q prior += gamma.logpdf(sigma_Q, a=5, scale=1 / 5)
def log_partition(kernel_x,
                  kernel_y,
                  base_density,
                  X,
                  Y,
                  base_x,
                  base_y,
                  beta,
                  lmbda,
                  num_samples=1000):
    """Estimate a log partition function by importance sampling.

    For every row of ``X``, ``num_samples`` proposals are drawn from a normal
    centred at the column means of ``Y`` and scaled by a fixed factor of 4.
    ``log_pdf`` is evaluated on the proposals, the proposal log density is
    subtracted, and the log of the mean importance weight is returned
    together with an estimate of its standard error.

    Data need to be centered and normalized.

    Args:
        kernel_x, kernel_y, base_density, base_x, base_y, beta, lmbda:
            Forwarded unchanged to ``log_pdf``.
        X: 2-D array; each row is repeated ``num_samples`` times.
        Y: 2-D array; supplies the proposal mean and dimension.
        num_samples: Number of proposals per row of ``X``.  (This argument
            used to be overwritten with 1000 inside the function.)

    Returns:
        Tuple ``(log_Z, std_log_Z)``.  When ``base_x`` is empty both are
        broadcast to length ``Y.shape[0]``; otherwise there is one entry per
        row of ``X``.
    """
    d_node = Y.shape[1]
    sigmas = 4.
    mu = np.mean(Y, axis=0)
    num_x = X.shape[0]
    total = num_samples * num_x

    X_rep = np.repeat(X, num_samples, axis=0)
    tmp_sigmas = sigmas * np.ones([X_rep.shape[0], 1])
    samples = np.random.multivariate_normal(mu, np.eye(mu.shape[0]), total)
    samples = np.multiply(samples, tmp_sigmas)

    if total > 50000:
        # Evaluate in at most 2000 chunks to bound memory.  Ceiling division
        # keeps the slice bounds integral and covers the final partial chunk.
        log_diff = np.zeros([total])
        num_chunks = 2000
        chunk_size = -(-total // num_chunks)
        for start in range(0, total, chunk_size):
            stop = start + chunk_size
            log_diff[start:stop] = log_pdf(kernel_x, kernel_y, base_density,
                                           X_rep[start:stop],
                                           samples[start:stop], base_x,
                                           base_y, beta, lmbda)
    else:
        log_diff = log_pdf(kernel_x, kernel_y, base_density, X_rep, samples,
                           base_x, base_y, beta, lmbda)

    # Proposal log-pdf: strip the normalising constant of the unit-variance
    # density, rescale the quadratic term by 1/sigma^2, then add the
    # constant for variance sigma^2.
    tmp = multivariate_normal.logpdf(
        samples, mean=mu) + (d_node / 2.) * np.log(2 * np.pi)
    tmp = np.reshape(tmp, [-1, 1])
    tmp = np.multiply(tmp, 1. / tmp_sigmas**2)
    tmp = tmp - d_node * (np.log(2 * np.pi) / 2. + np.log(tmp_sigmas))
    tmp = np.reshape(tmp, [-1])

    # Subtract the proposal log-pdf to obtain log importance weights.
    log_diff -= tmp

    # Log-mean-exp, shifted by the maximum for numerical stability.
    if base_x.shape[0] > 0:
        log_diff = np.reshape(log_diff, [-1, num_samples])
        max_diff = np.max(log_diff, axis=1)
        log_diff -= np.reshape(max_diff, [-1, 1])
        shifted_log_Z = np.log(np.mean(np.exp(log_diff), axis=1))
        log_Z = max_diff + shifted_log_Z
        shifted_log_Z_2 = np.log(np.mean(np.exp(2 * log_diff), axis=1))
    else:
        log_diff = np.reshape(log_diff, [-1, 1])
        max_diff = np.max(log_diff)
        log_diff -= max_diff
        shifted_log_Z = np.log(np.mean(np.exp(log_diff)))
        log_Z = max_diff + shifted_log_Z
        shifted_log_Z_2 = np.log(np.mean(np.exp(2 * log_diff)))

    std_log_Z = shifted_log_Z_2 - 2 * shifted_log_Z
    std_log_Z = np.sqrt((np.exp(std_log_Z) - 1) / num_samples)

    if base_x.shape[0] == 0:
        log_Z = log_Z * np.ones(Y.shape[0])
        std_log_Z = std_log_Z * np.ones(Y.shape[0])
    return log_Z, std_log_Z
def lr_null(self, sims, weigh, percent):
    """Log likelihood ratios of ``sims``, post-processed by ``process_crit``.

    The ratio is the alternative log density (covariance
    ``self.sigmaE + self.sigmaG``) minus the null log density (covariance
    ``self.sigmaE``), both with mean ``self.mean``.  The array of ratios is
    handed to ``self.process_crit`` together with ``weigh`` and ``percent``,
    and that call's result is returned.
    """
    log_null = multivariate_normal.logpdf(sims, self.mean, self.sigmaE)
    alt_cov = np.add(self.sigmaE, self.sigmaG)
    log_alt = multivariate_normal.logpdf(sims, self.mean, alt_cov)
    crit = np.asarray(np.subtract(log_alt, log_null))
    return self.process_crit(crit, weigh, percent)
def _compute_accept_prob(self, current_state, proposed_state):
    """Compute the acceptance probability and store it on ``proposed_state``.

    During the first ``2 * settings['memory_length']`` iterations the
    proposal is always accepted.  Afterwards the probability is
    ``min(1, exp(target diff + Jacobian diff + proposal diff))``, where the
    proposal densities come from ``pmvn`` when the SR1 trust-region variant
    is active and from ``mvn`` otherwise.

    Args:
        current_state: Dict with ``params_free``, ``nat_gradient``,
            ``hessian``, ``log_target`` and ``log_jacobian``.
        proposed_state: Dict with the same keys; receives ``accept_prob``.

    Returns:
        ``True`` when ``accept_prob`` was written to ``proposed_state``;
        ``False`` when the computation failed and
        ``settings['remove_overflow_iterations']`` is set.
    """
    if self.current_iter < 2.0 * self.settings['memory_length']:
        proposed_state.update({'accept_prob': 1.0})
        return True

    if isinstance(self.emp_hessian, np.ndarray):
        sr1_trust_region_cov = self.settings['sr1_trust_region_scale'] * \
            self.emp_hessian
    else:
        sr1_trust_region_cov = self.settings['sr1_trust_region_cov']

    try:
        current = current_state['params_free']
        proposed = proposed_state['params_free']
        current_mean = current + current_state['nat_gradient']
        proposed_mean = proposed + proposed_state['nat_gradient']

        # Compare by value: identity ("is") against a string literal depends
        # on interning and is not a reliable equality test.
        if self.qn_method == 'sr1' and self.settings['sr1_trust_region']:
            # Using trust-region approach for the SR1 update.
            proposed_probability = pmvn.logpdf(proposed, current_mean,
                                               current,
                                               current_state['hessian'],
                                               sr1_trust_region_cov)
            current_probability = pmvn.logpdf(current, proposed_mean,
                                              proposed,
                                              proposed_state['hessian'],
                                              sr1_trust_region_cov)
        else:
            proposed_probability = mvn.logpdf(proposed, current_mean,
                                              current_state['hessian'])
            current_probability = mvn.logpdf(current, proposed_mean,
                                             proposed_state['hessian'])

        tar_diff = proposed_state['log_target'] - \
            current_state['log_target']
        jac_diff = proposed_state['log_jacobian'] - \
            current_state['log_jacobian']
        pro_diff = current_probability - proposed_probability

        accept_prob = np.min((1.0, np.exp(tar_diff + jac_diff + pro_diff)))
    except Exception:
        # Deliberate best-effort: any failure in the density evaluation is
        # reported (optionally) and turned into a skip or a forced accept.
        if self.settings['show_overflow_warnings']:
            current_hess = current_state['hessian']
            proposed_hess = proposed_state['hessian']
            print("")
            print("Iteration: {}. Overflow in accept prob calculation.".
                  format(self.current_iter))
            print(
                "This is probably due to a mismatch in the current and proposed Hessians."
            )
            print("Diag of current Hessian: {}.".format(
                np.diag(current_hess)))
            print("Diag of candidate Hessian: {}.".format(
                np.diag(proposed_hess)))
            print("")
        if self.settings['remove_overflow_iterations']:
            return False
        proposed_state.update({'accept_prob': 1.0})
        return True

    proposed_state.update({'accept_prob': accept_prob})
    return True
def obj(vars):
    """Negative log density of ``zscores`` for a weighted sum of ``As``.

    The covariance is ``sum(As[i] * vars[i])`` over the first ``r`` terms;
    the density is evaluated with ``mvn.logpdf`` using its default mean and
    ``allow_singular=True``.  The value is printed and returned.
    """
    V = 0
    for i in range(r):
        V = V + As[i] * vars[i]
    neg_log_lik = -mvn.logpdf(zscores, cov=V, allow_singular=True)
    weights_label = ",".join(map(str, vars))
    print("NLL({}) = {}".format(weights_label, neg_log_lik))
    return neg_log_lik
import numpy as np
from numpy.linalg import inv
import random
import scipy.linalg as linalg
import scipy.sparse as sp
import scipy.sparse.linalg as spln
import scipy.stats
from scipy.stats import norm, multivariate_normal
import warnings

# Older versions of scipy do not support the allow_singular keyword. I could
# check the version number explicitly, but perhaps this is clearer
_support_singular = True
try:
    multivariate_normal.logpdf(1, 1, 1, allow_singular=True)
except Exception:
    # Any failure of the probe means the keyword cannot be relied on.
    # `Exception` (rather than a bare except) lets KeyboardInterrupt and
    # SystemExit propagate during import.
    _support_singular = False


def _validate_vector(u, dtype=None):
    """Coerce ``u`` to a 1-D numpy array.

    Args:
        u: scalar or array-like; length-1 axes are squeezed away.
        dtype: optional dtype passed to ``np.asarray``.

    Returns:
        A 1-D ``numpy.ndarray``.

    Raises:
        ValueError: if ``u`` still has more than one dimension after
            squeezing.
    """
    # this is taken from scipy.spatial.distance. Internal function, so
    # redefining here.
    u = np.asarray(u, dtype=dtype).squeeze()
    # Ensure values such as u=1 and u=[1] still return 1-D arrays.
    u = np.atleast_1d(u)
    if u.ndim > 1:
        raise ValueError("Input vector should be 1-D.")
    return u
c_dm[m][i] = c_dm[m][i] / sum_c_dm #print c_dm[m], sum(c_dm[m]) ####アクション選択の計算 Ad_candidate = [m for m in xrange(M)] za_candidate = [k for k in xrange(Ka)] CDP = [[pi_a[k] / float(M) for m in xrange(M)] for k in xrange(Ka)] #candidate propbability #F_temp = [f for f in itertools.permutations(modality,N)] ##モダリティの順列組み合わせ for c in list(itertools.product(za_candidate, Ad_candidate)): #print c[0],c[1] temp_ocpw = 0.0 logpdf = [] for zok in xrange(Ko): logpdf += [multivariate_normal.logpdf(o_dm[c[1]], mean=Mu_o[zok], cov=Sig_o[zok])] #print logpdf max_log = np.max(logpdf) for zok in xrange(Ko): temp_cpw = 0.0 for zck in xrange(Kc): temp_pw = 0.0 for zpk in xrange(Kp): temp_w = 0.0 for F_temp in itertools.permutations(modality,N): #print c[0],zok,zck,zpk,F_temp temp = 1e+1#00#1.0 for n in xrange(N): #print i,n,N[d],M[d] if F_temp[n] == "a": temp = temp * theta[c[0] ][W_list.index(w_dn[n])]
def addTraces(self, traceSource, tracerange, progressBar=None, pointRange=None):
    """Score a range of traces against the most recently stored template.

    For every usable trace and every subkey in ``self.brange`` the trace's
    points of interest are evaluated under each template partition with a
    Gaussian log-density; the values are mapped to key guesses, added to a
    running per-subkey total and reported through
    ``self.stats.updateSubkey``.

    Args:
        traceSource: object providing ``getTrace(i)``, ``getTextin(i)`` and
            ``getTextout(i)``.
        tracerange: ``(first, last)`` trace indices, both inclusive.
        progressBar: optional progress object; when it reports an abort the
            method returns early.
        pointRange: optional ``(start, end)`` slice bounds applied to every
            trace. ``None`` (the default) uses the whole trace.

    Raises:
        Warning: if ``scipy.stats.multivariate_normal`` cannot be imported.
    """
    # The declared default used to crash on unpacking; None now means
    # "no cropping" (slicing with None bounds keeps the full trace).
    # TODO:support start/end point different per byte
    if pointRange is None:
        startingPoint, endingPoint = None, None
    else:
        startingPoint, endingPoint = pointRange

    data = []
    textins = []
    textouts = []

    for i in range(tracerange[0], tracerange[1] + 1):
        d = traceSource.getTrace(i)

        # Traces that are unavailable are skipped entirely.
        if d is None:
            continue

        data.append(d[startingPoint:endingPoint])
        textins.append(traceSource.getTextin(i))
        textouts.append(traceSource.getTextout(i))

    try:
        from scipy.stats import multivariate_normal
    except ImportError:
        raise Warning(
            "Version of SciPy too old, require >= 0.14, have %s. "
            "Update to support this attack" % (scipy.version.version))

    # Hack for now - just use last template found
    template = self.loadTemplatesFromProject()[-1]
    pois = template["poi"]
    numparts = len(template['mean'][0])
    results = np.zeros(
        (self.model.getNumSubKeys(), self.model.getPermPerSubkey()))

    if progressBar:
        progressBar.setStatusMask("Current Trace = %d Current Subkey = %d",
                                  (0, 0))
        progressBar.setMaximum(self.model.getNumSubKeys() * len(data))
    pcnt = 0

    for tnum in range(0, len(data)):
        for bnum in self.brange:
            try:
                newresultsint = [
                    multivariate_normal.logpdf(
                        data[tnum][pois[bnum]],
                        mean=template['mean'][bnum][i],
                        cov=np.diag(template['cov'][bnum][i]))
                    for i in range(0, numparts)
                ]
            except np.linalg.LinAlgError as e:
                logging.warning(
                    'Error in applying template, probably template is poorly formed or POI incorrect. Byte %d for tnum %d skipped.'
                    % (bnum, tnum))
                logging.debug(e)
                # Neutral contribution for this byte/trace.
                newresultsint = [0] * self.model.getPermPerSubkey()

            ptype = template["partitiontype"]

            newresults = []
            # Map to key guess format
            for i in range(0, self.model.getPermPerSubkey()):
                if ptype == "PartitionHWIntermediate":
                    self.model.setHwModel(
                        self.model.
                        hwModels['HW: AES SBox Output, First Round (Enc)'])
                    hypint = self.model.leakage(textins[tnum],
                                                textouts[tnum], i, bnum,
                                                None)
                elif ptype == "PartitionHDLastRound":
                    self.model.setHwModel(
                        self.model.hwModels['HD: AES Last-Round State'])
                    hypint = self.model.leakage(textins[tnum],
                                                textouts[tnum], i, bnum,
                                                None)
                # TODO Temp
                elif ptype == "PartitionHDRounds":
                    if bnum == 0:
                        hypint = self.model.getHW(textins[tnum][bnum] ^ i)
                    else:
                        knownkey = [
                            0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
                            0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
                        ]
                        s1 = textins[tnum][bnum - 1] ^ knownkey[bnum - 1]
                        s2 = textins[tnum][bnum] ^ i
                        hypint = self.model.getHW(s1 ^ s2)
                else:
                    # Any other partition type: the guess indexes the
                    # template partition directly.
                    hypint = i
                newresults.append(newresultsint[hypint])

            results[bnum] += newresults
            self.stats.updateSubkey(bnum, results[bnum], tnum=(tnum + 1))

            pcnt += 1
            if progressBar:
                progressBar.updateStatus(pcnt, (tnum, bnum))
                if progressBar.wasAborted():
                    return

        # Do plotting if required
        if (tnum % self._reportingInterval) == 0 and self.sr:
            self.sr()
####アクション選択の計算 Ad_candidate = [m for m in xrange(M)] za_candidate = [k for k in xrange(Ka)] CDP = [[pi_a[k] / float(M) for m in xrange(M)] for k in xrange(Ka)] #candidate propbability #F_temp = [f for f in itertools.permutations(modality,N)] ##モダリティの順列組み合わせ for c in list(itertools.product(za_candidate, Ad_candidate)): #print c[0],c[1] temp_ocpw = 0.0 logpdf = [] for zok in xrange(Ko): logpdf += [ multivariate_normal.logpdf(o_dm[c[1]], mean=Mu_o[zok], cov=Sig_o[zok]) ] #print logpdf max_log = np.max(logpdf) for zok in xrange(Ko): temp_cpw = 0.0 for zck in xrange(Kc): temp_pw = 0.0 for zpk in xrange(Kp): temp_w = 0.0 F_temp2 = [ f for f in itertools.product(modality, repeat=N) ] # デカルト積 繰り返しを許す: 1,1 がある 順序が違えば別と見なす: 1,2 と 2,1 は別 #for F_temp in itertools.product(modality,repeat=N): for i in xrange(len(F_temp2)):
def trainHSdataPGfull(Train_HS, labels_HS, diagonal=False):
    """Fit and validate a Gaussian probabilistic generative classifier.

    The data is split 67/33 into training and validation parts. For every
    class a mean and covariance are estimated from the training part, and
    each validation sample is assigned the class with the largest
    log-posterior ``log N(x | mu_k, cov_k) + log P(C_k)``. The evidence
    ``P(x)`` is the same for all classes and is omitted. The validation
    accuracy is printed.

    Works for any number of classes; for labels 1..5 the predictions are
    the same as the previous five-class implementation.

    Args:
        Train_HS: 2-D array of samples (rows) by features (columns).
        labels_HS: 1-D array of numeric class labels, one per sample.
        diagonal: when equal to 1/True, only the diagonal of each class
            covariance is kept; otherwise the full covariance is used with
            0.1 added to its diagonal to avoid singular matrices.

    Returns:
        ``(n_valid, 1)`` float array with the predicted label of every
        validation sample.
    """
    M = 15  # random_state of the split; echoed in the full-covariance report
    ridge = 1e-1  # diagonal loading for the full-covariance model
    use_diagonal = diagonal == 1

    classes = np.sort(np.unique(labels_HS))
    X_train, X_valid, label_train, label_valid = train_test_split(
        Train_HS, labels_HS, test_size=0.33, random_state=M)

    # Group the training samples by class label.
    members_by_class = [X_train[label_train == cls, :] for cls in classes]
    n_train = sum(members.shape[0] for members in members_by_class)

    # Column k holds the unnormalised log-posterior of class k.
    log_posterior = np.empty((X_valid.shape[0], classes.shape[0]))
    for k, members in enumerate(members_by_class):
        mean = np.mean(members, axis=0)
        identity = np.eye(members.shape[1])
        if use_diagonal:
            cov = np.cov(members.T) * identity
        else:
            cov = np.cov(members.T) + identity * ridge
        log_prior = math.log(members.shape[0] / n_train)
        log_posterior[:, k] = (
            multivariate_normal.logpdf(X_valid, mean, cov) + log_prior)

    # argmax keeps the first maximum, i.e. the lowest class on ties.
    PG_predicted = np.zeros((X_valid.shape[0], 1))
    PG_predicted[:, 0] = classes[np.argmax(log_posterior, axis=1)]

    if use_diagonal:
        accuracy_PGdiag = accuracy_score(label_valid, PG_predicted)
        print(
            '\nThe accuracy of Probabilistic Generative classifier HS with diagonal covariance is: ',
            accuracy_PGdiag * 100, '%')
    else:
        accuracy_PG = accuracy_score(label_valid, PG_predicted)
        print(
            '\nThe accuracy of Probabilistic Generative classifier HS with full covariance is: ',
            accuracy_PG * 100, '%', 'M is: ', M)
    return PG_predicted
sigmaE = np.array( [[ 1.00000000e+00,2.21634186e-01,-6.59629045e-02,1.45811322e-01], [ 2.21634186e-01,1.00000000e+00,-9.71533404e-04,6.77997010e-01], [-6.59629045e-02,-9.71533404e-04,1.00000000e+00,-4.56618686e-02], [ 1.45811322e-01,6.77997010e-01,-4.56618686e-02,1.00000000e+00]]) maximum = -np.inf alpha = -1 for x in range(int(count),100,-25): if x == 0: x = 1 x = float(x) mult = count/x covar = sigmaE + mult*sigmaG pdf = multivariate_normal.logpdf(alphaData,np.array([0.0,0.0,0.0,0.0]), covar) total = np.sum(pdf) if total > maximum: maximum = total alpha = x print(alpha) sigmaG = (count/alpha)*sigmaG mat = sigmaE + sigmaG value = multivariate_normal.logpdf(zs, np.array([0.0,0.0,0.0,0.0]), mat) all_configs = np.copy(value) mvalues = [np.copy(value) for i in range(k)] for z in range(1,len(include)): alt = np.copy(mat) loc = include[z] for i in range(0,k):
def get_log_prior_at(self, *log_kernel_parameters):
    """Evaluate the log prior density at the given log kernel parameters.

    The prior is a zero-mean Gaussian with covariance
    ``sigma_prior_parameter**2 * I`` whose dimension equals the number of
    parameters passed.
    """
    dim = len(log_kernel_parameters)
    point = np.array(log_kernel_parameters).reshape(1, dim)
    prior_mean = np.zeros(dim)
    prior_cov = (self.sigma_prior_parameter**2) * np.identity(dim)
    log_density = multivariate_normal.logpdf(point,
                                             mean=prior_mean,
                                             cov=prior_cov)
    return log_density
def loglike(self, x):
    """Log-density of ``x`` under a zero-mean equicorrelated Gaussian.

    The covariance has ones on the diagonal and ``self.corr`` everywhere
    else; its size is ``self.x_dim``.
    """
    dim = self.x_dim
    identity = np.eye(dim)
    off_diagonal = 1 - np.eye(dim)
    covariance = identity + self.corr * off_diagonal
    return multivariate_normal.logpdf(x, mean=np.zeros(dim), cov=covariance)
def log_likelihood(self, y, X, Z):
    """Gaussian log-likelihood of ``y`` for the current model parameters.

    The mean is ``X . _coef`` and the covariance is
    ``Z . _ranef_cov . Z^T + _noise_var * I``.

    Args:
        y: observed response vector.
        X: matrix multiplied with ``self._coef`` to give the mean.
        Z: matrix that maps ``self._ranef_cov`` into observation space.

    Returns:
        The log-density of ``y`` under that Gaussian.
    """
    mean = np.dot(X, self._coef)
    structured_cov = np.dot(Z, np.dot(self._ranef_cov, np.transpose(Z)))
    # Build a new array instead of adding in place: an in-place `+=` of the
    # float noise term fails with a casting error when Z and _ranef_cov are
    # integer-typed.
    cov = structured_cov + self._noise_var * np.eye(len(y))
    return mvn.logpdf(y, mean, cov)
print(mu)

# Per-column variance and standard deviation of the four variables.
var1 = variance(ip_file, "CS Score (USNews)")
var2 = variance(ip_file, "Research Overhead %")
var3 = variance(ip_file, "Admin Base Pay$")
var4 = variance(ip_file, "Tuition(out-state)$")
sigma1 = std(ip_file, "CS Score (USNews)")
sigma2 = std(ip_file, "Research Overhead %")
sigma3 = std(ip_file, "Admin Base Pay$")
sigma4 = std(ip_file, "Tuition(out-state)$")

# First 49 rows, columns 2..5.
df = ip_file.iloc[0:49, 2:6]
cov_mat = df.cov().round(3)
print(cov_mat)
print(df.corr().round(3))  #do correlation using numpy

# Joint log-likelihood of the rows under one multivariate normal.
X = 0
# Iterate over the rows actually present rather than a second hard-coded 49.
for i in range(len(df)):
    # allow_singular takes a real boolean; the string 'True' only worked
    # because any non-empty string is truthy.
    X += multivariate_normal.logpdf(df.iloc[i, :],
                                    mu,
                                    cov_mat,
                                    allow_singular=True)
print(X)
#model = BayesianNetwork.from_samples(df, algorithm='exact')
mpl.plot()
def main():
    """Run the coverage simulation for the noisy profile-likelihood bounds.

    For each simulation round a data set is generated, points below the
    profile likelihood are sampled, a quadratic is fitted to them with
    ``meta_model_optimization`` and the interval end points implied by the
    fitted quadratic are stored. The noisy and true bounds of all rounds are
    printed at the end.
    """
    ## speed:Make vectors of length iter for each round so we can store all the estimates from each iteration and
    ## take summary statistics at the end
    true_L = []
    true_U = []
    true_MLE = []
    L_vec = []
    U_vec = []
    L_vec_noise = []
    U_vec_noise = []
    L_vec_g_noise = []
    U_vec_g_noise = []
    MLE_hat = []
    MLE_var = []
    a_store = []
    b_store = []
    a_reparam_store = []
    a_var_store = []
    b_var_store = []
    a_reparam_var_store = []

    ## These will be updated additively, could have stored as a vector and taken mean, but no need
    SD_g = 0
    bias_g = 0

    ### Begin Simulation ###
    coverage_iter_number = 2  # number of simulations I will use to assess the coverage
    for k in range(coverage_iter_number):
        ## Here I have to reset the seed because, if not, the seed reset in my optimization will mess up
        ## my data
        np.random.seed(k)

        ### Create data set for this iteration and true parameter values ###
        ## Simulation parameter
        sigma = 2 * np.array([[1, .5], [.5, 3]])
        mu = np.array([-5.1, 5.2])
        n = 20
        data_dict = data_generator(n, mu, sigma)  ## Generate data
        data_mean = data_dict['data_mean']
        true_MLE.append(data_dict['true_MLE'])
        true_L.append(data_dict['true_L'])  # storage
        true_U.append(data_dict['true_U'])  # storage
        # Log-density of the sample mean evaluated at itself.
        mle_like = multivariate_normal.logpdf(data_mean,
                                              mean=data_mean,
                                              cov=sigma / n)

        ### Generate points 'below' the profile likelihood that I will use to estimate true profile likelihood ###
        ## Estimation parameters
        t_g = 10  # Allotted horizontal error in each point, the larger, the smaller the horizontal error
        sample = 20  # Number of points I will generate to estimate the profile likelihood
        estimation_points_dict = estimation_points(data_mean, sigma, n,
                                                   sample, t_g)
        likehood_sample = estimation_points_dict['likehood_sample']
        mu_hat_max = estimation_points_dict['mu_hat_max']
        y_star_max = max(likehood_sample)
        x_star_sd = estimation_points_dict['epsilon_sd']
        bias_g = bias_g + estimation_points_dict['sum_epsilon']
        SD_g = SD_g + x_star_sd  # storage

        ### Given my points, I get an estimate of the profile likelihood ###
        ## Get initial quadratic guess
        ## I can alter this to get better initial estimates
        curvature = -5  # Inital estimate of curvature
        center = np.mean(mu_hat_max)  # Initial estimate of center of my quadratic
        height = mle_like  # My height is based on the likelihood of true mle which is known

        ## Get the corresponding values for a quadratic function
        a_init = curvature
        b_init = -2 * curvature * center
        c_reparam = height - y_star_max

        ## Find the optimized quadratice parameters, i.e. my PL estimate
        optimized_parameters = meta_model_optimization(
            a_init, b_init, c_reparam, mu_hat_max, likehood_sample,
            x_star_sd, y_star_max, 10000, sample)
        a_reparam = optimized_parameters.x[0]
        a = -np.exp(a_reparam)
        b = optimized_parameters.x[1]
        #information_inv_est = optimized_parameters.hess_inv ## keep this positive since I minimized the negative log likelihood

        ### Storage of values and finding new cut offs for our profile likelihood ###
        MLE_hat.append(-b / (2 * a))  # Store estimate of MLE based on PL
        # Only consumed by the disabled Hessian-based variance estimate below.
        grad_mle = np.array(
            [-b / (2 * np.exp(a_reparam)), 1 / (2 * np.exp(a_reparam))])
        #cur_inv = information_inv_est # get error estimates of parameters based on hessian
        #cur_MLE_var = grad_mle.dot(cur_inv).dot(grad_mle) # get estimate of MLE variance
        #MLE_var.append(cur_MLE_var) # store MLE variance estimate
        a_store.append(a)  # store curvaturue
        b_store.append(b)  # store b value in quadratice
        #a_reparam_store.append(a_reparam)
        #a_var_store.append(np.exp(-2*a_reparam)*cur_inv[0,0]) # variance in a estimate
        #b_var_store.append(cur_inv[1,1]) # variance in b estimate
        #a_reparam_var_store.append(cur_inv[0,0]) # variance in reparameterized a

        ## obtain new profile likelihood cutoff based on estimated PL
        new_cut_off = y_star_max - 1.92  # Tim double check this should be y_star_max vs mle_like
        half_width = np.sqrt((new_cut_off - (c_reparam + y_star_max)) / a)
        L_vec_noise_cur = -half_width - b / (2 * a)  # New estimated lower bound
        U_vec_noise_cur = half_width - b / (2 * a)  # New estimated upper bound
        L_vec_noise.append(L_vec_noise_cur)
        U_vec_noise.append(U_vec_noise_cur)

        # Add in plotting?

        ## Print the iteration
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(k)

    # NOTE(review): the summary is assumed to sit after the loop; the
    # original indentation was not recoverable - confirm.
    ## write the valid file in tab delimited format
    print("Noisy Upper Bound")
    print(U_vec_noise)
    print("True Upper Bound")
    print(true_U)
    print("Noisy Lower Bound")
    print(L_vec_noise)
    print("True Lower Bound")
    print(true_L)
def log_target(X, b, v):
    """Log-density of a twisted 2-D standard normal target at ``X``.

    The point is mapped to ``(X[0] / sqrt(v), X[1] - b * (X[0]**2 - v))``
    and scored under a standard bivariate normal.

    Args:
        X: point with at least two leading components; not modified.
        b: twist strength.
        v: scale of the first coordinate.

    Returns:
        The log-density as a float.
    """
    # Work on a float copy: the previous `Y = X` aliased the argument, so
    # the caller's array was overwritten (and truncated for integer input).
    Y = np.array(X, dtype=float)
    # The second coordinate must use the *unscaled* first coordinate, so it
    # is updated before Y[0] is rescaled.
    Y[1] = Y[1] - b * ((Y[0]**2) - v)
    Y[0] = Y[0] / np.sqrt(v)
    return multivariate_normal.logpdf(Y, np.zeros([2]), np.eye(2))
def log_likelihood(self, X, Y, beta):
    """Zero-mean Gaussian log-likelihood of ``Y``.

    The covariance is ``self.cov_matrix_(X, X, beta)`` passed through
    ``nearestSPD``; singular covariances are allowed.
    """
    zero_mean = np.zeros(len(X))
    raw_cov = self.cov_matrix_(X, X, beta)
    spd_cov = nearestSPD(raw_cov)
    return mvn.logpdf(Y, zero_mean, spd_cov, allow_singular=True)
def log_emission_prob(X, mu, sigma2):
    """Return the Gaussian log-density of ``X`` with mean ``mu`` and
    covariance ``sigma2``."""
    log_density = multivariate_normal.logpdf(X, mu, sigma2)
    return log_density
AxesStyle="Normal2", color="g") ############ ESTIMATE THEM ################ theta1 = Gae.get_Gaussian_muSigma_ML(X1.T) print("mu1:") print(theta1[0]) print("Sigma1") print(theta1[1]) ############## Estimate Likelihood ################### ll = Gad.Gaussian_pdf_log(X1, [mu1, cov1]) ll2 = [] for i in range(ll.size): ll2.append( multivariate_normal.logpdf(X1[:, i], mean=mu1.flatten(), cov=cov1)) ll2 = np.array(ll2).reshape(ll.shape) print("ll ours") print(ll.T) print("ll scipy") print(ll2.T) print("Difference in ll") print((ll - ll2).T) ###### Multiple clusters case ll_K = Gad.Gaussian_K_pdf_log(X1, [[mu1, cov1], [mu2, cov2]]) ######################################################################################## ##################### Crossvalidate Using EM #################################### #########################################################################################