def __init__(self, data, ncomp):
    """
    Initialise the model and seed the Wishart matrix from the data.

    The initial guess is a covariance matrix with the per-column sample
    standard deviations of ``data`` and an identity correlation matrix
    (i.e. no cross-correlation), divided by ``self.wishart_dof``.

    Parameters
    ----------
    data : ndarray, (n_samples, ndim)
        Two-dimensional array whose second axis must have length
        ``self.ndim``.
    ncomp
        Forwarded unchanged to the parent initialiser.

    Raises
    ------
    AssertionError
        If ``data`` is not 2-D or its second axis does not match
        ``self.ndim``.
    """
    super().__init__(data, ncomp)
    # NOTE(review): self.ndim and self.wishart_dof are read below but not set
    # here; presumably the parent initialiser provides them - confirm.
    # Test the rank first and short-circuit with `and`: the previous
    # `(shape[1] == ndim) & (ndim == 2)` indexed shape[1] unconditionally and
    # raised IndexError for 1-D input instead of failing this check.
    assert data.ndim == 2 and data.shape[1] == self.ndim, (
        "data must be a 2-D array with shape (n_samples, ndim)"
    )
    # np.cov treats rows as variables, hence the transpose.
    cov = np.cov(data.T)
    stds = np.sqrt(np.diag(cov))
    # Identity correlation: keep the variances, drop the covariances.
    corr = np.eye(self.ndim)
    guess = corr2cov(corr, stds)
    self.wishart_matrix = guess / self.wishart_dof
def diffusion(self, n_samples=10000, fit_intercept=True):
    """
    Calculate the diffusion coefficient for the trajectory.

    Samples of the mean-squared displacement are drawn from a multivariate
    normal distribution and a straight line is fitted to every sample by
    generalised least squares, using only the points from the index of the
    maximum of ``self.ngp`` onwards. The result is stored in
    ``self.diffusion_coefficient`` (gradient divided by 6) and, when the
    intercept is fitted, ``self.intercept``.

    Args:
        n_samples (:py:attr:`int`, optional): The number of samples in the random generator. Default is :py:attr:`10000`.
        fit_intercept (:py:attr:`bool`, optional): Should the intercept of the diffusion relationship be fit. Default is :py:attr:`True`.
    """
    # The fit window start and the inverse covariance are each needed several
    # times below; compute them once instead of on every use.
    start = np.argmax(self.ngp)
    dt = self.dt[start:]

    cov = corr2cov(self.correlation_matrix, self.msd_sampled_std[start:])
    single_msd = multivariate_normal(self.msd_sampled[start:], cov, allow_singular=True)
    single_msd_samples = single_msd.rvs(n_samples)

    # Design matrix: a column of ones is prepended only when the intercept
    # is a free parameter.
    if fit_intercept:
        A = np.array([np.ones(dt.size), dt]).T
    else:
        A = np.array([dt]).T
    Y = single_msd_samples.T

    # Generalised least squares: (A^T C^-1 A)^-1 A^T C^-1 Y.
    # NOTE(review): allow_singular=True above admits a singular `cov`, for
    # which np.linalg.inv raises LinAlgError - confirm this cannot occur.
    cov_inv = np.linalg.inv(cov)
    straight_line = np.matmul(
        np.linalg.inv(np.matmul(A.T, np.matmul(cov_inv, A))),
        np.matmul(A.T, np.matmul(cov_inv, Y)))

    # NOTE(review): the factor 6 is presumably 2 x 3 dimensions from the
    # Einstein relation - confirm for non-3D systems.
    if fit_intercept:
        intercept, gradient = straight_line
        self.diffusion_coefficient = Distribution(gradient / 6, ci_points=self.confidence_interval)
        self.intercept = Distribution(intercept, ci_points=self.confidence_interval)
    else:
        self.diffusion_coefficient = Distribution(straight_line[0] / 6, ci_points=self.confidence_interval)
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):
    """
    Find the nearest covariance matrix that is positive (semi-) definite

    This leaves the diagonal, i.e. the variance, unchanged

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : string
        if "clipped", then the faster but less accurate ``corr_clipped`` is
        used. Any other value (nominally "nearest") selects
        ``corr_nearest``.
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation

    Notes
    -----
    The covariance matrix is converted to a correlation matrix, the
    correlation matrix is corrected, and the result is converted back to a
    covariance matrix using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``. If the threshold=0, then the
    smallest eigenvalue of the correlation matrix might be negative, but
    zero within a numerical error, for example in the range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
    """
    from statsmodels.stats.moment_helpers import cov2corr, corr2cov

    # Work in correlation space so the variances are untouched.
    corr_in, std_ = cov2corr(cov, return_std=True)

    if method != 'clipped':
        # Everything except 'clipped' is treated as 'nearest'.
        corr_ = corr_nearest(corr_in, threshold=threshold, n_fact=n_fact)
    else:
        corr_ = corr_clipped(corr_in, threshold=threshold)

    # Restore the original scale.
    cov_ = corr2cov(corr_, std_)

    return (cov_, corr_, std_) if return_all else cov_
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):
    '''
    Find the nearest covariance matrix that is positive (semi-) definite

    This leaves the diagonal, i.e. the variance, unchanged

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : string
        if "clipped", then the faster but less accurate ``corr_clipped`` is
        used. if "nearest", then ``corr_nearest`` is used
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation

    Raises
    ------
    ValueError
        If ``method`` is neither "clipped" nor "nearest".

    Notes
    -----
    This converts the covariance matrix to a correlation matrix. Then, finds
    the nearest correlation matrix that is positive semidefinite and converts
    it back to a covariance matrix using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
    '''
    # Reject unknown methods up front. Previously neither branch ran, `corr_`
    # stayed unbound and the call failed later with an UnboundLocalError that
    # did not mention `method` at all.
    if method not in ('clipped', 'nearest'):
        raise ValueError(
            "method must be 'clipped' or 'nearest', got %r" % (method,))

    from statsmodels.stats.moment_helpers import cov2corr, corr2cov

    cov_, std_ = cov2corr(cov, return_std=True)
    if method == 'clipped':
        corr_ = corr_clipped(cov_, threshold=threshold)
    else:  # method == 'nearest'
        corr_ = corr_nearest(cov_, threshold=threshold, n_fact=n_fact)

    cov_ = corr2cov(corr_, std_)

    if return_all:
        return cov_, corr_, std_
    else:
        return cov_
if __name__ == '__main__':
    # Demo: fit a two-component mixture to two correlated 2-D Gaussian blobs
    # and overlay the line returned by conditional_2d_line on a scatter plot.
    import matplotlib.pyplot as plt
    from statsmodels.stats.moment_helpers import corr2cov, cov2corr

    def fit(ncomponents, colours, mags, steps=1000):
        """Fit a CompleteXDGMMCompiled model to (colour, mag) pairs and return it."""
        X = np.stack([colours, mags]).T
        XDGMM = CompleteXDGMMCompiled
        model = XDGMM(ncomponents, 2, labels=['colour', 'mag'], verbose=True)
        model.initialise(X, 1000)
        model.fit(X, max_iter=steps, tol=None, reinitialise=False)
        return model

    # Both off-diagonal elements must be set. The previous
    # `corr[1, 0] = corr[1, 0] = rho` assigned the same element twice and left
    # [0, 1] at 1.0, giving an asymmetric (invalid) correlation matrix.
    corra = np.ones((2, 2))
    corra[0, 1] = corra[1, 0] = 0.7
    a = np.random.multivariate_normal([0, 0], corr2cov(corra, 2), 3000)

    corrb = np.ones((2, 2))
    corrb[0, 1] = corrb[1, 0] = 0.9
    b = np.random.multivariate_normal([4, 9], corr2cov(corrb, [1, 3]), 3000)

    X = np.concatenate([a, b])
    model = fit(2, X[:, 0], X[:, 1])
    samples = model.sample(6000)

    plt.scatter(*X.T, s=5)
    plt.axis('equal')
    ax = plt.gca()
    # Evaluate the conditional line across the current x-range of the axes.
    _xs = np.linspace(*ax.get_xlim())
    _ys = conditional_2d_line(model, 'colour', _xs)
    ax.plot(_xs, _ys, color='k', rasterized=True)
from decorrelator.estimation import find_mode

N = 1000

#### assume there is a slight correlation between x and redshift
labels = ['z', 'x', 'mass']

# correlation coefficients: unit diagonal, symmetric off-diagonal entries
corr = np.eye(3)
corr[0, 1] = corr[1, 0] = 0.5  # redshift, x
corr[0, 2] = corr[2, 0] = 0.3  # redshift, mass
corr[1, 2] = corr[2, 1] = 0.5  # x, mass

std = [0.2, 0.01, 2]  # stds for redshift, x, mass
cov = corr2cov(corr, std)  # covariance
# centre for correlation gaussian (not much meaning here unless you convert to a line)
mu = [1., 0., 10.]
data = np.random.multivariate_normal(mu, cov, size=N)  # sample it
data_covariances = None

#### No controlling
from correlator.correlation import CorrelationModel

correlation = CorrelationModel('correlation', labels, data)
correlation.sample(50)
correlation.traceplot()
def calculate_map_params(self, subject, subject_dir, sub_index):
    """
    Generate the random components used to build a new map for one subject.

    :param
        subject: numpy array containing the pixel values for the five fingers
            (not read by the active code in this method)
        subject_dir: folder name containing the subject's noise data
        sub_index: subject index (not read by the active code in this method)
    :return
        data_dict: dictionary with the keys ``subject_component``,
            ``finger_component``, ``noise_component`` (list of five noise
            draws) and ``noise_pixel`` (the same draws multiplied by
            ``vox2pix.T``). ``-1`` is returned instead if an exception is
            raised while the noise is generated.
    """
    # Load subject specific matrices
    subject_folder = self.data_dir + "/" + subject_dir + "/"
    mapping_path = subject_folder + subject_dir + "." + "voxelMappingInfo.pkl"
    # NOTE(review): unpickling can execute arbitrary code; only load mapping
    # files from a trusted location.
    with open(mapping_path, 'rb') as handle:
        mapping_info = pkl.load(handle)
    vox2pix = np.array(csr_matrix(mapping_info['vox2Pixel']).todense())
    noise_mat = mapping_info['noise_corr']
    avg_Bvar_est = mapping_info['avg_Bvar_est']
    del mapping_info  # the loaded dict is no longer needed

    # Generate Subject specific component
    print("Subject")
    # Multiply unit-variance white noise by self.chol_mat to impose the
    # required correlation structure, then remove the mean.
    chol_factor = self.chol_mat
    white = np.random.normal(0, 1, size=(16384, ))
    white = white / np.std(white)
    subject_component = np.dot(chol_factor, white)
    subject_component = subject_component - subject_component.mean()
    #subject_component = (subject_component/np.nanstd(subject_component)) * (np.sqrt(self.var_s[sub_index]))

    # Generate finger specific components
    print("Finger")
    subject_covariance_matrix = self.cov_mat
    # NOTE(review): finger_covariance_matrix is never assigned in this method;
    # its only definition is the commented-out statement below. Unless the
    # name exists at module scope this line raises NameError - confirm.
    #finger_covariance_matrix = \
    #    np.ma.cov(np.ma.masked_invalid(subject - subject.mean(axis=0))) + 0.0000001
    finger_component = mnn.rvs(rowcov=finger_covariance_matrix,
                               colcov=subject_covariance_matrix)

    # Generate noise
    print("Noise")
    noise_cov = corr2cov(noise_mat, avg_Bvar_est)
    noise_draws = []
    pixel_noise_draws = []
    try:
        for _ in range(5):
            gaussian = np.random.normal(size=(noise_cov.shape[0], 1))
            noise = noise_cov @ gaussian
            noise = noise - noise.mean()
            noise_draws.append(noise)
            # Project the noise with the transposed vox2Pixel matrix.
            pixel_noise_draws.append(np.dot(vox2pix.T, noise))
    except Exception as e:
        print(e)
        return -1

    return {
        "subject_component": subject_component,
        "finger_component": finger_component,
        "noise_component": noise_draws,
        "noise_pixel": pixel_noise_draws
    }
@property
def chol(self):
    """Stack ``cholesky(norm_covariance(self.V[i]))`` for every component with ``tt.stacklists``."""
    factors = []
    for component in range(self.n_components):
        factors.append(cholesky(norm_covariance(self.V[component])))
    return tt.stacklists(factors)


if __name__ == '__main__':
    # Demo: build a three-component, two-dimensional mixture with known
    # parameters, draw samples from it and condition on the first label.
    alphas = np.array([0.3, 0.3, 0.4])
    means = np.array([[1, 2], [3, 4], [2, 2]])
    stds = np.array([[1, 3], [1, 1], [0.5, 5]])
    rhos = np.array([0.6, 0, -0.3])

    # One 2x2 correlation matrix per component, with rho off the diagonal.
    corrs = np.asarray([np.eye(2)] * len(alphas))
    corrs[:, 0, 1] = corrs[:, 1, 0] = rhos

    # Scale every correlation matrix by that component's standard deviations.
    covs = np.zeros_like(corrs)
    for k, (corr_k, std_k) in enumerate(zip(corrs, stds)):
        covs[k] = corr2cov(corr_k, std_k)

    truth = CompleteXDGMMBase(3, 2)
    truth.V = covs
    truth.mu = means
    truth.alpha = alphas
    X = truth.sample(9000)
    truth.labels = list('ab')
    truth.condition(a=0)
    # estimator = CompleteXDGMMBase(3, 2, verbose=True, debug=True)
    # estimator.fit(X)