def test_covariance():
    """Check that `_cov` yields a symmetric matrix for each shrinkage mode."""
    data, _ = make_blobs(n_samples=100, n_features=5, centers=1,
                         random_state=42)

    # Mix the features with a dense integer matrix so they become correlated.
    n_feat = data.shape[1]
    mixing = np.arange(n_feat**2).reshape(n_feat, n_feat)
    data = np.dot(data, mixing)

    for mode in ('empirical', 'auto'):
        cov = _cov(data, mode)
        assert_almost_equal(cov, cov.T)
def test_covariance():
    """Verify symmetry of the empirical and auto-shrunk covariance estimates."""
    samples, _labels = make_blobs(
        n_samples=100, n_features=5, centers=1, random_state=42
    )

    # Right-multiply by a full square matrix to introduce feature correlation.
    width = samples.shape[1]
    samples = np.dot(samples, np.arange(width**2).reshape(width, width))

    estimates = [_cov(samples, "empirical"), _cov(samples, "auto")]
    for estimate in estimates:
        assert_almost_equal(estimate, estimate.T)
def fit(self, X, y):
    """Fit a linear discriminant analysis and store the ordination results.

    Parameters
    ----------
    X : pandas.DataFrame
        Descriptor table. Its index supplies the sample IDs and its columns
        supply the feature IDs of the score tables.
    y : pandas object with an ``index`` (e.g. ``pandas.Series``)
        Class labels, expected to be aligned with the rows of ``X``.

    Returns
    -------
    self
        The fitted object, with ``ordi_fitted``, ``eigenvalues``,
        ``proportion_explained``, ``sample_scores`` and ``biplot_scores``
        set as attributes.

    Warns
    -----
    UserWarning
        If the indexes of ``X`` and ``y`` differ.
    """
    if not np.all(X.index == y.index):
        warnings.warn(
            "Warning: Indexes in X and y are different. Are you sure they are correctly alligned?"
        )

    self.X = X
    self.y = y
    sample_ids = X.index
    feature_ids = X.columns

    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0;
    # both return the underlying NumPy array.
    X = X.values
    y = y.values

    nuee_LDA = sklearn_LDA(solver=self.solver,
                           shrinkage=self.shrinkage,
                           priors=self.priors,
                           n_components=self.n_components,
                           store_covariance=self.store_covariance,
                           tol=self.tol)
    nuee_LDA.fit(X, y)

    # Upper bound on the axis labels; each table below slices what it needs.
    ordi_column_names = [
        'LDA%d' % (i + 1) for i in range(nuee_LDA.coef_.shape[1])
    ]

    # Prepare output.
    # Compute eigenvalues: sklearn doesn't export them, so they are
    # regenerated from the within- and total-scatter matrices.
    # NOTE(review): `covariance_` is presumably only available when the
    # solver stores it (e.g. store_covariance=True with 'svd') -- confirm.
    Sw = nuee_LDA.covariance_
    St = _cov(X, nuee_LDA.shrinkage)
    Sb = St - Sw  # between scatter
    eigenvalues, _ = linalg.eigh(Sb, Sw)
    eigenvalues = eigenvalues[::-1]  # reverse eigh's ascending order

    p_explained = pd.Series(
        nuee_LDA.explained_variance_ratio_,
        index=ordi_column_names[:len(nuee_LDA.explained_variance_ratio_)])

    sample_scores = nuee_LDA.transform(X)
    biplot_scores = nuee_LDA.scalings_
    if self.scaling == 2:
        # Rescale sample scores by eigenvalue**-0.5 and biplot scores by
        # eigenvalue**0.5 along each retained axis.
        sample_scores = sample_scores.dot(
            np.diag(eigenvalues[:sample_scores.shape[1]]**(-0.5)))
        biplot_scores = biplot_scores.dot(
            np.diag(eigenvalues[:biplot_scores.shape[1]]**0.5))

    # Add LDA ordination object names to self
    self.ordiobject_type = 'LDA'
    self.method_name = 'Linear Discriminant Analysis'
    self.ordi_fitted = nuee_LDA
    self.eigenvalues = eigenvalues
    self.proportion_explained = p_explained

    self.sample_scores = pd.DataFrame(
        sample_scores,
        index=sample_ids,
        columns=ordi_column_names[:sample_scores.shape[1]])
    self.sample_scores.index.name = 'ID'

    self.biplot_scores = pd.DataFrame(
        biplot_scores,
        index=feature_ids,
        columns=ordi_column_names[:biplot_scores.shape[1]])
    self.biplot_scores.index.name = 'ID'

    return self
def imagery_time(folder, subs, filt, im_times, event_ids, n_perm, n_pseudo,
                 bins, step=1):
    """Time-resolved pairwise decoding of conditions for each subject.

    For every subject the two session recordings are loaded into epochs,
    pseudo-trials are built per cross-validation permutation, the data are
    whitened with a shrinkage covariance estimate, and a linear SVM is
    trained and tested for every pair of conditions on sliding time windows.

    Parameters
    ----------
    folder : str
        Directory holding ``IR_<sub>_S01.bdf`` and ``IR_<sub>_S02.bdf``.
    subs : iterable
        Subject identifiers; each is zero-padded to two digits in file names.
    filt, im_times, event_ids
        Forwarded unchanged to ``load_to_epochs``; ``event_ids`` is also used
        to equalize event counts.
    n_perm : int
        Number of cross-validation permutations.
    n_pseudo : int
        Pseudo-trial setting forwarded to the cross-validation splitter.
    bins : int
        Number of consecutive time samples in each classification window.
    step : int, optional
        Stride between the start samples of successive windows.

    Returns
    -------
    list of numpy.ndarray
        One array per subject with shape
        ``(n_perm, n_conditions, n_conditions, n_time)`` holding the
        accuracy minus 0.5. Entries that are never evaluated stay NaN.

    Notes
    -----
    The accumulated results are written to ``temp.npz`` in the working
    directory after each subject.
    """
    # Only the names used below are imported: unused imports of optional or
    # since-removed third-party symbols made the function fail before it
    # could do any work.
    import numpy as np
    import scipy.linalg  # load the submodule explicitly
    from sklearn.discriminant_analysis import _cov
    from sklearn.svm import SVC

    svm = SVC(kernel='linear')
    CV = ShuffleBinLeaveOneOut
    out = list()

    for sub in subs:
        sub_tag = str(sub).zfill(2)
        fnames = [
            folder + '/IR_' + sub_tag + '_S01.bdf',
            folder + '/IR_' + sub_tag + '_S02.bdf'
        ]
        epochs = load_to_epochs(fnames, event_ids, im_times, filt)
        epochs.drop_channels(['Status']).equalize_event_counts(
            event_ids=event_ids, method='mintime')

        X = epochs.get_data()
        y = epochs.events[:, 2]
        # NOTE(review): presumably shifts trigger codes to condition labels
        # expected by the splitter -- confirm the meaning of the offset 30.
        y = [a - 30 for a in y]

        n_conditions = len(np.unique(y))
        n_sensors = X.shape[1]
        n_time = X.shape[2]

        cv = CV(y, n_iter=n_perm, n_pseudo=n_pseudo)
        result = np.full((n_perm, n_conditions, n_conditions, n_time),
                         np.nan)

        for f, (train_indices, test_indices) in enumerate(cv.split(X)):
            print('\tPermutation %g / %g' % (f + 1, n_perm))

            # 1. Compute pseudo-trials for training and test
            Xpseudo_train = np.full(
                (len(train_indices), n_sensors, n_time), np.nan)
            Xpseudo_test = np.full(
                (len(test_indices), n_sensors, n_time), np.nan)
            for p, ind in enumerate(train_indices):
                Xpseudo_train[p, :, :] = np.mean(X[ind, :, :], axis=0)
            for p, ind in enumerate(test_indices):
                Xpseudo_test[p, :, :] = np.mean(X[ind, :, :], axis=0)

            # 2. Whitening using the Epoch method
            sigma_conditions = cv.labels_pseudo_train[
                0, :, n_pseudo - 1:].flatten()
            sigma_ = np.empty((n_conditions, n_sensors, n_sensors))
            for k, c in enumerate(np.unique(y)):
                # compute sigma for each time point, then average across time
                sigma_[k] = np.mean([
                    _cov(Xpseudo_train[sigma_conditions == c, :, t],
                         shrinkage='auto') for t in range(n_time)
                ], axis=0)
            sigma = sigma_.mean(axis=0)  # average across conditions
            sigma_inv = scipy.linalg.fractional_matrix_power(sigma, -0.5)
            Xpseudo_train = (
                Xpseudo_train.swapaxes(1, 2) @ sigma_inv).swapaxes(1, 2)
            Xpseudo_test = (
                Xpseudo_test.swapaxes(1, 2) @ sigma_inv).swapaxes(1, 2)

            for c1 in range(n_conditions - 1):
                for c2 in range(min(c1 + 1, n_conditions - 1),
                                n_conditions):
                    for t in np.arange(0, n_time - bins, step):
                        # 3. Fit the classifier using training data
                        data_train = Xpseudo_train[
                            cv.ind_pseudo_train[c1, c2], :, t:t + bins]
                        # Flatten sensors x window samples into one
                        # feature axis.
                        data_train = np.reshape(
                            data_train,
                            (data_train.shape[0],
                             data_train.shape[1] * data_train.shape[2]),
                            order='F')
                        svm.fit(data_train, cv.labels_pseudo_train[c1, c2])

                        # 4. Compute and store classification accuracies
                        data_test = Xpseudo_test[
                            cv.ind_pseudo_test[c1, c2], :, t:t + bins]
                        data_test = np.reshape(
                            data_test,
                            (data_test.shape[0],
                             data_test.shape[1] * data_test.shape[2]),
                            order='F')
                        result[f, c1, c2, t] = np.mean(
                            svm.predict(data_test) ==
                            cv.labels_pseudo_test[c1, c2]) - 0.5

        # Permutations are kept (not averaged) so callers can aggregate.
        out.append(result)
        # Persist what has been computed so far.
        np.savez_compressed('temp', results=out)

    return out
def generate_new_face(self, N, age, gender, ethn, age_range=20,
                      algorithm='pca', dist='normal', whitened=False,
                      shrinkage=False, save_dir=None):
    """ Generates a new face by randomly synthesizing decomposition
    components, applying the inverse transform, and adding the norm.

    Parameters
    ----------
    N : int
        How many new faces should be generated
    age : int
        Desired age of new face
    gender : str
        Desired gender of new face ('M' or 'F')
    ethn : str
        Desired ethnicity of new face ('WC', 'BA', 'EA')
    age_range : int
        Age tolerance forwarded to ``get_scodes_given_criteria`` when
        selecting the reference faces.
    algorithm : str
        Decomposition whose inverse is applied ('pca', 'ica', 'nmf')
    dist : str
        Distribution used to sample new values ('uniform', 'norm',
        'mnorm'). The default 'normal' is accepted as an alias of 'norm'.
    whitened : bool
        Was the data whitened before decomposition? Only used when
        algorithm='pca'.
    shrinkage : bool
        Whether to apply shrinkage to covariance estimation of residuals.
        Only relevant when dist='mnorm'.
    save_dir : str
        Path to directory with (intermediate) results. Defaults to
        ``self.save_dir``.

    Returns
    -------
    list of str
        Paths of the ``.mat`` files written, one per generated face.

    Raises
    ------
    ValueError
        If `dist` or `algorithm` is not one of the supported values.
    """
    # The signature default 'normal' was never handled by the sampling code,
    # so a call with default arguments always failed; treat it as 'norm'.
    if dist == 'normal':
        dist = 'norm'
    if dist not in ('uniform', 'norm', 'mnorm'):
        raise ValueError("Please choose `dist` from ('uniform', "
                         "'norm', 'mnorm')")
    # Fail early with a clear message instead of an unbound-variable error
    # after the expensive loading step.
    if algorithm not in ('pca', 'ica', 'nmf'):
        raise ValueError("Please choose `algorithm` from ('pca', "
                         "'ica', 'nmf')")

    if save_dir is None:
        save_dir = self.save_dir

    to_write = {i: dict() for i in range(N)}
    print("")
    for mod in self.mods:
        print("Generating new faces (%s) ..." % mod)
        decomp_comps = np.load(
            op.join(save_dir, '%s_decomp_comps.npy' % mod))
        nz_mask = np.load(op.join(save_dir, '%s_nzmask.npy' % mod))
        betas = self._load_chunks(mod, save_dir, 'betas')
        resids_decomp = self._load_chunks(mod, save_dir,
                                          'residuals_decomp')

        relev_scodes = get_scodes_given_criteria(gender, age, age_range,
                                                 ethn, 'v1')
        idx = self._get_idx_of_scode(relev_scodes)
        relev_resids = resids_decomp[idx, :]

        random_data = np.zeros((N, decomp_comps.shape[0]))
        if N > 0:
            # The distribution parameters do not depend on the face being
            # generated, so compute them once per modality.
            if dist == 'uniform':
                sampler = np.random.uniform
                params = (relev_resids.min(axis=0),
                          relev_resids.max(axis=0))
            elif dist == 'norm':
                sampler = np.random.normal
                params = (relev_resids.mean(axis=0),
                          relev_resids.std(axis=0))
            else:  # 'mnorm'
                sampler = np.random.multivariate_normal
                if shrinkage:
                    cov = _cov(relev_resids, shrinkage='auto')
                else:
                    cov = np.cov(relev_resids.T)
                params = (relev_resids.mean(axis=0), cov)

            # One draw per face keeps the random stream identical to
            # sampling row by row.
            for i in range(N):
                random_data[i, :] = sampler(*params)

        # Map the sampled component scores back to residual space.
        if algorithm == 'pca':
            decomp_means = np.load(
                op.join(save_dir, '%s_decomp_means.npy' % mod))
            if whitened:
                decomp_explvar = np.load(
                    op.join(save_dir, '%s_decomp_explvar.npy' % mod))
                resids_inv = np.dot(
                    random_data,
                    np.sqrt(decomp_explvar[:, np.newaxis]) *
                    decomp_comps) + decomp_means
            else:
                resids_inv = random_data.dot(decomp_comps) + decomp_means
        elif algorithm == 'ica':
            resids_inv = random_data.dot(decomp_comps.T)
            resid_means = np.load(
                op.join(save_dir, '%s_residuals_means.npy' % mod))
            resid_stds = np.load(
                op.join(save_dir, '%s_residuals_stds.npy' % mod))
            resids_inv *= resid_stds
            resids_inv += resid_means
        else:  # 'nmf'
            resids_inv = random_data.dot(decomp_comps)
            resid_mins = np.load(
                op.join(save_dir, '%s_residuals_mins.npy' % mod))
            resid_scale = np.load(
                op.join(save_dir, '%s_residuals_scale.npy' % mod))
            resids_inv -= resid_mins
            resids_inv /= resid_scale

        # Add the predicted "norm" face for the requested demographics.
        norm_vec = self._generate_design_vector(gender, age, ethn)
        norm = norm_vec.dot(betas)
        final_face_data = norm + resids_inv

        for i in range(N):
            # Scatter the values back into the full-size array; positions
            # outside the mask stay zero.
            tmp = np.zeros(DATA_SHAPES[self.version][mod])
            tmp[nz_mask] = final_face_data[i, :]
            to_write[i][mod] = tmp

    to_return = []
    for key, value in to_write.items():
        name = 'id-g%i_gen-%s_age-%i_eth-%s.mat' % (key, gender, age, ethn)
        outname = op.join(save_dir, name)
        savemat(outname, value)
        to_return.append(outname)

    return to_return