def _dimReduce(df, method='pca', n_components=2, labels=None, standardize=False, smatFunc=None, ldaShrinkage='auto'):
    """Reduce df (samples x features) to n_components dimensions.

    Parameters
    ----------
    df : pd.DataFrame
        Samples on the rows, features on the columns.
    method : str
        One of 'kpca', 'pca', 'lda', 'pls'.
    n_components : int
        Number of output dimensions.
    labels : pd.Series, optional
        Class labels aligned with df's rows; required for 'lda' and 'pls'.
    standardize : bool
        If True, z-score each column before reduction; otherwise columns are
        only mean-centered. Ignored by 'kpca', which works on a similarity
        matrix instead.
    smatFunc : callable, optional
        For 'kpca' only: maps df to a DataFrame-like precomputed similarity
        matrix. Defaults to corrTSmatFunc.
    ldaShrinkage : str or float
        Shrinkage passed through to LinearDiscriminantAnalysis.

    Returns
    -------
    xy : ndarray of shape (n_samples, n_components)
        Coordinates in the reduced space.
    model : fitted estimator
        The fitted reducer, with explained_variance_ratio_ populated for a
        uniform caller interface.

    Raises
    ------
    ValueError
        If labels is None for 'lda'/'pls', or method is not recognized.
    """
    if method == 'kpca':
        # KernelPCA on a precomputed similarity matrix: missing values never
        # reach the decomposition, so no imputation is needed here.
        if smatFunc is None:
            smatFunc = corrTSmatFunc
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        smat = smatFunc(df).values
        xy = pca.fit_transform(smat)
        # Alias KernelPCA attributes onto the standard PCA names so callers
        # can treat every returned model uniformly.
        pca.components_ = pca.alphas_
        pca.explained_variance_ratio_ = pca.lambdas_ / pca.lambdas_.sum()
        return xy, pca
    elif method == 'pca':
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pca = PCA(n_components=n_components)
        xy = pca.fit_transform(normed)
        return xy, pca
    elif method == 'lda':
        if labels is None:
            raise ValueError('labels needed to perform LDA')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        if df.shape[1] > df.shape[0]:
            # Pre-PCA step: LDA's eigen solver needs fewer features than
            # samples, so first project onto a smaller subspace.
            # Fixed: fit on the centered/standardized data (`normed`), not
            # the raw `df` — the original discarded the normalization here.
            ppca = PCA(n_components=int(df.shape[0] / 1.5))
            normed = ppca.fit_transform(normed)
        lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage=ldaShrinkage, n_components=n_components)
        lda.fit(normed, labels.values)
        # Re-normalize so the ratios are non-negative and sum to 1.
        lda.explained_variance_ratio_ = np.abs(lda.explained_variance_ratio_) / np.abs(lda.explained_variance_ratio_).sum()
        xy = lda.transform(normed)
        return xy, lda
    elif method == 'pls':
        if labels is None:
            raise ValueError('labels needed to perform PLS')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pls = PLSRegression(n_components=n_components)
        pls.fit(normed, labels)
        # PLS has no natural explained-variance decomposition; expose zeros
        # so callers relying on the attribute do not crash.
        pls.explained_variance_ratio_ = np.zeros(n_components)
        xy = pls.x_scores_
        return xy, pls
    else:
        # Fixed: previously an unknown method fell through and returned None.
        raise ValueError('method must be one of kpca, pca, lda, pls (got %s)' % method)
def _dimReduce(df, method='pca', n_components=2, labels=None, standardize=False, smatFunc=None, ldaShrinkage='auto'):
    """Reduce df (samples x features) to n_components dimensions.

    Parameters
    ----------
    df : pd.DataFrame
        Samples on the rows, features on the columns.
    method : str
        One of 'kpca', 'pca', 'lda', 'pls'.
    n_components : int
        Number of output dimensions.
    labels : pd.Series, optional
        Class labels aligned with df's rows; required for 'lda' and 'pls'.
    standardize : bool
        If True, z-score each column before reduction; otherwise columns are
        only mean-centered. Ignored by 'kpca', which works on a similarity
        matrix instead.
    smatFunc : callable, optional
        For 'kpca' only: maps df to a DataFrame-like precomputed similarity
        matrix. Defaults to corrTSmatFunc.
    ldaShrinkage : str or float
        Shrinkage passed through to LinearDiscriminantAnalysis.

    Returns
    -------
    xy : ndarray of shape (n_samples, n_components)
        Coordinates in the reduced space.
    model : fitted estimator
        The fitted reducer, with explained_variance_ratio_ populated for a
        uniform caller interface.

    Raises
    ------
    ValueError
        If labels is None for 'lda'/'pls', or method is not recognized.
    """
    if method == 'kpca':
        # KernelPCA on a precomputed similarity matrix: missing values never
        # reach the decomposition, so no imputation is needed here.
        if smatFunc is None:
            smatFunc = corrTSmatFunc
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        smat = smatFunc(df).values
        xy = pca.fit_transform(smat)
        # Alias KernelPCA attributes onto the standard PCA names so callers
        # can treat every returned model uniformly.
        pca.components_ = pca.alphas_
        pca.explained_variance_ratio_ = pca.lambdas_ / pca.lambdas_.sum()
        return xy, pca
    elif method == 'pca':
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pca = PCA(n_components=n_components)
        xy = pca.fit_transform(normed)
        return xy, pca
    elif method == 'lda':
        if labels is None:
            raise ValueError('labels needed to perform LDA')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        if df.shape[1] > df.shape[0]:
            # Pre-PCA step: LDA's eigen solver needs fewer features than
            # samples, so first project onto a smaller subspace.
            # Fixed: fit on the centered/standardized data (`normed`), not
            # the raw `df` — the original discarded the normalization here.
            ppca = PCA(n_components=int(df.shape[0] / 1.5))
            normed = ppca.fit_transform(normed)
        lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage=ldaShrinkage, n_components=n_components)
        lda.fit(normed, labels.values)
        # Re-normalize so the ratios are non-negative and sum to 1.
        lda.explained_variance_ratio_ = np.abs(lda.explained_variance_ratio_) / np.abs(lda.explained_variance_ratio_).sum()
        xy = lda.transform(normed)
        # Fixed: the original lacked this return, so method='lda' computed
        # everything and then returned None.
        return xy, lda
    elif method == 'pls':
        if labels is None:
            raise ValueError('labels needed to perform PLS')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean()) / vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pls = PLSRegression(n_components=n_components)
        pls.fit(normed, labels)
        # PLS has no natural explained-variance decomposition; expose zeros
        # so callers relying on the attribute do not crash.
        pls.explained_variance_ratio_ = np.zeros(n_components)
        xy = pls.x_scores_
        return xy, pls
    else:
        # Fixed: previously an unknown method fell through and returned None.
        raise ValueError('method must be one of kpca, pca, lda, pls (got %s)' % method)