def joint_gauss_model(self, n=1, no=3):
    """
    Draw random functions from a joint Gaussian model built on the joint
    principal components of the srsfs and the warping functions.

    Joint fPCA is run on this object through ``fpca.fdajpca``; PCA
    coefficients are sampled from a zero-mean Gaussian whose covariance is
    the diagonal matrix of the latent values and are mapped back to an
    amplitude part (srsf) and a phase part (warping function).

    Nothing is returned. The results are stored on the object:

    * ``self.fs``: sampled functions before warping, shape (M, n)
    * ``self.gams``: sampled warping functions, shape (M, n)
    * ``self.ft``: sampled functions after warping, shape (M, n)
    * ``self.qs``: sampled srsfs, shape (M, n)
    * ``self.rsamps``: set to ``True``

    where ``M = self.time.size``.

    :param n: number of random samples
    :param no: number of principal components (default = 3)

    :type n: integer
    :type no: integer
    """
    # Parameters
    fn = self.fn
    time = self.time
    qn = self.qn
    M = time.size
    grid = np.linspace(0, 1, M)

    # Perform PCA
    jfpca = fpca.fdajpca(self)
    jfpca.calc_fpca(no=no)
    s = jfpca.latent
    U = jfpca.U
    C = jfpca.C
    mu_psi = jfpca.mu_psi

    # compute mean and covariance
    mq_new = qn.mean(axis=1)
    mididx = jfpca.id
    # signed square root of the function value at the anchor sample; it is
    # appended to the srsf mean as row M
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())

    # generate random samples
    vals = np.random.multivariate_normal(np.zeros(s.shape), np.diag(s), n)

    # rows 0..M of U carry the amplitude part, the remaining rows the
    # phase part (scaled by C)
    tmp = np.matmul(U, np.transpose(vals))
    qhat = np.tile(mqn.T, (n, 1)).T + tmp[0:M + 1, :]
    tmp = np.matmul(U, np.transpose(vals) / C)
    vechat = tmp[(M + 1):, :]

    gamhat = np.zeros((M, n))
    fhat = np.zeros((M, n))
    ft = np.zeros((M, n))
    for ii in range(n):
        # phase: exponential map, integrate psi^2, rescale to [0, 1]
        psihat = geo.exp_map(mu_psi, vechat[:, ii])
        gam_tmp = cumtrapz(psihat**2, grid, initial=0.0)
        gamhat[:, ii] = (gam_tmp - gam_tmp.min()) / (gam_tmp.max() - gam_tmp.min())

        # amplitude: integrate q|q| anchored at the mididx sample
        fhat[:, ii] = uf.cumtrapzmid(
            time, qhat[0:M, ii] * np.fabs(qhat[0:M, ii]),
            np.sign(qhat[M, ii]) * (qhat[M, ii] * qhat[M, ii]), mididx)

        # combine amplitude and phase
        ft[:, ii] = uf.warp_f_gamma(grid, fhat[:, ii], gamhat[:, ii])

    self.rsamps = True
    self.fs = fhat
    self.gams = gamhat
    self.ft = ft
    self.qs = qhat[0:M, :]

    return
def align_fPCA(f, time, num_comp=3, showplot=True, smoothdata=False, cores=-1):
    """
    aligns a collection of functions while extracting principal components.
    The functions are aligned to the principal components

    The matching step runs through joblib when M > 500 or N > 100,
    otherwise ``uf.optimum_reparam`` is called once on the whole matrix.

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param num_comp: number of fPCA components
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: passed to ``uf.gradient_spline`` to smooth the data
                       (default = F)
    :param cores: number of cores for parallel (default = -1 (all))
    :type f: np.ndarray
    :type time: np.ndarray
    :type num_comp: int
    :type showplot: bool
    :type smoothdata: bool
    :type cores: int

    :rtype: namedtuple ``align_fPCA``
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return q0: original srvf - similar structure to fn
    :return mqn: srvf mean - vector of length M
    :return gam: warping functions - similar structure to fn
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors
    :return orig_var: Original Variance of Functions
    :return amp_var: Amplitude Variance
    :return phase_var: Phase Variance
    :return cost: cost per completed iteration

    """
    lam = 0.0
    MaxItr = 50
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]
    M = f.shape[0]
    N = f.shape[1]
    parallel = M > 500 or N > 100

    eps = np.finfo(np.double).eps
    f0 = f

    if showplot:
        plot.f_plot(time, f, title="Original Data")

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(time, f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    # start from the srsf closest (L2) to the cross-sectional mean
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1)**2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()

    print("Aligning %d functions in SRVF space to %d fPCA components..."
          % (N, num_comp))
    itr = 0
    mq = np.zeros((M, MaxItr + 1))
    mq[:, itr] = q[:, min_ind]
    fi = np.zeros((M, N, MaxItr + 1))
    fi[:, :, 0] = f
    qi = np.zeros((M, N, MaxItr + 1))
    qi[:, :, 0] = q
    gam = np.zeros((M, N, MaxItr + 1))
    cost = np.zeros(MaxItr + 1)
    converged = False

    while itr < MaxItr:
        print("updating step: r=%d" % (itr + 1))

        # PCA Step
        a = mq[:, itr].repeat(N)
        d1 = a.reshape(M, N)
        qhat_cent = qi[:, :, itr] - d1
        K = np.cov(qi[:, :, itr])
        U, s, V = svd(K)

        alpha_i = np.zeros((num_comp, N))
        for ii in range(0, num_comp):
            for jj in range(0, N):
                alpha_i[ii, jj] = trapz(qhat_cent[:, jj] * U[:, ii], time)

        U1 = U[:, 0:num_comp]
        tmp = U1.dot(alpha_i)
        qhat = d1 + tmp

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(uf.optimum_reparam)(qhat[:, n], time,
                                            qi[:, n, itr], "DP", lam)
                for n in range(N))
            gam_t = np.array(out)
            gam[:, :, itr] = gam_t.transpose()
        else:
            gam[:, :, itr] = uf.optimum_reparam(qhat, time, qi[:, :, itr],
                                                "DP", lam)

        for k in range(0, N):
            time0 = (time[-1] - time[0]) * gam[:, k, itr] + time[0]
            fi[:, k, itr + 1] = np.interp(time0, time, fi[:, k, itr])
            qi[:, k, itr + 1] = uf.f_to_srsf(fi[:, k, itr + 1], time)

        qtemp = qi[:, :, itr + 1]
        mq[:, itr + 1] = qtemp.mean(axis=1)

        cost_temp = np.zeros(N)
        for ii in range(0, N):
            cost_temp[ii] = norm(qtemp[:, ii] - qhat[:, ii])**2

        cost[itr + 1] = cost_temp.mean()

        if abs(cost[itr + 1] - cost[itr]) < 1e-06:
            converged = True
            break

        itr += 1

    # itrf is both the index of the last filled slice of fi/qi/mq and the
    # number of warping steps that were applied
    if converged:
        itrf = itr + 1
    else:
        # this message used to sit inside the loop behind a condition that
        # could never be true there
        print("maximal number of iterations is reached")
        itrf = MaxItr
    cost = cost[1:(itrf + 1)]

    # Aligned data & stats
    fn = fi[:, :, itrf]
    qn = qi[:, :, itrf]
    q0 = qi[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mqn = mq[:, itrf]

    # Compose every applied warp (slices 0 .. itrf-1). The previous bound
    # (itr) dropped the final warp whenever the loop stopped on convergence.
    gamf = gam[:, :, 0]
    for k in range(1, itrf):
        gam_k = gam[:, :, k]
        for l in range(0, N):
            time0 = (time[-1] - time[0]) * gam_k[:, l] + time[0]
            gamf[:, l] = np.interp(time0, time, gamf[:, l])

    # Center Mean
    gamI = uf.SqrtMeanInverse(gamf)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    mqn = np.interp(time0, time, mqn) * np.sqrt(gamI_dev)
    for k in range(0, N):
        qn[:, k] = np.interp(time0, time, qn[:, k]) * np.sqrt(gamI_dev)
        fn[:, k] = np.interp(time0, time, fn[:, k])
        gamf[:, k] = np.interp(time0, time, gamf[:, k])

    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)

    # Get Final PCA
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn2 = np.append(mqn, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(M + 1, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn2 + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(M, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca_tmp = q_pca[0:M, l, k] * np.abs(q_pca[0:M, l, k])
            q_pca_tmp2 = np.sign(q_pca[M, l, k]) * (q_pca[M, l, k]**2)
            # four arguments, as in every other current call in this file;
            # the old call passed an extra float np.floor(M / 2) before
            # mididx
            f_pca[:, l, k] = uf.cumtrapzmid(time, q_pca_tmp, q_pca_tmp2,
                                            mididx)

    N2 = qn.shape[1]
    c = np.zeros((N2, num_comp))
    for k in range(0, num_comp):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn2) * U[:, k])

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # Align Plots
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gamf,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="Warped Data")

        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Warped Data: Mean $\pm$ STD")

        # PCA Plots
        # squeeze=False keeps ax two-dimensional even when num_comp == 1
        fig, ax = plt.subplots(2, num_comp, squeeze=False)
        for k in range(0, num_comp):
            # Axes.hold() no longer exists in matplotlib >= 3.0; repeated
            # plot() calls already draw onto the same axes
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:M, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))
            plot.rstyle(axt)

            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
            plot.rstyle(axt)
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, M + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # idx is on the x axis, the cumulative percentage on the y axis
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    # NOTE(review): f0[1, :] is the second sample, although tmp starts at
    # the first sample -- possibly meant to be f0[0, :]; left unchanged.
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqn * np.abs(mqn), time)
    fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (time[-1] - time[0]) * gamf[:, k] + time[0]
        fgam[:, k] = np.interp(time0, time, fmean)

    var_fgam = fgam.var(axis=1)
    orig_var = trapz(std_f0**2, time)
    amp_var = trapz(std_fn**2, time)
    phase_var = trapz(var_fgam, time)

    # the returned latent values and eigenvectors come from the aligned
    # functions, not from the srsf PCA above
    K = np.cov(fn)
    U, s, V = svd(K)

    align_fPCAresults = collections.namedtuple('align_fPCA', [
        'fn', 'qn', 'q0', 'mqn', 'gam', 'q_pca', 'f_pca', 'latent', 'coef',
        'U', 'orig_var', 'amp_var', 'phase_var', 'cost'
    ])

    out = align_fPCAresults(fn, qn, q0, mqn, gamf, q_pca, f_pca, s, c, U,
                            orig_var, amp_var, phase_var, cost)
    return out
def align_fPCA(f, time, num_comp=3, showplot=True, smoothdata=False):
    """
    aligns a collection of functions while extracting principal components.
    The functions are aligned to the principal components

    The matching step runs through joblib (all cores) when M > 500 or
    N > 100, otherwise ``uf.optimum_reparam`` is called once on the whole
    matrix.

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param num_comp: number of fPCA components
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: passed to ``uf.gradient_spline`` to smooth the data
                       (default = F)
    :type f: np.ndarray
    :type time: np.ndarray
    :type num_comp: int
    :type showplot: bool
    :type smoothdata: bool

    :rtype: namedtuple ``align_fPCA``
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return q0: original srvf - similar structure to fn
    :return mqn: srvf mean - vector of length M
    :return gam: warping functions - similar structure to fn
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors
    :return orig_var: Original Variance of Functions
    :return amp_var: Amplitude Variance
    :return phase_var: Phase Variance
    :return cost: cost per completed iteration

    """
    lam = 0.0
    MaxItr = 50
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]
    M = f.shape[0]
    N = f.shape[1]
    parallel = M > 500 or N > 100

    eps = np.finfo(np.double).eps
    f0 = f

    if showplot:
        plot.f_plot(time, f, title="Original Data")

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(time, f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    # start from the srsf closest (L2) to the cross-sectional mean
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1) ** 2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()

    print("Aligning %d functions in SRVF space to %d fPCA components..."
          % (N, num_comp))
    itr = 0
    mq = np.zeros((M, MaxItr + 1))
    mq[:, itr] = q[:, min_ind]
    fi = np.zeros((M, N, MaxItr + 1))
    fi[:, :, 0] = f
    qi = np.zeros((M, N, MaxItr + 1))
    qi[:, :, 0] = q
    gam = np.zeros((M, N, MaxItr + 1))
    cost = np.zeros(MaxItr + 1)
    converged = False

    while itr < MaxItr:
        print("updating step: r=%d" % (itr + 1))

        # PCA Step
        a = mq[:, itr].repeat(N)
        d1 = a.reshape(M, N)
        qhat_cent = qi[:, :, itr] - d1
        K = np.cov(qi[:, :, itr])
        U, s, V = svd(K)

        alpha_i = np.zeros((num_comp, N))
        for ii in range(0, num_comp):
            for jj in range(0, N):
                alpha_i[ii, jj] = trapz(qhat_cent[:, jj] * U[:, ii], time)

        U1 = U[:, 0:num_comp]
        tmp = U1.dot(alpha_i)
        qhat = d1 + tmp

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=-1)(
                delayed(uf.optimum_reparam)(qhat[:, n], time,
                                            qi[:, n, itr], lam)
                for n in range(N))
            gam_t = np.array(out)
            gam[:, :, itr] = gam_t.transpose()
        else:
            gam[:, :, itr] = uf.optimum_reparam(qhat, time, qi[:, :, itr], lam)

        for k in range(0, N):
            time0 = (time[-1] - time[0]) * gam[:, k, itr] + time[0]
            fi[:, k, itr + 1] = np.interp(time0, time, fi[:, k, itr])
            qi[:, k, itr + 1] = uf.f_to_srsf(fi[:, k, itr + 1], time)

        qtemp = qi[:, :, itr + 1]
        mq[:, itr + 1] = qtemp.mean(axis=1)

        cost_temp = np.zeros(N)
        for ii in range(0, N):
            cost_temp[ii] = norm(qtemp[:, ii] - qhat[:, ii]) ** 2

        cost[itr + 1] = cost_temp.mean()

        if abs(cost[itr + 1] - cost[itr]) < 1e-06:
            converged = True
            break

        itr += 1

    # itrf is both the index of the last filled slice of fi/qi/mq and the
    # number of warping steps that were applied
    if converged:
        itrf = itr + 1
    else:
        # this message used to sit inside the loop behind a condition that
        # could never be true there
        print("maximal number of iterations is reached")
        itrf = MaxItr
    cost = cost[1:(itrf + 1)]

    # Aligned data & stats
    fn = fi[:, :, itrf]
    qn = qi[:, :, itrf]
    q0 = qi[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mqn = mq[:, itrf]

    # Compose every applied warp (slices 0 .. itrf-1). The previous bound
    # (itr) dropped the final warp whenever the loop stopped on convergence.
    gamf = gam[:, :, 0]
    for k in range(1, itrf):
        gam_k = gam[:, :, k]
        for l in range(0, N):
            time0 = (time[-1] - time[0]) * gam_k[:, l] + time[0]
            gamf[:, l] = np.interp(time0, time, gamf[:, l])

    # Center Mean
    gamI = uf.SqrtMeanInverse(gamf)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    mqn = np.interp(time0, time, mqn) * np.sqrt(gamI_dev)
    for k in range(0, N):
        qn[:, k] = np.interp(time0, time, qn[:, k]) * np.sqrt(gamI_dev)
        fn[:, k] = np.interp(time0, time, fn[:, k])
        gamf[:, k] = np.interp(time0, time, gamf[:, k])

    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)

    # Get Final PCA
    # np.round returns a float, which cannot be used as an array index
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn2 = np.append(mqn, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(M + 1, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn2 + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(M, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca_tmp = q_pca[0:M, l, k] * np.abs(q_pca[0:M, l, k])
            q_pca_tmp2 = np.sign(q_pca[M, l, k]) * (q_pca[M, l, k] ** 2)
            f_pca[:, l, k] = uf.cumtrapzmid(time, q_pca_tmp, q_pca_tmp2)

    N2 = qn.shape[1]
    c = np.zeros((N2, num_comp))
    for k in range(0, num_comp):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn2) * U[:, k])

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # Align Plots
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gamf,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="Warped Data")

        # raw strings: "\p" is not a valid escape sequence
        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Warped Data: Mean $\pm$ STD")

        # PCA Plots
        # squeeze=False keeps ax two-dimensional even when num_comp == 1
        fig, ax = plt.subplots(2, num_comp, squeeze=False)
        for k in range(0, num_comp):
            # Axes.hold() no longer exists in matplotlib >= 3.0; repeated
            # plot() calls already draw onto the same axes
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:M, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))
            plot.rstyle(axt)

            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
            plot.rstyle(axt)
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, M + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # idx is on the x axis, the cumulative percentage on the y axis
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    # NOTE(review): f0[1, :] is the second sample, although tmp starts at
    # the first sample -- possibly meant to be f0[0, :]; left unchanged.
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqn * np.abs(mqn), time)
    fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (time[-1] - time[0]) * gamf[:, k] + time[0]
        fgam[:, k] = np.interp(time0, time, fmean)

    var_fgam = fgam.var(axis=1)
    orig_var = trapz(std_f0 ** 2, time)
    amp_var = trapz(std_fn ** 2, time)
    phase_var = trapz(var_fgam, time)

    # the returned latent values and eigenvectors come from the aligned
    # functions, not from the srsf PCA above
    K = np.cov(fn)
    U, s, V = svd(K)

    align_fPCAresults = collections.namedtuple('align_fPCA', [
        'fn', 'qn', 'q0', 'mqn', 'gam', 'q_pca', 'f_pca', 'latent', 'coef',
        'U', 'orig_var', 'amp_var', 'phase_var', 'cost'
    ])

    out = align_fPCAresults(fn, qn, q0, mqn, gamf, q_pca, f_pca, s, c, U,
                            orig_var, amp_var, phase_var, cost)
    return out
def gauss_model(self, n=1, sort_samples=False):
    """
    This function models the functional data using a Gaussian model
    extracted from the principal components of the srvfs

    Nothing is returned. The results are stored on the object:

    * ``self.fs``: random aligned samples, shape (M, n)
    * ``self.gams``: random warping functions from ``uf.randomGamma``
    * ``self.ft``: random samples after warping, shape (M, n)
    * ``self.qs``: random srvfs, shape (M, n)
    * ``self.rsamps``: set to ``True``

    :param n: number of random samples
    :param sort_samples: pair amplitude samples (ordered by their maximum)
                         with warping samples (ordered by a distance
                         derived from their derivative) (default = F)
    :type n: integer
    :type sort_samples: bool
    """
    fn = self.fn
    time = self.time
    qn = self.qn
    gam = self.gam

    # Parameters
    eps = np.finfo(np.double).eps
    binsize = np.diff(time).mean()
    M = time.size
    grid = np.arange(0, M) / np.double(M - 1)

    # compute mean and covariance in q-domain
    mq_new = qn.mean(axis=1)
    # np.round returns a float, which cannot be used as an array index
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    C = np.cov(qn2)

    q_s = np.random.multivariate_normal(mqn, C, n)
    q_s = q_s.transpose()

    # compute the correspondence to the original function domain
    fs = np.zeros((M, n))
    for k in range(0, n):
        fs[:, k] = uf.cumtrapzmid(time, q_s[0:M, k] * np.abs(q_s[0:M, k]),
                                  np.sign(q_s[M, k]) * (q_s[M, k]**2),
                                  mididx)

    # shift the samples so that their cross-sectional mean matches fn's
    fbar = fn.mean(axis=1)
    fsbar = fs.mean(axis=1)
    err = np.transpose(np.tile(fbar - fsbar, (n, 1)))
    fs += err

    # random warping generation
    rgam = uf.randomGamma(gam, n)
    gams = np.zeros((M, n))
    for k in range(0, n):
        gams[:, k] = uf.invertGamma(rgam[:, k])

    if sort_samples:
        # sort functions and warping
        f_order = fs.max(axis=0).argsort()

        # compute the psi-function
        # axis=0: differentiate each warping function along its samples.
        # Without it np.gradient returns one array per axis for 2-D input
        # and abs() on that sequence fails.
        fy = np.gradient(rgam, binsize, axis=0)
        psi = fy / np.sqrt(abs(fy) + eps)
        ones = np.ones(M)
        dist = np.array([np.arccos(ones.dot(psi[:, i] / M))
                         for i in range(0, n)])
        g_order = dist.argsort()
    else:
        f_order = g_order = np.arange(n)

    # combine x-variability and y-variability
    ft = np.zeros((M, n))
    for k in range(0, n):
        ft[:, k] = np.interp(gams[:, g_order[k]], grid, fs[:, f_order[k]])
        # The NaN test is re-evaluated on every pass; it used to be
        # computed once, so the loop could never terminate once entered.
        # NOTE(review): the retry interpolates the inverted random gamma
        # rather than a function sample -- kept as in the original, but it
        # looks unintended; confirm.
        while np.isnan(ft[:, k]).any():
            rgam2 = uf.randomGamma(gam, 1)
            ft[:, k] = np.interp(gams[:, g_order[k]], grid,
                                 uf.invertGamma(rgam2))

    self.rsamps = True
    self.fs = fs
    self.gams = rgam
    self.ft = ft
    self.qs = q_s[0:M, :]

    return
def jointfPCA(fn, time, qn, q0, gam, no=2, showplot=True):
    """
    This function calculates joint functional principal component analysis
    on aligned data

    :param fn: numpy ndarray of shape (M,N) of N aligned functions with M
               samples
    :param time: vector of size M describing the sample points
    :param qn: numpy ndarray of shape (M,N) of N aligned SRSF with M samples
    :param q0: original SRSFs; forwarded to ``find_C`` when the scaling
               constant C is selected
    :param gam: warping functions; forwarded to ``uf.SqrtMean``
    :param no: number of components to extract (default = 2)
    :param showplot: Shows plots of results using matplotlib (default = T)
    :type showplot: bool
    :type no: int

    :rtype: namedtuple ``jfpca``
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors

    """
    coef = np.arange(-1., 2.)
    Nstd = coef.shape[0]

    # set up for fPCA in q-space
    mq_new = qn.mean(axis=1)
    M = time.shape[0]
    grid = np.linspace(0, 1, M)
    mididx = int(np.round(M / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))

    # calculate vector space of warping functions
    mu_psi, gam_mu, psi, vec = uf.SqrtMean(gam)

    # joint fPCA
    C = fminbound(find_C, 0, 1e4, (qn2, vec, q0, no, mu_psi))
    qhat, gamhat, a, U, s, mu_g = jointfPCAd(qn2, vec, C, no, mu_psi)

    # geodesic paths
    q_pca = np.ndarray(shape=(M, Nstd, no), dtype=float)
    f_pca = np.ndarray(shape=(M, Nstd, no), dtype=float)

    for k in range(0, no):
        for l in range(0, Nstd):
            # rows 0..M of U carry the amplitude part, the remaining rows
            # the phase part (scaled by C)
            qhat = mqn + dot(U[0:(M + 1), k], coef[l] * np.sqrt(s[k]))
            vechat = dot(U[(M + 1):, k], (coef[l] * np.sqrt(s[k])) / C)
            psihat = geo.exp_map(mu_psi, vechat)
            gamhat = cumtrapz(psihat * psihat, grid, initial=0)
            gamhat = (gamhat - gamhat.min()) / (gamhat.max() - gamhat.min())
            if sum(vechat) == 0:
                # no phase displacement: use the identity warping
                gamhat = np.linspace(0, 1, M)

            fhat = uf.cumtrapzmid(time, qhat[0:M] * np.fabs(qhat[0:M]),
                                  np.sign(qhat[M]) * (qhat[M] * qhat[M]),
                                  mididx)
            f_pca[:, l, k] = uf.warp_f_gamma(grid, fhat, gamhat)
            q_pca[:, l, k] = uf.warp_q_gamma(grid, qhat[0:M], gamhat)

    jfpca_results = collections.namedtuple(
        'jfpca', ['q_pca', 'f_pca', 'latent', 'coef', 'U'])
    jfpca = jfpca_results(q_pca, f_pca, s, a, U)

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # squeeze=False keeps ax two-dimensional even when no == 1
        fig, ax = plt.subplots(2, no, squeeze=False)
        for k in range(0, no):
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:M, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))

            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, s.shape[0]) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # idx is on the x axis, the cumulative percentage on the y axis
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    return jfpca
def gauss_model(fn, time, qn, gam, n=1, sort_samples=False):
    """
    This function models the functional data using a Gaussian model
    extracted from the principal components of the srvfs

    :param fn: numpy ndarray of shape (M,N) of N aligned functions with
               M samples
    :param time: vector of size M describing the sample points
    :param qn: numpy ndarray of shape (M,N) of N aligned srvfs with M
               samples
    :param gam: warping functions
    :param n: number of random samples
    :param sort_samples: pair amplitude samples (ordered by their maximum)
                         with warping samples (ordered by a distance
                         derived from their derivative) (default = F)
    :type n: integer
    :type sort_samples: bool
    :type fn: np.ndarray
    :type qn: np.ndarray
    :type gam: np.ndarray
    :type time: np.ndarray

    :rtype: namedtuple ``samples``
    :return fs: random aligned samples
    :return gams: random warping functions
    :return ft: random samples
    """
    # Parameters
    eps = np.finfo(np.double).eps
    binsize = np.diff(time).mean()
    M = time.size
    grid = np.arange(0, M) / np.double(M - 1)

    # compute mean and covariance in q-domain
    mq_new = qn.mean(axis=1)
    # np.round returns a float, which cannot be used as an array index
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    C = np.cov(qn2)

    q_s = np.random.multivariate_normal(mqn, C, n)
    q_s = q_s.transpose()

    # compute the correspondence to the original function domain
    fs = np.zeros((M, n))
    for k in range(0, n):
        fs[:, k] = uf.cumtrapzmid(time, q_s[0:M, k] * np.abs(q_s[0:M, k]),
                                  np.sign(q_s[M, k]) * (q_s[M, k]**2),
                                  mididx)

    # shift the samples so that their cross-sectional mean matches fn's
    fbar = fn.mean(axis=1)
    fsbar = fs.mean(axis=1)
    err = np.transpose(np.tile(fbar - fsbar, (n, 1)))
    fs += err

    # random warping generation
    rgam = uf.randomGamma(gam, n)
    gams = np.zeros((M, n))
    for k in range(0, n):
        gams[:, k] = uf.invertGamma(rgam[:, k])

    if sort_samples:
        # sort functions and warping
        f_order = fs.max(axis=0).argsort()

        # compute the psi-function
        # axis=0: differentiate each warping function along its samples.
        # Without it np.gradient returns one array per axis for 2-D input
        # and abs() on that sequence fails.
        fy = np.gradient(rgam, binsize, axis=0)
        psi = fy / np.sqrt(abs(fy) + eps)
        ones = np.ones(M)
        # np.arccos rather than np.acos: the latter only exists in
        # NumPy >= 2.0
        dist = np.array([np.arccos(ones.dot(psi[:, i] / M))
                         for i in range(0, n)])
        g_order = dist.argsort()
    else:
        f_order = g_order = np.arange(n)

    # combine x-variability and y-variability
    ft = np.zeros((M, n))
    for k in range(0, n):
        ft[:, k] = np.interp(gams[:, g_order[k]], grid, fs[:, f_order[k]])
        # The NaN test is re-evaluated on every pass; it used to be
        # computed once, so the loop could never terminate once entered.
        # NOTE(review): the retry interpolates the inverted random gamma
        # rather than a function sample -- kept as in the original, but it
        # looks unintended; confirm.
        while np.isnan(ft[:, k]).any():
            rgam2 = uf.randomGamma(gam, 1)
            ft[:, k] = np.interp(gams[:, g_order[k]], grid,
                                 uf.invertGamma(rgam2))

    samples = collections.namedtuple('samples', ['fs', 'gams', 'ft'])
    out = samples(fs, rgam, ft)
    return out
def vertfPCA(fn, time, qn, no=2, showplot=True):
    """
    This function calculates vertical functional principal component
    analysis on aligned data

    :param fn: numpy ndarray of shape (M,N) of N aligned functions with M
               samples
    :param time: vector of size M describing the sample points
    :param qn: numpy ndarray of shape (M,N) of N aligned SRSF with M samples
    :param no: number of components to extract (default = 2)
    :param showplot: Shows plots of results using matplotlib (default = T)
    :type showplot: bool
    :type no: int

    :rtype: namedtuple ``vfpca``
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors

    """
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]

    # FPCA
    mq_new = qn.mean(axis=1)
    # N is the number of sample points (rows of qn), not of functions
    N = mq_new.shape[0]
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(N + 1, Nstd, no), dtype=float)
    for k in range(0, no):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(N, Nstd, no), dtype=float)
    fbar = fn.mean(axis=1)
    for k in range(0, no):
        for l in range(0, Nstd):
            f_pca[:, l, k] = uf.cumtrapzmid(
                time,
                q_pca[0:N, l, k] * np.abs(q_pca[0:N, l, k]),
                np.sign(q_pca[N, l, k]) * (q_pca[N, l, k] ** 2),
                mididx)
        # shift this component's curves so their mean matches fn's mean
        fsbar = f_pca[:, :, k].mean(axis=1)
        err = np.transpose(np.tile(fbar - fsbar, (Nstd, 1)))
        f_pca[:, :, k] += err

    N2 = qn.shape[1]
    c = np.zeros((N2, no))
    for k in range(0, no):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn) * U[:, k])

    vfpca_results = collections.namedtuple(
        'vfpca', ['q_pca', 'f_pca', 'latent', 'coef', 'U'])
    vfpca = vfpca_results(q_pca, f_pca, s, c, U)

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # squeeze=False keeps ax two-dimensional even when no == 1
        fig, ax = plt.subplots(2, no, squeeze=False)
        for k in range(0, no):
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:N, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))

            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, N + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # idx is on the x axis, the cumulative percentage on the y axis
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    return vfpca
def vertfPCA(fn, time, qn, no=1, showplot=True):
    """
    This function calculates vertical functional principal component
    analysis on aligned data

    :param fn: numpy ndarray of shape (M,N) of N aligned functions with M
               samples
    :param time: vector of size M describing the sample points
    :param qn: numpy ndarray of shape (M,N) of N aligned SRSF with M samples
    :param no: number of components to extract (default = 1)
    :param showplot: Shows plots of results using matplotlib (default = T)
    :type showplot: bool
    :type no: int

    :rtype: namedtuple ``vfpca``
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors

    """
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]

    # FPCA
    mq_new = qn.mean(axis=1)
    # N is the number of sample points (rows of qn), not of functions
    N = mq_new.shape[0]
    # np.round returns a float, which cannot be used as an array index
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(N + 1, Nstd, no), dtype=float)
    for k in range(0, no):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(N, Nstd, no), dtype=float)
    for k in range(0, no):
        for l in range(0, Nstd):
            f_pca[:, l, k] = uf.cumtrapzmid(
                time,
                q_pca[0:N, l, k] * np.abs(q_pca[0:N, l, k]),
                np.sign(q_pca[N, l, k]) * (q_pca[N, l, k] ** 2))

    N2 = qn.shape[1]
    c = np.zeros((N2, no))
    for k in range(0, no):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn) * U[:, k])

    vfpca_results = collections.namedtuple(
        'vfpca', ['q_pca', 'f_pca', 'latent', 'coef', 'U'])
    vfpca = vfpca_results(q_pca, f_pca, s, c, U)

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # squeeze=False keeps ax two-dimensional; with the default no=1 the
        # squeezed 1-D array made ax[0, k] raise an IndexError
        fig, ax = plt.subplots(2, no, squeeze=False)
        for k in range(0, no):
            # Axes.hold() no longer exists in matplotlib >= 3.0; repeated
            # plot() calls already draw onto the same axes
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:N, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))
            plot.rstyle(axt)

            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
            plot.rstyle(axt)
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, N + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # idx is on the x axis, the cumulative percentage on the y axis
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    return vfpca
def calc_fpca(self, no=3, id=None, stds=np.arange(-1, 2)):
    """
    Vertical functional principal component analysis on the aligned data
    held in ``self.warp_data``.

    Nothing is returned; the results are written to attributes of the
    object: ``q_pca`` (srsf principal directions), ``f_pca`` (functional
    principal directions), ``latent`` (first ``no`` latent values),
    ``coef`` (coefficients), ``U`` (first ``no`` eigenvectors), ``id``
    (anchor sample index), ``mqn``, ``time``, ``stds`` and ``no``.

    :param no: number of components to extract (default = 3)
    :param id: point to use for f(0) (default = midpoint)
    :param stds: number of standard deviations along geodesic to compute
                 (default = -1,0,1)
    :type no: int
    :type id: int
    """
    warp = self.warp_data
    fn, time, qn = warp.fn, warp.time, warp.qn

    n_time = time.shape[0]
    mididx = int(np.round(n_time / 2)) if id is None else id
    n_std = stds.shape[0]

    # FPCA on the srsfs augmented with the signed square root of the
    # function value at the anchor sample
    srsf_mean = qn.mean(axis=1)
    n_pts = srsf_mean.shape[0]
    anchor = fn[mididx, :]
    m_new = np.sign(anchor) * np.sqrt(np.abs(anchor))
    mqn = np.append(srsf_mean, m_new.mean())
    augmented = np.vstack((qn, m_new))
    U, s, V = svd(np.cov(augmented))
    sdev = np.sqrt(s)

    # principal directions in the q domain
    q_pca = np.ndarray(shape=(n_pts + 1, n_std, no), dtype=float)
    for comp in range(no):
        direction = sdev[comp] * U[:, comp]
        for j in range(n_std):
            q_pca[:, j, comp] = mqn + stds[j] * direction

    # matching curves in the f domain, re-centred on the mean of fn
    f_pca = np.ndarray(shape=(n_pts, n_std, no), dtype=float)
    fbar = fn.mean(axis=1)
    for comp in range(no):
        for j in range(n_std):
            body = q_pca[0:n_pts, j, comp]
            tail = q_pca[n_pts, j, comp]
            f_pca[:, j, comp] = uf.cumtrapzmid(
                time, body * np.abs(body), np.sign(tail) * (tail**2), mididx)
        shift = fbar - f_pca[:, :, comp].mean(axis=1)
        f_pca[:, :, comp] += np.transpose(np.tile(shift, (n_std, 1)))

    # projection of every centred, augmented srsf on each component
    n_fun = qn.shape[1]
    c = np.zeros((n_fun, no))
    for j in range(n_fun):
        centred = np.append(qn[:, j], m_new[j]) - mqn
        for comp in range(no):
            c[j, comp] = sum(centred * U[:, comp])

    self.q_pca = q_pca
    self.f_pca = f_pca
    self.latent = s[0:no]
    self.coef = c
    self.U = U[:, 0:no]
    self.id = mididx
    self.mqn = mqn
    self.time = time
    self.stds = stds
    self.no = no
def calc_fpca(self, no=3, stds=np.arange(-1., 2.), id=None, parallel=False,
              cores=-1):
    """
    This function calculates joint functional principal component analysis
    on aligned data. The aligned functions, sample points, srsfs and
    warping functions are read from ``self.warp_data``. Nothing is
    returned; the results are stored as attributes on the object.

    :param no: number of components to extract (default = 3)
    :param stds: multiples of the standard deviation along the geodesic at
                 which each principal direction is evaluated
                 (default = -1,0,1)
    :param id: index of the sample point to use for f(0)
               (default = midpoint of the time vector)
    :param parallel: run in parallel (default = F)
    :param cores: number of cores for parallel (default = -1 (all))
    :type no: int
    :type stds: np.ndarray
    :type id: int
    :type parallel: bool
    :type cores: int

    :rtype: None

    Attributes set on the object:

    * q_pca: srsf principal directions
    * f_pca: functional principal directions
    * latent: first ``no`` latent values
    * coef: coefficients
    * U: first ``no`` eigenvectors
    * mu_psi, mu_g, g, cov, C: quantities returned by ``uf.SqrtMean``,
      ``jointfPCAd`` and the ``fminbound`` search
    * id, time, stds, no: the mid index, sample points and the arguments
      used
    """
    fn = self.warp_data.fn
    time = self.warp_data.time
    qn = self.warp_data.qn
    q0 = self.warp_data.q0
    gam = self.warp_data.gam

    M = time.shape[0]
    mididx = int(np.round(M / 2)) if id is None else id
    Nstd = stds.shape[0]

    # set up for fPCA in q-space: the srsfs are augmented with one extra
    # row, the signed square root of each function's value at mididx
    mq_new = qn.mean(axis=1)
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))

    # calculate vector space of warping functions
    mu_psi, _, _, vec = uf.SqrtMean(gam, parallel, cores)

    # joint fPCA: C is found by minimising find_C over [0, 1e4]
    C = fminbound(find_C, 0, 1e4,
                  (qn2, vec, q0, no, mu_psi, parallel, cores))
    _, _, a, U, s, mu_g, g, cov = jointfPCAd(
        qn2, vec, C, no, mu_psi, parallel, cores)

    # geodesic paths
    grid = np.linspace(0, 1, M)  # shared by every (k, l) pair
    q_pca = np.empty((M, Nstd, no), dtype=float)
    f_pca = np.empty((M, Nstd, no), dtype=float)

    for k in range(no):
        std_k = np.sqrt(s[k])
        for l in range(Nstd):
            scale = stds[l] * std_k
            # first M + 1 rows of U are the (augmented) srsf part, the
            # remaining rows the warping part, which is rescaled by C
            qhat = mqn + U[0:(M + 1), k] * scale
            vechat = U[(M + 1):, k] * (scale / C)

            if np.any(vechat):
                psihat = geo.exp_map(mu_psi, vechat)
                gamhat = cumtrapz(psihat * psihat, grid, initial=0)
                gamhat = ((gamhat - gamhat.min())
                          / (gamhat.max() - gamhat.min()))
            else:
                # an all-zero tangent vector (e.g. stds[l] == 0) is mapped
                # to the identity warping; testing every entry, before
                # calling exp_map, avoids treating a non-zero vector whose
                # entries cancel as zero
                gamhat = grid

            fhat = uf.cumtrapzmid(time, qhat[0:M] * np.fabs(qhat[0:M]),
                                  np.sign(qhat[M]) * (qhat[M] * qhat[M]),
                                  mididx)
            f_pca[:, l, k] = uf.warp_f_gamma(grid, fhat, gamhat)
            q_pca[:, l, k] = uf.warp_q_gamma(grid, qhat[0:M], gamhat)

    self.q_pca = q_pca
    self.f_pca = f_pca
    self.latent = s[0:no]
    self.coef = a
    self.U = U[:, 0:no]
    self.mu_psi = mu_psi
    self.mu_g = mu_g
    self.id = mididx
    self.C = C
    self.time = time
    self.g = g
    self.cov = cov
    self.no = no
    self.stds = stds

    return
def _warp_with_redraw(f_col, gam_inv, gam, grid, max_redraws=20):
    """Interpolate one sample at an inverse warping, redrawing it on NaN."""
    warped = np.interp(gam_inv, grid, f_col)
    redraws = 0
    while np.isnan(warped).any():
        if redraws >= max_redraws:
            raise RuntimeError(
                "gauss_model: warped sample still contains NaN after "
                "%d redrawn warping functions" % max_redraws)
        redraws += 1
        # NOTE(review): the original retry passed the inverted warping as
        # the interpolated values and never re-checked for NaN; this
        # assumes the intent was to re-warp the same sample with a freshly
        # drawn warping, and that uf.randomGamma(gam, 1) yields a single
        # warping function of the same length -- confirm
        gam_inv = uf.invertGamma(np.ravel(uf.randomGamma(gam, 1)))
        warped = np.interp(gam_inv, grid, f_col)
    return warped


def gauss_model(fn, time, qn, gam, n=1, sort_samples=False):
    """
    This function models the functional data using a Gaussian model
    extracted from the principal components of the srvfs. Samples are
    drawn with ``np.random``, so results vary between calls unless the
    global NumPy seed is fixed.

    :param fn: numpy ndarray of shape (M,N) of N aligned functions with M
               samples
    :param time: vector of size M describing the sample points
    :param qn: numpy ndarray of shape (M,N) of N aligned srvfs with M
               samples
    :param gam: warping functions
    :param n: number of random samples
    :param sort_samples: sort samples (default = F)
    :type n: integer
    :type sort_samples: bool
    :type fn: np.ndarray
    :type qn: np.ndarray
    :type gam: np.ndarray
    :type time: np.ndarray

    :rtype: namedtuple ``samples`` of numpy arrays
    :return fs: random aligned samples
    :return gams: random warping functions
    :return ft: random samples
    :raises RuntimeError: if a warped sample still contains NaN after
                          repeatedly redrawing its warping function
    """
    # Parameters
    eps = np.finfo(np.double).eps
    binsize = np.diff(time).mean()
    M = time.size

    # compute mean and covariance in q-domain
    mq_new = qn.mean(axis=1)
    # must be a Python int: NumPy rejects float array indices
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn = np.append(mq_new, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    C = np.cov(qn2)

    q_s = np.random.multivariate_normal(mqn, C, n).transpose()

    # compute the correspondence to the original function domain
    fs = np.zeros((M, n))
    for k in range(n):
        fs[:, k] = uf.cumtrapzmid(time, q_s[0:M, k] * np.abs(q_s[0:M, k]),
                                  np.sign(q_s[M, k]) * (q_s[M, k] ** 2),
                                  mididx)
    # shift the samples so their mean equals the mean aligned function
    fs += (fn.mean(axis=1) - fs.mean(axis=1))[:, np.newaxis]

    # random warping generation
    rgam = uf.randomGamma(gam, n)
    gams = np.zeros((M, n))
    for k in range(n):
        gams[:, k] = uf.invertGamma(rgam[:, k])

    if sort_samples:
        # pair functions and warpings by rank: functions ordered by their
        # maximum, warpings by the arccos of the mean of their psi-function
        f_order = fs.max(axis=0).argsort()

        # compute the psi-function; axis=0 differentiates each warping
        # along its samples (without it np.gradient returns one array per
        # axis of the 2-D input)
        fy = np.gradient(rgam, binsize, axis=0)
        psi = fy / np.sqrt(np.abs(fy) + eps)
        dist = np.arccos((psi / M).sum(axis=0))
        gam_order = dist.argsort()
    else:
        f_order = gam_order = np.arange(n)

    # combine x-variability and y-variability
    grid = np.arange(0, M) / np.double(M - 1)
    ft = np.zeros((M, n))
    for k in range(n):
        ft[:, k] = _warp_with_redraw(fs[:, f_order[k]],
                                     gams[:, gam_order[k]], gam, grid)

    samples = collections.namedtuple('samples', ['fs', 'gams', 'ft'])
    out = samples(fs, rgam, ft)
    return out