def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, ref_img,
           tes, algorithm='mdl', kdaw=10., rdaw=1., out_dir='.',
           verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    adaptive_mask : (S,) array_like
        Array where each value indicates the number of echoes with good signal
        for that voxel. This mask may be thresholded; for example, with values
        less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
        Method with which to select components in TEDPCA.
        PCA decomposition with the mdl, kic and aic options is based on a
        Moving Average (stationary Gaussian) process, and the options are
        ordered from most to least aggressive (see Li et al., 2007).
        If a float is provided, then it is assumed to represent the percentage
        of variance explained (0-1) to retain from PCA.
        Default is 'mdl'.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        This is only compatible with the "kundu" or "kundu-stabilize"
        algorithms. Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`        Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`          Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`         Component weight for a given voxel
    :math:`c`              Component
    :math:`p`              Power to which voxel-wise weights are raised
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Calculate elbows from the :math:`{\\kappa}` and :math:`{\\rho}`
        distributions to determine significance thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ====================== =================================================
    Filename               Content
    ====================== =================================================
    pca_decomposition.json PCA component table.
    pca_mixing.tsv         PCA mixing matrix.
    pca_components.nii.gz  Component weight maps.
    ====================== =================================================

    See Also
    --------
    :func:`tedana.utils.make_adaptive_mask` : The function used to create
        the ``adaptive_mask`` parameter.
    """
    if algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif isinstance(algorithm, Number):
        alg_str = ("in which the number of components was determined based "
                   "on a variance explained threshold")
    else:
        alg_str = ("based on the PCA component estimation with a Moving "
                   "Average (stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human Brain Mapping, 28(11), pp.1251-1266.")

    RepLGR.info("Principal component analysis {0} was applied to "
                "the optimally combined data for dimensionality "
                "reduction.".format(alg_str))

    n_samp, n_echos, n_vols = data_cat.shape

    LGR.info('Computing PCA of optimally combined multi-echo data')
    data = data_oc[mask, :]

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(ref_img, utils.unmask(data, mask))
        mask_img = io.new_nii_like(ref_img, mask.astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif isinstance(algorithm, Number):
        ppca = PCA(copy=False, n_components=algorithm, svd_solver="full")
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
        data_cat, data_oc, comp_ts, adaptive_mask, tes, ref_img,
        reindex=False, mmixN=vTmixN, algorithm=None,
        label='mepca_', out_dir=out_dir, verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    else:
        alg_str = ("variance explained-based" if isinstance(algorithm, Number)
                   else algorithm)
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], alg_str))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [io.add_decomp_prefix(comp, prefix='pca',
                                       max_value=comptable.index.max())
                  for comp in comptable.index.values]
    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t',
                     index=False)

    comptable['Description'] = 'PCA fit to optimally combined data.'
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
                      label='pca', metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
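# ---------------------------------------------------------------------------
# Illustrative sketch (not part of tedana): the kappa/rho formulas documented
# above are weighted averages of per-voxel F-statistics, with the component
# weight maps (zeta) raised to a power p serving as the weights. The array
# names and the power of 2 below are hypothetical stand-ins for quantities
# that dependence_metrics computes internally.
# ---------------------------------------------------------------------------
def _example_kappa_rho(weight_maps, f_r2, f_s0, power=2):
    """Compute example kappa and rho values as weighted pseudo-F averages.

    Parameters are (V x C) arrays: per-voxel component weights and
    F-statistics for the TE-dependence (R_2*) and TE-independence (S_0)
    models. Returns two (C,) arrays, one kappa and one rho per component.
    """
    import numpy as np  # local import keeps the sketch self-contained

    weights = np.asarray(weight_maps) ** power
    kappa = (weights * f_r2).sum(axis=0) / weights.sum(axis=0)
    rho = (weights * f_s0).sum(axis=0) / weights.sum(axis=0)
    return kappa, rho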
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img, tes,
           algorithm='mdl', source_tes=-1, kdaw=10., rdaw=1., out_dir='.',
           verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'.
        PCA decomposition with the mdl, kic and aic options is based on a
        Moving Average (stationary Gaussian) process, and the options are
        ordered from most to least aggressive (see Li et al., 2007).
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`        Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`          Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`         Component weight for a given voxel
    :math:`c`              Component
    :math:`p`              Power to which voxel-wise weights are raised
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Calculate elbows from the :math:`{\\kappa}` and :math:`{\\rho}`
        distributions to determine significance thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ====================== =================================================
    Filename               Content
    ====================== =================================================
    pca_decomposition.json PCA component table.
    pca_mixing.tsv         PCA mixing matrix.
    pca_components.nii.gz  Component weight maps.
    ====================== =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    if algorithm == 'mle':
        alg_str = "using MLE dimensionality estimation (Minka, 2001)"
        RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality "
                    "for PCA. In Advances in Neural Information Processing "
                    "Systems (pp. 598-604).")
    elif algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    else:
        alg_str = ("based on the PCA component estimation with a Moving "
                   "Average (stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human Brain Mapping, 28(11), pp.1251-1266.")

    if source_tes == -1:
        dat_str = "the optimally combined data"
    elif source_tes == 0:
        dat_str = "the z-concatenated multi-echo data"
    else:
        dat_str = "a z-concatenated subset of echoes from the input data"

    RepLGR.info("Principal component analysis {0} was applied to "
                "{1} for dimensionality reduction.".format(alg_str, dat_str))

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(
            ','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1],
                        axis=1)

    eim = np.squeeze(_utils.eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(
            ref_img, utils.unmask(utils.unmask(data, eim), mask))
        mask_img = io.new_nii_like(ref_img,
                                   utils.unmask(eim, mask).astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
        data_cat, data_oc, comp_ts, t2s, tes, ref_img,
        reindex=False, mmixN=vTmixN, algorithm=None,
        label='mepca_', out_dir=out_dir, verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''
    elif algorithm in ['mdl', 'aic', 'kic']:
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], algorithm))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [io.add_decomp_prefix(comp, prefix='pca',
                                       max_value=comptable.index.max())
                  for comp in comptable.index.values]
    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t',
                     index=False)

    data_type = ('optimally combined data'
                 if len(source_tes) == 1 and source_tes[0] == -1
                 else 'z-concatenated data')
    comptable['Description'] = 'PCA fit to {0}.'.format(data_type)
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
                      label='pca', metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
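# ---------------------------------------------------------------------------
# Illustrative sketch (not part of tedana): how the `source_tes` argument in
# the function above is interpreted. An integer or comma-separated string is
# coerced to an integer array; a lone -1 selects the optimally combined data,
# a lone 0 selects all echoes, and anything else selects a 1-indexed subset
# of echoes (hence the `source_tes - 1` when indexing `data_cat`).
# ---------------------------------------------------------------------------
def _example_source_tes_parsing():
    import numpy as np

    for raw in (-1, 0, '1,3'):
        parsed = np.array([int(ee) for ee in str(raw).split(',')])
        if len(parsed) == 1 and parsed[0] == -1:
            print(raw, '-> optimally combined data')
        elif len(parsed) == 1 and parsed[0] == 0:
            print(raw, '-> all echoes')
        else:
            print(raw, '-> echoes', parsed, '(0-based indices:', parsed - 1, ')')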
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img, tes,
           algorithm='mle', source_tes=-1, kdaw=10., rdaw=1., out_dir='.',
           verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`        Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`          Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`         Component weight for a given voxel
    :math:`c`              Component
    :math:`p`              Power to which voxel-wise weights are raised
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Calculate elbows from the :math:`{\\kappa}` and :math:`{\\rho}`
        distributions to determine significance thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ====================== =================================================
    Filename               Content
    ====================== =================================================
    pcastate.pkl           Values from PCA results.
    comp_table_pca.txt     PCA component table.
    mepca_mix.1D           PCA mixing matrix.
    ====================== =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(
            ','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1],
                        axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
        data_cat, data_oc, comp_ts, t2s, tes, ref_img,
        reindex=False, mmixN=vTmixN, algorithm=None,
        label='mepca_', out_dir=out_dir, verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    np.savetxt(op.join(out_dir, 'mepca_mix.1D'), comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask),
                                mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, op.join(out_dir, 'mepca_OC_components.nii'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv(op.join(out_dir, 'comp_table_pca.txt'), sep='\t',
                     index=True, index_label='component',
                     float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
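# ---------------------------------------------------------------------------
# Illustrative sketch (not part of tedana): the dimensionality-reduction step
# shared by the tedpca variants above. `voxel_comp_weights` rescales the data
# projection by 1/varex, so multiplying the accepted columns back by their
# variances and component time series reconstructs a reduced-rank version of
# the input. Shapes and the accepted-component set here are synthetic.
# ---------------------------------------------------------------------------
def _example_pca_reduction():
    import numpy as np
    from scipy import stats
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    data_z = stats.zscore(rng.standard_normal((500, 40)), axis=1)  # S x T

    ppca = PCA(n_components=39)
    ppca.fit(data_z)
    comp_ts = ppca.components_.T      # T x C component time series
    varex = ppca.explained_variance_  # (C,) variance per component
    voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. / varex))

    acc = np.arange(10)  # pretend the first ten components were accepted
    kept_data = np.dot(voxel_comp_weights[:, acc] * varex[None, acc],
                       comp_ts[:, acc].T)
    print(kept_data.shape)  # (500, 40): original shape, rank reduced to 10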
def tedpca(
    data_cat,
    data_oc,
    combmode,
    mask,
    adaptive_mask,
    t2sG,
    io_generator,
    tes,
    algorithm="aic",
    kdaw=10.0,
    rdaw=1.0,
    verbose=False,
    low_mem=False,
):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    adaptive_mask : (S,) array_like
        Array where each value indicates the number of echoes with good signal
        for that voxel. This mask may be thresholded; for example, with values
        less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    io_generator : :obj:`tedana.io.OutputGenerator`
        The output generation object for this workflow
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
        Method with which to select components in TEDPCA.
        PCA decomposition with the mdl, kic and aic options is based on a
        Moving Average (stationary Gaussian) process, and the options are
        ordered from most to least aggressive (see Li et al., 2007).
        If a float is provided, then it is assumed to represent the percentage
        of variance explained (0-1) to retain from PCA.
        If an int is provided, then it is assumed to be the number of
        components to select.
        Default is 'aic'.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        This is only compatible with the "kundu" or "kundu-stabilize"
        algorithms. Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`        Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`          Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`         Component weight for a given voxel
    :math:`c`              Component
    :math:`p`              Power to which voxel-wise weights are raised
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Calculate elbows from the :math:`{\\kappa}` and :math:`{\\rho}`
        distributions to determine significance thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    =========================== =============================================
    Default Filename            Content
    =========================== =============================================
    desc-PCA_metrics.tsv        PCA component table
    desc-PCA_metrics.json       Metadata sidecar file describing the
                                component table
    desc-PCA_mixing.tsv         PCA mixing matrix
    desc-PCA_components.nii.gz  Component weight maps
    desc-PCA_decomposition.json Metadata sidecar file describing the PCA
                                decomposition
    =========================== =============================================

    See Also
    --------
    :func:`tedana.utils.make_adaptive_mask` : The function used to create
        the ``adaptive_mask`` parameter.
    :py:mod:`tedana.constants` : The module describing the filenames for
        various naming conventions
    """
    if algorithm == "kundu":
        alg_str = "followed by the Kundu component selection decision tree (Kundu et al., 2013)"
        RefLGR.info(
            "Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
            "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
            "(2013). Integrated strategy for improving functional "
            "connectivity mapping using multiecho fMRI. Proceedings "
            "of the National Academy of Sciences, 110(40), "
            "16187-16192."
        )
    elif algorithm == "kundu-stabilize":
        alg_str = (
            "followed by the 'stabilized' Kundu component "
            "selection decision tree (Kundu et al., 2013)"
        )
        RefLGR.info(
            "Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
            "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
            "(2013). Integrated strategy for improving functional "
            "connectivity mapping using multiecho fMRI. Proceedings "
            "of the National Academy of Sciences, 110(40), "
            "16187-16192."
        )
    elif isinstance(algorithm, Number):
        if isinstance(algorithm, float):
            alg_str = (
                "in which the number of components was determined based on a "
                "variance explained threshold"
            )
        else:
            alg_str = "in which the number of components is pre-defined"
    else:
        alg_str = (
            "based on the PCA component estimation with a Moving Average "
            "(stationary Gaussian) process (Li et al., 2007)"
        )
        RefLGR.info(
            "Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
            "Estimating the number of independent components for "
            "functional magnetic resonance imaging data. "
            "Human Brain Mapping, 28(11), pp.1251-1266."
        )
    RepLGR.info(
        "Principal component analysis {0} was applied to "
        "the optimally combined data for dimensionality "
        "reduction.".format(alg_str)
    )

    n_samp, n_echos, n_vols = data_cat.shape

    LGR.info(
        f"Computing PCA of optimally combined multi-echo data with selection criteria: {algorithm}"
    )
    data = data_oc[mask, :]

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ["mdl", "aic", "kic"]:
        data_img = io.new_nii_like(io_generator.reference_img, utils.unmask(data, mask))
        mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int))
        ma_pca = MovingAveragePCA(criterion=algorithm, normalize=True)
        _ = ma_pca.fit_transform(data_img, mask_img)

        # Extract results from maPCA
        voxel_comp_weights = ma_pca.u_
        varex = ma_pca.explained_variance_
        varex_norm = ma_pca.explained_variance_ratio_
        comp_ts = ma_pca.components_.T
        aic = ma_pca.aic_
        kic = ma_pca.kic_
        mdl = ma_pca.mdl_
        varex_90 = ma_pca.varexp_90_
        varex_95 = ma_pca.varexp_95_
        all_comps = ma_pca.all_

        # Extract number of components and variance explained for logging and plotting
        n_aic = aic["n_components"]
        aic_varexp = np.round(aic["explained_variance_total"], 3)
        n_kic = kic["n_components"]
        kic_varexp = np.round(kic["explained_variance_total"], 3)
        n_mdl = mdl["n_components"]
        mdl_varexp = np.round(mdl["explained_variance_total"], 3)
        n_varex_90 = varex_90["n_components"]
        varex_90_varexp = np.round(varex_90["explained_variance_total"], 3)
        n_varex_95 = varex_95["n_components"]
        varex_95_varexp = np.round(varex_95["explained_variance_total"], 3)
        all_varex = np.round(all_comps["explained_variance_total"], 3)

        # Print out the results
        LGR.info("Optimal number of components based on different criteria:")
        LGR.info(
            f"AIC: {n_aic} | KIC: {n_kic} | MDL: {n_mdl} | 90% varexp: {n_varex_90} "
            f"| 95% varexp: {n_varex_95}"
        )

        LGR.info("Explained variance based on different criteria:")
        LGR.info(
            f"AIC: {aic_varexp}% | KIC: {kic_varexp}% | MDL: {mdl_varexp}% | "
            f"90% varexp: {varex_90_varexp}% | 95% varexp: {varex_95_varexp}%"
        )

        pca_optimization_curves = np.array([aic["value"], kic["value"], mdl["value"]])
        pca_criteria_components = np.array(
            [
                n_aic,
                n_kic,
                n_mdl,
                n_varex_90,
                n_varex_95,
            ]
        )

        # Plot maPCA optimization curves
        LGR.info("Plotting maPCA optimization curves")
        plot_pca_results(pca_optimization_curves, pca_criteria_components, all_varex, io_generator)

        # Save maPCA results into a dictionary
        mapca_results = {
            "aic": {
                "n_components": n_aic,
                "explained_variance_total": aic_varexp,
                "curve": aic["value"],
            },
            "kic": {
                "n_components": n_kic,
                "explained_variance_total": kic_varexp,
                "curve": kic["value"],
            },
            "mdl": {
                "n_components": n_mdl,
                "explained_variance_total": mdl_varexp,
                "curve": mdl["value"],
            },
            "varex_90": {
                "n_components": n_varex_90,
                "explained_variance_total": varex_90_varexp,
            },
            "varex_95": {
                "n_components": n_varex_95,
                "explained_variance_total": varex_95_varexp,
            },
        }

        # Save dictionary
        io_generator.save_file(mapca_results, "PCA cross component metrics json")
    elif isinstance(algorithm, Number):
        ppca = PCA(copy=False, n_components=algorithm, svd_solver="full")
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1.0 / varex))
        varex_norm = ppca.explained_variance_ratio_
    elif low_mem:
        voxel_comp_weights, varex, varex_norm, comp_ts = low_mem_pca(data_z)
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1.0 / varex))
        varex_norm = ppca.explained_variance_ratio_

    # Compute Kappa and Rho for PCA comps
    required_metrics = [
        "kappa",
        "rho",
        "countnoise",
        "countsigFT2",
        "countsigFS0",
        "dice_FT2",
        "dice_FS0",
        "signal-noise_t",
        "variance explained",
        "normalized variance explained",
        "d_table_score",
    ]
    comptable = metrics.collect.generate_metrics(
        data_cat,
        data_oc,
        comp_ts,
        adaptive_mask,
        tes,
        io_generator,
        "PCA",
        metrics=required_metrics,
    )

    # varex_norm from PCA overrides varex_norm from generate_metrics,
    # but we retain the original
    comptable["estimated normalized variance explained"] = comptable[
        "normalized variance explained"
    ]
    comptable["normalized variance explained"] = varex_norm

    # write component maps to 4D image
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts, mask), mask)
    io_generator.save_file(comp_maps, "z-scored PCA components img")

    # Select components using decision tree
    if algorithm == "kundu":
        comptable, metric_metadata = kundu_tedpca(
            comptable,
            n_echos,
            kdaw,
            rdaw,
            stabilize=False,
        )
    elif algorithm == "kundu-stabilize":
        comptable, metric_metadata = kundu_tedpca(
            comptable,
            n_echos,
            kdaw,
            rdaw,
            stabilize=True,
        )
    else:
        if isinstance(algorithm, float):
            alg_str = "variance explained-based"
        elif isinstance(algorithm, int):
            alg_str = "a fixed number of components and no"
        else:
            alg_str = algorithm
        LGR.info(
            f"Selected {comptable.shape[0]} components with {round(100*varex_norm.sum(),2)}% "
            f"normalized variance explained using {alg_str} dimensionality detection"
        )
        comptable["classification"] = "accepted"
        comptable["rationale"] = ""

    # Save decomposition files
    comp_names = [
        io.add_decomp_prefix(comp, prefix="pca", max_value=comptable.index.max())
        for comp in comptable.index.values
    ]

    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    io_generator.save_file(mixing_df, "PCA mixing tsv")

    # Save component table and associated json
    io_generator.save_file(comptable, "PCA metrics tsv")

    metric_metadata = metrics.collect.get_metadata(comptable)
    io_generator.save_file(metric_metadata, "PCA metrics json")

    decomp_metadata = {
        "Method": (
            "Principal components analysis implemented by sklearn. "
            "Components are sorted by variance explained in descending order. "
        ),
    }
    for comp_name in comp_names:
        decomp_metadata[comp_name] = {
            "Description": "PCA fit to optimally combined data.",
            "Method": "tedana",
        }
    io_generator.save_file(decomp_metadata, "PCA decomposition json")

    acc = comptable[comptable.classification == "accepted"].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = voxel_comp_weights[:, acc] * varex[None, acc]
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
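# ---------------------------------------------------------------------------
# Illustrative sketch (not part of tedana): the `isinstance(algorithm, Number)`
# branch above leans on sklearn's PCA semantics with svd_solver="full": a
# float in (0, 1) keeps the smallest number of components whose cumulative
# explained-variance ratio exceeds that fraction, while an int keeps exactly
# that many components. Data here are synthetic.
# ---------------------------------------------------------------------------
def _example_n_components_semantics():
    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(42)
    data = rng.standard_normal((200, 50))

    pca_frac = PCA(n_components=0.9, svd_solver="full").fit(data)
    pca_int = PCA(n_components=5, svd_solver="full").fit(data)
    print(pca_frac.n_components_)  # however many components reach 90% variance
    print(pca_int.n_components_)   # exactly 5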