def threshold_map(maps, mask, ref_img, threshold, csize=None):
    """Perform cluster-extent thresholding.

    Parameters
    ----------
    maps : (M x C) array_like
        Statistical maps to be thresholded.
    mask : (S) array_like
        Binary mask.
    ref_img : img_like
        Reference image to convert to niimgs with.
    threshold : :obj:`float`
        Value threshold to apply to maps.
    csize : :obj:`int` or :obj:`None`, optional
        Minimum cluster size. If None, a default minimum cluster size is
        derived from the number of voxels (at least 20 voxels).
        Default is None.

    Returns
    -------
    maps_thresh : (M x C) array_like
        Cluster-extent thresholded and binarized maps.
    """
    n_voxels, n_components = maps.shape
    maps_thresh = np.zeros([n_voxels, n_components], bool)
    if csize is None:
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
    else:
        csize = int(csize)

    for i_comp in range(n_components):
        # Cluster-extent threshold and binarize F-maps
        ccimg = io.new_nii_like(
            ref_img, np.squeeze(utils.unmask(maps[:, i_comp], mask)))
        maps_thresh[:, i_comp] = utils.threshold_map(
            ccimg, min_cluster_size=csize, threshold=threshold, mask=mask,
            binarize=True)
    return maps_thresh
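# Usage sketch (illustrative only; not part of the original module). The
# synthetic mask, maps, and dummy reference image below are assumptions for
# demonstration; real inputs come from a multi-echo fMRI dataset.
def _example_threshold_map():
    import numpy as np
    import nibabel as nib

    rng = np.random.default_rng(42)
    shape = (10, 10, 10)
    mask = rng.random(np.prod(shape)) > 0.2           # (S,) binary mask
    maps = rng.standard_normal((int(mask.sum()), 3))  # (M x C) statistical maps
    ref_img = nib.Nifti1Image(np.zeros(shape), affine=np.eye(4))

    # Boolean (M x C) array marking suprathreshold clusters of >= 10 voxels
    return threshold_map(maps, mask, ref_img, threshold=1.95, csize=10)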
def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img,
                       reindex=False, mmixN=None, algorithm=None, label=None,
                       out_dir='.', verbose=False):
    """
    Fit TE-dependence and -independence models to components.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    tsoc : (S x T) array_like
        Optimally combined data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    t2s : (S [x T]) array_like
        Limited T2* map or timeseries.
    tes : list
        List of echo times associated with `catd`, in milliseconds
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    reindex : bool, optional
        Whether to sort components in descending order by Kappa.
        Default: False
    mmixN : (T x C) array_like, optional
        Z-scored mixing matrix. Default: None
    algorithm : {'kundu_v2', 'kundu_v3', None}, optional
        Decision tree to be applied to metrics. Determines which maps will be
        generated and stored in seldict. Default: None
    label : :obj:`str` or None, optional
        Prefix to apply to generated files. Default is None.
    out_dir : :obj:`str`, optional
        Output directory for generated files. Default is current working
        directory.
    verbose : :obj:`bool`, optional
        Whether or not to generate additional files. Default is False.

    Returns
    -------
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index is the component number.
    seldict : :obj:`dict` or None
        Dictionary containing component-specific metric maps to be used for
        component selection. If `algorithm` is None, then seldict will be
        None as well.
    betas : :obj:`numpy.ndarray`
        Model coefficient maps for each voxel, echo, and component.
    mmix_new : :obj:`numpy.ndarray`
        Mixing matrix after sign correction and (optionally) reindexing.
    """
    # Use t2s as mask
    mask = t2s != 0
    if not (catd.shape[0] == t2s.shape[0] == mask.shape[0] == tsoc.shape[0]):
        raise ValueError('First dimensions (number of samples) of catd ({0}), '
                         'tsoc ({1}), and t2s ({2}) do not '
                         'match'.format(catd.shape[0], tsoc.shape[0],
                                        t2s.shape[0]))
    elif catd.shape[1] != len(tes):
        raise ValueError('Second dimension of catd ({0}) does not match '
                         'number of echoes provided (tes; '
                         '{1})'.format(catd.shape[1], len(tes)))
    elif not (catd.shape[2] == tsoc.shape[1] == mmix.shape[0]):
        raise ValueError('Number of volumes in catd ({0}), '
                         'tsoc ({1}), and mmix ({2}) do not '
                         'match.'.format(catd.shape[2], tsoc.shape[1],
                                         mmix.shape[0]))
    elif t2s.ndim == 2:
        if catd.shape[2] != t2s.shape[1]:
            raise ValueError('Number of volumes in catd '
                             '({0}) does not match number of volumes in '
                             't2s ({1})'.format(catd.shape[2], t2s.shape[1]))

    # mask everything we can
    tsoc = tsoc[mask, :]
    catd = catd[mask, ...]
    t2s = t2s[mask]

    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None)
    del tsoc_dm
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(utils.unmask(catd, mask),
                       mmix,
                       np.repeat(mask[:, np.newaxis], len(tes), axis=1))
    betas = betas[mask, ...]
    n_voxels, n_echos, n_components = betas.shape
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, _, _ = getfbounds(n_echos)

    # set up Xmats
    X1 = mu.T  # Model 1
    X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T  # Model 2

    # tables for component selection
    kappas = np.zeros([n_components])
    rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_voxels, n_components])
    F_S0_maps = np.zeros([n_voxels, n_components])
    pred_R2_maps = np.zeros([n_voxels, n_echos, n_components])
    pred_S0_maps = np.zeros([n_voxels, n_echos, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i_comp in range(n_components):
        # size of comp_betas is (n_echoes, n_samples)
        comp_betas = np.atleast_3d(betas)[:, :, i_comp].T
        alpha = (np.abs(comp_betas)**2).sum(axis=0)
        varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100.
        varex_norm[i_comp] = (WTS[:, i_comp]**2).sum() / totvar_norm

        # S0 Model
        # (S,) model coefficient map
        coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1**2).sum(axis=0)
        pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1))
        pred_S0_maps[:, :, i_comp] = pred_S0.T
        SSE_S0 = (comp_betas - pred_S0)**2
        SSE_S0 = SSE_S0.sum(axis=0)  # (S,) prediction error map
        F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0)
        F_S0_maps[:, i_comp] = F_S0

        # R2 Model
        coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2**2).sum(axis=0)
        pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1))
        pred_R2_maps[:, :, i_comp] = pred_R2.T
        SSE_R2 = (comp_betas - pred_R2)**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2)
        F_R2_maps[:, i_comp] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (
            Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i_comp] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(wtsZ**2.)
        kappas[i_comp] = np.average(F_R2, weights=norm_weights)
        rhos[i_comp] = np.average(F_S0, weights=norm_weights)
    del SSE_S0, SSE_R2, wtsZ, F_S0, F_R2, norm_weights, comp_betas
    if algorithm != 'kundu_v3':
        del WTS, PSC, tsoc_B

    # tabulate component values
    comptable = np.vstack([kappas, rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in descending Kappa order
        sort_idx = comptable[:, 0].argsort()[::-1]
        comptable = comptable[sort_idx, :]
        mmix_new = mmix[:, sort_idx]
        betas = betas[..., sort_idx]
        pred_R2_maps = pred_R2_maps[:, :, sort_idx]
        pred_S0_maps = pred_S0_maps[:, :, sort_idx]
        F_R2_maps = F_R2_maps[:, sort_idx]
        F_S0_maps = F_S0_maps[:, sort_idx]
        Z_maps = Z_maps[:, sort_idx]
        tsoc_Babs = tsoc_Babs[:, sort_idx]
        if algorithm == 'kundu_v3':
            WTS = WTS[:, sort_idx]
            PSC = PSC[:, sort_idx]
            tsoc_B = tsoc_B[:, sort_idx]
    else:
        mmix_new = mmix
    del mmix

    if verbose:
        # Echo-specific weight maps for each of the ICA components.
        io.filewrite(utils.unmask(betas, mask),
                     op.join(out_dir, '{0}betas_catd.nii'.format(label)),
                     ref_img)

        # Echo-specific maps of predicted values for R2 and S0 models for
        # each component.
        io.filewrite(utils.unmask(pred_R2_maps, mask),
                     op.join(out_dir, '{0}R2_pred.nii'.format(label)),
                     ref_img)
        io.filewrite(utils.unmask(pred_S0_maps, mask),
                     op.join(out_dir, '{0}S0_pred.nii'.format(label)),
                     ref_img)
        # Weight maps used to average metrics across voxels
        io.filewrite(utils.unmask(Z_maps**2., mask),
                     op.join(out_dir, '{0}metric_weights.nii'.format(label)),
                     ref_img)
    del pred_R2_maps, pred_S0_maps

    comptable = pd.DataFrame(comptable,
                             columns=['kappa', 'rho',
                                      'variance explained',
                                      'normalized variance explained'])
    comptable.index.name = 'component'

    # Generate clustering criteria for component selection
    if algorithm in ['kundu_v2', 'kundu_v3']:
        Z_clmaps = np.zeros([n_voxels, n_components], bool)
        F_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        F_S0_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_S0_clmaps = np.zeros([n_voxels, n_components], bool)

        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i_comp in range(n_components):
            # Cluster-extent threshold and binarize F-maps
            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask)))
            F_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFR2 = F_R2_clmaps[:, i_comp].sum()

            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask)))
            F_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFS0 = F_S0_clmaps[:, i_comp].sum()

            # Cluster-extent threshold and binarize Z-maps with CDT of
            # p < 0.05
            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(Z_maps[:, i_comp], mask)))
            Z_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=1.95, mask=mask,
                binarize=True)

            # Cluster-extent threshold and binarize ranked signal-change map
            ccimg = io.new_nii_like(
                ref_img,
                utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask))
            Br_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFR2), mask=mask,
                binarize=True)
            Br_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFS0), mask=mask,
                binarize=True)
        del ccimg, tsoc_Babs

        if algorithm == 'kundu_v2':
            # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5
            selvars = ['Z_maps', 'F_R2_maps',
                       'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps',
                       'Br_R2_clmaps', 'Br_S0_clmaps']
        elif algorithm == 'kundu_v3':
            selvars = ['WTS', 'tsoc_B', 'PSC',
                       'Z_maps', 'F_R2_maps', 'F_S0_maps',
                       'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps',
                       'Br_R2_clmaps', 'Br_S0_clmaps']
        elif algorithm is None:
            selvars = []
        else:
            raise ValueError(
                'Algorithm "{0}" not recognized.'.format(algorithm))

        seldict = {}
        for vv in selvars:
            seldict[vv] = eval(vv)  # look up each local metric map by name
    else:
        seldict = None

    return comptable, seldict, betas, mmix_new
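# Worked sketch (illustrative; not part of the original module). This mirrors
# the per-component S0- and R2*-model fits performed inside
# dependence_metrics, on synthetic arrays; all values below are made up.
def _example_model_fits():
    import numpy as np

    n_echos, n_voxels = 3, 100
    rng = np.random.default_rng(0)
    tes = np.array([[15.], [39.], [63.]])                    # echo times (ms)
    mu = rng.uniform(500., 1500., size=(n_voxels, n_echos))  # mean signal
    t2s = rng.uniform(20., 80., size=(n_voxels, 1))          # T2* estimates
    comp_betas = rng.standard_normal((n_echos, n_voxels))    # echo-wise betas

    X1 = mu.T                                        # S0 model regressor
    X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T  # R2* model regressor
    alpha = (np.abs(comp_betas)**2).sum(axis=0)

    F_maps = []
    for X in (X1, X2):
        # Single-regressor least-squares fit, then a pseudo-F map per voxel
        coeffs = (comp_betas * X).sum(axis=0) / (X**2).sum(axis=0)
        sse = ((comp_betas - X * coeffs)**2).sum(axis=0)
        F_maps.append((alpha - sse) * (n_echos - 1) / sse)
    F_S0, F_R2 = F_maps

    # Kappa/rho analogues: weighted averages of the F maps, where weights
    # stand in for the squared component Z-statistic maps
    weights = np.abs(rng.standard_normal(n_voxels))**2
    kappa = np.average(F_R2, weights=weights)
    rho = np.average(F_S0, weights=weights)
    return kappa, rho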
def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
           ref_img, tes, algorithm='mdl', kdaw=10., rdaw=1.,
           out_dir='.', verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s'
        indicates using the method of Posse 1999 and 'paid' indicates using
        the method of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    adaptive_mask : (S,) array_like
        Array where each value indicates the number of echoes with good
        signal for that voxel. This mask may be thresholded; for example,
        with values less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
        Method with which to select components in TEDPCA. PCA decomposition
        with the mdl, kic and aic options is based on a Moving Average
        (stationary Gaussian) process; the options are ordered from most to
        least aggressive (see Li et al., 2007). If a float is provided, then
        it is assumed to represent the percentage of variance explained (0-1)
        to retain from PCA. Default is 'mdl'.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        This is only compatible with the "kundu" or "kundu-stabilize"
        algorithms. Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`        Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`          Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`         Spatial weight (Z-statistic) map of a component
    :math:`c`              Component
    :math:`p`              Power to which component weights are raised (2)
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Estimate elbows for the :math:`{\\kappa}`, :math:`{\\rho}`, and
        variance explained curves; these elbows serve as thresholds during
        component selection.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    ==============================================
    Filename                  Content
    ======================    ==============================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    ==============================================

    See Also
    --------
    :func:`tedana.utils.make_adaptive_mask` : The function used to create
        the ``adaptive_mask`` parameter.
    """
    if algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif isinstance(algorithm, Number):
        alg_str = ("in which the number of components was determined based "
                   "on a variance explained threshold")
    else:
        alg_str = ("based on the PCA component estimation with a Moving "
                   "Average (stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")
    RepLGR.info("Principal component analysis {0} was applied to "
                "the optimally combined data for dimensionality "
                "reduction.".format(alg_str))

    n_samp, n_echos, n_vols = data_cat.shape

    LGR.info('Computing PCA of optimally combined multi-echo data')
    data = data_oc[mask, :]

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(ref_img, utils.unmask(data, mask))
        mask_img = io.new_nii_like(ref_img, mask.astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif isinstance(algorithm, Number):
        ppca = PCA(copy=False, n_components=algorithm, svd_solver="full")
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat,
                                                    data_oc,
                                                    comp_ts,
                                                    adaptive_mask,
                                                    tes,
                                                    ref_img,
                                                    reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    else:
        alg_str = "variance explained-based" if isinstance(
            algorithm, Number) else algorithm
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], alg_str))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [
        io.add_decomp_prefix(comp,
                             prefix='pca',
                             max_value=comptable.index.max())
        for comp in comptable.index.values
    ]
    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'),
                     sep='\t', index=False)

    comptable['Description'] = 'PCA fit to optimally combined data.'
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable,
                      op.join(out_dir, 'pca_decomposition.json'),
                      label='pca',
                      metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
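# Sketch of the variance-explained PCA branch (illustrative; not part of the
# original module). When `algorithm` is a float in (0, 1), sklearn's PCA
# keeps just enough components to explain that fraction of variance. The
# random data below are an assumption for demonstration.
def _example_variance_threshold_pca():
    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(1)
    data_z = rng.standard_normal((500, 100))  # (voxels x time), normalized

    # A float n_components requires svd_solver="full" in sklearn
    ppca = PCA(copy=False, n_components=0.95, svd_solver="full")
    ppca.fit(data_z)
    comp_ts = ppca.components_.T              # (time x components)
    varex = ppca.explained_variance_
    voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. / varex))
    varex_norm = varex / varex.sum()
    return voxel_comp_weights, varex, varex_norm, comp_ts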
def carpet_plot(optcom_ts, denoised_ts, hikts, lowkts, mask, io_generator,
                gscontrol=None):
    """Generate a set of carpet plots for the combined and denoised data.

    Parameters
    ----------
    optcom_ts, denoised_ts, hikts, lowkts : (S x T) array_like
        Different types of data to plot.
    mask : (S,) array-like
        Binary mask used to apply to the data.
    io_generator : :obj:`tedana.io.OutputGenerator`
        The output generator for this workflow
    gscontrol : {None, 'mir', 'gsr'} or :obj:`list`, optional
        Additional denoising steps applied in the workflow.
        If any gscontrol methods were applied, then additional carpet plots
        will be generated for pertinent outputs from those steps.
        Default is None.
    """
    mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int))
    optcom_img = io.new_nii_like(io_generator.reference_img, optcom_ts)
    dn_img = io.new_nii_like(io_generator.reference_img, denoised_ts)
    hik_img = io.new_nii_like(io_generator.reference_img, hikts)
    lowk_img = io.new_nii_like(io_generator.reference_img, lowkts)

    # Carpet plots
    fig, ax = plt.subplots(figsize=(14, 7))
    plotting.plot_carpet(
        optcom_img,
        mask_img,
        figure=fig,
        axes=ax,
        title="Optimally Combined Data",
    )
    fig.tight_layout()
    fig.savefig(
        os.path.join(io_generator.out_dir, "figures", "carpet_optcom.svg"))

    fig, ax = plt.subplots(figsize=(14, 7))
    plotting.plot_carpet(
        dn_img,
        mask_img,
        figure=fig,
        axes=ax,
        title="Denoised Data",
    )
    fig.tight_layout()
    fig.savefig(
        os.path.join(io_generator.out_dir, "figures", "carpet_denoised.svg"))

    fig, ax = plt.subplots(figsize=(14, 7))
    plotting.plot_carpet(
        hik_img,
        mask_img,
        figure=fig,
        axes=ax,
        title="High-Kappa Data",
    )
    fig.tight_layout()
    fig.savefig(
        os.path.join(io_generator.out_dir, "figures", "carpet_accepted.svg"))

    fig, ax = plt.subplots(figsize=(14, 7))
    plotting.plot_carpet(
        lowk_img,
        mask_img,
        figure=fig,
        axes=ax,
        title="Low-Kappa Data",
    )
    fig.tight_layout()
    fig.savefig(
        os.path.join(io_generator.out_dir, "figures", "carpet_rejected.svg"))

    if (gscontrol is not None) and ("gsr" in gscontrol):
        optcom_with_gs_img = io_generator.get_name("has gs combined img")
        fig, ax = plt.subplots(figsize=(14, 7))
        plotting.plot_carpet(
            optcom_with_gs_img,
            mask_img,
            figure=fig,
            axes=ax,
            title="Optimally Combined Data (Pre-GSR)",
        )
        fig.tight_layout()
        fig.savefig(
            os.path.join(io_generator.out_dir, "figures",
                         "carpet_optcom_nogsr.svg"))

    if (gscontrol is not None) and ("mir" in gscontrol):
        mir_denoised_img = io_generator.get_name("mir denoised img")
        fig, ax = plt.subplots(figsize=(14, 7))
        plotting.plot_carpet(
            mir_denoised_img,
            mask_img,
            figure=fig,
            axes=ax,
            title="Denoised Data (Post-MIR)",
        )
        fig.tight_layout()
        fig.savefig(
            os.path.join(io_generator.out_dir, "figures",
                         "carpet_denoised_mir.svg"))

        mir_denoised_img = io_generator.get_name(
            "ICA accepted mir denoised img")
        fig, ax = plt.subplots(figsize=(14, 7))
        plotting.plot_carpet(
            mir_denoised_img,
            mask_img,
            figure=fig,
            axes=ax,
            title="High-Kappa Data (Post-MIR)",
        )
        fig.tight_layout()
        fig.savefig(
            os.path.join(io_generator.out_dir, "figures",
                         "carpet_accepted_mir.svg"))
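# Minimal carpet-plot sketch (illustrative; not part of the original module),
# showing the nilearn call pattern used above on a synthetic 4D image. The
# random image and output path are assumptions for demonstration.
def _example_carpet_plot():
    import numpy as np
    import nibabel as nib
    import matplotlib.pyplot as plt
    from nilearn import plotting

    rng = np.random.default_rng(2)
    func_img = nib.Nifti1Image(rng.random((10, 10, 10, 50)),
                               affine=np.eye(4))
    mask_img = nib.Nifti1Image(np.ones((10, 10, 10), dtype=np.int32),
                               affine=np.eye(4))

    fig, ax = plt.subplots(figsize=(14, 7))
    plotting.plot_carpet(func_img, mask_img, figure=fig, axes=ax,
                         title="Example Data")
    fig.tight_layout()
    fig.savefig("carpet_example.svg")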
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False,
                    stabilize=False, out_dir='.', fixed_seed=42, maxit=500,
                    maxrestart=10, debug=False, quiet=False, png=False,
                    png_cmap='coolwarm'):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a
        mask from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA
        are done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the
        components, a comma-separated string with component numbers, or
        None. Default is None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise.
        Default is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        --png must still be used to request figures. Default is 'coolwarm'.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        # ensure old logs aren't over-written
        basename = 'tedana_run'
        extension = 'txt'
        logname = op.join(out_dir, (basename + '.' + extension))
        logex = op.join(out_dir, (basename + '*'))
        previouslogs = glob.glob(logex)
        previouslogs.sort(reverse=True)
        for f in previouslogs:
            previousparts = op.splitext(f)
            newname = previousparts[0] + '_old' + previousparts[1]
            os.rename(f, newname)

        # set logging format
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')

        # set up logging file and open it for writing
        fh = logging.FileHandler(logname)
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG,
                                  combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode,
                                                mask, t2s, t2sG, ref_img,
                                                tes=tes, algorithm=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e.,
        # data with thermal noise)
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
            catd, data_oc, mmix_orig, mask, t2s, tes, ref_img,
            reindex=True, label='meica_', out_dir=out_dir,
            algorithm='kundu_v2', verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = model.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
            catd, data_oc, mmix_orig, mask, t2s, tes, ref_img,
            label='meica_', out_dir=out_dir, algorithm='kundu_v2',
            verbose=verbose)
        if ctab is None:
            comptable = model.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos,
                                                     n_vols)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
            comptable = selection.manual_selection(comptable, acc=manacc)

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component',
                     float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove '
                 'spatially diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))
        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
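# Worked sketch of the `tedort` step above (illustrative; not part of the
# original module): rejected component time series are regressed onto
# accepted ones and replaced by the residuals, removing shared variance.
# The arrays below are synthetic.
def _example_tedort():
    import numpy as np

    rng = np.random.default_rng(3)
    n_vols = 200
    acc_ts = rng.standard_normal((n_vols, 4))  # accepted component series
    rej_ts = rng.standard_normal((n_vols, 2))  # rejected component series

    # Least-squares fit of rejected onto accepted, then residualize
    betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
    resid = rej_ts - np.dot(acc_ts, betas)

    # The residuals are orthogonal to the accepted series (up to precision)
    assert np.allclose(np.dot(acc_ts.T, resid), 0., atol=1e-8)
    return resid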
def test_new_nii_like():
    data, ref = me.load_data(fnames, n_echos=len(tes))
    nimg = me.new_nii_like(ref, data)

    assert isinstance(nimg, nib.Nifti1Image)
    assert nimg.shape == (39, 50, 33, 3, 5)
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False,
                    stabilize=False, out_dir='.', fixed_seed=42, maxit=500,
                    maxrestart=10, debug=False, quiet=False, png=False,
                    png_cmap='coolwarm', low_mem=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a
        mask from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA
        are done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the
        components, a comma-separated string with component numbers, or
        None. Default is None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise.
        Default is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        --png must still be used to request figures. Default is 'coolwarm'.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    low_mem : :obj:`bool`, optional
        Enables low-memory processing, including the use of IncrementalPCA.
        May increase workflow duration. Default is False.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # boilerplate
    refs = []
    basename = 'report'
    extension = 'txt'
    repname = op.join(out_dir, (basename + '.' + extension))
    repex = op.join(out_dir, (basename + '*'))
    previousreps = glob(repex)
    previousreps.sort(reverse=True)
    for f in previousreps:
        previousparts = op.splitext(f)
        newname = previousparts[0] + '_old' + previousparts[1]
        os.rename(f, newname)

    if debug and not quiet:
        # ensure old logs aren't over-written
        basename = 'tedana_run'
        extension = 'txt'
        logname = op.join(out_dir, (basename + '.' + extension))
        logex = op.join(out_dir, (basename + '*'))
        previouslogs = glob(logex)
        previouslogs.sort(reverse=True)
        for f in previouslogs:
            previousparts = op.splitext(f)
            newname = previousparts[0] + '_old' + previousparts[1]
            os.rename(f, newname)

        # set logging format
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')

        # set up logging file and open it for writing
        fh = logging.FileHandler(logname)
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    # check if TR is 0
    img_t_r = ref_img.header.get_zooms()[-1]
    if img_t_r == 0 and png:
        raise IOError('Dataset has a TR of 0. This indicates incorrect'
                      ' header information. To correct this, we recommend'
                      ' using this snippet:'
                      '\n'
                      'https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6'
                      '\n'
                      'to correct your TR to the value it should be.')

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    bp_str = "TE-dependence analysis was performed on input data."
    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
        bp_str += (" An initial mask was generated from the first echo "
                   "using nilearn's compute_epi_mask function.")
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
        bp_str += " A user-defined mask was applied to the data."

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    bp_str += (" An adaptive mask was then generated, in which each voxel's "
               "value reflects the number of echoes with 'good' data.")
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)
    bp_str += (" A monoexponential model was fit to the data at each voxel "
               "using log-linear regression in order to estimate T2* and S0 "
               "maps. For each voxel, the value from the adaptive mask was "
               "used to determine which echoes would be used to estimate T2* "
               "and S0.")

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG,
                                  combmode=combmode)
    if combmode == 't2s':
        cm_str = "'t2s' (Posse et al., 1999)"
        refs += ["Posse, S., Wiese, S., Gembris, D., Mathiak, K., Kessler, "
                 "C., Grosse‐Ruyken, M. L., ... & Kiselev, V. G. (1999). "
                 "Enhancement of BOLD‐contrast sensitivity by single‐shot "
                 "multi‐echo functional MR imaging. Magnetic Resonance in "
                 "Medicine: An Official Journal of the International Society "
                 "for Magnetic Resonance in Medicine, 42(1), 87-97."]

    bp_str += (" Multi-echo data were then optimally combined using the {0} "
               "combination method.").format(cm_str)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)
        bp_str += (" Global signal regression was applied to the multi-echo "
                   "and optimally combined datasets.")

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode,
                                                mask, t2s, t2sG, ref_img,
                                                tes=tes, algorithm=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose,
                                                low_mem=low_mem)
        if tedpca == 'mle':
            alg_str = "using MLE dimensionality estimation (Minka, 2001)"
            refs += ["Minka, T. P. (2001). Automatic choice of "
                     "dimensionality for PCA. In Advances in neural "
                     "information processing systems (pp. 598-604)."]
        elif tedpca == 'kundu':
            alg_str = ("followed by the Kundu component selection decision "
                       "tree (Kundu et al., 2013)")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]
        elif tedpca == 'kundu-stabilize':
            alg_str = ("followed by the 'stabilized' Kundu component "
                       "selection decision tree (Kundu et al., 2013)")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]

        if source_tes == -1:
            dat_str = "the optimally combined data"
        elif source_tes == 0:
            dat_str = "the z-concatenated multi-echo data"
        else:
            dat_str = "a z-concatenated subset of echoes from the input data"

        bp_str += (" Principal component analysis {0} was applied to "
                   "{1} for dimensionality reduction.").format(alg_str,
                                                               dat_str)

        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)
        bp_str += (" Independent component analysis was then used to "
                   "decompose the dimensionally reduced dataset.")

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e.,
        # data with thermal noise)
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
            catd, data_oc, mmix_orig, t2s, tes, ref_img,
            reindex=True, label='meica_', out_dir=out_dir,
            algorithm='kundu_v2', verbose=verbose)
        bp_str += (" A series of TE-dependence metrics were calculated for "
                   "each ICA component, including Kappa, Rho, and variance "
                   "explained.")
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = metrics.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
        bp_str += (" Next, component selection was performed to identify "
                   "BOLD (TE-dependent), non-BOLD (TE-independent), and "
                   "uncertain (low-variance) components using the Kundu "
                   "decision tree (v2.5; Kundu et al., 2013).")
        refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                 "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                 "(2013). Integrated strategy for improving functional "
                 "connectivity mapping using multiecho fMRI. Proceedings "
                 "of the National Academy of Sciences, 110(40), "
                 "16187-16192."]
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
            catd, data_oc, mmix_orig, t2s, tes, ref_img,
            label='meica_', out_dir=out_dir, algorithm='kundu_v2',
            verbose=verbose)
        if ctab is None:
            comptable = metrics.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos,
                                                     n_vols)
            bp_str += (" Next, component selection was performed to "
                       "identify BOLD (TE-dependent), non-BOLD "
                       "(TE-independent), and uncertain (low-variance) "
                       "components using the Kundu decision tree (v2.5; "
                       "Kundu et al., 2013).")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
            comptable = selection.manual_selection(comptable, acc=manacc)
            bp_str += (" Next, components were manually classified as "
                       "BOLD (TE-dependent), non-BOLD (TE-independent), or "
                       "uncertain (low-variance).")

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component',
                     float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)
        bp_str += (" Rejected components' time series were then "
                   "orthogonalized with respect to accepted components' "
                   "time series.")

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove '
                 'spatially diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
        bp_str += (" T1c global signal regression was then applied to the "
                   "data in order to remove spatially diffuse noise.")

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))
        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')

    bp_str += ("\n\nThis workflow used numpy (Van Der Walt, Colbert, & "
               "Varoquaux, 2011), scipy (Jones et al., 2001), pandas "
               "(McKinney, 2010), scikit-learn (Pedregosa et al., 2011), "
               "nilearn, and nibabel (Brett et al., 2019).")
    refs += ["Van Der Walt, S., Colbert, S. C., & Varoquaux, G. (2011). The "
             "NumPy array: a structure for efficient numerical computation. "
             "Computing in Science & Engineering, 13(2), 22.",
             "Jones E, Oliphant E, Peterson P, et al. SciPy: Open Source "
             "Scientific Tools for Python, 2001-, http://www.scipy.org/",
             "McKinney, W. (2010, June). Data structures for statistical "
             "computing in python. In Proceedings of the 9th Python in "
             "Science Conference (Vol. 445, pp. 51-56).",
             "Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., "
             "Thirion, B., Grisel, O., ... & Vanderplas, J. (2011). "
             "Scikit-learn: Machine learning in Python. Journal of machine "
             "learning research, 12(Oct), 2825-2830.",
             "Brett, M., Markiewicz, C. J., Hanke, M., Côté, M.-A., "
             "Cipollini, B., McCarthy, P., … freec84. (2019, May 28). "
             "nipy/nibabel. Zenodo. http://doi.org/10.5281/zenodo.3233118"]

    bp_str += ("\n\nThis workflow also used the Dice similarity index "
               "(Dice, 1945; Sørensen, 1948).")
    refs += ["Dice, L. R. (1945). Measures of the amount of ecologic "
             "association between species. Ecology, 26(3), 297-302.",
             "Sørensen, T. J. (1948). A method of establishing groups of "
             "equal amplitude in plant sociology based on similarity of "
             "species content and its application to analyses of the "
             "vegetation on Danish commons. I kommission hos E. Munksgaard."]

    bp_str += '\n\nReferences\n\n'
    refs = sorted(list(set(refs)))
    bp_str += '\n\n'.join(refs)
    with open(repname, 'w') as fo:
        fo.write(bp_str)

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
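# Hypothetical invocation of the workflow above (illustrative; not part of
# the original module). The file names and echo times are assumptions; any
# set of echo-wise NIfTI files in ascending TE order would do.
def _example_run_workflow():
    tedana_workflow(
        data=['echo-1.nii.gz', 'echo-2.nii.gz', 'echo-3.nii.gz'],
        tes=[14.5, 38.5, 62.5],  # echo times in milliseconds
        out_dir='tedana_outputs',
        tedpca='kundu',
        gscontrol=None,
        verbose=True)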
def tedana_workflow(data, tes, out_dir='.', mask=None,
                    fittype='loglin', combmode='t2s', tedpca='mdl',
                    fixed_seed=42, maxit=500, maxrestart=10,
                    tedort=False, gscontrol=None,
                    no_png=False, png_cmap='coolwarm',
                    verbose=False, low_mem=False, debug=False, quiet=False,
                    t2smap=None, mixm=None, ctab=None, manacc=None):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    out_dir : :obj:`str`, optional
        Output directory.
    mask : :obj:`str` or None, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a
        mask from the first echo's data.
    fittype : {'loglin', 'curvefit'}, optional
        Monoexponential fitting method. 'loglin' uses the default linear
        fit to the log of the data. 'curvefit' uses a monoexponential fit
        to the raw data, which is slightly slower but may be more accurate.
        Default is 'loglin'.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    tedpca : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise.
        Default is None.
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    no_png : :obj:`bool`, optional
        Do not generate .png plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        Cannot be used with --no-png. Default is 'coolwarm'.
    t2smap : :obj:`str`, optional
        Precalculated T2* map in the same space as the input data.
    mixm : :obj:`str` or None, optional
        File containing mixing matrix, to be used when re-running the
        workflow. If not provided, ME-PCA and ME-ICA are done. Default is
        None.
    ctab : :obj:`str` or None, optional
        File containing component table from which to extract pre-computed
        classifications, to be used with 'mixm' when re-running the
        workflow. Default is None.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the
        components, a comma-separated string with component numbers, or
        None. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    low_mem : :obj:`bool`, optional
        Enables low-memory processing, including the use of IncrementalPCA.
        May increase workflow duration. Default is False.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # boilerplate
    basename = 'report'
    extension = 'txt'
    repname = op.join(out_dir, (basename + '.' + extension))
    repex = op.join(out_dir, (basename + '*'))
    previousreps = glob(repex)
    previousreps.sort(reverse=True)
    for f in previousreps:
        previousparts = op.splitext(f)
        newname = previousparts[0] + '_old' + previousparts[1]
        os.rename(f, newname)
    refname = op.join(out_dir, '_references.txt')

    # create logfile name
    basename = 'tedana_'
    extension = 'tsv'
    start_time = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
    logname = op.join(out_dir, (basename + start_time + '.' + extension))

    # set logging format
    log_formatter = logging.Formatter(
        '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    text_formatter = logging.Formatter('%(message)s')

    # set up logging file and open it for writing
    log_handler = logging.FileHandler(logname)
    log_handler.setFormatter(log_formatter)
    # Removing handlers after basicConfig doesn't work, so we use filters
    # for the relevant handlers themselves.
    log_handler.addFilter(ContextFilter())
    sh = logging.StreamHandler()
    sh.addFilter(ContextFilter())

    if quiet:
        logging.basicConfig(level=logging.WARNING,
                            handlers=[log_handler, sh])
    elif debug:
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[log_handler, sh])
    else:
        logging.basicConfig(level=logging.INFO,
                            handlers=[log_handler, sh])

    # Loggers for report and references
    rep_handler = logging.FileHandler(repname)
    rep_handler.setFormatter(text_formatter)
    ref_handler = logging.FileHandler(refname)
    ref_handler.setFormatter(text_formatter)
    RepLGR.setLevel(logging.INFO)
    RepLGR.addHandler(rep_handler)
    RefLGR.setLevel(logging.INFO)
    RefLGR.addHandler(ref_handler)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if no_png and (png_cmap != 'coolwarm'):
        LGR.warning('Overriding --no-png since --png-cmap provided.')
        no_png = False

    # check if TR is 0
    img_t_r = ref_img.header.get_zooms()[-1]
    if img_t_r == 0 and not no_png:
        raise IOError(
            'Dataset has a TR of 0. This indicates incorrect'
            ' header information. To correct this, we recommend'
            ' using this snippet:'
            '\n'
            'https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6'
            '\n'
            'to correct your TR to the value it should be.')

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'ica_mixing.tsv'):
            shutil.copyfile(mixm, op.join(out_dir, 'ica_mixing.tsv'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'ica_decomposition.json'):
            shutil.copyfile(ctab, op.join(out_dir, 'ica_decomposition.json'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if t2smap is not None and op.isfile(t2smap):
        t2smap = op.abspath(t2smap)
        # Allow users to re-run on same folder
        if t2smap != op.join(out_dir, 't2sv.nii.gz'):
            shutil.copyfile(t2smap, op.join(out_dir, 't2sv.nii.gz'))
            shutil.copyfile(t2smap, op.join(out_dir, op.basename(t2smap)))
    elif t2smap is not None:
        raise IOError('Argument "t2smap" must be an existing file.')

    RepLGR.info("TE-dependence analysis was performed on input data.")
    if mask and not t2smap:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
        RepLGR.info("A user-defined mask was applied to the data.")
    elif t2smap and not mask:
        LGR.info('Using user-defined T2* map to generate mask')
        t2s_limited = utils.load_image(t2smap)
        t2s_full = t2s_limited.copy()
        mask = (t2s_limited != 0).astype(int)
    elif t2smap and mask:
        LGR.info('Combining user-defined mask and T2* map to generate mask')
        t2s_limited = utils.load_image(t2smap)
        t2s_full = t2s_limited.copy()
        mask = utils.load_image(mask)
        mask[t2s_limited == 0] = 0  # reduce mask based on T2* map
    else:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
        RepLGR.info("An initial mask was generated from the first echo "
                    "using nilearn's compute_epi_mask function.")

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    if t2smap is None:
        LGR.info('Computing T2* map')
        t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay(
            catd, tes, mask, masksum, fittype)

        # set a hard cap for the T2* map
        # anything that is 10x higher than the 99.5 %ile will be reset to
        # the 99.5 %ile
        cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5,
                                          interpolation_method='lower')
        LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
        t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s
        io.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img)
        io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img)

        if verbose:
            io.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img)
            io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2s_full,
                                  combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img,
                                          out_dir=out_dir)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode,
                                                mask, t2s_limited, t2s_full,
                                                ref_img,
                                                tes=tes, algorithm=tedpca,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose,
                                                low_mem=low_mem)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)

        if verbose:
            io.filewrite(utils.unmask(dd, mask),
                         op.join(out_dir, 'ts_OC_whitened.nii.gz'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e.,
        # data with thermal noise)
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
            catd, data_oc, mmix_orig, t2s_limited, tes, ref_img,
            reindex=True, label='meica_', out_dir=out_dir,
            algorithm='kundu_v2', verbose=verbose)
        comp_names = [
            io.add_decomp_prefix(comp,
                                 prefix='ica',
                                 max_value=comptable.index.max())
            for comp in comptable.index.values
        ]
        mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
        mixing_df.to_csv(op.join(out_dir, 'ica_mixing.tsv'),
                         sep='\t', index=False)
        betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
        io.filewrite(betas_oc,
                     op.join(out_dir, 'ica_components.nii.gz'),
                     ref_img)

        comptable = metrics.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = pd.read_table(op.join(out_dir, 'ica_mixing.tsv')).values

        if ctab is None:
            comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
                catd, data_oc, mmix_orig, t2s_limited, tes, ref_img,
                label='meica_', out_dir=out_dir, algorithm='kundu_v2',
                verbose=verbose)
            comptable = metrics.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos,
                                                     n_vols)
        else:
            mmix = mmix_orig.copy()
            comptable = io.load_comptable(ctab)
            if manacc is not None:
                comptable = selection.manual_selection(comptable, acc=manacc)
        betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
        io.filewrite(betas_oc,
                     op.join(out_dir, 'ica_components.nii.gz'),
                     ref_img)

    # Save decomposition
    comptable['Description'] = (
        'ICA fit to dimensionally-reduced optimally combined data.')
    mmix_dict = {}
    mmix_dict['Method'] = ('Independent components analysis with FastICA '
                           'algorithm implemented by sklearn. Components '
                           'are sorted by Kappa in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable,
                      op.join(out_dir, 'ica_decomposition.json'),
                      label='ica',
                      metadata=mmix_dict)

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
contains('rejected')].index.values rej_idx = comptable.loc[comptable.classification.str.contains( 'rejected')].index.values acc_ts = mmix[:, acc_idx] rej_ts = mmix[:, rej_idx] betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] pred_rej_ts = np.dot(acc_ts, betas) resid = rej_ts - pred_rej_ts mmix[:, rej_idx] = resid comp_names = [ io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) for comp in comptable.index.values ] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) mixing_df.to_csv(op.join(out_dir, 'ica_orth_mixing.tsv'), sep='\t', index=False) RepLGR.info("Rejected components' time series were then " "orthogonalized with respect to accepted components' time " "series.") io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix, n_vols=n_vols, ref_img=ref_img, out_dir=out_dir) if 't1c' in gscontrol: gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img, out_dir=out_dir) if verbose: io.writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir=out_dir) if not no_png: LGR.info('Making figures folder with static component maps and ' 'timecourse plots.') # make figure folder first if not op.isdir(op.join(out_dir, 'figures')): os.mkdir(op.join(out_dir, 'figures')) viz.write_comp_figs(data_oc, mask=mask, comptable=comptable, mmix=mmix_orig, ref_img=ref_img, out_dir=op.join(out_dir, 'figures'), png_cmap=png_cmap) LGR.info('Making Kappa vs Rho scatter plot') viz.write_kappa_scatter(comptable=comptable, out_dir=op.join(out_dir, 'figures')) LGR.info('Making Kappa/Rho scree plot') viz.write_kappa_scree(comptable=comptable, out_dir=op.join(out_dir, 'figures')) LGR.info('Making overall summary figure') viz.write_summary_fig(comptable=comptable, out_dir=op.join(out_dir, 'figures')) LGR.info('Workflow completed') RepLGR.info("This workflow used numpy (Van Der Walt, Colbert, & " "Varoquaux, 2011), scipy (Jones et al., 2001), pandas " "(McKinney, 2010), scikit-learn (Pedregosa et al., 2011), " "nilearn, and nibabel (Brett et al., 2019).") RefLGR.info( "Van Der Walt, S., Colbert, S. C., & Varoquaux, G. (2011). The " "NumPy array: a structure for efficient numerical computation. " "Computing in Science & Engineering, 13(2), 22.") RefLGR.info("Jones E, Oliphant T, Peterson P, et al. SciPy: Open Source " "Scientific Tools for Python, 2001-, http://www.scipy.org/") RefLGR.info("McKinney, W. (2010, June). Data structures for statistical " "computing in python. In Proceedings of the 9th Python in " "Science Conference (Vol. 445, pp. 51-56).") RefLGR.info("Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., " "Thirion, B., Grisel, O., ... & Vanderplas, J. (2011). " "Scikit-learn: Machine learning in Python. Journal of machine " "learning research, 12(Oct), 2825-2830.") RefLGR.info("Brett, M., Markiewicz, C. J., Hanke, M., Côté, M.-A., " "Cipollini, B., McCarthy, P., … freec84. (2019, May 28). " "nipy/nibabel. Zenodo. http://doi.org/10.5281/zenodo.3233118") RepLGR.info("This workflow also used the Dice similarity index " "(Dice, 1945; Sørensen, 1948).") RefLGR.info("Dice, L. R. (1945). Measures of the amount of ecologic " "association between species. Ecology, 26(3), 297-302.") RefLGR.info( "Sørensen, T. J. (1948). A method of establishing groups of " "equal amplitude in plant sociology based on similarity of " "species content and its application to analyses of the " "vegetation on Danish commons. I kommission hos E.
Munksgaard.") with open(repname, 'r') as fo: report = [line.rstrip() for line in fo.readlines()] report = ' '.join(report) with open(refname, 'r') as fo: reference_list = sorted(list(set(fo.readlines()))) references = '\n'.join(reference_list) report += '\n\nReferences\n' + references with open(repname, 'w') as fo: fo.write(report) os.remove(refname) for handler in logging.root.handlers[:]: logging.root.removeHandler(handler)
def tedana_workflow(data, tes, out_dir='.', mask=None, convention='bids', prefix='', fittype='loglin', combmode='t2s', tedpca='mdl', fixed_seed=42, maxit=500, maxrestart=10, tedort=False, gscontrol=None, no_reports=False, png_cmap='coolwarm', verbose=False, low_mem=False, debug=False, quiet=False, t2smap=None, mixm=None, ctab=None, manacc=None): """ Run the "canonical" TE-Dependent ANAlysis workflow. Parameters ---------- data : :obj:`str` or :obj:`list` of :obj:`str` Either a single z-concatenated file (single-entry list or str) or a list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. out_dir : :obj:`str`, optional Output directory. mask : :obj:`str` or None, optional Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially aligned with `data`. If an explicit mask is not provided, then Nilearn's compute_epi_mask function will be used to derive a mask from the first echo's data. convention : :obj:`str`, optional Filenaming convention for generated files (e.g., 'bids'). Default is 'bids'. prefix : :obj:`str`, optional Prefix added to generated filenames. Default is ''. fittype : {'loglin', 'curvefit'}, optional Monoexponential fitting method. 'loglin' uses the default linear fit to the log of the data. 'curvefit' uses a monoexponential fit to the raw data, which is slightly slower but may be more accurate. Default is 'loglin'. combmode : {'t2s'}, optional Combination scheme for TEs: 't2s' (Posse 1999, default). tedpca : {'mdl', 'aic', 'kic', 'kundu', 'kundu-stabilize', float}, optional Method with which to select components in TEDPCA. If a float is provided, then it is assumed to represent percentage of variance explained (0-1) to retain from PCA. Default is 'mdl'. tedort : :obj:`bool`, optional Orthogonalize rejected components w.r.t. accepted ones prior to denoising. Default is False. gscontrol : {None, 'mir', 'gsr'} or :obj:`list`, optional Perform additional denoising to remove spatially diffuse noise. Default is None. verbose : :obj:`bool`, optional Generate intermediate and additional files. Default is False. no_reports : :obj:`bool`, optional Do not generate .html reports and .png plots. Default is False, such that reports are generated. png_cmap : :obj:`str`, optional Name of a matplotlib colormap to be used when generating figures. Cannot be used with --no-reports. Default is 'coolwarm'. t2smap : :obj:`str`, optional Precalculated T2* map in the same space as the input data. Values in the map must be in seconds. mixm : :obj:`str` or None, optional File containing mixing matrix, to be used when re-running the workflow. If not provided, ME-PCA and ME-ICA are done. Default is None. ctab : :obj:`str` or None, optional File containing component table from which to extract pre-computed classifications, to be used with 'mixm' when re-running the workflow. Default is None. manacc : :obj:`list` of :obj:`int` or None, optional List of manually accepted components. Can be a list of the component numbers or None. If provided, this parameter requires ``mixm`` and ``ctab`` to be provided as well. Default is None. Other Parameters ---------------- fixed_seed : :obj:`int`, optional Value passed to ``mdp.numx_rand.seed()``. Set to a positive integer value for reproducible ICA results; otherwise, set to -1 for varying results across calls. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional Maximum number of attempts for ICA. If ICA fails to converge, the fixed seed will be updated and ICA will be run again. If convergence is achieved before maxrestart attempts, ICA will finish early. Default is 10.
low_mem : :obj:`bool`, optional Enables low-memory processing, including the use of IncrementalPCA. May increase workflow duration. Default is False. debug : :obj:`bool`, optional Whether to run in debugging mode or not. Default is False. quiet : :obj:`bool`, optional If True, suppresses logging/printing of messages. Default is False. Notes ----- This workflow writes out several files. For a complete list of the files generated by this workflow, please visit https://tedana.readthedocs.io/en/latest/outputs.html """ out_dir = op.abspath(out_dir) if not op.isdir(out_dir): os.mkdir(out_dir) # boilerplate basename = 'report' extension = 'txt' repname = op.join(out_dir, (basename + '.' + extension)) repex = op.join(out_dir, (basename + '*')) previousreps = glob(repex) previousreps.sort(reverse=True) for f in previousreps: previousparts = op.splitext(f) newname = previousparts[0] + '_old' + previousparts[1] os.rename(f, newname) refname = op.join(out_dir, '_references.txt') # create logfile name basename = 'tedana_' extension = 'tsv' start_time = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S') logname = op.join(out_dir, (basename + start_time + '.' + extension)) utils.setup_loggers(logname, repname, refname, quiet=quiet, debug=debug) LGR.info('Using output directory: {}'.format(out_dir)) # ensure tes are in appropriate format tes = [float(te) for te in tes] n_echos = len(tes) # Coerce gscontrol to list if not isinstance(gscontrol, list): gscontrol = [gscontrol] # Check value of tedpca *if* it is a float tedpca = check_tedpca_value(tedpca, is_parser=False) LGR.info('Loading input data: {}'.format([f for f in data])) catd, ref_img = io.load_data(data, n_echos=n_echos) io_generator = io.OutputGenerator( ref_img, convention=convention, out_dir=out_dir, prefix=prefix, config="auto", verbose=verbose, ) n_samp, n_echos, n_vols = catd.shape LGR.debug('Resulting data shape: {}'.format(catd.shape)) # check if TR is 0 img_t_r = io_generator.reference_img.header.get_zooms()[-1] if img_t_r == 0: raise IOError( 'Dataset has a TR of 0. This indicates incorrect' ' header information. 
To correct this, we recommend' ' using this snippet:' '\n' 'https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6' '\n' 'to correct your TR to the value it should be.') if mixm is not None and op.isfile(mixm): mixm = op.abspath(mixm) # Allow users to re-run on same folder mixing_name = io_generator.get_name("ICA mixing tsv") if mixm != mixing_name: shutil.copyfile(mixm, mixing_name) shutil.copyfile(mixm, op.join(io_generator.out_dir, op.basename(mixm))) elif mixm is not None: raise IOError('Argument "mixm" must be an existing file.') if ctab is not None and op.isfile(ctab): ctab = op.abspath(ctab) # Allow users to re-run on same folder metrics_name = io_generator.get_name("ICA metrics tsv") if ctab != metrics_name: shutil.copyfile(ctab, metrics_name) shutil.copyfile(ctab, op.join(io_generator.out_dir, op.basename(ctab))) elif ctab is not None: raise IOError('Argument "ctab" must be an existing file.') if ctab and not mixm: LGR.warning('Argument "ctab" requires argument "mixm".') ctab = None elif manacc is not None and (not mixm or not ctab): LGR.warning('Argument "manacc" requires arguments "mixm" and "ctab".') manacc = None elif manacc is not None: # coerce to list of integers manacc = [int(m) for m in manacc] if t2smap is not None and op.isfile(t2smap): t2smap_file = io_generator.get_name('t2star img') t2smap = op.abspath(t2smap) # Allow users to re-run on same folder if t2smap != t2smap_file: shutil.copyfile(t2smap, t2smap_file) elif t2smap is not None: raise IOError('Argument "t2smap" must be an existing file.') RepLGR.info("TE-dependence analysis was performed on input data.") if mask and not t2smap: # TODO: add affine check LGR.info('Using user-defined mask') RepLGR.info("A user-defined mask was applied to the data.") elif t2smap and not mask: LGR.info('Using user-defined T2* map to generate mask') t2s_limited_sec = utils.load_image(t2smap) t2s_limited = utils.sec2millisec(t2s_limited_sec) t2s_full = t2s_limited.copy() mask = (t2s_limited != 0).astype(int) elif t2smap and mask: LGR.info('Combining user-defined mask and T2* map to generate mask') t2s_limited_sec = utils.load_image(t2smap) t2s_limited = utils.sec2millisec(t2s_limited_sec) t2s_full = t2s_limited.copy() mask = utils.load_image(mask) mask[t2s_limited == 0] = 0 # reduce mask based on T2* map else: LGR.info('Computing EPI mask from first echo') first_echo_img = io.new_nii_like(io_generator.reference_img, catd[:, 0, :]) mask = compute_epi_mask(first_echo_img) RepLGR.info("An initial mask was generated from the first echo using " "nilearn's compute_epi_mask function.") # Create an adaptive mask with at least 1 good echo, for denoising mask_denoise, masksum_denoise = utils.make_adaptive_mask( catd, mask=mask, getsum=True, threshold=1, ) LGR.debug('Retaining {}/{} samples for denoising'.format( mask_denoise.sum(), n_samp)) io_generator.save_file(masksum_denoise, "adaptive mask img") # Create an adaptive mask with at least 3 good echoes, for classification masksum_clf = masksum_denoise.copy() masksum_clf[masksum_clf < 3] = 0 mask_clf = masksum_clf.astype(bool) RepLGR.info( "A two-stage masking procedure was applied, in which a liberal mask " "(including voxels with good data in at least the first echo) was used for " "optimal combination, T2*/S0 estimation, and denoising, while a more conservative mask " "(restricted to voxels with good data in at least the first three echoes) was used for " "the component classification procedure.") LGR.debug('Retaining {}/{} samples for classification'.format( mask_clf.sum(), 
n_samp)) if t2smap is None: LGR.info('Computing T2* map') t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay( catd, tes, mask_denoise, masksum_denoise, fittype) # set a hard cap for the T2* map # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method='lower') LGR.debug('Setting cap on T2* map at {:.5f}s'.format( utils.millisec2sec(cap_t2s))) t2s_full[t2s_full > cap_t2s * 10] = cap_t2s io_generator.save_file(utils.millisec2sec(t2s_full), 't2star img') io_generator.save_file(s0_full, 's0 img') if verbose: io_generator.save_file(utils.millisec2sec(t2s_limited), 'limited t2star img') io_generator.save_file(s0_limited, 'limited s0 img') # optimally combine data data_oc = combine.make_optcom(catd, tes, masksum_denoise, t2s=t2s_full, combmode=combmode) # regress out global signal unless explicitly not desired if 'gsr' in gscontrol: catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, io_generator) fout = io_generator.save_file(data_oc, 'combined img') LGR.info('Writing optimally combined data set: {}'.format(fout)) if mixm is None: # Identify and remove thermal noise from data dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask_clf, masksum_clf, t2s_full, io_generator, tes=tes, algorithm=tedpca, kdaw=10., rdaw=1., verbose=verbose, low_mem=low_mem) if verbose: io_generator.save_file(utils.unmask(dd, mask_clf), 'whitened img') # Perform ICA, calculate metrics, and apply decision tree # Restart when ICA fails to converge or too few BOLD components found keep_restarting = True n_restarts = 0 seed = fixed_seed while keep_restarting: mmix, seed = decomposition.tedica(dd, n_components, seed, maxit, maxrestart=(maxrestart - n_restarts)) seed += 1 n_restarts = seed - fixed_seed # Estimate betas and compute selection metrics for mixing matrix # generated from dimensionally reduced data using full data (i.e., data # with thermal noise) LGR.info( 'Making second component selection guess from ICA results') required_metrics = [ 'kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', 'dice_FT2', 'dice_FS0', 'signal-noise_t', 'variance explained', 'normalized variance explained', 'd_table_score' ] comptable = metrics.collect.generate_metrics( catd, data_oc, mmix, masksum_clf, tes, io_generator, 'ICA', metrics=required_metrics, ) comptable, metric_metadata = selection.kundu_selection_v2( comptable, n_echos, n_vols) n_bold_comps = comptable[comptable.classification == 'accepted'].shape[0] if (n_restarts < maxrestart) and (n_bold_comps == 0): LGR.warning("No BOLD components found. Re-attempting ICA.") elif (n_bold_comps == 0): LGR.warning( "No BOLD components found, but maximum number of restarts reached." 
) keep_restarting = False else: keep_restarting = False RepLGR.disabled = True # Disable the report to avoid duplicate text RepLGR.disabled = False # Re-enable the report after the while loop is escaped else: LGR.info('Using supplied mixing matrix from ICA') mixing_file = io_generator.get_name("ICA mixing tsv") mmix = pd.read_table(mixing_file).values if ctab is None: required_metrics = [ 'kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', 'dice_FT2', 'dice_FS0', 'signal-noise_t', 'variance explained', 'normalized variance explained', 'd_table_score' ] comptable = metrics.collect.generate_metrics( catd, data_oc, mmix, masksum_clf, tes, io_generator, 'ICA', metrics=required_metrics, ) comptable, metric_metadata = selection.kundu_selection_v2( comptable, n_echos, n_vols) else: comptable = pd.read_table(ctab) if manacc is not None: comptable, metric_metadata = selection.manual_selection( comptable, acc=manacc) # Write out ICA files. comp_names = comptable["Component"].values mixing_df = pd.DataFrame(data=mmix, columns=comp_names) io_generator.save_file(mixing_df, "ICA mixing tsv") betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask_denoise), mask_denoise) io_generator.save_file(betas_oc, 'z-scored ICA components img') # Save component table and associated json io_generator.save_file(comptable, "ICA metrics tsv") metric_metadata = metrics.collect.get_metadata(comptable) io_generator.save_file(metric_metadata, "ICA metrics json") decomp_metadata = { "Method": ("Independent components analysis with FastICA " "algorithm implemented by sklearn. "), } for comp_name in comp_names: decomp_metadata[comp_name] = { "Description": "ICA fit to dimensionally-reduced optimally combined data.", "Method": "tedana", } with open(io_generator.get_name("ICA decomposition json"), "w") as fo: json.dump(decomp_metadata, fo, sort_keys=True, indent=4) if comptable[comptable.classification == 'accepted'].shape[0] == 0: LGR.warning('No BOLD components detected! Please check data and ' 'results!') mmix_orig = mmix.copy() if tedort: acc_idx = comptable.loc[~comptable.classification.str. 
contains('rejected')].index.values rej_idx = comptable.loc[comptable.classification.str.contains( 'rejected')].index.values acc_ts = mmix[:, acc_idx] rej_ts = mmix[:, rej_idx] betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] pred_rej_ts = np.dot(acc_ts, betas) resid = rej_ts - pred_rej_ts mmix[:, rej_idx] = resid comp_names = [ io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) for comp in comptable.index.values ] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv") RepLGR.info("Rejected components' time series were then " "orthogonalized with respect to accepted components' time " "series.") io.writeresults(data_oc, mask=mask_denoise, comptable=comptable, mmix=mmix, n_vols=n_vols, io_generator=io_generator) if 'mir' in gscontrol: gsc.minimum_image_regression(data_oc, mmix, mask_denoise, comptable, io_generator) if verbose: io.writeresults_echoes(catd, mmix, mask_denoise, comptable, io_generator) # Write out BIDS-compatible description file derivative_metadata = { "Name": "tedana Outputs", "BIDSVersion": "1.5.0", "DatasetType": "derivative", "GeneratedBy": [{ "Name": "tedana", "Version": __version__, "Description": ("A denoising pipeline for the identification and removal " "of non-BOLD noise from multi-echo fMRI data."), "CodeURL": "https://github.com/ME-ICA/tedana" }] } with open(io_generator.get_name("data description json"), "w") as fo: json.dump(derivative_metadata, fo, sort_keys=True, indent=4) RepLGR.info("This workflow used numpy (Van Der Walt, Colbert, & " "Varoquaux, 2011), scipy (Jones et al., 2001), pandas " "(McKinney, 2010), scikit-learn (Pedregosa et al., 2011), " "nilearn, and nibabel (Brett et al., 2019).") RefLGR.info( "Van Der Walt, S., Colbert, S. C., & Varoquaux, G. (2011). The " "NumPy array: a structure for efficient numerical computation. " "Computing in Science & Engineering, 13(2), 22.") RefLGR.info("Jones E, Oliphant T, Peterson P, et al. SciPy: Open Source " "Scientific Tools for Python, 2001-, http://www.scipy.org/") RefLGR.info("McKinney, W. (2010, June). Data structures for statistical " "computing in python. In Proceedings of the 9th Python in " "Science Conference (Vol. 445, pp. 51-56).") RefLGR.info("Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., " "Thirion, B., Grisel, O., ... & Vanderplas, J. (2011). " "Scikit-learn: Machine learning in Python. Journal of machine " "learning research, 12(Oct), 2825-2830.") RefLGR.info("Brett, M., Markiewicz, C. J., Hanke, M., Côté, M.-A., " "Cipollini, B., McCarthy, P., … freec84. (2019, May 28). " "nipy/nibabel. Zenodo. http://doi.org/10.5281/zenodo.3233118") RepLGR.info("This workflow also used the Dice similarity index " "(Dice, 1945; Sørensen, 1948).") RefLGR.info("Dice, L. R. (1945). Measures of the amount of ecologic " "association between species. Ecology, 26(3), 297-302.") RefLGR.info( "Sørensen, T. J. (1948). A method of establishing groups of " "equal amplitude in plant sociology based on similarity of " "species content and its application to analyses of the " "vegetation on Danish commons. I kommission hos E.
Munksgaard.") with open(repname, 'r') as fo: report = [line.rstrip() for line in fo.readlines()] report = ' '.join(report) with open(refname, 'r') as fo: reference_list = sorted(list(set(fo.readlines()))) references = '\n'.join(reference_list) report += '\n\nReferences:\n\n' + references with open(repname, 'w') as fo: fo.write(report) if not no_reports: LGR.info( 'Making figures folder with static component maps and timecourse plots.' ) dn_ts, hikts, lowkts = io.denoise_ts(data_oc, mmix, mask_denoise, comptable) reporting.static_figures.carpet_plot( optcom_ts=data_oc, denoised_ts=dn_ts, hikts=hikts, lowkts=lowkts, mask=mask_denoise, io_generator=io_generator, gscontrol=gscontrol, ) reporting.static_figures.comp_figures( data_oc, mask=mask_denoise, comptable=comptable, mmix=mmix_orig, io_generator=io_generator, png_cmap=png_cmap, ) if sys.version_info.major == 3 and sys.version_info.minor < 6: warn_msg = ("Reports requested but Python version is less than " "3.6.0. Dynamic reports will not be generated.") LGR.warn(warn_msg) else: LGR.info('Generating dynamic report') reporting.generate_report(io_generator, tr=img_t_r) LGR.info('Workflow completed') utils.teardown_loggers() os.remove(refname)
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img, tes, algorithm='mdl', source_tes=-1, kdaw=10., rdaw=1., out_dir='.', verbose=False, low_mem=False): """ Use principal components analysis (PCA) to identify and remove thermal noise from multi-echo data. Parameters ---------- data_cat : (S x E x T) array_like Input functional data data_oc : (S x T) array_like Optimally combined time series data combmode : {'t2s', 'paid'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'paid' indicates using the method of Poser 2006 mask : (S,) array_like Boolean mask array t2s : (S,) array_like Limited map of voxel-wise T2* estimates. t2sG : (S,) array_like Full map of voxel-wise T2* estimates. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk tes : :obj:`list` List of echo times associated with `data_cat`, in milliseconds algorithm : {'mle', 'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional Method with which to select components in TEDPCA. Default is 'mdl'. PCA decomposition with the mdl, kic and aic options is based on a Moving Average (stationary Gaussian) process, and the options are ordered from most to least aggressive (see Li et al., 2007). source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional Which echos to use in PCA. Values -1 and 0 are special, where a value of -1 will indicate using the optimal combination of the echos and 0 will indicate using all the echos. A list can be provided to indicate a subset of echos. Default: -1 kdaw : :obj:`float`, optional Dimensionality augmentation weight for Kappa calculations. Must be a non-negative float, or -1 (a special value). Default is 10. rdaw : :obj:`float`, optional Dimensionality augmentation weight for Rho calculations. Must be a non-negative float, or -1 (a special value). Default is 1. out_dir : :obj:`str`, optional Output directory. verbose : :obj:`bool`, optional Whether to output files from fitmodels_direct or not. Default: False low_mem : :obj:`bool`, optional Whether to use incremental PCA (for low-memory systems) or not. Default: False Returns ------- kept_data : (S x T) :obj:`numpy.ndarray` Dimensionally reduced optimally combined functional data n_components : :obj:`int` Number of components retained from PCA decomposition Notes ----- ====================== ================================================= Notation Meaning ====================== ================================================= :math:`\\kappa` Component pseudo-F statistic for TE-dependent (BOLD) model. :math:`\\rho` Component pseudo-F statistic for TE-independent (artifact) model. :math:`v` Voxel :math:`V` Total number of voxels in mask :math:`\\zeta` Voxel-wise component weight (Z-statistic) :math:`c` Component :math:`p` Exponent applied to the weights (2 in practice) ====================== ================================================= Steps: 1. Variance normalize either multi-echo or optimally combined data, depending on settings. 2. Decompose normalized data using PCA or SVD. 3. Compute :math:`{\\kappa}` and :math:`{\\rho}`: .. math:: {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p} {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p} 4. Estimate significance thresholds for :math:`{\\kappa}` and :math:`{\\rho}` from the elbows of their sorted curves. 5. Classify components as thermal noise if they meet both of the following criteria: - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`. - Nonsignificant variance explained.
Outputs: This function writes out several files: ====================== ================================================= Filename Content ====================== ================================================= pca_decomposition.json PCA component table. pca_mixing.tsv PCA mixing matrix. pca_components.nii.gz Component weight maps. ====================== ================================================= """ if low_mem and algorithm == 'mle': LGR.warning('Low memory option is not compatible with MLE ' 'dimensionality estimation. Switching to Kundu decision ' 'tree.') algorithm = 'kundu' if algorithm == 'mle': alg_str = "using MLE dimensionality estimation (Minka, 2001)" RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality " "for PCA. In Advances in neural information processing " "systems (pp. 598-604).") elif algorithm == 'kundu': alg_str = ("followed by the Kundu component selection decision " "tree (Kundu et al., 2013)") RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192.") elif algorithm == 'kundu-stabilize': alg_str = ("followed by the 'stabilized' Kundu component " "selection decision tree (Kundu et al., 2013)") RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192.") else: alg_str = ("based on the PCA component estimation with a Moving Average " "(stationary Gaussian) process (Li et al., 2007)") RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). " "Estimating the number of independent components for " "functional magnetic resonance imaging data. " "Human brain mapping, 28(11), pp.1251-1266.") if source_tes == -1: dat_str = "the optimally combined data" elif source_tes == 0: dat_str = "the z-concatenated multi-echo data" else: dat_str = "a z-concatenated subset of echoes from the input data" RepLGR.info("Principal component analysis {0} was applied to " "{1} for dimensionality reduction.".format(alg_str, dat_str)) n_samp, n_echos, n_vols = data_cat.shape source_tes = np.array([int(ee) for ee in str(source_tes).split(',')]) if len(source_tes) == 1 and source_tes[0] == -1: LGR.info('Computing PCA of optimally combined multi-echo data') data = data_oc[mask, :][:, np.newaxis, :] elif len(source_tes) == 1 and source_tes[0] == 0: LGR.info('Computing PCA of spatially concatenated multi-echo data') data = data_cat[mask, ...]
else: LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes]))) data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1) eim = np.squeeze(_utils.eimask(data)) data = np.squeeze(data[eim]) data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T # var normalize ts data_z = (data_z - data_z.mean()) / data_z.std() # var normalize everything if algorithm in ['mdl', 'aic', 'kic']: data_img = io.new_nii_like( ref_img, utils.unmask(utils.unmask(data, eim), mask)) mask_img = io.new_nii_like(ref_img, utils.unmask(eim, mask).astype(int)) voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca( data_img, mask_img, algorithm) elif algorithm == 'mle': voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z) elif low_mem: voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z) varex_norm = varex / varex.sum() else: ppca = PCA(copy=False, n_components=(n_vols - 1)) ppca.fit(data_z) comp_ts = ppca.components_.T varex = ppca.explained_variance_ voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. / varex)) varex_norm = varex / varex.sum() # Compute Kappa and Rho for PCA comps eimum = np.atleast_2d(eim) eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1]) eimum = eimum.prod(axis=1) o = np.zeros((mask.shape[0], *eimum.shape[1:])) o[mask, ...] = eimum eimum = np.squeeze(o).astype(bool) # Normalize each component's time series vTmixN = stats.zscore(comp_ts, axis=0) comptable, _, _, _ = metrics.dependence_metrics(data_cat, data_oc, comp_ts, t2s, tes, ref_img, reindex=False, mmixN=vTmixN, algorithm=None, label='mepca_', out_dir=out_dir, verbose=verbose) # varex_norm from PCA overrides varex_norm from dependence_metrics, # but we retain the original comptable['estimated normalized variance explained'] = \ comptable['normalized variance explained'] comptable['normalized variance explained'] = varex_norm # write component maps to 4D image comp_ts_z = stats.zscore(comp_ts, axis=0) comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask) io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img) # Select components using decision tree if algorithm == 'kundu': comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False) elif algorithm == 'kundu-stabilize': comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True) elif algorithm == 'mle': LGR.info('Selected {0} components with MLE dimensionality ' 'detection'.format(comptable.shape[0])) comptable['classification'] = 'accepted' comptable['rationale'] = '' elif algorithm in ['mdl', 'aic', 'kic']: LGR.info('Selected {0} components with {1} dimensionality ' 'detection'.format(comptable.shape[0], algorithm)) comptable['classification'] = 'accepted' comptable['rationale'] = '' # Save decomposition comp_names = [io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max()) for comp in comptable.index.values] mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names) mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False) data_type = 'optimally combined data' if source_tes == -1 else 'z-concatenated data' comptable['Description'] = 'PCA fit to {0}.'.format(data_type) mmix_dict = {} mmix_dict['Method'] = ('Principal components analysis implemented by ' 'sklearn. Components are sorted by variance ' 'explained in descending order. 
' 'Component signs are flipped to best match the ' 'data.') io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'), label='pca', metadata=mmix_dict) acc = comptable[comptable.classification == 'accepted'].index.values n_components = acc.size voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc]) kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T) kept_data = stats.zscore(kept_data, axis=1) # variance normalize time series kept_data = stats.zscore(kept_data, axis=None) # variance normalize everything return kept_data, n_components
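# The tail of ``tedpca`` above reconstructs a denoised dataset from the
# accepted components alone: variance-scaled voxel weights are multiplied
# against the retained component time series, then the result is variance
# normalized. A self-contained sketch of that reconstruction on synthetic
# data, mirroring the computation above (the function name and the choice of
# 'accepted' indices are illustrative only):


def _example_pca_reconstruction():
    import numpy as np
    from scipy import stats
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    n_voxels, n_vols = 200, 50
    data_z = stats.zscore(rng.standard_normal((n_voxels, n_vols)), axis=1)

    ppca = PCA(copy=False, n_components=n_vols - 1)
    ppca.fit(data_z)
    comp_ts = ppca.components_.T  # (n_vols, n_components)
    varex = ppca.explained_variance_
    voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. / varex))

    acc = np.arange(10)  # pretend the first ten components were accepted
    voxel_kept_comp_weighted = voxel_comp_weights[:, acc] * varex[None, acc]
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)
    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything
    return kept_data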
def tedpca( data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, io_generator, tes, algorithm="aic", kdaw=10.0, rdaw=1.0, verbose=False, low_mem=False, ): """ Use principal components analysis (PCA) to identify and remove thermal noise from multi-echo data. Parameters ---------- data_cat : (S x E x T) array_like Input functional data data_oc : (S x T) array_like Optimally combined time series data combmode : {'t2s', 'paid'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'paid' indicates using the method of Poser 2006 mask : (S,) array_like Boolean mask array adaptive_mask : (S,) array_like Array where each value indicates the number of echoes with good signal for that voxel. This mask may be thresholded; for example, with values less than 3 set to 0. For more information on thresholding, see `make_adaptive_mask`. t2sG : (S,) array_like Map of voxel-wise T2* estimates. io_generator : :obj:`tedana.io.OutputGenerator` The output generation object for this workflow tes : :obj:`list` List of echo times associated with `data_cat`, in milliseconds algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional Method with which to select components in TEDPCA. PCA decomposition with the mdl, kic and aic options is based on a Moving Average (stationary Gaussian) process, and the options are ordered from most to least aggressive (see Li et al., 2007). If a float is provided, then it is assumed to represent percentage of variance explained (0-1) to retain from PCA. If an int is provided, then it is assumed to be the number of components to select. Default is 'aic'. kdaw : :obj:`float`, optional Dimensionality augmentation weight for Kappa calculations. Must be a non-negative float, or -1 (a special value). Default is 10. rdaw : :obj:`float`, optional Dimensionality augmentation weight for Rho calculations. Must be a non-negative float, or -1 (a special value). Default is 1. verbose : :obj:`bool`, optional Whether to output files from fitmodels_direct or not. Default: False low_mem : :obj:`bool`, optional Whether to use incremental PCA (for low-memory systems) or not. This is only compatible with the "kundu" or "kundu-stabilize" algorithms. Default: False Returns ------- kept_data : (S x T) :obj:`numpy.ndarray` Dimensionally reduced optimally combined functional data n_components : :obj:`int` Number of components retained from PCA decomposition Notes ----- ====================== ================================================= Notation Meaning ====================== ================================================= :math:`\\kappa` Component pseudo-F statistic for TE-dependent (BOLD) model. :math:`\\rho` Component pseudo-F statistic for TE-independent (artifact) model. :math:`v` Voxel :math:`V` Total number of voxels in mask :math:`\\zeta` Voxel-wise component weight (Z-statistic) :math:`c` Component :math:`p` Exponent applied to the weights (2 in practice) ====================== ================================================= Steps: 1. Variance normalize either multi-echo or optimally combined data, depending on settings. 2. Decompose normalized data using PCA or SVD. 3. Compute :math:`{\\kappa}` and :math:`{\\rho}`: .. math:: {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p} {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p} 4. Estimate significance thresholds for :math:`{\\kappa}` and :math:`{\\rho}` from the elbows of their sorted curves. 5. Classify components as thermal noise if they meet both of the following criteria: - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
- Nonsignificant variance explained. Outputs: This function writes out several files: =========================== ============================================= Default Filename Content =========================== ============================================= desc-PCA_metrics.tsv PCA component table desc-PCA_metrics.json Metadata sidecar file describing the component table desc-PCA_mixing.tsv PCA mixing matrix desc-PCA_components.nii.gz Component weight maps desc-PCA_decomposition.json Metadata sidecar file describing the PCA decomposition =========================== ============================================= See Also -------- :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask`` parameter. :py:mod:`tedana.constants` : The module describing the filenames for various naming conventions """ if algorithm == "kundu": alg_str = "followed by the Kundu component selection decision tree (Kundu et al., 2013)" RefLGR.info( "Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192." ) elif algorithm == "kundu-stabilize": alg_str = ( "followed by the 'stabilized' Kundu component " "selection decision tree (Kundu et al., 2013)" ) RefLGR.info( "Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192." ) elif isinstance(algorithm, Number): if isinstance(algorithm, float): alg_str = ( "in which the number of components was determined based on a " "variance explained threshold" ) else: alg_str = "in which the number of components is pre-defined" else: alg_str = ( "based on the PCA component estimation with a Moving Average " "(stationary Gaussian) process (Li et al., 2007)" ) RefLGR.info( "Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). " "Estimating the number of independent components for " "functional magnetic resonance imaging data. " "Human brain mapping, 28(11), pp.1251-1266."
) RepLGR.info( "Principal component analysis {0} was applied to " "the optimally combined data for dimensionality " "reduction.".format(alg_str) ) n_samp, n_echos, n_vols = data_cat.shape LGR.info( f"Computing PCA of optimally combined multi-echo data with selection criteria: {algorithm}" ) data = data_oc[mask, :] data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T # var normalize ts data_z = (data_z - data_z.mean()) / data_z.std() # var normalize everything if algorithm in ["mdl", "aic", "kic"]: data_img = io.new_nii_like(io_generator.reference_img, utils.unmask(data, mask)) mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int)) ma_pca = MovingAveragePCA(criterion=algorithm, normalize=True) _ = ma_pca.fit_transform(data_img, mask_img) # Extract results from maPCA voxel_comp_weights = ma_pca.u_ varex = ma_pca.explained_variance_ varex_norm = ma_pca.explained_variance_ratio_ comp_ts = ma_pca.components_.T aic = ma_pca.aic_ kic = ma_pca.kic_ mdl = ma_pca.mdl_ varex_90 = ma_pca.varexp_90_ varex_95 = ma_pca.varexp_95_ all_comps = ma_pca.all_ # Extract number of components and variance explained for logging and plotting n_aic = aic["n_components"] aic_varexp = np.round(aic["explained_variance_total"], 3) n_kic = kic["n_components"] kic_varexp = np.round(kic["explained_variance_total"], 3) n_mdl = mdl["n_components"] mdl_varexp = np.round(mdl["explained_variance_total"], 3) n_varex_90 = varex_90["n_components"] varex_90_varexp = np.round(varex_90["explained_variance_total"], 3) n_varex_95 = varex_95["n_components"] varex_95_varexp = np.round(varex_95["explained_variance_total"], 3) all_varex = np.round(all_comps["explained_variance_total"], 3) # Print out the results LGR.info("Optimal number of components based on different criteria:") LGR.info( f"AIC: {n_aic} | KIC: {n_kic} | MDL: {n_mdl} | 90% varexp: {n_varex_90} " f"| 95% varexp: {n_varex_95}" ) LGR.info("Explained variance based on different criteria:") LGR.info( f"AIC: {aic_varexp}% | KIC: {kic_varexp}% | MDL: {mdl_varexp}% | " f"90% varexp: {varex_90_varexp}% | 95% varexp: {varex_95_varexp}%" ) pca_optimization_curves = np.array([aic["value"], kic["value"], mdl["value"]]) pca_criteria_components = np.array( [ n_aic, n_kic, n_mdl, n_varex_90, n_varex_95, ] ) # Plot maPCA optimization curves LGR.info("Plotting maPCA optimization curves") plot_pca_results(pca_optimization_curves, pca_criteria_components, all_varex, io_generator) # Save maPCA results into a dictionary mapca_results = { "aic": { "n_components": n_aic, "explained_variance_total": aic_varexp, "curve": aic["value"], }, "kic": { "n_components": n_kic, "explained_variance_total": kic_varexp, "curve": kic["value"], }, "mdl": { "n_components": n_mdl, "explained_variance_total": mdl_varexp, "curve": mdl["value"], }, "varex_90": { "n_components": n_varex_90, "explained_variance_total": varex_90_varexp, }, "varex_95": { "n_components": n_varex_95, "explained_variance_total": varex_95_varexp, }, } # Save dictionary io_generator.save_file(mapca_results, "PCA cross component metrics json") elif isinstance(algorithm, Number): ppca = PCA(copy=False, n_components=algorithm, svd_solver="full") ppca.fit(data_z) comp_ts = ppca.components_.T varex = ppca.explained_variance_ voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1.0 / varex)) varex_norm = ppca.explained_variance_ratio_ elif low_mem: voxel_comp_weights, varex, varex_norm, comp_ts = low_mem_pca(data_z) else: ppca = PCA(copy=False, n_components=(n_vols - 1)) ppca.fit(data_z) comp_ts = 
ppca.components_.T varex = ppca.explained_variance_ voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1.0 / varex)) varex_norm = ppca.explained_variance_ratio_ # Compute Kappa and Rho for PCA comps required_metrics = [ "kappa", "rho", "countnoise", "countsigFT2", "countsigFS0", "dice_FT2", "dice_FS0", "signal-noise_t", "variance explained", "normalized variance explained", "d_table_score", ] comptable = metrics.collect.generate_metrics( data_cat, data_oc, comp_ts, adaptive_mask, tes, io_generator, "PCA", metrics=required_metrics, ) # varex_norm from PCA overrides varex_norm from dependence_metrics, # but we retain the original comptable["estimated normalized variance explained"] = comptable[ "normalized variance explained" ] comptable["normalized variance explained"] = varex_norm # write component maps to 4D image comp_maps = utils.unmask(computefeats2(data_oc, comp_ts, mask), mask) io_generator.save_file(comp_maps, "z-scored PCA components img") # Select components using decision tree if algorithm == "kundu": comptable, metric_metadata = kundu_tedpca( comptable, n_echos, kdaw, rdaw, stabilize=False, ) elif algorithm == "kundu-stabilize": comptable, metric_metadata = kundu_tedpca( comptable, n_echos, kdaw, rdaw, stabilize=True, ) else: if isinstance(algorithm, float): alg_str = "variance explained-based" elif isinstance(algorithm, int): alg_str = "a fixed number of components and no" else: alg_str = algorithm LGR.info( f"Selected {comptable.shape[0]} components with {round(100*varex_norm.sum(),2)}% " f"normalized variance explained using {alg_str} dimensionality detection" ) comptable["classification"] = "accepted" comptable["rationale"] = "" # Save decomposition files comp_names = [ io.add_decomp_prefix(comp, prefix="pca", max_value=comptable.index.max()) for comp in comptable.index.values ] mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names) io_generator.save_file(mixing_df, "PCA mixing tsv") # Save component table and associated json io_generator.save_file(comptable, "PCA metrics tsv") metric_metadata = metrics.collect.get_metadata(comptable) io_generator.save_file(metric_metadata, "PCA metrics json") decomp_metadata = { "Method": ( "Principal components analysis implemented by sklearn. " "Components are sorted by variance explained in descending order. " ), } for comp_name in comp_names: decomp_metadata[comp_name] = { "Description": "PCA fit to optimally combined data.", "Method": "tedana", } io_generator.save_file(decomp_metadata, "PCA decomposition json") acc = comptable[comptable.classification == "accepted"].index.values n_components = acc.size voxel_kept_comp_weighted = voxel_comp_weights[:, acc] * varex[None, acc] kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T) kept_data = stats.zscore(kept_data, axis=1) # variance normalize time series kept_data = stats.zscore(kept_data, axis=None) # variance normalize everything return kept_data, n_components
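# When ``algorithm`` is a float, component selection above is delegated to
# scikit-learn: a float ``n_components`` in (0, 1) combined with
# ``svd_solver='full'`` keeps the smallest number of components whose
# cumulative explained variance exceeds that fraction. A brief, runnable
# sketch with random data (the function name and shapes are arbitrary):


def _example_variance_threshold_pca():
    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(1)
    data_z = rng.standard_normal((300, 40))

    ppca = PCA(n_components=0.9, svd_solver="full")
    ppca.fit(data_z)

    # Enough components are retained to explain at least 90% of the variance
    assert ppca.explained_variance_ratio_.sum() >= 0.9
    return ppca.n_components_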
def fitmodels_direct(catd, mmix, mask, t2s, t2s_full, tes, combmode, ref_img, reindex=False, mmixN=None, full_sel=True, label=None, out_dir='.', verbose=False): """ Fit TE-dependence and -independence models to components. Parameters ---------- catd : (S x E x T) array_like Input data, where `S` is samples, `E` is echos, and `T` is time mmix : (T x C) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `catd` mask : (S [x E]) array_like Boolean mask array t2s : (S [x T]) array_like Limited T2* map or timeseries. t2s_full : (S [x T]) array_like Full T2* map or timeseries. For voxels with good signal in only one echo, which are zeros in the limited T2* map, this map uses the T2* estimate using the first two echoes. tes : list List of echo times associated with `catd`, in milliseconds combmode : {'t2s', 'paid'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'paid' indicates using the method of Poser 2006 ref_img : str or img_like Reference image to dictate how outputs are saved to disk reindex : bool, optional Whether to sort components in descending order by Kappa. Default: False mmixN : (T x C) array_like, optional Z-scored mixing matrix. Default: None full_sel : bool, optional Whether to perform selection of components based on Rho/Kappa scores. Default: True label : str or None, optional Prefix to apply to generated files. Default: None out_dir : str, optional Output directory for generated files. Default: '.' verbose : bool, optional Whether to generate additional files. Default: False Returns ------- seldict : dict comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index is the component number. betas : :obj:`numpy.ndarray` mmix_new : :obj:`numpy.ndarray` """ if not (catd.shape[0] == t2s.shape[0] == t2s_full.shape[0] == mask.shape[0]): raise ValueError('First dimensions (number of samples) of catd ({0}), ' 't2s ({1}), t2s_full ({2}), and mask ({3}) do not ' 'match'.format(catd.shape[0], t2s.shape[0], t2s_full.shape[0], mask.shape[0])) elif catd.shape[1] != len(tes): raise ValueError('Second dimension of catd ({0}) does not match ' 'number of echoes provided (tes; ' '{1})'.format(catd.shape[1], len(tes))) elif catd.shape[2] != mmix.shape[0]: raise ValueError('Third dimension (number of volumes) of catd ({0}) ' 'does not match first dimension of ' 'mmix ({1})'.format(catd.shape[2], mmix.shape[0])) elif t2s.shape != t2s_full.shape: raise ValueError('Shape of t2s array {0} does not match shape of ' 't2s_full array {1}'.format(t2s.shape, t2s_full.shape)) elif t2s.ndim == 2: if catd.shape[2] != t2s.shape[1]: raise ValueError('Third dimension (number of volumes) of catd ' '({0}) does not match second dimension of ' 't2s ({1})'.format(catd.shape[2], t2s.shape[1])) mask = t2s != 0 # Override input mask with the map of voxels that have nonzero T2* estimates # compute optimal combination of raw data tsoc = combine.make_optcom(catd, tes, mask, t2s=t2s_full, combmode=combmode, verbose=False).astype(float)[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None) tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 # compute skews to determine signs based on unnormalized weights, # correct mmix & WTS signs based on spatial distribution tails signs = stats.skew(WTS, axis=0) signs /= np.abs(signs) mmix = mmix.copy() mmix *= signs WTS *= signs PSC *= signs totvar = (tsoc_B**2).sum() totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis betas =
get_coeffs(catd, mmix, np.repeat(mask[:, np.newaxis], len(tes), axis=1)) n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() mu = catd.mean(axis=-1, dtype=float) tes = np.reshape(tes, (n_echos, 1)) fmin, _, _ = utils.getfbounds(n_echos) # mask arrays mumask = mu[t2s != 0] t2smask = t2s[t2s != 0] betamask = betas[t2s != 0] # set up Xmats X1 = mumask.T # Model 1 X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T # Model 2 # tables for component selection kappas = np.zeros([n_components]) rhos = np.zeros([n_components]) varex = np.zeros([n_components]) varex_norm = np.zeros([n_components]) Z_maps = np.zeros([n_voxels, n_components]) F_R2_maps = np.zeros([n_data_voxels, n_components]) F_S0_maps = np.zeros([n_data_voxels, n_components]) Z_clmaps = np.zeros([n_voxels, n_components]) F_R2_clmaps = np.zeros([n_data_voxels, n_components]) F_S0_clmaps = np.zeros([n_data_voxels, n_components]) Br_R2_clmaps = np.zeros([n_voxels, n_components]) Br_S0_clmaps = np.zeros([n_voxels, n_components]) pred_R2_maps = np.zeros([n_data_voxels, n_echos, n_components]) pred_S0_maps = np.zeros([n_data_voxels, n_echos, n_components]) LGR.info('Fitting TE- and S0-dependent models to components') for i_comp in range(n_components): # size of B is (n_echoes, n_samples) B = np.atleast_3d(betamask)[:, :, i_comp].T alpha = (np.abs(B)**2).sum(axis=0) varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100. varex_norm[i_comp] = (utils.unmask(WTS, mask)[t2s != 0][:, i_comp]**2).sum() /\ totvar_norm * 100. # S0 Model # (S,) model coefficient map coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0) pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1)) pred_S0_maps[:, :, i_comp] = pred_S0.T SSE_S0 = (B - pred_S0)**2 SSE_S0 = SSE_S0.sum(axis=0) # (S,) prediction error map F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0) F_S0_maps[:, i_comp] = F_S0 # R2 Model coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0) pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1)) pred_R2_maps[:, :, i_comp] = pred_R2.T SSE_R2 = (B - pred_R2)**2 SSE_R2 = SSE_R2.sum(axis=0) F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2) F_R2_maps[:, i_comp] = F_R2 # compute weights as Z-values wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std() wtsZ[np.abs(wtsZ) > Z_MAX] = ( Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX] Z_maps[:, i_comp] = wtsZ # compute Kappa and Rho F_S0[F_S0 > F_MAX] = F_MAX F_R2[F_R2 > F_MAX] = F_MAX norm_weights = np.abs( np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.)) kappas[i_comp] = np.average(F_R2, weights=norm_weights) rhos[i_comp] = np.average(F_S0, weights=norm_weights) # tabulate component values comptable = np.vstack([kappas, rhos, varex, varex_norm]).T if reindex: # re-index all components in Kappa order sort_idx = comptable[:, 0].argsort()[::-1] comptable = comptable[sort_idx, :] mmix_new = mmix[:, sort_idx] betas = betas[..., sort_idx] pred_R2_maps = pred_R2_maps[:, :, sort_idx] pred_S0_maps = pred_S0_maps[:, :, sort_idx] F_S0_maps = F_S0_maps[:, sort_idx] F_R2_maps = F_R2_maps[:, sort_idx] Z_maps = Z_maps[:, sort_idx] WTS = WTS[:, sort_idx] PSC = PSC[:, sort_idx] tsoc_B = tsoc_B[:, sort_idx] tsoc_Babs = tsoc_Babs[:, sort_idx] else: mmix_new = mmix if verbose: # Echo-specific weight maps for each of the ICA components. io.filewrite(betas, op.join(out_dir, '{0}betas_catd.nii'.format(label)), ref_img) # Echo-specific maps of predicted values for R2 and S0 models for each # component. 
        io.filewrite(utils.unmask(pred_R2_maps, mask),
                     op.join(out_dir, '{0}R2_pred.nii'.format(label)),
                     ref_img)
        io.filewrite(utils.unmask(pred_S0_maps, mask),
                     op.join(out_dir, '{0}S0_pred.nii'.format(label)),
                     ref_img)
        # Weight maps used to average metrics across voxels
        io.filewrite(utils.unmask(Z_maps**2., mask),
                     op.join(out_dir, '{0}metric_weights.nii'.format(label)),
                     ref_img)

    comptable = pd.DataFrame(comptable,
                             columns=['kappa', 'rho',
                                      'variance explained',
                                      'normalized variance explained'])
    comptable.index.name = 'component'

    # full selection including clustering criteria
    seldict = None
    if full_sel:
        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i_comp in range(n_components):
            # Cluster-extent threshold and binarize F-maps
            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_R2_maps[:, i_comp], t2s != 0)))
            F_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFR2 = F_R2_clmaps[:, i_comp].sum()

            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_S0_maps[:, i_comp], t2s != 0)))
            F_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFS0 = F_S0_clmaps[:, i_comp].sum()

            # Cluster-extent threshold and binarize Z-maps with CDT of
            # p < 0.05
            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(Z_maps[:, i_comp], t2s != 0)))
            Z_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=1.95, mask=mask,
                binarize=True)

            # Cluster-extent threshold and binarize ranked signal-change map.
            # max(tsoc_Babs.shape) is the number of voxels, so these rank
            # thresholds retain roughly as many top-ranked voxels as were
            # significant in the corresponding F-maps.
            ccimg = io.new_nii_like(
                ref_img,
                utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), t2s != 0))
            Br_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFR2), mask=mask,
                binarize=True)
            Br_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFS0), mask=mask,
                binarize=True)

        # Gather component-specific metric maps for component selection
        seldict = {
            'WTS': WTS,
            'tsoc_B': tsoc_B,
            'PSC': PSC,
            'Z_maps': Z_maps,
            'F_R2_maps': F_R2_maps,
            'F_S0_maps': F_S0_maps,
            'Z_clmaps': Z_clmaps,
            'F_R2_clmaps': F_R2_clmaps,
            'F_S0_clmaps': F_S0_clmaps,
            'Br_R2_clmaps': Br_R2_clmaps,
            'Br_S0_clmaps': Br_S0_clmaps,
        }

    return seldict, comptable, betas, mmix_new
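# The function below is an illustrative, numpy-only sketch (not part of the
# original module) of the per-voxel S0- and R2-model fits performed in
# fitmodels_direct above. All names, shapes, and values are assumptions for
# demonstration only. Each model is a one-parameter least-squares fit per
# voxel, so the coefficient reduces to a normalized dot product, and the
# F-statistic compares explained to residual sum of squares with
# n_echos - 1 residual degrees of freedom.
def _example_te_dependence_fits():
    import numpy as np

    rng = np.random.default_rng(0)
    n_echos, n_data_voxels = 4, 100
    tes = np.array([[15.], [30.], [45.], [60.]])  # echo times in ms
    B = rng.standard_normal((n_echos, n_data_voxels))  # echo-wise betas
    mu = rng.uniform(1., 2., (n_echos, n_data_voxels))  # mean signal
    t2s = rng.uniform(20., 80., n_data_voxels)  # limited T2* estimates

    X1 = mu  # S0 model: betas scale with mean signal, TE-independent
    X2 = tes * mu / t2s  # R2 model: betas scale with TE * mu / T2*

    for X in (X1, X2):
        # One-parameter OLS per voxel: project betas onto the design column
        coeffs = (B * X).sum(axis=0) / (X**2).sum(axis=0)
        pred = X * coeffs
        sse = ((B - pred)**2).sum(axis=0)
        ssq = (B**2).sum(axis=0)
        f_stat = (ssq - sse) * (n_echos - 1) / sse  # per-voxel F map
        print(f_stat.mean())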
def threshold_to_match(maps, n_sig_voxels, mask, ref_img, csize=None):
    """Cluster-extent threshold a map to target number of significant voxels.

    Resulting maps have roughly the requested number of significant voxels,
    after cluster-extent thresholding.

    Parameters
    ----------
    maps : (M x C) array_like
        Statistical maps to be thresholded.
    n_sig_voxels : (C) array_like
        Number of significant voxels to threshold to, for each map in maps.
    mask : (S) array_like
        Binary mask.
    ref_img : img_like
        Reference image to convert to niimgs with.
    csize : :obj:`int` or :obj:`None`, optional
        Minimum cluster size. If None, standard thresholding
        (non-cluster-extent) will be done. Default is None.

    Returns
    -------
    clmaps : (M x C) array_like
        Cluster-extent thresholded and binarized maps.
    """
    assert maps.shape[1] == n_sig_voxels.shape[0]
    n_voxels, n_components = maps.shape
    abs_maps = np.abs(maps)
    if csize is None:
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
    else:
        csize = int(csize)

    clmaps = np.zeros([n_voxels, n_components], bool)
    for i_comp in range(n_components):
        # Initial cluster-defining threshold is defined based on the number
        # of significant voxels from the F-statistic maps. This threshold
        # will be relaxed until the number of significant voxels from both
        # maps is roughly equal.
        ccimg = io.new_nii_like(
            ref_img,
            utils.unmask(stats.rankdata(abs_maps[:, i_comp]), mask))
        step = int(n_sig_voxels[i_comp] / 10)
        rank_thresh = n_voxels - n_sig_voxels[i_comp]

        while True:
            clmap = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=rank_thresh,
                mask=mask,
                binarize=True,
            )
            if rank_thresh <= 0:  # all voxels significant
                break

            diff = n_sig_voxels[i_comp] - clmap.sum()
            if diff < 0 or clmap.sum() == 0:
                # Overshot the target: step the threshold back up once and
                # keep that map
                rank_thresh += step
                clmap = utils.threshold_map(
                    ccimg,
                    min_cluster_size=csize,
                    threshold=rank_thresh,
                    mask=mask,
                    binarize=True,
                )
                break
            else:
                # Undershot the target: relax the threshold and try again
                rank_thresh -= step
        clmaps[:, i_comp] = clmap
    return clmaps
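# The function below is an illustrative sketch (not part of the original
# module) of the rank-based threshold search used in threshold_to_match
# above, with a plain rank cutoff standing in for utils.threshold_map so the
# example stays numpy-only; the function name and all values are assumptions
# for demonstration. The loop starts at the rank that would yield exactly
# the target count, relaxes the cutoff in steps of target / 10, and steps
# back once when it overshoots.
def _example_rank_threshold_search():
    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    abs_map = np.abs(rng.standard_normal(1000))
    ranks = stats.rankdata(abs_map)  # 1-based ranks, as in threshold_to_match
    target = 50  # desired number of significant voxels

    step = int(target / 10)
    rank_thresh = ranks.size - target
    while True:
        clmap = ranks > rank_thresh  # stand-in for cluster-extent threshold
        if rank_thresh <= 0:
            break  # everything is significant
        diff = target - clmap.sum()
        if diff < 0 or clmap.sum() == 0:
            rank_thresh += step  # overshot: back off one step and stop
            clmap = ranks > rank_thresh
            break
        else:
            rank_thresh -= step  # undershot: relax the threshold further
    print(clmap.sum())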