Пример #1
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           stabilize,
           ref_img,
           tes,
           kdaw,
           rdaw,
           ste=0,
           wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:,
                                              np.newaxis, :])][:,
                                                               np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1],
                     axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        voxel_comp_weights, varex, comp_ts = run_svd(dz)

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()
        eigenvalue_elbow = getelbow(varex_norm, return_val=True)

        diff_varex_norm = np.abs(np.diff(varex_norm))
        lower_diff_varex_norm = diff_varex_norm[(len(diff_varex_norm) // 2):]
        varex_norm_thr = np.mean(
            [lower_diff_varex_norm.max(),
             diff_varex_norm.min()])
        varex_norm_min = varex_norm[
            (len(diff_varex_norm) // 2) + np.arange(len(lower_diff_varex_norm))
            [lower_diff_varex_norm >= varex_norm_thr][0] + 1]
        varex_norm_cum = np.cumsum(varex_norm)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df,
            'eigenvalue_elbow': eigenvalue_elbow,
            'varex_norm_min': varex_norm_min,
            'varex_norm_cum': varex_norm_cum
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        voxel_comp_weights, varex = pcastate['voxel_comp_weights'], pcastate[
            'varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']
        eigenvalue_elbow = pcastate['eigenvalue_elbow']
        varex_norm_min = pcastate['varex_norm_min']
        varex_norm_cum = pcastate['varex_norm_cum']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted(
        [fmin, getelbow(ct_df['kappa'], return_val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted(
        [fmin, getelbow_cons(ct_df['rho'], return_val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        lim_idx = utils.andb([ct_df['kappa'] < fmid,
                              ct_df['kappa'] > fmin]) == 2
        kappa_lim = ct_df.loc[lim_idx, 'kappa'].values
        kappa_thr = kappa_lim[getelbow(kappa_lim)]

        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]
        stabilize = True
    elif int(rdaw) == -1:
        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Reject if low Kappa, Rho, and variance explained
    is_lowk = ct_df['kappa'] <= kappa_thr
    is_lowr = ct_df['rho'] <= rho_thr
    is_lowe = ct_df['normalized variance explained'] <= eigenvalue_elbow
    is_lowkre = is_lowk & is_lowr & is_lowe
    ct_df.loc[is_lowkre, 'classification'] = 'rejected'
    ct_df.loc[is_lowkre, 'rationale'] += 'low rho, kappa, and varex;'

    # Reject if low variance explained
    is_lows = ct_df['normalized variance explained'] <= varex_norm_min
    ct_df.loc[is_lows, 'classification'] = 'rejected'
    ct_df.loc[is_lows, 'rationale'] += 'low variance explained;'

    # Reject if Kappa over limit
    is_fmax1 = ct_df['kappa'] == F_MAX
    ct_df.loc[is_fmax1, 'classification'] = 'rejected'
    ct_df.loc[is_fmax1, 'rationale'] += 'kappa equals fmax;'

    # Reject if Rho over limit
    is_fmax2 = ct_df['rho'] == F_MAX
    ct_df.loc[is_fmax2, 'classification'] = 'rejected'
    ct_df.loc[is_fmax2, 'rationale'] += 'rho equals fmax;'

    if stabilize:
        temp7 = varex_norm_cum >= 0.95
        ct_df.loc[temp7, 'classification'] = 'rejected'
        ct_df.loc[temp7, 'rationale'] += 'cumulative var. explained above 95%;'
        under_fmin1 = ct_df['kappa'] <= fmin
        ct_df.loc[under_fmin1, 'classification'] = 'rejected'
        ct_df.loc[under_fmin1, 'rationale'] += 'kappa below fmin;'
        under_fmin2 = ct_df['rho'] <= fmin
        ct_df.loc[under_fmin2, 'classification'] = 'rejected'
        ct_df.loc[under_fmin2, 'rationale'] += 'rho below fmin;'

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize timeseries
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
Пример #2
0
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : bool
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    tes : list
        List of echo times associated with `catd`, in milliseconds
    kdaw : float
        Dimensionality augmentation weight for Kappa calculations
    rdaw : float
        Dimensionality augmentation weight for Rho calculations
    ste : int or list-of-int, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : bool, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True

    Returns
    -------
    n_components : int
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif)//2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
                                                     tes, combmode, ref_img,
                                                     mmixN=vTmixN, full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                    'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: {}'.format('pcastate.pkl'))
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted([fmin, getelbow_mod(kappas, val=True)/2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted([fmin, getelbow_cons(rhos, val=True)/2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=np.int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
Пример #3
0
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG,
           ref_img, tes, algorithm='mle', source_tes=-1, kdaw=10., rdaw=1.,
           out_dir='.', verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
                data_cat, data_oc, comp_ts, t2s, tes, ref_img,
                reindex=False, mmixN=vTmixN, algorithm=None,
                label='mepca_', out_dir=out_dir, verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    np.savetxt('mepca_mix.1D', comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv('comp_table_pca.txt', sep='\t', index=True,
                     index_label='component', float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] *
                                varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
Пример #4
0
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG,
           ref_img, tes, algorithm='mdl', source_tes=-1, kdaw=10., rdaw=1.,
           out_dir='.', verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'. PCA
        decomposition with the mdl, kic and aic options are based on a Moving Average
        (stationary Gaussian) process and are ordered from most to least aggresive.
        See (Li et al., 2007).
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    if algorithm == 'mle':
        alg_str = "using MLE dimensionality estimation (Minka, 2001)"
        RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality "
                    "for PCA. In Advances in neural information processing "
                    "systems (pp. 598-604).")
    elif algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    else:
        alg_str = ("based on the PCA component estimation with a Moving Average"
                   "(stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")

    if source_tes == -1:
        dat_str = "the optimally combined data"
    elif source_tes == 0:
        dat_str = "the z-concatenated multi-echo data"
    else:
        dat_str = "a z-concatenated subset of echoes from the input data"

    RepLGR.info("Principal component analysis {0} was applied to "
                "{1} for dimensionality reduction.".format(alg_str, dat_str))

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(_utils.eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(
            ref_img, utils.unmask(utils.unmask(data, eim), mask))
        mask_img = io.new_nii_like(ref_img,
                                   utils.unmask(eim, mask).astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat,
                                                    data_oc,
                                                    comp_ts,
                                                    t2s,
                                                    tes,
                                                    ref_img,
                                                    reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    elif algorithm in ['mdl', 'aic', 'kic']:
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], algorithm))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max())
                  for comp in comptable.index.values]

    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False)

    data_type = 'optimally combined data' if source_tes == -1 else 'z-concatenated data'
    comptable['Description'] = 'PCA fit to {0}.'.format(data_type)
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
                      label='pca', metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
Пример #5
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           ref_img,
           tes,
           method='mle',
           ste=-1,
           kdaw=10.,
           rdaw=1.,
           wvpca=False,
           verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[mask, :][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask, ...]
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee, :] for ee in ste - 1], axis=1)

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    fname = op.abspath('pcastate.pkl')
    if op.exists('pcastate.pkl'):
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)

        if pcastate['method'] != method:
            LGR.warning('Method from PCA state file ({0}) does not match '
                        'requested method ({1}).'.format(
                            pcastate['method'], method))
            state_found = False
        else:
            state_found = True
    else:
        state_found = False

    if not state_found:
        if method == 'mle':
            voxel_comp_weights, varex, comp_ts = run_mlepca(dz)
        else:
            ppca = PCA()
            ppca.fit(dz)
            comp_ts = ppca.components_
            varex = ppca.explained_variance_
            voxel_comp_weights = np.dot(np.dot(dz, comp_ts.T),
                                        np.diag(1. / varex))

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask, ...] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False,
                                                       label='mepca_',
                                                       verbose=verbose)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        pcastate = {
            'method': method,
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df
        }

        # Save state
        LGR.info('Saving PCA results to: {}'.format(fname))

        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Select components using decision tree
    if method == 'kundu':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=False)
    elif method == 'kundu-stabilize':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=True)
    elif method == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(ct_df.shape[0]))

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
Пример #6
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           stabilize,
           ref_img,
           tes,
           kdaw,
           rdaw,
           ste=0,
           mlepca=True,
           wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : :obj:`bool`, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:,
                                              np.newaxis, :])][:,
                                                               np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1],
                     axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, return_val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif) // 2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif) // 2) +
                   np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd,
                                                     v.T,
                                                     eimum,
                                                     t2s,
                                                     t2sG,
                                                     tes,
                                                     combmode,
                                                     ref_img,
                                                     mmixN=vTmixN,
                                                     full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'u': u,
            's': s,
            'v': v,
            'ctb': ctb,
            'eigelb': eigelb,
            'spmin': spmin,
            'spcum': spcum
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted(
        [fmin, getelbow_mod(kappas, return_val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted(
        [fmin, getelbow_cons(rhos, return_val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=np.int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=np.int))).dot(v)

    if wvpca:
        dd = idwtmat(dd, cAl)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd