Example #1
File: io.py  Project: TomMaullin/tedana
def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise / ignored time series and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : float
        Percent variance of data explained by extracted + retained components
    """

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = model.get_coeffs(utils.unmask(dmdata.T, mask), mask, mmix)[mask]
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100
    LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    midkts = betas[:, midk].dot(mmix.T[midk, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts - midkts

    if len(acc) != 0:
        fout = utils.filewrite(utils.unmask(hikts, mask), 'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))
    if len(midk) != 0:
        fout = utils.filewrite(utils.unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing mid-Kappa time series: {}'.format(op.abspath(fout)))
    if len(rej) != 0:
        fout = utils.filewrite(utils.unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = utils.filewrite(utils.unmask(dnts, mask), 'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))

    return varexpl
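At its core, the variance-explained step above is an ordinary least-squares fit of the de-meaned data onto the mixing matrix. A minimal, self-contained NumPy sketch of just that computation, with `model.get_coeffs` replaced by an explicit `lstsq` call (an assumption about its behavior) and synthetic data standing in for real inputs:

import numpy as np

rng = np.random.default_rng(42)
n_samp, n_vols, n_comp = 100, 50, 5
mmix = rng.standard_normal((n_vols, n_comp))   # time x components, as the dot products imply
data = rng.standard_normal((n_samp, n_vols))
mask = np.ones(n_samp, dtype=bool)             # trivial mask for the sketch

# mask and de-mean each voxel's time series, as in write_split_ts
dmdata = data[mask] - data[mask].mean(axis=1, keepdims=True)

# least-squares betas, standing in for model.get_coeffs (assumed behavior)
betas = np.linalg.lstsq(mmix, dmdata.T, rcond=None)[0].T   # (n_samp, n_comp)

# percent variance explained, mirroring the formula in write_split_ts
resid = dmdata - betas.dot(mmix.T)
varexpl = (1 - (resid ** 2).sum() / (dmdata ** 2).sum()) * 100
print('Variance explained: {:.02f}%'.format(varexpl))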
Example #2
File: io.py  Project: TomMaullin/tedana
def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img):
    """
    Denoises `ts` and saves all resulting files to disk

    Parameters
    ----------
    ts : (S x T) array_like
        Time series to denoise and save to disk
    mask : (S,) array_like
        Boolean mask array
    comptable : (N x 5) array_like
        Array with columns denoting (1) index of component, (2) Kappa score of
        component, (3) Rho score of component, (4) variance explained by
        component, and (5) normalized variance explained by component
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `ts`
    n_vols : int
        Number of volumes in original time series
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    empty : list
        Indices of ignored components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """

    fout = utils.filewrite(ts, 'ts_OC', ref_img)
    LGR.info('Writing optimally-combined time series: {}'.format(op.abspath(fout)))

    varexpl = write_split_ts(ts, mmix, mask, acc, rej, midk, ref_img, suffix='OC')

    ts_B = model.get_coeffs(ts, mask, mmix)
    fout = utils.filewrite(ts_B, 'betas_OC', ref_img)
    LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout)))

    if len(acc) != 0:
        fout = utils.filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img)
        LGR.info('Writing denoised ICA coefficient feature set: {}'.format(op.abspath(fout)))
        fout = writefeats(split_ts(ts, mmix, mask, acc)[0],
                          mmix[:, acc], mask, ref_img, suffix='OC2')
        LGR.info('Writing Z-normalized spatial component maps: {}'.format(op.abspath(fout)))

    writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt',
            varexpl=varexpl)
    LGR.info('Writing component table: {}'.format(op.abspath('comp_table.txt')))
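`split_ts` is called above but not shown in these examples. Based solely on how its first return value is used (the high-Kappa part of `ts`), a plausible sketch of what it computes is below; the body is an assumption, not the project's code:

import numpy as np

def split_ts_sketch(data, mmix, mask, acc):
    """Guess at split_ts: return (high-Kappa reconstruction, remainder)
    of the masked data, inferred only from the call site above."""
    dmdata = data[mask] - data[mask].mean(axis=1, keepdims=True)
    betas = np.linalg.lstsq(mmix, dmdata.T, rcond=None)[0].T
    hikts = betas[:, acc].dot(mmix[:, acc].T)   # accepted components only
    return hikts, data[mask] - hikts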
Example #3
File: io.py  Project: fast-prakhar/tedana
def writefeats(data, mmix, mask, ref_img, suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : str
        Filepath to saved file
    """

    # write feature versions of components
    feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
    fname = utils.filewrite(feats, 'feats_{0}'.format(suffix), ref_img)

    return fname
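`model.computefeats2` is also not shown here. Conceptually, it converts the time series to component space, producing one spatial map per component, typically z-scored. A hedged NumPy stand-in for that idea (the z-scoring detail is an assumption):

import numpy as np

def computefeats2_sketch(data, mmix, mask):
    """Illustrative stand-in: project masked, de-meaned time series onto
    the mixing matrix and z-score each resulting spatial map."""
    dmdata = data[mask] - data[mask].mean(axis=1, keepdims=True)
    maps = np.linalg.lstsq(mmix, dmdata.T, rcond=None)[0].T   # (voxels, components)
    return (maps - maps.mean(axis=0)) / maps.std(axis=0)      # z-score per map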
Example #4
File: io.py  Project: TomMaullin/tedana
def writefeats(data, mmix, mask, ref_img, suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : str
        Filepath to saved file
    """

    # write feature versions of components
    feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
    fname = utils.filewrite(feats, 'feats_{0}'.format(suffix), ref_img)

    return fname
Example #5
def tedana(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
           gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1,
           combmode='t2s', dne=False, initcost='tanh', finalcost='tanh',
           stabilize=False, fout=False, filecsdata=False, label=None,
           fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adaptive mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask, t2s, t2sG,
                                                stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste)
        mmix_orig = decomposition.tedica(n_components, dd, conv, fixed_seed, cost=initcost,
                                         final_cost=finalcost, verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 fout=fout,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask, ref_img, manacc,
                                                   n_echos, t2s, s0, strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
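One detail worth isolating from the workflow above is the hard cap on the T2* map: anything above ten times the 99.5th percentile is reset to the percentile value. The rule by itself, runnable with SciPy on synthetic values:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
t2s = np.abs(rng.standard_normal(1000)) * 50
t2s[::97] *= 100                      # implant a few implausible outliers

cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                  interpolation_method='lower')
t2s[t2s > cap_t2s * 10] = cap_t2s     # reset extreme values to the percentile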
Example #6
File: io.py  Project: fast-prakhar/tedana
def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty,
                 ref_img):
    """
    Denoises `ts` and saves all resulting files to disk

    Parameters
    ----------
    ts : (S x T) array_like
        Time series to denoise and save to disk
    mask : (S,) array_like
        Boolean mask array
    comptable : (N x 5) array_like
        Array with columns denoting (1) index of component, (2) Kappa score of
        component, (3) Rho score of component, (4) variance explained by
        component, and (5) normalized variance explained by component
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `ts`
    n_vols : int
        Number of volumes in original time series
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    empty : list
        Indices of ignored components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """

    fout = utils.filewrite(ts, 'ts_OC', ref_img)
    LGR.info('Writing optimally-combined time series: {}'.format(
        op.abspath(fout)))

    varexpl = write_split_ts(ts,
                             mmix,
                             mask,
                             acc,
                             rej,
                             midk,
                             ref_img,
                             suffix='OC')

    ts_B = model.get_coeffs(ts, mask, mmix)
    fout = utils.filewrite(ts_B, 'betas_OC', ref_img)
    LGR.info('Writing full ICA coefficient feature set: {}'.format(
        op.abspath(fout)))

    if len(acc) != 0:
        fout = utils.filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img)
        LGR.info('Writing denoised ICA coefficient feature set: {}'.format(
            op.abspath(fout)))
        fout = writefeats(split_ts(ts, mmix, mask, acc)[0],
                          mmix[:, acc],
                          mask,
                          ref_img,
                          suffix='OC2')
        LGR.info('Writing Z-normalized spatial component maps: {}'.format(
            op.abspath(fout)))

    writect(comptable,
            n_vols,
            acc,
            rej,
            midk,
            empty,
            ctname='comp_table.txt',
            varexpl=varexpl)
    LGR.info('Writing component table: {}'.format(
        op.abspath('comp_table.txt')))
Example #7
File: io.py  Project: fast-prakhar/tedana
def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `OCcatd`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """

    Gmu = OCcatd.mean(axis=-1)
    Gstd = OCcatd.std(axis=-1)
    Gmask = (Gmu != 0)
    """
    Compute temporal regression
    """
    dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:,
                                                                   np.newaxis]
    solG = np.linalg.lstsq(mmix, dat.T, rcond=None)[0]
    resid = dat - np.dot(solG.T, mmix.T)
    """
    Build BOLD time series without amplitudes, and save T1-like effect
    """
    bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T)
    sphis = bold_ts.min(axis=-1)
    sphis -= sphis.mean()
    utils.utils.filewrite(utils.utils.unmask(sphis, mask), 'sphis_hik',
                          ref_img)
    """
    Find the global signal based on the T1-like effect
    """
    sol = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)
    glsig = sol[0]
    """
    T1 correct time series by regression
    """
    bold_noT1gs = bold_ts - np.dot(
        np.linalg.lstsq(glsig.T, bold_ts.T, rcond=None)[0].T, glsig)
    utils.utils.filewrite(
        utils.unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask),
        'hik_ts_OC_T1c.nii', ref_img)
    """
    Make medn version of T1 corrected time series
    """
    utils.filewrite(
        Gmu[..., np.newaxis] + utils.unmask(
            (bold_noT1gs + resid) * Gstd[mask][:, np.newaxis], mask),
        'dn_ts_OC_T1c', ref_img)
    """
    Orthogonalize mixing matrix w.r.t. T1-GS
    """
    mmixnogs = mmix.T - np.dot(
        np.linalg.lstsq(glsig.T, mmix, rcond=None)[0].T, glsig)
    mmixnogs_mu = mmixnogs.mean(-1)
    mmixnogs_std = mmixnogs.std(-1)
    mmixnogs_norm = (mmixnogs -
                     mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis]
    mmixnogs_norm = np.vstack(
        [np.atleast_2d(np.ones(max(glsig.shape))), glsig, mmixnogs_norm])
    """
    Write T1-GS corrected components and mixing matrix
    """
    sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T, rcond=None)
    utils.filewrite(utils.unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c',
                    ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
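The pattern repeated throughout `gscontrol_mmix` is "regress a signal out of a set of time series by least squares". That single operation, isolated as a self-contained NumPy sketch with synthetic data:

import numpy as np

rng = np.random.default_rng(1)
glsig = rng.standard_normal((1, 50))                 # global signal, 1 x T
ts = rng.standard_normal((200, 50)) + 0.5 * glsig    # contaminated time series

# fit each time series on glsig, then subtract the fitted part --
# the same np.linalg.lstsq idiom used throughout gscontrol_mmix
beta = np.linalg.lstsq(glsig.T, ts.T, rcond=None)[0]   # (1, 200)
ts_clean = ts - np.dot(beta.T, glsig)                  # (200, 50)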
Example #8
File: io.py  Project: fast-prakhar/tedana
def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise / ignored time series and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : float
        Percent variance of data explained by extracted + retained components
    """

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = model.get_coeffs(utils.unmask(dmdata.T, mask), mask, mmix)[mask]
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
               (dmdata**2.).sum()) * 100
    LGR.info(
        'Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    midkts = betas[:, midk].dot(mmix.T[midk, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts - midkts

    if len(acc) != 0:
        fout = utils.filewrite(utils.unmask(hikts, mask),
                               'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))
    if len(midk) != 0:
        fout = utils.filewrite(utils.unmask(midkts, mask),
                               'midk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing mid-Kappa time series: {}'.format(op.abspath(fout)))
    if len(rej) != 0:
        fout = utils.filewrite(utils.unmask(lowkts, mask),
                               'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = utils.filewrite(utils.unmask(dnts, mask),
                           'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))

    return varexpl
Example #9
File: io.py  Project: TomMaullin/tedana
def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `OCcatd`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """

    Gmu = OCcatd.mean(axis=-1)
    Gstd = OCcatd.std(axis=-1)
    Gmask = (Gmu != 0)

    """
    Compute temporal regression
    """
    dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:, np.newaxis]
    solG = np.linalg.lstsq(mmix, dat.T, rcond=None)[0]
    resid = dat - np.dot(solG.T, mmix.T)

    """
    Build BOLD time series without amplitudes, and save T1-like effect
    """
    bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T)
    sphis = bold_ts.min(axis=-1)
    sphis -= sphis.mean()
    utils.utils.filewrite(utils.utils.unmask(sphis, mask), 'sphis_hik', ref_img)

    """
    Find the global signal based on the T1-like effect
    """
    sol = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)
    glsig = sol[0]

    """
    T1 correct time series by regression
    """
    bold_noT1gs = bold_ts - np.dot(np.linalg.lstsq(glsig.T, bold_ts.T, rcond=None)[0].T, glsig)
    utils.utils.filewrite(utils.unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask),
                          'hik_ts_OC_T1c.nii', ref_img)

    """
    Make medn version of T1 corrected time series
    """
    utils.filewrite(Gmu[..., np.newaxis] +
                    utils.unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask),
                    'dn_ts_OC_T1c', ref_img)

    """
    Orthogonalize mixing matrix w.r.t. T1-GS
    """
    mmixnogs = mmix.T - np.dot(np.linalg.lstsq(glsig.T, mmix, rcond=None)[0].T, glsig)
    mmixnogs_mu = mmixnogs.mean(-1)
    mmixnogs_std = mmixnogs.std(-1)
    mmixnogs_norm = (mmixnogs - mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis]
    mmixnogs_norm = np.vstack([np.atleast_2d(np.ones(max(glsig.shape))), glsig, mmixnogs_norm])

    """
    Write T1-GS corrected components and mixing matrix
    """
    sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T, rcond=None)
    utils.filewrite(utils.unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c', ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
Example #10
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses the spatial global signal estimation approach to
    remove the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally-combined functional data (i.e., the output of `make_optcom`)
    n_echos : int
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : int, optional
        Specifies degree of Legendre polynomial basis function for estimating
        spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """

    LGR.info('Applying amplitude-based T1 equilibration correction')

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T,
                          rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find time course of the spatial global signal
    # make basis with the Legendre basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(
        np.atleast_2d(sol[dtrank]).T, np.atleast_2d(
            glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    utils.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(
            np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
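The detrending basis in `gscontrol_raw` is built from Legendre polynomials evaluated over [-1, 1] via `scipy.special.lpmv` (order 0, degrees 0 through `dtrank - 1`). A standalone sketch of constructing and applying that basis to a synthetic drifting series:

import numpy as np
from scipy.special import lpmv

n_vols, dtrank = 120, 4
bounds = np.linspace(-1, 1, n_vols)
# column v holds the degree-v Legendre polynomial P_v evaluated on [-1, 1]
Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

# detrend a synthetic drifting time series against the basis
ts = 2.0 * bounds + 0.1 * np.random.default_rng(2).standard_normal(n_vols)
coef = np.linalg.lstsq(Lmix, ts, rcond=None)[0]
detrended = ts - Lmix.dot(coef)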
Example #11
File: fit.py  Project: TomMaullin/tedana
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses the spatial global signal estimation approach to
    remove the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally-combined functional data (i.e., the output of `make_optcom`)
    n_echos : int
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : int, optional
        Specifies degree of Legendre polynomial basis function for estimating
        spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """

    LGR.info('Applying amplitude-based T1 equilibration correction')

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T, rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find time course of the spatial global signal
    # make basis with the Legendre basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                             np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    utils.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
Example #12
def t2smap(data, tes, combmode='t2s', label=None):
    """
    Estimate T2* and S0, and optimally combine data across TEs.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    """
    if label is not None:
        suf = '_%s' % str(label)
    else:
        suf = ''

    tes = [float(te) for te in tes]
    n_echos = len(tes)

    catd = utils.load_data(data, n_echos=n_echos)
    _, n_echos, _ = catd.shape

    ref_img = data[0] if isinstance(data, list) else data

    LGR.info('Computing adaptive mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    utils.filewrite(masksum, 'masksum%s' % suf, ref_img, copy_header=False)

    LGR.info('Computing adaptive T2* map')
    t2s, s0, t2ss, s0vs, _, _ = model.t2sadmap(catd, tes, mask, masksum, 2)
    utils.filewrite(t2ss, 't2ss%s' % suf, ref_img, copy_header=False)
    utils.filewrite(s0vs, 's0vs%s' % suf, ref_img, copy_header=False)

    LGR.info('Computing optimal combination')
    tsoc = np.array(model.make_optcom(catd, t2s, tes, mask, combmode),
                    dtype=float)

    # Clean up numerical errors
    t2sm = t2s.copy()
    for n in (tsoc, s0, t2s, t2sm):
        np.nan_to_num(n, copy=False)

    s0[s0 < 0] = 0
    t2s[t2s < 0] = 0
    t2sm[t2sm < 0] = 0

    utils.filewrite(tsoc, 'ocv%s' % suf, ref_img, copy_header=False)
    utils.filewrite(s0, 's0v%s' % suf, ref_img, copy_header=False)
    utils.filewrite(t2s, 't2sv%s' % suf, ref_img, copy_header=False)
    utils.filewrite(t2sm, 't2svm%s' % suf, ref_img, copy_header=False)
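The decay model underlying the T2*/S0 estimates is S(TE) = S0 * exp(-TE / T2*), which becomes a per-voxel linear fit after taking logs. A minimal sketch recovering both parameters from noiseless synthetic echoes; tedana's `fit_decay`/`t2sadmap` are considerably more elaborate (adaptive masking, multiple outputs), so this shows only the model:

import numpy as np

tes = np.array([14.5, 38.5, 62.5])      # echo times in ms (illustrative values)
true_t2s, true_s0 = 40.0, 1000.0
sig = true_s0 * np.exp(-tes / true_t2s)

# log-linear fit: log S(TE) = log S0 - TE / T2*
X = np.column_stack([np.ones_like(tes), -tes])
b = np.linalg.lstsq(X, np.log(sig), rcond=None)[0]
s0_hat, t2s_hat = np.exp(b[0]), 1.0 / b[1]
print(s0_hat, t2s_hat)                  # ~1000.0, ~40.0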
Example #13
File: io.py  Project: jdkent/tedana
def writeresults(ts, mask, comptable, mmix, n_vols, fixed_seed, acc, rej, midk,
                 empty, ref_img):
    """
    Denoises `ts` and saves all resulting files to disk

    Parameters
    ----------
    ts : (S x T) array_like
        Time series to denoise and save to disk
    mask : (S,) array_like
        Boolean mask array
    comptable : (N x 5) array_like
        Array with columns denoting (1) index of component, (2) Kappa score of
        component, (3) Rho score of component, (4) variance explained by
        component, and (5) normalized variance explained by component
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `ts`
    n_vols : :obj:`int`
        Number of volumes in original time series
    fixed_seed: :obj:`int`
        Integer value used in seeding ICA
    acc : :obj:`list`
        Indices of accepted (BOLD) components in `mmix`
    rej : :obj:`list`
        Indices of rejected (non-BOLD) components in `mmix`
    midk : :obj:`list`
        Indices of mid-K (questionable) components in `mmix`
    empty : :obj:`list`
        Indices of ignored components in `mmix`
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk

    Notes
    -----
    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    ts_OC.nii                 Optimally combined 4D time series.
    hik_ts_OC.nii             High-Kappa time series. Generated by
                              :py:func:`tedana.utils.io.write_split_ts`.
    midk_ts_OC.nii            Mid-Kappa time series. Generated by
                              :py:func:`tedana.utils.io.write_split_ts`.
    low_ts_OC.nii             Low-Kappa time series. Generated by
                              :py:func:`tedana.utils.io.write_split_ts`.
    dn_ts_OC.nii              Denoised time series. Generated by
                              :py:func:`tedana.utils.io.write_split_ts`.
    betas_OC.nii              Full ICA coefficient feature set.
    betas_hik_OC.nii          Denoised ICA coefficient feature set.
    feats_OC2.nii             Z-normalized spatial component maps. Generated
                              by :py:func:`tedana.utils.io.writefeats`.
    comp_table.txt            Component table. Generated by
                              :py:func:`tedana.utils.io.writect`.
    ======================    =================================================
    """

    fout = utils.filewrite(ts, 'ts_OC', ref_img)
    LGR.info('Writing optimally-combined time series: {}'.format(
        op.abspath(fout)))

    varexpl = write_split_ts(ts,
                             mmix,
                             mask,
                             acc,
                             rej,
                             midk,
                             ref_img,
                             suffix='OC')

    ts_B = model.get_coeffs(ts, mmix, mask)
    fout = utils.filewrite(ts_B, 'betas_OC', ref_img)
    LGR.info('Writing full ICA coefficient feature set: {}'.format(
        op.abspath(fout)))

    if len(acc) != 0:
        fout = utils.filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img)
        LGR.info('Writing denoised ICA coefficient feature set: {}'.format(
            op.abspath(fout)))
        fout = writefeats(split_ts(ts, mmix, mask, acc)[0],
                          mmix[:, acc],
                          mask,
                          ref_img,
                          suffix='OC2')
        LGR.info('Writing Z-normalized spatial component maps: {}'.format(
            op.abspath(fout)))

    writect(comptable,
            n_vols,
            fixed_seed,
            acc,
            rej,
            midk,
            empty,
            ctname='comp_table.txt',
            varexpl=varexpl)
    LGR.info('Writing component table: {}'.format(
        op.abspath('comp_table.txt')))
Example #14
File: io.py  Project: jdkent/tedana
def gscontrol_mmix(optcom_ts, mmix, mask, acc, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    optcom_ts : (S x T) array_like
        Optimally combined time series data
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `optcom_ts`
    mask : (S,) array_like
        Boolean mask array
    acc : :obj:`list`
        Indices of accepted (BOLD) components in `mmix`
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk

    Notes
    -----
    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    sphis_hik.nii             T1-like effect.
    hik_ts_OC_T1c.nii         T1 corrected time series.
    dn_ts_OC_T1c.nii          Denoised version of T1 corrected time series
    betas_hik_OC_T1c.nii      T1-GS corrected components
    meica_mix_T1c.1D          T1-GS corrected mixing matrix
    ======================    =================================================
    """
    optcom_masked = optcom_ts[mask, :]
    optcom_mu = optcom_masked.mean(axis=-1)[:, np.newaxis]
    optcom_std = optcom_masked.std(axis=-1)[:, np.newaxis]
    """
    Compute temporal regression
    """
    data_norm = (optcom_masked - optcom_mu) / optcom_std
    cbetas = lstsq(mmix, data_norm.T, rcond=None)[0].T
    resid = data_norm - np.dot(cbetas, mmix.T)
    """
    Build BOLD time series without amplitudes, and save T1-like effect
    """
    bold_ts = np.dot(cbetas[:, acc], mmix[:, acc].T)
    t1_map = bold_ts.min(axis=-1)
    t1_map -= t1_map.mean()
    utils.filewrite(utils.unmask(t1_map, mask), 'sphis_hik', ref_img)
    t1_map = t1_map[:, np.newaxis]
    """
    Find the global signal based on the T1-like effect
    """
    glob_sig = lstsq(t1_map, data_norm, rcond=None)[0]
    """
    T1-correct time series by regression
    """
    bold_noT1gs = bold_ts - np.dot(
        lstsq(glob_sig.T, bold_ts.T, rcond=None)[0].T, glob_sig)
    utils.filewrite(utils.unmask(bold_noT1gs * optcom_std, mask),
                    'hik_ts_OC_T1c.nii', ref_img)
    """
    Make denoised version of T1-corrected time series
    """
    medn_ts = optcom_mu + ((bold_noT1gs + resid) * optcom_std)
    utils.filewrite(utils.unmask(medn_ts, mask), 'dn_ts_OC_T1c', ref_img)
    """
    Orthogonalize mixing matrix w.r.t. T1-GS
    """
    mmixnogs = mmix.T - np.dot(
        lstsq(glob_sig.T, mmix, rcond=None)[0].T, glob_sig)
    mmixnogs_mu = mmixnogs.mean(-1)[:, np.newaxis]
    mmixnogs_std = mmixnogs.std(-1)[:, np.newaxis]
    mmixnogs_norm = (mmixnogs - mmixnogs_mu) / mmixnogs_std
    mmixnogs_norm = np.vstack(
        [np.atleast_2d(np.ones(max(glob_sig.shape))), glob_sig, mmixnogs_norm])
    """
    Write T1-GS corrected components and mixing matrix
    """
    cbetas_norm = lstsq(mmixnogs_norm.T, data_norm.T, rcond=None)[0].T
    utils.filewrite(utils.unmask(cbetas_norm[:, 2:], mask), 'betas_hik_OC_T1c',
                    ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
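The closing step orthogonalizes each component time course against the T1-global-signal regressor, z-normalizes, and stacks an intercept and the global signal on top before the final fit. The linear algebra of that step in isolation, with synthetic shapes labeled:

import numpy as np

rng = np.random.default_rng(3)
T, C = 50, 6
mmix = rng.standard_normal((T, C))        # components as columns
glob_sig = rng.standard_normal((1, T))    # T1-GS time course

# project the global signal out of each component time course
mmixnogs = mmix.T - np.dot(
    np.linalg.lstsq(glob_sig.T, mmix, rcond=None)[0].T, glob_sig)

# z-normalize each component, then stack intercept + global signal on top,
# matching the design built before the final lstsq fit above
mmixnogs_norm = ((mmixnogs - mmixnogs.mean(-1, keepdims=True))
                 / mmixnogs.std(-1, keepdims=True))
design = np.vstack([np.ones((1, T)), glob_sig, mmixnogs_norm])   # (C + 2, T)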
Example #15
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    strict=False, gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5,
                    ste=-1, combmode='t2s', dne=False,
                    initcost='tanh', finalcost='tanh',
                    stabilize=False, filecsdata=False, wvpca=False,
                    label=None, fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    PROCEDURE 2 : Computes ME-PCA and ME-ICA

    - Computes T2* map
    - Computes PCA of concatenated ME data, then computes TE-dependence of PCs
    - Computes ICA of TE-dependence PCs
    - Identifies TE-dependent ICs, outputs high-\kappa (BOLD) component
      and denoised time series

    or computes TE-dependence of each component of a general linear model
    specified by input (includes MELODIC FastICA mixing matrix)

    PROCEDURE 2a: Model fitting and component selection routines

    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file.

    Files are listed below:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    t2sv.nii                  Limited estimated T2* 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    s0v.nii                   Limited S0 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    t2ss.nii                  ???
    s0vs.nii                  ???
    t2svG.nii                 Full T2* map/timeseries. The difference between
                              the limited and full maps is that, for voxels
                              affected by dropout where only one echo contains
                              good data, the full map uses the single echo's
                              value while the limited map has a NaN.
    s0vG.nii                  Full S0 map/timeseries.
    __meica_mix.1D            A mixing matrix
    meica_mix.1D              Another mixing matrix
    ts_OC.nii                 Optimally combined timeseries.
    betas_OC.nii              Full ICA coefficient feature set.
    betas_hik_OC.nii          Denoised ICA coefficient feature set
    feats_OC2.nii             Z-normalized spatial component maps
    comp_table.txt            Component table
    sphis_hik.nii             T1-like effect
    hik_ts_OC_T1c.nii         T1 corrected time series by regression
    dn_ts_OC_T1c.nii          ME-DN version of T1 corrected time series
    betas_hik_OC_T1c.nii      T1-GS corrected components
    meica_mix_T1c.1D          T1-GS corrected mixing matrix
    ======================    =================================================

    If ``dne`` is set to True:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    hik_ts_e[echo].nii        High-Kappa timeseries for echo number ``echo``
    midk_ts_e[echo].nii       Mid-Kappa timeseries for echo number ``echo``
    lowk_ts_e[echo].nii       Low-Kappa timeseries for echo number ``echo``
    dn_ts_e[echo].nii         Denoised timeseries for echo number ``echo``
    ======================    =================================================
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except TypeError:
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask,
                                             minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste, wvpca=wvpca)
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd, conv, fixed_seed,
                                                     cost=initcost, final_cost=finalcost,
                                                     verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask, ref_img, manacc,
                                                   n_echos, t2s, s0, strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, fixed_seed,
                       acc, rej, midk, empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
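`utils.make_adaptive_mask` is used by every workflow here but not shown. Conceptually it records, per voxel, how many echoes still carry usable signal (`masksum`) and derives the boolean `mask` from that count. A purely illustrative sketch of the idea; the threshold rule is an assumption, not tedana's actual algorithm:

import numpy as np

def adaptive_mask_sketch(catd, threshold=100.0):
    """catd: (samples, echoes, time). Count echoes whose mean signal clears
    an (assumed) absolute threshold; keep voxels where any echo survives."""
    echo_means = catd.mean(axis=-1)                 # (samples, echoes)
    masksum = (echo_means > threshold).sum(axis=-1)
    mask = masksum > 0
    return mask, masksum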
Example #16
File: t2smap.py  Project: jdkent/tedana
def t2smap_workflow(data, tes, fitmode='all', combmode='t2s', label=None):
    """
    Estimate T2* and S0, and optimally combine data across TEs.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    fitmode : {'all', 'ts'}, optional
        Monoexponential model fitting scheme.
        'all' means that the model is fit, per voxel, across all timepoints.
        'ts' means that the model is fit, per voxel and per timepoint.
        Default is 'all'.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Notes
    -----
    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file.

    Files are listed below:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    t2sv.nii                  Limited estimated T2* 3D map or 4D timeseries.
                              Will be a 3D map if ``fitmode`` is 'all' and a
                              4D timeseries if it is 'ts'.
    s0v.nii                   Limited S0 3D map or 4D timeseries.
    t2svG.nii                 Full T2* map/timeseries. The difference between
                              the limited and full maps is that, for voxels
                              affected by dropout where only one echo contains
                              good data, the full map uses the single echo's
                              value while the limited map has a NaN.
    s0vG.nii                  Full S0 map/timeseries.
    ts_OC.nii                 Optimally combined timeseries.
    ======================    =================================================
    """
    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format(data))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    try:
        ref_label = os.path.basename(ref_img).split('.')[0]
    except TypeError:
        ref_label = os.path.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    LGR.info('Computing adaptive mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)

    LGR.info('Computing adaptive T2* map')
    if fitmode == 'all':
        (t2s_limited, s0_limited,
         t2ss, s0s,
         t2s_full, s0_full) = model.fit_decay(catd, tes, mask,
                                              masksum, start_echo=1)
    else:
        (t2s_limited, s0_limited,
         t2s_full, s0_full) = model.fit_decay_ts(catd, tes, mask, masksum,
                                                 start_echo=1)

    # set a hard cap for the T2* map/timeseries
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s

    LGR.info('Computing optimal combination')
    # optimally combine data
    OCcatd = model.make_optcom(catd, tes, mask, t2s=t2s_full,
                               combmode=combmode)

    # clean up numerical errors
    for arr in (OCcatd, s0_limited, t2s_limited):
        np.nan_to_num(arr, copy=False)

    s0_limited[s0_limited < 0] = 0
    t2s_limited[t2s_limited < 0] = 0

    utils.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img)
    utils.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img)
    utils.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img)
    utils.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img)
    utils.filewrite(OCcatd, op.join(out_dir, 'ts_OC.nii'), ref_img)
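
A minimal usage sketch for the workflow above (the filenames, echo times, and import path are illustrative assumptions, not part of the source):

from tedana.workflows import t2smap_workflow  # import path may differ across versions

# three hypothetical echo-specific files, in ascending TE order
echo_files = ['sub-01_echo-1_bold.nii.gz',
              'sub-01_echo-2_bold.nii.gz',
              'sub-01_echo-3_bold.nii.gz']
echo_times = [14.5, 38.5, 62.5]  # milliseconds

# fit one T2*/S0 estimate per voxel across all timepoints and write the
# t2sv/s0v/t2svG/s0vG/ts_OC files into a TED.<ref_label> directory
t2smap_workflow(echo_files, echo_times, fitmode='all', combmode='t2s')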
Example #17
def tedana(data,
           tes,
           mixm=None,
           ctab=None,
           manacc=None,
           strict=False,
           gscontrol=True,
           kdaw=10.,
           rdaw=1.,
           conv=2.5e-5,
           ste=-1,
           combmode='t2s',
           dne=False,
           initcost='tanh',
           finalcost='tanh',
           stabilize=False,
           fout=False,
           filecsdata=False,
           label=None,
           fixed_seed=42,
           debug=False,
           quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adaptive mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd,
                                                    tes,
                                                    mask,
                                                    masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd,
                                                OCcatd,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                stabilize,
                                                ref_img,
                                                tes=tes,
                                                kdaw=kdaw,
                                                rdaw=rdaw,
                                                ste=ste)
        mmix_orig = decomposition.tedica(n_components,
                                         dd,
                                         conv,
                                         fixed_seed,
                                         cost=initcost,
                                         final_cost=finalcost,
                                         verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 fout=fout,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict,
                                                   mmix,
                                                   mask,
                                                   ref_img,
                                                   manacc,
                                                   n_echos,
                                                   t2s,
                                                   s0,
                                                   strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict,
                                                       mmix,
                                                       mask,
                                                       ref_img,
                                                       manacc,
                                                       n_echos,
                                                       t2s,
                                                       s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning(
            'No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk,
                       empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
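
A minimal invocation sketch for the function above (echo filenames and times are hypothetical placeholders; all other arguments keep the defaults shown in the signature):

tedana(['sub-01_echo-1_bold.nii.gz',
        'sub-01_echo-2_bold.nii.gz',
        'sub-01_echo-3_bold.nii.gz'],
       tes=[14.5, 38.5, 62.5],  # echo times in milliseconds
       gscontrol=True,          # spatial global signal control (default)
       fixed_seed=42)           # reproducible ICA seed (default)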
Example #18
def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, t2s, s0, olevel=2,
             oversion=99, filecsdata=True, savecsdiag=True, strict_mode=False):
    """
    Labels components in `mmix`

    Parameters
    ----------
    seldict : :obj:`dict`
        As output from `fitmodels_direct`
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the number of volumes in the original data
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    manacc : list
        Comma-separated list of indices of manually accepted components
    n_echos : int
        Number of echos in original data
    t2s : (S,) array_like
    s0 : (S,) array_like
    olevel : int, optional
        Default: 2
    oversion : int, optional
        Default: 99
    filecsdata: bool, optional
        Default: True
    savecsdiag: bool, optional
        Default: True
    strict_mode: bool, optional
        Default: False

    Returns
    -------
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ign : list
        Indices of ignored components in `mmix`
    """

    if filecsdata:
        import bz2
        if seldict is not None:
            LGR.info('Saving component selection data')
            with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f:
                pickle.dump(seldict, csstate_f)
        else:
            try:
                with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
                    seldict = pickle.load(csstate_f)
            except FileNotFoundError:
                LGR.warning('Failed to load component selection data')
                return None

    # List of components
    midk = []
    ign = []
    nc = np.arange(len(seldict['Kappas']))
    ncl = np.arange(len(seldict['Kappas']))

    # If user has specified components to accept manually
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        midk = []
        rej = sorted(np.setdiff1d(ncl, acc))
        return acc, rej, midk, []  # Add string for ign

    """
    Do some tallies for no. of significant voxels
    """
    countsigFS0 = seldict['F_S0_clmaps'].sum(0)
    countsigFR2 = seldict['F_R2_clmaps'].sum(0)
    countnoise = np.zeros(len(nc))

    """
    Make table of dice values
    """
    dice_tbl = np.zeros([nc.shape[0], 2])
    for ii in ncl:
        dice_FR2 = utils.dice(utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0],
                              seldict['F_R2_clmaps'][:, ii])
        dice_FS0 = utils.dice(utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0],
                              seldict['F_S0_clmaps'][:, ii])
        dice_tbl[ii, :] = [dice_FR2, dice_FS0]  # step 3a here and above
    dice_tbl[np.isnan(dice_tbl)] = 0

    """
    Make table of noise gain
    """
    tt_table = np.zeros([len(nc), 4])
    counts_FR2_Z = np.zeros([len(nc), 2])
    for ii in nc:
        comp_noise_sel = utils.andb([np.abs(seldict['Z_maps'][:, ii]) > 1.95,
                                     seldict['Z_clmaps'][:, ii] == 0]) == 2
        countnoise[ii] = np.array(comp_noise_sel, dtype=int).sum()
        noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0]
        noise_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii]))
        signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii], mask)[t2s != 0] == 1
        signal_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii]))
        counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)]
        try:
            ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True)
            # avoid DivideByZero RuntimeWarning
            if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0:
                mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
            else:
                mwu = -np.inf
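            # signed effect size: magnitude from the Mann-Whitney z-score, sign from the t-statistic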
            tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
            tt_table[ii, 1] = ttest[1]
        except Exception:  # TODO: what is the error that might be caught here?
            pass
    tt_table[np.isnan(tt_table)] = 0
    tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0],
                                                          98)

    # Time series derivative kurtosis
    mmix_dt = (mmix[:-1] - mmix[1:])
    mmix_kurt = stats.kurtosis(mmix_dt)
    mmix_std = np.std(mmix_dt, axis=0)

    """
    Step 1: Reject anything that's obviously an artifact
    a. Estimate a null variance
    """
    LGR.debug('Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts')
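    # utils.andb sums boolean arrays elementwise, so > 0 flags components failing either criterion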
    rej = ncl[utils.andb([seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0]
    ncl = np.setdiff1d(ncl, rej)

    """
    Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial
    frequency artifacts
    """
    LGR.debug('Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts')
    # spatial information is important so for NIFTI we convert back to 3D space
    if utils.get_dtype(ref_img) == 'NIFTI':
        dim1 = np.prod(ref_img.shape[:2])
    else:
        dim1 = mask.shape[0]
    fproj_arr = np.zeros([dim1, len(nc)])
    fproj_arr_val = np.zeros([dim1, len(nc)])
    spr = []
    fdist = []
    for ii in nc:
        # convert data back to 3D array
        if utils.get_dtype(ref_img) == 'NIFTI':
            tproj = utils.new_nii_like(ref_img, utils.unmask(seldict['PSC'],
                                                             mask)[:, ii]).get_data()
        else:
            tproj = utils.unmask(seldict['PSC'], mask)[:, ii]
        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
        fproj_z = fproj.max(axis=2)
        fproj[fproj == fproj.max()] = 0
        fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
        fproj_arr_val[:, ii] = fproj_z.flatten()
        spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=int).sum())
        fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
        fdist.append(np.max([utils.fitgaussian(fproj.max(jj))[3:].max() for
                     jj in range(fprojr.ndim)]))
    fdist = np.array(fdist)
    spr = np.array(spr)

    """
    Step 3: Create feature space of component properties
    """
    LGR.debug('Creating feature space of component properties')
    fdist_pre = fdist.copy()
    fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3
    fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std()
    spz = (spr-spr.mean())/spr.std()
    Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std()
    varex_ = np.log(seldict['varex'])
    Vz = (varex_-varex_.mean()) / varex_.std()
    Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std()
    Ktz = np.log(seldict['Kappas']) / 2
    Ktz = (Ktz-Ktz.mean()) / Ktz.std()
    Rtz = np.log(seldict['Rhos']) / 2
    Rtz = (Rtz-Rtz.mean())/Rtz.std()
    KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos']))
    cnz = (countnoise-countnoise.mean()) / countnoise.std()
    Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001))
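    # stack the standardized features into an (n_features x n_components) array for clustering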
    fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])

    """
    Step 4: Make initial guess of where BOLD components are and use DBSCAN
    to exclude noise components and find a sample set of 'good' components
    """
    LGR.debug('Making initial guess of BOLD components')
    # epsmap is [index,level of overlap with dicemask,
    # number of high Rho components]
    F05, F025, F01 = utils.getfbounds(n_echos)
    epsmap = []
    Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1]
    # Make an initial guess as to number of good components based on
    # consensus of control points across Rhos and Kappas
    KRcutguesses = [getelbow_mod(seldict['Rhos']), getelbow_cons(seldict['Rhos']),
                    getelbow_aggr(seldict['Rhos']), getelbow_mod(seldict['Kappas']),
                    getelbow_cons(seldict['Kappas']), getelbow_aggr(seldict['Kappas'])]
    Khighelbowval = stats.scoreatpercentile([getelbow_mod(seldict['Kappas'], val=True),
                                             getelbow_cons(seldict['Kappas'], val=True),
                                             getelbow_aggr(seldict['Kappas'], val=True)] +
                                            list(utils.getfbounds(n_echos)),
                                            75, interpolation_method='lower')
    KRcut = np.median(KRcutguesses)

    # only use exclusive when inclusive is extremely inclusive - double KRcut
    cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2
    cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01
    if cond1 and cond2:
        Kcut = getelbow_mod(seldict['Kappas'], val=True)
    else:
        Kcut = getelbow_cons(seldict['Kappas'], val=True)
    # only use inclusive when exclusive is extremely exclusive - half KRcut
    # (remember for Rho inclusive is higher, so want both Kappa and Rho
    # to default to lower)
    if getelbow_cons(seldict['Rhos']) > KRcut * 2:
        Rcut = getelbow_mod(seldict['Rhos'], val=True)
    # for above, consider something like:
    # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ])
    else:
        Rcut = getelbow_cons(seldict['Rhos'], val=True)
    if Rcut > Kcut:
        Kcut = Rcut  # Rcut should never be higher than Kcut
    KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut])
    # Make guess of Kundu et al 2011 plus remove high frequencies,
    # generally high variance, and high variance given low Kappa
    tt_lim = stats.scoreatpercentile(tt_table[tt_table[:, 0] > 0, 0],
                                     75, interpolation_method='lower') / 3
    KRguess = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                           np.union1d(nc[tt_table[:, 0] < tt_lim],
                           np.union1d(np.union1d(nc[spz > 1],
                                                 nc[Vz > 2]),
                                      nc[utils.andb([seldict['varex'] > 0.5 *
                                         sorted(seldict['varex'])[::-1][int(KRcut)],
                                                seldict['Kappas'] < 2*Kcut]) == 2])))
    guessmask = np.zeros(len(nc))
    guessmask[KRguess] = 1

    # Throw lower-risk bad components out
    rejB = ncl[utils.andb([tt_table[ncl, 0] < 0,
                           seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut]) == 3]
    rej = np.union1d(rej, rejB)
    ncl = np.setdiff1d(ncl, rej)

    LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components')
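    # sweep DBSCAN eps from 0.005 to 100.0 in 0.005 steps; keep solutions passing the four checks below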
    for ii in range(20000):
        eps = .005 + ii * .005
        db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)

        # it would be great to have descriptive names, here
        # DBSCAN found at least three non-noisy clusters
        cond1 = db.labels_.max() > 1
        # DBSCAN didn't detect more classes than the total # of components / 6
        cond2 = db.labels_.max() < len(nc) / 6
        # TODO: confirm if 0 is a special label for DBSCAN
        # my intuition here is that we're confirming DBSCAN labelled previously
        # rejected components as noise (i.e., no overlap between `rej` and
        # labelled DBSCAN components)
        cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0
        # DBSCAN labelled less than half of the total components as noisy
        cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(len(nc)) < .5

        if cond1 and cond2 and cond3 and cond4:
            epsmap.append([ii, utils.dice(guessmask, db.labels_ == 0),
                           np.intersect1d(nc[db.labels_ == 0],
                           nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted,
                                                             val=True)]).shape[0]])
        db = None

    epsmap = np.array(epsmap)
    LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(len(epsmap)))
    group0 = []
    dbscanfailed = False
    if len(epsmap) != 0:
        # Select index that maximizes Dice with guessmask but first
        # minimizes number of higher Rho components
        ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
        LGR.debug('Component selection tuning: {:.05f}'.format(epsmap[:, 1].max()))
        db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
        ncl = nc[db.labels_ == 0]
        ncl = np.setdiff1d(ncl, rej)
        ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)])
        group0 = ncl.copy()
        group_n1 = nc[db.labels_ == -1]
        to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
    if len(group0) == 0 or len(group0) < len(KRguess) * .5:
        dbscanfailed = True
        LGR.debug('DBSCAN guess failed; using elbow guess method instead')
        ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                           np.union1d(nc[tt_table[:, 0] < tt_lim],
                           np.union1d(np.union1d(nc[spz > 1],
                                      nc[Vz > 2]),
                                      nc[utils.andb([seldict['varex'] > 0.5 *
                                                     sorted(seldict['varex'])[::-1][int(KRcut)],
                                                     seldict['Kappas'] < 2 * Kcut]) == 2])))
        group0 = ncl.copy()
        group_n1 = []
        to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
    if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3):
        LGR.warning('Extremely limited reliable BOLD signal space! '
                    'Not filtering components beyond BOLD/non-BOLD guesses.')
        midkfailed = True
        min_acc = np.array([])
        if len(group0) != 0:
            # For extremes, building in a 20% tolerance
            toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]),
                                                   seldict['Rhos'] < F025, Vz > -2]) == 3],
                                    np.union1d(group0, rej))
            min_acc = np.union1d(group0, toacc_hi)
            to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
        else:
            # guard: toacc_hi is referenced below even when no components survived
            toacc_hi = np.array([])
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point',
                         'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge',
                         'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
                         'min_acc', 'toacc_hi']
        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
                         midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
        return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))

    # Find additional components to reject based on Dice - doing this here
    # since Dice is a little unstable, need to reference group0
    rej_supp = []
    dice_rej = False
    if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc):
        dice_rej = True
        rej_supp = np.setdiff1d(np.setdiff1d(np.union1d(rej,
                                                        nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]),
                                             group0), group_n1)
        rej = np.union1d(rej, rej_supp)

    # Temporal features
    # larger is worse - spike
    mmix_kurt_z = (mmix_kurt-mmix_kurt[group0].mean()) / mmix_kurt[group0].std()
    # smaller is worse - drift
    mmix_std_z = -1 * ((mmix_std-mmix_std[group0].mean()) / mmix_std[group0].std())
    mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0)

    """
    Step 5: Classify midk and ignore using separate SVMs for
    different variance regimes
    # To render hyperplane:
    min_x = np.min(spz2);max_x=np.max(spz2)
    # plotting separating hyperplane
        ww = clf_.coef_[0]
        aa = -ww[0] / ww[1]
        # make sure the next line is long enough
        xx = np.linspace(min_x - 2, max_x + 2)
        yy = aa * xx - (clf_.intercept_[0]) / ww[1]
        plt.plot(xx, yy, '-')
    """
    LGR.debug('Attempting to classify midk components')
    # Tried getting rid of accepting based on SVM altogether,
    # now using only rejecting
    toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]),
                               seldict['Rhos'] < F025, Vz > -2]) == 3],
                            np.union1d(group0, rej))
    toacc_lo = np.intersect1d(to_clf,
                              nc[utils.andb([spz < 1, Rz < 0, mmix_kurt_z_max < 5,
                                             Dz > -1, Tz > -1, Vz < 0, seldict['Kappas'] >= F025,
                                             fdist < 3 * np.percentile(fdist[group0], 98)]) == 8])
    midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T,
                            [0] * len(group0) + [1] * len(rej),
                            fproj_arr_val[:, to_clf].T,
                            svmtype=2)
    midk = np.setdiff1d(to_clf[utils.andb([midk_clf == 1, seldict['varex'][to_clf] >
                                           np.median(seldict['varex'][group0])]) == 2],
                        np.union1d(toacc_hi, toacc_lo))
    # only use SVM to augment toacc_hi only if toacc_hi isn't already
    # conflicting with SVM choice
    if len(np.intersect1d(to_clf[utils.andb([midk_clf == 1,
                                             Vz[to_clf] > 0]) == 2], toacc_hi)) == 0:
        svm_acc_fail = True
        toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0])
    else:
        svm_acc_fail = False

    """
    Step 6: Compute variance associated with low T2* areas
    (e.g. draining veins and low T2* areas)
    # To write out veinmask
    veinout = np.zeros(t2s.shape)
    veinout[t2s!=0] = veinmaskf
    utils.filewrite(veinout, 'veinmaskf', ref_img)
    veinBout = utils.unmask(veinmaskB, mask)
    utils.filewrite(veinBout, 'veins50', ref_img)
    """
    LGR.debug('Computing variance associated with low T2* areas (e.g., draining veins)')
    tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape)
    tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(seldict['tsoc_B'])[seldict['Z_clmaps'] != 0]
    sig_B = [stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25)
             if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0
             else 0 for ii in nc]
    sig_B = np.abs(seldict['tsoc_B']) > np.tile(sig_B, [seldict['tsoc_B'].shape[0], 1])

    veinmask = utils.andb([t2s < stats.scoreatpercentile(t2s[t2s != 0], 15,
                                                         interpolation_method='lower'),
                           t2s != 0]) == 2
    veinmaskf = veinmask[mask]
    veinR = np.array(sig_B[veinmaskf].sum(0),
                     dtype=float) / sig_B[~veinmaskf].sum(0)
    veinR[np.isnan(veinR)] = 0

    veinc = np.union1d(rej, midk)
    rej_veinRZ = ((veinR-veinR[veinc].mean())/veinR[veinc].std())[veinc]
    rej_veinRZ[rej_veinRZ < 0] = 0
    rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0
    t2s_lim = [stats.scoreatpercentile(t2s[t2s != 0], 50,
                                       interpolation_method='lower'),
               stats.scoreatpercentile(t2s[t2s != 0], 80,
                                       interpolation_method='lower') / 2]
    phys_var_zs = []
    for t2sl_i in range(len(t2s_lim)):
        t2sl = t2s_lim[t2sl_i]
        veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1])
        veincand = utils.unmask(utils.andb([s0[t2s != 0] < np.median(s0[t2s != 0]),
                                t2s[t2s != 0] < t2sl]) >= 1,
                                t2s != 0)[mask]
        veinW[~veincand] = 0
        invein = veinW.sum(axis=1)[(utils.unmask(veinmaskf, mask) *
                                    utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]]
        minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std())
        veinmaskB = veinW.sum(axis=1) > minW
        tsoc_Bp = seldict['tsoc_B'].copy()
        tsoc_Bp[tsoc_Bp < 0] = 0
        vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() /
                         (tsoc_Bp[:, ii]**2.).sum() for ii in nc])
        group0_res = np.intersect1d(KRguess, group0)
        phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
        veinBout = utils.unmask(veinmaskB, mask)
        utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)

    # Mask to sample veins
    phys_var_z = np.array(phys_var_zs).max(0)
    Vz2 = (varex_ - varex_[group0].mean())/varex_[group0].std()

    """
    Step 7: Learn joint TE-dependence spatial and temporal models to move
    remaining artifacts to ignore class
    """
    LGR.debug('Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts')

    to_ign = []

    minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)])
    newcest = len(group0) + len(toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign])
    phys_art = np.setdiff1d(nc[utils.andb([phys_var_z > 3.5,
                                           seldict['Kappas'] < minK_ign]) == 2], group0)
    rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas'])
    phys_art = np.union1d(np.setdiff1d(nc[utils.andb([phys_var_z > 2, rank_diff > newcest / 2,
                                                      Vz2 > -1]) == 3],
                                       group0), phys_art)
    # Want to replace field_art with an acf/SVM based approach
    # instead of a kurtosis/filter one
    field_art = np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5,
                                            seldict['Kappas'] < minK_ign]) == 2], group0)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 2,
                                           (stats.rankdata(mmix_kurt_z_max) -
                                            stats.rankdata(seldict['Kappas'])) > newcest / 2,
                                           Vz2 > 1, seldict['Kappas'] < F01]) == 4],
                                        group0), field_art)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 3,
                                                       Vz2 > 3, seldict['Rhos'] >
                                                       np.percentile(seldict['Rhos'][group0],
                                                                     75)]) == 3],
                                        group0), field_art)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                                        group0), field_art)
    misc_art = np.setdiff1d(nc[utils.andb([(stats.rankdata(Vz) -
                                            stats.rankdata(Ktz)) > newcest / 2,
                            seldict['Kappas'] < Khighelbowval]) == 2], group0)
    ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art))
    midkrej = np.union1d(midk, rej)
    to_ign = np.setdiff1d(list(ign_cand), midkrej)
    toacc = np.union1d(toacc_hi, toacc_lo)
    ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej))
    ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
    orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej))

    # Last ditch effort to save some transient components
    if not strict_mode:
        Vz3 = (varex_ - varex_[ncl].mean())/varex_[ncl].std()
        ncl = np.union1d(ncl, np.intersect1d(orphan,
                                             nc[utils.andb([seldict['Kappas'] > F05,
                                                            seldict['Rhos'] < F025,
                                                            seldict['Kappas'] > seldict['Rhos'],
                                                            Vz3 <= -1,
                                                            Vz3 > -3,
                                                            mmix_kurt_z_max < 2.5]) == 6]))
        ign = np.setdiff1d(nc, list(ncl)+list(midk)+list(rej))
        orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej))

    if savecsdiag:
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
                         'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
                         'Dice rejected', 'rej_supp', 'to_clf',
                         'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
                         'Field artifacts', 'Physiological artifacts',
                         'Miscellaneous artifacts', 'ncl', 'Ignored components']
        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
                         KRguess.tolist(), dice_rej, rej_supp.tolist(),
                         to_clf.tolist(), midk.tolist(), svm_acc_fail,
                         toacc_hi.tolist(), toacc_lo.tolist(),
                         field_art.tolist(), phys_art.tolist(),
                         misc_art.tolist(), ncl.tolist(), ign.tolist()]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
        allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
        np.savetxt('csdata.txt', allfz)

    return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
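
On every return path the four lists partition the component indices, so each component lands in exactly one of acc, rej, midk, or ign. A quick sanity check one might run on the outputs (a sketch, assuming selcomps ran to completion rather than returning None):

acc, rej, midk, ign = selcomps(seldict, mmix, mask, ref_img, None,
                               n_echos, t2s, s0)
n_comps = len(seldict['Kappas'])  # one Kappa per component
# every index 0..n_comps-1 should appear exactly once across the four lists
assert sorted(list(acc) + list(rej) + list(midk) + list(ign)) == list(range(n_comps))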
Example #19
def selcomps(seldict,
             mmix,
             mask,
             ref_img,
             manacc,
             n_echos,
             t2s,
             s0,
             olevel=2,
             oversion=99,
             filecsdata=True,
             savecsdiag=True,
             strict_mode=False):
    """
    Labels components in `mmix`

    Parameters
    ----------
    seldict : :obj:`dict`
        As output from `fitmodels_direct`
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the number of volumes in the original data
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    manacc : list
        Comma-separated list of indices of manually accepted components
    n_echos : int
        Number of echos in original data
    t2s : (S,) array_like
    s0 : (S,) array_like
    olevel : int, optional
        Default: 2
    oversion : int, optional
        Default: 99
    filecsdata: bool, optional
        Default: True
    savecsdiag: bool, optional
        Default: True
    strict_mode: bool, optional
        Default: False

    Returns
    -------
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ign : list
        Indices of ignored components in `mmix`
    """

    if filecsdata:
        import bz2
        if seldict is not None:
            LGR.info('Saving component selection data')
            with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f:
                pickle.dump(seldict, csstate_f)
        else:
            try:
                with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
                    seldict = pickle.load(csstate_f)
            except FileNotFoundError:
                LGR.warning('Failed to load component selection data')
                return None

    # List of components
    midk = []
    ign = []
    nc = np.arange(len(seldict['Kappas']))
    ncl = np.arange(len(seldict['Kappas']))

    # If user has specified components to accept manually
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        midk = []
        rej = sorted(np.setdiff1d(ncl, acc))
        return acc, rej, midk, []  # Add string for ign
    """
    Do some tallies for no. of significant voxels
    """
    countsigFS0 = seldict['F_S0_clmaps'].sum(0)
    countsigFR2 = seldict['F_R2_clmaps'].sum(0)
    countnoise = np.zeros(len(nc))
    """
    Make table of dice values
    """
    dice_tbl = np.zeros([nc.shape[0], 2])
    for ii in ncl:
        dice_FR2 = utils.dice(
            utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0],
            seldict['F_R2_clmaps'][:, ii])
        dice_FS0 = utils.dice(
            utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0],
            seldict['F_S0_clmaps'][:, ii])
        dice_tbl[ii, :] = [dice_FR2, dice_FS0]  # step 3a here and above
    dice_tbl[np.isnan(dice_tbl)] = 0
    """
    Make table of noise gain
    """
    tt_table = np.zeros([len(nc), 4])
    counts_FR2_Z = np.zeros([len(nc), 2])
    for ii in nc:
        comp_noise_sel = utils.andb([
            np.abs(seldict['Z_maps'][:, ii]) > 1.95,
            seldict['Z_clmaps'][:, ii] == 0
        ]) == 2
        countnoise[ii] = np.array(comp_noise_sel, dtype=int).sum()
        noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0]
        noise_FR2_Z = np.log10(
            np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii]))
        signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii],
                                         mask)[t2s != 0] == 1
        signal_FR2_Z = np.log10(
            np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii]))
        counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)]
        ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True)
        # avoid DivideByZero RuntimeWarning
        if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0:
            mwu = stats.norm.ppf(
                stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
        else:
            mwu = -np.inf
        tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
        tt_table[ii, 1] = ttest[1]
    tt_table[np.isnan(tt_table)] = 0
    tt_table[np.isinf(tt_table[:, 0]),
             0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0], 98)

    # Time series derivative kurtosis
    mmix_dt = (mmix[:-1] - mmix[1:])
    mmix_kurt = stats.kurtosis(mmix_dt)
    mmix_std = np.std(mmix_dt, axis=0)
    """
    Step 1: Reject anything that's obviously an artifact
    a. Estimate a null variance
    """
    LGR.debug(
        'Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts')
    rej = ncl[utils.andb(
        [seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0]
    ncl = np.setdiff1d(ncl, rej)
    """
    Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial
    frequency artifacts
    """
    LGR.debug(
        'Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts'
    )
    # spatial information is important so for NIFTI we convert back to 3D space
    if utils.get_dtype(ref_img) == 'NIFTI':
        dim1 = np.prod(check_niimg(ref_img).shape[:2])
    else:
        dim1 = mask.shape[0]
    fproj_arr = np.zeros([dim1, len(nc)])
    fproj_arr_val = np.zeros([dim1, len(nc)])
    spr = []
    fdist = []
    for ii in nc:
        # convert data back to 3D array
        if utils.get_dtype(ref_img) == 'NIFTI':
            tproj = utils.new_nii_like(
                ref_img,
                utils.unmask(seldict['PSC'], mask)[:, ii]).get_data()
        else:
            tproj = utils.unmask(seldict['PSC'], mask)[:, ii]
        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
        fproj_z = fproj.max(axis=-1)
        fproj[fproj == fproj.max()] = 0
        spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=int).sum())
        fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
        fproj_arr_val[:, ii] = fproj_z.flatten()
        if utils.get_dtype(ref_img) == 'NIFTI':
            fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
            fdist.append(
                np.max([
                    utils.fitgaussian(fproj.max(jj))[3:].max()
                    for jj in range(fprojr.ndim)
                ]))
        else:
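            # for non-NIFTI inputs, fall back to precomputed distances bundled with the package (assumed resource)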
            fdist = np.load(os.path.join(RESOURCES, 'fdist.npy'))
    if not isinstance(fdist, np.ndarray):
        fdist = np.array(fdist)
    spr = np.array(spr)
    """
    Step 3: Create feature space of component properties
    """
    LGR.debug('Creating feature space of component properties')
    fdist_pre = fdist.copy()
    fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3
    fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std()
    spz = (spr - spr.mean()) / spr.std()
    Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std()
    varex_ = np.log(seldict['varex'])
    Vz = (varex_ - varex_.mean()) / varex_.std()
    Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std()
    Ktz = np.log(seldict['Kappas']) / 2
    Ktz = (Ktz - Ktz.mean()) / Ktz.std()
    Rtz = np.log(seldict['Rhos']) / 2
    Rtz = (Rtz - Rtz.mean()) / Rtz.std()
    KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos']))
    cnz = (countnoise - countnoise.mean()) / countnoise.std()
    Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001))
    fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
    """
    Step 4: Make initial guess of where BOLD components are and use DBSCAN
    to exclude noise components and find a sample set of 'good' components
    """
    LGR.debug('Making initial guess of BOLD components')
    # epsmap is [index,level of overlap with dicemask,
    # number of high Rho components]
    F05, F025, F01 = utils.getfbounds(n_echos)
    epsmap = []
    Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1]
    # Make an initial guess as to number of good components based on
    # consensus of control points across Rhos and Kappas
    KRcutguesses = [
        getelbow_mod(seldict['Rhos']),
        getelbow_cons(seldict['Rhos']),
        getelbow_aggr(seldict['Rhos']),
        getelbow_mod(seldict['Kappas']),
        getelbow_cons(seldict['Kappas']),
        getelbow_aggr(seldict['Kappas'])
    ]
    Khighelbowval = stats.scoreatpercentile([
        getelbow_mod(seldict['Kappas'], val=True),
        getelbow_cons(seldict['Kappas'], val=True),
        getelbow_aggr(seldict['Kappas'], val=True)
    ] + list(utils.getfbounds(n_echos)),
                                            75,
                                            interpolation_method='lower')
    KRcut = np.median(KRcutguesses)

    # only use exclusive when inclusive is extremely inclusive - double KRcut
    cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2
    cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01
    if cond1 and cond2:
        Kcut = getelbow_mod(seldict['Kappas'], val=True)
    else:
        Kcut = getelbow_cons(seldict['Kappas'], val=True)
    # only use inclusive when exclusive is extremely exclusive - half KRcut
    # (remember for Rho inclusive is higher, so want both Kappa and Rho
    # to default to lower)
    if getelbow_cons(seldict['Rhos']) > KRcut * 2:
        Rcut = getelbow_mod(seldict['Rhos'], val=True)
    # for above, consider something like:
    # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ])
    else:
        Rcut = getelbow_cons(seldict['Rhos'], val=True)
    if Rcut > Kcut:
        Kcut = Rcut  # Rcut should never be higher than Kcut
    KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut])
    # Make guess of Kundu et al 2011 plus remove high frequencies,
    # generally high variance, and high variance given low Kappa
    tt_lim = stats.scoreatpercentile(
        tt_table[tt_table[:, 0] > 0, 0], 75, interpolation_method='lower') / 3
    KRguess = np.setdiff1d(
        np.setdiff1d(nc[KRelbow == 2], rej),
        np.union1d(
            nc[tt_table[:, 0] < tt_lim],
            np.union1d(
                np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([
                    seldict['varex'] > 0.5 *
                    sorted(seldict['varex'])[::-1][int(KRcut)],
                    seldict['Kappas'] < 2 * Kcut
                ]) == 2])))
    guessmask = np.zeros(len(nc))
    guessmask[KRguess] = 1

    # Throw lower-risk bad components out
    rejB = ncl[utils.andb([
        tt_table[ncl, 0] < 0,
        seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut
    ]) == 3]
    rej = np.union1d(rej, rejB)
    ncl = np.setdiff1d(ncl, rej)

    LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components')
    for ii in range(20000):
        eps = .005 + ii * .005
        db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)

        # it would be great to have descriptive names, here
        # DBSCAN found at least three non-noisy clusters
        cond1 = db.labels_.max() > 1
        # DBSCAN didn't detect more classes than the total # of components / 6
        cond2 = db.labels_.max() < len(nc) / 6
        # TODO: confirm if 0 is a special label for DBSCAN
        # my intuition here is that we're confirming DBSCAN labelled previously
        # rejected components as noise (i.e., no overlap between `rej` and
        # labelled DBSCAN components)
        cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0
        # DBSCAN labelled less than half of the total components as noisy
        cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(
            len(nc)) < .5

        if cond1 and cond2 and cond3 and cond4:
            epsmap.append([
                ii,
                utils.dice(guessmask, db.labels_ == 0),
                np.intersect1d(
                    nc[db.labels_ == 0],
                    nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted, val=True)]).
                shape[0]
            ])
        db = None

    epsmap = np.array(epsmap)
    LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(
        len(epsmap)))
    group0 = []
    dbscanfailed = False
    if len(epsmap) != 0:
        # Select index that maximizes Dice with guessmask but first
        # minimizes number of higher Rho components
        ii = int(
            epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]),
                                    1], 0), 0])
        LGR.debug('Component selection tuning: {:.05f}'.format(
            epsmap[:, 1].max()))
        db = DBSCAN(eps=.005 + ii * .005, min_samples=3).fit(fz.T)
        ncl = nc[db.labels_ == 0]
        ncl = np.setdiff1d(ncl, rej)
        ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)])
        group0 = ncl.copy()
        group_n1 = nc[db.labels_ == -1]
        to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
    if len(group0) == 0 or len(group0) < len(KRguess) * .5:
        dbscanfailed = True
        LGR.debug('DBSCAN guess failed; using elbow guess method instead')
        ncl = np.setdiff1d(
            np.setdiff1d(nc[KRelbow == 2], rej),
            np.union1d(
                nc[tt_table[:, 0] < tt_lim],
                np.union1d(
                    np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([
                        seldict['varex'] > 0.5 *
                        sorted(seldict['varex'])[::-1][int(KRcut)],
                        seldict['Kappas'] < 2 * Kcut
                    ]) == 2])))
        group0 = ncl.copy()
        group_n1 = []
        to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
    if len(group0) < 2 or (len(group0) < 4
                           and float(len(rej)) / len(group0) > 3):
        LGR.warning('Extremely limited reliable BOLD signal space! '
                    'Not filtering components beyond BOLD/non-BOLD guesses.')
        midkfailed = True
        min_acc = np.array([])
        if len(group0) != 0:
            # For extremes, building in a 20% tolerance
            toacc_hi = np.setdiff1d(
                nc[utils.andb([
                    fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025,
                    Vz > -2
                ]) == 3], np.union1d(group0, rej))
            min_acc = np.union1d(group0, toacc_hi)
            to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
        else:
            toacc_hi = []
            min_acc = []
        diagstep_keys = [
            'Rejected components', 'Kappa-Rho cut point', 'Kappa cut point',
            'Rho cut point', 'DBSCAN failed to converge',
            'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
            'min_acc', 'toacc_hi'
        ]
        diagstep_vals = [
            list(rej), KRcut, Kcut, Rcut, dbscanfailed, midkfailed,
            list(KRguess),
            list(min_acc),
            list(toacc_hi)
        ]
        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)),
                      ofh,
                      indent=4,
                      sort_keys=True,
                      default=str)
        return list(sorted(min_acc)), list(sorted(rej)), [], list(
            sorted(to_clf))

    # Find additional components to reject based on Dice - doing this here
    # since Dice is a little unstable, need to reference group0
    rej_supp = []
    dice_rej = False
    if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc):
        dice_rej = True
        rej_supp = np.setdiff1d(
            np.setdiff1d(
                np.union1d(rej, nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]),
                group0), group_n1)
        rej = np.union1d(rej, rej_supp)

    # Temporal features
    # larger is worse - spike
    mmix_kurt_z = (mmix_kurt -
                   mmix_kurt[group0].mean()) / mmix_kurt[group0].std()
    # smaller is worse - drift
    mmix_std_z = -1 * (
        (mmix_std - mmix_std[group0].mean()) / mmix_std[group0].std())
    mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0)
    """
    Step 5: Classify midk and ignore using separate SVMs for
    different variance regimes
    # To render hyperplane:
    min_x = np.min(spz2);max_x=np.max(spz2)
    # plotting separating hyperplane
        ww = clf_.coef_[0]
        aa = -ww[0] / ww[1]
        # make sure the next line is long enough
        xx = np.linspace(min_x - 2, max_x + 2)
        yy = aa * xx - (clf_.intercept_[0]) / ww[1]
        plt.plot(xx, yy, '-')
    """
    LGR.debug('Attempting to classify midk components')
    # Tried getting rid of accepting based on SVM altogether,
    # now using only rejecting
    toacc_hi = np.setdiff1d(
        nc[utils.andb([
            fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2
        ]) == 3], np.union1d(group0, rej))
    toacc_lo = np.intersect1d(
        to_clf, nc[utils.andb([
            spz < 1, Rz < 0, mmix_kurt_z_max < 5, Dz > -1, Tz > -1, Vz < 0,
            seldict['Kappas'] >= F025, fdist < 3 *
            np.percentile(fdist[group0], 98)
        ]) == 8])
    midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T,
                            [0] * len(group0) + [1] * len(rej),
                            fproj_arr_val[:, to_clf].T,
                            svmtype=2)
    midk = np.setdiff1d(
        to_clf[utils.andb([
            midk_clf == 1,
            seldict['varex'][to_clf] > np.median(seldict['varex'][group0])
        ]) == 2], np.union1d(toacc_hi, toacc_lo))
    # only use SVM to augment toacc_hi only if toacc_hi isn't already
    # conflicting with SVM choice
    if len(
            np.intersect1d(
                to_clf[utils.andb([midk_clf == 1, Vz[to_clf] > 0]) == 2],
                toacc_hi)) == 0:
        svm_acc_fail = True
        toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0])
    else:
        svm_acc_fail = False
    """
    Step 6: Compute variance associated with low T2* areas
    (e.g. draining veins and low T2* areas)
    # To write out veinmask
    veinout = np.zeros(t2s.shape)
    veinout[t2s!=0] = veinmaskf
    utils.filewrite(veinout, 'veinmaskf', ref_img)
    veinBout = utils.unmask(veinmaskB, mask)
    utils.filewrite(veinBout, 'veins50', ref_img)
    """
    LGR.debug(
        'Computing variance associated with low T2* areas (e.g., draining veins)'
    )
    tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape)
    tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(
        seldict['tsoc_B'])[seldict['Z_clmaps'] != 0]
    sig_B = [
        stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25)
        if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0 else 0 for ii in nc
    ]
    sig_B = np.abs(seldict['tsoc_B']) > np.tile(
        sig_B, [seldict['tsoc_B'].shape[0], 1])

    veinmask = utils.andb([
        t2s < stats.scoreatpercentile(
            t2s[t2s != 0], 15, interpolation_method='lower'), t2s != 0
    ]) == 2
    veinmaskf = veinmask[mask]
    veinR = np.array(sig_B[veinmaskf].sum(0),
                     dtype=float) / sig_B[~veinmaskf].sum(0)
    veinR[np.isnan(veinR)] = 0

    veinc = np.union1d(rej, midk)
    rej_veinRZ = ((veinR - veinR[veinc].mean()) / veinR[veinc].std())[veinc]
    rej_veinRZ[rej_veinRZ < 0] = 0
    rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0
    t2s_lim = [
        stats.scoreatpercentile(t2s[t2s != 0],
                                50,
                                interpolation_method='lower'),
        stats.scoreatpercentile(
            t2s[t2s != 0], 80, interpolation_method='lower') / 2
    ]
    phys_var_zs = []
    for t2sl_i in range(len(t2s_lim)):
        t2sl = t2s_lim[t2sl_i]
        veinW = sig_B[:, veinc] * np.tile(rej_veinRZ, [sig_B.shape[0], 1])
        veincand = utils.unmask(
            utils.andb([
                s0[t2s != 0] < np.median(s0[t2s != 0]), t2s[t2s != 0] < t2sl
            ]) >= 1, t2s != 0)[mask]
        veinW[~veincand] = 0
        invein = veinW.sum(
            axis=1)[(utils.unmask(veinmaskf, mask) *
                     utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]]
        minW = 10 * (np.log10(invein).mean()) - 1 * 10**(
            np.log10(invein).std())
        veinmaskB = veinW.sum(axis=1) > minW
        tsoc_Bp = seldict['tsoc_B'].copy()
        tsoc_Bp[tsoc_Bp < 0] = 0
        vvex = np.array([
            (tsoc_Bp[veinmaskB, ii]**2.).sum() / (tsoc_Bp[:, ii]**2.).sum()
            for ii in nc
        ])
        group0_res = np.intersect1d(KRguess, group0)
        phys_var_zs.append(
            (vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
        veinBout = utils.unmask(veinmaskB, mask)
        utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)

    # Mask to sample veins
    phys_var_z = np.array(phys_var_zs).max(0)
    Vz2 = (varex_ - varex_[group0].mean()) / varex_[group0].std()
    """
    Step 4: Learn joint TE-dependence spatial and temporal models to move
    remaining artifacts to ignore class
    """
    LGR.debug(
        'Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts'
    )

    to_ign = []

    minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)])
    newcest = len(group0) + len(
        toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign])
    phys_art = np.setdiff1d(
        nc[utils.andb([phys_var_z > 3.5, seldict['Kappas'] < minK_ign]) == 2],
        group0)
    rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas'])
    phys_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([phys_var_z > 2, rank_diff > newcest /
                           2, Vz2 > -1]) == 3], group0), phys_art)
    # Want to replace field_art with an acf/SVM based approach
    # instead of a kurtosis/filter one
    field_art = np.setdiff1d(
        nc[utils.andb([mmix_kurt_z_max > 5, seldict['Kappas'] < minK_ign]) ==
           2], group0)
    field_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([
                mmix_kurt_z_max > 2,
                (stats.rankdata(mmix_kurt_z_max) -
                 stats.rankdata(seldict['Kappas'])) > newcest /
                2, Vz2 > 1, seldict['Kappas'] < F01
            ]) == 4], group0), field_art)
    field_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([
                mmix_kurt_z_max > 3, Vz2 > 3,
                seldict['Rhos'] > np.percentile(seldict['Rhos'][group0], 75)
            ]) == 3], group0), field_art)
    field_art = np.union1d(
        np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                     group0), field_art)
    misc_art = np.setdiff1d(
        nc[utils.andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest /
                       2, seldict['Kappas'] < Khighelbowval]) == 2], group0)
    ign_cand = np.unique(list(field_art) + list(phys_art) + list(misc_art))
    midkrej = np.union1d(midk, rej)
    to_ign = np.setdiff1d(list(ign_cand), midkrej)
    toacc = np.union1d(toacc_hi, toacc_lo)
    ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej))
    ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
    orphan = np.setdiff1d(nc,
                          list(ncl) + list(to_ign) + list(midk) + list(rej))

    # Last ditch effort to save some transient components
    if not strict_mode:
        Vz3 = (varex_ - varex_[ncl].mean()) / varex_[ncl].std()
        ncl = np.union1d(
            ncl,
            np.intersect1d(
                orphan, nc[utils.andb([
                    seldict['Kappas'] > F05, seldict['Rhos'] < F025,
                    seldict['Kappas'] > seldict['Rhos'], Vz3 <= -1, Vz3 > -3,
                    mmix_kurt_z_max < 2.5
                ]) == 6]))
        ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
        orphan = np.setdiff1d(
            nc,
            list(ncl) + list(to_ign) + list(midk) + list(rej))

    if savecsdiag:
        diagstep_keys = [
            'Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
            'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
            'Dice rejected', 'rej_supp', 'to_clf', 'Mid-kappa components',
            'svm_acc_fail', 'toacc_hi', 'toacc_lo', 'Field artifacts',
            'Physiological artifacts', 'Miscellaneous artifacts', 'ncl',
            'Ignored components'
        ]
        diagstep_vals = [
            list(rej),
            KRcut.item(),
            Kcut.item(),
            Rcut.item(), dbscanfailed,
            list(KRguess), dice_rej,
            list(rej_supp),
            list(to_clf),
            list(midk), svm_acc_fail,
            list(toacc_hi),
            list(toacc_lo),
            list(field_art),
            list(phys_art),
            list(misc_art),
            list(ncl),
            list(ign)
        ]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)),
                      ofh,
                      indent=4,
                      sort_keys=True,
                      default=str)
        allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
        np.savetxt('csdata.txt', allfz)

    return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(
        sorted(ign))
Example #20
0
def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, t2s, s0, olevel=2,
             oversion=99, filecsdata=True, savecsdiag=True, strict_mode=False):
    """
    Labels ICA components to keep or remove from denoised data

    The selection process uses pre-calculated parameters for each ICA component
    inputted into this function in `seldict` such as
    Kappa (a T2* weighting metric), Rho (an S0 weighting metric), and variance
    explained. Additional selection metrics are calculated within this function
    and then used to classify each component into one of four groups.

    Parameters
    ----------
    seldict : :obj:`dict`
        As output from `fitmodels_direct`
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the number of volumes in the original data
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    manacc : :obj:`str`
        Comma-separated string of indices of manually accepted components
    n_echos : :obj:`int`
        Number of echos in original data
    t2s : (S,) array_like
        Estimated T2* map
    s0 : (S,) array_like
        S0 map
    olevel : :obj:`int`, optional
        Default: 2
    oversion : :obj:`int`, optional
        Default: 99
    filecsdata: :obj:`bool`, optional
        Default: True
    savecsdiag: :obj:`bool`, optional
        Default: True
    strict_mode: :obj:`bool`, optional
        Default: False

    Returns
    -------
    acc : :obj:`list`
        Indices of accepted (BOLD) components in `mmix`
    rej : :obj:`list`
        Indices of rejected (non-BOLD) components in `mmix`
    midk : :obj:`list`
        Indices of mid-K (questionable) components in `mmix`
        These components are typically removed from the data during denoising
    ign : :obj:`list`
        Indices of ignored components in `mmix`
        Ignored components are considered to have too low variance to matter.
        They are not processed through the accept vs reject decision tree and
        are NOT removed during the denoising process

    Notes
    -----
    The selection algorithm used in this function is from work by prantikk
    It is from selcomps function in select_model_fft20e.py in
    version 3.2 of MEICA at:
    https://github.com/ME-ICA/me-ica/blob/b2781dd087ab9de99a2ec3925f04f02ce84f0adc/meica.libs/select_model_fft20e.py
    Many of the early publications using and evaluating the MEICA method used a
    different selection algorithm by prantikk. The final 2.5 version of that
    algorithm is in the selcomps function in select_model.py at:
    https://github.com/ME-ICA/me-ica/blob/b2781dd087ab9de99a2ec3925f04f02ce84f0adc/meica.libs/select_model.py

    In both algorithms, the ICA component selection process uses multiple
    metrics that include: kappa, rho, variance explained, component spatial
    weighting maps, noise and spatial frequency metrics, and measures of
    spatial overlap across metrics. The precise calculations may vary between
    algorithms. The most notable difference is that the v2.5 algorithm is a
    fixed decision tree where all decisions were made based on whether
    combinations of metrics crossed various thresholds. In the v3.2 algorithm,
    clustering and support vector machines are also used to classify components
    based on how similar the metrics in one component are to the metrics in
    other components.
    """
    if mmix.ndim != 2:
        raise ValueError('Parameter mmix should be 2d, not {0}d'.format(mmix.ndim))
    elif t2s.ndim != 1:  # FIT not necessarily supported
        raise ValueError('Parameter t2s should be 1d, not {0}d'.format(t2s.ndim))
    elif s0.ndim != 1:  # FIT not necessarily supported
        raise ValueError('Parameter s0 should be 1d, not {0}d'.format(s0.ndim))
    elif not (t2s.shape[0] == s0.shape[0] == mask.shape[0]):
        raise ValueError('First dimensions (number of samples) of t2s ({0}), '
                         's0 ({1}), and mask ({2}) do not '
                         'match'.format(t2s.shape[0], s0.shape[0], mask.shape[0]))

    """
    handwerkerd and others are working to "hypercomment" this function to
    help everyone understand it sufficiently with the goal of eventually
    modularizing the algorithm. This is still a work in progress: later
    sections are not fully commented, some points of uncertainty are noted,
    and the summary of the full algorithm is not yet complete.

    There are sections of this code that calculate metrics that are used in
    the decision tree for the selection process and other sections that
    are part of the decision tree. Certain comments are prefaced with METRIC
    and variable names to make clear which are metrics and others are prefaced
    with SELECTION to make clear which are for applying metrics. METRICs tend
    to be summary values that contain a single number per component.

    Note there are some variables that are calculated in one section of the code
    that are later transformed into another metric that is actually part of a
    selection criterion. This running list is an attempt to summarize
    intermediate metrics vs the metrics that are actually used in decision
    steps. For applied metrics that are made up of intermediate metrics defined
    in earlier sections of the code, the constituent metrics are noted. More
    metrics will be added to the applied metrics section as the commenting of
    this function continues.

    Intermediate Metrics:  seldict['F_S0_clmaps'] seldict['F_R2_clmaps']
        seldict['Br_clmaps_S0'] seldict['Br_clmaps_R2'] seldict['Z_maps']
        dice_tbl countnoise
        counts_FR2_Z tt_table mmix_kurt mmix_std
        spr fproj_arr_val fdist
        Rtz, Dz

    Applied Metrics:
        seldict['Rhos']
        seldict['Kappas']
        seldict['varex']
        countsigFS0
        countsigFR2
        fz (a combination of multiple z-scored metrics: tt_table,
            seldict['varex'], seldict['Kappas'], seldict['Rhos'], countnoise,
            mmix_kurt, fdist)
        tt_table[:,0]
        spz (z score of spr)
        KRcut
    """

    """
    If seldict exists, save it into a pickle file called compseldata.pklbz
    that can be loaded directly into python for future analyses
    If seldict=None, load it from the pre-saved pickle file to use for the
    rest of this function
    """
    if filecsdata:
        import bz2
        if seldict is not None:
            LGR.info('Saving component selection data')
            with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f:
                pickle.dump(seldict, csstate_f)
        else:
            try:
                with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
                    seldict = pickle.load(csstate_f)
            except FileNotFoundError:
                LGR.warning('Failed to load component selection data')
                return None
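    """
    For later offline analyses, the saved state can be reloaded directly,
    e.g. (a usage sketch; assumes the file sits in the working directory
    used above):
    import bz2
    import pickle
    with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
        seldict = pickle.load(csstate_f)
    """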

    """
    List of components
    all_comps and acc_comps start out as an ordered list of the component numbers
    all_comps is constant throughout the function.
    acc_comps changes through this function as components are assigned to other
    categories (i.e. components that are classified as rejected are removed
    from acc_comps)
    """
    midk = []
    ign = []
    all_comps = np.arange(len(seldict['Kappas']))
    acc_comps = np.arange(len(seldict['Kappas']))

    """
    If user has specified components to accept manually, just assign those
    components to the accepted and rejected comp lists and end the function
    """
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        midk = []
        rej = sorted(np.setdiff1d(all_comps, acc))
        ign = []
        return acc, rej, midk, ign  # Add string for ign
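    """
    A usage sketch of the manual override (the component indices here are
    hypothetical):
    selcomps(seldict, mmix, mask, ref_img, manacc='1,3,7', n_echos=n_echos,
             t2s=t2s, s0=s0)
    would return acc=[1, 3, 7], rej=every other component, midk=[], ign=[]
    """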

    """
    METRICS: countsigFS0 countsigFR2
    F_S0_clmaps & F_R2_clmaps are the thresholded & binarized clustered maps of
    significant fits for the separate S0 and R2 cross-echo models per component.
    Since the values are 0 or 1, the countsig variables are a count of the
    significant voxels per component.
    The cluster size is a function of the # of voxels in the mask.
    The cluster threshold is based on the # of echos acquired
    """
    countsigFS0 = seldict['F_S0_clmaps'].sum(0)
    countsigFR2 = seldict['F_R2_clmaps'].sum(0)
    countnoise = np.zeros(len(all_comps))

    """
    Make table of dice values
    METRICS: dice_tbl
    dice_FR2, dice_FS0 are calculated for each component and the concatenated
    values are in dice_tbl
    Br_clmaps_R2 and Br_clmaps_S0 are binarized clustered Z_maps.
    The volume being clustered is the rank order indices of the absolute value
    of the beta values for the fit between the optimally combined time series
    and the mixing matrix (i.e. the lowest beta value is 1 and the highest is
    the # of voxels).
    The cluster size is a function of the # of voxels in the mask.
    The cluster threshold keeps the voxels with beta ranks greater than
    countsigFS0 or countsigFR2 (i.e. roughly the same number of voxels will be
    in the countsig clusters as the ICA beta map clusters)
    These dice values are the Dice-Sorenson index for the Br_clmap_?? and the
    F_??_clmap.
    If handwerkerd understands this correctly, if the voxels with the above
    threshold F stats are clustered in the same voxels with the highest beta
    values, then the dice coefficient will be 1. If the thresholded F or betas
    aren't spatially clustered (i.e. the component map is less spatially smooth)
    or the clusters are in different locations (i.e. voxels with high betas
    are also noisier so they have lower F values), then the dice coefficients
    will be lower.
    """
    dice_tbl = np.zeros([all_comps.shape[0], 2])
    for comp_num in all_comps:
        dice_FR2 = utils.dice(utils.unmask(seldict['Br_clmaps_R2'][:, comp_num],
                                           mask)[t2s != 0],
                              seldict['F_R2_clmaps'][:, comp_num])
        dice_FS0 = utils.dice(utils.unmask(seldict['Br_clmaps_S0'][:, comp_num],
                                           mask)[t2s != 0],
                              seldict['F_S0_clmaps'][:, comp_num])
        dice_tbl[comp_num, :] = [dice_FR2, dice_FS0]  # step 3a here and above
    dice_tbl[np.isnan(dice_tbl)] = 0
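    """
    A minimal sketch of the Dice-Sorenson index as described above, assuming
    utils.dice implements the standard formula on binarized maps:
    def dice_sketch(arr1, arr2):
        arr1, arr2 = (arr1 != 0), (arr2 != 0)
        denom = arr1.sum() + arr2.sum()
        return 2. * np.logical_and(arr1, arr2).sum() / denom if denom else 0.
    """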

    """
    Make table of noise gain
    METRICS: countnoise, counts_FR2_Z, tt_table
    (This is a bit confusing & is handwerkerd's attempt at making sense of this)
    seldict['Z_maps'] is the Fisher Z normalized beta fits for the optimally
    combined time series and the mixing matrix. Z_clmaps is a binarized cluster
    of Z_maps with the cluster size based on the # of voxels and the cluster
    threshold of 1.95. utils.andb is a sum of the True values in arrays so
    comp_noise_sel is true for voxels where the Z values are greater than 1.95
    but not part of a cluster of Z values that are greater than 1.95.
    Spatially unclustered voxels with high Z values could be considered noisy.
    countnoise is the # of voxels per component where comp_noise_sel is true.

    counts_FR2_Z is the number of voxels with Z values above the threshold
    that are in clusters (signal) and the number outside of clusters (noise)

    tt_table is a bit confusing. For each component, the first index is
    some type of normalized, log10, signal/noise t statistic and the second is
    the p value for the signal/noise t statistic (for the R2 model).
    In general, these should have bigger t stats or lower p values when most of
    the Z values above threshold are inside clusters.
    Because of the log10, values below 1 are negative, which is later used as
    a threshold. It doesn't seem like the p values are ever used.
    """
    tt_table = np.zeros([len(all_comps), 4])
    counts_FR2_Z = np.zeros([len(all_comps), 2])
    for comp_num in all_comps:
        comp_noise_sel = utils.andb([np.abs(seldict['Z_maps'][:, comp_num]) > 1.95,
                                     seldict['Z_clmaps'][:, comp_num] == 0]) == 2
        countnoise[comp_num] = np.array(comp_noise_sel, dtype=np.int).sum()
        noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0]
        noise_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, comp_num]))
        signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, comp_num], mask)[t2s != 0] == 1
        signal_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, comp_num]))
        counts_FR2_Z[comp_num, :] = [len(signal_FR2_Z), len(noise_FR2_Z)]
        ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True)
        # avoid DivideByZero RuntimeWarning
        if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0:
            mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
        else:
            mwu = -np.inf
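        # the combined statistic below takes its magnitude from the
        # Mann-Whitney z and its sign from the t statistic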
        tt_table[comp_num, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
        tt_table[comp_num, 1] = ttest[1]
    tt_table[np.isnan(tt_table)] = 0
    tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0],
                                                          98)

    """
    Time series derivative kurtosis
    METRICS: mmix_kurt and mmix_std
    Take the derivative of the time series for each component in the ICA
    mixing matrix and calculate the kurtosis & standard deviation.
    handwerkerd thinks these metrics are later used to calculate measures
    of time series spikiness and drift in the component time series.
    """
    mmix_dt = (mmix[:-1, :] - mmix[1:, :])
    mmix_kurt = stats.kurtosis(mmix_dt)
    mmix_std = np.std(mmix_dt, axis=0)
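    """
    A toy illustration of why these derivative moments flag spikes and drifts
    (hypothetical time series, not pipeline data):
    spiky = np.zeros(100)
    spiky[50] = 5.                    # one jump -> derivative kurtosis is large
    drifty = np.linspace(0, 1, 100)   # constant derivative -> derivative std is tiny
    stats.kurtosis(np.diff(spiky))    # large positive value
    np.std(np.diff(drifty))           # ~0
    """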

    """
    SELECTION #1 (prantikk labeled "Step 1")
    Reject anything that is obviously an artifact
    Obvious artifacts are components with Rho>Kappa or with more clustered,
    significant voxels for the S0 model than the R2 model
    """
    LGR.debug('Rejecting gross artifacts based on Rho/Kappa values and S0/R2 '
              'counts')
    rej = acc_comps[utils.andb([seldict['Rhos'] > seldict['Kappas'],
                                countsigFS0 > countsigFR2]) > 0]
    acc_comps = np.setdiff1d(acc_comps, rej)
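    """
    utils.andb sums boolean arrays element-wise, so the `> 0` above keeps
    components where either condition holds, while `== 2` elsewhere in this
    function requires both. A sketch of the assumed behavior:
    utils.andb([np.array([True, False]), np.array([True, True])])
    # -> array([2, 1])
    """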

    """
    prantikk labeled "Step 2"
    Compute 3-D spatial FFT of Beta maps to detect high-spatial
    frequency artifacts

    METRIC spr, fproj_arr_val, fdist
    PSC is the mean centered beta map for each ICA component
    The FFT is sequentially calculated across each dimension of PSC & the max
    value is removed (probably the 0Hz bin). The maximum remaining frequency
    magnitude along the z dimension is calculated leaving a 2D matrix.
    spr contains a count of the number of frequency bins in the 2D matrix where
    the frequency magnitude is greater than 4* the maximum freq in the matrix.
    spr is later z-normed across components into spz and this is actually used
    as a selection metric.
    handwerkerd interpretation: spr is bigger the more values of the fft are
    >1/4 the max. Thus, if you assume the highest mag bins are low frequency, &
    all components have roughly the same low freq power (i.e. a brain-shaped
    blob), then spr will be bigger the more high frequency bins have magnitudes
    that are more than 1/4 of the low frequency bins.

    fproj_arr_val is a flattened 1D vector of the 2D max projection fft
    of each component. This seems to be later used to train an SVM on this
    value for rejected components in order to classify some of the remaining
    components as midk.
    Note: fproj_arr is created here and is a ranked list of FFT values, but is
    not used anywhere in the code. Was fproj_arr_val supposed to contain this
    ranking?

    fdist isn't completely clear to handwerkerd yet but it looks like the fit of
    the fft of the spatial map to a Gaussian distribution. If so, then the
    worse the fit, the more high frequency power would be in the component
    """
    LGR.debug('Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts')
    # spatial information is important so for NIFTI we convert back to 3D space
    if utils.get_dtype(ref_img) == 'NIFTI':
        dim1 = np.prod(check_niimg(ref_img).shape[:2])
    else:
        dim1 = mask.shape[0]
    fproj_arr = np.zeros([dim1, len(all_comps)])
    fproj_arr_val = np.zeros([dim1, len(all_comps)])
    spr = []
    fdist = []
    for comp_num in all_comps:
        # convert data back to 3D array
        if utils.get_dtype(ref_img) == 'NIFTI':
            tproj = utils.new_nii_like(ref_img, utils.unmask(seldict['PSC'],
                                                             mask)[:, comp_num]).get_data()
        else:
            tproj = utils.unmask(seldict['PSC'], mask)[:, comp_num]
        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
        fproj_z = fproj.max(axis=-1)
        fproj[fproj == fproj.max()] = 0
        spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum())
        fproj_arr[:, comp_num] = stats.rankdata(fproj_z.flatten())
        fproj_arr_val[:, comp_num] = fproj_z.flatten()
        if utils.get_dtype(ref_img) == 'NIFTI':
            fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
            fdist.append(np.max([utils.fitgaussian(fproj.max(jj))[3:].max() for
                         jj in range(fprojr.ndim)]))
        else:
            fdist = np.load(os.path.join(RESOURCES, 'fdist.npy'))
    if type(fdist) is not np.ndarray:
        fdist = np.array(fdist)
    spr = np.array(spr)

    """
    prantikk labelled Step 3
    Create feature space of component properties
    METRIC fz, spz, Rtz, Dz

    fz is a matrix of multiple other metrics described above and calculated
    in this section. All of these have one number per component and most
    are z-scored across components.
    Attempted explanations in order:
    Tz: The z-scored t statistics of the spatial noisiness metric in tt_table
    Vz: The z-scored the natural log of the non-normalized variance explained
        of each component
    Ktz: The z-scored natural log of the Kappa values
    (the '/ 2' does not seem necessary because it will be removed by z-scoring)
    KRr: The z-scored ratio of the natural log of Kappa / nat log of Rho
    (unclear why sometimes using stats.zscore and other times writing the eq out)
    cnz: The z-scored measure of a noisy voxel count where the noisy voxels are
         the voxels with large beta estimates, but aren't part of clusters
    Rz: z-scored rho values (why isn't this log scaled, like kappa in Ktz?)
    mmix_kurt: Probably a rough measure of the spikiness of each component's
        time series in the ICA mixing matrix
    fdist_z: z-score of fdist, which is probably a measure of high freq info
        in the spatial FFT of the components (with lower being more high freq?)

    NOT in fz:
    spz: Z-scored measure probably of how much high freq is in the data. Larger
        values mean more bins of the FFT have over 1/4 the power of the maximum
        bin (read about spr above for more info)
    Rtz: Z-scored natural log of the Rho values
    Dz: Z-scored Fisher Z transformed dice values of the overlap between
        clusters for the F stats and clusters of the ICA spatial beta maps with
        roughly the same number of voxels as in the clustered F maps.
        Dz saves this for the R2 model, there are also Dice coefs for the S0
        model in dice_tbl
    """
    LGR.debug('Creating feature space of component properties')
    fdist_pre = fdist.copy()
    fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3
    fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std()  # not a true z-score (median-centered)
    spz = stats.zscore(spr)
    Tz = stats.zscore(tt_table[:, 0])
    varex_log = np.log(seldict['varex'])
    Vz = stats.zscore(varex_log)
    Rz = stats.zscore(seldict['Rhos'])
    Ktz = stats.zscore(np.log(seldict['Kappas']) / 2)
    #  Rtz = stats.zscore(np.log(seldict['Rhos']) / 2)
    KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos']))
    cnz = stats.zscore(countnoise)
    Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001))
    fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])

    """
    METRICS Kcut, Rcut, KRcut, KRcutguesses, Khighelbowval
    Step 3: Make initial guess of where BOLD components are and use DBSCAN
    to exclude noise components and find a sample set of 'good' components
    """
    LGR.debug('Making initial guess of BOLD components')
    # The F thresholds for the echo fit (based on the # of echos) for p<0.05,
    #    p<0.025, and p<0.01 (confirm this is accurate since the function
    #    contains a lookup table rather than a calculation)
    F05, F025, F01 = utils.getfbounds(n_echos)
    # epsmap is [index,level of overlap with dicemask,
    # number of high Rho components]
    epsmap = []
    Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1]
    """
    Make an initial guess as to the number of good components based on
     consensus of control points across Rhos and Kappas
    For terminology later, typically getelbow _aggr > _mod > _cons
      though this might not be universally true. A more "inclusive" threshold
      has a lower kappa since that means more components are above that thresh
      and are likely to be accepted. For Rho, a more "inclusive" threshold is
      higher since that means fewer components will be rejected based on rho.
    KRcut seems weird to handwerkerd. I see that the thresholds are slightly
     shifted for kappa & rho later in the code, but why would we ever want to
     set a common threshold reference point for both? These are two different
     elbows on two different data sets.
    """
    KRcutguesses = [getelbow_mod(seldict['Rhos']),
                    getelbow_cons(seldict['Rhos']),
                    getelbow_aggr(seldict['Rhos']),
                    getelbow_mod(seldict['Kappas']),
                    getelbow_cons(seldict['Kappas']),
                    getelbow_aggr(seldict['Kappas'])]
    KRcut = np.median(KRcutguesses)
    """
    Also a bit weird to handwerkerd. This is the 75th percentile of the pooled
    Kappa values at the 3 elbow selection criteria and the F stats for 3
    significance thresholds based on the # of echos.
    This is some type of way to get a significance criterion for a component
    fit, but it's unclear why this specific criterion is useful.
    """
    Khighelbowval = stats.scoreatpercentile([getelbow_mod(seldict['Kappas'],
                                                          return_val=True),
                                             getelbow_cons(seldict['Kappas'],
                                                           return_val=True),
                                             getelbow_aggr(seldict['Kappas'],
                                                           return_val=True)] +
                                            list(utils.getfbounds(n_echos)),
                                            75, interpolation_method='lower')
    """
    Default to the most inclusive kappa threshold (_cons) unless:
    1. That threshold is more than twice the median of Kappa & Rho thresholds
    2. and the moderate elbow is more inclusive than the p=0.01 F threshold
    handwerkerd: This actually seems like a way to avoid using the theoretically
       most liberal threshold only when there was a bad estimate and _mod is
       more inclusive. My one concern is that it's an odd way to test that
       the _mod elbow is any better. Why not at least see if _mod < _cons?
    prantikk's orig comment for this section is:
      "only use exclusive when inclusive is extremely inclusive - double KRcut"
    """
    cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2
    cond2 = getelbow_mod(seldict['Kappas'], return_val=True) < F01
    if cond1 and cond2:
        Kcut = getelbow_mod(seldict['Kappas'], return_val=True)
    else:
        Kcut = getelbow_cons(seldict['Kappas'], return_val=True)
    """
    handwerkerd: The goal seems to be to maximize the rejected components
       based on the rho cut by defaulting to a lower Rcut value. Again, if
       that is the goal, why not just test if _mod < _cons?
    prantikk's orig comment for this section is:
        only use inclusive when exclusive is extremely exclusive - half KRcut
        (remember for Rho inclusive is higher, so want both Kappa and Rho
        to default to lower)
    """
    if getelbow_cons(seldict['Rhos']) > KRcut * 2:
        Rcut = getelbow_mod(seldict['Rhos'], return_val=True)
    # for above, consider something like:
    # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ])
    else:
        Rcut = getelbow_cons(seldict['Rhos'], return_val=True)

    # Rcut should never be higher than Kcut (handwerkerd: not sure why)
    if Rcut > Kcut:
        Kcut = Rcut

    # KRelbow has a 2 for components that are above the Kappa accept threshold
    # and below the rho reject threshold
    KRelbow = utils.andb([seldict['Kappas'] > Kcut,
                          seldict['Rhos'] < Rcut])
    """
    Make guess of Kundu et al 2011 plus remove high frequencies,
    generally high variance, and high variance given low Kappa
    the first index of tt_table is a t statistic of what handwerkerd thinks
      is a spatial noise metric. Since the log10 of these values is taken, the >0
      threshold means the metric is >1. tt_lim seems to be a fairly aggressive
      percentile that is then divided by 3.
    """
    tt_lim = stats.scoreatpercentile(tt_table[tt_table[:, 0] > 0, 0],
                                     75, interpolation_method='lower') / 3
    """
    KRguess is a list of components to potentially accept. It starts with a
      list of components that cross the Kcut and Rcut threshold and weren't
      previously rejected for other reasons. From that list, it removes more
      components based on several additional criteria:
      1. tt_table less than the tt_lim threshold (spatial noisiness metric)
      2. spz (a z-scored probably high spatial freq metric) >1
      3. Vz (a z-scored variance explained metric) >2
      4. If both (seems to be if a component has a relatively high variance
          the acceptance threshold for Kappa values is doubled):
         A. The variance explained is greater than half the variance of the
             KRcut-th highest variance component
         B. Kappa is less than twice Kcut
    """
    temp = all_comps[utils.andb([seldict['varex'] > 0.5 *
                                 sorted(seldict['varex'])[::-1][int(KRcut)],
                                 seldict['Kappas'] < 2*Kcut]) == 2]
    KRguess = np.setdiff1d(np.setdiff1d(all_comps[KRelbow == 2], rej),
                           np.union1d(all_comps[tt_table[:, 0] < tt_lim],
                           np.union1d(np.union1d(all_comps[spz > 1],
                                                 all_comps[Vz > 2]),
                                      temp)))
    guessmask = np.zeros(len(all_comps))
    guessmask[KRguess] = 1
    """
    Throw lower-risk bad components out based on 3 criteria all being true:
      1. tt_table (a spatial noisiness metric) <0
      2. A component's variance explained is greater than the median variance
         explained
      3. The component index is greater than the KRcut index (since the
          components are sorted by kappa, this is another kappa threshold)
    """
    rejB = acc_comps[utils.andb([tt_table[acc_comps, 0] < 0,
                                 seldict['varex'][acc_comps] > np.median(seldict['varex']),
                                 acc_comps > KRcut]) == 3]
    rej = np.union1d(rej, rejB)
    # adjust acc_comps again to only contain the remaining non-rejected components
    acc_comps = np.setdiff1d(acc_comps, rej)

    """
    This is where handwerkerd has paused in hypercommenting the function.
    """
    LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components')
    for ii in range(20000):
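        # sweep eps from 0.005 up to 100.0 in 0.005 increments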
        eps = .005 + ii * .005
        db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)

        # it would be great to have descriptive names here
        # DBSCAN found at least three non-noisy clusters
        cond1 = db.labels_.max() > 1
        # DBSCAN didn't detect more classes than the total # of components / 6
        cond2 = db.labels_.max() < len(all_comps) / 6
        # TODO: confirm if 0 is a special label for DBSCAN
        # my intuition here is that we're confirming DBSCAN labelled previously
        # rejected components as noise (i.e., no overlap between `rej` and
        # labelled DBSCAN components)
        cond3 = np.intersect1d(rej, all_comps[db.labels_ == 0]).shape[0] == 0
        # DBSCAN labelled less than half of the total components as noisy
        cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(len(all_comps)) < .5

        if cond1 and cond2 and cond3 and cond4:
            epsmap.append([ii, utils.dice(guessmask, db.labels_ == 0),
                           np.intersect1d(all_comps[db.labels_ == 0],
                           all_comps[seldict['Rhos'] > getelbow_mod(Rhos_sorted,
                                                                    return_val=True)]).shape[0]])
        db = None

    epsmap = np.array(epsmap)
    LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(len(epsmap)))
    group0 = []
    dbscanfailed = False
    if len(epsmap) != 0:
        # Select index that maximizes Dice with guessmask but first
        # minimizes number of higher Rho components
        ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
        LGR.debug('Component selection tuning: {:.05f}'.format(epsmap[:, 1].max()))
        db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
        acc_comps = all_comps[db.labels_ == 0]
        acc_comps = np.setdiff1d(acc_comps, rej)
        acc_comps = np.setdiff1d(acc_comps, acc_comps[acc_comps > len(all_comps) - len(rej)])
        group0 = acc_comps.copy()
        group_n1 = all_comps[db.labels_ == -1]
        to_clf = np.setdiff1d(all_comps, np.union1d(acc_comps, rej))

    if len(group0) == 0 or len(group0) < len(KRguess) * .5:
        dbscanfailed = True
        LGR.debug('DBSCAN guess failed; using elbow guess method instead')
        temp = all_comps[utils.andb([seldict['varex'] > 0.5 *
                                     sorted(seldict['varex'])[::-1][int(KRcut)],
                                     seldict['Kappas'] < 2 * Kcut]) == 2]
        acc_comps = np.setdiff1d(np.setdiff1d(all_comps[KRelbow == 2], rej),
                                 np.union1d(all_comps[tt_table[:, 0] < tt_lim],
                                 np.union1d(np.union1d(all_comps[spz > 1],
                                                       all_comps[Vz > 2]),
                                            temp)))
        group0 = acc_comps.copy()
        group_n1 = []
        to_clf = np.setdiff1d(all_comps, np.union1d(group0, rej))

    if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3):
        LGR.warning('Extremely limited reliable BOLD signal space! '
                    'Not filtering components beyond BOLD/non-BOLD guesses.')
        midkfailed = True
        min_acc = np.array([])
        if len(group0) != 0:
            # For extremes, building in a 20% tolerance
            toacc_hi = np.setdiff1d(all_comps[utils.andb([fdist <= np.max(fdist[group0]),
                                                          seldict['Rhos'] < F025,
                                                          Vz > -2]) == 3],
                                    np.union1d(group0, rej))
            min_acc = np.union1d(group0, toacc_hi)
            to_clf = np.setdiff1d(all_comps, np.union1d(min_acc, rej))
        else:
            toacc_hi = []
            min_acc = []
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point',
                         'Kappa cut point', 'Rho cut point',
                         'DBSCAN failed to converge',
                         'Mid-Kappa failed (limited BOLD signal)',
                         'Kappa-Rho guess',
                         'min_acc', 'toacc_hi']
        diagstep_vals = [list(rej), KRcut, Kcut, Rcut, dbscanfailed,
                         midkfailed, list(KRguess), list(min_acc), list(toacc_hi)]
        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh,
                      indent=4, sort_keys=True, default=str)
        return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))

    # Find additional components to reject based on Dice - doing this here
    # since Dice is a little unstable, need to reference group0
    rej_supp = []
    dice_rej = False
    if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(all_comps):
        dice_rej = True
        temp = all_comps[dice_tbl[all_comps, 0] <= dice_tbl[all_comps, 1]]
        rej_supp = np.setdiff1d(np.setdiff1d(np.union1d(rej, temp),
                                             group0), group_n1)
        rej = np.union1d(rej, rej_supp)

    # Temporal features
    # larger is worse - spike
    mmix_kurt_z = (mmix_kurt-mmix_kurt[group0].mean()) / mmix_kurt[group0].std()
    # smaller is worse - drift
    mmix_std_z = -1 * ((mmix_std-mmix_std[group0].mean()) / mmix_std[group0].std())
    mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0)

    """
    Step 2: Classify midk and ignore using separate SVMs for
    different variance regimes
    # To render hyperplane:
    min_x = np.min(spz2);max_x=np.max(spz2)
    # plotting separating hyperplane
        ww = clf_.coef_[0]
        aa = -ww[0] / ww[1]
        # make sure the next line is long enough
        xx = np.linspace(min_x - 2, max_x + 2)
        yy = aa * xx - (clf_.intercept_[0]) / ww[1]
        plt.plot(xx, yy, '-')
    """
    LGR.debug('Attempting to classify midk components')
    # Tried getting rid of accepting based on SVM altogether,
    # now using only rejecting
    toacc_hi = np.setdiff1d(all_comps[utils.andb([fdist <= np.max(fdist[group0]),
                                                  seldict['Rhos'] < F025, Vz > -2]) == 3],
                            np.union1d(group0, rej))
    temp = utils.andb([spz < 1, Rz < 0,
                       mmix_kurt_z_max < 5,
                       Dz > -1, Tz > -1, Vz < 0,
                       seldict['Kappas'] >= F025,
                       fdist < 3 * np.percentile(fdist[group0], 98)]) == 8
    toacc_lo = np.intersect1d(to_clf, all_comps[temp])
    midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T,
                            [0] * len(group0) + [1] * len(rej),
                            fproj_arr_val[:, to_clf].T,
                            svmtype=2)
    midk = np.setdiff1d(to_clf[utils.andb([midk_clf == 1, seldict['varex'][to_clf] >
                                           np.median(seldict['varex'][group0])]) == 2],
                        np.union1d(toacc_hi, toacc_lo))

    # only use the SVM to augment toacc_hi if toacc_hi isn't already
    # conflicting with SVM choice
    if len(np.intersect1d(to_clf[utils.andb([midk_clf == 1,
                                             Vz[to_clf] > 0]) == 2],
                          toacc_hi)) == 0:
        svm_acc_fail = True
        toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0])
    else:
        svm_acc_fail = False

    """
    Step 3: Compute variance associated with low T2* areas
    (e.g. draining veins)
    # To write out veinmask
    veinout = np.zeros(t2s.shape)
    veinout[t2s!=0] = veinmaskf
    utils.filewrite(veinout, 'veinmaskf', ref_img)
    veinBout = utils.unmask(veinmaskB, mask)
    utils.filewrite(veinBout, 'veins50', ref_img)
    """
    LGR.debug('Computing variance associated with low T2* areas (e.g., '
              'draining veins)')
    tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape)
    tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(seldict['tsoc_B'])[seldict['Z_clmaps'] != 0]
    sig_B = [stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25)
             if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0
             else 0 for ii in all_comps]
    sig_B = np.abs(seldict['tsoc_B']) > np.tile(sig_B, [seldict['tsoc_B'].shape[0], 1])

    veinmask = utils.andb([t2s < stats.scoreatpercentile(t2s[t2s != 0], 15,
                                                         interpolation_method='lower'),
                           t2s != 0]) == 2
    veinmaskf = veinmask[mask]
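    # veinR: per-component ratio of suprathreshold voxels inside vs. outside
    # the vein mask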
    veinR = np.array(sig_B[veinmaskf].sum(0),
                     dtype=float) / sig_B[~veinmaskf].sum(0)
    veinR[np.isnan(veinR)] = 0

    veinc = np.union1d(rej, midk)
    rej_veinRZ = ((veinR-veinR[veinc].mean())/veinR[veinc].std())[veinc]
    rej_veinRZ[rej_veinRZ < 0] = 0
    rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0
    t2s_lim = [stats.scoreatpercentile(t2s[t2s != 0], 50,
                                       interpolation_method='lower'),
               stats.scoreatpercentile(t2s[t2s != 0], 80,
                                       interpolation_method='lower') / 2]
    phys_var_zs = []
    for t2sl_i in range(len(t2s_lim)):
        t2sl = t2s_lim[t2sl_i]
        veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1])
        veincand = utils.unmask(utils.andb([s0[t2s != 0] < np.median(s0[t2s != 0]),
                                t2s[t2s != 0] < t2sl]) >= 1,
                                t2s != 0)[mask]
        veinW[~veincand] = 0
        invein = veinW.sum(axis=1)[(utils.unmask(veinmaskf, mask) *
                                    utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]]
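        # threshold is 10 * mean(log10(invein)) minus 10 ** std(log10(invein))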
        minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std())
        veinmaskB = veinW.sum(axis=1) > minW
        tsoc_Bp = seldict['tsoc_B'].copy()
        tsoc_Bp[tsoc_Bp < 0] = 0
        vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() /
                         (tsoc_Bp[:, ii]**2.).sum() for ii in all_comps])
        group0_res = np.intersect1d(KRguess, group0)
        phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
        veinBout = utils.unmask(veinmaskB, mask)
        utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)

    # Mask to sample veins
    phys_var_z = np.array(phys_var_zs).max(0)
    Vz2 = (varex_log - varex_log[group0].mean())/varex_log[group0].std()

    """
    Step 4: Learn joint TE-dependence spatial and temporal models to move
    remaining artifacts to ignore class
    """
    LGR.debug('Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts')

    to_ign = []

    minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], return_val=True)])
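    # newcest: revised estimate of the number of good components
    # (group0 plus the high-Kappa members of toacc_hi)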
    newcest = len(group0) + len(toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign])
    phys_art = np.setdiff1d(all_comps[utils.andb([phys_var_z > 3.5,
                                                  seldict['Kappas'] < minK_ign]) == 2], group0)
    rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas'])
    phys_art = np.union1d(np.setdiff1d(all_comps[utils.andb([phys_var_z > 2,
                                                             rank_diff > newcest / 2,
                                                             Vz2 > -1]) == 3],
                                       group0), phys_art)
    # Want to replace field_art with an acf/SVM based approach
    # instead of a kurtosis/filter one
    field_art = np.setdiff1d(all_comps[utils.andb([mmix_kurt_z_max > 5,
                                                   seldict['Kappas'] < minK_ign]) == 2], group0)
    temp = (stats.rankdata(mmix_kurt_z_max) - stats.rankdata(seldict['Kappas'])) > newcest / 2
    field_art = np.union1d(np.setdiff1d(all_comps[utils.andb([mmix_kurt_z_max > 2,
                                                              temp,
                                                              Vz2 > 1,
                                                              seldict['Kappas'] < F01]) == 4],
                                        group0), field_art)
    temp = seldict['Rhos'] > np.percentile(seldict['Rhos'][group0], 75)
    field_art = np.union1d(np.setdiff1d(all_comps[utils.andb([mmix_kurt_z_max > 3,
                                                              Vz2 > 3,
                                                              temp]) == 3],
                                        group0), field_art)
    field_art = np.union1d(np.setdiff1d(all_comps[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                                        group0), field_art)
    misc_art = np.setdiff1d(all_comps[utils.andb([(stats.rankdata(Vz) -
                                                   stats.rankdata(Ktz)) > newcest / 2,
                            seldict['Kappas'] < Khighelbowval]) == 2], group0)
    ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art))
    midkrej = np.union1d(midk, rej)
    to_ign = np.setdiff1d(list(ign_cand), midkrej)
    toacc = np.union1d(toacc_hi, toacc_lo)
    acc_comps = np.setdiff1d(np.union1d(acc_comps, toacc), np.union1d(to_ign, midkrej))
    ign = np.setdiff1d(all_comps, list(acc_comps) + list(midk) + list(rej))
    orphan = np.setdiff1d(all_comps, list(acc_comps) + list(to_ign) + list(midk) + list(rej))

    # Last ditch effort to save some transient components
    if not strict_mode:
        Vz3 = (varex_log - varex_log[acc_comps].mean()) / varex_log[acc_comps].std()
        temp = utils.andb([seldict['Kappas'] > F05,
                           seldict['Rhos'] < F025,
                           seldict['Kappas'] > seldict['Rhos'],
                           Vz3 <= -1,
                           Vz3 > -3,
                           mmix_kurt_z_max < 2.5])
        acc_comps = np.union1d(acc_comps,
                               np.intersect1d(orphan, all_comps[temp == 6]))
        ign = np.setdiff1d(all_comps, list(acc_comps)+list(midk)+list(rej))
        orphan = np.setdiff1d(all_comps, list(acc_comps) + list(to_ign) + list(midk) + list(rej))

    if savecsdiag:
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
                         'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
                         'Dice rejected', 'rej_supp', 'to_clf',
                         'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
                         'Field artifacts', 'Physiological artifacts',
                         'Miscellaneous artifacts', 'acc_comps', 'Ignored components']
        diagstep_vals = [list(rej), KRcut.item(), Kcut.item(), Rcut.item(),
                         dbscanfailed, list(KRguess), dice_rej,
                         list(rej_supp), list(to_clf), list(midk),
                         svm_acc_fail, list(toacc_hi), list(toacc_lo),
                         list(field_art), list(phys_art),
                         list(misc_art), list(acc_comps), list(ign)]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh,
                      indent=4, sort_keys=True, default=str)
        allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
        np.savetxt('csdata.txt', allfz)

    return list(sorted(acc_comps)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
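# A usage sketch of the full function (the input values are assumptions for
# illustration; seldict as output from fitmodels_direct):
# acc, rej, midk, ign = selcomps(seldict, mmix, mask, ref_img, manacc=None,
#                                n_echos=3, t2s=t2s, s0=s0)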