Exemplo n.º 1
0
dm_with_nuissance = dm.add_poly(2, include_lower=True)
dm_with_nuissance.heatmap()

#########################################################################
# We can see that 3 new columns were added to the design matrix. We can also inspect the change to the meta-data. Notice that the Design Matrix is aware of the existence of three polynomial terms now.

print(dm_with_nuissance.details())

#########################################################################
# Discrete Cosine Basis Functions
# *******************************
#
# Polynomial variables are not the only type of nuisance covariates that can be generated for you. Design Matrix also supports the creation of discrete-cosine basis functions ala SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data.

# Short filter duration for our simple example
dm_with_cosine = dm.add_dct_basis(duration=20)
dm_with_cosine.heatmap()

#########################################################################
# Data operations
# ---------------
#
# Performing convolution
# **********************
#
# Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are consider polynomials. The default convolution kernel is the Glover (1999) HRF parameterized by the glover_hrf implementation in nipy (see nltools.externals.hrf for details). However, any arbitrary kernel can be passed as a 1d numpy array, or multiple kernels can be passed as a 2d numpy array for highly flexible convolution across many types of data (e.g. SCR).

dm_with_nuissance_c = dm_with_nuissance.convolve()
print(dm_with_nuissance_c.details())
dm_with_nuissance_c.heatmap()
Exemplo n.º 2
0
def test_designmat(tmpdir):

    mat1 = Design_Matrix({
    'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
    'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
    'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    },
    sampling_rate=2.0,polys=['intercept'])

    mat2 = Design_Matrix({
    'X':[9, 9, 2, 7, 5, 0, 1, 1, 1, 2],
    'Y':[3, 3, 3, 6, 9, 0, 1, 10, 1, 10],
    'Z':[2, 6, 3, 2, 7, 0, 1, 7, 8, 8],
    'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    },
    sampling_rate=2.0, polys=['intercept'])

    # Appending
    # Basic horz cat
    new_mat = mat1.append(mat1,axis=1)
    assert new_mat.shape == (mat1.shape[0], mat1.shape[1] + mat2.shape[1])
    both_cols = list(mat1.columns) + list(mat1.columns)
    assert all(new_mat.columns == both_cols)
    # Basic vert cat
    new_mat = mat1.append(mat1,axis=0)
    assert new_mat.shape == (mat1.shape[0]*2, mat1.shape[1]+1)
    # Advanced vert cat
    new_mat = mat1.append(mat1,axis=0,keep_separate=False)
    assert new_mat.shape == (mat1.shape[0]*2,mat1.shape[1])
    # More advanced vert cat
    new_mat = mat1.append(mat1,axis=0,add_poly=2)
    assert new_mat.shape == (mat1.shape[0]*2, 9)

    #convolution doesn't affect intercept
    assert all(mat1.convolve().iloc[:, -1] == mat1.iloc[:, -1])
    #but it still works
    assert (mat1.convolve().iloc[:, :3].values != mat1.iloc[:, :3].values).any()

    #Test vifs
    expectedVifs = np.array([ 1.03984251, 1.02889877, 1.02261945])
    assert np.allclose(expectedVifs,mat1.vif())

    #poly
    mat1.add_poly(order=4).shape[1] == mat1.shape[1]+4
    mat1.add_poly(order=4, include_lower=False).shape[1] == mat1.shape[1]+1

    #zscore
    z = mat1.zscore(columns=['X', 'Z'])
    assert (z['Y'] == mat1['Y']).all()
    assert z.shape == mat1.shape

    # clean
    mat = Design_Matrix({
    'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'A':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
    'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
    'C':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    },
    sampling_rate=2.0,polys=['intercept'])
    mat = mat[['X','A','Y','Z','C','intercept']]
    assert all(mat.clean().columns == ['X','Y','Z','intercept'])

    # replace data
    mat = Design_Matrix({
    'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'A':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
    'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
    'C':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2]
    },
    sampling_rate=2.0)

    mat = mat.replace_data(np.ones((mat.shape[0],mat.shape[1]-1)),column_names=['a','b','c','d'])

    assert(np.allclose(mat.values,1))
    assert(all(mat.columns == ['a','b','c','d']))

    #DCT basis_mat
    mat = Design_Matrix(np.random.randint(2,size=(500,3)),sampling_rate=2.0)
    mat = mat.add_dct_basis()
    assert len(mat.polys) == 11
    assert mat.shape[1] == 14

    #Up and down sampling
    mat = Design_Matrix(np.random.randint(2,size=(500,4)),sampling_rate=2.0,columns=['a','b','c','d'])
    target = 1
    assert mat.upsample(target).shape[0] == mat.shape[0]*2 - target*2
    target = 4
    assert mat.downsample(target).shape[0] == mat.shape[0]/2
    def get_terms(confound_file, noise_transforms, noise_regressors, TR, options):
        '''
        Gathers confounds (and transformations) into a pandas dataframe.
        Input [Mandatory]:
            confound_file [string]: path to confound.tsv file, given by fmriprep.
            noise_transforms [list of strings]:
                noise transforms to be applied to select noise_regressors above. Possible values are 'quad', 'tderiv', and 'quadtderiv', standing for quadratic function of value, temporal derivative of value, and quadratic function of temporal derivative.
                e.g. model_wf.inputs.inputspec.noise_transforms = ['quad', 'tderiv', 'quadtderiv']
            noise_regressors [list of strings]:
                column names in confounds.tsv, specifying desired noise regressors for model.
                IF noise_transforms are to be applied to a regressor, add '*' to the name.
                e.g. model_wf.inputs.inputspec.noise_regressors = ['CSF', 'WhiteMatter', 'GlobalSignal', 'X*', 'Y*', 'Z*', 'RotX*', 'RotY*', 'RotZ*']
            TR [float]:
                Scanner TR value in seconds.
            options: dictionary with the following entries
                remove_steadystateoutlier [boolean]:
                    Should always be True. Remove steady state outliers from bold timecourse, specified in fmriprep confounds file.
                ICA_AROMA [boolean]:
                    Use AROMA error components, from fmriprep confounds file.
                poly_trend [integer. Use None to skip]:
                    If given, polynomial trends will be added to run confounds, up to the order of the integer
                    e.g. "0", gives an intercept, "1" gives intercept + linear trend,
                    "2" gives intercept + linear trend + quadratic.
                dct_basis [integer. Use None to skip]:
                    If given, adds a discrete cosine transform, with a length (in seconds) of the interger specified.
                        Adds unit scaled cosine basis functions to Design_Matrix columns,
                        based on spm-style discrete cosine transform for use in
                        high-pass filtering. Does not add intercept/constant.
        '''
        import numpy as np
        import pandas as pd
        from nltools.data import Design_Matrix

        df_cf = pd.DataFrame(pd.read_csv(confound_file, sep='\t', parse_dates=False))
        transfrm_list = []
        for idx, entry in enumerate(noise_regressors): # get entries marked with *, indicating they should be transformed.
            if '*' in entry:
                transfrm_list.append(entry.replace('*', '')) # add entry to transformation list if it has *.
                noise_regressors[idx] = entry.replace('*', '')

        confounds = df_cf[noise_regressors]
        transfrmd_cnfds = df_cf[transfrm_list] # for transforms
        TR_time = pd.Series(np.arange(0.0, TR*transfrmd_cnfds.shape[0], TR)) # time series for derivatives.
        if 'quad' in noise_transforms:
            quad = np.square(transfrmd_cnfds)
            confounds = confounds.join(quad, rsuffix='_quad')
        if 'tderiv' in noise_transforms:
            tderiv = pd.DataFrame(pd.Series(np.gradient(transfrmd_cnfds[col]), TR_time)
                                  for col in transfrmd_cnfds).T
            tderiv.columns = transfrmd_cnfds.columns
            tderiv.index = confounds.index
            confounds = confounds.join(tderiv, rsuffix='_tderiv')
        if 'quadtderiv' in noise_transforms:
            quadtderiv = np.square(tderiv)
            confounds = confounds.join(quadtderiv, rsuffix='_quadtderiv')
        if options['remove_steadystateoutlier']:
            if not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^non_steady_state_outlier')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^non_steady_state_outlier')]])
            elif not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^NonSteadyStateOutlier')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^NonSteadyStateOutlier')]]) # old syntax
        if options['ICA_AROMA']:
            if not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^aroma_motion')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^aroma_motion')]])
            elif not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^AROMAAggrComp')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^AROMAAggrComp')]]) # old syntax
        confounds = Design_Matrix(confounds, sampling_freq=1/TR)
        if isinstance(options['poly_trend'], int):
            confounds = confounds.add_poly(order = options['poly_trend']) # these do not play nice with high pass filters.
        if isinstance(options['dct_basis'], int):
            confounds = confounds.add_dct_basis(duration=options['dct_basis']) # these do not play nice with high pass filters.
        return confounds