示例#1
0
def compare_outliers(data, conv, plot=False):
    """ Compare the mean MRSS of a simple regression with and without outliers

    An intercept-plus-`conv` design is fitted to `data` twice: once using
    every volume, and once after dropping the volumes returned by
    ``extend_diff_outliers`` (built from the IQR outliers of the RMS
    difference values).

    Parameters
    ----------
    data : 4D array
        4D array from FMRI run with last axis indexing volumes.
    conv : array of the convolved time course
        Becomes the second column of the design matrix, so it needs one
        value per volume (presumably 1D -- TODO confirm against callers).
    plot : bool, optional
        If true, draw the RMS difference values, the flagged volumes and the
        outlier thresholds with matplotlib.  Nothing is shown or saved here.

    Returns
    -------
    meanMRSS : mean MRSS of simple regression on the convolved time course.
    outmeanMRSS : mean MRSS of simple regression on the convolved time course
        after dropping the extended rms outliers.
    """
    rms_values = vol_rms_diff(data)
    rms_outliers, rms_thresholds = iqr_outliers(rms_values)
    extended_indices = extend_diff_outliers(rms_outliers)

    # Design matrix: column of ones (intercept) plus the convolved regressor.
    X = np.ones((len(conv), 2))
    X[:, 1] = conv

    B, _ = glm_multiple(data, X)
    MRSS, _, _ = glm_diagnostics(B, X, data)
    meanMRSS = np.mean(MRSS)

    # Work out once which volumes survive outlier removal, then reuse the
    # same index array for both the design matrix and the data.
    mask = np.ones(X.shape[0])
    mask[extended_indices] = 0
    keep = mask.nonzero()[0]
    outX = X[keep, :]
    outdata = data[..., keep]

    outB, _ = glm_multiple(outdata, outX)
    outMRSS, _, _ = glm_diagnostics(outB, outX, outdata)
    outmeanMRSS = np.mean(outMRSS)

    if plot:
        # Add one trailing 0 so the plotted series is one entry longer;
        # presumably this keeps the largest flagged index in range -- TODO
        # confirm against extend_diff_outliers.
        rms_values = np.append(rms_values, 0)
        plt.plot(rms_values, "k")
        plt.plot(extended_indices, rms_values[extended_indices], "ro")
        plt.axhline(rms_thresholds[0], ls="--")
        plt.axhline(rms_thresholds[1], ls="--")

        # Proxy artists: the legend is built by hand rather than from the
        # plotted lines.
        hand_out = mlines.Line2D([], [],
                                 color="r",
                                 marker="o",
                                 ls="None",
                                 label="Outliers")
        hand_thresh = mlines.Line2D([], [],
                                    color="b",
                                    ls="--",
                                    label="Thresholds")
        plt.legend(handles=[hand_out, hand_thresh], numpoints=1)

    return meanMRSS, outmeanMRSS
示例#2
0
def test_glm_multiple():
    """Check glm_multiple against the worked example from the lecture notes.

    Example from http://www.jarrodmillman.com/rcsds/lectures/glm_intro.html
    (the hypothesis there only looks at simple linear regression).
    """
    psychopathy = [
        11.416, 4.514, 12.204, 14.835,
        8.416, 6.563, 17.343, 13.02,
        15.19, 11.902, 22.721, 22.324,
    ]
    clammy = [
        0.389, 0.2, 0.241, 0.463,
        4.585, 1.097, 1.642, 4.972,
        7.957, 5.585, 5.527, 6.964,
    ]

    # One indicator column per school, four subjects each.  There is no
    # separate column of ones in this design.
    berkeley_indicator = [1] * 4 + [0] * 8
    stanford_indicator = [0] * 4 + [1] * 4 + [0] * 4
    mit_indicator = [0] * 8 + [1] * 4
    design = np.column_stack(
        [berkeley_indicator, stanford_indicator, mit_indicator]).astype(float)

    # Two response series stacked as rows.
    responses = np.array([psychopathy, clammy])

    coefs, _ = glm_multiple(responses, design)

    # Expected coefficients for the first response row, from lecture notes.
    expected_first_row = (10.74225, 11.3355, 18.03425)
    for col, expected in enumerate(expected_first_row):
        assert round(coefs[0, col], 5) == expected
示例#3
0
def test_glm_multiple():
    """Check glm_multiple against the worked example from the lecture notes.

    Example from http://www.jarrodmillman.com/rcsds/lectures/glm_intro.html
    (the hypothesis there only looks at simple linear regression).
    """
    psychopathy = [
        11.416, 4.514, 12.204, 14.835,
        8.416, 6.563, 17.343, 13.02,
        15.19, 11.902, 22.721, 22.324,
    ]
    clammy = [
        0.389, 0.2, 0.241, 0.463,
        4.585, 1.097, 1.642, 4.972,
        7.957, 5.585, 5.527, 6.964,
    ]

    # One indicator column per school, four subjects each.  There is no
    # separate column of ones in this design.
    berkeley_indicator = [1] * 4 + [0] * 8
    stanford_indicator = [0] * 4 + [1] * 4 + [0] * 4
    mit_indicator = [0] * 8 + [1] * 4
    design = np.column_stack(
        [berkeley_indicator, stanford_indicator, mit_indicator]).astype(float)

    # Two response series stacked as rows.
    responses = np.array([psychopathy, clammy])

    coefs, _ = glm_multiple(responses, design)

    # Expected coefficients for the first response row, from lecture notes.
    expected_first_row = (10.74225, 11.3355, 18.03425)
    for col, expected in enumerate(expected_first_row):
        assert round(coefs[0, col], 5) == expected
示例#4
0
def fourier_predict_underlying_noise(y_mean, p):
    """ Predict the underlying noise using fourier series and glm

    Builds a design matrix with ``fourier_creation``, fits it to `y_mean`
    with ``glm_multiple`` and returns the ``glm_diagnostics`` output.

    Parameters:
    -----------
    y_mean: 1 dimensional np.array
    p: number of fourier series (pairs)

    Returns:
    --------
    (in this order)
    X: glm_matrix (first column is all 1s)
    MRSS: MRSS from glm function (general output from glm_diagnostics)
    fitted: the fitted values from glm
    residuals: the residuals between fitted and y_mean

    Note:
    -----
    Does a backwards approach to fouriers (possibly sacrificing
    orthogonality), wants to look at maximum period first
    """
    n = y_mean.shape[0]
    X = fourier_creation(n, p)
    # glm_multiple returns a pair; only the coefficients are needed here.
    beta, _ = glm_multiple(y_mean, X)
    MRSS, fitted, residuals = glm_diagnostics(beta, X, y_mean)

    return X, MRSS, fitted, residuals
示例#5
0
def compare_outliers(data, conv, plot=False):
    """ Compare the mean MRSS of a simple regression with and without outliers

    An intercept-plus-`conv` design is fitted to `data` twice: once using
    every volume, and once after dropping the volumes returned by
    ``extend_diff_outliers`` (built from the IQR outliers of the RMS
    difference values).

    Parameters
    ----------
    data : 4D array
        4D array from FMRI run with last axis indexing volumes.
    conv : array of the convolved time course
        Becomes the second column of the design matrix, so it needs one
        value per volume (presumably 1D -- TODO confirm against callers).
    plot : bool, optional
        If true, draw the RMS difference values, the flagged volumes and the
        outlier thresholds with matplotlib.  Nothing is shown or saved here.

    Returns
    -------
    meanMRSS : mean MRSS of simple regression on the convolved time course.
    outmeanMRSS : mean MRSS of simple regression on the convolved time course
        after dropping the extended rms outliers.
    """
    rms_values = vol_rms_diff(data)
    rms_outliers, rms_thresholds = iqr_outliers(rms_values)
    extended_indices = extend_diff_outliers(rms_outliers)

    # Design matrix: column of ones (intercept) plus the convolved regressor.
    X = np.ones((len(conv), 2))
    X[:, 1] = conv

    B, _ = glm_multiple(data, X)
    MRSS, _, _ = glm_diagnostics(B, X, data)
    meanMRSS = np.mean(MRSS)

    # Work out once which volumes survive outlier removal, then reuse the
    # same index array for both the design matrix and the data.
    mask = np.ones(X.shape[0])
    mask[extended_indices] = 0
    keep = mask.nonzero()[0]
    outX = X[keep, :]
    outdata = data[..., keep]

    outB, _ = glm_multiple(outdata, outX)
    outMRSS, _, _ = glm_diagnostics(outB, outX, outdata)
    outmeanMRSS = np.mean(outMRSS)

    if plot:
        # Add one trailing 0 so the plotted series is one entry longer;
        # presumably this keeps the largest flagged index in range -- TODO
        # confirm against extend_diff_outliers.
        rms_values = np.append(rms_values, 0)
        plt.plot(rms_values, "k")
        plt.plot(extended_indices, rms_values[extended_indices], "ro")
        plt.axhline(rms_thresholds[0], ls="--")
        plt.axhline(rms_thresholds[1], ls="--")

        # Proxy artists: the legend is built by hand rather than from the
        # plotted lines.
        hand_out = mlines.Line2D([], [], color="r", marker="o", ls="None",
                                 label="Outliers")
        hand_thresh = mlines.Line2D([], [], color="b", ls="--",
                                    label="Thresholds")
        plt.legend(handles=[hand_out, hand_thresh], numpoints=1)

    return meanMRSS, outmeanMRSS
#################
# First Attempt #
#################

# First approach allowed for fourier strength to be fit to each voxel,
# potentially overcorrecting and masking some response to neural stimulation

# X matrix columns: 0 = intercept, 1 = convolved stimulus, 2 = linear drift,
# 3 onwards = fourier terms.  The width is derived from the fourier block so
# it cannot fall out of step with the fourier order requested here.
fourier_terms = fourier_creation(n_vols, 3)[:, 1:]  # skip its first column
X = np.ones((n_vols, 3 + fourier_terms.shape[1]))
X[:, 1] = convolution_specialized(cond_all[:, 0], np.ones(len(cond_all)),
                                  hrf_single, all_tr_times)
X[:, 2] = np.linspace(-1, 1, num=n_vols)  # drift
X[:, 3:] = fourier_terms

# modeling voxel hemodynamic response
beta, junk = glm_multiple(data, X)
MRSS, fitted, residuals = glm_diagnostics(beta, X, data)

# individual voxel analysis

# Actual vs. fitted time course for voxel [41, 47, 2], saved to disk.
plt.plot(all_tr_times, data[41, 47, 2], label="actual", color="b")
plt.plot(all_tr_times, fitted[41, 47, 2], label="predicted", color="r")
plt.title("Data for sub001, voxel [41, 47, 2],fourier 3 fit to voxel")
plt.xlabel("Time")
plt.ylabel("Hemodynamic response")
plt.legend(loc="upper right", shadow=True, fontsize="smaller")
plt.savefig(location_of_images + "noise_correction__fit_to_voxel_fitted.png")
plt.close()

# Residuals for the same voxel, with a horizontal reference line at zero.
plt.plot(all_tr_times, residuals[41, 47, 2], label="residuals", color="b")
plt.plot([0, max(all_tr_times)], [0, 0], label="origin (residual=0)", color="k")
示例#7
0
# Some diagnostics.
MRSS_np, fitted_np, residuals_np = glm_diagnostics(B_np, X_np, data)

# Print out the mean MRSS.
print("MRSS using np convolution function: "+str(np.mean(MRSS_np)))

# Plot the time course for a single voxel with the fitted values.
# Looks pretty bad.
plt.plot(data[41, 47, 2])
plt.plot(fitted_np[41, 47, 2])
plt.savefig(location_of_images+"glm_plot_np.png")
plt.close()


# Third-degree polynomial in my_hrf: column 0 is the intercept and columns
# 1..3 hold my_hrf, my_hrf**2 and my_hrf**3.  All three power columns must be
# filled; otherwise the last one stays all ones and duplicates the intercept.
X_my3 = np.ones((data.shape[-1], 4))
for i in range(3):
    X_my3[:, i + 1] = my_hrf**(i + 1)
B_my3, X_my3 = glm_multiple(data, X_my3)
MRSS_my3, fitted_my3, residuals_my3 = glm_diagnostics(B_my3, X_my3, data)
print("MRSS using 'my' convolution function, 3rd degree polynomial: "+str(np.mean(MRSS_my3))+ ", but the chart looks better")

# Same single-voxel plot for the polynomial fit.
plt.plot(data[41, 47, 2])
plt.plot(fitted_my3[41, 47, 2])
plt.savefig(location_of_images+"glm_plot_my3.png")
plt.close()


示例#8
0
# First column of each of the three condition arrays
# (presumably the onset times -- TODO confirm).
conds = [cond1[:,0],cond2[:,0],cond3[:,0]]
# Fill columns 1..3 of X_my, one per condition, with the output of
# convolution_specialized; column 0 is not touched here.
for i,cond in enumerate(conds):
	X_my[:,i+1]=convolution_specialized(cond,np.ones(len(cond)),hrf_single,all_tr_times)


##########
#    GLM #
##########


###################
#     np.convolve #
###################

# glm_multiple returns a pair; only the first element is kept.
B_np,junk=glm_multiple(data,X_np)

###############################
#     convolution_specialized #
###############################


# Same fit using the X_my design filled in above.
B_my,junk=glm_multiple(data,X_my)



#############
# 4. Review #
#############
""""
# Looks like splitting up the conditions does a few things
示例#9
0
# Now get the estimated coefficients and design matrix for doing
# regression on the convolved time course.
B_np, X_np = glm(data, np_hrf)

# Some diagnostics.
MRSS_np, fitted_np, residuals_np = glm_diagnostics(B_np, X_np, data)

# Print out the mean MRSS.
print("MRSS using np convolution function: " + str(np.mean(MRSS_np)))

# Plot the time course for a single voxel with the fitted values.
# Looks pretty bad.
plt.plot(data[41, 47, 2])
plt.plot(fitted_np[41, 47, 2])
plt.savefig(location_of_images + "glm_plot_np.png")
plt.close()

# Third-degree polynomial in my_hrf: column 0 is the intercept and columns
# 1..3 hold my_hrf, my_hrf**2 and my_hrf**3.  All three power columns must be
# filled; otherwise the last one stays all ones and duplicates the intercept.
X_my3 = np.ones((data.shape[-1], 4))
for i in range(3):
    X_my3[:, i + 1] = my_hrf**(i + 1)
B_my3, X_my3 = glm_multiple(data, X_my3)
MRSS_my3, fitted_my3, residuals_my3 = glm_diagnostics(B_my3, X_my3, data)
print("MRSS using 'my' convolution function, 3rd degree polynomial: " +
      str(np.mean(MRSS_my3)) + ", but the chart looks better")

# Same single-voxel plot for the polynomial fit.
plt.plot(data[41, 47, 2])
plt.plot(fitted_my3[41, 47, 2])
plt.savefig(location_of_images + "glm_plot_my3.png")
plt.close()
示例#10
0
def t_stat_mult_regression_single(data_4d, X, c=()):
    """
    Return four values, the estimated beta, t-value,
    degrees of freedom, and p-value for a single contrast

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    X: numpy array
        the matrix to be put into the glm_multiple function
    c: numpy array of 1 dimension
        The contrast vector of the weights of the beta vector.
        If not entered (or empty), it will be set as np.array([0,1,...])
        which corresponds to beta_1

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the number of rows that X has.

    Returns
    -------
    beta: estimated beta values, one row per voxel
        (all leading axes of `data_4d` flattened together)

    t: numpy array of shape (1, number of voxels)
        t-value of the contrast; set to 0 where the standard error is 0

    df: int
        degrees of freedom

    p: numpy array with the same shape as `t`
        1 - CDF(|t|) of the t distribution with `df` degrees of freedom
    """

    beta, X = glm_multiple(data_4d, X)

    # Dealing with no c put in.  Emptiness is tested by size: `c is ()` is an
    # identity test against a literal, which is implementation-dependent and
    # triggers a SyntaxWarning on Python 3.8+.
    if c is None or np.size(c) == 0:
        c = np.zeros(X.shape[-1])
        c[1] = 1

    c = np.atleast_2d(c).T  # As column vector

    # Calculate the parameters - b hat (regressors x voxels)
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X.dot(beta)
    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)

    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df

    # calculate bottom half of t statistic
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))

    # Avoid dividing by zero: use 1 as a stand-in, then force the matching
    # t-values to 0.  A boolean mask touches exactly those entries; indexing
    # with `t[:, np.where(...)]` would also zero column 0, because the row
    # indices in the where-tuple get treated as column indices.
    degenerate = SE == 0
    SE[degenerate] = 1
    t = c.T.dot(beta) / SE
    t[degenerate] = 0

    # Get p value for t value using cumulative density function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return beta.T, t, df, p
示例#11
0
#################
# First Attempt #
#################

# First approach allowed for fourier strength to be fit to each voxel,
# potentially overcorrecting and masking some response to neural stimulation

# X matrix columns: 0 = intercept, 1 = convolved stimulus, 2 = linear drift,
# 3 onwards = fourier terms.  The width is derived from the fourier block so
# it cannot fall out of step with the fourier order requested here.
fourier_terms = fourier_creation(n_vols, 3)[:, 1:]  # skip its first column
X = np.ones((n_vols, 3 + fourier_terms.shape[1]))
X[:, 1] = convolution_specialized(cond_all[:, 0], np.ones(len(cond_all)),
                                  hrf_single, all_tr_times)
X[:, 2] = np.linspace(-1, 1, num=n_vols)  # drift
X[:, 3:] = fourier_terms

# modeling voxel hemodynamic response
beta, junk = glm_multiple(data, X)
MRSS, fitted, residuals = glm_diagnostics(beta, X, data)

# individual voxel analysis

# Actual vs. fitted time course for voxel [41, 47, 2], saved to disk.
plt.plot(all_tr_times, data[41, 47, 2], label="actual", color="b")
plt.plot(all_tr_times, fitted[41, 47, 2], label="predicted", color="r")
plt.title("Data for sub001, voxel [41, 47, 2],fourier 3 fit to voxel")
plt.xlabel("Time")
plt.ylabel("Hemodynamic response")
plt.legend(loc='upper right', shadow=True, fontsize="smaller")
plt.savefig(location_of_images + 'noise_correction__fit_to_voxel_fitted.png')
plt.close()

# Residuals for the same voxel.
plt.plot(all_tr_times, residuals[41, 47, 2], label="residuals", color="b")
plt.plot([0, max(all_tr_times)], [0, 0],
示例#12
0
def t_stat_mult_regression_single(data_4d, X, c=()):
    """
    Return four values, the estimated beta, t-value,
    degrees of freedom, and p-value for a single contrast

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    X: numpy array
        the matrix to be put into the glm_multiple function
    c: numpy array of 1 dimension
        The contrast vector of the weights of the beta vector.
        If not entered (or empty), it will be set as np.array([0,1,...])
        which corresponds to beta_1

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the number of rows that X has.

    Returns
    -------
    beta: estimated beta values, one row per voxel
        (all leading axes of `data_4d` flattened together)

    t: numpy array of shape (1, number of voxels)
        t-value of the contrast; set to 0 where the standard error is 0

    df: int
        degrees of freedom

    p: numpy array with the same shape as `t`
        1 - CDF(|t|) of the t distribution with `df` degrees of freedom
    """

    beta, X = glm_multiple(data_4d, X)

    # Dealing with no c put in.  Emptiness is tested by size: `c is ()` is an
    # identity test against a literal, which is implementation-dependent and
    # triggers a SyntaxWarning on Python 3.8+.
    if c is None or np.size(c) == 0:
        c = np.zeros(X.shape[-1])
        c[1] = 1

    c = np.atleast_2d(c).T  # As column vector

    # Calculate the parameters - b hat (regressors x voxels)
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X.dot(beta)
    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)

    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df

    # calculate bottom half of t statistic
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))

    # Avoid dividing by zero: use 1 as a stand-in, then force the matching
    # t-values to 0.  A boolean mask touches exactly those entries; indexing
    # with `t[:, np.where(...)]` would also zero column 0, because the row
    # indices in the where-tuple get treated as column indices.
    degenerate = SE == 0
    SE[degenerate] = 1
    t = c.T.dot(beta) / SE
    t[degenerate] = 0

    # Get p value for t value using cumulative density function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return beta.T, t, df, p
示例#13
0
def t_stat_mult_regression(data_4d, X):
    """
    Return four values, the estimated beta, t-value,
    degrees of freedom, and p-value for every beta

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    X: numpy array
        the matrix to be put into the glm_multiple function

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the number of rows that X has.

    Returns
    -------
    beta: estimated beta values, one row per voxel
        (all leading axes of `data_4d` flattened together)

    t: numpy array of 2 dimensions (regressors x voxels)
        t-value of the betas; set to 0 where the standard error is 0

    df: int
        degrees of freedom

    p: numpy array of 2 dimensions, same shape as `t`
        1 - CDF(|t|) of the t distribution with `df` degrees of freedom
    """

    beta, X = glm_multiple(data_4d, X)

    # Calculate the parameters - b hat (regressors x voxels)
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X.dot(beta)
    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)

    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df

    # calculate bottom half of t statistic
    # With a unit contrast picking out beta_i, c' (X'X)^+ c is just the i-th
    # diagonal entry, so the pseudo-inverse is computed once and every
    # standard error comes from one outer product:
    #     SE[i, v] = sqrt(MRSS[v] * Cov_beta[i, i])
    Cov_beta = npl.pinv(X.T.dot(X))
    SE = np.sqrt(np.outer(np.diag(Cov_beta), MRSS))

    # Avoid dividing by zero: use 1 as a stand-in, then force the matching
    # t-values to 0.  A boolean mask touches exactly those entries; indexing
    # with `t[:, np.where(...)]` would zero whole columns, including the
    # columns numbered by the row indices in the where-tuple.
    degenerate = SE == 0
    SE[degenerate] = 1
    t = beta / SE
    t[degenerate] = 0

    # Get p value for t value using cumulative density function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return beta.T, t, df, p
示例#14
0
#################
# First Attempt #
#################

# First approach allowed for fourier strength to be fit to each voxel,
# potentially overcorrecting and masking some response to neural stimulation

# X matrix columns: 0 = intercept, 1 = convolved stimulus, 2 = linear drift,
# 3 onwards = fourier terms.  The width is derived from the fourier block
# rather than hard-coded, so the assignment into X[:, 3:] always fits
# whatever number of columns the fourier order produces.
fourier_terms = fourier_creation(n_vols, 3)[:, 1:]  # skip its first column
X = np.ones((n_vols, 3 + fourier_terms.shape[1]))
X[:, 1] = convolution_specialized(cond_all[:, 0], np.ones(len(cond_all)),
                                  hrf_single, all_tr_times)
X[:, 2] = np.linspace(-1, 1, num=n_vols)  # drift
X[:, 3:] = fourier_terms

# modeling voxel hemodynamic response
beta, junk = glm_multiple(data, X)
MRSS, fitted, residuals = glm_diagnostics(beta, X, data)

# individual voxel analysis

# Actual vs. fitted time course for voxel [41, 47, 2], saved to disk.
plt.plot(all_tr_times, data[41, 47, 2], label="actual", color="b")
plt.plot(all_tr_times, fitted[41, 47, 2], label="predicted", color="r")
plt.title("Data for sub001, voxel [41, 47, 2],fourier 3 fit to voxel")
plt.xlabel("Time")
plt.ylabel("Hemodynamic response")
plt.legend(loc='upper right', shadow=True, fontsize="smaller")
plt.savefig(location_of_images + 'noise_correction__fit_to_voxel_fitted.png')
plt.close()

# Residuals for the same voxel.
plt.plot(all_tr_times, residuals[41, 47, 2], label="residuals", color="b")
plt.plot([0, max(all_tr_times)], [0, 0],
示例#15
0
def t_stat_mult_regression(data_4d, X):
    """
    Return four values, the estimated beta, t-value,
    degrees of freedom, and p-value for every beta

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    X: numpy array
        the matrix to be put into the glm_multiple function

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the number of rows that X has.

    Returns
    -------
    beta: estimated beta values, one row per voxel
        (all leading axes of `data_4d` flattened together)

    t: numpy array of 2 dimensions (regressors x voxels)
        t-value of the betas; set to 0 where the standard error is 0

    df: int
        degrees of freedom

    p: numpy array of 2 dimensions, same shape as `t`
        1 - CDF(|t|) of the t distribution with `df` degrees of freedom
    """

    beta, X = glm_multiple(data_4d, X)

    # Calculate the parameters - b hat (regressors x voxels)
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X.dot(beta)
    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)

    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df

    # calculate bottom half of t statistic
    # With a unit contrast picking out beta_i, c' (X'X)^+ c is just the i-th
    # diagonal entry, so the pseudo-inverse is computed once and every
    # standard error comes from one outer product:
    #     SE[i, v] = sqrt(MRSS[v] * Cov_beta[i, i])
    Cov_beta = npl.pinv(X.T.dot(X))
    SE = np.sqrt(np.outer(np.diag(Cov_beta), MRSS))

    # Avoid dividing by zero: use 1 as a stand-in, then force the matching
    # t-values to 0.  A boolean mask touches exactly those entries; indexing
    # with `t[:, np.where(...)]` would zero whole columns, including the
    # columns numbered by the row indices in the where-tuple.
    degenerate = SE == 0
    SE[degenerate] = 1
    t = beta / SE
    t[degenerate] = 0

    # Get p value for t value using cumulative density function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return beta.T, t, df, p
# First column of each of the three condition arrays
# (presumably the onset times -- TODO confirm).
conds = [cond1[:,0],cond2[:,0],cond3[:,0]]
# Fill columns 1..3 of X_my, one per condition, with the output of
# convolution_specialized; column 0 is not touched here.
for i,cond in enumerate(conds):
	X_my[:,i+1]=convolution_specialized(cond,np.ones(len(cond)),hrf_single,all_tr_times)


##########
#    GLM #
##########


###################
#     np.convolve #
###################

# glm_multiple returns a pair; only the first element is kept.
B_np,junk=glm_multiple(data,X_np)

###############################
#     convolution_specialized #
###############################


# Same fit using the X_my design filled in above.
B_my,junk=glm_multiple(data,X_my)



#############
# 4. Review #
#############
""""
# Looks like splitting up the conditions does a few things
#################
# First Attempt #
#################

# First approach allowed for fourier strength to be fit to each voxel,
# potentially overcorrecting and masking some response to neural stimulation

# X matrix columns: 0 = intercept, 1 = convolved stimulus, 2 = linear drift,
# 3 onwards = fourier terms.  The width is derived from the fourier block
# rather than hard-coded, so the assignment into X[:, 3:] always fits
# whatever number of columns the fourier order produces.
fourier_terms = fourier_creation(n_vols, 3)[:, 1:]  # skip its first column
X = np.ones((n_vols, 3 + fourier_terms.shape[1]))
X[:, 1] = convolution_specialized(cond_all[:, 0], np.ones(len(cond_all)),
                                  hrf_single, all_tr_times)
X[:, 2] = np.linspace(-1, 1, num=n_vols)  # drift
X[:, 3:] = fourier_terms

# modeling voxel hemodynamic response
beta, junk = glm_multiple(data, X)
MRSS, fitted, residuals = glm_diagnostics(beta, X, data)

# individual voxel analysis

# Actual vs. fitted time course for voxel [41, 47, 2], saved to disk.
plt.plot(all_tr_times, data[41, 47, 2], label="actual", color="b")
plt.plot(all_tr_times, fitted[41, 47, 2], label="predicted", color="r")
plt.title("Data for sub001, voxel [41, 47, 2],fourier 3 fit to voxel")
plt.xlabel("Time")
plt.ylabel("Hemodynamic response")
plt.legend(loc='upper right', shadow=True, fontsize="smaller")
plt.savefig(location_of_images + 'noise_correction__fit_to_voxel_fitted.png')
plt.close()

# Residuals for the same voxel, with a horizontal reference line at zero.
plt.plot(all_tr_times, residuals[41, 47, 2], label="residuals", color="b")
plt.plot([0, max(all_tr_times)], [0, 0], label="origin (residual=0)", color="k")