def test_glm():
    """Compare glm() and glm_diagnostics() with a by-hand least-squares fit.

    The reference betas come from the pseudo-inverse of an explicitly built
    two-column design matrix; the diagnostics are spot-checked on one voxel.
    """
    # Image data and convolved regressor; the first four entries along the
    # last axis / of the text file are dropped from both.
    img = nib.load(pathtoclassdata + "ds114_sub009_t2r1.nii")
    data = img.get_data()[..., 4:]
    convolved = np.loadtxt(pathtoclassdata + "ds114_sub009_t2r1_conv.txt")[4:]

    # Reference design: a column of ones next to the convolved regressor.
    n_rows = len(convolved)
    actual_design = np.ones((n_rows, 2))
    actual_design[:, 1] = convolved

    # Reference betas (same recipe as the exercise): flatten all leading
    # axes, solve with the pseudo-inverse, then fold back to the image grid.
    flat_data = np.reshape(data, (-1, data.shape[-1]))
    design_pinv = npl.pinv(actual_design)
    actual_B = design_pinv.dot(flat_data.T)
    leading_shape = img.shape[:-1]
    actual_B_4d = np.reshape(actual_B.T, leading_shape + (-1,))

    # Function under test.
    exp_B_4d, exp_design = glm(data, convolved)
    assert_almost_equal(actual_B_4d, exp_B_4d)
    assert_almost_equal(actual_design, exp_design)

    # Diagnostics for a single voxel, computed by hand.
    voxel = (42, 32, 19)
    actual_fitted = actual_design.dot(actual_B_4d[voxel])
    actual_residuals = data[voxel] - actual_fitted
    dof = actual_design.shape[0] - npl.matrix_rank(actual_design)
    actual_MRSS = np.sum(actual_residuals**2) / dof

    # Same quantities from glm_diagnostics.
    exp_MRSS, exp_fitted, exp_residuals = glm_diagnostics(
        exp_B_4d, exp_design, data)
    assert_almost_equal(actual_fitted, exp_fitted[voxel])
    assert_almost_equal(actual_residuals, exp_residuals[voxel])
    assert_almost_equal(actual_MRSS, exp_MRSS[voxel])
def test_glm():
    """Validate glm() and glm_diagnostics() against a manual regression.

    Betas are recomputed with the pseudo-inverse of a hand-built design
    matrix, and fitted values, residuals and MRSS are checked at one voxel.
    """
    # Load the image and the convolved time course, skipping the first four
    # entries of each.
    img = nib.load(pathtoclassdata + "ds114_sub009_t2r1.nii")
    data = img.get_data()[..., 4:]
    convolved = np.loadtxt(pathtoclassdata + "ds114_sub009_t2r1_conv.txt")[4:]

    # Hand-built design matrix: ones in column 0, regressor in column 1.
    actual_design = np.ones((len(convolved), 2))
    actual_design[:, 1] = convolved

    # Hand-computed betas, copied from the exercise.
    n_last = data.shape[-1]
    data_2d = np.reshape(data, (-1, n_last))
    actual_B = npl.pinv(actual_design).dot(data_2d.T)
    beta_shape = img.shape[:-1] + (-1, )
    actual_B_4d = np.reshape(actual_B.T, beta_shape)

    # Results from the function under test.
    exp_B_4d, exp_design = glm(data, convolved)
    assert_almost_equal(actual_B_4d, exp_B_4d)
    assert_almost_equal(actual_design, exp_design)

    # Spot-check the diagnostics on one voxel.
    i, j, k = 42, 32, 19
    actual_fitted = actual_design.dot(actual_B_4d[i, j, k])
    actual_residuals = data[i, j, k] - actual_fitted
    residual_df = actual_design.shape[0] - npl.matrix_rank(actual_design)
    actual_MRSS = np.sum(actual_residuals**2) / residual_df

    exp_MRSS, exp_fitted, exp_residuals = glm_diagnostics(
        exp_B_4d, exp_design, data)
    assert_almost_equal(actual_fitted, exp_fitted[i, j, k])
    assert_almost_equal(actual_residuals, exp_residuals[i, j, k])
    assert_almost_equal(actual_MRSS, exp_MRSS[i, j, k])
def mcmc_serial(intensities_obs, mapping_states_obs, mapping_peptides, cfg,
                known_concentrations=None, mapping_known_concentrations=None,
                peptide_features=None, **kwargs):
    '''
    Serial MCMC sampler for posterior of state-level censoring model.

    Parameters
    ----------
        - intensities_obs : array_like
            A 1d array of length n_obs_states for which each entry contains
            the observed (summed) log state intensity.
            This must be aligned to mapping_states_obs and all entries must be
            > -inf; no missing peptides.
        - mapping_states_obs : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_obs_states for which each entry
            contains the index of the peptide that corresponds to the given
            observed state. Peptide indices can range over
            0 <= i < n_peptides.
            Not every peptide index is required to appear in this mapping;
            only observed peptides should be included. Also note that
            peptides are indexed overall, not within protein.
        - mapping_peptides : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_peptides for which each entry
            contains the index of the protein that corresponds to the given
            peptide. Protein indices can range over 0 <= i < n_proteins.
            Every peptide and protein to be included in the model should be
            included here. That is, both observed and unobserved peptides
            should appear in this mapping.
        - cfg : dictionary
            A dictionary (typically generated from a YAML file) containing
            priors and settings for the MCMC algorithm. Its exact form will be
            documented elsewhere. It will have at least three sections:
            priors, containing one entry per parameter, settings, containing
            settings for the MCMC algorithm, and init, containing initial
            values for certain parameters.
        - known_concentrations : ndarray, 1 dimension, optional
            Known concentrations, aligned to mapping_known_concentrations.
            Only used (and then required) when cfg['priors']['supervised'] is
            true. It is not converted here and must provide a .size
            attribute.
        - mapping_known_concentrations : array_like of ints, optional
            Protein indices to which the entries of known_concentrations
            correspond. Only used when running supervised.
        - peptide_features : ndarray, 2 dimensions, optional
            Covariates for the censoring model; rows are selected by peptide
            index and the number of columns is the number of features. If
            None, no features enter the model.
        - **kwargs
            Accepted for interface compatibility and ignored.

    Returns
    -------
        - draws : dictionary
            1- and 2-dimensional ndarrays containing the posterior samples for
            each parameter.
        - accept_stats : dictionary
            Dictionary containing number of acceptances for each MH step.

    Raises
    ------
        - ValueError
            If mapping_states_obs or mapping_peptides is not an integer array
            or contains indices outside the checked range.
    '''
    # Determine whether algorithm is running with supervision.
    # Only a missing key triggers the default; other errors propagate.
    try:
        supervised = cfg['priors']['supervised']
    except KeyError:
        print >> sys.stderr, 'Defaulting to unsupervised algorithm'
        supervised = False

    # If supervised, determine whether to model distribution of concentrations
    # If this is False, prior on $\beta_1$ is scaled by $|\beta_1|^{n_{mis}}$.
    if supervised:
        try:
            concentration_dist = cfg['priors']['concentration_dist']
        except KeyError:
            print >> sys.stderr, 'Defaulting to flat prior on concentrations'
            concentration_dist = False

    # Determine whether peptide features are present and, if so, their size
    if peptide_features is None:
        n_peptide_features = 0
    else:
        n_peptide_features = peptide_features.shape[1]

    # Convert inputs to np.ndarrays as needed. The builtin int replaces the
    # removed np.int alias; it selects the same default integer dtype.
    if type(intensities_obs) is not np.ndarray:
        intensities_obs = np.asanyarray(intensities_obs)
    if type(mapping_states_obs) is not np.ndarray:
        mapping_states_obs = np.asanyarray(mapping_states_obs, dtype=int)
    if type(mapping_peptides) is not np.ndarray:
        mapping_peptides = np.asanyarray(mapping_peptides, dtype=int)

    # Extract proposal DFs, falling back to defaults when not configured
    try:
        prop_df_y_mis = cfg['settings']['prop_df_y_mis']
    except KeyError:
        prop_df_y_mis = 5.0
    try:
        prop_df_eta = cfg['settings']['prop_df_eta']
    except KeyError:
        prop_df_eta = 10.

    # Extract dimensions from input

    # Number of iterations from cfg
    n_iterations = cfg['settings']['n_iterations']

    # Number of peptides and proteins from mapping_peptides
    n_peptides = np.size(mapping_peptides)
    n_proteins = 1 + np.max(mapping_peptides)

    # Check for validity of mapping vectors
    if (not issubclass(mapping_states_obs.dtype.type, np.integer) or
            np.min(mapping_states_obs) < 0 or
            np.max(mapping_states_obs) > n_peptides - 1):
        raise ValueError('State to peptide mapping (mapping_states_obs)'
                         ' is not valid')

    # NOTE(review): the upper bound here is n_peptides - 1 (protein indices
    # are compared against the peptide count); kept as in the original.
    if (not issubclass(mapping_peptides.dtype.type, np.integer) or
            np.min(mapping_peptides) < 0 or
            np.max(mapping_peptides) > n_peptides - 1):
        raise ValueError('Peptide to protein mapping (mapping_peptides)'
                         ' is not valid')

    # Compute tabulations that are invariant across iterations

    # Total number of observed states
    n_obs_states = np.size(intensities_obs)

    # Tabulate peptides per protein
    n_peptides_per_protein = np.bincount(mapping_peptides)
    peptides_obs = np.unique(mapping_states_obs)
    n_obs_peptides_per_protein = np.bincount(mapping_peptides[peptides_obs],
                                             minlength=n_proteins)

    # Tabulate number of observed states per peptide
    n_obs_states_per_peptide = np.bincount(mapping_states_obs,
                                           minlength=n_peptides)

    # Sum observed intensities per peptide
    total_intensity_obs_per_peptide = np.bincount(mapping_states_obs,
                                                  weights=intensities_obs,
                                                  minlength=n_peptides)

    # Allocate data structures for draws

    # Data structures for supervised algorithm
    if supervised:
        beta_draws = np.empty((n_iterations, 2))
        concentration_draws = np.empty((n_iterations, n_proteins))
        mean_concentration_draws = np.zeros((n_iterations))
        prec_concentration_draws = np.zeros((n_iterations))

    # Peptide- and protein-level means
    gamma_draws = np.empty((n_iterations, n_peptides))
    mu_draws = np.empty((n_iterations, n_proteins))

    # Number of censored states per peptide. The builtin int replaces the
    # abstract np.integer, which is deprecated as a dtype specifier.
    n_cen_states_per_peptide_draws = np.zeros((n_iterations, n_peptides),
                                              dtype=int)

    # State- and peptide-level variances
    sigmasq_draws = np.empty((n_iterations, n_proteins))
    tausq_draws = np.empty((n_iterations, n_proteins))

    # Hyperparameters for state-level variance model
    shape_sigmasq = np.empty(n_iterations)
    rate_sigmasq = np.empty(n_iterations)

    # Hyperparameters for peptide-level variance model
    shape_tausq = np.empty(n_iterations)
    rate_tausq = np.empty(n_iterations)

    # Censoring probability model parameters
    eta_draws = np.zeros((n_iterations, 2 + n_peptide_features * 2))
    p_rnd_cen = np.empty(n_iterations)

    # Number of states model parameters
    r = np.empty(n_iterations)
    lmbda = np.empty(n_iterations)

    # Compute initial values for MCMC iterations

    # p_rnd_cen from cfg
    p_rnd_cen[0] = cfg['init']['p_rnd_cen']

    # eta from cfg; bivariate normal draw. Scalar draws (randn() rather than
    # randn(1)) avoid assigning a size-1 array into a scalar slot; each call
    # still consumes exactly one standard-normal variate.
    eta0 = cfg['init']['eta']
    eta_draws[0, 0] = eta0['mean'][0] + eta0['sd'][0] * np.random.randn()
    eta_draws[0, 1] = eta0['mean'][1]
    if eta0['sd'][1] > 0:
        eta_draws[0, 1] += (eta0['cor'] * eta0['sd'][1] / eta0['sd'][0] *
                            (eta_draws[0, 0] - eta0['mean'][0]))
        eta_draws[0, 1] += (np.sqrt(1. - eta0['cor'] ** 2) * eta0['sd'][1] *
                            np.random.randn())

    # Number of states parameters from MAP estimator based on number of
    # observed peptides; very crude, but not altogether terrible. Note that
    # this ignores the +1 location shift in the actual n_states distribution.
    nbinom_kwargs = {
        'x': n_obs_states_per_peptide[n_obs_states_per_peptide > 0] - 1,
        'transform': True}
    nbinom_kwargs.update(cfg['priors']['n_states_dist'])
    r[0], lmbda[0] = lib.map_estimator_nbinom(**nbinom_kwargs)
    lmbda[0] = 1. - lmbda[0]

    # Hyperparameters for state- and peptide-level variance distributions
    # directly from cfg
    shape_sigmasq[0], rate_sigmasq[0] = (
        cfg['init']['sigmasq_dist']['shape'],
        cfg['init']['sigmasq_dist']['rate'])
    shape_tausq[0], rate_tausq[0] = (cfg['init']['tausq_dist']['shape'],
                                     cfg['init']['tausq_dist']['rate'])

    # State- and peptide-level variances via inverse-gamma draws
    sigmasq_draws[0] = 1. / np.random.gamma(shape=shape_sigmasq[0],
                                            scale=1. / rate_sigmasq[0],
                                            size=n_proteins)
    tausq_draws[0] = 1. / np.random.gamma(shape=shape_tausq[0],
                                          scale=1. / rate_tausq[0],
                                          size=n_proteins)

    # Mapping from protein to peptide conditional variances for convenience
    var_peptide_conditional = sigmasq_draws[0, mapping_peptides]

    # Protein-level means using mean observed intensity; excluding missing
    # peptides. Proteins without observed peptides get the smallest
    # non-NaN protein mean.
    mu_draws[0] = (np.bincount(mapping_peptides,
                               total_intensity_obs_per_peptide /
                               np.maximum(1, n_obs_states_per_peptide)) /
                   n_obs_peptides_per_protein)
    mu_draws[0, n_obs_peptides_per_protein < 1] = np.nanmin(mu_draws[0])

    if supervised:
        # Simple initialization for supervised algorithm

        # Initialize beta from regression of mu against known concentrations
        X = np.ones((known_concentrations.size, 2))
        X[:, 1] = known_concentrations
        beta_draws[0] = glm.wls(
            X=X, y=mu_draws[0, mapping_known_concentrations], w=1.)['b']

        # Adjust known concentrations in mu accordingly
        mu_draws[0, mapping_known_concentrations] = (
            beta_draws[0, 0] + beta_draws[0, 1] * known_concentrations)

        # And, initialize the concentration draws using the updated mu's
        concentration_draws[0] = ((mu_draws[0] - beta_draws[0, 0]) /
                                  beta_draws[0, 1])

        if concentration_dist:
            # Initialize hyperparameters on concentration distribution
            mean_concentration_draws[0] = np.mean(concentration_draws[0])
            prec_concentration_draws[0] = 1. / np.var(concentration_draws[0])

    # Peptide-level means using mean observed intensity; imputing missing
    # peptides as protein observed means
    gamma_draws[0] = mu_draws[0, mapping_peptides]
    gamma_draws[0, peptides_obs] = (
        total_intensity_obs_per_peptide[peptides_obs] /
        n_obs_states_per_peptide[peptides_obs])

    # Instantiate GLM family for eta step. A missing key or a value without
    # .title() (e.g. None) selects the logit default.
    try:
        glm_link_name = cfg["priors"]["glm_link"].title()
    except (KeyError, AttributeError):
        print >> sys.stderr, "GLM link not specified; defaulting to logit"
        glm_link_name = "Logit"
    glm_link = getattr(glm.links, glm_link_name)
    glm_family = glm.families.Binomial(link=glm_link)

    # Setup function for prior log density on eta, if requested. A missing
    # or empty (None) cfg["priors"]["eta"] section disables the prior.
    try:
        prior_scale = cfg["priors"]["eta"]["prior_scale"]
        prior_center = cfg["priors"]["eta"]["prior_center"]
    except (KeyError, TypeError):
        prior_scale = None
        prior_center = None

    if prior_scale is not None:
        # Gelman's weakly-informative prior (2008)
        def dprior_eta(eta, prior_scale=5., prior_center=0.):
            return -np.log(1. + ((eta[1] - prior_center) / prior_scale)**2)

        prior_eta_kwargs = {'prior_scale': prior_scale,
                            'prior_center': prior_center}
    else:
        dprior_eta = None
        prior_eta_kwargs = {}

    # Initialize dictionary for acceptance statistics
    accept_stats = {'sigmasq_dist': 0,
                    'tausq_dist': 0,
                    'n_states_dist': 0,
                    'eta': 0}

    # Master loop for MCMC iterations
    for t in xrange(1, n_iterations):
        # (1) Draw missing data (n_cen and censored state intensities) given
        #   all other parameters. Exact draw via rejection samplers.

        # (1a) Obtain p_int_cen per peptide and approximations of censored
        #   intensity posteriors.
        eta_0_effective = eta_draws[t - 1, 0]
        eta_1_effective = eta_draws[t - 1, 1]
        if n_peptide_features > 0:
            eta_0_effective += np.dot(
                peptide_features, eta_draws[t - 1, 2:(2 + n_peptide_features)]
            )
            eta_1_effective += np.dot(
                peptide_features, eta_draws[t - 1, (2 + n_peptide_features):]
            )

        # Local dict; deliberately not named kwargs so the function's
        # **kwargs parameter is no longer clobbered.
        cen_kwargs = {'eta_0': eta_0_effective,
                      'eta_1': eta_1_effective,
                      'mu': gamma_draws[t - 1],
                      'sigmasq': var_peptide_conditional,
                      'glm_link_name': glm_link_name}
        cen_dist = lib.characterize_censored_intensity_dist(**cen_kwargs)

        # (1b) Draw number of censored states per peptide
        n_cen_states_per_peptide = lib.rncen(
            n_obs=n_obs_states_per_peptide,
            p_rnd_cen=p_rnd_cen[t - 1],
            p_int_cen=cen_dist['p_int_cen'],
            lmbda=lmbda[t - 1], r=r[t - 1])
        n_cen_states_per_peptide_draws[t] = n_cen_states_per_peptide
        # Update state-level counts
        n_states_per_peptide = (n_obs_states_per_peptide +
                                n_cen_states_per_peptide)
        n_states_per_protein = np.bincount(mapping_peptides,
                                           weights=n_states_per_peptide)
        n_states = np.sum(n_states_per_peptide)

        # (1c) Draw censored intensities
        cen_kwargs['n_cen'] = n_cen_states_per_peptide
        cen_kwargs['p_rnd_cen'] = p_rnd_cen[t - 1]
        cen_kwargs['propDf'] = prop_df_y_mis
        cen_kwargs.update(cen_dist)
        intensities_cen, mapping_states_cen, W = lib.rintensities_cen(
            **cen_kwargs)

        # (2) Update random censoring probability. Gibbs step.
        p_rnd_cen[t] = updates.rgibbs_p_rnd_cen(n_rnd_cen=np.sum(W),
                                                n_states=n_states,
                                                **cfg['priors']['p_rnd_cen'])

        # Sum censored intensities per peptide
        total_intensity_cen_per_peptide = np.bincount(mapping_states_cen,
                                                      weights=intensities_cen,
                                                      minlength=n_peptides)

        # Compute mean intensities per peptide
        mean_intensity_per_peptide = ((total_intensity_obs_per_peptide +
                                       total_intensity_cen_per_peptide) /
                                      n_states_per_peptide)

        # (3) Update peptide-level mean parameters (gamma). Gibbs step.
        gamma_draws[t] = updates.rgibbs_gamma(
            mu=mu_draws[t - 1, mapping_peptides],
            tausq=tausq_draws[t - 1, mapping_peptides],
            sigmasq=var_peptide_conditional,
            y_bar=mean_intensity_per_peptide,
            n_states=n_states_per_peptide)
        mean_gamma_by_protein = np.bincount(mapping_peptides,
                                            weights=gamma_draws[t])
        mean_gamma_by_protein /= n_peptides_per_protein

        # (4) Update protein-level concentrations
        if supervised:
            if concentration_dist:
                # (4a) Update coefficients given concentrations. Gibbs step.
                # Only yields sane answers if modeling distribution of
                # concentrations.
                beta_draws[t] = updates.rgibbs_beta(
                    concentrations=concentration_draws[t - 1],
                    gamma_bar=mean_gamma_by_protein,
                    tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein,
                    **cfg['priors']['beta_concentration'])
            else:
                # (4a) Update coefficients given concentrations. Gibbs step.
                # Rao-Blackwellized version, implicitly scaling prior on
                # $\beta_1$ by $|\beta_1|^{n_{mis}}$
                beta_draws[t] = updates.rgibbs_beta(
                    concentrations=known_concentrations,
                    gamma_bar=mean_gamma_by_protein[
                        mapping_known_concentrations],
                    tausq=tausq_draws[t - 1, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations],
                    **cfg['priors']['beta_concentration'])

            # (4b) Update concentrations given coefficients. Gibbs step.
            concentration_draws[t] = updates.rgibbs_concentration(
                gamma_bar=mean_gamma_by_protein,
                tausq=tausq_draws[t - 1],
                n_peptides=n_peptides_per_protein,
                beta=beta_draws[t],
                mean_concentration=mean_concentration_draws[t - 1],
                prec_concentration=prec_concentration_draws[t - 1])
            # Known concentrations are held fixed at their supplied values
            concentration_draws[t, mapping_known_concentrations] = \
                known_concentrations

            if concentration_dist:
                # (4c) Update concentration distribution hyperparameters.
                # Scalar draw (no size=1) for the scalar slot; one variate
                # is consumed either way.
                mean_concentration_draws[t] = np.random.normal(
                    loc=np.mean(concentration_draws[t]),
                    scale=np.sqrt(1. / prec_concentration_draws[t - 1] /
                                  n_proteins))
                prec_concentration_draws[t] = 1. / updates.rgibbs_variances(
                    rss=np.sum((concentration_draws[t] -
                                mean_concentration_draws[t])**2),
                    n=n_proteins,
                    **cfg['priors']['prec_concentration'])

            # Set mu based on concentrations and betas
            mu_draws[t] = (beta_draws[t, 0] +
                           beta_draws[t, 1] * concentration_draws[t])
        else:
            # (4) Update protein-level mean parameters (mu). Gibbs step.
            mu_draws[t] = updates.rgibbs_mu(gamma_bar=mean_gamma_by_protein,
                                            tausq=tausq_draws[t - 1],
                                            n_peptides=n_peptides_per_protein,
                                            **cfg['priors']['mu'])

        # (5) Update state-level variance parameters (sigmasq). Gibbs step.
        # Residual sums of squares pool observed and censored states.
        rss_by_state = (
            intensities_obs - gamma_draws[t, mapping_states_obs]) ** 2
        rss_by_protein = np.bincount(mapping_peptides[mapping_states_obs],
                                     weights=rss_by_state,
                                     minlength=n_proteins)
        rss_by_state = (
            intensities_cen - gamma_draws[t, mapping_states_cen]) ** 2
        rss_by_protein += np.bincount(mapping_peptides[mapping_states_cen],
                                      weights=rss_by_state,
                                      minlength=n_proteins)
        sigmasq_draws[t] = updates.rgibbs_variances(
            rss=rss_by_protein,
            n=n_states_per_protein,
            prior_shape=shape_sigmasq[t - 1],
            prior_rate=rate_sigmasq[t - 1])

        # Mapping from protein to peptide conditional variances for
        # convenience
        var_peptide_conditional = sigmasq_draws[t, mapping_peptides]

        # (6) Update peptide-level variance parameters (tausq). Gibbs step.
        rss_by_peptide = (gamma_draws[t] - mu_draws[t, mapping_peptides]) ** 2
        rss_by_protein = np.bincount(mapping_peptides, weights=rss_by_peptide)
        tausq_draws[t] = updates.rgibbs_variances(
            rss=rss_by_protein,
            n=n_peptides_per_protein,
            prior_shape=shape_tausq[t - 1],
            prior_rate=rate_tausq[t - 1])

        # (7) Update state-level variance hyperparameters (sigmasq
        #   distribution). Conditional independence-chain MH step.
        result = updates.rmh_variance_hyperparams(
            variances=sigmasq_draws[t],
            shape_prev=shape_sigmasq[t - 1],
            rate_prev=rate_sigmasq[t - 1],
            **cfg['priors']['sigmasq_dist'])
        (shape_sigmasq[t], rate_sigmasq[t]), accept = result
        accept_stats['sigmasq_dist'] += accept

        # (8) Update peptide-level variance hyperparameters (tausq
        #   distribution). Conditional independence-chain MH step.
        result = updates.rmh_variance_hyperparams(
            variances=tausq_draws[t],
            shape_prev=shape_tausq[t - 1],
            rate_prev=rate_tausq[t - 1],
            **cfg['priors']['tausq_dist'])
        (shape_tausq[t], rate_tausq[t]), accept = result
        accept_stats['tausq_dist'] += accept

        # (9) Update parameter for negative-binomial n_states distribution (r
        #   and lmbda). Conditional independence-chain MH step.
        result = updates.rmh_nbinom_hyperparams(
            x=n_states_per_peptide - 1,
            r_prev=r[t - 1],
            p_prev=1. - lmbda[t - 1],
            **cfg['priors']['n_states_dist'])
        (r[t], lmbda[t]), accept = result
        lmbda[t] = 1. - lmbda[t]
        accept_stats['n_states_dist'] += accept

        # (10) Update coefficients of intensity-based probabilistic censoring
        #   model (eta). Conditional independence-chain MH step.
        # (10a) Build design matrix and response. Only using observed and
        #   intensity-censored states. When peptide features are present,
        #   2 * n_peptide_features extra rows are appended; they carry an
        #   identity block on the feature coefficients, response 0.5 and the
        #   pseudo-observation weights from cfg.
        n_at_risk = n_obs_states + np.sum(W < 1)
        X = np.zeros((n_at_risk + n_peptide_features * 2,
                      2 + n_peptide_features * 2))
        X[:n_at_risk, 0] = 1.
        X[:n_at_risk, 1] = np.r_[intensities_obs, intensities_cen[W < 1]]
        if n_peptide_features > 0:
            peptide_features_by_state = peptide_features[
                np.r_[mapping_states_obs, mapping_states_cen[W < 1]]
            ]
            X[:n_at_risk, 2:(2 + n_peptide_features)] = \
                peptide_features_by_state
            X[:n_at_risk, (2 + n_peptide_features):] = \
                (peptide_features_by_state.T * X[:n_at_risk, 1]).T
            X[n_at_risk:, 2:] = np.eye(n_peptide_features * 2)

        y = np.zeros(n_at_risk + n_peptide_features * 2)
        y[:n_obs_states] = 1.
        if n_peptide_features > 0:
            y[n_at_risk:] = 0.5

        w = np.ones_like(y)
        if n_peptide_features > 0:
            w[n_at_risk:(n_at_risk + n_peptide_features)] = \
                cfg['priors']['eta_features']['primary_pseudoobs']
            w[(n_at_risk + n_peptide_features):] = \
                cfg['priors']['eta_features']['interaction_pseudoobs']

        # (10b) Estimate GLM parameters.
        fit_eta = glm.glm(y=y, X=X, w=w, family=glm_family, info=True)

        if np.all(np.isfinite(fit_eta['b_hat'])):
            # (10c) Execute MH step.
            eta_draws[t], accept = glm.mh_update_glm_coef(
                b_prev=eta_draws[t - 1], y=y, X=X, family=glm_family,
                propDf=prop_df_eta, prior_log_density=dprior_eta,
                prior_kwargs=prior_eta_kwargs, **fit_eta)
            accept_stats['eta'] += accept
        else:
            # Non-finite GLM estimate: carry the previous eta forward
            eta_draws[t] = eta_draws[t - 1]

        if (cfg['settings']['verbose'] > 0 and
                t % cfg['settings']['verbose_interval'] == 0):
            print >> sys.stderr, 'Iteration %d complete' % t

    # Build dictionary of draws to return
    draws = {'mu': mu_draws,
             'gamma': gamma_draws,
             'eta': eta_draws,
             'p_rnd_cen': p_rnd_cen,
             'lmbda': lmbda,
             'r': r,
             'sigmasq': sigmasq_draws,
             'tausq': tausq_draws,
             'n_cen_states_per_peptide': n_cen_states_per_peptide_draws,
             'shape_tausq': shape_tausq,
             'rate_tausq': rate_tausq,
             'shape_sigmasq': shape_sigmasq,
             'rate_sigmasq': rate_sigmasq}

    # Add additional information for supervised algorithm
    if supervised:
        draws.update({
            'beta': beta_draws,
            'concentration': concentration_draws})
        if concentration_dist:
            draws.update({
                'mean_concentration': mean_concentration_draws,
                'var_concentration': 1. / prec_concentration_draws})

    return (draws, accept_stats)
#################################################################
# iii. Comparison of the two functions (single voxel response)  #
#################################################################

############################################
# a. Pick a good voxel to compare against  #
############################################

from glm import glm
from Image_Visualizing import present_3d

# Fit the GLM once per convolved regressor.
# conv_2 is skipped (not the correct shape); conv_5 is currently disabled.
beta_np, X_np = glm(data, conv_np)
beta_3, X_3 = glm(data, conv_3)
beta_4, X_4 = glm(data, conv_4_30)

# Show the second-coefficient maps (index 1 on the last axis) one after the
# other; the non-np versions look stronger / clearer.
for _beta in (beta_np, beta_3, beta_4):
    plt.imshow(present_3d(_beta[..., 1]),
               cmap="gray", interpolation="nearest")

# One slice (index 2 on the third axis) of the same coefficient for beta_4,
# with a colorbar.
plt.imshow(beta_4[..., 2, 1], cmap="gray", interpolation="nearest")
plt.colorbar()
# Order the pooled condition rows by their first entry and write them out.
cond_all = sorted(cond_all, key=lambda row: row[0])
np.savetxt(condition_location + "cond_all.txt", cond_all)

# Neural prediction from the combined condition file.
neural_prediction = events2neural(condition_location + "cond_all.txt",
                                  TR, n_vols)

N = len(neural_prediction)  # N == n_vols == 173
M = len(hrf_at_trs)  # M == 12

# Full convolution with the sampled HRF (hrf_at_trs), cut back to N points.
convolved = np.convolve(neural_prediction, hrf_at_trs)
np_hrf = convolved[:N]

#####################
# From GLM function #
#####################

np_B, np_X = glm(data, np_hrf)

######################################
# GLM Diagnostics (to get residuals) #
######################################

np_MRSS, np_fitted, np_residuals = glm_diagnostics(np_B, np_X, data)

##############################
# Shapiro-Wilk on Residuals  #
##############################

# Shapiro-Wilk tests the null hypothesis that the data were drawn from a
# normal distribution. Print the fraction of p-values above 0.05.
sw_pvals = check_sw(np_residuals)
print(np.mean(sw_pvals > 0.05))
print("********")

#################################################################
# iii. Comparison of the two functions (single voxel response)  #
#################################################################

############################################
# a. Pick a good voxel to compare against  #
############################################

from glm import glm
from Image_Visualizing import present_3d

# One GLM fit per convolved regressor.
# conv_2 is skipped (not the correct shape); conv_5 is currently disabled.
beta_np, X_np = glm(data, conv_np)
beta_3, X_3 = glm(data, conv_3)
beta_4, X_4 = glm(data, conv_4_30)

# Second-coefficient maps (index 1 on the last axis), drawn in turn; the
# non-np versions look stronger / clearer.
for _beta in (beta_np, beta_3, beta_4):
    plt.imshow(present_3d(_beta[..., 1]),
               cmap="gray", interpolation="nearest")

# A single slice (index 2 on the third axis) of beta_4 with a colorbar,
# then close the figure.
plt.imshow(beta_4[..., 2, 1], cmap="gray", interpolation="nearest")
plt.colorbar()
plt.close()
np_hrf = convolved[:N]

#############################
#############################
# Analysis and diagnostics  #
#############################
#############################

#######################
# a. (my) convolution #
#######################

# Estimated coefficients and design matrix from regressing the data on the
# "my" convolved time course, followed by the GLM diagnostics.
B_my, X_my = glm(data, my_hrf)
MRSS_my, fitted_my, residuals_my = glm_diagnostics(B_my, X_my, data)

# Report the mean MRSS.
print("MRSS using 'my' convolution function: " + str(np.mean(MRSS_my)))

# Overlay observed and fitted time courses for one voxel and save the figure.
# Looks pretty bad. (Voxel changed to avoid cherry-picking.)
for _series in (data, fitted_my):
    plt.plot(_series[41, 47, 2])
plt.savefig(location_of_images + "glm_plot_my.png")
plt.close()
M = len(hrf_at_trs)  # M == 12
np_hrf = convolved[:N]

#############################
#############################
# Analysis and diagnostics  #
#############################
#############################

#######################
# a. (my) convolution #
#######################

# Regress the data on the "my" convolved time course to get the estimated
# coefficients and design matrix, then compute the diagnostics.
B_my, X_my = glm(data, my_hrf)
MRSS_my, fitted_my, residuals_my = glm_diagnostics(B_my, X_my, data)

# Report the mean MRSS.
print("MRSS using 'my' convolution function: " + str(np.mean(MRSS_my)))

# Observed versus fitted time course for a single voxel, saved to disk.
# Looks pretty bad. (Voxel changed to avoid cherry-picking.)
for _series in (data, fitted_my):
    plt.plot(_series[41, 47, 2])
plt.savefig(location_of_images + "glm_plot_my.png")
plt.close()

##################
def t_stat(data_4d, convolved, c=(0, 1)):
    """ Return the estimated betas, t-values, degrees of freedom, and
    p-values for a contrast on the GLM fitted to every voxel.

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    convolved: numpy array of 1 dimension
        The convolved time course
    c: sequence or numpy array of 1 dimension
        The contrast vector of weights for the beta vector. Default is
        (0, 1), which corresponds to beta_1

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the length of `convolved`.

    Returns
    -------
    beta: numpy array of 2 dimensions
        Estimated betas as returned by `glm`, flattened to one column per
        voxel (last axis of the `glm` output becomes the rows)
    t: numpy array of 2 dimensions, shape (1, number of voxels)
        t-value of the contrast for each voxel; 0 where the standard error
        is exactly 0
    df: int
        degrees of freedom (rows of the design minus its rank)
    p: numpy array of 2 dimensions, same shape as `t`
        Upper-tail p-value of abs(t) under a t distribution with `df`
        degrees of freedom
    """
    beta, X = glm(data_4d, convolved)
    c = np.atleast_2d(c).T  # As column vector

    # Betas with one column per voxel, and the corresponding fitted values
    beta = np.reshape(beta, (-1, beta.shape[-1])).T
    fitted = X.dot(beta)

    # Residual error, time along axis 0 to match `fitted`
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted

    # Residual sum of squares and its mean over the degrees of freedom
    RSS = (errors**2).sum(axis=0)
    df = X.shape[0] - npl.matrix_rank(X)
    MRSS = RSS / df

    # Bottom half of the t statistic; SE has shape (1, number of voxels)
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))

    # Voxels with zero standard error get t = 0 instead of a division by
    # zero. A boolean mask is used for both assignments: the (rows, cols)
    # tuple from np.where, when used as `t[:, zeros]`, also selected the row
    # indices (all 0) as columns and wrongly zeroed voxel 0.
    zero_se = SE == 0
    SE[zero_se] = 1
    t = c.T.dot(beta) / SE
    t[zero_se] = 0

    # Upper-tail p-value of |t|. The survival function equals 1 - cdf but
    # does not round to 0 for large |t|.
    p = t_dist.sf(np.abs(t), df)
    return beta, t, df, p
#################################################################
# iii. Comparison of the two functions (single voxel response)  #
#################################################################

############################################
# a. Pick a good voxel to compare against  #
############################################

# Names of the two convolved regressors being compared:
#   my convolution: all_stimuli_convolution_best_length
#   np.convolve:    convolve_np

from glm import glm
from Image_Visualizing import present_3d

beta_my, X_my = glm(data, all_stimuli_convolution_best_length)
beta_np, X_np = glm(data, convolve_np)

# Second-coefficient maps (index 1 on the last axis) for both fits.
for _beta in (beta_my, beta_np):
    plt.imshow(present_3d(_beta[..., 1]),
               cmap="gray", interpolation="nearest")

# One slice (index 2 on the third axis) of beta_my with a colorbar, then
# close the figure.
plt.imshow(beta_my[..., 2, 1], cmap="gray", interpolation="nearest")
plt.colorbar()
plt.close()

# From visual analysis:
# the regression has a really high beta_1 value at
# beta_my[41,47,2,1] (voxel data[41,47,2]).
# Let's use that voxel for the comparisons (I know it is not good practice
# to check the created X based on betas that were themselves based on X).

###########################################
def worker(comm, rank, data, cfg):
    '''
    Worker-node process for parallel MCMC sampler.
    Receives parameters and commands from master node.
    Runs local updates and distributed components of shared draws.

    Parameters
    ----------
        - comm : mpi4py.MPI.COMM
            Initialized MPI communicator.
        - rank : int
            Rank (>= MPIROOT) of worker.
        - data : dictionary
            Data as output from load_data with rank > 0.
        - cfg : dictionary
            Configuration dictionary containing priors, settings, and paths
            for analysis. Its format is specified in detail in separate
            documentation.

    Returns
    -------
        - draws : dictionary
            1- and 2-dimensional ndarrays containing the posterior samples for
            each protein- and peptide-specific parameter. Shared parameters
            are handled by the master process.
        - mapping_peptides : integer ndarray
            Worker-specific peptide to protein mapping provided in data.
        - proteins_worker : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_proteins containing the indices (in
            the original dataset) of the proteins assigned to the given
            worker.
        - peptides_worker : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_peptides containing the indices (in
            the original dataset) of the peptides assigned to the given
            worker.
    '''
    # Determine whether algorithm is running with supervision
    try:
        supervised = cfg['priors']['supervised']
    except KeyError:
        # sys.stderr.write is used instead of the Python-2-only
        # `print >> sys.stderr` so this runs on both Python 2 and 3.
        sys.stderr.write('Defaulting to unsupervised algorithm\n')
        supervised = False

    # If supervised, determine whether to model distribution of concentrations
    # If this is False, prior on $\beta_1$ is scaled by $|\beta_1|^{n_{mis}}$.
    if supervised:
        try:
            concentration_dist = cfg['priors']['concentration_dist']
        except KeyError:
            sys.stderr.write('Defaulting to flat prior on concentrations\n')
            concentration_dist = False

    # Get information on peptide features if they're available
    have_peptide_features = 'path_peptide_features' in cfg['priors']
    if have_peptide_features:
        n_peptide_features = data['peptide_features_worker'].shape[1]
    else:
        n_peptide_features = 0

    # Extract proposal DFs
    try:
        prop_df_y_mis = cfg['settings']['prop_df_y_mis']
    except KeyError:
        prop_df_y_mis = 5.0

    # Create references to relevant data entries in local namespace
    mapping_peptides = data['mapping_peptides']
    intensities_obs = data['intensities_obs']
    mapping_states_obs = data['mapping_states_obs']
    # Data specific to the semi-supervised algorithm
    if supervised:
        known_concentrations = data['known_concentrations']
        mapping_known_concentrations = data['mapping_known_concentrations']

    # Extract dimensions from input

    # Number of iterations from cfg
    n_iterations = cfg['settings']['n_iterations']

    # Number of peptides and proteins from mapping_peptides
    n_peptides = np.size(mapping_peptides)
    n_proteins = 1 + np.max(mapping_peptides)

    # Compute tabulations that are invariant across iterations

    # Total number of observed states
    n_obs_states = np.size(intensities_obs)

    # Tabulate peptides per protein
    n_peptides_per_protein = np.bincount(mapping_peptides)
    peptides_obs = np.unique(mapping_states_obs)
    n_obs_peptides_per_protein = np.bincount(mapping_peptides[peptides_obs],
                                             minlength=n_proteins)

    # Tabulate number of observed states per peptide
    n_obs_states_per_peptide = np.bincount(mapping_states_obs,
                                           minlength=n_peptides)

    # Sum observed intensities per peptide
    total_intensity_obs_per_peptide = np.bincount(mapping_states_obs,
                                                  weights=intensities_obs,
                                                  minlength=n_peptides)

    # Allocate data structures for draws

    # Peptide- and protein-level means
    gamma_draws = np.empty((n_iterations, n_peptides))
    mu_draws = np.empty((n_iterations, n_proteins))

    # Concentrations, if supervised
    if supervised:
        concentration_draws = np.empty((n_iterations, n_proteins))

    # Number of censored states per peptide.
    # np.int_ is the concrete dtype the abstract np.integer used to resolve
    # to; newer NumPy versions no longer accept the abstract type as a dtype.
    n_cen_states_per_peptide_draws = np.zeros((n_iterations, n_peptides),
                                              dtype=np.int_)

    # State- and peptide-level variances
    sigmasq_draws = np.empty((n_iterations, n_proteins))
    tausq_draws = np.empty((n_iterations, n_proteins))

    def _collect_draws():
        # Build a fresh dictionary of the draw arrays; used both when saving
        # worker results and for the return value.
        collected = {'mu': mu_draws,
                     'gamma': gamma_draws,
                     'sigmasq': sigmasq_draws,
                     'tausq': tausq_draws,
                     'n_cen_states_per_peptide':
                     n_cen_states_per_peptide_draws,
                     }
        if supervised:
            collected.update({'concentration': concentration_draws})
        return collected

    # Instantiate GLM family for eta step
    try:
        glm_link_name = cfg["priors"]["glm_link"].title()
    except KeyError:
        sys.stderr.write("GLM link not specified; defaulting to logit\n")
        glm_link_name = "Logit"
    glm_link = getattr(glm.links, glm_link_name)
    glm_family = glm.families.Binomial(link=glm_link)

    # Setup data structure for shared parameters/hyperparameters sync
    # Layout:
    #   - 0:2 : shape_sigmasq, rate_sigmasq
    #   - 2:4 : shape_tausq, rate_tausq
    #   - 4:6 : r, lmbda
    #   - 6:8 : eta
    #   - 8   : p_rnd_cen
    # If supervised, 4 additional entries are used:
    #   - 9:11: beta
    #   - 11  : mean_concentration
    #   - 12  : prec_concentration
    params_shared = np.empty(9 + 4 * supervised, dtype=np.double)

    # Prepare to receive tasks
    working = True
    status = MPI.Status()
    t = np.array(0)

    # Primary send-receive loop for MCMC iterations
    while working:
        # Receive iteration and task information
        comm.Recv([t, MPI.INT], source=MPIROOT, tag=MPI.ANY_TAG,
                  status=status)
        task = status.Get_tag()

        if task == TAGS['STOP']:
            working = False
        elif task == TAGS['SYNC']:
            # Synchronize shared parameters/hyperparameters
            comm.Bcast(params_shared, root=MPIROOT)

            shape_sigmasq, rate_sigmasq = params_shared[0:2]
            shape_tausq, rate_tausq = params_shared[2:4]
            r, lmbda = params_shared[4:6]
            # NOTE(review): this slice holds two entries, but the LOCAL task
            # also reads eta[2:] when n_peptide_features > 0; confirm how the
            # master lays out eta when peptide features are in use.
            eta = params_shared[6:8]
            p_rnd_cen = params_shared[8]

            if supervised:
                beta = params_shared[9:11]
                mean_concentration = params_shared[11]
                prec_concentration = params_shared[12]
        elif task == TAGS['INIT']:
            # Compute initial values for MCMC iterations

            # Protein-level means using mean observed intensity; excluding
            # missing peptides
            mu_draws[0] = (
                np.bincount(mapping_peptides,
                            total_intensity_obs_per_peptide /
                            np.maximum(1, n_obs_states_per_peptide)) /
                n_obs_peptides_per_protein)
            mu_draws[0, n_obs_peptides_per_protein < 1] = np.nanmin(
                mu_draws[0])

            # Peptide-level means using mean observed intensity; imputing
            # missing peptides as protein observed means
            gamma_draws[0] = mu_draws[0, mapping_peptides]
            gamma_draws[0, peptides_obs] = (
                total_intensity_obs_per_peptide[peptides_obs] /
                n_obs_states_per_peptide[peptides_obs]
            )

            # State- and peptide-level variances via inverse-gamma draws
            sigmasq_draws[0] = 1. / np.random.gamma(shape=shape_sigmasq,
                                                    scale=1. / rate_sigmasq,
                                                    size=n_proteins)
            tausq_draws[0] = 1. / np.random.gamma(shape=shape_tausq,
                                                  scale=1. / rate_tausq,
                                                  size=n_proteins)

            # Mapping from protein to peptide conditional variances for
            # convenience
            var_peptide_conditional = sigmasq_draws[0, mapping_peptides]

            # Number of states parameters from local MAP estimator based on
            # number of observed peptides; very crude, but not altogether
            # terrible. Note that this ignores the +1 location shift in the
            # actual n_states distribution.
            kwargs = {
                'x': n_obs_states_per_peptide[n_obs_states_per_peptide > 0]
                - 1,
                'transform': True}
            kwargs.update(cfg['priors']['n_states_dist'])
            r, lmbda = lib.map_estimator_nbinom(**kwargs)
            lmbda = 1. - lmbda

            # Combine local estimates at master for initialization.
            # Values synchronize at first iteration during SYNC task.
            comm.Reduce([np.array([r, lmbda]), MPI.DOUBLE], None,
                        op=MPI.SUM, root=MPIROOT)

            if supervised:
                # Run Gibbs update on concentration-intensity coefficients
                # using noninformative prior.
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=known_concentrations,
                    gamma_bar=mu_draws[0, mapping_known_concentrations],
                    tausq=tausq_draws[0, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations],
                    MPIROOT=MPIROOT)
        elif task == TAGS['LOCAL']:
            # (1) Draw missing data (n_cen and censored state intensities)
            # given all other parameters. Exact draw via rejection samplers.

            # (1a) Obtain p_int_cen per peptide and approximations of
            # censored intensity posteriors.
            eta_0_effective = eta[0]
            eta_1_effective = eta[1]
            if n_peptide_features > 0:
                eta_0_effective += np.dot(data['peptide_features_worker'],
                                          eta[2:(2 + n_peptide_features)])
                eta_1_effective += np.dot(data['peptide_features_worker'],
                                          eta[(2 + n_peptide_features):])

            kwargs = {'eta_0': eta_0_effective,
                      'eta_1': eta_1_effective,
                      'mu': gamma_draws[t - 1],
                      'sigmasq': var_peptide_conditional,
                      'glm_link_name': glm_link_name}
            cen_dist = lib.characterize_censored_intensity_dist(**kwargs)

            # (1b) Draw number of censored states per peptide
            n_cen_states_per_peptide = lib.rncen(
                n_obs=n_obs_states_per_peptide,
                p_rnd_cen=p_rnd_cen,
                p_int_cen=cen_dist['p_int_cen'],
                lmbda=lmbda, r=r)
            n_cen_states_per_peptide_draws[t] = n_cen_states_per_peptide
            # Update state-level counts
            n_states_per_peptide = (n_obs_states_per_peptide +
                                    n_cen_states_per_peptide)
            n_states_per_protein = np.bincount(mapping_peptides,
                                               weights=n_states_per_peptide)
            n_states = np.sum(n_states_per_peptide)

            # (1c) Draw censored intensities
            kwargs['n_cen'] = n_cen_states_per_peptide
            kwargs['p_rnd_cen'] = p_rnd_cen
            kwargs['propDf'] = prop_df_y_mis
            kwargs.update(cen_dist)
            intensities_cen, mapping_states_cen, W = lib.rintensities_cen(
                **kwargs)

            # Sum censored intensities per peptide
            total_intensity_cen_per_peptide = np.bincount(
                mapping_states_cen, weights=intensities_cen,
                minlength=n_peptides)

            # Compute mean intensities per peptide
            mean_intensity_per_peptide = ((total_intensity_obs_per_peptide +
                                           total_intensity_cen_per_peptide) /
                                          n_states_per_peptide)

            # (2) Update peptide-level mean parameters (gamma). Gibbs step.
            gamma_draws[t] = updates_serial.rgibbs_gamma(
                mu=mu_draws[t - 1, mapping_peptides],
                tausq=tausq_draws[t - 1, mapping_peptides],
                sigmasq=var_peptide_conditional,
                y_bar=mean_intensity_per_peptide,
                n_states=n_states_per_peptide)
            mean_gamma_by_protein = np.bincount(mapping_peptides,
                                                weights=gamma_draws[t])
            mean_gamma_by_protein /= n_peptides_per_protein

            if supervised:
                # (3) Update concentrations given coefficients. Gibbs step.
                concentration_draws[t] = updates_serial.rgibbs_concentration(
                    gamma_bar=mean_gamma_by_protein,
                    tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein, beta=beta,
                    mean_concentration=mean_concentration,
                    prec_concentration=prec_concentration)
                # Proteins with known concentrations keep their known values
                concentration_draws[t, mapping_known_concentrations] = \
                    known_concentrations

                mu_draws[t] = beta[0] + beta[1] * concentration_draws[t]
            else:
                # (3) Update protein-level mean parameters (mu). Gibbs step.
                mu_draws[t] = updates_serial.rgibbs_mu(
                    gamma_bar=mean_gamma_by_protein,
                    tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein,
                    **cfg['priors']['mu'])

            # (4) Update state-level variance parameters (sigmasq). Gibbs
            # step.
            rss_by_state = ((intensities_obs -
                             gamma_draws[t, mapping_states_obs]) ** 2)
            rss_by_protein = np.bincount(
                mapping_peptides[mapping_states_obs],
                weights=rss_by_state, minlength=n_proteins)

            rss_by_state = ((intensities_cen -
                             gamma_draws[t, mapping_states_cen]) ** 2)
            rss_by_protein += np.bincount(
                mapping_peptides[mapping_states_cen],
                weights=rss_by_state, minlength=n_proteins)

            sigmasq_draws[t] = updates_serial.rgibbs_variances(
                rss=rss_by_protein, n=n_states_per_protein,
                prior_shape=shape_sigmasq, prior_rate=rate_sigmasq)

            # Mapping from protein to peptide conditional variances for
            # convenience
            var_peptide_conditional = sigmasq_draws[t, mapping_peptides]

            # (5) Update peptide-level variance parameters (tausq). Gibbs
            # step.
            rss_by_peptide = (
                gamma_draws[t] - mu_draws[t, mapping_peptides]) ** 2
            rss_by_protein = np.bincount(mapping_peptides,
                                         weights=rss_by_peptide)
            tausq_draws[t] = updates_serial.rgibbs_variances(
                rss=rss_by_protein, n=n_peptides_per_protein,
                prior_shape=shape_tausq, prior_rate=rate_tausq)
        elif task == TAGS['SIGMA']:
            # Run distributed MH step for sigmasq hyperparameters
            updates_parallel.rmh_worker_variance_hyperparams(
                comm=comm, variances=sigmasq_draws[t], MPIROOT=MPIROOT)
        elif task == TAGS['TAU']:
            # Run distributed MH step for tausq hyperparameters
            updates_parallel.rmh_worker_variance_hyperparams(
                comm=comm, variances=tausq_draws[t], MPIROOT=MPIROOT)
        elif task == TAGS['NSTATES']:
            # Run distributed MH step for n_states hyperparameters
            updates_parallel.rmh_worker_nbinom_hyperparams(
                comm=comm, x=n_states_per_peptide - 1, r_prev=r,
                p_prev=1. - lmbda, MPIROOT=MPIROOT,
                **cfg['priors']['n_states_dist'])
        elif task == TAGS['ETA']:
            # Run distributed MH step for eta (coefficients in censoring
            # model)

            # Build design matrix and response. Only using observed and
            # intensity-censored states.
            n_at_risk = n_obs_states + np.sum(W < 1)
            X = np.zeros((n_at_risk + n_peptide_features * 2,
                          2 + n_peptide_features * 2))
            X[:n_at_risk, 0] = 1.
            X[:n_at_risk, 1] = np.r_[intensities_obs,
                                     intensities_cen[W < 1]]
            if n_peptide_features > 0:
                peptide_features_by_state = data['peptide_features_worker'][
                    np.r_[mapping_states_obs, mapping_states_cen[W < 1]]
                ]
                X[:n_at_risk, 2:(2 + n_peptide_features)] = \
                    peptide_features_by_state
                X[:n_at_risk, (2 + n_peptide_features):] = \
                    (peptide_features_by_state.T * X[:n_at_risk, 1]).T
                # Extra rows, one per feature coefficient; they receive
                # response 0.5 and the pseudo-observation weights below.
                X[n_at_risk:, 2:] = np.eye(n_peptide_features * 2)

            y = np.zeros(n_at_risk + n_peptide_features * 2)
            y[:n_obs_states] = 1.
            if n_peptide_features > 0:
                y[n_at_risk:] = 0.5

            w = np.ones_like(y)
            if n_peptide_features > 0:
                # Pseudo-observation weights are divided by
                # comm.Get_size() - 1.
                w[n_at_risk:(n_at_risk + n_peptide_features)] = (
                    cfg['priors']['eta_features']['primary_pseudoobs'] /
                    (comm.Get_size() - 1.))
                w[(n_at_risk + n_peptide_features):] = (
                    cfg['priors']['eta_features']['interaction_pseudoobs'] /
                    (comm.Get_size() - 1.))

            # Estimate GLM parameters.
            fit_eta = glm.glm(y=y, X=X, w=w, family=glm_family, info=True,
                              cov=True)

            # Handle distributed computation draw
            updates_parallel.rmh_worker_glm_coef(
                comm=comm, b_prev=eta, family=glm_family, y=y, X=X, w=w,
                MPIROOT=MPIROOT, **fit_eta)
        elif task == TAGS['PRNDCEN']:
            # Run distributed Gibbs step for p_rnd_cen.
            # The builtin int replaces the removed alias np.int (same type).
            updates_parallel.rgibbs_worker_p_rnd_cen(
                comm=comm, n_rnd_cen=np.sum(W, dtype=int),
                n_states=n_states, MPIROOT=MPIROOT)
        elif task == TAGS['BETA']:
            # Run distributed Gibbs step for coefficients of
            # concentration-intensity relationship
            if concentration_dist:
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=concentration_draws[t],
                    gamma_bar=mean_gamma_by_protein,
                    tausq=tausq_draws[t],
                    n_peptides=n_peptides_per_protein, MPIROOT=MPIROOT)
            else:
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=known_concentrations,
                    gamma_bar=mean_gamma_by_protein[
                        mapping_known_concentrations],
                    tausq=tausq_draws[t, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations],
                    MPIROOT=MPIROOT)
        elif task == TAGS['CONCENTRATION_DIST']:
            # Run distributed Gibbs step for hyperparameters of concentration
            # distribution
            updates_parallel.rgibbs_worker_concentration_dist(
                comm=comm, concentrations=concentration_draws[t],
                MPIROOT=MPIROOT)
        elif task == TAGS['SAVE']:
            # Construct path for worker-specific results
            path_worker = cfg['output']['pattern_results_worker'] % rank

            # Setup draws to save
            draws = _collect_draws()

            lib.write_to_hdf5(
                path=path_worker, compress=cfg['output']['compress'],
                draws=draws, mapping_peptides=data['mapping_peptides'],
                proteins_worker=data['proteins_worker'])

    # Setup draws to return
    draws = _collect_draws()

    return (draws, data['mapping_peptides'],
            data['proteins_worker'], data['peptides_worker'])
def t_stat(data_4d, convolved, c=[0, 1]):
    """ Return four values, the estimated beta, t-value,
    degrees of freedom, and p-value for the given t-value

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    convolved: numpy array of 1 dimension
        The convolved time course
    c: numpy array of 1 dimension
        The contrast vector for the weights of the beta vector.
        Default is [0,1] which corresponds to beta_1. The default list is
        never mutated (it is copied by `np.atleast_2d`).

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the length of `convolved`.

    Returns
    -------
    beta: numpy array of 2 dimensions
        estimated beta values, reshaped to (number of betas, number of voxels)
    t: numpy array of 2 dimensions
        t-values of the contrast, shape (1, number of voxels); set to 0
        for voxels whose standard error is exactly 0
    df: int
        degrees of freedom
    p: numpy array of 2 dimensions
        upper-tail probability of |t| under the t distribution with `df`
        degrees of freedom, same shape as `t`
    """
    # Fit the model; glm returns the betas and the design matrix
    beta, X = glm(data_4d, convolved)
    c = np.atleast_2d(c).T  # As column vector

    # Calculate the parameters - b hat (one column per voxel)
    beta = np.reshape(beta, (-1, beta.shape[-1])).T
    fitted = X.dot(beta)

    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted

    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)
    df = X.shape[0] - npl.matrix_rank(X)

    # Mean residual sum of squares
    MRSS = RSS / df

    # calculate bottom half of t statistic
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))

    # Voxels with zero standard error would divide by zero: divide by 1
    # instead and force their t-value to 0 afterwards. A boolean mask is
    # used because the tuple from np.where(SE == 0), when placed after a
    # slice as in t[:, zeros], is read as a 2 x k array of column indices
    # whose first row is all zeros, which also wiped the t-value of voxel 0.
    zero_se = SE == 0
    SE[zero_se] = 1
    t = c.T.dot(beta) / SE
    t[zero_se] = 0

    # Get p value for t value using cumulative density function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p
    return beta, t, df, p
#################################################################

############################################
# a. Pick a good voxel to compare against  #
############################################

# Reminder of the names of the two convolution results being compared:
#   my convolution: all_stimuli_convolution_best_length
#   np.convolve:    convolve_np

from glm import glm
from Image_Visualizing import present_3d

# Fit one GLM per convolution method.
beta_my, X_my = glm(data, all_stimuli_convolution_best_length)
beta_np, X_np = glm(data, convolve_np)

# Every beta_1 map below is drawn with the same rendering options.
_gray_nearest = {"cmap": "gray", "interpolation": "nearest"}

plt.imshow(present_3d(beta_my[..., 1]), **_gray_nearest)
plt.imshow(present_3d(beta_np[..., 1]), **_gray_nearest)

# Single slice (index 2 on the third axis) of the beta_1 map.
plt.imshow(beta_my[..., 2, 1], **_gray_nearest)
plt.colorbar()
plt.close()

# From visual analysis:
# the regression has a really high beta_1 value at
#   beta_my[41,47,2,1]  (voxel data[41,47,2])
# so let's use that voxel for the comparisons (I know it is not good
# practice to check the created X based on betas that are based on X).
# Create the .txt file for the events2neural function: stack the three
# condition arrays and order the rows by their first column.
# np.vstack replaces np.row_stack, an alias deprecated in NumPy 2.0.
cond_all = np.vstack((cond1, cond2, cond3))
cond_all = sorted(cond_all, key=lambda x: x[0])
np.savetxt(condition_location + "cond_all.txt", cond_all)

neural_prediction = events2neural(condition_location + "cond_all.txt",
                                  TR, n_vols)
convolved = np.convolve(neural_prediction, hrf_at_trs)  # hrf_at_trs sample data
N = len(neural_prediction)  # N == n_vols == 173
M = len(hrf_at_trs)  # M == 12
# Keep only the first N values so the regressor has one value per volume.
np_hrf = convolved[:N]

###################
# From GLM function
###################

np_B, np_X = glm(data, np_hrf)

####################################
# GLM Diagnostics (to get residuals)
####################################

np_MRSS, np_fitted, np_residuals = glm_diagnostics(np_B, np_X, data)

###########################
# Shapiro-Wilk on Residuals
###########################
# Shapiro-Wilk: tests the null hypothesis that the data was
# drawn from a normal distribution.

# Using 4-d residuals.