Example #1
def test_glm():
    # Read in the image data.
    img = nib.load(pathtoclassdata + "ds114_sub009_t2r1.nii")
    data = img.get_data()[..., 4:]
    # Read in the convolutions.
    convolved = np.loadtxt(pathtoclassdata + "ds114_sub009_t2r1_conv.txt")[4:]
    # Create design matrix.
    actual_design = np.ones((len(convolved), 2))
    actual_design[:, 1] = convolved

    # Calculate betas, copied from the exercise.
    data_2d = np.reshape(data, (-1, data.shape[-1]))
    actual_B = npl.pinv(actual_design).dot(data_2d.T)
    actual_B_4d = np.reshape(actual_B.T, img.shape[:-1] + (-1,))

    # Run function.
    exp_B_4d, exp_design = glm(data, convolved)
    assert_almost_equal(actual_B_4d, exp_B_4d)
    assert_almost_equal(actual_design, exp_design)

    # Pick a single voxel to check diagnostics.
    # Calculate actual fitted values, residuals, and MRSS of voxel.
    actual_fitted = actual_design.dot(actual_B_4d[42, 32, 19])
    actual_residuals = data[42, 32, 19] - actual_fitted
    actual_MRSS = np.sum(actual_residuals**2) / (
        actual_design.shape[0] - npl.matrix_rank(actual_design))

    # Calculate using glm_diagnostics function.
    exp_MRSS, exp_fitted, exp_residuals = glm_diagnostics(
        exp_B_4d, exp_design, data)
    assert_almost_equal(actual_fitted, exp_fitted[42, 32, 19])
    assert_almost_equal(actual_residuals, exp_residuals[42, 32, 19])
    assert_almost_equal(actual_MRSS, exp_MRSS[42, 32, 19])
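# The test above pins down what it assumes of the project's glm and
# glm_diagnostics functions. A minimal sketch consistent with those
# assertions (an assumption, not the project's actual implementation):
import numpy as np
import numpy.linalg as npl


def glm(data, convolved):
    # Design matrix: intercept column plus the convolved regressor.
    design = np.ones((len(convolved), 2))
    design[:, 1] = convolved
    # Least-squares betas for every voxel via the pseudoinverse.
    data_2d = np.reshape(data, (-1, data.shape[-1]))
    B = npl.pinv(design).dot(data_2d.T)
    B_4d = np.reshape(B.T, data.shape[:-1] + (-1,))
    return B_4d, design


def glm_diagnostics(B_4d, design, data):
    # Fitted values, residuals, and mean residual sum of squares (MRSS)
    # per voxel, with df = n - rank(X) as in the test above.
    B_2d = np.reshape(B_4d, (-1, B_4d.shape[-1])).T
    data_2d = np.reshape(data, (-1, data.shape[-1]))
    fitted = design.dot(B_2d)
    residuals = data_2d.T - fitted
    df = design.shape[0] - npl.matrix_rank(design)
    MRSS = np.sum(residuals ** 2, axis=0) / df
    fitted_4d = np.reshape(fitted.T, data.shape)
    residuals_4d = np.reshape(residuals.T, data.shape)
    MRSS_3d = np.reshape(MRSS, data.shape[:-1])
    return MRSS_3d, fitted_4d, residuals_4d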
Example #3
def mcmc_serial(intensities_obs, mapping_states_obs, mapping_peptides, cfg,
                known_concentrations=None, mapping_known_concentrations=None,
                peptide_features=None, **kwargs):
    '''
    Serial MCMC sampler for posterior of state-level censoring model.

    Parameters
    ----------
        - intensities_obs : array_like
            A 1d array of length n_obs_states for which each entry contains the
            observed (summed) log state intensity.
            This must be aligned to mapping_states_obs and all entries must be
            > -inf; no missing peptides.
        - mapping_states_obs : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_obs_states for which each entry
            contains the index of the peptide that corresponds to the given
            observed state. Peptide indices can range over 0 <= i < n_peptides.
            Not every peptide index is required to appear in this mapping; only
            observed peptides should be included. Also note that peptides are
            indexed overall, not within protein.
        - mapping_peptides : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_peptides for which each entry
            contains the index of the protein that corresponds to the given
            peptide. Protein indices can range over 0 <= i < n_proteins.
            Every peptide and protein to be included in the model should be
            included here. That is, both observed and unobserved peptides should
            appear in this mapping.
        - cfg : dictionary
            A dictionary (typically generated from a YAML file) containing
            priors and settings for the MCMC algorithm. Its exact form will be
            documented elsewhere. It will have at least three sections: priors,
            containing one entry per parameter, settings, containing settings
            for the MCMC algorithm, and init, containing initial values for
            certain parameters.

    Returns
    -------
        - draws : dictionary
            1- and 2-dimensional ndarrays containing the posterior samples for
            each parameter.
        - accept_stats : dictionary
            Dictionary containing number of acceptances for each MH step.

    '''
    # Determine whether algorithm is running with supervision
    try:
        supervised = cfg['priors']['supervised']
    except KeyError:
        print >> sys.stderr, 'Defaulting to unsupervised algorithm'
        supervised = False

    # If supervised, determine whether to model distribution of concentrations
    # If this is False, prior on $\beta_1$ is scaled by $|\beta_1|^{n_{mis}}$.
    if supervised:
        try:
            concentration_dist = cfg['priors']['concentration_dist']
        except KeyError:
            print >> sys.stderr, 'Defaulting to flat prior on concentrations'
            concentration_dist = False
    
    # Determine whether peptide features are present and, if so, their size
    if peptide_features is None:
        n_peptide_features = 0
    else:
        n_peptide_features = peptide_features.shape[1]

    # Convert inputs to np.ndarrays as needed
    if type(intensities_obs) is not np.ndarray:
        intensities_obs = np.asanyarray(intensities_obs)
    if type(mapping_states_obs) is not np.ndarray:
        mapping_states_obs = np.asanyarray(mapping_states_obs, dtype=np.int)
    if type(mapping_peptides) is not np.ndarray:
        mapping_peptides = np.asanyarray(mapping_peptides, dtype=np.int)

    # Extract proposal DFs
    try:
        prop_df_y_mis = cfg['settings']['prop_df_y_mis']
    except KeyError:
        prop_df_y_mis = 5.0

    try:
        prop_df_eta = cfg['settings']['prop_df_eta']
    except KeyError:
        prop_df_eta = 10.

    # Extract dimensions from input

    # Number of iterations from cfg
    n_iterations = cfg['settings']['n_iterations']

    # Number of peptides and proteins from mapping_peptides
    n_peptides = np.size(mapping_peptides)
    n_proteins = 1 + np.max(mapping_peptides)

    # Check for validity of mapping vectors
    if (not issubclass(mapping_states_obs.dtype.type, np.integer) or
        np.min(mapping_states_obs) < 0 or
            np.max(mapping_states_obs) > n_peptides - 1):
        raise ValueError('State to peptide mapping (mapping_states_obs)'
                         ' is not valid')

    if (not issubclass(mapping_peptides.dtype.type, np.integer) or
        np.min(mapping_peptides) < 0 or
            np.max(mapping_peptides) > n_peptides - 1):
        raise ValueError('Peptide to protein mapping (mapping_peptides)'
                         ' is not valid')

    # Compute tabulations that are invariant across iterations

    # Total number of observed states
    n_obs_states = np.size(intensities_obs)

    # Tabulate peptides per protein
    n_peptides_per_protein = np.bincount(mapping_peptides)
    peptides_obs = np.unique(mapping_states_obs)
    n_obs_peptides_per_protein = np.bincount(mapping_peptides[peptides_obs],
                                             minlength=n_proteins)

    # Tabulate number of observed states per peptide
    n_obs_states_per_peptide = np.bincount(mapping_states_obs,
                                           minlength=n_peptides)

    # Sum observed intensities per peptide
    total_intensity_obs_per_peptide = np.bincount(mapping_states_obs,
                                                  weights=intensities_obs,
                                                  minlength=n_peptides)

    # Allocate data structures for draws

    # Data structures for supervised algorithm
    if supervised:
        beta_draws = np.empty((n_iterations, 2))
        concentration_draws = np.empty((n_iterations, n_proteins))
        mean_concentration_draws = np.zeros((n_iterations))
        prec_concentration_draws = np.zeros((n_iterations))

    # Peptide- and protein-level means
    gamma_draws = np.empty((n_iterations, n_peptides))
    mu_draws = np.empty((n_iterations, n_proteins))

    # Number of censored states per peptide
    n_cen_states_per_peptide_draws = np.zeros((n_iterations, n_peptides),
                                              dtype=np.integer)

    # State- and peptide-level variances
    sigmasq_draws = np.empty((n_iterations, n_proteins))
    tausq_draws = np.empty((n_iterations, n_proteins))

    # Hyperparameters for state-level variance model
    shape_sigmasq = np.empty(n_iterations)
    rate_sigmasq = np.empty(n_iterations)

    # Hyperparameters for peptide-level variance model
    shape_tausq = np.empty(n_iterations)
    rate_tausq = np.empty(n_iterations)

    # Censoring probability model parameters
    eta_draws = np.zeros((n_iterations, 2 + n_peptide_features * 2))
    p_rnd_cen = np.empty(n_iterations)

    # Number of states model parameters
    r = np.empty(n_iterations)
    lmbda = np.empty(n_iterations)

    # Compute initial values for MCMC iterations

    # p_rnd_cen from cfg
    p_rnd_cen[0] = cfg['init']['p_rnd_cen']

    # eta from cfg; bivariate normal draw
    eta0 = cfg['init']['eta']
    eta_draws[0, 0] = eta0['mean'][0] + eta0['sd'][0] * np.random.randn(1)
    eta_draws[0, 1] = eta0['mean'][1]
    if eta0['sd'][1] > 0:
        eta_draws[0, 1] += (eta0['cor'] * eta0['sd'][1] / eta0['sd'][0] *
                           (eta_draws[0, 0] - eta0['mean'][0]))
        eta_draws[0, 1] += (np.sqrt(1. - eta0['cor'] ** 2) * eta0['sd'][1] *
                            np.random.randn(1))

    # Number of states parameters from MAP estimator based on number of observed
    # peptides; very crude, but not altogether terrible. Note that this ignores
    # the +1 location shift in the actual n_states distribution.
    kwargs = {'x': n_obs_states_per_peptide[n_obs_states_per_peptide > 0] - 1,
              'transform': True}
    kwargs.update(cfg['priors']['n_states_dist'])
    r[0], lmbda[0] = lib.map_estimator_nbinom(**kwargs)
    lmbda[0] = 1. - lmbda[0]

    # Hyperparameters for state- and peptide-level variance distributions
    # directly from cfg
    shape_sigmasq[0], rate_sigmasq[0] = (
        cfg['init']['sigmasq_dist']['shape'],
        cfg['init']['sigmasq_dist']['rate'])
    shape_tausq[0], rate_tausq[0] = (cfg['init']['tausq_dist']['shape'],
                                     cfg['init']['tausq_dist']['rate'])

    # State- and peptide-level variances via inverse-gamma draws
    sigmasq_draws[0] = 1. / np.random.gamma(shape=shape_sigmasq[0],
                                            scale=1. / rate_sigmasq[0],
                                            size=n_proteins)
    tausq_draws[0] = 1. / np.random.gamma(shape=shape_tausq[0],
                                          scale=1. / rate_tausq[0],
                                          size=n_proteins)

    # Mapping from protein to peptide conditional variances for convenience
    var_peptide_conditional = sigmasq_draws[0, mapping_peptides]

    # Protein-level means using mean observed intensity; excluding missing
    # peptides
    mu_draws[0] = (np.bincount(mapping_peptides,
                               total_intensity_obs_per_peptide /
                               np.maximum(1, n_obs_states_per_peptide)) /
                   n_obs_peptides_per_protein)
    mu_draws[0, n_obs_peptides_per_protein < 1] = np.nanmin(mu_draws[0])

    if supervised:
        # Simple initialization for supervised algorithm
        # Initialize beta from regression of mu against known concentrations
        X = np.ones((known_concentrations.size, 2))
        X[:, 1] = known_concentrations
        beta_draws[0] = glm.wls(X=X,
                                y=mu_draws[0, mapping_known_concentrations],
                                w=1.)['b']

        # Adjust known concentrations in mu accordingly
        mu_draws[0, mapping_known_concentrations] = (
            beta_draws[0, 0] + beta_draws[0, 1] * known_concentrations)

        # And, initialize the concentration draws using the updated mu's
        concentration_draws[0] = ((mu_draws[0] - beta_draws[0, 0]) /
                                  beta_draws[0, 1])

        if concentration_dist:
            # Initialize hyperparameters on concentration distribution
            mean_concentration_draws[0] = np.mean(concentration_draws[0])
            prec_concentration_draws[0] = 1. / np.var(concentration_draws[0])

    # Peptide-level means using mean observed intensity; imputing missing
    # peptides as protein observed means
    gamma_draws[0] = mu_draws[0, mapping_peptides]
    gamma_draws[0, peptides_obs] = (
        total_intensity_obs_per_peptide[peptides_obs] /
        n_obs_states_per_peptide[peptides_obs])

    # Instantiate GLM family for eta step
    try:
        glm_link_name = cfg["priors"]["glm_link"].title()
    except KeyError:
        print >> sys.stderr, "GLM link not specified; defaulting to logit"
        glm_link_name = "Logit"
    glm_link = getattr(glm.links, glm_link_name)
    glm_family = glm.families.Binomial(link=glm_link)

    # Setup function for prior log density on eta, if requested
    try:
        prior_scale = cfg["priors"]["eta"]["prior_scale"]
        prior_center = cfg["priors"]["eta"]["prior_center"]
    except KeyError:
        prior_scale = None
        prior_center = None

    if prior_scale is not None:
        # Gelman's weakly-informative prior (2008)
        def dprior_eta(eta, prior_scale=5., prior_center=0.):
            return -np.log(1. + ((eta[1] - prior_center) / prior_scale)**2)

        prior_eta_kwargs = {'prior_scale': prior_scale,
                            'prior_center': prior_center}
    else:
        dprior_eta = None
        prior_eta_kwargs = {}

    # Initialize dictionary for acceptance statistics
    accept_stats = {'sigmasq_dist': 0,
                    'tausq_dist': 0,
                    'n_states_dist': 0,
                    'eta': 0}

    # Master loop for MCMC iterations
    for t in xrange(1, n_iterations):
        # (1) Draw missing data (n_cen and censored state intensities) given all
        #   other parameters. Exact draw via rejection samplers.

        # (1a) Obtain p_int_cen per peptide and approximations of censored
        #   intensity posteriors.
        eta_0_effective = eta_draws[t - 1, 0]
        eta_1_effective = eta_draws[t - 1, 1]
        if n_peptide_features > 0:
            eta_0_effective += np.dot(
                peptide_features, eta_draws[t - 1, 2:(2 + n_peptide_features)]
            )
            eta_1_effective += np.dot(
                peptide_features, eta_draws[t - 1, (2 + n_peptide_features):]
            )
            
        kwargs = {'eta_0': eta_0_effective,
                  'eta_1': eta_1_effective,
                  'mu': gamma_draws[t - 1],
                  'sigmasq': var_peptide_conditional,
                  'glm_link_name': glm_link_name}
        cen_dist = lib.characterize_censored_intensity_dist(**kwargs)

        # (1b) Draw number of censored states per peptide
        n_cen_states_per_peptide = lib.rncen(n_obs=n_obs_states_per_peptide,
                                             p_rnd_cen=p_rnd_cen[t - 1],
                                             p_int_cen=cen_dist['p_int_cen'],
                                             lmbda=lmbda[t - 1], r=r[t - 1])
        n_cen_states_per_peptide_draws[t] = n_cen_states_per_peptide
        # Update state-level counts
        n_states_per_peptide = (n_obs_states_per_peptide +
                                n_cen_states_per_peptide)
        n_states_per_protein = np.bincount(mapping_peptides,
                                           weights=n_states_per_peptide)
        n_states = np.sum(n_states_per_peptide)

        # (1c) Draw censored intensities
        kwargs['n_cen'] = n_cen_states_per_peptide
        kwargs['p_rnd_cen'] = p_rnd_cen[t - 1]
        kwargs['propDf'] = prop_df_y_mis
        kwargs.update(cen_dist)
        intensities_cen, mapping_states_cen, W = lib.rintensities_cen(**kwargs)

        # (2) Update random censoring probability. Gibbs step.
        p_rnd_cen[t] = updates.rgibbs_p_rnd_cen(n_rnd_cen=np.sum(W),
                                                n_states=n_states,
                                                **cfg['priors']['p_rnd_cen'])

        # Sum observed intensities per peptide
        total_intensity_cen_per_peptide = np.bincount(mapping_states_cen,
                                                      weights=intensities_cen,
                                                      minlength=n_peptides)

        # Compute mean intensities per peptide
        mean_intensity_per_peptide = ((total_intensity_obs_per_peptide +
                                       total_intensity_cen_per_peptide) /
                                      n_states_per_peptide)

        # (3) Update peptide-level mean parameters (gamma). Gibbs step.
        gamma_draws[t] = updates.rgibbs_gamma(
            mu=mu_draws[t - 1, mapping_peptides],
            tausq=tausq_draws[t - 1, mapping_peptides],
            sigmasq=var_peptide_conditional,
            y_bar=mean_intensity_per_peptide,
            n_states=n_states_per_peptide)
        mean_gamma_by_protein = np.bincount(mapping_peptides,
                                            weights=gamma_draws[t])
        mean_gamma_by_protein /= n_peptides_per_protein

        # (4) Update protein-level concentrations
        if supervised:
            if concentration_dist:
                # (4a) Update coefficients given concentrations. Gibbs step.
                # Only yields sane answers if modeling distribution of
                # concentrations.
                beta_draws[t] = updates.rgibbs_beta(
                    concentrations=concentration_draws[t-1],
                    gamma_bar=mean_gamma_by_protein, tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein,
                    **cfg['priors']['beta_concentration'])
            else:
                # (4a) Update coefficients given concentrations. Gibbs step.
                # Rao-Blackwellized version, implicitly scaling prior on
                # $\beta_1$ by $|\beta_1|^{n_{mis}}$.
                beta_draws[t] = updates.rgibbs_beta(
                    concentrations=known_concentrations,
                    gamma_bar=mean_gamma_by_protein[
                        mapping_known_concentrations],
                    tausq=tausq_draws[t - 1, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations],
                    **cfg['priors']['beta_concentration'])

            # (4b) Update concentrations given coefficients. Gibbs step.
            concentration_draws[t] = updates.rgibbs_concentration(
                gamma_bar=mean_gamma_by_protein, tausq=tausq_draws[t - 1],
                n_peptides=n_peptides_per_protein, beta=beta_draws[t],
                mean_concentration=mean_concentration_draws[t-1],
                prec_concentration=prec_concentration_draws[t-1])
            concentration_draws[t, mapping_known_concentrations] = \
                    known_concentrations

            if concentration_dist:
                # (4c) Update concentration distribution hyperparameters
                mean_concentration_draws[t] = np.random.normal(
                    loc=np.mean(concentration_draws[t]),
                    scale=np.sqrt(1. / prec_concentration_draws[t-1] /
                                  n_proteins), size=1)
                prec_concentration_draws[t] = 1. / updates.rgibbs_variances(
                    rss=np.sum((concentration_draws[t] -
                                mean_concentration_draws[t])**2),
                    n=n_proteins,
                    **cfg['priors']['prec_concentration'])

            # Set mu based on concentrations and betas
            mu_draws[t] = (beta_draws[t, 0] +
                           beta_draws[t, 1] * concentration_draws[t])
        else:
            # (4) Update protein-level mean parameters (mu). Gibbs step.
            mu_draws[t] = updates.rgibbs_mu(gamma_bar=mean_gamma_by_protein,
                                            tausq=tausq_draws[t - 1],
                                            n_peptides=n_peptides_per_protein,
                                            **cfg['priors']['mu'])

        # (5) Update state-level variance parameters (sigmasq). Gibbs step.
        rss_by_state = (
            intensities_obs - gamma_draws[t, mapping_states_obs]) ** 2
        rss_by_protein = np.bincount(mapping_peptides[mapping_states_obs],
                                     weights=rss_by_state,
                                     minlength=n_proteins)
        rss_by_state = (
            intensities_cen - gamma_draws[t, mapping_states_cen]) ** 2
        rss_by_protein += np.bincount(mapping_peptides[mapping_states_cen],
                                      weights=rss_by_state,
                                      minlength=n_proteins)
        sigmasq_draws[t] = updates.rgibbs_variances(
            rss=rss_by_protein, n=n_states_per_protein,
            prior_shape=shape_sigmasq[t - 1], prior_rate=rate_sigmasq[t - 1])

        # Mapping from protein to peptide conditional variances for convenience
        var_peptide_conditional = sigmasq_draws[t, mapping_peptides]

        # (6) Update peptide-level variance parameters (tausq). Gibbs step.
        rss_by_peptide = (gamma_draws[t] - mu_draws[t, mapping_peptides]) ** 2
        rss_by_protein = np.bincount(mapping_peptides, weights=rss_by_peptide)
        tausq_draws[t] = updates.rgibbs_variances(
            rss=rss_by_protein, n=n_peptides_per_protein,
            prior_shape=shape_tausq[t - 1], prior_rate=rate_tausq[t - 1])

        # (7) Update state-level variance hyperparameters (sigmasq
        #   distribution). Conditional independence-chain MH step.
        result = updates.rmh_variance_hyperparams(
            variances=sigmasq_draws[t], shape_prev=shape_sigmasq[t - 1],
            rate_prev=rate_sigmasq[t - 1], **cfg['priors']['sigmasq_dist'])
        (shape_sigmasq[t], rate_sigmasq[t]), accept = result
        accept_stats['sigmasq_dist'] += accept

        # (8) Update peptide-level variance hyperparameters (tausq
        #   distribution). Conditional independence-chain MH step.
        result = updates.rmh_variance_hyperparams(
            variances=tausq_draws[t], shape_prev=shape_tausq[t - 1],
            rate_prev=rate_tausq[t - 1], **cfg['priors']['tausq_dist'])
        (shape_tausq[t], rate_tausq[t]), accept = result
        accept_stats['tausq_dist'] += accept

        # (9) Update parameter for negative-binomial n_states distribution (r
        #   and lmbda). Conditional independence-chain MH step.
        result = updates.rmh_nbinom_hyperparams(
            x=n_states_per_peptide - 1,
            r_prev=r[t - 1], p_prev=1. - lmbda[t - 1],
            **cfg['priors']['n_states_dist'])
        (r[t], lmbda[t]), accept = result
        lmbda[t] = 1. - lmbda[t]
        accept_stats['n_states_dist'] += accept

        # (10) Update coefficients of intensity-based probabilistic censoring
        #   model (eta). Conditional independence-chain MH step.
        # (10a) Build design matrix and response. Only using observed and
        # intensity-censored states.
        n_at_risk = n_obs_states + np.sum(W < 1)
        X = np.zeros((n_at_risk + n_peptide_features * 2,
                     2 + n_peptide_features * 2))
        X[:n_at_risk, 0] = 1.
        X[:n_at_risk, 1] = np.r_[intensities_obs, intensities_cen[W < 1]]
        if n_peptide_features > 0:
            peptide_features_by_state = peptide_features[
                np.r_[mapping_states_obs, mapping_states_cen[W < 1]]
            ]
            X[:n_at_risk, 2:(2 + n_peptide_features)] = \
                peptide_features_by_state
            X[:n_at_risk, (2 + n_peptide_features):] = \
                (peptide_features_by_state.T * X[:n_at_risk, 1]).T
            X[n_at_risk:, 2:] = np.eye(n_peptide_features * 2)
        
        y = np.zeros(n_at_risk + n_peptide_features * 2)
        y[:n_obs_states] = 1.
        if n_peptide_features > 0:
            y[n_at_risk:] = 0.5
        
        w = np.ones_like(y)
        if n_peptide_features > 0:
            w[n_at_risk:(n_at_risk + n_peptide_features)] = \
                cfg['priors']['eta_features']['primary_pseudoobs']
            w[(n_at_risk + n_peptide_features):] = \
                cfg['priors']['eta_features']['interaction_pseudoobs']

        # (10b) Estimate GLM parameters.
        fit_eta = glm.glm(y=y, X=X, w=w, family=glm_family, info=True)
        
        if np.all(np.isfinite(fit_eta['b_hat'])):
            # (10c) Execute MH step.
            eta_draws[t], accept = glm.mh_update_glm_coef(
                b_prev=eta_draws[t - 1], y=y, X=X, family=glm_family,
                propDf=prop_df_eta, prior_log_density=dprior_eta,
                prior_kwargs=prior_eta_kwargs, **fit_eta)
            accept_stats['eta'] += accept
        else:
            eta_draws[t] = eta_draws[t-1]

        if (cfg['settings']['verbose'] > 0 and
                t % cfg['settings']['verbose_interval'] == 0):
            print >> sys.stderr, 'Iteration %d complete' % t

    # Build dictionary of draws to return
    draws = {'mu': mu_draws,
             'gamma': gamma_draws,
             'eta': eta_draws,
             'p_rnd_cen': p_rnd_cen,
             'lmbda': lmbda,
             'r': r,
             'sigmasq': sigmasq_draws,
             'tausq': tausq_draws,
             'n_cen_states_per_peptide': n_cen_states_per_peptide_draws,
             'shape_tausq': shape_tausq,
             'rate_tausq': rate_tausq,
             'shape_sigmasq': shape_sigmasq,
             'rate_sigmasq': rate_sigmasq}
    
    # Add additional information for supervised algorithm
    if supervised:
        draws.update({
            'beta': beta_draws,
            'concentration': concentration_draws})
        if concentration_dist:
            draws.update({
                'mean_concentration': mean_concentration_draws,
                'var_concentration': 1. / prec_concentration_draws})

    return (draws, accept_stats)
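# The docstring above leaves the exact cfg schema to separate documentation.
# A hypothetical skeleton covering only the keys mcmc_serial actually reads;
# leaf values are illustrative placeholders, and the inner prior entries
# (left as empty dicts) are whatever the updates/lib routines expect.
cfg = {
    'priors': {
        'supervised': False,        # optional; defaults to False
        'n_states_dist': {},        # negative-binomial MAP/MH hyperparameters
        'p_rnd_cen': {},            # prior for the random-censoring Gibbs step
        'mu': {},                   # protein-level mean prior (unsupervised path)
        'sigmasq_dist': {},         # state-level variance hyperprior
        'tausq_dist': {},           # peptide-level variance hyperprior
        'glm_link': 'logit',        # optional; defaults to logit
        'eta': {'prior_scale': 5., 'prior_center': 0.},  # optional
        # Supervised runs also read 'concentration_dist',
        # 'beta_concentration', and 'prec_concentration'; runs with
        # peptide features also read 'eta_features'.
    },
    'settings': {
        'n_iterations': 1000,
        'prop_df_y_mis': 5.0,       # optional; defaults to 5.0
        'prop_df_eta': 10.0,        # optional; defaults to 10.0
        'verbose': 1,
        'verbose_interval': 100,
    },
    'init': {
        'p_rnd_cen': 0.05,
        'eta': {'mean': [0., 1.], 'sd': [1., 1.], 'cor': 0.},
        'sigmasq_dist': {'shape': 2., 'rate': 1.},
        'tausq_dist': {'shape': 2., 'rate': 1.},
    },
}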
Example #4

##############                                     ##############
#################################################################
# iii. Comparison of the two functions (single voxel response)  #
#################################################################
##############                                     ##############


############################################
# a. Pick a good voxel to compare against  #
############################################

from glm import glm
from Image_Visualizing import present_3d

beta_np, X_np = glm(data, conv_np)
# beta_2, X_2 = glm(data, conv_2)  # not correct shape
beta_3, X_3 = glm(data, conv_3)
beta_4, X_4 = glm(data, conv_4_30)
# beta_5, X_5 = glm(data, conv_5)

# non-np are stronger/more clear
plt.imshow(present_3d(beta_np[..., 1]), cmap="gray", interpolation="nearest")
plt.imshow(present_3d(beta_3[..., 1]), cmap="gray", interpolation="nearest")
plt.imshow(present_3d(beta_4[..., 1]), cmap="gray", interpolation="nearest")
# plt.imshow(present_3d(beta_5[..., 1]), cmap="gray", interpolation="nearest")

plt.imshow(beta_4[..., 2, 1], cmap="gray", interpolation="nearest")
plt.colorbar()
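# present_3d comes from the project's Image_Visualizing module. A plausible
# minimal version, assuming it tiles the z-slices of a 3D volume into a
# single 2D mosaic that plt.imshow can display:
import numpy as np


def present_3d(vol):
    # Arrange the nz slices into a near-square grid of nx-by-ny tiles.
    nx, ny, nz = vol.shape
    ncols = int(np.ceil(np.sqrt(nz)))
    nrows = int(np.ceil(nz / float(ncols)))
    canvas = np.zeros((nrows * nx, ncols * ny))
    for k in range(nz):
        r, c = divmod(k, ncols)
        canvas[r * nx:(r + 1) * nx, c * ny:(c + 1) * ny] = vol[..., k]
    return canvas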
Example #5
cond_all = sorted(cond_all, key=lambda x: x[0])
np.savetxt(condition_location + "cond_all.txt", cond_all)

neural_prediction = events2neural(condition_location + "cond_all.txt", TR,
                                  n_vols)
convolved = np.convolve(neural_prediction,
                        hrf_at_trs)  # hrf_at_trs sample data
N = len(neural_prediction)  # N == n_vols == 173
M = len(hrf_at_trs)  # M == 12
np_hrf = convolved[:N]
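# Why the [:N] slice: np.convolve in its default 'full' mode returns
# N + M - 1 samples, so the HRF tail running past the last acquired
# volume is dropped to keep the regressor aligned with the N volumes.
assert convolved.shape == (N + M - 1,)
assert np_hrf.shape == (N,)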

###################
# From GLM function
###################

np_B, np_X = glm(data, np_hrf)

####################################
# GLM Diagnostics (to get residuals)
####################################

np_MRSS, np_fitted, np_residuals = glm_diagnostics(np_B, np_X, data)

###########################
# Shapiro-Wilk on Residuals
###########################
# Shapiro-Wilk: tests the null hypothesis that the data was
# drawn from a normal distribution.

sw_pvals = check_sw(np_residuals)
print(np.mean(sw_pvals > 0.05))
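# check_sw is imported from the project's diagnostics module. A plausible
# minimal version, assuming it runs the Shapiro-Wilk test voxel-wise over
# the time axis and returns the 3D array of p-values:
import numpy as np
from scipy.stats import shapiro


def check_sw(residuals_4d):
    # Shapiro-Wilk p-value for each voxel's residual time course.
    spatial_shape = residuals_4d.shape[:-1]
    resid_2d = residuals_4d.reshape(-1, residuals_4d.shape[-1])
    pvals = np.array([shapiro(voxel)[1] for voxel in resid_2d])
    return pvals.reshape(spatial_shape)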
Example #7
np_hrf = convolved[:N]

#############################
#############################
# Analysis and diagnostics  #
#############################
#############################

#######################
# a. (my) convolution #
#######################

# Now get the estimated coefficients and design matrix for doing
# regression on the convolved time course.
B_my, X_my = glm(data, my_hrf)

# Some diagnostics.
MRSS_my, fitted_my, residuals_my = glm_diagnostics(B_my, X_my, data)

# Print out the mean MRSS.
print("MRSS using 'my' convolution function: " + str(np.mean(MRSS_my)))

# Plot the time course for a single voxel with the fitted values.
# Looks pretty bad.
plt.plot(data[41, 47, 2])  # changed from cherry-picking
plt.plot(fitted_my[41, 47, 2])
plt.savefig(location_of_images + "glm_plot_my.png")
plt.close()

Example #9
def t_stat(data_4d, convolved, c=[0, 1]):
    """
    Return four values: the estimated betas, t-values,
    degrees of freedom, and p-values for the given t-values.

    Parameters
    ----------
    data_4d: numpy array of 4 dimensions
        The image data of one subject
    convolved: numpy array of 1 dimension
        The convolved time course
    c: numpy array of 1 dimension
        The contrast vector for the weights of the beta vector.
        Default is [0, 1], which corresponds to beta_1.

    Note that the fourth dimension of `data_4d` (time or the number
    of volumes) must be the same as the length of `convolved`.

    Returns
    -------
    beta: estimated beta values

    t: numpy array of 1 dimension
        t-value of the betas

    df: int
        degrees of freedom

    p: numpy array of 1 dimension
        p-value corresponding to the t-value and degrees of freedom
    """

    # Make sure y, X, c are all arrays
    beta, X = glm(data_4d, convolved)
    c = np.atleast_2d(c).T  # As column vector
    # Calculate the parameters - b hat
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X.dot(beta)
    # Residual error
    y = np.reshape(data_4d, (-1, data_4d.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)

    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df
    # Calculate bottom half of t statistic
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
    zeros = np.where(SE == 0)
    SE[zeros] = 1
    t = c.T.dot(beta) / SE
    t[zeros] = 0
    # Get p value for t value using cumulative distribution function
    # (CDF) of t distribution
    ltp = t_dist.cdf(abs(t), df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return beta, t, df, p
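# The SE line in t_stat is the usual contrast standard error,
# SE = sqrt(MRSS * c' (X'X)^{-1} c), and t = c'b / SE. A self-contained
# check on synthetic single-voxel data (numpy/scipy only; no dependence
# on the local glm module):
import numpy as np
import numpy.linalg as npl
from scipy.stats import t as t_dist

rng = np.random.RandomState(0)
n = 100
X = np.ones((n, 2))
X[:, 1] = rng.randn(n)                   # stand-in for a convolved regressor
y = 2.0 + 3.0 * X[:, 1] + rng.randn(n)   # true beta_1 = 3

b = npl.pinv(X).dot(y)
resid = y - X.dot(b)
df = X.shape[0] - npl.matrix_rank(X)
MRSS = resid.dot(resid) / df

c = np.array([0., 1.])
SE = np.sqrt(MRSS * c.dot(npl.pinv(X.T.dot(X))).dot(c))
t = c.dot(b) / SE
p = 1 - t_dist.cdf(abs(t), df)           # upper-tail p, as in t_stat above
print(t, df, p)                          # t should be large, p near 0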
Example #10
# iii. Comparison of the two functions (single voxel response)  #
#################################################################
##############                                     ##############

############################################
# a. Pick a good voxel to compare against  #
############################################

# Remember the names of the two different methods
# my convolution: all_stimuli_convolution_best_length
# np.convolve:  convolve_np

from glm import glm
from Image_Visualizing import present_3d

beta_my, X_my = glm(data, all_stimuli_convolution_best_length)
beta_np, X_np = glm(data, convolve_np)

plt.imshow(present_3d(beta_my[..., 1]), cmap="gray", interpolation="nearest")
plt.imshow(present_3d(beta_np[..., 1]), cmap="gray", interpolation="nearest")

plt.imshow(beta_my[..., 2, 1], cmap="gray", interpolation="nearest")
plt.colorbar()
plt.close()

# From visual analysis:
# the regression has a really high beta_1 value at
# beta_my[41, 47, 2, 1] (voxel data[41, 47, 2]).
# Let's use that for the comparisons (acknowledging it is not good practice
# to check a created X against betas that were themselves based on X).

Example #11
def worker(comm, rank, data, cfg):
    '''
    Worker-node process for parallel MCMC sampler.
    Receives parameters and commands from master node.
    Runs local updates and distributed components of shared draws.

    Parameters
    ----------
        - comm : mpi4py.MPI.COMM
            Initialized MPI communicator.
        - rank : int
            Rank (>= MPIROOT) of worker.
        - data : dictionary
            Data as output from load_data with rank > 0.
        - cfg : dictionary
            Configuration dictionary containing priors, settings, and paths for
            analysis. Its format is specified in detail in separate
            documentation.

    Returns
    -------
        - draws : dictionary
            1- and 2-dimensional ndarrays containing the posterior samples for
            each protein- and peptide-specific parameter. Shared parameters are
            handled by the master process.
        - mapping_peptides : integer ndarray
            Worker-specific peptide to protein mapping provided in data.
        - proteins_worker : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_proteins containing the indices (in
            the original dataset) of the proteins assigned to the given worker.
        - peptides_worker : array_like, 1 dimension, nonnegative ints
            A 1d integer array of length n_peptides containing the indices (in
            the original dataset) of the peptides assigned to the given worker.
    '''
    # Determine whether algorithm is running with supervision
    try:
        supervised = cfg['priors']['supervised']
    except KeyError:
        print >> sys.stderr, 'Defaulting to unsupervised algorithm'
        supervised = False

    # If supervised, determine whether to model distribution of concentrations
    # If this is False, prior on $\beta_1$ is scaled by $|\beta_1|^{n_{mis}}$.
    if supervised:
        try:
            concentration_dist = cfg['priors']['concentration_dist']
        except KeyError:
            print >> sys.stderr, 'Defaulting to flat prior on concentrations'
            concentration_dist = False

    # Get information on peptide features if they're available
    have_peptide_features = 'path_peptide_features' in cfg['priors']
    if have_peptide_features:
        n_peptide_features = data['peptide_features_worker'].shape[1]
    else:
        n_peptide_features = 0

    # Extract proposal DFs
    try:
        prop_df_y_mis = cfg['settings']['prop_df_y_mis']
    except KeyError:
        prop_df_y_mis = 5.0

    # Create references to relevant data entries in local namespace
    mapping_peptides = data['mapping_peptides']
    intensities_obs = data['intensities_obs']
    mapping_states_obs = data['mapping_states_obs']
    # Data specific to the semi-supervised algorithm
    if supervised:
        known_concentrations = data['known_concentrations']
        mapping_known_concentrations = data['mapping_known_concentrations']

    # Extract dimensions from input

    # Number of iterations from cfg
    n_iterations = cfg['settings']['n_iterations']

    # Number of peptides and proteins from mapping_peptides
    n_peptides = np.size(mapping_peptides)
    n_proteins = 1 + np.max(mapping_peptides)

    # Compute tabulations that are invariant across iterations

    # Total number of observed states
    n_obs_states = np.size(intensities_obs)

    # Tabulate peptides per protein
    n_peptides_per_protein = np.bincount(mapping_peptides)
    peptides_obs = np.unique(mapping_states_obs)
    n_obs_peptides_per_protein = np.bincount(mapping_peptides[peptides_obs],
                                             minlength=n_proteins)

    # Tabulate number of observed states per peptide
    n_obs_states_per_peptide = np.bincount(mapping_states_obs,
                                           minlength=n_peptides)

    # Sum observed intensities per peptide
    total_intensity_obs_per_peptide = np.bincount(mapping_states_obs,
                                                  weights=intensities_obs,
                                                  minlength=n_peptides)

    # Allocate data structures for draws

    # Peptide- and protein-level means
    gamma_draws = np.empty((n_iterations, n_peptides))
    mu_draws = np.empty((n_iterations, n_proteins))

    # Concentrations, if supervised
    if supervised:
        concentration_draws = np.empty((n_iterations, n_proteins))

    # Number of censored states per peptide
    n_cen_states_per_peptide_draws = np.zeros((n_iterations, n_peptides),
                                              dtype=np.integer)

    # State- and peptide-level variances
    sigmasq_draws = np.empty((n_iterations, n_proteins))
    tausq_draws = np.empty((n_iterations, n_proteins))

    # Instantiate GLM family for eta step
    try:
        glm_link_name = cfg["priors"]["glm_link"].title()
    except KeyError:
        print >> sys.stderr, "GLM link not specified; defaulting to logit"
        glm_link_name = "Logit"
    glm_link = getattr(glm.links, glm_link_name)
    glm_family = glm.families.Binomial(link=glm_link)

    # Setup data structure for shared parameters/hyperparameters sync
    # Layout:
    #   - 0:2 : shape_sigmasq, rate_sigmasq
    #   - 2:4 : shape_tausq, rate_tausq
    #   - 4:6 : r, lmbda
    #   - 6:8 : eta
    #   - 8   : p_rnd_cen
    # If supervised, 4 additional entries are used:
    #   - 9:11: beta
    #   - 11  : mean_concentration
    #   - 12  : prec_concentration
    params_shared = np.empty(9 + 4 * supervised, dtype=np.double)

    # Prepare to receive tasks
    working = True
    status = MPI.Status()
    t = np.array(0)

    # Primary send-receive loop for MCMC iterations
    while working:
        # Receive iteration and task information
        comm.Recv([t, MPI.INT], source=MPIROOT, tag=MPI.ANY_TAG, status=status)
        task = status.Get_tag()

        if task == TAGS['STOP']:
            working = False
        elif task == TAGS['SYNC']:
            # Synchronize shared parameters/hyperparameters
            comm.Bcast(params_shared, root=MPIROOT)

            shape_sigmasq, rate_sigmasq = params_shared[0:2]
            shape_tausq, rate_tausq = params_shared[2:4]
            r, lmbda = params_shared[4:6]
            eta = params_shared[6:8]
            p_rnd_cen = params_shared[8]

            if supervised:
                beta = params_shared[9:11]
                mean_concentration = params_shared[11]
                prec_concentration = params_shared[12]
        elif task == TAGS['INIT']:
            # Compute initial values for MCMC iterations

            # Protein-level means using mean observed intensity; excluding
            # missing peptides
            mu_draws[0] = (
                np.bincount(mapping_peptides, total_intensity_obs_per_peptide /
                            np.maximum(1, n_obs_states_per_peptide)) /
                n_obs_peptides_per_protein)
            mu_draws[0, n_obs_peptides_per_protein < 1] = np.nanmin(mu_draws[0])

            # Peptide-level means using mean observed intensity; imputing
            # missing peptides as protein observed means
            gamma_draws[0] = mu_draws[0, mapping_peptides]
            gamma_draws[0, peptides_obs] = (
                total_intensity_obs_per_peptide[peptides_obs] /
                n_obs_states_per_peptide[peptides_obs]
            )

            # State- and peptide-level variances via inverse-gamma draws
            sigmasq_draws[0] = 1. / np.random.gamma(shape=shape_sigmasq,
                                                    scale=1. / rate_sigmasq,
                                                    size=n_proteins)
            tausq_draws[0] = 1. / np.random.gamma(shape=shape_tausq,
                                                  scale=1. / rate_tausq,
                                                  size=n_proteins)

            # Mapping from protein to peptide conditional variances for
            # convenience
            var_peptide_conditional = sigmasq_draws[0, mapping_peptides]

            # Number of states parameters from local MAP estimator based on
            # number of observed peptides; very crude, but not altogether
            # terrible. Note that this ignores the +1 location shift in the
            # actual n_states distribution.
            kwargs = {
                'x': n_obs_states_per_peptide[n_obs_states_per_peptide > 0] - 1,
                'transform': True}
            kwargs.update(cfg['priors']['n_states_dist'])
            r, lmbda = lib.map_estimator_nbinom(**kwargs)
            lmbda = 1. - lmbda

            # Combine local estimates at master for initialization.
            # Values synchronize at first iteration during SYNC task.
            comm.Reduce([np.array([r, lmbda]), MPI.DOUBLE], None,
                        op=MPI.SUM, root=MPIROOT)

            if supervised:
                # Run Gibbs update on concentration-intensity coefficients using
                # noninformative prior.
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=known_concentrations,
                    gamma_bar=mu_draws[0, mapping_known_concentrations],
                    tausq=tausq_draws[0, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations], MPIROOT=MPIROOT)
        elif task == TAGS['LOCAL']:
            # (1) Draw missing data (n_cen and censored state intensities) given
            #   all other parameters. Exact draw via rejection samplers.

            # (1a) Obtain p_int_cen per peptide and approximations of censored
            #   intensity posteriors.
            eta_0_effective = eta[0]
            eta_1_effective = eta[1]
            if n_peptide_features > 0:
                eta_0_effective += np.dot(data['peptide_features_worker'],
                                          eta[2:(2 + n_peptide_features)])
                eta_1_effective += np.dot(data['peptide_features_worker'],
                                          eta[(2 + n_peptide_features):])

            kwargs = {'eta_0': eta_0_effective,
                      'eta_1': eta_1_effective,
                      'mu': gamma_draws[t - 1],
                      'sigmasq': var_peptide_conditional,
                      'glm_link_name': glm_link_name}
            cen_dist = lib.characterize_censored_intensity_dist(**kwargs)

            # (1b) Draw number of censored states per peptide
            n_cen_states_per_peptide = lib.rncen(
                n_obs=n_obs_states_per_peptide,
                p_rnd_cen=p_rnd_cen,
                p_int_cen=cen_dist['p_int_cen'],
                lmbda=lmbda, r=r)
            n_cen_states_per_peptide_draws[t] = n_cen_states_per_peptide
            # Update state-level counts
            n_states_per_peptide = (n_obs_states_per_peptide +
                                    n_cen_states_per_peptide)
            n_states_per_protein = np.bincount(mapping_peptides,
                                               weights=n_states_per_peptide)
            n_states = np.sum(n_states_per_peptide)

            # (1c) Draw censored intensities
            kwargs['n_cen'] = n_cen_states_per_peptide
            kwargs['p_rnd_cen'] = p_rnd_cen
            kwargs['propDf'] = prop_df_y_mis
            kwargs.update(cen_dist)
            intensities_cen, mapping_states_cen, W = lib.rintensities_cen(
                **kwargs)

            # Sum observed intensities per peptide
            total_intensity_cen_per_peptide = np.bincount(
                mapping_states_cen, weights=intensities_cen,
                minlength=n_peptides)

            # Compute mean intensities per peptide
            mean_intensity_per_peptide = ((total_intensity_obs_per_peptide +
                                           total_intensity_cen_per_peptide) /
                                          n_states_per_peptide)

            # (2) Update peptide-level mean parameters (gamma). Gibbs step.
            gamma_draws[t] = updates_serial.rgibbs_gamma(
                mu=mu_draws[t - 1, mapping_peptides],
                tausq=tausq_draws[t - 1, mapping_peptides],
                sigmasq=var_peptide_conditional,
                y_bar=mean_intensity_per_peptide, n_states=n_states_per_peptide)
            mean_gamma_by_protein = np.bincount(mapping_peptides,
                                                weights=gamma_draws[t])
            mean_gamma_by_protein /= n_peptides_per_protein

            if supervised:
                # (3) Update concentrations given coefficients. Gibbs step.
                concentration_draws[t] = updates_serial.rgibbs_concentration(
                    gamma_bar=mean_gamma_by_protein, tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein, beta=beta,
                    mean_concentration=mean_concentration,
                    prec_concentration=prec_concentration)
                concentration_draws[t, mapping_known_concentrations] = \
                        known_concentrations

                mu_draws[t] = beta[0] + beta[1] * concentration_draws[t]
            else:
                # (3) Update protein-level mean parameters (mu). Gibbs step.
                mu_draws[t] = updates_serial.rgibbs_mu(
                    gamma_bar=mean_gamma_by_protein, tausq=tausq_draws[t - 1],
                    n_peptides=n_peptides_per_protein, **cfg['priors']['mu'])

            # (4) Update state-level variance parameters (sigmasq). Gibbs step.
            rss_by_state = ((intensities_obs -
                             gamma_draws[t, mapping_states_obs]) ** 2)
            rss_by_protein = np.bincount(mapping_peptides[mapping_states_obs],
                                         weights=rss_by_state,
                                         minlength=n_proteins)
            rss_by_state = ((intensities_cen -
                             gamma_draws[t, mapping_states_cen]) ** 2)
            rss_by_protein += np.bincount(mapping_peptides[mapping_states_cen],
                                          weights=rss_by_state,
                                          minlength=n_proteins)
            sigmasq_draws[t] = updates_serial.rgibbs_variances(
                rss=rss_by_protein, n=n_states_per_protein,
                prior_shape=shape_sigmasq, prior_rate=rate_sigmasq)

            # Mapping from protein to peptide conditional variances for
            # convenience
            var_peptide_conditional = sigmasq_draws[t, mapping_peptides]

            # (5) Update peptide-level variance parameters (tausq). Gibbs step.
            rss_by_peptide = (
                gamma_draws[t] - mu_draws[t, mapping_peptides]) ** 2
            rss_by_protein = np.bincount(mapping_peptides,
                                         weights=rss_by_peptide)
            tausq_draws[t] = updates_serial.rgibbs_variances(
                rss=rss_by_protein, n=n_peptides_per_protein,
                prior_shape=shape_tausq, prior_rate=rate_tausq)
        elif task == TAGS['SIGMA']:
            # Run distributed MH step for sigmasq hyperparameters
            updates_parallel.rmh_worker_variance_hyperparams(
                comm=comm, variances=sigmasq_draws[t], MPIROOT=MPIROOT)
        elif task == TAGS['TAU']:
            # Run distributed MH step for tausq hyperparameters
            updates_parallel.rmh_worker_variance_hyperparams(
                comm=comm, variances=tausq_draws[t], MPIROOT=MPIROOT)
        elif task == TAGS['NSTATES']:
            # Run distributed MH step for n_states hyperparameters
            updates_parallel.rmh_worker_nbinom_hyperparams(
                comm=comm, x=n_states_per_peptide - 1, r_prev=r,
                p_prev=1. - lmbda, MPIROOT=MPIROOT,
                **cfg['priors']['n_states_dist'])
        elif task == TAGS['ETA']:
            # Run distributed MH step for eta (coefficients in censoring model)

            # Build design matrix and response. Only using observed and
            # intensity-censored states.
            n_at_risk = n_obs_states + np.sum(W < 1)
            X = np.zeros((n_at_risk + n_peptide_features * 2,
                          2 + n_peptide_features * 2))
            X[:n_at_risk, 0] = 1.
            X[:n_at_risk, 1] = np.r_[intensities_obs, intensities_cen[W < 1]]
            if n_peptide_features > 0:
                peptide_features_by_state = data['peptide_features_worker'][
                    np.r_[mapping_states_obs, mapping_states_cen[W < 1]]
                ]
                X[:n_at_risk, 2:(2 + n_peptide_features)] = \
                    peptide_features_by_state
                X[:n_at_risk, (2 + n_peptide_features):] = \
                    (peptide_features_by_state.T * X[:n_at_risk, 1]).T
                X[n_at_risk:, 2:] = np.eye(n_peptide_features * 2)

            y = np.zeros(n_at_risk + n_peptide_features * 2)
            y[:n_obs_states] = 1.
            if n_peptide_features > 0:
                y[n_at_risk:] = 0.5

            w = np.ones_like(y)
            if n_peptide_features > 0:
                w[n_at_risk:(n_at_risk + n_peptide_features)] = (
                    cfg['priors']['eta_features']['primary_pseudoobs'] /
                    (comm.Get_size() - 1.))
                w[(n_at_risk + n_peptide_features):] = (
                    cfg['priors']['eta_features']['interaction_pseudoobs'] /
                    (comm.Get_size() - 1.))

            # Estimate GLM parameters.
            fit_eta = glm.glm(y=y, X=X, w=w, family=glm_family, info=True,
                              cov=True)

            # Handle distributed computation draw
            updates_parallel.rmh_worker_glm_coef(
                comm=comm, b_prev=eta, family=glm_family, y=y, X=X, w=w,
                MPIROOT=MPIROOT, **fit_eta)
        elif task == TAGS['PRNDCEN']:
            # Run distributed Gibbs step for p_rnd_cen
            updates_parallel.rgibbs_worker_p_rnd_cen(
                comm=comm, n_rnd_cen=np.sum(W, dtype=np.int), n_states=n_states,
                MPIROOT=MPIROOT)
        elif task == TAGS['BETA']:
            # Run distributed Gibbs step for coefficients of
            # concentration-intensity relationship
            if concentration_dist:
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=concentration_draws[t],
                    gamma_bar=mean_gamma_by_protein,
                    tausq=tausq_draws[t],
                    n_peptides=n_peptides_per_protein, MPIROOT=MPIROOT)
            else:
                updates_parallel.rgibbs_worker_beta(
                    comm=comm, concentrations=known_concentrations,
                    gamma_bar=mean_gamma_by_protein[
                        mapping_known_concentrations],
                    tausq=tausq_draws[t, mapping_known_concentrations],
                    n_peptides=n_peptides_per_protein[
                        mapping_known_concentrations], MPIROOT=MPIROOT)
        elif task == TAGS['CONCENTRATION_DIST']:
            # Run distributed Gibbs step for hyperparameters of concentration
            # distribution
            updates_parallel.rgibbs_worker_concentration_dist(
                comm=comm, concentrations=concentration_draws[t],
                MPIROOT=MPIROOT)
        elif task == TAGS['SAVE']:
            # Construct path for worker-specific results
            path_worker = cfg['output']['pattern_results_worker'] % rank

            # Setup draws to return
            draws = {'mu': mu_draws,
                     'gamma': gamma_draws,
                     'sigmasq': sigmasq_draws,
                     'tausq': tausq_draws,
                     'n_cen_states_per_peptide': n_cen_states_per_peptide_draws,
                    }
            if supervised:
                draws.update({'concentration': concentration_draws})
            lib.write_to_hdf5(
                path=path_worker, compress=cfg['output']['compress'],
                draws=draws, mapping_peptides=data['mapping_peptides'],
                proteins_worker=data['proteins_worker'])

    # Setup draws to return
    draws = {'mu': mu_draws,
             'gamma': gamma_draws,
             'sigmasq': sigmasq_draws,
             'tausq': tausq_draws,
             'n_cen_states_per_peptide': n_cen_states_per_peptide_draws,
            }
    if supervised:
        draws.update({
            'concentration': concentration_draws})

    return (draws, data['mapping_peptides'],
            data['proteins_worker'], data['peptides_worker'])
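# The SYNC branch above fixes the wire layout of params_shared. A
# hypothetical master-side packing helper mirroring that layout (an
# assumption; the actual master code is not shown in this example):
import numpy as np


def pack_params_shared(shape_sigmasq, rate_sigmasq, shape_tausq, rate_tausq,
                       r, lmbda, eta, p_rnd_cen, beta=None,
                       mean_concentration=None, prec_concentration=None):
    # Slots 0:2, 2:4, 4:6, 6:8, and 8; supervised runs append 9:11, 11, 12.
    supervised = beta is not None
    params = np.empty(9 + 4 * supervised, dtype=np.double)
    params[0:2] = shape_sigmasq, rate_sigmasq
    params[2:4] = shape_tausq, rate_tausq
    params[4:6] = r, lmbda
    params[6:8] = eta[:2]
    params[8] = p_rnd_cen
    if supervised:
        params[9:11] = beta
        params[11] = mean_concentration
        params[12] = prec_concentration
    return params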
Example #14
# Creating the .txt file for the events2neural function
cond_all = np.row_stack((cond1, cond2, cond3))
cond_all = sorted(cond_all, key=lambda x: x[0])
np.savetxt(condition_location + "cond_all.txt", cond_all)

neural_prediction = events2neural(condition_location + "cond_all.txt", TR,
                                  n_vols)
convolved = np.convolve(neural_prediction, hrf_at_trs)  # hrf_at_trs sample data
N = len(neural_prediction)  # N == n_vols == 173
M = len(hrf_at_trs)  # M == 12
np_hrf = convolved[:N]

###################
# From GLM function
###################

np_B, np_X = glm(data, np_hrf)


####################################
# GLM Diagnostics (to get residuals)
####################################

np_MRSS, np_fitted, np_residuals = glm_diagnostics(np_B, np_X, data)

###########################
# Shapiro-Wilk on Residuals
###########################
# Shapiro-Wilk: tests the null hypothesis that the data was
# drawn from a normal distribution.

# Using 4-d residuals.
sw_pvals = check_sw(np_residuals)
print(np.mean(sw_pvals > 0.05))