示例#1
0
def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    Parameters
    ----------
    - 'dist' [numpy array, or a distribution object recognised by `utils.is_distribution`]:
        Values to transform. A plain array (or the `.values` of a non-AoA
        distribution) is normalised along axis 0. For an array-of-arrays
        distribution (`dist.IS_AOA`), every entry of `dist.values` is
        transformed independently.
    - 'return_numpy' [bool]:
        If True the result is returned as a numpy array, otherwise it is
        passed through `utils.to_categorical` before being returned.

    Returns
    ----------
    - 'output' [numpy array, numpy object array (AoA input), or the result of `utils.to_categorical`]:
        Softmax of the input values
    """
    if utils.is_distribution(dist):
        if dist.IS_AOA:
            # Each entry gets its own softmax. We must return from this branch:
            # the array arithmetic below only makes sense for a single numeric
            # array and would otherwise discard the per-entry results.
            n_entries = len(dist.values)
            output = np.empty(n_entries, dtype=object)
            for i in range(n_entries):
                output[i] = softmax(dist.values[i], return_numpy=True)
            if return_numpy:
                return output
            return utils.to_categorical(output)

        # work on a copy so the distribution's own values are not modified
        dist = np.copy(dist.values)

    # shifting by the maximum leaves the softmax unchanged but keeps exp() from overflowing
    output = dist - dist.max(axis=0)
    output = np.exp(output)
    output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
示例#2
0
文件: test.py 项目: ivanmetel/pymdp
def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    Parameters
    ----------
    - 'dist' [anything accepted by `utils.to_numpy`]:
        Values to transform. After conversion, an array-of-arrays (as detected
        by `utils.is_arr_of_arr`) has each of its entries transformed
        independently; any other array is normalised along axis 0.
    - 'return_numpy' [bool]:
        If True the result is returned as a numpy array, otherwise it is
        passed through `utils.to_categorical` before being returned.

    Returns
    ----------
    - 'output' [numpy array, numpy object array (array-of-arrays input), or the result of `utils.to_categorical`]:
        Softmax of the input values
    """

    dist = utils.to_numpy(dist)

    if utils.is_arr_of_arr(dist):
        # `dist` is already a numpy container here, so index it directly
        # (it has no `.values`) and apply the softmax entry by entry.
        output = np.empty(len(dist), dtype=object)
        for i, entry in enumerate(dist):
            output[i] = softmax(entry, return_numpy=True)
    else:
        # shifting by the maximum leaves the softmax unchanged but keeps exp() from overflowing
        output = dist - dist.max(axis=0)
        output = np.exp(output)
        output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
示例#3
0
def update_posterior_states(A,
                            obs,
                            prior=None,
                            return_numpy=True,
                            method=FPI,
                            **kwargs):
    """ 
    Update marginal posterior over hidden states using variational inference
        Can optionally set message passing algorithm used for inference
    
    Parameters
    ----------
    - 'A' [numpy nd.array (matrix or tensor or array-of-arrays) or Categorical]:
        Observation likelihood of the generative model, mapping from hidden states to observations
        Used to invert generative model to obtain marginal likelihood over hidden states, 
        given the observation
    - 'obs' [numpy 1D array, array of arrays (with 1D numpy array entries), int or tuple]:
        The observation (generated by the environment). If single modality, this can be a 1D array 
        (one-hot vector representation) or an int (observation index)
        If multi-modality, this can be an array of arrays (whose entries are 1D one-hot vectors) 
        or a tuple (of observation indices)
    - 'prior' [numpy 1D array, array of arrays (with 1D numpy array entries), Categorical, or None]:
        Prior beliefs about hidden states, to be integrated with the marginal likelihood to obtain
         a posterior distribution. 
        If None, prior is set to be equal to a flat categorical distribution (at the level of 
        the individual inference functions).
        (optional)
    - 'return_numpy' [bool]:
        True/False flag to determine whether the posterior is returned as a numpy array or a Categorical
    - 'method' [str]:
        Algorithm used to perform the variational inference. 
        Options: 'FPI' - Fixed point iteration 
                    - http://www.cs.cmu.edu/~guestrin/Class/10708/recitations/r9/VI-view.pdf, 
                    slides 13- 18
                    - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.137.221&rep=rep1&type=pdf, 
                    slides 24 - 38
                 'VMP' - Variational message passing (not implemented)
                 'MMP' - Marginal message passing (not implemented)
                 'BP'  - Belief propagation (not implemented)
                 'EP'  - Expectation propagation (not implemented)
                 'CV'  - Cluster variation method (not implemented)
    - **kwargs: 
        List of keyword/parameter arguments corresponding to parameter values for the respective 
        variational inference algorithm

    Returns
    ----------
    - 'qs' [numpy 1D array, array of arrays (with 1D numpy array entries), or Categorical]:
        Marginal posterior beliefs over hidden states 

    Raises
    ----------
    - NotImplementedError: if `method` is one of VMP, MMP, BP, EP or CV
    - ValueError: if `method` is not one of the recognised options
    """

    # safe convert to numpy
    A = utils.to_numpy(A)

    # collect model dimensions
    if utils.is_arr_of_arr(A):
        n_factors = A[0].ndim - 1
        n_states = list(A[0].shape[1:])
        n_modalities = len(A)
        n_observations = [A[m].shape[0] for m in range(n_modalities)]
    else:
        n_factors = A.ndim - 1
        n_states = list(A.shape[1:])
        n_modalities = 1
        n_observations = [A.shape[0]]

    obs = process_observations(obs, n_modalities, n_observations)
    if prior is not None:
        prior = process_priors(prior, n_factors)

    # Compare by value rather than identity: an equal method name that is not
    # the very same object as the module constant must still be dispatched.
    if method == FPI:
        qs = run_fpi(A, obs, n_observations, n_states, prior, **kwargs)
    elif method in (VMP, MMP, BP, EP, CV):
        raise NotImplementedError(f"{method} is not implemented")
    else:
        raise ValueError(f"{method} is not implemented")

    if return_numpy:
        return qs
    return utils.to_categorical(qs)
示例#4
0
def get_expected_obs(qs_pi, A, return_numpy=False):
    """
    Compute the observations expected under a posterior predictive density over
    hidden states `qs_pi`, given the observation likelihood model `A`.

    Parameters
    ----------
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical 
    (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. If a list, each entry is the
        posterior predictive for one timepoint of an expected trajectory.
        NOTE: a list input is converted entry-by-entry to numpy *in place*.
    A [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Categorical 
    (either single-factor of AoA)]:
        Observation likelihood mapping from hidden states to observations, with different
        modalities (if there are multiple) stored in different arrays
    return_numpy [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical
    Returns
    -------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical 
    (either single-factor or AoA), or list]:
        Expected observations. When there is exactly one timestep the single entry is
        returned directly; otherwise a list with one entry per timestep is returned.
    """

    likelihood = utils.to_numpy(A)

    # bring the input into the form "list of per-timestep numpy beliefs"
    if not isinstance(qs_pi, list):
        n_steps = 1
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]
    else:
        n_steps = len(qs_pi)
        for t, belief in enumerate(qs_pi):
            qs_pi[t] = utils.to_numpy(belief, flatten=True)

    if utils.is_arr_of_arr(likelihood):
        # one object array per timestep, holding one expected observation per modality
        num_modalities = len(likelihood)
        expected = []
        for t in range(n_steps):
            per_modality = np.empty(num_modalities, dtype=object)
            for modality in range(num_modalities):
                per_modality[modality] = spm_dot(likelihood[modality], qs_pi[t])
            expected.append(per_modality)
    else:
        expected = [spm_dot(likelihood, qs_pi[t]) for t in range(n_steps)]

    # a single timestep is unwrapped from its list
    if n_steps == 1:
        only_step = expected[0]
        return only_step if return_numpy else utils.to_categorical(only_step)

    if return_numpy:
        return expected
    return [utils.to_categorical(step) for step in expected]
示例#5
0
def update_posterior_policies(
    qs,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    pA=None,
    pB=None,
    gamma=16.0,
    return_numpy=True,
):
    """ Updates the posterior beliefs about policies based on expected free energy prior

        @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a 
        list of np.arrays (n_step x n_factor), not just a list of tuples as it is now)

        Parameters
        ----------
        - `qs` [1D numpy array, array-of-arrays, or Categorical (either single- or multi-factor)]:
            Current marginal beliefs about hidden state factors
        - `A` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Categorical 
                (both single and multi-modality)]:
            Observation likelihood model (beliefs about the likelihood mapping entertained by the agent)
        - `B` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or Categorical 
                (both single and multi-factor)]:
                Transition likelihood model (beliefs about the likelihood mapping entertained by the agent)
        - `C` [numpy 1D-array, array-of-arrays (in case of multiple modalities), or Categorical 
                (both single and multi-modality)]:
            Prior beliefs about outcomes (prior preferences)
        - `policies` [list of tuples]:
            A list of all the possible policies, each expressed as a tuple of indices, where a given 
            index corresponds to an action on a particular hidden state factor e.g. policies[1][2] yields the 
            index of the action under policy 1 that affects hidden state factor 2
        - `use_utility` [bool]:
            Whether to calculate utility term, i.e how much expected observation confer with prior expectations
        - `use_states_info_gain` [bool]:
            Whether to calculate state information gain
        - `use_param_info_gain` [bool]:
            Whether to calculate parameter information gain @NOTE requires pA or pB to be specified 
        - `pA` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Dirichlet 
                (both single and multi-modality)]:
            Prior dirichlet parameters for A. Defaults to none, in which case info gain w.r.t. Dirichlet 
            parameters over A is skipped.
        - `pB` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or 
            Dirichlet (both single and multi-factor)]:
            Prior dirichlet parameters for B. Defaults to none, in which case info gain w.r.t. 
            Dirichlet parameters over B is skipped.
        - `gamma` [float, defaults to 16.0]:
            Precision over policies, used as the inverse temperature parameter of a softmax transformation 
            of the expected free energies of each policy
        - `return_numpy` [Boolean]:
            True/False flag to determine whether output of function is a numpy array or a Categorical
        
        Returns
        --------
        - `qp` [1D numpy array or Categorical]:
            Posterior beliefs about policies, defined here as a softmax function of the 
            expected free energies of policies
        - `efe` - [1D numpy array]:
            The expected free energies of policies

    """
    n_policies = len(policies)
    efe = np.zeros(n_policies)

    for idx, policy in enumerate(policies):
        # predicted states and observations under this policy
        qs_pi = get_expected_states(qs, B, policy)
        qo_pi = get_expected_obs(qs_pi, A)

        # accumulate whichever terms of the expected free energy are enabled
        if use_utility:
            efe[idx] += calc_expected_utility(qo_pi, C)

        if use_states_info_gain:
            efe[idx] += calc_states_info_gain(A, qs_pi)

        if use_param_info_gain:
            if pA is not None:
                efe[idx] += calc_pA_info_gain(pA, qo_pi, qs_pi)
            if pB is not None:
                efe[idx] += calc_pB_info_gain(pB, qs_pi, qs, policy)

    # gamma scales the expected free energies before the softmax
    q_pi = softmax(efe * gamma)

    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)  # type: ignore
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe
示例#6
0
def get_expected_states(qs, B, policy, return_numpy=False):
    """
    Roll the current posterior `qs` forward through the transition model `B`
    along the actions prescribed by `policy`.

    Parameters
    ----------
    - `qs` [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or 
    Categorical (either single-factor or AoA)]:
        Current posterior beliefs about hidden states
    - `B` [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Categorical 
        (either single-factor of AoA)]:
        Transition likelihood mapping from states at t to states at t + 1, with different actions 
        (per factor) stored along the last dimension
    - `policy` [np.array]:
        np.array of size (policy_len x n_factors) where each value corresponds to a control state
    - `return_numpy` [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical
    Returns
    -------
    - `qs_pi`:
        Expected states under the given policy - also known as the 'posterior predictive density'.
        For a one-step policy the single entry is returned directly; otherwise a list with one
        entry per timestep (each entry an object array over factors when `B` is an array-of-arrays).

    """
    horizon = policy.shape[0]
    n_factors = policy.shape[1]
    beliefs = utils.to_numpy(qs, flatten=True)
    transitions = utils.to_numpy(B)

    if utils.is_arr_of_arr(transitions):
        # one empty per-factor container for every timestep
        trajectory = [np.empty(n_factors, dtype=object) for _ in range(horizon)]

        # t = 0: propagate the current posterior through the first action
        for factor, action in enumerate(policy[0, :]):
            trajectory[0][factor] = spm_dot(
                transitions[factor][:, :, action], beliefs[factor]
            )

        # t > 0: propagate the previous timestep's prediction
        for t in range(1, horizon):
            for factor, action in enumerate(policy[t, :]):
                trajectory[t][factor] = spm_dot(
                    transitions[factor][:, :, action], trajectory[t - 1][factor]
                )
    else:
        # single factor: only the first column of the policy is used
        trajectory = [spm_dot(transitions[:, :, policy[0, 0]], beliefs)]
        for t in range(1, horizon):
            trajectory.append(
                spm_dot(transitions[:, :, policy[t, 0]], trajectory[t - 1])
            )

    # a single timestep is unwrapped from its list
    if len(trajectory) == 1:
        only_step = trajectory[0]
        return only_step if return_numpy else utils.to_categorical(only_step)

    if return_numpy:
        return trajectory
    return [utils.to_categorical(step) for step in trajectory]
示例#7
0
def update_transition_dirichlet(pB,
                                B,
                                actions,
                                qs,
                                qs_prev,
                                lr=1.0,
                                factors="all",
                                return_numpy=True):
    """
    Return an updated copy of the Dirichlet parameters over the transition model,
    incremented by the (learning-rate scaled) cross product of current and previous
    state beliefs for the action(s) that were taken.

    Parameters
    -----------
    - pB [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet 
    (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the agent's 
        beliefs about the transition likelihood. The input is deep-copied, not modified.
    - B [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The transition likelihood of the generative model. Only entries where B > 0
        receive an update.
    - actions [numpy 1D array]:
        A 1D numpy array of shape (num_control_factors,) containing the action(s) performed at 
        a given timestep.
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical 
    (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - qs_prev [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or 
    Categorical (either single-factor or AoA)]:
        Past marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - factors [list, optional]:
        Indices (in terms of range(Nf)) of the hidden state factors to include in learning.
        Defaults to 'all', meaning that transition likelihood matrices for all hidden state factors
        are updated as a function of transitions in the different control factors (i.e. actions)
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    """

    pB = utils.to_numpy(pB)
    B = utils.to_numpy(B)

    n_factors = len(pB) if utils.is_arr_of_arr(pB) else 1

    # always work on a copy of the prior parameters
    pB_updated = copy.deepcopy(pB)
    if not return_numpy:
        pB_updated = utils.to_dirichlet(pB_updated)

    # the cross product below is a method of the Categorical wrapper
    if not utils.is_distribution(qs):
        qs = utils.to_categorical(qs)

    learn_all = factors == "all"

    if learn_all and n_factors == 1:
        # single factor: parameters are one array indexed directly by the action
        action = actions[0]
        dfdb = qs.cross(qs_prev, return_numpy=True)
        dfdb = dfdb * (B[:, :, action] > 0).astype("float")
        pB_updated[:, :, action] = pB_updated[:, :, action] + (lr * dfdb)
    else:
        # either every factor, or just the explicitly requested ones
        selected = range(n_factors) if learn_all else factors
        for factor in selected:
            action = actions[factor]
            dfdb = qs[factor].cross(qs_prev[factor], return_numpy=True)
            dfdb = dfdb * (B[factor][:, :, action] > 0).astype("float")
            pB_updated[factor][:, :, action] = (
                pB_updated[factor][:, :, action] + (lr * dfdb)
            )

    return pB_updated
示例#8
0
def update_likelihood_dirichlet(pA,
                                A,
                                obs,
                                qs,
                                lr=1.0,
                                modalities="all",
                                return_numpy=True):
    """
    Return an updated copy of the Dirichlet parameters over the observation likelihood,
    incremented by the (learning-rate scaled) cross product of the observation with
    the current state beliefs.

    Parameters
    -----------
    - pA [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet 
    (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the 
        agent's beliefs about the observation likelihood. The input is deep-copied,
        not modified.
    - A [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The observation likelihood of the generative model. Only entries where A > 0
        receive an update.
    - obs [numpy 1D array, array-of-arrays (with 1D numpy array entries), int or tuple]:
        A discrete observation (possible multi-modality) used in the update equation.
        An int or tuple of ints is expanded to one-hot vector(s) first.
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), 
    or Categorical (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - modalities [list, optional]:
        Indices (in terms of range(n_modalities)) of the observation modalities to include 
        in learning. Defaults to 'all', meaning that observation likelihood matrices 
        for all modalities are updated using their respective observations.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    """

    pA = utils.to_numpy(pA)
    A = utils.to_numpy(A)

    if utils.is_arr_of_arr(pA):
        n_modalities = len(pA)
        n_observations = [pA[m].shape[0] for m in range(n_modalities)]
    else:
        n_modalities = 1
        n_observations = [pA.shape[0]]

    # always work on a copy of the prior parameters
    pA_updated = copy.deepcopy(pA)
    if not return_numpy:
        pA_updated = utils.to_dirichlet(pA_updated)

    if isinstance(obs, (int, np.integer)):
        # single observation index -> one-hot vector
        obs = np.eye(A.shape[0])[obs]
    elif isinstance(obs, tuple):
        # one observation index per modality -> object array of one-hot vectors
        one_hots = [
            np.eye(n_observations[m])[obs[m]] for m in range(n_modalities)
        ]
        obs = np.array(one_hots, dtype=object)

    # convert to Categorical to make the cross product easier
    obs = utils.to_categorical(obs)

    learn_all = modalities == "all"

    if learn_all and n_modalities == 1:
        # single modality: the parameters are one array, updated as a whole
        dfda = obs.cross(qs, return_numpy=True)
        dfda = dfda * (A > 0).astype("float")
        pA_updated = pA_updated + (lr * dfda)
    else:
        # either every modality, or just the explicitly requested ones
        selected = range(n_modalities) if learn_all else modalities
        for modality in selected:
            dfda = obs[modality].cross(qs, return_numpy=True)
            dfda = dfda * (A[modality] > 0).astype("float")
            pA_updated[modality] = pA_updated[modality] + (lr * dfda)

    return pA_updated
示例#9
0
def update_posterior_policies_mmp(
    qs_seq_pi,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    prior=None,
    pA=None,
    pB=None,
    F=None,
    E=None,
    gamma=16.0,
    return_numpy=True,
):
    """
    Compute the posterior over policies from per-policy sequences of state beliefs.

    `qs_seq_pi`: numpy object array that stores posterior marginals beliefs over hidden states for each policy. 
                The structure is nested as policies --> timesteps --> hidden state factors. So qs_seq_pi[p_idx][t][f] is the belief about factor `f` at time `t`, under policy `p_idx`
    `A`: numpy object array that stores likelihood mappings for each modality.
    `B`: numpy object array that stores transition matrices (possibly action-conditioned) for each hidden state factor
    `C`: prior preferences over outcomes; passed unchanged to `calc_expected_utility`
    `policies`: numpy object array that stores each (potentially-multifactorial) policy in `policies[p_idx]`. Shape of `policies[p_idx]` is `(num_timesteps, num_factors)`
    `use_utility`: Boolean that determines whether expected utility should be incorporated into computation of EFE (default: `True`)
    `use_states_info_gain`: Boolean that determines whether state epistemic value (info gain about hidden states) should be incorporated into computation of EFE (default: `True`)
    `use_param_info_gain`: Boolean that determines whether parameter epistemic value (info gain about generative model parameters) should be incorporated into computation of EFE (default: `False`)
    `prior`: numpy object array that stores priors over hidden states - this matters when computing the first value of the parameter info gain for the Dirichlet parameters over B
    `pA`: numpy object array that stores Dirichlet priors over likelihood mappings (one per modality)
    `pB`: numpy object array that stores Dirichlet priors over transition mappings (one per hidden state factor)
    `F` : 1D numpy array that stores variational free energy of each policy (defaults to zeros)
    `E` : 1D numpy array that stores prior probability each policy (e.g. 'habits') (defaults to zeros)
    `gamma`: Float that encodes the precision over policies
    `return_numpy`: Boolean that determines whether output should be a numpy array or an instance of the Categorical class (default: `True`)

    Returns a tuple `(q_pi, efe)`: the posterior over policies, computed as
    `softmax(efe * gamma - F + E)`, and the 1D numpy array of accumulated
    expected free energy terms per policy.
    """

    A = utils.to_numpy(A)
    B = utils.to_numpy(B)
    # only the observation dimensions are needed here
    num_obs, _, _, _ = utils.get_model_dimensions(A, B)
    horizon = len(qs_seq_pi[0])
    num_policies = len(qs_seq_pi)

    # Initialise expected observations. Every policy gets its OWN per-timestep
    # container; sharing one container across policies would make writes for
    # one policy overwrite the entries of all the others.
    qo_seq_pi = utils.obj_array(num_policies)
    for p_idx in range(num_policies):
        qo_seq = utils.obj_array(horizon)
        for t in range(horizon):
            qo_seq[t] = utils.obj_array_zeros(num_obs)
        qo_seq_pi[p_idx] = qo_seq

    efe = np.zeros(num_policies)

    if F is None:
        F = np.zeros(num_policies)
    if E is None:
        E = np.zeros(num_policies)

    for p_idx, policy in enumerate(policies):

        qs_seq_pi_i = qs_seq_pi[p_idx]

        for t in range(horizon):

            qo_pi_t = get_expected_obs(qs_seq_pi_i[t], A)
            qo_seq_pi[p_idx][t] = qo_pi_t

            if use_utility:
                efe[p_idx] += calc_expected_utility(qo_seq_pi[p_idx][t], C)

            if use_states_info_gain:
                efe[p_idx] += calc_states_info_gain(A, qs_seq_pi_i[t])

            if use_param_info_gain:
                if pA is not None:
                    efe[p_idx] += calc_pA_info_gain(pA, qo_seq_pi[p_idx][t],
                                                    qs_seq_pi_i[t])
                if pB is not None:
                    if t > 0:
                        efe[p_idx] += calc_pB_info_gain(
                            pB, qs_seq_pi_i[t], qs_seq_pi_i[t - 1], policy)
                    elif prior is not None:
                        # at t == 0 there is no previous belief, so the prior
                        # (when supplied) stands in for it
                        efe[p_idx] += calc_pB_info_gain(
                            pB, qs_seq_pi_i[t], prior, policy)

    q_pi = softmax(efe * gamma - F + E)
    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()
    return q_pi, efe