def test_update_pB_multiFactor_withActions_allFactors(self):
        """
        Test for updating prior Dirichlet parameters over transition likelihood (pB)
        in the case that there are multiple hidden state factors and there
        are actions. All factors are updated.
        """

        n_states = [3, 4, 5]
        n_control = [3, 4, 5]
        qs_prev = Categorical(values = construct_init_qs(n_states))
        qs = Categorical(values = construct_init_qs(n_states))
        learning_rate = 1.0

        B = Categorical(values = construct_generic_B(n_states,n_control))
        B.normalize()
        pB = Dirichlet(values = construct_pB(n_states,n_control))

        action = np.array([np.random.randint(nc) for nc in n_control])

        pB_updated = core.update_transition_dirichlet(
            pB, B, action, qs, qs_prev, lr=learning_rate, factors='all', return_numpy=True)

        validation_pB = pB.copy()

        for factor, _ in enumerate(n_control):

            validation_pB = pB[factor].copy()

            validation_pB[:,:,action[factor]] += learning_rate * core.spm_cross(qs[factor].values, qs_prev[factor].values) * (B[factor][:, :, action[factor]].values > 0)
                
            self.assertTrue(np.all(pB_updated[factor]==validation_pB.values))
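For reference, the sketch below is a minimal standalone version of the per-factor update this test validates: the Dirichlet prior accumulates pseudo-counts along the slice selected by the sampled action, where the evidence is the outer product of the current and previous posteriors, masked so that only transitions supported by B receive counts. The helper name dirichlet_B_update is hypothetical, and np.outer stands in for core.spm_cross (the two coincide for a pair of 1-D vectors).

import numpy as np

def dirichlet_B_update(pB_f, B_f, qs_f, qs_prev_f, u, lr=1.0):
    """Illustrative sketch (not library code): accumulate Dirichlet pseudo-counts
    for action index `u` of a single hidden state factor."""
    pB_new = pB_f.copy()
    evidence = lr * np.outer(qs_f, qs_prev_f)         # np.outer plays the role of spm_cross for 1-D vectors
    pB_new[:, :, u] += evidence * (B_f[:, :, u] > 0)  # only entries reachable under B receive counts
    return pB_new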
    def test_normalize_multi_factor(self):
        values_1 = np.random.rand(5)
        values_2 = np.random.rand(4, 3)
        values = np.array([values_1, values_2])
        c = Categorical(values=values)
        c.normalize()
        self.assertTrue(c.is_normalized())
    def test_update_pB_multiFactor_noActions_someFactors(self):
        """
        Test for updating prior Dirichlet parameters over transition likelihood (pB)
        in the case that there are multiple hidden state factors and there
        are no actions. Some factors are updated.
        """

        n_states = [3, 4, 5]
        n_control = [1, 1, 1] 
        qs_prev = Categorical(values = construct_init_qs(n_states))
        qs = Categorical(values = construct_init_qs(n_states))
        learning_rate = 1.0

        factors_to_update = [0, 2]

        B = Categorical(values = np.array([np.random.rand(ns, ns, n_control[factor]) for factor, ns in enumerate(n_states)]))
        B.normalize()
        pB = Dirichlet(values = np.array([np.ones_like(B[factor].values) for factor in range(len(n_states))]))

        action = np.array([np.random.randint(nc) for nc in n_control])

        pB_updated = core.update_transition_dirichlet(
            pB, B, action, qs, qs_prev, lr=learning_rate, factors=factors_to_update, return_numpy=True)

        validation_pB = pB.copy()

        for factor, _ in enumerate(n_control):

            validation_pB = pB[factor].copy()

            if factor in factors_to_update:
                validation_pB[:,:,action[factor]] += learning_rate * core.spm_cross(qs[factor].values, qs_prev[factor].values) * (B[factor][:, :, action[factor]].values > 0)
                
            self.assertTrue(np.all(pB_updated[factor]==validation_pB.values))
    def test_state_info_gain(self):
        """
        Test the states_info_gain function. Demonstrates that it works
        by manipulating uncertainty in the likelihood matrices (A or B)
        in ways that alternately change the resolvability of uncertainty
        (via an imprecise expected state and a precise mapping, or high ambiguity
        and an imprecise mapping).
        """

        n_states = [2]
        n_control = [2]

        qs = Categorical(values=np.eye(n_states[0])[0])

        # add some uncertainty into the consequences of the second policy, which
        # leads to increased epistemic value of observations, in case of pursuing
        # that policy -- in the case of a precise observation likelihood model
        B_matrix = construct_generic_B(n_states, n_control)
        B_matrix[:, :, 1] = core.softmax(B_matrix[:, :, 1])
        B = Categorical(values=B_matrix)

        # single timestep
        n_step = 1
        policies = core.construct_policies(n_states,
                                           n_control,
                                           policy_len=n_step)

        # single observation modality
        num_obs = [2]

        # create noiseless identity A matrix
        A = Categorical(values=np.eye(num_obs[0]))

        state_info_gains = np.zeros(len(policies))

        for idx, policy in enumerate(policies):

            qs_pi = core.get_expected_states(qs, B, policy)

            state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

        self.assertGreater(state_info_gains[1], state_info_gains[0])

        # we can 'undo' the epistemic bonus of the second policy by making the A matrix
        # totally ambiguous, thus observations cannot resolve uncertainty about hidden states
        # - in this case, uncertainty in the posterior beliefs doesn't matter

        A = Categorical(values=np.ones((num_obs[0], num_obs[0])))
        A.normalize()

        state_info_gains = np.zeros(len(policies))

        for idx, policy in enumerate(policies):

            qs_pi = core.get_expected_states(qs, B, policy)

            state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

        self.assertEqual(state_info_gains[0], state_info_gains[1])
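For orientation, the quantity being compared here is the expected information gain about hidden states under each policy. The sketch below computes its textbook form for a single modality: the predicted-observation-weighted KL divergence between the posterior and the prior over states. This is a hedged illustration only; core.calc_states_info_gain may differ in implementation detail.

import numpy as np

def states_info_gain(A, qs, eps=1e-16):
    """Expected KL[q(s|o) || q(s)] under the predictive observation distribution (illustrative)."""
    qo = A @ qs                                   # predicted observations: p(o) = sum_s A[o, s] q(s)
    info_gain = 0.0
    for o, po in enumerate(qo):
        posterior = A[o, :] * qs                  # unnormalized q(s | o)
        if posterior.sum() <= eps:
            continue
        posterior = posterior / posterior.sum()
        info_gain += po * np.sum(posterior * (np.log(posterior + eps) - np.log(qs + eps)))
    return info_gain

# A noiseless identity A lets observations resolve state uncertainty (gain = ln 2 here),
# whereas a fully ambiguous A yields zero gain regardless of the predicted state.
print(states_info_gain(np.eye(2), np.array([0.5, 0.5])))              # ~0.693
print(states_info_gain(np.full((2, 2), 0.5), np.array([0.5, 0.5])))   # ~0.0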
Example #5
    def _construct_D_prior(self):
        if self.n_factors == 1:
            D = Categorical(values=np.ones(*self.n_states))
        else:
            D = Categorical(
                values=np.array([np.ones(Ns) for Ns in self.n_states]))
        D.normalize()

        return D
Example #6
    def _construct_A_distribution(self):
        if self.n_modalities == 1:
            # single modality: one array over (observation levels x hidden state dims)
            A = Categorical(
                values=np.random.rand(*([self.n_observations[0]] + self.n_states)))
        else:
            # multiple modalities: an object array with one sub-array per modality
            A = np.empty(self.n_modalities, dtype=object)
            for modality, no in enumerate(self.n_observations):
                A[modality] = np.random.rand(*([no] + self.n_states))
            A = Categorical(values=A)
        A.normalize()
        return A
Example #7
def sample_action(q_pi, policies, n_control, sampling_type="marginal_action"):
    """
    Samples an action from the posterior over policies, using one of two methods.
    Parameters
    ----------
    q_pi [1D numpy.ndarray or Categorical]:
        Posterior beliefs about (possibly multi-step) policies.
    policies [list of numpy ndarrays]:
        List of arrays that indicate the policies under consideration. Each element of the list is a matrix storing
        the indices of the actions along the separate hidden state factors, at each timestep (nStep x nControlFactor).
    n_control [list of integers]:
        List of the dimensionalities of the different (controllable) hidden state factors.
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by the evidence for that action,
        marginalized across policies ('marginal_action'), or is simply the action entailed by a sample from the posterior over policies.
    Returns
    ----------
    selected_policy [1D numpy ndarray]:
        Numpy array containing the indices of the actions along each control factor.
    """

    n_factors = len(n_control)

    if sampling_type == "marginal_action":

        if utils.is_distribution(q_pi):
            q_pi = utils.to_numpy(q_pi)

        action_marginals = np.empty(n_factors, dtype=object)
        for c_idx in range(n_factors):
            action_marginals[c_idx] = np.zeros(n_control[c_idx])

        # weight each action according to its integrated posterior probability over policies and timesteps
        for pol_idx, policy in enumerate(policies):
            for t in range(policy.shape[0]):
                for factor_i, action_i in enumerate(policy[t, :]):
                    action_marginals[factor_i][action_i] += q_pi[pol_idx]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        selected_policy = np.array(action_marginals.sample())

    elif sampling_type == "posterior_sample":
        if utils.is_distribution(q_pi):
            policy_index = q_pi.sample()
            selected_policy = policies[policy_index]
        else:
            q_pi = Categorical(values=q_pi)
            policy_index = q_pi.sample()
            selected_policy = policies[policy_index]

    return selected_policy
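A small, hypothetical usage sketch of sample_action above, assuming the function and its dependencies (Categorical, utils) are importable: two single-step policies over one controllable factor with two possible actions. The specific numbers are illustrative.

import numpy as np

q_pi = np.array([0.75, 0.25])                    # posterior over the two policies
policies = [np.array([[0]]), np.array([[1]])]    # each policy is (n_step x n_factors)
n_control = [2]

# 'marginal_action' pools the evidence for each action across policies before sampling
chosen_action = sample_action(q_pi, policies, n_control, sampling_type="marginal_action")

# 'posterior_sample' samples a whole policy from the posterior and returns it directly
chosen_policy = sample_action(q_pi, policies, n_control, sampling_type="posterior_sample")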
Example #8
def sample_action(p_i, possible_policies, Nu, sampling_type="marginal_action"):
    """
    Samples an action from the posterior over policies, using one of two methods.
    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a list of np.arrays (nStep x nFactor), not just a list of tuples as it is now)
    Parameters
    ----------
    p_i [1D numpy.ndarray or Categorical]:
        Variational posterior over policies.
    possible_policies [list of tuples]:
        List of tuples that indicate the possible policies under consideration. Each tuple stores the actions taken along the separate hidden state factors.
        Same length as p_i.
    Nu [list of integers]:
        List of the dimensionalities of the different (controllable) hidden state factors.
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by the evidence for that action,
        marginalized across policies ('marginal_action'), or is simply the action entailed by the policy sampled from the posterior.
    Returns
    ----------
    selected_policy [tuple]:
        Tuple containing the actions selected by the agent, one per hidden state factor.
    """

    numControls = len(Nu)

    if sampling_type == "marginal_action":

        if isinstance(p_i, Categorical):
            p_i = p_i.values.squeeze()

        action_marginals = np.empty(numControls, dtype=object)
        for nu_i in range(numControls):
            action_marginals[nu_i] = np.zeros(Nu[nu_i])

        # Weight each action according to the posterior probability it gets across policies
        for pol_i, policy in enumerate(possible_policies):
            for nu_i, a_i in enumerate(policy):
                action_marginals[nu_i][a_i] += p_i[pol_i]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        selected_policy = action_marginals.sample()

    elif sampling_type == "posterior_sample":
        if isinstance(p_i, Categorical):
            policy_index = p_i.sample()
            selected_policy = possible_policies[policy_index]
        else:
            sample_onehot = np.random.multinomial(1, p_i.squeeze())
            policy_index = np.where(sample_onehot == 1)[0][0]
            selected_policy = possible_policies[policy_index]

    return selected_policy
    def test_dot_function_f(self):
        """ Test for when the outcome modality is a trivially one-dimensional vector, meaning
        the return of spm_dot is a scalar - this tests that the spm_dot function
        successfully wraps such scalar returns into an array """

        states = np.empty(2, dtype=object)
        states[0] = np.array([0.75, 0.25])
        states[1] = np.array([0.5, 0.5])
        No = 1
        A = Categorical(values=np.ones([No] + list(states.shape)))
        A.normalize()

        # return the result as a Categorical
        result_cat = A.dot(states, return_numpy=False)
        self.assertTrue(np.prod(result_cat.shape) == 1)

        # return the result as a numpy array
        result_np = A.dot(states, return_numpy=True)
        self.assertTrue(np.prod(result_np.shape) == 1)
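To make the expected shape concrete, here is a plain-numpy sketch of what the dot reduces to in this single-level case; the names are illustrative and np.atleast_1d stands in for the wrapping behaviour being tested.

import numpy as np

states_0 = np.array([0.75, 0.25])
states_1 = np.array([0.5, 0.5])
A_values = np.ones((1, 2, 2))                 # likelihood over the single outcome level and both factors

joint = np.outer(states_0, states_1)          # joint posterior over the two hidden state factors
result = np.atleast_1d(np.sum(A_values[0] * joint))
print(result.shape)                           # (1,), matching the assertions in the test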
Example #10
    def _construct_B_distribution(self):
        if self.n_factors == 1:
            B = np.eye(*self.n_states)[:, :, np.newaxis]
            if 0 in self.control_fac_idx:
                B = np.tile(B, (1, 1, self.n_controls[0]))
                B = B.transpose(1, 2, 0)
        else:
            B = np.empty(self.n_factors, dtype=object)

            for factor, ns in enumerate(self.n_states):
                B_basic = np.eye(ns)[:, :, np.newaxis]
                if factor in self.control_fac_idx:
                    B[factor] = np.tile(B_basic,
                                        (1, 1, self.n_controls[factor]))
                    B[factor] = B[factor].transpose(1, 2, 0)
                else:
                    B[factor] = B_basic

        B = Categorical(values=B)
        B.normalize()
        return B
    def test_update_pB_multiFactor_someControllable(self):
        """
        Test for updating prior Dirichlet parameters over transition likelihood (pB)
        in the case that there are multiple hidden state factors, and some of them
        are controllable. All factors are updated.
        """

        n_states = [3, 4, 5]
        n_control = [3, 1, 1]
        qs_prev = Categorical(values = construct_init_qs(n_states))
        qs = Categorical(values = construct_init_qs(n_states))
        learning_rate = 1.0

        B_values = np.empty(len(n_states),dtype=object)
        pB_values = np.empty(len(n_states),dtype=object)
        for factor, ns in enumerate(n_states):
            B_values[factor] = np.random.rand(ns, ns, n_control[factor])
            pB_values[factor] = np.ones( (ns, ns, n_control[factor]) )

        B = Categorical(values = B_values)
        B.normalize()
        pB = Dirichlet(values = pB_values)

        action = np.array([np.random.randint(nc) for nc in n_control])

        pB_updated = core.update_transition_dirichlet(
            pB, B, action, qs, qs_prev, lr=learning_rate, factors='all', return_numpy=True)

        validation_pB = pB.copy()

        for factor, _ in enumerate(n_control):

            validation_pB = pB[factor].copy()

            validation_pB[:,:,action[factor]] += learning_rate * core.spm_cross(qs[factor].values, qs_prev[factor].values) * (B[factor][:, :, action[factor]].values > 0)
                
            self.assertTrue(np.all(pB_updated[factor]==validation_pB.values))
    def test_update_pB_singleFactor_noActions(self):
        """
        Test for updating prior Dirichlet parameters over transition likelihood (pB)
        in the case that the one and only hidden state factor is updated, and there 
        are no actions.
        """

        n_states = [3]
        n_control = [1] # this is how we encode the fact that there aren't any actions
        qs_prev = Categorical(values = construct_init_qs(n_states))
        qs = Categorical(values = construct_init_qs(n_states))
        learning_rate = 1.0

        B = Categorical(values = np.random.rand(n_states[0],n_states[0],n_control[0]))
        B.normalize()
        pB = Dirichlet(values = np.ones_like(B.values))

        action = np.array([np.random.randint(nc) for nc in n_control])

        pB_updated = core.update_transition_dirichlet(pB,B,action,qs,qs_prev,lr=learning_rate,factors="all",return_numpy=True)

        validation_pB = pB.copy()
        validation_pB[:,:,0] += learning_rate * core.spm_cross(qs.values, qs_prev.values) * (B[:, :, action[0]].values > 0)
        self.assertTrue(np.all(pB_updated==validation_pB.values))
    def test_normalize_two_dim(self):
        values = np.array([[1.0, 1.0], [1.0, 1.0]])
        c = Categorical(values=values)
        expected_values = np.array([[0.5, 0.5], [0.5, 0.5]])
        c.normalize()
        self.assertTrue(np.array_equal(c.values, expected_values))
Example #14
of the generative model to be a set of uniform Categorical distributions. Namely, the agent has no strong prior beliefs
about where it is likely to move. However, it will build beliefs about these transitions as it observes itself moving.

@NOTE: Linear indices are in row-major ordering (the default for numpy), to be contrasted with column-major ordering
(the default for MATLAB). This means that the upper-right position in the grid [0,1] corresponds to linear index 1, and the lower-left position [1,0]
corresponds to linear index 2. [0,0] and [1,1] are linear indices 0 and 3, respectively (invariant w.r.t. row-major and column-major indexing); a short numpy snippet after this docstring illustrates the mapping.

# Posterior (recognition density)
We initialise the posterior beliefs `qs` about hidden states (namely, beliefs about 'where I am') as a flat distribution over the possible states. This requires the
agent to first elicit a proprioceptive observation from the environment (e.g. a bodily sensation of where the agent is) before updating its posterior to be centered
on the true, evidence-supported location.

"""

env_shape = [3, 3]
n_states = np.prod(env_shape)

env = GridWorldEnv(shape=env_shape)

likelihood_matrix = env.get_likelihood_dist()
A = Categorical(values=likelihood_matrix)
A.remove_zeros()
plot_likelihood(A, 'Observation likelihood')

B = Categorical(values=np.ones((n_states, n_states)))
B.normalize()
plot_likelihood(B, 'Transition likelihood')

qs = Categorical(dims=[env.n_states])
qs.normalize()
We initialise the posterior beliefs `qs` about hidden states (namely, beliefs about 'where I am') as a flat distribution over the possible states. This requires the
agent to first gather a proprioceptive observation from the environment (e.g. a bodily sensation of where it feels itself to be) before updating its posterior to be centered
on the true, evidence-supported location.
"""

likelihood_matrix = env.get_likelihood_dist()
A = Categorical(values=likelihood_matrix)
A.remove_zeros()
plot_likelihood(A, 'Observation likelihood')

b = Dirichlet(values=np.ones((n_states, n_states)))
B = b.mean()
plot_likelihood(B, 'Initial transition likelihood')

D = Categorical(values=np.ones(n_states))
D.normalize()

qs = Categorical(dims=[env.n_states])
qs.normalize()
"""
Run the dynamics of the environment and inference. Start by eliciting one observation from the environment
"""

# reset environment
first_state = env.reset(init_state=s[0])

# get an observation, given the state
first_obs = gp_likelihood[:, first_state]

# turn observation into an index
first_obs = np.where(first_obs)[0][0]