Example #1
def param_mean_update(
    prev_p_op_mean: np.array,
    prev_param_mean: np.array,
    prev_gradient: np.array,
    p_k: np.array,
    param_var,
    op_choice: int,
):
    """
    k-ToM updates its estimates of opponent's parameter values

    Examples:
        >>> param_mean_update(prev_p_op_mean = np.array([0.]), prev_param_mean = np.array([[0, 0, 0]]), prev_gradient = np.array([0, 0, 0]), p_k = np.array([1.]), param_var = np.array([[0, 0, 0]]), op_choice = 1)
    """
    # Input variable transforms
    param_var = np.exp(param_var) * prev_gradient

    # Calculate
    new_param_mean = (prev_param_mean + p_k[:, np.newaxis] * param_var *
                      (op_choice - inv_logit(prev_p_op_mean))[:, np.newaxis])

    # Used for numerical purposes (similar to the VBA package)
    new_param_mean = logit(inv_logit(new_param_mean))

    return new_param_mean
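
Every snippet on this page calls inv_logit and logit without showing their definitions. A minimal sketch of what is assumed throughout (the standard logistic transform and its inverse; the actual package may import them from elsewhere):

import numpy as np

def inv_logit(x):
    # Map log-odds to probabilities: 1 / (1 + exp(-x))
    return 1 / (1 + np.exp(-x))

def logit(p):
    # Map probabilities to log-odds: log(p / (1 - p))
    return np.log(p / (1 - p))
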
Example #2
def p_op_mean0_update(prev_p_op_mean0: float, p_op_var0: float,
                      op_choice: int):
    """0-ToM updates mean choice probability estimate"""
    # Input variable transforms
    p_op_var0 = np.exp(p_op_var0)

    # Update
    new_p_op_mean0 = prev_p_op_mean0 + p_op_var0 * (op_choice -
                                                    inv_logit(prev_p_op_mean0))

    # For numerical purposes, according to the VBA package
    new_p_op_mean0 = logit(inv_logit(new_p_op_mean0))

    return new_p_op_mean0
Example #3
def softmax(expected_payoff, params: dict) -> float:
    """
    Softmax function for calculating own probability of choosing 1
    """
    # Extract necessary parameters
    b_temp = params["b_temp"]
    if "bias" in params:
        bias = params["bias"]

    # Input variable transforms
    b_temp = np.exp(b_temp)

    # Divide by temperature parameter
    expected_payoff = expected_payoff / b_temp

    # Add bias, optional
    if "bias" in params:
        expected_payoff = expected_payoff + bias

    # The logit transform completes the softmax function
    p_self = inv_logit(expected_payoff)

    # Set output bounds
    if p_self > 0.999:
        p_self = 0.999
        # warn("Choice probability constrained at upper bound 0.999 to avoid
        # rounding errors", Warning)
    if p_self < 0.001:
        p_self = 0.001
        # warn("Choice probability constrained at lower bound 0.001 to avoid
        # rounding errors", Warning)

    return p_self
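
A minimal usage sketch of the softmax function above, with purely illustrative values and the inv_logit helper sketched earlier. Note that b_temp is passed in log space and exponentiated inside the function, so a value of 0 corresponds to a behavioural temperature of 1:

# Illustrative call (assumes softmax and inv_logit as defined above)
p = softmax(expected_payoff=0.5, params={"b_temp": 0.0, "bias": 0.2})
# expected_payoff / exp(0) + 0.2 = 0.7, so p = inv_logit(0.7), roughly 0.67
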
Example #4
def p_op_var0_update(prev_p_op_mean0: float, prev_p_op_var0: float,
                     volatility: float):
    """Variance update of the 0-ToM

    Examples:
        >>> p_op_var0_update(1, 0.2, 1)
        0.8348496471878395
        >>> # Higher volatility results in a higher variance
        >>> p_op_var0_update(1, 0.2, 1) < p_op_var0_update(1, 0.2, 2)
        True
        >>> # Here a higher volatility outweighs a lower previous variance
        >>> p_op_var0_update(1, 0.45, 1) < p_op_var0_update(1, 0.2, 2)
        True
    """
    # Input variable transforms
    volatility = np.exp(volatility)
    prev_p_op_var0 = np.exp(prev_p_op_var0)

    prev_p_op_mean0 = inv_logit(prev_p_op_mean0)

    # Update
    new_p_op_var0 = 1 / ((1 /
                          (volatility + prev_p_op_var0)) + prev_p_op_mean0 *
                         (1 - prev_p_op_mean0))

    # Output variable transform
    new_p_op_var0 = np.log(new_p_op_var0)

    return new_p_op_var0
Example #5
def p_k_udpate(prev_p_k: np.array,
               p_opk_approx: np.array,
               op_choice: int,
               dilution=None):
    """
    k-ToM updates its estimate of the opponent's sophistication level.
    If k-ToM has a dilution parameter, it does a partial forgetting of learned
    estimates.

    Examples:
        >>> p_k_udpate(prev_p_k = np.array([1.]), p_opk_approx = np.array([-0.69314718]), op_choice = 1, dilution = None)
    """
    # Input variable transforms
    p_opk_approx = np.exp(p_opk_approx)
    if dilution:
        dilution = inv_logit(dilution)

    # Do partial forgetting
    if dilution:
        prev_p_k = (1 - dilution) * prev_p_k + dilution / len(prev_p_k)

    # Calculate
    new_p_k = op_choice * (prev_p_k * p_opk_approx /
                           sum(prev_p_k * p_opk_approx)) + (1 - op_choice) * (
                               prev_p_k *
                               (1 - p_opk_approx) / sum(prev_p_k *
                                                        (1 - p_opk_approx)))

    # Force probability sum to 1
    if len(new_p_k) > 1:
        new_p_k[-1] = 1 - sum(new_p_k[:-1])

    return new_p_k
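
A small self-contained sketch of just the partial-forgetting step, with illustrative numbers and the standard logistic inv_logit assumed: the dilution parameter pulls the estimated level probabilities back towards a uniform distribution.

import numpy as np

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

prev_p_k = np.array([0.9, 0.1])   # current belief over the opponent's level
dilution = inv_logit(0.0)         # a raw dilution of 0 maps to 0.5
forgotten = (1 - dilution) * prev_p_k + dilution / len(prev_p_k)
print(forgotten)                  # [0.7 0.3] -- nudged towards [0.5 0.5]
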
Example #6
    def compute(self, choice, t_ss, t_ll, r_ss, r_ll, r, tau):
        def discount(delay):
            return np.exp(-delay * r)

        v_ss = r_ss * discount(t_ss)
        v_ll = r_ll * discount(t_ll)

        # Probability to choose an option with late and large rewards.
        p_obs = inv_logit(tau * (v_ll - v_ss))
        return bernoulli.logpmf(choice, p_obs)
Example #7
    def compute(self, choice, t_ss, t_ll, r_ss, r_ll, beta, delta, tau):
        def discount(delay):
            return np.where(delay == 0, np.ones_like(beta * delta * delay),
                            beta * np.power(delta, delay))

        v_ss = r_ss * discount(t_ss)
        v_ll = r_ll * discount(t_ll)

        # Probability to choose an option with late and large rewards.
        p_obs = inv_logit(tau * (v_ll - v_ss))
        return bernoulli.logpmf(choice, p_obs)
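
The two compute methods above differ only in their discount function: Example #6 uses exponential discounting, Example #7 a quasi-hyperbolic (beta-delta) discount that equals 1 at zero delay. A standalone sketch with illustrative parameter values, assuming the standard logistic inv_logit:

import numpy as np
from scipy.stats import bernoulli

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

t_ll, r_ss, r_ll, tau = 10.0, 5.0, 10.0, 1.0   # illustrative trial and inverse temperature
v_ss = r_ss                                     # the smaller reward is immediate (t_ss = 0)

# Exponential discounting (Example #6) with rate r
r = 0.05
v_ll_exp = r_ll * np.exp(-t_ll * r)

# Quasi-hyperbolic discounting (Example #7) with parameters beta and delta
beta, delta = 0.8, 0.95
v_ll_qh = r_ll * beta * np.power(delta, t_ll)

for v_ll in (v_ll_exp, v_ll_qh):
    p_obs = inv_logit(tau * (v_ll - v_ss))      # probability of choosing the larger-later option
    print(bernoulli.logpmf(1, p_obs))           # log-likelihood of a larger-later choice
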
Example #8
def k_tom(prev_internal_states: dict, params: dict, self_choice: int,
          op_choice: int, level: int, agent: int, p_matrix: PayoffMatrix,
          **kwargs) -> Tuple[int, dict]:
    """The full k-ToM implementation

    Args:
        prev_internal_states (dict): Dict of previous internal states
        params (dict): The parameters
        self_choice (int): The agent's choice in the previous round
        op_choice (int): The opponent's choice in the previous round
        level (int): The sophistication level of the agent
        agent (int): The perspective of the agent in the payoff matrix
        p_matrix (PayoffMatrix): A payoff matrix

    Returns:
        Tuple[int, dict]: a tuple containing the choice and the updated internal states
    """

    # Update estimates of opponent based on behaviour
    if self_choice is not None:
        new_internal_states = learning_function(prev_internal_states, params,
                                                self_choice, op_choice, level,
                                                agent, p_matrix, **kwargs)

    else:  # If first round or missed round, make no update
        new_internal_states = prev_internal_states

    # Calculate own decision probability
    p_self, p_op = decision_function(new_internal_states, params, agent, level,
                                     p_matrix)

    # Probability transform
    p_self = inv_logit(p_self)

    # Save own choice probability
    new_internal_states["own_states"]["p_self"] = p_self
    new_internal_states["own_states"]["p_op"] = p_op

    # Make decision
    choice = np.random.binomial(1, p_self)

    return (choice, new_internal_states)
Example #9
def p_op0_fun(p_op_mean0: float, p_op_var0: float):
    """
    0-ToM combines the mean and variance of its parameter estimate into a
    final choice probability estimate.
    To avoid unidentifiability problems this function does not use 0-ToM's volatility parameter.

    Examples:
        >>> p_op0_fun(p_op_mean0 = 0.7, p_op_var0 = 0.3)
    """
    # Constants
    a = 0.36

    # Input variable transforms
    p_op_var0 = np.exp(p_op_var0)

    # Calculate
    p_op0 = p_op_mean0 / np.sqrt(1 + a * p_op_var0)

    # Output variable transforms
    p_op0 = inv_logit(p_op0)
    return p_op0
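
A self-contained sketch of the same combination rule with illustrative numbers, assuming the standard logistic inv_logit: a larger variance shrinks the mean estimate towards 0 in log-odds space, so the final choice probability estimate moves towards 0.5.

import numpy as np

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

a = 0.36
p_op_mean0 = 1.0                     # mean estimate in log-odds space
for p_op_var0 in (-2.0, 0.0, 2.0):   # variance estimate in log space
    var = np.exp(p_op_var0)
    p_op0 = inv_logit(p_op_mean0 / np.sqrt(1 + a * var))
    print(p_op0)                     # decreases towards 0.5 as the variance grows
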
Example #10
def p_opk_fun(p_op_mean: np.array, param_var: np.array, gradient: np.array):
    """
    k-ToM combines the mean choice probability estimate and the variances of
    its parameter estimates into a final choice probability estimate.
    To avoid unidentifiability problems this function does not use 0-ToM's volatility parameter.
    """
    # Constants
    a = 0.36

    # Input variable transforms
    param_var = np.exp(param_var)

    # Prepare variance by weighing with gradient
    var_prepped = np.sum((param_var * gradient**2), axis=1)

    # Calculate
    p_opk = p_op_mean / np.sqrt(1 + a * var_prepped)

    # Output variable transform
    p_opk = inv_logit(p_opk)

    return p_opk
Example #11
def param_var_update(prev_p_op_mean: np.array,
                     prev_param_var: np.array,
                     prev_gradient: np.array,
                     p_k: np.array,
                     volatility: float,
                     volatility_dummy=None,
                     **kwargs):
    """
    k-ToM updates its uncertainty / variance on its estimates of opponent's
    parameter values

    Examples:
        >>> param_var_update(prev_p_op_mean = np.array([0.]), \
            prev_param_var = np.array([[0, 0, 0]]), \
            prev_gradient = np.array([0, 0, 0]), p_k = np.array([1.]), \
            volatility = -2, volatility_dummy = None)
        array([[0.12692801, 0.        , 0.        ]])
    """
    # Dummy constant: sets volatility to 0 for all except volatility opponent
    # parameter estimates
    if volatility_dummy is None:
        volatility_dummy = np.zeros(prev_param_var.shape[1] - 1)
        volatility_dummy = np.concatenate(([1], volatility_dummy), axis=None)

    # Input variable transforms
    prev_p_op_mean = inv_logit(prev_p_op_mean)
    prev_param_var = np.exp(prev_param_var)
    volatility = np.exp(volatility) * volatility_dummy

    # Calculate
    new_var = 1 / (1 / (prev_param_var + volatility) +
                   p_k[:, np.newaxis] * prev_p_op_mean[:, np.newaxis] *
                   (1 - prev_p_op_mean[:, np.newaxis]) * prev_gradient**2)

    # Output variable transform
    new_var = np.log(new_var)

    return new_var
Example #12
def p_opk_approx_fun(
    prev_p_op_mean: np.array,
    prev_param_var: np.array,
    prev_gradient: np.array,
    level: int,
):
    """
    Approximates the estimated choice probability of the opponent on the
    previous round.
    A semi-analytical approximation derived in Daunizeau, J. (2017)

    Examples:
        >>> p_opk_approx_fun(prev_p_op_mean = np.array([0]), prev_param_var = np.array([[0, 0, 0]]), prev_gradient = np.array([[0, 0, 0]]), level = 1)
    """
    # Constants
    a = 0.205
    b = -0.319
    c = 0.781
    d = 0.870

    # Input variable transforms
    prev_param_var = np.exp(prev_param_var)

    # Prepare variance by weighing with gradient
    prev_var_prepped = np.zeros(level)
    for level_idx in range(level):
        prev_var_prepped[level_idx] = prev_param_var[level_idx, :].T.dot(
            prev_gradient[level_idx, :]**2)

    # Equation
    p_opk_approx = (prev_p_op_mean + b *
                    prev_var_prepped**c) / np.sqrt(1 + a * prev_var_prepped**d)

    # Output variable transform
    p_opk_approx = np.log(inv_logit(p_opk_approx))

    return p_opk_approx
Example #13
def logit_mean(x):
    return logit(np.mean(inv_logit(x)))
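
logit_mean averages on the probability scale rather than in log-odds space, which generally gives a less extreme result than averaging the raw log-odds. A self-contained sketch, assuming the standard logit/inv_logit pair:

import numpy as np

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

def logit(p):
    return np.log(p / (1 - p))

x = np.array([0.0, 4.0])             # two estimates in log-odds space
print(np.mean(x))                    # 2.0: naive average of the log-odds
print(logit(np.mean(inv_logit(x))))  # ~1.05: average taken on the probability scale
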
Example #14
def func_logistic_log_lik(choice, stimulus, guess_rate, lapse_rate, threshold,
                          slope):
    f = inv_logit(slope * (stimulus - threshold))
    p = guess_rate + (1 - guess_rate - lapse_rate) * f
    return bernoulli.logpmf(choice, p)
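
This is a logistic psychometric function in which guess_rate and lapse_rate bound the response probability away from 0 and 1. A usage sketch with illustrative values, assuming the function above plus the standard logistic inv_logit and scipy's bernoulli:

import numpy as np
from scipy.stats import bernoulli

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

# A stimulus well above threshold makes choice = 1 very likely,
# so the log-likelihood is close to 0.
ll = func_logistic_log_lik(choice=1, stimulus=2.0, guess_rate=0.5,
                           lapse_rate=0.05, threshold=1.0, slope=3.0)
print(ll)
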
Example #15
    def compute(self, choice, p_var, a_var, r_var, r_fix, alpha, beta, gamma):
        sv_var = np.power(r_var, alpha)
        sv_var = np.power(p_var, 1 + beta * a_var) * sv_var
        sv_fix = .5 * np.power(r_fix, alpha)
        p_obs = inv_logit(gamma * (sv_var - sv_fix))
        return bernoulli.logpmf(choice, p_obs)
Example #16
# Make empty list for inserting parameter values
parvals = [0]*len(params_means)

# For each simulation
for sim in range(n_sim):

    print(f"Simulation {sim}")

    # Resample parameter values
    for idx, mean in enumerate(params_means):
        # The first four parameters are probability parameters
        if idx <= 3:
            # So they have to be constrained between 0 and 1 by a
            # logit-inv_logit transform
            parvals[idx] = inv_logit(np.random.normal(logit(mean),
                                                      params_vars[idx]))
        # But the other parameters
        else:
            # Can just be sampled
            parvals[idx] = np.random.normal(mean, params_vars[idx])

    # Save them for group input
    all_params = [
        {'bias': parvals[0]},
        {'prob_stay': parvals[1], 'prob_switch': parvals[2]},
        {'learning_rate': parvals[3]},
        {'volatility': parvals[4], 'b_temp': parvals[5]},
        {'volatility': parvals[6], 'b_temp': parvals[7]},
        {'volatility': parvals[8], 'b_temp': parvals[9]},
        {'volatility': parvals[10], 'b_temp': parvals[11]},
        {'volatility': parvals[12], 'b_temp': parvals[13]},
        {'volatility': parvals[14], 'b_temp': parvals[15]}]
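
A small self-contained check of the constraining trick used for the first four parameters, assuming the standard logit/inv_logit pair: sampling in logit space and mapping back keeps the resampled values strictly between 0 and 1, which plain Gaussian sampling would not guarantee.

import numpy as np

def inv_logit(x):
    return 1 / (1 + np.exp(-x))

def logit(p):
    return np.log(p / (1 - p))

mean, sd = 0.9, 1.0                          # illustrative probability parameter
samples = inv_logit(np.random.normal(logit(mean), sd, size=1000))
print(samples.min() > 0, samples.max() < 1)  # True True
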