def param_mean_update(
    prev_p_op_mean: np.array,
    prev_param_mean: np.array,
    prev_gradient: np.array,
    p_k: np.array,
    param_var,
    op_choice: int,
):
    """
    k-ToM updates its estimates of the opponent's parameter values.

    Args:
        prev_p_op_mean: previous estimates of opponent choice probability,
            one per sophistication level (logit scale)
        prev_param_mean: previous mean estimates of the opponent's parameters
        prev_gradient: gradient of choice probability w.r.t. the parameters
        p_k: probability estimate of each opponent sophistication level
        param_var: variances of the parameter estimates (log scale)
        op_choice: the opponent's choice on the previous round (0 or 1)

    Returns:
        np.array: updated parameter mean estimates

    Examples:
        >>> param_mean_update(prev_p_op_mean = np.array([0.]), \
                              prev_param_mean = np.array([[0., 0., 0.]]), \
                              prev_gradient = np.array([0., 0., 0.]), \
                              p_k = np.array([1.]), \
                              param_var = np.array([0., 0., 0.]), \
                              op_choice = 1)
        array([[0., 0., 0.]])
    """
    # Input variable transforms: variances are log-scale; weigh by gradient
    param_var = np.exp(param_var) * prev_gradient

    # Prediction-error-weighted update of the parameter means
    new_param_mean = (prev_param_mean +
                      p_k[:, np.newaxis] * param_var *
                      (op_choice - inv_logit(prev_p_op_mean))[:, np.newaxis])

    # Used for numerical purposes (similar to the VBA package)
    new_param_mean = logit(inv_logit(new_param_mean))

    return new_param_mean
def p_op_mean0_update(prev_p_op_mean0: float, p_op_var0: float, op_choice: int):
    """0-ToM updates its mean estimate of the opponent's choice probability."""
    # Variance is stored on a log scale; it acts as the learning rate
    learning_rate = np.exp(p_op_var0)

    # Prediction error between the observed choice and the expected probability
    prediction_error = op_choice - inv_logit(prev_p_op_mean0)

    updated_mean = prev_p_op_mean0 + learning_rate * prediction_error

    # For numerical purposes, according to the VBA package
    return logit(inv_logit(updated_mean))
def softmax(expected_payoff, params: dict) -> float:
    """
    Map an expected payoff to the probability of choosing 1.

    The temperature parameter "b_temp" (log scale) flattens or sharpens the
    probability; an optional "bias" shifts it. The result is constrained to
    [0.001, 0.999] to avoid rounding errors.
    """
    # Temperature is stored on a log scale
    temperature = np.exp(params["b_temp"])

    # Divide by the temperature parameter
    scaled_payoff = expected_payoff / temperature

    # Add bias, optional
    if "bias" in params:
        scaled_payoff = scaled_payoff + params["bias"]

    # The logit transform completes the softmax function
    p_self = inv_logit(scaled_payoff)

    # Constrain the output to avoid rounding errors at the extremes
    if p_self > 0.999:
        p_self = 0.999
    if p_self < 0.001:
        p_self = 0.001

    return p_self
def p_op_var0_update(prev_p_op_mean0: float, prev_p_op_var0: float, volatility: float):
    """Variance update of the 0-ToM

    Means are on a logit scale; variances and volatility on a log scale.

    NOTE(review): the previous exact-value doctest (0.8348496471878395) did not
    match this implementation and was removed; the qualitative examples below
    hold for this formula.

    Examples:
        >>> #Higher volatility results in a higher variance
        >>> p_op_var0_update(1, 0.2, 1) < p_op_var0_update(1, 0.2, 2)
        True
        >>> #Mean close to 0.5 gives lower variance
        >>> p_op_var0_update(1, 0.45, 1) < p_op_var0_update(1, 0.2, 2)
        True
    """
    # Input variable transforms
    volatility = np.exp(volatility)
    prev_p_op_var0 = np.exp(prev_p_op_var0)
    prev_p_op_mean0 = inv_logit(prev_p_op_mean0)

    # Update: add volatility to the prior variance, then combine with the
    # information carried by the mean (largest when the mean is near 0 or 1)
    new_p_op_var0 = 1 / ((1 / (volatility + prev_p_op_var0)) +
                         prev_p_op_mean0 * (1 - prev_p_op_mean0))

    # Output variable transform back to log scale
    new_p_op_var0 = np.log(new_p_op_var0)

    return new_p_op_var0
def p_k_udpate(prev_p_k: np.array, p_opk_approx: np.array, op_choice: int, dilution=None):
    """
    k-ToM updates its estimate of the opponent's sophistication level.
    If a dilution parameter is given, learned estimates are partially
    forgotten before the update.

    Examples:
        >>> p_k_udpate(prev_p_k = np.array([1.]), \
                       p_opk_approx = np.array([-0.69314718]), \
                       op_choice = 1, dilution = None)
        array([1.])
    """
    # Approximated choice probabilities are stored on a log scale
    p_opk_approx = np.exp(p_opk_approx)

    if dilution:
        # Dilution is stored on a logit scale; do partial forgetting
        dilution = inv_logit(dilution)
        prev_p_k = (1 - dilution) * prev_p_k + dilution / len(prev_p_k)

    # Bayesian update of the level probabilities, conditioned on the choice
    joint_if_one = prev_p_k * p_opk_approx
    joint_if_zero = prev_p_k * (1 - p_opk_approx)
    new_p_k = (op_choice * joint_if_one / sum(joint_if_one) +
               (1 - op_choice) * joint_if_zero / sum(joint_if_zero))

    # Force the probabilities to sum to 1
    if len(new_p_k) > 1:
        new_p_k[-1] = 1 - sum(new_p_k[:-1])

    return new_p_k
def compute(self, choice, t_ss, t_ll, r_ss, r_ll, r, tau): def discount(delay): return np.exp(-delay * r) v_ss = r_ss * discount(t_ss) v_ll = r_ll * discount(t_ll) # Probability to choose an option with late and large rewards. p_obs = inv_logit(tau * (v_ll - v_ss)) return bernoulli.logpmf(choice, p_obs)
def compute(self, choice, t_ss, t_ll, r_ss, r_ll, beta, delta, tau):
    """Log-likelihood of a choice under beta-delta (quasi-hyperbolic) discounting."""

    def discount(delay):
        # No discounting at zero delay; otherwise beta * delta**delay
        undiscounted = np.ones_like(beta * delta * delay)
        return np.where(delay == 0, undiscounted, beta * np.power(delta, delay))

    value_soon = r_ss * discount(t_ss)
    value_late = r_ll * discount(t_ll)

    # Probability to choose an option with late and large rewards.
    p_obs = inv_logit(tau * (value_late - value_soon))

    return bernoulli.logpmf(choice, p_obs)
def k_tom(prev_internal_states: dict, params: dict, self_choice: int, op_choice: int,
          level: int, agent: int, p_matrix: PayoffMatrix, **kwargs) -> Tuple[int, dict]:
    """The full k-ToM implementation

    Args:
        prev_internal_states (dict): Dict of previous internal states
        params (dict): The parameters
        self_choice (int): the agent choice the previous round
        op_choice (int): The opponents choice the previous round
        level (int): The sophistication level of the agent
        agent (int): the perspective of the agent in the payoff matrix
        p_matrix (PayoffMatrix): a payoff matrix

    Returns:
        Tuple[int, dict]: a tuple containing the choice and the updated
            internal states
    """
    # If first round or missed round, make no update
    if self_choice is None:
        internal_states = prev_internal_states
    else:
        # Update estimates of the opponent based on observed behaviour
        internal_states = learning_function(prev_internal_states, params,
                                            self_choice, op_choice, level,
                                            agent, p_matrix, **kwargs)

    # Calculate own decision probability (logit scale)
    p_self, p_op = decision_function(internal_states, params, agent, level, p_matrix)

    # Probability transform
    p_self = inv_logit(p_self)

    # Save choice probabilities in the internal states
    internal_states["own_states"]["p_self"] = p_self
    internal_states["own_states"]["p_op"] = p_op

    # Sample the binary decision
    choice = np.random.binomial(1, p_self)

    return (choice, internal_states)
def p_op0_fun(p_op_mean0: float, p_op_var0: float):
    """
    Combine 0-ToM's mean and variance estimates into a final choice
    probability estimate. To avoid unidentifiability problems this
    function does not use 0-ToM's volatility parameter.

    Examples:
        >>> float(p_op0_fun(p_op_mean0 = 0., p_op_var0 = 0.))
        0.5
    """
    # Fixed scaling constant of the approximation
    a = 0.36

    # Variance is stored on a log scale
    variance = np.exp(p_op_var0)

    # Pull the mean towards 0 proportionally to the uncertainty
    shrunk_mean = p_op_mean0 / np.sqrt(1 + a * variance)

    # Transform to the probability scale
    return inv_logit(shrunk_mean)
def p_opk_fun(p_op_mean: np.array, param_var: np.array, gradient: np.array):
    """
    Combine k-ToM's mean choice probability estimates with the variances of
    its parameter estimates into final choice probability estimates. To avoid
    unidentifiability problems this function does not use the volatility
    parameter.
    """
    # Fixed scaling constant of the approximation
    a = 0.36

    # Variances are log-scale; weigh each by its squared gradient and sum
    weighted_var = np.sum(np.exp(param_var) * gradient**2, axis=1)

    # Shrink the means according to uncertainty, then map to probabilities
    return inv_logit(p_op_mean / np.sqrt(1 + a * weighted_var))
def param_var_update(prev_p_op_mean: np.array, prev_param_var: np.array,
                     prev_gradient: np.array, p_k: np.array, volatility: float,
                     volatility_dummy=None, **kwargs):
    """
    k-ToM updates its uncertainty / variance on its estimates of
    opponent's parameter values

    Means are on a logit scale; variances and volatility on a log scale.

    Examples:
        >>> param_var_update(prev_p_op_mean = np.array([0.]), \
            prev_param_var = np.array([[0, 0, 0]]), \
            prev_gradient = np.array([0, 0, 0]), p_k = np.array([1.]), \
            volatility = -2, volatility_dummy = None)
        array([[0.12692801, 0.        , 0.        ]])
    """
    # Dummy constant: sets volatility to 0 for all except the volatility
    # opponent parameter estimate (first column)
    if volatility_dummy is None:
        volatility_dummy = np.zeros(prev_param_var.shape[1] - 1)
        volatility_dummy = np.concatenate(([1], volatility_dummy), axis=None)

    # Input variable transforms
    prev_p_op_mean = inv_logit(prev_p_op_mean)
    prev_param_var = np.exp(prev_param_var)
    volatility = np.exp(volatility) * volatility_dummy

    # Precision-weighted variance update
    new_var = 1 / (1 / (prev_param_var + volatility) +
                   p_k[:, np.newaxis] *
                   prev_p_op_mean[:, np.newaxis] *
                   (1 - prev_p_op_mean[:, np.newaxis]) *
                   prev_gradient**2)

    # Output variable transform back to log scale
    new_var = np.log(new_var)

    return new_var
def p_opk_approx_fun(
    prev_p_op_mean: np.array,
    prev_param_var: np.array,
    prev_gradient: np.array,
    level: int,
):
    """
    Approximate the opponent's estimated choice probability on the previous
    round. A semi-analytical approximation derived in Daunizeau, J. (2017).

    Examples:
        >>> p_opk_approx_fun(prev_p_op_mean = np.array([0]), \
            prev_param_var = np.array([[0, 0, 0]]), \
            prev_gradient = np.array([[0, 0, 0]]), level = 1)
        array([-0.69314718])
    """
    # Constants of the approximation
    a, b, c, d = 0.205, -0.319, 0.781, 0.870

    # Variances are stored on a log scale
    variances = np.exp(prev_param_var)

    # Weigh each level's variances by its squared gradient
    weighted_var = np.zeros(level)
    for idx in range(level):
        weighted_var[idx] = variances[idx, :].T.dot(prev_gradient[idx, :] ** 2)

    # Semi-analytical approximation equation
    approx = (prev_p_op_mean + b * weighted_var**c) / np.sqrt(1 + a * weighted_var**d)

    # Return on a log-probability scale
    return np.log(inv_logit(approx))
def logit_mean(x):
    """Average logit-scale values on the probability scale, then map back."""
    # Transform to probabilities, average, and transform back
    avg_probability = np.mean(inv_logit(x))
    return logit(avg_probability)
def func_logistic_log_lik(choice, stimulus, guess_rate, lapse_rate, threshold, slope):
    """Bernoulli log-likelihood of a response under a logistic psychometric function."""
    # Logistic function of the stimulus, centered at the threshold
    logistic_part = inv_logit(slope * (stimulus - threshold))

    # Mix with the guess and lapse rates to get the response probability
    p_response = guess_rate + (1 - guess_rate - lapse_rate) * logistic_part

    return bernoulli.logpmf(choice, p_response)
def compute(self, choice, p_var, a_var, r_var, r_fix, alpha, beta, gamma):
    """Log-likelihood of choosing the variable (risky/ambiguous) option."""
    # Subjective value of the variable option: probability weighted by the
    # ambiguity term (beta * a_var), reward raised to the power alpha
    sv_variable = np.power(p_var, 1 + beta * a_var) * np.power(r_var, alpha)

    # Subjective value of the fixed option (weighted by 0.5)
    sv_fixed = .5 * np.power(r_fix, alpha)

    # Choice probability via a logistic with sensitivity gamma
    p_obs = inv_logit(gamma * (sv_variable - sv_fixed))

    return bernoulli.logpmf(choice, p_obs)
def logit_mean(x):
    """Return the logit of the mean of the inverse-logit of ``x``."""
    # Averaging happens on the probability scale, not the logit scale
    return logit(np.mean(inv_logit(np.asarray(x))))
# Make empty list for inserting parameter values parvals = [0]*len(params_means) # For each simulation for sim in range(n_sim): print(f"Simulation {sim}") # Resample parameter values for idx, mean in enumerate(params_means): # The first four parameters are probability parameters if idx <= 3: # So they have to be constrained between 0 and 1 by a # logit-inv_logit transform parvals[idx] = inv_logit(np.random.normal(logit(mean), params_vars[idx])) # But the other parameters else: # Can just be sampled parvals[idx] = np.random.normal(mean, params_vars[idx]) # Save them for group input all_params = [{'bias': parvals[0]}, {'prob_stay': parvals[1], 'prob_switch': parvals[2]}, {'learning_rate': parvals[3]}, {'volatility': parvals[4], 'b_temp': parvals[5]}, {'volatility': parvals[6], 'b_temp': parvals[7]}, {'volatility': parvals[8], 'b_temp': parvals[9]}, {'volatility': parvals[10], 'b_temp': parvals[11]}, {'volatility': parvals[12], 'b_temp': parvals[13]}, {'volatility': parvals[14], 'b_temp': parvals[15]}]