import heapq
import itertools


def _generate_local_adjacency_list(IC, players, transition_function, depth=10):
    """Breadth-first expansion of the game tree from the initial condition IC.

    Returns a dict T mapping each expanded state to the list of
    (probability, next_state) pairs returned by transition_function.
    """
    level = [IC]
    T = {}
    # This would be a good place to eventually put space limitations in.
    for _ in range(depth):
        next_level = []
        for state in level:
            if state in T:
                continue  # already expanded via another path
            actions = [players[player].get_action(perspective(player, state))
                       for player in range(len(players))]
            T[state] = transition_function(state, actions)
            for _, new_state in T[state]:
                if new_state not in T:
                    next_level.append(new_state)
        level = next_level
    return T
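# --- Hypothetical usage sketch (illustration only, not part of the module).
# A minimal fixture for the helpers in this file, assuming the interfaces the
# code above implies: players expose get_action, utility_function, and gamma,
# and a transition function maps (state, actions) to a list of
# (probability, next_state) pairs. perspective() is assumed to be defined
# elsewhere in this module; the fixture ignores its output, so any
# implementation works. _DemoPlayer and _demo_transition are invented names.

class _DemoPlayer:
    """Toy player: always plays `action`; utility is a constant 1.0."""

    def __init__(self, action, gamma=0.9):
        self.action = action
        self.gamma = gamma

    def get_action(self, view):
        return self.action  # ignores the observation

    def utility_function(self, view):
        return 1.0  # constant reward, independent of the observation


def _demo_transition(state, actions):
    # Random walk on the integers: step by the summed actions, up or down,
    # each with probability one half.
    step = sum(actions)
    return [(0.5, state + step), (0.5, state - step)]


def _demo_adjacency():
    players = [_DemoPlayer(1), _DemoPlayer(0)]
    # Expands states {0, 1, -1, 2, -2} from the initial state 0.
    return _generate_local_adjacency_list(0, players, _demo_transition, depth=3)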
def _expected_payoff_player_m(tree, players, m):
    """Recursive expected discounted payoff for player m.

    A tree node is a pair ((probability, state), children), where children
    is a list of nodes of the same shape; leaves carry an empty list.
    """
    if tree[0][0] == 0:
        return 0  # unreachable branch, contributes nothing
    eppm = _expected_payoff_player_m
    u_f = players[m].utility_function
    q_p = perspective(m, tree[0][1])
    util = u_f(q_p)
    return tree[0][0] * (util + players[m].gamma
                         * sum(eppm(child, players, m) for child in tree[1]))
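# --- Hypothetical usage sketch showing the node layout the recursion expects,
# reusing the _DemoPlayer fixture above. With constant utility 1.0 and
# gamma = 0.9, a root with two equally likely depth-1 leaves evaluates to
# 1.0 + 0.9 * (0.5 + 0.5) = 1.9. States here are plain integers.

def _demo_tree_payoff():
    leaf_a = ((0.5, 1), [])
    leaf_b = ((0.5, -1), [])
    tree = ((1.0, 0), [leaf_a, leaf_b])
    players = [_DemoPlayer(1), _DemoPlayer(0)]
    return _expected_payoff_player_m(tree, players, 0)  # -> 1.9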
def _expected_payoffs(IC, players, transition_function, eval_depth=1):
    """Expected discounted payoff of every player, computed by expanding the
    game tree from IC down to eval_depth with an explicit heap rather than
    recursion. Heap entries are (-depth, tiebreak, (path_prob, state)); the
    tiebreak counter keeps the heap from ever comparing raw states.
    """
    payoffs = [0 for _ in players]
    tiebreak = itertools.count()
    decision_heap = []
    heapq.heappush(decision_heap, (0, next(tiebreak), (1., IC)))
    while decision_heap:
        neg_depth, _, (prob, state) = heapq.heappop(decision_heap)
        for m in range(len(players)):
            util = players[m].utility_function(perspective(m, state))
            # Weight by path probability and discount by gamma**depth.
            payoffs[m] += util * players[m].gamma**-neg_depth * prob
        if -neg_depth < eval_depth:
            actions = [players[m].get_action(perspective(m, state))
                       for m in range(len(players))]
            for child_prob, child_state in transition_function(state, actions):
                heapq.heappush(decision_heap,
                               (neg_depth - 1, next(tiebreak),
                                (prob * child_prob, child_state)))
    return payoffs
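# --- Hypothetical usage sketch for _expected_payoffs, again on the demo
# fixture. Path probabilities at each depth sum to one, and the demo utility
# is a constant 1.0, so each level d contributes gamma**d per player:
# eval_depth=2 gives 1 + 0.9 + 0.81 = 2.71 for both players.

def _demo_expected_payoffs():
    players = [_DemoPlayer(1), _DemoPlayer(0)]
    return _expected_payoffs(0, players, _demo_transition, eval_depth=2)
    # -> [2.71, 2.71] (up to floating-point rounding)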