def select(self, incremental=False):
    """
    Reduce this distribution to a single element by sampling.

    @param incremental: if C{True}, then select each key value in series (rather than picking out a joint vector all at once, default is C{False})
    @return: in incremental mode, a vector with one entry per key for which a choice had to be made (the second value returned by C{dist.sample(True)} plus the mass of the elements that precede the chosen one in the marginal's domain); otherwise whatever C{Distribution.select} returns
    """
    if not incremental:
        # Joint selection is delegated to the base class; pass its result on
        # to the caller instead of silently discarding it
        return Distribution.select(self)
    # Sample each key and keep track how likely each individual choice was
    sample = KeyedVector()
    # Materialize the keys: a dict view cannot be indexed by position
    keys = list(self.domain()[0].keys())
    index = 0
    while len(self) > 1:
        key = keys[index]
        dist = self.marginal(key)
        if len(dist) > 1:
            # Have to make a choice here
            element, sample[key] = dist.sample(True)
            # Figure out where the "spinner" ended up across entire pie chart
            for other in dist.domain():
                if other == element:
                    break
                sample[key] += dist[other]
            # Drop every joint vector that disagrees with the value just chosen
            for vector in self.domain():
                if vector[key] != element:
                    del self[vector]
            self.normalize()
        index += 1
    return sample
def next_victim(self, world):
    """
    Generate an expectation about the color of the next victim the player
    will deal with.

    :param world: the world holding the player and the 'ATOMIC' agent
    :return: a distribution over 'Green' / 'Yellow' (or None if the agent's
        beliefs contain no player models)
    :raises NotImplementedError: if the 'ATOMIC' agent does not have exactly
        one belief state
    """
    player = world.agents[self.parser.player_name()]
    action = world.getAction(player.name, unique=True)
    if action['verb'] == 'triage_Green':
        # Triaging green as we speak
        return Distribution({'Green': 1})
    elif action['verb'] == 'triage_Gold':
        # Triaging yellow as we speak
        return Distribution({'Yellow': 1})
    # Not so obvious who will be next
    agent = world.agents['ATOMIC']
    beliefs = agent.getBelief()
    if len(beliefs) == 1:
        agent_model, agent_beliefs = next(iter(beliefs.items()))
    else:
        raise NotImplementedError(
            'Unable to generate predictions unless agent has unique model')
    location = world.getState(player.name, 'loc', unique=True)
    prediction = None
    for player_model, player_model_prob in world.getModel(
            player.name, agent_beliefs).items():
        player_beliefs = player.models[player_model]['beliefs']
        fov = world.getState(player.name, 'vicInFOV', player_beliefs,
                             unique=True)
        if fov in {'Yellow', 'Green'}:
            # The next victim found is the one the player is looking at now
            next_seen = Distribution({fov: 1})
        else:
            # The next victim found is one in the player's current location
            next_seen = {
                'Yellow': world.getState(
                    WORLD, 'ctr_{}_Gold'.format(location),
                    player_beliefs).expectation(),
                'Green': world.getState(
                    WORLD, 'ctr_{}_Green'.format(location),
                    player_beliefs).expectation()
            }
            if sum(next_seen.values()) == 0:
                # No victim in the current room
                next_seen = {'Yellow': 1, 'Green': 1}
            next_seen = Distribution(next_seen)
            next_seen.normalize()
        if prediction is None:
            prediction = next_seen.scale_prob(player_model_prob)
        else:
            # Merge over the union of colors: a single-color next_seen (from
            # the field of view) must neither be indexed for a color it lacks
            # nor lose a color the running prediction has not seen yet
            combined = dict(prediction.items())
            for color, prob in next_seen.items():
                combined[color] = (combined.get(color, 0)
                                   + prob * player_model_prob)
            prediction = prediction.__class__(combined)
    return prediction
def __getitem__(self, index):
    """
    Look up the value of this tree for the given index.

    A leaf returns its stored value, a deterministic branch recurses into the
    child picked by evaluating the branch on the index, and a probabilistic
    branch returns the distribution over the outcomes of all of its children.

    :raises ValueError: if a deterministic branch has no child for the
        evaluated case
    """
    if self.isLeaf():
        return self.children[None]
    if self.branch is not None:
        # Deterministic branch
        case = self.branch.evaluate(index)
        try:
            chosen = self.children[case]
        except KeyError:
            logging.error('Missing child for case %s in tree:\n%s' % (case, self))
            raise ValueError('Missing child for case %s in tree' % (case))
        return chosen[index]
    # Probabilistic branch: accumulate the mass of every reachable outcome
    totals = {}
    for element in self.children.domain():
        weight = self.children[element]
        outcome = element[index]
        if isinstance(outcome, Distribution):
            # Child is itself uncertain, so fold its distribution in
            for item in outcome.domain():
                if item in totals:
                    totals[item] += weight * outcome[item]
                else:
                    totals[item] = weight * outcome[item]
        elif outcome in totals:
            totals[outcome] += weight
        else:
            totals[outcome] = weight
    return Distribution(totals)
def marginal(self, key):
    """
    Compute the marginal distribution over the values of a single key.

    :param key: the key whose value is read from every element of the domain
    :return: a distribution mapping each observed value of the key to the
        total probability of the elements carrying it
    """
    totals = {}
    for row in self.domain():
        value = row[key]
        mass = self[row]
        if value in totals:
            totals[value] += mass
        else:
            totals[value] = mass
    return Distribution(totals)
def post_step(self, world, act):
    """
    Log the 'ATOMIC' agent's current belief over player models.

    One entry per model is appended to ``self.model_data`` unless the last
    logged entry already carries the current 'seconds' value. When
    ``self.condition_dist`` is non-empty, the implied belief over conditions
    is appended to ``self.condition_data`` as well.

    :raises RuntimeError: if the 'ATOMIC' agent has more than one belief state
    """
    t = world.getState(WORLD, 'seconds', unique=True)
    is_new_timestep = len(self.model_data) == 0 or self.model_data[-1]['Timestep'] != t
    if not is_new_timestep:
        # Already made an inference for this timestep
        return
    player_name = self.player_name()
    player = world.agents[player_name]
    agent = world.agents['ATOMIC']
    # Store beliefs over player models
    beliefs = agent.getBelief()
    if len(beliefs) > 1:
        raise RuntimeError('Agent {} has {} possible models in true state'.format(agent.name, len(beliefs)))
    beliefs = next(iter(beliefs.values()))
    player_model = world.getFeature(modelKey(player_name), beliefs)
    for model in player_model.domain():
        record = {'Timestep': t, 'Belief': player_model[model]}
        # Find root model (i.e., remove the auto-generated numbers from the name)
        root = model
        while player.models[player.models[root]['parent']]['parent'] is not None:
            root = player.models[root]['parent']
        # Strip the leading "<player name>" prefix plus one separator character
        record['Model'] = root[len(player_name) + 1:]
        self.model_data.append(record)
    if not self.condition_dist:
        return
    # Mix the per-cluster condition distributions by the belief in each model
    mixture = Distribution()
    for model, model_prob in player_model.items():
        cluster_conditions = self.condition_dist[model_to_cluster(model)]
        for condition, condition_prob in cluster_conditions.items():
            mixture.addProb(condition, model_prob*condition_prob)
    mixture.normalize()
    for condition, condition_prob in mixture.items():
        self.condition_data.append({'Timestep': t, 'Belief': condition_prob, 'Condition': condition})
def multiply_matrix(self, other):
    """
    Apply the matrix ``other`` to this state in place.

    For every ``(rowKey, vector)`` pair in ``other.items()`` the weighted sum
    of the vector's column keys is computed (``keys.CONSTANT`` contributes its
    weight directly) and stored under ``rowKey``, either as a new certain
    value or inside every element of the affected distribution.

    :param other: the matrix to apply; must provide ``getKeysIn()`` and
        ``items()`` yielding ``(rowKey, vector)`` pairs
    """
    # Focus on subset that this matrix affects
    substates = self.substate(other.getKeysIn(), True)
    if substates:
        # NOTE(review): presumably merges the affected substates into one
        # joint distribution and returns its index -- confirm against collapse()
        destination = self.collapse(substates)
    else:
        destination = None
    # Disabled debugging / alternative lookup kept from the original:
    # if destination:
    #     print self.distributions[destination]
    #     print len(self.distributions[destination])
    # destination = self.findUncertainty(substates)
    # Go through each key this matrix sets
    for rowKey, vector in other.items():
        # NOTE(review): 'result' is never read in this method
        result = Distribution()
        if destination is None:
            # Every value is 100%
            total = 0
            for colKey in vector.keys():
                if colKey == keys.CONSTANT:
                    # Doesn't really matter
                    total += vector[colKey]
                else:
                    substate = self.keyMap[colKey]
                    # Read the value from the first element of the substate
                    value = self.distributions[substate].first()[colKey]
                    total += vector[colKey] * value
            assert not rowKey in self.keyMap, '%s already exists' % (
                rowKey)
            # Pick an index not yet used by any distribution: start at the
            # current count and walk downwards until a free one is found
            destination = len(self.distributions)
            while destination in self.distributions:
                destination -= 1
            # destination = max(self.keyMap.values())+1
            assert not destination in self.distributions, self.distributions[
                destination]
            # From here on 'destination' is set, so later rows take the
            # else-branch below
            self.join(rowKey, total, destination)
        else:
            # There is at least one uncertain multiplicand
            for state in self.distributions[destination].domain():
                # Remove the element, extend it with the new key, and put it
                # back with the probability it had before
                prob = self.distributions[destination][state]
                del self.distributions[destination][state]
                total = 0
                for colKey in vector.keys():
                    if colKey == keys.CONSTANT:
                        # Doesn't really matter
                        total += vector[colKey]
                    else:
                        substate = self.keyMap[colKey]
                        if substate == destination:
                            value = state[colKey]
                        else:
                            # Certainty
                            value = self.distributions[substate].first(
                            )[colKey]
                        total += vector[colKey] * value
                state[rowKey] = total
                self.distributions[destination][state] = prob
            # Record which distribution now holds the row key
            self.keyMap[rowKey] = destination
def load_clusters(fname):
    """
    Load per-player cluster assignments and reward weights from a CSV file.

    Every row must provide 'Cluster', 'Player name' and 'Filename' columns;
    all remaining columns are parsed as floats and treated as reward weights.

    :param fname: path of the CSV file to read
    :return: a tuple ``(reward_weights, cluster_map, condition_map)`` where
        ``reward_weights`` maps each cluster to the mean weight vector of its
        rows, ``cluster_map`` maps player names and filenames to clusters, and
        ``condition_map`` maps each cluster (plus ``None`` for all rows
        together) to a normalized distribution over condition labels
    """
    ignore = {'Cluster', 'Player name', 'Filename'}
    cluster_map = {}
    raw_weights = {}
    reward_weights = {}
    condition_map = {None: {}}
    # newline='' lets the csv module do its own newline handling
    with open(fname, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            cluster = int(row['Cluster'])
            weights = [
                np.array([
                    float(value) for field, value in row.items()
                    if field not in ignore
                ])
            ]
            raw_weights[cluster] = raw_weights.get(cluster, []) + weights
            cluster_map[row['Player name']] = cluster
            cluster_map[row['Filename']] = cluster
            condition = filename_to_condition(
                os.path.splitext(os.path.basename(row['Filename']))[0])
            condition_label = '{} {}'.format(condition['CondBtwn'],
                                             condition['CondWin'][1])
            if cluster not in condition_map:
                condition_map[cluster] = {}
            condition_map[cluster][condition_label] = condition_map[
                cluster].get(condition_label, 0) + 1
            # Update stats for universal cluster: count against the universal
            # tally itself, not against the current cluster's tally
            condition_map[None][condition_label] = condition_map[None].get(
                condition_label, 0) + 1
    for cluster, weights in raw_weights.items():
        reward_weights[cluster] = np.mean(weights, axis=0)
        condition_map[cluster] = Distribution(condition_map[cluster])
        condition_map[cluster].normalize()
    condition_map[None] = Distribution(condition_map[None])
    condition_map[None].normalize()
    logging.info('Baseline conditions: {}'.format(', '.join([
        'P({})={}'.format(c, p)
        for c, p in sorted(condition_map[None].items())
    ])))
    return reward_weights, cluster_map, condition_map
def set_player_models(world, observer_name, player_name, victims, param_list):
    """
    Create one player model per parameter specification and give the observer
    a uniform prior over them.

    :param world: the PsychSim World
    :type world: World
    :param observer_name: the name of the agent whose beliefs we will be specifying
    :type observer_name: str
    :param player_name: the name of the player agent to be modeled
    :type player_name: str
    :param victims: specification of victims
    :type victims: Victims
    :param param_list: list of dictionaries of model parameter specifications
        (each needs 'name' and 'reward'; 'horizon', 'rationality' and
        'selection' are optional)
    :type param_list: List[Dict]
    """
    observer = world.agents[observer_name]
    player = world.agents[player_name]

    # observer does not model itself
    observer.resetBelief(ignore={modelKey(observer.name)})

    # get the canonical name of the "true" player model
    true_model = player.get_true_model()

    for param_dict in param_list:
        model_name = param_dict['name']
        if model_name != true_model:
            player.addModel(model_name,
                            parent=true_model,
                            horizon=param_dict.get('horizon', 2),
                            rationality=param_dict.get('rationality', 0.5),
                            selection=param_dict.get('selection', 'distribution'))
        # String-keyed rewards go through the victims helper; otherwise each
        # key is an object that knows how to install its own reward
        if isinstance(next(iter(param_dict['reward'].keys())), str):
            victims.makeVictimReward(player, model_name, param_dict['reward'])
        else:
            for feature, weight in param_dict['reward'].items():
                feature.set_reward(player, weight, model_name)
        player.resetBelief(model=model_name, ignore={modelKey(observer.name)})

    # observer has uniform prior distribution over possible player models
    if len(player.models) > 1:
        # Divide by the number of candidate models actually listed, so the
        # prior sums to 1 even when the true model is itself in param_list or
        # the player already has other models
        world.setMentalModel(
            observer.name, player.name,
            Distribution({
                param_dict['name']: 1. / len(param_list)
                for param_dict in param_list
            }))

    # observer sees everything except true models
    hidden = {modelKey(player.name), modelKey(observer.name)}
    observer.omega = [key for key in world.state.keys() if key not in hidden]
    # rewardKey(player.name),
def collapseProbabilistic(self):
    """
    Utility method that combines any consecutive probabilistic branches at this node into a single distribution
    """
    if self.isProbabilistic():
        collapse = False
        distribution = Distribution(self.children)
        for child in self.children.domain():
            if child.isProbabilistic():
                # Probabilistic branch to merge
                collapse = True
                # Flatten the child first so only one level needs merging here
                child.collapseProbabilistic()
                del distribution[child]
                for grandchild in child.children.domain():
                    try:
                        distribution[grandchild] += self.children[child]*child.children[grandchild]
                    except KeyError:
                        distribution[grandchild] = self.children[child]*child.children[grandchild]
        if collapse:
            # Products of probabilities rarely add up to exactly 1.0 in
            # floating point, so compare against a small tolerance
            total = sum(distribution.values())
            assert abs(total - 1.) < 1e-8, 'Collapsed distribution sums to %s instead of 1' % (total)
            self.makeProbabilistic(distribution)
def __rmul__(self, other):
    """
    Left-multiply every element of this distribution by a vector.

    :param other: the left operand; only KeyedVector is supported
    :return: a distribution over the products ``other * vector``, where equal
        products have their probabilities added together
    :raises NotImplementedError: if ``other`` is not a KeyedVector
    """
    if not isinstance(other, KeyedVector):
        raise NotImplementedError
    totals = {}
    for vector in self.domain():
        product = other * vector
        if product in totals:
            totals[product] += self[vector]
        else:
            totals[product] = self[vector]
    return Distribution(totals)
def discretize_feature_in_place(world, feature, num_bins):
    """
    Discretizes the given feature's value/distribution according to the number of intended groups in place, i.e., by
    directly changing its value. Values that fall into the same bin have their probabilities added together.

    :param World world: the PsychSim world in which the feature is defined.
    :param str feature: the named feature to be discretized.
    :param int num_bins: the number of discretization bins or buckets.
    :return:
    """
    variable = world.variables[feature]
    high = variable['hi']
    low = variable['lo']
    ran = float(high - low)

    dist = world.getFeature(feature)
    binned = {}
    for val, prob in dist.items():
        # Snap the value to the nearest of num_bins evenly spaced points in [lo, hi]
        val = int(round((float(val - low) / ran) * (num_bins - 1))) * (ran / (num_bins - 1)) + low
        # Accumulate rather than overwrite, so that no probability mass is
        # lost when several values land in the same bin
        if val in binned:
            binned[val] += prob
        else:
            binned[val] = prob
    world.setFeature(feature, Distribution(binned))
def __getitem__(self, index):
    """
    Look up the value of this tree for the given index.

    A leaf returns its stored value, a deterministic branch recurses into the
    child picked by evaluating the branch on the index, and a probabilistic
    branch returns the distribution over the outcomes of all of its children.
    """
    if self.isLeaf():
        return self.children[None]
    if self.branch is not None:
        # Deterministic branch
        chosen = self.children[self.branch.evaluate(index)]
        return chosen[index]
    # Probabilistic branch: accumulate the mass of every reachable outcome
    totals = {}
    for element in self.children.domain():
        weight = self.children[element]
        outcome = element[index]
        if isinstance(outcome, Distribution):
            # Child is itself uncertain, so fold its distribution in
            for item in outcome.domain():
                if item in totals:
                    totals[item] += weight * outcome[item]
                else:
                    totals[item] = weight * outcome[item]
        elif outcome in totals:
            totals[outcome] += weight
        else:
            totals[outcome] = weight
    return Distribution(totals)
def load_clusters(fname):
    """
    Load per-player cluster assignments and reward weights from a CSV file.

    Every row must provide 'Cluster', 'Player name' and 'Filename' columns;
    all remaining columns are parsed as floats and treated as reward weights.

    :param fname: path of the CSV file to read
    :return: a tuple ``(reward_weights, cluster_map, condition_map)`` where
        ``reward_weights`` maps each cluster to the mean weight vector of its
        rows, ``cluster_map`` maps player names and filenames to clusters, and
        ``condition_map`` maps each cluster to a normalized distribution over
        condition labels
    """
    ignore = {'Cluster', 'Player name', 'Filename'}
    cluster_map = {}
    raw_weights = {}
    condition_map = {}
    with open(fname, 'r') as csvfile:
        for row in csv.DictReader(csvfile):
            cluster = int(row['Cluster'])
            # Everything that is not an identifying column is a reward weight
            vector = np.array([float(value) for field, value in row.items() if field not in ignore])
            raw_weights.setdefault(cluster, []).append(vector)
            for identifier in ('Player name', 'Filename'):
                cluster_map[row[identifier]] = cluster
            stem = os.path.splitext(os.path.basename(row['Filename']))[0]
            condition = filename_to_condition(stem)
            condition_label = '{} {}'.format(condition['CondBtwn'], condition['CondWin'][1])
            # Count how often each condition occurs within the cluster
            counts = condition_map.setdefault(cluster, {})
            counts[condition_label] = counts.get(condition_label, 0) + 1
    reward_weights = {}
    for cluster, weights in raw_weights.items():
        reward_weights[cluster] = np.mean(weights, axis=0)
        normalized = Distribution(condition_map[cluster])
        condition_map[cluster] = normalized
        normalized.normalize()
    return reward_weights, cluster_map, condition_map
rights.append(action) # create a new model for the agent agent.addModel(get_fake_model_name(agent), parent=agent.get_true_model()) # defines payoff matrices agent1.setReward(get_reward_tree(agent1, sides[0], sides[1]), 1) agent2.setReward(get_reward_tree(agent2, sides[1], sides[0]), 1) # define order world.setOrder([{agent1.name, agent2.name}]) # add mental model of the other for each agent world.setMentalModel(agent1.name, agent2.name, Distribution({get_fake_model_name(agent2): 1})) world.setMentalModel(agent2.name, agent1.name, Distribution({get_fake_model_name(agent1): 1})) # 'hides' right actions from models by setting them illegal # (therefore agents should always choose right because they think the other will choose left) set_illegal_action(agent1, rights[0], [get_fake_model_name(agent1)]) set_illegal_action(agent2, rights[1], [get_fake_model_name(agent2)]) # # ** unnecessary / just for illustration **: set left actions legal for both the agents and their models # set_legal_action(agent1, lefts[0], [agent1.get_true_model(), get_fake_model_name(agent1)]) # set_legal_action(agent2, lefts[1], [agent2.get_true_model(), get_fake_model_name(agent2)]) agent1.resetBelief(model=agent1.get_true_model()) agent1.resetBelief(model=get_fake_model_name(agent1)) agent2.resetBelief(model=agent2.get_true_model())
action='store_true', help= 'Whether to log agents\' rewards wrt chosen actions in addition to current state.' ) args = parser.parse_args() args.log_rewards = True conv = Converter() conv.convert_file(RDDL_FILE, verbose=True) p1 = conv.world.agents['p1'] p1.create_belief_state() p1.set_observations(unobservable={stateKey('p2', 'correct_sem')}) p1.setBelief(stateKey('p2', 'correct_sem'), Distribution({ True: 0.5, False: 0.5 })) p2 = conv.world.agents['p2'] p2.create_belief_state() p2.set_fully_observable() p1_zero = p1.zero_level() p1.create_belief_state(model=p1_zero) p1.setAttribute('selection', 'distribution', p1_zero) p2_zero = p2.zero_level() p2.create_belief_state(model=p2_zero) p2.setAttribute('selection', 'distribution', p2_zero) conv.world.setModel(p2.name, p2_zero, p1.name, p1.get_true_model()) conv.world.setModel(p1.name, p1_zero, p2.name, p2.get_true_model())
} })) tree = makeTree(setToConstantMatrix(my_dec, COOPERATED)) world.setDynamics(my_dec, action, tree) # defines payoff matrices (equal to both agents) agent1.setReward(get_reward_tree(agent1, agents_dec[0], agents_dec[1]), 1) agent2.setReward(get_reward_tree(agent2, agents_dec[1], agents_dec[0]), 1) # define order my_turn_order = [{agent1.name, agent2.name}] world.setOrder(my_turn_order) # add true mental model of the other to each agent world.setMentalModel(agent1.name, agent2.name, Distribution({agent2.get_true_model(): 1})) world.setMentalModel(agent2.name, agent1.name, Distribution({agent1.get_true_model(): 1})) for h in range(MAX_HORIZON + 1): logging.info('====================================') logging.info(f'Horizon {h}') # set horizon (also to the true model!) and reset decisions for i in range(len(agents)): agents[i].setHorizon(h) agents[i].setHorizon(h, agents[i].get_true_model()) world.setFeature(agents_dec[i], NOT_DECIDED, recurse=True) for t in range(NUM_STEPS):
def setup():
    """
    Build the benchmark world from the module-level ``args``.

    Creates ``args.agents`` agents, each with ``args.features_agent`` randomly
    initialized integer features, a reward maximizing one randomly chosen
    feature, a static mental copy of its true model, and ``args.actions``
    actions whose dynamics set features from ``args.features_action``
    following features. Every agent then gets a belief in one of its own
    features and a mental model of every other agent.

    :return: the fully configured world
    """
    global args

    np.random.seed(args.seed)

    # create world and add agents
    world = World()
    world.memory = False
    world.parallel = args.parallel
    agents = []
    agent_features = {}
    for agent_idx in range(args.agents):
        agent = Agent('Agent' + str(agent_idx))
        world.addAgent(agent)
        agents.append(agent)

        # set agent's params
        agent.setAttribute('discount', 1)
        agent.setHorizon(args.horizon)

        # add features, initialize at random
        features = []
        agent_features[agent] = features
        for feat_idx in range(args.features_agent):
            name = 'Feature{}'.format(feat_idx)
            feat = world.defineState(agent.name, name, int, lo=0, hi=1000)
            world.setFeature(feat, np.random.randint(0, MAX_FEATURE_VALUE))
            features.append(feat)

        # set random reward function
        target = np.random.choice(features)
        agent.setReward(maximizeFeature(target, agent.name), 1)

        # add mental copy of true model and make it static (we do not have beliefs in the models)
        agent.addModel(get_fake_model_name(agent), parent=get_true_model_name(agent))
        agent.setAttribute('static', True, get_fake_model_name(agent))

        # add actions
        for ac in range(args.actions):
            action = agent.addAction({'verb': '', 'action': 'Action{}'.format(ac)})
            # walk over the features in strides of args.features_action,
            # starting at a feature offset equal to the action's index
            i = ac
            while i + args.features_action < args.features_agent:
                weights = {features[i + j + 1]: 1 for j in range(args.features_action)}
                tree = makeTree(multi_set_matrix(features[i], weights))
                world.setDynamics(features[i], action, tree)
                i += args.features_action

    # define order
    world.setOrder([{ag.name for ag in agents}])

    for agent in agents:
        # test belief update:
        # - set a belief in one feature to the actual initial value (should not change outcomes)
        # world.setModel(agent.name, Distribution({True: 1.0}))
        rand_feat = np.random.choice(agent_features[agent])
        agent.setBelief(rand_feat, world.getValue(rand_feat))
        print('{} will always observe {}={}'.format(agent.name, rand_feat, world.getValue(rand_feat)))

    # set mental model of each agent in all other agents
    for i in range(args.agents):
        first = agents[i]
        for j in range(i + 1, args.agents):
            second = agents[j]
            world.setMentalModel(first.name, second.name, Distribution({get_fake_model_name(second): 1}))
            world.setMentalModel(second.name, first.name, Distribution({get_fake_model_name(first): 1}))

    return world
action = agent.addAction({'verb': 'move', 'action': 'left'}) tree = makeTree(incrementMatrix(pos, -1)) world.setDynamics(pos, action, tree) action = agent.addAction({'verb': 'move', 'action': 'right'}) tree = makeTree(incrementMatrix(pos, 1)) world.setDynamics(pos, action, tree) # define rewards (maximize position, i.e., always go right) agent.setReward(maximizeFeature(pos, agent.name), 1) # set order world.setOrder([agent.name]) # agent has initial beliefs about its position, which will be updated after executing actions agent.omega = {actionKey(agent.name)} # todo should not need this agent.setBelief(pos, Distribution({10: 0.5, 12: 0.5})) # agent.setBelief(pos, 10, get_true_model_name(agent)) print('====================================') print('Initial beliefs:') world.printBeliefs(agent.name) for i in range(MAX_STEPS): print('====================================') print('Current pos: {0}'.format(world.getValue(pos))) # decision: left, right or no-move? step = world.step() # prints all models and beliefs print('____________________________________')