Exemplo n.º 1
0
    def get_value(self, state):
        """
        Computes the probability-weighted sum of this feature's value over the given state, scaled by
        `self.normalize_factor`.

        For every outcome of the location feature, the visit-count feature of that location is looked up and
        every one of its outcomes contributes either the count itself or, when `self.inverse` is set,
        the difference `time - count` for every outcome of the time feature.
        :param state: the state whose `distributions` / `keyMap` are queried for the features' outcomes.
        :return: the dot product between collected values and their joint probabilities, times `self.normalize_factor`.
        """

        def _outcomes(feature):
            # (key-value mapping, probability) pairs of the sub-distribution of `state` holding `feature`
            return state.distributions[state.keyMap[feature]].items()

        outcomes = []
        weights = []
        for loc_sample, loc_prob in _outcomes(self.location_feat):
            # converts the stored numeric value into the location it stands for
            location = self.world.float2value(self.location_feat, loc_sample[self.location_feat])
            visits_feat = get_num_visits_location_key(self.agent, location)

            for visits_sample, visits_prob in _outcomes(visits_feat):
                num_visits = visits_sample[visits_feat]
                joint_prob = loc_prob * visits_prob

                if not self.inverse:
                    # plain mode: the value is the visit count itself
                    outcomes.append(num_visits)
                    weights.append(joint_prob)
                    continue

                # inverse mode: the time distribution is only queried here, so it is never
                # touched when the feature is not inverted
                for time_sample, time_prob in _outcomes(self.time_feat):
                    outcomes.append(time_sample[self.time_feat] - num_visits)
                    weights.append(joint_prob * time_prob)

        # weighs every value by its joint probability and applies the normalization
        return np.array(outcomes).dot(np.array(weights)) * self.normalize_factor
Exemplo n.º 2
0
    def set_reward(self, agent, weight, model=None):
        """
        Builds a reward tree with one branch per entry of `self.all_locations` and registers it on the agent.

        Each branch writes to the agent's reward key either the location's visit-count feature or, when
        `self.inverse` is set, the combination `time_feat - visit count`.
        :param agent: the agent receiving the reward; its `name` determines the reward key.
        :param weight: the reward weight, multiplied by `self.normalize_factor` before being passed on.
        :param model: forwarded unchanged to `agent.setReward`.
        """
        reward_key = rewardKey(agent.name)

        # root of the tree: test over the location feature against all known locations,
        # with a no-change fallback under the `None` key
        tree = {}
        tree['if'] = equalRow(self.location_feat, self.all_locations)
        tree[None] = noChangeMatrix(reward_key)

        # one branch per location, keyed by the location's index in `self.all_locations`
        for idx, location in enumerate(self.all_locations):
            visits_feat = get_num_visits_location_key(agent, location)
            if self.inverse:
                branch = dynamicsMatrix(reward_key, {self.time_feat: 1., visits_feat: -1.})
            else:
                branch = setToFeatureMatrix(reward_key, visits_feat)
            tree[idx] = branch

        agent.setReward(makeTree(tree), weight * self.normalize_factor, model)
Exemplo n.º 3
0
    def set_reward(self, agent, weight, model=None):
        """
        Builds a reward tree with one branch per entry of `self.all_locations` and registers it on the agent.

        Each branch tests the location's visit-count feature against a threshold of 1 and sets the agent's
        reward key to the constant 1 when the test holds and to 0 otherwise.
        :param agent: the agent receiving the reward; its `name` determines the reward key.
        :param weight: the reward weight, multiplied by `self.normalize_factor` before being passed on.
        :param model: forwarded unchanged to `agent.setReward`.
        """
        reward_key = rewardKey(agent.name)

        # root of the tree: test over the location feature against all known locations,
        # with a no-change fallback under the `None` key
        tree = {}
        tree['if'] = equalRow(self.location_feat, self.all_locations)
        tree[None] = noChangeMatrix(reward_key)

        # one binary sub-tree per location, keyed by the location's index in `self.all_locations`
        for idx, location in enumerate(self.all_locations):
            visits_feat = get_num_visits_location_key(agent, location)
            # NOTE(review): whether `thresholdRow` is strict or inclusive is not visible here --
            # confirm that a threshold of 1 matches the intended "location was visited" test
            branch = {}
            branch['if'] = thresholdRow(visits_feat, 1)
            branch[True] = setToConstantMatrix(reward_key, 1)
            branch[False] = setToConstantMatrix(reward_key, 0)
            tree[idx] = branch

        agent.setReward(makeTree(tree), weight * self.normalize_factor, model)
Exemplo n.º 4
0
def get_locations_frequencies(trajectories, agents, locations):
    """
    Accumulates, for each location, the expected value of the agent's visit-count feature as found in the
    world of the last step of every provided trajectory.
    :param list[list[(World, Distribution)]] trajectories: the set of trajectories, each a sequence of pairs whose first element is the world at that step.
    :param Agent or list[Agent] agents: either one agent used for all trajectories, or a list with one agent per trajectory.
    :param list[str] locations: the list of possible world locations.
    :rtype: dict[str,float]
    :return: a dictionary mapping each location to the sum, over all trajectories, of the expected visit count.
    """
    # a single agent is replicated so that there is one entry per trajectory
    if isinstance(agents, Agent):
        agents = [agents] * len(trajectories)
    assert len(trajectories) == len(agents), 'One agent per set of trajectories has to be provided'

    totals = np.zeros(len(locations))
    for idx, trajectory in enumerate(trajectories):
        # only the world of the trajectory's final step is inspected
        final_world = trajectory[-1][0]
        totals += [
            final_world.getFeature(get_num_visits_location_key(agents[idx], location)).expectation()
            for location in locations
        ]
    return dict(zip(locations, totals))