Example #1
    def __init__(self,
                 *,
                 num_agents,
                 num_hazards,
                 identical_rewards,
                 observation_type,
                 hazard_risk=0.5,
                 collision_risk=0.0):
        '''
        Args:
         - num_agents [int] number of agents in scenario
         - num_hazards [int] number of hazard landmarks in the scenario
         - identical_rewards [bool] True if all agents receive the exact same reward, False if rewards are "local" to agents
         - observation_type [str] "direct" if each entity is observed directly, "histogram" if entities are binned in a spatial grid
         - hazard_risk [float] max probability of failure caused by hazard landmark
         - collision_risk [float] probability of failure caused by collision
        '''

        # check inputs
        assert isinstance(num_agents, int)
        assert isinstance(num_hazards, int)
        assert (num_hazards == 0 or num_hazards == 1)
        assert isinstance(identical_rewards, bool)
        assert (observation_type == "direct"
                or observation_type == "histogram")
        assert (hazard_risk >= 0.0 and hazard_risk <= 1.0)
        assert (collision_risk >= 0.0 and collision_risk <= 1.0)

        # set member vars
        self.num_agents = num_agents
        self.num_hazards = num_hazards
        self.identical_rewards = identical_rewards
        self.observation_type = observation_type
        self.hazard_risk = hazard_risk
        self.collision_risk = collision_risk

        # create list of landmarks
        # Note: the RadialReward function is not directly used to calculate reward in this scenario, hence the peak value of 0.0.
        #   A non-zero radius is used for numerical reasons.
        landmarks = []
        for _ in range(self.num_agents):
            landmarks.append(
                RiskRewardLandmark(risk_fn=None,
                                   reward_fn=RadialReward(1.0, 0.0)))
        for _ in range(self.num_hazards):
            landmarks.append(
                # hazard landmark uses the configured hazard_risk as its peak failure probability
                RiskRewardLandmark(risk_fn=RadialRisk(_LANDMARK_SIZE,
                                                      self.hazard_risk),
                                   reward_fn=RadialReward(1.0, 0.0)))
        self.scenario_landmarks = landmarks
        self.n_landmarks = len(self.scenario_landmarks)
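
Below is a minimal usage sketch for this constructor; it assumes the method belongs to a Scenario class like those defined in the later examples, and the argument values are illustrative only:

# hypothetical instantiation; all arguments are keyword-only per the signature above
scenario = Scenario(num_agents=3,
                    num_hazards=1,
                    identical_rewards=True,
                    observation_type="direct",
                    hazard_risk=0.5,
                    collision_risk=0.0)
assert scenario.n_landmarks == scenario.num_agents + scenario.num_hazards  # 4 landmarks total
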
Example #2
    def make_world(self):
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
        world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN
        world.distance_resistance_exponent = _DISTANCE_RESISTANCE_EXPONENT

        # add landmarks
        world.origin_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.destination_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.landmarks = [
            world.origin_terminal_landmark,
            world.destination_terminal_landmark
        ]
        for lm in _NON_TERMINAL_LANDMARKS:
            world.landmarks.append(lm)

        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark_%d' % i
            landmark.collide = False
            landmark.movable = False
            landmark.size = _LANDMARK_SIZE
            # properties for landmarks
            if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
                # TODO: make colors heatmap of risk probability over all bounds
                landmark.color = np.array([
                    landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
                ])
            else:
                landmark.color = np.array([0.25, 0.25, 0.25])

        # make initial conditions
        self.reset_world(world)
        return world
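
A short driving sketch for this method follows. It assumes the enclosing class can be constructed without arguments (as with the Scenario classes defined in Examples #5-#7); if it uses a constructor like the one in Example #1, the keyword arguments shown there are required instead:

# hypothetical driver for the make_world() shown above
scenario = Scenario()
world = scenario.make_world()          # builds landmarks and calls reset_world
for lm in world.landmarks:
    print(lm.name, lm.size, lm.color)  # landmark_0, landmark_1, ... with size and RGB color
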
Example #3
    def make_world(self):
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.connection_reward = _CONNECTION_REWARD
        world.termination_reward = _TERMINATION_REWARD
        world.render_connections = True

        # add landmarks
        world.origin_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.destination_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.hazard_landmark = _NON_TERMINAL_LANDMARKS[0]
        world.hazard_landmark.ignore_connection_rendering = True
        world.landmarks = [
            world.origin_terminal_landmark,
            world.destination_terminal_landmark, world.hazard_landmark
        ]

        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark_%d' % i
            landmark.collide = False
            landmark.movable = False
            landmark.size = _LANDMARK_SIZE
            # properties for landmarks
            if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
                # TODO: make colors heatmap of risk probability over all bounds
                landmark.color = np.array([
                    landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
                ])
            else:
                landmark.color = np.array([0.25, 0.25, 0.25])

        # make initial conditions
        self.reset_world(world)
        return world
Example #4
    def make_world(self):
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
        world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN

        # create and add terminal landmarks
        # no intermediate landmarks in this scenario
        world.origin_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.origin_terminal_landmark.name = 'origin'
        world.origin_terminal_landmark.state.p_pos = np.array([-0.75, -0.75])
        world.destination_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.destination_terminal_landmark.name = 'destination'
        world.destination_terminal_landmark.state.p_pos = np.array(
            [0.75, 0.75])

        # create landmark list and set properties
        world.landmarks = [
            world.origin_terminal_landmark, world.destination_terminal_landmark
        ]
        for i, landmark in enumerate(world.landmarks):
            landmark.state.p_vel = np.zeros(world.dim_p)
            landmark.collide = False
            landmark.movable = False
            landmark.size = _LANDMARK_SIZE
            # terminal landmarks here have no risk function, so guard against None before coloring
            if landmark.risk_fn is not None:
                landmark.color = np.array(
                    [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
            else:
                landmark.color = np.array([0.25, 0.25, 0.25])

        # make initial conditions
        self.reset_world(world)
        return world
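
For reference, the origin and destination terminals in this scenario sit at opposite corners; their separation can be checked directly from the hard-coded positions above (illustrative only):

import numpy as np

origin = np.array([-0.75, -0.75])
destination = np.array([0.75, 0.75])
print(np.linalg.norm(destination - origin))  # ~2.12, the straight-line span agents must cover
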
Example #5
import numpy as np  # np.inf is used below
from particle_environments.common import RadialPolynomialRewardFunction2D as RadialReward
from particle_environments.common import RadialBernoulliRiskFunction2D as RadialRisk
from rl_algorithms.scenariolearning import ScenarioHeuristicAgentTrainer

# Scenario Parameters
_MAX_COMMUNICATION_DISTANCE = np.inf
_AGENT_SIZE = 0.15
_LANDMARK_SIZE = 0.05
_AGENT_OBSERVATION_LEN = 5
_LANDMARK_OBSERVATION_LEN = 3
_NUM_AGENTS = 3

_LANDMARKS = []
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=RadialRisk(_LANDMARK_SIZE, 0.5),
                       reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_N_LANDMARKS = len(_LANDMARKS)


class Scenario(BaseScenario):
    # static class
    num_agents = _NUM_AGENTS

    def make_world(self):
        world = HazardousWorld(collision_termination_probability=0.0)
Example #6
_MAX_CONNECTION_DISTANCE = 0.35
_MAX_OBSERVATION_DISTANCE = 1.0
_CONNECTION_REWARD = 1.0
_TERMINATION_REWARD = -0.0
_AGENT_SIZE = 0.01
_LANDMARK_SIZE = 0.025
_N_RADIAL_BINS = 4
_N_ANGULAR_BINS = 8
_N_OBSERVED_TERMINATIONS = 5
_N_TERMINALS = 2
_ZERO_THRESHOLD = 1e-6

_NON_TERMINAL_LANDMARKS = []
_NON_TERMINAL_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=RadialRisk(0.1),
                       reward_fn=RadialReward(0.1, 10.0)))


class Scenario(BaseScenario):
    # static class
    num_agents = 20

    def make_world(self):
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.connection_reward = _CONNECTION_REWARD
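
The _N_RADIAL_BINS, _N_ANGULAR_BINS, and _MAX_OBSERVATION_DISTANCE constants suggest that the "histogram" observation mentioned in Example #1 bins neighboring entities into a polar grid. The sketch below is only one plausible indexing scheme; the function name polar_bin_index and the clipping behavior are assumptions, since the actual format_observation helper is not shown here:

import numpy as np

def polar_bin_index(delta, max_dist=1.0, n_radial=4, n_angular=8):
    """Map a relative 2-D position to a (radial, angular) bin pair (illustrative only)."""
    dist = np.linalg.norm(delta)
    radial = min(int(dist / max_dist * n_radial), n_radial - 1)   # clip far entities into the outer ring
    angle = np.arctan2(delta[1], delta[0]) % (2 * np.pi)          # wrap angle into [0, 2*pi)
    angular = min(int(angle / (2 * np.pi) * n_angular), n_angular - 1)
    return radial, angular

print(polar_bin_index(np.array([0.3, 0.3])))  # (1, 1) with the default bin counts
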
Example #7
from particle_environments.mager.world import TemporarilyObservableRiskRewardLandmark as TORRLandmark
from particle_environments.mager.observation import format_observation
from particle_environments.common import is_collision, distance, delta_pos
from particle_environments.common import RadialPolynomialRewardFunction2D as RadialReward
from particle_environments.common import RadialBernoulliRiskFunction2D as RadialRisk
from particle_environments.common import DefaultParameters as DP

# Scenario Parameters
_MAX_COMMUNICATION_DISTANCE = 0.5
_AGENT_SIZE = 0.01
_LANDMARK_SIZE = 0.025

_LANDMARKS = []
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))


class Scenario(BaseScenario):
    # static class
    num_agents = 10