def build_q_model_and_distribution_comp(policy, obs_space, action_space, config):
    # Keys of the observation space that must be used at train and test time
    policy.train_obs_keys = config["train_obs_keys"]
    policy.test_obs_keys = config["test_obs_keys"]

    # Check whether the policy action space is wrapped inside a Tuple space
    policy.requires_tupling = False
    if isinstance(action_space, Tuple) and len(action_space.spaces) == 1:
        policy.action_space = action_space.spaces[0]
        action_space = action_space.spaces[0]
        policy.requires_tupling = True
    if not isinstance(action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DQN.".format(action_space))

    # Get the real observation space
    if isinstance(obs_space, Box):
        assert hasattr(obs_space, "original_space"), "Invalid observation space"
        obs_space = obs_space.original_space
    if isinstance(obs_space, Tuple):
        obs_space = obs_space.spaces[0]
    assert isinstance(obs_space, Dict), "Invalid observation space"
    policy.has_action_mask = "action_mask" in obs_space.spaces
    assert all(k in obs_space.spaces
               for k in policy.train_obs_keys), "Invalid train keys specification"
    assert all(k in obs_space.spaces
               for k in policy.test_obs_keys), "Invalid test keys specification"

    # Get the observation space used for training
    if config["train_obs_space"] is None:
        train_obs_space = obs_space
    else:
        train_obs_space = config["train_obs_space"]
    if isinstance(train_obs_space, Box):
        assert hasattr(train_obs_space,
                       "original_space"), "Invalid observation space"
        train_obs_space = train_obs_space.original_space
    if isinstance(train_obs_space, Tuple):
        train_obs_space = train_obs_space.spaces[0]

    # Observation spaces used for training and testing
    sp = Dict({k: obs_space.spaces[k] for k in policy.test_obs_keys})
    policy.real_test_obs_space = flatten_space(sp)
    policy.real_test_obs_space.original_space = sp
    sp = Dict({k: train_obs_space.spaces[k] for k in policy.train_obs_keys})
    policy.real_train_obs_space = flatten_space(sp)
    policy.real_train_obs_space.original_space = sp
    policy.n_actions = action_space.n

    # "action_mask" and "signal" are consumed by the policy, not by the model
    model_space = Dict({
        k: obs_space.spaces[k]
        for k in policy.test_obs_keys
        if k != "action_mask" and k != "signal"
    })
    return build_q_models(policy, flatten_space(model_space), action_space,
                          config), TorchCategorical
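
# Illustration of the `original_space` convention used above: RLlib keeps the
# pre-flattening space on an `original_space` attribute, and this builder
# mirrors that by hand. A minimal, self-contained sketch:
from gym.spaces import Box, Dict, flatten_space

sp = Dict({"obs": Box(-1.0, 1.0, (4,))})
flat = flatten_space(sp)    # Box(4,) with the Dict's bounds concatenated
flat.original_space = sp    # attach the structured space for later lookup
assert flat.shape == (4,)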
def __init__(self, env: Env, flatten_obs=True, flatten_actions=True):
    super(Flatten, self).__init__(env)
    self._flatten_obs = flatten_obs
    self._flatten_actions = flatten_actions
    if flatten_obs:
        self.observation_space = spaces.flatten_space(env.observation_space)
    if flatten_actions:
        # Flatten the action space, then normalize its bounds to [-1, 1]
        self.action_space = spaces.flatten_space(env.action_space)
        self.action_space = Box(low=-1.0, high=1.0,
                                shape=self.action_space.shape)
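
# What the two-step action flattening above produces: `flatten_space` yields a
# Box whose per-dimension bounds come from the original Dict, and the wrapper
# then replaces it with a normalized [-1, 1] Box of the same shape. A sketch
# with hypothetical sub-spaces (a complete wrapper would also need an
# `action()` method mapping normalized values back to the true bounds):
from gym import spaces

act = spaces.Dict({
    "steer": spaces.Box(-0.5, 0.5, (1,)),
    "throttle": spaces.Box(0.0, 1.0, (1,)),
})
flat = spaces.flatten_space(act)                          # Box(2,), bounds concatenated
norm = spaces.Box(low=-1.0, high=1.0, shape=flat.shape)   # as in the wrapper
assert norm.shape == flat.shape == (2,)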
def test_flatten(self):
    # We flatten Discrete to 1 value
    assert su.flatdim(self.space) == 25
    # gym flattens Discrete to one-hot
    assert gyms.flatdim(self.space) == 35

    asample = su.torch_point(self.space, self.space.sample())
    flattened = su.flatten(self.space, asample)
    unflattened = su.unflatten(self.space, flattened)
    assert self.same(asample, unflattened)

    # Suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        flattened_space = su.flatten_space(self.space)
        assert flattened_space.shape == (25,)
        # The maximum comes from Discrete(11)
        assert flattened_space.high.max() == 11.0
        assert flattened_space.low.min() == -10.0

        gym_flattened_space = gyms.flatten_space(self.space)
        assert gym_flattened_space.shape == (35,)
        # The maximum comes from Box(-10, 10, (3, 4))
        assert gym_flattened_space.high.max() == 10.0
        assert gym_flattened_space.low.min() == -10.0
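
# The contrast being tested: gym one-hot encodes Discrete when flattening,
# while `su` (the project's space utils) keeps it as a single value. A gym-only
# sketch with hypothetical component spaces:
import gym.spaces as gyms

space = gyms.Dict({"d": gyms.Discrete(11), "b": gyms.Box(-10, 10, (3, 4))})
flat = gyms.flatten_space(space)
assert flat.shape == (11 + 12,)   # Discrete(11) -> 11 one-hot dims, Box -> 12
assert flat.high.max() == 10.0    # one-hot dims are bounded by 1, the Box by 10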
def __init__(self, env: gym.Env):
    """Flattens the observations of an environment.

    Args:
        env: The environment to apply the wrapper to
    """
    super().__init__(env)
    self.observation_space = spaces.flatten_space(env.observation_space)
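
# Hypothetical usage of the wrapper above (gym ships it as
# gym.wrappers.FlattenObservation); the env id is a placeholder, substitute
# any registered environment with a Dict observation space:
import gym
from gym.wrappers import FlattenObservation

env = FlattenObservation(gym.make("SomeDictObsEnv-v0"))  # placeholder id
obs = env.reset()
assert obs.shape == env.observation_space.shape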
def __init__(self, env, continuous=True):
    super().__init__(env)
    self.CONTINUOUS = continuous
    # Workarounds for continuous and discrete action spaces
    if not self.CONTINUOUS:
        self.action_space = MultiDiscrete(
            [space.n for _, space in env.action_space.spaces.items()])
        self.labels = env.action_space.spaces.keys()
    else:
        self.action_space = flatten_space(self.action_space)
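
# The discrete branch above turns a Dict of Discrete sub-actions into a single
# MultiDiscrete. A standalone sketch with hypothetical sub-actions:
from gym.spaces import Dict, Discrete, MultiDiscrete

action_space = Dict({"fire": Discrete(2), "move": Discrete(4)})
multi = MultiDiscrete([space.n for _, space in action_space.spaces.items()])
assert list(multi.nvec) == [2, 4]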
def __init__(self):
    self.__version__ = "0.1.0"
    self.viewer = None
    self.template = None
    self.canvas = None
    self.template_radial_map = None
    self.cur_state = {}
    self.state_history = []
    self.action_history = []
    self.painter = Painter()
    self.renderer = None
    self._prev_loss = 0
    self._configure_environment()
    logger.info(f"PaintingEnv - Version {self.__version__}")
    self.cur_step = 0

    # -- ACTION SPACE -- #
    # ------------------ #
    # (hue, saturation, value)
    color_space = spaces.Box(np.array([-0.3, -0.3, -0.3]),
                             np.array([0.3, 0.3, 0.3]))
    # (direction, distance, radius, pendown)
    motion_space = spaces.Box(np.array([-math.pi / 4, 1, -3, 0]),
                              np.array([math.pi / 4, 10, 3, 1]))
    # brush_space = spaces.Box(np.array([0]), np.array([1]))  # (pen up, pen down)
    self.action_space = spaces.Dict(
        {"color": color_space, "motion": motion_space})
    self.action_space = spaces.flatten_space(self.action_space)

    # -- OBSERVATION SPACE -- #
    # ----------------------- #
    img_patch_space = spaces.Box(low=0, high=1, shape=OBS_FRAME_SHAPE)
    color_space = spaces.Box(np.array([0, 0, 0]), np.array([1, 1, 1]),
                             dtype=np.float32)
    motion_space = spaces.Box(np.array([0, 0, 0]),
                              np.array([2 * math.pi, 15, 1]),
                              dtype=np.float32)
    # brush_space = spaces.Box(np.array([0]), np.array([1]), dtype=np.float32)
    self.observation_space = spaces.Dict({
        "patch": img_patch_space,
        "color": color_space,
        "motion": motion_space,
    })
    # self.observation_space = spaces.flatten_space(self.observation_space)

    self.seed()
    self.reset()
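
# Because the env exposes a flattened action Box, an agent's flat action
# vector can be mapped back to the structured Dict with `spaces.unflatten`.
# A sketch using the same action sub-spaces as above:
import math
import numpy as np
from gym import spaces

color_space = spaces.Box(np.array([-0.3, -0.3, -0.3]), np.array([0.3, 0.3, 0.3]))
motion_space = spaces.Box(np.array([-math.pi / 4, 1, -3, 0]),
                          np.array([math.pi / 4, 10, 3, 1]))
dict_space = spaces.Dict({"color": color_space, "motion": motion_space})
flat_space = spaces.flatten_space(dict_space)              # Box(7,)
action = spaces.unflatten(dict_space, flat_space.sample())
assert action["color"].shape == (3,) and action["motion"].shape == (4,)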
def __init__(self, env):
    super(GoalEnvFlattenObservation, self).__init__(env)
    wrapped_observation_space = env.observation_space
    assert isinstance(wrapped_observation_space, spaces.Dict), (
        "GoalEnvFlattenObservation is only usable with dict observations.")
    unwrapped_observation_space = wrapped_observation_space['observation']
    self.observation_space = spaces.Dict([
        ('observation',
         copy.deepcopy(spaces.flatten_space(unwrapped_observation_space))),
        ('achieved_goal',
         copy.deepcopy(wrapped_observation_space.spaces['achieved_goal'])),
        ('desired_goal',
         copy.deepcopy(wrapped_observation_space.spaces['desired_goal'])),
    ])
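
# A wrapper that redefines observation_space also needs the matching
# per-observation transform. A minimal sketch of the companion method,
# assuming the same key layout (not part of the original snippet):
def observation(self, observation):
    # Flatten only the 'observation' entry; the goal entries pass through.
    return {
        'observation': spaces.flatten(
            self.env.observation_space.spaces['observation'],
            observation['observation']),
        'achieved_goal': observation['achieved_goal'],
        'desired_goal': observation['desired_goal'],
    }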
def __init__(self, env):
    super(FlattenObservation, self).__init__(env)
    self.observation_space = spaces.flatten_space(env.observation_space)
def make_model_and_action_dist(policy, obs_space, action_space, config):
    """Create the model neural network and action distribution."""
    policy.device = (torch.device("cuda")
                     if torch.cuda.is_available() else torch.device("cpu"))
    policy.log_stats = config["log_stats"]  # flag to log statistics
    if policy.log_stats:
        policy.stats_dict = {}
        policy.stats_fn = config["stats_fn"]

    # Keys of the observation space that must be used at train and test time
    # ('signal' and 'action_mask' will be excluded from the actual obs space)
    policy.train_obs_keys = config["train_obs_keys"]
    policy.test_obs_keys = config["test_obs_keys"]

    # Check whether the policy action space is wrapped inside a Tuple space
    policy.requires_tupling = False
    if isinstance(action_space, Tuple) and len(action_space.spaces) == 1:
        policy.action_space = action_space.spaces[0]
        action_space = action_space.spaces[0]
        policy.requires_tupling = True
    if not isinstance(action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DQN.".format(action_space))

    # Get the real observation space
    if isinstance(obs_space, Box):
        assert hasattr(obs_space, "original_space"), "Invalid observation space"
        obs_space = obs_space.original_space
    if isinstance(obs_space, Tuple):
        obs_space = obs_space.spaces[0]
    assert isinstance(obs_space, Dict), "Invalid observation space"
    policy.has_action_mask = "action_mask" in obs_space.spaces
    assert all(k in obs_space.spaces
               for k in policy.train_obs_keys), "Invalid train keys specification"
    assert all(k in obs_space.spaces
               for k in policy.test_obs_keys), "Invalid test keys specification"

    # Get the observation space used for training
    if config["train_obs_space"] is None:
        train_obs_space = obs_space
    else:
        train_obs_space = config["train_obs_space"]
    if isinstance(train_obs_space, Box):
        assert hasattr(train_obs_space,
                       "original_space"), "Invalid observation space"
        train_obs_space = train_obs_space.original_space
    if isinstance(train_obs_space, Tuple):
        train_obs_space = train_obs_space.spaces[0]

    # Observation spaces used for training and testing
    sp = Dict({k: obs_space.spaces[k] for k in policy.test_obs_keys})
    policy.real_test_obs_space = flatten_space(sp)
    policy.real_test_obs_space.original_space = sp
    model_space = Dict({
        k: obs_space.spaces[k]
        for k in policy.test_obs_keys
        if k != "signal" and k != "action_mask"
    })
    sp = Dict({k: train_obs_space.spaces[k] for k in policy.train_obs_keys})
    policy.real_train_obs_space = flatten_space(sp)
    policy.real_train_obs_space.original_space = sp
    policy.n_actions = action_space.n

    def update_target():
        pass

    policy.update_target = update_target
    model = FullyConnectedNetwork(flatten_space(model_space),
                                  action_space,
                                  action_space.n,
                                  name="FcNet",
                                  model_config=config['model']).to(policy.device)
    return model, ModelCatalog.get_action_dist(action_space, config,
                                               framework='torch')
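
# Sketch of how such a factory is typically plugged into the (older) RLlib
# torch policy template; `dummy_loss` is a stand-in, not the real DQN loss:
import torch
from ray.rllib.policy.torch_policy_template import build_torch_policy

def dummy_loss(policy, model, dist_class, train_batch):
    # Placeholder with the expected loss_fn signature.
    return torch.tensor(0.0, requires_grad=True)

MyDQNPolicy = build_torch_policy(
    name="MyDQNPolicy",
    loss_fn=dummy_loss,
    make_model_and_action_dist=make_model_and_action_dist,
)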
def __init__(self, obs_space, action_space, config):
    """Only Dict observation spaces are allowed."""
    super().__init__(obs_space, action_space, config)

    # General configs
    self.framework = "torch"
    self.n_agents = len(obs_space.original_space.spaces)
    # TODO (fede): increase the number of team agents to arbitrary
    assert self.n_agents == 2, \
        "At this moment only two-team agents are supported; {} is not a " \
        "valid number".format(self.n_agents)
    self.n_actions = action_space.spaces[0].n
    self.device = (torch.device("cuda")
                   if torch.cuda.is_available() else torch.device("cpu"))
    self.beta = config["beta"]
    self.gamma = self.beta * config['factor_ent']
    self.n_train_signals = config['n_train_signals']
    # Flag that regulates whether to log statistics
    self.log_stats = config["log_stats"]
    self.eval_fn = config["stats_fn"]

    # Keys from the observation space that must be used at training and test time
    self.train_obs_keys = config["train_obs_keys"]
    self.test_obs_keys = config["test_obs_keys"]

    # Get and validate the real observation space
    # (uniform observation and action spaces are assumed for the players)
    agent_obs_space = obs_space.original_space.spaces[0]
    assert isinstance(agent_obs_space, Dict), "Invalid observation space"
    assert "signal" in agent_obs_space.spaces, \
        "Observation space must contain field 'signal': " + \
        str(agent_obs_space.spaces)
    self.real_test_obs_space = flatten_space(
        Tuple([agent_obs_space] * self.n_agents))
    self.real_test_obs_space.original_space = Tuple(
        [agent_obs_space] * self.n_agents)
    self.test_obs_size = _get_size(self.real_test_obs_space)
    self.signal_size = _get_size(agent_obs_space.spaces["signal"])
    if "action_mask" in agent_obs_space.spaces:
        mask_shape = tuple(agent_obs_space.spaces["action_mask"].shape)
        assert mask_shape == (self.n_actions,), "Invalid shape for action mask"

    # Get and validate the train observation space
    # (uniform observation and action spaces are assumed for the players)
    if config["train_obs_space"] is None:
        train_obs_space = agent_obs_space
    else:
        train_obs_space = config["train_obs_space"]
    if isinstance(train_obs_space, Tuple):
        train_obs_space = train_obs_space.spaces[0]
    self.real_train_obs_space = flatten_space(
        Tuple([train_obs_space] * self.n_agents))
    self.real_train_obs_space.original_space = Tuple(
        [train_obs_space] * self.n_agents)

    agent_obs_space_signaled = Tuple([
        Dict({
            **{
                k: agent_obs_space.spaces[k]
                for k in self.test_obs_keys
                if k != "signal" and k != "action_mask"
            },
            **{"signal": MultiDiscrete([2] * self.n_train_signals)}
        })
    ] * self.n_agents)

    # Training signaler
    self.signaler = Signaler()

    # Models
    self.model = ModelCatalog.get_model_v2(
        agent_obs_space_signaled,
        action_space,
        self.n_actions,
        config["model"],
        framework="torch",
        name="SignaledFCNet",
        default_model=MultiAgentFullyConnectedNetwork)
    self.signaler_model = ModelCatalog.get_model_v2(
        MultiDiscrete([2]),
        Discrete(self.n_train_signals),
        self.n_train_signals,
        config['sig_model'],
        framework="torch",
        name="SignalerNet",
        default_model=FullyConnectedNetwork)

    # Exploration
    self.exploration = self._create_exploration()

    # Set up the optimizers and losses
    # TODO (fede): add custom choice possibility for the optimizer
    self.model_optimiser = config["model_optimiser"]["type"](
        self.model.parameters(), lr=config["model_optimiser"]["lr"])
    self.signaler_optimiser = config["sig_model_optimiser"]["type"](
        self.signaler_model.parameters(), lr=config["sig_model_optimiser"]["lr"])

    # lr/beta scheduling algorithm (experimental)
    # self._curr_ts = 0
    # self._prev_lr_update = 0
    # self._prev_beta_update = 0

    self.classification_loss = nn.CrossEntropyLoss()
    def EntropyLoss(dist, reduce=True):
        # Mean (or per-sample) sum of p * log(p): the negative entropy
        # of the softmax distribution over the `dist` logits.
        S = nn.Softmax(dim=-1)
        LS = nn.LogSoftmax(dim=-1)
        b = S(dist) * LS(dist)
        b = torch.sum(b, 1)
        if reduce:
            b = torch.mean(b)
        return b

    self.entropy_loss = EntropyLoss
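
# For reference, the closure above computes the mean (or per-sample) sum of
# p * log(p), i.e. the negative entropy, so minimizing it pushes the policy
# toward higher entropy. A standalone check with hypothetical logits:
import torch
import torch.nn as nn

def entropy_loss(dist, reduce=True):
    b = nn.Softmax(dim=-1)(dist) * nn.LogSoftmax(dim=-1)(dist)
    b = torch.sum(b, 1)
    return torch.mean(b) if reduce else b

logits = torch.randn(8, 5)
assert entropy_loss(logits).item() <= 0.0             # entropy >= 0
assert entropy_loss(logits, reduce=False).shape == (8,)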