def test_flatten_space_boxes(space):
    """Check that flattening ``space`` yields a 1-D ``Box`` of size ``flatdim(space)``."""
    flattened = utils.flatten_space(space)
    assert isinstance(flattened, Box), f"Expected {type(flattened)} to equal {Box}"

    expected_size = utils.flatdim(space)
    # Unpacking into a single name fails unless the shape has exactly one entry.
    (actual_size,) = flattened.shape
    assert actual_size == expected_size, f"Expected {actual_size} to equal {expected_size}"
def test_flat_space_contains_flat_points(space):
    """Check that flattened samples of ``space`` belong to the flattened space."""
    n_samples = 10

    # Draw every sample first, then flatten them, then build the flat space.
    drawn = []
    for _ in range(n_samples):
        drawn.append(space.sample())
    flat_points = [utils.flatten(space, point) for point in drawn]
    target_space = utils.flatten_space(space)

    for index, flat_point in enumerate(flat_points):
        is_member = flat_point in target_space
        assert is_member, f"Expected sample #{index} {flat_point} to be in {target_space}"
def test_flatten_space_boxes(space):
    """Check that ``flatten_space`` returns a one-dimensional ``Box`` whose
    length matches ``flatdim`` of the original space.
    """
    mismatch_template = "Expected {} to equal {}"

    box = utils.flatten_space(space)
    assert isinstance(box, Box), mismatch_template.format(type(box), Box)

    wanted_dim = utils.flatdim(space)
    # The flat space must have a shape with exactly one entry.
    (found_dim,) = box.shape
    assert found_dim == wanted_dim, mismatch_template.format(found_dim, wanted_dim)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build, for every player, an embedding network and a policy network.

    The embedding network reads the flattened ``'obs'`` sub-space of the
    player's observation and has 8 outputs. The policy network reads the
    flattened ``Dict`` made of an 8-dimensional intermediate ``Box`` and the
    player's ``'signal'`` sub-space, and has one output per action of that
    player's action space.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    self.obs_sizes = _get_size(obs_space.spaces[0])
    self.n_players = len(obs_space.spaces)
    self.n_actions = action_space.spaces[0].n

    intermediate_space = Box(low=0, high=2, shape=(8, ), dtype=np.float32)
    flat_obs_spaces = []
    flat_mid_spaces = []
    for pl in range(self.n_players):
        flat_obs_spaces.append(flatten_space(obs_space.spaces[pl].spaces['obs']))
        combined = Dict({
            'obs': intermediate_space,
            'signal': obs_space.spaces[pl]['signal'],
        })
        flat_mid_spaces.append(flatten_space(combined))

    assert self.n_players <= 2, "Not yet supported for more than 2 players"
    # TODO: make it support n_players > 2

    embed_config = {
        "fcnet_hiddens": [128, 128],
        "fcnet_activation": 'relu',
        "max_seq_len": 20,
    }

    # All embedding networks are created before any policy network.
    embed_models = {}
    for pl in range(self.n_players):
        embed_models[pl] = FullyConnectedNetwork(
            flat_obs_spaces[pl], intermediate_space, 8, embed_config,
            "{}_embeding".format(pl))
    self.embed_pl_models = embed_models

    policy_models = {}
    for pl in range(self.n_players):
        player_actions = action_space.spaces[pl]
        policy_models[pl] = FullyConnectedNetwork(
            flat_mid_spaces[pl], player_actions, player_actions.n,
            model_config, name)
    self.pl_models = policy_models

    # Set models as attributes to obtain parameters
    for pl in range(self.n_players):
        setattr(self, "embed_model_{}".format(pl), embed_models[pl])
        setattr(self, "model_{}".format(pl), policy_models[pl])
def test_flat_space_contains_flat_points(space):
    """Check that each flattened sample of ``space`` is a member of
    ``flatten_space(space)``.
    """
    raw_points = [space.sample() for _ in range(10)]

    flat_points = []
    for raw in raw_points:
        flat_points.append(utils.flatten(space, raw))

    container = utils.flatten_space(space)
    for idx, flat in enumerate(flat_points):
        assert flat in container, (
            'Expected sample #{} {} to be in {}'.format(idx, flat, container)
        )
def __init__(self, config, width, height, seeker, hiding, walls):
    """Store the game configuration, agents and walls, and build the spaces.

    Args:
        config: mapping with at least the ``'game'`` (``'map'``, ``'fps'``,
            ``'duration'``), ``'hiding'`` and ``'seeker'`` entries.
        width: width used for the off-screen surface and observations.
        height: height used for the off-screen surface and observations.
        seeker: seeking agent, added to the players sprite group.
        hiding: hiding agent, added to the players sprite group.
        walls: iterable of walls, added to the walls sprite group and
            drawn as white polygons on the off-screen surface.
    """
    self.default_cfg = config
    game_cfg = config['game']
    self.map_path = game_cfg['map']
    self.fps = game_cfg['fps']

    self.clock = pygame.time.Clock()
    self.screen = None
    self.dt = self.clock.tick_busy_loop(self.fps)

    self.cfg = game_cfg
    self.duration = game_cfg['duration']
    self.width = width
    self.height = height

    self.walls_group = pygame.sprite.Group()
    self.env_walls = walls
    self.walls_group.add(walls)

    self.player_seek = seeker
    self.player_hide = hiding
    self.players_group = pygame.sprite.Group()
    self.players_group.add(self.player_seek, self.player_hide)

    # Off-screen surface on which every wall is drawn as a white polygon.
    self.screen_lite = pygame.Surface((self.width, self.height))
    if self.walls_group:
        for wall in walls:
            outline = [(vertex.x, vertex.y) for vertex in wall.get_abs_vertices()]
            pygame.draw.polygon(self.screen_lite, (255, 255, 255), outline)

    self.p_hide_cfg = config['hiding']
    self.p_seek_cfg = config['seeker']
    self.agent_env = {}

    # Action space, for both agents:
    #   0 - NOOP
    #   1 - FORWARD MOVEMENT
    #   2 - BACKWARD MOVEMENT
    #   3 - ROTATE RIGHT (clockwise)
    #   4 - ROTATE LEFT (counter-clockwise)
    #   5 - SPECIAL (ADD/DELETE WALL)
    self.action_space = spaces.Discrete(6)

    # Two separate, identically-shaped observation boxes.
    self.observation_space_n = [
        spaces.Box(low=0, high=1, shape=(self.width, self.height))
        for _ in range(2)
    ]
    self.flatten_observation_space_n = list(
        map(flatten_space, self.observation_space_n)
    )
def test_dtypes(original_space, expected_flattened_dtype):
    """Check the dtypes produced by flattening a space and one of its samples.

    The flattened space must contain the flattened sample, have the expected
    dtype, and share that dtype with the flattened sample. The unflattened
    sample is then compared against the original sample.
    """
    flat_space = utils.flatten_space(original_space)
    sample = original_space.sample()
    flat_sample = utils.flatten(original_space, sample)
    restored_sample = utils.unflatten(original_space, flat_sample)

    assert flat_space.contains(flat_sample), \
        "Expected flattened_space to contain flattened_sample"

    assert flat_space.dtype == expected_flattened_dtype, \
        "Expected flattened_space's dtype to equal {}".format(expected_flattened_dtype)

    assert flat_sample.dtype == flat_space.dtype, \
        "Expected flattened_space's dtype to equal flattened_sample's dtype "

    compare_sample_types(original_space, sample, restored_sample)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Create one fully-connected network per player.

    Each network reads the flattened observation space of its player and has
    one output per action of that player's action space.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    self.obs_sizes = _get_size(obs_space.spaces[0])
    self.n_players = len(obs_space.spaces)
    self.n_actions = action_space.spaces[0].n

    flat_obs_spaces = [
        flatten_space(obs_space.spaces[pl]) for pl in range(self.n_players)
    ]

    networks = {}
    for pl, flat_obs in enumerate(flat_obs_spaces):
        player_actions = action_space.spaces[pl]
        networks[pl] = FullyConnectedNetwork(
            flat_obs, player_actions, player_actions.n, model_config, name)
    self.pl_models = networks

    # Set models as attributes to obtain parameters
    for pl, network in networks.items():
        setattr(self, "model_{}".format(pl), network)
def test_flatten_space(space, expected_flattened_space):
    """Check that flattening ``space`` gives exactly ``expected_flattened_space``."""
    actual = utils.flatten_space(space)
    assert actual == expected_flattened_space
def configure(self, setting: ContinualRLSetting):
    """Adapt the hyper-parameters to the given setting.

    The replay buffer is capped so that it fits within
    ``self.max_buffer_size_megabytes``. When the setting limits the number
    of training steps, the buffer size, the number of pure-exploration
    steps and the target-network update interval are reduced accordingly.

    Args:
        setting: the setting about to be trained on. Its observation space,
            ``max_steps`` and ``steps_per_task`` are read here.

    Raises:
        RuntimeError: if not even one flattened observation fits in the
            allowed buffer size.
    """
    super().configure(setting)

    # The default value for the buffer size in the DQN model is WAY too
    # large, so we re-size it depending on the size of the observations.
    flattened_observation_space = flatten_space(setting.observation_space)
    observation_size_bytes = flattened_observation_space.sample().nbytes

    # IF there are more than a few dimensions per observation, then we
    # should probably reduce the size of the replay buffer according to
    # the size of the observations.
    max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
    max_buffer_length = max_buffer_size_bytes // observation_size_bytes

    if max_buffer_length == 0:
        raise RuntimeError(
            f"Couldn't even fit a single observation in the buffer, "
            f"given the specified max_buffer_size_megabytes "
            f"({self.max_buffer_size_megabytes}) and the size of a "
            f"single observation ({observation_size_bytes} bytes)!")

    if self.hparams.buffer_size > max_buffer_length:
        calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
        calculated_size_gb = calculated_size_bytes / 1024**3
        warnings.warn(
            RuntimeWarning(
                f"The selected buffer size ({self.hparams.buffer_size} is "
                f"too large! (It would take roughly around "
                f"{calculated_size_gb:.3f}Gb to hold many observations alone! "
                f"The buffer size will be capped at {max_buffer_length} "
                f"entries."))
        self.hparams.buffer_size = int(max_buffer_length)

    # Don't use up too many of the observations from the task to fill up the buffer.
    # Truth is, we should probably get this to work first.

    # NOTE: Need to change some attributes depending on the maximal number of steps
    # in the environment allowed in the given Setting.
    if setting.max_steps:
        logger.info(
            f"Total training steps are limited to {setting.steps_per_task} steps "
            f"per task, {setting.max_steps} steps in total.")
        ten_percent_of_step_budget = setting.steps_per_task // 10

        if self.hparams.buffer_size > ten_percent_of_step_budget:
            warnings.warn(
                RuntimeWarning(
                    "Reducing max buffer size to ten percent of the step budget."
                ))
            self.hparams.buffer_size = ten_percent_of_step_budget

        if self.hparams.learning_starts > ten_percent_of_step_budget:
            logger.info(
                f"The model was originally going to use the first "
                f"{self.hparams.learning_starts} steps for pure random "
                f"exploration, but the setting has a max number of steps set to "
                f"{setting.max_steps}, therefore we will limit the number of "
                f"exploration steps to 10% of that 'step budget' = "
                f"{ten_percent_of_step_budget} steps.")
            self.hparams.learning_starts = ten_percent_of_step_budget

        if self.hparams.target_update_interval > ten_percent_of_step_budget:
            # Same for the 'update target network' interval.
            # The halved budget is 0 for budgets under 20 steps; keep the
            # interval at 1 or more so it stays a valid modulus.
            self.hparams.target_update_interval = max(
                ten_percent_of_step_budget // 2, 1)
            logger.info(
                f"Reducing the target network update interval to "
                f"{self.hparams.target_update_interval}, because of the limit on "
                f"training steps imposed by the Setting.")

    logger.info(
        f"Will use a Replay buffer of size {self.hparams.buffer_size}.")
def configure(self, setting: ContinualRLSetting):
    """Adapt the hyper-parameters to the given setting.

    The replay buffer is capped so that it fits within
    ``self.max_buffer_size_megabytes``. When the setting limits the number
    of training steps, the buffer size, the number of pure-exploration
    steps, the training frequency and the number of training steps per task
    are reduced accordingly.

    Args:
        setting: the setting about to be trained on. Its observation space,
            ``max_steps``, ``steps_per_task``, ``steps_per_phase`` and
            ``max_episode_steps`` are read here.

    Raises:
        RuntimeError: if not even one flattened observation fits in the
            allowed buffer size.
    """
    super().configure(setting)

    def _use_step_based_train_freq() -> None:
        """Replace a ``(frequency, unit)`` training frequency by a step count."""
        train_freq = self.hparams.train_freq
        if isinstance(train_freq, int):
            return
        if train_freq[1] == "step":
            self.hparams.train_freq = train_freq[0]
            return
        assert train_freq[1] == "episode"
        # Use some value based of the maximum episode length if available,
        # else use a "reasonable" default value.
        # TODO: Double-check that this makes sense.
        if setting.max_episode_steps:
            self.hparams.train_freq = setting.max_episode_steps
        else:
            self.hparams.train_freq = 10
        warnings.warn(
            RuntimeWarning(
                f"Need the training frequency units to be steps for now! "
                f"(Train freq has been changed to every "
                f"{self.hparams.train_freq} steps)."
            )
        )

    # The default value for the buffer size in the DQN model is WAY too
    # large, so we re-size it depending on the size of the observations.
    # NOTE: (issue #156) Only consider the images, not the task labels for these
    # buffer size calculations (since the task labels might be None and have the
    # np.object dtype).
    x_space = setting.observation_space.x
    flattened_observation_space = flatten_space(x_space)
    observation_size_bytes = flattened_observation_space.sample().nbytes

    # IF there are more than a few dimensions per observation, then we
    # should probably reduce the size of the replay buffer according to
    # the size of the observations.
    max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
    max_buffer_length = max_buffer_size_bytes // observation_size_bytes

    if max_buffer_length == 0:
        raise RuntimeError(
            f"Couldn't even fit a single observation in the buffer, "
            f"given the specified max_buffer_size_megabytes "
            f"({self.max_buffer_size_megabytes}) and the size of a "
            f"single observation ({observation_size_bytes} bytes)!"
        )

    if self.hparams.buffer_size > max_buffer_length:
        calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
        calculated_size_gb = calculated_size_bytes / 1024 ** 3
        warnings.warn(
            RuntimeWarning(
                f"The selected buffer size ({self.hparams.buffer_size} is "
                f"too large! (It would take roughly around "
                f"{calculated_size_gb:.3f}Gb to hold many observations alone! "
                f"The buffer size will be capped at {max_buffer_length} "
                f"entries."
            )
        )
        self.hparams.buffer_size = int(max_buffer_length)

    # NOTE: Need to change some attributes depending on the maximal number of steps
    # in the environment allowed in the given Setting.
    if setting.max_steps:
        logger.info(
            f"Total training steps are limited to {setting.steps_per_task} steps "
            f"per task, {setting.max_steps} steps in total."
        )
        ten_percent_of_step_budget = setting.steps_per_phase // 10

        if self.hparams.buffer_size > ten_percent_of_step_budget:
            warnings.warn(
                RuntimeWarning(
                    "Reducing max buffer size to ten percent of the step budget."
                )
            )
            self.hparams.buffer_size = ten_percent_of_step_budget

        if self.hparams.learning_starts > ten_percent_of_step_budget:
            logger.info(
                f"The model was originally going to use the first "
                f"{self.hparams.learning_starts} steps for pure random "
                f"exploration, but the setting has a max number of steps set to "
                f"{setting.max_steps}, therefore we will limit the number of "
                f"exploration steps to 10% of that 'step budget' = "
                f"{ten_percent_of_step_budget} steps."
            )
            self.hparams.learning_starts = ten_percent_of_step_budget

        # A (frequency, unit) tuple cannot be compared with an int by the
        # min()/max() clamp below, so convert it to a step count first.
        _use_step_based_train_freq()
        if self.hparams.train_freq != -1:
            # Update the model at least 2 times during each task, and at most
            # once per step.
            self.hparams.train_freq = min(
                self.hparams.train_freq,
                int(0.5 * ten_percent_of_step_budget),
            )
            self.hparams.train_freq = max(self.hparams.train_freq, 1)

        logger.info(f"Training frequency: {self.hparams.train_freq}")

    logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

    if setting.steps_per_phase:
        # No-op when the frequency is already an integer number of steps.
        _use_step_based_train_freq()

        # NOTE: We limit the number of training steps per task, such that we never
        # attempt to fill the buffer using more samples than the environment allows.
        if self.hparams.train_freq > setting.steps_per_phase:
            self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
            logger.info(
                f"Capping the n_steps to 10% of step budget length: "
                f"{self.hparams.n_steps}"
            )

        self.train_steps_per_task = min(
            self.train_steps_per_task,
            setting.steps_per_phase - self.hparams.train_freq - 1,
        )
        logger.info(
            f"Limitting training steps per task to {self.train_steps_per_task}"
        )