def __init__(self, config, section):
    """Build the environment and its gym spaces from ``section`` of ``config``.

    Options read from ``section``:

    * ``max_episode_steps`` (int, required).
    * ``robot_world``, ``state_sampler``, ``reward_func``,
      ``observation_func``, ``goal_sampler`` (required): each names another
      config section that is instantiated with ``make``.
    * ``action_space_bounds`` (optional): lower and upper action bound,
      e.g. ``-1.0, 1.0`` or ``[-1.0, 1.0]``. Defaults to ``[-1.0, 1.0]``.
    * ``render_gui`` (optional): section name instantiated with ``make``.

    Args:
        config: Config object providing ``get``/``getint``/``has_option``.
        section: Name of the section describing this environment.

    Raises:
        ValueError: If ``action_space_bounds`` cannot be parsed into at
            least two floats.
    """
    super().__init__(config=config, section=section)
    self.max_episode_steps = config.getint(section, 'max_episode_steps')

    robot_world_section = config.get(section, 'robot_world')
    self.robot_world = make(config, robot_world_section)
    state_sampler_section = config.get(section, 'state_sampler')
    self.state_sampler = make(config, state_sampler_section)
    reward_func_section = config.get(section, 'reward_func')
    self.reward_func = make(config, reward_func_section)
    obs_func_section = config.get(section, 'observation_func')
    self.get_obs = make(config, obs_func_section)
    goal_sampler_section = config.get(section, 'goal_sampler')
    self.goal_sampler = make(config, goal_sampler_section)

    # Reset once so the observation space can be derived from the shapes of
    # an actual observation dict.
    env_obs = self.reset()
    observation_spaces = OrderedDict()
    for key in env_obs.keys():
        observation_spaces[key] = gym.spaces.Box(
            -np.inf, np.inf, shape=env_obs[key].shape, dtype='float32')
    self.observation_space = gym.spaces.Dict(observation_spaces)

    self.action_space_bounds = [-1.0, 1.0]
    if config.has_option(section, 'action_space_bounds'):
        raw_bounds = config.get(section, 'action_space_bounds')
        if isinstance(raw_bounds, str):
            # Iterating a string yields single characters, so split it into
            # number tokens first; brackets/parentheses and commas are
            # accepted but optional.
            raw_bounds = (raw_bounds.strip().strip('[]()')
                          .replace(',', ' ').split())
        bounds = [float(x) for x in raw_bounds]
        if len(bounds) < 2:
            raise ValueError(
                "action_space_bounds must provide a lower and an upper "
                "bound, got {!r}".format(bounds))
        self.action_space_bounds = bounds
    self.action_space = gym.spaces.Box(
        low=self.action_space_bounds[0],
        high=self.action_space_bounds[1],
        shape=(self.robot_world.get_action_dim(),))

    self.steps = 0

    self.render_gui = None
    if config.has_option(section, 'render_gui'):
        render_gui_section = config.get(section, 'render_gui')
        self.render_gui = make(config, render_gui_section)

    self.np_random = np.random
def __init__(self, config, section):
    """Read the experiment setup and SAC hyper parameters from ``config``.

    ``env_maker`` and ``seed`` are required options of ``section``; every
    hyper parameter falls back to the default shown below when its option
    is missing. ``n_workers`` is only read (and then required) when
    ``parallel_sampling`` is true.

    Args:
        config: Config object providing ``get``/``getint``/``getfloat``/
            ``getboolean`` with ``fallback`` support.
        section: Name of the section describing this algorithm.
    """

    def opt_int(option, default):
        # Optional integer option of this section.
        return config.getint(section, option, fallback=default)

    def opt_float(option, default):
        # Optional float option of this section.
        return config.getfloat(section, option, fallback=default)

    # Setup
    self.experiment_dir = None
    self.config = config
    self.section = section
    env_maker_section = config.get(section, 'env_maker')
    self.env_maker = make(config, env_maker_section)
    self.seed = config.getint(section, 'seed')
    self.snapshot_mode = config.get(section, 'snapshot_mode', fallback='last')

    # SAC hyper parameters
    # NOTE(review): the hidden sizes are parsed with eval(), which executes
    # arbitrary expressions from the config file. Only use trusted configs,
    # or consider ast.literal_eval if plain literals are sufficient.
    policy_sizes_expr = config.get(section, 'policy_hidden_sizes',
                                   fallback='[256, 256]')
    self.policy_hidden_sizes = eval(policy_sizes_expr)
    qf_sizes_expr = config.get(section, 'qf_hidden_sizes',
                               fallback='[256, 256]')
    self.qf_hidden_sizes = eval(qf_sizes_expr)

    # Read as float so that values written like "1e6" are accepted.
    self.buffer_capacity_in_transitions = int(
        opt_float('buffer_capacity_in_transitions', 1e6))
    self.gradient_steps_per_itr = opt_int('gradient_steps_per_iteration', 1000)
    self.max_path_length = opt_int('max_path_length', 1000)
    self.max_eval_path_length = opt_int('max_eval_path_length', 1000)
    self.min_buffer_size = int(opt_float('min_buffer_size', 1e4))
    self.target_update_tau = opt_float('target_update_tau', 5e-3)
    self.discount = opt_float('discount', 0.99)
    self.buffer_batch_size = opt_int('buffer_batch_size', 256)
    self.reward_scale = opt_float('reward_scale', 1.)
    self.steps_per_epoch = opt_int('steps_per_epoch', 1)
    self.batch_size = opt_int('batch_size', 1000)
    self.n_epochs = opt_int('n_epochs', 1000)

    self.parallel_sampling = config.getboolean(
        section, 'parallel_sampling', fallback=False)
    if self.parallel_sampling:
        self.n_workers = config.getint(section, 'n_workers')
def main(args):
    """Run the experiment described by the config file in ``args``.

    Loads ``args.config_file``, creates the experiment directory below the
    ``EXPERIMENTS_DIR`` environment variable, saves a copy of the config
    there, then builds the algorithm named by ``[experiment] algo`` and
    trains it.

    Args:
        args: Object with a ``config_file`` attribute (path to the config).

    Raises:
        KeyError: If the ``EXPERIMENTS_DIR`` environment variable is unset.
    """
    path_to_config = args.config_file
    config = ConfigParser()
    config.read(path_to_config)
    experiment_no = config.get('experiment', 'experiment_no')

    # Make sure the root directory exists before asking for the
    # per-experiment directory inside it.
    experiments_root = os.environ['EXPERIMENTS_DIR']
    os.makedirs(experiments_root, exist_ok=True)
    experiment_dir = utils.get_experiment_dir(
        experiments_root, experiment_no, mkdir=True)

    # Keep a copy of the config next to the experiment outputs.
    config.save(utils.get_config_path(experiment_dir, experiment_no))

    algo_section = config.get('experiment', 'algo')
    algo = make(config, algo_section)
    algo.set_experiment_dir(experiment_dir)
    algo.train()
def __init__(self, config, section):
    """Collect the PPO parameters and environment makers from ``config``.

    Args:
        config: Config object providing ``get``/``getint``/``get_section``.
        section: Name of the section describing this algorithm.
    """
    self._learn = ppo2.learn
    self.experiment_dir = None
    self.config = config
    self.section = section

    # ppo parameters
    params = config.get_section(section, self._get_parameter_descr_dict())
    # The 'network' entry names a config section; swap it for the network
    # function built from that section, which is what learn receives.
    params['network'] = get_network(config, params['network'])
    self.params = params

    # env
    self.env_maker = make(config, config.get(section, 'env_maker'))
    self.vec_env_maker = VecEnvMaker(
        config, config.get(section, 'vec_env_maker'))
    self.seed = config.getint(section, 'seed')
def get_network(config, section):
    """Return a callable object for building a network.

    The section in config must have exactly one of the options 'type' or
    'entrypoint'.

    The type can be set to mlp, lstm or mlp_lstm_mlp.

    The entrypoint option specifies the entrypoint to a class definition
    that inherits NetworkFn. This however is not a strict requirement as
    long as the object returns a network when called.

    Refer to baselines.common.models for the complete list of values type
    can take and the requirements for using a custom network function.

    Example:

        [my_network]
        type = mlp
        num_layers = 2
        num_hidden = 128

        or

        [my_network]
        entrypoint: roam_rl.openai_baselines.models:MLP
        num_layers = 2
        num_hidden = 128

    Args:
        config: Config object providing ``get`` and ``has_option``.
        section: Name of the network section.

    Raises:
        ValueError: If both 'type' and 'entrypoint' are given, if neither
            is given, or if 'type' is not one of the supported values.
    """
    has_type = config.has_option(section, 'type')
    has_entrypoint = config.has_option(section, 'entrypoint')

    # Raise explicitly instead of asserting: assert statements are removed
    # when Python runs with -O, which would silently skip this check.
    if has_type and has_entrypoint:
        raise ValueError("cannot specify both type and entrypoint")

    if has_type:
        _mapping = {'mlp': MLP, 'lstm': LSTM, 'mlp_lstm_mlp': MLP_LSTM_MLP}
        _type = config.get(section, 'type')
        if _type not in _mapping:
            raise ValueError(
                "unknown network type {!r}, expected one of {}".format(
                    _type, sorted(_mapping)))
        return _mapping[_type](config, section)

    if has_entrypoint:
        return make(config, section)

    raise ValueError("network unknown")