def make_robotics_env(env_id, seed, rank=0):
    """Build a seeded, monitored gym.Env for goal-based robotics tasks.

    The dict observation is flattened into a single array holding
    'observation' and 'desired_goal', and the Monitor wrapper records
    episode stats including the 'is_success' info key. When a logger
    directory is configured, monitor output goes to a per-rank file.
    """
    set_global_seeds(seed)
    env = FlattenDictWrapper(gym.make(env_id), ['observation', 'desired_goal'])
    # `logger.get_dir() and ...` yields None when no log dir is configured,
    # which Monitor accepts as "do not write to disk".
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path, info_keywords=('is_success',))
    env.seed(seed)
    return env
def train():
    """Train a DQN agent on an Atari env and save model + learning curves.

    Creates a timestamped run directory under ``args.log_dir``, dumps the
    hyperparameters to ``learning_prop.json``, builds the wrapped Atari
    environment, runs ``deepq.learn``, saves the policy to ``model.pkl``,
    and plots the training records.

    Raises:
        ValueError: if the target run directory already exists.
    """
    logger.configure()
    set_global_seeds(args.seed)

    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        # BUG FIX: the exception was previously constructed but never
        # raised, so a pre-existing directory was silently overwritten.
        raise ValueError("The directory already exists...", directory)

    # Persist hyperparameters for reproducibility; use a context manager
    # so the file handle is closed deterministically (was leaked before).
    with open(os.path.join(directory, 'learning_prop.json'), 'w') as f:
        json.dump(vars(args), f)

    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    # A non-positive value means "no periodic evaluation".
    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    if args.record == 1:
        env = Monitor(env, directory=args.log_dir)

    with tf.device(args.device):
        model = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
        act, records = deepq.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            train_freq=4,
            print_freq=1000,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            target_network_update_freq=args.target_update_freq,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            epoch_steps=args.nb_epoch_steps,
            gpu_memory=args.gpu_memory,
            double_q=args.double_q,
            save_dir=directory,
            nb_test_steps=nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    env.close()
    plot(records, directory)
def make_mujoco_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for MuJoCo.

    The global seed is offset by the MPI rank so parallel workers draw
    from distinct RNG streams, and each rank writes monitor output to
    its own file under the logger directory.
    """
    worker = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * worker)
    monitor_file = os.path.join(logger.get_dir(), str(worker))
    env = Monitor(gym.make(env_id), monitor_file)
    env.seed(seed)
    return env
def make(env_name, type, render=False, record=False, directory='', **kwargs):
    """Construct an environment of the requested family.

    env_name : str
        Name of an environment (e.g. 'Cartpole-v0').
    type : str
        One of ['atari', 'classic_control', 'classic_mdp',
        'target_tracking'] (plus 'ma_target_tracking').

    Imports for each family happen lazily so unused dependencies are
    never required. Raises ValueError for an unrecognized type.
    """
    if type == 'atari':
        from baselines0.common.atari_wrappers import make_atari
        from baselines0.common.atari_wrappers import wrap_deepmind
        from baselines0 import bench, logger
        env = wrap_deepmind(
            bench.Monitor(make_atari(env_name), logger.get_dir()),
            frame_stack=True, scale=True)
        if record:
            env = Monitor(env, directory=directory)
        return env
    if type == 'classic_control':
        env = gym.make(env_name)
        return Monitor(env, directory=directory) if record else env
    if type == 'classic_mdp':
        from envs import classic_mdp
        return classic_mdp.model_assign(env_name)
    if type == 'target_tracking':
        import ttenv
        return ttenv.make(env_name, render=render, record=record,
                          directory=directory, **kwargs)
    if type == 'ma_target_tracking':
        import maTTenv
        return maTTenv.make(env_name, render=render, record=record,
                            directory=directory, **kwargs)
    raise ValueError('Designate the right type of the environment.')
def save(self, path=None):
    """Save model to a pickle located at `path`.

    The TF state is checkpointed into a temporary directory, the whole
    directory is packed into a zip archive, and the raw archive bytes
    together with ``self._act_params`` are written via cloudpickle.
    Defaults to ``<logger dir>/model.pkl`` when `path` is None.
    """
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as tmp:
        save_state(os.path.join(tmp, "model"))
        archive = os.path.join(tmp, "packed.zip")
        with zipfile.ZipFile(archive, 'w') as zf:
            for root, _dirs, names in os.walk(tmp):
                for name in names:
                    full_path = os.path.join(root, name)
                    # Skip the archive itself — it lives in the same tree.
                    if full_path == archive:
                        continue
                    zf.write(full_path, os.path.relpath(full_path, tmp))
        with open(archive, "rb") as fh:
            model_data = fh.read()
    with open(path, "wb") as fh:
        cloudpickle.dump((model_data, self._act_params), fh)
def _thunk():
    """Build one seeded, monitored, DeepMind-wrapped Atari env instance.

    Closure over `env_id`, `seed`, `rank`, and `wrapper_kwargs` from the
    enclosing scope; intended for use with vectorized env constructors.
    """
    env = make_atari(env_id)
    env.seed(seed + rank)
    # None when no logger dir is configured — Monitor then skips disk output.
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path)
    return wrap_deepmind(env, **wrapper_kwargs)