# Parse command-line arguments and launch COPOS training.
def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)
    # args.env = "MountainCarContinuous-v0"
    train_copos(args)

# Build/load a model without training (num_timesteps=0) and replay it in the
# YamaXRealForwardWalk-v0 environment.
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)
    args.num_timesteps = 0
    args.play = True
    args.env = 'YamaXRealForwardWalk-v0'
    model, env = train(args, extra_args)
    env.close()

    # Rebuild the environment for rendering.
    env = build_env(args)
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        # Zero LSTM state (cell + hidden, hence 2 * nlstm) and a "done" mask.
        return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1,))

    state, dones = initialize_placeholders(**extra_args)
    # Render until interrupted; the trailing env.close() is never reached.
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        env.render()
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            obs = env.reset()
    env.close()

# Standard Baselines run entry point: train, optionally save, optionally play.
def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)

    # Configure the logger; disable log output in child MPI processes (rank > 0).
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                # Recurrent policies carry hidden state between steps.
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model

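# A minimal usage sketch, assuming the OpenAI Baselines conventions that the
# main(args) above follows: the script is driven from the command line, e.g.
#
#   python -m baselines.run --alg=ppo2 --env=CartPole-v1 --num_timesteps=1e6 --play
#
# and dispatched through an __main__ guard:
import sys

if __name__ == '__main__':
    main(sys.argv)
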
# Aggregate "hit time" statistics from log files and plot mean/std curves for
# the 'scrb' and 'plain' methods.
def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    base_dir = extra_args["base_dir"]
    name = extra_args["name"]
    results = dict()
    save_dir = set_default_value(extra_args, 'save_dir', "/")

    std_type = 1  # 1: average the per-run stds; otherwise: std across run means
    d = 10        # dilution factor for the x-axis
    f = 1         # forwarded to parse_log
    trail = 1     # number of trailing points to drop from each run

    for method in ['scrb', 'plain']:
        log_pattern, legend_name = method_to_log_pattern(method)
        log_files = extract_log_files(base_dir, patterns=[name, log_pattern])
        values = []
        stds = []
        length = np.inf
        for logfile in log_files:
            value = parse_log(logfile, field_name="test/hit_time_mean",
                              normalize=False, dilute_fact=d, f=f)[:-trail]
            std = parse_log(logfile, field_name="test/hit_time_std",
                            normalize=False, dilute_fact=d, f=f)[:-trail]
            values.append(value)
            stds.append(std)
            if len(value) < length:
                length = len(value)

        # Truncate every run to the shortest one so the arrays can be stacked.
        values = [value[:length] for value in values]
        stds = [std[:length] for std in stds]
        if std_type == 1:
            standard_deviation = np.mean(stds, axis=0)
        else:
            standard_deviation = np.std(values, axis=0)

        results[method] = dict()
        results[method]["mean"] = np.mean(values, axis=0)
        results[method]["std"] = standard_deviation
        results[method]["xscale"] = d
        results[method]["name"] = legend_name

    plot(results, save_dir)

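# A hedged sketch of how the extra_args lookups above get populated: the
# Baselines helper parse_cmdline_kwargs turns unknown '--key=value' flags into
# a dict, so an invocation roughly like this (script name and paths are
# hypothetical) would supply base_dir, name, and save_dir:
#
#   python plot_hit_times.py --base_dir=/tmp/logs --name=exp1 --save_dir=/tmp/plots
#
# giving extra_args == {'base_dir': '/tmp/logs', 'name': 'exp1', 'save_dir': '/tmp/plots'}.
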
# Train, then either render the trained model on screen (--play) or record a
# video of one episode with a Monitor wrapper.
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    args.num_env = 1
    extra_args = parse_cmdline_kwargs(unknown_args)
    model, env = train(args, extra_args)
    env.close()

    logger.log("Running trained model")
    env = build_env(args)
    if not args.play:
        # %s formats as seconds since the epoch (platform-dependent).
        ts = time.gmtime()
        directory = time.strftime("./render/%s", ts)
        logger.log("Output video to directory:", directory)
        env.envs = [gym.wrappers.Monitor(env.envs[0], directory=directory)]
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        return np.zeros((args.num_env, 2 * nlstm)), np.zeros((1,))

    state, dones = initialize_placeholders(**extra_args)
    NUM_VIDEO = 1  # number of episodes to run before exiting
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        if args.play:
            env.render()
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            NUM_VIDEO -= 1
            if NUM_VIDEO <= 0:
                break
            obs = env.reset()
    env.close()

# Argument parser for multiagent (MADDPG-style) experiments.
def parse_args():
    parser = argparse.ArgumentParser(
        "Reinforcement Learning experiments for multiagent environments")
    # Environment
    parser.add_argument("--scenario", type=str, default="simple",
                        help="name of the scenario script")
    parser.add_argument("--max-episode-len", type=int, default=25,
                        help="maximum episode length")
    parser.add_argument("--num-episodes", type=int, default=60000,
                        help="number of episodes")
    parser.add_argument("--num-adversaries", type=int, default=None,
                        help="number of adversaries")
    parser.add_argument("--good-policy", type=str, default="maddpg",
                        help="policy for good agents")
    parser.add_argument("--adv-policy", type=str, default="maddpg",
                        help="policy of adversaries")
    # Core training parameters
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate for Adam optimizer")
    parser.add_argument("--gamma", type=float, default=0.95,
                        help="discount factor")
    parser.add_argument("--batch-size", type=int, default=1024,
                        help="number of episodes to optimize at the same time")
    parser.add_argument("--num-units", type=int, nargs="+", default=[64, 64],
                        help="number of units in the mlp")
    # Checkpointing
    parser.add_argument("--exp-name", type=str, default=None,
                        help="name of the experiment")
    parser.add_argument("--save-dir", type=str, default=None,
                        help="directory in which training state and model should be saved")
    parser.add_argument("--save-rate", type=int, default=10000,
                        help="save model once every time this many episodes are completed")
    parser.add_argument("--print-rate", type=int, default=1000,
                        help="print training scalars once every time this many episodes are completed")
    parser.add_argument("--load-dir", type=str, default=None,
                        help="directory in which training state and model are loaded")
    # Evaluation
    parser.add_argument("--restore", action="store_true", default=False)
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--save-render-images", action="store_true", default=False)
    parser.add_argument("--render-dir", type=str, default=None,
                        help="directory in which render images should be saved")
    parser.add_argument("--benchmark", action="store_true", default=False)
    parser.add_argument("--benchmark-iters", type=int, default=100000,
                        help="number of iterations run for benchmarking")
    parser.add_argument("--benchmark-dir", type=str, default=None,
                        help="directory where benchmark data is saved")
    parser.add_argument("--plots-dir", type=str, default=None,
                        help="directory where plot data is saved")
    args, unknown_args = parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Fill in defaults that depend on other arguments.
    if args.exp_name is None:
        args.exp_name = "experiment-{}".format(args.scenario)
    if args.save_dir is None:
        args.save_dir = os.path.join(logger.get_dir(), "checkpoints")
    if (args.render_dir is None) and (args.load_dir is not None):
        args.render_dir = args.load_dir + "-render"
    if args.benchmark_dir is None:
        args.benchmark_dir = os.path.join(logger.get_dir(), "benchmark_files")
    if args.plots_dir is None:
        args.plots_dir = os.path.join(logger.get_dir(), "learning_curves")

    # Create output directories as needed.
    if not args.display:
        os.makedirs(args.save_dir, exist_ok=True)
    if args.save_render_images:
        os.makedirs(args.render_dir, exist_ok=True)
    if args.benchmark:
        os.makedirs(args.benchmark_dir, exist_ok=True)
    if not args.display:
        os.makedirs(args.plots_dir, exist_ok=True)
    return args, extra_args

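# A brief usage sketch for parse_args(); the invocation below is hypothetical.
# Note that --num-units takes one or more integers (nargs="+"), so a two-layer
# MLP is requested as e.g. --num-units 128 128:
#
#   python train.py --scenario simple_spread --num-episodes 20000 --num-units 128 128
#
if __name__ == '__main__':
    args, extra_args = parse_args()
    print("experiment:", args.exp_name)      # defaults to "experiment-<scenario>"
    print("hidden layers:", args.num_units)  # e.g. [128, 128]
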
# MPI entry point: fork workers, build wrapped vectorized envs, and train.
def main(args):
    arg_parser = init_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)

    # Fork MPI worker processes; the parent exits once the workers are launched.
    if args.allow_run_as_root:
        whoami = mpi_fork_run_as_root(args.num_cpu, bind_to_core=args.bind_to_core)
    else:
        whoami = mpi_fork(args.num_cpu, bind_to_core=args.bind_to_core)
    if whoami == 'parent':
        print('parent exiting with code 0...')
        sys.exit(0)

    U.single_threaded_session().__enter__()
    rank = MPI.COMM_WORLD.Get_rank()

    # FIXME: how to log when rank != 0?
    configure_logger(args.log_path, format_strs=[])
    logger.info(f"main: {rank} / {MPI.COMM_WORLD.Get_size()}")
    logger.info(f"logger dir: {logger.get_dir()}")

    extra_args = parse_cmdline_kwargs(unknown_args)
    logger.info(args, extra_args)

    def make_wrapped_env():
        env = gym.make(args.env)
        if args.env_type == 'maze':
            pass
        elif args.env_type == 'robotics':
            from baselines.envs.goal_sampler_env_wrapper import GoalSamplerEnvWrapper
            env = GoalSamplerEnvWrapper(env)
        elif args.env_type == 'ant':
            # FIXME: if the resampled space is feasible, only_feasible could be
            # set to False to avoid unnecessary computation.
            env = GoalExplorationEnv(env=env, only_feasible=True,
                                     extend_dist_rew=0, inner_weight=0,
                                     goal_weight=1)
        else:
            raise NotImplementedError(args.env_type)
        return env

    venv_kwargs = dict(
        make_wrapped_env=make_wrapped_env,
        seed=args.seed,
        reward_scale=args.reward_scale,
        flatten_dict_observations=False,
        mpi_rank=rank,
        monitor_log_dir=args.log_path,  # FIXME
    )
    venv = make_vec_env(num_env=args.num_env, **venv_kwargs)
    eval_venv = make_vec_env(num_env=args.num_env, **venv_kwargs)
    plotter_venv = make_vec_env(num_env=1, **venv_kwargs) if args.debug else None

    # Seed everything, with a distinct seed per MPI rank.
    rank_seed = args.seed + 1000000 * rank if args.seed is not None else None
    set_global_seeds(rank_seed)
    logger.info(f'setting global seed: {rank_seed}')

    # Prepare params; extra command-line kwargs can override any parameter.
    params = dict()
    params.update(config.DEFAULT_PARAMS)
    params.update(config.DEFAULT_ENV_PARAMS[args.env])
    params.update(**extra_args)
    params['env_name'] = args.env
    params['num_cpu'] = args.num_cpu
    params['rollout_batch_size'] = args.num_env
    params['timesteps_per_cpu'] = int(args.num_timesteps)
    # Dump params before adding the (non-serializable) env constructor.
    with open(os.path.join(logger.get_dir(), 'params.json'), 'w') as f:
        json.dump(params, f)
    params['make_env'] = make_wrapped_env

    learn_fun_return = learn(
        venv=venv,
        eval_venv=eval_venv,
        plotter_venv=plotter_venv,
        params=params,
        save_path=args.log_path,
        save_interval=args.save_interval,
    )

    if rank == 0:
        save_path = os.path.expanduser(logger.get_dir())
        for k, v in learn_fun_return.items():
            v.save(os.path.join(save_path, f"final-{k}.joblib"))

    venv.close()
    eval_venv.close()
    if plotter_venv is not None:
        plotter_venv.close()

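# A hedged note on the per-rank seeding above: offsetting the base seed by
# 1000000 * rank gives every MPI worker a distinct, reproducible stream, e.g.
#
#   seed=7, 4 workers  ->  rank seeds 7, 1000007, 2000007, 3000007
#
# so rollouts differ across workers while the run as a whole stays
# deterministic for a fixed seed and worker count.
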
import numpy as np
import os
import shutil
from argparse import Namespace

from baselines.run import build_env, train, parse_cmdline_kwargs
from baselines.a2c.a2c import Model
from baselines.common.cmd_util import common_arg_parser

print("Running trained model")
arg_parser = common_arg_parser()
args, unknown_args = arg_parser.parse_known_args()
extra_args = parse_cmdline_kwargs(unknown_args)
model, env = train(args, extra_args)

# Load the data to test on
data = np.load('test_images.npy')
tests = len(data)

# Build environment and switch it to test mode
env = build_env(args)
env.envs[0].env.env.phase = 'test'

# Load model weights saved under "<env><alg>"
model.load(args.env + args.alg)

# Test the model on all images in the dataset
path = "/tmp/movements"
norm_steps = []
fail = 0
# for j in range(100):