def main(): """ Runs the test """ logger.configure() parser = mujoco_arg_parser() parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy')) parser.set_defaults(num_timesteps=int(2e7)) args = parser.parse_args() if not args.play: # train the model train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path) else: # construct the model object, load pre-trained model and render policy = train(num_timesteps=1, seed=args.seed) tf_util.load_state(args.model_path) env = make_mujoco_env('Humanoid-v2', seed=0) obs = env.reset() while True: action = policy.act(stochastic=False, obs=obs)[0] obs, _, done, _ = env.step(action) env.render() if done: obs = env.reset()
def main():
    """Entry point: train a policy, or replay a saved one while printing
    each (observation, action) transition."""
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default=os.path.join(logger.get_dir(), 'policy'))
    parser.set_defaults(num_timesteps=int(2e7))
    args = parser.parse_args()

    if args.play:
        # Build the model graph via a throwaway 1-step train, then restore.
        policy = train(args.env, num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env(args.env, seed=0)
        observation = env.reset()
        while True:
            action = policy.act(stochastic=False, ob=observation)[0]
            observation, _, done, _ = env.step(action)
            print(observation, action)
            # env.render()
            if done:
                observation = env.reset()
    else:
        # Train the model and save it under --model-path.
        train(args.env, num_timesteps=args.num_timesteps, seed=args.seed,
              model_path=args.model_path)
def main():
    """Parse CLI arguments, configure logging, and launch training."""
    cli = mujoco_arg_parser().parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          save=cli.save_model)
def main():
    """Train with PPO2 (logged to wandb), then record one episode of the
    trained model to a video file."""
    args = mujoco_arg_parser().parse_args()
    wandb.config.update(args)
    wandb.config.algo = 'ppo2'
    logger.configure()

    model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    eval_env = gym.make(args.env)
    video_recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(
        env=eval_env,
        base_path=os.path.join(wandb.run.dir, 'humanoid'),
        enabled=True)

    if True:  # if args.play
        logger.log("Running trained model")
        # The model expects a vec-env-shaped batch of observations, so the
        # single eval-env observation is broadcast into a zeroed batch.
        batch_obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        batch_obs[:] = eval_env.reset()
        while True:
            actions = model.step(batch_obs)[0]
            print(actions.shape)
            next_obs, reward, done, info = eval_env.step(actions[0])
            batch_obs[:] = next_obs
            # env.render()
            video_recorder.capture_frame()
            if done:
                batch_obs[:] = eval_env.reset()
                video_recorder.close()
                break
def main():
    """Train a Humanoid policy, or (with --play) restore the checkpoint in
    --model-path and render it with a small per-frame delay."""
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default='checkpoints_best/Humanoid-v2-6914')
    parser.set_defaults(num_timesteps=int(2e8))
    options = parser.parse_args()

    if options.play:
        # Build the graph (1-step train), restore weights, and render.
        policy = train(num_timesteps=1, seed=options.seed)
        U.load_state(options.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=123)
        observation = env.reset()
        while True:
            action = policy.act(stochastic=False, ob=observation)[0]
            observation, _, done, _ = env.step(action)
            env.render()
            time.sleep(0.01)
            if done:
                observation = env.reset()
    else:
        # Train the model and save it under --model-path.
        train(num_timesteps=options.num_timesteps, seed=options.seed,
              model_path=options.model_path)
def main(): """ Runs the test """ args = mujoco_arg_parser().parse_args() logger.configure() train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """Parse args and train with the soc/psi hyper-parameters forwarded."""
    cli = mujoco_arg_parser().parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          soc=cli.soc,
          psi=cli.psi)
def main():
    """Parse args and train with the p/alpha hyper-parameters forwarded."""
    cli = mujoco_arg_parser().parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          p=cli.p,
          alpha=cli.alpha)
def main():
    """Configure logging into --filepath and launch training with the
    batch-size / entropy settings from the CLI."""
    opts = mujoco_arg_parser().parse_args()
    logger.configure(dir=opts.filepath)
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          timesteps_per_actor_batch=opts.timesteps_per_episode,
          seed=opts.seed,
          entropy_coeff=opts.entropy_coeff,
          filepath=opts.filepath)
def main():
    """Add --logdir/--load-path options, configure logging, and train."""
    parser = mujoco_arg_parser()
    parser.add_argument('--logdir')
    parser.add_argument('--load-path', default=None)
    opts = parser.parse_args()
    logger.configure(dir=opts.logdir)
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          load_path=opts.load_path)
def main():
    """Build a structured log directory from the run settings and train."""
    opts = mujoco_arg_parser().parse_args()
    log_dir = 'geo/v{}/{}/{}/{}/{}'.format(
        opts.version, opts.alg, opts.env, opts.lr, opts.seed)
    logger.configure(dir=log_dir)
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          alg=opts.alg,
          lr=opts.lr,
          momentum=opts.mom)
def main():
    """Parse args and train with the gamma/lam/desired-KL settings."""
    opts = mujoco_arg_parser().parse_args()
    logger.configure()
    train(opts.env,
          gamma=opts.gamma,
          lam=opts.lam,
          save=opts.save,
          desired_kl=opts.desired_kl,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed)
def main():
    """Add log-dir/log-formats options, configure the logger with them,
    and start training."""
    parser = mujoco_arg_parser()
    parser.add_argument('--log-dir', default=None,
                        help='Log directory where all logs will be written')
    parser.add_argument('--log-formats', default=None,
                        help='Formats in which the logs will be written.')
    opts = parser.parse_args()
    logger.configure(opts.log_dir, opts.log_formats)
    train(opts.env, num_timesteps=opts.num_timesteps, seed=opts.seed)
def main():
    """Run inference with a saved model, optionally saving a video."""
    parser = mujoco_arg_parser()
    parser.add_argument('--load-path')
    parser.add_argument('--save-video')
    opts = parser.parse_args()
    logger.configure()
    infer(opts.env,
          load_path=opts.load_path,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          save_video=opts.save_video)
def main():
    """Train PPO with the optimisation hyper-parameters from the CLI."""
    opts = mujoco_arg_parser().parse_args()
    logger.configure()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          clip_param=opts.clip_param,
          optim_stepsize=opts.stepsize,
          optim_batchsize=opts.batch_size,
          gamma=opts.gamma,
          lam=opts.lam,
          save=opts.save)
def main():
    """Parse extra cpu/lr/batch options and launch MAPPO training with a
    per-run log directory (falling back to defaults if it can't be used)."""
    parser = mujoco_arg_parser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--lr', type=float, default=3e-4)
    parser.add_argument('--batch', type=int, default=2048)
    args = parser.parse_args()

    logdir = './results/mappo/' + args.env + '/l-{}-b-{}/seed-{}'.format(
        args.lr, args.batch, args.seed)
    try:
        logger.configure(logdir,
                         format_strs=['stdout', 'log', 'json', 'tensorboard'])
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. Keep the deliberate best-effort fallback to the
        # default logger configuration, but only for ordinary exceptions.
        logger.configure()

    train(args.env, num_timesteps=1e7, seed=args.seed, num_cpu=args.cpu,
          batch=args.batch, lr=args.lr)
def main():
    """Train on the parsed env, then render the deterministic policy on
    Walker2d-v2 forever."""
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    pi = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    env = make_mujoco_env('Walker2d-v2', seed=0)
    ob = env.reset()
    while True:
        action = pi.act(stochastic=False, ob=ob)[0]
        ob, _, done, _ = env.step(action)
        env.render()
        time.sleep(0.01)
        if done:
            # BUG FIX: the reset observation was previously discarded
            # (bare `env.reset()`), so `ob` stayed stale across episodes.
            ob = env.reset()
def main():
    """Train, then (with --play) roll the learned model out and render."""
    cfg = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(cfg.env, num_timesteps=cfg.num_timesteps, seed=cfg.seed)

    if cfg.play:
        logger.log("Running trained model")
        # Vec-env-shaped observation batch filled from the env reset.
        obs_batch = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs_batch[:] = env.reset()
        while True:
            acts = model.step(obs_batch)[0]
            obs_batch[:] = env.step(acts)[0]
            env.render()
def main():
    """Train, then always roll the learned model out and render it
    (the original guarded this with a constant-true `if 1:`)."""
    cfg = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(cfg.env, num_timesteps=cfg.num_timesteps, seed=cfg.seed)

    logger.log("Running trained model")
    obs_batch = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs_batch[:] = env.reset()
    while True:
        acts = model.step(obs_batch)[0]
        obs_batch[:] = env.step(acts)[0]
        env.render()
def start(fold, env_id):
    """Configure per-fold logging and run ACKTR training for one env.

    Imports are function-local so each worker/fold picks them up fresh.
    """
    from baselines import logger
    from baselines.common.cmd_util import mujoco_arg_parser
    from baselines.trpo_replay.acktr_cont import train
    from algorithm_parameters import algorithm_parameters
    import os
    import tensorflow as tf

    # Fresh graph per call so repeated folds don't accumulate TF state.
    tf.reset_default_graph()
    # Route baselines' logger output to a fold-specific directory.
    os.environ['OPENAI_LOGDIR'] = 'logs_' + env_id + '_' + str(fold)

    cli_args = mujoco_arg_parser().parse_args()
    logger.configure()
    params = algorithm_parameters()
    train(env_id, parameters=params, seed=cli_args.seed)
def main():
    """Train a multi-target (Pareto) MuJoCo policy, or — with --play —
    rebuild the policy graph, restore weights and render it."""
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path')
    parser.add_argument('--sim', default=False, action='store_true')
    parser.add_argument('--hessians', default=False, action='store_true')
    parser.add_argument('--logdir', type=str, default=None)
    opts = parser.parse_args()
    logger.configure(opts.logdir)

    # A model path is mandatory in both modes.
    if not opts.model_path:
        raise ValueError('You have to provide a model path.')

    if opts.play:
        # Build the graph with a throwaway 1-step train, restore, and render.
        policy = train(opts.env, num_timesteps=1, seed=opts.seed,
                       target1=opts.target1, target2=opts.target2,
                       target3=opts.target3, output_prefix=opts.output_prefix,
                       input_file=opts.input_file, sim=False)
        U.load_state('models/' + opts.model_path)
        env = make_pareto_mujoco_env(opts.env, seed=0, target1=opts.target1,
                                     target2=opts.target2, target3=opts.target3)
        observation = env.reset()
        while True:
            action = policy.act(stochastic=False, ob=observation)[0]
            observation, _, done, _ = env.step(action)
            env.render()
            if done:
                observation = env.reset()
    else:
        # Train the model and save it under --model-path.
        train(opts.env, num_timesteps=opts.num_timesteps, seed=opts.seed,
              model_path=opts.model_path, target1=opts.target1,
              target2=opts.target2, target3=opts.target3,
              output_prefix=opts.output_prefix, input_file=opts.input_file,
              sim=opts.sim, hessians=opts.hessians)
def main():
    """Derive a timestamped log directory (message-cost-aware for 'ext-v2'
    environments) and start training."""
    opts = mujoco_arg_parser().parse_args()

    if 'ext-v2' in opts.env:
        import gym
        # Instantiate the env once just to read its message cost for the path.
        cost = gym.make(opts.env).messageCost
        logdir = 'TRY_logs/env=%s-c-%d/seed=%d_%s' % (
            opts.env, cost, opts.seed, datetime.now().strftime('%d_%H:%M:%S'))
    else:
        logdir = 'TRY_logs/env=%s/seed=%d_%s' % (
            opts.env, opts.seed, datetime.now().strftime('%d_%H:%M:%S'))

    logger.configure(logdir)
    train(opts.env, num_timesteps=opts.num_timesteps, seed=opts.seed)
def main():
    """Initialise pydart, train for 1e7 timesteps with a timestamped log
    directory, then run the trained model for one episode."""
    from time import strftime
    pydart.init()
    args = mujoco_arg_parser().parse_args()
    logger.configure(dir='./log' + strftime("%Y%m%d%H%M") + '/')
    model, env = train(args.env, num_timesteps=10000000, seed=args.seed)

    logger.log("Running trained model")
    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    while True:
        # BUG FIX: model.step returns (actions, values, states, neglogpacs);
        # previously the whole tuple — not the actions — was fed to env.step.
        actions = model.step(obs)[0]
        res = env.step(actions)
        obs[:] = res[0]
        # res is (obs, rewards, dones, infos); stop after the first episode
        # of the first vec-env worker finishes.
        done = res[2]
        if done[0]:
            break
def main():
    """Parse GPU/penalty flags, pin the visible GPU, configure a run-named
    logger, and start training."""
    parser = mujoco_arg_parser()
    # NOTE(review): --use-penal takes a value rather than acting as a flag
    # (no action='store_true'); any non-empty string is truthy — confirm intent.
    parser.add_argument('--use-penal', help='enable penal', default=False)
    parser.add_argument('--gpu', type=int, default=0, help='GPU selection')
    parser.add_argument('--pg-rate', type=float, default=0.0)
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = '%d' % args.gpu
    print("game %s run on GPU: %d" % (args.env, args.gpu))

    # Run name encodes env, seed, penalty on/off, and the pg rate.
    if args.use_penal:
        run_name = (args.env + '_seed_' + str(args.seed) + '_pen'
                    + '_pg' + str(args.pg_rate))
    else:
        run_name = (args.env + '_seed_' + str(args.seed) + '_nopen'
                    + '_pg' + str(args.pg_rate))
    logger.configure(run_name, ['log', 'tensorboard'])

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, args=args)
def main():
    """Replay a saved Roboschool Walker2d PPO policy deterministically."""
    opts = mujoco_arg_parser().parse_args()
    # Hard-coded replay configuration overriding whatever was parsed.
    opts.env = 'RoboschoolWalker2d-v1'
    opts.save_file = ''
    opts.load_file = 'ppo_walker2d'
    opts.num_timesteps = 10000000
    opts.render = True
    opts.stochastic = False
    print(opts)
    logger.configure()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          save_file=opts.save_file,
          load_file=opts.load_file,
          render=opts.render,
          stochastic=opts.stochastic)
def main():
    """Train PPO, logging into a fixed local data directory, then (with
    --play) render the trained model."""
    opts = mujoco_arg_parser().parse_args()
    logger.configure(
        dir='/home/jonasrothfuss/Dropbox/Eigene_Dateien/UC_Berkley/2_Code/model_ensemble_meta_learning/data/local/ppo-baselines'
    )
    model, env = train(opts.env, num_timesteps=opts.num_timesteps,
                       seed=opts.seed)

    if opts.play:
        logger.log("Running trained model")
        obs_batch = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs_batch[:] = env.reset()
        while True:
            acts = model.step(obs_batch)[0]
            obs_batch[:] = env.step(acts)[0]
            env.render()
def main():
    """Parse SIL hyper-parameters, train, and (with --play) render."""
    parser = mujoco_arg_parser()
    parser.add_argument('--log_dir', help='the directory to save log file',
                        default='log')
    parser.add_argument('--lr', type=float, default=3e-4,
                        help="Learning rate")
    parser.add_argument('--sil-update', type=float, default=10,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-value', type=float, default=0.01,
                        help="Weight for value update")
    parser.add_argument('--sil-alpha', type=float, default=0.6,
                        help="Alpha for prioritized replay")
    parser.add_argument('--sil-beta', type=float, default=0.1,
                        help="Beta for prioritized replay")
    opts = parser.parse_args()
    logger.configure(dir=opts.log_dir)

    model, env = train(opts.env,
                       num_timesteps=opts.num_timesteps,
                       seed=opts.seed,
                       lr=opts.lr,
                       sil_update=opts.sil_update,
                       sil_value=opts.sil_value,
                       sil_alpha=opts.sil_alpha,
                       sil_beta=opts.sil_beta)

    if opts.play:
        logger.log("Running trained model")
        obs_batch = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs_batch[:] = env.reset()
        while True:
            acts = model.step(obs_batch)[0]
            obs_batch[:] = env.step(acts)[0]
            env.render()
def main():
    """Load a pre-trained Roboschool humanoid policy, run one episode while
    recording video, and print the episode return."""
    args = mujoco_arg_parser().parse_args()
    logger.configure()

    # Build the policy graph only (no real training happens with 1 timestep).
    pi = train(args.env, num_timesteps=1, seed=args.seed, play=False)

    run = 'run-20180703_034952-1a24a6ik/'
    run_home = '/home/ubuntu/wandb_baselines/wandb/' + run
    model_path = run_home + 'humanoid_policy'
    U.load_state(model_path)

    seed = random.randint(1, 1000)
    env = make_mujoco_env('RoboschoolHumanoid-v1', seed=seed)
    tot_r = 0
    ob = env.reset()
    runs = 0
    video = True
    if video:
        video_recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(
            env=env,
            base_path=os.path.join('/home/ubuntu/wandb_baselines',
                                   'humanoid_run2_%i' % seed),
            enabled=True)

    while True:
        action = pi.act(stochastic=False, ob=ob)[0]
        ob, r, done, _ = env.step(action)
        if video:
            video_recorder.capture_frame()
        tot_r += r
        if done:
            ob = env.reset()
            runs += 1
            print(tot_r)
            tot_r = 0
            print("@@@@@@@@@@@@@@@")
            if runs > 0:
                break

    # BUG FIX: the recorder was never closed (the close call was commented
    # out), which can leave the video file unflushed/corrupt; finalize it
    # once the episode loop ends.
    if video:
        video_recorder.close()
def main():
    # Launch `run` for several seeds in parallel, then aggregate the
    # per-seed progress.csv logs into a stats dict for later plotting.
    parser = mujoco_arg_parser()
    parser.add_argument('--log-dir', type=str, default='./log', help="Log directory")
    parser.add_argument('--exp-scale', type=float, default=0.5, help="Exp scale of confidence score")
    args = parser.parse_args()
    #fig_path = os.path.join(args.log_dir, args.env_id+'.png')
    # Single processing for testing
    #arguments = [args.env, 0, args.log_dir, args.num_timesteps, args.exp_scale]
    #run(arguments)
    # Number of seeds / parallel experiments.
    exp_num = 3
    # Multiprocessing
    # NOTE(review): the code below reads args.env_id, but baselines'
    # mujoco_arg_parser defines --env (attribute `env`, as the commented-out
    # single-process line uses) — confirm the parser actually provides env_id.
    pool = Pool(processes=exp_num)
    arguments = [[
        args.env_id, seed, args.log_dir, args.num_timesteps, args.exp_scale
    ] for seed in range(exp_num)]
    pool.map(run, arguments)
    #stats_dict = {'timestep': [], 'reward': []}
    stats_dict = {'timestep': [], 'reward': [], 'agent': []}
    # Read Logs
    print('Reading logs...')
    for seed in range(exp_num):
        # Each seed's run writes progress.csv under <log_dir>/<env>_<seed>/.
        filename = os.path.join(args.log_dir, args.env_id + '_' + str(seed),
                                'progress.csv')
        with open(filename, 'r') as csvfile:
            csvreader = csv.reader(csvfile)
            # First row is the header; locate columns by name.
            fields = next(csvreader)
            for row in csvreader:
                reward = row[fields.index('eprewmean')]
                timestep = row[fields.index('total_timesteps')]
                agent = row[fields.index('agent')]
                # Timesteps are doubled here — presumably because two agents
                # act per logged step; TODO confirm against `run`.
                stats_dict['timestep'].append(int(timestep) * 2)
                stats_dict['reward'].append(float(reward))
                stats_dict['agent'].append(agent)
def main():
    """Parse log-dir/lr options, train, and (with --play) render."""
    parser = mujoco_arg_parser()
    parser.add_argument('--log_dir', help='the directory to save log file',
                        default='log')
    parser.add_argument('--lr', type=float, default=3e-4,
                        help="Learning rate")
    opts = parser.parse_args()
    logger.configure(dir=opts.log_dir)

    model, env = train(opts.env, num_timesteps=opts.num_timesteps,
                       seed=opts.seed, lr=opts.lr)

    if opts.play:
        logger.log("Running trained model")
        obs_batch = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs_batch[:] = env.reset()
        while True:
            acts = model.step(obs_batch)[0]
            obs_batch[:] = env.step(acts)[0]
            env.render()
def main(*args, **kwargs): if "env" in kwargs.keys(): game = kwargs["env"] else: game = "Hopper" parser = mujoco_arg_parser() parser.add_argument('--attention', help='attention or not', type=str, default="NoAttention", choices=["Attention,NoAttention,StateAttention"]) parser.add_argument('--env', help='environment ID', type=str, default=game + "-v2") args = parser.parse_args() print("Going to train.") train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, method=args.attention)
def main():
    """Train a Humanoid policy, or (with --play) restore the saved weights
    and render episodes forever."""
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
    opts = parser.parse_args()

    if opts.play:
        # Build the policy graph (1-step train), restore weights, render.
        policy = train(num_timesteps=1, seed=opts.seed)
        U.load_state(opts.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)
        observation = env.reset()
        while True:
            action = policy.act(stochastic=False, ob=observation)[0]
            observation, _, done, _ = env.step(action)
            env.render()
            if done:
                observation = env.reset()
    else:
        # Train the model and save it under --model-path.
        train(num_timesteps=opts.num_timesteps, seed=opts.seed,
              model_path=opts.model_path)
def main():
    """Entry point: parse MuJoCo CLI args and launch training."""
    opts = mujoco_arg_parser().parse_args()
    train(opts.env, num_timesteps=opts.num_timesteps, seed=opts.seed)