def visualize(
        logdir, outdir, num_agents, num_episodes, checkpoint=None,
        env_processes=True):
    """Recover checkpoint and render videos from it.

    The batch environment is always closed, including when building the
    graph, restoring the checkpoint or running the simulation loop raises.

    Args:
      logdir: Logging directory of the trained algorithm.
      outdir: Directory to store rendered videos in.
      num_agents: Number of environments to simulate in parallel.
      num_episodes: Total number of episodes to simulate.
      checkpoint: Checkpoint name to load; defaults to most recent.
      env_processes: Whether to step environments in separate processes.
    """
    config = utility.load_config(logdir)
    batch_env = None
    try:
        with tf.device('/cpu:0'):
            batch_env = utility.define_batch_env(
                lambda: _create_environment(config, outdir),
                num_agents, env_processes)
            graph = utility.define_simulation_graph(
                batch_env, config.algorithm, config)
            total_steps = num_episodes * config.max_length
            loop = _define_loop(graph, total_steps)
        saver = utility.define_saver(
            exclude=(r'.*_temporary.*', r'global_step'))
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            # Variables are restored from the log directory recorded in the
            # loaded config, not from the `logdir` argument directly.
            utility.initialize_variables(
                sess, saver, config.logdir, checkpoint, resume=True)
            # The loop is a generator; it has to be exhausted for the
            # simulation to run. The yielded scores are not needed here.
            for unused_score in loop.run(sess, saver, total_steps):
                pass
    finally:
        # Release the environments even when something above failed.
        if batch_env is not None:
            batch_env.close()
def main(_):
    """Create or load configuration and launch the trainer.

    Loads the observation/action arrays that belong to FLAGS.config
    ('offense' or 'defense'), resolves the log directory, loads the stored
    configuration (or creates and saves a new one when loading raises
    IOError) and finally calls `train`.

    Raises:
      ValueError: if FLAGS.config is neither 'offense' nor 'defense'.
    """
    dataset_files = {
        'offense': (
            'bball_strategies/pretrain/data/off_obs.npy',
            'bball_strategies/pretrain/data/off_actions.npy'),
        'defense': (
            'bball_strategies/pretrain/data/def_obs.npy',
            'bball_strategies/pretrain/data/def_actions.npy'),
    }
    if FLAGS.config not in dataset_files:
        raise ValueError('{} is not an available config'.format(FLAGS.config))
    obs_file, actions_file = dataset_files[FLAGS.config]
    data = np.load(obs_file)
    label = np.load(actions_file)

    utility.set_up_logging()

    # A fresh run gets its own '<timestamp>-<config>' sub-directory; a resumed
    # run (or an empty FLAGS.logdir) uses FLAGS.logdir untouched.
    logdir = FLAGS.logdir
    if logdir and not FLAGS.resume:
        run_name = '{}-{}'.format(FLAGS.timestamp, FLAGS.config)
        logdir = os.path.expanduser(os.path.join(logdir, run_name))

    outdir = os.path.join(logdir, 'train_output') if FLAGS.vis else None

    try:
        config = utility.load_config(logdir)
    except IOError:
        # NOTE(review): an empty FLAGS.config has already been rejected by the
        # ValueError above, so this guard looks unreachable here.
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        make_config = getattr(configs, FLAGS.config)
        config = tools.AttrDict(make_config())
        config = utility.save_config(config, logdir)

    train(config, data, label, outdir)
def main(_):
    """Create or load configuration and launch the trainer.

    Resolves the log directory, optionally derives an output directory for
    visualisation, loads the stored configuration (creating and saving a new
    one from FLAGS.config when loading raises IOError) and logs every score
    yielded by `train`.

    Raises:
      KeyError: if no stored configuration can be loaded and FLAGS.config is
        empty.
    """
    utility.set_up_logging()

    # A fresh run gets its own '<timestamp>-<config>' sub-directory; a resumed
    # run (or an empty FLAGS.logdir) uses FLAGS.logdir untouched.
    logdir = FLAGS.logdir
    if logdir and not FLAGS.resume:
        run_name = '{}-{}'.format(FLAGS.timestamp, FLAGS.config)
        logdir = os.path.expanduser(os.path.join(logdir, run_name))

    outdir = os.path.join(logdir, 'train_output') if FLAGS.vis else None

    try:
        config = utility.load_config(logdir)
    except IOError:
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        make_config = getattr(configs, FLAGS.config)
        config = tools.AttrDict(make_config())
        config = utility.save_config(config, logdir)

    for score in train(config, FLAGS.env_processes, outdir):
        tf.logging.info('Score {}.'.format(score))
def visualize(
        logdir, outdir, num_agents, num_episodes, checkpoint=None,
        env_processes=True):
    """Recover checkpoint and render videos from it.

    The batch environment is always closed, including when building the
    graph, restoring the checkpoint or running the simulation loop raises.

    Args:
      logdir: Logging directory of the trained algorithm.
      outdir: Directory to store rendered videos in.
      num_agents: Number of environments to simulate in parallel.
      num_episodes: Total number of episodes to simulate.
      checkpoint: Checkpoint name to load; defaults to most recent.
      env_processes: Whether to step environments in separate processes.
    """
    config = utility.load_config(logdir)
    batch_env = None
    try:
        with tf.device('/cpu:0'):
            batch_env = utility.define_batch_env(
                lambda: _create_environment(config, outdir),
                num_agents, env_processes)
            graph = utility.define_simulation_graph(
                batch_env, config.algorithm, config)
            total_steps = num_episodes * config.max_length
            loop = _define_loop(graph, total_steps)
        saver = utility.define_saver(
            exclude=(r'.*_temporary/.*', r'global_step'))
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            # Variables are restored from the log directory recorded in the
            # loaded config, not from the `logdir` argument directly.
            utility.initialize_variables(
                sess, saver, config.logdir, checkpoint, resume=True)
            # The loop is a generator; it has to be exhausted for the
            # simulation to run. The yielded scores are not needed here.
            for unused_score in loop.run(sess, saver, total_steps):
                pass
    finally:
        # Release the environments even when something above failed.
        if batch_env is not None:
            batch_env.close()
def main(argv):
    """Run one episode with a restored SimplePPOPolicy and log its reward.

    The configuration is loaded from LOG_DIR, the environment is created with
    rendering enabled, and the policy is restored from the CHECKPOINT file
    inside LOG_DIR. The episode runs until the environment reports `done`;
    the summed reward is then written to the TensorFlow log.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv  # Unused.
    config = utility.load_config(LOG_DIR)
    policy_layers = config.policy_layers
    value_layers = config.value_layers
    env = config.env(render=True)
    network = config.network

    with tf.Session() as sess:
        checkpoint_path = os.path.join(LOG_DIR, CHECKPOINT)
        agent = simple_ppo_agent.SimplePPOPolicy(
            sess,
            env,
            network,
            policy_layers=policy_layers,
            value_layers=value_layers,
            checkpoint=checkpoint_path)

        episode_reward = 0
        obs = env.reset()
        done = False
        while not done:
            # The agent takes a batch of observations; unwrap the single
            # action it returns for this one observation.
            batched_action = agent.get_action([obs])
            obs, step_reward, done, _ = env.step(batched_action[0])
            # This sleep is to prevent serial communication error on the
            # real robot.
            time.sleep(0.002)
            episode_reward += step_reward
        tf.logging.info("reward: %s", episode_reward)
def main(_):
    """Create or load configuration and launch the trainer.

    When FLAGS.logdir is set, the run directory is
    '<logdir>/<timestamp>-<config>' with '~' expanded. A configuration stored
    there is reused; when loading raises IOError a new one is built from
    FLAGS.config and saved. Every score yielded by `train` is logged.

    Raises:
      KeyError: if FLAGS.config is empty.
    """
    utility.set_up_logging()
    if not FLAGS.config:
        raise KeyError('You must specify a configuration.')

    # An empty FLAGS.logdir is passed through unchanged.
    logdir = FLAGS.logdir
    if logdir:
        run_name = '{}-{}'.format(FLAGS.timestamp, FLAGS.config)
        logdir = os.path.expanduser(os.path.join(logdir, run_name))

    try:
        config = utility.load_config(logdir)
    except IOError:
        make_config = getattr(configs, FLAGS.config)
        config = tools.AttrDict(make_config())
        config = utility.save_config(config, logdir)

    for score in train(config, FLAGS.env_processes):
        tf.logging.info('Score {}.'.format(score))
def main(_):
    """Create or load configuration and launch the trainer.

    Before anything else, FLAGS.logdir, FLAGS.config and FLAGS.env_processes
    are overwritten with hard-coded values, so whatever was given for these
    three flags on the command line is ignored. The configuration that ends
    up being used is also stored in the module-level name `globalConfig`.

    Raises:
      KeyError: if FLAGS.config is empty.
    """
    global globalConfig

    # Hard-coded overrides of the command-line flags.
    FLAGS.logdir = '../../Log'
    FLAGS.config = 'pendulum'
    FLAGS.env_processes = False

    utility.set_up_logging()
    # With the override above this cannot trigger; it only matters if the
    # hard-coded FLAGS.config assignment is removed.
    if not FLAGS.config:
        raise KeyError('You must specify a configuration.')
    logdir = FLAGS.logdir and os.path.expanduser(
        os.path.join(FLAGS.logdir,
                     '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
    try:
        config = utility.load_config(logdir)
    except IOError:
        # Nothing stored yet: build the named configuration and save it.
        config = tools.AttrDict(getattr(configs, FLAGS.config)())
        config = utility.save_config(config, logdir)
    globalConfig = config
    for score in train(config, FLAGS.env_processes):
        tf.logging.info('Score {}.'.format(score))
def main(_):
    """Create or load configuration, then visualise and test the data.

    Loads the offense and defense observation/action arrays, loads the
    configuration stored in FLAGS.logdir (building and saving one from
    FLAGS.config when loading raises IOError), and passes the arrays to
    `vis_data` and `testing` with '<logdir>/vis' as the output directory.

    Raises:
      KeyError: if no stored configuration can be loaded and FLAGS.config is
        empty.
    """
    array_files = (
        'bball_strategies/pretrain/data/off_obs.npy',
        'bball_strategies/pretrain/data/off_actions.npy',
        'bball_strategies/pretrain/data/def_obs.npy',
        'bball_strategies/pretrain/data/def_actions.npy',
    )
    off_data, off_label, def_data, def_label = [
        np.load(array_file) for array_file in array_files]

    utility.set_up_logging()
    logdir = FLAGS.logdir
    try:
        config = utility.load_config(logdir)
    except IOError:
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        make_config = getattr(configs, FLAGS.config)
        config = tools.AttrDict(make_config())
        config = utility.save_config(config, logdir)

    vis_dir = os.path.join(FLAGS.logdir, 'vis')
    outdir = os.path.expanduser(vis_dir)
    vis_data(off_data, off_label, def_data, def_label, outdir, start_idx=0)
    testing(config, off_data, off_label, def_data, def_label, outdir)
def main():
    """Resume an experiment locally or submit it as a cluster job.

    Parses the command line, derives the experiment name from the first
    experiment path and then either

    * runs the training in this process (neither --cpu nor --edgar given),
      which requires exactly one experiment path, or
    * submits one job per experiment path to the selected cluster through
      `send_job`.

    The timestamp and config name passed to `send_job` are always taken from
    the first experiment path, also when several paths are given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exp_path', type=str, nargs='*',
        help='Full experiment path (to the dir where the config is stored)')
    # NOTE(review): passing this flag turns separate environment processes
    # off (see the `not args.no_env_process` below); the help text reads as
    # the opposite.
    parser.add_argument(
        '-nep', '--no_env_process', default=False, action='store_true',
        help='Step environments in separate processes to circumvent the GIL')
    parser.add_argument(
        '-r', '--render', default=False, action='store_true',
        help='Whether to render the run')
    parser.add_argument(
        '-c', '--cpu', default=False, action='store_true',
        help='Whether to run the training on access1-cp')
    parser.add_argument(
        '-e', '--edgar', default=False, action='store_true',
        help='Whether to run the training on edgar')
    parser.add_argument(
        '-b', '--besteffort', default=False, action='store_true',
        help='Whether to run in besteffort mode')
    parser.add_argument(
        '-nc', '--nb_cores', type=int, default=8,
        help='Number of cores to be used on the cluster')
    parser.add_argument(
        '-w', '--wallclock', type=int, default=72,
        help='Job wall clock time to be set on the cluster')
    parser.add_argument(
        '-s', '--steps', type=int, default=None,
        help=
        'Number of steps of the experiment (if not None, change the config)')
    args = parser.parse_args()

    # nargs='*' accepts an empty list; fail with a usage message instead of
    # an IndexError on args.exp_path[0] below.
    if not args.exp_path:
        parser.error('at least one exp_path is required')

    sys_path_clean = utils.get_sys_path_clean()
    # The first path is split as <exp_path>/<seed dir>/<timestamp dir>.
    seed_path, timestamp_dir = os.path.split(
        os.path.normpath(args.exp_path[0]))
    exp_path, _ = os.path.split(os.path.normpath(seed_path))
    exp_name = os.path.basename(exp_path)
    rendered_envs_path = '/home/thoth/apashevi/scratch_remote/Cache/Code/{}/rlgrasp/rendered_envs.py'.format(
        exp_name)

    if not args.cpu and not args.edgar:
        # run the job locally
        utils.change_sys_path(sys_path_clean, exp_path)
        # Imported only after the sys.path change above, so these imports
        # must stay inside the function.
        import agents.scripts.train as trainer
        from agents.scripts import utility
        assert len(args.exp_path) == 1
        config = utility.load_config(args.exp_path[0])
        with config.unlocked:
            config.num_agents = 4
            if args.steps is not None:
                config.steps = args.steps
        utils.rewrite_rendered_envs_file(args.render, rendered_envs_path)
        for score in trainer.train(config, not args.no_env_process):
            print('Score {}'.format(score))
        if args.render:
            # Switch rendering back off once the local run is done.
            utils.rewrite_rendered_envs_file(False, rendered_envs_path)
    else:
        cluster = 'edgar' if args.edgar else 'access1-cp'
        utils.rewrite_rendered_envs_file(False, rendered_envs_path)
        job_cluster = utils.get_job(cluster, args.besteffort, args.nb_cores,
                                    args.wallclock)
        # The directory name is expected to look like '<timestamp>-<config>'.
        name_parts = timestamp_dir.split('-')
        timestamp = name_parts[0]
        config = name_parts[1]
        if len(args.exp_path) == 1:
            # Fixed: this used the undefined name `args_steps`.
            send_job(job_cluster, seed_path, timestamp, config, args.steps)
        else:
            for exp_path_complete in args.exp_path:
                seed_path, _ = os.path.split(
                    os.path.normpath(exp_path_complete))
                send_job(job_cluster, seed_path, timestamp, config,
                         args.steps)
sess_config.gpu_options.allow_growth = True with tf.Session(config=sess_config) as sess: utility.initialize_variables( sess, saver, config.logdir, checkpoint, resume=True) for unused_score in loop.run(sess, saver, total_steps): pass tf.reset_default_graph() return batch_env.rewards_list import pickle for i in range(len(checkpoints)): model = logdir+ runs[i] + checkpoints[i] #print('*'*100) #print(logdir + runs[i]) config = utility.load_config(logdir + runs[i]) for j in range(len(latency_index)): sampling_j= sampling_index[j] latency_j = latency_index[j] res = run_setting(config, model, sampling_j, latency_j, num_of_episodes) print(i, j,res) file_name = 'data/'+save_names[i]+'_'+str(sampling_j)+'_'+str(latency_j)+'.p' pickle.dump([res, sampling_j, latency_j, model, config], open(file_name, "wb" ) )