parser.add_argument('--log_dir', type=str, default='./Data/AST/GA/Test')
parser.add_argument('--args_data', type=str, default=None)
args = parser.parse_args()

# Create the logger
log_dir = args.log_dir
tabular_log_file = osp.join(log_dir, args.tabular_log_file)
text_log_file = osp.join(log_dir, args.text_log_file)
params_log_file = osp.join(log_dir, args.params_log_file)

logger.log_parameters_lite(params_log_file, args)
# logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
logger.set_snapshot_dir(log_dir)
logger.set_snapshot_mode(args.snapshot_mode)
logger.set_snapshot_gap(args.snapshot_gap)
logger.set_log_tabular_only(args.log_tabular_only)
logger.push_prefix("[%s] " % args.exp_name)

seed = 0
top_k = 10
max_path_length = 100
top_paths = BPQ.BoundedPriorityQueue(top_k)

np.random.seed(seed)
tf.set_random_seed(seed)
with tf.Session() as sess:
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
             '(all iterations will be saved), "last" (only '
             'the last iteration will be saved), "gap" (every '
             '`snapshot_gap` iterations are saved), or "none" '
             '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help='Name of the step key in tensorboard_summary.')
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition where one of the children in the
    # parallel sampler exits without releasing a lock once it catches SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains sleeping waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                children = garage.plotter.Plotter.get_plotters()
                children += garage.tf.plotter.Plotter.get_plotters()
                if args.n_parallel > 0:
                    children += [parallel_sampler]
                child_proc_shutdown(children)
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
             '(all iterations will be saved), "last" (only '
             'the last iteration will be saved), "gap" (every '
             '`snapshot_gap` iterations are saved), or "none" '
             '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help='Name of the step key in tensorboard_summary.')
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for stub objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    assert os.environ.get("JOBLIB_START_METHOD", None) == "forkserver"

    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        from garage.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                if args.n_parallel > 0:
                    parallel_sampler.terminate()
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
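# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original source): run_experiment above
# decodes --args_data with base64 + cloudpickle and calls the resulting
# function with variant_data, so a launcher could, under that assumption,
# package a task roughly as follows. The names `my_task` and `build_argv`
# are hypothetical and exist only for illustration.
import base64
import sys

import cloudpickle


def my_task(variant_data):
    # Hypothetical task body; variant_data is the decoded --variant_data
    # payload, or None when that flag is omitted.
    print('variant:', variant_data)


def build_argv(log_dir):
    # Encode the callable the same way run_experiment expects to decode it.
    args_data = base64.b64encode(cloudpickle.dumps(my_task)).decode('ascii')
    return [
        sys.argv[0],
        '--use_cloudpickle', 'True',
        '--log_dir', log_dir,
        '--args_data', args_data,
    ]

# run_experiment(build_argv('./data/example'))  # illustrative call only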
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)

        ## Construct PathTrie and find missing skill description
        # This is basically ASA.decide_new_skill
        min_length = 3
        max_length = 5
        action_map = {0: 's', 1: 'L', 2: 'R'}
        min_f_score = 1
        max_results = 10
        aggregations = []  # sublist of ['mean', 'most_freq', 'nearest_mean', 'medoid'] or 'all'

        paths = saved_data['paths']
        path_trie = PathTrie(saved_data['hrl_policy'].num_skills)
        for path in paths:
            actions = path['actions'].argmax(axis=1).tolist()
            observations = path['observations']
            path_trie.add_all_subpaths(
                actions,
                observations,
                min_length=min_length,
                max_length=max_length
            )
        logger.log('Searched {} rollouts'.format(len(paths)))

        frequent_paths = path_trie.items(
            action_map=action_map,
            min_count=10,  # len(paths) * 2
            min_f_score=min_f_score,
            max_results=max_results,
            aggregations=aggregations
        )
        logger.log('Found {} frequent paths: [index, actions, count, f-score]'.format(len(frequent_paths)))
        for i, f_path in enumerate(frequent_paths):
            logger.log('    {:2}: {:{pad}}\t{}\t{:.3f}'.format(
                i, f_path['actions_text'], f_path['count'], f_path['f_score'], pad=max_length))

        top_subpath = frequent_paths[0]
        start_obss = top_subpath['start_observations']
        end_obss = top_subpath['end_observations']

        ## Prepare elements for training
        # Environment
        base_env = saved_data['env'].env.env  # <NormalizedEnv<MinibotEnv instance>>
        skill_learning_env = TfEnv(
            SkillLearningEnv(
                # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                env=base_env,
                start_obss=start_obss,
                end_obss=end_obss
            )
        )

        # Skill policy
        hrl_policy = saved_data['hrl_policy']
        new_skill_policy, new_skill_id = hrl_policy.create_new_skill(
            end_obss=end_obss
        )

        # Baseline - clone the baseline specified in low_algo_kwargs, or the top algo's baseline
        low_algo_kwargs = dict(saved_data['low_algo_kwargs'])
        baseline_to_clone = low_algo_kwargs.get('baseline', saved_data['baseline'])
        baseline = Serializable.clone(  # to create a blank baseline
            obj=baseline_to_clone,
            name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        low_algo_kwargs['baseline'] = baseline
        low_algo_cls = saved_data['low_algo_cls']

        # Set custom training params (should've been set in asa_basic_run)
        low_algo_kwargs['batch_size'] = 2500
        low_algo_kwargs['max_path_length'] = 50
        low_algo_kwargs['n_itr'] = 500

        # Algorithm
        algo = low_algo_cls(
            env=skill_learning_env,
            policy=new_skill_policy,
            **low_algo_kwargs
        )

        # Logger parameters
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        # No need to change snapshot dir in this script, it is used in ASA-algo.create_and_train_new_skill()
        # logger.set_snapshot_dir(os.path.join(
        #     logger_snapshot_dir_before,
        #     'skill{}'.format(new_skill_id)
        # ))
        logger.set_snapshot_mode('none')
        logger.set_tensorboard_step_key('Iteration')

        ## Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=tf_session)

        ## Save new policy and its end_obss (we'll construct the skill stopping function
        # from them manually in asa_resume_with_new_skill.py)
        out_file = os.path.join(logger.get_snapshot_dir(), 'final.pkl')
        with open(out_file, 'wb') as file:
            out_data = {
                'policy': new_skill_policy,
                'subpath': top_subpath
            }
            dill.dump(out_data, file)

        # Restore logger parameters
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
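# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not original code): how the final.pkl written by
# run_task above could be read back, e.g. in asa_resume_with_new_skill.py, to
# recover the trained skill policy and the subpath whose end observations
# define the skill's stopping condition. The file path is illustrative only.
import dill

with open('./data/skill/final.pkl', 'rb') as file:
    skill_data = dill.load(file)
trained_policy = skill_data['policy']   # skill policy trained by run_task
skill_subpath = skill_data['subpath']   # dict with 'start_observations' / 'end_observations'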
def create_and_train_new_skill(self, skill_subpath):
    """
    Create and train a new skill based on the given subpath. The new skill
    policy and ID are returned, and also saved in self._hrl_policy.
    """
    ## Prepare elements for training
    # Environment
    skill_learning_env = TfEnv(
        SkillLearningEnv(
            # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
            env=self.env.env.env,
            start_obss=skill_subpath['start_observations'],
            end_obss=skill_subpath['end_observations']
        )
    )

    # Skill policy - blank policy to be trained
    new_skill_pol, new_skill_id = self._hrl_policy.create_new_skill(
        skill_subpath['end_observations'])

    # Baseline - clone the baseline specified in low_algo_kwargs, or the top algo's baseline.
    # We need to clone it, as each skill policy must have its own baseline instance.
    la_kwargs = dict(self._low_algo_kwargs)
    baseline_to_clone = la_kwargs.get('baseline', self.baseline)
    baseline = Serializable.clone(  # to create a blank baseline
        obj=baseline_to_clone,
        name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
    )
    la_kwargs['baseline'] = baseline

    # Algorithm
    algo = self._low_algo_cls(
        env=skill_learning_env,
        policy=new_skill_pol,
        **la_kwargs
    )

    # Logger parameters
    logger.dump_tabular(with_prefix=False)
    logger.log('Launching training of the new skill')
    logger_snapshot_dir_before = logger.get_snapshot_dir()
    logger_snapshot_mode_before = logger.get_snapshot_mode()
    logger_snapshot_gap_before = logger.get_snapshot_gap()
    logger.set_snapshot_dir(os.path.join(
        logger_snapshot_dir_before,
        'skill{}'.format(new_skill_id)
    ))
    logger.set_snapshot_mode('none')
    # logger.set_snapshot_gap(max(1, np.floor(la_kwargs['n_itr'] / 10)))
    logger.push_tabular_prefix('Skill{}/'.format(new_skill_id))
    logger.set_tensorboard_step_key('Iteration')

    # Train new skill
    with logger.prefix('Skill {} | '.format(new_skill_id)):
        algo.train(sess=self._tf_sess)

    # Restore logger parameters
    logger.pop_tabular_prefix()
    logger.set_snapshot_dir(logger_snapshot_dir_before)
    logger.set_snapshot_mode(logger_snapshot_mode_before)
    logger.set_snapshot_gap(logger_snapshot_gap_before)
    logger.log('Training of the new skill finished')

    return new_skill_pol, new_skill_id
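# ---------------------------------------------------------------------------
# Hedged usage sketch (not from the original source): the only fields this
# method reads from skill_subpath are 'start_observations' and
# 'end_observations', so a minimal call could look like the lines below.
# `asa_algo` stands for an already-constructed instance of this class and
# `start_obss` / `end_obss` for arrays of observations; all three names are
# illustrative assumptions.
#
#     skill_subpath = {
#         'start_observations': start_obss,
#         'end_observations': end_obss,
#     }
#     new_skill_pol, new_skill_id = asa_algo.create_and_train_new_skill(skill_subpath)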