Example #1
    def __init__(self,
                 env_name,
                 record_video=False,
                 video_schedule=None,
                 log_dir=None,
                 record_log=False,
                 force_reset=True):
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log(
                    "Warning: skipping Gym environment monitoring since snapshot_dir not configured."
                )
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)

        # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
        # the time limit specified for each environment has been passed and
        # therefore the environment is not Markovian (terminal condition depends
        # on time rather than state).
        env = env.env

        self.env = env
        self.env_id = env.spec.id

        assert not (not record_log and record_video)

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env,
                                            log_dir,
                                            video_callable=video_schedule,
                                            force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        logger.log("observation space: {}".format(self._observation_space))
        self._action_space = convert_gym_space(env.action_space)
        logger.log("action space: {}".format(self._action_space))
        self._horizon = env.spec.tags[
            'wrapper_config.TimeLimit.max_episode_steps']
        self._log_dir = log_dir
        self._force_reset = force_reset
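A minimal usage sketch of this constructor. The class name (a garage/rllab-style GymEnv wrapper) is an assumption, since the snippet only shows __init__:

    # Hypothetical instantiation; monitoring is only enabled when a snapshot
    # directory is configured (or log_dir is passed) and record_log is True.
    env = GymEnv("CartPole-v0",
                 record_log=True,      # write gym.wrappers.Monitor logs under <snapshot_dir>/gym_log
                 record_video=False,   # no videos -> NoVideoSchedule is used
                 force_reset=True)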
Example #2
 def record_policy(self,
                   env,
                   policy,
                   itr,
                   n_rollout=1,
                   path=None,
                   postfix=""):
     # Rollout
     if path is None:
         path = logger.get_snapshot_dir().rstrip(
             os.sep) + os.sep + "videos" + os.sep + "itr_%05d%s.mp4" % (
                 itr, postfix)
     path_directory = path.rsplit(os.sep, 1)[0]
     if not os.path.exists(path_directory):
         os.makedirs(path_directory, exist_ok=True)
     for _ in range(n_rollout):
         obs = env.reset()
         recorder = VideoRecorder(env.env, path=path)
         while True:
             # env.render()
             # import pdb; pdb.set_trace()
             action, _ = policy.get_action(obs)
             obs, _, done, _ = env.step(action)
             recorder.capture_frame()
             if done:
                 break
         recorder.close()
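Note that every rollout is recorded to the same `path`, so with n_rollout > 1 each recording overwrites the previous one. A minimal call sketch (assuming `self` is the trainer object this method belongs to, and `env`/`policy` follow the interfaces used above):

    # Records a rollout to <snapshot_dir>/videos/itr_00007_final.mp4
    self.record_policy(env, policy, itr=7, postfix="_final")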
Example #3
    def h5_prepare_file(self, filename, args):
        # Assuming the following structure / indexing of the H5 file
        # teacher_info/
        #   - [teacher_indx]:
        #        - description
        #        - params
        # traj_data/
        #   - [teacher_indx] * [iter_indx] * traj_data

        # Making names and opening h5 file
        if filename is None:
            self.h5_filename = os.path.join(logger.get_snapshot_dir(),
                                            "trajectories.h5")
        else:  # capability to store multiple teachers in a single file
            self.h5_filename = filename
        if not self.h5_filename.endswith('.h5'):
            self.h5_filename += '.h5'

        if os.path.exists(self.h5_filename):
            # input("WARNING: output file %s already exists and will be appended. Press ENTER to continue. (exit with ctrl-C)" % self.h5_filename)
            print(
                "WARNING: output file %s already exists and will be appended" %
                self.h5_filename)
        self.hdf = h5py.File(self.h5_filename, "a")

        # Creating proper groups
        groups = list(self.hdf.keys())
        # Groups to create: tuples of (group_name, structure_description)
        create_groups = [("teacher_info", "Runs indices(Teachers)"),
                         ("traj_data",
                          "Runs(Teachers) x Iterations x Trajectories x Data")]

        for group in create_groups:
            if group[0] not in groups:
                self.hdf.create_group(group[0])
                self.hdf[group[0]].attrs["structure"] = np.string_(group[1])

        # Checking if other teachers' results already exist in the h5 file
        # If they exist - just append
        teacher_indices = list(self.hdf["traj_data"].keys())
        if not teacher_indices:
            self.teacher_indx = 0
        else:
            teacher_indices = [int(indx) for indx in teacher_indices]
            teacher_indices = np.sort(teacher_indices)
            self.teacher_indx = teacher_indices[-1] + 1
            print("%s : Appended teacher index: " % self.__class__.__name__,
                  self.teacher_indx)

        self.hdf.create_group("traj_data/" +
                              h5u.indx2str(self.teacher_indx))  #Teacher group

        ## Saving info about the teacher
        teacher_info_group = "teacher_info/" + h5u.indx2str(
            self.teacher_indx) + "/"
        self.hdf.create_group(teacher_info_group)  #Teacher group
        h5u.add_dict(self.hdf, self.args, groupname=teacher_info_group)

        return self.hdf
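A small sketch of how the resulting file could be inspected with h5py. The group names and the "structure" attribute come from the code above; the filename assumes the default (trajectories.h5 in the snapshot directory):

    import h5py

    with h5py.File("trajectories.h5", "r") as hdf:
        # Prints b'Runs(Teachers) x Iterations x Trajectories x Data'
        print(hdf["traj_data"].attrs["structure"])
        for teacher_indx in hdf["traj_data"]:  # one zero-padded group per teacher/run
            print(teacher_indx, list(hdf["traj_data"][teacher_indx].keys()))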
Example #4
 def train(self, sess=None, snapshot_mode=None):
     if sess is None:
         sess = tf.Session()
         sess.__enter__()
     self._tf_sess = sess
     if snapshot_mode is not None:
         logger.set_snapshot_mode(snapshot_mode)
     last_average_return = super(AdaptiveSkillAcquisition, self).train(sess=sess)
     return {
         'last_average_return': last_average_return,
         'snapshot_dir': logger.get_snapshot_dir()
     }
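A minimal call sketch (assuming `algo` is an AdaptiveSkillAcquisition instance constructed elsewhere). If no session is passed, the method opens and enters its own tf.Session:

    result = algo.train(snapshot_mode='last')
    print(result['last_average_return'], result['snapshot_dir'])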
Example #6
 def save_rendered_plot(self):
     plt.scatter(*self.agent_pos, marker='x', s=50,
                 c='r')  # to mark agent's end position
     directory = logger.get_snapshot_dir()
     if directory is None:
         directory = '~/garage/data/local/asa/instant-run'
     directory = os.path.expanduser(directory)
     if not os.path.isdir(directory):
         os.makedirs(directory)
     base = 'demo_run_'
     try:
         i = 1 + max([
             int(f[len(base):f.find('.')])
             for f in os.listdir(directory) if f.startswith(base)
         ])
     except ValueError:
         i = 0
     plt.savefig(os.path.join(directory, '{}{}.png'.format(base, i)))
Example #7
 def save_samples(self, itr, samples_data):
     with open(osp.join(logger.get_snapshot_dir(), 'samples_%i.pkl' % itr),
               "wb") as fout:
         pickle.dump(samples_data, fout)
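The matching read-back, assuming the same `logger` module and iteration index are in scope:

    import os.path as osp
    import pickle

    with open(osp.join(logger.get_snapshot_dir(), 'samples_%i.pkl' % itr),
              "rb") as fin:
        samples_data = pickle.load(fin)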
Example #8
# Note: argparse's type=bool treats any non-empty string (even "False") as True;
# other scripts in this collection use type=ast.literal_eval for boolean flags.
parser.add_argument('--log_tabular_only', type=bool, default=False)
parser.add_argument('--log_dir', type=str, default='./Data/AST/GA/Test')
parser.add_argument('--args_data', type=str, default=None)
args = parser.parse_args()

# Create the logger
log_dir = args.log_dir

tabular_log_file = osp.join(log_dir, args.tabular_log_file)
text_log_file = osp.join(log_dir, args.text_log_file)
params_log_file = osp.join(log_dir, args.params_log_file)

logger.log_parameters_lite(params_log_file, args)
# logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
logger.set_snapshot_dir(log_dir)
logger.set_snapshot_mode(args.snapshot_mode)
logger.set_snapshot_gap(args.snapshot_gap)
logger.set_log_tabular_only(args.log_tabular_only)
logger.push_prefix("[%s] " % args.exp_name)

seed = 0
top_k = 10
max_path_length = 100

top_paths = BPQ.BoundedPriorityQueue(top_k)

np.random.seed(seed)
tf.set_random_seed(seed)
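This script captures prev_snapshot_dir and prev_mode but never restores them; the run_experiment scripts further below restore the logger state once training finishes, roughly as follows:

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.pop_prefix()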
Example #9
    def __init__(self,
                 env,
                 policy,
                 baseline,
                 scope=None,
                 n_itr=500,
                 max_samples=None,
                 start_itr=0,
                 batch_size=5000,
                 max_path_length=500,
                 discount=0.99,
                 gae_lambda=1,
                 plot=False,
                 pause_for_plot=False,
                 center_adv=True,
                 positive_adv=False,
                 store_paths=False,
                 paths_h5_filename=None,
                 whole_paths=True,
                 fixed_horizon=False,
                 sampler_cls=None,
                 sampler_args=None,
                 force_batch_sampler=False,
                 play_every_itr=None,
                 record_every_itr=None,
                 record_end_ep_num=3,
                 **kwargs):
        """
        :param env: Environment
        :param policy: Policy
        :type policy: Policy
        :param baseline: Baseline
        :param scope: Scope for identifying the algorithm. Must be specified if
         running multiple algorithms simultaneously, each using different
         environments and policies.
        :param n_itr: Max number of iterations.
        :param max_samples: If not None, exit once this many environment
         samples have been collected (overrides n_itr).
        :param start_itr: Starting iteration.
        :param batch_size: Number of samples per iteration.
        :param max_path_length: Maximum length of a single rollout.
        :param discount: Discount.
        :param gae_lambda: Lambda used for generalized advantage estimation.
        :param plot: Plot evaluation run after each iteration.
        :param pause_for_plot: Whether to pause before continuing when plotting.
        :param center_adv: Whether to rescale the advantages so that they have
         mean 0 and standard deviation 1.
        :param positive_adv: Whether to shift the advantages so that they are
         always positive. When used in conjunction with center_adv the
         advantages will be standardized before shifting.
        :param store_paths: Whether to save all paths data to the snapshot.
        :return:
        """
        self.args = locals()
        del self.args["kwargs"]
        del self.args["self"]
        self.args = {**self.args, **kwargs}  #merging dicts

        self.env = env
        try:
            self.env.env.save_dyn_params(
                filename=logger.get_snapshot_dir().rstrip(os.sep) + os.sep +
                "dyn_params.yaml")
        except Exception:
            print("WARNING: BatchPolOpt: couldn't save dynamics params")
            # import pdb; pdb.set_trace()
        from gym.wrappers import Monitor
        # self.env_rec = Monitor(self.env.env, logger.get_snapshot_dir() + os.sep + "videos", force=True)

        self.policy = policy
        self.baseline = baseline
        self.scope = scope
        self.n_itr = n_itr
        self.max_samples = max_samples
        self.start_itr = start_itr
        self.batch_size = batch_size
        self.max_path_length = max_path_length
        self.discount = discount
        self.gae_lambda = gae_lambda
        self.plot = plot
        self.pause_for_plot = pause_for_plot
        self.center_adv = center_adv
        self.positive_adv = positive_adv
        self.store_paths = store_paths
        self.whole_paths = whole_paths
        self.fixed_horizon = fixed_horizon
        self.play_every_itr = play_every_itr
        self.record_every_itr = record_every_itr
        self.record_end_ep_num = record_end_ep_num
        if sampler_cls is None:
            if self.policy.vectorized and not force_batch_sampler:
                sampler_cls = OnPolicyVectorizedSampler
            else:
                sampler_cls = BatchSampler
        if sampler_args is None:
            sampler_args = dict()
        self.sampler = sampler_cls(self, **sampler_args)
        self.init_opt()

        ## Initialization of HDF5 logging of trajectories
        if self.store_paths:
            self.h5_prepare_file(filename=paths_h5_filename, args=self.args)

        ## Register cleanup handler to run at program exit
        atexit.register(self.clean_at_exit)
Example #10
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition where one of the children in the
    # parallel sampler exits without releasing a lock after it catches SIGINT.
    #
    # The parent later tries to acquire the same lock to proceed with its
    # cleanup, but it remains asleep waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                children = garage.plotter.Plotter.get_plotters()
                children += garage.tf.plotter.Plotter.get_plotters()
                if args.n_parallel > 0:
                    children += [parallel_sampler]
                child_proc_shutdown(children)
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #11
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)


        ## Construct PathTrie and find missing skill description
        # This is basically ASA.decide_new_skill
        min_length = 3
        max_length = 5
        action_map = {0: 's', 1: 'L', 2: 'R'}
        min_f_score = 1
        max_results = 10
        aggregations = []  # sublist of ['mean', 'most_freq', 'nearest_mean', 'medoid'] or 'all'

        paths = saved_data['paths']
        path_trie = PathTrie(saved_data['hrl_policy'].num_skills)
        for path in paths:
            actions = path['actions'].argmax(axis=1).tolist()
            observations = path['observations']
            path_trie.add_all_subpaths(
                    actions,
                    observations,
                    min_length=min_length,
                    max_length=max_length
            )
        logger.log('Searched {} rollouts'.format(len(paths)))

        frequent_paths = path_trie.items(
                action_map=action_map,
                min_count=10,  # len(paths) * 2
                min_f_score=min_f_score,
                max_results=max_results,
                aggregations=aggregations
        )
        logger.log('Found {} frequent paths: [index, actions, count, f-score]'.format(len(frequent_paths)))
        for i, f_path in enumerate(frequent_paths):
            logger.log('    {:2}: {:{pad}}\t{}\t{:.3f}'.format(
                i,
                f_path['actions_text'],
                f_path['count'],
                f_path['f_score'],
                pad=max_length))

        top_subpath = frequent_paths[0]
        start_obss = top_subpath['start_observations']
        end_obss   = top_subpath['end_observations']



        ## Prepare elements for training
        # Environment
        base_env = saved_data['env'].env.env  # <NormalizedEnv<MinibotEnv instance>>
        skill_learning_env = TfEnv(
                SkillLearningEnv(
                    # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                    env=base_env,
                    start_obss=start_obss,
                    end_obss=end_obss
                )
        )

        # Skill policy
        hrl_policy = saved_data['hrl_policy']
        new_skill_policy, new_skill_id = hrl_policy.create_new_skill(
                end_obss=end_obss
        )

        # Baseline - clone baseline specified in low_algo_kwargs, or top-algo's baseline
        low_algo_kwargs = dict(saved_data['low_algo_kwargs'])
        baseline_to_clone = low_algo_kwargs.get('baseline', saved_data['baseline'])
        baseline = Serializable.clone(  # to create blank baseline
                obj=baseline_to_clone,
                name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        low_algo_kwargs['baseline'] = baseline
        low_algo_cls = saved_data['low_algo_cls']

        # Set custom training params (should've been set in asa_basic_run)
        low_algo_kwargs['batch_size'] = 2500
        low_algo_kwargs['max_path_length'] = 50
        low_algo_kwargs['n_itr'] = 500

        # Algorithm
        algo = low_algo_cls(
            env=skill_learning_env,
            policy=new_skill_policy,
            **low_algo_kwargs
        )

        # Logger parameters
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        # No need to change snapshot dir in this script, it is used in ASA-algo.create_and_train_new_skill()
        # logger.set_snapshot_dir(os.path.join(
        #         logger_snapshot_dir_before,
        #         'skill{}'.format(new_skill_id)
        # ))
        logger.set_snapshot_mode('none')
        logger.set_tensorboard_step_key('Iteration')


        ## Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=tf_session)



        ## Save new policy and its end_obss (we'll construct the skill stopping function
        #  from them manually in asa_resume_with_new_skill.py)
        out_file = os.path.join(logger.get_snapshot_dir(), 'final.pkl')
        with open(out_file, 'wb') as file:
            out_data = {
                    'policy': new_skill_policy,
                    'subpath': top_subpath
            }
            dill.dump(out_data, file)

        # Restore logger parameters
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
Example #12
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for stub objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    assert (os.environ.get("JOBLIB_START_METHOD", None) == "forkserver")
    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        from garage.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                if args.n_parallel > 0:
                    parallel_sampler.terminate()
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #13
def run_task_continue(task_param):
    """
    Wrap PPO training task in the run_task function, continuing from a saved
    params.pkl snapshot.

    :param task_param: dictionary of task/experiment parameters.
    :return:
    """
    from garage.tf.baselines import GaussianMLPBaseline
    from garage.tf.envs import TfEnv
    from garage.tf.policies import GaussianMLPPolicy, DeterministicMLPPolicy, GaussianGRUPolicy, GaussianLSTMPolicy

    from quad_train.algos.cem import CEM
    from quad_train.algos.cma_es import CMAES
    from quad_train.algos.ppo import PPO
    from quad_train.algos.trpo import TRPO

    import sys
    import os

    import garage.misc.logger as logger
    import joblib

    pkl_file = logger.get_snapshot_dir().rstrip(os.sep) + os.sep + "params.pkl"
    if os.path.isfile(pkl_file):
        print("WARNING: Loading and continuing from %s snapshot ..." %
              logger.get_snapshot_dir().rstrip(os.sep))
    else:
        raise ValueError("ERROR: params.pkl not found in %s" % pkl_file)

    import tensorflow as tf

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Unpack the snapshot
        snapshot = joblib.load(pkl_file)

        env = snapshot["env"]
        policy = snapshot["policy"]
        itr = snapshot["itr"]

        if args.new_env:  # note: `args` is assumed to be a module-level argparse namespace
            from quad_sim.quadrotor import QuadrotorEnv
            env = TfEnv(QuadrotorEnv(**task_param["env_param"]))

        task_param["alg_param"]["start_itr"] = itr + 1

        del task_param["env"]
        del task_param["env_param"]
        del task_param["policy_class"]
        del task_param["policy_param"]

        if task_param["alg_class"] != "CEM" and task_param[
                "alg_class"] != "CMAES":
            baseline = snapshot["baseline"]
            del task_param["baseline_class"]
            del task_param["baseline_param"]

            algo = locals()[task_param["alg_class"]](env=env,
                                                     policy=policy,
                                                     baseline=baseline,
                                                     **task_param["alg_param"])
        else:
            algo = locals()[task_param["alg_class"]](env=env,
                                                     policy=policy,
                                                     **task_param["alg_param"])

        del task_param["alg_class"]
        del task_param["alg_param"]

        # Check that we used all parameters:
        # this helps reveal situations where you thought you set a certain
        # parameter but in fact misspelled it, so it was silently ignored.
        del task_param["exp_name"]  # This is probably generated by garage
        assert task_param == {}, "ERROR: Some of the parameter values were not used: %s" % str(
            task_param)

        algo.train(sess=sess, step=itr + 1)
Example #14
def plot_state(self, name='sensors', state=None):
    if state is not None:
        self.wrapped_env.reset(state)

    structure = self.__class__.MAZE_STRUCTURE
    size_scaling = self.__class__.MAZE_SIZE_SCALING
    # duplicate cells to plot the maze
    structure_plot = np.zeros(
        ((len(structure) - 1) * 2, (len(structure[0]) - 1) * 2))
    for i in range(len(structure)):
        for j in range(len(structure[0])):
            cell = structure[i][j]
            if type(cell) is not int:
                cell = 0.3 if cell == 'r' else 0.7
            if i == 0:
                if j == 0:
                    structure_plot[i, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[i, 2 * j - 1] = cell
                else:
                    structure_plot[i, 2 * j - 1:2 * j + 1] = cell
            elif i == len(structure) - 1:
                if j == 0:
                    structure_plot[2 * i - 1, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[2 * i - 1, 2 * j - 1] = cell
                else:
                    structure_plot[2 * i - 1, 2 * j - 1:2 * j + 1] = cell
            else:
                if j == 0:
                    structure_plot[2 * i - 1:2 * i + 1, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[2 * i - 1:2 * i + 1, 2 * j - 1] = cell
                else:
                    structure_plot[2 * i - 1:2 * i + 1,
                                   2 * j - 1:2 * j + 1] = cell

    fig, ax = plt.subplots()
    im = ax.pcolor(-np.array(structure_plot),
                   cmap='gray',
                   edgecolor='black',
                   linestyle=':',
                   lw=1)
    x_labels = list(range(len(structure[0])))
    y_labels = list(range(len(structure)))
    ax.grid(True)  # eliminate this to avoid inner lines

    ax.xaxis.set(ticks=2 * np.arange(len(x_labels)), ticklabels=x_labels)
    ax.yaxis.set(ticks=2 * np.arange(len(y_labels)), ticklabels=y_labels)

    obs = self.get_current_maze_obs()

    # coordinates are relative to the initial torso position
    robot_xy = np.array(self.wrapped_env.get_body_com("torso")[:2])
    # for Ant this is computed with atan2, which gives [-pi, pi]
    ori = self.get_ori()

    # compute origin cell (i_o, j_o) coordinates and its center (x_o, y_o)
    # (with 0, 0 in the top-right corner of the structure)
    o_xy = np.array(self._find_robot())  # this is (self.init_torso_x, self.init_torso_y): center of the cell
    o_ij = (o_xy / size_scaling).astype(int)  # position in the grid

    o_xy_plot = o_xy / size_scaling * 2
    robot_xy_plot = o_xy_plot + robot_xy / size_scaling * 2

    plt.scatter(*robot_xy_plot)

    for ray_idx in range(self._n_bins):
        if obs[ray_idx]:
            length_wall = self._sensor_range - obs[ray_idx] * self._sensor_range
        else:
            length_wall = 1e-6
        ray_ori = ori - self._sensor_span * 0.5 + ray_idx / (
            self._n_bins - 1) * self._sensor_span
        if ray_ori > math.pi:
            ray_ori -= 2 * math.pi
        elif ray_ori < -math.pi:
            ray_ori += 2 * math.pi
        # find the end point wall
        end_xy = (
            robot_xy + length_wall *
            np.array([math.cos(ray_ori), math.sin(ray_ori)]))
        end_xy_plot = (o_ij + end_xy / size_scaling) * 2
        plt.plot([robot_xy_plot[0], end_xy_plot[0]],
                 [robot_xy_plot[1], end_xy_plot[1]], 'r')

        if obs[ray_idx + self._n_bins]:
            length_goal = self._sensor_range - obs[
                ray_idx + self._n_bins] * self._sensor_range
        else:
            length_goal = 1e-6
        ray_ori = ori - self._sensor_span * 0.5 + ray_idx / (
            self._n_bins - 1) * self._sensor_span
        # find the end point goal
        end_xy = (
            robot_xy + length_goal *
            np.array([math.cos(ray_ori), math.sin(ray_ori)]))
        end_xy_plot = (o_ij + end_xy / size_scaling) * 2
        plt.plot([robot_xy_plot[0], end_xy_plot[0]],
                 [robot_xy_plot[1], end_xy_plot[1]], 'g')

    log_dir = logger.get_snapshot_dir()
    ax.set_title('sensors: ' + name)

    plt.savefig(osp.join(log_dir, name + '_sensors.png'))  # saves the current figure
    plt.close()
Example #15
    def create_and_train_new_skill(self, skill_subpath):
        """
        Create and train a new skill based on given subpath. The new skill policy and
        ID are returned, and also saved in self._hrl_policy.
        """
        ## Prepare elements for training
        # Environment
        skill_learning_env = TfEnv(
                SkillLearningEnv(
                    # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                    env=self.env.env.env,
                    start_obss=skill_subpath['start_observations'],
                    end_obss=skill_subpath['end_observations']
                )
        )

        # Skill policy
        new_skill_pol, new_skill_id = self._hrl_policy.create_new_skill(skill_subpath['end_observations'])  # blank policy to be trained

        # Baseline - clone baseline specified in low_algo_kwargs, or top-algo's baseline
        #   We need to clone baseline, as each skill policy must have its own instance
        la_kwargs = dict(self._low_algo_kwargs)
        baseline_to_clone = la_kwargs.get('baseline', self.baseline)
        baseline = Serializable.clone(  # to create blank baseline
                obj=baseline_to_clone,
                name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        la_kwargs['baseline'] = baseline

        # Algorithm
        algo = self._low_algo_cls(
                env=skill_learning_env,
                policy=new_skill_pol,
                **la_kwargs
        )

        # Logger parameters
        logger.dump_tabular(with_prefix=False)
        logger.log('Launching training of the new skill')
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        logger.set_snapshot_dir(os.path.join(
                logger_snapshot_dir_before,
                'skill{}'.format(new_skill_id)
        ))
        logger.set_snapshot_mode('none')
        # logger.set_snapshot_gap(max(1, np.floor(la_kwargs['n_itr'] / 10)))
        logger.push_tabular_prefix('Skill{}/'.format(new_skill_id))
        logger.set_tensorboard_step_key('Iteration')

        # Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=self._tf_sess)

        # Restore logger parameters
        logger.pop_tabular_prefix()
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
        logger.log('Training of the new skill finished')

        return new_skill_pol, new_skill_id
Example #16
    def _plot_visitations(self, paths, opts=None):
        """
        Plot visitation graphs, i.e. all paths in the batch stacked into one plot.
        :param paths: paths statistics (dict)
        :param opts: plotting options:
                {'save': directory to save, True for default directory, or False to disable,
                 'live': <boolean>,
                 'alpha': <0..1> opacity of each plotted path,
                 'noise': <0..1> amount of noise added to distinguish individual paths}
        """
        if opts is None:
            opts = dict()
        if opts.get('live', False):
            plt.figure('Paths')
        else:
            plt.ioff()
        plt.clf()

        # Common plot opts
        m = self.map
        plt.tight_layout()
        plt.xlim(-0.5, self.n_col - 0.5)
        plt.ylim(-0.5, self.n_row - 0.5)
        plt.xticks([], [])
        plt.yticks([], [])
        plt.gca().set_aspect('equal')

        # # Grid
        # x_grid = np.arange(self.n_col + 1) - 0.5
        # y_grid = np.arange(self.n_row + 1) - 0.5
        # plt.plot(x_grid, np.stack([y_grid] * x_grid.size), ls='-',
        #          c='k', lw=1, alpha=0.8)
        # plt.plot(np.stack([x_grid] * y_grid.size), y_grid, ls='-',
        #          c='k', lw=1, alpha=0.8)

        # Coins, holes, starts, goals and walls
        coins = self._get_pos_as_xy(np.argwhere(m == 'C').T)
        holes = self._get_pos_as_xy(np.argwhere(m == 'H').T)
        starts = self._get_pos_as_xy(np.argwhere(m == 'S').T)
        goals = self._get_pos_as_xy(np.argwhere(m == 'G').T)
        walls = self._get_pos_as_xy(np.argwhere(m == 'W').T)
        plt.scatter(*coins,
                    c='gold',
                    marker='o',
                    s=150,
                    zorder=10,
                    edgecolors='black')
        plt.scatter(*holes, c='red', marker='X', s=100, zorder=10)
        plt.gca().add_collection(
            PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in starts.T],
                            color='navajowhite'))
        plt.gca().add_collection(
            PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in goals.T],
                            color='lightgreen'))
        plt.gca().add_collection(
            PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in walls.T],
                            color='navy'))

        # Plot paths
        alpha = opts.get('alpha', 0.1)
        noise = opts.get('noise', 0.1)
        for path in paths:
            data = path['env_infos']
            # Concat subpaths from HRL rollout
            if 'prev_pos_xy' not in data:
                data = SubpolicyPathInfo.concat_subpath_infos(
                    path['env_infos']['subpath_infos'])['env_infos']
            # Starting position
            start_pos = data['prev_pos_xy'][:1].T
            # All others
            all_pos = data['next_pos_xy'].T
            all_pos = np.c_[start_pos, all_pos]
            all_pos = all_pos + np.random.normal(size=all_pos.shape,
                                                 scale=noise)
            # Colorful line collection
            points = all_pos.T.reshape(-1, 1, 2)
            segments = np.concatenate([points[:-1], points[1:]], axis=1)
            lc = LineCollection(segments,
                                cmap=plt.get_cmap('jet'),
                                alpha=alpha)
            lc.set_array(np.arange(all_pos.shape[-1]))
            plt.gca().add_collection(lc)

        # Save paths figure
        folder = opts.get('save', False)
        if folder:
            if isinstance(folder, str):
                folder = os.path.expanduser(folder)
                if not os.path.isdir(folder):
                    os.makedirs(folder)
            else:
                folder = logger.get_snapshot_dir()
            plt.savefig(
                os.path.join(
                    folder,
                    'visitation{:0>3d}.png'.format(self.visitation_plot_num)))
            self.visitation_plot_num += 1

        # Live plotting
        if opts.get('live', False):
            plt.gcf().canvas.draw()
            plt.waitforbuttonpress(timeout=0.001)
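A minimal call sketch using the options documented in the docstring (the concrete values are illustrative only):

    self._plot_visitations(
        paths,
        opts={'save': True,    # save into logger.get_snapshot_dir()
              'live': False,   # no interactive window
              'alpha': 0.1,    # opacity of each plotted path
              'noise': 0.1})   # jitter to distinguish overlapping paths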