Example No. 1
def main(args_list: list = None):
    if args_list is None:
        args_list = []
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args_list)
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available())

    if hasattr(args, 'save_buffer') and args.save_buffer:
        os.makedirs(args.main_save_dir, exist_ok=True)

    learner = Learner(args)

    learner.train()
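Every entry point in this listing follows the same argparse pattern: a thin top-level parser that only knows about --env-type, with every remaining flag forwarded untouched to the environment-specific args_*.get_args(rest_args) module. A minimal standalone sketch of that split, using only the standard library (the --seed flag below is purely illustrative, not an option confirmed by this repo):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env-type', default='point_robot_sparse')
args, rest_args = parser.parse_known_args(['--env-type', 'cheetah_vel', '--seed', '7'])
print(args.env_type)   # -> 'cheetah_vel'
print(rest_args)       # -> ['--seed', '7'], later handed to args_cheetah_vel.get_args(rest_args)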
Example No. 2
def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    parser.add_argument('--env-type', default='point_robot_wind')
    # parser.add_argument('--env-type', default='escape_room')

    args, rest_args = parser.parse_known_args()
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    elif env == 'escape_room':
        args = args_point_robot_barrier.get_args(rest_args)
    elif env == 'point_robot_wind':
        args = args_point_robot_rand_params.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)

    dataset, goals = off_utl.load_dataset(data_dir=args.data_dir, args=args, arr_type='numpy')
    # dataset, goals = off_utl.load_dataset(args)
    if args.hindsight_relabelling:
        print('Perform reward relabelling...')
        dataset, goals = off_utl.mix_task_rollouts(dataset, env, goals, args)

    if args.policy_replaying:
        mix_dataset, mix_goals = off_utl.load_replaying_dataset(data_dir=args.replaying_data_dir, args=args)
        print('Perform policy replaying...')
        dataset, goals = off_utl.mix_policy_rollouts(dataset, goals, mix_dataset, mix_goals, args)

    # vis test tasks
    # vis_train_tasks(env.unwrapped, goals)     # not with GridNavi

    if args.save_model:
        dir_prefix = (args.save_dir_prefix
                      if getattr(args, 'save_dir_prefix', None) is not None
                      else '')
        args.full_save_path = os.path.join(args.save_dir, args.env_name,
                                           dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, goals, args)
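When save_model is set, the entry point above writes its config into a per-run directory named after the environment plus a timestamp. A standalone sketch of that directory layout, assuming placeholder values for save_dir and env_name (the real values come from the parsed args):

import datetime
import os

save_dir, env_name, dir_prefix = 'results', 'PointRobotSparse-v0', ''  # placeholders
full_save_path = os.path.join(
    save_dir, env_name,
    dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
os.makedirs(full_save_path, exist_ok=True)
print(full_save_path)  # e.g. results/PointRobotSparse-v0/__05_02_14_31_09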
Example No. 3
def offline_experiment(doodad_config, variant):
    save_doodad_config(doodad_config)
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args=[])
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    vae_args = config_utl.load_config_file(
        os.path.join(args.vae_dir, args.env_name, args.vae_model_name,
                     'online_config.json'))
    args = config_utl.merge_configs(
        vae_args, args)  # order of input to this function is important

    # Transform data BAMDP (state relabelling)
    if args.transform_data_bamdp:
        # load VAE for state relabelling
        vae_models_path = os.path.join(args.vae_dir, args.env_name,
                                       args.vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)
        # load data and relabel
        save_data_path = os.path.join(args.main_data_dir, args.env_name,
                                      args.relabelled_data_dir)
        os.makedirs(save_data_path)
        dataset, goals = off_utl.load_dataset(data_dir=args.data_dir,
                                              args=args,
                                              arr_type='numpy')
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(
            dataset, vae, args)
        # save relabelled data
        off_utl.save_dataset(save_data_path, bamdp_dataset, goals)

    learner = OfflineMetaLearner(args)

    learner.train()
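One detail worth noting in offline_experiment: os.makedirs(save_data_path) is called without exist_ok=True, so the state-relabelling step fails fast if the relabelled-data directory already exists instead of silently overwriting a previous run (that reading is an assumption; it may simply be an omission). A standalone illustration of the failure mode:

import os
import tempfile

save_data_path = os.path.join(tempfile.mkdtemp(), 'relabelled_data')  # throwaway location
os.makedirs(save_data_path)       # first call: directory is created
try:
    os.makedirs(save_data_path)   # second call: raises because it already exists
except FileExistsError as err:
    print('refusing to overwrite:', err)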
Example No. 4
def main(args_list: list = None):
    if args_list is None:
        args_list = []
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='gridworld')
    args, rest_args = parser.parse_known_args(args_list)
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot':
        args = args_point_robot.get_args(rest_args)
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle':
        args = args_ant_semicircle.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    # make sure we have log directories
    try:
        os.makedirs(args.agent_log_dir)
    except OSError:
        files = glob.glob(os.path.join(args.agent_log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)
    eval_log_dir = args.agent_log_dir + "_eval"
    try:
        os.makedirs(eval_log_dir)
    except OSError:
        files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)

    # set gpu
    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    # start training
    if args.output_file_prefix == 'only_sac':
        learner = MetaLearnerOnlySAC(args)
    elif args.output_file_prefix == 'sacmer':
        learner = MetaLearnerSACMER(args)
    else:
        raise NotImplementedError

    learner.train()
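The log-directory handling above relies on os.makedirs raising OSError when the directory already exists, in which case any stale *.monitor.csv files from a previous run are deleted instead. A standalone sketch of that reset pattern ('logs/agent' is a placeholder path):

import glob
import os

agent_log_dir = 'logs/agent'  # placeholder
try:
    os.makedirs(agent_log_dir)
except OSError:
    # directory already exists: clear old monitor files so new logs start clean
    for f in glob.glob(os.path.join(agent_log_dir, '*.monitor.csv')):
        os.remove(f)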
Example No. 5
def main():
    for i in range(10):
        parser = argparse.ArgumentParser()
        # parser.add_argument('--env-type', default='gridworld')
        # parser.add_argument('--env-type', default='point_robot_sparse')
        # parser.add_argument('--env-type', default='cheetah_vel')
        # parser.add_argument('--env-type', default='ant_semicircle_sparse')
        parser.add_argument('--env-type', default='point_robot_wind')
        # parser.add_argument('--env-type', default='escape_room')

        args, rest_args = parser.parse_known_args()
        env = args.env_type

        # --- GridWorld ---
        if env == 'gridworld':
            args = args_gridworld.get_args(rest_args)
        # --- PointRobot ---
        elif env == 'point_robot_sparse':
            args = args_point_robot_sparse.get_args(rest_args)
        elif env == 'point_robot_wind':
            args = args_point_robot_rand_params.get_args(rest_args)
        elif env == 'escape_room':
            args = args_point_robot_barrier.get_args(rest_args)
        # --- Mujoco ---
        elif env == 'cheetah_vel':
            args = args_cheetah_vel.get_args(rest_args)
        elif env == 'ant_semicircle_sparse':
            args = args_ant_semicircle_sparse.get_args(rest_args)

        set_gpu_mode(torch.cuda.is_available())

        print("start new round - seed ", i + 30)
        args.seed = i + 30

        if hasattr(args, 'save_buffer') and args.save_buffer:
            os.makedirs(args.main_save_dir, exist_ok=True)

        learner = Learner(args)

        learner.train()
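The loop above only assigns args.seed before constructing each Learner; the later _train_vae and _borel entry points seed Python, NumPy and Torch explicitly. For reference, a minimal per-round seeding sketch in that same style:

import random

import numpy as np
import torch

for i in range(10):
    seed = i + 30
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)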
Example No. 6
def collect_hindsight_data():
    parser = argparse.ArgumentParser()

    parser.add_argument('--env-type', default='point_robot_wind')
    #parser.add_argument('--env-type', default='escape_room')

    args, rest_args = parser.parse_known_args()
    env = args.env_type

    if env == 'point_robot_wind':
        args = args_point_robot_rand_params.get_args(rest_args)
    elif env == 'escape_room':
        args = args_point_robot_barrier.get_args(rest_args)

    # necessary args because we use VAE functions
    args.main_data_dir = args.main_save_dir
    args.trajectory_len = 50
    args.num_trajs_per_task = None

    args.num_rollouts = 10

    set_gpu_mode(torch.cuda.is_available())

    if hasattr(args, 'save_buffer') and args.save_buffer:
        os.makedirs(args.main_save_dir, exist_ok=True)

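    # goals are read from the original save_dir here, before save_dir is repointed
    # to "hindsight_data" a few lines below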
    _, goals = off_utl.load_dataset(data_dir=args.save_dir,
                                    args=args,
                                    arr_type='numpy')

    args.save_dir = "hindsight_data"
    args.save_data_path = os.path.join(args.main_data_dir, args.env_name,
                                       args.save_dir)
    os.makedirs(args.save_data_path)
    models_dir = './trained_agents'
    all_dirs = os.listdir(models_dir)
    for i, goal in enumerate(goals):
        print("start collect rollouts for task number ", i + 1)
        collect_rollout_per_goal(args, goal, all_dirs)
Example No. 7
def _train_vae(log_dir,
               offline_buffer_path,
               saved_tasks_path,
               env_type,
               seed,
               path_length,
               meta_episode_len,
               load_buffer_kwargs=None,
               **kwargs):
    with open(os.path.join(log_dir, 'test.txt'), 'w') as f:
        f.write("hello from train_vae_offline.py")
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    args, rest_args = parser.parse_known_args(args=extra_args)

    # --- Mujoco ---
    if env_type == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
        args.env_name = 'HalfCheetahVel-v0'
    elif env_type == 'ant_dir':
        # TODO: replace with ant_dir env
        args = args_ant_semicircle_sparse.get_args(rest_args)
        # note: this add_argument is a no-op here, since parse_known_args() already ran above
        # parser.add_argument('--env-name', default='AntSemiCircleSparse-v0')
        args.env_name = 'AntDir-v0'
    elif env_type == 'walker':
        args = args_walker_param.get_args(rest_args)
    elif env_type == 'hopper':
        args = args_hopper_param.get_args(rest_args)
    elif env_type == 'humanoid':
        args = args_humanoid_dir.get_args(rest_args)
    else:
        raise ValueError('Unknown env_type: {}'.format(env_type))

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)
    args.save_dir = os.path.join(log_dir, 'trained_vae')

    args.trajectory_len = path_length
    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']
    print("loading dataset")
    with open(os.path.join(log_dir, 'tmp1.txt'), 'w') as f:
        f.write("train_vae_offline.py: start loading dataset")
    dataset, goals = off_utl.load_pearl_buffer(
        pretrain_buffer_path=offline_buffer_path,
        tasks=tasks,
        add_done_info=env.add_done_info,
        path_length=path_length,
        meta_episode_len=meta_episode_len,
        **load_buffer_kwargs)
    with open(os.path.join(log_dir, 'tmp1.txt'), 'a') as f:
        f.write("train_vae_offline.py: done loading dataset")
    print("done loading dataset")
    for data in dataset:
        print(data[0].shape)

    dataset = [[x.astype(np.float32) for x in d] for d in dataset]

    if args.save_model:
        dir_prefix = (args.save_dir_prefix
                      if getattr(args, 'save_dir_prefix', None) is not None
                      else '')
        args.full_save_path = os.path.join(
            args.save_dir, args.env_name,
            dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, args)
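_train_vae above converts its **kwargs into synthetic CLI flags so they can flow through the same argparse machinery as real command-line input. A standalone sketch of that conversion (the flag names vae_lr and num_iters are illustrative, not options defined by this repo):

import argparse

def kwargs_to_flags(**kwargs):
    # flatten keyword arguments into ['--key', 'value', ...] pairs
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    return extra_args

flags = kwargs_to_flags(vae_lr=3e-4, num_iters=2000)
print(flags)  # ['--vae_lr', '0.0003', '--num_iters', '2000']

parser = argparse.ArgumentParser()
parser.add_argument('--vae_lr', type=float, default=1e-3)
known, rest = parser.parse_known_args(flags)
print(known.vae_lr, rest)  # 0.0003 ['--num_iters', '2000']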
Example No. 8
def _borel(
        log_dir,
        pretrained_vae_dir,
        env_type,
        transform_data_bamdp,
        seed,
        path_length,
        meta_episode_len,
        relabelled_data_dir=None,
        offline_buffer_path_to_save_to=None,
        offline_buffer_path='',
        saved_tasks_path='',
        debug=False,
        vae_model_name=None,
        load_buffer_kwargs=None,
        gpu_id=0,
        **kwargs,
):
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    parser = argparse.ArgumentParser()
    torch.autograd.set_detect_anomaly(True)

    if offline_buffer_path_to_save_to is None:
        offline_buffer_path_to_save_to = os.path.join(log_dir, 'transformed_data')

    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default=env_type)
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    args, rest_args = parser.parse_known_args(args=extra_args)
    args = env_name_to_args[env_type].get_args(rest_args)
    set_gpu_mode(torch.cuda.is_available() and args.use_gpu, gpu_id=gpu_id)

    if vae_model_name is None:
        vae_model_name = os.listdir(
            os.path.join(pretrained_vae_dir, args.env_name)
        )[0]

    vae_args = config_utl.load_config_file(os.path.join(pretrained_vae_dir, args.env_name,
                                                        vae_model_name, 'online_config.json'))
    args = config_utl.merge_configs(vae_args, args)     # order of input to this function is important
    # _, env = off_utl.expand_args(args)
    from environments.make_env import make_env
    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']
    args.presampled_tasks = tasks
    env = make_env(args.env_name,
                   args.max_rollouts_per_task,
                   presampled_tasks=tasks,
                   seed=args.seed)#,
                   # n_tasks=1)

    args.vae_dir = pretrained_vae_dir
    args.data_dir = None
    args.vae_model_name = vae_model_name
    if transform_data_bamdp:
        # Transform data BAMDP (state relabelling)
        # load VAE for state relabelling
        print("performing state-relabeling")
        vae_models_path = os.path.join(pretrained_vae_dir, args.env_name,
                                       vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)
        # load data and relabel
        os.makedirs(offline_buffer_path_to_save_to, exist_ok=True)
        dataset, goals = off_utl.load_pearl_buffer(
            offline_buffer_path,
            tasks,
            add_done_info=env.add_done_info,
            path_length=path_length,
            meta_episode_len=meta_episode_len,
            **load_buffer_kwargs
        )
        dataset = [[x.astype(np.float32) for x in d] for d in dataset]
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(dataset, vae, args)
        # save relabelled data
        print("saving state-relabeled data to ", offline_buffer_path_to_save_to)
        off_utl.save_dataset(offline_buffer_path_to_save_to, bamdp_dataset, goals)
        relabelled_data_dir = offline_buffer_path_to_save_to
    args.relabelled_data_dir = relabelled_data_dir
    args.max_rollouts_per_task = 3
    args.results_log_dir = log_dir

    if debug:
        print("DEBUG MODE ON")
        args.rl_updates_per_iter = 1
        args.log_interval = 1
    learner = OfflineMetaLearner(args)

    learner.train()
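Unlike the earlier entry points, _borel resolves its args module through a lookup table, env_name_to_args[env_type].get_args(rest_args), rather than an if/elif chain. In the repo the table maps env-type strings to args modules; the standalone sketch below substitutes plain functions for those modules just to illustrate the dispatch (all names and flags here are illustrative):

import argparse

def _cheetah_vel_args(rest_args):
    p = argparse.ArgumentParser()
    p.add_argument('--seed', type=int, default=0)  # illustrative flag
    return p.parse_args(rest_args)

def _point_robot_sparse_args(rest_args):
    p = argparse.ArgumentParser()
    p.add_argument('--seed', type=int, default=0)  # illustrative flag
    return p.parse_args(rest_args)

env_type_to_get_args = {
    'cheetah_vel': _cheetah_vel_args,
    'point_robot_sparse': _point_robot_sparse_args,
}

args = env_type_to_get_args['cheetah_vel'](['--seed', '3'])
print(args.seed)  # 3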