def get_data(N=10000, test_p=0.9, use_cached=True, render=False):
    """Load or generate flattened image observations from the 2D pusher env.

    A cached ``.npy`` file under ``/tmp`` (keyed by ``N``) is reused when
    ``use_cached`` is true and the file exists. Otherwise ``N`` frames are
    collected by stepping the environment with actions drawn uniformly from
    ``[-2, 2)`` (the environment is reset every 100 steps) and the result is
    written to that file.

    Args:
        N: Number of frames to collect; also part of the cache file name.
        test_p: Fraction of rows placed in the FIRST returned array. Despite
            the name this is the training share: the split index is
            ``int(N * test_p)``.
        use_cached: Reuse the cached file when it exists.
        render: Show every collected frame in an OpenCV window.

    Returns:
        ``(train_dataset, test_dataset)``: two float32 row-slices of the same
        ``(N, 3 * 84 * 84)`` array.
    """
    imsize = 84  # image side length handed to ImageMujocoEnv below
    filename = "/tmp/pusher2d_smallpuck_" + str(N) + ".npy"
    if use_cached and osp.isfile(filename):
        # Caches written by older versions are float64; convert them so both
        # branches return float32. copy=False skips a needless copy when the
        # cache is already float32.
        dataset = np.load(filename).astype(np.float32, copy=False)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        e = FullPusher2DEnv()
        e = ImageMujocoEnv(e, imsize, camera_name="topview", transpose=True, normalize=True)
        # Allocate float32 directly so the freshly generated data has the
        # same dtype as data loaded from the cache.
        dataset = np.zeros((N, 3 * imsize * imsize), dtype=np.float32)
        for i in range(N):
            if i % 100 == 0:
                e.reset()
            u = np.random.rand(3) * 4 - 2
            img, _, _, _ = e.step(u)
            dataset[i, :] = img
            if render:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now, "mean", dataset.mean())
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset
示例#2
0
def get_data(N=10000, test_p=0.9, use_cached=True, imsize=84):
    """Load or generate flattened Sawyer push images and split them in two.

    Args:
        N: Number of images; also part of the cache file name in ``/tmp``.
        test_p: Fraction of rows that goes into the FIRST returned array
            (the split index is ``int(N * test_p)``).
        use_cached: Load the cache file instead of simulating, if it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info`` holds the wrapped
        environment under ``'env'`` only when the data was generated in this
        call; it is empty when the cache was used.
    """
    info = {}
    filename = "/tmp/sawyer_" + str(N) + ".npy"
    cache_hit = use_cached and osp.isfile(filename)

    if not cache_hit:
        start = time.time()
        env = ImageMujocoEnv(
            SawyerPushXYEnv(),
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for row in range(N):
            # New goal and a fresh episode for every sample.
            inner = env.wrapped_env
            inner.set_goal(inner.sample_goal_for_rollout())
            env.reset()
            # 50 random steps on the unwrapped env before the image is taken.
            for _ in range(50):
                env.wrapped_env.step(env.wrapped_env.action_space.sample())
            observation = env.step(env.action_space.sample())[0]
            dataset[row, :] = observation
        print("done making training data", filename, time.time() - start)
        np.save(filename, dataset)
    else:
        dataset = np.load(filename)
        print("loaded data from saved file", filename)

    split = int(N * test_p)
    return dataset[:split, :], dataset[split:, :], info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
    env_kwargs=None,
):
    """Load or generate a VAE image dataset from ``SawyerPushXYVariableEnv``.

    Data source, in priority order: ``dataset_path`` (if given), the cache
    file in ``/tmp`` (if ``use_cached`` and it exists), otherwise a fresh
    simulation whose result is saved to the cache file.

    Args:
        N: Number of images to generate. Overwritten with the row count of
            the loaded array when ``dataset_path`` is given.
        test_p: Fraction of rows placed in the FIRST returned array (the
            split index is ``int(N * test_p)``).
        use_cached: Reuse the cache file when it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.
        show: Display every generated image in an OpenCV window.
        init_camera: Camera initialiser; its ``__name__`` is part of the
            cache file name.
        dataset_path: Optional path resolved through
            ``local_path_from_s3_or_local_path``.
        env_kwargs: Extra keyword arguments for the environment constructor.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` is set only
        when the data was generated in this call.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_push_variable{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYVariableEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Place the scene at a sampled goal and the hand at an
            # independently sampled position, then take one random step to
            # obtain the image observation.
            goal = env.sample_goal_for_rollout()
            hand_pos = env.sample_hand_xy()
            env.set_to_goal(goal, reset_hand=False)
            env.set_hand_xy(hand_pos)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview also
                # works for other image sizes.
                img = img.reshape(3, imsize, imsize).transpose()
                # Flip the first axis and reverse the channel axis for display.
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
    env_kwargs=None,
):
    """Load or generate a VAE image dataset from ``SawyerResetFreePushEnv``.

    Oracle means that we use `set_to_goal` rather than doing random rollouts:
    each sample is produced by setting the environment to a sampled goal and
    taking the image returned by ``reset()``.

    Data source, in priority order: ``dataset_path`` (if given), the cache
    file in ``/tmp`` (if ``use_cached`` and it exists), otherwise a fresh
    simulation whose result is saved to the cache file.

    Args:
        N: Number of images to generate. Overwritten with the row count of
            the loaded array when ``dataset_path`` is given.
        test_p: Fraction of rows placed in the FIRST returned array (the
            split index is ``int(N * test_p)``).
        use_cached: Reuse the cache file when it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.
        show: Display every generated image in an OpenCV window.
        init_camera: Camera initialiser; its ``__name__`` is part of the
            cache file name.
        dataset_path: Optional path resolved through
            ``local_path_from_s3_or_local_path``.
        env_kwargs: Extra keyword arguments for the environment constructor.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` is set only
        when the data was generated in this call.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_reset_free_push{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerResetFreePushEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.reset()
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview also
                # works for other image sizes.
                img = img.reshape(3, imsize, imsize).transpose()
                # Flip the first axis and reverse the channel axis for display.
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
示例#5
0
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
):
    """Load or generate a VAE image dataset from ``SawyerPushXYEasyEnv``.

    Each generated sample comes from a fresh episode: the environment is
    reset, 100 random actions are applied to the unwrapped environment, and
    the image returned by one more random step on the image wrapper is stored.

    Data source, in priority order: ``dataset_path`` (if given), the cache
    file in ``/tmp`` (if ``use_cached`` and it exists), otherwise a fresh
    simulation whose result is saved to the cache file.

    Args:
        N: Number of images to generate. Overwritten with the row count of
            the loaded array when ``dataset_path`` is given.
        test_p: Fraction of rows placed in the FIRST returned array (the
            split index is ``int(N * test_p)``).
        use_cached: Reuse the cache file when it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.
        show: Display every generated image in an OpenCV window.
        init_camera: Camera initialiser; its ``__name__`` is part of the
            cache file name.
        dataset_path: Optional path resolved through
            ``local_path_from_s3_or_local_path``.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` is set only
        when the data was generated in this call.
    """
    filename = "/tmp/sawyer_push_new_easy{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.reset()
            for _ in range(100):
                action = env.wrapped_env.action_space.sample()
                env.wrapped_env.step(action)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            # Progress output: one line per generated sample.
            print(i)
            if show:
                # Use imsize rather than a hard-coded 84 so the preview also
                # works for other image sizes.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
示例#6
0
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
):
    """Load or generate a VAE image dataset using OU-noise exploration.

    When generating, the environment is stepped ``N`` times with actions
    produced by an ``OUStrategy`` from randomly sampled raw actions. Every
    100 steps a new goal is sampled and set and the strategy is reset; the
    environment itself is not reset inside the loop.

    Data source, in priority order: ``dataset_path`` (if given), the cache
    file in ``/tmp`` (if ``use_cached`` and it exists), otherwise a fresh
    simulation whose result is saved to the cache file.

    Args:
        N: Number of images to generate. Overwritten with the row count of
            the loaded array when ``dataset_path`` is given.
        test_p: Fraction of rows placed in the FIRST returned array (the
            split index is ``int(N * test_p)``).
        use_cached: Reuse the cache file when it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.
        show: Display every generated image in an OpenCV window.
        dataset_path: Optional path resolved through
            ``local_path_from_s3_or_local_path``.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` is set only
        when the data was generated in this call.
    """
    filename = "/tmp/sawyer_push_new_easy_wider2_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        # The split below must use the size of the loaded data, not the
        # caller-supplied N, otherwise the train/test ratio is wrong.
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera_zoomed_in,
            normalize=True,
        )
        info['env'] = env
        policy = OUStrategy(env.action_space)

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                g = env.sample_goal_for_rollout()
                env.set_goal(g)
                policy.reset()
            u = policy.get_action_from_raw_action(env.action_space.sample())
            img = env.step(u)[0]
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview also
                # works for other image sizes.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
示例#7
0
def experiment(variant):
    """Build image-based evaluation/training envs and run a Q-learning algo.

    ``variant`` supplies: ``imsize``, ``history``, ``env_id``,
    ``init_camera``, ``bins``, ``cnn_params``, ``qf_criterion_class``,
    ``algo_class`` and ``algo_params``. Two separately constructed copies of
    the same gym environment are wrapped identically (normalised box, image
    observations, discretised actions); one is used as ``env`` and the other
    as ``training_env``.
    """
    side = variant['imsize']
    n_frames = variant['history']

    # Each wrapping stage is applied to the evaluation env first and the
    # training env second.
    env, training_env = (
        gym.make(variant['env_id']).env,
        gym.make(variant['env_id']).env,
    )
    env, training_env = NormalizedBoxEnv(env), NormalizedBoxEnv(training_env)

    image_kwargs = dict(
        imsize=side,
        keep_prev=n_frames - 1,
        init_camera=variant['init_camera'],
    )
    env, training_env = (
        ImageMujocoEnv(env, **image_kwargs),
        ImageMujocoEnv(training_env, **image_kwargs),
    )

    bins = variant['bins']
    env, training_env = DiscretizeEnv(env, bins), DiscretizeEnv(training_env, bins)

    # One Q-value output per discrete action.
    qf = CNN(
        output_size=env.action_space.n,
        input_width=side,
        input_height=side,
        input_channels=n_frames,
        **variant['cnn_params']
    )
    qf_criterion = variant['qf_criterion_class']()

    algo_cls = variant['algo_class']
    algorithm = algo_cls(
        env,
        training_env=training_env,
        qf=qf,
        qf_criterion=qf_criterion,
        **variant['algo_params']
    )
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    """Train Soft Actor-Critic on image observations of ``Pusher2DEnv``.

    ``variant`` supplies: ``imsize``, ``history``, ``init_camera``,
    ``cnn_params`` and ``algo_params``. The Q-function, value function and
    policy are all CNNs that share the same input geometry.
    """
    side = variant['imsize']
    n_frames = variant['history']

    base_env = Pusher2DEnv()
    image_env = ImageMujocoEnv(
        base_env,
        imsize=side,
        keep_prev=n_frames - 1,
        init_camera=variant['init_camera'],
    )
    env = NormalizedBoxEnv(image_env)

    # NOTE: `es` and `obs_dim` are computed but not used further down.
    es = OUStrategy(action_space=env.action_space)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size

    # Input geometry common to all three networks.
    image_input = dict(
        input_width=side,
        input_height=side,
        input_channels=n_frames,
    )

    # Q(s, a): the action enters through the extra fully-connected input.
    qf = MergedCNN(
        output_size=1,
        added_fc_input_size=action_dim,
        **image_input,
        **variant['cnn_params']
    )
    vf = CNN(
        output_size=1,
        **image_input,
        **variant['cnn_params']
    )
    policy = TanhCNNGaussianPolicy(
        output_size=action_dim,
        **image_input,
        **variant['cnn_params']
    )

    algorithm = SoftActorCritic(
        env=env,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    algorithm.to(ptu.device)
    algorithm.train()
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None,
):
    """Load or generate a VAE image dataset from ``SawyerPushXYEnv``.

    Each generated sample comes from a fresh episode: the goal is set to
    ``[100, 100]`` (to move it out of the image), the environment is reset,
    50 random actions are applied to the unwrapped environment, and the image
    returned by one more random step on the image wrapper is stored.

    Data source, in priority order: ``dataset_path`` (if given), the cache
    file in ``/tmp`` (if ``use_cached`` and it exists), otherwise a fresh
    simulation whose result is saved to the cache file.

    Args:
        N: Number of images to generate. Overwritten with the row count of
            the loaded array when ``dataset_path`` is given.
        test_p: Fraction of rows placed in the FIRST returned array (the
            split index is ``int(N * test_p)``).
        use_cached: Reuse the cache file when it exists.
        imsize: Image side length passed to ``ImageMujocoEnv``.
        show: Display every generated image in an OpenCV window.
        dataset_path: Optional path resolved through
            ``local_path_from_s3_or_local_path``.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` is set only
        when the data was generated in this call.
    """
    filename = "/tmp/sawyer_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        # The split below must use the size of the loaded data, not the
        # caller-supplied N, otherwise the train/test ratio is wrong.
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEnv()
        env = ImageMujocoEnv(
            env, imsize,
            transpose=True,
            init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Move the goal out of the image
            env.wrapped_env.set_goal(np.array([100, 100]))
            env.reset()
            for _ in range(50):
                env.wrapped_env.step(
                    env.wrapped_env.action_space.sample()
                )
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview also
                # works for other image sizes.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
示例#10
0
def experiment(variant):
    """Train TD3 on a VAE-encoded, image-based cylinder pusher task.

    ``variant`` supplies: ``rdim`` (selects one of the hard-coded VAE
    checkpoints; must be 2, 4, 8 or 16), ``multitask``, ``env_kwargs``,
    ``normalize``, ``exploration_type`` (``'ou'``, ``'gaussian'`` or
    ``'epsilon'``), ``algo_kwargs``, ``use_gpu`` and, when ``use_gpu`` is
    truthy, ``gpu_id``.

    Raises:
        KeyError: If ``rdim`` has no associated checkpoint path.
        NotImplementedError: If ``variant['multitask']`` is falsy; no
            environment is built for that case.
        Exception: If ``exploration_type`` is not one of the supported values.
    """
    rdim = variant["rdim"]
    vae_paths = {
        2:
        "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id0/params.pkl",
        4:
        "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id1/params.pkl",
        8:
        "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id2/params.pkl",
        16:
        "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id3/params.pkl"
    }
    vae_path = vae_paths[rdim]
    vae = torch.load(vae_path)
    print("loaded", vae_path)

    if variant['multitask']:
        env = CylinderXYPusher2DEnv(**variant["env_kwargs"])
        env = ImageMujocoEnv(env, 84, camera_name="topview", transpose=True)
        env = VAEWrappedEnv(env,
                            vae,
                            use_vae_obs=True,
                            use_vae_reward=True,
                            use_vae_goals=True)
        env = MultitaskToFlatEnv(env)
    else:
        # Without this branch `env` would be unbound and the first use below
        # would fail with an UnboundLocalError; fail with a clear message
        # instead.
        raise NotImplementedError(
            "experiment() requires variant['multitask'] to be true: no "
            "environment is constructed for the single-task case"
        )
    if variant['normalize']:
        env = NormalizedBoxEnv(env)
    exploration_type = variant['exploration_type']
    if exploration_type == 'ou':
        es = OUStrategy(action_space=env.action_space)
    elif exploration_type == 'gaussian':
        es = GaussianStrategy(
            action_space=env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # Constant sigma
        )
    elif exploration_type == 'epsilon':
        es = EpsilonGreedy(
            action_space=env.action_space,
            prob_random_action=0.1,
        )
    else:
        raise Exception("Invalid type: " + exploration_type)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    # Two independently initialised Q-networks, both handed to TD3.
    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = TD3(env,
                    training_env=env,
                    qf1=qf1,
                    qf2=qf2,
                    policy=policy,
                    exploration_policy=exploration_policy,
                    **variant['algo_kwargs'])
    print("use_gpu", variant["use_gpu"], bool(variant["use_gpu"]))
    if variant["use_gpu"]:
        gpu_id = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(gpu_id)
        algorithm.to(ptu.device)
        # The VAE is moved to the device separately from the algorithm.
        env._wrapped_env.vae.to(ptu.device)
    algorithm.train()
示例#11
0
from railrl.envs.mujoco.sawyer_gripper_env import SawyerXYZEnv
from railrl.envs.wrappers import ImageMujocoEnv
import cv2
import numpy as np

def _to_display_image(frame):
    """Flip ``frame`` along its first axis and emit channels 2, 1, 0."""
    flipped = frame[::-1]
    return np.concatenate(
        [flipped[:, :, c:c + 1] for c in (2, 1, 0)],
        axis=2,
    )


# Endless random-action rollout viewer: 1000 steps per episode, every frame
# shown in an OpenCV window named 'obs'.
print("making env")
sawyer = SawyerXYZEnv()
env = ImageMujocoEnv(sawyer, imsize=400)

print("starting rollout")
while True:
    obs = env.reset()
    for t in range(1000):
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        # The raw image is fetched from the wrapper directly rather than
        # taken from `obs`.
        raw_img = env._image_observation()
        img = _to_display_image(raw_img)
        cv2.imshow('obs', img)
        cv2.waitKey(1)
    print("new episode")
def experiment(variant):
    """Set up TD3 on a VAE-wrapped image environment.

    ``variant`` supplies: ``rdim``, ``use_env_goals``, ``vae_paths`` (keyed
    by ``str(rdim)``), ``render``, ``env`` (environment class),
    ``env_kwargs``, ``normalize``, ``exploration_type`` (``'ou'``,
    ``'gaussian'`` or ``'epsilon'``) and ``algo_kwargs``. Optional keys:
    ``wrap_mujoco_env`` (default ``False``) and ``track_qpos_goal``
    (default ``0``, only read when ``use_env_goals`` is truthy).

    Raises:
        Exception: If ``exploration_type`` is not one of the supported values.
    """
    rdim = variant["rdim"]
    use_env_goals = variant["use_env_goals"]
    vae_path = variant["vae_paths"][str(rdim)]
    render = variant["render"]
    wrap_mujoco_env = variant.get("wrap_mujoco_env", False)

    from railrl.envs.wrappers import ImageMujocoEnv, NormalizedBoxEnv
    from railrl.images.camera import sawyer_init_camera

    env = variant["env"](**variant['env_kwargs'])
    env = NormalizedBoxEnv(ImageMujocoEnv(
        env,
        imsize=84,
        keep_prev=0,
        init_camera=sawyer_init_camera,
    ))
    if wrap_mujoco_env:
        env = ImageMujocoEnv(env, 84, camera_name="topview", transpose=True, normalize=True)

    # The VAE wrappers receive the checkpoint path, not a loaded model.
    if use_env_goals:
        track_qpos_goal = variant.get("track_qpos_goal", 0)
        env = VAEWrappedImageGoalEnv(env, vae_path, use_vae_obs=True,
                                     use_vae_reward=True, use_vae_goals=True,
                                     render_goals=render, render_rollouts=render, track_qpos_goal=track_qpos_goal)
    else:
        env = VAEWrappedEnv(env, vae_path, use_vae_obs=True,
                            use_vae_reward=True, use_vae_goals=True,
                            render_goals=render, render_rollouts=render)

    env = MultitaskToFlatEnv(env)
    if variant['normalize']:
        env = NormalizedBoxEnv(env)
    exploration_type = variant['exploration_type']
    if exploration_type == 'ou':
        es = OUStrategy(action_space=env.action_space)
    elif exploration_type == 'gaussian':
        es = GaussianStrategy(
            action_space=env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # Constant sigma
        )
    elif exploration_type == 'epsilon':
        es = EpsilonGreedy(
            action_space=env.action_space,
            prob_random_action=0.1,
        )
    else:
        raise Exception("Invalid type: " + exploration_type)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    # Two independently initialised Q-networks, both handed to TD3.
    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = TD3(
        env,
        training_env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)
    # This line was over-indented in the original, which is a syntax error
    # (unexpected indent); it now runs unconditionally after the algorithm is
    # moved to the device.
    # NOTE(review): the stray indentation suggests a guarding statement, and
    # possibly a trailing `algorithm.train()` call as in the sibling
    # experiment functions, may have been lost - confirm against upstream.
    env._wrapped_env.vae.to(ptu.device)
示例#13
0
def simulate_policy(args):
    """Load a saved snapshot, roll out its policy and dump a video.

    Reads ``args.file`` (joblib snapshot), ``args.enable_render``,
    ``args.gpu``, ``args.pause`` and ``args.H`` (rollout horizon). The video
    is written next to the snapshot as ``video_<snapshot name>.mp4``.

    Raises:
        Exception: If the snapshot contains none of the known policy keys.
    """
    snapshot = joblib.load(args.file)

    # Policy keys in order of preference; an optimizable Q-function is the
    # last resort and provides the policy through `implicit_policy`.
    for key in ('eval_policy', 'policy', 'exploration_policy', 'naf_policy'):
        if key in snapshot:
            policy = snapshot[key]
            break
    else:
        if 'optimizable_qfunction' not in snapshot:
            raise Exception("No policy found in loaded dict. Keys: {}".format(
                snapshot.keys()))
        qf = snapshot['optimizable_qfunction']
        policy = qf.implicit_policy

    env = snapshot['env']
    if isinstance(env, RemoteRolloutEnv):
        env = env._wrapped_env
    print("Policy loaded")

    env.mode("video_env")
    env.decode_goals = True

    # Separate image wrapper around the env two wrapper levels down; it is
    # handed to dump_video below.
    image_env = ImageMujocoEnv(
        env._wrapped_env._wrapped_env,
        84,
        init_camera=None,
        camera_name="topview",
        transpose=True,
        normalize=True,
    )

    if args.enable_render:
        # some environments need to be reconfigured for visualization
        env.enable_render()

    # Put the policy (and the env's VAE, if it has one) on the same device.
    has_vae = hasattr(env, "vae")
    if args.gpu:
        set_gpu_mode(True)
        policy.to(ptu.device)
        if has_vae:
            env.vae.to(ptu.device)
    else:
        set_gpu_mode(False)
        policy.cpu()
        if has_vae:
            env.vae.cpu()

    if args.pause:
        import ipdb
        ipdb.set_trace()
    if isinstance(policy, PyTorchModule):
        policy.train(False)

    out_dir = osp.dirname(args.file)
    stem = os.path.splitext(os.path.basename(args.file))[0]
    video_path = osp.join(out_dir, "video_{}.mp4".format(stem))
    paths = dump_video(
        env,
        policy,
        video_path,
        ROWS=3,
        COLUMNS=6,
        horizon=args.H,
        image_env=image_env,
        dirname=out_dir,
        subdirname="rollouts_" + stem,
    )

    if hasattr(env, "log_diagnostics"):
        env.log_diagnostics(paths)
    logger.dump_tabular()