Example No. 1
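All of the snippets on this page rely on the same surrounding imports. The preamble below is an assumed sketch, not part of the original listing: the standard-library and third-party imports are certain, while the project-specific helpers come from the multiworld / rlkit (railrl) codebases and their exact module paths may differ in your checkout.

# Assumed import preamble for the snippets below (not part of the original listing).
import os.path as osp
import time

import cv2
import numpy as np
from gym.spaces import Box

# The remaining names used by the snippets are project helpers whose exact
# module paths are not shown in the source and are assumed to come from the
# multiworld / rlkit (railrl) codebases:
#   ImageEnv, unormalize_image                       (image wrapper utilities)
#   RandomPolicy, OUStrategy,
#   PolicyWrappedWithExplorationStrategy             (exploration utilities)
#   local_path_from_s3_or_local_path                 (asset loading helper)
#   SawyerDoorEnv, SawyerDoorPushOpenEnv,
#   SawyerPushAndPullDoorEnv, sawyer_door_env_camera,
#   sawyer_door_env_camera_v2                        (Sawyer door envs and cameras)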
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerDoorEnv, env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            # Guard against the default of None, which would break **env_kwargs.
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env, imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
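A minimal usage sketch for the snippet above, assuming the dependencies sketched at the top of the page; the argument values are illustrative only.

# Hypothetical call: regenerate a small door dataset instead of loading the /tmp cache.
train_data, test_data, info = generate_vae_dataset(
    N=1000,             # illustrative; the default is 10000 frames
    test_p=0.9,         # first 90% of rows become the train split, the rest the test split
    use_cached=False,   # force a fresh rollout of the OU-wrapped random policy
    env_kwargs=dict(),  # forwarded verbatim to env_class(**env_kwargs)
)
print(train_data.shape, test_data.shape)  # (900, 84 * 84 * 3) and (100, 84 * 84 * 3)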
def generate_goal_data_set(env=None, num_goals=1000, use_cached_dataset=False,
                           action_scale=1 / 10):
    if use_cached_dataset and osp.isfile(
            '/tmp/goals' + str(num_goals) + '.npy'):
        # allow_pickle is required to reload the saved goal dict on newer numpy.
        goal_dict = np.load('/tmp/goals' + str(num_goals) + '.npy',
                            allow_pickle=True).item()
        print("loaded data from saved file")
        return goal_dict
    cached_goal_keys = [
        'latent_desired_goal',
        'image_desired_goal',
        'state_desired_goal',
        'joint_desired_goal',
    ]
    goal_sizes = [
        env.observation_space.spaces['latent_desired_goal'].low.size,
        env.observation_space.spaces['image_desired_goal'].low.size,
        env.observation_space.spaces['state_desired_goal'].low.size,
        7
    ]
    observation_keys = [
        'latent_observation',
        'image_observation',
        'state_observation',
        'state_observation',
    ]
    goal_generation_dict = dict()
    for goal_key, goal_size, obs_key in zip(
            cached_goal_keys,
            goal_sizes,
            observation_keys,
    ):
        goal_generation_dict[goal_key] = [goal_size, obs_key]
    goal_dict = dict()
    policy = RandomPolicy(env.action_space)
    es = OUStrategy(action_space=env.action_space, theta=0)
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    for goal_key in goal_generation_dict:
        goal_size, obs_key = goal_generation_dict[goal_key]
        goal_dict[goal_key] = np.zeros((num_goals, goal_size))
    print('Generating Random Goals')
    for i in range(num_goals):
        if i % 50 == 0:
            print('Reset')
            env.reset_model()
            exploration_policy.reset()
        action = exploration_policy.get_action()[0] * action_scale
        obs, _, _, _ = env.step(
            action
        )
        print(i)
        for goal_key in goal_generation_dict:
            goal_size, obs_key = goal_generation_dict[goal_key]
            goal_dict[goal_key][i, :] = obs[obs_key]
    np.save('/tmp/goals' + str(num_goals) + '.npy', goal_dict)
    return goal_dict
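A usage sketch for generate_goal_data_set; my_multitask_env below is a hypothetical placeholder for an already-constructed environment whose observation dict exposes the latent_*, image_*, and state_* keys the function reads.

# Hypothetical call: sample 500 goals from an existing env.  `my_multitask_env`
# is a placeholder for an environment whose observations contain the
# latent_*/image_*/state_* keys indexed above.
goals = generate_goal_data_set(env=my_multitask_env, num_goals=500,
                               use_cached_dataset=False)
print(goals['image_desired_goal'].shape)  # (500, image goal dimension)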
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    action_space_sampling=False,
    init_camera=None,
    env_class=None,
    env_kwargs=None,
):
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                dataset[i, :] = img
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)

        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
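A usage sketch for the variant above. No concrete env class or camera is hard-coded in this snippet, so MyPositionControlEnv and my_camera_init_fn below are hypothetical placeholders for a multiworld-style env (one that supports sample_goal / set_to_goal) and its camera initializer.

# Hypothetical call with placeholder env class and camera function.
train_data, test_data, info = generate_vae_dataset(
    N=2000,
    env_class=MyPositionControlEnv,  # placeholder: must support sample_goal / set_to_goal
    env_kwargs=dict(),
    init_camera=my_camera_init_fn,   # placeholder camera initializer
    action_space_sampling=True,      # set the arm to sampled goals instead of rolling out
)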
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    policy_path=None,
    ratio_oracle_policy_data_to_random=1 / 2,
    action_space_sampling=False,
    env_class=None,
    env_kwargs=None,
    action_plus_random_sampling=False,
    init_camera=sawyer_door_env_camera,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, env.max_y_pos, .06]))
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  #move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, .6, .06]))
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                # env.wrapped_env.step(
                #     env.wrapped_env.action_space.sample()
                # )
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
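A usage sketch for the push-open variant above, reusing SawyerDoorPushOpenEnv from the snippet itself; the flag selects the half-oracle, half-random-exploration branch.

# Hypothetical call: half oracle goal-setting frames, half random-exploration frames.
train_data, test_data, info = generate_vae_dataset(
    N=1000,
    env_class=SawyerDoorPushOpenEnv,  # same class the snippet instantiates elsewhere
    env_kwargs=dict(),
    action_plus_random_sampling=True,
)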
Example No. 5
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerPushAndPullDoorEnv, env_kwargs=None,
        action_plus_random_sampling=False, init_camera=sawyer_door_env_camera, ratio_action_sample_to_random=1 / 2, env_id=None,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            if env_kwargs is None:
                # Guard against the default of None, which would break **env_kwargs.
                env_kwargs = dict()
            env = env_class(**env_kwargs)
            env = ImageEnv(
                env, imsize,
                transpose=True,
                init_camera=init_camera,
                normalize=True,
            )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
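A usage sketch for the registered-env path of the snippet above; 'YourSawyerDoorEnv-v0' is a hypothetical ID, not one shipped with the libraries.

# Hypothetical call: use a pre-registered gym ID instead of env_class/env_kwargs.
# 'YourSawyerDoorEnv-v0' is a placeholder; note that this path skips the ImageEnv
# wrapper, so the registered env must already produce flat image observations.
train_data, test_data, info = generate_vae_dataset(
    N=1000,
    env_id='YourSawyerDoorEnv-v0',
    action_plus_random_sampling=True,
    ratio_action_sample_to_random=1 / 2,
)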
Example No. 6
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    env_class=None,
    env_kwargs=None,
    init_camera=sawyer_door_env_camera,
):
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        if env_kwargs is None:
            # Guard against the default of None, which would break **env_kwargs.
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
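A usage sketch for the push-open-and-reach variant; env_class has no default here, so MyDoorAndReachEnv is a hypothetical placeholder for a multiworld door env that supports sample_goal / set_to_goal.

# Hypothetical call with a placeholder env class.
train_data, test_data, info = generate_vae_dataset(
    N=1000,
    env_class=MyDoorAndReachEnv,  # placeholder: must support sample_goal / set_to_goal
    env_kwargs=dict(),
)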