Example #1

Load a pretrained policy from ./data/2 and replay it in PepperReachCam-v0 with the GUI enabled.
import gym
import gym_pepper  # registers the Pepper environments
from gym.wrappers.time_limit import TimeLimit

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('./data/2')

env = TimeLimit(gym.make("PepperReachCam-v0", gui=True, dense=True),
                max_episode_steps=100)

run_policy(env, get_action)
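
run_policy plays episodes with the loaded policy; the sketch below shows the general shape of such a rollout loop (an assumed approximation, not this repo's actual implementation):

# Minimal sketch of a run_policy-style rollout loop (assumed behaviour,
# not this repo's actual code).
def rollout(env, get_action, episodes=5):
    for _ in range(episodes):
        obs, done, ep_ret = env.reset(), False, 0.0
        while not done:
            obs, reward, done, _ = env.step(get_action(obs))
            ep_ret += reward
        print(f"episode return: {ep_ret:.2f}")
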
Example #2

Train SAC with a HER replay buffer on PepperPush-v0, then reload and replay the learned policy.
import gym
import gym_pepper  # registers the Pepper environments
import torch
import torch.nn as nn
from gym.wrappers.time_limit import TimeLimit

# Project-local modules (import paths assumed; the snippet omitted them).
from algos.sac import SAC
from algos import core_her, replay_buffer_her

# Constructs a disabled profiler; as a bare call this is a no-op unless
# used as a context manager.
torch.autograd.profiler.profile(enabled=False)

env = TimeLimit(gym.make("PepperPush-v0", gui=True), max_episode_steps=100)

ac_kwargs = dict(hidden_sizes=[64, 64, 64], activation=nn.ReLU)
rb_kwargs = dict(size=1000000,
                 n_sampled_goal=4,
                 goal_selection_strategy='future')

logger_kwargs = dict(output_dir='data/push_su', exp_name='push_su')

model = SAC(env=env,
            actor_critic=core_her.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_her.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)

model.train(steps_per_epoch=2000, epochs=3000)

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/push_su')

run_policy(env, get_action)
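
For context, n_sampled_goal=4 with goal_selection_strategy='future' is the hindsight experience replay (HER) scheme: every stored transition is additionally relabelled against up to four goals that were actually achieved later in the same episode. A schematic sketch of that relabelling (names are illustrative, not this buffer's API):

import random

# Schematic HER 'future' relabelling (illustrative, not this repo's buffer).
# Each transition is a dict with keys such as obs, action, achieved_goal.
def relabel_episode(episode, compute_reward, n_sampled_goal=4):
    out = []
    for t, tr in enumerate(episode):
        out.append(tr)  # keep the original transition
        future = episode[t + 1:]  # goals achieved after step t
        for _ in range(min(n_sampled_goal, len(future))):
            goal = random.choice(future)["achieved_goal"]
            # Copy the transition, substituting the goal and its reward.
            out.append(dict(tr, desired_goal=goal,
                            reward=compute_reward(tr["achieved_goal"], goal)))
    return out
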
Example #3

Train SAC with automatic entropy tuning on the dense-reward Pepper reach task, then replay the result.
import gym
import gym_pepper  # registers the Pepper environments
import torch.nn as nn
from gym.wrappers.time_limit import TimeLimit

# Project-local modules (import paths assumed; the snippet omitted them).
from algos.sac import SAC
from algos import core, replay_buffer

# The opening lines of this snippet were truncated; the env id below is
# assumed from the other examples.
env = TimeLimit(gym.make("PepperReach-v0",
                         dense=True,
                         head_motion=False),
                max_episode_steps=100)

ac_kwargs = dict(hidden_sizes=[256, 256, 256], activation=nn.ReLU)
rb_kwargs = dict(size=1000000)

logger_kwargs = dict(output_dir='data/reach_su', exp_name='reach_su')

model = SAC(env=env,
            actor_critic=core.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            ent_coef="auto",
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)

model.train(steps_per_epoch=2000, epochs=1000)

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/reach_su')

run_policy(env, get_action)
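
ent_coef="auto" corresponds to SAC's automatic temperature tuning: the entropy coefficient alpha is itself optimized so the policy's entropy tracks a fixed target, commonly -|A|. A sketch of the standard update, assuming logp_pi holds log-probabilities of freshly sampled actions (names assumed, not this repo's exact code):

import torch

# Standard SAC automatic temperature update (sketch; names assumed).
act_dim = env.action_space.shape[0]   # env from the example above
target_entropy = -float(act_dim)      # common heuristic: -|A|
log_alpha = torch.zeros(1, requires_grad=True)
alpha_opt = torch.optim.Adam([log_alpha], lr=3e-4)

def update_alpha(logp_pi):
    alpha_loss = -(log_alpha * (logp_pi + target_entropy).detach()).mean()
    alpha_opt.zero_grad()
    alpha_loss.backward()
    alpha_opt.step()
    return log_alpha.exp().item()  # current entropy coefficient
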
Example #4

Replay a pretrained reach policy on camera observations, using a pretrained CNN (cnn.cnn_0.Net) as the vision feature extractor.
import gym
import gym_pepper
from utils.wrappers import VisionWrapper, TorchifyWrapper
from gym.wrappers.time_limit import TimeLimit
from algos.test_policy import load_policy_and_env, run_policy
from cnn.cnn_0 import Net

env = TimeLimit(
    VisionWrapper(
        TorchifyWrapper(gym.make("PepperReachCam-v0", gui=True, dense=True)),
        Net,
        "trained/vision_0.pth"),
    max_episode_steps=100)

_, get_action = load_policy_and_env("trained/0")

run_policy(env, get_action)
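
VisionWrapper replaces the raw camera observation with features from the pretrained CNN. A sketch of the general pattern (an assumed approximation of utils.wrappers.VisionWrapper, not its actual code):

import gym
import torch

class CNNFeatureWrapper(gym.ObservationWrapper):
    # Assumed approximation of a vision-feature wrapper, for illustration.
    def __init__(self, env, net_cls, weights_path):
        super().__init__(env)
        self.net = net_cls()
        self.net.load_state_dict(torch.load(weights_path))
        self.net.eval()

    def observation(self, obs):
        with torch.no_grad():
            return self.net(obs)  # camera image -> feature vector
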
Example #5

Train SAC directly on camera observations, with a feature-extractor module inside the actor-critic and a GPU-resident replay buffer, then replay the deterministic policy.
import gym
import gym_pepper  # registers the Pepper environments
import torch.nn as nn
from gym.wrappers.time_limit import TimeLimit

# Project-local modules (import paths assumed; the snippet omitted them).
# `Extractor` (the CNN feature-extractor class) is also required, but its
# module path is not shown in the original snippet.
from algos.sac import SAC
from algos import core_cam, replay_buffer_cam

# The env construction was truncated in the original snippet; a camera-based
# reach env consistent with the other examples is assumed here.
env = TimeLimit(gym.make("PepperReachCam-v0", dense=True),
                max_episode_steps=100)

ac_kwargs = dict(hidden_sizes=[256, 256],
                 activation=nn.ReLU,
                 extractor_module=Extractor)
rb_kwargs = dict(size=40000)

logger_kwargs = dict(output_dir='data/reach_cam', exp_name='reach_cam')

model = SAC(env=env,
            actor_critic=core_cam.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_cam.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            update_after=512,
            update_every=512,
            logger_kwargs=logger_kwargs,
            use_gpu_buffer=True)

model.train(steps_per_epoch=1024, epochs=5000)

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/reach_cam', deterministic=True)

run_policy(env, get_action)
Example #6

Sanity-check the SAC + HER setup on gym's FetchReach-v1 task, then replay the deterministic policy.
import gym
import torch.nn as nn

# Project-local modules (import paths assumed; the snippet omitted them).
from algos.sac import SAC
from algos import core_her, replay_buffer_her

env = gym.make("FetchReach-v1")

ac_kwargs = dict(hidden_sizes=[64, 64], activation=nn.ReLU)
rb_kwargs = dict(size=1000000,
                 n_sampled_goal=4,
                 goal_selection_strategy='future')

logger_kwargs = dict(output_dir='data/fetch_reach_su', exp_name='fetch_reach_su')

model = SAC(env=env,
            actor_critic=core_her.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_her.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.001,
            ent_coef="auto",
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)

model.train(steps_per_epoch=3000, epochs=10)

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/fetch_reach_su', deterministic=True)

run_policy(env, get_action)
Example #7

Replay the bundled pretrained push policy from trained/1 in PepperPush-v0 with the GUI enabled.
import gym
import gym_pepper  # registers the Pepper environments
from gym.wrappers.time_limit import TimeLimit

from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('trained/1')

env = TimeLimit(gym.make("PepperPush-v0", gui=True), max_episode_steps=100)

run_policy(env, get_action)