Exemplo n.º 1
0
# Select the compute device: a negative args.cuda means CPU, otherwise the
# value is used as the CUDA device index.
device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

# Redis client created with the library's default connection settings; it is
# used below to wait for the external 'start' flag.
r = redis.StrictRedis()

# Unbounded 4-dimensional observation space and a 1-dimensional action space
# bounded to [-1, 1].
high = np.inf * np.ones(4)
observation_space = Box(low=-high, high=high, dtype=np.float32)
high = np.ones(1)
action_space = Box(low=-high, high=high)

# Recurrent (LSTM) Gaussian policy.
pol_net = PolNetLSTM(observation_space, action_space, h_size=args.h_size, cell_size=args.cell_size)
pol = GaussianPol(observation_space, action_space, pol_net, data_parallel=args.data_parallel, rnn=True)

# Saved policy weights are mandatory for this script.
if args.pol:
    # map_location keeps every storage on CPU while loading, so a checkpoint
    # saved on a GPU machine can still be read; the policy is moved to the
    # selected device right after.
    pol.load_state_dict(torch.load(args.pol, map_location=lambda storage, loc: storage))
else:
    raise Exception(
        'No policy weights given: args.pol must be the path of a saved policy state dict')

pol.to(device)

pol.dp_run = False

pol.reset()

# Imported here so this section does not depend on a top-of-file import.
import time

# Start gate: clear the flag, then block until another client sets the
# 'start' key to 'true'.
r.set('start', 'false')
while True:
    start_flag = r.get('start')
    # r.get returns None when the key is missing (e.g. deleted by another
    # client); treat that as "not started yet" instead of crashing on .decode.
    if start_flag is not None and start_flag.decode('utf-8') == 'true':
        break
    # Short pause so the wait does not issue requests back-to-back.
    time.sleep(0.001)

class Process(object):
Exemplo n.º 2
0
    s_pol = GaussianPol(observation_space, action_space, s_pol_net, args.rnn)
elif isinstance(action_space, gym.spaces.Discrete):
    t_pol = CategoricalPol(
        observation_space, action_space, t_pol_net, args.rnn)
    s_pol = CategoricalPol(
        observation_space, action_space, s_pol_net, args.rnn)
elif isinstance(action_space, gym.spaces.MultiDiscrete):
    t_pol = MultiCategoricalPol(
        observation_space, action_space, t_pol_net, args.rnn)
    s_pol = MultiCategoricalPol(
        observation_space, action_space, s_pol_net, args.rnn)
else:
    raise ValueError('Only Box, Discrete and Multidiscrete are supported')

# When requested, load a saved state dict into the teacher policy from
# <args.teacher_dir>/<args.teacher_fname>.
if args.teacher_pol:
    teacher_weights_path = os.path.join(args.teacher_dir, args.teacher_fname)
    t_pol.load_state_dict(torch.load(teacher_weights_path))

# Value network s_vf_net: the LSTM variant when args.rnn is set, the plain
# network otherwise.
s_vf_net = (
    VNetLSTM(observation_space, h_size=256, cell_size=256)
    if args.rnn
    else VNet(observation_space)
)

# A sampler driven by the teacher policy is only created when episodes are to
# be sampled with the teacher.
if args.sampling_policy == 'teacher':
    teacher_sampler = EpiSampler(
        env, t_pol, num_parallel=args.num_parallel, seed=args.seed)

student_sampler = EpiSampler(
    env,
Exemplo n.º 3
0
log_dir_name = 'garbage_airl_20190915_2'
env_name = 'RoboschoolPremaidAIWalker-v0'
env = gym.make(env_name)
env.seed(seed)

# take the observation space and action space from the environment
observation_space = env.observation_space
action_space = env.action_space

# policy
pol_net = PolNet(observation_space, action_space)

# load best policy
best_path = f'{log_dir_name}/models/pol_max.pkl'
best_pol = GaussianPol(observation_space, action_space, pol_net)
# map_location='cpu' lets a checkpoint saved on a GPU machine load on a
# CPU-only one; the policy is never moved off the CPU in this section.
best_pol.load_state_dict(torch.load(best_path, map_location='cpu'))

# show trained policy's behavior
done = False
o = env.reset()
for _ in range(1000):  # show 16.5 sec (0.0165 * 1000)
    if done:
        time.sleep(1)  # pause at the episode boundary before resetting
        o = env.reset()
    # Only the first returned value is used; it is reshaped to the action
    # space's shape before being passed to the environment.
    ac_real, ac, a_i = best_pol.deterministic_ac_real(
        torch.tensor(o, dtype=torch.float))
    ac_real = ac_real.reshape(best_pol.action_space.shape)
    next_o, r, done, e_i = env.step(np.array(ac_real))
    o = next_o
    env.render()
    # time.sleep(0.0165)