Example No. 1
import os

import torch


def test(args=get_args()):
    # Note: the default argument get_args() is evaluated once, at definition time.
    torch.set_num_threads(1)  # a single thread is enough for NN inference

    model_path = os.path.join(args.logdir, args.task, 'ddpg/policy.pth')
    env = gym_make()
    policy = load_policy(env, args.layer, model_path)
    print(env.action_space.low, env.action_space.high)
    obs = env.reset()

    # Roll out one episode with the loaded policy, rendering each step.
    while True:
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break
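These examples call the project helpers `load_policy` and `policy_forward`, whose definitions are not shown here. A minimal sketch of what `policy_forward` might look like, assuming `policy` is a callable PyTorch module that maps a batched observation to a batched action and that `eps` scales Gaussian exploration noise; the real implementation may differ:

import numpy as np
import torch


def policy_forward(policy, obs, eps=0.0):
    # Hypothetical sketch, not the project's actual code.
    with torch.no_grad():
        obs_t = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
        action = policy(obs_t).cpu().numpy()  # shape (1, action_dim)
    if eps > 0:
        # Add zero-mean Gaussian exploration noise with std `eps`.
        action = action + np.random.normal(0.0, eps, size=action.shape)
    return action  # callers take action[0] to get the single action

The batched return value matches how the examples use it: they print the whole array and then step the environment with `action[0]`.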
Example No. 2
    def steer(self, from_node, to_node, t_max_extend=10.0, t_tree=5.0):
        # Use the RL policy to steer from from_node toward to_node.
        parent_node = from_node
        new_node = self.Node(from_node.state)
        new_node.parent = parent_node
        new_node.path.append(new_node.state)
        state = new_node.state
        goal = to_node.state.copy()

        # Configure the simulation environment with the start state and goal.
        env = self.gym
        env.state = state
        env.goal = goal
        obs = env._obs()
        dt = 0.2  # simulation time step [s]
        n_max_extend = round(t_max_extend / dt)  # max steps per extension
        n_tree = round(t_tree / dt)  # commit a tree node every n_tree steps
        new_node_list = []

        for n_extend in range(1, n_max_extend + 1):
            # Query the policy (with exploration noise eps) and step the simulator.
            action = policy_forward(self.policy, obs, eps=0.05)[0]
            obs, reward, done, info = env.step(action)

            state = env.state
            new_node.state = state
            new_node.path.append(state)
            if not env.robot_env.valid_state_check(state):
                # Collision: discard the partial extension.
                break
            elif np.linalg.norm(state[:2] - goal[:2]) <= 1.0:
                # Goal reached: keep the node and stop extending.
                new_node_list.append(new_node)
                break
            elif n_extend % n_tree == 0:
                # Periodically commit the current segment as a tree node
                # and start a new segment growing from it.
                new_node_list.append(new_node)
                parent_node = new_node
                new_node = self.Node(parent_node.state)
                new_node.parent = parent_node
                new_node.path.append(new_node.state)

        self.node_list += new_node_list
        return new_node_list
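The `steer` method is the extension step of an RRT-style planner: each call rolls out up to `t_max_extend` seconds of policy-driven motion toward `to_node`, committing an intermediate tree node every `t_tree` seconds. A sketch of how a planning loop might call it; `sample_free`, `nearest`, `reached_goal`, and `extract_path` are illustrative helpers assumed for this example, not taken from the source:

    def plan(self, start_state, goal_state, max_iter=500):
        # Hypothetical RRT loop built around steer(); helper names are assumptions.
        self.node_list = [self.Node(start_state)]
        goal_node = self.Node(goal_state)
        for _ in range(max_iter):
            rand_node = self.sample_free()       # random collision-free node
            near_node = self.nearest(rand_node)  # closest existing tree node
            for node in self.steer(near_node, rand_node):
                if self.reached_goal(node, goal_node):
                    return self.extract_path(node)  # follow parent links to start
        return None  # no path found within the iteration budget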
Example No. 3
import os
import pickle

import matplotlib.pyplot as plt


def test_policy():
    # torch.set_num_threads(1)  # a single thread is enough for NN inference
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'

    # Load the pickled obstacle environments; only training_env is used below.
    data_dir = os.path.dirname(__file__) + '/../data/obstacles/'
    with open(data_dir + 'test_env1.pkl', 'rb') as f:
        test_env1 = pickle.load(f)
    with open(data_dir + 'obs_list_list.pkl', 'rb') as f:
        obs_list = pickle.load(f)[:1]
    with open(data_dir + 'test_env2.pkl', 'rb') as f:
        test_env2 = pickle.load(f)
    with open(data_dir + 'training_env.pkl', 'rb') as f:
        training_env = pickle.load(f)

    env = DifferentialDriveGym(obs_list_list=[training_env])
    env.reset()

    policy = load_policy(env, [1024, 512, 512, 512], model_path)

    obs = env.reset()

    fig, ax = plt.subplots()
    # Roll out one episode with the loaded policy, rendering each step.
    while True:
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break
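Example No. 3 passes the hidden-layer sizes [1024, 512, 512, 512] to `load_policy`. A plausible sketch of such a loader, assuming a plain PyTorch MLP actor with tanh-squashed outputs scaled to the action bounds; the `MLPActor` class and its layout are assumptions, and the real project may build the network differently (e.g. through an RL library) or store more than the actor weights in policy.pth:

import torch
import torch.nn as nn


class MLPActor(nn.Module):
    # Hypothetical actor: obs -> hidden MLP -> tanh action in [-max_action, max_action].
    def __init__(self, obs_dim, act_dim, hidden_sizes, max_action):
        super().__init__()
        layers, last = [], obs_dim
        for h in hidden_sizes:
            layers += [nn.Linear(last, h), nn.ReLU()]
            last = h
        layers += [nn.Linear(last, act_dim), nn.Tanh()]
        self.net = nn.Sequential(*layers)
        self.max_action = max_action

    def forward(self, obs):
        return self.max_action * self.net(obs)


def load_policy(env, hidden_sizes, model_path):
    # Hypothetical loader: build the actor and restore the saved weights.
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])
    actor = MLPActor(obs_dim, act_dim, hidden_sizes, max_action)
    actor.load_state_dict(torch.load(model_path, map_location='cpu'))
    actor.eval()  # inference mode for rollouts
    return actor

Returning the bare actor module keeps this sketch consistent with the `policy_forward` sketch after Example No. 1, which treats `policy` as a callable network.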