示例#1
0
    def step(self, action):
        """Send an action to the rl_agent interface service and return the outcome.

        Blocks until the service is available and the asynchronous call has
        completed.

        Args:
            action: Value assigned to the ``action`` field of the request.

        Returns:
            A ``(next_state, reward, done)`` tuple taken from the service
            response, with ``next_state`` reshaped to
            ``[1, self.state_size]``.

        Raises:
            RuntimeError: If the call finished without producing a result.
        """
        req = Dqn.Request()
        req.action = action

        while not self.rl_agent_interface_client.wait_for_service(
                timeout_sec=1.0):
            self.get_logger().info(
                'rl_agent interface service not available, waiting again...')

        future = self.rl_agent_interface_client.call_async(req)

        rclpy.spin_until_future_complete(self, future)

        response = future.result()
        if response is None:
            # Without a response there is nothing meaningful to return; fail
            # loudly instead of hitting an UnboundLocalError at the return.
            message = 'Exception while calling service: {0}'.format(
                future.exception())
            self.get_logger().error(message)
            raise RuntimeError(message)

        # Next state and reward
        next_state = np.reshape(np.asarray(response.state),
                                [1, self.state_size])
        return next_state, response.reward, response.done
示例#2
0
    def process(self):
        """Run episodes against the DQN service using ``self.get_action``.

        Iterates over ``range(1, 1000)``. The first request of each episode
        is sent with ``init=True`` and the fixed action 2; every later action
        is ``self.get_action`` applied to the most recently received state.
        An episode ends when the service response reports ``done``.

        Returns early if rclpy has been shut down, since no further
        responses can arrive in that case.
        """
        for _episode in range(1, 1000):
            next_state = list()
            done = False
            init = True
            score = 0  # Episode return; accumulated but not reported here

            # Short pause before the first request of the episode
            # NOTE(review): presumably the server resets the environment when
            # it receives init=True -- confirm against the service.
            time.sleep(0.05)

            while not done:
                # After a shutdown the spin loop below never runs, so `done`
                # would stay False forever; stop instead of busy-looping.
                if not rclpy.ok():
                    return

                # Action based on the current state. `init` stays True until
                # the first response arrives, so a failed first call is
                # retried with the fixed action rather than an empty state.
                if init:
                    action = 2  # Move forward
                else:
                    action = int(self.get_action(next_state))

                # Send action and receive next state and reward
                req = Dqn.Request()
                print(int(action))
                req.action = action
                req.init = init
                while not self.dqn_com_client.wait_for_service(
                        timeout_sec=1.0):
                    if not rclpy.ok():
                        return
                    self.get_logger().info(
                        'service not available, waiting again...')

                future = self.dqn_com_client.call_async(req)

                response = None
                while rclpy.ok():
                    rclpy.spin_once(self)
                    if future.done():
                        response = future.result()
                        if response is None:
                            self.get_logger().error(
                                'Exception while calling service: {0}'.format(
                                    future.exception()))
                        break

                if response is not None:
                    # Next state and reward
                    next_state = response.state
                    done = response.done
                    score += response.reward
                    init = False

                # While loop rate
                time.sleep(0.01)
示例#3
0
    def reset_environment(self):
        """Call the reset-environment service and return the state it reports.

        Blocks until the service is available and the asynchronous call has
        completed.

        Returns:
            The ``state`` field of the response, reshaped to
            ``[1, self.state_size]``.

        Raises:
            RuntimeError: If the call finished without producing a result.
        """
        while not self.reset_environment_client.wait_for_service(
                timeout_sec=1.0):
            self.get_logger().warn(
                'Reset environment client failed to connect to the server, try again ...'
            )

        future = self.reset_environment_client.call_async(Dqn.Request())

        rclpy.spin_until_future_complete(self, future)

        response = future.result()
        if response is None:
            # No response means no state to return; raise a clear error
            # instead of failing with UnboundLocalError on the return.
            message = 'Exception while calling service: {0}'.format(
                future.exception())
            self.get_logger().error(message)
            raise RuntimeError(message)

        return np.reshape(np.asarray(response.state), [1, self.state_size])
示例#4
0
    def process(self):
        """Train the agent by running episodes against the DQN service.

        Episodes run over ``range(self.load_episode + 1, self.episode_size)``.
        In each episode the first request is sent with ``init=True`` and the
        fixed action 2; later actions come from ``self.get_action`` on the
        latest received state. Every transition after the first step is
        stored with ``self.append_sample`` and followed by a training call
        once the step counter passes the configured thresholds. When an
        episode finishes, the target model is updated and a summary printed.

        Every 10th episode the model is saved as
        ``stage<stage>_episode<episode>.h5`` in ``self.model_dir_path``,
        alongside a ``.json`` file holding the current epsilon. Epsilon is
        decayed after each episode while it is above ``self.epsilon_min``.

        Returns early if rclpy has been shut down.
        """
        # NOTE(review): despite its name this counter is incremented once per
        # episode, not per environment step, and always starts from 0 even
        # when load_episode > 0 -- confirm this is intended.
        global_step = 0

        for episode in range(self.load_episode + 1, self.episode_size):
            global_step += 1

            state = list()
            next_state = list()
            done = False
            init = True
            score = 0

            # Pause before the first request of the episode
            # NOTE(review): presumably the server resets the environment when
            # it receives init=True -- confirm against the service.
            time.sleep(1.0)

            while not done:
                # After a shutdown the spin loop below never runs, so `done`
                # would stay False forever; stop instead of busy-looping.
                if not rclpy.ok():
                    return

                # `init` stays True until the first response arrives, so a
                # failed first call is retried as a first step.
                first_step = init

                # Action based on the current state
                if first_step:
                    action = 2  # Move forward
                else:
                    state = next_state
                    action = int(self.get_action(state))

                # Send action and receive next state and reward
                req = Dqn.Request()
                print(int(action))
                req.action = action
                req.init = init
                while not self.dqn_com_client.wait_for_service(
                        timeout_sec=1.0):
                    if not rclpy.ok():
                        return
                    self.get_logger().info(
                        'service not available, waiting again...')

                future = self.dqn_com_client.call_async(req)

                response = None
                while rclpy.ok():
                    rclpy.spin_once(self)
                    if future.done():
                        response = future.result()
                        if response is None:
                            self.get_logger().error(
                                'Exception while calling service: {0}'.format(
                                    future.exception()))
                        break

                if response is not None:
                    # Next state and reward
                    next_state = response.state
                    reward = response.reward
                    done = response.done
                    score += reward
                    init = False

                    # Save <s, a, r, s'> samples. Only done after a
                    # successful call, so no stale or undefined reward and
                    # next_state can reach the replay memory.
                    if not first_step:
                        self.append_sample(
                            state, action, reward, next_state, done)

                        # Train model
                        if global_step > self.update_target_model_start:
                            self.train_model(True)
                        elif global_step > self.train_start:
                            self.train_model()

                        if done:
                            # Update neural network
                            self.update_target_model()

                            print("Episode:", episode,
                                  "score:", score, "memory length:",
                                  len(self.memory), "epsilon:", self.epsilon)

                # While loop rate
                time.sleep(0.01)

            # Update result and save model every 10 episodes
            if episode % 10 == 0:
                base_name = 'stage' + str(self.stage) + \
                    '_episode' + str(episode)
                self.model_path = os.path.join(
                    self.model_dir_path, base_name + '.h5')
                self.model.save(self.model_path)

                # Built here rather than inside the step loop so it always
                # exists at save time; epsilon has not been decayed yet for
                # this episode, so the stored value is the one that was used.
                param_dictionary = {'epsilon': self.epsilon}
                with open(
                        os.path.join(
                            self.model_dir_path, base_name + '.json'),
                        'w') as outfile:
                    json.dump(param_dictionary, outfile)

            # Epsilon
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay