# Example no. 1
# 0
    def _update_observation(self):
        """Build and return the current Observation.

        Computes the vertical displacement between the ankle-roll and
        hip-roll joints (in millimeters) and encodes it either as a raw
        feature pair ('rl' representation) or as a one-hot bin vector.

        :return: an Observation with ``intArray`` populated.
        """
        return_obs = Observation()

        # y-components of the ankle-roll and hip-roll joint positions.
        y_ankle = NaoWorldModel().get_joint_pos(self._ankle_roll)[1]
        y_hip = NaoWorldModel().get_joint_pos(self._hip_roll)[1]
        displacement = (y_ankle - y_hip) * 1000  # convert to millimeters

        if self._feature_rep == 'rl':
            # Raw representation: ball image x-coordinate plus displacement.
            image_x = NaoWorldModel().get_object("ball").image_center.x
            return_obs.intArray = [image_x, displacement]
        else:
            # Discretized representation: one-hot bin over the displacement.
            s = [0] * MDPState.nfeatures

            bin_num = int(math.floor((displacement - MDPState.min_features) / self._bin_width))
            try:
                s[bin_num] = 1
            except IndexError as e:  # 'as' form: valid in Python 2.6+ and 3.x
                # Displacement fell outside the discretization range; abort.
                exit(e)
            return_obs.intArray = s

        # BUG FIX: the observation was built but never returned, so callers
        # always received None.
        return return_obs
    def start(self):
        """Begin an episode and return the initial observation.

        If a fixed start state is configured, try to place the agent there;
        fall back to a random state when the fixed state is invalid (or when
        no fixed state is configured).

        :return: an Observation whose ``intArray`` holds the flat state id.
        """
        if self.fixed_start_state:
            state_valid = self.set_agent_state(self.start_row, self.start_col)
            if not state_valid:
                # BUG FIX: the original printed start_row twice; report the
                # actual (row, col) pair. Single-argument print() form is
                # valid in both Python 2 and Python 3.
                print("The fixed start state was NOT valid: %d,%d"
                      % (int(self.start_row), int(self.start_col)))
                self.set_random_state()
        else:
            self.set_random_state()

        return_obs = Observation()
        return_obs.intArray = [self.calculate_flat_state()]

        return return_obs
    def step(self, action):
        """Advance the environment one step with the given action.

        Validates that ``action`` carries exactly one integer in [0, 3],
        applies it, and packages the resulting flat state, reward, and
        terminal flag.

        :return: a Reward_observation_terminal for this transition.
        """
        # Validate the incoming action before touching the environment.
        assert len(action.intArray) == 1, "Expected 1 integer action."
        chosen = action.intArray[0]
        assert chosen >= 0, "Expected action to be in [0,3]"
        assert chosen < 4, "Expected action to be in [0,3]"

        self.update_position(chosen)

        # Wrap the new flat state as the observation for this step.
        next_obs = Observation()
        next_obs.intArray = [self.calculate_flat_state()]

        result = Reward_observation_terminal()
        result.r = self.calculate_reward()
        result.o = next_obs
        result.terminal = self.check_current_terminal()
        return result