Example #1
    def _update_observation(self):
        return_obs = Observation()

        # One feature per mapped axis of each effector's world position.
        return_obs.doubleArray = np.zeros(MDPState.nfeatures)
        for effector, mapping in MDPState.description.items():
            pos = NaoWorldModel().get_effector_pos(effector)
            for key, axis in mapping.items():
                return_obs.doubleArray[axis] = pos[MDPState.key_to_index(key)]
        return return_obs
    def start(self):
        if self.fixed_start_state:
            state_valid = self.set_agent_state(self.start_row, self.start_col)
            if not state_valid:
                print("The fixed start state was NOT valid: " +
                      str(int(self.start_row)) + "," + str(int(self.start_col)))
                self.set_random_state()
        else:
            self.set_random_state()

        return_obs = Observation()
        return_obs.intArray = [self.calculate_flat_state()]

        return return_obs
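All of these examples fill RL-Glue's Observation container, which carries parallel intArray/doubleArray payloads. As a minimal sketch of that contract (the Observation stand-in and TinyEnv below are illustrative assumptions, not part of the examples), an episode begins by consuming whatever start() returns:

    class Observation(object):
        """Stand-in for rlglue.types.Observation (assumed fields only)."""
        def __init__(self):
            self.intArray = []
            self.doubleArray = []
            self.charArray = []

    class TinyEnv(object):
        """Hypothetical environment following Example #1's start() contract."""
        def start(self):
            obs = Observation()
            obs.intArray = [0]  # flat index of the start state
            return obs

    first_obs = TinyEnv().start()
    print(first_obs.intArray)  # -> [0]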
Example #3
    def start(self):
        if self._random_start:
            # Draw uniform samples and rescale them into each dimension's
            # [low, high] range taken from self._limits.
            self._sensors = np.random.random(self._sensors.shape)
            self._sensors *= (self._limits[:, 1] - self._limits[:, 0])
            self._sensors += self._limits[:, 0]
        else:
            self._sensors = np.zeros(self._sensors.shape)
            self._sensors[0] = -0.5

        self._render(self._sensors[0])

        return_obs = Observation()
        return_obs.doubleArray = self._sensors.tolist()
        return return_obs
    def step(self, action):
        # Make sure the action is valid
        assert len(action.intArray) == 1, "Expected 1 integer action."
        assert action.intArray[0] >= 0, "Expected action to be in [0,3]"
        assert action.intArray[0] < 4, "Expected action to be in [0,3]"

        self.update_position(action.intArray[0])

        obs = Observation()
        obs.intArray = [self.calculate_flat_state()]

        return_ro = Reward_observation_terminal()
        return_ro.r = self.calculate_reward()
        return_ro.o = obs
        return_ro.terminal = self.check_current_terminal()

        return return_ro
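The random-start branch above rescales uniform [0, 1) draws into per-dimension [low, high] bounds. A self-contained sketch of that rescaling, with made-up limits values:

    import numpy as np

    # One [low, high] row per state dimension (illustrative values).
    limits = np.array([[-1.2, 0.6],     # e.g. position
                       [-0.07, 0.07]])  # e.g. velocity

    sensors = np.random.random(limits.shape[0])  # uniform in [0, 1)
    sensors *= limits[:, 1] - limits[:, 0]       # stretch to each range width
    sensors += limits[:, 0]                      # shift to each lower bound
    assert np.all((limits[:, 0] <= sensors) & (sensors <= limits[:, 1]))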
Example #5
    def _update_observation(self):
        return_obs = Observation()

        y_ankle = NaoWorldModel().get_joint_pos(self._ankle_roll)[1]
        y_hip = NaoWorldModel().get_joint_pos(self._hip_roll)[1]
        displacement = (y_ankle - y_hip) * 1000  # convert to millimeters

        if self._feature_rep == 'rl':
            image_x = NaoWorldModel().get_object("ball").image_center.x
            return_obs.intArray = [image_x, displacement]
        else:
            s = [0] * MDPState.nfeatures

            bin_num = int(math.floor((displacement - MDPState.min_features) / self._bin_width))
            try:
                s[bin_num] = 1
            except IndexError as e:
                exit(e)  # displacement fell outside the binned range
            return_obs.intArray = s

        return return_obs
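The else branch above discretizes the displacement into a one-hot feature vector by linear binning. A standalone sketch of that binning, with made-up bin settings, that clamps out-of-range values instead of exiting on IndexError:

    import math

    n_features = 10       # number of bins (assumed)
    min_feature = -50.0   # lower edge of the binned range (assumed)
    bin_width = 10.0      # width of each bin (assumed)

    displacement = 12.3   # sample displacement in millimeters
    bin_num = int(math.floor((displacement - min_feature) / bin_width))
    bin_num = min(max(bin_num, 0), n_features - 1)  # clamp rather than crash
    s = [0] * n_features
    s[bin_num] = 1
    print(s)  # one-hot vector with the active bin set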
Example #6
    def step(self, action):
        return_ro = Reward_observation_terminal()
        self._apply(action)
        self._render(self._sensors[0])

        return_ro.terminal = self._is_terminal()

        # Cost-to-go reward: -1 per step, 0 once the terminal state is reached.
        return_ro.r = -1.0
        if return_ro.terminal:
            return_ro.r = 0.0

        if self._reward_noise > 0:
            return_ro.r += np.random.normal(scale=self._reward_noise)

        obs = Observation()
        obs.doubleArray = self._sensors.tolist()
        return_ro.o = obs

        return return_ro
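Example #6's reward is the usual cost-to-go signal: -1 every step, 0 on termination, optionally perturbed with zero-mean Gaussian noise. The same computation as a small free function (the noise scale is a made-up default):

    import numpy as np

    def step_reward(terminal, reward_noise=0.1):
        # -1 per step penalizes long episodes; 0 once the goal is reached.
        r = 0.0 if terminal else -1.0
        if reward_noise > 0:
            r += np.random.normal(scale=reward_noise)  # zero-mean Gaussian noise
        return r

    print(step_reward(False), step_reward(True))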
Example #7
    def get_Observation(self):
        return Observation.from_AbstractType(self.get_AbstractType())
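Example #7 forwards to a copy-style constructor. One plausible shape for such a from_AbstractType helper is sketched below; this is an assumption about the pattern, not the library's actual implementation:

    class RLAbstractType(object):
        """Stand-in base type carrying RL-Glue's three payload arrays."""
        def __init__(self, intArray=None, doubleArray=None, charArray=None):
            self.intArray = list(intArray or [])
            self.doubleArray = list(doubleArray or [])
            self.charArray = list(charArray or [])

    class Observation(RLAbstractType):
        @classmethod
        def from_AbstractType(cls, abstract):
            # Copy the payload arrays out of the generic container.
            return cls(abstract.intArray, abstract.doubleArray, abstract.charArray)

    obs = Observation.from_AbstractType(RLAbstractType(intArray=[1, 2]))
    print(obs.intArray)  # -> [1, 2]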