Exemplo n.º 1
0
    def init(self, taskspec):
        """Initialize the agent and install the key-to-index mappings.

        Calls the parent ``init``, reads the feature representation from the
        extra section of the task specification and installs
        ``key_to_index`` static methods on ``MDPState`` and ``MDPAction``.
        Nothing beyond the parent call happens when the task specification
        is not valid.

        Parameters
        ----------
        taskspec :
            The task specification handed to ``TaskSpecParser``.

        """
        super(BodyMotionAgent, self).init(taskspec)

        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            return

        extra = ts.get_extra()

        # Locate every known identifier in the extra section and drop the
        # ones that are absent. Only ValueError (identifier not found) is
        # tolerated so that unrelated errors are no longer silently hidden.
        found = []
        for id_ in ('FEATUREREP', 'STATESPERDIM', 'STATEDESCR', 'ACTIONDESCR', 'COPYRIGHT'):
            try:
                found.append((extra.index(id_), id_))
            except ValueError:
                continue
        # Identifiers ordered by their position in the extra section.
        v = [id_ for _, id_ in sorted(found)]

        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'FEATUREREP':
                self._feature_rep = val

        # The 'larm' representation only uses the three positional
        # components; every other representation also carries the
        # "w*" / "dw*" components.
        if self._feature_rep == 'larm':
            state_index = {"x": 0, "y": 1, "z": 2}
            action_index = {"dx": 0, "dy": 1, "dz": 2}
        else:
            state_index = {"x": 0, "y": 1, "z": 2, "wx": 3, "wy": 4, "wz": 5}
            action_index = {"dx": 0, "dy": 1, "dz": 2, "dwx": 3, "dwy": 4, "dwz": 5}

        def map_state_key(key):
            # Raises KeyError for an unknown key.
            return state_index[key]

        def map_action_key(key):
            # Raises KeyError for an unknown key.
            return action_index[key]

        MDPState.key_to_index = staticmethod(map_state_key)
        MDPAction.key_to_index = staticmethod(map_action_key)
Exemplo n.º 2
0
    def init(self, taskspec):
        """Initialize the agent from the task specification.

        Parses the task specification, derives the per-dimension scaling
        from the observation limits, enumerates the discrete actions and
        builds a ``CasmlApproximator`` model shared by all actions, the
        ``MDP`` and the ``PrioritizedSweeping`` planner. The resulting
        ``Learner`` is stored in ``self._learner``.

        Parameters
        ----------
        taskspec :
            The task specification handed to ``TaskSpecParser``.

        Raises
        ------
        TaskSpecError
            If the task specification is not valid.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        extra = ts.get_extra()
        found = []
        for id_ in ('OBSDESCR', 'ACTDESCR', 'COPYRIGHT'):
            try:
                found.append((extra.index(id_), id_))
            except ValueError:
                # Identifier is not part of the extra section.
                continue
        # Order the identifiers by their position in the extra section so that
        # index ``i`` handed to ``get_value`` refers to the identifier that is
        # being processed in the loop below.
        v = [id_ for _, id_ in sorted(found)]

        act_desc = {}
        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # NOTE(review): eval() executes whatever the task spec contains;
                # if the spec can come from an untrusted source, switch to
                # ast.literal_eval.
                act_desc = eval(val)

        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        # list(...) keeps the column access working where zip() returns an
        # iterator instead of a list.
        obs_columns = list(zip(*obs))
        obs_range = np.asarray(obs_columns[1]) - np.asarray(obs_columns[0])
        dimensions += (1.0 / obs_range).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # One range per action dimension; the upper limit is inclusive.
            per_dim = [range(limit[0], limit[1] + 1) for limit in act_limits]
            # Every action id is one combination of per-dimension values. The
            # combinations are iterated directly; they cannot be expanded
            # through range().
            # NOTE(review): assumes MDPAction.create accepts a tuple id - confirm.
            action_ids = list(itertools.product(*per_dim))
        else:
            act = act_limits[0][:]
            act[1] += 1  # the upper limit is inclusive
            action_ids = range(*act)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        feature_metadata = {}
        for i, n in enumerate(['state', 'act', 'delta_state']):
            meta = {
                'type': 'float',
                'is_index': n != 'delta_state',
                'order': i,
            }
            feature_metadata[n] = meta

            # Retrieval settings are optional; without a retrieval method the
            # method parameters are not looked up either.
            try:
                meta['retrieval_method'] = self._config['retrieval'][n]['method']
            except KeyError:
                continue

            try:
                params = self._config['retrieval'][n]['method_params']
            except KeyError:
                continue

            if n == 'state':
                if isinstance(params, dict):
                    # Fall back to the observation scaling when no scale is configured.
                    if 'scale' not in params:
                        params['scale'] = dimensions
                else:
                    params = listify(params)
                    # The first entry is replaced by the maximum distance; a
                    # single-entry list additionally gets the scaling prepended.
                    params[0] = maxd
                    if len(params) <= 1:
                        params.insert(0, dimensions)
            meta['retrieval_method_params'] = params

        model_approximator = CasmlApproximator(feature_metadata, self._config['minfraction'], dimensions,
                                               lambda x: np.exp(-x * x / bb), self._config['model_tau'],
                                               self._config['model_sigma'], self._config['n_components'],
                                               self._config['n_iter'])

        actions = []
        for i in action_ids:
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))
        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like a
        # leftover from experimentation - confirm. Slicing avoids an IndexError
        # when fewer than three actions exist.
        actions = actions[:3]

        # def inclusion(approximator, state):
        #     try:
        #         approx = approximator._queries[state]
        #     except KeyError:
        #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
        #
        #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
        #
        # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), inclusion)
        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)
Exemplo n.º 3
0
    def init(self, taskspec):
        """Initialize the agent from the task specification.

        Parses the task specification, derives the per-dimension scaling
        from the observation limits, enumerates the discrete actions and
        builds one ``CsmlApproximator`` model per action, the ``MDP`` and the
        ``PrioritizedSweeping`` planner. The resulting ``Learner`` is stored
        in ``self._learner``.

        Parameters
        ----------
        taskspec :
            The task specification handed to ``TaskSpecParser``.

        Raises
        ------
        TaskSpecError
            If the task specification is not valid.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        extra = ts.get_extra()
        found = []
        for id_ in ('OBSDESCR', 'ACTDESCR', 'COPYRIGHT'):
            try:
                found.append((extra.index(id_), id_))
            except ValueError:
                # Identifier is not part of the extra section.
                continue
        # Order the identifiers by their position in the extra section so that
        # index ``i`` handed to ``get_value`` refers to the identifier that is
        # being processed in the loop below.
        v = [id_ for _, id_ in sorted(found)]

        act_desc = {}
        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # NOTE(review): eval() executes whatever the task spec contains;
                # if the spec can come from an untrusted source, switch to
                # ast.literal_eval.
                act_desc = eval(val)

        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        # list(...) keeps the column access working where zip() returns an
        # iterator instead of a list.
        obs_columns = list(zip(*obs))
        obs_range = np.asarray(obs_columns[1]) - np.asarray(obs_columns[0])
        dimensions += (1.0 / obs_range).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # One range per action dimension; the upper limit is inclusive.
            per_dim = [range(limit[0], limit[1] + 1) for limit in act_limits]
            # Every action id is one combination of per-dimension values. The
            # combinations are iterated directly; they cannot be expanded
            # through range().
            # NOTE(review): assumes MDPAction.create accepts a tuple id - confirm.
            action_ids = list(itertools.product(*per_dim))
        else:
            act = act_limits[0][:]
            act[1] += 1  # the upper limit is inclusive
            action_ids = range(*act)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        def kernelfn(x):
            return np.exp(-x * x / bb)

        def model_inclusion(approximator, state, delta):
            """Decide whether the case ``(state, delta)`` is added to the model."""
            try:
                approx = approximator._queries[state]
            except KeyError:
                approx = approximator.Approximation(approximator, state, approximator._kernelfn)

            for _, _neighbor_state, d in approx._neighbors:
                if np.linalg.norm(d - delta) <= self._config['model_sigma']:
                    # At least one of the cases in the case base correctly estimated the
                    # query case, so it carries no new information; it is only added when
                    # the first neighbor entry exceeds the model_tau threshold.
                    return approx._neighbors[0][0] > self._config['model_tau']

            # No stored case estimated the query case within model_sigma.
            return True

        actions = []
        for i in action_ids:
            model_approximator = CsmlApproximator(self._config['minfraction'], maxd, dimensions, kernelfn,
                                                  self._config['n_components'], self._config['n_iter'], model_inclusion)
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))

        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like a
        # leftover from experimentation - confirm. Slicing avoids an IndexError
        # when fewer than three actions exist.
        actions = actions[:3]

        # def value_inclusion(approximator, state):
        #     try:
        #         approx = approximator._queries[state]
        #     except KeyError:
        #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
        #
        #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
        #
        # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), value_inclusion)
        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)
Exemplo n.º 4
0
    def init(self, taskspec):
        """Initialize the agent and configure the ``MDPState`` representation.

        Calls the parent ``init``, reads the feature representation and the
        support leg from the extra section of the task specification, and
        configures feature limits, discretization and the validation /
        encoding hooks on ``MDPState``. Nothing beyond the parent call
        happens when the task specification is not valid.

        Parameters
        ----------
        taskspec :
            The task specification handed to ``TaskSpecParser``.

        """
        super(PenaltyKickAgent, self).init(taskspec)

        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            return

        extra = ts.get_extra()

        # Locate every known identifier in the extra section and drop the
        # ones that are absent. Only ValueError (identifier not found) is
        # tolerated so that unrelated errors are no longer silently hidden.
        found = []
        for id_ in ('FEATUREREP', 'SUPPORTLEG', 'STATEDESCR', 'ACTIONDESCR', 'COPYRIGHT'):
            try:
                found.append((extra.index(id_), id_))
            except ValueError:
                continue
        # Identifiers ordered by their position in the extra section.
        v = [id_ for _, id_ in sorted(found)]

        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'FEATUREREP':
                self._feature_rep = val
            elif id_ == 'SUPPORTLEG' and val == 'left':
                # With the left leg as support, the right leg joints are used.
                self._ankle_roll = "RAnkleRoll"
                self._hip_roll = "RHipRoll"

        min_hip_roll, max_hip_roll = NaoWorldModel().get_robot_info(self._hip_roll)
        leg_length = NaoWorldModel().get_robot_info("TibiaLength") + NaoWorldModel().get_robot_info("ThighLength")

        # Feature limits derived from the leg length and the hip roll limits.
        hip_limits = [math.floor(leg_length * math.sin(min_hip_roll)),
                      math.ceil(leg_length * math.sin(max_hip_roll))]

        MDPState.dtype = MDPState.DTYPE_INT

        if self._feature_rep == 'rl':
            try:
                max_location = NaoWorldModel().get_object("ball").resolution[0]
            except AttributeError:
                max_location = 0

            MDPState.set_minmax_features([0, max_location], hip_limits)
            MDPState.set_states_per_dim([int((MDPState.max_features[0] - MDPState.min_features[0]) / 2),
                                         int(math.ceil((MDPState.max_features[1] - MDPState.min_features[1]) / 4))])

            # noinspection PyShadowingNames
            def is_valid(self):
                # ``self`` is the state instance here, not the agent.
                real_state = True

                if MDPState.min_features is not None:
                    for (feature, min_feature, max_feature) in zip(self, MDPState.min_features,
                                                                   MDPState.max_features):
                        if feature < (min_feature - eps) or feature > (max_feature + eps):
                            real_state = False
                            self._logger.debug("\t\t\t\tNext state is not valid (feature %d out of range)", feature)
                            break

                return real_state

            MDPState.is_valid = is_valid

        else:
            MDPState.set_minmax_features(hip_limits)
            MDPState.set_nfeatures(
                int(math.ceil((MDPState.max_features - MDPState.min_features + 1) / self._bin_width)))

            # noinspection PyShadowingNames
            def is_valid(self):
                # Valid states have exactly one entry set to 1 and only 0/1 entries.
                num_ones = len(np.where(self.get()[0:len(self)] == 1)[0])
                return num_ones == 1 and all(i == 0 or i == 1 for i in self.get())

            # noinspection PyShadowingNames
            def encode(self):
                # Index (or indices) of the entries that are set to 1.
                return np.where(self.get()[0:len(self)] == 1)[0]

            def decode(cls, state_repr):
                # ``self`` below is the agent captured from the enclosing scope.
                decoded = [0] * cls.nfeatures
                bin_num = 0
                if isinstance(state_repr[0], int):
                    bin_num = state_repr[0]
                elif isinstance(state_repr[0], float):
                    bin_num = int(math.floor((state_repr[0] - cls.min_features) / self._bin_width))

                # Clamp the bin to the valid range before setting its entry.
                if 0 <= bin_num <= cls.nfeatures - 1:
                    decoded[bin_num] = 1
                elif bin_num < 0:
                    decoded[0] = 1
                else:
                    decoded[cls.nfeatures - 1] = 1

                return cls(decoded)

            MDPState.is_valid = is_valid
            MDPState.encode = encode
            # ``decode`` takes the class as its first argument and instantiates it,
            # so it has to be bound as a classmethod to receive ``cls``.
            MDPState.decode = classmethod(decode)
Exemplo n.º 5
0
    def init(self, taskspec):
        """Initializes the agent.

        Parses the task specification, derives the per-dimension scaling
        from the observation limits, enumerates the discrete actions and
        builds one ``KernelApproximator`` model per action, the ``MDP`` and
        the ``PrioritizedSweeping`` planner. The resulting ``Learner`` is
        stored in ``self._learner``.

        Parameters
        ----------
        taskspec : str
            The task specification.

        Raises
        ------
        TaskSpecError
            If the task specification is not valid.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        extra = ts.get_extra()
        found = []
        for id_ in ('OBSDESCR', 'ACTDESCR', 'COPYRIGHT'):
            try:
                found.append((extra.index(id_), id_))
            except ValueError:
                # Identifier is not part of the extra section.
                continue
        # Order the identifiers by their position in the extra section so that
        # index ``i`` handed to ``get_value`` refers to the identifier that is
        # being processed in the loop below.
        v = [id_ for _, id_ in sorted(found)]

        act_desc = {}
        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # NOTE(review): eval() executes whatever the task spec contains;
                # if the spec can come from an untrusted source, switch to
                # ast.literal_eval.
                act_desc = eval(val)

        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        # list(...) keeps the column access working where zip() returns an
        # iterator instead of a list.
        obs_columns = list(zip(*obs))
        obs_range = np.asarray(obs_columns[1]) - np.asarray(obs_columns[0])
        dimensions += (1.0 / obs_range).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # One range per action dimension; the upper limit is inclusive.
            per_dim = [range(limit[0], limit[1] + 1) for limit in act_limits]
            # Every action id is one combination of per-dimension values. The
            # combinations are iterated directly; they cannot be expanded
            # through range().
            # NOTE(review): assumes MDPAction.create accepts a tuple id - confirm.
            action_ids = list(itertools.product(*per_dim))
        else:
            act = act_limits[0][:]
            act[1] += 1  # the upper limit is inclusive
            action_ids = range(*act)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        def kernelfn(x):
            return np.exp(-x * x / bb)

        actions = []
        for i in action_ids:
            model_approximator = KernelApproximator(self._config['minfraction'], maxd, dimensions, kernelfn)
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))
        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like a
        # leftover from experimentation - confirm. Slicing avoids an IndexError
        # when fewer than three actions exist.
        actions = actions[:3]

        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)