示例#1
0
    def idle(self):
        """
        Ask what sits next to the first goal of a goal pair.

        For every pair returned by _get_between_pair_goals(), the cell at
        (g1.loc[0] + 1, g1.loc[1]) is inspected. With probability 0.9 the
        question is about a goal found on that cell; otherwise it is about a
        cell holding no goal, and the answer is "nothing".

        Returns a [next_stage, reward, sentence] list. When no question can
        be formed the task stays in "idle" with an empty sentence.
        """
        # True: ask about an occupied cell; False: ask about an empty one
        rec_goal = random.uniform(0, 1) < 0.9

        candidates = []
        for first, second in self._get_between_pair_goals():
            occupied = False
            for goal in self._get_goals():
                if goal.loc == (first.loc[0] + 1, first.loc[1]):
                    occupied = True
                    if rec_goal:
                        candidates.append(
                            (first.name, second.name, goal.name))
            if not (rec_goal or occupied):
                candidates.append((first.name, second.name, "nothing"))

        if not candidates:
            return ["idle", 0, ""]

        left_name, right_name, answer = random.choice(candidates)
        self._bind("S -> question")
        # randomize the order in which the two goals are mentioned
        if random.uniform(0, 1) < 0.5:
            left_name, right_name = right_name, left_name
        self._bind("Z -> '%s'" % left_name)
        self._bind("T -> '%s'" % right_name)
        sentence = self._generate()
        if get_flag("task_mode") != "arxiv_lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(answer)
            return ["simple_recognition_reward", 0, sentence]
        # supervised mode: the answer is appended to the question
        return ["idle", 0, sentence + " " + answer]
示例#2
0
 def __init__(self, item_path, start_level=0):
     """
     Set up an 8x8 maze-generating environment and read the curriculum flag.

     item_path and start_level are forwarded unchanged to the parent
     constructor.
     """
     parent_options = dict(
         item_path=item_path,
         max_height=8,
         max_width=8,
         start_level=start_level,
         maze_generation=True)
     super(XWorldNav, self).__init__(**parent_options)
     self.curriculum = get_flag("curriculum")
    def idle(self):
        """
        Ask what is located in a given direction around the agent.

        With probability 0.9 a surrounding goal is picked and its name is the
        answer; otherwise a surrounding empty grid is picked and the answer
        is "nothing".

        Returns a [next_stage, reward, sentence] list. When nothing suitable
        is found the task stays in "idle" with an empty sentence.
        """
        picked = None  # becomes (answer, direction) once a target is chosen
        if random.uniform(0, 1) < 0.9:  ## tell goal
            nearby_goals = self._get_surrounding_goals()
            if nearby_goals:
                goal, direction = random.choice(nearby_goals)
                picked = (goal.name, direction)
        else:  ## tell empty grid
            vacant = self._get_surrounding_empty_grids()
            if vacant:
                _, direction = random.choice(vacant)
                picked = ("nothing", direction)

        if picked is None:
            return ["idle", 0, ""]

        answer, direction = picked
        self._bind("S -> question")
        self._bind("D -> '%s'" % direction)
        sentence = self._generate()
        if get_flag("task_mode") != "lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(answer)
            return ["simple_recognition_reward", 0, sentence]
        return ["idle", 0, sentence + " " + answer]
示例#4
0
文件: XWorldNav.py 项目: He370/XWorld
 def __init__(self, item_path):
     """
     Set up a 5x5 environment, read the curriculum flag and start at
     level 0.
     """
     side = 5
     parent_options = dict(
         item_path=item_path, max_height=side, max_width=side)
     super(XWorldNav, self).__init__(**parent_options)
     self.curriculum = get_flag("curriculum")
     self.current_level = 0
示例#5
0
    def idle(self):
        """
        Ask what is located in a given direction relative to a reference goal.

        With probability 0.9 the reference goal is one that has a surrounding
        goal, and that neighbor's name is the answer. Otherwise the reference
        goal is one with a surrounding empty grid and the answer is
        "nothing".

        Returns a [next_stage, reward, sentence] list. When no reference goal
        qualifies the task stays in "idle" with an empty sentence.
        """
        anchors = self._get_goals()
        picked = None  # becomes (reference goal, answer, direction)
        if random.uniform(0, 1) < 0.9:  ## tell goal
            options = []
            for anchor in anchors:
                if self._get_surrounding_goals(anchor.loc):
                    neighbor = random.choice(
                        self._get_surrounding_goals(anchor.loc))
                    options.append((anchor, neighbor))
            if options:
                anchor, neighbor = random.choice(options)
                picked = (anchor, neighbor[0].name, neighbor[1])
        else:  ## tell empty grid
            options = []
            for anchor in anchors:
                if self._get_surrounding_empty_grids(anchor.loc):
                    neighbor = random.choice(
                        self._get_surrounding_empty_grids(anchor.loc))
                    options.append((anchor, neighbor))
            if options:
                anchor, neighbor = random.choice(options)
                picked = (anchor, "nothing", neighbor[1])

        if picked is None:
            return ["idle", 0, ""]

        reference, answer, direction = picked
        self._bind("S -> question")
        self._bind("D -> '%s'" % direction)
        self._bind("Z -> '%s'" % reference.name)
        sentence = self._generate()
        if get_flag("task_mode") != "lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(answer)
            return ["simple_recognition_reward", 0, sentence]
        return ["idle", 0, sentence + " " + answer]
 def idle(self):
     """
     Ask in which direction, relative to the agent, the cell next to the
     first goal of a goal pair lies.

     For each pair from _get_between_pair_goals() the direction from the
     agent to (g1.loc[0] + 1, g1.loc[1]) is computed; pairs with an empty
     direction string are discarded.

     Returns a [next_stage, reward, sentence] list. When no pair qualifies
     the task stays in "idle" with an empty sentence.
     """
     pairs = self._get_between_pair_goals()
     agent, _, _ = self._get_agent()
     options = []
     for first, second in pairs:
         where = self._get_direction(agent.loc,
                                     (first.loc[0] + 1, first.loc[1]))
         if where != "":
             options.append((first.name, second.name, where))

     if not options:
         return ["idle", 0, ""]

     left_name, right_name, direction = random.choice(options)
     self._bind("S -> question")
     # randomize the order in which the two goals are mentioned
     if random.uniform(0, 1) < 0.5:
         left_name, right_name = right_name, left_name
     self._bind("O -> '%s'" % left_name)
     self._bind("T -> '%s'" % right_name)
     sentence = self._generate()
     if get_flag("task_mode") != "arxiv_lang_acquisition":
         # RL: record the answer for the reward stage
         self._record_answer(direction)
         return ["simple_recognition_reward", 0, sentence]
     # supervised; forward compatible
     return ["idle", 0, sentence + " " + direction]
示例#7
0
 def __init__(self, env):
     """
     Build the direction lookup table and initialise per-task bookkeeping.

     self.directions maps a (low, high) angle interval to the name of a
     spatial relation. Each interval is expressed with the class constants
     PI, PI_2, PI_4 and PI_8.
     """
     pi, half_pi = self.PI, self.PI_2
     quarter_pi, eighth_pi = self.PI_4, self.PI_8
     ## define all the spatial relations
     sectors = [
         ((half_pi - eighth_pi, half_pi + eighth_pi), "right"),
         ((-half_pi - eighth_pi, -half_pi + eighth_pi), "left"),
         ((-eighth_pi, eighth_pi), "front"),
         # the two "back" intervals lie on either side of the +/-PI
         # wrap-around and cannot be merged into one
         ((pi - eighth_pi, pi), "back"),
         ((-pi, -pi + eighth_pi), "back"),
         ((eighth_pi, quarter_pi + eighth_pi), "front-right"),
         ((half_pi + eighth_pi, pi - eighth_pi), "back-right"),
         ((-half_pi + eighth_pi, -eighth_pi), "front-left"),
         ((-pi + eighth_pi, -half_pi - eighth_pi), "back-left"),
     ]
     self.directions = dict(sectors)
     self.distance_threshold = get_flag("x3_reaching_distance")
     self.orientation_threshold = quarter_pi
     self.env = env
     self.event = ""
     self.num_successes = 0
     self.num_failures = 0
     self.reset()
     # the grammar is supplied by _define_grammar()
     self.cfg = CFG(*self._define_grammar())
     self.sentence = ""
     self.failure_recorded = False
    def idle(self):
        """
        Ask for the name of a goal identified by its color.

        A random colored goal is picked; its color is bound into the question
        and its name is the expected answer.

        Returns a [next_stage, reward, sentence] list. When there are no
        colored goals the task stays in "idle" with an empty sentence.
        """
        colored = self._get_colored_goals()
        if not colored:
            return ["idle", 0, ""]

        chosen = random.choice(colored)
        self._bind("S -> question")
        self._bind("C -> '%s'" % chosen.color)
        sentence = self._generate()
        if get_flag("task_mode") != "arxiv_lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(chosen.name)
            return ["simple_recognition_reward", 0, sentence]
        return ["idle", 0, sentence + " " + chosen.name]
    def idle(self):
        """
        Ask for the color of the goal lying in a given direction.

        Only surrounding goals whose color passes _color_defined() are
        considered. The direction is bound into the question and the goal's
        color is the expected answer.

        Returns a [next_stage, reward, sentence] list. When no such goal
        exists the task stays in "idle" with an empty sentence.
        """
        colored_nearby = [
            entry for entry in self._get_surrounding_goals()
            if self._color_defined(entry[0].color)
        ]
        if not colored_nearby:
            return ["idle", 0, ""]

        chosen, direction = random.choice(colored_nearby)
        self._bind("S -> question")
        self._bind("D -> '%s'" % direction)
        sentence = self._generate()
        if get_flag("task_mode") != "arxiv_lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(chosen.color)
            return ["simple_recognition_reward", 0, sentence]
        return ["idle", 0, sentence + " " + chosen.color]
示例#10
0
 def idle(self):
     """
     Ask for the color of a goal identified by its name.

     A random colored goal is picked; its name is bound into the question
     and its color is the expected answer.

     Returns a [next_stage, reward, sentence] list. When there are no
     colored goals the task stays in "idle" with an empty sentence.
     """
     colored = self._get_colored_goals()
     if not colored:
         return ["idle", 0, ""]

     chosen = random.choice(colored)
     self._bind("S -> question")
     self._bind("G -> '%s'" % chosen.name)
     sentence = self._generate()
     if get_flag("task_mode") != "arxiv_lang_acquisition":
         # RL: record the answer for the reward stage
         self._record_answer(chosen.color)
         return ["simple_recognition_reward", 0, sentence]
     # supervised; forward compatible
     return ["idle", 0, sentence + " " + chosen.color]
示例#11
0
 def idle(self):
     """
     Ask in which direction a goal of a given color lies.

     Only surrounding goals whose color passes _color_defined() are
     considered. The goal's color is bound into the question and its
     direction is the expected answer.

     Returns a [next_stage, reward, sentence] list. When no such goal exists
     the task stays in "idle" with an empty sentence.
     """
     colored_nearby = [
         entry for entry in self._get_surrounding_goals()
         if self._color_defined(entry[0].color)
     ]
     if not colored_nearby:
         return ["idle", 0, ""]

     chosen, direction = random.choice(colored_nearby)
     self._bind("S -> question")
     self._bind("C -> '%s'" % chosen.color)
     sentence = self._generate()
     if get_flag("task_mode") != "lang_acquisition":
         # RL: record the answer for the reward stage
         self._record_answer(direction)
         return ["simple_recognition_reward", 0, sentence]
     # supervised; forward compatible
     return ["idle", 0, sentence + " " + direction]
 def idle(self):
     """
     Ask for the color of the goal lying in a given direction from a named
     goal.

     For every colored goal that has surrounding goals, one of those
     neighbors is picked at random as the named reference. The question
     binds the direction from the reference to the colored goal together
     with the reference's name; the colored goal's color is the answer.

     Returns a [next_stage, reward, sentence] list. When no colored goal has
     a neighbor the task stays in "idle" with an empty sentence.
     """
     pairs = []
     for colored in self._get_colored_goals():
         if self._get_surrounding_goals(colored.loc):
             neighbor = random.choice(
                 self._get_surrounding_goals(colored.loc))
             pairs.append((neighbor, colored))

     if not pairs:
         return ["idle", 0, ""]

     # each neighbor entry is a 2-tuple whose first item is the goal
     (goal, _), color_goal = random.choice(pairs)
     direction = self._get_direction(goal.loc, color_goal.loc)
     self._bind("S -> question")
     self._bind("D -> '%s'" % direction)
     self._bind("O -> '%s'" % goal.name)
     sentence = self._generate()
     if get_flag("task_mode") != "lang_acquisition":
         # RL: record the answer for the reward stage
         self._record_answer(color_goal.color)
         return ["simple_recognition_reward", 0, sentence]
     # supervised; forward compatible
     return ["idle", 0, sentence + " " + color_goal.color]
示例#13
0
    def simple_navigation_reward(self):
        """
        Reward stage of a simple navigation task.

        Every step costs XWorldTask.time_penalty, plus
        XWorldTask.failed_action_penalty when the last action failed. Then,
        in order of precedence:
          * "one_channel" mode and at least h*w/2 steps taken: the task is
            recorded as a failure and returns to "idle" with a time-up
            sentence;
          * the agent stands on the target: success is recorded,
            XWorldTask.correct_reward is added and the task returns to
            "idle" with a finish sentence;
          * the agent stands on any other goal: XWorldTask.wrong_reward is
            added and the stage continues.

        Returns a [next_stage, reward, sentence] list.
        """
        reward = XWorldTask.time_penalty

        agent, _, action_ok = self._get_agent()
        if not action_ok:
            reward += XWorldTask.failed_action_penalty

        occupied_by_goals = [goal.loc for goal in self._get_goals()]

        self.steps_in_cur_task += 1
        h, w = self.env.get_max_dims()
        timed_out = get_flag("task_mode") == "one_channel" \
            and self.steps_in_cur_task >= h*w / 2

        if timed_out:
            self.steps_in_cur_task = 0
            self._record_failure()
            self._bind("S -> timeup")
            return ["idle", reward, self._generate()]

        if agent.loc == self.target:
            self.steps_in_cur_task = 0
            self._record_success()
            self._record_event("correct_goal")
            reward += XWorldTask.correct_reward
            self._bind("S -> finish")
            return ["idle", reward, self._generate()]

        if agent.loc in occupied_by_goals:
            reward += XWorldTask.wrong_reward

        return ["simple_navigation_reward", reward, ""]
示例#14
0
    def idle(self):
        """
        Ask for the color of the goal sitting next to the first goal of a
        goal pair.

        A colored goal qualifies for a pair (g1, g2) when it is located at
        (g1.loc[0] + 1, g1.loc[1]). Its color is the expected answer.

        Returns a [next_stage, reward, sentence] list. When no colored goal
        qualifies the task stays in "idle" with an empty sentence.
        """
        pairs = self._get_between_pair_goals()
        colored = self._get_colored_goals()
        rows = [
            (first.name, second.name, goal.color)
            for goal in colored
            for first, second in pairs
            if goal.loc == (first.loc[0] + 1, first.loc[1])
        ]

        if not rows:
            return ["idle", 0, ""]

        left_name, right_name, color = random.choice(rows)
        self._bind("S -> question")
        # randomize the order in which the two goals are mentioned
        if random.uniform(0, 1) < 0.5:
            left_name, right_name = right_name, left_name
        self._bind("O -> '%s'" % left_name)
        self._bind("T -> '%s'" % right_name)
        sentence = self._generate()
        if get_flag("task_mode") != "arxiv_lang_acquisition":
            # the answer is checked later, in the reward stage
            self._record_answer(color)
            return ["simple_recognition_reward", 0, sentence]
        return ["idle", 0, sentence + " " + color]
示例#15
0
    def set_property(self, entity, property_value_dict=None):
        """
        Reinstantiate the specified properties of an existing entity.

        Properties and corresponding values are given by property_value_dict
        in the form {property : value, ...}, e.g.
        {"name" : "apple", "loc" : (0, 0)}. A value may be None (force
        reinstantiation) or a valid value for that property.
        1) If None is provided for a property (e.g. {"name" : None}), that
           property is reinstantiated regardless of its original value.
        2) Otherwise the value is assigned to that property of the entity.
        3) All unset (None) entity properties are instantiated.

        name and asset_path depend on each other:
        a. a name given without asset_path forces a new asset_path;
        b. a concrete asset_path given without a concrete name determines
           the name, and exactly one name must own that path;
        c. when both are concrete they must be consistent.

        The caller's dictionary is never modified. Omitting it (or passing
        None) is equivalent to passing an empty dictionary.

        Raises AssertionError for an unknown property name, for an
        asset_path that does not map to exactly one name, or for a
        name/asset_path mismatch.
        """
        # Work on a private copy. The original wrote into the argument,
        # which leaked changes to the caller and to a shared default dict.
        specified = dict(property_value_dict) if property_value_dict else {}

        default_dict = OrderedDict.fromkeys(
            ["name", "loc", "asset_path", "yaw", "scale", "offset"])
        pv_dict = default_dict.copy()
        pv_dict.update(specified)

        # pre-processing for name and asset_path due to their dependency;
        # "empty" marks a property the caller did not mention at all
        path_value = specified.get("asset_path", "empty")
        name_value = specified.get("name", "empty")

        # if name is specified by user but asset_path is unspecified,
        # the old path may not belong to the new name: force a new one
        if name_value != "empty" and path_value == "empty":
            specified["asset_path"] = None

        # Only a concrete asset_path constrains the name. An explicit None
        # means "pick a new path" and must not reach the lookups below.
        if path_value != "empty" and path_value is not None:
            if name_value == "empty" or name_value is None:
                # derive the name from the asset_path
                names = [
                    n for n in self.items[entity.type]
                    if path_value in set(self.items[entity.type][n])
                ]
                assert len(
                    names
                ) == 1, "there should be only one name corresponding to the asset_path: %s" % (
                    path_value)
                specified["name"] = names[0]
                pv_dict.update(specified)
            else:
                # if both name and asset_path are specified, check consistency
                assert path_value in self.items[entity.type][name_value], \
                    "specified name: %s and asset_path: %s mis-match" % (name_value, path_value)

        # pv_dict keeps the key order name, loc, asset_path, yaw, scale,
        # offset, so name is settled before asset_path and scale before offset
        for prop in pv_dict:
            assert prop in entity.__dict__.keys() and prop in default_dict.keys(), \
                "invalid property name: %s is provided" % prop
            value = pv_dict[prop]

            # skip unspecified and non-empty entity properties
            if prop not in specified and entity.__dict__[prop] is not None:
                continue

            if prop == "loc":
                # give the old grid back before drawing the new one
                if entity.loc is not None:
                    self.available_grids.append(entity.loc)
                entity.loc = check_or_get_value(value, self.available_grids)
                self.available_grids.remove(entity.loc)
            if prop == "name":
                entity.name = check_or_get_value(
                    value, self.get_all_possible_names(entity.type))
                # update id once name is changed
                entity.id = "%s_%s" % (entity.name,
                                       self.entity_nums[entity.type])
            if prop == "asset_path":
                entity.asset_path = check_or_get_value(
                    value, self.items[entity.type][entity.name])
                # color is coupled with asset_path
                if entity.asset_path in self.color_table.keys():
                    entity.color = self.color_table[entity.asset_path]
                else:
                    entity.color = "na"
            if prop == "yaw" and get_flag(
                    "visible_radius") and entity.type != "block":
                ## if partially observed, perturb the objects
                yaw_range = range(-1, 3)
                entity.yaw = check_or_get_value(value, yaw_range) * self.PI_2
            if prop == "scale" and get_flag(
                    "visible_radius") and entity.type == "goal":
                scale_range = [0.5, 1]
                entity.scale = check_or_get_value(value,
                                                  scale_range,
                                                  is_continuous=True)
            if prop == "offset" and get_flag(
                    "visible_radius") and entity.type == "goal":
                offset_range = [
                    0, 1 - (entity.scale if hasattr(entity, 'scale') else 0.5)
                ]
                entity.offset = check_or_get_value(value,
                                                   offset_range,
                                                   is_continuous=True)

        self.changed = True
示例#16
0
    def set_property(self, entity, property_value_dict={}):
        """
        Reinstantiate the specified properties of an existing entity.
        Properties and corresponding values are specified by the property_value_dict in the form
        of {property : value, ...}, e.g. {"name" : "apple", "loc" : (0, 0)}. value could be None
        (force reinstantiation) or a valid value for that property.
        1) If None value is provided for a specified property (e.g. {"name" : None}), entity
           property will be reinstantiated regardless of its original value.
        2) otherwise, the value will be assigned to that property of the entity.

        For the remaining not in property_value_dict:
        3) all unset entity properties will be instantiated.
        4) the already set properties will keep the same.

        Because name and asset_path have a dependency, we require that at most one of them
        is not None.
        a. When name is None but asset_path is not, then name will be uniquely determined from the path;
        b. When asset_path is None but name is not, then path will be randomly selected for the name;
        c. When both are None, both are randomly selected.

        Side effects visible in this method: entity.loc, name, id, asset_path and color are
        always reassigned; self.available_grids and self.running_id are updated; self.changed
        is set to True. yaw, scale and offset are only touched when the "visible_radius" flag
        is truthy.

        Raises AssertionError when asset_path is not None while name is also not None, or
        when the asset_path does not belong to exactly one name.
        """
        # Start from the entity's current attributes (shallow copy), so properties the
        # caller does not mention keep their current value.
        pv_dict = entity.__dict__.copy()
        ## let the user overwrite the specified
        pv_dict.update(property_value_dict)

        ## pre-processing for name and asset_path due to their dependency
        path_value = pv_dict["asset_path"]
        name_value = pv_dict["name"]
        if path_value is not None:
            # NOTE(review): name_value comes from the merged dict, so an entity that already
            # has a name will trip this assert unless the caller also passes name=None --
            # confirm that this is the intended calling convention.
            assert name_value is None, "With asset_path, you don't have to set name"
            names = [n for n in self.items[entity.type] \
                     if path_value in self.items[entity.type][n]]
            assert len(names) == 1, \
                "each asset_path corresponds to only one name: %s" % (path_value)
            pv_dict["name"] = names[0]
        # else: do nothing; asset_path will be set later

        ## set each key in entity.__dict__.keys()
        # Return the entity's current grid to the pool first, so it is a legal candidate
        # for the new location.
        if entity.loc is not None:
            self.available_grids.append(entity.loc)
        entity.loc = check_or_get_value(pv_dict["loc"], self.available_grids)
        self.available_grids.remove(entity.loc)
        ##
        entity.name = check_or_get_value(
            pv_dict["name"], self.get_all_possible_names(entity.type))
        # the id is rebuilt from the name and a counter that is incremented on every call
        entity.id = "%s_%d" % (entity.name, self.running_id)
        self.running_id += 1
        ##
        # the asset path is validated/selected among the paths of the name chosen above
        entity.asset_path = check_or_get_value(
            pv_dict["asset_path"], self.items[entity.type][entity.name])
        # color is coupled with asset_path
        if entity.asset_path in self.color_table.keys():
            entity.color = self.color_table[entity.asset_path]
        else:
            entity.color = "na"
        ##
        if get_flag("visible_radius"):
            if entity.type == "agent":
                # agent yaw is a multiple of PI_2 taken from {-1, 0, 1, 2}
                yaw_range = range(-1, 3)
                entity.yaw = check_or_get_value(pv_dict["yaw"],
                                                yaw_range) * self.PI_2
            if entity.type == "goal":
                ## if partially observed, perturb the objects
                yaw_range = [0, self.PI_2 * 4]
                entity.yaw = check_or_get_value(pv_dict["yaw"],
                                                yaw_range,
                                                is_continuous=True)
                ##
                scale_range = [0.5, 1]
                entity.scale = check_or_get_value(pv_dict["scale"],
                                                  scale_range,
                                                  is_continuous=True)
                ##
                # the offset range depends on the scale selected just above
                offset_range = [0, 1 - entity.scale]
                entity.offset = check_or_get_value(pv_dict["offset"],
                                                   offset_range,
                                                   is_continuous=True)

        self.changed = True
示例#17
0
class XWorld3DTask(object):
    """
    Common state and helpers for XWorld3D tasks.

    Derived classes may override _define_grammar() to provide the grammar
    used to generate sentences.
    """
    ## some static class variables
    ## that should be shared by all derived classes
    time_penalty = -0.01  # per-step reward offset (name suggests a time cost)
    correct_reward = 1.0
    wrong_reward = -1.0
    collision_penalty = 0.0

    failed_action_penalty = -0.1

    # read once, when the class body is executed
    navigation_max_steps_factor = get_flag("max_steps_factor")

    # NOTE: PI is a truncated constant, slightly smaller than math.pi
    PI = 3.1415926
    PI_2 = PI / 2  # 90 degrees
    PI_4 = PI / 4  # 45 degrees
    PI_8 = PI / 8  # 22.5 degrees
    PI_12 = PI / 12  # 15 degrees

    ## the window size for recording the performance
    performance_window_size = 200

    def __init__(self, env):
        """
        Build the direction lookup table and initialise per-task bookkeeping.

        self.directions maps a (low, high) angle interval to the name of a
        spatial relation. Each interval is expressed with the class
        constants PI, PI_2, PI_4 and PI_8.
        """
        pi, half_pi = self.PI, self.PI_2
        quarter_pi, eighth_pi = self.PI_4, self.PI_8
        ## define all the spatial relations
        sectors = [
            ((half_pi - eighth_pi, half_pi + eighth_pi), "right"),
            ((-half_pi - eighth_pi, -half_pi + eighth_pi), "left"),
            ((-eighth_pi, eighth_pi), "front"),
            # the two "behind" intervals lie on either side of the +/-PI
            # wrap-around and cannot be merged into one
            ((pi - eighth_pi, pi), "behind"),
            ((-pi, -pi + eighth_pi), "behind"),
            ((eighth_pi, quarter_pi + eighth_pi), "front-right"),
            ((half_pi + eighth_pi, pi - eighth_pi), "behind-right"),
            ((-half_pi + eighth_pi, -eighth_pi), "front-left"),
            ((-pi + eighth_pi, -half_pi - eighth_pi), "behind-left"),
        ]
        self.directions = dict(sectors)
        self.orientation_threshold = quarter_pi
        self.env = env
        self.event = ""
        self.success_seq = []
        self.num_successes = 0
        self.num_failures = 0
        self.success_steps = 0
        self.reset()
        # the grammar is supplied by _define_grammar()
        self.cfg = CFG(*self._define_grammar())
        self.sentence = ""

    ################ internal functions ####################
    def _define_grammar(self):
        """
        The derived class can override this function to define a grammar
        for the teacher. To do so, the function should return a tuple of a grammar
        string and a start symbol.
        By default the grammar will be empty and the teacher generates
        a empty sentence each time step.
        """
        return "", ""

    def _get_all_directions(self):
        """
        Return all the spatial-relation words in xworld.
        """
        return self.directions.values()

    def _get_all_colors(self):
        """
        Return all colors defined in xworld.
        """
        return self.env.get_all_colors()

    def _get_direction_and_distance(self, p1, p2, p1_yaw=None):
        """
        Get the direciton of p2 wrt p1's yaw, and the distance from p1 to p2.
        Return:
        theta:      relative angle from p2 to p1 wrt p1's yaw
        dist:       distance from p1 to p2
        direction:  name of the direction
        """
        dx = p2[0] - p1[0]
        dy = p2[1] - p1[1]
        dist = sqrt(dx**2 + dy**2)
        if p1_yaw is None:
            return dist
        if dist == 0:
            return 0, 0, ""
        else:
            v1 = (cos(p1_yaw), sin(p1_yaw))
            v2 = (dx / dist, dy / dist)
            # theta is the angle from p2 to p1 wrt p1's orientation
            cos_theta = max(-1, min(1, v1[0] * v2[0] + v1[1] * v2[1]))
            sin_theta = max(-1, min(1, v1[1] * v2[0] - v1[0] * v2[1]))
            theta = acos(cos_theta) * copysign(1, asin(sin_theta))
            direction = ""
            for r in self.directions.keys():
                if (theta >= r[0] and theta < r[1]):
                    direction = self.directions[r]
            return theta, dist, direction

    def _get_distance(self, p1, p2):
        return sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)

    def __record_result(self, res):
        """
        Append res to the sliding success window and report it to the
        environment.

        The window holds at most XWorld3DTask.performance_window_size
        entries; the oldest entry is dropped once that size is exceeded.
        """
        window = self.success_seq
        window.append(res)
        if len(window) > XWorld3DTask.performance_window_size:
            window.pop(0)
        self._record_env_usage()

    def _record_success(self):
        """
        Record a success: push 1 into the result window, bump the success
        counter and add the steps spent on the current task to
        success_steps.
        """
        self.__record_result(1)
        self.num_successes = self.num_successes + 1
        self.success_steps = self.success_steps + self.steps_in_cur_task

    def _record_failure(self):
        """
        Record a failure: push 0 into the result window and bump the failure
        counter.
        """
        self.__record_result(0)
        self.num_failures = self.num_failures + 1

    def _record_env_usage(self):
        self.env.record_environment_usage(self.__class__.__name__,
                                          self.success_seq)

    def _record_answer(self, answer):
        """
        Record the answer for later evaluation and rewarding
        """
        self.answer = answer

    def _record_target(self, target):
        """
        Record the navigation target
        """
        self.target = target

    def _record_event(self, event, next=False):
        """
        Record an event at every time step if necessary
        Every event has a lifespan of only one time step
        """
        if not next:
            self.event = event
        else:
            self.prev_event = event

    def _parse_collision_event(self, events):
        """
        Extract collision events from game event message
        """
        collision_events = [
            e for e in events.strip().split('\n') if e.startswith("collision")
        ]
        hits = set()
        for e in collision_events:
            o = e.split(':')[1].split('|')
            hits.update(o)

        return hits

    def __within_boundary(self, loc):
        """
        Determine if a location is out of boundary of the map
        """
        h, w = self.env.get_dims()
        x, y = loc
        return y >= 0 and y < h and x >= 0 and x < w

    def _get_surrounding_goals(self, distance_threshold=1.5, refer=None):
        """
        Given a reference, return all goals within a circular neighborhood
        with a radius of distance_threshold (excluding the refer itself)
        Note that distance_threshold is inclusive
        """
        goals = self._get_goals()
        if refer is None:
            refer, _, _ = self._get_agent()
            refer = refer.loc
        ret = []
        for g in goals:
            if g.loc == refer:
                continue
            dist = self._get_direction_and_distance(refer, g.loc)
            if dist < distance_threshold + 1e-3:
                ret.append(g)
        return ret

    def _get_surrounding_empty_grids(self, distance_threshold=1.5, refer=None):
        """
        Given a reference location, return all empty grids in its neighborhood
        (excluding the refer itself)
        """
        if refer is None:
            refer, _, _ = self._get_agent()
            refer = refer.loc
        ret = []
        for g in self.env.get_available_grids():
            if g == refer:  ## refer itself might be empty
                continue
            dist = self._get_direction_and_distance(refer, g)
            if dist < distance_threshold + 1e-3:
                ret.append(g)
        return ret

    def _get_p_tiles(self):
        """
        Return all the "pair" tiles (length=2) on the map.
        """
        Y, X = self.env.get_dims()
        p_tiles = []
        available = set(self.env.get_available_grids())

        def test_pair(p1, p2):
            if p1 in available and p2 in available:
                around_p2 = self._get_surrounding_empty_grids(
                    distance_threshold=1.0, refer=p2)
                if set(around_p2) - set([p1]):
                    p_tiles.append((p1, p2))
                around_p1 = self._get_surrounding_empty_grids(
                    distance_threshold=1.0, refer=p1)
                if set(around_p1) - set([p2]):
                    p_tiles.append((p2, p1))

        for y in range(Y):
            for x in range(X):
                test_pair((x, y, 0), (x + 1, y, 0))
                test_pair((x, y, 0), (x, y + 1, 0))
                test_pair((x, y, 0), (x + 1, y + 1, 0))

        return p_tiles

    def _get_t_tiles(self):
        """
        Return all the empty T-shaped tiles on the map. For our purpose, only return
        the two ends of the longer segment (length=3).
        """
        Y, X = self.env.get_dims()
        t_tiles = []
        available = set(self.env.get_available_grids())
        for y in range(Y):
            for x in range(X):
                if (x, y, 0) in available:
                    ## horizontal
                    if (x - 1, y, 0) in available \
                       and (x + 1, y, 0) in available \
                       and ((x, y - 1, 0) in available \
                            or (x, y + 1, 0) in available):
                        t_tiles.append(((x - 1, y, 0), (x + 1, y, 0)))
                    ## vertical
                    if (x, y - 1, 0) in available \
                       and (x, y + 1, 0) in available \
                       and ((x - 1, y, 0) in available \
                            or (x + 1, y, 0) in available):
                        t_tiles.append(((x, y - 1, 0), (x, y + 1, 0)))
        return t_tiles

    def _get_s_tiles(self):
        """
        Return all the square tiles (area=4) no the map.
        """
        Y, X = self.env.get_dims()
        s_tiles = []
        available = set(self.env.get_available_grids())
        neighbors = [(0, 0), (1, 0), (0, 1), (1, 1)]
        for y in range(Y):
            for x in range(X):
                flag = True
                for n in neighbors:
                    nx = x + n[0]
                    ny = y + n[1]
                    if not (nx, ny, 0) in available:
                        flag = False
                        break
                if flag:  # the whole square is empty
                    s_tiles.append(((x, y, 0), (x + 1, y, 0)))
                    s_tiles.append(((x, y + 1, 0), (x + 1, y + 1, 0)))
                    s_tiles.append(((x, y, 0), (x + 1, y + 1, 0)))
                    s_tiles.append(((x + 1, y, 0), (x, y + 1, 0)))
        return s_tiles

    def _get_l_tiles(self):
        """
        Return all the l-shaped tiles (length=3) no the map
        """
        Y, X = self.env.get_dims()
        l_tiles = []
        available = set(self.env.get_available_grids())

        def test_triple(p1, p2, p3):
            if p1 in available:
                if p2 in available and p3 in available:
                    l_tiles.append((p1, p2))
                    l_tiles.append((p2, p3))

        for y in range(Y):
            for x in range(X):
                test_triple((x, y, 0), (x, y + 1, 0), (x, y + 2, 0))
                test_triple((x, y, 0), (x + 1, y, 0), (x + 2, y, 0))
#                test_triple((x, y, 0), (x + 1, y + 1, 0), (x + 2, y + 2, 0))

        return l_tiles

    def _middle_loc(self, l1, l2, fl=False):
        half = 2.0 if fl else 2
        return ((l1[0] + l2[0]) / half, (l1[1] + l2[1]) / half,
                (l1[2] + l2[2]) / half)

    def _reachable(self, start, end):
        """
        Use BFS to determine that if location 'end' can be reached from location 'start'
        The obstacles are the wall blocks and goals on the current map.
        """
        if start == end:
            return True
        blocks = [b.loc for b in self._get_blocks()]
        goals = [g.loc for g in self._get_goals()]
        if end in goals:  # end could be occupied by a goal
            goals.remove(end)
        obstacles = blocks + goals
        assert not start in obstacles, "start pos should not be in obstacles"
        Y, X = self.env.get_dims()
        return (bfs(start, end, X, Y, obstacles) is not None)

    def _propagate_agent(self, seeds, inclusive=False):
        """
        Propagate the given seed locations through the maze with flood_fill()
        to find possible agent positions.

        Wall blocks and goals on the current map are passed as obstacles.
        With inclusive=True the seeds themselves are added as (seed, 0)
        entries. The result is ordered by the second element of each entry
        (presumably the distance from the seeds -- confirm in flood_fill).
        """
        wall_locs = [blk.loc for blk in self._get_blocks()]
        goal_locs = [gl.loc for gl in self._get_goals()]
        height, width = self.env.get_dims()
        reached = flood_fill(seeds, wall_locs + goal_locs, width, height)
        if inclusive:
            reached += [(seed, 0) for seed in seeds]

        def second_component(entry):
            return entry[1]

        return sorted(reached, key=second_component)

    def _find_curriculum_agent_pos(self, targets, max_dist, inclusive=False):
        new_a = self._propagate_agent(targets, inclusive=inclusive)
        assert new_a
        if max_dist > 0:
            for i, na in enumerate(new_a):
                if na[1] >= max_dist:
                    break
            new_a = new_a[:i + 1]
        new_a, _ = random.choice(new_a)
        return new_a

    ############# public APIs #############
    def reset(self):
        """
        Reset the per-task state: zero the step counter and clear the current
        target and the expected answer.
        """
        self.steps_in_cur_task = 0
        self.target = None
        self.answer = ""

    def get_event(self):
        """
        Pop the pending event: hand back whatever is stored in self.event and
        leave an empty string in its place, so each event is reported once.
        """
        pending, self.event = self.event, ""
        return pending

    def obtain_performance(self):
        """
        Return the performance counters as the tuple
        (num_successes, num_failures, success_steps).
        """
        return (self.num_successes, self.num_failures, self.success_steps)

    def print_grammar(self):
        """
        Display the current grammar by calling self.cfg.show().
        """
        self.cfg.show()

    def total_possible_sentences(self):
        """
        Return the number of total possible sentences *given* the current
        bindings.

        The count is delegated to self.cfg.total_possible_sentences().
        """
        return self.cfg.total_possible_sentences()

    def conversation_wrapup(self):
        """
        This dummy stage simply adds an additional time step after the
        conversation is over, which enables the agent to learn a language
        model from the teacher's last sentence.

        It re-records self.prev_event, clears it, and returns the stage
        triple ["idle", 0, ""].
        """
        ### prev_event should have been recorded by self._record_event(); otherwise crash
        self._record_event(self.prev_event)
        self.prev_event = None
        return ["idle", 0, ""]

    def terminal(self):
        """
        Terminal stage: return the [next_stage, reward, sentence] triple
        ["terminal", 0, ""], i.e. no reward and no sentence.
        """
        return ["terminal", 0, ""]

    def simple_recognition_reward(self):
        """
        Recognition stage that scores a single-word answer from the agent.

        Every step costs the time penalty, plus the collision penalty if any
        collision is reported. The session ends when either half of
        XWorld3DTask.max_steps has elapsed (failure, "time_up") or the agent
        says something other than "-": an exact match with self.answer is a
        success ("correct_reply"), anything else a failure ("wrong_reply").
        On session end the teacher states the answer and the next stage is
        "conversation_wrapup"; otherwise this stage repeats with an empty
        sentence.

        Returns [next_stage, reward, sentence].
        """
        reward = XWorld3DTask.time_penalty
        _, agent_sent, _ = self._get_agent()

        if self._parse_collision_event(self.env.game_event):
            reward += XWorld3DTask.collision_penalty

        self.steps_in_cur_task += 1
        out_of_time = self.steps_in_cur_task >= XWorld3DTask.max_steps / 2

        if not out_of_time and agent_sent == "-":
            # the agent has not answered yet and there is still time left
            self.sentence = ""
            return ["simple_recognition_reward", reward, self.sentence]

        if out_of_time:
            self._record_failure()
            self._record_event("time_up", next=True)
        elif agent_sent == self.answer:
            reward += XWorld3DTask.correct_reward
            self._record_success()
            self._record_event("correct_reply", next=True)
        else:
            reward += XWorld3DTask.wrong_reward
            self._record_failure()
            self._record_event("wrong_reply", next=True)

        # the session is over: let the teacher state the answer
        self._bind("S -> answer")
        self._set_production_rule("answer -> '%s'" % self.answer)
        self.sentence = self._generate()
        return ["conversation_wrapup", reward, self.sentence]

    def _reach_object(self, agent, yaw, object):
        collisions = self._parse_collision_event(self.env.game_event)
        theta, _, _ = self._get_direction_and_distance(agent, object.loc, yaw)
        return abs(
            theta) < self.orientation_threshold and object.id in collisions

    def _successful_goal(self, reward):
        """
        Handle a correctly reached goal: record the success and the
        "correct_goal" event, generate the teacher's "correct" sentence into
        self.sentence, and return 'reward' increased by
        XWorld3DTask.correct_reward.
        """
        self._record_success()
        self._record_event("correct_goal")
        total = reward + XWorld3DTask.correct_reward
        self._bind("S -> correct")
        self.sentence = self._generate()
        return total

    def _failed_goal(self, reward):
        """
        Handle a wrongly reached goal: record the failure and the
        "wrong_goal" event, generate the teacher's "wrong" sentence into
        self.sentence, and return 'reward' increased by
        XWorld3DTask.wrong_reward.
        """
        self._record_failure()
        self._record_event("wrong_goal")
        total = reward + XWorld3DTask.wrong_reward
        self._bind("S -> wrong")
        self.sentence = self._generate()
        return total

    def _time_reward(self):
        """
        Charge one step of time penalty and check the navigation step budget.

        The budget is height * width * XWorld3DTask.navigation_max_steps_factor.
        Once the step counter reaches it, a failure and a "time_up" event are
        recorded and the teacher's "timeup" sentence is generated.

        Returns (reward, timed_out).
        """
        reward = XWorld3DTask.time_penalty
        self.steps_in_cur_task += 1
        height, width = self.env.get_dims()
        budget = height * width * XWorld3DTask.navigation_max_steps_factor
        timed_out = self.steps_in_cur_task >= budget
        if not timed_out:
            return (reward, False)
        self._record_failure()
        self._bind("S -> timeup")
        self.sentence = self._generate()
        self._record_event("time_up")
        return (reward, True)

    ############ functions that wrap self.env and self.cfg #############
    def _list_of_strs_to_rhs(self, strs):
        """
        Converting a list of strings to a string as the right hand side of a
        production rule. For example, if the list is ['foo', 'bar'], then the
        conversion result is "'foo' | 'bar'"
        """
        return "|".join(["'" + s + "'" for s in strs])

    def _get_all_goal_names_as_rhs(self):
        """
        Get all possible goal names in xworld and convert the list to a rhs string.

        The names come from self.env.get_all_possible_names("goal") and are
        formatted by self._list_of_strs_to_rhs().
        """
        return self._list_of_strs_to_rhs(
            self.env.get_all_possible_names("goal"))

    def _get_all_directions_as_rhs(self):
        """
        Get all possible spatial-relation words in xworld and convert the list to a rhs string.

        The words come from self._get_all_directions() and are formatted by
        self._list_of_strs_to_rhs().
        """
        return self._list_of_strs_to_rhs(self._get_all_directions())

    def _get_all_colors_as_rhs(self):
        """
        Get all possible color words in xworld and convert the list to a rhs string.

        The words come from self._get_all_colors() and are formatted by
        self._list_of_strs_to_rhs().
        """
        return self._list_of_strs_to_rhs(self._get_all_colors())

    def _get_goals(self):
        """
        Get all the goals on the current map.

        Thin wrapper around self.env.get_goals(); callers in this class read
        the .loc, .name and .color attributes of the returned items.
        """
        return self.env.get_goals()

    def _get_blocks(self):
        """
        Get all the blocks on the current map.

        Thin wrapper around self.env.get_blocks(); callers in this class read
        the .loc attribute of the returned items.
        """
        return self.env.get_blocks()

    def _get_entities(self):
        """
        Get all the entities on the current map.

        Thin wrapper around self.env.get_entities().
        """
        return self.env.get_entities()

    def _color_defined(self, color):
        """
        Decide if a color is defined or not. An undefined color is "na".

        Returns True for every value other than the string "na".
        """
        return color != "na"

    def _get_colored_goals(self):
        """
        Get all the goals on the current map that have defined colors
        """
        return [g for g in self._get_goals() if self._color_defined(g.color)]

    def _get_agent(self):
        """
        Get the agent information; see XWorld3DEnv.get_agent()

        Thin wrapper around self.env.get_agent(). simple_recognition_reward()
        unpacks the result as three values and uses the second one as the
        agent's sentence.
        """
        return self.env.get_agent()

    def _set_entity_inst(self, e):
        """
        Forward entity 'e' to self.env.set_entity_inst().
        """
        self.env.set_entity_inst(e)

    def _move_entity(self, e, loc):
        """
        Forward entity 'e' and location 'loc' to self.env.move_entity().
        """
        self.env.move_entity(e, loc)

    def _delete_entity(self, e):
        """
        Forward entity 'e' to self.env.delete_entity().
        """
        self.env.delete_entity(e)

    def _bind(self, binding_str):
        """
        Bind a production rule; see CFG.bind() for details.

        binding_str is a rule string such as "S -> answer"; it is passed
        unchanged to self.cfg.bind().
        """
        self.cfg.bind(binding_str)

    def _generate(self):
        """
        Generate a sentence according to the grammar and current bindings;
        see CFG.generate() for details.

        Returns whatever self.cfg.generate() returns.
        """
        return self.cfg.generate()

    def _generate_all(self):
        """
        Generate all possible sentences given current bindings; in this case,
        the bindings are not necessary.
        See CFG.generate_all() for details.

        Returns whatever self.cfg.generate_all() returns.
        """
        return self.cfg.generate_all()

    def _set_production_rule(self, rule):
        """
        Set a new production rule or overwrite an existing one.
        See CFG.set_production_rule() for details.

        rule is a rule string such as "answer -> 'apple'"; it is passed
        unchanged to self.cfg.set_production_rule().
        """
        self.cfg.set_production_rule(rule)
示例#18
0
 def __init__(self, asset_path, start_level=0):
     """
     Initialize the parent class with the given asset_path and a maximum
     map size of 7x7, read the "curriculum" flag, and remember start_level
     as the current level.
     """
     super(XWorld3DNav, self).__init__(asset_path=asset_path,
                                       max_height=7,
                                       max_width=7)
     self.curriculum = get_flag("curriculum")
     self.current_level = start_level
示例#19
0
    def set_property(self, entity, property_value_dict=None):
        """
        Reinstantiate the specified properties of an existing entity.
        Properties and corresponding values are specified by the property_value_dict.
        The handled properties are, in this order: "name", "loc",
        "asset_path", "yaw", "scale" and "offset".
        There are three use cases:
        1) if no property is specified (e.g. property_value_dict is None or {}),
           all properties will be reinstantiated.
        2) If None value is provided for a specified property (e.g. {"name" : None}),
           a valid random value will be sampled for the specified property and all the rest
           properties will be reinstantiated randomly.
        3) If False value is provided for a specified property (e.g. {"loc" : False}),
           this property remains unchanged while other properties will be reinstantiated.
        Mode 2) and 3) can be used together (e.g. {"name" : None, "loc" : False})).
        Any other value is validated against the valid set/range and assigned.

        Notes:
        - Only the literal False means "keep"; a numeric 0 is a regular value.
        - asset_path is coupled with name: whenever the name is
          (re)instantiated, a random asset of that name is chosen, even if
          "asset_path" is False. An explicit "asset_path" value is validated
          afterwards against the assets of the final name.
        - color is derived from asset_path through self.color_table ("na"
          when the asset has no entry).
        - yaw, scale and offset are only touched when the "visible_radius"
          flag is set (and only for the entity types checked below).

        Raises AssertionError for unknown property names, invalid values, or
        a False request on a property that currently has no value (None).
        Sets self.changed to True on completion.
        """

        def check_or_get_value(prop, value, valid_value_set,
                               is_continuous=False):
            """
            Return 'value' after checking it against valid_value_set, or
            sample a random valid one when 'value' is None.
            is_continuous tells whether valid_value_set is a [low, high]
            range (True) or a discrete collection (False).
            """
            if not is_continuous:
                if value is None:
                    assert len(valid_value_set) > 0, \
                        "invalid value set for property %s is provided" % prop
                    return random.choice(valid_value_set)
                assert value in valid_value_set, \
                    "invalid value for property %s is provided" % prop
                return value

            if value is None:
                # a degenerate range (low == high) is a valid single value
                assert len(valid_value_set) == 2 and valid_value_set[0] <= valid_value_set[1], \
                    "invalid value range for property %s is provided" % prop
                # random.uniform takes the two bounds separately
                return random.uniform(valid_value_set[0], valid_value_set[1])
            assert valid_value_set[0] <= value <= valid_value_set[1], \
                "invalid value for property %s is provided" % prop
            return value

        default_dict = OrderedDict.fromkeys(
            ["name", "loc", "asset_path", "yaw", "scale", "offset"])
        context_dict = default_dict.copy()
        if property_value_dict:
            context_dict.update(property_value_dict)

        for prop, value in context_dict.items():
            assert prop in entity.__dict__ and prop in default_dict, \
                "invalid property name: %s is provided" % prop
            if value is False:  # should only be used for properties with values
                # compare against None so that falsy values such as a yaw or
                # offset of 0 still count as existing values
                assert entity.__dict__[prop] is not None, \
                    "no existing value for property %s" % prop
                continue
            if prop == "loc":
                entity.loc = check_or_get_value(prop, value,
                                                self.available_grids)
            if prop == "name":
                entity.name = check_or_get_value(
                    prop, value, self.get_all_possible_names(entity.type))
                # update id once name is changed
                entity.id = "%s_%s" % (entity.name,
                                       self.entity_nums[entity.type])
            if prop == "asset_path" or prop == "name":
                # update the asset_path once the name is changed; in that
                # case 'value' is a name, not a path, so sample a fresh asset
                asset_value = value if prop == "asset_path" else None
                entity.asset_path = check_or_get_value(
                    "asset_path", asset_value,
                    self.items[entity.type][entity.name])
                # color is coupled with asset_path
                entity.color = self.color_table.get(entity.asset_path, "na")
            if prop == "yaw" and get_flag(
                    "visible_radius") and entity.type != "block":
                ## if partially observed, perturb the objects
                yaw_range = range(-1, 3)
                entity.yaw = check_or_get_value(prop, value,
                                                yaw_range) * self.PI_2
            if prop == "scale" and get_flag(
                    "visible_radius") and entity.type == "goal":
                scale_range = [0.5, 1]
                entity.scale = check_or_get_value(prop, value, scale_range,
                                                  is_continuous=True)
            if prop == "offset" and get_flag(
                    "visible_radius") and entity.type == "goal":
                offset_range = [
                    0, 1 - (entity.scale if hasattr(entity, 'scale') else 0.5)
                ]
                entity.offset = check_or_get_value(prop, value, offset_range,
                                                   is_continuous=True)

        self.changed = True