def idle(self):
    """
    Ask which goal lies between a pair of goals.

    With probability 0.9 the question is about a pair that has a goal on the
    grid right after the first goal of the pair (first.loc[0] + 1); otherwise
    it is about a pair whose in-between grid holds no goal, in which case the
    answer is "nothing".

    Returns a list [next_stage, reward, sentence]. When no question can be
    formed the task stays in "idle" with an empty sentence.
    """
    ## one draw decides between "tell goal" and "tell nothing"
    ask_about_goal = random.uniform(0, 1) < 0.9

    candidates = []
    for first, second in self._get_between_pair_goals():
        occupied = False
        for g in self._get_goals():
            if g.loc != (first.loc[0] + 1, first.loc[1]):
                continue
            occupied = True
            if ask_about_goal:
                candidates.append((first.name, second.name, g.name))
        if not (ask_about_goal or occupied):
            candidates.append((first.name, second.name, "nothing"))

    if not candidates:
        return ["idle", 0, ""]

    name_a, name_b, answer = random.choice(candidates)
    self._bind("S -> question")
    ## randomly swap the order in which the two goals are mentioned
    if random.uniform(0, 1) < 0.5:
        name_a, name_b = name_b, name_a
    self._bind("Z -> '%s'" % name_a)
    self._bind("T -> '%s'" % name_b)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        ## the answer is appended to the question itself
        return ["idle", 0, sentence + " " + answer]
    self._record_answer(answer)
    return ["simple_recognition_reward", 0, sentence]
def __init__(self, item_path, start_level=0):
    """
    Set up the parent class with an 8x8 maximum map size and maze
    generation turned on, then cache the "curriculum" flag.

    item_path and start_level are forwarded to the parent constructor
    unchanged.
    """
    parent_kwargs = dict(
        item_path=item_path,
        max_height=8,
        max_width=8,
        start_level=start_level,
        maze_generation=True)
    super(XWorldNav, self).__init__(**parent_kwargs)
    self.curriculum = get_flag("curriculum")
def idle(self):
    """
    Ask what is located in some direction.

    With probability 0.9 a surrounding goal is picked and its name is the
    answer; otherwise a surrounding empty grid is picked and the answer is
    "nothing".

    Returns a list [next_stage, reward, sentence]. When the chosen kind of
    neighbour does not exist the task stays in "idle" with an empty sentence.
    """
    if random.uniform(0, 1) < 0.9:
        ## tell goal
        nearby_goals = self._get_surrounding_goals()
        if not nearby_goals:
            return ["idle", 0, ""]
        picked, direction = random.choice(nearby_goals)
        answer = picked.name
    else:
        ## tell empty grid
        vacant = self._get_surrounding_empty_grids()
        if not vacant:
            return ["idle", 0, ""]
        _, direction = random.choice(vacant)
        answer = "nothing"

    self._bind("S -> question")
    self._bind("D -> '%s'" % direction)
    sentence = self._generate()

    if get_flag("task_mode") == "lang_acquisition":
        ## the answer is appended to the question itself
        return ["idle", 0, sentence + " " + answer]
    self._record_answer(answer)
    return ["simple_recognition_reward", 0, sentence]
def __init__(self, item_path):
    """
    Set up the parent class with a 5x5 maximum map size, cache the
    "curriculum" flag and start at level 0.
    """
    parent_kwargs = dict(item_path=item_path, max_height=5, max_width=5)
    super(XWorldNav, self).__init__(**parent_kwargs)
    self.curriculum = get_flag("curriculum")
    self.current_level = 0
def idle(self): goals = self._get_goals() found = False if random.uniform(0, 1) < 0.9: ## tell goal goals = [(g, random.choice(self._get_surrounding_goals(g.loc))) \ for g in goals if self._get_surrounding_goals(g.loc)] if goals: goal, goal2 = random.choice(goals) grid = goal2[0].name direction = goal2[1] found = True else: ## tell empty grid empty_grids = [(g, random.choice(self._get_surrounding_empty_grids(g.loc))) \ for g in goals if self._get_surrounding_empty_grids(g.loc)] if empty_grids: goal, goal2 = random.choice(empty_grids) grid = "nothing" direction = goal2[1] found = True if found: self._bind("S -> question") self._bind("D -> '%s'" % direction) self._bind("Z -> '%s'" % goal.name) sentence = self._generate() if get_flag("task_mode") == "lang_acquisition": return ["idle", 0, sentence + " " + grid] else: self._record_answer(grid) return ["simple_recognition_reward", 0, sentence] return ["idle", 0, ""]
def idle(self):
    """
    Ask in which direction (relative to the agent) the grid between a pair
    of goals lies.

    For every pair the grid right after the first goal (first.loc[0] + 1) is
    used; pairs for which _get_direction() returns an empty string are
    skipped.

    Returns a list [next_stage, reward, sentence]. When no pair qualifies the
    task stays in "idle" with an empty sentence.
    """
    pairs = self._get_between_pair_goals()
    agent, _, _ = self._get_agent()

    candidates = []
    for first, second in pairs:
        middle = (first.loc[0] + 1, first.loc[1])
        where = self._get_direction(agent.loc, middle)
        if where == "":
            continue
        candidates.append((first.name, second.name, where))

    if not candidates:
        return ["idle", 0, ""]

    name_a, name_b, direction = random.choice(candidates)
    self._bind("S -> question")
    ## randomly swap the order in which the two goals are mentioned
    if random.uniform(0, 1) < 0.5:
        name_a, name_b = name_b, name_a
    self._bind("O -> '%s'" % name_a)
    self._bind("T -> '%s'" % name_b)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        # supervised; forward compatible
        return ["idle", 0, sentence + " " + direction]
    # record answer for reward stage (RL)
    self._record_answer(direction)
    return ["simple_recognition_reward", 0, sentence]
def __init__(self, env):
    """
    Build the table of spatial relations, read the reaching-distance flag,
    reset the per-task state and create the teacher grammar.
    """
    pi, half, quarter, eighth = self.PI, self.PI_2, self.PI_4, self.PI_8

    ## define all the spatial relations as (low, high) interval -> word;
    ## the bounds are built from the PI, PI_2, PI_4 and PI_8 constants
    sectors = [
        ((half - eighth, half + eighth), "right"),
        ((-half - eighth, -half + eighth), "left"),
        ((-eighth, eighth), "front"),
        # two intervals of "back" cannot be merged, so we need to specify
        # them separately
        ((pi - eighth, pi), "back"),
        ((-pi, -pi + eighth), "back"),
        ((eighth, quarter + eighth), "front-right"),
        ((half + eighth, pi - eighth), "back-right"),
        ((-half + eighth, -eighth), "front-left"),
        ((-pi + eighth, -half - eighth), "back-left"),
    ]
    self.directions = dict(sectors)

    self.distance_threshold = get_flag("x3_reaching_distance")
    self.orientation_threshold = quarter
    self.env = env
    self.event = ""
    self.num_successes = 0
    self.num_failures = 0
    self.reset()
    grammar_args = self._define_grammar()
    self.cfg = CFG(*grammar_args)
    self.sentence = ""
    self.failure_recorded = False
def idle(self):
    """
    Ask which object has a given color.

    A goal with a defined color is picked at random; its color goes into the
    question and its name is the answer.

    Returns a list [next_stage, reward, sentence]. When there is no colored
    goal the task stays in "idle" with an empty sentence.
    """
    colored = self._get_colored_goals()
    if not colored:
        return ["idle", 0, ""]

    chosen = random.choice(colored)
    self._bind("S -> question")
    self._bind("C -> '%s'" % chosen.color)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        ## the answer is appended to the question itself
        return ["idle", 0, sentence + " " + chosen.name]
    self._record_answer(chosen.name)
    return ["simple_recognition_reward", 0, sentence]
def idle(self):
    """
    Ask for the color of the object in a given direction.

    Only surrounding goals whose color is defined are considered; the
    direction goes into the question and the goal's color is the answer.

    Returns a list [next_stage, reward, sentence]. When no surrounding goal
    has a defined color the task stays in "idle" with an empty sentence.
    """
    candidates = []
    for entry in self._get_surrounding_goals():
        # each entry is indexed as (goal, direction)
        if self._color_defined(entry[0].color):
            candidates.append(entry)

    if not candidates:
        return ["idle", 0, ""]

    chosen, direction = random.choice(candidates)
    self._bind("S -> question")
    self._bind("D -> '%s'" % direction)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        ## the answer is appended to the question itself
        return ["idle", 0, sentence + " " + chosen.color]
    self._record_answer(chosen.color)
    return ["simple_recognition_reward", 0, sentence]
def idle(self):
    """
    Ask for the color of a named object.

    A goal with a defined color is picked at random; its name goes into the
    question and its color is the answer.

    Returns a list [next_stage, reward, sentence]. When there is no colored
    goal the task stays in "idle" with an empty sentence.
    """
    colored = self._get_colored_goals()
    if not colored:
        return ["idle", 0, ""]

    chosen = random.choice(colored)
    self._bind("S -> question")
    self._bind("G -> '%s'" % chosen.name)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        # supervised; forward compatible
        return ["idle", 0, sentence + " " + chosen.color]
    # record answer for reward stage (RL)
    self._record_answer(chosen.color)
    return ["simple_recognition_reward", 0, sentence]
def idle(self):
    """
    Ask in which direction the object of a given color is.

    Only surrounding goals whose color is defined are considered; the color
    goes into the question and the direction is the answer.

    Returns a list [next_stage, reward, sentence]. When no surrounding goal
    has a defined color the task stays in "idle" with an empty sentence.
    """
    candidates = []
    for entry in self._get_surrounding_goals():
        # each entry is indexed as (goal, direction)
        if self._color_defined(entry[0].color):
            candidates.append(entry)

    if not candidates:
        return ["idle", 0, ""]

    chosen, direction = random.choice(candidates)
    self._bind("S -> question")
    self._bind("C -> '%s'" % chosen.color)
    sentence = self._generate()

    if get_flag("task_mode") == "lang_acquisition":
        # supervised; forward compatible
        return ["idle", 0, sentence + " " + direction]
    # record answer for reward stage (RL)
    self._record_answer(direction)
    return ["simple_recognition_reward", 0, sentence]
def idle(self):
    """
    Ask for the color of the object that lies in some direction of another
    object.

    For every colored goal that has at least one surrounding goal, one of
    those surrounding goals is picked as the reference object. The question
    names the reference object and the direction from it to the colored goal
    (as computed by _get_direction); the colored goal's color is the answer.

    Returns a list [next_stage, reward, sentence]. When no colored goal has a
    surrounding goal the task stays in "idle" with an empty sentence.
    """
    candidates = []
    for colored in self._get_colored_goals():
        # query the neighbourhood once per goal and reuse the result for
        # both the emptiness check and the random pick
        around = self._get_surrounding_goals(colored.loc)
        if around:
            candidates.append((random.choice(around), colored))

    if not candidates:
        return ["idle", 0, ""]

    # each neighbour entry is a pair whose first element is the goal
    (goal, _), color_goal = random.choice(candidates)
    direction = self._get_direction(goal.loc, color_goal.loc)

    self._bind("S -> question")
    self._bind("D -> '%s'" % direction)
    self._bind("O -> '%s'" % goal.name)
    sentence = self._generate()

    if get_flag("task_mode") == "lang_acquisition":
        # supervised; forward compatible
        return ["idle", 0, sentence + " " + color_goal.color]
    # record answer for reward stage (RL)
    self._record_answer(color_goal.color)
    return ["simple_recognition_reward", 0, sentence]
def simple_navigation_reward(self):
    """
    A simple navigation reward stage.

    Every step costs the time penalty, plus the failed-action penalty when
    the agent's last action was unsuccessful. Reaching the target gives the
    correct reward and stepping on any other goal gives the wrong reward.

    The task returns to the 'idle' stage when the target is reached or when
    the time is up; the time-up check is only active in "one_channel" task
    mode. Otherwise the stage repeats with an empty sentence.

    Returns a list [next_stage, reward, sentence].
    """
    reward = XWorldTask.time_penalty
    agent, _, moved = self._get_agent()
    if not moved:
        reward += XWorldTask.failed_action_penalty

    occupied = [g.loc for g in self._get_goals()]
    self.steps_in_cur_task += 1
    height, width = self.env.get_max_dims()

    timed_out = get_flag("task_mode") == "one_channel" \
                and self.steps_in_cur_task >= height * width / 2
    if timed_out:
        self.steps_in_cur_task = 0
        self._record_failure()
        self._bind("S -> timeup")
        return ["idle", reward, self._generate()]

    if agent.loc == self.target:
        self.steps_in_cur_task = 0
        self._record_success()
        self._record_event("correct_goal")
        reward += XWorldTask.correct_reward
        self._bind("S -> finish")
        return ["idle", reward, self._generate()]

    if agent.loc in occupied:
        ## stepped on a goal that is not the target
        reward += XWorldTask.wrong_reward
    return ["simple_navigation_reward", reward, ""]
def idle(self):
    """
    Ask for the color of the goal that lies between a pair of goals.

    A colored goal qualifies when it sits on the grid right after the first
    goal of a pair (first.loc[0] + 1); the two goal names go into the
    question and the colored goal's color is the answer.

    Returns a list [next_stage, reward, sentence]. When no colored goal sits
    between a pair the task stays in "idle" with an empty sentence.
    """
    pairs = self._get_between_pair_goals()
    colored = self._get_colored_goals()

    rows = [(first.name, second.name, g.color)
            for g in colored
            for first, second in pairs
            if g.loc == (first.loc[0] + 1, first.loc[1])]
    if not rows:
        return ["idle", 0, ""]

    name_a, name_b, color = random.choice(rows)
    self._bind("S -> question")
    ## randomly swap the order in which the two goals are mentioned
    if random.uniform(0, 1) < 0.5:
        name_a, name_b = name_b, name_a
    self._bind("O -> '%s'" % name_a)
    self._bind("T -> '%s'" % name_b)
    sentence = self._generate()

    if get_flag("task_mode") == "arxiv_lang_acquisition":
        ## the answer is appended to the question itself
        return ["idle", 0, sentence + " " + color]
    self._record_answer(color)
    return ["simple_recognition_reward", 0, sentence]
def set_property(self, entity, property_value_dict={}):
    """
    Reinstantiate the specified properties of an existing entity.
    Properties and corresponding values are specified by the
    property_value_dict in the form of {property : value, ...},
    e.g. {"name" : "apple", "loc" : (0, 0)}. value could be None
    (force reinstantiation) or a valid value for that property.

    1) If None value is provided for a specified property
       (e.g. {"name" : None}), the entity property will be reinstantiated
       regardless of its original value.
    2) Otherwise, the value will be assigned to that property of the entity.
    3) All unset (None) entity properties will be instantiated.

    name and asset_path depend on each other:
    - name given without asset_path: asset_path is reinstantiated;
    - asset_path given without name: name is derived from the asset_path,
      which must map to exactly one name;
    - both given: they must be consistent.

    property_value_dict itself is never modified; all bookkeeping is done on
    a private copy.
    """
    default_dict = OrderedDict.fromkeys(["name", \
                                         "loc", \
                                         "asset_path", \
                                         "yaw", \
                                         "scale", \
                                         "offset"])
    # Work on a copy: the pre-processing below adds keys, and writing them
    # into the caller's dict (or into the shared default argument) would
    # leak state out of this call.
    specified = dict(property_value_dict)
    pv_dict = default_dict.copy()
    pv_dict.update(specified)

    # pre-processing for name and asset_path due to their dependency;
    # "empty" marks a key the caller did not provide (None is a real value)
    path_value = specified.get("asset_path", "empty")
    name_value = specified.get("name", "empty")

    # if name is specified by user but asset_path is unspecified, force the
    # asset_path to be reinstantiated for the new name
    if name_value != "empty" and path_value == "empty":
        specified["asset_path"] = None

    # if asset_path is specified
    if path_value != "empty":
        if name_value == "empty":
            # name is not specified: derive it from asset_path
            names = [
                n for n in self.items[entity.type]
                if path_value in set(self.items[entity.type][n])
            ]
            assert len(
                names
            ) == 1, "there should be only one name corresponding to the asset_path: %s" % (
                path_value)
            specified["name"] = names[0]
            pv_dict.update(specified)
        else:
            # if both name and asset_path are specified, check consistency
            assert path_value in self.items[entity.type][name_value], \
                "specified name: %s and asset_path: %s mis-match" % (name_value, path_value)

    for prop in pv_dict:
        assert prop in entity.__dict__ and prop in default_dict, \
            "invalid property name: %s is provided" % prop
        value = pv_dict[prop]

        # skip unspecified and non-empty entity properties
        if prop not in specified and entity.__dict__[prop] is not None:
            continue

        if prop == "loc":
            # give the old location back before choosing the new one
            if entity.loc is not None:
                self.available_grids.append(entity.loc)
            entity.loc = check_or_get_value(value, self.available_grids)
            self.available_grids.remove(entity.loc)
        if prop == "name":
            entity.name = check_or_get_value(
                value, self.get_all_possible_names(entity.type))
            # update id once name is changed
            entity.id = "%s_%s" % (entity.name,
                                   self.entity_nums[entity.type])
        if prop == "asset_path":
            entity.asset_path = check_or_get_value(
                value, self.items[entity.type][entity.name])
            # color is coupled with asset_path
            if entity.asset_path in self.color_table.keys():
                entity.color = self.color_table[entity.asset_path]
            else:
                entity.color = "na"
        if prop == "yaw" and get_flag(
                "visible_radius") and entity.type != "block":
            ## if partially observed, perturb the objects
            yaw_range = range(-1, 3)
            entity.yaw = check_or_get_value(value, yaw_range) * self.PI_2
        if prop == "scale" and get_flag(
                "visible_radius") and entity.type == "goal":
            scale_range = [0.5, 1]
            entity.scale = check_or_get_value(
                value, scale_range, is_continuous=True)
        if prop == "offset" and get_flag(
                "visible_radius") and entity.type == "goal":
            offset_range = [
                0, 1 - (entity.scale if hasattr(entity, 'scale') else 0.5)
            ]
            entity.offset = check_or_get_value(
                value, offset_range, is_continuous=True)

    self.changed = True
def set_property(self, entity, property_value_dict={}):
    """
    Reinstantiate the specified properties of an existing entity.
    Properties and corresponding values are specified by the
    property_value_dict in the form of {property : value, ...},
    e.g. {"name" : "apple", "loc" : (0, 0)}. value could be None
    (force reinstantiation) or a valid value for that property.

    1) If None value is provided for a specified property
       (e.g. {"name" : None}), the entity property will be reinstantiated
       regardless of its original value.
    2) Otherwise, the value will be assigned to that property of the entity.

    For the remaining properties not in property_value_dict:
    3) all unset entity properties will be instantiated;
    4) the already set properties will keep the same.

    Because name and asset_path have a dependency, we require that at most
    one of them is not None.
    a. When name is None but asset_path is not, the name is uniquely
       determined from the path;
    b. When asset_path is None but name is not, the path is randomly
       selected for the name;
    c. When both are None, both are randomly selected.
    """
    ## start from the entity's current values, then let the user overwrite
    merged = entity.__dict__.copy()
    merged.update(property_value_dict)

    ## pre-processing for name and asset_path due to their dependency
    path_value = merged["asset_path"]
    name_value = merged["name"]
    if path_value is not None:
        assert name_value is None, "With asset_path, you don't have to set name"
        catalog = self.items[entity.type]
        names = []
        for n in catalog:
            if path_value in catalog[n]:
                names.append(n)
        assert len(names) == 1, \
            "each asset_path corresponds to only one name: %s" % (path_value)
        merged["name"] = names[0]
    # else: do nothing; asset_path will be set later

    ## location: give the old grid back before choosing the new one
    if entity.loc is not None:
        self.available_grids.append(entity.loc)
    entity.loc = check_or_get_value(merged["loc"], self.available_grids)
    self.available_grids.remove(entity.loc)

    ## name, and a fresh id built from the running counter
    possible_names = self.get_all_possible_names(entity.type)
    entity.name = check_or_get_value(merged["name"], possible_names)
    entity.id = "%s_%d" % (entity.name, self.running_id)
    self.running_id += 1

    ## asset path; color is coupled with asset_path
    entity.asset_path = check_or_get_value(
        merged["asset_path"], self.items[entity.type][entity.name])
    if entity.asset_path in self.color_table.keys():
        entity.color = self.color_table[entity.asset_path]
    else:
        entity.color = "na"

    ## yaw / scale / offset are only touched when "visible_radius" is set
    if get_flag("visible_radius"):
        if entity.type == "agent":
            quarter_turns = range(-1, 3)
            entity.yaw = check_or_get_value(merged["yaw"],
                                            quarter_turns) * self.PI_2
        if entity.type == "goal":
            ## if partially observed, perturb the objects
            entity.yaw = check_or_get_value(
                merged["yaw"], [0, self.PI_2 * 4], is_continuous=True)
            entity.scale = check_or_get_value(
                merged["scale"], [0.5, 1], is_continuous=True)
            ## the offset range depends on the scale chosen just above
            entity.offset = check_or_get_value(
                merged["offset"], [0, 1 - entity.scale], is_continuous=True)

    self.changed = True
class XWorld3DTask(object):
    """
    Base class of the XWorld3D teacher tasks.

    It keeps the reward constants, the table of spatial relations, the
    success/failure statistics and thin wrappers around the environment
    (self.env) and the sentence grammar (self.cfg). Stage functions return a
    list [next_stage, reward, sentence].
    """
    ## some static class variables
    ## that should be shared by all derived classes
    time_penalty = -0.01
    correct_reward = 1.0
    wrong_reward = -1.0
    collision_penalty = 0.0
    failed_action_penalty = -0.1

    navigation_max_steps_factor = get_flag("max_steps_factor")

    PI = 3.1415926
    PI_2 = PI / 2  # 90
    PI_4 = PI / 4  # 45
    PI_8 = PI / 8  # 22.5
    PI_12 = PI / 12  # 15

    ## the window size for recording the performance
    performance_window_size = 200

    def __init__(self, env):
        ## define all the spatial relations
        self.directions = {
            (self.PI_2 - self.PI_8, self.PI_2 + self.PI_8): "right",
            (-self.PI_2 - self.PI_8, -self.PI_2 + self.PI_8): "left",
            (-self.PI_8, self.PI_8): "front",
            # two intervals of "behind" cannot be merged, so we need to specify
            # them separately
            (self.PI - self.PI_8, self.PI): "behind",
            (-self.PI, -self.PI + self.PI_8): "behind",
            (self.PI_8, self.PI_4 + self.PI_8): "front-right",
            (self.PI_2 + self.PI_8, self.PI - self.PI_8): "behind-right",
            (-self.PI_2 + self.PI_8, -self.PI_8): "front-left",
            (-self.PI + self.PI_8, -self.PI_2 - self.PI_8): "behind-left"
        }
        self.orientation_threshold = self.PI_4
        self.env = env
        self.event = ""
        self.success_seq = []
        self.num_successes = 0
        self.num_failures = 0
        self.success_steps = 0
        self.reset()
        self.cfg = CFG(*self._define_grammar())
        self.sentence = ""

    ################ internal functions ####################
    def _define_grammar(self):
        """
        The derived class can override this function to define a grammar
        for the teacher. To do so, the function should return a tuple of
        a grammar string and a start symbol.
        By default the grammar is empty and the teacher generates an empty
        sentence at each time step.
        """
        return "", ""

    def _get_all_directions(self):
        """
        Return all the spatial-relation words in xworld.
        """
        return self.directions.values()

    def _get_all_colors(self):
        """
        Return all colors defined in xworld.
        """
        return self.env.get_all_colors()

    def _get_direction_and_distance(self, p1, p2, p1_yaw=None):
        """
        Get the direction of p2 wrt p1's yaw, and the distance from p1 to p2.

        When p1_yaw is None only the distance is returned (a single number).
        Otherwise a tuple (theta, dist, direction) is returned:
            theta: relative angle from p2 to p1 wrt p1's yaw
            dist: distance from p1 to p2
            direction: name of the direction ("" when p1 and p2 coincide)
        """
        dx = p2[0] - p1[0]
        dy = p2[1] - p1[1]
        dist = sqrt(dx**2 + dy**2)
        if p1_yaw is None:
            return dist
        if dist == 0:
            return 0, 0, ""

        v1 = (cos(p1_yaw), sin(p1_yaw))
        v2 = (dx / dist, dy / dist)
        # theta is the angle from p2 to p1 wrt p1's orientation
        cos_theta = max(-1, min(1, v1[0] * v2[0] + v1[1] * v2[1]))
        sin_theta = max(-1, min(1, v1[1] * v2[0] - v1[0] * v2[1]))
        theta = acos(cos_theta) * copysign(1, asin(sin_theta))

        # acos() can return exactly math.pi, which is larger than the
        # truncated class constant PI. Without clamping, an object exactly
        # behind p1 falls outside every interval and gets no direction.
        bounded = max(-self.PI, min(self.PI, theta))
        direction = ""
        for (low, high), label in self.directions.items():
            # intervals are half-open [low, high), except that the one
            # ending at PI also owns its upper bound
            if low <= bounded < high or bounded == high == self.PI:
                direction = label
        return theta, dist, direction

    def _get_distance(self, p1, p2):
        """
        Return the Euclidean distance between p1 and p2 using their first
        two coordinates.
        """
        return sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)

    def __record_result(self, res):
        """
        Append a result (1 or 0) to the sliding performance window and
        report the window to the environment.
        """
        self.success_seq.append(res)
        if len(self.success_seq) > XWorld3DTask.performance_window_size:
            self.success_seq.pop(0)
        self._record_env_usage()

    def _record_success(self):
        self.__record_result(1)
        self.num_successes += 1
        self.success_steps += self.steps_in_cur_task

    def _record_failure(self):
        self.__record_result(0)
        self.num_failures += 1

    def _record_env_usage(self):
        self.env.record_environment_usage(self.__class__.__name__,
                                          self.success_seq)

    def _record_answer(self, answer):
        """
        Record the answer for later evaluation and rewarding
        """
        self.answer = answer

    def _record_target(self, target):
        """
        Record the navigation target
        """
        self.target = target

    def _record_event(self, event, next=False):
        """
        Record an event at every time step if necessary
        Every event has a lifespan of only one time step.
        With next=True the event is stored in self.prev_event instead of
        self.event; conversation_wrapup() later moves it into self.event.
        """
        if not next:
            self.event = event
        else:
            self.prev_event = event

    def _parse_collision_event(self, events):
        """
        Extract collision events from game event message.
        Lines starting with "collision" are split at ':' and the second part
        is split at '|'; the set of all resulting items is returned.
        """
        collision_events = [
            e for e in events.strip().split('\n')
            if e.startswith("collision")
        ]
        hits = set()
        for e in collision_events:
            o = e.split(':')[1].split('|')
            hits.update(o)
        return hits

    def __within_boundary(self, loc):
        """
        Determine if a location is within the boundary of the map
        """
        h, w = self.env.get_dims()
        x, y = loc
        return y >= 0 and y < h and x >= 0 and x < w

    def _get_surrounding_goals(self, distance_threshold=1.5, refer=None):
        """
        Given a reference, return all goals within a circular neighborhood
        with a radius of distance_threshold (excluding the refer itself).
        When refer is None the agent's location is used.
        Note that distance_threshold is inclusive
        """
        goals = self._get_goals()
        if refer is None:
            refer, _, _ = self._get_agent()
            refer = refer.loc
        ret = []
        for g in goals:
            if g.loc == refer:
                continue
            dist = self._get_direction_and_distance(refer, g.loc)
            # small tolerance so that the threshold itself is included
            if dist < distance_threshold + 1e-3:
                ret.append(g)
        return ret

    def _get_surrounding_empty_grids(self, distance_threshold=1.5,
                                     refer=None):
        """
        Given a reference location, return all empty grids in its
        neighborhood (excluding the refer itself).
        When refer is None the agent's location is used.
        """
        if refer is None:
            refer, _, _ = self._get_agent()
            refer = refer.loc
        ret = []
        for g in self.env.get_available_grids():
            if g == refer:  ## refer itself might be empty
                continue
            dist = self._get_direction_and_distance(refer, g)
            if dist < distance_threshold + 1e-3:
                ret.append(g)
        return ret

    def _get_p_tiles(self):
        """
        Return all the "pair" tiles (length=2) on the map.
        A pair (a, b) is kept when both grids are available and b has at
        least one other empty grid within distance 1.0.
        """
        Y, X = self.env.get_dims()
        p_tiles = []
        available = set(self.env.get_available_grids())

        def test_pair(p1, p2):
            if p1 in available and p2 in available:
                around_p2 = self._get_surrounding_empty_grids(
                    distance_threshold=1.0, refer=p2)
                if set(around_p2) - set([p1]):
                    p_tiles.append((p1, p2))
                around_p1 = self._get_surrounding_empty_grids(
                    distance_threshold=1.0, refer=p1)
                if set(around_p1) - set([p2]):
                    p_tiles.append((p2, p1))

        for y in range(Y):
            for x in range(X):
                test_pair((x, y, 0), (x + 1, y, 0))
                test_pair((x, y, 0), (x, y + 1, 0))
                test_pair((x, y, 0), (x + 1, y + 1, 0))

        return p_tiles

    def _get_t_tiles(self):
        """
        Return all the empty T-shaped tiles on the map.
        For our purpose, only return the two ends of the longer segment
        (length=3).
        """
        Y, X = self.env.get_dims()
        t_tiles = []
        available = set(self.env.get_available_grids())
        for y in range(Y):
            for x in range(X):
                if (x, y, 0) in available:
                    ## horizontal
                    if (x - 1, y, 0) in available \
                       and (x + 1, y, 0) in available \
                       and ((x, y - 1, 0) in available \
                            or (x, y + 1, 0) in available):
                        t_tiles.append(((x - 1, y, 0), (x + 1, y, 0)))
                    ## vertical
                    if (x, y - 1, 0) in available \
                       and (x, y + 1, 0) in available \
                       and ((x - 1, y, 0) in available \
                            or (x + 1, y, 0) in available):
                        t_tiles.append(((x, y - 1, 0), (x, y + 1, 0)))
        return t_tiles

    def _get_s_tiles(self):
        """
        Return all the square tiles (area=4) on the map.
        For every fully empty 2x2 square, its two horizontal edges and its
        two diagonals are returned as pairs of locations.
        """
        Y, X = self.env.get_dims()
        s_tiles = []
        available = set(self.env.get_available_grids())
        neighbors = [(0, 0), (1, 0), (0, 1), (1, 1)]
        for y in range(Y):
            for x in range(X):
                flag = True
                for n in neighbors:
                    nx = x + n[0]
                    ny = y + n[1]
                    if not (nx, ny, 0) in available:
                        flag = False
                        break
                if flag:  # the whole square is empty
                    s_tiles.append(((x, y, 0), (x + 1, y, 0)))
                    s_tiles.append(((x, y + 1, 0), (x + 1, y + 1, 0)))
                    s_tiles.append(((x, y, 0), (x + 1, y + 1, 0)))
                    s_tiles.append(((x + 1, y, 0), (x, y + 1, 0)))
        return s_tiles

    def _get_l_tiles(self):
        """
        Return all the l-shaped tiles (length=3) on the map.
        For every run of three available grids in a row or a column, the two
        adjacent pairs of the run are returned.
        """
        Y, X = self.env.get_dims()
        l_tiles = []
        available = set(self.env.get_available_grids())

        def test_triple(p1, p2, p3):
            if p1 in available:
                if p2 in available and p3 in available:
                    l_tiles.append((p1, p2))
                    l_tiles.append((p2, p3))

        for y in range(Y):
            for x in range(X):
                test_triple((x, y, 0), (x, y + 1, 0), (x, y + 2, 0))
                test_triple((x, y, 0), (x + 1, y, 0), (x + 2, y, 0))

        return l_tiles

    def _middle_loc(self, l1, l2, fl=False):
        """
        Return the component-wise midpoint of two 3D locations.

        With fl=True the midpoint is computed with float division. With
        fl=False integer coordinate sums are halved with floor division so
        that the result stays an integer grid location; '/' alone would
        give floats under Python 3.
        """

        def halve(total):
            if fl:
                return total / 2.0
            if isinstance(total, int):
                return total // 2
            # non-integer coordinates keep true division
            return total / 2

        return (halve(l1[0] + l2[0]), halve(l1[1] + l2[1]),
                halve(l1[2] + l2[2]))

    def _reachable(self, start, end):
        """
        Use BFS to determine that if location 'end' can be reached from
        location 'start'. The obstacles are the wall blocks and goals on the
        current map.
        """
        if start == end:
            return True
        blocks = [b.loc for b in self._get_blocks()]
        goals = [g.loc for g in self._get_goals()]
        if end in goals:  # end could be occupied by a goal
            goals.remove(end)
        obstacles = blocks + goals
        assert not start in obstacles, "start pos should not be in obstacles"
        Y, X = self.env.get_dims()
        return (bfs(start, end, X, Y, obstacles) is not None)

    def _propagate_agent(self, seeds, inclusive=False):
        """
        Given a list of goals, propagate them through the maze to find
        possible agent positions. The result of flood_fill() is sorted by
        the second element of each entry; with inclusive=True the seeds
        themselves are added with value 0.
        """
        obstacles = [b.loc for b in self._get_blocks()]
        goals = [g.loc for g in self._get_goals()]
        Y, X = self.env.get_dims()
        filled = flood_fill(seeds, obstacles + goals, X, Y)
        if inclusive:
            filled += [(s, 0) for s in seeds]
        return sorted(filled, key=lambda f: f[1])

    def _find_curriculum_agent_pos(self, targets, max_dist, inclusive=False):
        """
        Randomly pick an agent position among the positions propagated from
        targets. When max_dist > 0 the candidates are cut off right after
        the first entry whose value reaches max_dist.
        """
        new_a = self._propagate_agent(targets, inclusive=inclusive)
        assert new_a
        if max_dist > 0:
            for i, na in enumerate(new_a):
                if na[1] >= max_dist:
                    break
            new_a = new_a[:i + 1]
        new_a, _ = random.choice(new_a)
        return new_a

    ############# public APIs #############
    def reset(self):
        self.steps_in_cur_task = 0
        self.target = None
        self.answer = ""

    def get_event(self):
        """
        Return the triggered event at the current time step
        Automatically reset the event to empty after getting
        """
        ret = self.event
        self.event = ""
        return ret

    def obtain_performance(self):
        return (self.num_successes, self.num_failures, self.success_steps)

    def print_grammar(self):
        self.cfg.show()

    def total_possible_sentences(self):
        """
        Return the number of total possible sentences *given* the current
        bindings.
        """
        return self.cfg.total_possible_sentences()

    def conversation_wrapup(self):
        """
        This dummy stage simply adds an additional time step after the
        conversation is over, which enables the agent to learn language
        model from teacher's last sentence.
        """
        ### prev_event should have been recorded by self._record_event(); otherwise crash
        self._record_event(self.prev_event)
        self.prev_event = None
        return ["idle", 0, ""]

    def terminal(self):
        return ["terminal", 0, ""]

    def simple_recognition_reward(self):
        """
        A simple recognition reward stage. It gives reward according to the
        single-word answer. The agent has to exactly match the answer word.
        """
        reward = XWorld3DTask.time_penalty
        _, agent_sent, _ = self._get_agent()

        collisions = self._parse_collision_event(self.env.game_event)
        if collisions:
            reward += XWorld3DTask.collision_penalty

        self.steps_in_cur_task += 1
        session_end = True
        # NOTE(review): max_steps is not defined in this class body --
        # confirm it is assigned elsewhere before this stage runs.
        if self.steps_in_cur_task >= XWorld3DTask.max_steps / 2:
            self._record_failure()
            self._record_event("time_up", next=True)
        elif agent_sent != "-":  # if the agent answers
            if agent_sent == self.answer:
                reward += XWorld3DTask.correct_reward
                self._record_success()
                self._record_event("correct_reply", next=True)
            else:
                reward += XWorld3DTask.wrong_reward
                self._record_failure()
                self._record_event("wrong_reply", next=True)
        else:
            session_end = False

        if session_end:
            # tell the agent the correct answer before wrapping up
            self._bind("S -> answer")
            self._set_production_rule("answer -> '%s'" % self.answer)
            self.sentence = self._generate()
            next_stage = "conversation_wrapup"
        else:
            self.sentence = ""
            next_stage = "simple_recognition_reward"

        return [next_stage, reward, self.sentence]

    def _reach_object(self, agent, yaw, object):
        """
        Return True when the object's id is among the current collisions and
        the object lies within orientation_threshold of the given yaw.
        """
        collisions = self._parse_collision_event(self.env.game_event)
        theta, _, _ = self._get_direction_and_distance(agent, object.loc, yaw)
        return abs(
            theta) < self.orientation_threshold and object.id in collisions

    def _successful_goal(self, reward):
        """
        Record a success, add the correct reward and generate the "correct"
        sentence; return the updated reward.
        """
        self._record_success()
        self._record_event("correct_goal")
        reward += XWorld3DTask.correct_reward
        self._bind("S -> correct")
        self.sentence = self._generate()
        return reward

    def _failed_goal(self, reward):
        """
        Record a failure, add the wrong reward and generate the "wrong"
        sentence; return the updated reward.
        """
        self._record_failure()
        self._record_event("wrong_goal")
        reward += XWorld3DTask.wrong_reward
        self._bind("S -> wrong")
        self.sentence = self._generate()
        return reward

    def _time_reward(self):
        """
        Advance the step counter and return (time_penalty, timed_out).
        The time is up once the counter reaches
        height * width * navigation_max_steps_factor; in that case a failure
        and a "time_up" event are recorded and the "timeup" sentence is
        generated.
        """
        reward = XWorld3DTask.time_penalty
        self.steps_in_cur_task += 1
        h, w = self.env.get_dims()
        if self.steps_in_cur_task >= h * w * XWorld3DTask.navigation_max_steps_factor:
            self._record_failure()
            self._bind("S -> timeup")
            self.sentence = self._generate()
            self._record_event("time_up")
            return (reward, True)
        return (reward, False)

    ############ functions that wrap self.env and self.cfg #############
    def _list_of_strs_to_rhs(self, strs):
        """
        Converting a list of strings to a string as the right hand side of
        a production rule. For example, if the list is ['foo', 'bar'], then
        the conversion result is "'foo'|'bar'"
        """
        return "|".join(["'" + s + "'" for s in strs])

    def _get_all_goal_names_as_rhs(self):
        """
        Get all possible goal names in xworld and convert the list to a rhs
        string
        """
        return self._list_of_strs_to_rhs(
            self.env.get_all_possible_names("goal"))

    def _get_all_directions_as_rhs(self):
        """
        Get all possible spatial-relation words in xworld and convert the
        list to a rhs string
        """
        return self._list_of_strs_to_rhs(self._get_all_directions())

    def _get_all_colors_as_rhs(self):
        """
        Get all possible color words in xworld and convert the list to a rhs
        string
        """
        return self._list_of_strs_to_rhs(self._get_all_colors())

    def _get_goals(self):
        """
        Get all the goals on the current map
        """
        return self.env.get_goals()

    def _get_blocks(self):
        """
        Get all the blocks on the current map
        """
        return self.env.get_blocks()

    def _get_entities(self):
        """
        Get all the entities on the current map
        """
        return self.env.get_entities()

    def _color_defined(self, color):
        """
        Decide if a color is defined or not. An undefined color is "na"
        """
        return color != "na"

    def _get_colored_goals(self):
        """
        Get all the goals on the current map that have defined colors
        """
        return [g for g in self._get_goals() if self._color_defined(g.color)]

    def _get_agent(self):
        """
        Get the agent information; see XWorld3DEnv.get_agent()
        """
        return self.env.get_agent()

    def _set_entity_inst(self, e):
        self.env.set_entity_inst(e)

    def _move_entity(self, e, loc):
        self.env.move_entity(e, loc)

    def _delete_entity(self, e):
        self.env.delete_entity(e)

    def _bind(self, binding_str):
        """
        Bind a production rule; see CFG.bind() for details
        """
        self.cfg.bind(binding_str)

    def _generate(self):
        """
        Generate a sentence according the grammar and current bindings;
        see CFG.generate() for details
        """
        return self.cfg.generate()

    def _generate_all(self):
        """
        Generate all possible sentences given current bindings; in this
        case, the bindings are not necessary.
        See CFG.generate_all() for details.
        """
        return self.cfg.generate_all()

    def _set_production_rule(self, rule):
        """
        Set a new production rule or overwrite an existing one.
        See CFG.set_production_rule() for details.
        """
        self.cfg.set_production_rule(rule)
def __init__(self, asset_path, start_level=0):
    """
    Set up the parent class with a 7x7 maximum map size, cache the
    "curriculum" flag and start at the given level.
    """
    parent_kwargs = dict(asset_path=asset_path, max_height=7, max_width=7)
    super(XWorld3DNav, self).__init__(**parent_kwargs)
    self.curriculum = get_flag("curriculum")
    self.current_level = start_level
def set_property(self, entity, property_value_dict={}):
    """
    Reinstantiate the specified properties of an existing entity.
    Properties and corresponding values are specified by the
    property_value_dict. There are three use cases:

    1) If no property is specified (e.g. property_value_dict={}), all
       properties will be reinstantiated.
    2) If None value is provided for a specified property
       (e.g. {"name" : None}), a valid random value will be sampled for the
       specified property and all the rest properties will be reinstantiated
       randomly.
    3) If False value is provided for a specified property
       (e.g. {"loc" : False}), this property remains unchanged while other
       properties will be reinstantiated.

    Mode 2) and 3) can be used together
    (e.g. {"name" : None, "loc" : False}).

    Only the literal False means "keep": a value of 0 (e.g. for "yaw" or
    "offset") is an ordinary value and is validated and assigned.
    """

    def check_or_get_value(prop, value, valid_value_set,
                           is_continuous=False):
        """
        Check if the given value of the property is a valid one, or randomly
        select one from the valid value set if value is None, and return the
        value. is_continuous denotes whether the value set is a continuous
        [low, high] range (True) or a discrete collection (False).
        """
        if not is_continuous:
            if value is None:
                assert len(valid_value_set) > 0, \
                    "invalid value set for property %s is provided" % prop
                return random.choice(valid_value_set)
            assert value in valid_value_set, \
                "invalid value for property %s is provided" % prop
            return value

        if value is None:
            assert len(valid_value_set) == 2 and valid_value_set[0] < valid_value_set[1], \
                "invalid value range for property %s is provided" % prop
            # random.uniform() takes the two bounds as separate arguments
            return random.uniform(valid_value_set[0], valid_value_set[1])
        assert value >= valid_value_set[0] and value <= valid_value_set[1], \
            "invalid value for property %s is provided" % prop
        return value

    def assign_asset_path(value):
        """Set the asset path for the current name and the coupled color."""
        entity.asset_path = check_or_get_value(
            "asset_path", value, self.items[entity.type][entity.name])
        # color is coupled with asset_path
        if entity.asset_path in self.color_table.keys():
            entity.color = self.color_table[entity.asset_path]
        else:
            entity.color = "na"

    default_dict = OrderedDict.fromkeys(["name", \
                                         "loc", \
                                         "asset_path", \
                                         "yaw", \
                                         "scale", \
                                         "offset"])
    context_dict = default_dict.copy()
    context_dict.update(property_value_dict)

    for prop, value in context_dict.items():
        assert prop in entity.__dict__ and prop in default_dict, \
            "invalid property name: %s is provided" % prop

        # Identity check: "== False" would also match a legitimate 0.
        if value is False:
            # should only be used for properties with values; 0 and 0.0
            # count as existing values
            assert entity.__dict__[prop] is not None, \
                "no existing value for property %s" % prop
            continue

        if prop == "loc":
            entity.loc = check_or_get_value(prop, value,
                                            self.available_grids)
        if prop == "name":
            entity.name = check_or_get_value(
                prop, value, self.get_all_possible_names(entity.type))
            # update id once name is changed
            entity.id = "%s_%s" % (entity.name,
                                   self.entity_nums[entity.type])
            # update the asset_path once the name is changed: sample a
            # valid path for the new name (the name value itself is not a
            # path and must not be validated against the path list)
            assign_asset_path(None)
        if prop == "asset_path":
            assign_asset_path(value)
        if prop == "yaw" and get_flag(
                "visible_radius") and entity.type != "block":
            ## if partially observed, perturb the objects
            yaw_range = range(-1, 3)
            entity.yaw = check_or_get_value(prop, value,
                                            yaw_range) * self.PI_2
        if prop == "scale" and get_flag(
                "visible_radius") and entity.type == "goal":
            scale_range = [0.5, 1]
            entity.scale = check_or_get_value(
                prop, value, scale_range, is_continuous=True)
        if prop == "offset" and get_flag(
                "visible_radius") and entity.type == "goal":
            offset_range = [
                0, 1 - (entity.scale if hasattr(entity, 'scale') else 0.5)
            ]
            entity.offset = check_or_get_value(
                prop, value, offset_range, is_continuous=True)

    self.changed = True