def get_holes(interpreter, speaker, location, limit=1, all_proximity=10) -> List[Tuple[XYZ, Hole]]:
    """Pick holes near ``location``, preferring the one(s) the speaker looks at.

    Args:
        interpreter: object exposing ``agent`` and ``memory``.
        speaker: name used to look up the player struct in
            ``interpreter.memory``.
        location: passed through to ``perception.get_all_nearby_holes``.
        limit: maximum number of holes to return, or the string ``"ALL"``.
        all_proximity: only used by the nearest-hole fallback when
            ``limit == "ALL"``; holes whose centroid is farther than this
            from the speaker are dropped.

    Returns:
        A list of ``(centroid_block_pos, hole)`` tuples; empty when no hole
        was found near ``location``.
    """
    holes: List[Hole] = perception.get_all_nearby_holes(interpreter.agent, location)
    # Pair every hole with the block position of the mean of hole[0]
    # (presumably the hole's block coordinates -- confirm against perception).
    candidates: List[Tuple[XYZ, Hole]] = [
        (to_block_pos(np.mean(hole[0], axis=0)), hole) for hole in holes
    ]
    if len(candidates) == 0:
        return []

    # NB(demiguo): by default, we fill the hole the player is looking at
    player = interpreter.memory.get_player_struct_by_name(speaker)
    looked_at = object_looked_at(interpreter.agent, candidates, player, limit=limit)
    if looked_at is not None and len(looked_at) > 0:
        return looked_at

    # NB(demiguo): if there's no hole in front of the player, we will fill
    # the nearest hole.  The player struct fetched above is reused instead of
    # being looked up in memory a second time.
    speaker_pos = to_block_pos(pos_to_np(player.pos))

    def dist_to_speaker(candidate):
        return euclid_dist(candidate[0], speaker_pos)

    if limit == "ALL":
        return [c for c in candidates if dist_to_speaker(c) <= all_proximity]
    candidates.sort(key=dist_to_speaker)
    return candidates[:limit]
def interpret_reference_object(
    interpreter, speaker, d, ignore_mobs=False, limit=1, loose_speakerlook=False
) -> List[ReferenceObjectNode]:
    """Resolve the reference-object dict ``d`` to a list of memory nodes.

    Resolution order:
      1. If ``d["coref_resolve"]`` is already a ``ReferenceObjectNode``,
         return it.
      2. If a clarification question was already answered
         (``interpreter.progeny_data`` is non-empty), return the provisional
         object on a "yes" response and nothing otherwise.
      3. Otherwise search memory using the ``has_*`` string values of ``d``
         as tags and narrow the matches with ``filter_by_sublocation``.
      4. If nothing matches, push a ``ConfirmReferenceObject`` dialogue for
         the object the speaker is looking at and raise ``NextDialogueStep``.

    Args:
        interpreter: the running interpreter (memory, agent, dialogue stack,
            ``provisional`` and ``progeny_data`` are used).
        speaker: name of the speaking player.
        d: reference-object sub-dict of the action dict.
        ignore_mobs: when true, candidates come from ``get_objects`` instead
            of ``get_reference_objects``.
        limit: number of objects wanted, or ``"ALL"``.  When left at ``1`` it
            is overridden by ``get_repeat_num(d)`` in the search path.
        loose_speakerlook: forwarded as ``loose`` to ``filter_by_sublocation``.

    Returns:
        A list of resolved memory nodes (possibly empty).

    Raises:
        ErrorWithResponse: nothing matched and the speaker is not looking at
            any known object.
        NextDialogueStep: a confirmation dialogue was pushed.
    """
    coref = d.get("coref_resolve", "NULL")
    if coref != "NULL":
        if isinstance(coref, ReferenceObjectNode):
            return [coref]
        # Not fatal: fall through and try to resolve by tags instead.
        logging.error("bad coref_resolve -> {}".format(coref))

    if len(interpreter.progeny_data) > 0:
        # clarification answered
        response = interpreter.progeny_data[-1].get("response")
        if response != "yes":
            # TODO: error handling here ?
            return []
        # TODO: learn from the tag!  put it in memory!
        # `limit` may be the string "ALL"; multiplying a list by a string
        # raises TypeError, so a non-integer limit yields the single
        # confirmed object.
        copies = limit if isinstance(limit, int) else 1
        return [interpreter.provisional.get("object_mem")] * copies

    # Drop a leading "the " before stemming.  str.lstrip("the ") would strip
    # any leading run of the characters 't', 'h', 'e' and ' ' (turning
    # "the house" into "ouse" and "tree" into "ree"), so test the prefix
    # explicitly.
    article = "the "
    tags = [
        stemmer.stemWord(tag[len(article):] if tag.startswith(article) else tag)
        for key, tag in d.items()
        if key.startswith("has_") and isinstance(tag, str)
    ]
    if ignore_mobs:
        candidates = get_objects(interpreter, *tags)
    else:
        candidates = get_reference_objects(interpreter, *tags)

    if len(candidates) > 0:
        location_d = d.get("location", {"location_type": "SPEAKER_LOOK"})
        if limit == 1:
            # override with input value
            limit = get_repeat_num(d)
        matches = filter_by_sublocation(
            interpreter,
            speaker,
            candidates,
            location_d,
            limit=limit,
            loose=loose_speakerlook,
        )
        return [mem for _, mem in matches]

    # no candidates found; ask Clarification
    # TODO: move ttad call to dialogue manager and remove this logic
    interpreter.action_dict_frozen = True
    player = interpreter.memory.get_player_struct_by_name(speaker)
    confirm_candidates = get_objects(interpreter)  # no tags
    objects = object_looked_at(interpreter.agent, confirm_candidates, player, limit=1)
    if not objects:  # covers both an empty result and None
        raise ErrorWithResponse("I don't know what you're referring to")
    _, mem = objects[0]
    blocks = list(mem.blocks.keys())
    # Stash what the confirmation step needs once the speaker answers.
    interpreter.provisional["object_mem"] = mem
    interpreter.provisional["object"] = blocks
    interpreter.provisional["d"] = d
    interpreter.dialogue_stack.append_new(ConfirmReferenceObject, blocks)
    raise NextDialogueStep()
def filter_by_sublocation(
    interpreter,
    speaker,
    candidates: List[Tuple[XYZ, T]],
    location: Dict,
    limit=1,
    all_proximity=10,
    loose=False,
) -> List[Tuple[XYZ, T]]:
    """Select from a list of candidate (xyz, object) tuples given a sublocation

    If limit == 'ALL', return all matching candidates

    Args:
        interpreter: the running interpreter (``memory`` and ``agent`` are
            used).
        speaker: name of the speaking player.
        candidates: ``(xyz, mem)`` tuples to choose from; not modified.
        location: location dict; the keys read here are ``location_type``,
            ``relative_direction`` and ``reference_object``.
        limit: maximum number of results, or ``"ALL"``.
        all_proximity: distance cut-off applied when ``limit == "ALL"`` and
            candidates are chosen by closeness.
        loose: forwarded to ``object_looked_at`` for ``SPEAKER_LOOK``.

    Returns a list of (xyz, mem) tuples

    Raises:
        ErrorWithResponse: for ``INSIDE`` when no candidate is inside the
            reference object, and always for ``AWAY``.
    """
    # handle SPEAKER_LOOK separately due to slightly different semantics
    # (proximity to ray instead of point)
    if location.get("location_type") == "SPEAKER_LOOK":
        player = interpreter.memory.get_player_struct_by_name(speaker)
        return object_looked_at(
            interpreter.agent, candidates, player, limit=limit, loose=loose
        )

    reldir = location.get("relative_direction")

    if reldir == "INSIDE":
        ref_d = location.get("reference_object")
        if ref_d:
            # this is ugly, should probably return from interpret_location...
            ref_mems = interpret_reference_object(interpreter, speaker, ref_d)
            # An unresolved reference object falls through to the error below
            # instead of failing on ref_mems[0].
            if ref_mems:
                for l, candidate_mem in candidates:
                    if perception.check_inside([candidate_mem, ref_mems[0]]):
                        return [(l, candidate_mem)]
        raise ErrorWithResponse("I can't find something inside that")

    if reldir == "AWAY":
        raise ErrorWithResponse("I don't know which object you mean")

    # reference object location, i.e. the "X" in "left of X".  Both remaining
    # paths need it, so resolve it once here; previously the closest-candidate
    # path read `ref_loc` without ever assigning it.
    ref_loc = interpret_location(interpreter, speaker, location, ignore_reldir=True)

    if reldir and reldir != "NEAR":
        # relative direction, i.e. the "LEFT" in "left of X"
        reldir_vec = rotation.DIRECTIONS[reldir]

        # transform each object into the speaker look coordinate system,
        # and project onto the reldir vector
        look = interpreter.memory.get_player_struct_by_name(speaker).look
        proj = [
            rotation.transform(np.array(l) - ref_loc, look.yaw, 0) @ reldir_vec
            for (l, _) in candidates
        ]

        # filter by relative dir, e.g. "left of Y"
        proj_cands = [(p, c) for (p, c) in zip(proj, candidates) if p > 0]

        # "the X left of Y" = the right-most X that is left of Y
        ordered = [c for (_, c) in sorted(proj_cands, key=lambda p: p[0])]
        return ordered if limit == "ALL" else ordered[:limit]

    # no reference direction (or NEAR, which falls back to it): choose the
    # closest
    if limit == "ALL":
        return [c for c in candidates if euclid_dist(c[0], ref_loc) <= all_proximity]
    # sorted() rather than list.sort() so the caller's list is left untouched.
    return sorted(candidates, key=lambda c: euclid_dist(c[0], ref_loc))[:limit]
def interpret_reference_object(
    interpreter,
    speaker,
    d,
    only_voxels=False,
    only_physical=False,
    only_destructible=False,
    not_location=False,
    limit=1,
    loose_speakerlook=False,
) -> List[ReferenceObjectNode]:
    """this tries to find a ref obj memory matching the criteria from the
    ref_obj_dict

    Resolution order:
      1. ``d["special_reference"]`` is resolved through
         ``get_special_reference_object``.
      2. ``d["filters"]["contains_coreference"]`` is returned when it is
         already a ``ReferenceObjectNode``.
      3. If a clarification question was already answered
         (``interpreter.progeny_data`` is non-empty), the provisional object
         is returned on a "yes" response and nothing otherwise.
      4. Otherwise memory is searched by tags and the matches are narrowed
         with ``filter_by_sublocation``.
      5. If nothing matches, a ``ConfirmReferenceObject`` dialogue is pushed
         for the object the speaker is looking at and ``NextDialogueStep`` is
         raised.

    Args:
        interpreter: the running interpreter.
        speaker: name of the speaking player.
        d: reference-object dict; must contain ``filters`` and/or
            ``special_reference``.
        only_voxels: add the ``_voxel_object`` tag to the search.
        only_physical: add the ``_physical_object`` tag to the search.
        only_destructible: add the ``_destructible`` tag to the search.
        not_location: add the ``_not_location`` tag to the search (not used
            for the clarification candidates).
        limit: number of objects wanted, or ``"ALL"``.
        loose_speakerlook: forwarded as ``loose`` to ``filter_by_sublocation``.

    Returns:
        A list of resolved memory nodes (possibly empty).

    Raises:
        AssertionError: ``d`` has neither ``filters`` nor a truthy
            ``special_reference``.
        ErrorWithResponse: nothing matched and the speaker is not looking at
            any known object.
        NextDialogueStep: a confirmation dialogue was pushed.
    """
    F = d.get("filters")
    special = d.get("special_reference")
    # F can be empty...
    assert (
        F is not None
    ) or special, "no filters or special_reference sub-dicts {}".format(d)
    if special:
        return [get_special_reference_object(interpreter, speaker, special)]

    coref = F.get("contains_coreference", "NULL")
    if coref != "NULL":
        if isinstance(coref, ReferenceObjectNode):
            return [coref]
        # Not fatal: fall through and try to resolve by tags instead.
        logging.error("bad coref_resolve -> {}".format(coref))

    if len(interpreter.progeny_data) > 0:
        # clarification answered
        response = interpreter.progeny_data[-1].get("response")
        if response != "yes":
            # TODO: error handling here ?
            return []
        # TODO: learn from the tag!  put it in memory!
        # `limit` may be the string "ALL"; multiplying a list by a string
        # raises TypeError, so a non-integer limit yields the single
        # confirmed object.
        copies = limit if isinstance(limit, int) else 1
        return [interpreter.provisional.get("object_mem")] * copies

    # Tags shared by the main search and the clarification fallback, built
    # once instead of twice.
    restriction_tags = []
    if only_voxels:
        restriction_tags.append("_voxel_object")
    if only_physical:
        restriction_tags.append("_physical_object")
    if only_destructible:
        restriction_tags.append("_destructible")

    # Copy so the list returned by tags_from_dict is not mutated.
    tags = list(tags_from_dict(F)) + restriction_tags
    # FIXME hack until memory_filters supprts "not"
    if not_location:
        tags.append("_not_location")
    # TODO Add ignore_player maybe?
    candidates = get_reference_objects(interpreter, *tags)

    if len(candidates) > 0:
        matches = filter_by_sublocation(
            interpreter, speaker, candidates, d, limit=limit, loose=loose_speakerlook
        )
        return [mem for _, mem in matches]

    # no candidates found; ask Clarification
    # TODO: move ttad call to dialogue manager and remove this logic
    interpreter.action_dict_frozen = True
    player_struct = interpreter.agent.perception_modules[
        "low_level"
    ].get_player_struct_by_name(speaker)
    confirm_candidates = get_reference_objects(interpreter, *restriction_tags)
    objects = object_looked_at(
        interpreter.agent, confirm_candidates, player_struct, limit=1
    )
    if not objects:  # covers both an empty result and None
        raise ErrorWithResponse("I don't know what you're referring to")
    _, mem = objects[0]
    # Stash what the confirmation step needs once the speaker answers.
    interpreter.provisional["object_mem"] = mem
    interpreter.provisional["F"] = F
    interpreter.dialogue_stack.append_new(ConfirmReferenceObject, mem)
    raise NextDialogueStep()