class PolicyRoller:
    """
    Really only a wrapper around the roll_out_policy function, which does the policy rollout in the pomdp
    It collects actions both from the user-provided policy and from the oracle (as labels) and accumulates a dataset
    """

    def __init__(self, instance_id=0):
        """
        :param instance_id: forwarded as ``instance_id`` to PomdpInterface when roll_out_policy creates the env
        """
        self.presenter = Presenter()
        self.instance_id = instance_id
        # Created by roll_out_policy; None until then
        self.env = None
        # Filled in by roll_out_policy from load_all_envs()
        self.word2token = None
        self.all_instructions = None

    def reset(self):
        """Re-run __init__ while keeping the instance_id this roller was constructed with."""
        # Calling __init__() without arguments would silently fall back to instance_id=0
        self.__init__(instance_id=self.instance_id)

    def load_all_envs(self):
        """
        Load every instruction set and the vocabulary maps.
        :return: tuple (env_ids, all_instructions, corpus, token2term, word2token)
        """
        train_i, dev_i, test_i, corpus = get_all_instructions()
        all_instructions = merge_instruction_sets(train_i, dev_i, test_i)
        token2term, word2token = get_word_to_token_map(corpus)
        env_ids = list(all_instructions.keys())
        return env_ids, all_instructions, corpus, token2term, word2token

    def tokenize_string(self, s):
        """
        Split an instruction on single spaces, drop empty pieces and map each word through self.word2token.
        :param s: instruction string
        :return: list of tokens; raises KeyError for a word that is missing from self.word2token
        """
        words = [w for w in s.split(" ") if w]
        return [self.word2token[w] for w in words]

    def roll_out_on_segment(self, ):
        pass

    def choose_action(self, params, step, switch_thres, reference_action, policy_action):
        """
        Choose whether to perform the policy action or the reference (oracle) action based on the type of
        mixture policy that is being executed
        :param params: RolloutParams instance
        :param step: current control step number
        :param switch_thres: roll-in/roll-out control step number
        :param reference_action: action executed by oracle
        :param policy_action: action executed by policy
        :return: the action to execute, or None if params.rollout_strategy is none of the handled strategies
        """
        strategy = params.rollout_strategy
        if strategy == RolloutStrategy.POLICY:
            return policy_action
        if strategy == RolloutStrategy.REFERENCE:
            return reference_action
        if strategy == RolloutStrategy.POLICY_IN_REF_OUT:
            # Policy rolls in, the oracle takes over after the switch threshold
            return reference_action if step > switch_thres else policy_action
        if strategy == RolloutStrategy.MIXTURE:
            if random.uniform(0, 1) < params.mixture_ref_prob:
                return reference_action
            return policy_action
        return None

    @staticmethod
    def _index_valid_envs(env_ids, all_instructions):
        """Return (position in env_ids, env_id) pairs for the envs that have an entry in all_instructions."""
        return [(i, env_id) for i, env_id in enumerate(env_ids) if env_id in all_instructions]

    @staticmethod
    def _report_rollout_error(error):
        """Print the error and the traceback of the exception currently being handled."""
        import traceback
        from utils.colors import print_error
        print_error("Error encountered during policy rollout!")
        print_error(error)
        print_error(traceback.format_exc())

    def roll_out_on_env(self, params, instructions_set, set_idx, only_seg_idx=None, custom_instr=None):
        """
        Roll out the policy on the segments of one instruction set of one environment.
        :param params: RollOutParams instance
        :param instructions_set: dict with keys "env" (environment id) and "instructions" (list of segments)
        :param set_idx: index of this instruction set, stored in every sample and in run metadata
        :param only_seg_idx: if not None, every segment with a different seg_idx is skipped
        :param custom_instr: if not None, replaces the instruction string of every executed segment
        :return: list of samples, or list of per-segment sample lists if params.isSegmentLevel()
        """
        env_dataset = []
        failed = False

        env_id = instructions_set["env"]
        self.env.set_environment(env_id, instruction_set=instructions_set['instructions'])
        path = load_and_convert_path(env_id)
        params.initPolicyContext(env_id, path)

        import rollout.run_metadata as md

        # all segments with at least length 2
        valid_segments = [
            (seg, seg["seg_idx"])
            for seg in instructions_set['instructions']
            if seg["end_idx"] - seg["start_idx"] >= 2
        ]

        if len(valid_segments) == 0:
            print("Ding dong!")

        first_seg = True

        # For recurrent policy, we need to explicity start a segment and reset the LSTM state
        # TODO: Make sure this still works for the older non-NL model
        params.policy.start_sequence()

        for segment, seg_idx in valid_segments:
            if only_seg_idx is not None and seg_idx != only_seg_idx:
                print("Skipping seg: " + str(seg_idx) + " as not requested")
                continue

            valid_segment = self.env.set_current_segment(seg_idx)
            if not valid_segment:
                print(f"Skipping segment {seg_idx} as it is empty / invalid for env {env_id}")
                continue

            if params.segment_level:
                params.policy.start_sequence()

            segment_dataset = []

            # Decide when to switch policies
            switch_threshold = params.horizon + 1  # Never switch policies by default
            do_switch = random.uniform(0, 1) < params.switch_prob
            if do_switch and params.threshold_strategy == SwitchThresholdStrategy.UNIFORM:
                switch_threshold = random.uniform(0, params.horizon)

            string_instruction = segment["instruction"]
            end_idx = segment["end_idx"]
            start_idx = segment["start_idx"]

            # Manual instruction override to allow rolling out arbitrary instructions for debugging
            if custom_instr is not None:
                print("REPLACED: ", string_instruction)
                string_instruction = custom_instr

            print("INSTRUCTION:", string_instruction)

            # Set some global parameters that can be accessed by other parts of the system
            md.IS_ROLLOUT = True
            md.REAL_DRONE = params.real_drone
            md.RUN_NAME = params.run_name
            md.ENV_ID = env_id
            md.SET_IDX = set_idx
            md.SEG_IDX = seg_idx
            md.START_IDX = start_idx
            md.END_IDX = end_idx
            md.INSTRUCTION = string_instruction

            if hasattr(params.policy, "start_segment_rollout"):
                params.policy.start_segment_rollout(env_id, set_idx, seg_idx)

            token_instruction = self.tokenize_string(string_instruction)

            # At the end of segment N, should we reset drone position to the start of segment N+1 or continue
            # rolling out seamlessly?
            if first_seg or params.shouldResetAlways() or (failed and params.shouldResetIfFailed()):
                state = self.env.reset(seg_idx)
                failed = False
                first_seg = False
                sleep(sleepytime)

            # Tell the oracle which part of the path is currently being executed
            params.setCurrentSegment(start_idx, end_idx)

            step_num = 0

            # If the path has been finished according to the oracle, allow rolling out STEPS_TO_KILL more steps
            oracle_finished_countdown = params.steps_to_kill

            # Finally the actual policy roll out on the path segment!
            while True:
                # Get oracle action (labels)
                ref_action, _ = params.ref_policy.get_action(state, token_instruction)

                if ref_action is None or step_num == params.horizon:
                    # Either veered off too far, or ran out of time. Either way, we consider it a fail
                    failed = True
                    print("Failed segment")
                    break

                # Get the policy action (actions to be rolled out)
                action, _ = params.policy.get_action(state, token_instruction)

                if action is None:
                    print("POLICY PRODUCED None ACTION")
                    break

                # Choose which action to execute (reference or policy) based on the selected procedure
                exec_action = self.choose_action(params, step_num, switch_threshold, ref_action, action)

                # action = [vel_x, vel_y, vel_yaw] vel_y is unused currently. Execute the action in the pomdp
                state, reward, done, exceeded, oob = self.env.step(exec_action)

                # Collect the data into a dataset
                sample = {
                    "instruction": string_instruction,
                    "state": state,
                    "ref_action": ref_action,
                    "reward": reward,
                    "done": done,
                    "seg_path": path[start_idx:end_idx + 1],
                    "path": path,
                    "env_id": env_id,
                    "set_idx": set_idx,
                    "seg_idx": seg_idx,
                    "start_idx": start_idx,
                    "end_idx": end_idx,
                    "action": exec_action,
                    "pol_action": action,
                    "flag": params.getFlag()
                }

                segment_dataset.append(sample)
                if not params.isSegmentLevel():
                    env_dataset.append(sample)

                # Do visual feedback and logging
                if params.first_person:
                    self.presenter.show_sample(state, exec_action, reward, string_instruction)
                if params.plot:
                    self.presenter.plot_paths(segment_dataset, interactive=True)
                if params.save_samples:
                    file_path = params.getSaveSamplesPath(env_id, set_idx, seg_idx, step_num)
                    self.presenter.save_sample(file_path, state, exec_action, reward, string_instruction)
                if params.show_action:
                    self.presenter.show_action(ref_action, "ref_action")
                    self.presenter.show_action(exec_action, "exec_action")

                # If the policy is finished, we stop. Otherwise the oracle should just keep outputing
                # examples that say that the policy should output finished at this point
                if exec_action[3] > 0.5 and not params.shouldIgnorePolicyStop():
                    print("Policy stop!")
                    break
                # If oracle says we're finished, allow a number of steps before terminating.
                if ref_action[3] > 0.5:
                    oracle_finished_countdown -= 1
                    if oracle_finished_countdown == 0:
                        print("Oracle forced stop!")
                        break
                step_num += 1

            # Call the rollout end callback, so that the model can save any debugging information, such as feature maps
            if callable(getattr(params.policy, "on_rollout_end", None)):
                params.policy.on_rollout_end(env_id, set_idx, seg_idx)

            if params.isSegmentLevel():
                env_dataset.append(segment_dataset)

            # Plot the trajectories for error tracking
            # TODO: Plot entire envs not segment by segment
            if params.save_plots:
                if not params.isSegmentLevel():
                    self.presenter.plot_paths(
                        env_dataset,
                        segment_path=path[start_idx:end_idx + 1],
                        interactive=False,
                        bg=True,
                        world_size=4.7)
                self.presenter.save_plot(params.getSavePlotPath(env_id, set_idx, seg_idx))

            if params.first_segment_only:
                print("Only running the first segment")
                break

        return env_dataset

    def roll_out_policy(self, params):
        """
        Given the provided rollout parameters, spawn a simulator instance and execute the specified policy on all
        environments specified in params.envs.

        Awful function that really needs to be simplified.
        A lot of the code is simply checking various error conditions, because the data has issues, and logging the
        outcome. The actual rollout is a very small part of the code.
        :param params: RollOutParams instance defining the parameters of the rollout
        :return: Aggregated dataset with one entry per rolled-out (environment, instruction set) pair.
        If params.isSegmentLevel(), each entry is a list (over segments) of lists of samples,
        otherwise each entry is a flat list of samples.
        """
        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            # We can't just pass a neural network into this function, because it can't be pickled
            params.loadPolicy()
            assert params.hasPolicy()

            self.env = PomdpInterface(instance_id=self.instance_id, is_real=params.real_drone)

            all_env_ids, all_instructions, corpus, token2term, self.word2token = self.load_all_envs()

            env_ids = params.envs
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # Filter out the envs that are not in all_instructions (we don't have instructions available for
            # them), but keep each env's position in params.envs: seg_indices and custom_instructions are
            # indexed by that position, so indexing by the position in the filtered list would pick the
            # entries of a different env as soon as one env is dropped.
            valid_envs = self._index_valid_envs(env_ids, all_instructions)

            # Loop through environments
            for i, env_id in valid_envs:
                # Loop through all non-empty sets of instructions for each pomdp
                instruction_sets = [s for s in all_instructions[env_id] if len(s) > 0]

                if len(instruction_sets) == 0:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    try:
                        seg_id = seg_indices[i] if seg_indices is not None else None
                        custom_instr = custom_instructions[i] if custom_instructions is not None else None
                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        dataset.append(
                            self.roll_out_on_env(params, instructions_set, j, seg_id, custom_instr))
                        DebugWriter().commit()

                        if params.isRealDrone():
                            break

                    except Exception as e:
                        # Best effort: report and move on to the next instruction set
                        self._report_rollout_error(e)
                        continue

        except Exception as e:
            self._report_rollout_error(e)

        # The env does not exist if setup failed before PomdpInterface was constructed
        if self.env is not None:
            self.env.land()

        return dataset
class DataEvalNL(EvaluateBase):
    """
    Evaluates rollouts of natural-language instruction segments: goal distance / success, earth mover's
    distance (EMD) between the executed and the ground-truth path, and whether the goal was initially visible.
    """

    def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None, aug_len=None):
        """
        :param run_name: name used for the results directory / results file and printed on plots
        :param save_images: if True, evaluate_rollout saves a plot for every rollout with a truthy EMD
        :param entire_trajectory: if True, compare against the whole env path instead of the segment's slice
        :param custom_instr: if not None, plot filenames get a suffix derived from the sample's instruction
        :param aug_len: if truthy, only segments whose "merge_len" equals this value are evaluated
        """
        # NOTE(review): super(EvaluateBase, self) starts the lookup *after* EvaluateBase in the MRO, so
        # EvaluateBase.__init__ itself is not called here - confirm that this is intended.
        super(EvaluateBase, self).__init__()
        # NOTE(review): PolicyRoller.load_all_envs unpacks this same call as (train, dev, test, corpus);
        # test_i and dev_i may be swapped here. Only the merged self.all_i is used in this class.
        self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
        self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
        self.passing_distance = P.get_current_parameters()["Units"]["passing_distance"]
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images
        self.entire_trajectory = entire_trajectory
        self.custom_instr = custom_instr
        self.aug_len = aug_len

        # Maps "<env_id>_<seg_idx>" to 1 if the goal was visible from the segment start, else 0
        self.visible_map = {}
        self.hfov = P.get_current_parameters()["ModelPVN"]["Stage1"]["cam_h_fov"]

    @staticmethod
    def _ensure_metadata(sample):
        """
        Return the metadata dict of a sample. Flat samples (no "metadata" key) are aliased in place so that
        sample["metadata"] is the sample itself and both layouts can be read the same way afterwards.
        """
        if "metadata" not in sample:
            sample["metadata"] = sample
        return sample["metadata"]

    def _has_multiple_segments(self, rollout):
        """Return True if any sample's seg_idx differs from the first sample's seg_idx."""
        first_idx = self._ensure_metadata(rollout[0])["seg_idx"]
        for sample in rollout:
            if self._ensure_metadata(sample)["seg_idx"] != first_idx:
                return True
        return False

    def _split_rollout_in_segments(self, rollout):
        """Split a rollout into runs of consecutive samples that share the same seg_idx."""
        # The first sample is normalised too, so this also works when called on its own with flat samples
        self._ensure_metadata(rollout[0])
        segments = []
        current_segment = [rollout[0]]
        for sample in rollout[1:]:
            seg_idx = self._ensure_metadata(sample)["seg_idx"]
            if seg_idx != current_segment[0]["metadata"]["seg_idx"]:
                segments.append(current_segment)
                current_segment = [sample]
            else:
                current_segment.append(sample)
        segments.append(current_segment)
        return segments

    def _segment_matches_auglen(self, segment):
        """Return True if no aug_len filter is set or the segment's "merge_len" equals self.aug_len."""
        if not self.aug_len:
            return True
        # Read through the metadata view so that nested-only samples work as well as flat ones
        meta = self._ensure_metadata(segment[0])
        instr_seg = get_instruction_segment(
            meta["env_id"], meta["set_idx"], meta["seg_idx"], all_instr=self.all_i)
        return instr_seg["merge_len"] == self.aug_len

    def evaluate_dataset(self, list_of_rollouts):
        """
        Evaluate every non-empty rollout (split per segment if it spans several), accumulate the results in
        self.results and write them to disk once at the end.
        """
        for rollout in list_of_rollouts:
            if len(rollout) == 0:
                continue
            if self._has_multiple_segments(rollout):
                candidates = self._split_rollout_in_segments(rollout)
            else:
                candidates = [rollout]
            for segment in candidates:
                if not self._segment_matches_auglen(segment):
                    continue
                seg_results = self.evaluate_rollout(segment)
                if seg_results is not None:
                    self.results += seg_results
        self.save_results()

    def rollout_success(self, env_id, set_idx, seg_idx, rollout):
        """Return True if the last rollout position is within passing_distance of the segment's end point."""
        path = load_and_convert_path(env_id)
        instructions = self.all_i[env_id][set_idx]["instructions"]
        seg_ordinal = seg_idx_to_ordinal(instructions, seg_idx)
        # Clamp to the last valid path index
        path_end_idx = min(instructions[seg_ordinal]["end_idx"], len(path) - 1)
        end_pos = np.asarray(rollout[-1]["state"].get_pos_2d())
        target_end_pos = np.asarray(path[path_end_idx])
        end_dist = np.linalg.norm(end_pos - target_end_pos)
        return end_dist < self.passing_distance

    def is_goal_visible(self, instr_seg):
        """
        Return True if twice the absolute yaw offset between the start heading and the start->end direction
        is smaller than the horizontal field of view (self.hfov, converted with math.radians).
        """
        end = np.asarray(instr_seg["end_pos"])
        start = np.asarray(instr_seg["start_pos"])
        endp_yaw = vec_to_yaw(end - start)
        yaw_diff = endp_yaw - instr_seg["start_yaw"]
        yaw_diff_abs = math.fabs(clip_angle(yaw_diff))
        return 2 * yaw_diff_abs < math.radians(self.hfov)

    def _filter_path(self, posseq, dst=0.02):
        """
        Replace original points in the path with equally spaced points
        :param posseq: non-empty sequence of positions that support subtraction (e.g. numpy arrays)
        :param dst: arc-length spacing between consecutive output points; must be positive
        :return: numpy array starting with posseq[0], followed by one interpolated point every dst along the path
        """
        if dst <= 0:
            # p would never advance and the resampling loop would not terminate
            raise ValueError("dst must be positive")

        # cumdists[i] is the path length from posseq[0] to posseq[i]
        cumdists = [0]
        for prev_pos, pos in zip(posseq[:-1], posseq[1:]):
            cumdists.append(cumdists[-1] + np.linalg.norm(pos - prev_pos))
        total_path_length = cumdists[-1]

        # Add the starting point, and move to the next point
        traj_out = [posseq[0]]
        p = dst
        ptr = 0

        # Reconstruct the trajectory with equidistant points of fixed precision.
        while p < total_path_length:
            # Advance to the first original point at or beyond arc length p *before* interpolating.
            # Interpolating with ptr == 0 would wrap ptr - 1 around to the last point of the path.
            # p < cumdists[-1] guarantees that ptr stays in range.
            while p > cumdists[ptr]:
                ptr += 1
            # Get how far along until the next point this is
            frac = (p - cumdists[ptr - 1]) / (cumdists[ptr] - cumdists[ptr - 1] + 1e-10)
            # Grab interpolated intermediate point
            traj_out.append(posseq[ptr - 1] + (posseq[ptr] - posseq[ptr - 1]) * frac)
            p += dst

        return np.asarray(traj_out)

    def _calculate_emd(self, exec_path, gt_path):
        """
        Earth mover's distance between two point sets that each carry a uniformly distributed mass of 1.
        :return: the transport cost reported by ot.emd, or None if gt_path is empty
        """
        exec_len = len(exec_path)
        gt_len = len(gt_path)
        if gt_len == 0:
            return None
        # rows index over ground truth path, columns index over executed path
        p2p_differences = exec_path[np.newaxis, :, :] - gt_path[:, np.newaxis, :]
        p2p_distances = np.linalg.norm(p2p_differences, axis=2)
        # Distribute probability mass of 1 evenly over executed and ground-truth trajectories
        prob_masses_exec = np.asarray([1 / float(exec_len + 1e-10)] * exec_len)
        prob_masses_gt = np.asarray([1 / float(gt_len + 1e-10)] * gt_len)
        assert np.isclose(prob_masses_exec.sum(), 1.0)
        assert np.isclose(prob_masses_gt.sum(), 1.0)
        ot_plan, log = ot.emd(prob_masses_gt, prob_masses_exec, p2p_distances, log=True, numItermax=10000)
        emd = log["cost"]
        assert emd > 0, "There is no way that a drone will perfectly follow a trajectory! Something is wrong. EMD error?"
        return emd

    def evaluate_rollout(self, rollout):
        """
        Evaluate a single-segment rollout.
        :param rollout: non-empty list of samples; env_id / set_idx / seg_idx are read from the last sample
        :return: ResultsLandmarkSide for this rollout, or None if the ground-truth path slice is empty
        """
        last_sample = rollout[-1]
        meta = self._ensure_metadata(last_sample)
        env_id = meta["env_id"]
        seg_idx = meta["seg_idx"]
        set_idx = meta["set_idx"]

        path = load_and_convert_path(env_id)

        instructions = self.all_i[env_id][set_idx]["instructions"]
        seg_ordinal = seg_idx_to_ordinal(instructions, seg_idx)
        instr_seg = instructions[seg_ordinal]

        if self.entire_trajectory:
            path_end_idx = len(path) - 1
            path_start_idx = 0
        else:
            # Find the segment end index
            path_end_idx = instr_seg["end_idx"] + 1
            path_start_idx = instr_seg["start_idx"]
            if path_end_idx > len(path) - 1:
                path_end_idx = len(path) - 1
            if path_end_idx < path_start_idx:
                path_start_idx = path_end_idx

        # NOTE(review): the slice end is exclusive, so path[path_end_idx] itself is never part of seg_path
        # (with entire_trajectory the last path point is dropped) - confirm that this is intended.
        seg_path = path[path_start_idx:path_end_idx]

        if len(seg_path) == 0:
            # No ground truth to compare against; evaluate_dataset skips None results
            print("Skipping evaluation, empty ground truth path:", env_id, set_idx, seg_idx)
            return None

        goal_visible = self.is_goal_visible(instr_seg)
        self.visible_map[f"{env_id}_{seg_idx}"] = (1 if goal_visible else 0)

        exec_path = np.asarray([r["state"].get_pos_2d() for r in rollout])

        end_pos = np.asarray(exec_path[-1])
        target_end_pos = np.asarray(seg_path[-1])
        end_dist = np.linalg.norm(end_pos - target_end_pos)
        success = end_dist < self.passing_distance

        # EMD between trajectories, and EMD between start position and trajectory.
        exec_path = self._filter_path(exec_path)
        gt_path = self._filter_path(seg_path)
        emd = self._calculate_emd(exec_path, gt_path)
        stop_emd = self._calculate_emd(exec_path[0:1], gt_path)

        # Success weighted by earth-mover's distance
        nemd = emd / stop_emd
        semd = max((1 if success else 0) * (1 - nemd), 0)

        if meta["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif meta["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        result = "Success" if success else "Fail"
        print(env_id, set_idx, seg_idx, result)

        texts = [who_stopped, result, "run:" + self.run_name]

        if self.save_images and emd:
            results_dir = get_results_dir(self.run_name, makedir=True)
            print("Results dir: ", results_dir)
            # TODO: Refactor this to not pull path from rollout, but provide it explicitly
            self.presenter.plot_paths(
                rollout,
                segment_path=gt_path,
                interactive=False,
                texts=texts,
                entire_trajectory=self.entire_trajectory,
                world_size=P.get_current_parameters()["Setup"]["world_size_m"],
                real_drone=P.get_current_parameters()["Setup"]["real_drone"])
            filename = os.path.join(results_dir, str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            if self.custom_instr is not None:
                instruction = meta["instruction"]
                filename += "_" + instruction[:24] + "_" + instruction[-16:]
            self.presenter.save_plot(filename)

        return ResultsLandmarkSide(success=success,
                                   end_dist=end_dist,
                                   goal_visible=goal_visible,
                                   emd=emd,
                                   semd=semd,
                                   nemd=nemd)

    def write_summaries(self, run_name, name, iteration):
        """Write average goal distance and success rate as scalars named "<name>/..." at the given iteration."""
        results_dict = self.get_results()
        writer = LoggingSummaryWriter(log_dir=f"{get_logging_dir()}/runs/{run_name}", restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return
        writer.add_scalar(name + "/avg_dist_to_goal", results_dict[K_AVG_DIST], iteration)
        writer.add_scalar(name + "/success_rate", results_dict[K_RATE], iteration)

    def get_results(self):
        """Return the accumulated results as a dict."""
        return self.results.get_dict()

    def save_results(self):
        """Write the accumulated results dict as JSON to the results path of this run."""
        # Write results dict
        path = get_results_path(self.run_name, makedir=True)
        with open(path, "w") as fp:
            json.dump(self.get_results(), fp)
class DataEvalLandmarkSide(EvaluateBase):
    """
    Evaluates rollouts of "go to <side> of <landmark>" tasks: a rollout succeeds if it ends within
    LANDMARK_REGION_RADIUS of the template's landmark and on the side named by the template.
    """

    def __init__(self, run_name="", save_images=True):
        """
        :param run_name: name used for the results directory / results file
        :param save_images: if True, evaluate_rollout saves a plot (and the results so far) per rollout
        """
        # NOTE(review): super(EvaluateBase, self) starts the lookup *after* EvaluateBase in the MRO, so
        # EvaluateBase.__init__ itself is not called here - confirm that this is intended.
        super(EvaluateBase, self).__init__()
        # NOTE(review): PolicyRoller.load_all_envs unpacks this same call as (train, dev, test, corpus);
        # test_i and dev_i may be swapped here. Neither attribute is read inside this class.
        self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
        self.passing_distance = LANDMARK_REGION_RADIUS
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images

    def evaluate_dataset(self, list_of_rollouts):
        """Evaluate every non-empty rollout, accumulate into self.results and save the final results."""
        for rollout in list_of_rollouts:
            # An empty rollout has no last sample to evaluate
            if len(rollout) == 0:
                continue
            self.results += self.evaluate_rollout(rollout)
        # evaluate_rollout can only save what was accumulated *before* the current rollout, so without
        # this final save the last rollout would never reach the results file
        self.save_results()

    def get_landmark_pos(self, env_id):
        """Return the position (config "xPos", config "zPos") of the template's "landmark1" as a numpy array."""
        template = load_template(env_id)
        config = load_env_config(env_id)
        landmark_idx = config["landmarkName"].index(template["landmark1"])
        return np.asarray([config["xPos"][landmark_idx], config["zPos"][landmark_idx]])

    def correct_side(self, rollout, env_id):
        """
        Check whether the rollout ended on the side of the landmark that the template asks for.
        :param rollout: non-empty list of sample dicts with a "state" entry that provides get_pos()
        :param env_id: environment id used to load the template and the landmark position
        :return: truthy if the end position is on the requested side; False for an unknown side name
        """
        template = load_template(env_id)
        landmark_pos = self.get_landmark_pos(env_id)

        # Samples are dicts (evaluate_rollout reads sample["state"] as well), so index instead of using
        # attribute access. np.asarray lets the vector arithmetic below work for list positions too.
        last_pos = np.asarray(rollout[-1]["state"].get_pos())
        first_pos = np.asarray(rollout[0]["state"].get_pos())
        dir_landmark = landmark_pos - first_pos

        if len(N_SIDES) == 4:
            dir_lm_to_last = last_pos - landmark_pos
            dir_landmark_norm = dir_landmark / (np.linalg.norm(dir_landmark) + 1e-18)
            dir_ortho_norm = np.asarray([dir_landmark_norm[1], -dir_landmark_norm[0]])

            # Express the landmark->end vector along the approach direction and the direction orthogonal to it
            proj = np.dot(dir_lm_to_last, dir_landmark_norm)
            opp_proj = np.dot(dir_lm_to_last, dir_ortho_norm)

            angle = math.atan2(proj, opp_proj)

            DEG45 = 0.785398
            side = template["side"]
            if side == "right":
                return -DEG45 < angle < DEG45
            if side == "back":
                return DEG45 < angle < 3 * DEG45
            if side == "left":
                return 3 * DEG45 < angle < math.pi or -math.pi < angle < -3 * DEG45
            if side == "front":
                return -3 * DEG45 < angle < -DEG45
            print("Unknown side: ", side)
            print("Angle: ", angle)
            return False

        # len(N_SIDES) = 2
        dir_end = last_pos - first_pos
        # z component of the cross product of the two 2D vectors, written out explicitly
        # (np.cross on 2-element vectors is deprecated since NumPy 2.0)
        z = dir_landmark[0] * dir_end[1] - dir_landmark[1] * dir_end[0]
        if template["side"] == "left":
            return z > 0
        return z < 0

    def evaluate_rollout(self, rollout):
        """
        Evaluate a single rollout.
        :param rollout: non-empty list of samples; ids and stop actions are read from the last sample, either
        from its "metadata" dict or, if that key is missing, from the sample itself
        :return: ResultsLandmarkSide(success, distance to the path end, whether the landmark region was reached)
        """
        last_sample = rollout[-1]
        # Accept both nested ("metadata") and flat samples
        meta = last_sample.get("metadata", last_sample)
        env_id = meta["env_id"]
        seg_idx = meta["seg_idx"]
        set_idx = meta["set_idx"]

        # TODO: Allow multiple templates / instructions per env
        path = load_path(env_id)

        end_pos = np.asarray(last_sample["state"].get_pos())
        landmark_pos = self.get_landmark_pos(env_id)

        target_end_pos = np.asarray(path[-1])
        end_goal_dist = np.linalg.norm(end_pos - target_end_pos)
        end_lm_dist = np.linalg.norm(end_pos - landmark_pos)
        correct_landmark_region = end_lm_dist < LANDMARK_REGION_RADIUS
        correct_quadrant = self.correct_side(rollout, env_id)

        if meta["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif meta["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        success = correct_landmark_region and correct_quadrant

        side_txt = "Correct landmark" if correct_landmark_region else "Wrong landmark"
        result = "Success" if success else "Fail"
        # Currently not drawn: plot_paths below is called with texts=[]
        texts = [who_stopped, result, side_txt, "run:" + self.run_name]

        if self.save_images:
            results_dir = get_results_dir(self.run_name, makedir=True)
            self.presenter.plot_paths(rollout, interactive=False, texts=[])
            filename = os.path.join(results_dir, str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            self.presenter.save_plot(filename)
            # Checkpoint of the results accumulated before this rollout
            self.save_results()

        return ResultsLandmarkSide(success, end_goal_dist, correct_landmark_region)

    def write_summaries(self, run_name, name, iteration):
        """Write average goal distance and success rate as scalars named "<name>/..." at the given iteration."""
        results_dict = self.get_results()
        writer = LoggingSummaryWriter(log_dir="runs/" + run_name, restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return
        writer.add_scalar(name + "/avg_dist_to_goal", results_dict[K_AVG_DIST], iteration)
        writer.add_scalar(name + "/success_rate", results_dict[K_RATE], iteration)
        writer.save_spied_values()

    def get_results(self):
        """Return the accumulated results as a dict."""
        return self.results.get_dict()

    def save_results(self):
        """Write the accumulated results dict as JSON to the results path of this run."""
        # Write results dict
        path = get_results_path(self.run_name, makedir=True)
        with open(path, "w") as fp:
            json.dump(self.get_results(), fp)
class DataEvalNL(EvaluateBase):
    """
    Evaluates natural-language rollouts by the distance between the final position and the end of the path
    (or of the instruction segment).

    NOTE(review): a class with the same name is defined earlier in this source. If both live in the same
    module, this later definition replaces the earlier one (which additionally accepts ``aug_len``).
    """

    def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None):
        """
        :param run_name: name used for the results directory / results file and printed on plots
        :param save_images: if True, evaluate_rollout saves a plot (and the results so far) per rollout
        :param entire_trajectory: if True, the target is the last point of the env path, otherwise the
        segment's "end_idx" point
        :param custom_instr: if not None, plot filenames get a suffix derived from the sample's instruction
        """
        # NOTE(review): super(EvaluateBase, self) starts the lookup *after* EvaluateBase in the MRO, so
        # EvaluateBase.__init__ itself is not called here - confirm that this is intended.
        super(EvaluateBase, self).__init__()
        # NOTE(review): PolicyRoller.load_all_envs unpacks this same call as (train, dev, test, corpus);
        # test_i and dev_i may be swapped here. Only the merged self.all_i is used in this class.
        self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
        self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
        self.passing_distance = DEFAULT_PASSING_DISTANCE
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images
        self.entire_trajectory = entire_trajectory
        self.custom_instr = custom_instr

    def evaluate_dataset(self, list_of_rollouts):
        """Evaluate every non-empty rollout, accumulate into self.results and save the final results."""
        for rollout in list_of_rollouts:
            if len(rollout) == 0:
                continue
            self.results += self.evaluate_rollout(rollout)
        # evaluate_rollout can only save what was accumulated *before* the current rollout, so without
        # this final save the last rollout would never reach the results file
        self.save_results()

    def evaluate_rollout(self, rollout):
        """
        Evaluate a single rollout.
        :param rollout: non-empty list of samples; ids and stop actions are read from the last sample, either
        from its "metadata" dict or, if that key is missing, from the sample itself
        :return: ResultsLandmarkSide(success, end_dist)
        """
        last_sample = rollout[-1]
        # Accept both nested ("metadata") and flat samples
        meta = last_sample.get("metadata", last_sample)
        env_id = meta["env_id"]
        seg_idx = meta["seg_idx"]
        set_idx = meta["set_idx"]

        # TODO: Allow multiple instruction sets / paths per env
        path = load_path(env_id)

        last_path_idx = len(path) - 1
        if self.entire_trajectory:
            path_end_idx = last_path_idx
        else:
            # Find the segment end index
            # NOTE(review): seg_idx is used directly as a list position here - verify that it always
            # equals the segment's position in the "instructions" list.
            path_end_idx = self.all_i[env_id][set_idx]["instructions"][seg_idx]["end_idx"]
            if path_end_idx > last_path_idx:
                path_end_idx = last_path_idx

        end_pos = np.asarray(last_sample["state"].get_pos())
        target_end_pos = np.asarray(path[path_end_idx])
        end_dist = np.linalg.norm(end_pos - target_end_pos)
        # Use the instance threshold (initialised to DEFAULT_PASSING_DISTANCE) so that overriding
        # self.passing_distance actually takes effect
        success = end_dist < self.passing_distance

        if meta["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif meta["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        result = "Success" if success else "Fail"
        texts = [who_stopped, result, "run:" + self.run_name]

        print(seg_idx, result)

        if self.save_images:
            results_dir = get_results_dir(self.run_name, makedir=True)
            print("Results dir: ", results_dir)
            self.presenter.plot_paths(rollout, interactive=False, texts=texts,
                                      entire_trajectory=self.entire_trajectory)
            filename = os.path.join(results_dir, str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            if self.custom_instr is not None:
                instruction = meta["instruction"]
                filename += "_" + instruction[:24] + "_" + instruction[-16:]
            self.presenter.save_plot(filename)
            # Checkpoint of the results accumulated before this rollout
            self.save_results()

        return ResultsLandmarkSide(success, end_dist)

    def write_summaries(self, run_name, name, iteration):
        """Write average goal distance and success rate as scalars named "<name>/..." at the given iteration."""
        results_dict = self.get_results()
        writer = LoggingSummaryWriter(log_dir="runs/" + run_name, restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return
        writer.add_scalar(name + "/avg_dist_to_goal", results_dict[K_AVG_DIST], iteration)
        writer.add_scalar(name + "/success_rate", results_dict[K_RATE], iteration)
        writer.save_spied_values()

    def get_results(self):
        """Return the accumulated results as a dict."""
        return self.results.get_dict()

    def save_results(self):
        """Write the accumulated results dict as JSON to the results path of this run."""
        # Write results dict
        path = get_results_path(self.run_name, makedir=True)
        with open(path, "w") as fp:
            json.dump(self.get_results(), fp)