def generate_video(
    args, images, episode_id, checkpoint_idx, spl, tb_writer, fps=10
) -> None:
    r"""Generate video according to specified information.

    Args:
        args: contains args.video_option and args.video_dir.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if args.video_option and len(images) > 0:
        video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
        if "disk" in args.video_option:
            images_to_video(images, args.video_dir, video_name)
        if "tensorboard" in args.video_option:
            tb_writer.add_video_from_np_images(
                f"episode{episode_id}", checkpoint_idx, images, fps=fps
            )
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    metric_name: str,
    metric_value: float,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metric_name: name of the performance metric, e.g. "spl".
        metric_value: value of metric.
        fps: fps for generated video.

    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = (
        f"episode{episode_id}_ckpt{checkpoint_idx}_"
        f"{metric_name}{metric_value:.2f}"
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
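A minimal usage sketch for the variant above. The output directory and frame contents are hypothetical, and `images_to_video` is assumed to be importable from the same visualization utilities:

import numpy as np

# Thirty blank 256x256 RGB frames stand in for real observations.
frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(30)]
generate_video(
    video_option=["disk"],
    video_dir="data/videos",  # hypothetical output directory
    images=frames,
    episode_id=0,
    checkpoint_idx=3,
    metric_name="spl",
    metric_value=0.87,
)
# Produces a video named "episode0_ckpt3_spl0.87" in data/videos/
# (the container format depends on images_to_video).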
def shortest_path_example(mode):
    config = habitat.get_config(config_paths="configs/tasks/pointnav.yaml")
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    env = SimpleRLEnv(config=config)
    goal_radius = env.episodes[0].goals[0].radius
    if goal_radius is None:
        goal_radius = config.SIMULATOR.FORWARD_STEP_SIZE
    follower = ShortestPathFollower(env.habitat_env.sim, goal_radius, False)
    follower.mode = mode
    print("Environment creation successful")

    for episode in range(3):
        env.reset()
        dirname = os.path.join(
            IMAGE_DIR, "shortest_path_example", mode, "%02d" % episode
        )
        if os.path.exists(dirname):
            shutil.rmtree(dirname)
        os.makedirs(dirname)
        print("Agent stepping around inside environment.")
        images = []
        while not env.habitat_env.episode_over:
            best_action = follower.get_next_action(
                env.habitat_env.current_episode.goals[0].position
            )
            observations, reward, done, info = env.step(best_action.value)
            im = observations["rgb"]
            top_down_map = draw_top_down_map(
                info, observations["heading"], im.shape[0]
            )
            output_im = np.concatenate((im, top_down_map), axis=1)
            images.append(output_im)
        images_to_video(images, dirname, "trajectory")
        print("Episode finished")
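The helper `draw_top_down_map` is not defined in this snippet. One plausible implementation, adapted from the Habitat shortest-path-follower example; treat it as a sketch, since the exact `info` keys and `maps` helpers vary across Habitat versions:

import cv2
import numpy as np
from habitat.utils.visualizations import maps

def draw_top_down_map(info, heading, output_size):
    # Colorize the occupancy grid produced by the TOP_DOWN_MAP measure.
    top_down_map = maps.colorize_topdown_map(info["top_down_map"]["map"])
    original_map_size = top_down_map.shape[:2]
    map_scale = np.array((1, original_map_size[1] * 1.0 / original_map_size[0]))
    new_map_size = np.round(output_size * map_scale).astype(np.int32)
    # OpenCV expects dsize as (width, height).
    top_down_map = cv2.resize(top_down_map, (new_map_size[1], new_map_size[0]))
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    map_agent_pos = np.round(
        map_agent_pos * new_map_size / original_map_size
    ).astype(np.int32)
    # Draw the agent sprite at its map coordinate, rotated to its heading.
    return maps.draw_agent(
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )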
def generate_video(
    config: Config,
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    spl: float,
    tb_writer: Union[DummyWriter, TensorboardWriter],
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        config: config object that contains video_option and video_dir.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if config.video_option and len(images) > 0:
        video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
        if "disk" in config.video_option:
            images_to_video(images, config.video_dir, video_name)
        if "tensorboard" in config.video_option:
            tb_writer.add_video_from_np_images(
                f"episode{episode_id}", checkpoint_idx, images, fps=fps
            )
def reference_path_example(mode):
    """Saves a video of a shortest path follower agent navigating from a start
    position to a goal. The agent follows the ground truth reference path by
    navigating to intermediate viewpoints en route to the goal.

    Args:
        mode: 'geodesic_path' or 'greedy'
    """
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test.yaml"
    )
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        follower = ShortestPathFollower(
            env.habitat_env.sim, goal_radius=0.5, return_one_hot=False
        )
        follower.mode = mode
        print("Environment creation successful")

        for episode in range(3):
            env.reset()
            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )
            dirname = os.path.join(
                IMAGE_DIR, "vln_reference_path_example", mode, "%02d" % episode
            )
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0
            reference_path = env.habitat_env.current_episode.reference_path + [
                env.habitat_env.current_episode.goals[0].position
            ]
            for point in reference_path:
                done = False
                while not done:
                    best_action = follower.get_next_action(point)
                    print(best_action)
                    if best_action is None or best_action == 0:
                        break
                    observations, reward, done, info = env.step(best_action)
                    save_map(observations, info, images)
                    steps += 1

            print(f"Navigated to goal in {steps} steps.")
            images_to_video(images, dirname, str(episode_id))
            images = []
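The `save_map` helper used above (and in the later snippets) is not shown. A minimal sketch of what it presumably does, assuming the `draw_top_down_map` helper from earlier in this section; the name and behavior are inferred, not confirmed by the source:

import numpy as np

def save_map(observations, info, images):
    # Hypothetical helper: render the RGB frame side-by-side with the
    # top-down map and append the composite to the frame buffer.
    im = observations["rgb"]
    top_down_map = draw_top_down_map(info, observations["heading"], im.shape[0])
    images.append(np.concatenate((im, top_down_map), axis=1))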
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    scene_name: str,
    sound: str,
    sr: int,
    episode_id: int,
    checkpoint_idx: int,
    metric_name: str,
    metric_value: float,
    tb_writer: TensorboardWriter,
    fps: int = 10,
    audios: List[str] = None,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        scene_name: scene name for video naming.
        sound: name of the sound source for video naming.
        sr: audio sampling rate.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metric_name: name of the performance metric, e.g. "spl".
        metric_value: value of metric.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.
        audios: raw audio files.

    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = (
        f"{scene_name}_{episode_id}_{sound}_{metric_name}{metric_value:.2f}"
    )
    if "disk" in video_option:
        assert video_dir is not None
        if audios is None:
            images_to_video(images, video_dir, video_name)
        else:
            images_to_video_with_audio(
                images, video_dir, video_name, audios, sr, fps=fps
            )
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
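A usage sketch for the audio-enabled variant. All values are hypothetical; note that `audios` is annotated `List[str]` but the docstring and the sound-spaces-style call path pass raw waveform chunks, which is what this sketch assumes (one chunk per frame, chunk length being an assumption):

import numpy as np

frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(30)]
# One waveform chunk per frame at a 44.1 kHz sampling rate (silence here).
chunks = [np.zeros(44100, dtype=np.float32) for _ in range(30)]
generate_video(
    video_option=["disk"],
    video_dir="data/videos",   # hypothetical
    images=frames,
    scene_name="apartment_0",  # hypothetical
    sound="telephone",         # hypothetical
    sr=44100,
    episode_id=0,
    checkpoint_idx=1,
    metric_name="spl",
    metric_value=0.50,
    tb_writer=None,            # unused when only writing to disk
    audios=chunks,
)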
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: Union[int, str],
    checkpoint_idx: int,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> Optional[str]:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metrics: dictionary of metric names to values, used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        The generated video name, or None if there were no frames.
    """
    if len(images) < 1:
        return

    metric_strs = []
    for k, v in metrics.items():
        if isinstance(v, str):
            metric_strs.append(f"{k}={v}")
        else:
            metric_strs.append(f"{k}={v:.2f}")

    video_name = f"episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name, fps=fps)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
    return video_name
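A short example of the name this dictionary-based variant produces (all values hypothetical):

import numpy as np

frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(8)]
video_name = generate_video(
    video_option=["disk"],
    video_dir="data/videos",  # hypothetical
    images=frames,
    episode_id="ep042",
    checkpoint_idx=7,
    metrics={"spl": 0.913, "success": 1.0},
    tb_writer=None,           # only needed for the "tensorboard" option
)
# video_name == "episode=ep042-ckpt=7-spl=0.91-success=1.00"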
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    tag: str,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        tag: additional tag for naming the video.
        metrics: metric dictionary used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if len(images) < 1:
        return

    metric_strs = []
    for k, v in metrics.items():
        metric_strs.append(f"{k}={v:.2f}")

    video_name = f"{tag}_episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
def save_video(self, file: Union[str, Path], fps: int = 10) -> None:
    assert self._capture_video, "Not capturing video; nothing to save."
    if len(self._rgb_frames) == 0:
        return
    if len(self._rgb_frames) > 1:
        first_shape = self._rgb_frames[0].shape
        next_shape = self._rgb_frames[1].shape
        if first_shape != next_shape:
            assert first_shape[0] == next_shape[0]
            # The first frame is missing the top-down map, so copy it from
            # the next frame.
            td_map = self._rgb_frames[1][:, first_shape[1]:, :]
            self._rgb_frames[0] = np.concatenate(
                (self._rgb_frames[0], td_map), 1
            )
    file = Path(file)
    with capture_output("save_video"):
        images_to_video(
            self._rgb_frames, str(file.parent), file.name, fps=fps, quality=5
        )
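A usage sketch; `recorder` stands in for whatever object owns this method, and the file path is hypothetical:

from pathlib import Path

# Assumes the recorder was constructed with video capture enabled and has
# accumulated frames during a rollout.
recorder.save_video(Path("data/videos/episode_007.mp4"), fps=30)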
def reset(self):
    # reset hidden state and set done
    self.test_recurrent_hidden_states = torch.zeros(1, self.hidden_size).cuda()
    self.not_done_masks = torch.zeros(1, 1).cuda()

    # reset observation storage (and verify)
    z = torch.zeros(1, 2).cuda()
    mask_out_done = {name: z for name in self.current_obs.sensor_names}
    if "global_pos" in self.current_obs.sensor_names:
        mask_out_done["global_pos"] = torch.zeros(1, 1).cuda()
    self.current_obs.clear_done(mask_out_done)
    for value in self.current_obs.peek().values():
        assert (
            torch.sum(value.peek()).item() < 1e-6
        ), "did not clear the current_obs properly"

    # log everything
    if len(self.episode_pgs) != 0:
        # log video (and save to log_dir)
        if self.save_eval_videos:
            images_to_video(
                images=self.episode_rgbs,
                output_dir=self.log_dir,
                video_name=f"test_{self.episode_num}",
            )
            self.mlog.add_meter(
                f"diagnostics/rollout_{self.episode_num}",
                tnt.meter.SingletonMeter(),
                ptype="video",
            )
            if self.use_visdom:
                vid_path = os.path.join(self.log_dir, f"test_{self.episode_num}.mp4")
                self.mlog.update_meter(
                    vid_path,
                    meters={f"diagnostics/rollout_{self.episode_num}"},
                    phase="val",
                )
            else:
                print("Video support for TB is weak; not recommended.")
                rgb_tensor = torch.Tensor(self.episode_rgbs).unsqueeze(dim=0)
                self.mlog.update_meter(
                    rgb_tensor,
                    meters={f"diagnostics/rollout_{self.episode_num}"},
                    phase="val",
                )

        # reset log
        self.mlog.reset_meter(self.episode_num, mode="val")

    # reset episode logs
    self.episode_rgbs = []
    self.episode_pgs = []
    self.episode_values = []
    self.episode_entropy = []
    self.episode_lengths.append(self.t)
    self.episode_num += 1
    self.t = 0
    self.last_action = None
def video_from_dir(vidDir, images_type="png"):
    """Convert the images in a directory to a video.

    Images are ordered in alpha-numeric order for the video. The output
    video is called 'video.mp4' and placed in the same directory.

    Args:
        vidDir: str directory path that contains the images.
        images_type: file extension of the images to include.
    """
    assert os.path.isdir(vidDir), "vidDir must be a valid directory"
    images = sorted(os.listdir(vidDir))
    images.sort(key=natural_sort_key)
    imArray = []
    for i in images:
        if not i.endswith("." + images_type):
            continue
        imArray.append(imread(os.path.join(vidDir, i)))
    images_to_video(imArray, vidDir, "video", fps=2)
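`natural_sort_key` is referenced but not defined here. A common implementation, offered as an assumption rather than the one this codebase actually uses:

import re

def natural_sort_key(s):
    # Split "frame10.png" into ["frame", 10, ".png"] so that numeric runs
    # compare as integers instead of character-by-character, putting
    # frame2.png before frame10.png.
    return [
        int(tok) if tok.isdigit() else tok.lower()
        for tok in re.split(r"(\d+)", s)
    ]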
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    spl: float,
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
def reference_path_example(mode):
    """Saves a video of a shortest path follower agent navigating from a start
    position to a goal. The agent follows the ground truth reference path by
    navigating to intermediate viewpoints en route to the goal.

    Args:
        mode: 'geodesic_path' or 'greedy'
    """
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test.yaml"
    )
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        print("Environment creation successful")

        sim_time = 30  # @param {type:"integer"}
        continuous_nav = True  # @param {type:"boolean"}
        if continuous_nav:
            control_frequency = 10  # @param {type:"slider", min:1, max:30, step:1}
            frame_skip = 6  # @param {type:"slider", min:1, max:30, step:1}
            fps = control_frequency * frame_skip
            print("fps = " + str(fps))

        control_sequence = []
        for action in range(int(sim_time * control_frequency)):
            if continuous_nav:
                # allow forward velocity and y rotation to vary
                control_sequence.append(
                    {
                        "forward_velocity": random.random() * 2.0,  # [0,2)
                        "rotation_velocity": (random.random() - 0.5) * 2.0,  # [-1,1)
                    }
                )
            else:
                control_sequence.append(random.choice(action_names))

        # create and configure a new VelocityControl structure
        vel_control = habitat_sim.physics.VelocityControl()
        vel_control.controlling_lin_vel = True
        vel_control.lin_vel_is_local = True
        vel_control.controlling_ang_vel = True
        vel_control.ang_vel_is_local = True

        for episode in range(6):
            env.reset()
            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )
            dirname = os.path.join(
                IMAGE_DIR, "vln_reference_path_example", mode, "%02d" % episode
            )
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0
            reference_path = env.habitat_env.current_episode.reference_path + [
                env.habitat_env.current_episode.goals[0].position
            ]
            # manually control the object's kinematic state via velocity integration
            time_step = 1.0 / (frame_skip * control_frequency)
            print("time_step = " + str(time_step))
            for action in control_sequence:
                # apply actions
                if continuous_nav:
                    # update the velocity control; local forward is -z
                    vel_control.linear_velocity = np.array(
                        [0, 0, -action["forward_velocity"]]
                    )
                    # local up is y
                    vel_control.angular_velocity = np.array(
                        [0, action["rotation_velocity"], 0]
                    )
                observations, reward, done, info = env.step(vel_control)
                save_map(observations, info, images)
                steps += 1

            print(f"Navigated to goal in {steps} steps.")
            images_to_video(images, dirname, str(episode_id), fps=int(1.0 / time_step))
            images = []
def test_noise_models_rgbd():
    DEMO_MODE = False
    N_STEPS = 100
    config = get_config()
    config.defrost()
    config.SIMULATOR.SCENE = (
        "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb"
    )
    config.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]
    config.freeze()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    valid_start_position = [-1.3731, 0.08431, 8.60692]
    expected_pointgoal = [0.1, 0.2, 0.3]
    goal_position = np.add(valid_start_position, expected_pointgoal)

    # starting quaternion is rotated 180 degrees along the z-axis, which
    # corresponds to the simulator using z-negative as forward action
    start_rotation = [0, 0, 0, 1]

    test_episode = NavigationEpisode(
        episode_id="0",
        scene_id=config.SIMULATOR.SCENE,
        start_position=valid_start_position,
        start_rotation=start_rotation,
        goals=[NavigationGoal(position=goal_position)],
    )
    print(f"{test_episode}")
    with habitat.Env(config=config, dataset=None) as env:
        env.episode_iterator = iter([test_episode])
        no_noise_obs = [env.reset()]
        no_noise_states = [env.sim.get_agent_state()]

        actions = [
            sample_non_stop_action(env.action_space) for _ in range(N_STEPS)
        ]
        for action in actions:
            no_noise_obs.append(env.step(action))
            no_noise_states.append(env.sim.get_agent_state())
        env.close()

        config.defrost()
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL = "GaussianNoiseModel"
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS = habitat.Config()
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS.INTENSITY_CONSTANT = 0.5
        config.SIMULATOR.DEPTH_SENSOR.NOISE_MODEL = "RedwoodDepthNoiseModel"
        config.SIMULATOR.ACTION_SPACE_CONFIG = "pyrobotnoisy"
        config.SIMULATOR.NOISE_MODEL = habitat.Config()
        config.SIMULATOR.NOISE_MODEL.ROBOT = "LoCoBot"
        config.SIMULATOR.NOISE_MODEL.CONTROLLER = "Proportional"
        config.SIMULATOR.NOISE_MODEL.NOISE_MULTIPLIER = 0.5
        config.freeze()

        env = habitat.Env(config=config, dataset=None)
        env.episode_iterator = iter([test_episode])
        obs = env.reset()
        # np.float is removed in recent NumPy; use np.float64 instead.
        assert np.linalg.norm(
            obs["rgb"].astype(np.float64) - no_noise_obs[0]["rgb"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["rgb"].astype(np.float64)
        ), "No RGB noise detected."
        assert np.linalg.norm(
            obs["depth"].astype(np.float64)
            - no_noise_obs[0]["depth"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["depth"].astype(np.float64)
        ), "No Depth noise detected."

        images = []
        state = env.sim.get_agent_state()
        angle_diffs = []
        pos_diffs = []
        for action in actions:
            prev_state = state
            obs = env.step(action)
            state = env.sim.get_agent_state()
            position_change = np.linalg.norm(
                np.array(state.position) - np.array(prev_state.position), ord=2
            )

            if action["action"][:5] == "TURN_":
                angle_diff = abs(
                    angle_between_quaternions(state.rotation, prev_state.rotation)
                    - np.deg2rad(config.SIMULATOR.TURN_ANGLE)
                )
                angle_diffs.append(angle_diff)
            else:
                pos_diffs.append(
                    abs(position_change - config.SIMULATOR.FORWARD_STEP_SIZE)
                )

            if DEMO_MODE:
                images.append(observations_to_image(obs, {}))

        if DEMO_MODE:
            images_to_video(images, "data/video/test_noise", "test_noise")

        assert np.mean(angle_diffs) > 0.025, "No turn action actuation noise detected."
        assert np.mean(pos_diffs) > 0.025, "No forward action actuation noise detected."
for maps_chunk in torch.split(maps, batch_size):
    maps_chunk = map_transform(maps_chunk)
    maps_lst.append(maps_chunk.to("cpu", non_blocking=True))
maps = torch.cat(maps_lst).numpy()
for (s, _), mapp in zip(traj, maps):
    s["map"] = mapp

for task, taskonomy_data in zip(tasks, all_taskonomys):
    for (s, _), taskonomy_frame in zip(traj, taskonomy_data):
        s[f"taskonomy_{task}"] = taskonomy_frame
        if task == "curvature":
            # for backwards support
            s["taskonomy"] = taskonomy_frame

# write trajectory
cur_scene_name = env._env.current_episode.scene_id.split("/")[-1].split(".")[0]
episode_id = env._env.current_episode.episode_id
single_traj_dir = os.path.join(TRAJ_DIR, cur_scene_name, f"episode_{episode_id}")
os.makedirs(single_traj_dir, exist_ok=True)
for i, (obs, act) in enumerate(traj):
    Image.fromarray(obs["rgb_filled"]).save(
        os.path.join(single_traj_dir, f"rgb_filled_{i:03d}.png")
    )
    del obs["rgb_filled"]
    np.savez_compressed(os.path.join(single_traj_dir, f"action_{i:03d}.npz"), act)
    for k, v in obs.items():
        np.savez_compressed(os.path.join(single_traj_dir, f"{k}_{i:03d}.npz"), v)

if SAVE_VIDEO:
    images_to_video(images, IMAGE_DIR, str(episode))
def reference_path_example(mode):
    """Saves a video of a shortest path follower agent navigating from a start
    position to a goal. The agent follows the ground truth reference path by
    navigating to intermediate viewpoints en route to the goal.

    Args:
        mode: 'geodesic_path' or 'greedy'
    """
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test_continuous.yaml"
    )
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        print("Environment creation successful")

        sim_time = 30  # @param {type:"integer"}
        continuous_nav = True  # @param {type:"boolean"}
        if continuous_nav:
            control_frequency = 10  # @param {type:"slider", min:1, max:30, step:1}
            frame_skip = 6  # @param {type:"slider", min:1, max:30, step:1}
            fps = control_frequency * frame_skip
            print("fps = " + str(fps))
        control_sequence = []

        # create and configure a new VelocityControl structure
        vel_control = habitat_sim.physics.VelocityControl()
        vel_control.controlling_lin_vel = True
        vel_control.lin_vel_is_local = True
        vel_control.controlling_ang_vel = True
        vel_control.ang_vel_is_local = True

        for episode in range(6):
            env.reset()
            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )
            dirname = os.path.join(
                IMAGE_DIR, "vln_reference_path_example", mode, "%02d" % episode
            )
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0
            reference_path = env.habitat_env.current_episode.reference_path
            waypoints = np.array(reference_path)
            for i in range(waypoints.shape[1] - 1):
                if np.abs(np.linalg.norm(waypoints[i + 1, :] - waypoints[i, :])) < 5:
                    waypoints = np.delete(waypoints, (i + 1), axis=0)
            waypoints = waypoints[0::10]
            x_wp = waypoints[:, 0]
            y_wp = -waypoints[:, 2]
            z_wp = waypoints[:, 1]
            ax = x_wp
            ay = y_wp
            cx, cy, cyaw, ck, s_profile, s = cubic_spline_planner.calc_spline_course(
                ax, ay, ds=0.01
            )
            goal = [cx[-1], cy[-1], cyaw[-1]]

            agent_forward = quat_to_magnum(
                env.habitat_env._sim.get_agent_state().rotation
            ).transform_vector(mn.Vector3(0, 0, -1.0))
            angle = quaternion.as_euler_angles(
                env.habitat_env._sim.get_agent_state().rotation
            )
            init_x = env.habitat_env._sim.get_agent_state().position[0]
            init_y = -env.habitat_env._sim.get_agent_state().position[2]
            # init_yaw = angle[1]
            init_yaw = math.atan2(agent_forward[0], agent_forward[2])
            state = State(x=init_x, y=init_y, yaw=init_yaw)
            time_step = 1.0 / 30
            T = 500
            t, x, y, yaw, vel, omeg, images = closed_loop_prediction(
                env, state, cx, cy, cyaw, ck, s_profile, goal, images,
                vel_control, time_step,
            )
            images_to_video(images, dirname, str(episode_id), fps=int(1.0 / time_step))
            images = []
def train_model(self):
    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)
    current_rewards = np.zeros(self.shell_args.num_processes)
    total_num_steps = self.start_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)
    egomotion_loss = 0
    video_frames = []
    num_episodes = 0
    # self.evaluate_model()

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    if self.shell_args.algo == "supervised":
        obs["best_next_action"] = pt_util.from_numpy(obs["best_next_action"][:, ACTION_SPACE])
    self.rollouts.copy_obs(obs, 0)
    distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
    self.train_stats["start_geodesic_distance"][:] = distances
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None
    num_updates = (
        int(self.shell_args.num_env_steps) // self.shell_args.num_forward_rollout_steps
    ) // self.shell_args.num_processes

    try:
        for iter_count in range(num_updates):
            if self.shell_args.tensorboard:
                if iter_count % 500 == 0:
                    print("Logging conv summaries")
                    self.logger.network_conv_summary(self.agent, total_num_steps)
                elif iter_count % 100 == 0:
                    print("Logging variable summaries")
                    self.logger.network_variable_summary(self.agent, total_num_steps)
            if self.shell_args.use_linear_lr_decay:
                # decrease learning rate linearly
                update_linear_schedule(
                    self.optimizer.optimizer, iter_count, num_updates, self.shell_args.lr
                )
            if self.shell_args.algo == "ppo" and self.shell_args.use_linear_clip_decay:
                self.optimizer.clip_param = self.shell_args.clip_param * (
                    1 - iter_count / float(num_updates)
                )
            if hasattr(self.agent.base, "enable_decoder"):
                if self.shell_args.record_video:
                    self.agent.base.enable_decoder()
                else:
                    self.agent.base.disable_decoder()

            for step in range(self.shell_args.num_forward_rollout_steps):
                with torch.no_grad():
                    start_t = time.time()
                    value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                        {
                            "images": self.rollouts.obs[step],
                            "target_vector": self.rollouts.additional_observations_dict["pointgoal"][step],
                            "prev_action_one_hot": self.rollouts.additional_observations_dict["prev_action_one_hot"][step],
                        },
                        self.rollouts.recurrent_hidden_states[step],
                        self.rollouts.masks[step],
                    )
                    action_cpu = pt_util.to_numpy_array(action.squeeze(1))
                    translated_action_space = ACTION_SPACE[action_cpu]

                    if not self.shell_args.end_to_end:
                        self.rollouts.additional_observations_dict["visual_encoder_features"][
                            self.rollouts.step
                        ].copy_(self.agent.base.visual_encoder_features)

                    if self.shell_args.use_motion_loss:
                        if self.shell_args.record_video:
                            if previous_visual_features is not None:
                                egomotion_pred = self.agent.base.predict_egomotion(
                                    self.agent.base.visual_features, previous_visual_features
                                )
                            previous_visual_features = self.agent.base.visual_features.detach()
                    timers[1] += time.time() - start_t

                    if self.shell_args.record_video:
                        # Copy so we don't mess with obs itself
                        draw_obs = OrderedDict()
                        for key, val in obs.items():
                            draw_obs[key] = pt_util.to_numpy_array(val).copy()
                        best_next_action = draw_obs.pop("best_next_action", None)

                        if prev_action is not None:
                            draw_obs["action_taken"] = pt_util.to_numpy_array(
                                self.agent.last_dist.probs
                            ).copy()
                            draw_obs["action_taken"][:] = 0
                            draw_obs["action_taken"][
                                np.arange(self.shell_args.num_processes), prev_action
                            ] = 1
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                                ACTION_SPACE_TO_SIM_ACTION[ACTION_SPACE[prev_action.squeeze()]]
                            ]
                            draw_obs["action_prob"] = pt_util.to_numpy_array(
                                prev_action_probs
                            ).copy()
                        else:
                            draw_obs["action_taken"] = None
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[SimulatorActions.STOP]
                            draw_obs["action_prob"] = None
                        prev_action = action_cpu
                        prev_action_probs = self.agent.last_dist.probs.detach()
                        if (
                            hasattr(self.agent.base, "decoder_outputs")
                            and self.agent.base.decoder_outputs is not None
                        ):
                            min_channel = 0
                            for key, num_channels in self.agent.base.decoder_output_info:
                                outputs = self.agent.base.decoder_outputs[
                                    :, min_channel : min_channel + num_channels, ...
                                ]
                                draw_obs["output_" + key] = pt_util.to_numpy_array(outputs).copy()
                                min_channel += num_channels
                        draw_obs["rewards"] = current_rewards.copy()
                        draw_obs["step"] = current_episode_lengths.copy()
                        draw_obs["method"] = self.shell_args.method_name
                        if best_next_action is not None:
                            draw_obs["best_next_action"] = best_next_action
                        if self.shell_args.use_motion_loss:
                            if egomotion_pred is not None:
                                draw_obs["egomotion_pred"] = pt_util.to_numpy_array(
                                    F.softmax(egomotion_pred, dim=1)
                                ).copy()
                            else:
                                draw_obs["egomotion_pred"] = None
                        images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                        if self.shell_args.algo == "supervised":
                            im_inds = [0, 2, 3, 1, 9, 6, 7, 8, 5, 4]
                        else:
                            im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                        height, width = images[0].shape[:2]
                        subplot_image = drawing.subplot(
                            images,
                            2,
                            5,
                            titles=titles,
                            normalize=normalize,
                            order=im_inds,
                            output_width=max(width, 320),
                            output_height=max(height, 320),
                        )
                        video_frames.append(subplot_image)

                    # save dists from previous step or else on reset they will be overwritten
                    distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])

                    start_t = time.time()
                    obs, rewards, dones, infos = self.envs.step(translated_action_space)
                    timers[0] += time.time() - start_t
                    obs["reward"] = rewards
                    if self.shell_args.algo == "supervised":
                        obs["best_next_action"] = pt_util.from_numpy(
                            obs["best_next_action"][:, ACTION_SPACE]
                        ).to(torch.float32)
                    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(
                        torch.float32
                    )
                    rewards *= REWARD_SCALAR
                    rewards = np.clip(rewards, -10, 10)

                    if self.shell_args.record_video and not dones[0]:
                        obs["top_down_map"] = infos[0]["top_down_map"]

                    if self.compute_surface_normals:
                        obs["surface_normals"] = pt_util.depth_to_surface_normals(
                            obs["depth"].to(self.device)
                        )

                    current_rewards = pt_util.to_numpy_array(rewards)
                    current_episode_rewards += pt_util.to_numpy_array(rewards).squeeze()
                    current_episode_lengths += 1
                    for ii, done_e in enumerate(dones):
                        if done_e:
                            num_episodes += 1
                            if self.shell_args.record_video:
                                final_rgb = draw_obs["rgb"].transpose(0, 2, 3, 1).squeeze(0)
                                if self.shell_args.task == "pointnav":
                                    if infos[ii]["spl"] > 0:
                                        draw_obs["action_taken_name"] = "Stop. Success"
                                        draw_obs["reward"] = [self.configs[0].TASK.SUCCESS_REWARD]
                                        final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                            np.array([0, 128, 0], dtype=np.uint8),
                                            (final_rgb.shape[0], final_rgb.shape[1], 1),
                                        )
                                    else:
                                        draw_obs["action_taken_name"] = "Timeout. Failed"
                                        final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                            np.array([128, 0, 0], dtype=np.uint8),
                                            (final_rgb.shape[0], final_rgb.shape[1], 1),
                                        )
                                elif (
                                    self.shell_args.task == "exploration"
                                    or self.shell_args.task == "flee"
                                ):
                                    draw_obs["action_taken_name"] = "End of episode."
                                final_rgb = final_rgb[np.newaxis, ...].transpose(0, 3, 1, 2)
                                draw_obs["rgb"] = final_rgb

                                images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                                height, width = images[0].shape[:2]
                                subplot_image = drawing.subplot(
                                    images,
                                    2,
                                    5,
                                    titles=titles,
                                    normalize=normalize,
                                    order=im_inds,
                                    output_width=max(width, 320),
                                    output_height=max(height, 320),
                                )
                                video_frames.extend(
                                    [subplot_image]
                                    * (
                                        self.configs[0].ENVIRONMENT.MAX_EPISODE_STEPS
                                        + 30
                                        - len(video_frames)
                                    )
                                )

                                if "top_down_map" in infos[0]:
                                    video_dir = os.path.join(self.shell_args.log_prefix, "videos")
                                    if not os.path.exists(video_dir):
                                        os.makedirs(video_dir)
                                    im_path = os.path.join(
                                        self.shell_args.log_prefix,
                                        "videos",
                                        "total_steps_%d.png" % total_num_steps,
                                    )
                                    from habitat.utils.visualizations import maps
                                    import imageio

                                    top_down_map = maps.colorize_topdown_map(
                                        infos[0]["top_down_map"]["map"]
                                    )
                                    imageio.imsave(im_path, top_down_map)

                                images_to_video(
                                    video_frames,
                                    os.path.join(self.shell_args.log_prefix, "videos"),
                                    "total_steps_%d" % total_num_steps,
                                )
                                video_frames = []

                            if self.shell_args.task == "pointnav":
                                print(
                                    "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                                    % (
                                        num_episodes,
                                        current_episode_lengths[ii],
                                        current_episode_rewards[ii],
                                        infos[ii]["spl"],
                                    )
                                )
                                self.train_stats["spl"][ii] = infos[ii]["spl"]
                                self.train_stats["success"][ii] = self.train_stats["spl"][ii] > 0
                                self.train_stats["end_geodesic_distance"][ii] = (
                                    distances[ii] - self.configs[0].SIMULATOR.FORWARD_STEP_SIZE
                                )
                                self.train_stats["delta_geodesic_distance"][ii] = (
                                    self.train_stats["start_geodesic_distance"][ii]
                                    - self.train_stats["end_geodesic_distance"][ii]
                                )
                                self.train_stats["num_steps"][ii] = current_episode_lengths[ii]
                            elif self.shell_args.task == "exploration":
                                print(
                                    "FINISHED EPISODE %d Reward %.3f States Visited %d"
                                    % (
                                        num_episodes,
                                        current_episode_rewards[ii],
                                        infos[ii]["visited_states"],
                                    )
                                )
                                self.train_stats["visited_states"][ii] = infos[ii]["visited_states"]
                            elif self.shell_args.task == "flee":
                                print(
                                    "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                                    % (
                                        num_episodes,
                                        current_episode_rewards[ii],
                                        infos[ii]["distance_from_start"],
                                    )
                                )
                                self.train_stats["distance_from_start"][ii] = infos[ii][
                                    "distance_from_start"
                                ]

                            self.train_stats["num_episodes"][ii] += 1
                            self.train_stats["reward"][ii] = current_episode_rewards[ii]

                            if self.shell_args.tensorboard:
                                log_dict = {
                                    "single_episode/reward": self.train_stats["reward"][ii]
                                }
                                if self.shell_args.task == "pointnav":
                                    log_dict.update(
                                        {
                                            "single_episode/num_steps": self.train_stats["num_steps"][ii],
                                            "single_episode/spl": self.train_stats["spl"][ii],
                                            "single_episode/success": self.train_stats["success"][ii],
                                            "single_episode/start_geodesic_distance": self.train_stats["start_geodesic_distance"][ii],
                                            "single_episode/end_geodesic_distance": self.train_stats["end_geodesic_distance"][ii],
                                            "single_episode/delta_geodesic_distance": self.train_stats["delta_geodesic_distance"][ii],
                                        }
                                    )
                                elif self.shell_args.task == "exploration":
                                    log_dict["single_episode/visited_states"] = self.train_stats["visited_states"][ii]
                                elif self.shell_args.task == "flee":
                                    log_dict["single_episode/distance_from_start"] = self.train_stats["distance_from_start"][ii]
                                self.logger.dict_log(
                                    log_dict,
                                    step=(total_num_steps + self.shell_args.num_processes * step + ii),
                                )

                            episode_rewards.append(current_episode_rewards[ii])
                            current_episode_rewards[ii] = 0
                            episode_lengths.append(current_episode_lengths[ii])
                            current_episode_lengths[ii] = 0
                            self.train_stats["start_geodesic_distance"][ii] = obs[
                                "goal_geodesic_distance"
                            ][ii]

                    # If done then clean the history of observations.
                    masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in dones])
                    bad_masks = torch.FloatTensor(
                        [[0.0] if "bad_transition" in info.keys() else [1.0] for info in infos]
                    )
                    self.rollouts.insert(
                        obs,
                        recurrent_hidden_states,
                        action,
                        action_log_prob,
                        value,
                        rewards,
                        masks,
                        bad_masks,
                    )

            with torch.no_grad():
                start_t = time.time()
                next_value = self.agent.get_value(
                    {
                        "images": self.rollouts.obs[-1],
                        "target_vector": self.rollouts.additional_observations_dict["pointgoal"][-1],
                        "prev_action_one_hot": self.rollouts.additional_observations_dict["prev_action_one_hot"][-1],
                    },
                    self.rollouts.recurrent_hidden_states[-1],
                    self.rollouts.masks[-1],
                ).detach()
                timers[1] += time.time() - start_t

            self.rollouts.compute_returns(
                next_value, self.shell_args.use_gae, self.shell_args.gamma, self.shell_args.tau
            )

            if not self.shell_args.no_weight_update:
                start_t = time.time()
                if self.shell_args.algo == "supervised":
                    (
                        total_loss,
                        action_loss,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                else:
                    (
                        total_loss,
                        value_loss,
                        action_loss,
                        dist_entropy,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                timers[2] += time.time() - start_t

            self.rollouts.after_update()

            # save for every interval-th episode or for the last epoch
            if iter_count % self.shell_args.save_interval == 0 or iter_count == num_updates - 1:
                self.save_checkpoint(5, total_num_steps)

            total_num_steps += (
                self.shell_args.num_processes * self.shell_args.num_forward_rollout_steps
            )

            if not self.shell_args.no_weight_update and iter_count % self.shell_args.log_interval == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) / (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    backward_spf = timers[2]
                    print(
                        (
                            "{} Updates {}, num timesteps {}, FPS {}, Env FPS "
                            "{}, \n Last {} training episodes: mean/median reward "
                            "{:.3f}/{:.3f}, min/max reward {:.3f}/{:.3f}\n"
                        ).format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        )
                    )
                    if self.shell_args.tensorboard:
                        log_dict.update(
                            {
                                "stats/full_spf": 1.0 / (fps + 1e-10),
                                "stats/env_spf": env_spf,
                                "stats/forward_spf": forward_spf,
                                "stats/backward_spf": backward_spf,
                                "stats/full_fps": fps,
                                "stats/env_fps": 1.0 / (env_spf + 1e-10),
                                "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                                "stats/backward_fps": 1.0 / (backward_spf + 1e-10),
                                "episode/mean_rewards": np.mean(episode_rewards),
                                "episode/median_rewards": np.median(episode_rewards),
                                "episode/min_rewards": np.min(episode_rewards),
                                "episode/max_rewards": np.max(episode_rewards),
                                "episode/mean_lengths": np.mean(episode_lengths),
                                "episode/median_lengths": np.median(episode_lengths),
                                "episode/min_lengths": np.min(episode_lengths),
                                "episode/max_lengths": np.max(episode_lengths),
                            }
                        )
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0
                if self.shell_args.tensorboard:
                    log_dict.update(
                        {
                            "loss/action": action_loss,
                            "loss/0_total": total_loss,
                            "loss/visual/0_total": visual_loss_total,
                            "loss/exploration/egomotion": egomotion_loss,
                            "loss/exploration/forward_model": forward_model_loss,
                        }
                    )
                    if self.shell_args.algo != "supervised":
                        log_dict.update(
                            {"loss/entropy": dist_entropy, "loss/value": value_loss}
                        )
                    for key, val in visual_loss_dict.items():
                        log_dict["loss/visual/" + key] = val
                    self.logger.dict_log(log_dict, step=total_num_steps)

            if self.shell_args.eval_interval is not None and total_num_steps % self.shell_args.eval_interval < (
                self.shell_args.num_processes * self.shell_args.num_forward_rollout_steps
            ):
                self.save_checkpoint(-1, total_num_steps)
                self.set_log_iter(total_num_steps)
                self.evaluate_model()
                # reset the env datasets
                self.envs.unwrapped.call(
                    ["switch_dataset"] * self.shell_args.num_processes,
                    [("train",)] * self.shell_args.num_processes,
                )
                obs = self.envs.reset()
                if self.compute_surface_normals:
                    obs["surface_normals"] = pt_util.depth_to_surface_normals(
                        obs["depth"].to(self.device)
                    )
                obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(
                    torch.float32
                )
                if self.shell_args.algo == "supervised":
                    obs["best_next_action"] = pt_util.from_numpy(
                        obs["best_next_action"][:, ACTION_SPACE]
                    )
                self.rollouts.copy_obs(obs, 0)
                distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
                self.train_stats["start_geodesic_distance"][:] = distances
                previous_visual_features = None
                egomotion_pred = None
                prev_action = None
                prev_action_probs = None
    except:
        # Catch all exceptions so a final save can be performed
        import traceback

        traceback.print_exc()
    finally:
        self.save_checkpoint(-1, total_num_steps)
def evaluate_agent(config: Config):
    split = config.EVAL.SPLIT
    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = split
    config.TASK_CONFIG.TASK.NDTW.SPLIT = split
    config.TASK_CONFIG.TASK.SDTW.SPLIT = split
    config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = True
    config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_STEPS = -1
    config.freeze()
    logger.info(config)

    env = construct_env(config)

    gt_path = config.TASK_CONFIG.TASK.NDTW.GT_PATH.format(
        split=config.TASK_CONFIG.DATASET.SPLIT
    )
    with gzip.open(gt_path, "rt") as f:
        gt_json = json.load(f)

    assert config.EVAL.NONLEARNING.AGENT in [
        "RandomAgent",
        "HandcraftedAgent",
    ], "EVAL.NONLEARNING.AGENT must be either RandomAgent or HandcraftedAgent."

    if config.EVAL.NONLEARNING.AGENT == "RandomAgent":
        agent = RandomContinuousAgent()
    else:
        agent = HandcraftedAgent()

    obs = env.reset()
    agent.reset()
    steps = 0
    is_done = False
    stats_episodes = {}  # dict of dicts that stores stats per episode
    ep_count = 0
    locations = []

    vel_control = habitat_sim.physics.VelocityControl()
    vel_control.controlling_lin_vel = True
    vel_control.lin_vel_is_local = True
    vel_control.controlling_ang_vel = True
    vel_control.ang_vel_is_local = True

    images = []
    IMAGE_DIR = os.path.join("examples", "images")
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)

    while len(stats_episodes) < config.EVAL.EPISODE_COUNT:
        current_episode = env.habitat_env.current_episode
        actions = agent.act()
        vel_control.linear_velocity = np.array([0, 0, -actions[0]])
        vel_control.angular_velocity = np.array([0, actions[1], 0])
        observations, _, done, info = env.step(vel_control)
        episode_over, success = done
        episode_success = success and (actions[0] < 0.25)
        is_done = episode_over or episode_success
        steps += 1
        locations.append(env.habitat_env._sim.get_agent_state().position.tolist())
        save_map(observations, info, images)
        dirname = os.path.join(
            IMAGE_DIR,
            "icra_video",
            "%02d" % env.habitat_env.current_episode.episode_id,
        )
        if os.path.exists(dirname):
            shutil.rmtree(dirname)
        os.makedirs(dirname)
        if is_done or steps == config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS:
            gt_locations = gt_json[str(current_episode.episode_id)]["locations"]
            dtw_distance = fastdtw(locations, gt_locations, dist=euclidean_distance)[0]
            nDTW = np.exp(
                -dtw_distance
                / (len(gt_locations) * config.TASK_CONFIG.TASK.NDTW.SUCCESS_DISTANCE)
            )
            locations = []
            is_done = False
            ep_count += 1
            steps = 0
            print("dones:", done)
            stats_episodes[current_episode.episode_id] = info
            stats_episodes[current_episode.episode_id]["ndtw"] = nDTW
            print("len stats episodes", len(stats_episodes))
            print("Current episode ID:", current_episode.episode_id)
            print("Episode Completed:", ep_count)
            print(" Episode done---------------------------------------------")
            obs = env.reset()
            print(stats_episodes[current_episode.episode_id])
            time_step = 1.0 / 30
            images_to_video(
                images, dirname, str(current_episode.episode_id), fps=int(1.0 / time_step)
            )
            images = []
    env.close()

    aggregated_stats = {}
    num_episodes = len(stats_episodes)
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum(v[stat_key] for v in stats_episodes.values()) / num_episodes
        )

    with open(
        f"stats_complete_{config.EVAL.NONLEARNING.AGENT}_{split}.json", "w"
    ) as f:
        json.dump(aggregated_stats, f, indent=4)
def evaluate_model(self):
    self.envs.unwrapped.call(
        ["switch_dataset"] * self.shell_args.num_processes,
        [("val",)] * self.shell_args.num_processes,
    )
    if not os.path.exists(self.eval_dir):
        os.makedirs(self.eval_dir)
    try:
        eval_net_file_name = sorted(
            glob.glob(
                os.path.join(
                    self.shell_args.log_prefix, self.shell_args.checkpoint_dirname, "*"
                )
                + "/*.pt"
            ),
            key=os.path.getmtime,
        )[-1]
        eval_net_file_name = (
            self.shell_args.log_prefix.replace(os.sep, "_")
            + "_"
            + "_".join(eval_net_file_name.split(os.sep)[-2:])[:-3]
        )
    except IndexError:
        print("Warning, no weights found")
        eval_net_file_name = "random_weights"
    eval_output_file = open(
        os.path.join(self.eval_dir, eval_net_file_name + ".csv"), "w"
    )
    print("Writing results to", eval_output_file.name)

    # Save the evaled net for posterity
    if self.shell_args.save_checkpoints:
        save_model = self.agent
        pt_util.save(
            save_model,
            os.path.join(
                self.shell_args.log_prefix,
                self.shell_args.checkpoint_dirname,
                "eval_weights",
            ),
            num_to_keep=-1,
            iteration=self.log_iter,
        )
        print("Wrote model to file for safe keeping")

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    recurrent_hidden_states = torch.zeros(
        self.shell_args.num_processes,
        self.agent.recurrent_hidden_state_size,
        dtype=torch.float32,
        device=self.device,
    )
    masks = torch.ones(
        self.shell_args.num_processes, 1, dtype=torch.float32, device=self.device
    )

    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)

    total_num_steps = self.log_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)
    num_episodes = 0

    print("Config\n", self.configs[0])

    # Initialize every time eval is run rather than just at the start
    dataset_sizes = np.array([len(dataset.episodes) for dataset in self.eval_datasets])
    eval_stats = dict(
        episode_ids=[None for _ in range(self.shell_args.num_processes)],
        num_episodes=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        num_steps=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        reward=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        spl=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        visited_states=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        success=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        end_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        start_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        delta_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        distance_from_start=np.zeros(self.shell_args.num_processes, dtype=np.float32),
    )
    eval_stats_means = dict(
        num_episodes=0,
        num_steps=0,
        reward=0,
        spl=0,
        visited_states=0,
        success=0,
        end_geodesic_distance=0,
        start_geodesic_distance=0,
        delta_geodesic_distance=0,
        distance_from_start=0,
    )
    eval_output_file.write("name,%s,iter,%d\n\n" % (eval_net_file_name, self.log_iter))
    if self.shell_args.task == "pointnav":
        eval_output_file.write(
            (
                "episode_id,num_steps,reward,spl,success,start_geodesic_distance,"
                "end_geodesic_distance,delta_geodesic_distance\n"
            )
        )
    elif self.shell_args.task == "exploration":
        eval_output_file.write("episode_id,reward,visited_states\n")
    elif self.shell_args.task == "flee":
        eval_output_file.write("episode_id,reward,distance_from_start\n")

    distances = pt_util.to_numpy(obs["goal_geodesic_distance"])
    eval_stats["start_geodesic_distance"][:] = distances
    progress_bar = tqdm.tqdm(total=self.num_eval_episodes_total)
    all_done = False
    iter_count = 0
    video_frames = []
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None

    if hasattr(self.agent.base, "enable_decoder"):
        if self.shell_args.record_video:
            self.agent.base.enable_decoder()
        else:
            self.agent.base.disable_decoder()

    while not all_done:
        with torch.no_grad():
            start_t = time.time()
            value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                {
                    "images": obs["rgb"].to(self.device),
                    "target_vector": obs["pointgoal"].to(self.device),
                    "prev_action_one_hot": obs["prev_action_one_hot"].to(self.device),
                },
                recurrent_hidden_states,
                masks,
            )
            action_cpu = pt_util.to_numpy(action.squeeze(1))
            translated_action_space = ACTION_SPACE[action_cpu]
            timers[1] += time.time() - start_t

            if self.shell_args.record_video:
                if self.shell_args.use_motion_loss:
                    if previous_visual_features is not None:
                        egomotion_pred = self.agent.base.predict_egomotion(
                            self.agent.base.visual_features, previous_visual_features
                        )
                    previous_visual_features = self.agent.base.visual_features.detach()

                # Copy so we don't mess with obs itself
                draw_obs = OrderedDict()
                for key, val in obs.items():
                    draw_obs[key] = pt_util.to_numpy(val).copy()
                best_next_action = draw_obs.pop("best_next_action", None)

                if prev_action is not None:
                    draw_obs["action_taken"] = pt_util.to_numpy(
                        self.agent.last_dist.probs
                    ).copy()
                    draw_obs["action_taken"][:] = 0
                    draw_obs["action_taken"][
                        np.arange(self.shell_args.num_processes), prev_action
                    ] = 1
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                        draw_obs["prev_action"].item()
                    ]
                    draw_obs["action_prob"] = pt_util.to_numpy(prev_action_probs).copy()
                else:
                    draw_obs["action_taken"] = None
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[SimulatorActions.STOP]
                    draw_obs["action_prob"] = None
                prev_action = action_cpu
                prev_action_probs = self.agent.last_dist.probs.detach()
                if (
                    hasattr(self.agent.base, "decoder_outputs")
                    and self.agent.base.decoder_outputs is not None
                ):
                    min_channel = 0
                    for key, num_channels in self.agent.base.decoder_output_info:
                        outputs = self.agent.base.decoder_outputs[
                            :, min_channel : min_channel + num_channels, ...
                        ]
                        draw_obs["output_" + key] = pt_util.to_numpy(outputs).copy()
                        min_channel += num_channels
                draw_obs["rewards"] = eval_stats["reward"]
                draw_obs["step"] = current_episode_lengths.copy()
                draw_obs["method"] = self.shell_args.method_name
                if best_next_action is not None:
                    draw_obs["best_next_action"] = best_next_action
                if self.shell_args.use_motion_loss:
                    if egomotion_pred is not None:
                        draw_obs["egomotion_pred"] = pt_util.to_numpy(
                            F.softmax(egomotion_pred, dim=1)
                        ).copy()
                    else:
                        draw_obs["egomotion_pred"] = None
                images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                height, width = images[0].shape[:2]
                subplot_image = drawing.subplot(
                    images,
                    2,
                    4,
                    titles=titles,
                    normalize=normalize,
                    output_width=max(width, 320),
                    output_height=max(height, 320),
                    order=im_inds,
                    fancy_text=True,
                )
                video_frames.append(subplot_image)

            # save dists from previous step or else on reset they will be overwritten
            distances = pt_util.to_numpy(obs["goal_geodesic_distance"])

            start_t = time.time()
            obs, rewards, dones, infos = self.envs.step(translated_action_space)
            timers[0] += time.time() - start_t
            obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(
                torch.float32
            )
            rewards *= REWARD_SCALAR
            rewards = np.clip(rewards, -10, 10)

            if self.shell_args.record_video and not dones[0]:
                obs["top_down_map"] = infos[0]["top_down_map"]

            if self.compute_surface_normals:
                obs["surface_normals"] = pt_util.depth_to_surface_normals(
                    obs["depth"].to(self.device)
                )

            current_episode_rewards += pt_util.to_numpy(rewards).squeeze()
            current_episode_lengths += 1
            to_pause = []
            for ii, done_e in enumerate(dones):
                if done_e:
                    num_episodes += 1
                    if self.shell_args.record_video:
                        if "top_down_map" in infos[ii]:
                            video_dir = os.path.join(self.shell_args.log_prefix, "videos")
                            if not os.path.exists(video_dir):
                                os.makedirs(video_dir)
                            im_path = os.path.join(
                                self.shell_args.log_prefix,
                                "videos",
                                "total_steps_%d.png" % total_num_steps,
                            )
                            top_down_map = maps.colorize_topdown_map(
                                infos[ii]["top_down_map"]["map"]
                            )
                            imageio.imsave(im_path, top_down_map)
                        images_to_video(
                            video_frames,
                            os.path.join(self.shell_args.log_prefix, "videos"),
                            "total_steps_%d" % total_num_steps,
                        )
                        video_frames = []

                    eval_stats["episode_ids"][ii] = infos[ii]["episode_id"]
                    if self.shell_args.task == "pointnav":
                        print(
                            "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                            % (
                                num_episodes,
                                current_episode_lengths[ii],
                                current_episode_rewards[ii],
                                infos[ii]["spl"],
                            )
                        )
                        eval_stats["spl"][ii] = infos[ii]["spl"]
                        eval_stats["success"][ii] = eval_stats["spl"][ii] > 0
                        eval_stats["num_steps"][ii] = current_episode_lengths[ii]
                        eval_stats["end_geodesic_distance"][ii] = (
                            infos[ii]["final_distance"]
                            if eval_stats["success"][ii]
                            else distances[ii]
                        )
                        eval_stats["delta_geodesic_distance"][ii] = (
                            eval_stats["start_geodesic_distance"][ii]
                            - eval_stats["end_geodesic_distance"][ii]
                        )
                    elif self.shell_args.task == "exploration":
                        print(
                            "FINISHED EPISODE %d Reward %.3f States Visited %d"
                            % (
                                num_episodes,
                                current_episode_rewards[ii],
                                infos[ii]["visited_states"],
                            )
                        )
                        eval_stats["visited_states"][ii] = infos[ii]["visited_states"]
                    elif self.shell_args.task == "flee":
                        print(
                            "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                            % (
                                num_episodes,
                                current_episode_rewards[ii],
                                infos[ii]["distance_from_start"],
                            )
                        )
                        eval_stats["distance_from_start"][ii] = infos[ii]["distance_from_start"]

                    eval_stats["num_episodes"][ii] += 1
                    eval_stats["reward"][ii] = current_episode_rewards[ii]

                    if eval_stats["num_episodes"][ii] <= dataset_sizes[ii]:
                        progress_bar.update(1)
                        eval_stats_means["num_episodes"] += 1
                        eval_stats_means["reward"] += eval_stats["reward"][ii]
                        if self.shell_args.task == "pointnav":
                            eval_output_file.write(
                                "%s,%d,%f,%f,%d,%f,%f,%f\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["num_steps"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["spl"][ii],
                                    eval_stats["success"][ii],
                                    eval_stats["start_geodesic_distance"][ii],
                                    eval_stats["end_geodesic_distance"][ii],
                                    eval_stats["delta_geodesic_distance"][ii],
                                )
                            )
                            eval_stats_means["num_steps"] += eval_stats["num_steps"][ii]
                            eval_stats_means["spl"] += eval_stats["spl"][ii]
                            eval_stats_means["success"] += eval_stats["success"][ii]
                            eval_stats_means["start_geodesic_distance"] += eval_stats[
                                "start_geodesic_distance"
                            ][ii]
                            eval_stats_means["end_geodesic_distance"] += eval_stats[
                                "end_geodesic_distance"
                            ][ii]
                            eval_stats_means["delta_geodesic_distance"] += eval_stats[
                                "delta_geodesic_distance"
                            ][ii]
                        elif self.shell_args.task == "exploration":
                            eval_output_file.write(
                                "%s,%f,%d\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["visited_states"][ii],
                                )
                            )
                            eval_stats_means["visited_states"] += eval_stats[
                                "visited_states"
                            ][ii]
                        elif self.shell_args.task == "flee":
                            eval_output_file.write(
                                "%s,%f,%f\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["distance_from_start"][ii],
                                )
                            )
                            eval_stats_means["distance_from_start"] += eval_stats[
                                "distance_from_start"
                            ][ii]
                        eval_output_file.flush()
                        if eval_stats["num_episodes"][ii] == dataset_sizes[ii]:
                            to_pause.append(ii)

                    episode_rewards.append(current_episode_rewards[ii])
                    current_episode_rewards[ii] = 0
                    episode_lengths.append(current_episode_lengths[ii])
                    current_episode_lengths[ii] = 0
                    eval_stats["start_geodesic_distance"][ii] = obs[
                        "goal_geodesic_distance"
                    ][ii]

            # If done then clean the history of observations.
            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in dones]
            ).to(self.device)

            # Reverse in order to maintain order in case of multiple.
            to_pause.reverse()
            for ii in to_pause:
                # Pause the environments that are done from the vectorenv.
                print("Pausing env", ii)
                self.envs.unwrapped.pause_at(ii)
                current_episode_rewards = np.concatenate(
                    (current_episode_rewards[:ii], current_episode_rewards[ii + 1:])
                )
                current_episode_lengths = np.concatenate(
                    (current_episode_lengths[:ii], current_episode_lengths[ii + 1:])
                )
                for key in eval_stats:
                    eval_stats[key] = np.concatenate(
                        (eval_stats[key][:ii], eval_stats[key][ii + 1:])
                    )
                dataset_sizes = np.concatenate((dataset_sizes[:ii], dataset_sizes[ii + 1:]))
                for key in obs:
                    if type(obs[key]) == torch.Tensor:
                        obs[key] = torch.cat((obs[key][:ii], obs[key][ii + 1:]), dim=0)
                    else:
                        obs[key] = np.concatenate(
                            (obs[key][:ii], obs[key][ii + 1:]), axis=0
                        )
                recurrent_hidden_states = torch.cat(
                    (recurrent_hidden_states[:ii], recurrent_hidden_states[ii + 1:]), dim=0
                )
                masks = torch.cat((masks[:ii], masks[ii + 1:]), dim=0)

            if len(dataset_sizes) == 0:
                progress_bar.close()
                all_done = True

            total_num_steps += self.shell_args.num_processes

            if iter_count % (self.shell_args.log_interval * 100) == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) / (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    print(
                        (
                            "{} Updates {}, num timesteps {}, FPS {}, Env FPS {}, "
                            "\n Last {} training episodes: mean/median reward {:.3f}/{:.3f}, "
                            "min/max reward {:.3f}/{:.3f}\n"
                        ).format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        )
                    )
                    if self.shell_args.tensorboard:
                        log_dict.update(
                            {
                                "stats/full_spf": 1.0 / (fps + 1e-10),
                                "stats/env_spf": env_spf,
                                "stats/forward_spf": forward_spf,
                                "stats/full_fps": fps,
                                "stats/env_fps": 1.0 / (env_spf + 1e-10),
                                "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                                "episode/mean_rewards": np.mean(episode_rewards),
                                "episode/median_rewards": np.median(episode_rewards),
                                "episode/min_rewards": np.min(episode_rewards),
                                "episode/max_rewards": np.max(episode_rewards),
                                "episode/mean_lengths": np.mean(episode_lengths),
                                "episode/median_lengths": np.median(episode_lengths),
                                "episode/min_lengths": np.min(episode_lengths),
                                "episode/max_lengths": np.max(episode_lengths),
                            }
                        )
                        self.eval_logger.dict_log(log_dict, step=self.log_iter)
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0
            iter_count += 1

    print("Finished testing")
    print("Wrote results to", eval_output_file.name)

    eval_stats_means = {
        key: val / eval_stats_means["num_episodes"]
        for key, val in eval_stats_means.items()
    }
    if self.shell_args.tensorboard:
        log_dict = {"single_episode/reward": eval_stats_means["reward"]}
        if self.shell_args.task == "pointnav":
            log_dict.update(
                {
                    "single_episode/num_steps": eval_stats_means["num_steps"],
                    "single_episode/spl": eval_stats_means["spl"],
                    "single_episode/success": eval_stats_means["success"],
                    "single_episode/start_geodesic_distance": eval_stats_means[
                        "start_geodesic_distance"
                    ],
                    "single_episode/end_geodesic_distance": eval_stats_means[
                        "end_geodesic_distance"
                    ],
                    "single_episode/delta_geodesic_distance": eval_stats_means[
                        "delta_geodesic_distance"
                    ],
                }
            )
        elif self.shell_args.task == "exploration":
            log_dict["single_episode/visited_states"] = eval_stats_means["visited_states"]
        elif self.shell_args.task == "flee":
            log_dict["single_episode/distance_from_start"] = eval_stats_means[
                "distance_from_start"
            ]
        self.eval_logger.dict_log(log_dict, step=self.log_iter)
    self.envs.unwrapped.resume_all()