def keep_shortest_trajectories(self, demos_file, num_to_keep,
                               episode_lengths=None):
  """Keep the num_to_keep shortest trajectories in the dataset at demos_file."""
  if episode_lengths is None:
    episode_lengths = []
    with gfile.GFile(demos_file, 'rb') as f:
      while True:
        try:
          demo = pickle.load(f)
          episode_lengths.append(len(demo['actions']))
        except EOFError:
          break
  sorted_lengths = sorted(enumerate(episode_lengths), key=operator.itemgetter(1))
  included_trajectories = set([e[0] for e in sorted_lengths[:num_to_keep]])
  print('Keeping', len(included_trajectories), 'trajectories')
  all_demos_file = (
      demos_file.replace(f'e{num_to_keep}', '').replace('.pkl', 'all.pkl'))
  gfile.Rename(demos_file, all_demos_file)
  new_demo_writer = pickle_dataset.DemoWriter(demos_file)
  i = 0
  with gfile.GFile(all_demos_file, 'rb') as f:
    while True:
      try:
        demo = pickle.load(f)
        if i in included_trajectories:
          new_demo_writer.write_episode(demo['observations'], demo['actions'])
        i += 1
      except EOFError:
        break
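# The reader loops above assume DemoWriter's on-disk layout: one pickle record per
# episode appended to the same file, so reading repeats pickle.load until EOFError.
# A minimal, self-contained sketch of that convention (the file name and episode
# dicts below are hypothetical examples, not part of the original code):
#
#   import pickle
#
#   with open('/tmp/demo_stream.pkl', 'wb') as f:
#     for actions in ([0, 1], [1, 1, 0]):
#       pickle.dump({'observations': [], 'actions': actions}, f)
#
#   lengths = []
#   with open('/tmp/demo_stream.pkl', 'rb') as f:
#     while True:
#       try:
#         lengths.append(len(pickle.load(f)['actions']))
#       except EOFError:
#         break
#   assert lengths == [2, 3]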
def keep_latest_trajectories(self, demos_file, num_to_keep):
  """Keep the num_to_keep latest trajectories in the dataset at demos_file."""
  print(demos_file)
  all_demos_file = (
      demos_file.replace(f'e{num_to_keep}', '').replace('.pkl', 'all.pkl'))
  print(all_demos_file)
  gfile.Rename(demos_file, all_demos_file)
  last_demos = []
  with gfile.GFile(all_demos_file, 'rb') as f:
    while True:
      try:
        demo = pickle.load(f)
        last_demos.append(demo)
        # Keep only the most recent num_to_keep episodes seen so far.
        last_demos = last_demos[-num_to_keep:]
      except EOFError:
        break
  new_demo_writer = pickle_dataset.DemoWriter(demos_file)
  for demo in last_demos:
    new_demo_writer.write_episode(demo['observations'], demo['actions'])
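# Both keep_* methods above assume the demo file name embeds the kept-episode count
# as 'e{num_to_keep}' and derive the backup file name from it. A minimal sketch of
# that renaming convention (the file name is a hypothetical example):
#
#   demos_file = 'pick_demos_e100.pkl'
#   num_to_keep = 100
#   all_demos_file = (
#       demos_file.replace(f'e{num_to_keep}', '').replace('.pkl', 'all.pkl'))
#   assert all_demos_file == 'pick_demos_all.pkl'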
def main(_):
  task = FLAGS.task
  env = gym.make(f'visual-{task}-v0')
  topdir = FLAGS.top_dir
  in_path = os.path.join(topdir, FLAGS.in_dir, f'{task}-v0_demos.pickle')
  with gfile.GFile(in_path, 'rb') as f:
    dataset = pickle.load(f)
  out_path = os.path.join(topdir, FLAGS.out_dir, f'{task}-v0_demos.pickle')
  writer = pickle_dataset.DemoWriter(out_path, compress=FLAGS.compress_images)
  old_time = time.time()
  num_demos = FLAGS.max_demos_to_include or len(dataset)
  for d in range(num_demos):
    env.reset()
    demo = dataset[d]
    observations, actions = get_observations_for_demo(env, demo)
    writer.write_episode(observations, actions)
    new_time = time.time()
    print(f'{d + 1} / {num_demos}', new_time - old_time, 's')
    old_time = new_time
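# Hypothetical invocation of this conversion script (the flag names come from the
# code above; the script name, paths, and task are made-up examples):
#
#   python convert_demos.py --task=door --top_dir=/tmp/demos \
#     --in_dir=raw --out_dir=converted --max_demos_to_include=100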
def env_loop(env, agent, num_episodes, log_path, record_failed, seed,
             increment_seed, compress_images=True):
  """Loop for collecting demonstrations with an agent in a Gym environment."""
  if log_path is None:
    log_f = None
    success_f = None
    demo_writer = None
  else:
    log_f = gfile.GFile(log_path + '_log.txt', 'w')
    success_f = gfile.GFile(log_path + '_success.txt', 'w')
    demo_writer = pickle_dataset.DemoWriter(log_path + '.pkl', compress_images)
    print('Writing demos to', log_path + '.pkl')
  e = 0
  # Counter to keep track of seed offset, if not recording failed episodes.
  skipped_seeds = 0
  num_successes = 0
  num_attempts = 0
  min_reward, max_reward = np.inf, -np.inf
  while e < num_episodes:
    if e % 10 == 0 and e > 0:
      print(f'Episode {e} / {num_episodes}; '
            f'Success rate {num_successes} / {num_attempts}')
    if increment_seed:
      env.seed(seed + skipped_seeds + e)
    obs = env.reset()
    done = False
    _, agent_info = agent.get_action(obs['original_obs'])
    action = agent_info['evaluation']
    observations = []
    actions = []
    rewards = []
    # For envs with non-Markovian success criteria, track required fields.
    goals_achieved = []
    while not done:
      observations.append(obs)
      actions.append(action)
      obs, reward, done, info = env.step(action)
      rewards.append(reward)
      min_reward = min(min_reward, reward)
      max_reward = max(max_reward, reward)
      _, agent_info = agent.get_action(obs['original_obs'])
      action = agent_info['evaluation']
      if 'goal_achieved' in info:
        goals_achieved.append(info['goal_achieved'])
    # Environment defines success criteria based on the full episode.
    success_percentage = env.evaluate_success(
        [{'env_infos': {'goal_achieved': goals_achieved}}])
    success = bool(success_percentage)
    num_successes += int(success)
    num_attempts += 1
    if success:
      print(f'{e}: success')
      if log_f is not None:
        log_f.write(f'{e}: success\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n')
        success_f.flush()
    else:
      if 'TimeLimit.truncated' in info and info['TimeLimit.truncated']:
        print(f'{e}: failure: time limit')
      else:
        print(f'{e}: failure')
      if log_f is not None:
        if 'TimeLimit.truncated' in info and info['TimeLimit.truncated']:
          log_f.write(f'{e}: failure: time limit\n')
        else:
          log_f.write(f'{e}: failure\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('failure\n')
        success_f.flush()
    if success or record_failed:
      e += 1
      if demo_writer is not None:
        demo_writer.write_episode(observations, actions, rewards)
    elif not record_failed:
      skipped_seeds += 1
  print(f'Done; Success rate {num_successes} / {num_attempts}')
  print('min reward', min_reward)
  print('max reward', max_reward)
  if log_f is not None:
    log_f.write(f'Done; Success rate {num_successes} / {num_attempts}\n')
    log_f.write(f'min reward {min_reward}\n')
    log_f.write(f'max reward {max_reward}\n')
    log_f.close()
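# The post-episode success check above follows an interface in which the environment
# judges success from the whole episode's 'goal_achieved' flags. A minimal sketch of
# that call shape with a stand-in environment (the class and its success rule are
# hypothetical, for illustration only):
#
#   class _FakeEnv:
#
#     def evaluate_success(self, paths):
#       # Stand-in rule: success if the goal was held for at least 5 steps.
#       flags = paths[0]['env_infos']['goal_achieved']
#       return float(sum(flags) >= 5)
#
#   goals_achieved = [False] * 20 + [True] * 10
#   success = bool(_FakeEnv().evaluate_success(
#       [{'env_infos': {'goal_achieved': goals_achieved}}]))
#   assert success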
def compress_dataset(demos_file, new_demos_file):
  dataset = pickle_dataset.DemoReader(path=demos_file)
  writer = pickle_dataset.DemoWriter(path=new_demos_file)
  for obs, act in zip(dataset.observations, dataset.actions):
    writer.write_episode(obs, act)
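# Hypothetical usage: re-write an existing demo file through DemoReader/DemoWriter,
# presumably to apply DemoWriter's default image compression, as the function name
# suggests (the paths below are made-up examples):
#
#   compress_dataset('/tmp/demos/door_demos.pkl', '/tmp/demos/door_demos_c.pkl')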
def eval_policy(self,
                num_episodes,
                trained_steps=None,
                collapse_policy=True,
                eval_path=None,
                num_videos_to_save=0,
                max_num_steps=None,
                seed=None,
                increment_seed=False,
                stop_if_stuck=False):
  """Evaluate policy on env for num_episodes episodes."""
  if FLAGS.domain == 'mime':
    self._eval_environment.create_env()
  if not increment_seed and seed is not None:
    self._eval_environment.env.seed(seed)
    if self._cam_eval_environment is not None:
      self._cam_eval_environment.env.seed(seed)
  num_successes = 0
  action_logger = ActionLogger(self._environment.action_spec())
  if max_num_steps is None:
    max_num_steps = self._eval_environment.default_max_episode_steps
  if eval_path is None:
    log_f = None
    success_f = None
    episode_length_f = None
    eval_writer = None
  else:
    if not gfile.exists(os.path.dirname(eval_path)):
      gfile.makedirs(os.path.dirname(eval_path))
    collapse_str = 'c' if collapse_policy else ''
    stuck_str = 's' if stop_if_stuck else ''
    eval_summary_path = eval_path + f'_all{collapse_str}{stuck_str}'
    eval_path = eval_path + f'_{trained_steps}{collapse_str}{stuck_str}'
    log_f = gfile.GFile(eval_path + '_log.txt', 'w')
    success_f = gfile.GFile(eval_path + '_success.txt', 'w')
    episode_length_f = gfile.GFile(eval_path + '_lengths.txt', 'w')
    eval_writer = pickle_dataset.DemoWriter(eval_path + '.pkl')
    actions_path = eval_path + '_actions.pkl'
    if gfile.exists(actions_path):
      gfile.Remove(actions_path)
  for e in range(num_episodes):
    rewards = []
    if increment_seed and seed is not None:
      self._eval_environment.env.seed(seed + e)
      if self._cam_eval_environment is not None:
        self._cam_eval_environment.env.seed(seed + e)
    if e % 10 == 0 and e > 0:
      success_rate = num_successes / e * 100
      print(f'Episode {e} / {num_episodes}; Success rate {num_successes} / '
            f'{e} ({success_rate:.4f}%)')
    if (e < num_videos_to_save and eval_writer is not None
        and self._cam_eval_environment is not None):
      environment = self._cam_eval_environment
      # Keep both environments in the same state.
      self._eval_environment.reset()
      print(f'eval episode {e}: using cam env')
    else:
      environment = self._eval_environment
      if self._cam_eval_environment is not None:
        # Keep both environments in the same state.
        self._cam_eval_environment.reset()
      print(f'eval episode {e}: using non-cam env')
    timestep = environment.reset()
    observations = []
    actions = []
    step_count = 0
    if FLAGS.base_controller is not None:
      # Reset the scripted base controller for each episode.
      self._actor.base_controller = ScriptAgent(environment.env,
                                                FLAGS.base_controller)
    while not timestep.last():
      acme_obs, _, norm_base_act = self._actor.get_acme_observation(
          timestep.observation)
      action, base_action, residual_action, _, _, _, _ = (
          self._actor.select_action(
              acme_obs,
              norm_base_act,
              timestep.observation,
              add_exploration=False,
              collapse=collapse_policy))
      observations.append(timestep.observation)
      actions.append(self._actor.flat_action_to_dict(action))
      self._log_action(action_logger, (action, base_action, residual_action))
      next_timestep = environment.step(action)
      info = environment.info_from_observation(next_timestep.observation)
      timestep = next_timestep
      rewards.append(timestep.reward)
      step_count += 1
    discounted_returns = [rewards[-1]]
    for r in reversed(rewards[:-1]):
      discounted_returns.append(r + FLAGS.discount * discounted_returns[-1])
    self.min_discounted = min(self.min_discounted, np.min(discounted_returns))
    self.max_discounted = max(self.max_discounted, np.max(discounted_returns))
    print('discounted episode return range: '
          f'[{self.min_discounted}, {self.max_discounted}]')
    if info['success']:
      print(f'{e}: success')
      if log_f is not None:
        log_f.write(f'{e}: success\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n')
        success_f.flush()
      num_successes += 1
    else:
      if 'failure_message' in info:
        print(f'{e}: failure:', info['failure_message'])
      elif step_count >= max_num_steps or timestep.last():
        print(f'{e}: failure: time limit')
      else:
        print(f'{e}: failure')
      if log_f is not None:
        if 'failure_message' in info:
          log_f.write(f'{e}: failure: ' + info['failure_message'] + '\n')
        elif step_count >= max_num_steps or timestep.last():
          log_f.write(f'{e}: failure: time limit\n')
        else:
          log_f.write(f'{e}: failure\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('failure\n')
        success_f.flush()
    if episode_length_f is not None:
      episode_length_f.write(str(step_count) + '\n')
      episode_length_f.flush()
    if e < num_videos_to_save and eval_writer is not None:
      eval_writer.write_episode(observations, actions)
    action_logger.append_to_pickle(actions_path)
  success_rate = num_successes / num_episodes * 100
  print(f'Done; Success rate {num_successes} / {num_episodes} '
        f'({success_rate:.4f}%)')
  if log_f is not None:
    log_f.write(f'Done; Success rate {num_successes} / {num_episodes} '
                f'({success_rate:.4f}%)\n')
    log_f.close()
    csv_writer = csv.writer(
        gfile.GFile(eval_summary_path + '_success_rates.csv', 'a'))
    csv_writer.writerow([trained_steps, num_successes / num_episodes])
  return num_successes / num_episodes, True
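# The discounted-return bookkeeping above accumulates returns backwards from the
# final reward. A standalone sketch of the same recursion (the rewards and discount
# below are made-up values):
#
#   rewards = [0.0, 0.0, 1.0]
#   discount = 0.9
#   discounted_returns = [rewards[-1]]
#   for r in reversed(rewards[:-1]):
#     discounted_returns.append(r + discount * discounted_returns[-1])
#   # The last entry is the return from the first step: 1.0 * 0.9 ** 2 = 0.81.
#   assert abs(discounted_returns[-1] - 0.81) < 1e-9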
record_count = i % log_frames_freq
while num_episodes is None or i < num_episodes:
  rewards = []
  episode_steps = 0
  episode_return = 0
  prev_raw_residual = None
  prev_residual_exploration = False
  # For envs with non-Markovian success criteria, track required fields.
  if i % log_frames_freq == 0:
    record_count = 0
    first_to_record = i
    last_to_record = i + num_episodes_to_log - 1
    if out_dir is not None:
      demo_writer = pickle_dataset.DemoWriter(
          os.path.join(out_dir, 'episodes',
                       f'episodes_{first_to_record}-{last_to_record}.pkl'))
  if record_count < num_episodes_to_log:
    # Log frames for current episode.
    if self._cam_environment is None:
      environment = self._environment
    else:
      environment = self._cam_environment
      # Keep both environments in the same state.
      self._environment.reset()
      print(f'episode {i}: using cam env')
  else:
    # Do not log frames for current episode.
    environment = self._environment
    if self._cam_environment is not None:
      # Keep both environments in the same state.
      self._cam_environment.reset()
def eval_policy(env,
                seed,
                increment_seed,
                agent,
                num_episodes,
                eval_path=None,
                num_videos_to_save=0,
                summary_writer=None,
                summary_key='',
                stop_if_stuck=False,
                verbose=False):
  """Evaluate policy on env for num_episodes episodes."""
  num_successes = 0
  success_rates = {}
  if eval_path is None:
    log_f = None
    success_f = None
    episode_length_f = None
    eval_writer = None
  else:
    log_f = gfile.GFile(eval_path + '_log.txt', 'w')
    success_f = gfile.GFile(eval_path + '_success.txt', 'w')
    episode_length_f = gfile.GFile(eval_path + '_lengths.txt', 'w')
    eval_writer = pickle_dataset.DemoWriter(eval_path + '.pkl')
  if not increment_seed:
    env.seed(seed)
  hand_vil_episodes = None
  if FLAGS.hand_vil_episodes_path is not None:
    with gfile.GFile(FLAGS.hand_vil_episodes_path, 'rb') as f:
      hand_vil_episodes = pickle.load(f)
    hand_vil_actions = hand_vil_episodes['actions']
    hand_vil_images = hand_vil_episodes['rgb']
    hand_vil_robot_info = [
        separate_episode_robot_info(e_infos, agent)
        for e_infos in hand_vil_episodes['env_infos']
    ]
  for e in range(num_episodes):
    if e % 10 == 0 and e > 0:
      success_rate = num_successes / e
      if verbose:
        print(f'Episode {e} / {num_episodes}; Success rate {num_successes} / '
              f'{e} ({success_rate * 100:.4f}%)')
      if (e % 100 == 0 and e > 0) or e == num_episodes - 1:
        success_rates[e] = success_rate
        log_success_rate(e, success_rate, summary_writer, summary_key)
    if increment_seed:
      env.seed(seed)
      seed += 1
    obs = env.reset()
    done = False
    observations = []
    actions = []
    step_count = 0
    prev_stacked_obs = None
    # For envs with non-Markovian success criteria, track required fields.
    goals_achieved = []
    while not done:
      if hand_vil_episodes is not None:
        obs = hand_vil_robot_info[e][step_count]
        obs['rgb'] = hand_vil_images[e][step_count]
      action, stacked_obs = agent.get_action(
          obs, observations, env, return_stacked_obs=True)
      if hand_vil_episodes is not None:
        if not np.allclose(action, hand_vil_actions[e][step_count], atol=5e-6):
          raise ValueError(
              'Actions from agent and from trajectory diverge: '
              f'{action} vs {hand_vil_actions[e][step_count]}')
      if prev_stacked_obs is not None and stop_if_stuck:
        prev_img, prev_signals = prev_stacked_obs  # pylint: disable=unpacking-non-sequence
        img, signals = stacked_obs
        obs_stuck = np.all(np.equal(img, prev_img))
        # Note: target position has even higher noise.
        signals_stuck = np.all(np.isclose(signals, prev_signals))
        act_stuck = equal_actions(action, actions[-1])
        if obs_stuck and signals_stuck and act_stuck:
          # Keep an existing failure message if the env already set one.
          info['failure_message'] = info.get('failure_message') or 'Stuck'
          break
      prev_stacked_obs = stacked_obs
      observations.append(obs)
      actions.append(action)
      obs, unused_reward, done, info = env.step(action)
      step_count += 1
      if (hand_vil_episodes is not None
          and step_count >= len(hand_vil_robot_info[e])):
        print('episode ends at', step_count, 'done =', done)
      if 'goal_achieved' in info:
        # Environment defines success criteria based on the full episode.
        goals_achieved.append(info['goal_achieved'])
        success_percentage = env.evaluate_success(
            [{'env_infos': {'goal_achieved': goals_achieved}}])
        success = bool(success_percentage)
        done = done or success
      else:
        success = False
      if verbose:
        print(step_count, info)
    # Some envs instead expose success directly in the environment info.
    success = success or ('success' in info and info['success'])
    num_successes += int(success)
    if success:
      if verbose:
        print(f'{e}: success')
      if log_f is not None:
        log_f.write(f'{e}: success\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n')
        success_f.flush()
    else:
      if verbose:
        if 'failure_message' in info:
          print(f'{e}: failure:', info['failure_message'])
        elif 'TimeLimit.truncated' in info and info['TimeLimit.truncated']:
          print(f'{e}: failure: time limit')
        else:
          print(f'{e}: failure')
      if log_f is not None:
        if 'failure_message' in info:
          log_f.write(f'{e}: failure: ' + info['failure_message'] + '\n')
        elif 'TimeLimit.truncated' in info and info['TimeLimit.truncated']:
          log_f.write(f'{e}: failure: time limit\n')
        else:
          log_f.write(f'{e}: failure\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('failure\n')
        success_f.flush()
    if episode_length_f is not None:
      # TODO(minttu): Save env infos for later.
      episode_length_f.write(str(step_count) + '\n')
      episode_length_f.flush()
    if e < num_videos_to_save and eval_writer is not None:
      eval_writer.write_episode(observations, actions)
  success_rate = num_successes / num_episodes
  success_rates[num_episodes] = success_rate
  log_success_rate(num_episodes, success_rate, summary_writer, summary_key)
  print(f'Done; Success rate {num_successes} / {num_episodes} '
        f'({success_rate * 100:.4f}%)')
  if log_f is not None:
    log_f.write(f'Done; Success rate {num_successes} / {num_episodes} '
                f'({success_rate * 100:.4f}%)\n')
    log_f.close()
  return success_rates
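# The dict returned above maps episode counts to the running success rate recorded
# at intermediate checkpoints, plus the final rate under the key num_episodes.
# A hypothetical caller (agent and env constructed elsewhere):
#
#   success_rates = eval_policy(env, seed=0, increment_seed=True, agent=agent,
#                               num_episodes=200, verbose=True)
#   final_rate = success_rates[200]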
def env_loop(env, add_noise, num_episodes, log_path, record_failed, stop_early,
             seed, increment_seed, compress_images):
  """Loop for collecting demos with a scripted agent in a Mime environment."""
  if log_path is None:
    log_f = None
    success_f = None
    demo_writer = None
  else:
    log_f = gfile.GFile(log_path + '_log.txt', 'w')
    success_f = gfile.GFile(log_path + '_success.txt', 'w')
    demo_writer = pickle_dataset.DemoWriter(log_path + '.pkl', compress_images)
    print('Writing demos to', log_path + '.pkl')
  e = 0
  # Counter to keep track of seed offset, if not recording failed episodes.
  skipped_seeds = 0
  num_successes = 0
  num_attempts = 0
  while e < num_episodes:
    if e % 10 == 0 and e > 0:
      print(f'Episode {e} / {num_episodes}; '
            f'Success rate {num_successes} / {num_attempts}')
    if increment_seed:
      env.seed(seed + skipped_seeds + e)
    obs = env.reset()
    # To define a different script, use forked version of mime.
    # agent = ScriptAgent(env, FLAGS.script_type)
    agent = ScriptAgent(env)
    done = False
    action = agent.get_action()
    if add_noise:
      make_noised(action)
    observations = []
    actions = []
    while (not (stop_early and done)) and action is not None:
      observations.append(obs)
      actions.append(action)
      obs, unused_reward, done, info = env.step(action)
      action = agent.get_action()
      if add_noise and action is not None:
        make_noised(action)
    if info['success']:
      print(f'{num_attempts}: success')
      if log_f is not None:
        log_f.write(f'{num_attempts}: success\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n')
        success_f.flush()
      num_successes += 1
    else:
      if action is None:
        info['failure_message'] = 'End of Script.'
      print(f'{num_attempts}: failure:', info['failure_message'])
      if log_f is not None:
        log_f.write(
            f'{num_attempts}: failure: ' + info['failure_message'] + '\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('failure\n')
        success_f.flush()
    num_attempts += 1
    if info['success'] or record_failed:
      e += 1
      if demo_writer is not None:
        demo_writer.write_episode(observations, actions)
    elif not record_failed:
      skipped_seeds += 1
  print(f'Done; Success rate {num_successes} / {num_attempts}')
  if log_f is not None:
    log_f.write(f'Done; Success rate {num_successes} / {num_attempts}\n')
    log_f.close()
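# Hypothetical call collecting 100 successful scripted demos (the env id and output
# path are made-up examples; ScriptAgent and make_noised come from the code above):
#
#   env = gym.make('UR5-PickCamEnv-v0')
#   env_loop(env, add_noise=True, num_episodes=100, log_path='/tmp/demos/pick',
#            record_failed=False, stop_early=True, seed=0, increment_seed=True,
#            compress_images=True)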