def play(env, act, stochastic, video_path, clipped, num_trials=10):
    """Run ``act`` in ``env`` until ``num_trials`` episodes have been reported.

    An episode is counted each time ``info["rewards"]`` (as returned by
    ``env.step``) grows beyond the length seen so far.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``unwrapped.render``;
            its ``step`` info dict must contain a ``"rewards"`` list.
        act: Policy callable, invoked as ``act(batch_obs, stochastic=...)``;
            the first element of its result is used as the action.
        stochastic: Forwarded to ``act``.
        video_path: Output path handed to ``VideoRecorder``; ``None`` disables
            recording.
        clipped: When truthy, every step reward is passed through
            ``clip_score`` before being accumulated.
        num_trials: Number of reported episodes to play.

    Returns:
        dict with ``"Nonclipped"`` (the last ``info["rewards"]`` list) and
        ``"Clipped"`` (the locally accumulated per-episode sums). Both are
        empty lists when ``num_trials`` is not positive.
    """
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    num_episodes = 0
    num_played = 0
    episode_reward = 0
    accumulated_rewards = []
    # Pre-seeded so the return statement is well-defined even if the loop
    # body never runs (num_trials <= 0); previously that raised
    # UnboundLocalError on ``info``.
    info = {"rewards": []}
    try:
        obs = env.reset()
        while num_played < num_trials:
            env.unwrapped.render()
            video_recorder.capture_frame()
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
            obs, rew, done, info = env.step(action)
            if clipped:
                rew = clip_score(rew)
            episode_reward += rew
            if done:
                obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                accumulated_rewards.append(episode_reward)
                episode_reward = 0
                num_played += 1
                num_episodes = len(info["rewards"])
    finally:
        # Release the recorder if it is still open (exception, interrupt, or
        # the first reported episode count was not exactly 1).
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
    return {"Nonclipped": info["rewards"], "Clipped": accumulated_rewards}
class Runner:
    """Roll out a pickled policy in a gym environment and print episode scores.

    Construction loads the model from ``args.src_filepath``, builds the
    environment named by ``args.env_name`` and prepares a ``VideoRecorder``
    whose base path is ``args.src_filepath``.
    """

    def __init__(self, args):
        """Build the runner from parsed command-line arguments.

        Args:
            args: Object with attributes ``src_filepath``, ``env_name``,
                ``n_iter`` and ``rec``.
        """
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file. Only point this at model files you created or trust.
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        # Only enable the recorder when recording was requested, so a run
        # without recording does not set up an output video at all.
        # NOTE(review): gym's recorder appears to create its output file on
        # construction and remove it on close() when no frame was captured,
        # which would delete an earlier recording at the same path - confirm.
        self.recorder = VideoRecorder(
            env, base_path=args.src_filepath, enabled=bool(args.rec)
        )

    def __call__(self):
        """Play ``n_iter`` episodes, printing the score of each."""
        for _ in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        """Close the video recorder and then the environment."""
        self.recorder.close()
        self.env.close()

    def get_score(self):
        """Play one episode and return the sum of its rewards.

        Actions are sampled with ``np.random.choice`` using
        ``F.softmax(model(obs))`` as the probability vector. When
        ``rec_flag`` is truthy a frame is captured after every step.
        """
        env = self.env
        obs = env.reset()
        total = 0
        done = False
        while not done:
            y = self.model(obs)
            action = np.random.choice(len(y), p=F.softmax(y))
            obs, reward, done, _info = env.step(action)
            total += reward
            if self.rec_flag:
                self.recorder.capture_frame()
        return total
def test_no_frames():
    """A recorder closed without capturing any frame stays empty and functional.

    The output path must not exist once the recorder has been closed.
    """
    recorder = VideoRecorder(BrokenRecordableEnv())
    recorder.close()

    assert recorder.empty
    assert recorder.functional
    assert not os.path.exists(recorder.path)
def play(env, act, stochastic, video_path, nb_atoms):
    """Run ``act`` in ``env`` forever, printing each finished episode's reward.

    Reads the module-level ``args`` object: ``args.alpha`` is forwarded to
    ``act`` and ``args.visual`` switches on distribution plotting through
    ``PlotMachine``.

    Args:
        env: Environment exposing ``reset``, ``step``, ``action_space.n`` and
            ``unwrapped.render``; its ``step`` info dict must contain a
            ``"rewards"`` list.
        act: Policy callable, invoked as
            ``act(batch_obs, args.alpha, stochastic=...)``.
        stochastic: Forwarded to ``act``.
        video_path: Output path for ``VideoRecorder``; ``None`` disables
            recording. Only the first episode is recorded.
        nb_atoms: Forwarded to ``PlotMachine`` when ``args.visual`` is set.

    The loop has no normal exit; it ends only through an exception such as
    ``KeyboardInterrupt``.
    """
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    num_episodes = 0
    try:
        obs = env.reset()
        if args.visual:
            action_names = dqn_core.actions_from_env(env)
            plot_machine = PlotMachine(nb_atoms, env.action_space.n, action_names)
        while True:
            env.unwrapped.render()
            video_recorder.capture_frame()
            action = act(np.array(obs)[None], args.alpha, stochastic=stochastic)[0]
            obs, rew, done, info = env.step(action)
            if args.visual:
                plot_machine.plot_distribution(np.array(obs)[None])
            if done:
                obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): make sure a
        # recording that is still in progress gets closed.
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
def main():
    """Replay a trained ACKTR policy on an Atari environment, forever.

    Reads ``env``, ``model_path``, ``show_observation``, ``scale_factor`` and
    ``video_path`` from ``parse_args()``. Optionally shows the newest frame of
    the observation history in a pygame window and records video until an
    episode ends with no lives left (or in a game that reports no lives).

    The outer loop has no normal exit; it ends only through an exception such
    as ``KeyboardInterrupt``, at which point a still-open recorder is closed.
    """
    args = parse_args()
    env = make_atari(args.env)
    env = wrap_deepmind(env)

    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)

    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        # Kept from the original; this observation is overwritten by the
        # reset at the start of the first episode below.
        obs = env.reset()
        import pygame
        from pygame import surfarray

        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode(
            (env_width * scale_factor, env_height * scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))

    # if we're supposed to record video; None means "not (or no longer) recording"
    video_recorder = None
    if args.video_path is not None:
        video_recorder = VideoRecorder(
            env, base_path=args.video_path, enabled=True)

    try:
        while True:
            obs, done = env.reset(), False
            episode_rew = 0
            while not done:
                env.render()
                if args.show_observation:
                    # use the Kronecker product to scale the array up for display,
                    # and also transpose x/y axes because pygame displays as
                    # column/row instead of gym's row/column
                    transposed = obs_history[0, :, :, -1].transpose((1, 0))
                    scaled_array = np.uint8(
                        np.kron(transposed, np.ones((scale_factor, scale_factor))))
                    surfarray.blit_array(screen, scaled_array)
                    pygame.display.flip()

                if video_recorder is not None:
                    video_recorder.capture_frame()

                # add the current observation onto our history list
                obs_history = np.roll(obs_history, shift=-1, axis=3)
                obs_history[:, :, :, -1] = obs[None][:, :, :, 0]

                # get the suggested action for the current observation history
                action, v, _ = model.step(obs_history)
                obs, rew, done, info = env.step(action)
                episode_rew += rew
            print("Episode reward", episode_rew)

            # if we're taking video, stop it now so no more frames are added if
            # we're out of lives or there are no lives in this game
            if video_recorder is not None and (
                    'ale.lives' not in info or info['ale.lives'] == 0):
                video_recorder.close()
                video_recorder = None
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): close a recording
        # that was still in progress so the encoder is not left dangling.
        if video_recorder is not None:
            video_recorder.close()
def test_record_breaking_render_method():
    """Capturing from ``BrokenRecordableEnv`` marks the recorder as broken.

    After closing, the recorder must be empty and broken, and its output
    path must not exist.
    """
    recorder = VideoRecorder(BrokenRecordableEnv())
    recorder.capture_frame()
    recorder.close()

    assert recorder.empty
    assert recorder.broken
    assert not os.path.exists(recorder.path)
def test_text_envs():
    """Record one frame of ``FrozenLake-v0`` and always remove the output file."""
    frozen_lake = gym.make('FrozenLake-v0')
    recorder = VideoRecorder(frozen_lake)
    try:
        frozen_lake.reset()
        recorder.capture_frame()
        recorder.close()
    finally:
        # Clean up the recording whether or not the steps above succeeded.
        os.remove(recorder.path)
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    """Run ``act`` in ``env`` forever, optionally on adversarial observations.

    When ``attack`` is not ``None`` each observation is perturbed with
    ``craft_adv_obs`` (inside ``m_adv``'s session) and the policy is queried,
    inside ``m_target``'s session, on both the perturbed and the clean
    observation. A step where the two actions differ counts as a successful
    attack. The environment is stepped with the action chosen for the
    perturbed observation.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``unwrapped.render``;
            its ``step`` info dict must contain a ``"rewards"`` list.
        act: Policy callable, invoked as ``act(batch_obs, stochastic=...)``.
        craft_adv_obs: Callable producing adversarial observations, invoked as
            ``craft_adv_obs(batch_obs, stochastic_adv=...)``.
        craft_adv_obs2: Unused here; kept for interface compatibility.
        stochastic: Forwarded to ``act`` and ``craft_adv_obs``.
        video_path: Output path for ``VideoRecorder``; ``None`` disables
            recording. Only the first episode is recorded.
        attack: ``None`` for a normal run, anything else enables the attack.
        m_target: Object whose ``get_session()`` yields the policy's session.
        m_adv: Object whose ``get_session()`` yields the attack's session.

    The loop has no normal exit; it ends only through an exception such as
    ``KeyboardInterrupt``.
    """
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    try:
        obs = env.reset()
        while True:
            env.unwrapped.render()
            video_recorder.capture_frame()

            if attack is not None:
                # Craft adv. examples
                with m_adv.get_session().as_default():
                    adv_obs = craft_adv_obs(
                        np.array(obs)[None], stochastic_adv=stochastic
                    )[0]
                with m_target.get_session().as_default():
                    action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                    action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                    num_moves += 1
                    if action != action2:
                        num_transfer += 1
            else:
                # Normal
                action = act(np.array(obs)[None], stochastic=stochastic)[0]

            obs, rew, done, info = env.step(action)
            if done:
                obs = env.reset()

            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print("Reward: " + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print("Episode: " + str(num_episodes))
                # Without an attack no move is ever counted; dividing by
                # num_moves then raised ZeroDivisionError at the end of the
                # first episode. Only report the rate when it is defined.
                if num_moves:
                    # 100.0 first keeps this a float division on any Python.
                    success = 100.0 * num_transfer / num_moves
                    print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): make sure a
        # recording that is still in progress gets closed.
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
def test_record_simple():
    """Recording one CartPole frame yields a non-trivial video file.

    After closing, the recorder must be neither empty nor broken, and the
    file at ``rec.path`` must exist and be larger than 100 bytes.
    """
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # Ask the filesystem for the size directly; the previous open()/fstat()
    # pair left a file handle that was never closed.
    assert os.path.getsize(rec.path) > 100
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    """Run ``act`` in ``env`` forever with an optional attack and defense.

    Attack: when ``craft_adv_obs`` is not ``None`` the policy acts on the
    observation it returns instead of the clean one.

    Defense: when ``defense == 'foresight'``, from the sixth step of an
    episode onwards (``t > 4``) ``foresee`` is called with the previous
    observation and action, and its result is queued. Once four results are
    queued the policy acts on their stack (``np.stack(..., axis=2)``)
    instead.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``action_space.n``;
            its ``step`` info dict must contain a ``"rewards"`` list.
        act: Policy callable, invoked as ``act(batch_obs, stochastic=...)``.
        craft_adv_obs: Callable producing adversarial observations, or
            ``None`` for no attack.
        stochastic: Forwarded to ``act`` and ``craft_adv_obs``.
        video_path: Output path for ``VideoRecorder``; ``None`` disables
            recording. Only the first episode is recorded.
        game_name: Forwarded to ``load_visual_foresight``.
        attack: Unused here; kept for interface compatibility.
        defense: ``'foresight'`` enables the defense described above.

    The loop has no normal exit; it ends only through an exception such as
    ``KeyboardInterrupt``.
    """
    use_foresight = defense == 'foresight'
    if use_foresight:
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    t = 0
    try:
        obs = env.reset()
        while True:
            # Rendering is intentionally not done in this variant.
            video_recorder.capture_frame()

            # Attack
            if craft_adv_obs is not None:
                # Craft adv. examples
                adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
            else:
                # Normal
                action = act(np.array(obs)[None], stochastic=stochastic)[0]

            # Defense. old_obs / old_action are always bound here because
            # t > 4 implies at least five earlier iterations in this episode.
            if t > 4 and use_foresight:
                pred_obs.append(
                    foresee(U.get_session(), old_obs, old_action, np.array(obs),
                            game_screen_mean, vf, env.action_space.n, t))
                if len(pred_obs) == 4:
                    action = act(np.stack(pred_obs, axis=2)[None],
                                 stochastic=stochastic)[0]

            old_obs = obs
            old_action = action

            # RL loop
            obs, rew, done, info = env.step(action)
            t += 1
            if done:
                t = 0
                obs = env.reset()
                if use_foresight:
                    # Drop predictions from the finished episode so they are
                    # never stacked with frames of the new one.
                    pred_obs.clear()

            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): make sure a
        # recording that is still in progress gets closed.
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path, attack, m_target, m_adv):
    """Run ``act`` in ``env`` forever, optionally on adversarial observations.

    When ``attack`` is not ``None`` each observation is perturbed with
    ``craft_adv_obs`` (inside ``m_adv``'s session) and the policy is queried,
    inside ``m_target``'s session, on both the perturbed and the clean
    observation. A step where the two actions differ counts as a successful
    attack. The environment is stepped with the action chosen for the
    perturbed observation.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``unwrapped.render``;
            its ``step`` info dict must contain a ``"rewards"`` list.
        act: Policy callable, invoked as ``act(batch_obs, stochastic=...)``.
        craft_adv_obs: Callable producing adversarial observations, invoked as
            ``craft_adv_obs(batch_obs, stochastic_adv=...)``.
        craft_adv_obs2: Unused here; kept for interface compatibility.
        stochastic: Forwarded to ``act`` and ``craft_adv_obs``.
        video_path: Output path for ``VideoRecorder``; ``None`` disables
            recording. Only the first episode is recorded.
        attack: ``None`` for a normal run, anything else enables the attack.
        m_target: Object whose ``get_session()`` yields the policy's session.
        m_adv: Object whose ``get_session()`` yields the attack's session.

    The loop has no normal exit; it ends only through an exception such as
    ``KeyboardInterrupt``.
    """
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    try:
        obs = env.reset()
        while True:
            env.unwrapped.render()
            video_recorder.capture_frame()

            if attack is not None:
                # Craft adv. examples
                with m_adv.get_session().as_default():
                    adv_obs = craft_adv_obs(
                        np.array(obs)[None], stochastic_adv=stochastic)[0]
                with m_target.get_session().as_default():
                    action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                    action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                    num_moves += 1
                    if action != action2:
                        num_transfer += 1
            else:
                # Normal
                action = act(np.array(obs)[None], stochastic=stochastic)[0]

            obs, rew, done, info = env.step(action)
            if done:
                obs = env.reset()

            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print('Reward: ' + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print('Episode: ' + str(num_episodes))
                # Without an attack no move is ever counted; dividing by
                # num_moves then raised ZeroDivisionError at the end of the
                # first episode. Only report the rate when it is defined.
                if num_moves:
                    # 100.0 first keeps this a float division on any Python.
                    success = 100.0 * num_transfer / num_moves
                    print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): make sure a
        # recording that is still in progress gets closed.
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
def render(env, recorde=False, num_runs=5, max_steps=300):
    """Render several greedy rollouts and print summary statistics.

    Each step picks ``argmax`` of ``rl_model.run(obs, None)`` as the action
    and adds ``hc_model.predict(obs)`` for the resulting observation to a
    second, "trajectory" reward. ``rl_model`` and ``hc_model`` are
    module-level objects.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``render``.
        recorde: When truthy, record the rollouts with ``VideoRecorder``.
        num_runs: Number of episodes to play (previously hard-coded to 5).
        max_steps: An episode is cut off once its step count exceeds this
            value (previously hard-coded to 300).

    Raises:
        ValueError: If ``num_runs`` is smaller than 1, since the printed
            means would be undefined.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    rec = VideoRecorder(env) if recorde else None

    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0

    try:
        for _ in range(num_runs):
            total_reward = 0.0
            traj_total_reward = 0.0
            idx = 0
            done = False
            obs = env.reset()
            while not done:
                env.render()
                x = np.reshape(obs, [1, -1])
                pred = rl_model.run(x, None)
                action = np.argmax(pred)
                obs, reward, done, _info = env.step(action)
                total_reward += reward
                traj_total_reward += hc_model.predict(obs.reshape([1, -1]))
                if rec is not None:
                    rec.capture_frame()
                idx += 1
                if idx > max_steps:
                    break

            # Update both extremes independently. The former if/elif chain
            # skipped the minimum whenever a run set a new maximum, so e.g.
            # monotonically longer runs left the minimum at its 1e+10 seed.
            max_run_time = max(max_run_time, idx)
            min_run_time = min(min_run_time, idx)
            mean_run_time += idx
            mean_reward += total_reward
            mean_traj_reward += traj_total_reward
    finally:
        # Close the recorder even if a rollout raised.
        if rec is not None:
            rec.close()

    runs = float(num_runs)
    # Joined with single spaces to reproduce the layout of the former
    # comma-separated print statement, while being valid on Python 2 and 3.
    summary = [
        "[ RunLength =", num_runs,
        " MeanReward =", mean_reward / runs,
        "MeantrajReward =", mean_traj_reward / runs,
        " MeanRunTime =", mean_run_time / runs,
        " MaxRunTime =", max_run_time,
        " MinRunTime =", min_run_time,
        "]",
    ]
    print(" ".join(str(part) for part in summary))
def play(env, act, stochastic, video_path):
    """Run ``act`` in ``env`` forever, printing each finished episode's reward.

    An episode is considered finished each time ``info["rewards"]`` (as
    returned by ``env.step``) grows beyond the length seen so far.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``unwrapped.render``;
            its ``step`` info dict must contain a ``"rewards"`` list.
        act: Policy callable, invoked as ``act(batch_obs, stochastic=...)``;
            the first element of its result is used as the action.
        stochastic: Forwarded to ``act``.
        video_path: Output path for ``VideoRecorder``; ``None`` disables
            recording. Only the first episode is recorded.

    The loop has no normal exit; it ends only through an exception such as
    ``KeyboardInterrupt``.
    """
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    try:
        obs = env.reset()
        while True:
            env.unwrapped.render()
            video_recorder.capture_frame()
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
            obs, rew, done, info = env.step(action)
            if done:
                obs = env.reset()

            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
    finally:
        # Only reachable through an exception (e.g. Ctrl-C): make sure a
        # recording that is still in progress gets closed.
        if video_recorder.enabled:
            video_recorder.close()
            video_recorder.enabled = False
def test_record_unrecordable_method():
    """A recorder built on ``UnrecordableEnv`` starts disabled and can be closed."""
    recorder = VideoRecorder(UnrecordableEnv())
    assert not recorder.enabled
    recorder.close()
batch_size=BATCH_SIZE, shuffle=True) match_env(env_real, env_sim) video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True) video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True) for i, data in enumerate(dataloader_train): for j in range(50): env_sim.render() env_real.render() action = data["actions"][0, j].numpy() video_recorder.capture_frame() video_recorder2.capture_frame() obs_real, _, _, _ = env_real.step(action.copy()) obs_simp, _, _, _ = env_sim.step(action.copy()) env_real.reset() env_sim.reset() match_env(env_real, env_sim) if i == 10: break video_recorder.close() video_recorder.enabled = False video_recorder2.close() video_recorder2.enabled = False
utils.to_bin(pole_angle, pole_angle_bins), utils.to_bin(angle_rate_of_change, angle_rate_bins) ]) if not (done): #qlearn.learn(state, action, reward, nextState) state = nextState rewards_sum += reward else: # Q-learn stuff reward = -200 rewards_sum += reward break print(rewards_sum) test_reward.append(rewards_sum) record.close() print("Test Meaning_value: {}".format(np.mean(test_reward))) print("Test std: {}".format(np.std(test_reward))) # print("length: ") print("Test finished...") x = np.asarray(x, dtype=int) reward_y = np.asarray(reward_y, dtype=float) plt.figure() plt.plot(x, reward_y) plt.savefig('cartpole.png') plt.show() trajectory = last_time_steps.tolist()
class VideoWrapper(Wrapper):
    """Creates videos from wrapped environment by calling render after each timestep."""

    def __init__(self, env, directory, single_video=True):
        """
        :param env: (gym.Env) the wrapped environment.
        :param directory: the output directory. It must not exist yet.
        :param single_video: (bool) if True, generates a single video file, with episodes concatenated.
                             If False, a new video file is created for each episode.
                             Usually a single video file is what is desired. However, if one is searching for
                             an interesting episode (perhaps by looking at the metadata), saving to different
                             files can be useful.
        :raises AssertionError: if ``directory`` already exists.
        """
        super(VideoWrapper, self).__init__(env)
        self.episode_id = 0
        self.video_recorder = None
        self.single_video = single_video

        self.directory = osp.abspath(directory)

        # Make sure to not put multiple different runs in the same directory,
        # if the directory already exists.
        # Raised explicitly rather than via ``assert`` so the protection is
        # not stripped when Python runs with -O; the exception type is kept
        # for callers that relied on it.
        if os.path.exists(self.directory):
            error_msg = "You're trying to use the same directory twice, " \
                        "this would result in files being overwritten"
            raise AssertionError(error_msg)
        os.makedirs(self.directory, exist_ok=True)

    def _step(self, action):
        """Step the wrapped environment and capture one video frame.

        When the episode is done, the keys of ``info`` whose value contains
        ``'winner'`` are stored in the recorder's metadata under ``'winners'``.
        Requires ``_reset`` to have been called first, since that is where
        the recorder is created.
        """
        obs, rew, done, info = self.env.step(action)
        if done:
            winners = [i for i, d in info.items() if 'winner' in d]
            metadata = {'winners': winners}
            self.video_recorder.metadata.update(metadata)
        self.video_recorder.capture_frame()
        return obs, rew, done, info

    def _reset(self):
        """Prepare the recorder for a new episode and reset the wrapped environment."""
        self._reset_video_recorder()
        self.episode_id += 1
        return self.env.reset()

    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory, 'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )

    def _close(self):
        """Close the active recorder, if any, then close the wrapped environment."""
        if self.video_recorder is not None:
            self.video_recorder.close()
            self.video_recorder = None
        super(VideoWrapper, self)._close()