Example #1
import pickle

import gym
import numpy as np
import torch.nn.functional as F
from gym.wrappers.monitoring.video_recorder import VideoRecorder


class Runner:
    def __init__(self, args):
        # Load a pickled policy and build the environment it will be scored in.
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        # The video file is written next to the pickled model.
        self.recorder = VideoRecorder(env, base_path=args.src_filepath)

    def __call__(self):
        for i in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        self.recorder.close()
        self.env.close()

    def get_score(self):
        env = self.env
        obs = env.reset()
        acc = 0
        while True:
            # Assumes a PyTorch model returning action logits for the raw observation.
            y = self.model(obs)
            probs = F.softmax(y, dim=-1).detach().numpy()
            action = np.random.choice(len(probs), p=probs)
            obs, reward, done, info = env.step(action)
            acc += reward
            if self.rec_flag:
                self.recorder.capture_frame()
            if done:
                break
        return acc
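
A minimal sketch of how this Runner might be driven from the command line; the flag names mirror the attributes the example reads, and the defaults are illustrative assumptions:

import argparse

# Hypothetical CLI wiring; flag names are inferred from the attributes
# Runner reads, not taken from the original source.
parser = argparse.ArgumentParser()
parser.add_argument("--src_filepath", required=True)   # pickled model
parser.add_argument("--env_name", default="CartPole-v1")
parser.add_argument("--n_iter", type=int, default=5)
parser.add_argument("--rec", action="store_true")
args = parser.parse_args()

runner = Runner(args)
runner()        # prints one score per rollout
runner.close()  # flushes the video file and closes the env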
Example #2
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    X = []
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Score in this episode: " + str(info["rewards"][-1]))
            X.append(info["rewards"][-1])
            num_episodes = len(info["rewards"])
            print("Average Score so far: " + str(sum(X) / float(num_episodes)))
Example #3
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    if args.show_observation:  # `args` is assumed to be a module-level namespace
        fig = plt.figure()
        im = plt.imshow(obs._frames[-1].reshape((84, 84)), cmap='Greys')
        plt.show(block=False)
    while True:
        env.unwrapped.render()
        if args.show_observation:
            im.set_data(obs._frames[-1].reshape((84, 84)))
            fig.canvas.draw()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #4
def play(env, act, stochastic, video_path, clipped, num_trials=10):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    rewardArray = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            rewardArray.append(reward)
            reward = 0
            num_played += 1
            num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": rewardArray}
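
Example #4 is the only variant that returns its results instead of just printing them; a hypothetical call site (the `env` and `act` objects would come from whatever deepq-style setup produced the policy, and are not shown in the example) could aggregate them like this:

# Hypothetical wiring; `env` and `act` are assumptions, not part of the source.
results = play(env, act, stochastic=True, video_path="run.mp4",
               clipped=True, num_trials=10)
clipped = results["Clipped"]
print("mean clipped return over", len(clipped), "trials:",
      sum(clipped) / len(clipped))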
Example #5
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n,
                                             action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])

        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #6
def main():
    args = parse_args()
    
    env = make_atari(args.env)
    env = wrap_deepmind(env)
    
    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)
    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width*scale_factor, env_height*scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))
        
    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(
            env, base_path=video_path, enabled=video_path is not None)
        
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also transpose x/y axes because pygame
                # displays as column/row instead of gym's row/column
                transposed = obs_history[0,:,:,-1].transpose((1,0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()
            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]
            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
        # if we're recording, stop once we're out of lives (or this game has
        # no lives counter) and clear video_path so no more frames are added
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()
Example #7
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
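
This test depends on a BrokenRecordableEnv helper that is not shown here. A minimal sketch consistent with the assertions (an env that advertises rgb_array rendering but never produces a frame, so the recorder flags itself broken and deletes the empty file) could look like:

class BrokenRecordableEnv:
    # Hypothetical stand-in: claims to support rgb_array rendering,
    # but render() returns no frame.
    metadata = {"render.modes": ["rgb_array"]}

    def render(self, mode="rgb_array"):
        return None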
Example #9
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None],
                                        stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Reward: " + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print("Episode: " + str(num_episodes))
            if num_moves > 0:
                success = 100.0 * num_transfer / num_moves
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #10
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #12
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack,
         defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None],
                                    stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None],
                             stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #13
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path, "rb") as f:
        assert os.fstat(f.fileno()).st_size > 100
Example #15
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None],
                                  stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            if num_moves > 0:
                success = 100.0 * num_transfer / num_moves
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #16
def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None

    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0

        idx = 0
        done = False
        obs = env.reset()
        while not done:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)

            obs, rew, done, info = env.step(action)
            total_reward += rew
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))

            if rec is not None:
                rec.capture_frame()

            idx += 1
            if done or idx > 300:

                if idx > max_run_time:
                    max_run_time = idx
                if idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx

                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec is not None:
        rec.close()
    print("[ RunLength =", 5, " MeanReward =", mean_reward / 5.0,
          "MeanTrajReward =", mean_traj_reward / 5.0,
          " MeanRunTime =", mean_run_time / 5.0,
          " MaxRunTime =", max_run_time, " MinRunTime =", min_run_time, "]")
Example #17
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #18
dataloader_train = DataLoader(dataset_train,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

match_env(env_real, env_sim)
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()

        action = data["actions"][0, j].numpy()

        video_recorder.capture_frame()
        video_recorder2.capture_frame()

        obs_real, _, _, _ = env_real.step(action.copy())
        obs_sim, _, _, _ = env_sim.step(action.copy())

    env_real.reset()
    env_sim.reset()
    match_env(env_real, env_sim)
    if i == 10:
        break

video_recorder.close()
video_recorder.enabled = False
video_recorder2.close()
video_recorder2.enabled = False
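
Example #18 relies on a match_env helper that synchronizes the simulated env with the real one before each rollout; its implementation is not shown. A minimal sketch for MuJoCo-style envs, assuming both expose their simulator state through mujoco_py's sim object, might be:

def match_env(env_real, env_sim):
    # Hypothetical helper: copy the real env's full simulator state into the
    # sim env so both start each rollout from the same configuration.
    state = env_real.unwrapped.sim.get_state()
    env_sim.unwrapped.sim.set_state(state)
    env_sim.unwrapped.sim.forward()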