def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path, "rb") as f:
        assert os.fstat(f.fileno()).st_size > 100
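The test above only captures a single frame before closing the recorder. For reference, here is a minimal sketch of recording a whole episode, assuming the classic Gym API (4-tuple step() return) and an environment that can render rgb_array frames; the output path is just a placeholder:

import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

env = gym.make("CartPole-v1")
rec = VideoRecorder(env, path="/tmp/cartpole_episode.mp4")  # placeholder path

obs = env.reset()
done = False
while not done:
    rec.capture_frame()  # grabs the current rgb_array frame from the env
    obs, reward, done, info = env.step(env.action_space.sample())

rec.close()  # flushes the encoder and finalizes the video file
env.close()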
Example #2
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack,
         defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None],
                                    stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None],
                             stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Пример #3
0
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.

        Arguments:
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum', 'rewards', and 'env_infos'.
        """
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False
        env_infos = []

        policy.reset()
        for t in range(horizon):
            if video_record:
                recorder.capture_frame()
            start = time.time()
            A.append(policy.act(O[t], t))
            times.append(time.time() - start)

            if self.noise_stddev is None:
                obs, reward, done, info = self.env.step(A[t])
            else:
                action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev, size=[self.dU])
                action = np.minimum(np.maximum(action, self.env.action_space.low), self.env.action_space.high)
                obs, reward, done, info = self.env.step(action)
            O.append(obs)
            env_infos.append(info)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        print("Average action selection time: ", np.mean(times))
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
            "env_infos": env_infos,
        }
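A hedged sketch of how such a sample() method might be driven; the RandomPolicy placeholder is an assumption that merely satisfies the reset()/act(obs, t) interface used above, and `agent` stands in for an instance of the class that defines sample() (constructed elsewhere with its env and noise settings):

class RandomPolicy:
    """Hypothetical placeholder policy with the interface sample() expects."""
    def __init__(self, action_space):
        self.action_space = action_space

    def reset(self):
        pass

    def act(self, obs, t):
        return self.action_space.sample()

# 'agent' is assumed to be an instance of the class defining sample() above.
rollout = agent.sample(horizon=200,
                       policy=RandomPolicy(agent.env.action_space),
                       record_fname="rollout.mp4")  # omit record_fname to skip video
print(rollout["reward_sum"], rollout["obs"].shape, rollout["ac"].shape)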
Example #4
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None],
                                  stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            if num_moves > 0:
                success = float(num_transfer) / num_moves * 100.0
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #5
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.
        Arguments:
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.
        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum', and 'rewards'.
        """
        # Recording defaults to off when no filename is given.
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(
            self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False

        policy.reset()

        for t in range(horizon):
            if video_record:
                recorder.capture_frame()
            start = time.time()
            A.append(policy.act(O[t], t))
            # print(O[t].shape, A[t].shape)
            times.append(time.time() - start)
            obs, reward, done, info = self.env.step(A[t])

            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        print("Average action selection time: ", np.mean(times))
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }
Example #6
def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None

    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0

        idx = 0
        done = False
        obs = env.reset()
        while not done:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)

            obs, reward, done, info = env.step(action)
            total_reward += reward
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))

            if rec is not None:
                rec.capture_frame()

            idx += 1
            if done or idx > 300:

                if idx > max_run_time:
                    max_run_time = idx
                if idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx

                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec is not None:
        rec.close()
    print "[ RunLength =",5," MeanReward =",mean_reward / 5.0, "MeantrajReward =",mean_traj_reward/5.0,\
      " MeanRunTime =",mean_run_time / 5.0, " MaxRunTime =",max_run_time," MinRunTime =",min_run_time,"]"
Example #7
    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory,
                                   'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )
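For context, a hedged sketch of the kind of per-episode monitoring wrapper _reset_video_recorder typically belongs to; the class name, constructor arguments, and reset/step plumbing below are assumptions based only on the attributes the method uses (video_recorder, single_video, directory, episode_id), not the original source:

import os.path as osp  # used by _reset_video_recorder above

import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder


class EpisodeVideoWrapper(gym.Wrapper):
    """Hypothetical wrapper: one video per episode, or a single continuous video."""

    def __init__(self, env, directory, single_video=False):
        super().__init__(env)
        self.directory = directory
        self.single_video = single_video
        self.episode_id = 0
        self.video_recorder = None

    # _reset_video_recorder (shown above) is assumed to be defined on this class.

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        self._reset_video_recorder()  # start (or continue) recording this episode
        self.episode_id += 1
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.video_recorder.capture_frame()
        return obs, reward, done, info

    def close(self):
        if self.video_recorder is not None:
            self.video_recorder.close()
        super().close()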
Example #8
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #9
    def sample(self, horizon, policy, record_fname=None, test_policy=False, average=False):
        """Samples a rollout from the agent.

        Arguments:
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.
            test_policy: (bool) If True, the policy is queried in test/evaluation mode.
            average: (bool) Passed through to policy.act() when test_policy is True.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum', and 'rewards'.
        """
        if test_policy:
            logger.info('Testing the policy')
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False
        self._debug += 1

        policy.reset()
        # for t in range(20):
        for t in range(horizon):
            if hasattr(self.env, 'render_imitation'):
                self.env.render_imitation()
            if t % 50 == 10 and t > 1:
                logger.info('Current timesteps: %d / %d, average time: %.5f'
                            % (t, horizon, np.mean(times)))
            if video_record:
                recorder.capture_frame()
            start = time.time()
            if test_policy:
                A.append(policy.act(O[t], t, test_policy=test_policy, average=average))
            else:
                A.append(policy.act(O[t], t))
            times.append(time.time() - start)

            if self.noise_stddev is None:
                obs, reward, done, info = self.env.step(A[t])
            else:
                action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev,
                                                 size=[self.dU])
                action = np.minimum(np.maximum(action,
                                               self.env.action_space.low),
                                    self.env.action_space.high)
                obs, reward, done, info = self.env.step(action)
            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        logger.info("Average action selection time: %.4f" % np.mean(times))
        logger.info("Rollout length: %d" % len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }
Example #10
def test_record_unrecordable_method():
    env = UnrecordableEnv()
    rec = VideoRecorder(env)
    assert not rec.enabled
    rec.close()
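The UnrecordableEnv fixture is not shown on this page; below is a plausible minimal sketch of it, an assumption modeled on how the classic Gym VideoRecorder inspects env.metadata for an 'rgb_array' render mode, not the actual test fixture:

import gym


class UnrecordableEnv(gym.Env):
    # No 'rgb_array' (or 'ansi') render mode is advertised, so VideoRecorder
    # disables itself instead of trying to capture frames.
    metadata = {"render.modes": [None]}

    def reset(self):
        return 0

    def step(self, action):
        return 0, 0.0, False, {}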
Example #11
# train_data = H5PYDataset(
#     DATASET_PATH, which_sets=('train',), sources=('s_transition_obs','r_transition_obs', 'obs', 'actions')
# )
# stream_train = DataStream(train_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))
# valid_data = H5PYDataset(
#     DATASET_PATH, which_sets=('valid',), sources=('s_transition_obs','r_transition_obs', 'obs', 'actions')
# )
# stream_valid = DataStream(valid_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))

# iterator = stream_train.get_epoch_iterator(as_dict=True)
# data = next(iterator)

# length = data["actions"].shape[1]
length = 10

video_recorder = VideoRecorder(env, 'real_backlash.mp4', enabled=False)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=False)

# Only first six predicted by the lstm
num_obs = 10
array = np.zeros((2, 100, num_obs))

for i, data in enumerate(range(1)):
    env.reset()
    env2.reset()
    match_env(env, env2)
    new_obs = env.unwrapped._get_obs()
    new_obs2 = env2.unwrapped._get_obs()

    for j in range(100):
        # env.render()
Example #12

def match_env(real, sim):
    # set the simulator (sim) to the state of the real robot env (real)
    sim.env.set_state(real.env.model.data.qpos.ravel(),
                      real.env.model.data.qvel.ravel())


dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH,
                                                   for_training=True)
dataloader_train = DataLoader(dataset_train,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

match_env(env_real, env_sim)
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()

        action = data["actions"][0, j].numpy()

        video_recorder.capture_frame()
        video_recorder2.capture_frame()

        obs_real, _, _, _ = env_real.step(action.copy())
        obs_simp, _, _, _ = env_sim.step(action.copy())
Example #13
train_data = H5PYDataset(DATASET_PATH,
                         which_sets=('train', ),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_train = DataStream(train_data,
                          iteration_scheme=SequentialScheme(
                              train_data.num_examples, batch_size))
valid_data = H5PYDataset(DATASET_PATH,
                         which_sets=('valid', ),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_valid = DataStream(valid_data,
                          iteration_scheme=SequentialScheme(
                              train_data.num_examples, batch_size))

iterator = stream_train.get_epoch_iterator(as_dict=True)

data = next(iterator)
length = data["actions"].shape[1]

match_env(env, env2)
video_recorder = VideoRecorder(env, 'sim+backlash.mp4', enabled=True)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=True)

for i, data in enumerate(stream_train.get_epoch_iterator(as_dict=True)):
    for j in range(length):
        action = data["actions"]
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        new_obs, reward, done, info = env.step(action)
        new_obs2, reward2, done2, info2 = env2.step(action)

    if i == 4:
        break

video_recorder.close()
video_recorder.enabled = False
Example #14
def match_env(real, sim):
    # set the (possibly wrapped) simulator to the state of the real robot env
    simulator = sim.env
    if hasattr(simulator, "env"):
        simulator = simulator.env

    simulator.set_state(
        real.env.model.data.qpos.ravel(),
        real.env.model.data.qvel.ravel()
    )


# dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH, for_training=True)
# dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

match_env(env_real, env_simplus)
match_env(env_real, env_sim)
video_recorder_real = VideoRecorder(
    env_real, 'real.mp4', enabled=True)
video_recorder_simplus = VideoRecorder(
    env_simplus, 'sim+.mp4', enabled=True)
video_recorder_sim = VideoRecorder(
    env_sim, 'sim.mp4', enabled=True)

video_recorders = [video_recorder_real, video_recorder_simplus, video_recorder_sim]

# for i, data in enumerate(dataloader_train):
for i in range(40):
    for j in range(50):
        env_real.render()
        env_simplus.render()
        env_sim.render()

        # action = data["actions"][0, j].numpy()
Example #15
            return actual_model(img_and_gaze_combined, num_actions, scope,
                                **kwargs)

        act = DeepqWithGaze.build_act(make_obs_ph=lambda name: U.Uint8Input(
            env.observation_space.shape, name=name),
                                      q_func=model_wrapper,
                                      num_actions=env.action_space.n)
        U.load_state(os.path.join(args.model_dir, "saved"))
        gaze_model.load_weights(
            'baselines/DeepqWithGaze/ImgOnly_gazeModels/seaquest-dp0.4-DQN+BNonInput.hdf5'
        )

        num_episodes = 0
        video_recorder = None
        video_recorder = VideoRecorder(env,
                                       args.video,
                                       enabled=args.video is not None)
        obs = env.reset()
        if args.debug_mode:
            fig, axarr = plt.subplots(2, 3)
            plt.show(block=False)
            debug_embed_last_time = time.time()  # TODO this is temporary. delete it and its related code
            debug_embed_freq_sec = 600
        while True:
            if args.debug_mode and debug_gaze_in is not None:
                for i in range(4):
                    axarr[int(i / 2), i % 2].cla()
                    axarr[int(i / 2), i % 2].imshow(debug_gaze_in[0, :, :, i])
                axarr[1, 2].cla()
                axarr[1, 2].imshow(debug_gaze_in[0, :, :, 4])
Example #16
    test_reward = []

    for i_episode in range(200):
        observation = env.reset()

        rewards_sum = 0

        cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
        state = utils.build_state([
            utils.to_bin(cart_position, cart_position_bins),
            utils.to_bin(cart_velocity, cart_velocity_bins),
            utils.to_bin(pole_angle, pole_angle_bins),
            utils.to_bin(angle_rate_of_change, angle_rate_bins)
        ])

        record = VideoRecorder(env=env, path="cartpolev0.mp4")
        for t in range(max_number_of_steps):
            #env.render()

            # Pick an action based on the current state
            action = 0 if qlearn.getQ(state, 0) > qlearn.getQ(state, 1) else 1
            # Execute the action and get feedback
            observation, reward, done, info = env.step(action)

            # Digitize the observation to get a state
            cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
            nextState = utils.build_state([
                utils.to_bin(cart_position, cart_position_bins),
                utils.to_bin(cart_velocity, cart_velocity_bins),
                utils.to_bin(pole_angle, pole_angle_bins),
                utils.to_bin(angle_rate_of_change, angle_rate_bins)