def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
class Runner:
    def __init__(self, args):
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        self.recorder = VideoRecorder(env, base_path=args.src_filepath)

    def __call__(self):
        for i in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        self.recorder.close()
        self.env.close()

    def get_score(self):
        env = self.env
        obs = env.reset()
        acc = 0
        while True:
            y = self.model(obs)
            action = np.random.choice(len(y), p=F.softmax(y))
            obs, reward, done, info = env.step(action)
            acc += reward
            if self.rec_flag:
                self.recorder.capture_frame()
            if done:
                break
        return acc
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    f = open(rec.path)
    assert os.fstat(f.fileno()).st_size > 100
def sample(self, horizon, policy, record_fname=None):
    """Samples a rollout from the agent.

    Arguments:
        horizon: (int) The length of the rollout to generate from the agent.
        policy: (policy) The policy that the agent will use for actions.
        record_fname: (str/None) The name of the file to which a recording of the rollout
            will be saved. If None, the rollout will not be recorded.

    Returns: (dict) A dictionary containing data from the rollout.
        The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
    """
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(self.env, record_fname)

    times, rewards = [], []
    O, A, plan_hors, reward_sum, done = [self.env.reset()], [], [], 0, False

    policy.reset()
    for t in range(horizon):
        if video_record:
            recorder.capture_frame()
        start = time.time()
        a, plan_hor = policy.act(O[t], t)
        A.append(a)
        plan_hors.append(plan_hor)
        times.append(time.time() - start)

        if self.noise_stddev is None:
            obs, reward, done, info = self.env.step(A[t])
        else:
            action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev, size=[self.dU])
            action = np.minimum(np.maximum(action, self.env.action_space.low),
                                self.env.action_space.high)
            obs, reward, done, info = self.env.step(action)
        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break

    if video_record:
        recorder.capture_frame()
        recorder.close()

    print("Average action selection time: ", np.mean(times))
    print("Rollout length: ", len(A))

    return {
        "obs": np.array(O),
        "ac": np.array(A),
        "reward_sum": reward_sum,
        "rewards": np.array(rewards),
        "plan_hor": np.array(plan_hors),
    }
def play(env, act, stochastic, video_path, clipped, num_trials=10):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    rewardArray = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                rewardArray.append(reward)
                reward = 0
                num_played += 1
                num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": rewardArray}
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.show_observation:
        fig = plt.figure()
        im = plt.imshow(obs._frames[-1].reshape((84, 84)), cmap='Greys')
        plt.show(False)
    while True:
        env.unwrapped.render()
        if args.show_observation:
            im.set_data(obs._frames[-1].reshape((84, 84)))
            fig.canvas.draw()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n, action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    X = []
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print("Score in this episode: " + str(info["rewards"][-1]))
                X.append(info["rewards"][-1])
                num_episodes = len(info["rewards"])
                print("Average Score so far: " + str(sum(X) / float(num_episodes)))
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
            num_moves += 1
            if action != action2:
                num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print("Reward: " + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print("Episode: " + str(num_episodes))
                success = float(num_transfer / num_moves) * 100.0
                print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()
    while True:
        # env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def main():
    args = parse_args()
    env = make_atari(args.env)
    env = wrap_deepmind(env)

    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)

    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width * scale_factor, env_height * scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))

    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(env, base_path=video_path, enabled=video_path is not None)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also
                # transpose x/y axes because pygame displays as column/row instead of
                # gym's row/column
                transposed = obs_history[0, :, :, -1].transpose((1, 0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()

            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]

            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)

        # if we're taking video, stop it now and clear video path so no more frames are
        # added if we're out of lives or there are no lives in this game
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()
def sample(self, horizon, policy, record_fname=None):
    """Samples a rollout from the agent.

    Arguments:
        horizon: (int) The length of the rollout to generate from the agent.
        policy: (policy) The policy that the agent will use for actions.
        record_fname: (str/None) The name of the file to which a recording of the rollout
            will be saved. If None, the rollout will not be recorded.

    Returns: (dict) A dictionary containing data from the rollout.
        The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
    """
    # default to be False
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(self.env, record_fname)

    times, rewards = [], []
    O, A, reward_sum, done = [self.env.reset()], [], 0, False

    policy.reset()
    for t in range(horizon):
        if video_record:
            recorder.capture_frame()
        start = time.time()
        A.append(policy.act(O[t], t))
        # print(O[t].shape, A[t].shape)
        times.append(time.time() - start)

        obs, reward, done, info = self.env.step(A[t])
        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break

    if video_record:
        recorder.capture_frame()
        recorder.close()

    print("Average action selection time: ", np.mean(times))
    print("Rollout length: ", len(A))

    return {
        "obs": np.array(O),
        "ac": np.array(A),
        "reward_sum": reward_sum,
        "rewards": np.array(rewards),
    }
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path, attack,
         m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
            num_moves += 1
            if action != action2:
                num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print('Reward: ' + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print('Episode: ' + str(num_episodes))
                success = float(num_transfer / num_moves) * 100.0
                print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None
    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0
        idx = 0
        done = False
        obs = env.reset()
        while not done:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)
            obs, reward, done, info = env.step(action)
            total_reward += reward
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))
            if rec is not None:
                rec.capture_frame()
            idx += 1
            if done or idx > 300:
                if idx > max_run_time:
                    max_run_time = idx
                elif idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx
                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec is not None:
        rec.close()
    print("[ RunLength =", 5, " MeanReward =", mean_reward / 5.0,
          "MeantrajReward =", mean_traj_reward / 5.0,
          " MeanRunTime =", mean_run_time / 5.0,
          " MaxRunTime =", max_run_time, " MinRunTime =", min_run_time, "]")
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def sample(self, horizon, policy, record_fname=None, test_policy=False, average=False):
    """Samples a rollout from the agent.

    Arguments:
        horizon: (int) The length of the rollout to generate from the agent.
        policy: (policy) The policy that the agent will use for actions.
        record_fname: (str/None) The name of the file to which a recording of the rollout
            will be saved. If None, the rollout will not be recorded.

    Returns: (dict) A dictionary containing data from the rollout.
        The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
    """
    if test_policy:
        logger.info('Testing the policy')
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(self.env, record_fname)

    times, rewards = [], []
    O, A, reward_sum, done = [self.env.reset()], [], 0, False
    self._debug += 1

    policy.reset()
    # for t in range(20):
    for t in range(horizon):
        if hasattr(self.env, 'render_imitation'):
            self.env.render_imitation()
        if t % 50 == 10 and t > 1:
            logger.info('Current timesteps: %d / %d, average time: %.5f' %
                        (t, horizon, np.mean(times)))
        if video_record:
            recorder.capture_frame()
        start = time.time()
        if test_policy:
            A.append(policy.act(O[t], t, test_policy=test_policy, average=average))
        else:
            A.append(policy.act(O[t], t))
        times.append(time.time() - start)

        if self.noise_stddev is None:
            obs, reward, done, info = self.env.step(A[t])
        else:
            action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev, size=[self.dU])
            action = np.minimum(np.maximum(action, self.env.action_space.low),
                                self.env.action_space.high)
            obs, reward, done, info = self.env.step(action)
        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break

    if video_record:
        recorder.capture_frame()
        recorder.close()

    logger.info("Average action selection time: %.4f" % np.mean(times))
    logger.info("Rollout length: %d" % len(A))

    return {
        "obs": np.array(O),
        "ac": np.array(A),
        "reward_sum": reward_sum,
        "rewards": np.array(rewards),
    }
def test_record_unrecordable_method():
    env = UnrecordableEnv()
    rec = VideoRecorder(env)
    assert not rec.enabled
    rec.close()
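# The recorder tests above reference two helper environments that are not defined
# in this section. The sketch below is an assumption of what such fixtures could
# look like, based on VideoRecorder's documented behaviour: it disables itself when
# 'rgb_array' is missing from metadata['render.modes'], and marks itself broken
# when render() returns no frame.
class UnrecordableEnv(object):
    # Hypothetical fixture: no 'rgb_array' render mode, so VideoRecorder
    # should disable itself (rec.enabled is False).
    metadata = {'render.modes': [None]}

    def render(self, mode=None):
        pass


class BrokenRecordableEnv(object):
    # Hypothetical fixture: advertises 'rgb_array' but never returns a frame,
    # so capture_frame() should mark the recorder as broken.
    metadata = {'render.modes': [None, 'rgb_array']}

    def render(self, mode=None):
        pass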
test_reward = []
for i_episode in range(200):
    observation = env.reset()
    rewards_sum = 0
    cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
    state = utils.build_state([
        utils.to_bin(cart_position, cart_position_bins),
        utils.to_bin(cart_velocity, cart_velocity_bins),
        utils.to_bin(pole_angle, pole_angle_bins),
        utils.to_bin(angle_rate_of_change, angle_rate_bins)
    ])
    record = VideoRecorder(env=env, path="cartpolev0.mp4")

    for t in range(max_number_of_steps):
        # env.render()

        # Pick an action based on the current state
        action = 0 if qlearn.getQ(state, 0) > qlearn.getQ(state, 1) else 1
        # Execute the action and get feedback
        observation, reward, done, info = env.step(action)

        # Digitize the observation to get a state
        cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
        nextState = utils.build_state([
            utils.to_bin(cart_position, cart_position_bins),
            utils.to_bin(cart_velocity, cart_velocity_bins),
            utils.to_bin(pole_angle, pole_angle_bins),
            utils.to_bin(angle_rate_of_change, angle_rate_bins)
        ])
    return actual_model(img_and_gaze_combined, num_actions, scope, **kwargs)

act = DeepqWithGaze.build_act(
    make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
    q_func=model_wrapper,
    num_actions=env.action_space.n)
U.load_state(os.path.join(args.model_dir, "saved"))
gaze_model.load_weights(
    'baselines/DeepqWithGaze/ImgOnly_gazeModels/seaquest-dp0.4-DQN+BNonInput.hdf5')
num_episodes = 0
video_recorder = VideoRecorder(env, args.video, enabled=args.video is not None)
obs = env.reset()
if args.debug_mode:
    fig, axarr = plt.subplots(2, 3)
    plt.show(block=False)
debug_embed_last_time = time.time()  # TODO this is temporary. delete it and its related code
debug_embed_freq_sec = 600
while True:
    if args.debug_mode and debug_gaze_in is not None:
        for i in range(4):
            axarr[int(i / 2), i % 2].cla()
            axarr[int(i / 2), i % 2].imshow(debug_gaze_in[0, :, :, i])
        axarr[1, 2].cla()
        axarr[1, 2].imshow(debug_gaze_in[0, :, :, 4])
    simulator = sim.env
    if hasattr(simulator, "env"):
        simulator = simulator.env
    simulator.set_state(
        real.env.model.data.qpos.ravel(),
        real.env.model.data.qvel.ravel()
    )

# dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH, for_training=True)
# dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

match_env(env_real, env_simplus)
match_env(env_real, env_sim)

video_recorder_real = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder_simplus = VideoRecorder(env_simplus, 'sim+.mp4', enabled=True)
video_recorder_sim = VideoRecorder(env_sim, 'sim.mp4', enabled=True)
video_recorders = [video_recorder_real, video_recorder_simplus, video_recorder_sim]

# for i, data in enumerate(dataloader_train):
for i in range(40):
    for j in range(50):
        env_real.render()
        env_simplus.render()
        env_sim.render()
        # action = data["actions"][0, j].numpy()
class VideoWrapper(Wrapper):
    """Creates videos from wrapped environment by calling render after each timestep."""

    def __init__(self, env, directory, single_video=True):
        """
        :param env: (gym.Env) the wrapped environment.
        :param directory: the output directory.
        :param single_video: (bool) if True, generates a single video file, with
               episodes concatenated. If False, a new video file is created for each
               episode. Usually a single video file is what is desired. However, if
               one is searching for an interesting episode (perhaps by looking at the
               metadata), saving to different files can be useful.
        """
        super(VideoWrapper, self).__init__(env)
        self.episode_id = 0
        self.video_recorder = None
        self.single_video = single_video

        self.directory = osp.abspath(directory)

        # Make sure to not put multiple different runs in the same directory,
        # if the directory already exists
        error_msg = "You're trying to use the same directory twice, " \
                    "this would result in files being overwritten"
        assert not os.path.exists(self.directory), error_msg
        os.makedirs(self.directory, exist_ok=True)

    def _step(self, action):
        obs, rew, done, info = self.env.step(action)
        if done:
            winners = [i for i, d in info.items() if 'winner' in d]
            metadata = {'winners': winners}
            self.video_recorder.metadata.update(metadata)
        self.video_recorder.capture_frame()
        return obs, rew, done, info

    def _reset(self):
        self._reset_video_recorder()
        self.episode_id += 1
        return self.env.reset()

    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video
        recorder if one does not already exist. When a video recorder is already
        present, it will only create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory, 'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )

    def _close(self):
        if self.video_recorder is not None:
            self.video_recorder.close()
            self.video_recorder = None
        super(VideoWrapper, self)._close()
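# A minimal usage sketch for the VideoWrapper above, assuming an old-style gym
# release whose Wrapper base class routes reset()/step()/close() through the
# _reset()/_step()/_close() hooks this class overrides. The environment id and
# output directory are placeholders, not taken from the original code.
import gym

env = VideoWrapper(gym.make("CartPole-v1"), directory="videos/run0", single_video=True)
for _ in range(3):
    obs, done = env.reset(), False  # _reset() starts (or reuses) the recorder
    while not done:
        obs, rew, done, info = env.step(env.action_space.sample())
env.close()  # flushes and closes the final recorder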
    train_data.num_examples, batch_size))
valid_data = H5PYDataset(
    DATASET_PATH,
    which_sets=('valid',),
    sources=('s_transition_obs', 'r_transition_obs', 'obs', 'actions'))
stream_valid = DataStream(valid_data, iteration_scheme=SequentialScheme(
    train_data.num_examples, batch_size))

iterator = stream_train.get_epoch_iterator(as_dict=True)
data = next(iterator)
length = data["actions"].shape[1]

match_env(env, env2)

video_recorder = VideoRecorder(env, 'sim+backlash.mp4', enabled=True)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=True)

for i, data in enumerate(stream_train.get_epoch_iterator(as_dict=True)):
    for j in range(length):
        action = data["actions"]
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        new_obs, reward, done, info = env.step(action)
        new_obs2, reward2, done2, info2 = env2.step(action)
    if i == 4:
        break

video_recorder.close()
video_recorder.enabled = False
# train_data = H5PYDataset(
#     DATASET_PATH, which_sets=('train',), sources=('s_transition_obs', 'r_transition_obs', 'obs', 'actions')
# )
# stream_train = DataStream(train_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))
# valid_data = H5PYDataset(
#     DATASET_PATH, which_sets=('valid',), sources=('s_transition_obs', 'r_transition_obs', 'obs', 'actions')
# )
# stream_valid = DataStream(valid_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))
# iterator = stream_train.get_epoch_iterator(as_dict=True)
# data = next(iterator)
# length = data["actions"].shape[1]
length = 10

video_recorder = VideoRecorder(env, 'real_backlash.mp4', enabled=False)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=False)

# Only first six predicted by the lstm
num_obs = 10
array = np.zeros((2, 100, num_obs))

for i, data in enumerate(range(1)):
    env.reset()
    env2.reset()
    match_env(env, env2)
    new_obs = env.unwrapped._get_obs()
    new_obs2 = env2.unwrapped._get_obs()
    for j in range(100):
        # env.render()
def match_env(real, sim):
    # set env1 (simulator) to that of env2 (real robot)
    sim.env.set_state(real.env.model.data.qpos.ravel(),
                      real.env.model.data.qvel.ravel())


dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH, for_training=True)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

match_env(env_real, env_sim)

video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()
        action = data["actions"][0, j].numpy()
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        obs_real, _, _, _ = env_real.step(action.copy())
        obs_simp, _, _, _ = env_sim.step(action.copy())