def _flip_past(self, observation_n, reward_n, done_n, info):
    # Wait until all observations are past the corresponding reset times
    remote_target_time = [info_i['reward_buffer.remote_time'] for info_i in info['n']]
    while True:
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
        # info_i['diagnostics.image_remote_time'] may not exist, for example when an env
        # is resetting. target is a timestamp, thus > 0, so these will count as "need to catch up"
        deltas = [target - info_i.get('diagnostics.image_remote_time', 0)
                  for target, info_i in zip(remote_target_time, new_info['n'])]
        count = len([d for d in deltas if d > 0])
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            new_observation_n, new_reward_n, new_done_n, new_info,
        )
        if count == 0:
            return
        else:
            logger.debug('[GymCoreSync] Still waiting on %d envs to catch up to their targets: %s', count, deltas)
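# Illustrative only: a minimal sketch of the merge semantics the calls to
# rewarder.merge_n above rely on. The real helper lives in universe's rewarder
# module and also merges observations and info dicts; the simplified function
# below is hypothetical and shows just the reward/done part: rewards from
# intermediate catch-up steps accumulate, and done flags are OR-ed so an
# episode boundary seen mid-catch-up is not lost.
def _merge_reward_done_sketch(accum_reward_n, accum_done_n, new_reward_n, new_done_n):
    for i, (new_reward, new_done) in enumerate(zip(new_reward_n, new_done_n)):
        accum_reward_n[i] += new_reward
        accum_done_n[i] = accum_done_n[i] or new_done
    return accum_reward_n, accum_done_n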
def _step(self, action_n):
    # Add C keypress in order to "commit" the action, as
    # interpreted by the remote.
    action_n = [action + [
        spaces.KeyEvent.by_name('c', down=True),
        spaces.KeyEvent.by_name('c', down=False)
    ] for action in action_n]

    observation_n, reward_n, done_n, info = self.env.step(action_n)
    if self.reward_n is not None:
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            [None] * self.n, self.reward_n, self.done_n, self.info,
        )
        self.reward_n = self.done_n = self.info = None

    while True:
        count = len([True for info_i in info['n'] if info_i['stats.reward.count'] == 0])
        if count > 0:
            logger.debug('[GymCoreSync] Still waiting on %d envs to receive their post-commit reward', count)
        else:
            break

        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            new_observation_n, new_reward_n, new_done_n, new_info
        )

    assert all(info_i['stats.reward.count'] == 1 for info_i in info['n']), \
        "Expected all stats.reward.counts to be 1: {}".format(info)

    # Fast forward until the observation is caught up with the rewarder
    self._flip_past(observation_n, reward_n, done_n, info)

    return observation_n, reward_n, done_n, info
def _step(self, action_n):
    observation_n, reward_n, done_n, info = self.env.step(action_n)
    if self.reward_n is not None:
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            [None] * self.n, self.reward_n, self.done_n, self.info,
        )
        self.reward_n = self.done_n = self.info = None
    return self._observation(done_n, info), reward_n, done_n, info
def _reset(self):
    observation_n = self.env.reset()
    self.reward_n = [0] * self.n
    self.done_n = [False] * self.n
    self.info = {'n': [{} for _ in range(self.n)]}

    new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
    rewarder.merge_n(
        observation_n, self.reward_n, self.done_n, self.info,
        new_observation_n, new_reward_n, new_done_n, new_info
    )

    return self._observation(self.done_n, self.info)
def _reset(self):
    observation_n = self.env.reset()
    self.reward_n = [0] * self.n
    self.done_n = [False] * self.n
    self.info = {'n': [{} for _ in range(self.n)]}

    while any(ob is None for ob in observation_n):
        action_n = []
        for done in self.done_n:
            if done:
                # No popping of reward/done. Don't want to merge across episode boundaries.
                action_n.append([spaces.PeekReward])
            else:
                action_n.append([])
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(action_n)
        rewarder.merge_n(
            observation_n, self.reward_n, self.done_n, self.info,
            new_observation_n, new_reward_n, new_done_n, new_info
        )

    return observation_n
def _step(self, action_n):
    observation_n, reward_n, done_n, info = self.env.step(action_n)
    if self.reward_n is not None:
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            [None] * self.n, self.reward_n, self.done_n, self.info
        )
        self.reward_n = self.done_n = self.info = None

    while any(ob is None for ob in observation_n):
        action_n = []
        for done in done_n:
            if done:
                # No popping of reward/done. Don't want to merge across episode boundaries.
                action_n.append([spaces.PeekReward])
            else:
                action_n.append([])
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(action_n)
        rewarder.merge_n(
            observation_n, reward_n, done_n, info,
            new_observation_n, new_reward_n, new_done_n, new_info
        )

    return observation_n, reward_n, done_n, info
def _step(self, action_n):
    if self._steps is None:
        self._start_timer()
    self._steps += 1

    accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
    accum_info['throttle.action.available_at'] = time.time()

    # Record which indexes we were just peeking at, so when we
    # make the follow-up we'll be sure to peek there too.
    peek_n = [any(peek == spaces.PeekReward for peek in action) for action in action_n]

    if self.fps is None:
        return accum_observation_n, accum_reward_n, accum_done_n, accum_info

    accum_info['stats.throttle.sleep'] = 0
    while True:
        # See how much time we have to idle
        delta = self._start + 1. / self.fps * self._steps - time.time()
        # The following assumes that our control loop keeps up with the target frame rate.
        if delta < 0:
            # We're out of time. Just get out of here.
            delta = abs(delta)
            if delta >= 1:
                logger.info('Throttle fell behind by %.2fs; lost %.2f frames', delta, self.fps * delta)
            pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
            self._start_timer()
            break
        # elif delta < 0.008:
        #     # Only have 8ms. Let's spend it sleeping, and
        #     # return an image which may have up to an
        #     # additional 8ms lag.
        #     #
        #     # 8ms is reasonably arbitrary; we just want something
        #     # that's small where it's not actually going to help
        #     # if we make another step call. Step with 32 parallel
        #     # envs takes about 6ms (about half of which is
        #     # diagnostics, which could be totally async!), so 8 is
        #     # a reasonable choice for now..
        #     pyprofile.timing('vnc_env.Throttle.sleep', delta)
        #     accum_info['stats.throttle.sleep'] += delta
        #     time.sleep(delta)
        #     break
        else:
            # We've got plenty of time. Sleep for up to 16ms, and
            # then refresh our current frame. We need to
            # constantly be calling step so that our lags are
            # reported correctly, within 16ms. (The layering is
            # such that the vncdriver doesn't know which pixels
            # correspond to metadata, and the diagnostics don't
            # know when pixels first got painted. So we do our
            # best to present frames as they're ready to the
            # diagnostics.)
            delta = min(delta, 0.016)
            pyprofile.timing('vnc_env.Throttle.sleep', delta)
            accum_info['stats.throttle.sleep'] += delta
            time.sleep(delta)

        # We want to merge in the latest reward/done/info so that our
        # agent has the most up-to-date info post-sleep, but also want
        # to avoid popping any rewards where done=True (since we'd
        # have to merge across episode boundaries).
        action_n = []
        for done, peek in zip(accum_done_n, peek_n):
            if done or peek:
                # No popping of reward/done
                action_n.append([spaces.PeekReward])
            else:
                action_n.append([])
        observation_n, reward_n, done_n, info = self._substep(action_n)

        # Merge observation, rewards and metadata.
        # Text observations keep the order in which the messages were sent.
        rewarder.merge_n(
            accum_observation_n, accum_reward_n, accum_done_n, accum_info,
            observation_n, reward_n, done_n, info,
        )

    return accum_observation_n, accum_reward_n, accum_done_n, accum_info
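# Illustrative only: the frame-budget arithmetic from the throttling loop above,
# in isolation. The function name and signature are hypothetical; the real wrapper
# interleaves these short sleeps with extra _substep calls so lag diagnostics and
# rewards stay fresh, which this sketch omits.
import time

def _throttle_sleep_sketch(start, steps, fps, max_slice=0.016):
    slept = 0.0
    while True:
        # Time left until this step's wall-clock deadline (step `steps` at `fps` frames/sec).
        delta = start + float(steps) / fps - time.time()
        if delta <= 0:
            # Out of budget; the wrapper above resets its timer when it falls far behind.
            return slept
        # Sleep in short slices (<= 16ms) rather than one long sleep.
        slice_ = min(delta, max_slice)
        time.sleep(slice_)
        slept += slice_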