示例#1
0
 def _step(self, action_n):
     """Step the wrapped env, then let diagnostics annotate ``info['n']``.

     Returns the ``(observation_n, reward_n, done_n, info)`` values produced
     by ``self.env.step``; when ``self.unwrapped.diagnostics`` is truthy its
     ``add_metadata`` is called with the observations and ``info['n']``
     before returning.
     """
     obs_batch, reward_batch, done_batch, info = self.env.step(action_n)

     diagnostics = self.unwrapped.diagnostics
     if not diagnostics:
         return obs_batch, reward_batch, done_batch, info

     # This wrapper is meant to sit above Mask, so that we know whether or
     # not a particular index is resetting.
     with pyprofile.push('vnc_env.diagnostics.add_metadata'):
         diagnostics.add_metadata(obs_batch, info['n'])
     return obs_batch, reward_batch, done_batch, info
示例#2
0
    def add_metadata(self, observation_n, info_n, available_at=None):
        """Run ``self._add_metadata_i`` over every instance's observation.

        Mutates the info_n dictionaries and returns ``None``. Does nothing
        when ``self.instance_n`` is ``None``.

        Args:
            observation_n: sequence of observations, zipped with
                ``self.instance_n``.
            info_n: sequence of info dicts, zipped with ``observation_n``.
            available_at: single value paired with every observation and
                handed to ``self._add_metadata_i`` as the last tuple item.
        """
        if self.instance_n is None:
            return

        with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
            # The result iterator used to be bound to a local named
            # ``async``; that is a reserved keyword since Python 3.7 and
            # made this method a SyntaxError.
            pending = self.pool.imap_unordered(
                self._add_metadata_i,
                zip(self.instance_n, observation_n, info_n,
                    [available_at] * len(observation_n)))
            # Drain the iterator; presumably this is what waits for all of
            # the pool's tasks to finish (verify against the pool type).
            for _ in pending:
                pass
示例#3
0
    def _step(self, action_n):
        """Step the wrapped env and report the time spent not sleeping.

        The wall-clock duration of the step, minus the
        ``'stats.throttle.sleep'`` value from ``info`` (treated as 0 when
        missing or negative), is sent to ``pyprofile.timing``. The step
        results are returned untouched.
        """
        began = time.time()
        with pyprofile.push('vnc_env.Timer.step'):
            observation_n, reward_n, done_n, info = self.env.step(action_n)

        # Work out how much of the elapsed time was real work, not sleeping
        reported_sleep = info.get('stats.throttle.sleep')
        slept = 0 if (reported_sleep is None
                      or reported_sleep < 0) else reported_sleep
        busy = time.time() - began - slept
        pyprofile.timing('vnc_env.Timer.step.excluding_sleep', busy)

        return observation_n, reward_n, done_n, info
示例#4
0
    def flip(self):
        """Swap the front and back screens if the back one has been updated.

        Runs under ``self.lock``. When ``self._back_updated`` is set, the two
        entries of ``self._screens`` are exchanged, the flag is cleared and
        ``self.update_back()`` is called; the value of ``self._defer`` read
        before that call is reported as the framebuffer updates. Otherwise
        an empty update list is reported.

        Returns:
            A 2-tuple ``(self.peek(), {'vnc_session.framebuffer_updates':
            updates})``.
        """
        pyprofile.push('vncdriver.numpy_screen.flip_bitmap')
        try:
            with self.lock:
                if self._back_updated:
                    updates = self._defer

                    # Flip screens
                    front_screen, back_screen = self._screens
                    self._screens = back_screen, front_screen

                    # Mark ourselves as pending application of updates
                    self._back_updated = False

                    # This can be called asynchronously if desired, but it
                    # means less reliably smooth playback.
                    #
                    # reactor.callFromThread(self.update_back)
                    self.update_back()
                else:
                    updates = []
                return self.peek(), {
                    'vnc_session.framebuffer_updates': updates
                }
        finally:
            # Always balance the push above, even if something under the
            # lock raised; otherwise the profiler stack is left unbalanced.
            pyprofile.pop()
示例#5
0
    def _pop_rewarder_session(self, peek_d):
        """Pop the rewarder session and flatten its dicts into lists.

        ``self.rewarder_session.pop`` yields four name-keyed mappings
        (reward, done, info, error). Each is turned into a list ordered like
        ``self.connection_names``; names missing from a mapping get ``0``,
        ``False``, ``{'env_status.disconnected': True}`` and ``None``
        respectively.

        Returns:
            ``(reward_n, done_n, info_n, err_n)``.
        """
        with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
            reward_d, done_d, info_d, err_d = self.rewarder_session.pop(
                peek_d=peek_d)

        names = list(self.connection_names)
        reward_n = [reward_d.get(name, 0) for name in names]
        done_n = [done_d.get(name, False) for name in names]
        # A fresh placeholder dict is built for each missing connection.
        info_n = [
            info_d.get(name, {'env_status.disconnected': True})
            for name in names
        ]
        err_n = [err_d.get(name) for name in names]
        return reward_n, done_n, info_n, err_n
示例#6
0
    def _substep(self, action_n):
        """Step the wrapped env once, timestamp the result, run diagnostics.

        Records the time the observation became available under
        ``info['throttle.observation.available_at']``, logs when the step
        took more than one second, and, unless ``self.skip_metadata`` is set
        or ``self.diagnostics`` is ``None``, calls
        ``self.diagnostics.add_metadata`` on the observations and
        ``info['n']``.

        Returns:
            ``(observation_n, reward_n, done_n, info)`` from ``self.env.step``.
        """
        with pyprofile.push('vnc_env.Throttle.step'):
            began = time.time()
            # Submit the action ASAP, before the thread goes to sleep.
            observation_n, reward_n, done_n, info = self.env.step(action_n)

            available_at = time.time()
            info['throttle.observation.available_at'] = available_at

            elapsed = available_at - began
            if elapsed > 1:
                logger.info('env.step took a long time: %.2fs', elapsed)

            if not self.skip_metadata:
                if self.diagnostics is not None:
                    # Run (slow) diagnostics
                    self.diagnostics.add_metadata(
                        observation_n, info['n'], available_at=available_at)

            return observation_n, reward_n, done_n, info
示例#7
0
    def _step_vnc_session(self, compiled_d):
        """Step the VNC session and flatten its dicts into lists.

        When ``self._send_actions_over_websockets`` is set, the compiled
        actions go to ``self.rewarder_session.send_action`` and the VNC
        session is stepped with an empty action dict; otherwise the compiled
        actions are passed to the VNC session directly.

        Returns:
            ``(observation_n, info_n, err_n)``, each ordered like
            ``self.connection_names`` with ``None`` for missing names.
        """
        vnc_action_d = compiled_d
        if self._send_actions_over_websockets:
            self.rewarder_session.send_action(compiled_d, self.spec.id)
            vnc_action_d = {}

        with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
            observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)

        names = list(self.connection_names)
        observation_n = [observation_d.get(name) for name in names]
        info_n = [info_d.get(name) for name in names]
        err_n = [err_d.get(name) for name in names]
        return observation_n, info_n, err_n
示例#8
0
    for i in range(args.max_steps):
        # print(observation_n)
        # user_input.handle_events()

        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = agent(observation_n, reward_n, done_n)

        # Take an action
        with pyprofile.push('env.step'):
            observation_n, reward_n, done_n, info = env.step(action_n)

        episode_length += 1
        if not all(r is None
                   for r in reward_n):  # checks if we connected the rewarder
            episode_score += np.array(reward_n)
        for i, ob in enumerate(observation_n):
            if ob is not None and (not isinstance(ob, dict)
                                   or ob['vision'] is not None):
                observation_count[i] += 1

        scores = {}
        lengths = {}
        observations = {}
        for i, done in enumerate(done_n):
示例#9
0
    def add_metadata(self, observation, info, available_at=None):
        """Extract metadata from a pixel observation and add it to the info.

        Mutates ``info`` in place and returns ``None``. Returns early,
        leaving ``info`` untouched, when the observation has no vision
        frame, when ``self.network`` exists but is not active, or when there
        is no ``self.metadata_decoder``.

        Args:
            observation: mapping whose ``'vision'`` entry is passed to
                ``self.metadata_decoder.decode``.
            info: dict that receives ``diagnostics.*`` and
                ``stats.gauges.diagnostics.*`` entries; its
                ``'rewarder.lag.observation.timestamp'`` and
                ``'reward_buffer.remote_time'`` entries are read if present.
            available_at: forwarded to the decoder unchanged.
        """
        observation = observation['vision']
        if observation is None:
            return
        if self.network is not None and not self.network.active():
            return
        if self.metadata_decoder is None:
            return

        # should return a dict with now/probe_received_at keys
        with pyprofile.push(
                'vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
            metadata = self.metadata_decoder.decode(observation,
                                                    available_at=available_at)

        if metadata is False:
            # No metadata ready, though it doesn't mean parsing failed
            metadata = None
        elif metadata is None:
            if self.could_read_metadata:
                self.could_read_metadata = False
                extra_logger.info(
                    '[%s] Stopped being able to read metadata (expected when environment resets)',
                    self.label)
        elif not self.could_read_metadata:
            self.could_read_metadata = True
            extra_logger.info('[%s] Started being able to read metadata',
                              self.label)

        if self.metadata_decoder.flag_synchronous and metadata is not None:
            info['diagnostics.image_remote_time'] = metadata['now']

        if self.network is None:
            # Assume the clock skew is zero. Should only be run on the
            # same machine as the VNC server, such as the jiminy
            # instance inside of the environment containers.
            real_clock_skew = self.zero_clock_skew
        else:
            # Note: this is a 2-length vector of (min, max), so anything added to
            # it is also going to be a 2-length vector.
            # Most of the diagnostics below are, but you have to look carefully.
            real_clock_skew = self.network.reversed_clock_skew()

        # Store real clock skew here
        info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
        if self.ignore_clock_skew:
            clock_skew = self.zero_clock_skew
        else:
            clock_skew = real_clock_skew

        if metadata is not None:
            # We'll generally update the observation timestamp infrequently,
            # so only report observation lag when the timestamp changed.
            if self.last_observation_timestamp != metadata['now']:
                # We just got a new timestamp in the observation!
                self.last_observation_timestamp = metadata['now']
                observation_now = metadata['now']
                delta = observation_now - metadata['available_at']

                # Subtract *local* time it was received from the *remote* time
                # displayed. Negate and reverse order to fix time ordering.
                info['stats.gauges.diagnostics.lag.observation'] = -(
                    delta + clock_skew)[[1, 0]]

            # if self.network is None:
            #     # The rest of diagnostics need the network, so we're done here
            #     return

            probe_received_at = metadata['probe_received_at']
            if probe_received_at == 0 or self.disable_action_probes:
                # Happens when the env first starts
                self.probe_received_at = None
            elif self.probe_received_at is None:  # this also would work for the equality case
                self.probe_received_at = probe_received_at
            elif self.probe_received_at != probe_received_at and self.probe_sent_at is None:
                logger.info(
                    '[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple jiminy instances talking to the same environment?)',
                    self.label, probe_received_at)
            elif self.probe_received_at != probe_received_at:
                extra_logger.debug('[%s] Next probe received: old=%s new=%s',
                                   self.label, self.probe_received_at,
                                   probe_received_at)
                self.probe_received_at = probe_received_at
                # Subtract the *local* time we sent it from the *remote* time it was received
                self.action_latency_skewed = probe_received_at - self.probe_sent_at
                self.probe_sent_at = None

            # Truthiness check: a stored latency of exactly 0 is treated the
            # same as "no new measurement".
            if self.action_latency_skewed:
                action_lag = self.action_latency_skewed + clock_skew
                # Consume the measurement so it is reported only once.
                self.action_latency_skewed = None
            else:
                action_lag = None
            info['stats.gauges.diagnostics.lag.action'] = action_lag

        local_now = time.time()
        # Look at when the remote believed it parsed the score (not
        # all envs send this currently).
        #
        # Also, if we received no new rewards, then this value is
        # None. This could indicate a high reward latency (bad,
        # uncommon), or that the agent is calling step faster than new
        # rewards are coming in (good, common).
        remote_score_now = info.get('rewarder.lag.observation.timestamp')
        if remote_score_now is not None:
            delta = remote_score_now - local_now
            info['stats.gauges.diagnostics.lag.reward'] = -(delta +
                                                            clock_skew)[[1, 0]]

        # Look at when the remote sent the message, so we know how
        # long it's taking for messages to get to us.
        rewarder_message_now = info.get('reward_buffer.remote_time')
        if rewarder_message_now:
            delta = rewarder_message_now - local_now
            info['stats.gauges.diagnostics.lag.rewarder_message'] = -(
                delta + clock_skew)[[1, 0]]
示例#10
0
 def write_item(self, item):
     """Append ``item`` to the log file as one JSON line and flush it.

     Serialization skips non-basic dict keys (``skipkeys=True``) and falls
     back to ``self.json_encode`` for objects ``json`` cannot encode.
     """
     with pyprofile.push('recording.write'):
         encoded = json.dumps(item, skipkeys=True, default=self.json_encode)
         sink = self.log_f
         sink.write(encoded + '\n')
         sink.flush()
示例#11
0
 def _reset(self):
     """Reset the wrapped env inside a profiling scope and return its result."""
     with pyprofile.push('vnc_env.Timer.reset'):
         initial = self.env.reset()
     return initial