示例#1
0
 def apply(self, framebuffer_update):
     """Apply a framebuffer update to the pyglet screen.

     Blits every rectangle whose encoding is one of the recognized
     types; raises error.Error for any other encoding.
     """
     # Context-manager form keeps the profiling span balanced even if
     # update_rectangle or the error path raises; the bare push()/pop()
     # pair leaked the span on exceptions. (pyprofile.push is used as a
     # context manager elsewhere in this project.)
     with pyprofile.push('vncdriver.pyglet_screen.apply'):
         for rect in framebuffer_update.rectangles:
             if isinstance(rect.encoding,
                           (server_messages.RAWEncoding,
                            server_messages.ZRLEEncoding,
                            server_messages.ZlibEncoding)):
                 self.update_rectangle(rect.x, rect.y, rect.width,
                                       rect.height, rect.encoding.data)
             else:
                 raise error.Error('Unrecognized encoding: {}'.format(rect.encoding))
示例#2
0
 def __call__(self, img):
     """Return the score for the cropped region of ``img``.

     Hits the crop cache first; on a miss, computes the value with the
     wrapped method and stores it for next time.
     """
     cropped = crop(img, self.crop_coords)
     with pyprofile.push('score.crop_cache.get.{}'.format(self.name)):
         cached = self.get(cropped)
     if cached is not None:
         return cached
     # Cache miss: compute via the wrapped method and store the result.
     with pyprofile.push('score.crop_cache.readthrough.{}'.format(
             self.name)):
         computed = self.method(cropped)
     self.put(cropped, computed)
     return computed
示例#3
0
 def apply(self, framebuffer_update):
     """Apply a framebuffer update to the pyglet screen.

     Blits every rectangle whose encoding is one of the recognized
     types; raises error.Error for any other encoding.
     """
     # Context-manager form keeps the profiling span balanced even if
     # update_rectangle or the error path raises; the bare push()/pop()
     # pair leaked the span on exceptions. (pyprofile.push is used as a
     # context manager elsewhere in this project.)
     with pyprofile.push('vncdriver.pyglet_screen.apply'):
         for rect in framebuffer_update.rectangles:
             if isinstance(
                     rect.encoding,
                 (server_messages.RAWEncoding, server_messages.ZRLEEncoding,
                  server_messages.ZlibEncoding)):
                 self.update_rectangle(rect.x, rect.y, rect.width, rect.height,
                                       rect.encoding.data)
             else:
                 raise error.Error('Unrecognized encoding: {}'.format(
                     rect.encoding))
示例#4
0
 def _step(self, action_n):
     """Step the wrapped env and maintain a rolling frame buffer per env.

     Each per-env buffer stacks ``buffer_size`` frames along the channel
     axis; on every step the oldest frame is shifted out and the newest
     observation appended. Returns copies of the buffers so callers
     cannot mutate internal state.
     """
     with pyprofile.push('env.obs_buffer.step'):
         with pyprofile.push('env.obs_buffer.after_step'):
             observation_n, reward_n, done_n, info = self.env.step(action_n)
         num_c = self._num_channels
         for i in range(self.n):
             buffer = self.buffer_n[i]
             # Shift the window left: drop the oldest num_c channels.
             buffer[..., 0:num_c * (self.buffer_size - 1)] = \
                 buffer[..., num_c:num_c * self.buffer_size]
             # Bug fix: append the i-th env's observation; the original
             # indexed observation_n[0] inside the per-env loop, writing
             # the first env's frame into every buffer.
             buffer[..., -num_c:] = observation_n[i]
         return [buffer.copy()
                 for buffer in self.buffer_n], reward_n, done_n, info
示例#5
0
    def _decode(self, observation, start, available_at):
        """Decode the v1 QR-code metadata embedded in an observation.

        Returns None when no QR code could be scanned, otherwise a dict
        of timing metadata. Raises error.Error on a malformed payload.
        """
        # This method gets wrapped by AsyncDecode.__call__
        with pyprofile.push('vnc_env.diagnostics.QRCodeMetadataDecoder.qr_code_scanner'):
            encoded = fastzbarlight.qr_code_scanner(observation.tobytes(), self.width, self.height)
        if encoded is None:
            # Failed to parse!
            return
        if not encoded.startswith(b'v1:'):
            raise error.Error('Bad version string for metadata from environment: {}'.format(encoded))

        encoded = encoded.decode('utf-8')
        if len(encoded) != len('v1:') + 12 + 12:
            # Typo fix in the message: 'enviroment' -> 'environment'
            # (now consistent with the version-string error above).
            raise error.Error('Bad length for metadata from environment: {}'.format(encoded))

        encoded = encoded[len('v1:'):]
        # Two hex-encoded millisecond timestamps, converted to seconds.
        last_update = int(encoded[:12], 16) / 1000.0
        last_action = int(encoded[12:24], 16) / 1000.0

        return {
            # Timestamp on the image
            'now': last_update,
            # When the last probe was received
            'probe_received_at': last_action,
            'processing_start': start,
            'processing_end': time.time(),
            'available_at': available_at,
        }
示例#6
0
    def _decode(self, observation, start, available_at):
        """Decode the v1 QR-code metadata embedded in an observation.

        Returns None when no QR code could be scanned, otherwise a dict
        of timing metadata. Raises error.Error on a malformed payload.
        """
        # This method gets wrapped by AsyncDecode.__call__
        with pyprofile.push(
                'vnc_env.diagnostics.QRCodeMetadataDecoder.qr_code_scanner'):
            encoded = fastzbarlight.qr_code_scanner(observation.tobytes(),
                                                    self.width, self.height)
        if encoded is None:
            # Failed to parse!
            return
        if not encoded.startswith(b'v1:'):
            raise error.Error(
                'Bad version string for metadata from environment: {}'.format(
                    encoded))

        encoded = encoded.decode('utf-8')
        if len(encoded) != len('v1:') + 12 + 12:
            # Typo fix in the message: 'enviroment' -> 'environment'
            # (now consistent with the version-string error above).
            raise error.Error(
                'Bad length for metadata from environment: {}'.format(
                    encoded))

        encoded = encoded[len('v1:'):]
        # Two hex-encoded millisecond timestamps, converted to seconds.
        last_update = int(encoded[:12], 16) / 1000.0
        last_action = int(encoded[12:24], 16) / 1000.0

        return {
            # Timestamp on the image
            'now': last_update,
            # When the last probe was received
            'probe_received_at': last_action,
            'processing_start': start,
            'processing_end': time.time(),
            'available_at': available_at,
        }
示例#7
0
 def _step(self, action_n):
     """Step the env, then attach diagnostics metadata to info['n']."""
     observation_n, reward_n, done_n, info = self.env.step(action_n)
     # Runs above Mask so we can still tell whether a particular index
     # is currently resetting.
     diagnostics = self.unwrapped.diagnostics
     if diagnostics:
         with pyprofile.push('vnc_env.diagnostics.add_metadata'):
             diagnostics.add_metadata(observation_n, info['n'])
     return observation_n, reward_n, done_n, info
示例#8
0
 def _step(self, action_n):
     """Step the wrapped env and annotate info['n'] with diagnostics."""
     result = self.env.step(action_n)
     observation_n, reward_n, done_n, info = result
     # Placed above Mask so per-index resets are still visible here.
     if self.diagnostics:
         with pyprofile.push('vnc_env.diagnostics.add_metadata'):
             self.diagnostics.add_metadata(observation_n, info['n'])
     return observation_n, reward_n, done_n, info
示例#9
0
    def add_metadata(self, observation_n, info_n, available_at=None):
        """Mutates the info_n dictionary.

        Fans the per-instance metadata work out over the worker pool
        and blocks until every item has been processed.
        """
        if self.instance_n is None:
            return

        with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
            # Bug fix: the result was bound to a variable named `async`,
            # which is a reserved keyword since Python 3.7 and therefore
            # a SyntaxError there.
            results = self.pool.imap_unordered(
                self._add_metadata_i,
                zip(self.instance_n, observation_n, info_n,
                    [available_at] * len(observation_n)))
            # Drain the iterator so all work completes before returning.
            list(results)
示例#10
0
    def add_metadata(self, observation_n, info_n, available_at=None):
        """Mutates the info_n dictionary.

        Fans the per-instance metadata work out over the worker pool
        and blocks until every item has been processed.
        """
        if self.instance_n is None:
            return

        with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
            # Bug fix: the result was bound to a variable named `async`,
            # which is a reserved keyword since Python 3.7 and therefore
            # a SyntaxError there.
            results = self.pool.imap_unordered(
                self._add_metadata_i,
                zip(self.instance_n, observation_n, info_n,
                    [available_at] * len(observation_n)))
            # Drain the iterator so all work completes before returning.
            list(results)
示例#11
0
文件: timer.py 项目: deev/universe
    def _step(self, action_n):
        """Step the wrapped env, timing the call minus throttle sleep."""
        start = time.time()
        with pyprofile.push('vnc_env.Timer.step'):
            observation_n, reward_n, done_n, info = self.env.step(action_n)

        # Only count time spent doing real work: subtract any sleep the
        # throttle reported, treating missing/negative values as zero.
        sleep = info.get('stats.throttle.sleep')
        if not sleep or sleep < 0:
            sleep = 0
        pyprofile.timing('vnc_env.Timer.step.excluding_sleep',
                         time.time() - start - sleep)
        return observation_n, reward_n, done_n, info
示例#12
0
    def _step(self, action_n):
        """Step the env and report work time excluding throttle sleep."""
        started = time.time()
        with pyprofile.push('vnc_env.Timer.step'):
            result = self.env.step(action_n)
        observation_n, reward_n, done_n, info = result

        # Missing or negative sleep readings count as zero.
        throttle_sleep = max(info.get('stats.throttle.sleep') or 0, 0)
        elapsed = time.time() - started
        pyprofile.timing('vnc_env.Timer.step.excluding_sleep',
                         elapsed - throttle_sleep)
        return observation_n, reward_n, done_n, info
示例#13
0
    def _substep(self, action_n):
        """Step the env inside a profiling span and stamp availability.

        Records when the observation became available in
        info['throttle.observation.available_at'] and, unless metadata
        is skipped, runs the diagnostics pass.
        """
        with pyprofile.push('vnc_env.Throttle.step'):
            start = time.time()
            # Submit the action ASAP, before the thread goes to sleep.
            observation_n, reward_n, done_n, info = self.env.step(action_n)

            available_at = time.time()
            info['throttle.observation.available_at'] = available_at
            elapsed = available_at - start
            if elapsed > 1:
                logger.info('env.step took a long time: %.2fs', elapsed)
            if not self.skip_metadata and self.diagnostics is not None:
                # Run (slow) diagnostics
                self.diagnostics.add_metadata(
                    observation_n, info['n'], available_at=available_at)
            return observation_n, reward_n, done_n, info
示例#14
0
    def _pop_rewarder_session(self, peek_d):
        """Pop per-connection rewarder results into parallel lists.

        Returns (reward_n, done_n, info_n, err_n), one entry per name in
        self.connection_names, substituting defaults for missing names.
        """
        with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
            reward_d, done_d, info_d, err_d = self.rewarder_session.pop(peek_d=peek_d)

        names = self.connection_names
        reward_n = [reward_d.get(name, 0) for name in names]
        done_n = [done_d.get(name, False) for name in names]
        # A missing info entry means the connection dropped.
        info_n = [info_d.get(name, {'env_status.disconnected': True})
                  for name in names]
        err_n = [err_d.get(name) for name in names]
        return reward_n, done_n, info_n, err_n
示例#15
0
    def flip(self):
        """Swap front/back screens and return (pixels, update-info).

        Returns the current front-buffer contents (via peek) together
        with a dict carrying the framebuffer updates deferred since the
        last flip. If the back buffer has not yet been brought up to
        date, no flip happens and an empty update list is reported.
        """
        # Context-manager form keeps the profiling span balanced even if
        # peek() or update_back() raises; the bare push()/pop() pair
        # leaked the span on exceptions.
        with pyprofile.push('vncdriver.numpy_screen.flip_bitmap'):
            with self.lock:
                if self._back_updated:
                    updates = self._defer

                    # Flip screens
                    front_screen, back_screen = self._screens
                    self._screens = back_screen, front_screen

                    # Mark ourselves as pending application of updates
                    self._back_updated = False

                    # This can be called asynchronously if desired, but it means
                    # less reliably smooth playback.
                    #
                    # reactor.callFromThread(self.update_back)
                    self.update_back()
                else:
                    updates = []
                result = self.peek(), {'vnc_session.framebuffer_updates': updates}
            return result
示例#16
0
    def _pop_rewarder_session(self, peek_d):
        """Collect rewarder-session results into per-connection lists.

        Missing names get defaults: reward 0, done False, a
        disconnected-status info dict, and a None error.
        """
        with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
            popped = self.rewarder_session.pop(peek_d=peek_d)
        reward_d, done_d, info_d, err_d = popped

        disconnected = {'env_status.disconnected': True}
        reward_n, done_n, info_n, err_n = [], [], [], []
        for name in self.connection_names:
            reward_n.append(reward_d.get(name, 0))
            done_n.append(done_d.get(name, False))
            info_n.append(info_d.get(name, dict(disconnected)))
            err_n.append(err_d.get(name))
        return reward_n, done_n, info_n, err_n
示例#17
0
 def _observation(self, observation_n):
     """Crop each observation to the configured window.

     When normalize is set, pixel values are also converted to float32
     in [0, 1]. None observations pass through unchanged.
     """
     with pyprofile.push('env.crop_screen.observation'):
         rows = slice(self.top, self.top + self.height)
         cols = slice(self.left, self.left + self.width)
         if self.normalize:
             return [
                 None if ob is None else
                 ob[rows, cols, :].astype(np.float32) * (1.0 / 255.0)
                 for ob in observation_n
             ]
         return [
             None if ob is None else ob[rows, cols, :]
             for ob in observation_n
         ]
示例#18
0
    def _substep(self, action_n):
        """Step the wrapped env inside a profiling span.

        Records when the observation became available in
        ``info['throttle.observation.available_at']`` and, unless
        metadata collection is skipped, runs the (slow) diagnostics
        pass on the new observations.
        """
        with pyprofile.push('vnc_env.Throttle.step'):
            start = time.time()
            # Submit the action ASAP, before the thread goes to sleep.
            observation_n, reward_n, done_n, info = self.env.step(action_n)

            # Timestamp the observation and stash it in the info dict.
            available_at = info[
                'throttle.observation.available_at'] = time.time()
            if available_at - start > 1:
                logger.info('env.step took a long time: %.2fs',
                            available_at - start)
            if not self.skip_metadata and self.diagnostics is not None:
                # Run (slow) diagnostics
                self.diagnostics.add_metadata(observation_n,
                                              info['n'],
                                              available_at=available_at)
            return observation_n, reward_n, done_n, info
示例#19
0
    def _step_vnc_session(self, compiled_d):
        """Send compiled actions to the VNC session and gather results.

        When actions are routed over websockets, the rewarder session
        carries them and the VNC session receives an empty action dict.
        Returns (observation_n, info_n, err_n) ordered by
        self.connection_names; absent names yield None.
        """
        vnc_action_d = compiled_d
        if self._send_actions_over_websockets:
            self.rewarder_session.send_action(compiled_d, self.spec.id)
            vnc_action_d = {}

        with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
            observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)

        names = self.connection_names
        observation_n = [observation_d.get(name) for name in names]
        info_n = [info_d.get(name) for name in names]
        err_n = [err_d.get(name) for name in names]

        return observation_n, info_n, err_n
示例#20
0
    def _step_vnc_session(self, compiled_d):
        """Step the VNC session with the compiled per-connection actions.

        When actions are sent over websockets, the rewarder session
        carries them and the VNC session gets an empty action dict.
        Returns (observation_n, info_n, err_n) ordered by
        ``self.connection_names``; missing names yield None entries.
        """
        if self._send_actions_over_websockets:
            self.rewarder_session.send_action(compiled_d, self.spec.id)
            vnc_action_d = {}
        else:
            vnc_action_d = compiled_d

        with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
            observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)

        # Fan the per-connection dicts out into lists aligned with
        # connection_names.
        observation_n = []
        info_n = []
        err_n = []
        for name in self.connection_names:
            observation_n.append(observation_d.get(name))
            info_n.append(info_d.get(name))
            err_n.append(err_d.get(name))

        return observation_n, info_n, err_n
示例#21
0
 def _gameover(self, img):
     """Run the vexpect gameover detector on img.

     Returns (None, None) when no vexpect matcher is configured.
     """
     vexpect = self.vexpect
     if vexpect is None:
         return None, None
     with pyprofile.push('reward.parsing.gameover'):
         return vexpect.gameover(img)
示例#22
0
    for i in range(args.max_steps):
        # print(observation_n)
        # user_input.handle_events()

        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = agent(observation_n, reward_n, done_n)

        # Take an action
        with pyprofile.push('env.step'):
            observation_n, reward_n, done_n, info = env.step(action_n)

        episode_length += 1
        if not all(r is None
                   for r in reward_n):  # checks if we connected the rewarder
            episode_score += np.array(reward_n)
        for i, ob in enumerate(observation_n):
            if ob is not None and (not isinstance(ob, dict)
                                   or ob['vision'] is not None):
                observation_count[i] += 1

        scores = {}
        lengths = {}
        observations = {}
        for i, done in enumerate(done_n):
示例#23
0
 def _observation(self, observation_n):
     """Rescale every frame in the batch with _process_frame_flash."""
     with pyprofile.push('env.flash_rescale.observation'):
         return list(map(_process_frame_flash, observation_n))
示例#24
0
def main():
    """Run the selected driver agent against a GTA V universe env."""
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera',  action='store_true', help='Customize the GTA camera')

    args = parser.parse_args()

    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go', vnc_kwargs={
            'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
        },
    )

    if args.driver == 'DeepDriver':
        driver = DeepDriver()
    elif args.driver == 'DeepDriverTF':
        driver = DeepDriverTF()
    else:
        raise Exception('That driver is not available')

    driver.setup()

    if args.monitor:
        # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
        # Bug fix: the Monitor wrapper was constructed but its result
        # discarded; env must be rebound for monitoring to take effect.
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    render = not args.no_render
    observation_n = env.reset()
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        try:
            if info is not None:
                distance = info['n'][0]['distance_from_destination']
                logger.info('distance %s', distance)
        except KeyError as e:
            logger.debug('distance not available %s', str(e))

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            _step = env.step(action_n)
            observation_n, reward_n, done_n, info = _step

        # Reset when any env is done, unless the "done" was artificial.
        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            print('done_n', done_n, 'i', i)
            logger.info('end of episode')
            env.reset()

    # We're done! clean up
    env.close()
示例#25
0
    for i in range(args.max_steps):
        # print(observation_n)
        # user_input.handle_events()

        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = agent(observation_n, reward_n, done_n)

        # Take an action
        with pyprofile.push('env.step'):
            observation_n, reward_n, done_n, info = env.step(action_n)

        episode_length += 1
        if not all(r is None for r in reward_n): # checks if we connected the rewarder
            episode_score += np.array(reward_n)
        for i, ob in enumerate(observation_n):
            if ob is not None and (not isinstance(ob, dict) or ob['vision'] is not None):
                observation_count[i] += 1

        scores = {}
        lengths = {}
        observations = {}
        for i, done in enumerate(done_n):
            if not done:
                continue
示例#26
0
    def add_metadata(self, observation, info, available_at=None):
        """Extract metadata from a pixel observation and add it to the info

        Decodes the QR-code metadata embedded in the 'vision' part of
        the observation and records clock-skew and lag gauges into
        ``info`` (mutated in place). ``available_at`` is the local time
        the observation became available; it is forwarded to the
        decoder.
        """
        observation = observation['vision']
        if observation is None: return
        # Bail out early when there is nothing useful to do: idle
        # network watcher, no decoder configured, or no pixels.
        if self.network is not None and not self.network.active():
            return
        elif self.metadata_decoder is None:
            return
        elif observation is None:
            return
        # should return a dict with now/probe_received_at keys
        with pyprofile.push('vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
            metadata = self.metadata_decoder.decode(observation, available_at=available_at)

        if metadata is False:
            # No metadata ready, though it doesn't mean parsing failed
            metadata = None
        elif metadata is None:
            if self.could_read_metadata:
                self.could_read_metadata = False
                extra_logger.info('[%s] Stopped being able to read metadata (expected when environment resets)', self.label)
        elif not self.could_read_metadata:
            self.could_read_metadata = True
            extra_logger.info('[%s] Started being able to read metadata', self.label)

        if self.metadata_decoder.flag_synchronous and metadata is not None:
            info['diagnostics.image_remote_time'] = metadata['now']

        local_now = time.time()

        if self.network is None:
            # Assume the clock skew is zero. Should only be run on the
            # same machine as the VNC server, such as the universe
            # instance inside of the environment containers.
            real_clock_skew = self.zero_clock_skew
        else:
            # Note: this is a 2-length vector of (min, max), so anything added to
            # it is also going to be a 2-length vector.
            # Most of the diagnostics below are, but you have to look carefully.
            real_clock_skew = self.network.reversed_clock_skew()

        # Store real clock skew here
        info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
        if self.ignore_clock_skew:
            clock_skew = self.zero_clock_skew
        else:
            clock_skew = real_clock_skew

        if metadata is not None:
            # We'll generally update the observation timestamp infrequently
            if self.last_observation_timestamp == metadata['now']:
                delta = None
            else:
                # We just got a new timestamp in the observation!
                self.last_observation_timestamp = metadata['now']
                observation_now = metadata['now']
                delta = observation_now - metadata['available_at']

                # Subtract *local* time it was received from the *remote* time
                # displayed. Negate and reverse order to fix time ordering.
                info['stats.gauges.diagnostics.lag.observation'] = -(delta + clock_skew)[[1, 0]]

            # if self.network is None:
            #     # The rest of diagnostics need the network, so we're done here
            #     return

            # Track the action probe round-trip to estimate action lag.
            probe_received_at = metadata['probe_received_at']
            if probe_received_at == 0 or self.disable_action_probes:
                # Happens when the env first starts
                self.probe_received_at = None
            elif self.probe_received_at is None: # this also would work for the equality case
                self.probe_received_at = probe_received_at
            elif self.probe_received_at != probe_received_at and self.probe_sent_at is None:
                logger.info('[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple universe instances talking to the same environment?)', self.label, probe_received_at)
            elif self.probe_received_at != probe_received_at:
                extra_logger.debug('[%s] Next probe received: old=%s new=%s', self.label, self.probe_received_at, probe_received_at)
                self.probe_received_at = probe_received_at
                # Subtract the *local* time we sent it from the *remote* time it was received
                self.action_latency_skewed = probe_received_at - self.probe_sent_at
                self.probe_sent_at = None

            if self.action_latency_skewed:
                action_lag = self.action_latency_skewed + clock_skew
                self.action_latency_skewed = None
            else:
                action_lag = None
            info['stats.gauges.diagnostics.lag.action'] = action_lag

        local_now = time.time()
        # Look at when the remote believed it parsed the score (not
        # all envs send this currently).
        #
        # Also, if we received no new rewards, then this value is
        # None. This could indicate a high reward latency (bad,
        # uncommon), or that the agent is calling step faster than new
        # rewards are coming in (good, common).
        remote_score_now = info.get('rewarder.lag.observation.timestamp')
        if remote_score_now is not None:
            delta = remote_score_now - local_now
            info['stats.gauges.diagnostics.lag.reward'] = -(delta + clock_skew)[[1, 0]]

        # Look at when the remote sent the message, so we know how
        # long it's taking for messages to get to us.
        rewarder_message_now = info.get('reward_buffer.remote_time')
        if rewarder_message_now:
            delta = rewarder_message_now - local_now
            info['stats.gauges.diagnostics.lag.rewarder_message'] = -(delta + clock_skew)[[1, 0]]
示例#27
0
    def add_metadata(self, observation, info, available_at=None):
        """Extract metadata from a pixel observation and add it to the info

        Decodes the QR-code metadata embedded in the 'vision' part of
        the observation and records clock-skew and lag gauges into
        ``info`` (mutated in place). ``available_at`` is the local time
        the observation became available; it is forwarded to the
        decoder.
        """
        observation = observation['vision']
        if observation is None: return
        # Bail out early when there is nothing useful to do: idle
        # network watcher, no decoder configured, or no pixels.
        if self.network is not None and not self.network.active():
            return
        elif self.metadata_decoder is None:
            return
        elif observation is None:
            return
        # should return a dict with now/probe_received_at keys
        with pyprofile.push(
                'vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
            metadata = self.metadata_decoder.decode(observation,
                                                    available_at=available_at)

        if metadata is False:
            # No metadata ready, though it doesn't mean parsing failed
            metadata = None
        elif metadata is None:
            if self.could_read_metadata:
                self.could_read_metadata = False
                extra_logger.info(
                    '[%s] Stopped being able to read metadata (expected when environment resets)',
                    self.label)
        elif not self.could_read_metadata:
            self.could_read_metadata = True
            extra_logger.info('[%s] Started being able to read metadata',
                              self.label)

        if self.metadata_decoder.flag_synchronous and metadata is not None:
            info['diagnostics.image_remote_time'] = metadata['now']

        local_now = time.time()

        if self.network is None:
            # Assume the clock skew is zero. Should only be run on the
            # same machine as the VNC server, such as the universe
            # instance inside of the environment containers.
            real_clock_skew = self.zero_clock_skew
        else:
            # Note: this is a 2-length vector of (min, max), so anything added to
            # it is also going to be a 2-length vector.
            # Most of the diagnostics below are, but you have to look carefully.
            real_clock_skew = self.network.reversed_clock_skew()

        # Store real clock skew here
        info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
        if self.ignore_clock_skew:
            clock_skew = self.zero_clock_skew
        else:
            clock_skew = real_clock_skew

        if metadata is not None:
            # We'll generally update the observation timestamp infrequently
            if self.last_observation_timestamp == metadata['now']:
                delta = None
            else:
                # We just got a new timestamp in the observation!
                self.last_observation_timestamp = metadata['now']
                observation_now = metadata['now']
                delta = observation_now - metadata['available_at']

                # Subtract *local* time it was received from the *remote* time
                # displayed. Negate and reverse order to fix time ordering.
                info['stats.gauges.diagnostics.lag.observation'] = -(
                    delta + clock_skew)[[1, 0]]

            # if self.network is None:
            #     # The rest of diagnostics need the network, so we're done here
            #     return

            # Track the action probe round-trip to estimate action lag.
            probe_received_at = metadata['probe_received_at']
            if probe_received_at == 0 or self.disable_action_probes:
                # Happens when the env first starts
                self.probe_received_at = None
            elif self.probe_received_at is None:  # this also would work for the equality case
                self.probe_received_at = probe_received_at
            elif self.probe_received_at != probe_received_at and self.probe_sent_at is None:
                logger.info(
                    '[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple universe instances talking to the same environment?)',
                    self.label, probe_received_at)
            elif self.probe_received_at != probe_received_at:
                extra_logger.debug('[%s] Next probe received: old=%s new=%s',
                                   self.label, self.probe_received_at,
                                   probe_received_at)
                self.probe_received_at = probe_received_at
                # Subtract the *local* time we sent it from the *remote* time it was received
                self.action_latency_skewed = probe_received_at - self.probe_sent_at
                self.probe_sent_at = None

            if self.action_latency_skewed:
                action_lag = self.action_latency_skewed + clock_skew
                self.action_latency_skewed = None
            else:
                action_lag = None
            info['stats.gauges.diagnostics.lag.action'] = action_lag

        local_now = time.time()
        # Look at when the remote believed it parsed the score (not
        # all envs send this currently).
        #
        # Also, if we received no new rewards, then this value is
        # None. This could indicate a high reward latency (bad,
        # uncommon), or that the agent is calling step faster than new
        # rewards are coming in (good, common).
        remote_score_now = info.get('rewarder.lag.observation.timestamp')
        if remote_score_now is not None:
            delta = remote_score_now - local_now
            info['stats.gauges.diagnostics.lag.reward'] = -(delta +
                                                            clock_skew)[[1, 0]]

        # Look at when the remote sent the message, so we know how
        # long it's taking for messages to get to us.
        rewarder_message_now = info.get('reward_buffer.remote_time')
        if rewarder_message_now:
            delta = rewarder_message_now - local_now
            info['stats.gauges.diagnostics.lag.rewarder_message'] = -(
                delta + clock_skew)[[1, 0]]
示例#28
0
 def write_item(self, item):
     """Serialize ``item`` as a single JSON line and append it to the log.

     Keys that are not basic types are skipped (``skipkeys=True``); values
     the default encoder cannot handle are routed through
     ``self.json_encode``. The file is flushed after every record so each
     line is durable immediately. The write is timed under the
     'recording.write' pyprofile bucket.
     """
     with pyprofile.push('recording.write'):
         # 'line' rather than the ambiguous single-letter 'l' (PEP 8 E741).
         line = json.dumps(item, skipkeys=True, default=self.json_encode)
         self.log_f.write(line + '\n')
         self.log_f.flush()
示例#29
0
文件: timer.py 项目: deev/universe
 def _reset(self):
     """Reset the wrapped env, timing the call under 'vnc_env.Timer.reset'."""
     with pyprofile.push('vnc_env.Timer.reset'):
         observation = self.env.reset()
     return observation
示例#30
0
 def _score(self, img):
     """Score ``img`` with the configured scorer; ``None`` when no scorer is set."""
     scorer = self.scorer
     if scorer is None:
         return None
     # Time the (potentially expensive) scoring pass.
     with pyprofile.push('reward.parsing.score'):
         result = scorer.score(img)
     return result
示例#31
0
 def _reset(self):
     """Delegate reset to the wrapped env under the 'vnc_env.Timer.reset' timer."""
     # The context manager exits after the inner reset returns, so the
     # timing covers exactly the delegated call.
     with pyprofile.push('vnc_env.Timer.reset'):
         return self.env.reset()
示例#32
0
    def do_run(self):
        """Main rewarder loop.

        Runs forever: polls the env status, blocks while the env is
        resetting, clears internal state when the env or episode changes,
        computes a reward each frame, and streams (reward, done, info)
        to the agent at ``self.fps``. Frame pacing uses a moving target
        timestamp so that missed frames are measured and logged rather
        than silently dropped.
        """
        # For debug environments which set a server-enforced time
        # limit
        frames = 0
        # Log all the rewards, but buffer those logs rather than
        # spewing out in realtime.
        reward_logger = remote.RewardLogger()

        last_export = time.time()
        # Just make sure last_export time is set inside of pyprofile
        pyprofile.export(log=False, reset=False)

        # For tracking the framerate
        target = time.time()
        self.__vnc_last_update = target  # real initial value
        while True:
            # Atomically recover details of the env state
            env_info = self.env_status.env_info()
            env_state = env_info['env_state']

            # Hang out until it's all done resetting. We don't need to
            # reset any of our internal state here, as that happens
            # below. (We're not actually guaranteed to actually see a
            # resetting state, if the reset is very fast.)
            while env_state == 'resetting':
                logger.info('[%s] Blocking until env finishes resetting',
                            utils.thread_name())
                env_info = self.env_status.wait_for_env_state_change(env_state)
                logger.info('[%s] Unblocking since env reset finished',
                            utils.thread_name())
                env_state = env_info['env_state']

                # Start our frame timing from here
                target = time.time()

            env_state = env_info['env_state']
            episode_id = env_info['episode_id']
            env_id = env_info['env_id']
            fps = env_info['fps']
            assert env_state == 'running', 'Env state: {}'.format(env_state)

            if fps is not None and fps != self.fps:
                # BUGFIX: arguments were previously (fps, self.fps,
                # thread_name()), putting the fps value in the '[%s]'
                # label slot. Thread name is the label everywhere else
                # in this loop; then old fps -> new fps.
                logger.info('[%s] Changing fps: %s -> %s',
                            utils.thread_name(), self.fps, fps)
                self.fps = fps

            # Detect whether the environment has reset, and thus we
            # need to clear our internal state.
            if env_id != self.env_id:
                assert episode_id != self._episode_id, 'Episode ids: {}->{}'.format(
                    episode_id, self._episode_id)
                logger.info('[%s] Changing reward_parsers: %s -> %s',
                            utils.thread_name(), self.env_id, env_id)
                # This is slow (since it has to load the scorer), so
                # we call it only in the rare case where the env ID
                # has changed.
                self.set_reward_parser(env_info)
                frames = 0
                reward_logger.reset()
            elif episode_id != self._episode_id and self.reward_parser is None:
                # Just set internal state. This is fast since it
                # doesn't actually have to load anything. Also only
                # relevant during development.
                self.set_reward_parser(env_info)
                frames = 0
                reward_logger.reset()
            elif episode_id != self._episode_id:
                # If the env state ID changed, then we need to reset
                # the reward parser.
                #
                # Not enough just to look whether the env_state is
                # resetting, since in theory we might never see the
                # resetting state for a very fast reset.
                logger.info(
                    '[%s] Clearing reward_parser state: env_id=%s episode_id=%s->%s, env_state=%s',
                    utils.thread_name(), env_id, self._episode_id, episode_id,
                    env_state)
                self._episode_id = episode_id
                self.reward_parser.reset()
                frames = 0
                reward_logger.reset()

            # Recover the exact reward
            with pyprofile.push('rewarder.compute_reward'):
                reward, done, info = self.reward()
            # done=None means we're not sure if the game is over or
            # not.
            done = bool(done)

            # Cut short the environment. Currently only used in debug
            # environment.
            if self.controlplane_spec is not None and \
               self.controlplane_spec.server_timestep_limit is not None and \
               frames >= self.controlplane_spec.server_timestep_limit:
                logger.info(
                    '[%s] Marking environment as done=True since server_timestep_limit of %d frames reached',
                    utils.thread_name(), frames)
                done = True

            # Add our own statistics: export the pyprofile snapshot at
            # most once every 5 seconds, and force a send so the agent
            # always receives it.
            if time.time() - last_export > 5:
                force_send = True
                last_export = time.time()
                profile = pyprofile.export()
                # Send the pyprofile to the agent. Info keys we set
                # will be available directly to the agent.
                info['rewarder.profile'] = profile
            else:
                force_send = False

            # Send if there's anything interesting to transmit
            if reward != 0 or done or force_send:
                if 'rewarder.profile' in info:
                    # We already print the pyprofile (during the
                    # export) so no need to repeat it here. It's
                    # pretty big.
                    display_info = info.copy()
                    display_info['rewarder.profile'] = '<{} bytes>'.format(
                        len(str(display_info['rewarder.profile'])))
                else:
                    display_info = info

                # NOTE(review): display_info is computed above but not
                # consumed in the visible code — possibly intended for a
                # debug log or for reward_logger.record; confirm upstream.
                reward_logger.record(reward, done, info)
                self.agent_conn.send_env_reward(reward,
                                                done,
                                                info,
                                                episode_id=episode_id)

            old_target = target
            # Run at the appropriate frame rate
            target += 1. / self.fps

            # Do appropriate sleeping
            delta = target - time.time()
            if done:
                # game_autoresets means the game itself will do the
                # reset, so we don't have to do any work.
                logger.info('[%s] Resetting environment since done=%s',
                            utils.thread_name(), done)
                self.trigger_reset()
            elif delta > 0:
                pyprofile.timing('rewarder.sleep', delta)
                time.sleep(delta)
            else:
                pyprofile.timing('rewarder.sleep.missed', -delta)
                if delta < -0.1:
                    logger.info(
                        '[%s] Rewarder fell behind by %ss from target; losing %s frames',
                        utils.thread_name(), -delta, int(-delta * self.fps))
                target = time.time()
            # Record the total time spent in this frame, starting from the top
            pyprofile.timing('rewarder.frame', time.time() - old_target)
            frames += 1