Example #1
    def onMessage(self, payload, isBinary):
        if not self.factory.agent_conn.check_message(self):
            return

        assert not isBinary, "Binary websocket not supported"
        payload = ujson.loads(payload)

        context = {
            'start': time.time(),
            'conn': self,
        }
        latency = context['start'] - payload['headers']['sent_at']
        pyprofile.incr('rewarder_protocol.messages')
        pyprofile.incr('rewarder_protocol.messages.{}'.format(payload['method']))

        # Double latency to model RTT
        pyprofile.timing('rewarder_protocol.latency.rtt.skew_unadjusted', 2 * latency)
        if latency < 0:
            pyprofile.incr('rewarder_protocol.latency.rtt.skew_unadjusted.negative')

        if payload['method'] == 'v0.env.reset':
            logger.info('Received reset message: %s', payload)
            self.factory.agent_conn.control_buffer.recv_rpc(context, payload)
        elif payload['method'] == 'v0.control.ping':
            logger.debug('Received ping message: %s', payload)
            parent_message_id = payload['headers']['message_id']
            headers = {'parent_message_id': parent_message_id}
            self.send_message('v0.reply.control.ping', {}, headers)
        else:
            logger.warning('Received unsupported message: %s', payload)
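
The handler dispatches on payload['method'] and answers pings by echoing the incoming message_id back as parent_message_id. For reference, a minimal sketch of the matching client-side ping frame, reconstructed only from the fields this handler reads (make_ping and the empty 'body' key are assumptions, not part of the protocol shown):

    import time
    import ujson

    def make_ping(message_id):
        # Hypothetical helper: 'sent_at' feeds the latency timing above,
        # and 'message_id' is echoed back as 'parent_message_id' in the
        # 'v0.reply.control.ping' reply.
        return ujson.dumps({
            'method': 'v0.control.ping',
            'body': {},
            'headers': {'message_id': message_id, 'sent_at': time.time()},
        })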
Example #2
    def __call__(self, img, available_at):
        # Choose the return value
        if len(self.deque) > 0 and self.deque[0].ready():
            last = self.deque.popleft()
            res = last.get()
            if res is not None:
                pyprofile.timing('vnc_env.diagnostics.async_decode.latency', time.time() - res['available_at'])
        else:
            res = False

        pyprofile.gauge('vnc_env.diagnostics.async_decode.queue_depth', len(self.deque))

        # Just grayscale it by keeping only one component. Should be
        # good enough as this region is black and white anyway.
        grayscale = img[self.y:self.y+self.height, self.x:self.x+self.width, 0]

        # Apply processing if needed
        match = np.array_equal(self._last_img, grayscale)
        if not match:
            pyprofile.incr('vnc_env.diagnostics.async_decode.schedule')
            # sneakily copy if numpy hasn't, so it can be cached
            self._last_img = np.ascontiguousarray(grayscale)
            # ('async' is a reserved word in modern Python, so name the
            # AsyncResult explicitly)
            async_result = self.qr_pool.apply_async(self.method, (self._last_img, time.time(), available_at))
            self.deque.append(async_result)
        else:
            pyprofile.incr('vnc_env.diagnostics.async_decode.cache_hit')

        return res
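
The scheduling pattern here is independent of QR decoding: keep a deque of AsyncResults in front of a process pool, drain at most one finished result per call, and skip resubmission when the input hasn't changed. A minimal sketch of just that pattern (AsyncDecoder is a hypothetical name; method must be a picklable, module-level callable, and the inputs must be cheaply comparable):

    from collections import deque
    from multiprocessing import Pool

    class AsyncDecoder(object):
        def __init__(self, method, processes=2):
            self.method = method
            self.pool = Pool(processes=processes)
            self.deque = deque()
            self._last_input = None

        def __call__(self, data):
            # Drain at most one finished result; False means "nothing
            # ready yet", matching the convention in the code above.
            if self.deque and self.deque[0].ready():
                res = self.deque.popleft().get()
            else:
                res = False
            # Only schedule new work when the input actually changed.
            if data != self._last_input:
                self._last_input = data
                self.deque.append(self.pool.apply_async(self.method, (data,)))
            return res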
Example #3
    def _step(self, action_n):
        start = time.time()
        with pyprofile.push('vnc_env.Timer.step'):
            observation_n, reward_n, done_n, info = self.env.step(action_n)

        # Calculate how much time was spent actually doing work
        sleep = info.get('stats.throttle.sleep')
        if sleep is None or sleep < 0:
            sleep = 0
        pyprofile.timing('vnc_env.Timer.step.excluding_sleep', time.time() - start - sleep)
        return observation_n, reward_n, done_n, info
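
pyprofile.push is used here as a context manager that times everything inside the with block. A minimal sketch of how such a timer could be built on the timing() sink seen throughout these examples; this is an assumption about the interface, not the library's actual implementation:

    import time
    from contextlib import contextmanager

    @contextmanager
    def push(event):
        # Report the wall-clock duration of the wrapped block under
        # `event`, even if the block raises.
        start = time.time()
        try:
            yield
        finally:
            pyprofile.timing(event, time.time() - start)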
Example #4
    def onMessage(self, payload, isBinary):
        extra_logger.debug('[%s] Received payload: %s', self.factory.label,
                           payload)
        assert not isBinary
        payload = ujson.loads(payload)

        context = self._make_context()
        latency = context['start'] - payload['headers']['sent_at']
        pyprofile.incr('rewarder_protocol.messages')
        pyprofile.incr('rewarder_protocol.messages.{}'.format(
            payload['method']))

        # Double latency to model RTT
        pyprofile.timing('rewarder_protocol.latency.rtt.skew_unadjusted',
                         2 * latency)
        if latency < 0:
            pyprofile.incr(
                'rewarder_protocol.latency.rtt.skew_unadjusted.negative')

        self.recv(context, payload)
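
One design note on the metrics above: sent_at is stamped by the peer's clock, so this one-way latency is measured across two unsynchronized clocks and can come out negative under skew, which is exactly what the skew_unadjusted.negative counter records. Doubling the one-way figure models RTT only under a symmetric-path assumption, hence the skew_unadjusted name.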
Example #5
    def _step(self, action_n):
        if self._steps is None:
            self._start_timer()
        self._steps += 1

        accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(
            action_n)
        accum_info['throttle.action.available_at'] = time.time()

        # Record which indexes we were just peeking at, so when we
        # make the follow-up we'll be sure to peek there too.
        peek_n = [
            any(isinstance(peek, spaces.PeekReward) for peek in action)
            for action in action_n
        ]

        if self.fps is None:
            return accum_observation_n, accum_reward_n, accum_done_n, accum_info

        accum_info['stats.throttle.sleep'] = 0
        while True:
            # See how much time we have to idle
            delta = self._start + 1. / self.fps * self._steps - time.time()

            # If we've fallen behind the target frame time, don't try
            # to catch up; drop the lost time and restart the timer.
            if delta < 0:
                # We're out of time. Just get out of here.
                delta = abs(delta)
                if delta >= 1:
                    logger.info(
                        'Throttle fell behind by %.2fs; lost %.2f frames',
                        delta, self.fps * delta)
                pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
                self._start_timer()
                break
            # elif delta < 0.008:
            #     # Only have 8ms. Let's spend it sleeping, and
            #     # return an image which may have up to an
            #     # additional 8ms lag.
            #     #
            #     # 8ms is reasonably arbitrary; we just want something
            #     # that's small where it's not actually going to help
            #     # if we make another step call. Step with 32 parallel
            #     # envs takes about 6ms (about half of which is
            #     # diagnostics, which could be totally async!), so 8 is
            #     # a reasonable choice for now..
            #     pyprofile.timing('vnc_env.Throttle.sleep', delta)
            #     accum_info['stats.throttle.sleep'] += delta
            #     time.sleep(delta)
            #     break
            else:
                # We've got plenty of time. Sleep for up to 16ms, and
                # then refresh our current frame. We need to
                # constantly be calling step so that our lags are
                # reported correctly, within 16ms. (The layering is
                # such that the vncdriver doesn't know which pixels
                # correspond to metadata, and the diagnostics don't
                # know when pixels first got painted. So we do our
                # best to present frames as they're ready to the
                # diagnostics.)
                delta = min(delta, 0.016)
                pyprofile.timing('vnc_env.Throttle.sleep', delta)
                accum_info['stats.throttle.sleep'] += delta
                time.sleep(delta)

                # We want to merge in the latest reward/done/info so that our
                # agent has the most up-to-date info post-sleep, but also want
                # to avoid popping any rewards where done=True (since we'd
                # have to merge across episode boundaries).
                action_n = []
                for done, peek in zip(accum_done_n, peek_n):
                    if done or peek:
                        # No popping of reward/done
                        action_n.append([spaces.PeekReward])
                    else:
                        action_n.append([])

                observation_n, reward_n, done_n, info = self._substep(action_n)

                # Merge observations, rewards, and metadata. Text
                # observations preserve the order in which messages
                # were sent.
                rewarder.merge_n(
                    accum_observation_n,
                    accum_reward_n,
                    accum_done_n,
                    accum_info,
                    observation_n,
                    reward_n,
                    done_n,
                    info,
                )

        return accum_observation_n, accum_reward_n, accum_done_n, accum_info
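
Stripped of the reward merging and profiling, the pacing logic in this loop reduces to a fixed-origin deadline (start + steps/fps) plus naps capped at 16 ms. A minimal sketch of just that skeleton, where substep and refresh are hypothetical stand-ins for the work and frame-refresh calls above:

    import time

    def run_throttled(substep, refresh, fps):
        # Deadlines come from a fixed start time, so sleep error cannot
        # accumulate across frames.
        start = time.time()
        steps = 0
        while True:
            substep()
            steps += 1
            while True:
                delta = start + steps / float(fps) - time.time()
                if delta < 0:
                    # Fell behind: drop the lost time, restart the clock.
                    start, steps = time.time(), 0
                    break
                time.sleep(min(delta, 0.016))
                if delta <= 0.016:
                    break
                # Long wait: refresh between naps so frames stay current.
                refresh()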
Example #6
    def do_run(self):
        # For debug environments which set a server-enforced time
        # limit
        frames = 0
        # Log all the rewards, but buffer those logs rather than
        # spewing out in realtime.
        reward_logger = remote.RewardLogger()

        last_export = time.time()
        # Just make sure last_export time is set inside of pyprofile
        pyprofile.export(log=False, reset=False)

        # For tracking the framerate
        target = time.time()
        self.__vnc_last_update = target  # real initial value
        while True:
            # Atomically recover details of the env state
            env_info = self.env_status.env_info()
            env_state = env_info['env_state']

            # Hang out until it's all done resetting. We don't need to
            # reset any of our internal state here, as that happens
            # below. (We're not guaranteed to actually see a resetting
            # state, if the reset is very fast.)
            while env_state == 'resetting':
                logger.info('[%s] Blocking until env finishes resetting',
                            utils.thread_name())
                env_info = self.env_status.wait_for_env_state_change(env_state)
                logger.info('[%s] Unblocking since env reset finished',
                            utils.thread_name())
                env_state = env_info['env_state']

                # Start our frame timing from here
                target = time.time()

            env_state = env_info['env_state']
            episode_id = env_info['episode_id']
            env_id = env_info['env_id']
            fps = env_info['fps']
            assert env_state == 'running', 'Env state: {}'.format(env_state)

            if fps is not None and fps != self.fps:
                logger.info('[%s] Changing fps: %s -> %s',
                            utils.thread_name(), self.fps, fps)
                self.fps = fps

            # Detect whether the environment has reset, and thus we
            # need to clear our internal state.
            if env_id != self.env_id:
                assert episode_id != self._episode_id, 'Episode ids: {}->{}'.format(
                    self._episode_id, episode_id)
                logger.info('[%s] Changing reward_parsers: %s -> %s',
                            utils.thread_name(), self.env_id, env_id)
                # This is slow (since it has to load the scorer), so
                # we call it only in the rare case where the env ID
                # has changed.
                self.set_reward_parser(env_info)
                frames = 0
                reward_logger.reset()
            elif episode_id != self._episode_id and self.reward_parser is None:
                # Just set internal state. This is fast since it
                # doesn't actually have to load anything. Also only
                # relevant during development.
                self.set_reward_parser(env_info)
                frames = 0
                reward_logger.reset()
            elif episode_id != self._episode_id:
                # If the env state ID changed, then we need to reset
                # the reward parser.
                #
                # It's not enough to check whether the env_state is
                # resetting, since in theory we might never see the
                # resetting state for a very fast reset.
                logger.info(
                    '[%s] Clearing reward_parser state: env_id=%s episode_id=%s->%s, env_state=%s',
                    utils.thread_name(), env_id, self._episode_id, episode_id,
                    env_state)
                self._episode_id = episode_id
                self.reward_parser.reset()
                frames = 0
                reward_logger.reset()

            # Recover the exact reward
            with pyprofile.push('rewarder.compute_reward'):
                reward, done, info = self.reward()
            # done=None means we're not sure if the game is over or
            # not.
            done = bool(done)

            # Cut short the environment. Currently only used in debug
            # environment.
            if self.controlplane_spec is not None and \
               self.controlplane_spec.server_timestep_limit is not None and \
               frames >= self.controlplane_spec.server_timestep_limit:
                logger.info(
                    '[%s] Marking environment as done=True since server_timestep_limit of %d frames reached',
                    utils.thread_name(), frames)
                done = True

            # Add our own statistics
            if time.time() - last_export > 5:
                force_send = True
                last_export = time.time()
                profile = pyprofile.export()
                # Send the pyprofile to the agent. Info keys we set
                # will be available directly to the agent.
                info['rewarder.profile'] = profile
            else:
                force_send = False

            # Send if there's anything interesting to transmit
            if reward != 0 or done or force_send:
                if 'rewarder.profile' in info:
                    # We already print the pyprofile (during the
                    # export) so no need to repeat it here. It's
                    # pretty big.
                    display_info = info.copy()
                    display_info['rewarder.profile'] = '<{} bytes>'.format(
                        len(str(display_info['rewarder.profile'])))
                else:
                    display_info = info

                reward_logger.record(reward, done, display_info)
                self.agent_conn.send_env_reward(reward,
                                                done,
                                                info,
                                                episode_id=episode_id)

            old_target = target
            # Run at the appropriate frame rate
            target += 1. / self.fps

            # Do appropriate sleeping
            delta = target - time.time()
            if done:
                # game_autoresets means the game itself will do the
                # reset, so we don't have to do any work.
                logger.info('[%s] Resetting environment since done=%s',
                            utils.thread_name(), done)
                self.trigger_reset()
            elif delta > 0:
                pyprofile.timing('rewarder.sleep', delta)
                time.sleep(delta)
            else:
                pyprofile.timing('rewarder.sleep.missed', -delta)
                if delta < -0.1:
                    logger.info(
                        '[%s] Rewarder fell behind by %ss from target; losing %s frames',
                        utils.thread_name(), -delta, int(-delta * self.fps))
                target = time.time()
            # Record the total time spent in this frame, starting from the top
            pyprofile.timing('rewarder.frame', time.time() - old_target)
            frames += 1
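
The export block in the middle of the loop is a throttled-flush pattern: at most one pyprofile.export() every five seconds, piggybacked on the reward channel and forcing a send even when the reward is zero. A minimal sketch with a hypothetical maybe_export helper:

    import time

    EXPORT_INTERVAL = 5.0  # the five-second cadence used in do_run above

    def maybe_export(last_export, info):
        # Flush profiling stats at most once per interval, attach them
        # to the outgoing info dict, and report whether this frame
        # should be force-sent.
        now = time.time()
        if now - last_export > EXPORT_INTERVAL:
            info['rewarder.profile'] = pyprofile.export()
            return now, True
        return last_export, False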