def _configure(
    self,
    remotes=None,
    client_id=None,
    start_timeout=None,
    docker_image=None,
    ignore_clock_skew=False,
    disable_action_probes=False,
    vnc_driver=None,
    vnc_kwargs=None,
    rewarder_driver=None,
    replace_on_crash=False,
    allocate_sync=True,
    observer=False,
    api_key=None,
    record=False,
):
    """Standard Gym hook to configure the environment.

    Builds the remote manager, picks the VNC and rewarder session
    classes, sets up diagnostics, and kicks off allocation of the
    remotes. May only be called once per environment.

    Args:
        remotes: Remote specification forwarded to ``remotes_module.build``.
            When None, the ``GYM_VNC_REMOTES`` environment variable is
            used, falling back to ``'1'``.
        client_id: Forwarded to ``remotes_module.build``. When None,
            ``default_client_id()`` is used.
        start_timeout: Forwarded to ``remotes_module.build``.
        docker_image: If truthy (and the env has a spec), overrides the
            ``image`` attribute of the runtime spec.
        ignore_clock_skew (bool): Assume remotes are on the same machine as
            us, for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame. In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes. Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping round-trip
            times to calculate clock skew.
        disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.
        vnc_driver: Passed to ``vnc_session`` to select the VNC session
            class.
        vnc_kwargs (dict or None): Overrides for the VNC session
            arguments. Missing keys are filled with defaults and entries
            whose value is None are dropped; the result is stored on
            ``self.vnc_kwargs``. The caller's dict is never modified.
            Only consulted when the remote manager connects over VNC.
        rewarder_driver: Passed to ``rewarder_session`` to select the
            rewarder session class.
        replace_on_crash: Reconnection is allowed only if this is truthy
            and the remote manager reports ``supports_reconnect``.
        allocate_sync: If truthy, block until all ``self.n`` environments
            are connected; otherwise only process connections that are
            already available.
        observer: Stored on ``self._observer``.
        api_key: Forwarded to ``remotes_module.build``.
        record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

    Raises:
        error.Error: If the environment has already been started.
    """
    if self._started:
        raise error.Error(
            '{} has already been started; cannot change configuration now.'
            .format(self))

    universe.configure_logging()
    twisty.start_once()

    if self.spec is not None:
        runtime = registration.runtime_spec(self.spec.tags['runtime'])
        # Let the user manually set the docker_image version
        if docker_image:
            # TODO: don't support this option?
            runtime.image = docker_image
    else:
        runtime = None

    if remotes is None:
        remotes = os.environ.get('GYM_VNC_REMOTES', '1')

    if client_id is None:
        client_id = default_client_id()

    self.remote_manager, self.n = remotes_module.build(
        client_id=client_id,
        remotes=remotes,
        runtime=runtime,
        start_timeout=start_timeout,
        api_key=api_key,
        use_recorder_ports=record,
    )
    self.connection_names = [None] * self.n
    self.connection_labels = [None] * self.n
    self.crashed = {}

    self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect

    if self.remote_manager.connect_vnc:
        cls = vnc_session(vnc_driver)

        # Work on a private copy: the defaults filled in below must not
        # leak into the caller's dict, nor persist from one call to the
        # next through a shared default argument.
        session_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)
        session_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
        # NOTE(review): `runtime` is whatever registration.runtime_spec
        # returned (or None), yet it is compared to a string here --
        # confirm that object's equality makes this branch reachable.
        if runtime == 'gym-core':
            session_kwargs.setdefault('encoding', 'zrle')
        else:
            session_kwargs.setdefault('encoding', 'tight')
            session_kwargs.setdefault('fine_quality_level', 50)
            session_kwargs.setdefault('subsample_level', 2)
        # Filter out None values, since some drivers may not handle them correctly
        session_kwargs = {k: v for k, v in session_kwargs.items() if v is not None}
        logger.info(
            'Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"',
            session_kwargs)
        self.vnc_kwargs = session_kwargs
        self.vnc_session = cls()
    else:
        self.vnc_session = None

    self._observer = observer
    if self.remote_manager.connect_rewarder:
        cls = rewarder_session(rewarder_driver)
        self.rewarder_session = cls()
    else:
        self.rewarder_session = None

    if ignore_clock_skew:
        logger.info(
            'Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)'
        )

    if self.rewarder_session or ignore_clock_skew:
        # Don't need rewarder session if we're ignoring clock skew
        if self.spec is not None:
            metadata_encoding = self.spec.tags.get('metadata_encoding')
        else:
            metadata_encoding = None
        self.diagnostics = diagnostics.Diagnostics(
            self.n,
            self._probe_key,
            ignore_clock_skew,
            metadata_encoding=metadata_encoding,
            disable_action_probes=disable_action_probes)
    else:
        self.diagnostics = None

    self._reset_mask()
    self._started = True

    self.remote_manager.allocate([str(i) for i in range(self.n)], initial=True)
    if allocate_sync:
        # Block until we've fulfilled n environments
        self._handle_connect(n=self.n)
    else:
        # Handle any backends which synchronously fulfill their
        # allocation.
        self._handle_connect()
def _configure(self, remotes=None, client_id=None, start_timeout=None, docker_image=None,
               ignore_clock_skew=False, disable_action_probes=False, vnc_driver=None, vnc_kwargs=None,
               rewarder_driver=None, replace_on_crash=False, allocate_sync=True, observer=False,
               api_key=None, record=False,
):
    """Standard Gym hook to configure the environment.

    Builds the remote manager, selects the VNC and rewarder session
    classes, creates the diagnostics helper, and starts allocating
    remotes. Calling it a second time on a started environment is an
    error.

    Args:
        remotes: Remote specification handed to ``remotes_module.build``.
            Defaults to the ``GYM_VNC_REMOTES`` environment variable, or
            ``'1'`` if that is unset.
        client_id: Handed to ``remotes_module.build``; defaults to
            ``default_client_id()``.
        start_timeout: Handed to ``remotes_module.build``.
        docker_image: When truthy and the env has a spec, replaces the
            runtime spec's ``image``.
        ignore_clock_skew (bool): Assume remotes are on the same machine as
            us, for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame. In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes. Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping round-trip
            times to calculate clock skew.
        disable_action_probes: Handed to ``diagnostics.Diagnostics``.
        vnc_driver: Argument to ``vnc_session``, which returns the VNC
            session class to instantiate.
        vnc_kwargs (dict or None): Caller overrides for the VNC session
            arguments. A copy is taken, missing keys receive defaults,
            and None-valued entries are removed before the result is
            stored on ``self.vnc_kwargs``. The dict passed in is left
            untouched.
        rewarder_driver: Argument to ``rewarder_session``, which returns
            the rewarder session class to instantiate.
        replace_on_crash: ``self.allow_reconnect`` is this value and-ed
            with the remote manager's ``supports_reconnect``.
        allocate_sync: When truthy, wait for all ``self.n`` environments
            to connect; otherwise handle only connections that are ready.
        observer: Saved as ``self._observer``.
        api_key: Handed to ``remotes_module.build``.
        record: Handed to ``remotes_module.build`` as
            ``use_recorder_ports``.

    Raises:
        error.Error: If the environment has already been started.
    """
    if self._started:
        raise error.Error('{} has already been started; cannot change configuration now.'.format(self))

    universe.configure_logging()
    twisty.start_once()

    if self.spec is not None:
        runtime = registration.runtime_spec(self.spec.tags['runtime'])
        # Let the user manually set the docker_image version
        if docker_image:
            # TODO: don't support this option?
            runtime.image = docker_image
    else:
        runtime = None

    if remotes is None:
        remotes = os.environ.get('GYM_VNC_REMOTES', '1')

    if client_id is None:
        client_id = default_client_id()

    # Copy rather than alias the caller's dict: the setdefault() calls
    # further down would otherwise write our defaults into an object the
    # caller may reuse for another environment.
    vnc_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)

    self.remote_manager, self.n = remotes_module.build(
        client_id=client_id,
        remotes=remotes, runtime=runtime, start_timeout=start_timeout,
        api_key=api_key,
        use_recorder_ports=record,
    )
    self.connection_names = [None] * self.n
    self.connection_labels = [None] * self.n
    self.crashed = {}

    self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect

    if self.remote_manager.connect_vnc:
        cls = vnc_session(vnc_driver)
        vnc_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
        # NOTE(review): `runtime` comes from registration.runtime_spec (or
        # is None) but is compared against a string -- verify this test
        # can actually be true for the gym-core runtime.
        if runtime == 'gym-core':
            vnc_kwargs.setdefault('encoding', 'zrle')
        else:
            vnc_kwargs.setdefault('encoding', 'tight')
            vnc_kwargs.setdefault('fine_quality_level', 50)
            vnc_kwargs.setdefault('subsample_level', 2)
        # Filter out None values, since some drivers may not handle them correctly
        vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
        logger.info('Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"', vnc_kwargs)
        self.vnc_kwargs = vnc_kwargs
        self.vnc_session = cls()
    else:
        self.vnc_session = None

    self._observer = observer
    if self.remote_manager.connect_rewarder:
        cls = rewarder_session(rewarder_driver)
        self.rewarder_session = cls()
    else:
        self.rewarder_session = None

    if ignore_clock_skew:
        logger.info('Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)')

    if self.rewarder_session or ignore_clock_skew:
        # Don't need rewarder session if we're ignoring clock skew
        if self.spec is not None:
            metadata_encoding = self.spec.tags.get('metadata_encoding')
        else:
            metadata_encoding = None
        self.diagnostics = diagnostics.Diagnostics(self.n, self._probe_key, ignore_clock_skew, metadata_encoding=metadata_encoding, disable_action_probes=disable_action_probes)
    else:
        self.diagnostics = None

    self._reset_mask()
    self._started = True

    self.remote_manager.allocate([str(i) for i in range(self.n)], initial=True)
    if allocate_sync:
        # Block until we've fulfilled n environments
        self._handle_connect(n=self.n)
    else:
        # Handle any backends which synchronously fulfill their
        # allocation.
        self._handle_connect()