def __init__(self, specifiable_class, spec, output_spaces, shutdown_method=None):
    """
    Args:
        specifiable_class (type): The class to use for constructing the Specifiable from `spec`. This class
            needs to be a child class of Specifiable (with a `__lookup_classes__` property).
        spec (dict): The specification dict that will be used to construct the Specifiable.
        output_spaces (Union[callable,Dict[str,Space]]): A callable that takes a method name as argument
            and returns the Space(s) that this method (on the Specifiable object) would return.
            Alternatively: A dict with key=method name and value=Space(s).
        shutdown_method (Optional[str]): An optional name of a shutdown method that will be called on the
            Specifiable object before "server" shutdown to give the Specifiable a chance to clean up.
            The Specifiable must implement this method.
    """
    super(SpecifiableServer, self).__init__()
    self.specifiable_class = specifiable_class
    self.spec = spec

    # If dict: Process possible specs right away so we don't have to do this during calls.
    if isinstance(output_spaces, dict):
        self.output_spaces = {}
        for method_name, space_spec in output_spaces.items():
            if isinstance(space_spec, (tuple, list)):
                self.output_spaces[method_name] = [
                    Space.from_spec(s) if s is not None else None for s in space_spec
                ]
            else:
                self.output_spaces[method_name] = \
                    Space.from_spec(space_spec) if space_spec is not None else None
    else:
        self.output_spaces = output_spaces

    self.shutdown_method = shutdown_method

    # The process in which the Specifiable will run.
    self.process = None
    # The out-pipe to send commands (method calls) to the server process.
    self.out_pipe = None
    # The in-pipe to receive the "ready" signal from the server process.
    self.in_pipe = None

    # Register this object with the class.
    self.INSTANCES.append(self)
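
# Construction sketch for SpecifiableServer. Assumptions (illustrative, not confirmed by this
# file): `Environment` is a Specifiable child class registered under the "openai" spec-type,
# its `step` method returns values in a 4-dim FloatBox, and it implements `terminate`.
from rlgraph.environments import Environment
from rlgraph.spaces import FloatBox

server = SpecifiableServer(
    specifiable_class=Environment,                    # must be a child class of Specifiable
    spec=dict(type="openai", gym_env="CartPole-v0"),  # spec used to construct the Specifiable
    output_spaces=dict(step=FloatBox(shape=(4,))),    # dict form: method name -> return Space(s)
    shutdown_method="terminate"                       # called on the object before server shutdown
)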
def __new__(cls, *components, **kwargs):
    # A single list/tuple argument is unpacked into the components.
    if len(components) > 0 and isinstance(components[0], (list, tuple)):
        assert len(components) == 1
        components = components[0]

    add_batch_rank = kwargs.get("add_batch_rank", False)
    add_time_rank = kwargs.get("add_time_rank", False)
    time_major = kwargs.get("time_major", False)

    # Allow for any spec or already constructed Space to be passed in as values in the python-list/tuple.
    list_ = list()
    for value in components:
        # Value is already a Space: Copy it (to not affect the original Space) and maybe add batch/time-ranks.
        if isinstance(value, Space):
            list_.append(value.with_extra_ranks(add_batch_rank, add_time_rank, time_major))
        # Value is a list/tuple -> treat as nested Tuple space.
        elif isinstance(value, (list, tuple)):
            list_.append(Tuple(
                *value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))
        # Value is a spec (or a spec-dict with "type" field) -> produce via `from_spec`.
        elif (isinstance(value, dict) and "type" in value) or not isinstance(value, dict):
            list_.append(Space.from_spec(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))
        # Value is a simple dict -> recursively construct a Dict space as a sub-space of this one.
        else:
            list_.append(Dict(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))

    return tuple.__new__(cls, list_)
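
# Example sketch for the branches above: ready-made Spaces are copied, primitive specs go
# through `Space.from_spec`, lists become nested Tuple spaces and plain dicts become nested
# Dict spaces (the `rlgraph.spaces` import path is an assumption about this package layout).
from rlgraph.spaces import FloatBox, IntBox, Tuple

tuple_space = Tuple(
    FloatBox(shape=(2,)),          # already a Space -> copied (extra ranks added as requested)
    "int",                         # primitive spec -> Space.from_spec
    [FloatBox(), "bool"],          # list -> nested Tuple space
    dict(a="float", b=IntBox(3)),  # plain dict (no "type" key) -> nested Dict space
    add_batch_rank=True
)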
def __init__(self, spec=None, **kwargs):
    add_batch_rank = kwargs.pop("add_batch_rank", False)
    add_time_rank = kwargs.pop("add_time_rank", False)
    time_major = kwargs.pop("time_major", False)
    ContainerSpace.__init__(
        self, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
    )

    # Allow for any spec or already constructed Space to be passed in as values in the python-dict.
    # The spec may also be given directly via kwargs.
    if spec is None:
        spec = kwargs

    space_dict = {}
    for key in sorted(spec.keys()):
        # Keys must be strings.
        if not isinstance(key, str):
            raise RLGraphError("ERROR: No non-str keys allowed in a Dict-Space!")
        # Prohibit reserved characters (used by the flattened-key syntax).
        if re.search(r'/|{}\d+{}'.format(FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE), key):
            raise RLGraphError(
                "ERROR: Key to Dict must not contain '/' or '{}\\d+{}'! Is {}.".format(
                    FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE, key
                )
            )
        value = spec[key]
        # Value is already a Space: Copy it (to not affect the original Space) and maybe add batch/time-ranks.
        if isinstance(value, Space):
            space_dict[key] = value.with_extra_ranks(add_batch_rank, add_time_rank, time_major)
        # Value is a list/tuple -> treat as Tuple space.
        elif isinstance(value, (list, tuple)):
            space_dict[key] = Tuple(
                *value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )
        # Value is a spec (or a spec-dict with "type" field) -> produce via `from_spec`.
        elif (isinstance(value, dict) and "type" in value) or not isinstance(value, dict):
            space_dict[key] = Space.from_spec(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )
        # Value is a simple dict -> recursively construct another Dict space as a sub-space of this one.
        else:
            space_dict[key] = Dict(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )

    dict.__init__(self, space_dict)
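
# Example sketch: because the spec may be given via kwargs, a Dict space can be built
# directly from keyword arguments (same import-path assumption as above):
from rlgraph.spaces import Dict, FloatBox

state_space = Dict(
    position=FloatBox(shape=(3,)),                                      # ready-made Space
    velocity="float",                                                   # primitive spec -> from_spec
    sensors=dict(camera=FloatBox(shape=(84, 84, 3)), collided="bool"),  # nested Dict space
    add_batch_rank=True
)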
def __init__(self, network_spec, action_space=None, action_adapter_spec=None, deterministic=True,
             scope="policy", **kwargs):
    """
    Args:
        network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict used
            to build one.
        action_space (Optional[Space]): The action Space within which this Component will create actions.
            If None, it is inferred from the ActionAdapter built from `action_adapter_spec`.
        action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the
            default ActionAdapter object.
        deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
            Default: True.
        scope (str): The scope of this Component. Default: "policy".
    """
    super(Policy, self).__init__(scope=scope, **kwargs)

    self.neural_network = NeuralNetwork.from_spec(network_spec)  # type: NeuralNetwork

    # Create the necessary action adapters for this Policy: one per action-space component.
    self.action_adapters = dict()
    if action_space is None:
        self.action_adapters[""] = ActionAdapter.from_spec(action_adapter_spec)
        self.action_space = self.action_adapters[""].action_space
        # Assert a single-component action space.
        assert len(self.action_space.flatten()) == 1, \
            "ERROR: Action space must not be a ContainerSpace if no `action_space` is given in the " \
            "Policy constructor!"
    else:
        self.action_space = Space.from_spec(action_space)
        for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
            if action_adapter_spec is not None:
                aa_spec = action_adapter_spec.get(flat_key, action_adapter_spec)
                aa_spec["action_space"] = action_component
            else:
                aa_spec = dict(action_space=action_component)
            self.action_adapters[flat_key] = ActionAdapter.from_spec(
                aa_spec, scope="action-adapter-{}".format(i)
            )

    self.deterministic = deterministic

    # Figure out our Distributions.
    self.distributions = dict()
    for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
        # Discrete action space -> Categorical distribution.
        if isinstance(action_component, IntBox):
            self.distributions[flat_key] = Categorical(scope="categorical-{}".format(i))
        # Continuous action space -> Normal distribution (each action needs mean and variance from the network).
        elif isinstance(action_component, FloatBox):
            self.distributions[flat_key] = Normal(scope="normal-{}".format(i))
        else:
            raise RLGraphError(
                "ERROR: `action_component` is of type {} and not allowed in {} Component!".format(
                    type(action_component).__name__, self.name
                )
            )

    self.add_components(
        *[self.neural_network] + list(self.action_adapters.values()) + list(self.distributions.values())
    )
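
# Construction sketch: a Policy over a single discrete action space. With an IntBox action
# component, the constructor above pairs the ActionAdapter with a Categorical distribution.
# The layer-spec format is assumed from the rest of this codebase, not defined here.
from rlgraph.spaces import IntBox

policy = Policy(
    network_spec=[dict(type="dense", units=64)],  # spec list -> NeuralNetwork.from_spec
    action_space=IntBox(4),                       # -> one ActionAdapter plus Categorical("categorical-0")
    deterministic=False                           # sample actions instead of picking the max-likelihood one
)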
def __init__(
    self,
    state_space,
    action_space,
    discount=0.98,
    preprocessing_spec=None,
    network_spec=None,
    internal_states_space=None,
    action_adapter_spec=None,
    exploration_spec=None,
    execution_spec=None,
    optimizer_spec=None,
    observe_spec=None,
    update_spec=None,
    summary_spec=None,
    saver_spec=None,
    auto_build=True,
    name="agent"
):
    """
    Args:
        state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
        action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.
        discount (float): The discount factor (gamma).
        preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary
            state-preprocessing steps or a PreprocessorStack object itself.
        network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the
            NeuralNetwork object itself.
        internal_states_space (Optional[Union[dict,Space]]): Spec dict or direct Space object for the
            Space(s) of the internal (RNN) states.
        action_adapter_spec (Optional[dict,ActionAdapter]): The spec-dict for the ActionAdapter Component
            or the ActionAdapter object itself.
        exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
        execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
        optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.
        observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
        update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
        summary_spec (Optional[dict]): Spec-dict to specify summary settings.
        saver_spec (Optional[dict]): Spec-dict to specify saver settings.
        auto_build (Optional[bool]): If True (default), immediately builds the graph using the Agent's
            graph builder. If False, users must separately call `agent.build()`. Useful for debugging or
            for analyzing Components before building.
        name (str): Some name for this Agent object.
    """
    super(Agent, self).__init__()

    self.name = name
    self.auto_build = auto_build
    self.graph_built = False
    self.logger = logging.getLogger(__name__)

    self.state_space = Space.from_spec(state_space).with_batch_rank(False)
    self.logger.info("Parsed state space definition: {}".format(self.state_space))
    self.action_space = Space.from_spec(action_space).with_batch_rank(False)
    self.logger.info("Parsed action space definition: {}".format(self.action_space))

    self.discount = discount

    # The Agent's root-Component.
    self.root_component = Component(name=self.name)

    # Define the input-Spaces:
    # Tag the input-Space to `self.set_policy_weights` as equal to whatever the variables-Space will be for
    # the Agent's policy Component.
    self.input_spaces = dict(
        states=self.state_space.with_batch_rank(),
    )

    # Construct the Preprocessor.
    self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
    self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(self.state_space)
    self.preprocessing_required = preprocessing_spec is not None and len(preprocessing_spec) > 0
    if self.preprocessing_required:
        self.logger.info("Preprocessing required.")
        self.logger.info("Parsed preprocessed-state space definition: {}".format(self.preprocessed_state_space))
    else:
        self.logger.info("No preprocessing required.")

    # Construct the Policy network.
    self.neural_network = None
    if network_spec is not None:
        self.neural_network = NeuralNetwork.from_spec(network_spec)
    self.action_adapter_spec = action_adapter_spec
    self.internal_states_space = internal_states_space

    # An object implementing the loss-function interface is only strictly needed if automatic device
    # strategies like multi-gpu are enabled. This is because the device strategy needs to know the name
    # of the loss function to infer the appropriate operations.
    self.loss_function = None

    # The action adapter mapping raw NN output to (shaped) actions.
    action_adapter_dict = dict(action_space=self.action_space)
    if self.action_adapter_spec is None:
        self.action_adapter_spec = action_adapter_dict
    else:
        self.action_adapter_spec.update(action_adapter_dict)

    # The behavioral policy of the algorithm. Also the one that gets updated.
    self.policy = Policy(
        network_spec=self.neural_network,
        action_adapter_spec=self.action_adapter_spec
    )

    self.exploration = Exploration.from_spec(exploration_spec)
    self.execution_spec = parse_execution_spec(execution_spec)

    # Python-side experience buffers for better performance (may be disabled).
    self.default_env = "env_0"
    self.states_buffer = defaultdict(list)
    self.actions_buffer = defaultdict(list)
    self.internals_buffer = defaultdict(list)
    self.rewards_buffer = defaultdict(list)
    self.next_states_buffer = defaultdict(list)
    self.terminals_buffer = defaultdict(list)

    self.observe_spec = parse_observe_spec(observe_spec)
    if self.observe_spec["buffer_enabled"]:
        self.reset_env_buffers()

    # Global time step counter.
    self.timesteps = 0

    # Create the Agent's optimizer based on optimizer_spec and execution strategy.
    self.optimizer = None
    if optimizer_spec is not None:
        self.optimizer = Optimizer.from_spec(optimizer_spec)

    # The update-spec dict tells the Agent how to update (e.g. memory batch size).
    self.update_spec = parse_update_spec(update_spec)

    # Create our GraphBuilder and GraphExecutor.
    self.graph_builder = GraphBuilder(action_space=self.action_space, summary_spec=summary_spec)
    self.graph_executor = GraphExecutor.from_spec(
        get_backend(),
        graph_builder=self.graph_builder,
        execution_spec=self.execution_spec,
        saver_spec=saver_spec
    )  # type: GraphExecutor
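
# Construction sketch: Agent is normally subclassed by algorithm-specific implementations,
# but the constructor above can be exercised directly; spec contents here are illustrative.
from rlgraph.spaces import FloatBox, IntBox

agent = Agent(
    state_space=FloatBox(shape=(4,)),
    action_space=IntBox(2),
    network_spec=[dict(type="dense", units=32)],  # layer-spec list (format assumed)
    discount=0.99,
    auto_build=False  # build the graph later via `agent.build()` (see docstring above)
)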