def translate_space(space, dtype=None, force_float32=False):
    """
    Translates openAI spaces into RLGraph Space classes.

    Args:
        space (gym.spaces.Space): The openAI Space to be translated.
        dtype (Optional[str]): If given, used instead of the dtype of a `gym.spaces.Box`'s `low` array
            to decide which kind of RLGraph box is created. Passed on to all nested sub-spaces.
        force_float32 (bool): If True, float-typed Box spaces are always translated into "float32"
            FloatBoxes. Passed on to all nested sub-spaces.

    Returns:
        Space: The translated rlgraph Space.

    Raises:
        RLGraphError: If `space` is of an unknown gym Space class or is a Box with a dtype that is
            neither int, float nor bool.
    """
    if isinstance(space, gym.spaces.Discrete):
        return IntBox(space.n)

    if isinstance(space, gym.spaces.MultiBinary):
        return BoolBox(shape=(space.n, ))

    if isinstance(space, gym.spaces.MultiDiscrete):
        # Use numpy's dtype directly: Inside this function, the name `dtype` refers to the (possibly None)
        # argument and must not be called like a conversion helper.
        # NOTE(review): `low` has shape (nvec.ndim,), not nvec.shape - confirm that IntBox broadcasts this.
        return IntBox(low=np.zeros((space.nvec.ndim, ), dtype=np.uint8), high=space.nvec)

    if isinstance(space, gym.spaces.Box):
        # Decide by dtype (an explicitly given `dtype` wins over the Box's own one).
        box_dtype = str(dtype or space.low.dtype)
        if "int" in box_dtype:
            return IntBox(low=space.low, high=space.high, dtype=box_dtype)
        if "float" in box_dtype:
            return FloatBox(
                low=space.low, high=space.high,
                dtype="float32" if force_float32 is True else box_dtype
            )
        if "bool" in box_dtype:
            return BoolBox(shape=space.shape)
        # Unsupported Box dtype -> Fall through to the error at the end.

    elif isinstance(space, gym.spaces.Tuple):
        # Forward the translation options to the sub-spaces (same as for Dict spaces below).
        return Tuple(*[
            OpenAIGymEnv.translate_space(sub_space, dtype, force_float32) for sub_space in space.spaces
        ])

    elif isinstance(space, gym.spaces.Dict):
        return Dict({
            key: OpenAIGymEnv.translate_space(value, dtype, force_float32)
            for key, value in space.spaces.items()
        })

    raise RLGraphError(
        "Unknown openAI gym Space class ({}) for state_space!".format(space))
def reduce(self, start, limit, reduce_op=operator.add):
    """
    Reduces the values of the segment [start, limit) with the given operation.

    The leaf for index `i` is read from `self.values[self.capacity + i]`; the parent of a node `k` is
    the node `k >> 1`.

    Args:
        start (int): First index of the segment (inclusive).
        limit (Optional[int]): End index of the segment (exclusive). None means `self.capacity`,
            a negative value is counted from `self.capacity`.
        reduce_op (Union(operator.add, min, max)): The reduce operation to apply.

    Returns:
        Number: The result of the reduce operation over the segment.

    Raises:
        RLGraphError: If `reduce_op` is none of add, min, max.
    """
    # Resolve an open or negative upper bound.
    if limit is None:
        limit = self.capacity
    if limit < 0:
        limit += self.capacity

    # Start from the neutral element of the (commutative) reduce op.
    neutral_elements = (
        (operator.add, 0.0),
        (min, float('inf')),
        (max, float('-inf')),
    )
    for supported_op, neutral in neutral_elements:
        if reduce_op == supported_op:
            result = neutral
            break
    else:
        raise RLGraphError(
            "Unsupported reduce OP. Support ops are [add, min, max].")

    # Walk from the two leaf boundaries up towards the root.
    left = start + self.capacity
    right = limit + self.capacity
    while left < right:
        # `left` is a right child: Take it and move on to its right neighbor.
        if left & 1:
            result = reduce_op(result, self.values[left])
            left += 1
        # `right` (exclusive) is a right child: Its left sibling is the last node of the segment.
        if right & 1:
            right -= 1
            result = reduce_op(result, self.values[right])
        left >>= 1
        right >>= 1

    return result
def _sanity_check_call_parameters(self, params, method, method_type, add_auto_key_as_first_param): raw_signature_parameters = inspect.signature(method).parameters actual_params = list(raw_signature_parameters.values()) if add_auto_key_as_first_param is True: actual_params = actual_params[1:] if len(params) != len(actual_params): # Check whether the last arg is var_positional (e.g. *inputs; in that case it's ok if the number of params # is larger than that of the actual graph_fn params or its one smaller). if actual_params[-1].kind == inspect.Parameter.VAR_POSITIONAL and (len(params) > len(actual_params) > 0 or len(params) == len(actual_params) - 1): pass # Some actual params have default values: Number of given params must be at least as large as the number # of non-default actual params but maximally as large as the number of actual_parameters. elif len(actual_params) >= len(params) >= sum( [p.default is inspect.Parameter.empty for p in actual_params]): pass else: raise RLGraphError( "ERROR: {} '{}/{}' has {} input-parameters, but {} ({}) were being provided in the " "`Component.call` method!".format(method_type, self.name, method.__name__, len(actual_params), len(params), params) )
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Validates the incoming "actions" Space and the given action Space and stores the latter.

    Args:
        input_spaces (dict): The input Spaces by name; the entry "actions" is checked here.
        action_space (Space): The action Space to store in `self.action_space`. Must not be None.

    Raises:
        RLGraphError: If both an epsilon exploration and a noise component are set.
    """
    sampled_actions_space = input_spaces["actions"]
    if get_backend() == "tf":
        sanity_check_space(sampled_actions_space, must_have_batch_rank=True)

    assert action_space is not None
    self.action_space = action_space

    uses_epsilon = bool(self.epsilon_exploration)
    uses_noise = bool(self.noise_component)

    # Check again at graph creation? This is currently redundant to the check in __init__
    if uses_epsilon and uses_noise:
        raise RLGraphError(
            "Cannot use both epsilon exploration and a noise component at the same time."
        )

    if uses_epsilon:
        # Epsilon exploration is checked against an IntBox action Space with categories.
        sanity_check_space(
            self.action_space,
            allowed_types=[IntBox],
            must_have_categories=True,
            num_categories=(1, None)
        )
    elif uses_noise:
        # A noise component is checked against a FloatBox action Space.
        sanity_check_space(self.action_space, allowed_types=[FloatBox])
def define_observations(self, observation_spec):
    """
    Creates a RLgraph Space for the given deepmind Lab's observation specifier.

    Args:
        observation_spec (List[str]): A list with the wanted names from the deepmind Lab available observations.
            Each available observation is a dict with the following keys: name, shape and dtype.

    Returns:
        Space: The RLgraph equivalent observation Space. If exactly one observation ends up in the
            result, its Space is returned directly, otherwise a Dict Space keyed by observation name.

    Raises:
        RLGraphError: If a wanted observation name is not available in the level or if the dtype of an
            observation is neither float, int nor str.
    """
    dict_space = dict()
    space = None
    available_observations = self.level.observation_spec()

    for observation_name in observation_spec:
        # Find the (first) matching observation_item in the observation_spec of the Env.
        observation_item = next(
            (o for o in available_observations if o["name"] == observation_name), None
        )
        # Fail with a descriptive error (instead of a bare IndexError) for unknown names.
        if observation_item is None:
            raise RLGraphError(
                "Observation '{}' is not available in this Deepmind Lab level! Available observations "
                "are: {}".format(observation_name, [o["name"] for o in available_observations])
            )

        dtype_str = str(observation_item["dtype"])
        if "float" in dtype_str:
            space = FloatBox(shape=observation_item["shape"], dtype=observation_item["dtype"])
        elif "int" in dtype_str:
            space = IntBox(shape=observation_item["shape"], dtype=observation_item["dtype"])
        elif "str" in dtype_str:
            space = TextBox(shape=observation_item["shape"])
        else:
            raise RLGraphError(
                "Unknown Deepmind Lab Space class for state_space!")

        dict_space[observation_name] = space

    # A single observation is returned as its plain Space (not wrapped in a Dict).
    if len(dict_space) == 1:
        return space
    return Dict(dict_space)
def __init__(self, num_iterations, call_component, graph_fn_name, scope="fixed-loop", **kwargs):
    """
    Args:
        num_iterations (int): How often to call the given GraphFn. Must be larger than 0.
        call_component (Component): Component providing graph fn to call within loop.
        graph_fn_name (str): The name of the graph_fn in call_component.

    Raises:
        RLGraphError: If `call_component` has no graph_fn with the given name.
    """
    assert num_iterations > 0

    super(FixedLoop, self).__init__(scope=scope, **kwargs)

    self.num_iterations = num_iterations
    self.graph_fn_to_call = None

    # Pick the first graph_fn of the call-component that carries the wanted name.
    matching_graph_fn = next(
        (candidate for candidate in call_component.graph_fns if candidate.name == graph_fn_name),
        None
    )
    needs_flattening = False
    if matching_graph_fn is not None:
        self.graph_fn_to_call = matching_graph_fn.get_method()
        needs_flattening = matching_graph_fn.flatten_ops

    if not self.graph_fn_to_call:
        raise RLGraphError(
            "ERROR: GraphFn '{}' not found in Component '{}'!".format(
                graph_fn_name, call_component.global_scope))

    # TODO: Do we sum up, append to list, ...?
    self.add_components(call_component)

    # Expose the loop as API-method; flatten the "inputs" arg only if the looped graph_fn flattens its ops.
    self.define_api_method(
        name="call_loop",
        func=self._graph_fn_call_loop,
        flatten_ops={"inputs"} if needs_flattening else None
    )
def connect_to(self, next_op_rec):
    """
    Links this op-record to a following one: Sets the `previous` pointer of `next_op_rec`, hands over
    `op` and `space` (if this record already has an op) and registers `next_op_rec` in `self.next`.

    If `self.op_instructions` contains a "key-lookup", only the looked-up part of the op (and of the
    Space) is handed over.

    Args:
        next_op_rec (DataOpRecord): The next DataOpRecord to connect this one to.

    Raises:
        RLGraphError: If a "key-lookup" instruction cannot be resolved on this record's op.
    """
    # `previous` pointer: Set it, or - if already connected - make sure it points to this record.
    if next_op_rec.previous is None:
        next_op_rec.previous = self
    else:
        assert next_op_rec.previous is self

    # We do have an op -> Pass it (and its Space) on to the next op-rec.
    if self.op is not None:
        if "key-lookup" not in self.op_instructions:
            # No instructions -> simply pass on.
            next_op_rec.op = self.op
            next_op_rec.space = self.space
        else:
            lookup_key = self.op_instructions["key-lookup"]

            # String key: Look into a dict op (directly or via flat-key).
            if isinstance(lookup_key, str):
                picked_op = None
                picked_space = None
                if isinstance(self.op, dict):
                    assert isinstance(self.op, DataOpDict)
                    if lookup_key in self.op:
                        picked_op = self.op[lookup_key]
                        picked_space = self.space[lookup_key]
                    else:
                        # Lookup-key could also be a flat-key. -> Try to find entry in nested (dict) op.
                        picked_op = self.op.flat_key_lookup(lookup_key)
                        if picked_op is not None:
                            picked_space = self.space.flat_key_lookup(lookup_key)

                # Nothing found -> Invalid key-lookup.
                if picked_op is None or picked_space is None:
                    raise RLGraphError(
                        "Op ({}) is not a dict or does not contain the lookup key '{}'!".format(
                            self.op, lookup_key
                        )
                    )
                next_op_rec.op = picked_op
                next_op_rec.space = picked_space

            # Any other key (e.g. an int index).
            else:
                index_unusable = isinstance(lookup_key, int) and (
                    not isinstance(self.op, (list, tuple)) or lookup_key >= len(self.op)
                )
                if index_unusable:
                    raise RLGraphError(
                        "Op ({}) is not a list/tuple or contains not enough items for lookup "
                        "index '{}'!".format(self.op, lookup_key)
                    )
                next_op_rec.op = self.op[lookup_key]
                next_op_rec.space = self.space[lookup_key]

        assert next_op_rec.space is not None

    # Add `next` connection.
    self.next.add(next_op_rec)
def _graph_fn_sync(self, values_):
    """
    Generates the op that syncs this Synchronizable's parent's variable values from another
    Synchronizable Component.

    Incoming values and the parent's variables are both sorted by key and then paired up by position.

    Args:
        values_ (DataOpDict): The dict of variable values (coming from the "_variables"-Socket of any other
            Component) that need to be assigned to this Component's parent's variables.

    Returns:
        DataOp: For the "tf" backend, the no-op that executes the syncing (it depends on all single
            assign ops). None otherwise.

    Raises:
        RLGraphError: (tf backend only) If the number of variables or the shape of a variable pair
            does not match.
    """
    backend = get_backend()

    if backend == "tf":
        own_vars = self.parent_component.get_variables(
            collections=self.collections, custom_scope_separator="-")
        sources = sorted(values_.items())
        targets = sorted(own_vars.items())

        # Sanity checking: Both sides must hold the same number of variables.
        if len(sources) != len(targets):
            raise RLGraphError(
                "ERROR: Number of Variables to sync must match! "
                "We have {} syncs_from and {} syncs_to.".format(
                    len(sources), len(targets)))

        # Loop through all incoming vars and our own and collect assign ops.
        assign_ops = []
        for (key_from, var_from), (key_to, var_to) in zip(sources, targets):
            # TODO: Check the names' ends (without the global scope) as well? Currently, only shapes
            # are compared.
            shape_from = get_shape(var_from)
            shape_to = get_shape(var_to)
            if shape_from != shape_to:
                raise RLGraphError(
                    "ERROR: Variable shapes for syncing must match! "
                    "Shape mismatch between from={} ({}) and to={} ({}).".format(
                        key_from, shape_from, key_to, shape_to))
            assign_ops.append(self.assign_variable(var_to, var_from))

        # Bundle everything into one "sync"-op.
        with tf.control_dependencies(assign_ops):
            return tf.no_op(
                name="sync-to-{}".format(self.parent_component.name))

    if backend == "pytorch":
        # Get refs(!) of the parent's variables so that values can be set on them.
        own_refs = self.parent_component.get_variables(
            collections=self.collections, custom_scope_separator="-", get_ref=True)
        sources = sorted(values_.items())
        targets = sorted(own_refs.items())

        # Assign parameters of layers.
        for (_, new_value), (_, variable_ref) in zip(sources, targets):
            variable_ref.set_value(new_value)

    return None
def api_method_wrapper(self, *args, **kwargs):
    """
    Wrapper that is executed whenever the wrapped API-method is called on a component.

    In "define_by_run" mode, the wrapped function is called directly (and timed). Otherwise, the call
    is recorded as a new in-op-record column, the wrapped function (or the graph_fn wrapper) is called
    and its results are pushed into a new out-op-record column.

    NOTE(review): `name`, `wrapped_func`, `returns`, `flatten_ops`, `split_ops`,
    `add_auto_key_as_first_param` and `requires_variable_completeness` are not defined in this function;
    presumably they come from the enclosing decorator - confirm there.

    Args:
        *args: Positional call params (None values are skipped when registering input names).
        **kwargs: Named call params (None values are skipped when registering input names).

    Returns:
        In "define_by_run" mode: The output of the wrapped function.
        Otherwise: Either the raw ops (if the call stack shows a `run_through_graph_fn` frame before
        another `api_method_wrapper` frame) or the op-records/return values; a single return value is
        returned as is, several ones as a tuple, dict returns as a dict.

    Raises:
        RLGraphError: If a dict of DataOpRecords is passed in as positional arg, if no caller component
            can be determined or if the caller is neither this component nor one of its parents.
        RLGraphAPICallParamError: If too few or too many call params are given.
    """
    api_fn_name = name or re.sub(r'^_graph_fn_', "", wrapped_func.__name__)
    # Direct evaluation of function.
    if self.execution_mode == "define_by_run":
        # Count calls on the class (not on the instance).
        type(self).call_count += 1
        start = time.perf_counter()
        # Check with owner if extra args needed.
        if api_fn_name in self.api_methods and self.api_methods[
                api_fn_name].add_auto_key_as_first_param:
            # An empty string is passed in as the auto-key.
            output = wrapped_func(self, "", *args, **kwargs)
        else:
            output = wrapped_func(self, *args, **kwargs)
        # Store runtime for this method.
        type(self).call_times.append(  # Component.call_times
            (self.name, wrapped_func.__name__, time.perf_counter() - start))
        return output

    api_method_rec = self.api_methods[api_fn_name]

    # Sanity check input args for accidential dict-return values being passed into the next API as
    # supposed DataOpRecord.
    # Only the first value of each dict arg is collected and only the first collected one is checked.
    dict_args = [
        next(iter(a.values())) for a in args if isinstance(a, dict)
    ]
    if len(dict_args) > 0 and isinstance(dict_args[0], DataOpRecord):
        raise RLGraphError(
            "One of your input args to API-method '{}.{}()' is a dict of DataOpRecords! This is probably "
            "coming from a previous call to an API-method (returning a dict) and the DataOpRecord should be "
            "extracted by string-key and passed into '{}' "
            "directly.".format(api_method_rec.component.global_scope,
                               api_fn_name, api_fn_name))

    # Create op-record column to call API method with. Ignore None input params. These should not be sent
    # to the API-method.
    in_op_column = DataOpRecordColumnIntoAPIMethod(
        component=self,
        api_method_rec=api_method_rec,
        args=args,
        kwargs=kwargs)
    # Add the column to the API-method record.
    api_method_rec.in_op_columns.append(in_op_column)

    # Check minimum number of passed args.
    minimum_num_call_params = len(in_op_column.api_method_rec.non_args_kwargs) - \
        len(in_op_column.api_method_rec.default_args)
    if len(in_op_column.op_records) < minimum_num_call_params:
        raise RLGraphAPICallParamError(
            "Number of call params ({}) for call to API-method '{}' is too low. Needs to be at least {} "
            "params!".format(len(in_op_column.op_records),
                             api_method_rec.name, minimum_num_call_params))

    # Link from incoming op_recs into the new column or populate new column with ops/Spaces (this happens
    # if this call was made from within a graph_fn such that ops and Spaces are already known).
    # Positional args are keyed by their position (int), kwargs by their name (str, sorted by name).
    all_args = [(i, a) for i, a in enumerate(args) if a is not None] + \
        [(k, v) for k, v in sorted(kwargs.items()) if v is not None]
    # `flex`: Index (into `all_args`) of the first arg that went into a var-positional ("*flex") param.
    flex = None
    build_when_done = False
    for i, (key, value) in enumerate(all_args):
        # Named arg/kwarg -> get input_name from that and peel op_rec.
        if isinstance(key, str):
            param_name = key
        # Positional arg -> get input_name from input_names list.
        else:
            # Once a var-positional param was hit, all further positional args map to that slot.
            slot = key if flex is None else flex
            if slot >= len(api_method_rec.input_names):
                raise RLGraphAPICallParamError(
                    "Too many input args given in call to API-method '{}'!"
                    .format(api_method_rec.name))
            param_name = api_method_rec.input_names[slot]

        # Var-positional arg, attach the actual position to input_name string.
        if self.api_method_inputs.get(param_name, "") == "*flex":
            if flex is None:
                flex = i
            param_name += "[{}]".format(i - flex)
        # Actual kwarg (not in list of api_method_inputs).
        elif api_method_rec.kwargs_name is not None and param_name not in self.api_method_inputs:
            param_name = api_method_rec.kwargs_name + "[{}]".format(
                param_name)

        # We are already in building phase (params may be coming from inside graph_fn).
        if self.graph_builder is not None and self.graph_builder.phase == "building":
            # If Space not stored yet, determine it from op.
            assert in_op_column.op_records[i].op is not None
            if in_op_column.op_records[i].space is None:
                in_op_column.op_records[i].space = get_space_from_op(
                    in_op_column.op_records[i].op)
            self.api_method_inputs[
                param_name] = in_op_column.op_records[i].space
            # Check input-completeness of Component (but not strict as we are only calling API, not a graph_fn).
            if self.input_complete is False:
                # Build right after this loop in case more Space information comes in through next args/kwargs.
                build_when_done = True

        # A DataOpRecord from the meta-graph.
        elif isinstance(value, DataOpRecord):
            # Create entry with unknown Space if it doesn't exist yet.
            if param_name not in self.api_method_inputs:
                self.api_method_inputs[param_name] = None

        # Fixed value (instead of op-record): Store the fixed value directly in the op.
        else:
            if self.api_method_inputs.get(param_name) is None:
                self.api_method_inputs[
                    param_name] = in_op_column.op_records[i].space

    if build_when_done:
        # Check Spaces and create variables.
        self.graph_builder.build_component_when_input_complete(self)

    # Regular API-method: Call it here.
    api_fn_args, api_fn_kwargs = in_op_column.get_args_and_kwargs()

    if api_method_rec.is_graph_fn_wrapper is False:
        return_values = wrapped_func(self, *api_fn_args, **api_fn_kwargs)
    # Wrapped graph_fn: Call it through yet another wrapper.
    else:
        return_values = graph_fn_wrapper(
            self, wrapped_func, returns,
            dict(
                flatten_ops=flatten_ops,
                split_ops=split_ops,
                add_auto_key_as_first_param=add_auto_key_as_first_param,
                requires_variable_completeness=
                requires_variable_completeness), *api_fn_args,
            **api_fn_kwargs)

    # Process the results (push into a column).
    # Dict returns go into the column as kwargs, everything else as (tuple-forced) args.
    out_op_column = DataOpRecordColumnFromAPIMethod(
        component=self,
        api_method_name=api_fn_name,
        args=util.force_tuple(return_values)
        if type(return_values) != dict else None,
        kwargs=return_values if type(return_values) == dict else None)

    # If we already have actual op(s) and Space(s), push them already into the
    # DataOpRecordColumnFromAPIMethod's records.
    if self.graph_builder is not None and self.graph_builder.phase == "building":
        # Link the returned ops to that new out-column.
        # NOTE(review): `rec` is iterated from `out_op_column.op_records` itself, so these two
        # assignments write each record's own `op`/`space` back onto it - confirm intent.
        for i, rec in enumerate(out_op_column.op_records):
            out_op_column.op_records[i].op = rec.op
            out_op_column.op_records[i].space = rec.space
    # And append the new out-column to the api-method-rec.
    api_method_rec.out_op_columns.append(out_op_column)

    # Do we need to return the raw ops or the op-recs?
    # Only need to check if False, otherwise, we return ops directly anyway.
    return_ops = False
    stack = inspect.stack()
    # Local variables of the direct caller's frame.
    f_locals = stack[1][0].f_locals
    # We may be in a list comprehension, try next frame.
    if f_locals.get(".0"):
        f_locals = stack[2][0].f_locals
    # Check whether the caller component is a parent of this one.
    # The caller component is looked up under the local names `root`, `self_` or `self` (in that order).
    caller_component = f_locals.get(
        "root", f_locals.get("self_", f_locals.get("self")))

    # Potential call from a lambda.
    if caller_component is None and "fn" in stack[2][0].f_locals:
        # This is the component.
        prev_caller_component = TraceContext.PREV_CALLER
        lambda_obj = stack[2][0].f_locals["fn"]
        if "lambda" in inspect.getsource(lambda_obj):
            # Try to reconstruct caller by using parent of prior caller.
            caller_component = prev_caller_component.parent_component

    if caller_component is None:
        raise RLGraphError(
            "API-method '{}' must have as 1st parameter (the component) either `root` or `self`. Other names "
            "are not allowed!".format(api_method_rec.name))
    # Not directly called by this method itself (auto-helper-component-API-call).
    # AND call is coming from some caller Component, but that component is not this component
    # OR a parent -> Error.
    elif caller_component is not None and \
            type(caller_component).__name__ != "MetaGraphBuilder" and \
            caller_component not in [self] + self.get_parents():
        # Calls coming from an `__init__` inside a file named op_records.py are exempt from this error.
        if not (stack[1][3] == "__init__"
                and re.search(r'op_records\.py$', stack[1][1])):
            raise RLGraphError(
                "The component '{}' is not a child (or grand-child) of the caller ({})! Maybe you forgot to "
                "add it as a sub-component via `add_components()`.".
                format(self.global_scope, caller_component.global_scope))

    # Update trace context.
    TraceContext.PREV_CALLER = caller_component

    # Walk up the call stack: The first matching frame decides what is returned.
    for stack_item in stack[1:]:  # skip current frame
        # If we hit an API-method call -> return op-recs.
        if stack_item[3] == "api_method_wrapper" and re.search(
                r'decorators\.py$', stack_item[1]):
            break
        # If we hit a graph_fn call -> return ops.
        elif stack_item[3] == "run_through_graph_fn" and re.search(
                r'graph_builder\.py$', stack_item[1]):
            return_ops = True
            break

    if return_ops is True:
        if type(return_values) == dict:
            return {
                key: value.op
                for key, value in out_op_column.get_args_and_kwargs()
                [1].items()
            }
        else:
            tuple_returns = tuple(
                map(lambda x: x.op,
                    out_op_column.get_args_and_kwargs()[0]))
            # A single return value is not wrapped in a tuple.
            return tuple_returns[0] if len(
                tuple_returns) == 1 else tuple_returns
    # Parent caller is non-graph_fn: Return op-recs.
    else:
        if type(return_values) == dict:
            return return_values
        else:
            tuple_returns = out_op_column.get_args_and_kwargs()[0]
            # A single return value is not wrapped in a tuple.
            return tuple_returns[0] if len(
                tuple_returns) == 1 else tuple_returns
def get_space_from_op(op, num_categories=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op). This is useful for shape inference on
    returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.
        num_categories (Optional[int]): An optional indicator, what the `num_categories` property for
            an IntBox should be.

    Returns:
        Space: The inferred Space object or 0 if no Space can be assigned to the op (e.g. ops without a
            dtype or with an unknown shape). For dicts/tuples, 0 is returned as soon as one of the
            contained ops yields 0.

    Raises:
        RLGraphError: If `op` is a string or a tensor with a dtype that is neither float, int, bool
            nor string.
    """
    # A dict (DataOpDict) -> Dict Space.
    if isinstance(op, dict):
        sub_spaces = {}
        any_batch_rank = False
        any_time_rank = False
        for name, sub_op in op.items():
            # Keys starting with `_num_` only carry the num_categories for their sibling key.
            if name[:5] == "_num_":
                continue
            # Special case for IntBoxes: Take num_categories from the key `_num_[name]` (if it exists).
            num_categories = op.get("_num_{}".format(name))
            sub_space = get_space_from_op(sub_op, num_categories=num_categories)
            sub_spaces[name] = sub_space
            if sub_space == 0:
                return 0
            if sub_space.has_batch_rank:
                any_batch_rank = True
            if sub_space.has_time_rank:
                any_time_rank = True
        return Dict(sub_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # A tuple (DataOpTuple) -> Tuple Space.
    if isinstance(op, tuple):
        sub_spaces = []
        any_batch_rank = False
        any_time_rank = False
        for sub_op in op:
            sub_space = get_space_from_op(sub_op)
            if sub_space == 0:
                return 0
            sub_spaces.append(sub_space)
            if sub_space.has_batch_rank:
                any_batch_rank = True
            if sub_space.has_time_rank:
                any_time_rank = True
        return Tuple(sub_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # Primitive Space -> Infer from the op's dtype and shape.
    int_high = {} if num_categories is None else {"high": num_categories}

    # Op itself is a single value, simple python type.
    if isinstance(op, (bool, int, float)):
        return BoxSpace.from_spec(spec=type(op), shape=(), **int_high)

    if isinstance(op, str):
        raise RLGraphError(
            "Cannot derive Space from non-allowed op ({})!".format(op))

    # A single numpy array.
    if isinstance(op, np.ndarray):
        return BoxSpace.from_spec(
            spec=convert_dtype(str(op.dtype), "np"), shape=op.shape, **int_high
        )

    if isinstance(op, list):
        return try_space_inference_from_list(op)

    # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
    # PyTorch Tensors do not have get_shape so must check backend.
    if not hasattr(op, "dtype") or (get_backend() == "tf" and not hasattr(op, "get_shape")):
        return 0

    # Some tensor: Can be converted into a BoxSpace.
    shape = get_shape(op)
    # Unknown shape (e.g. a cond op).
    if shape is None:
        return 0

    add_batch_rank = False
    add_time_rank = False
    time_major = False
    remaining_dims = list(shape)

    # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are and
    # mark the respective positions with -1.
    batch_rank = getattr(op, "_batch_rank", None)
    if isinstance(batch_rank, int):
        add_batch_rank = True
        remaining_dims[batch_rank] = -1

    time_rank = getattr(op, "_time_rank", None)
    if isinstance(time_rank, int):
        add_time_rank = True
        if time_rank == 0:
            time_major = True
        remaining_dims[time_rank] = -1

    # Drop all marked positions from the shape.
    shape = tuple(dim for dim in remaining_dims if dim != -1)

    # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
    if add_batch_rank is False and add_time_rank is False and shape != () and shape[0] is None:
        if len(shape) > 1 and shape[1] is None:
            # Two leading unknown dims: Which one is batch and which one is time cannot be
            # determined here; both are stripped.
            shape = shape[2:]
            add_time_rank = True
        else:
            shape = shape[1:]
        add_batch_rank = True

    # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

    base_dtype = getattr(op.dtype, "base_dtype", op.dtype)
    # PyTorch does not have a bool type
    if get_backend() == "pytorch" and op.dtype is torch.uint8:
        base_dtype = bool
    base_dtype_str = str(base_dtype)

    rank_settings = dict(
        shape=shape,
        add_batch_rank=add_batch_rank,
        add_time_rank=add_time_rank,
        time_major=time_major
    )

    # FloatBox
    if "float" in base_dtype_str:
        return FloatBox(dtype=convert_dtype(base_dtype, "np"), **rank_settings)

    # IntBox
    if "int" in base_dtype_str:
        high = num_categories or getattr(op, "_num_categories", None)
        return IntBox(high, dtype=convert_dtype(base_dtype, "np"), **rank_settings)

    # a BoolBox
    if "bool" in base_dtype_str:
        return BoolBox(**rank_settings)

    # a TextBox
    if "string" in base_dtype_str:
        return TextBox(**rank_settings)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
def __init__(self, epsilon_spec=None, noise_spec=None, scope="exploration", **kwargs):
    """
    Sets up the exploration component and defines its `get_action` API-method. Which implementation of
    `get_action` is registered depends on whether `epsilon_spec`, `noise_spec` or none of them is given.

    Args:
        epsilon_spec (any): The spec or Component object itself to construct an EpsilonExploration Component.
        noise_spec (dict): The specification dict for a noise generator that adds noise to the NN's output.

    Raises:
        RLGraphError: If both `epsilon_spec` and `noise_spec` are given.
    """
    super(Exploration, self).__init__(scope=scope, **kwargs)

    self.action_space = None  # The actual action space (may not have batch-rank, just the plain space)
    self.flat_action_space = None

    # Sub-components; at most one of the two is created below.
    self.epsilon_exploration = None
    self.noise_component = None

    # For define-by-run sampling.
    self.sample_obj = None

    # Don't allow both epsilon and noise component
    if epsilon_spec and noise_spec:
        raise RLGraphError(
            "Cannot use both epsilon exploration and a noise component at the same time."
        )

    # Add epsilon component.
    if epsilon_spec:
        self.epsilon_exploration = EpsilonExploration.from_spec(
            epsilon_spec)
        self.add_components(self.epsilon_exploration)

        # Define our interface.
        # Here, `time_step` has no default value (as opposed to the two other variants below).
        @rlgraph_api(component=self)
        def get_action(self, actions, time_step, use_exploration=True):
            """
            Action depends on time-step (e.g. epsilon-decay).
            """
            epsilon_decisions = self.epsilon_exploration.do_explore(
                actions, time_step)
            return self._graph_fn_pick(use_exploration, epsilon_decisions,
                                       actions)

    # Add noise component.
    elif noise_spec:
        self.noise_component = NoiseComponent.from_spec(noise_spec)
        self.add_components(self.noise_component)

        # `time_step` is accepted, but not used in this variant.
        @rlgraph_api(component=self)
        def get_action(self, actions, time_step=0, use_exploration=True):
            """
            Noise is added to the sampled action.
            """
            noise = self.noise_component.get_noise()
            return self._graph_fn_add_noise(use_exploration, noise,
                                            actions)

    # Don't explore at all. Simple pass-through.
    else:
        # `time_step` and `use_exploration` are accepted, but not used in this variant.
        @rlgraph_api(component=self)
        def get_action(self, actions, time_step=0, use_exploration=False):
            """
            Action is returned as is.
            """
            return actions
def setup_session(self, hooks):
    """
    Creates the session for this model (kind depends on execution mode, session type and the
    `disable_monitoring` flag), leaves the default graph context and makes the session available
    under `self.session`.

    Args:
        hooks (list): A list of session hooks to use.

    Raises:
        RLGraphError: If the execution mode is "distributed", but no server has been set up.
    """
    if self.execution_mode != "distributed":
        if not self.disable_monitoring:
            self.logger.info("Setting up singular monitored session for non-distributed mode.")
            self.monitored_session = tf.train.SingularMonitoredSession(
                hooks=hooks,
                scaffold=self.scaffold,
                master='',  # Default value.
                config=self.tf_session_config,
                checkpoint_dir=None
            )
        else:
            # Monitoring is disabled -> Plain session.
            self.logger.info("Setting up default session for non-distributed mode.")
            self.monitored_session = tf.Session(config=self.tf_session_config)
    else:
        self.logger.info("Setting up distributed TensorFlow session.")
        if self.server is None:
            raise RLGraphError(
                "TensorflowGraphExecutor's Server is None! It could be that your DISTRIBUTED_BACKEND (currently "
                "set to '{}') is not set to 'distributed_tf'. You can do so via the RLGraph config file in your "
                "home directory or the ENV variable 'RLGRAPH_DISTRIBUTED_BACKEND=distributed_tf'.".format(
                    get_distributed_backend()
                )
            )

        if self.tf_session_type == "monitored-session":
            self.monitored_session = tf.train.MonitoredSession(
                session_creator=tf.train.ChiefSessionCreator(
                    scaffold=self.scaffold,
                    master=self.server.target,
                    config=self.tf_session_config,
                    checkpoint_dir=None,
                    checkpoint_filename_with_path=None
                ),
                hooks=hooks,
                stop_grace_period_secs=120  # Default value.
            )
        else:
            assert self.tf_session_type == "monitored-training-session",\
                "ERROR: Invalid session type: {}!".format(self.tf_session_type)

            # An explicit "is_chief" setting wins; otherwise, the task with index 0 is the chief.
            distributed_spec = self.execution_spec["distributed_spec"]
            is_chief = distributed_spec.get("is_chief", distributed_spec["task_index"] == 0)
            # Ignore other hooks: Only the last one is passed on.
            last_hook_only = [hooks[-1]] if hooks else None

            self.monitored_session = tf.train.MonitoredTrainingSession(
                master=self.server.target,
                is_chief=is_chief,
                checkpoint_dir=None,  # TODO: specify?
                save_checkpoint_secs=600,
                save_summaries_secs=30,
                log_step_count_steps=50000,
                hooks=last_hook_only,
                config=self.tf_session_config,
                stop_grace_period_secs=120  # Default value.
            )

    # Exit the graph-context.
    graph_context = self.graph_default_context
    if graph_context is not None:
        graph_context.__exit__(None, None, None)
    # TODO back in: self.graph.finalize()

    if not self.disable_monitoring:
        # Enter the session to be ready for acting/learning.
        self.monitored_session.__enter__()
        self.session = self.monitored_session._tf_sess()
    else:
        # If no monitoring, both just end up being simple sessions.
        self.session = self.monitored_session
        self.session.run(self.init_op)

    # Setup the tf Profiler.
    if self.profiling_enabled and not self.disable_monitoring:
        self.profiler = tf.profiler.Profiler(graph=self.session.graph)
def build(self, root_component, input_spaces=None):
    """
    Builds the meta-graph by constructing op-record columns going into and coming out of all API-methods
    and graph_fns.

    Every API-method of the root component is called once with placeholder op-records; the build time
    and the number of generated op-records are logged.

    Args:
        root_component (Component): Root component of the meta graph to build.
        input_spaces (Optional[Space]): Input spaces for all (exposed) API methods of the root-component.
            Used like a dict keyed by input-parameter name here. None is treated as an empty dict.

    Returns:
        MetaGraph: The meta-graph holding the root component, the registered API (per API-method name:
            the in-op-records and the op-records of the last out-column) and the number of op-records.

    Raises:
        RLGraphError: If `input_spaces` contains a name that is not in the root component's
            `api_method_inputs`.
    """
    # Time the meta-graph build:
    # NOTE(review): presumably resets the op-record id counter (`DataOpRecord._ID` is read below) - confirm.
    DataOpRecord.reset()
    time_start = time.perf_counter()
    api = {}

    # Sanity check input_spaces dict.
    if input_spaces is not None:
        for input_param_name in input_spaces.keys():
            if input_param_name not in root_component.api_method_inputs:
                raise RLGraphError(
                    "ERROR: `input_spaces` contains an input-parameter name ('{}') that's not defined in any of "
                    "the root-component's ('{}') API-methods, whose args are '{}'!"
                    .format(input_param_name, root_component.name,
                            root_component.api_method_inputs))
    else:
        input_spaces = {}

    # Call all API methods of the core once and thereby, create empty in-op columns that serve as placeholders
    # and bi-directional links between ops (for the build time).
    for api_method_name, api_method_rec in root_component.api_methods.items(
    ):
        self.logger.debug("Building meta-graph of API-method '{}'.".format(
            api_method_name))

        # Create the loose list of in-op-records depending on signature and input-spaces given.
        # If an arg has a default value, its input-space does not have to be provided.
        in_ops_records = []
        # Once an optional arg is left out, all following args are passed by name (as kwargs).
        use_named = False
        for i, param_name in enumerate(api_method_rec.input_names):
            # Arg has a default of None (flex). If in input_spaces, arg will be provided.
            if root_component.api_method_inputs[param_name] == "flex":
                if param_name in input_spaces:
                    in_ops_records.append(
                        DataOpRecord(
                            position=i,
                            kwarg=param_name if use_named else None,
                            placeholder=param_name))
                else:
                    use_named = True
            # Already defined (per default arg value (e.g. bool)).
            elif isinstance(root_component.api_method_inputs[param_name],
                            Space):
                if param_name in input_spaces:
                    in_ops_records.append(
                        DataOpRecord(
                            position=i,
                            kwarg=param_name if use_named else None,
                            placeholder=param_name))
                else:
                    use_named = True
            # No default values -> Must be provided in `input_spaces`.
            else:
                # A var-positional param.
                if root_component.api_method_inputs[param_name] == "*flex":
                    assert use_named is False
                    if param_name in input_spaces:
                        # One record per given Space; placeholders are named `[param_name][j]`.
                        for j in range(
                                len(force_list(input_spaces[param_name]))):
                            in_ops_records.append(
                                DataOpRecord(position=i + j,
                                             placeholder=param_name +
                                             "[{}]".format(j)))
                # A keyword param.
                elif root_component.api_method_inputs[
                        param_name] == "**flex":
                    if param_name in input_spaces:
                        assert use_named is False
                        # One named record per key (sorted); placeholders are named `[param_name][key]`.
                        for key in sorted(input_spaces[param_name].keys()):
                            in_ops_records.append(
                                DataOpRecord(kwarg=key,
                                             placeholder=param_name +
                                             "[{}]".format(key)))
                    use_named = True
                else:
                    # TODO: If space not provided in input_spaces -> Try to call this API method later (maybe another API-method).
                    assert param_name in input_spaces, \
                        "ERROR: arg-name '{}' not defined in input_spaces for root component '{}'!".format(
                            param_name, root_component.global_scope
                        )
                    in_ops_records.append(
                        DataOpRecord(
                            position=i,
                            kwarg=param_name if use_named else None,
                            placeholder=param_name))

        # Do the actual core API-method call (thereby assembling the meta-graph).
        # Records without a `kwarg` name are passed positionally, all others by name.
        args = [
            op_rec for op_rec in in_ops_records if op_rec.kwarg is None
        ]
        kwargs = {
            op_rec.kwarg: op_rec
            for op_rec in in_ops_records if op_rec.kwarg is not None
        }
        getattr(api_method_rec.component, api_method_name)(*args, **kwargs)

        # Register core's interface.
        api[api_method_name] = (
            in_ops_records, api_method_rec.out_op_columns[-1].op_records)

        # Tag very last out-op-records with is_terminal_op=True, so we know in the build process that we are done.
        for op_rec in api_method_rec.out_op_columns[-1].op_records:
            op_rec.is_terminal_op = True

    time_build = time.perf_counter() - time_start
    self.logger.info(
        "Meta-graph build completed in {} s.".format(time_build))

    # Get some stats on the graph and report.
    num_meta_ops = DataOpRecord._ID + 1
    self.logger.info(
        "Meta-graph op-records generated: {}".format(num_meta_ops))

    return MetaGraph(root_component=root_component,
                     api=api,
                     num_ops=num_meta_ops,
                     build_status=True)
def from_spec(cls, spec=None, **kwargs):
    """
    Uses the given spec to create an object.

    If `spec` is a dict, an optional "type" key can be used as a "constructor hint" to specify a certain
    class of the object. If `spec` is not a dict, `spec`'s value is used directly as the "constructor hint".

    The rest of `spec` (if it's a dict) will be used as kwargs for the (to-be-determined) constructor.
    Additional keys in **kwargs will always have precedence (overwrite keys in `spec` (if a dict)).
    Also, if the spec-dict or **kwargs contains the special key "_args", it will be popped from the dict
    and used as *args list to be passed separately to the constructor.

    The following constructor hints are valid:
    - None: Use `cls.__default_constructor__` (if `cls` itself defines one and no `_args` are given),
        otherwise use `cls` as constructor.
    - An already instantiated object of `cls`: Will be returned as is; no constructor call.
    - Anything that `cls.lookup_class()` resolves to a constructor: That constructor is used.
    - A python callable (class or factory function): Use that as constructor.
    - A string ending in ".yaml", ".yml" or ".json": Treated as a filename and handed to `cls.from_file()`.
    - A string containing a dot (e.g. "rlgraph.components.Component"): Treated as python module+class;
        the module is imported and the named attribute is used as constructor.

    Args:
        spec (Optional[dict]): The specification dict (or directly the constructor hint).

    Keyword Args:
        kwargs (any): Optional possibility to pass the c'tor arguments in here and use spec as the type-only
            info. Then we can call this like: from_spec([type]?, [**kwargs for ctor])
            If `spec` is already a dict, then `kwargs` will be merged with spec (overwriting keys in `spec`)
            after "type" has been popped out of `spec`.
            If a constructor of a Specifiable needs an *args list of items, the special key `_args` can be
            passed inside `kwargs` with a list type value (e.g. kwargs={"_args": [arg1, arg2, arg3]}).

    Returns:
        The object generated from the spec.

    Raises:
        RLGraphError: If the constructor hint is a string that is neither a filename, a module+class nor
            resolvable via `cls.lookup_class()`, or if no constructor could be determined at all.
    """
    # `spec` is already a created object of this class -> Take it as is.
    if isinstance(spec, cls):
        return spec

    # `specifiable_type`: Indicator for the Specifiable's constructor.
    # `ctor_args`: *args arguments for the constructor.
    # `ctor_kwargs`: **kwargs arguments for the constructor.
    # Copy so caller can reuse safely (we pop/update keys below).
    spec = deepcopy(spec)
    if isinstance(spec, dict):
        specifiable_type = spec.pop("type", None)
        ctor_kwargs = spec
        ctor_kwargs.update(kwargs)  # give kwargs priority
    else:
        specifiable_type = spec
        ctor_kwargs = kwargs
    # Special `_args` field in kwargs for *args-utilizing constructors.
    ctor_args = ctor_kwargs.pop("_args", [])

    # Figure out the actual constructor (class) from `specifiable_type`.
    # None: Try __default_constructor__ (if no args), only then constructor of cls (using args/kwargs).
    if specifiable_type is None:
        # We have a default constructor that was defined directly by cls (not inherited from its first base).
        if cls.__default_constructor__ is not None and ctor_args == [] and \
                (not hasattr(cls.__bases__[0], "__default_constructor__") or
                 cls.__bases__[0].__default_constructor__ is None or
                 cls.__bases__[0].__default_constructor__ is not cls.__default_constructor__):
            constructor = cls.__default_constructor__
            # Default partial's keywords into ctor_kwargs.
            if isinstance(constructor, partial):
                kwargs = default_dict(ctor_kwargs, constructor.keywords)
                constructor = partial(constructor.func, **kwargs)
                ctor_kwargs = {}  # erase to avoid duplicate kwarg error
        # Try our luck with this class itself.
        else:
            constructor = cls
    else:
        # Try the lookup table of this class first.
        constructor = cls.lookup_class(specifiable_type)
        # Found via cls.lookup_class().
        if constructor is not None:
            pass
        # Python callable.
        elif callable(specifiable_type):
            constructor = specifiable_type
        # A string: Filename or a python module+class.
        elif isinstance(specifiable_type, str):
            if re.search(r'\.(yaml|yml|json)$', specifiable_type):
                return cls.from_file(specifiable_type, *ctor_args, **ctor_kwargs)
            elif specifiable_type.find('.') != -1:
                module_name, function_name = specifiable_type.rsplit(".", 1)
                module = importlib.import_module(module_name)
                constructor = getattr(module, function_name)
            else:
                raise RLGraphError(
                    "ERROR: String specifier ({}) in from_spec must be a filename, a module+class, or a key "
                    "into {}.__lookup_classes__!".format(specifiable_type, cls.__name__)
                )

        if not constructor:
            raise RLGraphError("Invalid type: {}".format(specifiable_type))

    # Create object with inferred constructor.
    specifiable_object = constructor(*ctor_args, **ctor_kwargs)

    # Sanity check the result, but only against real classes: a factory function or lambda used as
    # "constructor" is not a valid second argument for `isinstance` (it would raise a TypeError).
    expected_type = constructor.func if isinstance(constructor, partial) else constructor
    if isinstance(expected_type, type):
        assert isinstance(specifiable_object, expected_type)

    return specifiable_object
def get_space_from_op(op, read_key_hints=False, dtype=None, low=None, high=None):
    """
    Re-creates a Space object from a given DataOp (e.g. a tf op, a numpy array or a python primitive).
    Used for shape inference on ops returned from a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

        read_key_hints (bool): If True, the type- and low/high-hints for each value of a dict-op are
            obtained from its key via `get_space_hints_from_dict_key`. The key pattern is:
            - Preceding "I_": IntBox, "F_": FloatBox, "B_": BoolBox.
            - Succeeding "_low=0.0": Low value.
            - Succeeding "_high=1.0": High value.
            E.g. Dict key "F_somekey_low=0.0_high=2.0" indicates a FloatBox with low=0.0 and high=2.0.
                 Dict key "I_somekey" indicates an intbox with no limits.
                 Dict key "I_somekey_high=5" indicates an intbox with high=5 (values 0-4).
            Default: False.

        dtype (Optional[str]): An optional indicator, what the `dtype` of a BoxSpace should be.
        low (Optional[int,float]): An optional indicator, what the `low` property for a BoxSpace should be.
        high (Optional[int,float]): An optional indicator, what the `high` property for a BoxSpace should be.

    Returns:
        Space: The inferred Space object, or 0 if no Space can be assigned to `op` (e.g. op has no dtype
            or its shape is unknown).

    Raises:
        RLGraphError: If `op` is a str or a tensor of a dtype that maps to no known Box type.
    """
    # DataOpDict -> Dict Space (recursing into each value).
    if isinstance(op, dict):
        child_spaces = {}
        any_batch_rank = False
        any_time_rank = False
        for key, value in op.items():
            # Hints encoded in the key replace whatever hints we currently hold.
            if read_key_hints is True:
                dtype, low, high = get_space_hints_from_dict_key(key)
            child = get_space_from_op(value, dtype=dtype, low=low, high=high)
            child_spaces[key] = child
            # One un-inferrable child makes the whole container un-inferrable.
            if child == 0:
                return 0
            if child.has_batch_rank:
                any_batch_rank = True
            if child.has_time_rank:
                any_time_rank = True
        return Dict(child_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # DataOpTuple -> Tuple Space (recursing into each item, no hints are forwarded).
    if isinstance(op, tuple):
        child_spaces = []
        any_batch_rank = False
        any_time_rank = False
        for item in op:
            child = get_space_from_op(item)
            if child == 0:
                return 0
            child_spaces.append(child)
            if child.has_batch_rank:
                any_batch_rank = True
            if child.has_time_rank:
                any_time_rank = True
        return Tuple(child_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # Everything below is a primitive Space -> infer from the op's dtype and shape.
    # Only forward those bounds that were actually given.
    bounds = {name: limit for name, limit in (("high", high), ("low", low)) if limit is not None}

    # Op itself is a single value, simple python type.
    if isinstance(op, (bool, int, float)):
        return BoxSpace.from_spec(spec=(dtype or type(op)), shape=(), **bounds)

    if isinstance(op, str):
        raise RLGraphError("Cannot derive Space from non-allowed op ({})!".format(op))

    # A single numpy array.
    if isinstance(op, np.ndarray):
        return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"), shape=op.shape, **bounds)

    if isinstance(op, list):
        return try_space_inference_from_list(op, dtype=dtype, **bounds)

    # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
    # PyTorch Tensors do not have get_shape so the backend must be checked.
    if not hasattr(op, "dtype") or (get_backend() == "tf" and not hasattr(op, "get_shape")):
        return 0

    # Some tensor: can be converted into a BoxSpace.
    shape = get_shape(op)
    # Unknown shape (e.g. a cond op).
    if shape is None:
        return 0

    add_batch_rank = False
    add_time_rank = False
    time_major = False

    # New way: the op itself tells us (via `_batch_rank`/`_time_rank`) where these ranks are.
    # Those positions are marked with -1 and then dropped from the shape.
    dims = list(shape)
    batch_rank = getattr(op, "_batch_rank", None)
    if isinstance(batch_rank, int):
        add_batch_rank = True
        dims[batch_rank] = -1
    time_rank = getattr(op, "_time_rank", None)
    if isinstance(time_rank, int):
        add_time_rank = True
        time_major = time_rank == 0
        dims[time_rank] = -1
    shape = tuple(dim for dim in dims if dim != -1)

    # Old way: leading None-dimensions are taken to be batch (and time) ranks.
    if not (add_batch_rank or add_time_rank) and shape != () and shape[0] is None:
        two_leading_unknowns = len(shape) > 1 and shape[1] is None
        # Two unknown leading dims: batch + time; the time-major flag cannot be determined here.
        shape = shape[2:] if two_leading_unknowns else shape[1:]
        add_time_rank = two_leading_unknowns
        add_batch_rank = True

    # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

    base_dtype = getattr(op.dtype, "base_dtype", op.dtype)
    # PyTorch does not have a bool type: uint8 tensors are treated as bool.
    if get_backend() == "pytorch" and op.dtype is torch.uint8:
        base_dtype = bool
    dtype_name = str(base_dtype)

    rank_settings = dict(
        shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
    )
    if "float" in dtype_name:
        return FloatBox(dtype=convert_dtype(base_dtype, "np"), **rank_settings)
    if "int" in dtype_name:
        # An explicit `high` wins over the op's `_num_categories` hint.
        upper = high or getattr(op, "_num_categories", None)
        return IntBox(upper, dtype=convert_dtype(base_dtype, "np"), **rank_settings)
    if "bool" in dtype_name:
        return BoolBox(**rank_settings)
    if "string" in dtype_name:
        return TextBox(**rank_settings)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
def __init__(self, network_spec, action_space=None, action_adapter_spec=None, max_likelihood=True,
             scope="policy", **kwargs):
    """
    Builds the Policy from a NeuralNetwork, an ActionAdapter and a Distribution matching the action Space.

    Args:
        network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to
            build one.
        action_space (Space): The action Space within which this Component will create actions.
            If None, the action Space of the created ActionAdapter is used.
        action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the
            default ActionAdapter object.
        max_likelihood (bool): Whether to pick actions according to the max-likelihood value or via sampling.
            Default: True.
        scope (str): The scope passed on to the parent constructor. Default: "policy".

    Keyword Args:
        kwargs (any): Passed on to the parent constructor.

    Raises:
        RLGraphError: If the action Space is neither an IntBox nor a FloatBox.
    """
    super(Policy, self).__init__(scope=scope, **kwargs)

    self.neural_network = NeuralNetwork.from_spec(network_spec)
    if action_space is None:
        self.action_adapter = ActionAdapter.from_spec(action_adapter_spec)
        action_space = self.action_adapter.action_space
    else:
        self.action_adapter = ActionAdapter.from_spec(action_adapter_spec, action_space=action_space)
    self.action_space = action_space
    self.max_likelihood = max_likelihood

    # TODO: Hacky trick to implement IMPALA post-LSTM256 time-rank folding and unfolding.
    # TODO: Replace entirely via sonnet-like BatchApply Component.
    is_impala = "IMPALANetwork" in type(self.neural_network).__name__

    # Add API-method to get baseline output (if we use an extra value function baseline node).
    if isinstance(self.action_adapter, BaselineActionAdapter):
        # TODO: IMPALA attempt to speed up final pass after LSTM.
        if is_impala:
            self.time_rank_folder = ReShape(fold_time_rank=True, scope="time-rank-fold")
            self.time_rank_unfolder_v = ReShape(
                unfold_time_rank=True, time_major=True, scope="time-rank-unfold-v"
            )
            self.time_rank_unfolder_a_probs = ReShape(
                unfold_time_rank=True, time_major=True, scope="time-rank-unfold-a-probs"
            )
            self.time_rank_unfolder_logits = ReShape(
                unfold_time_rank=True, time_major=True, scope="time-rank-unfold-logits"
            )
            self.time_rank_unfolder_log_probs = ReShape(
                unfold_time_rank=True, time_major=True, scope="time-rank-unfold-log-probs"
            )
            # The fold/unfold helpers are registered right here, where they are created. They only exist
            # in this branch, so they must not be registered again (unconditionally) further below.
            self.add_components(
                self.time_rank_folder, self.time_rank_unfolder_v, self.time_rank_unfolder_a_probs,
                self.time_rank_unfolder_log_probs, self.time_rank_unfolder_logits
            )

        @rlgraph_api(component=self)
        def get_state_values_logits_probabilities_log_probs(self, nn_input, internal_states=None):
            nn_output = self.neural_network.apply(nn_input, internal_states)
            last_internal_states = nn_output.get("last_internal_states")
            nn_output = nn_output["output"]

            # TODO: IMPALA attempt to speed up final pass after LSTM.
            if is_impala:
                nn_output = self.time_rank_folder.apply(nn_output)

            out = self.action_adapter.get_logits_probabilities_log_probs(nn_output)

            # TODO: IMPALA attempt to speed up final pass after LSTM.
            # NOTE(review): `nn_output` is the already folded tensor at this point - confirm that this is
            # the reference input the unfolders expect.
            if is_impala:
                state_values = self.time_rank_unfolder_v.apply(out["state_values"], nn_output)
                logits = self.time_rank_unfolder_logits.apply(out["logits"], nn_output)
                probs = self.time_rank_unfolder_a_probs.apply(out["probabilities"], nn_output)
                log_probs = self.time_rank_unfolder_log_probs.apply(out["log_probs"], nn_output)
            else:
                state_values = out["state_values"]
                logits = out["logits"]
                probs = out["probabilities"]
                log_probs = out["log_probs"]

            return dict(state_values=state_values, logits=logits, probabilities=probs, log_probs=log_probs,
                        last_internal_states=last_internal_states)

    # Figure out our Distribution.
    if isinstance(action_space, IntBox):
        self.distribution = Categorical()
    # Continuous action space -> Normal distribution (each action needs mean and variance from network).
    elif isinstance(action_space, FloatBox):
        self.distribution = Normal()
    else:
        raise RLGraphError(
            "ERROR: `action_space` is of type {} and not allowed in {} Component!".format(
                type(action_space).__name__, self.name
            )
        )

    self.add_components(self.neural_network, self.action_adapter, self.distribution)
def get_activation_function(activation_function=None, *other_parameters):
    """
    Looks up the backend-specific activation function for a NN layer.

    Args:
        activation_function (Optional[callable,str]): The activation function to lookup. Could be given as:
            - already a callable (return just that)
            - a lookup key (str)
            - None: Returned as is (linear activation).
        other_parameters (any): Possible extra parameter(s) used for some of the activation functions.
            Only the first one is read: The `alpha`/negative slope for "lrelu"/"leaky_relu" (default: 0.2).

    Returns:
        callable: The backend-dependent activation function. For the PyTorch backend, an instantiated
            module object (None for "linear"). None if the backend is neither "tf" nor "pytorch".

    Raises:
        RLGraphError: If the lookup key is unknown for the current backend.
    """
    backend = get_backend()
    if backend != "tf" and backend != "pytorch":
        return None

    # Both backends hand None and ready-made callables straight back.
    if activation_function is None or callable(activation_function):
        return activation_function

    leaky_alpha = other_parameters[0] if other_parameters else 0.2

    # Each entry: (accepted lookup keys, resolver). Resolvers are lazy so that only the attribute of the
    # requested function is touched on the backend module.
    if backend == "tf":
        candidates = (
            (("linear",), lambda: tf.identity),
            # Rectifier linear unit (ReLU) : 0 if x < 0 else x
            (("relu",), lambda: tf.nn.relu),
            # Exponential linear: exp(x) - 1 if x < 0 else x
            (("elu",), lambda: tf.nn.elu),
            # Sigmoid: 1 / (1 + exp(-x))
            (("sigmoid",), lambda: tf.sigmoid),
            # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if < 0 else x]
            # https://arxiv.org/pdf/1706.02515.pdf
            (("selu",), lambda: tf.nn.selu),
            # Swish function: x * sigmoid(x)
            # https://arxiv.org/abs/1710.05941
            (("swish",), lambda: (lambda x: x * tf.sigmoid(x=x))),
            # Leaky ReLU: x * [alpha if x < 0 else 1.0]
            (("lrelu", "leaky_relu"), lambda: partial(tf.nn.leaky_relu, alpha=leaky_alpha)),
            # Concatenated ReLU.
            (("crelu",), lambda: tf.nn.crelu),
            (("softmax",), lambda: tf.nn.softmax),
            (("softplus",), lambda: tf.nn.softplus),
            (("softsign",), lambda: tf.nn.softsign),
            (("tanh",), lambda: tf.nn.tanh),
        )
        unknown_message = "ERROR: Unknown activation_function '{}' for TensorFlow backend!"
    else:
        # PyTorch: Have to instantiate objects here.
        candidates = (
            # Linear: Do nothing.
            (("linear",), lambda: None),
            # Rectifier linear unit (ReLU) : 0 if x < 0 else x
            (("relu",), lambda: nn.ReLU()),
            # Exponential linear: exp(x) - 1 if x < 0 else x
            (("elu",), lambda: nn.ELU()),
            # Sigmoid: 1 / (1 + exp(-x))
            (("sigmoid",), lambda: nn.Sigmoid()),
            # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if < 0 else x]
            # https://arxiv.org/pdf/1706.02515.pdf
            (("selu",), lambda: nn.SELU()),
            # Leaky ReLU: x * [alpha if x < 0 else 1.0]
            (("lrelu", "leaky_relu"), lambda: nn.LeakyReLU(negative_slope=leaky_alpha)),
            (("softmax",), lambda: nn.Softmax()),
            (("softplus",), lambda: nn.Softplus()),
            (("softsign",), lambda: nn.Softsign()),
            (("tanh",), lambda: nn.Tanh()),
        )
        unknown_message = "ERROR: Unknown activation_function '{}' for PyTorch backend!"

    for lookup_keys, resolve in candidates:
        if activation_function in lookup_keys:
            return resolve()

    raise RLGraphError(unknown_message.format(activation_function))
def get_graph_markup(component, level=0, draw_graph_fns=False):
    """
    Generates the markup for plotting the RLGraph meta-graph of a Component (incl. all its sub-components).
    Uses the (mermaid)[https://github.com/knsv/mermaid] markup language.

    Args:
        component (Component): Component to generate meta-graph markup for.
        level (int): Indentation level. If >= 1, return this component as sub-component (mermaid subgraph).
        draw_graph_fns (bool): Include graph fns in plot. Only handed down to the sub-components' calls.

    Returns:
        str: Meta-graph markup string.

    Raises:
        RLGraphError: If `level` is neither 0 nor >= 1.
        ValueError: If an input socket of one of the component's graph fns is not a Socket.
    """
    # (Sub)graph declaration.
    if level == 0:
        chunks = [
            "graph TD\n",
            "classDef input_socket fill:#9ff,stroke:#333,stroke-width:2px;\n",
            "classDef output_socket fill:#f9f,stroke:#333,stroke-width:2px;\n",
            "classDef space fill:#999,stroke:#333,stroke-width:2px;\n",
            "classDef graph_fn fill:#ff9,stroke:#333,stroke-width:2px;\n",
            "\n",
        ]
    elif level >= 1:
        chunks = [" " * 4 * level + "subgraph {}\n".format(component.name)]
    else:
        raise RLGraphError("Invalid component indentation level {}".format(level))

    own_pad = " " * 4 * level
    child_pad = " " * 4 * (level + 1)

    sockets = []
    input_ids = []
    output_ids = []
    # Socket nodes (inputs first, then outputs) with the following markup: socket_HASH("SOCKET NAME")
    for socket_group, id_bucket in ((component.input_sockets, input_ids),
                                    (component.output_sockets, output_ids)):
        for sock in socket_group:
            chunks.append(
                child_pad + "socket_{hash}(\"{name}\")\n".format(hash=hash(sock), name=sock.name)
            )
            id_bucket.append("socket_{hash}".format(hash=hash(sock)))
            sockets.append(sock)

    chunks.append("\n")

    # Graph function nodes with the following markup: graphfn_HASH("GRAPH FN NAME")
    graph_fns = []
    graph_fn_ids = []
    for fn in component.graph_fns:
        chunks.append(child_pad + "graphfn_{hash}(\"{name}\")\n".format(hash=hash(fn), name=fn.name))
        graph_fn_ids.append("graphfn_{hash}".format(hash=hash(fn)))
        graph_fns.append(fn)

    # Edges are (source id, target id, label) triples.
    # Only incoming connections are visited: every outgoing connection of a socket should show up as
    # incoming connection of some other socket.
    edges = []
    space_ids = []
    for sock in sockets:
        target = "socket_{}".format(hash(sock))
        for source in sock.incoming_connections:
            if isinstance(source, Socket):
                edges.append(("socket_{}".format(hash(source)), target, None))
            elif isinstance(source, Space):
                # Spaces get their node here (we only know about them because of their connections).
                chunks.append(
                    child_pad + "space_{hash}(\"{name}\")\n".format(hash=hash(source), name=str(source))
                )
                space_ids.append("space_{hash}".format(hash=hash(source)))
                edges.append(("space_{}".format(hash(source)), target, None))
            elif isinstance(source, GraphFunction):
                edges.append(("graphfn_{}".format(hash(source)), target, None))

    # Edges into the graph fns (their output sockets are covered by the loop above).
    for fn in graph_fns:
        for socket_record in fn.input_sockets.values():
            fn_input = socket_record['socket']
            if not isinstance(fn_input, Socket):
                raise ValueError("Not a valid input socket: {} ({})".format(fn_input, type(fn_input)))
            edges.append(("socket_{}".format(hash(fn_input)), "graphfn_{}".format(hash(fn)), None))

    # Assign the style classes (only for those node kinds that actually occur).
    for node_ids, class_line in ((input_ids, "class {} input_socket;\n"),
                                 (output_ids, "class {} output_socket;\n"),
                                 (space_ids, "class {} space;\n"),
                                 (graph_fn_ids, "class {} graph_fn;\n")):
        if node_ids:
            chunks.append(child_pad + class_line.format(','.join(node_ids)))

    chunks.append("\n")

    # Sub-components are rendered as nested subgraphs.
    for sub_component in component.sub_components.values():
        chunks.append(get_graph_markup(sub_component, level=level + 1, draw_graph_fns=draw_graph_fns))

    # Subgraphs (level >= 1) require an end statement.
    if level >= 1:
        chunks.append(own_pad + "end\n")

    chunks.append("\n")

    # Connections are inserted after the graph.
    for source_id, target_id, label in edges:
        if label:
            # Labeled connection
            chunks.append(own_pad + "{}--{}-->{}\n".format(source_id, label, target_id))
        else:
            # Unlabeled connection
            chunks.append(own_pad + "{}-->{}\n".format(source_id, target_id))

    chunks.append("\n")

    return "".join(chunks)
def _execute(self, num_timesteps=None, num_episodes=None, max_timesteps_per_episode=None,
             use_exploration=True, update_spec=None, frameskip=None, reset=True):
    """
    Actual implementation underlying `execute_timesteps` and `execute_episodes`.

    Runs the act -> step -> observe -> (maybe) update loop on all environments of `self.vector_env`
    until one of the two given limits is reached, then logs and returns the run's statistics.

    Args:
        num_timesteps (Optional[int]): The maximum number of timesteps to run. At least one of
            `num_timesteps` or `num_episodes` must be provided.
        num_episodes (Optional[int]): The maximum number of episodes to run. At least one of
            `num_timesteps` or `num_episodes` must be provided.
        max_timesteps_per_episode (Optional[int]): Can be used to limit the number of timesteps per
            episode. Use None or 0 for no limit. Default: None.
        use_exploration (Optional[bool]): Indicates whether to utilize exploration (epsilon or noise
            based) when picking actions. Default: True.
        update_spec (Optional[dict]): Update parameters. If None, the worker only performs rollouts.
            Matches the structure of an Agent's update_spec dict and will be "defaulted" by that dict.
            See `input_parsing/parse_update_spec.py` for more details.
        frameskip (Optional[int]): How often actions are repeated after retrieving them from the agent.
            Rewards are accumulated over the number of skips. Use None for the Worker's default value.
        reset (bool): Whether to reset the environment and all the Worker's internal counters.
            Default: True.

    Returns:
        dict: Execution statistics with the keys: `runtime`, `timesteps_executed`, `ops_per_second`,
            `env_frames`, `env_frames_per_second`, `episodes_executed`, `episodes_per_minute`,
            `mean_episode_runtime`, `mean_episode_reward`, `max_episode_reward`, `final_episode_reward`.

    Raises:
        RLGraphError: If `reset` is not True and the first stored env state is None (never reset before).
    """
    assert num_timesteps is not None or num_episodes is not None,\
        "ERROR: One of `num_timesteps` or `num_episodes` must be provided!"
    # Are we updating or just acting/observing?
    update_spec = default_dict(update_spec, self.agent.update_spec)
    self.set_update_schedule(update_spec)

    # A limit of 0 means "no limit" in all the `0 < limit <= counter` checks below.
    num_timesteps = num_timesteps or 0
    num_episodes = num_episodes or 0
    # One (identical) per-episode limit for each environment.
    max_timesteps_per_episode = [max_timesteps_per_episode or 0 for _ in range_(self.num_environments)]
    frameskip = frameskip or self.frameskip

    # Stats.
    timesteps_executed = 0
    episodes_executed = 0

    start = time.perf_counter()
    episode_terminals = self.episode_terminals
    if reset is True:
        # Start from scratch: clear all counters, the per-env episode records and the envs themselves.
        self.env_frames = 0
        self.episodes_since_update = 0
        self.finished_episode_rewards = [[] for _ in range_(self.num_environments)]
        self.finished_episode_durations = [[] for _ in range_(self.num_environments)]
        self.finished_episode_timesteps = [[] for _ in range_(self.num_environments)]

        for i, env_id in enumerate(self.env_ids):
            self.episode_returns[i] = 0
            self.episode_timesteps[i] = 0
            self.episode_terminals[i] = False
            self.episode_starts[i] = time.perf_counter()
            if self.worker_executes_preprocessing:
                self.state_is_preprocessed[env_id] = False

        self.env_states = self.vector_env.reset_all()
        self.agent.reset()
    elif self.env_states[0] is None:
        raise RLGraphError("Runner must be reset at the very beginning. Environment is in invalid state.")

    # Only run everything for at most num_timesteps (if defined).
    env_states = self.env_states
    while not (0 < num_timesteps <= timesteps_executed):
        if self.render:
            self.vector_env.render()

        if self.worker_executes_preprocessing:
            # Preprocess each env's state here (at most once per state) and hand the buffer to the agent.
            for i, env_id in enumerate(self.env_ids):
                state = self.agent.state_space.force_batch(env_states[i])
                if self.preprocessors[env_id] is not None:
                    if self.state_is_preprocessed[env_id] is False:
                        self.preprocessed_states_buffer[i] = self.preprocessors[env_id].preprocess(state)
                        self.state_is_preprocessed[env_id] = True
                else:
                    # No preprocessor for this env: Use the raw state.
                    self.preprocessed_states_buffer[i] = env_states[i]
            # TODO extra returns when worker is not applying preprocessing.
            actions = self.agent.get_action(
                states=self.preprocessed_states_buffer, use_exploration=use_exploration,
                apply_preprocessing=self.apply_preprocessing
            )
            preprocessed_states = np.array(self.preprocessed_states_buffer)
        else:
            # The agent preprocesses itself and also returns the preprocessed states.
            actions, preprocessed_states = self.agent.get_action(
                states=np.array(env_states), use_exploration=use_exploration,
                apply_preprocessing=True, extra_returns="preprocessed_states"
            )

        # Accumulate the reward over n env-steps (equals one action pick). n=self.frameskip.
        env_rewards = [0 for _ in range_(self.num_environments)]
        next_states = None

        # For Dict action spaces, we have to treat each key as an array with batch-rank at index 0.
        # The action-dict is then translated into a list of dicts where each dict contains the original data
        # but without the batch-rank.
        # E.g. {'A': array([0, 1]), 'B': array([2, 3])} -> [{'A': 0, 'B': 2}, {'A': 1, 'B': 3}]
        if isinstance(self.agent.action_space, Dict):
            some_key = next(iter(actions))
            assert isinstance(actions, dict) and isinstance(actions[some_key], np.ndarray),\
                "ERROR: Cannot flip Dict-action batch with dict keys if returned value is not a dict OR " \
                "values of returned value are not np.ndarrays!"
            # TODO: What if actions come as nested dicts (more than one level deep)?
            # TODO: Use DataOpDict/Tuple's new `map` method.
            if hasattr(actions[some_key], "__len__"):
                env_actions = [{key: value[i] for key, value in actions.items()} for i in range(len(actions[some_key]))]
            else:
                # Action was not array type.
                env_actions = [{key: value for key, value in actions.items()}]
        # Tuple action Spaces:
        # E.g. Tuple(array([0, 1]), array([2, 3])) -> [(0, 2), (1, 3)]
        elif isinstance(self.agent.action_space, Tuple):
            assert isinstance(actions, tuple) and isinstance(actions[0], np.ndarray),\
                "ERROR: Cannot flip tuple-action batch if returned value is not a tuple OR " \
                "values of returned value are not np.ndarrays!"
            # TODO: Use DataOpDict/Tuple's new `map` method.
            env_actions = [tuple(value[i] for _, value in enumerate(actions)) for i in range(len(actions[0]))]
        # No container batch-flipping necessary.
        else:
            env_actions = actions
            # A single env with a 0-d action: Wrap it so that it can be indexed per env below.
            if self.num_environments == 1 and env_actions.shape == ():
                env_actions = [env_actions]

        # Repeat the chosen actions `frameskip` times; stop repeating as soon as any env is terminal.
        for _ in range_(frameskip):
            next_states, step_rewards, episode_terminals, _ = self.vector_env.step(actions=env_actions)

            self.env_frames += self.num_environments
            for i, step_reward in enumerate(step_rewards):
                env_rewards[i] += step_reward
            if np.any(episode_terminals):
                break

        # Only render once per action.
        # (A per-frame render of the first environment used to be done here.)

        for i, env_id in enumerate(self.env_ids):
            self.episode_returns[i] += env_rewards[i]
            self.episode_timesteps[i] += 1

            # Reaching the per-episode timestep limit counts as terminal.
            if 0 < max_timesteps_per_episode[i] <= self.episode_timesteps[i]:
                episode_terminals[i] = True
            # The env has moved on: Whatever is in the buffer no longer matches the current state.
            if self.worker_executes_preprocessing:
                self.state_is_preprocessed[env_id] = False
            # Do accounting for finished episodes.
            if episode_terminals[i]:
                episodes_executed += 1
                self.episodes_since_update += 1
                episode_duration = time.perf_counter() - self.episode_starts[i]
                self.finished_episode_rewards[i].append(self.episode_returns[i])
                self.finished_episode_durations[i].append(episode_duration)
                self.finished_episode_timesteps[i].append(self.episode_timesteps[i])
                self.log_finished_episode(
                    episode_return=self.episode_returns[i], duration=episode_duration,
                    timesteps=self.episode_timesteps[i], env_num=i
                )

                # Reset this environment and its preprocessor stack.
                env_states[i] = self.vector_env.reset(i)
                if self.worker_executes_preprocessing and self.preprocessors[env_id] is not None:
                    self.preprocessors[env_id].reset()
                    # This re-fills the sequence with the reset state.
                    state = self.agent.state_space.force_batch(env_states[i])
                    # Preprocess and add to the buffer.
                    self.preprocessed_states_buffer[i] = np.array(self.preprocessors[env_id].preprocess(state))
                    self.state_is_preprocessed[env_id] = True
                self.episode_returns[i] = 0
                self.episode_timesteps[i] = 0
                self.episode_starts[i] = time.perf_counter()
            else:
                # Otherwise assign states to next states
                env_states[i] = next_states[i]

            # The observed next state is the preprocessed version of the env's (new) current state.
            # NOTE(review): `env_states[i]` is passed without `force_batch` here (unlike above) - confirm.
            if self.worker_executes_preprocessing and self.preprocessors[env_id] is not None:
                next_states[i] = np.array(self.preprocessors[env_id].preprocess(env_states[i]))  # next_state
            self._observe(self.env_ids[i], preprocessed_states[i], env_actions[i], env_rewards[i], next_states[i], episode_terminals[i])
        self.update_if_necessary()
        # One timestep per environment, regardless of the number of action repeats.
        timesteps_executed += self.num_environments
        num_timesteps_reached = (0 < num_timesteps <= timesteps_executed)

        if 0 < num_episodes <= episodes_executed or num_timesteps_reached:
            break

    # Never report a runtime of exactly 0 (it is used as divisor below).
    total_time = (time.perf_counter() - start) or 1e-10

    # Return values for current episode(s) if None have been completed.
    if episodes_executed == 0:
        mean_episode_runtime = 0
        mean_episode_reward = np.mean(self.episode_returns)
        max_episode_reward = np.max(self.episode_returns)
        final_episode_reward = self.episode_returns[0]
    else:
        # Pool the finished episodes of all environments (env by env, not chronologically).
        all_finished_durations = []
        all_finished_rewards = []
        for i in range_(self.num_environments):
            all_finished_rewards.extend(self.finished_episode_rewards[i])
            all_finished_durations.extend(self.finished_episode_durations[i])
        mean_episode_runtime = np.mean(all_finished_durations)
        mean_episode_reward = np.mean(all_finished_rewards)
        max_episode_reward = np.max(all_finished_rewards)
        final_episode_reward = all_finished_rewards[-1]

    # Remember where we stopped, so that a later call with reset=False can continue from here.
    self.episode_terminals = episode_terminals
    self.env_states = env_states
    results = dict(
        runtime=total_time,
        # Agent act/observe throughput.
        timesteps_executed=timesteps_executed,
        ops_per_second=(timesteps_executed / total_time),
        # Env frames including action repeats.
        env_frames=self.env_frames,
        env_frames_per_second=(self.env_frames / total_time),
        episodes_executed=episodes_executed,
        episodes_per_minute=(episodes_executed / (total_time / 60)),
        mean_episode_runtime=mean_episode_runtime,
        mean_episode_reward=mean_episode_reward,
        max_episode_reward=max_episode_reward,
        final_episode_reward=final_episode_reward)

    # Total time of run.
    self.logger.info("Finished execution in {} s".format(total_time))
    # Total (RL) timesteps (actions) done (and timesteps/sec).
    self.logger.info("Time steps (actions) executed: {} ({} ops/s)".format(
        results['timesteps_executed'], results['ops_per_second']))
    # Total env-timesteps done (including action repeats) (and env-timesteps/sec).
    self.logger.info(
        "Env frames executed (incl. action repeats): {} ({} frames/s)".
        format(results['env_frames'], results['env_frames_per_second']))
    # Total episodes done (and episodes/min).
    self.logger.info("Episodes finished: {} ({} episodes/min)".format(
        results['episodes_executed'], results['episodes_per_minute']))
    self.logger.info("Mean episode runtime: {}s".format(
        results['mean_episode_runtime']))
    self.logger.info("Mean episode reward: {}".format(
        results['mean_episode_reward']))
    self.logger.info("Max. episode reward: {}".format(
        results['max_episode_reward']))
    self.logger.info("Final episode reward: {}".format(
        results['final_episode_reward']))

    return results
def __init__(self, shape, specification=None, **kwargs):
    """
    Sets up the backend-specific initializer object for a variable of the given shape.

    Args:
        shape (tuple): The shape of the Variables to initialize. A list is accepted as well.
        specification (any): A spec that determines the nature of this initializer:
            - "truncated_normal": Normal-based initializer with stddev = 1 / sqrt(shape[0])
              (stddev = 1 if `shape` is not a non-empty tuple/list).
            - None or False: Backend default (nothing is set for tf, Xavier uniform for pytorch).
            - float/int: Constant value across the variable.
            - list or np.ndarray: Fixed values, whose shape is sanity-checked against `shape`.
        **kwargs: Accepted, but not used by this constructor.

    Raises:
        RLGraphError: If a fixed shape in `specification` does not match `shape` or if
            `specification` is of an unsupported type.
    """
    super(Initializer, self).__init__()

    # The shape of the variable to be initialized.
    self.shape = shape
    # The actual underlying initializer object.
    self.initializer = None

    # Truncated Normal.
    # Only compare strings against the keyword: `ndarray == "truncated_normal"` may yield an
    # element-wise result whose truth value is ambiguous.
    if isinstance(specification, str) and specification == "truncated_normal":
        backend = get_backend()
        if backend in ("tf", "pytorch"):
            # Use the first dimension (num_rows or batch rank) to figure out the stddev.
            first_dim = shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0
            stddev = 1 / math.sqrt(first_dim)
            if backend == "tf":
                self.initializer = tf.truncated_normal_initializer(stddev=stddev)
            else:
                # NOTE: The pytorch branch uses a plain (non-truncated) normal init.
                self.initializer = lambda t: torch.nn.init.normal_(tensor=t, std=stddev)

    # No spec -> Leave initializer as None for TF (will then use default;
    # e.g. for tf weights: Xavier uniform). For PyTorch, still have to set Xavier.
    # TODO this is None or is False is very unclean because TF and PT have different defaults ->
    # change to clean default values for weights and biases.
    elif specification is None or specification is False:
        if get_backend() == "pytorch":
            self.initializer = torch.nn.init.xavier_uniform_

    # Fixed values spec -> Use them, just do sanity checking.
    else:
        # numpy reports shapes as tuples: Normalize a list-`shape` so that e.g. [3] matches (3,).
        expected_shape = tuple(self.shape) if isinstance(self.shape, (tuple, list)) else self.shape

        # Constant value across the variable.
        if isinstance(specification, (float, int)):
            pass
        # A 1D initializer (e.g. for biases).
        elif isinstance(specification, list):
            array = np.asarray(specification, dtype=convert_dtype("float32", "np"))
            if array.shape != expected_shape:
                raise RLGraphError(
                    "ERROR: Number/shape of given items ({}) not identical with shape ({})!"
                    .format(array.shape, self.shape))
        # A nD initializer (numpy-array).
        elif isinstance(specification, np.ndarray):
            if specification.shape != expected_shape:
                raise RLGraphError(
                    "ERROR: Shape of given items ({}) not identical with shape ({})!"
                    .format(specification.shape, self.shape))
        # Unknown type.
        else:
            raise RLGraphError(
                "ERROR: Bad specification given ({}) for Initializer object!"
                .format(specification))

        # Create the backend initializer object.
        backend = get_backend()
        if backend == "tf":
            self.initializer = tf.constant_initializer(
                value=specification, dtype=convert_dtype("float32"))
        elif backend == "pytorch":
            # NOTE(review): `specification` is handed to `constant_` unchanged, also for
            # list/array specs - confirm that this is supported by the used torch version.
            self.initializer = lambda t: torch.nn.init.constant_(
                tensor=t, val=specification)
def __init__(self, network_spec, action_space=None, action_adapter_spec=None,
             deterministic=True, scope="policy", **kwargs):
    """
    Builds the Policy from a NeuralNetwork plus one ActionAdapter and one Distribution per
    (flattened) action space component.

    Args:
        network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
            one.
        action_space (Space): The action Space within which this Component will create actions.
            If None, the Space is taken from the ActionAdapter built from `action_adapter_spec`.
        action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
            ActionAdapter object. May hold one sub-spec per flat action key; components without their own
            entry use the whole dict as their spec. The passed-in dict is not modified.
        deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
            Default: True.
        scope (str): The scope passed on to the parent constructor. Default: "policy".
        **kwargs: Passed on to the parent constructor.

    Raises:
        AssertionError: If no `action_space` is given and the ActionAdapter's action space flattens into
            more than one component.
        RLGraphError: If an action space component is neither an IntBox nor a FloatBox.
    """
    super(Policy, self).__init__(scope=scope, **kwargs)

    self.neural_network = NeuralNetwork.from_spec(network_spec)  # type: NeuralNetwork

    # Create the necessary action adapters for this Policy. One for each action space component.
    self.action_adapters = dict()
    if action_space is None:
        self.action_adapters[""] = ActionAdapter.from_spec(action_adapter_spec)
        self.action_space = self.action_adapters[""].action_space
        # Assert single component action space.
        assert len(self.action_space.flatten()) == 1,\
            "ERROR: Action space must not be ContainerSpace if no `action_space` is given in Policy c'tor!"
    else:
        self.action_space = Space.from_spec(action_space)
        for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
            if action_adapter_spec is not None:
                # Work on a copy: Writing "action_space" into the caller's dict would alter the
                # caller's spec and - if several components fall back to the same dict - make
                # them share (and overwrite) one "action_space" entry.
                aa_spec = dict(action_adapter_spec.get(flat_key, action_adapter_spec))
                aa_spec["action_space"] = action_component
            else:
                aa_spec = dict(action_space=action_component)
            self.action_adapters[flat_key] = ActionAdapter.from_spec(
                aa_spec, scope="action-adapter-{}".format(i))

    self.deterministic = deterministic

    # Figure out our Distributions.
    self.distributions = dict()
    for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
        # Discrete action space -> Categorical distribution.
        if isinstance(action_component, IntBox):
            self.distributions[flat_key] = Categorical(scope="categorical-{}".format(i))
        # Continuous action space -> Normal distribution (each action needs mean and variance from network).
        elif isinstance(action_component, FloatBox):
            self.distributions[flat_key] = Normal(scope="normal-{}".format(i))
        else:
            # Report the offending component (not the possibly-None `action_space` argument).
            raise RLGraphError(
                "ERROR: `action_component` is of type {} and not allowed in {} Component!"
                .format(type(action_component).__name__, self.name))

    self.add_components(
        *[self.neural_network] + list(self.action_adapters.values()) + list(self.distributions.values())
    )
def _execute(self, num_timesteps=None, num_episodes=None, max_timesteps_per_episode=None, use_exploration=True, update_spec=None, frameskip=None, reset=True):
    """
    Actual implementation underlying `execute_timesteps` and `execute_episodes`.

    Repeats the following cycle until a stop criterion is met: Get actions for all environments
    from the agent, step the vector-env (up to `frameskip` times using the same actions), do the
    per-environment episode accounting, hand each environment's transition to the agent's
    `observe` method and call `self.update_if_necessary()`.

    Args:
        num_timesteps (Optional[int]): The maximum number of timesteps to run. At least one of `num_timesteps` or
            `num_episodes` must be provided.
        num_episodes (Optional[int]): The maximum number of episodes to run. At least one of `num_timesteps` or
            `num_episodes` must be provided.
        use_exploration (Optional[bool]): Indicates whether to utilize exploration (epsilon or noise based)
            when picking actions. Default: True.
        max_timesteps_per_episode (Optional[int]): Can be used to limit the number of timesteps per episode.
            Use None or 0 for no limit. Default: None.
        update_spec (Optional[dict]): Update parameters. If None, the worker only performs rollouts.
            Matches the structure of an Agent's update_spec dict and will be "defaulted" by that dict.
            See `input_parsing/parse_update_spec.py` for more details.
        frameskip (Optional[int]): How often actions are repeated after retrieving them from the agent.
            Rewards are accumulated over the number of skips. Use None for the Worker's default value.
        reset (bool): Whether to reset the environment and all the Worker's internal counters.
            Default: True.

    Returns:
        dict: Execution statistics with the keys `runtime`, `timesteps_executed`, `ops_per_second`,
            `env_frames`, `env_frames_per_second`, `episodes_executed`, `episodes_per_minute`,
            `mean_episode_runtime`, `mean_episode_reward`, `max_episode_reward` and `final_episode_reward`.

    Raises:
        AssertionError: If neither `num_timesteps` nor `num_episodes` is given.
        RLGraphError: If `reset` is not True and the first stored env state is None
            (i.e. no environment state is available to continue from).
    """
    assert num_timesteps is not None or num_episodes is not None,\
        "ERROR: One of `num_timesteps` or `num_episodes` must be provided!"
    # Are we updating or just acting/observing?
    update_spec = default_dict(update_spec, self.agent.update_spec)
    self.set_update_schedule(update_spec)

    # From here on, 0 means "no limit" for all of the following counters.
    num_timesteps = num_timesteps or 0
    num_episodes = num_episodes or 0
    # One (identical) limit entry per environment.
    max_timesteps_per_episode = [
        max_timesteps_per_episode or 0 for _ in range_(self.num_environments)
    ]
    frameskip = frameskip or self.frameskip

    # Stats.
    timesteps_executed = 0
    episodes_executed = 0

    start = time.perf_counter()
    episode_terminals = self.episode_terminals
    if reset is True:
        # Clear all counters and per-environment episode histories.
        self.env_frames = 0
        self.finished_episode_rewards = [
            [] for _ in range_(self.num_environments)
        ]
        self.finished_episode_durations = [
            [] for _ in range_(self.num_environments)
        ]
        self.finished_episode_timesteps = [
            [] for _ in range_(self.num_environments)
        ]

        for i, env_id in enumerate(self.env_ids):
            self.episode_returns[i] = 0
            self.episode_timesteps[i] = 0
            self.episode_terminals[i] = False
            self.episode_starts[i] = time.perf_counter()
            if self.worker_executes_preprocessing:
                self.state_is_preprocessed[env_id] = False

        self.env_states = self.vector_env.reset_all()
        self.agent.reset()
    elif self.env_states[0] is None:
        raise RLGraphError(
            "Runner must be reset at the very beginning. Environment is in invalid state."
        )

    # Only run everything for at most num_timesteps (if defined).
    env_states = self.env_states
    while not (0 < num_timesteps <= timesteps_executed):
        if self.render:
            # This renders the first underlying environment.
            self.vector_env.render()

        if self.worker_executes_preprocessing:
            # Fill the preprocessed-states buffer; states that are already flagged as
            # preprocessed are not run through the preprocessor again.
            for i, env_id in enumerate(self.env_ids):
                state = self.agent.state_space.force_batch(env_states[i])
                if self.preprocessors[env_id] is not None:
                    if self.state_is_preprocessed[env_id] is False:
                        self.preprocessed_states_buffer[
                            i] = self.preprocessors[env_id].preprocess(
                                state)
                        self.state_is_preprocessed[env_id] = True
                else:
                    # No preprocessor for this env: Use the raw state.
                    self.preprocessed_states_buffer[i] = env_states[i]
            # TODO extra returns when worker is not applying preprocessing.
            actions = self.agent.get_action(
                states=self.preprocessed_states_buffer,
                use_exploration=use_exploration,
                apply_preprocessing=self.apply_preprocessing)
            preprocessed_states = np.array(self.preprocessed_states_buffer)
        else:
            # Let the agent preprocess and hand back the preprocessed states along with the actions.
            preprocessed_states, actions = self.agent.get_action(
                states=np.array(env_states),
                use_exploration=use_exploration,
                apply_preprocessing=True,
                extra_returns="preprocessed_states")

        # Accumulate the reward over n env-steps (equals one action pick). n=self.frameskip.
        env_rewards = [0 for _ in range_(self.num_environments)]
        next_states = None
        for _ in range_(frameskip):
            next_states, step_rewards, episode_terminals, infos = self.vector_env.step(
                actions=actions)

            self.env_frames += self.num_environments
            for i, step_reward in enumerate(step_rewards):
                env_rewards[i] += step_reward
            # Stop repeating the action as soon as any environment reports a terminal.
            if np.any(episode_terminals):
                break

        # Only render once per action.
        # NOTE(review): Together with the render call at the top of the loop, this makes two
        # render calls per loop iteration when `self.render` is set - confirm that this is intended.
        if self.render:
            self.vector_env.environments[0].render()

        for i, env_id in enumerate(self.env_ids):
            self.episode_returns[i] += env_rewards[i]
            self.episode_timesteps[i] += 1

            # Force a terminal once the per-episode timestep limit is reached (0 = no limit).
            if 0 < max_timesteps_per_episode[i] <= self.episode_timesteps[
                    i]:
                episode_terminals[i] = True
            if self.worker_executes_preprocessing:
                self.state_is_preprocessed[env_id] = False
            # Do accounting for finished episodes.
            if episode_terminals[i]:
                episodes_executed += 1
                episode_duration = time.perf_counter(
                ) - self.episode_starts[i]
                self.finished_episode_rewards[i].append(
                    self.episode_returns[i])
                self.finished_episode_durations[i].append(episode_duration)
                self.finished_episode_timesteps[i].append(
                    self.episode_timesteps[i])
                self.log_finished_episode(
                    reward=self.episode_returns[i],
                    duration=episode_duration,
                    timesteps=self.episode_timesteps[i],
                    env_num=i)

                # Reset this environment and its preprocessor stack.
                env_states[i] = self.vector_env.reset(i)
                if self.worker_executes_preprocessing and self.preprocessors[
                        env_id] is not None:
                    self.preprocessors[env_id].reset()
                    # This re-fills the sequence with the reset state.
                    state = self.agent.state_space.force_batch(
                        env_states[i])
                    # Pre-process, add to buffer.
                    self.preprocessed_states_buffer[i] = np.array(
                        self.preprocessors[env_id].preprocess(state))
                    self.state_is_preprocessed[env_id] = True
                # Start the accounting for the next episode of this environment.
                self.episode_returns[i] = 0
                self.episode_timesteps[i] = 0
                self.episode_starts[i] = time.perf_counter()
            else:
                # Otherwise assign states to next states
                env_states[i] = next_states[i]

            if self.worker_executes_preprocessing and self.preprocessors[
                    env_id] is not None:
                next_state = self.agent.state_space.force_batch(
                    env_states[i])
                next_states[i] = np.array(
                    self.preprocessors[env_id].preprocess(next_state))
            # TODO: If worker does not execute preprocessing, next state is not preprocessed here.
            # Observe per environment.
            self.agent.observe(preprocessed_states=preprocessed_states[i],
                               actions=actions[i],
                               internals=[],
                               rewards=env_rewards[i],
                               next_states=next_states[i],
                               terminals=episode_terminals[i],
                               env_id=self.env_ids[i])
        self.update_if_necessary()
        # One loop iteration counts as one timestep per environment.
        timesteps_executed += self.num_environments
        num_timesteps_reached = (0 < num_timesteps <= timesteps_executed)

        if 0 < num_episodes <= episodes_executed or num_timesteps_reached:
            break

    # Fall back to a tiny positive value so that the rate computations below never divide by zero.
    total_time = (time.perf_counter() - start) or 1e-10

    # Return values for current episode(s) if None have been completed.
    if episodes_executed == 0:
        mean_episode_runtime = 0
        mean_episode_reward = np.mean(self.episode_returns)
        max_episode_reward = np.max(self.episode_returns)
        final_episode_reward = self.episode_returns[0]
    else:
        # Concatenate the per-environment histories in environment order.
        all_finished_durations = []
        all_finished_rewards = []
        for i in range_(self.num_environments):
            all_finished_rewards.extend(self.finished_episode_rewards[i])
            all_finished_durations.extend(
                self.finished_episode_durations[i])
        mean_episode_runtime = np.mean(all_finished_durations)
        mean_episode_reward = np.mean(all_finished_rewards)
        max_episode_reward = np.max(all_finished_rewards)
        # Last entry of the concatenated list (environment order, not chronological order).
        final_episode_reward = all_finished_rewards[-1]

    # Store the current terminals and states on the worker (the `reset is not True` path above
    # reads `self.env_states` and `self.episode_terminals` on the next call).
    self.episode_terminals = episode_terminals
    self.env_states = env_states
    results = dict(
        runtime=total_time,
        # Agent act/observe throughput.
        timesteps_executed=timesteps_executed,
        ops_per_second=(timesteps_executed / total_time),
        # Env frames including action repeats.
        env_frames=self.env_frames,
        env_frames_per_second=(self.env_frames / total_time),
        episodes_executed=episodes_executed,
        episodes_per_minute=(episodes_executed / (total_time / 60)),
        mean_episode_runtime=mean_episode_runtime,
        mean_episode_reward=mean_episode_reward,
        max_episode_reward=max_episode_reward,
        final_episode_reward=final_episode_reward)

    # Total time of run.
    self.logger.info("Finished execution in {} s".format(total_time))
    # Total (RL) timesteps (actions) done (and timesteps/sec).
    self.logger.info("Time steps (actions) executed: {} ({} ops/s)".format(
        results['timesteps_executed'], results['ops_per_second']))
    # Total env-timesteps done (including action repeats) (and env-timesteps/sec).
    self.logger.info(
        "Env frames executed (incl. action repeats): {} ({} frames/s)".
        format(results['env_frames'], results['env_frames_per_second']))
    # Total episodes done (and episodes/min).
    self.logger.info("Episodes finished: {} ({} episodes/min)".format(
        results['episodes_executed'], results['episodes_per_minute']))
    self.logger.info("Mean episode runtime: {}s".format(
        results['mean_episode_runtime']))
    self.logger.info("Mean episode reward: {}".format(
        results['mean_episode_reward']))
    self.logger.info("Max. episode reward: {}".format(
        results['max_episode_reward']))
    self.logger.info("Final episode reward: {}".format(
        results['final_episode_reward']))

    return results
def call(*args):
    """
    Sends a call of `method_name` (with `*args`) through the out-pipe from within the graph and
    returns the resulting tensor(s)/op(s).

    Args:
        *args: The inputs to hand to the piped method call.

    Returns:
        The single result if exactly one output is declared for the method, a tuple of all
        results otherwise.

    Raises:
        RLGraphError: If no Space information is available for `method_name`.
        NotImplementedError: For all backends other than "tf".
    """
    # `output_spaces` is either a lookup dict or a callable mapping a method name to its Space(s).
    spaces_lookup = self.output_spaces
    if not isinstance(spaces_lookup, dict):
        specs = spaces_lookup(method_name)
    else:
        assert method_name in spaces_lookup, \
            "ERROR: Method '{}' not specified in output_spaces: {}!".format(method_name, spaces_lookup)
        specs = spaces_lookup[method_name]

    if specs is None:
        raise RLGraphError("No Space information received for method '{}:{}'".format(
            self.specifiable_class.__name__, method_name
        ))

    # Gather dtype and shape per expected return value (None-entries are skipped).
    out_dtypes = []
    out_shapes = []
    for space in force_list(specs):
        assert not isinstance(space, ContainerSpace)
        if space == 0:
            # A 0 stands for an op: No dtype, no shape.
            out_dtypes.append(0)
            out_shapes.append(0)
        elif space is not None:
            # A tensor is expected.
            out_dtypes.append(convert_dtype(space.dtype))
            out_shapes.append(space.shape)

    if get_backend() != "tf":
        raise NotImplementedError

    def py_call(*call_args):
        # Forwards the call through the out-pipe and waits for the answer coming back on the same pipe.
        decoded_args = [
            item.decode('UTF-8') if isinstance(item, bytes) else item
            for item in call_args
        ]
        try:
            self.out_pipe.send(decoded_args)
            reply = self.out_pipe.recv()

            # Errors on the other side are passed back through the pipe as Exception objects.
            if isinstance(reply, Exception):
                raise reply
            elif reply is not None:
                return reply
        except IOError:
            # Clean exit.
            raise StopIteration()
        except Exception as e:
            print("ERROR: Sent={} Exception={}".format(decoded_args, e))
            raise

    results = tf.py_func(py_call, (method_name, ) + tuple(args), out_dtypes, name=method_name)

    # Pin the known shapes on the returned tensors (ops are marked by shape=0 and left alone).
    for result, shape in zip(results, out_shapes):
        if shape != 0:
            result.set_shape(shape)

    if len(out_dtypes) == 1:
        return results[0]
    return tuple(results)
def get_id():
    """
    Increments the `DataOpRecord._ID` counter by one and returns the new value.

    Returns:
        The new (incremented) ID.

    Raises:
        RLGraphError: If the new ID reaches `DataOpRecord.MAX_ID` (the counter stays incremented).
    """
    new_id = DataOpRecord._ID + 1
    # Store first, check afterwards: The counter is also advanced in the error case.
    DataOpRecord._ID = new_id
    if new_id >= DataOpRecord.MAX_ID:
        raise RLGraphError("Maximum number of op-rec IDs reached! Simply hard-increase `DataOpRecord.MAX_ID`.")
    return new_id
def split_flattened_input_ops(self, *ops, **kwarg_ops):
    """
    Splits any FlattenedDataOp in *ops and **kwarg_ops into its SingleDataOps and collects them to be passed
    one by one through some graph_fn. If more than one FlattenedDataOp exists in *ops,
    these must have the exact same keys (in the same order).
    If `add_auto_key_as_first_param` is True: Add auto-key as very first parameter in each
    returned parameter tuple.

    Only the positional *ops decide whether a split happens. Dict-kwarg_ops are indexed per flat-key
    (falling back to their "" entry), non-dict kwarg_ops are passed on unchanged (same as non-dict *ops).

    Args:
        *ops (op): The primitive ops to split.
        **kwarg_ops (op): More primitive ops to split (but by named key).

    Returns:
        Union[FlattenedDataOp,Tuple[tuple,dict]]: If at least one op in *ops is a container:
            A FlattenedDataOp mapping each flat-key of the first container op to a
            `(params-list, kwargs-dict)` tuple to be used for the single graph_fn calls.
            Otherwise: One `(params-tuple, kwargs-dict)` tuple holding the unwrapped ops.

    Raises:
        RLGraphError: If there are more than 1 flattened ops in ops and their keys don't match 100%.
    """
    assert all(op is not None for op in ops)  # just make sure

    # Collect FlattenedDataOp for checking their keys (must match).
    # A dict holding only the "" key is a wrapped single op, not a container.
    flattened = [op for op in ops if isinstance(op, dict) and (len(op) > 1 or "" not in op)]

    # We don't have any container ops: No splitting possible. Return args and kwargs as is.
    if not flattened:
        params = [""] if self.add_auto_key_as_first_param is True else []
        params += [op[""] if isinstance(op, dict) else op for op in ops]
        kwargs = {
            kwarg_key: kwarg_op[""] if isinstance(kwarg_op, dict) else kwarg_op
            for kwarg_key, kwarg_op in kwarg_ops.items()
        }
        return tuple(params), kwargs

    # The first container op serves as the guide for all keys. Take it from `flattened`
    # (already type-checked) instead of re-scanning `ops`, which may hold tuples/tensors.
    guide_op = flattened[0]
    guide_keys = list(guide_op.keys())

    # If it's more than 1, make sure they match. If they don't match: raise Error.
    for other in flattened[1:]:
        other_keys = list(other.keys())
        if other_keys == guide_keys:
            continue
        # Report the first differing key pair.
        mismatch = next(
            ((key, k_other) for key, k_other in zip(guide_keys, other_keys) if key != k_other), None
        )
        if mismatch is None:
            # One key list is a strict prefix of the other: Report the first surplus key vs None.
            n = min(len(guide_keys), len(other_keys))
            mismatch = (
                guide_keys[n] if n < len(guide_keys) else None,
                other_keys[n] if n < len(other_keys) else None
            )
        raise RLGraphError("ERROR: Flattened ops have a key mismatch ({} vs {})!".format(*mismatch))

    # We have one or many (matching) ContainerDataOps: Split the calls.
    collected_call_params = FlattenedDataOp()
    for key in guide_keys:
        # Prep input params for a single call.
        params = [key] if self.add_auto_key_as_first_param is True else []
        for op in ops:
            # Check first, do not try to check key into tensor (not iterable):
            if isinstance(op, dict):
                params.append(op[key] if key in op else op[""])
            # E.g. tuple args.
            else:
                params.append(op)

        # Add kwarg_ops (same dict/non-dict treatment as for the positional ops).
        kwargs = {}
        for kwarg_key, kwarg_op in kwarg_ops.items():
            if isinstance(kwarg_op, dict):
                kwargs[kwarg_key] = kwarg_op[key] if key in kwarg_op else kwarg_op[""]
            else:
                kwargs[kwarg_key] = kwarg_op

        # Store the parameters for this single call.
        collected_call_params[key] = (params, kwargs)

    return collected_call_params
def define_api_method(component, api_method_record, copy_record=True):
    """
    Registers an API-method with a Component instance.

    Binds the record's `wrapper_func` to the Component under the record's name, stores the record in
    `component.api_methods` and adds an entry to `component.api_method_inputs` for each parameter of
    the record's `func` that is not registered there yet.

    Args:
        component (Component): The Component object to register the API method with.
        api_method_record (APIMethodRecord): The APIMethodRecord describing the to-be-registered API-method.
        copy_record (bool): Whether to deepcopy the APIMethodRecord prior to handing it to the Component for
            storing.

    Raises:
        RLGraphError: If the record is not flagged `ok_to_overwrite` and the Component already has an
            API-method or (for non-class-methods) a non-None attribute of the same name.
    """
    # Deep copy the record (in case this got registered the normal way with via decorating a class method).
    if copy_record:
        api_method_record = copy.deepcopy(api_method_record)
    api_method_record.component = component
    api_name = api_method_record.name

    # Raise errors if `name` already taken in this Component.
    if not api_method_record.ok_to_overwrite:
        # There already is an API-method with that name.
        if api_name in component.api_methods:
            raise RLGraphError("API-method with name '{}' already defined!".format(api_name))
        # There already is another object property with that name (avoid accidental overriding).
        elif not api_method_record.is_class_method and getattr(component, api_name, None) is not None:
            raise RLGraphError(
                "Component '{}' already has a property called '{}'. Cannot define an API-method with "
                "the same name!".format(component.name, api_name))

    # Do not build this API as per ctor instructions.
    if api_name in component.switched_off_apis:
        return

    component.synthetic_methods.add(api_name)
    # Bind the wrapper to this Component instance, so it can be called like a regular method.
    setattr(component, api_name, api_method_record.wrapper_func.__get__(component, component.__class__))
    setattr(api_method_record.wrapper_func, "__name__", api_name)

    component.api_methods[api_name] = api_method_record
    # Direct callable for eager/define by run.
    component.api_fn_by_name[api_name] = api_method_record.wrapper_func

    # Update the api_method_inputs dict (with empty Spaces if not defined yet).
    # Skip `self` and - for graph_fn wrappers with auto-key - the leading key parameter.
    skip_args = 1 + int(bool(
        api_method_record.is_graph_fn_wrapper and api_method_record.add_auto_key_as_first_param
    ))
    param_list = list(inspect.signature(api_method_record.func).parameters.values())[skip_args:]

    for param in param_list:
        api_method_record.input_names.append(param.name)
        if param.name in component.api_method_inputs:
            continue

        # This param has a default value.
        # `Parameter.empty` is a sentinel: Compare by identity, so a default's own `!=`
        # (e.g. an array's element-wise comparison) is never invoked.
        if param.default is not inspect.Parameter.empty:
            # Default is None. Set to "flex" (to signal that this Space is not needed for input-completeness)
            # and wait for first call using this parameter (only then set it to that Space).
            if param.default is None:
                component.api_method_inputs[param.name] = "flex"
            # Default is some python value (e.g. a bool). Use that as the assigned Space.
            else:
                component.api_method_inputs[param.name] = get_space_from_op(param.default)
        # This param is an *args param. Store as "*flex". Then with upcoming API calls, we determine the Spaces
        # for the single items in *args and set them under "param[0]", "param[1]", etc..
        elif param.kind == inspect.Parameter.VAR_POSITIONAL:
            component.api_method_inputs[param.name] = "*flex"
        # This param is a **kwargs param. Store as "**flex". Then with upcoming API calls, we determine the
        # Spaces for the single items in **kwargs and set them under "param[some-key]", etc..
        elif param.kind == inspect.Parameter.VAR_KEYWORD:
            component.api_method_inputs[param.name] = "**flex"
        # Normal parameter without a default value. Store as None (needed) for now.
        else:
            component.api_method_inputs[param.name] = None
def __init__(self, *input_names, **kwargs):
    """
    Obsolete constructor: Always fails and points the caller to ContainerMerger.

    Args:
        *input_names: Ignored.
        **kwargs: Ignored.

    Raises:
        RLGraphError: Always.
    """
    message = "DictMerger component is no longer supported! Please use ContainerMerger (same API) instead."
    raise RLGraphError(message)