def __str__(self):
    """Return a multi-line text outline of the network's structure.

    For each of the ``self.num_networks`` networks the outline lists, in
    order: every input embedder, the embedding merger (only when there is
    more than one embedder), the middleware, and the output heads. When
    ``use_separate_networks_per_head`` is set, a network lists only the
    head whose index equals the network index; otherwise it lists all
    heads. With more than one network, each outline is placed under a
    "Sub-network for head: ..." title and passed through ``indent_string``.

    :return: the outline, with entries joined by newlines
    """
    sections = []

    for net_idx in range(self.num_networks):
        lines = []

        # input embedders: a title line followed by the embedder's own description
        for embedder in self.input_embedders:
            lines.append("Input Embedder: {}".format(embedder.name))
            lines.append(indent_string(str(embedder)))

        # the merger is only reported when there is something to merge
        if len(self.input_embedders) > 1:
            merged_inputs = ", ".join(
                "{} embedding".format(e.name) for e in self.input_embedders)
            lines.append("{} ({})".format(
                self.network_parameters.embedding_merger_type.name,
                merged_inputs))

        # middleware
        lines.append("Middleware:")
        lines.append(indent_string(str(self.middleware)))

        # pick the heads that belong to this network
        if self.network_parameters.use_separate_networks_per_head:
            head_indices = (net_idx,)
        else:
            head_indices = range(len(self.output_heads))

        for head_idx in head_indices:
            head = self.output_heads[head_idx]
            num_copies = self.network_parameters.heads_parameters[
                head_idx].num_output_head_copies
            if num_copies > 1:
                # NOTE(review): the source's indentation was lost; this keeps
                # the head's own structure in the single-copy branch only, so
                # a multi-copy head prints just its title line - confirm.
                lines.append("Output Head: {} (num copies = {})".format(
                    head.name, num_copies))
                continue
            lines.append("Output Head: {}".format(head.name))
            lines.append(indent_string(str(head)))

        # finalize network: nest under a title only when there are several
        network_text = '\n'.join(lines)
        if self.num_networks > 1:
            sections.append("Sub-network for head: {}".format(
                self.output_heads[net_idx].name))
            sections.append(indent_string(network_text))
        else:
            sections.append(network_text)

    return '\n'.join(sections)
def __str__(self):
    """Return a multi-line text outline of the layers in this head.

    One section is produced per action space: the sub-spaces of
    ``self.spaces.action`` when it is a ``CompoundActionSpace``, otherwise
    that single space. A discrete space is described as a dense layer plus
    softmax; a box space as a dense layer, followed by an activation and a
    multiply entry when every ``max_abs_range`` value is finite. When
    ``self.exploration_policy`` is a ``ContinuousEntropyParameters``, a
    stdev stream (dense plus softplus) is listed next to the mean stream.
    With more than one action space, each section is placed under an
    "Action head <index>" title and passed through ``indent_string``.

    :return: the outline, with entries joined by newlines
    """
    root_space = self.spaces.action
    if isinstance(root_space, CompoundActionSpace):
        sub_spaces = root_space.sub_action_spaces
    else:
        sub_spaces = [root_space]

    sections = []
    for space_idx, space in enumerate(sub_spaces):
        # mean stream
        mean_lines = []
        if isinstance(space, DiscreteActionSpace):
            # discrete actions: softmax probabilities output
            mean_lines += [
                "Dense (num outputs = {})".format(len(space.actions)),
                "Softmax",
            ]
        elif isinstance(space, BoxActionSpace):
            # continuous actions: mean output, bounded when the range is finite
            mean_lines.append(
                "Dense (num outputs = {})".format(space.shape))
            if np.all(space.max_abs_range < np.inf):
                mean_lines += [
                    "Activation (type = {})".format(
                        self.activation_function.__name__),
                    "Multiply (factor = {})".format(space.max_abs_range),
                ]

        # stdev stream
        # NOTE(review): evaluated for every sub-space, not only box spaces;
        # nesting it under the box branch would leave the list unbound for a
        # discrete space - confirm against the original layout.
        stdev_lines = []
        if isinstance(self.exploration_policy, ContinuousEntropyParameters):
            stdev_lines += [
                "Dense (num outputs = {})".format(space.shape),
                "Softplus",
            ]

        # the two streams get titles only when a stdev stream exists
        if stdev_lines:
            head_lines = [
                "Mean Stream",
                indent_string('\n'.join(mean_lines)),
                "Stdev Stream",
                indent_string('\n'.join(stdev_lines)),
            ]
        else:
            head_lines = ['\n'.join(mean_lines)]
        head_text = '\n'.join(head_lines)

        if len(sub_spaces) > 1:
            sections.append("Action head {}".format(space_idx))
            sections.append(indent_string(head_text))
        else:
            sections.append(head_text)

    return '\n'.join(sections)