def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [])
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
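# Sketch (not part of this file) of the forward()/value_function() pair that
# the fields built above imply. It follows the standard RLlib
# FullyConnectedNetwork (torch) pattern and assumes the usual flattened
# "obs_flat" input; treat it as an illustration of how these attributes are
# typically consumed, not as the author's implementation.
def forward(self, input_dict, state, seq_lens):
    obs = input_dict["obs_flat"].float()
    # Remember the flat input so a separate value branch can reuse it.
    self._last_flat_in = obs.reshape(obs.shape[0], -1)
    self._features = self._hidden_layers(self._last_flat_in)
    logits = self._logits(self._features) if self._logits else self._features
    if self.free_log_std:
        # Append the state-independent log-std bias variables.
        logits = self._append_free_log_std(logits)
    return logits, state

def value_function(self):
    assert self._features is not None, "must call forward() first"
    if self._value_branch_separate:
        return self._value_branch(
            self._value_branch_separate(self._last_flat_in)).squeeze(1)
    return self._value_branch(self._features).squeeze(1)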
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             num_decompose=2):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")
    self.num_decompose = num_decompose

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            # Alternative (disabled): one separate two-layer SlimFC head per
            # decomposition component.
            # self._logits = torch.nn.ModuleList([
            #     torch.nn.Sequential(
            #         SlimFC(in_size=prev_layer_size, out_size=256,
            #                initializer=normc_initializer(1.0),
            #                activation_fn=activation),
            #         SlimFC(in_size=256, out_size=num_outputs,
            #                initializer=normc_initializer(1.0),
            #                activation_fn=None),
            #     ) for i in range(self.num_decompose)])
            # Alternative (disabled): the same heads built from plain
            # torch.nn layers.
            # self._logits = torch.nn.ModuleList([
            #     torch.nn.Sequential(
            #         torch.nn.Linear(prev_layer_size, 256),
            #         torch.nn.ReLU(),
            #         torch.nn.Linear(256, num_outputs),
            #     ) for i in range(self.num_decompose)])
            # Single linear head emitting num_outputs logits per
            # decomposition component.
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs * self.num_decompose,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            raise ValueError("No num_outputs")

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # One value output per decomposition component.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=self.num_decompose,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
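# Hypothetical usage sketch (not in the original source): since
# num_decompose is an extra constructor argument, it would typically be
# supplied through RLlib's "custom_model_config", whose entries are passed
# as keyword arguments to a registered custom model. The names
# "decomposed_fcnet" and DecomposedFCNet are placeholders for whatever the
# surrounding class is actually called.
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("decomposed_fcnet", DecomposedFCNet)
config = {
    "model": {
        "custom_model": "decomposed_fcnet",
        "custom_model_config": {"num_decompose": 3},
    },
}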
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.free_log_std = model_config.get("free_log_std")
    # TODO(sven): implement case: vf_shared_layers = False.
    # vf_share_layers = model_config.get("vf_share_layers")

    logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Maybe generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            # Expose the last hidden size (or the flattened obs size when
            # there are no hidden layers) as this model's output size.
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    # TODO(sven): Implement non-shared value branch.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             control_input_size, control_hidden_size,
             interaction_hidden_size):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # TODO: handle no_final_linear.
    no_final_linear = model_config.get("no_final_linear")
    assert not no_final_linear, "no_final_linear is not implemented yet"
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.vf_hiddens = model_config.get("vf_hiddens", [10, 10])
    self.free_log_std = model_config.get("free_log_std")
    self.control_input_size = control_input_size
    self.interaction_input_size = 2
    assert (np.product(obs_space.shape) ==
            self.control_input_size + self.interaction_input_size), \
        "Wrong size of obs space"

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")

    # Whether stds are required as part of the action output.
    self.std = ((num_outputs / 2) == np.product(action_space.shape))

    # Whether the log stds are free-floating (state-independent).
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    # Output of the network; called logits for consistency with the rest
    # of RLlib.
    self._logits = None

    # Build the Negotiate model.
    self.linear_1 = SlimFC(self.control_input_size,
                           control_hidden_size,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self.linear_2_mean = SlimFC(control_hidden_size,
                                2,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self.linear_accept_1 = SlimFC(self.interaction_input_size,
                                  interaction_hidden_size,
                                  initializer=normc_initializer(1.0),
                                  activation_fn=activation)
    self.linear_accept_2_mean = SlimFC(interaction_hidden_size,
                                       1,
                                       initializer=normc_initializer(0.01),
                                       activation_fn=None)
    self.control = nn.Sequential(self.linear_1, self.linear_2_mean)
    self.interaction = nn.Sequential(self.linear_accept_1,
                                     self.linear_accept_2_mean)
    self.linear_coop_mean = AppendBiasLayer(1)

    if self.std:
        if not self.free_log_std:
            self.linear_2_std = SlimFC(control_hidden_size,
                                       2,
                                       initializer=normc_initializer(0.01),
                                       activation_fn=None)
            self.linear_accept_2_std = SlimFC(
                interaction_hidden_size,
                1,
                initializer=normc_initializer(0.01),
                activation_fn=None)
            self.linear_coop_std = AppendBiasLayer(1)
            self.control_std = nn.Sequential(self.linear_1,
                                             self.linear_2_std)
            self.interaction_std = nn.Sequential(self.linear_accept_1,
                                                 self.linear_accept_2_std)
            self.coop_std = AppendBiasLayer(1)
        else:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

    # Value function.
    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in self.vf_hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        prev_layer_size = prev_vf_layer_size
        self._value_branch_separate = nn.Sequential(*vf_layers)
    else:
        raise NotImplementedError("vf_share_layers=True is not supported")

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    self._value_module = nn.Sequential(self._value_branch_separate,
                                       self._value_branch)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
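# Hypothetical forward() sketch (assumption; the original forward is not
# shown, and torch is assumed imported at module level). It assumes the flat
# observation is laid out as [control features (control_input_size),
# interaction features (2)], and that the action means are
# [2 control, 1 accept, 1 appended coop bias], with stds (when present)
# produced the same way and concatenated after the means.
def forward(self, input_dict, state, seq_lens):
    obs = input_dict["obs_flat"].float()
    self._last_flat_in = obs.reshape(obs.shape[0], -1)
    control_in = self._last_flat_in[:, :self.control_input_size]
    interaction_in = self._last_flat_in[:, self.control_input_size:]

    # Means: 2 control outputs + 1 accept output + 1 appended coop bias.
    means = torch.cat([self.control(control_in),
                       self.interaction(interaction_in)], dim=1)
    means = self.linear_coop_mean(means)

    if self.std:
        if self.free_log_std:
            out = self._append_free_log_std(means)
        else:
            stds = torch.cat([self.control_std(control_in),
                              self.interaction_std(interaction_in)], dim=1)
            stds = self.coop_std(stds)
            out = torch.cat([means, stds], dim=1)
    else:
        out = means
    self._features = out
    return out, state

def value_function(self):
    assert self._last_flat_in is not None, "must call forward() first"
    return self._value_module(self._last_flat_in).squeeze(1)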
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create all hidden layers.
    for size in hiddens:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    if num_outputs:
        self._logits = MultiActionFC(in_size=prev_layer_size,
                                     out_size=num_outputs,
                                     out_lens=[3, 10],
                                     at_hiddens=[32, 3],
                                     ap_hiddens=[32, 10],
                                     initializer=normc_initializer(0.01),
                                     activation=activation)
    else:
        self.num_outputs = ([int(np.product(obs_space.shape))] +
                            hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
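# MultiActionFC is not defined in this file; the following is a hypothetical
# sketch of a head consistent with the arguments used above: an
# "action-type" sub-head whose layer sizes follow at_hiddens and an
# "action-parameter" sub-head following ap_hiddens, concatenated so the
# output splits as out_lens (e.g. [3, 10]). Every detail here is an
# assumption; torch, nn, and SlimFC are assumed imported as elsewhere.
class MultiActionFC(nn.Module):
    def __init__(self, in_size, out_size, out_lens, at_hiddens, ap_hiddens,
                 initializer, activation):
        super().__init__()
        assert sum(out_lens) == out_size, "out_lens must sum to out_size"

        def build_head(sizes, out_len):
            # Hidden layers use the given activation; the last listed size
            # is the head's output and gets no activation.
            layers, prev = [], in_size
            for i, size in enumerate(sizes):
                last = (i == len(sizes) - 1)
                layers.append(
                    SlimFC(in_size=prev, out_size=size,
                           initializer=initializer,
                           activation_fn=None if last else activation))
                prev = size
            assert prev == out_len, "head must end at its out_len"
            return nn.Sequential(*layers)

        self.at_head = build_head(at_hiddens, out_lens[0])
        self.ap_head = build_head(ap_hiddens, out_lens[1])

    def forward(self, x):
        # Concatenate the two sub-head outputs into one logits vector.
        return torch.cat([self.at_head(x), self.ap_head(x)], dim=-1)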