def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Option-critic style model: a shared FC body feeding three heads.

    Heads built on top of the shared body:
        q:    one Q-value per option.
        pi:   per-option action distribution (softmax over `num_outputs`).
        beta: per-option termination probability (sigmoid).
    """
    num_options = model_config.get('oc_num_options')
    # The logits layer produces `num_outputs` logits for EACH option.
    TorchModelV2.__init__(self, obs_space, action_space,
                          num_outputs * num_options, model_config, name)
    nn.Module.__init__(self)
    activation = get_activation_fn(
        model_config.get("fcnet_activation"), framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    self.option_epsilon = model_config.get('oc_option_epsilon')
    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    # Create layers
    for size in hiddens:
        layers.append(nn.Linear(prev_layer_size, size))
        # BUG FIX: `get_activation_fn(..., framework="torch")` returns an
        # nn.Module *class*; it must be instantiated before being placed
        # into nn.Sequential (appending the class raises TypeError).
        layers.append(activation())
        prev_layer_size = size
    self._body = nn.Sequential(*layers)
    self.q = nn.Linear(prev_layer_size, num_options)  # Value for each option
    self.pi = nn.Sequential(
        nn.Linear(prev_layer_size, num_options * num_outputs),
        View((num_options, num_outputs)),
        nn.Softmax(dim=-1))  # Action probabilities for each option
    # BUG FIX: nn.Sigmoid must be instantiated (`nn.Sigmoid()`);
    # nn.Sequential rejects the bare class (not an nn.Module instance).
    self.beta = nn.Sequential(
        nn.Linear(prev_layer_size, num_options),
        nn.Sigmoid())  # Termination probabilities
    # Holds the current "base" output (before logits layer).
    self._features = self._q = self._v = self._pi = self._beta = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
def build_q_net(name_):
    # Closure helper: assembles one Q-network as an nn.Sequential of
    # SlimFC layers. For continuous actions, obs and actions are fed in
    # concatenated; for discrete actions, only obs.
    act_cls = get_activation_fn(critic_hidden_activation, framework="torch")
    net = nn.Sequential()
    width = self.obs_ins + self.action_dim
    for idx, hidden_size in enumerate(critic_hiddens):
        hidden_layer = SlimFC(
            width,
            hidden_size,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=act_cls)
        net.add_module("{}_hidden_{}".format(name_, idx), hidden_layer)
        width = hidden_size
    # Final linear layer producing a single Q-value (no activation).
    net.add_module(
        "{}_out".format(name_),
        SlimFC(width, 1,
               initializer=torch.nn.init.xavier_uniform_,
               activation_fn=None))
    return net
def _build_value_model(self, model_config: ModelConfigDict):
    """Build value model with given model configuration

    model_config = {'activation': str, 'hiddens': Sequence}
    """
    act = get_activation_fn(model_config.get("activation"))
    sizes = model_config.get("hiddens", [])
    # Input width matches the flattened critic observation.
    net_in = tf.keras.layers.Input(
        shape=(np.product(self.critic_preprocessor.shape),),
        name="value-inputs")
    x = net_in
    for idx, width in enumerate(sizes):
        x = tf.keras.layers.Dense(
            width,
            name="fc_{}".format(idx),
            activation=act,
            kernel_initializer=normc_initializer(1.0))(x)
    # Scalar value head (linear, small init).
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(x)
    return tf.keras.Model(net_in, [value_out])
def __init__(self,
             in_size: int,
             out_size: int,
             initializer: Any = None,
             activation_fn: Any = None,
             use_bias: bool = True,
             bias_init: float = 0.0):
    """Creates a standard FC layer, similar to torch.nn.Linear

    Args:
        in_size(int): Input size for FC Layer
        out_size (int): Output size for FC Layer
        initializer (Any): Initializer function for FC layer weights
        activation_fn (Any): Activation function at the end of layer
        use_bias (bool): Whether to add bias weights or not
        bias_init (float): Initalize bias weights to bias_init const
    """
    super(SlimFC, self).__init__()
    # The underlying Linear layer, with optional custom weight init.
    dense = nn.Linear(in_size, out_size, bias=use_bias)
    if initializer:
        initializer(dense.weight)
    if use_bias is True:
        nn.init.constant_(dense.bias, bias_init)
    modules = [dense]
    # Resolve a string activation specifier to its torch class, then
    # instantiate it; None means a purely linear layer.
    if isinstance(activation_fn, str):
        activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        modules.append(activation_fn())
    self._model = nn.Sequential(*modules)
def feed_forward(self, obs, policy_vars, policy_config):
    # Hacky for now, reconstruct FC network with adapted weights
    # @mluo: TODO for any network
    def fc_network(inp, network_vars, hidden_nonlinearity,
                   output_nonlinearity, policy_config, hiddens_name,
                   logits_name):
        # Re-applies an FC net functionally from a flat dict of tensors.
        # NOTE(review): relies on `network_vars` iteration order being
        # (weight, bias) pairs per layer, hidden layers first — i.e. on
        # insertion order of the dict upstream. Confirm with the caller.
        x = inp
        hidden_w = []
        logits_w = []
        # Partition the variables by substring match on their names.
        for name, w in network_vars.items():
            if hiddens_name in name:
                hidden_w.append(w)
            elif logits_name in name:
                logits_w.append(w)
            else:
                raise NameError
        # Hidden vars come in (weight, bias) pairs; logits has exactly one.
        assert len(hidden_w) % 2 == 0 and len(logits_w) == 2
        while len(hidden_w) != 0:
            # pop(0) twice: first is the weight, second the bias
            # (left-to-right argument evaluation).
            x = nn.functional.linear(x, hidden_w.pop(0), hidden_w.pop(0))
            # Nonlinearities are torch module *classes*; instantiate, then
            # apply.
            x = hidden_nonlinearity()(x)
        x = nn.functional.linear(x, logits_w.pop(0), logits_w.pop(0))
        x = output_nonlinearity()(x)
        return x

    # Split the adapted parameters into policy / value / log-std groups
    # by substring match on the variable names.
    policyn_vars = {}
    valuen_vars = {}
    log_std = None
    for name, param in policy_vars.items():
        if "value" in name:
            valuen_vars[name] = param
        elif "log_std" in name:
            log_std = param
        else:
            policyn_vars[name] = param
    output_nonlinearity = nn.Identity
    hidden_nonlinearity = get_activation_fn(
        policy_config["fcnet_activation"], framework="torch")
    pi_new_logits = fc_network(obs, policyn_vars, hidden_nonlinearity,
                               output_nonlinearity, policy_config,
                               "hidden_layers", "logits")
    if log_std is not None:
        # Broadcast the state-independent log-std row across the batch and
        # append it to the logits (diag-Gaussian parameterization).
        pi_new_logits = torch.cat([
            pi_new_logits,
            log_std.unsqueeze(0).repeat([len(pi_new_logits), 1])
        ], axis=1)
    value_fn = fc_network(obs, valuen_vars, hidden_nonlinearity,
                          output_nonlinearity, policy_config,
                          "value_branch_separate", "value_branch")
    # Returns (policy logits, flattened value estimates).
    return pi_new_logits, torch.squeeze(value_fn)
def build_q_net(name_):
    # Closure helper: builds one Q-head on top of the critic encoder's
    # embedding. For discrete actions, only obs (the embedding) is fed in.
    act = get_activation_fn(critic_hidden_activation, framework="torch")
    init = nn.init.xavier_uniform_
    net = nn.Sequential()
    # First layer maps the embedding dim to the encoder's feature dim.
    width = embed_dim
    feat_dim = self.critic_encoder.feature_dim
    net.add_module(
        "{}_hidden_{}".format(name_, "e"),
        SlimFC(width, feat_dim, initializer=init, activation_fn=act))
    width = feat_dim
    for idx, hidden in enumerate(critic_hiddens):
        net.add_module(
            "{}_hidden_{}".format(name_, idx),
            SlimFC(width, hidden, initializer=init, activation_fn=act))
        width = hidden
    # Linear output head (no activation).
    net.add_module(
        "{}_out".format(name_),
        SlimFC(width, q_outs, initializer=init, activation_fn=None))
    return net
def create_inverse_model(self, model_config, encoder):
    """
    Create the inverse submodel of the SCM.
    Inputs: [Encoded state at t, Encoded state at t - 1, Actions at t - 1,
    MOA LSTM output at t - 1]
    Output: Predicted social influence reward at t - 1
    :param model_config: The model config dict.
    :param encoder: The SCM encoder submodel.
    :return: A new inverse model.
    """
    enc_size = encoder.output_shape[-1]
    input_layers = [
        self.create_encoded_input_layer(enc_size, "encoded_input_now"),
        self.create_encoded_input_layer(enc_size, "encoded_input_next"),
        self.create_action_input_layer(self.action_space.n,
                                       self.num_other_agents + 1),
        self.create_lstm_input_layer(model_config),
    ]
    merged = tf.keras.layers.concatenate(input_layers)
    act = get_activation_fn(model_config.get("fcnet_activation"))
    hidden = tf.keras.layers.Dense(
        32,
        name="fc_forward",
        activation=act,
        kernel_initializer=normc_initializer(1.0),
    )(merged)
    # Single scalar output; relu keeps the prediction non-negative.
    out = tf.keras.layers.Dense(
        1,
        activation="relu",
        kernel_initializer=normc_initializer(1.0),
    )(hidden)
    return tf.keras.Model(input_layers, out, name="SCM_Inverse_Model")
def create_scm_encoder_model(obs_space, model_config):
    """
    Create the encoder submodel, which is part of the SCM.
    :param obs_space: A single agent's observation space.
    :param model_config: The model config dict.
    :return: A new encoder model.
    """
    obs_dims = obs_space.original_space.spaces["curr_obs"].shape
    inputs = tf.keras.layers.Input(
        obs_dims, name="observations", dtype=tf.uint8)
    # Divide by 255 to transform [0,255] uint8 rgb pixel values to [0,1] float32.
    scaled = tf.math.divide(tf.keras.backend.cast(inputs, tf.float32), 255.0)
    act = get_activation_fn(model_config.get("conv_activation"))
    # Only the LAST configured conv filter spec is used for this encoder.
    out_size, kernel, stride = model_config.get("conv_filters")[-1]
    conv = tf.keras.layers.Conv2D(
        out_size,
        kernel,
        strides=(stride, stride),
        activation=act,
        padding="valid",
        name="conv_scm_encoder",
    )(scaled)
    flat = tf.keras.layers.Flatten()(conv)
    return tf.keras.Model(inputs, flat, name="SCM_Encoder_Model")
def __init__(self,
             size_in,
             size_out,
             hiddens,
             activations,
             init_weights,
             append_log_std=False,
             log_std_type='constant',
             sample_std=1.0):
    """Builds an MLP of SlimFC layers with per-layer activation/init,
    optionally appending a log-std output (for Gaussian policies)."""
    super().__init__()
    widths = hiddens + [size_out]
    modules = []
    prev = size_in
    # One SlimFC per width; activation/init are indexed per layer.
    for idx, width in enumerate(widths):
        modules.append(
            SlimFC(in_size=prev,
                   out_size=width,
                   initializer=normc_initializer(init_weights[idx]),
                   activation_fn=get_activation_fn(activations[idx],
                                                   framework="torch")))
        prev = width
    if append_log_std:
        modules.append(
            AppendLogStd(type=log_std_type,
                         init_val=np.log(sample_std),
                         dim=size_out))
    self._model = nn.Sequential(*modules)
def __init__(
        self,
        in_channels,
        out_channels,
        kernel,
        stride,
        padding,
        # Defaulting these to nn.[..] will break soft torch import.
        initializer="default",
        activation_fn="default",
        bias_init=0):
    """Conv2d wrapper: optional zero-padding, weight init and activation."""
    super(SlimConv2d, self).__init__()
    modules = []
    # Explicit zero-padding layer (skipped when `padding` is falsy).
    if padding:
        modules.append(nn.ZeroPad2d(padding))
    conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
    if initializer:
        if initializer == "default":
            initializer = nn.init.xavier_uniform_
        initializer(conv.weight)
    nn.init.constant_(conv.bias, bias_init)
    modules.append(conv)
    # "default" activation means ReLU; other strings resolve via the
    # torch activation lookup. None means no activation.
    if isinstance(activation_fn, str):
        if activation_fn == "default":
            activation_fn = nn.ReLU
        else:
            activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        modules.append(activation_fn())
    self._model = nn.Sequential(*modules)
def __init__(self,
             *,
             input_size: int,
             filters: Tuple[Tuple[int]] = ((1024, 5, 2), (128, 5, 2),
                                           (64, 6, 2), (32, 6, 2)),
             initializer="default",
             bias_init=0,
             activation_fn: str = "relu",
             output_shape: Tuple[int] = (3, 64, 64)):
    """Initializes a TransposedConv2DStack instance.

    Args:
        input_size (int): The size of the 1D input vector, from which to
            generate the image distribution.
        filters (Tuple[Tuple[int]]): Tuple of filter setups (1 for each
            ConvTranspose2D layer): [in_channels, kernel, stride].
        initializer (Union[str]):
        bias_init (float): The initial bias values to use.
        activation_fn (str): Activation function descriptor (str).
        output_shape (Tuple[int]): Shape of the final output image.
    """
    super().__init__()
    self.activation = get_activation_fn(activation_fn, framework="torch")
    self.output_shape = output_shape
    initializer = get_initializer(initializer, framework="torch")

    in_channels = filters[0][0]
    # Stem: project the 1D input to `in_channels`, then reshape it into a
    # channels-first 1x1 "image" feeding the transposed-conv stack.
    self.layers = [
        nn.Linear(input_size, in_channels),
        Reshape([-1, in_channels, 1, 1]),
    ]
    num_filters = len(filters)
    for idx, (_, kernel, stride) in enumerate(filters):
        is_last = idx == num_filters - 1
        # Last layer maps to the output image's channel count.
        out_channels = output_shape[0] if is_last else filters[idx + 1][0]
        deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel,
                                    stride)
        initializer(deconv.weight)
        nn.init.constant_(deconv.bias, bias_init)
        self.layers.append(deconv)
        # Activation after every layer except the final one.
        if self.activation is not None and not is_last:
            self.layers.append(self.activation())
        # num-outputs == num-inputs for next layer.
        in_channels = out_channels
    self._model = nn.Sequential(*self.layers)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Deprecated TF1 vision net; returns (output, last-feature-layer)."""
    # Hard deprecate this class. All Models should use the ModelV2
    # API from here on.
    deprecation_warning(
        "Model->VisionNetwork", "ModelV2->VisionNetwork", error=False)
    x = input_dict["obs"]
    conv_specs = options.get("conv_filters")
    if not conv_specs:
        conv_specs = _get_filter_config(x.shape.as_list()[1:])
    act = get_activation_fn(options.get("conv_activation"))
    with tf.name_scope("vision_net"):
        # All but the last filter spec: "same"-padded conv layers.
        for idx, (depth, kernel, stride) in enumerate(conv_specs[:-1], 1):
            x = tf.layers.conv2d(
                x,
                depth,
                kernel,
                stride,
                activation=act,
                padding="same",
                name="conv{}".format(idx))
        depth, kernel, stride = conv_specs[-1]
        # skip final linear layer
        if options.get("no_final_linear"):
            fc_out = tf.layers.conv2d(
                x,
                num_outputs,
                kernel,
                stride,
                activation=act,
                padding="valid",
                name="fc_out")
            return flatten(fc_out), flatten(fc_out)
        fc1 = tf.layers.conv2d(
            x,
            depth,
            kernel,
            stride,
            activation=act,
            padding="valid",
            name="fc1")
        fc2 = tf.layers.conv2d(
            fc1,
            num_outputs, [1, 1],
            activation=None,
            padding="same",
            name="fc2")
        return flatten(fc2), flatten(fc1)
def feed_forward(self, obs, policy_vars, policy_config):
    # Hacky for now, reconstruct FC network with adapted weights
    # @mluo: TODO for any network
    def fc_network(inp, network_vars, hidden_nonlinearity,
                   output_nonlinearity, policy_config):
        # Re-applies an FC net functionally from a flat dict of tensors.
        # NOTE(review): relies on `network_vars` iterating as alternating
        # kernel/bias entries per layer (dict insertion order) — confirm
        # with the code that builds `policy_vars`.
        bias_added = False
        x = inp
        for name, param in network_vars.items():
            if "kernel" in name:
                x = tf.matmul(x, param)
            elif "bias" in name:
                x = tf.add(x, param)
                bias_added = True
            else:
                raise NameError
            # After each (kernel, bias) pair, apply the nonlinearity:
            # output layers (name contains "out") get the output
            # nonlinearity, all others the hidden one.
            if bias_added:
                if "out" not in name:
                    x = hidden_nonlinearity(x)
                elif "out" in name:
                    x = output_nonlinearity(x)
                else:
                    # Unreachable: the two branches above are exhaustive.
                    raise NameError
                bias_added = False
        return x

    # Split the adapted parameters into policy / value / log-std groups
    # by substring match on the variable names.
    policyn_vars = {}
    valuen_vars = {}
    log_std = None
    for name, param in policy_vars.items():
        if "value" in name:
            valuen_vars[name] = param
        elif "log_std" in name:
            log_std = param
        else:
            policyn_vars[name] = param
    output_nonlinearity = tf.identity
    hidden_nonlinearity = get_activation_fn(
        policy_config["fcnet_activation"])
    pi_new_logits = fc_network(obs, policyn_vars, hidden_nonlinearity,
                               output_nonlinearity, policy_config)
    if log_std is not None:
        # `0.0 * pi_new_logits + log_std` broadcasts the state-independent
        # log-std row across the batch before concatenation.
        pi_new_logits = tf.concat(
            [pi_new_logits, 0.0 * pi_new_logits + log_std], 1)
    value_fn = fc_network(obs, valuen_vars, hidden_nonlinearity,
                          output_nonlinearity, policy_config)
    # Returns (policy logits, flattened value estimates).
    return pi_new_logits, tf.reshape(value_fn, [-1])
def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]],
        padding: Union[int, Tuple[int, int]],
        # Defaulting these to nn.[..] will break soft torch import.
        initializer: Any = "default",
        activation_fn: Any = "default",
        bias_init: float = 0):
    """Creates a standard Conv2d layer, similar to torch.nn.Conv2d

    Args:
        in_channels(int): Number of input channels
        out_channels (int): Number of output channels
        kernel (Union[int, Tuple[int, int]]): If int, the kernel is a
            tuple(x,x). Elsewise, the tuple can be specified
        stride (Union[int, Tuple[int, int]]): Controls the stride for the
            cross-correlation. If int, the stride is a tuple(x,x).
            Elsewise, the tuple can be specified
        padding (Union[int, Tuple[int, int]]): Controls the amount of
            implicit zero-paddings during the conv operation
        initializer (Any): Initializer function for kernel weights
        activation_fn (Any): Activation function at the end of layer
        bias_init (float): Initalize bias weights to bias_init const
    """
    super(SlimConv2d, self).__init__()
    modules = []
    # Explicit zero-padding layer (skipped when `padding` is falsy).
    if padding:
        modules.append(nn.ZeroPad2d(padding))
    conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
    if initializer:
        if initializer == "default":
            initializer = nn.init.xavier_uniform_
        initializer(conv.weight)
    nn.init.constant_(conv.bias, bias_init)
    modules.append(conv)
    # "default" activation means ReLU; other strings resolve via the
    # torch activation lookup. None means no activation.
    if isinstance(activation_fn, str):
        if activation_fn == "default":
            activation_fn = nn.ReLU
        else:
            activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        modules.append(activation_fn())
    self._model = nn.Sequential(*modules)
def __init__(self, in_size, out_size, sigma0, activation="relu"):
    """Initializes a NoisyLayer object.

    Args:
        in_size: Input size of the layer.
        out_size: Output size of the layer.
        sigma0: Initialization scale for the bias noise parameters.
        activation: Activation specifier string for this layer.
    """
    super().__init__()
    self.in_size = in_size
    self.out_size = out_size
    self.sigma0 = sigma0
    self.activation = get_activation_fn(activation, framework="torch")
    # torch activation specifiers resolve to classes; instantiate here.
    if self.activation is not None:
        self.activation = self.activation()
    bound = 1.0 / np.sqrt(float(self.in_size))
    # Learnable weight-noise scales, uniform in [-1/sqrt(n), 1/sqrt(n)].
    self.sigma_w = get_variable(
        np.random.uniform(
            low=-bound, high=bound, size=[self.in_size, out_size]),
        framework="torch",
        dtype=torch.float32,
        torch_tensor=True,
        trainable=True)
    # Learnable bias-noise scales, initialized to sigma0/sqrt(n).
    self.sigma_b = get_variable(
        np.full(
            shape=[out_size],
            fill_value=sigma0 / np.sqrt(float(self.in_size))),
        framework="torch",
        dtype=torch.float32,
        torch_tensor=True,
        trainable=True)
    # Deterministic weights, constant-initialized to 6/sqrt(in+out).
    self.w = get_variable(
        np.full(
            shape=[self.in_size, self.out_size],
            fill_value=6 / np.sqrt(float(in_size) + float(out_size))),
        framework="torch",
        dtype=torch.float32,
        torch_tensor=True,
        trainable=True)
    # Deterministic bias, zero-initialized.
    self.b = get_variable(
        np.zeros([out_size]),
        framework="torch",
        dtype=torch.float32,
        torch_tensor=True,
        trainable=True)
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use _build_layers_v2().
    """
    # Soft deprecate this class. All Models should use the ModelV2
    # API from here on.
    deprecation_warning(
        "Model->FullyConnectedNetwork",
        "ModelV2->FullyConnectedNetwork",
        error=False)
    sizes = options.get("fcnet_hiddens")
    act = get_activation_fn(options.get("fcnet_activation"))
    # Flatten any higher-rank input to one vector per batch item.
    if len(inputs.shape) > 2:
        inputs = tf.layers.flatten(inputs)
    with tf.name_scope("fc_net"):
        last_layer = inputs
        for idx, size in enumerate(sizes, 1):
            # skip final linear layer
            if options.get("no_final_linear") and idx == len(sizes):
                output = tf.layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(1.0),
                    activation=act,
                    name="fc_out")
                return output, output
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=act,
                name="fc{}".format(idx))
        # Final linear head with a smaller-scale init.
        output = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")
        return output, last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Torch vision net: conv stack plus logits and value heads."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    act = get_activation_fn(
        model_config.get("conv_activation"), framework="torch")
    filters = model_config.get("conv_filters")
    if not filters:
        filters = _get_filter_config(obs_space.shape)
    # no_final_linear = model_config.get("no_final_linear")
    # vf_share_layers = model_config.get("vf_share_layers")

    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    conv_layers = []
    # All but the last filter spec: conv layers with computed padding.
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = valid_padding(in_size, kernel, [stride, stride])
        conv_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=act))
        in_channels = out_channels
        in_size = out_size
    # Last conv layer: no explicit padding.
    out_channels, kernel, stride = filters[-1]
    conv_layers.append(
        SlimConv2d(
            in_channels,
            out_channels,
            kernel,
            stride,
            None,
            activation_fn=act))
    self._convs = nn.Sequential(*conv_layers)

    self._logits = SlimFC(
        out_channels, num_outputs, initializer=nn.init.xavier_uniform_)
    self._value_branch = SlimFC(
        out_channels, 1, initializer=normc_initializer())
    # Holds the current "base" output (before logits layer).
    self._features = None
def _build_layers_v2(self, input_dict, num_outputs, options):
    """TF1 vision net; returns (output, last-feature-layer) tensors."""
    last = input_dict["obs"]
    conv_specs = options.get("conv_filters")
    if not conv_specs:
        conv_specs = _get_filter_config(last.shape.as_list()[1:])
    act = get_activation_fn(options.get("conv_activation"))
    with tf.name_scope("vision_net"):
        idx = 0
        # "same"-padded conv layers for all but the last filter spec.
        for out_size, kernel, stride in conv_specs[:-1]:
            idx += 1
            last = tf.layers.conv2d(
                last,
                out_size,
                kernel,
                stride,
                activation=act,
                padding="same",
                name="conv{}".format(idx))
        out_size, kernel, stride = conv_specs[-1]
        # skip final linear layer
        if options.get("no_final_linear"):
            fc_out = tf.layers.conv2d(
                last,
                num_outputs,
                kernel,
                stride,
                activation=act,
                padding="valid",
                name="fc_out")
            return flatten(fc_out), flatten(fc_out)
        fc1 = tf.layers.conv2d(
            last,
            out_size,
            kernel,
            stride,
            activation=act,
            padding="valid",
            name="fc1")
        fc2 = tf.layers.conv2d(
            fc1,
            num_outputs, [1, 1],
            activation=None,
            padding="same",
            name="fc2")
        return flatten(fc2), flatten(fc1)
def __init__(
        self,
        in_size,
        out_size,
        initializer: Optional[Callable] = None,
        activation_fn: Optional[str] = None,
        use_bias: bool = True,
        prng_key: Optional[jax.random.PRNGKey] = None,
        name: Optional[str] = None,
):
    """Initializes a SlimFC instance.

    Args:
        in_size (int): The input size of the input data that will be
            passed into this layer.
        out_size (int): The number of nodes in this FC layer.
        initializer (flax.:
        activation_fn (str): An activation string specifier, e.g. "relu".
        use_bias (bool): Whether to add biases to the dot product or not.
        prng_key (Optional[jax.random.PRNGKey]): An optional PRNG key to
            use for initialization. If None, create a new random one.
        name (Optional[str]): An optional name for this layer.
    """
    # By default, use Glorot unform initializer.
    if initializer is None:
        initializer = nn.initializers.xavier_uniform()

    # Use the given key (or seed a fresh one from the wall clock), then
    # split it so later draws are independent.
    self.prng_key = prng_key or jax.random.PRNGKey(int(time.time()))
    _, self.prng_key = jax.random.split(self.prng_key)

    # The wrapped flax Dense layer.
    self._dense = nn.Dense(
        out_size,
        use_bias=use_bias,
        kernel_init=initializer,
        name=name,
    )
    # Initialize parameters by tracing the layer on a dummy input.
    dummy_in = jax.random.normal(
        self.prng_key, (in_size, ), dtype=np.float32)
    _, self.prng_key = jax.random.split(self.prng_key)
    self._params = self._dense.init(self.prng_key, dummy_in)

    # Activation function (if any; default=None (linear)).
    self.activation_fn = get_activation_fn(activation_fn, "jax")
def build_fc_layers(model_config, last_layer, name):
    """
    Create a sequence of fully-connected (dense) layers.
    :param model_config: The config dict containing information on what
    fully-connected layers to create.
    :param last_layer: The layer that feeds into the fully connected layer(s)
    constructed here.
    :param name: The FC layer name.
    :return: The last constructed FC layer.
    """
    act = get_activation_fn(model_config.get("fcnet_activation"))
    for idx, width in enumerate(model_config.get("fcnet_hiddens")):
        last_layer = tf.keras.layers.Dense(
            width,
            name="fc_{}_{}".format(idx + 1, name),
            activation=act,
            kernel_initializer=normc_initializer(1.0),
        )(last_layer)
    return last_layer
def call(self, inputs):
    """Applies the noisy linear transform (fresh noise per call)."""
    in_size = int(inputs.shape[1])
    # One noise vector per side, shaped by self._f_epsilon; the weight
    # noise is their outer product, the bias noise is the output vector.
    eps_in = self._f_epsilon(tf.random.normal(shape=[in_size]))
    eps_out = self._f_epsilon(tf.random.normal(shape=[self.out_size]))
    eps_w = tf.matmul(
        a=tf.expand_dims(eps_in, -1), b=tf.expand_dims(eps_out, 0))
    eps_b = eps_out
    noisy_w = self.w + self.sigma_w * eps_w
    noisy_b = self.b + self.sigma_b * eps_b
    out = tf.matmul(inputs, noisy_w) + noisy_b
    fn = get_activation_fn(self.activation, framework="tf")
    if fn is not None:
        out = fn(out)
    return out
def __init__(self,
             in_size: int,
             out_size: int,
             sigma0: float,
             activation: str = "relu"):
    """Initializes a NoisyLayer object.

    Args:
        in_size: Input size for Noisy Layer
        out_size: Output size for Noisy Layer
        sigma0: Initialization value for sigma_b (bias noise)
        activation: Non-linear activation for Noisy Layer
    """
    super().__init__()
    self.in_size = in_size
    self.out_size = out_size
    self.sigma0 = sigma0
    self.activation = get_activation_fn(activation, framework="torch")
    # torch activation specifiers resolve to classes; instantiate here.
    if self.activation is not None:
        self.activation = self.activation()

    bound = 1.0 / np.sqrt(float(self.in_size))
    # Learnable weight-noise scales, uniform in [-1/sqrt(n), 1/sqrt(n)].
    self.register_parameter(
        "sigma_w",
        nn.Parameter(
            torch.from_numpy(
                np.random.uniform(
                    low=-bound, high=bound,
                    size=[self.in_size, out_size])).float()))
    # Learnable bias-noise scales, initialized to sigma0/sqrt(n).
    self.register_parameter(
        "sigma_b",
        nn.Parameter(
            torch.from_numpy(
                np.full(
                    shape=[out_size],
                    fill_value=sigma0 /
                    np.sqrt(float(self.in_size)))).float()))
    # Deterministic weights, constant-initialized to 6/sqrt(in+out).
    self.register_parameter(
        "w",
        nn.Parameter(
            torch.from_numpy(
                np.full(
                    shape=[self.in_size, self.out_size],
                    fill_value=6 /
                    np.sqrt(float(in_size) + float(out_size)))).float()))
    # Deterministic bias, zero-initialized.
    self.register_parameter(
        "b", nn.Parameter(torch.from_numpy(np.zeros([out_size])).float()))
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use _build_layers_v2().
    """
    sizes = options.get("fcnet_hiddens")
    act = get_activation_fn(options.get("fcnet_activation"))
    # Flatten any higher-rank input to one vector per batch item.
    if len(inputs.shape) > 2:
        inputs = tf.layers.flatten(inputs)
    with tf.name_scope("fc_net"):
        last_layer = inputs
        i = 1
        for size in sizes:
            if options.get("no_final_linear") and i == len(sizes):
                # skip final linear layer: the last hidden layer doubles
                # as the (activated) output of width `num_outputs`.
                output = tf.layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(1.0),
                    activation=act,
                    name="fc_out")
                return output, output
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=act,
                name="fc{}".format(i))
            i += 1
        # Final linear head with a smaller-scale init.
        output = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")
        return output, last_layer
def __init__(self,
             in_size,
             out_size,
             initializer=None,
             activation_fn=None,
             use_bias=True,
             bias_init=0.0):
    """Wraps nn.Linear plus an optional activation in an nn.Sequential."""
    super(SlimFC, self).__init__()
    dense = nn.Linear(in_size, out_size, bias=use_bias)
    if initializer:
        initializer(dense.weight)
    if use_bias is True:
        nn.init.constant_(dense.bias, bias_init)
    parts = [dense]
    # String activation specifiers resolve via the torch lookup; None
    # leaves the layer purely linear.
    if isinstance(activation_fn, str):
        activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        parts.append(activation_fn())
    self._model = nn.Sequential(*parts)
def __init__(self,
             in_size,
             out_size,
             initializer=None,
             activation_fn=None,
             use_bias=True,
             bias_init=0.0):
    """Standard FC layer: Linear (+ optional activation) as a Sequential."""
    super(SlimFC, self).__init__()
    # Linear layer with optional custom weight and bias initialization.
    fc = nn.Linear(in_size, out_size, bias=use_bias)
    if initializer:
        initializer(fc.weight)
    if use_bias is True:
        nn.init.constant_(fc.bias, bias_init)
    stack = [fc]
    # Resolve a string activation to its torch class and instantiate it;
    # default None keeps the layer linear.
    if isinstance(activation_fn, str):
        activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        stack.append(activation_fn())
    self._model = nn.Sequential(*stack)
def build_conv_layers(model_config, last_layer):
    """
    Create a sequence of convoluational layers.
    :param model_config: The config dict containing information on what
    convolutional layers to create.
    :param last_layer: The layer that feeds into the convolutional layer(s)
    constructed here.
    :return: The last constructed convolutional layer, flattened.
    """
    activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            # BUG FIX: Conv2D has no `channels_last` kwarg (it raised
            # TypeError); the correct parameter is `data_format`.
            data_format="channels_last",
            name="conv{}".format(i),
        )(last_layer)
    out_size, kernel, stride = filters[-1]
    # If there is only one filter spec, the loop above never ran and `i`
    # would be unbound; force the final layer's name to "conv0"/"conv{n}".
    if len(filters) == 1:
        i = -1
    conv_out = tf.keras.layers.Conv2D(
        out_size,
        kernel,
        strides=(stride, stride),
        activation=activation,
        padding="valid",
        name="conv{}".format(i + 1),
    )(last_layer)
    flattened_conv_out = tf.keras.layers.Flatten()(conv_out)
    return flattened_conv_out
def _create_fc_net(self, layer_dims, activation, name=None):
    """Given a list of layer dimensions (incl. input-dim), creates FC-net.

    Args:
        layer_dims (Tuple[int]): Tuple of layer dims, including the input
            dimension.
        activation (str): An activation specifier string (e.g. "relu").

    Examples:
        If layer_dims is [4,8,6] we'll have a two layer net: 4->8 (8 nodes)
        and 8->6 (6 nodes), where the second layer (6 nodes) does not have
        an activation anymore. 4 is the input dimension.
    """
    use_torch = self.framework == "torch"
    # Keras needs an explicit Input layer; torch infers from in_size.
    if use_torch:
        net_layers = []
    else:
        net_layers = [
            tf.keras.layers.Input(
                shape=(layer_dims[0], ), name="{}_in".format(name))
        ]
    last_idx = len(layer_dims) - 2
    for i in range(len(layer_dims) - 1):
        # No activation on the final layer.
        act = activation if i < last_idx else None
        if use_torch:
            net_layers.append(
                SlimFC(in_size=layer_dims[i],
                       out_size=layer_dims[i + 1],
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=act))
        else:
            net_layers.append(
                tf.keras.layers.Dense(units=layer_dims[i + 1],
                                      activation=get_activation_fn(act),
                                      name="{}_{}".format(name, i)))
    if use_torch:
        return nn.Sequential(*net_layers)
    return tf.keras.Sequential(net_layers)
def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             actor_hidden_activation="relu",
             actor_hiddens=(256, 256),
             critic_hidden_activation="relu",
             critic_hiddens=(256, 256),
             twin_q=False,
             add_layer_norm=False):
    """Initialize variables of this model.

    Extra model kwargs:
        actor_hidden_activation (str): activation for actor network
        actor_hiddens (list): hidden layers sizes for actor network
        critic_hidden_activation (str): activation for critic network
        critic_hiddens (list): hidden layers sizes for critic network
        twin_q (bool): build twin Q networks.
        add_layer_norm (bool): Enable layer norm (for param noise).

    Note that the core layers for forward() are not defined here, this
    only defines the layers for the output heads. Those layers for
    forward() should be defined in subclasses of DDPGTorchModel.
    """
    nn.Module.__init__(self)
    super(DDPGTorchModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)

    # True iff any action dimension is bounded on BOTH sides.
    self.bounded = np.logical_and(action_space.bounded_above,
                                  action_space.bounded_below).any()
    # Lower bound and per-dimension range, used for squashing below.
    self.low_action = torch.tensor(action_space.low, dtype=torch.float32)
    self.action_range = torch.tensor(
        action_space.high - action_space.low, dtype=torch.float32)
    self.action_dim = np.product(action_space.shape)

    # Build the policy network.
    self.policy_model = nn.Sequential()
    # NOTE(review): the policy input width is taken from `num_outputs`
    # (the core model's feature size), not the raw obs — confirm callers.
    ins = num_outputs
    self.obs_ins = ins
    activation = get_activation_fn(actor_hidden_activation,
                                   framework="torch")
    for i, n in enumerate(actor_hiddens):
        self.policy_model.add_module(
            "action_{}".format(i),
            SlimFC(ins,
                   n,
                   initializer=torch.nn.init.xavier_uniform_,
                   activation_fn=activation))
        # Add LayerNorm after each Dense.
        if add_layer_norm:
            self.policy_model.add_module("LayerNorm_A_{}".format(i),
                                         nn.LayerNorm(n))
        ins = n
    # Linear output layer mapping to the action dimension.
    self.policy_model.add_module(
        "action_out",
        SlimFC(ins,
               self.action_dim,
               initializer=torch.nn.init.xavier_uniform_,
               activation_fn=None))

    # Use sigmoid to scale to [0,1], but also double magnitude of input to
    # emulate behaviour of tanh activation used in DDPG and TD3 papers.
    # After sigmoid squashing, re-scale to env action space bounds.
    class _Lambda(nn.Module):
        # Captures `self` (the model) from the enclosing scope for the
        # action bounds; its own instance arg is therefore named `self_`.
        def forward(self_, x):
            sigmoid_out = nn.Sigmoid()(2.0 * x)
            squashed = self.action_range * sigmoid_out + self.low_action
            return squashed

    # Only squash if we have bounded actions.
    if self.bounded:
        self.policy_model.add_module("action_out_squashed", _Lambda())

    # Build the Q-net(s), including target Q-net(s).
    def build_q_net(name_):
        # Assembles one Q-network from SlimFC layers (closure over the
        # critic config args above).
        activation = get_activation_fn(critic_hidden_activation,
                                       framework="torch")
        # For continuous actions: Feed obs and actions (concatenated)
        # through the NN. For discrete actions, only obs.
        q_net = nn.Sequential()
        ins = self.obs_ins + self.action_dim
        for i, n in enumerate(critic_hiddens):
            q_net.add_module(
                "{}_hidden_{}".format(name_, i),
                SlimFC(ins,
                       n,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=activation))
            ins = n
        # Single scalar Q-value output (no activation).
        q_net.add_module(
            "{}_out".format(name_),
            SlimFC(ins,
                   1,
                   initializer=torch.nn.init.xavier_uniform_,
                   activation_fn=None))
        return q_net

    self.q_model = build_q_net("q")
    # Optional second Q-net (TD3-style twin critic).
    if twin_q:
        self.twin_q_model = build_q_net("twin_q")
    else:
        self.twin_q_model = None
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name):
    """Fully-connected torch model.

    This variant replaces the usual single linear output heads with
    5-member ensembles of two-layer (256-unit, ReLU) MLPs: `self._logits`
    (policy head) and `self._value_branch` (value head). Only the layers
    are constructed here; how they are used is defined elsewhere.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(
        model_config.get("fcnet_activation"), framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        # Only the first half (means) is produced by the net; the second
        # half (log stds) comes from the free bias layer below.
        num_outputs = num_outputs // 2
    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=normc_initializer(1.0),
                activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=num_outputs,
                initializer=normc_initializer(1.0),
                activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=hiddens[-1],
                    initializer=normc_initializer(1.0),
                    activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            # Ensemble of 5 independent policy heads (replaces the
            # stock single linear logits layer).
            self._logits = torch.nn.ModuleList([
                torch.nn.Sequential(
                    torch.nn.Linear(prev_layer_size, 256),
                    torch.nn.ReLU(),
                    torch.nn.Linear(256, num_outputs),
                ) for i in range(5)
            ])
        else:
            # No logits head requested: report the trunk's output width
            # (last hidden size, or the flattened obs size if no hiddens).
            self.num_outputs = ([np.product(obs_space.shape)] +
                                hiddens[-1:])[-1]

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        self._value_branch_separate = []
        for size in hiddens:
            self._value_branch_separate.append(
                SlimFC(
                    in_size=prev_vf_layer_size,
                    out_size=size,
                    activation_fn=activation,
                    initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(
            *self._value_branch_separate)

    # Ensemble of 5 independent value heads (mirrors self._logits).
    # NOTE(review): in_size is `prev_layer_size` (policy-trunk width).
    # With a separate value trunk its last width is `prev_vf_layer_size`
    # (= hiddens[-1]); under `no_final_linear` prev_layer_size becomes
    # num_outputs instead — confirm configs keep these widths equal.
    self._value_branch = torch.nn.ModuleList([
        torch.nn.Sequential(
            torch.nn.Linear(prev_layer_size, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 1),
        ) for i in range(5)
    ])

    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
    self.value = None
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name):
    """TF vision (CNN) model.

    Builds a single keras Model producing two outputs: `conv_out`
    (policy trunk, width `num_outputs`) and `value_out` (scalar value),
    with the conv layers optionally shared between the two heads
    (`vf_share_layers`).
    """
    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    if not filters:
        # No explicit filter spec given: derive one from the obs shape.
        filters = _get_filter_config(obs_space.shape)
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    last_layer = inputs

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)
    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear:
        last_layer = tf.keras.layers.Conv2D(
            num_outputs,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        conv_out = last_layer
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            # NOTE(review): `i` is reused from the loop above; if
            # `filters` has exactly one entry that loop never runs and
            # this raises NameError — confirm filters always has >= 2
            # entries.
            name="conv{}".format(i + 1))(last_layer)
        # 1x1 conv maps the final conv features to `num_outputs`.
        conv_out = tf.keras.layers.Conv2D(
            num_outputs, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_out")(last_layer)

    # Build the value layers
    if vf_share_layers:
        # Reuse the policy trunk: collapse the (assumed 1x1) spatial
        # dims, then a single dense layer outputs the value.
        last_layer = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(i + 1))(last_layer)
        # 1x1 conv reduces the features to one value channel per cell.
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)