def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    super().__init__()
    self.dueling = dueling
    c, h, w = image_shape
    self.conv = Conv2dModel(
        in_channels=c,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 1, 1],
        use_maxpool=use_maxpool,
    )
    conv_out_size = self.conv.conv_out_size(h, w)
    if dueling:
        self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
    else:
        self.head = MlpModel(conv_out_size, fc_sizes, output_size)
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        lstm_size=256,
        nonlinearity=torch.nn.ReLU,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        ):
    super().__init__()
    self._obs_n_dim = len(observation_shape)
    self._action_size = action_size
    hidden_sizes = hidden_sizes or [256, 256]
    mlp_input_size = int(np.prod(observation_shape))
    self.mlp = MlpModel(
        input_size=mlp_input_size,
        hidden_sizes=hidden_sizes,
        output_size=None,
        nonlinearity=nonlinearity,
    )
    mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
    self.lstm = torch.nn.LSTM(mlp_output_size + action_size + 1, lstm_size)
    self.head = torch.nn.Linear(lstm_size, action_size * 2 + 1)
    if normalize_observation:
        self.obs_rms = RunningMeanStdModel(observation_shape)
        self.norm_obs_clip = norm_obs_clip
        self.norm_obs_var_clip = norm_obs_var_clip
    self.normalize_observation = normalize_observation
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    """Instantiates the neural network according to arguments; network
    defaults stored within this method."""
    super().__init__()
    self.dueling = dueling
    c, h, w = image_shape
    self.conv = Conv2dModel(
        in_channels=c,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 1, 1],
        use_maxpool=use_maxpool,
    )
    conv_out_size = self.conv.conv_out_size(h, w)
    if dueling:
        self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
    else:
        self.head = MlpModel(conv_out_size, fc_sizes, output_size)
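# Hedged worked example (not from the repo): with the default channels/kernels/strides/
# paddings above and a standard 84x84 Atari frame, the conv trunk output that feeds the
# head works out to 64 * 10 * 10 = 6400. The helper name below is illustrative only.

def _conv_out_hw(h, w, kernel_sizes=(8, 4, 3), strides=(4, 2, 1), paddings=(0, 1, 1)):
    """Plain-arithmetic check of the spatial size after each conv layer."""
    for k, s, p in zip(kernel_sizes, strides, paddings):
        h = (h + 2 * p - k) // s + 1
        w = (w + 2 * p - k) // s + 1
    return h, w


# 84x84 -> 20x20 -> 10x10 -> 10x10, so conv_out_size = 64 * 10 * 10 = 6400.
assert _conv_out_hw(84, 84) == (10, 10)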
def __init__(
        self,
        image_shape,
        latent_size,
        use_fourth_layer=True,
        skip_connections=True,
        hidden_sizes=None,
        kiaming_init=True,
        ):
    super().__init__()
    c, h, w = image_shape
    self.conv = DmlabConv2dModel(
        in_channels=c,
        use_fourth_layer=use_fourth_layer,  # Was hard-coded to True; pass the argument through.
        skip_connections=skip_connections,
        use_maxpool=False,
    )
    self._output_size = self.conv.output_size(h, w)
    self._output_shape = self.conv.output_shape(h, w)
    self.head = MlpModel(  # Gets to z_t, not necessarily c_t.
        input_size=self._output_size,
        hidden_sizes=hidden_sizes,
        output_size=latent_size,
    )
    if kiaming_init:
        self.apply(weight_init)
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # MLP after the conv head.
        fc_sizes=64,  # Between conv and MLP.
        channels=None,
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_maxpool=False,
        ):
    """Instantiate neural net according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self.conv = Conv2dHeadModel(
        image_shape=observation_shape,
        channels=channels or [4, 8],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )  # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU
    self.mlp = MlpModel(
        input_size=self.conv.output_size + action_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
def __init__(
        self,
        image_shape,
        latent_size,
        channels,
        kernel_sizes,
        strides,
        paddings=None,
        hidden_sizes=None,  # Usually None; NOT the same as anchor MLP.
        kiaming_init=True,
        ):
    super().__init__()
    c, h, w = image_shape
    self.conv = Conv2dModel(
        in_channels=c,
        channels=channels,
        kernel_sizes=kernel_sizes,
        strides=strides,
        paddings=paddings,
        use_maxpool=False,
    )
    self._output_size = self.conv.conv_out_size(h, w)
    self._output_shape = self.conv.conv_out_shape(h, w)
    self.head = MlpModel(
        input_size=self._output_size,
        hidden_sizes=hidden_sizes,
        output_size=latent_size,
    )
    if kiaming_init:
        self.apply(weight_init)
def __init__(
        self,
        observation_shape,
        action_size,
        policy_hidden_sizes=None,
        policy_hidden_nonlinearity=torch.nn.Tanh,
        value_hidden_sizes=None,
        value_hidden_nonlinearity=torch.nn.Tanh,
        init_log_std=0.,
        min_std=0.,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        policy_inputs_indices=None,
        ):
    super().__init__()
    self.min_std = min_std
    self._obs_ndim = len(observation_shape)
    input_size = int(np.prod(observation_shape))
    self.policy_inputs_indices = (policy_inputs_indices
        if policy_inputs_indices is not None else list(range(input_size)))
    policy_hidden_sizes = ([400, 300] if policy_hidden_sizes is None
        else policy_hidden_sizes)
    value_hidden_sizes = ([400, 300] if value_hidden_sizes is None
        else value_hidden_sizes)
    self.mu = MlpModel(
        input_size=len(self.policy_inputs_indices),
        hidden_sizes=policy_hidden_sizes,
        output_size=action_size,
        nonlinearity=policy_hidden_nonlinearity,
    )
    self.v = MlpModel(
        input_size=input_size,
        hidden_sizes=value_hidden_sizes,
        output_size=1,
        nonlinearity=value_hidden_nonlinearity,
    )
    self._log_std = torch.nn.Parameter(
        np.log(np.exp(init_log_std) - self.min_std) * torch.ones(action_size))
    if normalize_observation:
        self.obs_rms = RunningMeanStdModel(observation_shape)
        self.norm_obs_clip = norm_obs_clip
        self.norm_obs_var_clip = norm_obs_var_clip
    self.normalize_observation = normalize_observation
def __init__(self, latent_size, action_size, hidden_sizes):
    super().__init__()
    self.head = MlpModel(
        input_size=latent_size + action_size,
        hidden_sizes=hidden_sizes,
        output_size=latent_size * 2,
    )
    self._latent_size = latent_size
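# Hedged sketch (the forward pass is not shown above): an output of size 2 * latent_size
# is commonly split into the mean and log-variance of a Gaussian over the predicted next
# latent. Illustrative tensors only; the names below are not the repo's.

import torch

latent_size = 32
out = torch.randn(10, 2 * latent_size)        # e.g. head(torch.cat([z, action], dim=-1))
mu, log_var = out.split(latent_size, dim=-1)  # each of shape (10, 32)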
def __init__(self, latent_size, local_size, anchor_hidden_sizes):
    super().__init__()
    self.anchor_mlp = MlpModel(
        input_size=latent_size,
        hidden_sizes=anchor_hidden_sizes,
        output_size=latent_size,
    )
    self.W = torch.nn.Linear(latent_size, local_size, bias=False)
def __init__(
        self,
        input_size,
        hidden_sizes,
        output_size,
        grad_scale=2 ** (-1 / 2),
        ):
    super().__init__()
    if isinstance(hidden_sizes, int):
        hidden_sizes = [hidden_sizes]
    self.advantage_hidden = MlpModel(input_size, hidden_sizes)
    self.advantage_out = torch.nn.Linear(hidden_sizes[-1], output_size,
        bias=False)
    self.advantage_bias = torch.nn.Parameter(torch.zeros(1))
    self.value = MlpModel(input_size, hidden_sizes, output_size=1)
    self._grad_scale = grad_scale
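# Hedged sketch (the forward pass for this dueling head is not shown here): the standard
# dueling combination from Wang et al. (2016), which the value/advantage modules above
# support, is Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). Illustrative code only.

import torch


def dueling_combine(value, advantage):
    """Combine a value stream (B, 1) and an advantage stream (B, A) into Q-values (B, A)."""
    return value + advantage - advantage.mean(dim=-1, keepdim=True)


q = dueling_combine(torch.zeros(5, 1), torch.randn(5, 6))  # shape (5, 6)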
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,
        lstm_size=None,
        lstm_skip=True,
        constraint=True,
        hidden_nonlinearity="tanh",  # Or "relu".
        mu_nonlinearity="tanh",
        init_log_std=0.,
        normalize_observation=True,
        var_clip=1e-6,
        ):
    super().__init__()
    if hidden_nonlinearity == "tanh":  # So these can be strings in config file.
        hidden_nonlinearity = torch.nn.Tanh
    elif hidden_nonlinearity == "relu":
        hidden_nonlinearity = torch.nn.ReLU
    else:
        raise ValueError(
            f"Unrecognized hidden_nonlinearity string: {hidden_nonlinearity}")
    if mu_nonlinearity == "tanh":
        mu_nonlinearity = torch.nn.Tanh
    elif mu_nonlinearity == "relu":
        mu_nonlinearity = torch.nn.ReLU
    else:
        raise ValueError(
            f"Unrecognized mu_nonlinearity string: {mu_nonlinearity}")
    self._obs_ndim = len(observation_shape)
    input_size = int(np.prod(observation_shape))
    self.body = MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes or [256, 256],
        nonlinearity=hidden_nonlinearity,
    )
    last_size = self.body.output_size
    if lstm_size:
        lstm_input_size = last_size + action_size + 1
        self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)
        last_size = lstm_size
    else:
        self.lstm = None
    mu_linear = torch.nn.Linear(last_size, action_size)
    if mu_nonlinearity is not None:
        self.mu = torch.nn.Sequential(mu_linear, mu_nonlinearity())
    else:
        self.mu = mu_linear
    self.value = torch.nn.Linear(last_size, 1)
    if constraint:
        self.constraint = torch.nn.Linear(last_size, 1)
    else:
        self.constraint = None
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
    self._lstm_skip = lstm_skip
    if normalize_observation:
        self.obs_rms = RunningMeanStdModel(observation_shape)
        self.var_clip = var_clip
    self.normalize_observation = normalize_observation
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        normalize_observation=True,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        baselines_init=True,  # Orthogonal init with gain sqrt(2) until last layer, then 0.01 for policy, 1 for value.
        ):
    """Instantiate neural net modules according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape)
    input_size = int(np.prod(observation_shape))
    hidden_sizes = hidden_sizes or [64, 64]
    inits_mu = inits_v = None
    if baselines_init:
        inits_mu = (np.sqrt(2), 0.01)
        inits_v = (np.sqrt(2), 1.)
    mu_mlp = torch.jit.script(MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=action_size,
        nonlinearity=hidden_nonlinearity,
        inits=inits_mu,
    ))
    if mu_nonlinearity is not None:
        self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
    else:
        self.mu = mu_mlp
    self.v = torch.jit.script(MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
        nonlinearity=hidden_nonlinearity,
        inits=inits_v,
    ))
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
    if normalize_observation:
        self.obs_rms = RunningMeanStdModel(observation_shape)
        self.norm_obs_clip = norm_obs_clip
        self.norm_obs_var_clip = norm_obs_var_clip
    self.normalize_observation = normalize_observation
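# Hedged usage sketch: self.mu produces the action mean and self.log_std is a
# state-independent parameter; downstream, a Gaussian policy is typically formed roughly
# as below (torch.distributions used for illustration; the repo's own distribution class
# may differ).

import torch

mu = torch.zeros(4, 2)                    # e.g. self.mu(obs_flat) for batch 4, action_size 2
log_std = torch.zeros(2)                  # e.g. self.log_std
dist = torch.distributions.Normal(mu, log_std.exp())
action = dist.sample()                    # (4, 2)
logp = dist.log_prob(action).sum(-1)      # per-sample log-probability, shape (4,)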
class POMDPRnnShared0Rnn(nn.Module):

    def __init__(self,
                 input_classes: int,
                 output_size: int,
                 rnn_type: str = 'gru',
                 rnn_size: int = 256,
                 hidden_sizes: [List, Tuple] = None,
                 baselines_init: bool = True,
                 layer_norm: bool = False,
                 prev_action: int = 2,
                 prev_reward: int = 2,
                 ):
        super().__init__()
        self._obs_dim = 0
        self.rnn_is_lstm = rnn_type != 'gru'
        self.preprocessor = tscr(OneHotLayer(input_classes))
        rnn_class = get_rnn_class(rnn_type, layer_norm)
        rnn_input_size = input_classes
        if prev_action:
            rnn_input_size += output_size  # Use previous action as input.
        if prev_reward:
            rnn_input_size += 1  # Use previous reward as input.
        self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward.
        self.body = MlpModel(rnn_size, hidden_sizes, None, nn.ReLU, None)
        self.pi = nn.Sequential(nn.Linear(self.body.output_size, output_size),
                                nn.Softmax(-1))
        self.v = nn.Linear(self.body.output_size, 1)
        if baselines_init:
            self.rnn.apply(apply_init)
            self.body.apply(apply_init)
            self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
            self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
        self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)
        self.p_a = prev_action > 0
        self.p_r = prev_reward > 0

    def forward(self, observation, prev_action, prev_reward, init_rnn_state):
        lead_dim, T, B, _ = infer_leading_dims(observation, self._obs_dim)
        if init_rnn_state is not None and self.rnn_is_lstm:
            init_rnn_state = tuple(init_rnn_state)  # namedarraytuple -> tuple (h, c).
        oh = self.preprocessor(observation)  # Leave in TxB format for lstm.
        inp_list = ([oh.view(T, B, -1)]
            + ([prev_action.view(T, B, -1)] if self.p_a else [])
            + ([prev_reward.view(T, B, 1)] if self.p_r else []))
        rnn_input = torch.cat(inp_list, dim=2)
        rnn_out, next_rnn_state = self.rnn(rnn_input, init_rnn_state)
        rnn_out = rnn_out.view(T * B, -1)
        rnn_out = self.body(rnn_out)
        pi, v = self.pi(rnn_out), self.v(rnn_out).squeeze(-1)
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        if self.rnn_is_lstm:
            next_rnn_state = RnnState(next_rnn_state)
        return pi, v, next_rnn_state
def __init__(self,
             input_classes: int,
             output_size: int,
             hidden_sizes: [List, Tuple, None] = None,
             inits: [(float, float, float), None] = (np.sqrt(2), 1., 0.01),
             nonlinearity: nn.Module = nn.ReLU,
             shared_processor: bool = False,
             ):
    super().__init__()
    self._obs_ndim = 0
    if shared_processor:
        self.preprocessor = tscr(nn.Sequential(
            OneHotLayer(input_classes),
            MlpModel(input_classes, hidden_sizes, None, nonlinearity,
                     inits[:-1] if inits is not None else inits),
        ))
        self.v = tscr(layer_init(nn.Linear(hidden_sizes[-1], 1), inits[1])
                      if inits else nn.Linear(hidden_sizes[-1], 1))
        self.pi = tscr(nn.Sequential(
            # Policy gain inits[2] (0.01), matching the non-shared branch below; was inits[1].
            layer_init(nn.Linear(hidden_sizes[-1], output_size), inits[2])
            if inits else nn.Linear(hidden_sizes[-1], output_size),
            nn.Softmax(-1),
        ))
    else:
        self.preprocessor = tscr(OneHotLayer(input_classes))
        self.v = tscr(MlpModel(input_classes, hidden_sizes, 1, nonlinearity,
                               inits[:-1] if inits is not None else inits))
        self.pi = tscr(nn.Sequential(
            MlpModel(input_classes, hidden_sizes, output_size, nonlinearity,
                     inits[0::2] if inits is not None else inits),
            nn.Softmax(-1),
        ))
def __init__(
        self,
        input_size,
        action_size,
        hidden_sizes,
        ):
    super().__init__()
    self.mlp1 = MlpModel(
        input_size=input_size + action_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
    self.mlp2 = MlpModel(
        input_size=input_size + action_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
    self.apply(weight_init)
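# Hedged sketch: two identical Q-MLPs (mlp1, mlp2) are the usual twin-critic setup from
# TD3/SAC, where the target uses the minimum of the two estimates to curb overestimation.
# Illustrative tensors only; the actual forward/target computation is not shown here.

import torch

q1 = torch.randn(64, 1)                 # e.g. mlp1(torch.cat([obs, action], dim=-1))
q2 = torch.randn(64, 1)                 # e.g. mlp2(torch.cat([obs, action], dim=-1))
q_min = torch.min(q1, q2)               # clipped double-Q estimate for the target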
def __init__(
        self,
        input_shape,  # Must be 1D.
        hidden_sizes,
        output_size=None,
        nonlinearity=torch.nn.Identity,
        ):
    """Instantiate MLP feature extractor. Does not support parameter sharing
    with base network."""
    super().__init__()
    self.extractor = MlpModel(input_shape, hidden_sizes, output_size,
        nonlinearity)
def __init__(self, input_shape, output_size, hidden_sizes=[256, 256],
             action_mask=True):
    """Instantiates the neural network according to arguments; network
    defaults stored within this method."""
    super().__init__()
    self.head = MlpModel(input_shape, hidden_sizes, output_size)
    self.action_mask = action_mask
def __init__(self, latent_size, anchor_hidden_sizes):
    super().__init__()
    if anchor_hidden_sizes is not None:
        self.anchor_mlp = MlpModel(
            input_size=latent_size,
            hidden_sizes=anchor_hidden_sizes,
            output_size=latent_size,
        )
    else:
        self.anchor_mlp = None
    self.W = torch.nn.Linear(latent_size, latent_size, bias=False)
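# Hedged sketch (the forward pass is not shown above): bias-free bilinear heads like
# self.W are typically used for CPC/CURL-style contrastive scores, roughly
# logits[i, j] = anchor_i^T W positive_j, with diagonal entries as the positive pairs.
# Illustrative only; the repo's forward may also route anchors through anchor_mlp first.

import torch

latent_size = 8
W = torch.nn.Linear(latent_size, latent_size, bias=False)
anchors = torch.randn(16, latent_size)
positives = torch.randn(16, latent_size)
logits = anchors @ W(positives).t()     # (16, 16) score matrix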
class CartpoleFfModel(torch.nn.Module):

    def __init__(
            self,
            image_shape,
            output_size,
            fc_sizes=[64, 64],
            basis=None,
            gain_type="xavier",
            out=None,
            ):
        super().__init__()
        input_size = image_shape[0]
        # Main body.
        self.head = MlpModel(input_size, fc_sizes)
        # Policy output.
        self.pi = torch.nn.Linear(fc_sizes[-1], output_size)
        # Value output.
        self.value = torch.nn.Linear(fc_sizes[-1], 1)
        if gain_type == "xavier":
            self.head.apply(weight_init)
            self.pi.apply(weight_init)
            self.value.apply(weight_init)

    def forward(self, in_state, prev_action, prev_reward):
        """Feedforward layers process as [T*B,H]. Return same leading dims as
        input, can be [T,B], [B], or []."""
        state = in_state.type(torch.float)  # Expect torch.uint8 inputs.
        # Infer (presence of) leading dimensions: [T,B], [B], or [].
        lead_dim, T, B, state_shape = infer_leading_dims(state, 1)
        base = self.head(state.view(T * B, -1))
        pi = F.softmax(self.pi(base), dim=-1)
        v = self.value(base).squeeze(-1)
        # Restore leading dimensions: [T,B], [B], or [], as input.
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        return pi, v
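# Hedged usage sketch for CartpoleFfModel (assumes the rlpyt-style helpers used above are
# importable; CartPole observations are 4-dimensional, and prev_action/prev_reward are
# accepted but unused by this forward).

import torch

model = CartpoleFfModel(image_shape=(4,), output_size=2)
obs = torch.zeros(7, 3, 4)            # leading dims [T=7, B=3], 4 state features
pi, v = model(obs, prev_action=None, prev_reward=None)
# pi: (7, 3, 2) action probabilities; v: (7, 3) state values.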
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        pooling="average",
        ):
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self._n_pop = observation_shape[0]
    input_size = int(observation_shape[-1])
    output_size = int(action_size[-1])
    hidden_sizes = hidden_sizes or [64, 64]
    self.pooling = pooling
    # self.pool = self.make_pooler(pooling)
    if self.pooling is not None:
        input_size *= 2
    mu_mlp = MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=output_size,
        nonlinearity=hidden_nonlinearity,
    )
    if mu_nonlinearity is not None:
        self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
    else:
        self.mu = mu_mlp
    self.v = MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
        nonlinearity=hidden_nonlinearity,
    )
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        ):
    super().__init__()
    assert hasattr(observation_shape, 'camera'), \
        "VisionFfModel requires observation to contain 'camera' attr"
    assert hasattr(observation_shape, 'robot_state'), \
        "VisionFfModel requires observation to contain 'robot_state' attr"
    self.height, self.width, self.channels = observation_shape.camera
    robot_state_shape = observation_shape.robot_state[0]
    self.conv = Conv2dModel(
        in_channels=self.channels,
        channels=[9, 18],
        kernel_sizes=[3, 3],
        strides=[2, 2],
        paddings=[1, 1],
    )
    conv_out_size = self.conv.conv_out_size(self.height, self.width)
    robot_state_out = 256
    self.robot_state_mlp = MlpModel(
        input_size=robot_state_shape,
        hidden_sizes=[256, ],
        output_size=robot_state_out,
    )
    self.mu_head = MlpModel(
        input_size=robot_state_out + conv_out_size,
        hidden_sizes=[256, ],
        output_size=action_size,
    )
    self.value_head = MlpModel(
        input_size=robot_state_out + conv_out_size,
        hidden_sizes=[256, ],
        output_size=1,
    )
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
def __init__(self, input_shape, output_size, fc_sizes=[128, 128, 128],
             dueling=False):
    """Instantiates the neural network according to arguments; network
    defaults stored within this method."""
    super().__init__()
    if dueling:
        self.head = DuelingHeadModel(input_shape, fc_sizes, output_size)
    else:
        self.head = MlpModel(input_shape, fc_sizes, output_size)
def __init__(self,
             input_shape: Tuple,
             output_size: int,
             hidden_sizes: [List, Tuple, None] = None,
             nonlinearity: nn.Module = nn.ReLU,
             ):
    super().__init__()
    self._obs_ndim = 2  # All bsuite obs are 2D (even (1, 1)).
    input_size = input_shape[0] * input_shape[1]
    self.preprocessor = MlpModel(input_size, hidden_sizes, None, nonlinearity)
    self.v = tscr(nn.Linear(self.preprocessor.output_size, 1))
    self.pi = tscr(nn.Sequential(
        nn.Linear(self.preprocessor.output_size, output_size),
        nn.Softmax(-1),
    ))
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        ):
    """Instantiate neural net modules according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape)
    input_size = int(np.prod(observation_shape))
    hidden_sizes = hidden_sizes or [64, 64]
    mu_mlp = MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=action_size,
        nonlinearity=hidden_nonlinearity,
    )
    if mu_nonlinearity is not None:
        self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
    else:
        self.mu = mu_mlp
    self.v = MlpModel(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
        nonlinearity=hidden_nonlinearity,
    )
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
    if normalize_observation:
        self.obs_rms = RunningMeanStdModel(observation_shape)
        self.norm_obs_clip = norm_obs_clip
        self.norm_obs_var_clip = norm_obs_var_clip
    self.normalize_observation = normalize_observation
class BsuiteRnnShared1Rnn(nn.Module):

    def __init__(self,
                 input_shape: Tuple,
                 output_size: int,
                 rnn_type: str = 'gru',
                 rnn_size: int = 256,
                 hidden_sizes: [List, Tuple] = None,
                 baselines_init: bool = True,
                 layer_norm: bool = False,
                 ):
        super().__init__()
        self._obs_dim = 2
        self.rnn_is_lstm = rnn_type != 'gru'
        input_size = int(np.prod(input_shape))
        rnn_class = get_rnn_class(rnn_type, layer_norm)
        self.body = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
        self.rnn = rnn_class(self.body.output_size + output_size + 1, rnn_size)  # Concat action, reward.
        self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size),
                                nn.Softmax(-1))
        self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
        if baselines_init:
            self.rnn.apply(apply_init)
            self.body.apply(apply_init)
            self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
            self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
        self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)

    def forward(self, observation, prev_action, prev_reward, init_rnn_state):
        lead_dim, T, B, _ = infer_leading_dims(observation, self._obs_dim)
        if init_rnn_state is not None and self.rnn_is_lstm:
            init_rnn_state = tuple(init_rnn_state)  # namedarraytuple -> tuple (h, c).
        features = self.body(observation.view(T * B, -1))
        rnn_input = torch.cat([
            features.view(T, B, -1),
            prev_action.view(T, B, -1),  # Assumed onehot.
            prev_reward.view(T, B, 1),
        ], dim=2)
        rnn_out, next_rnn_state = self.rnn(rnn_input, init_rnn_state)
        rnn_out = rnn_out.view(T * B, -1)
        pi, v = self.pi(rnn_out), self.v(rnn_out).squeeze(-1)
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        if self.rnn_is_lstm:
            next_rnn_state = RnnState(next_rnn_state)
        return pi, v, next_rnn_state
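# Hedged usage sketch for BsuiteRnnShared1Rnn (assumes the rlpyt-style helpers used above
# are importable; with a GRU the recurrent state is a single tensor, so init_rnn_state=None
# is fine for a fresh rollout).

import torch

model = BsuiteRnnShared1Rnn(input_shape=(1, 3), output_size=4, hidden_sizes=[64])
obs = torch.zeros(5, 2, 1, 3)         # [T=5, B=2] plus the 2D bsuite observation
prev_a = torch.zeros(5, 2, 4)         # onehot previous actions
prev_r = torch.zeros(5, 2)            # previous rewards
pi, v, h = model(obs, prev_a, prev_r, init_rnn_state=None)
# pi: (5, 2, 4), v: (5, 2), h: final GRU state of shape (1, 2, 256).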
def __init__(
        self,
        image_shape,
        action_size,
        hidden_sizes=512,
        stop_conv_grad=False,
        channels=None,  # Defaults below.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        kiaming_init=True,
        normalize_conv_out=False,
        ):
    super().__init__()
    c, h, w = image_shape
    self.conv = Conv2dModel(
        in_channels=c,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings,
    )
    self._conv_out_size = self.conv.conv_out_size(h=h, w=w)
    self.pi_v_mlp = MlpModel(
        input_size=self._conv_out_size,
        hidden_sizes=hidden_sizes,
        output_size=action_size + 1,
    )
    if kiaming_init:
        self.apply(weight_init)
    self.stop_conv_grad = stop_conv_grad
    logger.log("Model stopping gradient at CONV." if stop_conv_grad
        else "Model using gradients on all parameters.")
    if normalize_conv_out:  # Haven't seen this make a difference yet.
        logger.log("Model normalizing conv output across all pixels.")
        self.conv_rms = RunningMeanStdModel((1,))
        self.var_clip = 1e-6
    self.normalize_conv_out = normalize_conv_out
def __init__(self,
             input_shape: Tuple,
             output_size: int,
             rnn_type: str = 'gru',
             rnn_size: int = 256,
             hidden_sizes: [List, Tuple] = None,
             baselines_init: bool = True,
             layer_norm: bool = False,
             ):
    super().__init__()
    self._obs_dim = 2
    self.rnn_is_lstm = rnn_type != 'gru'
    input_size = int(np.prod(input_shape))
    rnn_class = get_rnn_class(rnn_type, layer_norm)
    self.rnn = rnn_class(input_size + output_size + 1, rnn_size)  # Concat action, reward.
    pi_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['pi']) if baselines_init else None
    v_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['v']) if baselines_init else None
    self.pi = nn.Sequential(
        MlpModel(rnn_size, hidden_sizes, output_size, nn.ReLU, pi_inits),
        nn.Softmax(-1))
    self.v = nn.Sequential(MlpModel(rnn_size, hidden_sizes, 1, nn.ReLU, v_inits))
    if baselines_init:
        self.rnn.apply(apply_init)
    self.pi, self.v = tscr(self.pi), tscr(self.v)
def __init__(
        self,
        observation_shape,
        hidden_sizes,
        action_size,
        ):
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self.mlp = MlpModel(
        input_size=int(np.prod(observation_shape)) + action_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
def __init__(
        self,
        observation_shape,
        hidden_sizes,
        action_size=None,  # Unused but accept kwarg.
        ):
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self.mlp = MlpModel(
        input_size=int(np.prod(observation_shape)),
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        init_log_std=0.,
        normalize_observation=False,
        linear_value_output=True,
        norm_obs_clip=10,
        full_covariance=False,
        norm_obs_var_clip=1e-6,
        ):
    """Instantiate neural net modules according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape.state)
    input_size = int(np.prod(observation_shape.state))
    self.full_covariance = full_covariance
    hidden_sizes = hidden_sizes or [256, 256]
    self.action_size = action_size
    self.shared_features_dim = 256
    self.softplus = torch.nn.Softplus()
    self.shared_mlp = MlpModel(
        input_size=input_size,
        hidden_sizes=[512, self.shared_features_dim],
    )
    self.mu_head = MlpModel(
        input_size=input_size,
        hidden_sizes=[256, 256],
        # Full covariance: means plus lower-triangular Cholesky entries;
        # otherwise means and (diagonal) stds.
        output_size=(action_size + np.sum(1 + np.arange(self.action_size))
            if full_covariance else 2 * action_size),
    )
    self.layer_norm = torch.nn.LayerNorm(input_size)
    self.v_head = MlpModel(
        input_size=input_size,
        hidden_sizes=[256, 256],
        output_size=1 if linear_value_output else None,
    )
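# Hedged arithmetic check of the mu_head output size above (illustrative only): with
# action_size = 3, full_covariance=True gives 3 means + (1 + 2 + 3) = 6 lower-triangular
# Cholesky entries = 9 outputs, while full_covariance=False gives 2 * 3 = 6 (means and stds).

import numpy as np

action_size = 3
full_cov_out = action_size + int(np.sum(1 + np.arange(action_size)))  # 9
diag_out = 2 * action_size                                            # 6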