def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        baselines_init=True,
        init_v=1.,
        init_pi=.01,
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    if channels is not None:
        self.conv = Conv2dHeadModel(
            image_shape=image_shape,
            channels=channels,
            kernel_sizes=kernel_sizes,
            strides=strides,
            paddings=paddings,
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )
    else:
        self.conv = Conv2dHeadModel(image_shape=image_shape, **CONVNET_DQN,
            hidden_sizes=fc_sizes)
    # self.pi = torch.jit.script(layer_init(torch.nn.Linear(self.conv.output_size, output_size), init_pi))
    # self.value = torch.jit.script(layer_init(torch.nn.Linear(self.conv.output_size, 1), init_v))
    # self.conv = torch.jit.script(self.conv)
    self.pi = layer_init(torch.nn.Linear(self.conv.output_size, output_size), init_pi)
    self.value = layer_init(torch.nn.Linear(self.conv.output_size, 1), init_v)
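# The heads above rely on `layer_init`. A minimal sketch of a baselines-style
# initializer, assuming orthogonal weights with a given gain and zero bias
# (consistent with init_pi=.01 for the policy head and init_v=1. for the
# value head); this is an illustration, not necessarily this repo's definition.
import torch

def layer_init_sketch(layer, gain=1.0):
    torch.nn.init.orthogonal_(layer.weight, gain=gain)
    torch.nn.init.constant_(layer.bias, 0.)
    return layer

# A small gain on the policy head keeps initial action logits near-uniform:
pi_head = layer_init_sketch(torch.nn.Linear(512, 6), gain=0.01)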
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,  # Between conv and lstm.
        lstm_size=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    super().__init__()
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [16, 32],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )
    self.lstm = torch.nn.LSTM(self.conv.output_size + output_size + 1, lstm_size)
    self.pi = torch.nn.Linear(lstm_size, output_size)
    self.value = torch.nn.Linear(lstm_size, 1)
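# Hedged sketch of the concatenation the LSTM input size above implies
# (conv features + one-hot previous action + previous reward). Names and
# shapes are illustrative, not this model's forward().
import torch
import torch.nn.functional as F

def lstm_input_sketch(features, prev_action, prev_reward, output_size):
    # features: [T, B, conv_output]; prev_action: [T, B] long; prev_reward: [T, B].
    onehot = F.one_hot(prev_action, output_size).float()
    return torch.cat([features, onehot, prev_reward.unsqueeze(-1)], dim=-1)

x = lstm_input_sketch(torch.randn(5, 4, 512), torch.randint(0, 6, (5, 4)),
                      torch.randn(5, 4), output_size=6)
assert x.shape == (5, 4, 512 + 6 + 1)  # Matches the LSTM input size.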
def __init__(
        self,
        image_shape,
        output_size,
        fc_size=512,  # Between conv and lstm.
        lstm_size=512,
        head_size=512,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    """Instantiates the neural network according to arguments; network
    defaults stored within this method."""
    super().__init__()
    self.dueling = dueling
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 1, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_size,  # ReLU applied here (Steven).
    )
    self.lstm = torch.nn.LSTM(self.conv.output_size + output_size + 1, lstm_size)
    if dueling:
        self.head = DuelingHeadModel(lstm_size, head_size, output_size)
    else:
        self.head = MlpModel(lstm_size, head_size, output_size=output_size)
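# Hedged sketch of the dueling decomposition a dueling head computes
# (Wang et al., 2016): Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); the mean
# subtraction makes the V/A split identifiable. DuelingHeadModel's actual
# internals live in rlpyt; this module is only the core identity.
import torch

class DuelingHeadSketch(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.advantage = torch.nn.Sequential(
            torch.nn.Linear(input_size, hidden_size), torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, output_size))
        self.value = torch.nn.Sequential(
            torch.nn.Linear(input_size, hidden_size), torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, 1))

    def forward(self, x):
        a = self.advantage(x)
        return self.value(x) + a - a.mean(dim=-1, keepdim=True)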
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [16, 32],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )
    self.pi = torch.jit.script(
        torch.nn.Linear(self.conv.output_size, output_size))
    self.value = torch.jit.script(torch.nn.Linear(self.conv.output_size, 1))
    self.conv = torch.jit.script(self.conv)
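# torch.jit.script wraps a module without changing its math, so the scripted
# heads should agree with their eager counterparts. A quick sanity check
# (illustrative, standalone):
import torch

eager = torch.nn.Linear(512, 6)
scripted = torch.jit.script(eager)
x = torch.randn(8, 512)
assert torch.allclose(eager(x), scripted(x))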
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # MLP after lstm.
        fc_sizes=128,  # Between conv and lstm.
        lstm_size=64,
        channels=[8, 16],
        kernel_sizes=[8, 4],
        strides=[4, 2],
        paddings=[0, 1],
        use_maxpool=False,
        ):
    """Instantiate neural net according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self.conv = Conv2dHeadModel(
        image_shape=observation_shape,
        channels=channels,
        kernel_sizes=kernel_sizes,
        strides=strides,
        paddings=paddings,
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )  # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU.
    # Input to LSTM: conv_output + prev_action + prev_reward + action.
    self.lstm = torch.nn.LSTM(
        self.conv.output_size + action_size + 1 + action_size, lstm_size)
    self.mlp = MlpModel(
        input_size=lstm_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
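# Hedged sketch of this critic's LSTM input, following the comment above:
# conv features, previous action, previous reward, and the *current* action
# being evaluated (hence action_size appears twice). Assumes continuous
# actions passed as float vectors; illustrative, not this model's forward().
import torch

def q_lstm_input_sketch(features, prev_action, prev_reward, action):
    # features: [T, B, F]; prev_action, action: [T, B, action_size];
    # prev_reward: [T, B].
    return torch.cat(
        [features, prev_action, prev_reward.unsqueeze(-1), action], dim=-1)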
def __init__(
        self,
        image_shape,
        output_size,
        fc_size=512,  # Between conv and lstm.
        lstm_size=512,
        head_size=512,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    super().__init__()
    self.dueling = dueling
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 1, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_size,
    )
    self.lstm = torch.nn.LSTM(self.conv.output_size + output_size + 1, lstm_size)
    if dueling:
        self.head = DuelingHeadModel(lstm_size, head_size, output_size)
    else:
        self.head = MlpModel(lstm_size, head_size, output_size=output_size)
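# torch.nn.LSTM (num_layers=1 here) takes [T, B, input] sequences and an
# (h0, c0) pair shaped [num_layers, B, lstm_size]; zeros are the usual
# initial state at episode boundaries. Illustrative shapes:
import torch

lstm = torch.nn.LSTM(512 + 6 + 1, 512)
x = torch.randn(5, 4, 512 + 6 + 1)          # [T, B, input_size]
h0, c0 = torch.zeros(1, 4, 512), torch.zeros(1, 4, 512)
out, (hn, cn) = lstm(x, (h0, c0))
assert out.shape == (5, 4, 512)             # Per-step features for the head.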
def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # MLP after conv.
        fc_sizes=64,  # Between conv and mlp.
        channels=None,
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_maxpool=False,
        ):
    """Instantiate neural net according to inputs."""
    super().__init__()
    self._obs_ndim = len(observation_shape)
    self.conv = Conv2dHeadModel(
        image_shape=observation_shape,
        channels=channels or [4, 8],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )  # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU.
    self.mlp = MlpModel(
        input_size=self.conv.output_size + action_size,
        hidden_sizes=hidden_sizes,
        output_size=1,
    )
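# Usage sketch of Conv2dHeadModel on its own (assumes rlpyt is installed):
# with hidden_sizes set, it flattens the conv features, applies the linear
# layer plus nonlinearity, and reports the result width as .output_size.
import torch
from rlpyt.models.conv2d import Conv2dHeadModel

conv = Conv2dHeadModel(image_shape=(3, 32, 32), channels=[4, 8],
                       kernel_sizes=[8, 4], strides=[4, 2], paddings=[0, 1],
                       hidden_sizes=64)
features = conv(torch.randn(2, 3, 32, 32))
assert features.shape == (2, conv.output_size)  # output_size == 64 here.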
def __init__(
        self,
        observation_shape,
        action_size,
        fc_sizes=32,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=None,  # Module form, e.g. torch.nn.Tanh.
        init_log_std=0.,
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    self.conv = Conv2dHeadModel(
        image_shape=observation_shape,
        channels=channels or [16, 32],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )
    mu_mlp = MlpModel(
        input_size=self.conv.output_size,
        hidden_sizes=fc_sizes,
        output_size=action_size,
        nonlinearity=hidden_nonlinearity,
    )
    if mu_nonlinearity is not None:
        self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
    else:
        self.mu = mu_mlp
    self.v = MlpModel(
        input_size=self.conv.output_size,
        hidden_sizes=fc_sizes,
        output_size=1,
        nonlinearity=hidden_nonlinearity,
    )
    self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
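# Hedged sketch of how a state-independent log_std typically completes the
# Gaussian policy: mu comes from the network, the std is exp(log_std)
# broadcast over the batch. Illustrative tensors, not this repo's agent code.
import torch

mu = torch.zeros(4, 2)                             # stand-in for self.mu(features)
log_std = torch.nn.Parameter(0. * torch.ones(2))   # init_log_std=0. above
dist = torch.distributions.Normal(mu, log_std.exp().expand_as(mu))
action = dist.sample()                             # [B, action_size]
logp = dist.log_prob(action).sum(-1)               # Sum over action dims.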
def __init__(
        self,
        image_shape,
        output_size,
        num_options,
        fc_sizes=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_interest=False,
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    if channels is not None:
        self.conv = Conv2dHeadModel(
            image_shape=image_shape,
            channels=channels,
            kernel_sizes=kernel_sizes,
            strides=strides,
            paddings=paddings,
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )
    else:
        self.conv = Conv2dHeadModel(image_shape=image_shape, **CONVNET_DQN,
            hidden_sizes=fc_sizes)
    self.pi = DiscreteIntraOptionPolicy(self.conv.output_size, num_options,
        output_size, True)
    self.q = torch.nn.Linear(self.conv.output_size, num_options)
    self.beta = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, num_options),
        torch.nn.Sigmoid())
    self.pi_omega = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, num_options),
        torch.nn.Softmax(dim=-1))
    self.I = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, num_options),
        torch.nn.Sigmoid()) if use_interest else Dummy(num_options)
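# Hedged sketch of how option-critic heads like these are typically consumed
# (Bacon et al., 2017): pi_omega proposes an option, beta gives per-option
# termination probabilities, q scores options. Illustrative only, not this
# repo's agent loop.
import torch

B, num_options = 4, 4
q = torch.randn(B, num_options)                       # self.q(features)
pi_omega = torch.softmax(torch.randn(B, num_options), dim=-1)
beta = torch.sigmoid(torch.randn(B, num_options))
option = torch.multinomial(pi_omega, 1)               # Sample option, [B, 1].
terminate = torch.bernoulli(beta.gather(-1, option))  # Option termination.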
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=128,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        scale_obs=True,  # Whether to scale observations.
        obs_mean=4.,  # Mean to subtract.
        obs_scale=8.,  # Scale to apply.
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    if channels is not None:  # Override defaults.
        self.conv = Conv2dHeadModel(
            image_shape=image_shape,
            channels=channels or [16, 32],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )
    else:
        self.conv = Conv2dHeadModel(image_shape=image_shape,
            hidden_sizes=fc_sizes, **CONVNET_MINIGRID_TINY)
    self.pi = torch.nn.Linear(self.conv.output_size, output_size)
    self.value = torch.nn.Linear(self.conv.output_size, 1)
    self.scaler = nn.Identity() if not scale_obs else ObsScaler(obs_mean, obs_scale)
    self.conv = nn.Sequential(self.scaler, self.conv)
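# ObsScaler is assumed here to be a fixed affine normalizer,
# (obs - obs_mean) / obs_scale, wrapped as a module so it can be composed
# with the conv stack via nn.Sequential. A minimal sketch of that reading:
import torch

class ObsScalerSketch(torch.nn.Module):
    def __init__(self, mean, scale):
        super().__init__()
        self.mean, self.scale = mean, scale

    def forward(self, obs):
        return (obs - self.mean) / self.scale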
def __init__(
        self,
        image_shape,
        output_size,
        fc_size=512,  # Between conv and lstm.
        lstm_size=512,
        head_size=512,
        use_recurrence=True,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    """Instantiates the neural network according to arguments; network
    defaults stored within this method."""
    super().__init__()
    self.use_recurrence = use_recurrence
    self.dueling = dueling
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 1, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_size,  # ReLU applied here (Steven).
    )
    if self.use_recurrence:
        self.rnn = torch.nn.GRUCell(
            input_size=(self.conv.output_size + output_size + 1),
            hidden_size=lstm_size)
    else:
        self.rnn = nn.Sequential(
            nn.Linear(self.conv.output_size + output_size + 1, lstm_size),
            nn.ReLU())
    if dueling:
        self.head = DuelingHeadModel(lstm_size, head_size, output_size)
    else:
        self.head = MlpModel(lstm_size, head_size, output_size=output_size)
    print('model initialized', self)  # NOTE: for debug purposes.
    # Logging gradients.
    self.prev_hs_pre_grad = None
    self.prev_hs_rec_grad = None
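# Unlike torch.nn.LSTM, torch.nn.GRUCell advances one timestep per call with
# a [B, hidden] state, so sequences are consumed with an explicit loop, which
# is what makes per-step hidden-state gradient logging straightforward. Sketch:
import torch

cell = torch.nn.GRUCell(input_size=512 + 6 + 1, hidden_size=512)
x = torch.randn(5, 4, 512 + 6 + 1)      # [T, B, input_size]
h = torch.zeros(4, 512)                 # GRUCell state is [B, hidden_size].
outs = []
for t in range(x.shape[0]):
    h = cell(x[t], h)
    outs.append(h)
out = torch.stack(outs)                 # [T, B, hidden_size]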
def __init__(
        self,
        image_shape,
        output_size,
        option_size,
        fc_sizes=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        baselines_init=True,  # Orthogonal init, gain sqrt(2) until the last
                              # layer, then 0.01 for policy, 1 for value.
        use_interest=False,  # IOC sigmoid interest functions.
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [16, 32],
        kernel_sizes=kernel_sizes or [8, 4],
        strides=strides or [4, 2],
        paddings=paddings or [0, 1],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )
    self.use_interest = use_interest
    self.pi = DiscreteIntraOptionPolicy(self.conv.output_size, option_size,
        output_size, ortho_init=baselines_init)
    self.q = torch.nn.Linear(self.conv.output_size, option_size)
    self.beta = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, option_size),
        torch.nn.Sigmoid())
    self.pi_omega = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, option_size),
        torch.nn.Softmax(-1))
    self.pi_omega_I = torch.nn.Sequential(
        torch.nn.Linear(self.conv.output_size, option_size),
        torch.nn.Sigmoid()) if use_interest else Dummy(option_size)
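# Dummy(option_size) is assumed to be the no-op interest function used when
# use_interest=False: emitting all-ones so interest-gating of pi_omega
# reduces to plain option-critic. A minimal sketch of that assumption:
import torch

class DummySketch(torch.nn.Module):
    def __init__(self, size):
        super().__init__()
        self.size = size

    def forward(self, x):
        return torch.ones(*x.shape[:-1], self.size, device=x.device)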
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        ):
    super().__init__()
    self.conv = Conv2dHeadModel(
        image_shape=image_shape,
        channels=channels or [32, 64, 64],
        kernel_sizes=kernel_sizes or [8, 4, 3],
        strides=strides or [4, 2, 1],
        paddings=paddings or [0, 0, 0],
        use_maxpool=use_maxpool,
        hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
    )
    self.pi = torch.nn.Linear(self.conv.output_size, output_size)
    self.value = torch.nn.Linear(self.conv.output_size, 1)
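# Each conv layer shrinks the spatial size as floor((W - K + 2P) / S) + 1.
# With the defaults above and an 84x84 input: 84 -> 20 -> 9 -> 7, giving
# 64 * 7 * 7 = 3136 features ahead of the fc_sizes linear layer. Quick check:
def conv_out(w, k, s, p=0):
    return (w - k + 2 * p) // s + 1

w = 84
for k, s in zip([8, 4, 3], [4, 2, 1]):
    w = conv_out(w, k, s)
assert w == 7 and 64 * w * w == 3136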
def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=512,  # Between conv and lstm.
        lstm_size=512,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        curiosity_kwargs=dict(curiosity_alg='none'),
        obs_stats=None,
        ):
    """Instantiate neural net module according to inputs."""
    super().__init__()
    self.obs_stats = obs_stats
    if self.obs_stats is not None:
        self.obs_mean, self.obs_std = self.obs_stats

    if curiosity_kwargs['curiosity_alg'] != 'none':
        if curiosity_kwargs['curiosity_alg'] == 'icm':
            self.curiosity_model = ICM(
                image_shape=image_shape,
                action_size=output_size,
                feature_encoding=curiosity_kwargs['feature_encoding'],
                batch_norm=curiosity_kwargs['batch_norm'],
                prediction_beta=curiosity_kwargs['prediction_beta'],
                obs_stats=self.obs_stats,
                forward_loss_wt=curiosity_kwargs['forward_loss_wt'])
        elif curiosity_kwargs['curiosity_alg'] == 'disagreement':
            self.curiosity_model = Disagreement(
                image_shape=image_shape,
                action_size=output_size,
                feature_encoding=curiosity_kwargs['feature_encoding'],
                batch_norm=curiosity_kwargs['batch_norm'],
                prediction_beta=curiosity_kwargs['prediction_beta'],
                obs_stats=self.obs_stats,
                device=curiosity_kwargs['device'],
                forward_loss_wt=curiosity_kwargs['forward_loss_wt'])
        elif curiosity_kwargs['curiosity_alg'] == 'ndigo':
            self.curiosity_model = NDIGO(
                image_shape=image_shape,
                action_size=output_size,
                obs_stats=self.obs_stats,
                horizon=curiosity_kwargs['pred_horizon'],
                feature_encoding=curiosity_kwargs['feature_encoding'],
                batch_norm=curiosity_kwargs['batch_norm'],
                num_predictors=curiosity_kwargs['num_predictors'],
                device=curiosity_kwargs['device'])
        elif curiosity_kwargs['curiosity_alg'] == 'rnd':
            self.curiosity_model = RND(
                image_shape=image_shape,
                prediction_beta=curiosity_kwargs['prediction_beta'],
                drop_probability=curiosity_kwargs['drop_probability'],
                gamma=curiosity_kwargs['gamma'],
                device=curiosity_kwargs['device'])

        if curiosity_kwargs['feature_encoding'] == 'idf':
            self.conv = UniverseHead(
                image_shape=image_shape,
                batch_norm=curiosity_kwargs['batch_norm'])
            self.conv.output_size = self.curiosity_model.feature_size
        elif curiosity_kwargs['feature_encoding'] == 'idf_burda':
            self.conv = BurdaHead(
                image_shape=image_shape,
                output_size=self.curiosity_model.feature_size,
                batch_norm=curiosity_kwargs['batch_norm'])
            self.conv.output_size = self.curiosity_model.feature_size
        elif curiosity_kwargs['feature_encoding'] == 'idf_maze':
            self.conv = MazeHead(
                image_shape=image_shape,
                output_size=self.curiosity_model.feature_size,
                batch_norm=curiosity_kwargs['batch_norm'])
            self.conv.output_size = self.curiosity_model.feature_size
        elif curiosity_kwargs['feature_encoding'] == 'none':
            self.conv = Conv2dHeadModel(
                image_shape=image_shape,
                channels=channels or [16, 32],
                kernel_sizes=kernel_sizes or [8, 4],
                strides=strides or [4, 2],
                paddings=paddings or [0, 1],
                use_maxpool=use_maxpool,
                hidden_sizes=fc_sizes)  # Applies nonlinearity at end.
    else:
        self.conv = Conv2dHeadModel(
            image_shape=image_shape,
            channels=channels or [16, 32],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )

    self.lstm = torch.nn.LSTM(self.conv.output_size + output_size, lstm_size)
    self.pi = torch.nn.Linear(lstm_size, output_size)
    self.value = torch.nn.Linear(lstm_size, 1)
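# Hedged sketch of the ICM intrinsic reward (Pathak et al., 2017) that the
# 'icm' branch wires in: a forward model predicts the next feature embedding
# and the scaled prediction error becomes the curiosity bonus. Illustrative
# stand-in tensors and modules only, not this repo's ICM class.
import torch

phi_s, phi_s_next = torch.randn(4, 32), torch.randn(4, 32)  # Encoded obs pairs.
action_onehot = torch.eye(6)[torch.randint(0, 6, (4,))]     # One-hot actions.
forward_model = torch.nn.Linear(32 + 6, 32)                 # Stand-in predictor.
pred = forward_model(torch.cat([phi_s, action_onehot], dim=-1))
prediction_beta = 0.2
r_int = prediction_beta * 0.5 * (pred - phi_s_next).pow(2).sum(-1)  # [B]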