def __init__(self, obs_shape, image_obs_shape, action_shape, hidden_dim,
             encoder_type, encoder_feature_dim, log_std_min, log_std_max,
             num_layers, num_filters):
    """Set up an actor fed by encoder features plus a low-dimensional observation.

    Args:
        obs_shape: Shape of the low-dimensional observation; only
            ``obs_shape[0]`` is read and stored as ``self.obs_dim``.
        image_obs_shape: Observation shape handed to ``make_encoder``.
        action_shape: Action shape; the trunk emits ``2 * action_shape[0]``
            values (presumably mean and log-std per action dimension --
            confirm against the forward pass).
        hidden_dim: Width of the two hidden trunk layers.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        image_obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
    )
    self.obs_dim = obs_shape[0]
    print('low obs dim:', self.obs_dim, ": ", self.encoder.feature_dim)

    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    # The trunk input is the encoder feature vector concatenated with the
    # low-dimensional observation, hence the summed width.
    trunk_in = self.encoder.feature_dim + self.obs_dim
    trunk_layers = [
        nn.Linear(trunk_in, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 2 * action_shape[0]),
    ]
    self.trunk = nn.Sequential(*trunk_layers)

    # NOTE(review): this layer is not wired into the trunk here and looks
    # like a debugging leftover -- confirm before removing, since it still
    # contributes parameters to the module's state dict.
    self.test_layer = nn.Linear(17, 17)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, encoder_feature_dim, num_layers=4,
             num_filters=32, num_keypoints=10, sigma=0.1):
    """Create the key-point network: an image encoder plus a 1x1 score-map conv.

    Args:
        obs_shape: Image observation shape; must have exactly three entries.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        num_layers: Encoder depth forwarded to ``make_encoder``; also stored.
        num_filters: Encoder filter count; also the input channel count of
            the score-map convolution.
        num_keypoints: Number of score maps (output channels) produced.
        sigma: Stored unchanged on the instance.
    """
    super().__init__()
    assert len(obs_shape) == 3

    self.num_layers = num_layers
    self.num_keypoints = num_keypoints
    self.sigma = sigma

    # Pixel encoder owned by the key-point network (same factory that the
    # auto-encoder's image encoder uses).
    self.image_encoder = make_encoder(
        'pixel', obs_shape, encoder_feature_dim, num_layers, num_filters
    )

    # 1x1 convolution turning encoder feature maps into one score map per
    # key point. Stride, padding, dilation, groups and bias are left at the
    # Conv2d defaults, which match the values the layer is meant to use.
    self.features_to_score_maps = nn.Conv2d(
        num_filters, num_keypoints, kernel_size=1
    )

    self.outputs = {}
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, log_std_min, log_std_max, num_layers,
             num_filters, hybrid_state_shape, two_conv):
    """Build an actor whose trunk may also receive a hybrid state vector.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; the trunk emits ``2 * action_shape[0]``
            values.
        hidden_dim: Width of the two hidden trunk layers.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        hybrid_state_shape: ``None`` for no extra input, otherwise a shape
            whose first entry widens the trunk input.
        two_conv: Forwarded to ``make_encoder`` as a keyword argument.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
        two_conv=two_conv,
    )
    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    # Widen the trunk input only when an extra state vector is configured.
    feature_dim = self.encoder.feature_dim
    trunk_input_dim = (
        feature_dim
        if hybrid_state_shape is None
        else feature_dim + hybrid_state_shape[0]
    )

    trunk_layers = [
        nn.Linear(trunk_input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 2 * action_shape[0]),
    ]
    self.trunk = nn.Sequential(*trunk_layers)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, num_layers, num_filters):
    """Build a critic with one encoder feeding two Q-functions.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; ``action_shape[0]`` is passed to each
            ``QFunction``.
        hidden_dim: Hidden width passed to each ``QFunction``.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
    )

    # Both heads are built from the same arguments but are separate modules.
    q_args = (self.encoder.feature_dim, action_shape[0], hidden_dim)
    self.Q1 = QFunction(*q_args)
    self.Q2 = QFunction(*q_args)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, num_layers, num_filters, key_points,
             sigma, key_net):
    """Build a twin-Q critic whose input is widened by key-point coordinates.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; ``action_shape[0]`` is passed to each
            ``QFunction``.
        hidden_dim: Hidden width passed to each ``QFunction``.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        key_points: Number of key points; each adds two input features.
        sigma: Accepted but not used by this constructor.
        key_net: Key-point network, stored unchanged as ``self.key_net``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type, obs_shape, encoder_feature_dim, num_layers, num_filters
    )
    self.key_net = key_net
    self.key_points = key_points

    # Encoder features plus two values per key point
    # (presumably x/y coordinates -- confirm against the forward pass).
    q_input_dim = self.encoder.feature_dim + 2 * self.key_points
    self.Q1 = QFunction(q_input_dim, action_shape[0], hidden_dim)
    self.Q2 = QFunction(q_input_dim, action_shape[0], hidden_dim)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, log_std_min, log_std_max, num_layers,
             num_filters, key_points, sigma, key_net):
    """Build an actor whose trunk input is widened by key-point coordinates.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; the trunk emits ``2 * action_shape[0]``
            values.
        hidden_dim: Width of the two hidden trunk layers.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        key_points: Number of key points; each adds two trunk inputs.
        sigma: Accepted but not used by this constructor.
        key_net: Key-point network, stored unchanged as ``self.key_net``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type, obs_shape, encoder_feature_dim, num_layers, num_filters
    )
    self.key_net = key_net
    self.key_points = key_points
    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    # Encoder features plus two values per key point.
    trunk_in = self.encoder.feature_dim + 2 * self.key_points
    trunk_layers = [
        nn.Linear(trunk_in, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 2 * action_shape[0]),
    ]
    self.trunk = nn.Sequential(*trunk_layers)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape):
    """Create a fixed-size pixel encoder/decoder pair on the global ``device``.

    Args:
        obs_shape: Observation shape forwarded to both factories.

    ``device`` is not a parameter; it is resolved from the enclosing scope.
    Only the decoder has ``weight_init`` applied here.
    """
    super().__init__()

    # NOTE(review): the encoder is built with feature_dim=100 while the
    # decoder receives 50 -- presumably the encoder output is split into two
    # halves (e.g. mean / log-variance); confirm against the forward pass.
    pixel_encoder = make_encoder(
        encoder_type='pixel',
        obs_shape=obs_shape,
        feature_dim=100,
        num_layers=4,
        num_filters=32,
    )
    self.encoder = pixel_encoder.to(device)

    pixel_decoder = make_decoder('pixel', obs_shape, 50, 4, 32)
    self.decoder = pixel_decoder.to(device)
    self.decoder.apply(weight_init)
def __init__(self, obs_shape, z_dim, encoder_feature_dim, hidden_dim, downsample=True):
    """Set up the contrastive encoders and the latent dynamics head.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        z_dim: Side length of the square bilinear matrix ``W`` and output
            width of ``output_linear``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        hidden_dim: Width of the dynamics trunk.
        downsample: Forwarded to ``make_encoder`` as a keyword argument.

    NOTE(review): ``n_actions`` is read below but is not a parameter of this
    constructor, so it must resolve from an enclosing or module scope --
    confirm it is defined, otherwise construction raises ``NameError``.
    """
    super(CURL, self).__init__()

    # Online (query) encoder, used for the contrastive loss.
    self.encoder = make_encoder(
        obs_shape, encoder_feature_dim, downsample=downsample
    )

    # Target (key) encoder; it starts as an exact copy of the online
    # encoder's weights (presumably updated by momentum elsewhere).
    self.encoder_target = make_encoder(
        obs_shape, encoder_feature_dim, downsample=downsample
    )
    online_weights = self.encoder.state_dict()
    self.encoder_target.load_state_dict(online_weights)

    # Weight matrix of the bilinear similarity product.
    self.W = nn.Parameter(torch.rand(z_dim, z_dim))

    # Dynamics part: the trunk input is the encoder feature vector
    # concatenated with a one-hot action vector of length ``n_actions``.
    feature_dim = self.encoder.feature_dim
    self.trunk = nn.Sequential(
        nn.Linear(feature_dim + n_actions, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
    )

    # Weight matrix for the skip connection from features to hidden width.
    self.W_skip = nn.Parameter(torch.rand(feature_dim, hidden_dim))
    self.output_linear = nn.Linear(hidden_dim, z_dim)

    self.apply(weight_init)
def __init__(self, obs_shape, encoder_feature_dim, num_layers, num_filters,
             num_keypoints, sigma):
    """Assemble the three sub-networks: image encoder, key-point net, generator.

    Args:
        obs_shape: Observation shape forwarded to all three factories.
        encoder_feature_dim: Forwarded to ``make_encoder`` and ``make_keynet``.
        num_layers: Forwarded to ``make_encoder`` and ``make_keynet``.
        num_filters: Forwarded to all three factories.
        num_keypoints: Forwarded to ``make_keynet`` and ``make_gennet``.
        sigma: Forwarded to ``make_keynet``.
    """
    super(IMM, self).__init__()

    self.ImageEncoder = make_encoder(
        'pixel', obs_shape, encoder_feature_dim, num_layers, num_filters
    )
    self.KeyNet = make_keynet(
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        num_keypoints,
        sigma,
    )

    # The generator is sized from the key-point net's own image encoder.
    encoded_size = self.KeyNet.image_encoder.out_dim
    self.GenNet = make_gennet(
        obs_shape, num_filters, num_keypoints, encoded_image_size=encoded_size
    )

    self.output = None
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, num_layers, num_filters,
             hybrid_state_shape, two_conv):
    """Build a twin-Q critic whose input may include a hybrid state vector.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; ``action_shape[0]`` is passed to each
            ``QFunction``.
        hidden_dim: Hidden width passed to each ``QFunction``.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        hybrid_state_shape: ``None`` for no extra input, otherwise a shape
            whose first entry widens the Q-function input.
        two_conv: Forwarded to ``make_encoder`` as a keyword argument.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
        two_conv=two_conv,
    )

    # Widen the Q input only when an extra state vector is configured.
    feature_dim = self.encoder.feature_dim
    trunk_input_dim = (
        feature_dim
        if hybrid_state_shape is None
        else feature_dim + hybrid_state_shape[0]
    )

    self.Q1 = QFunction(trunk_input_dim, action_shape[0], hidden_dim)
    self.Q2 = QFunction(trunk_input_dim, action_shape[0], hidden_dim)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, log_std_min, log_std_max, num_layers,
             num_filters):
    """Build an actor: an encoder followed by a three-layer MLP trunk.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Either the action dimensionality as an integer or a
            shape sequence whose first entry is the dimensionality. The
            trunk emits twice that many values.
        hidden_dim: Width of the two hidden trunk layers.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
    """
    import numbers

    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
    )
    self.log_std_min = log_std_min
    self.log_std_max = log_std_max

    # ``2 * action_shape`` on a tuple repeats the tuple instead of doubling
    # the dimension, which then fails inside nn.Linear. Integer callers keep
    # working exactly as before; shape sequences are reduced to their first
    # entry.
    if isinstance(action_shape, numbers.Integral):
        action_dim = action_shape
    else:
        action_dim = action_shape[0]

    self.trunk = nn.Sequential(
        nn.Linear(self.encoder.feature_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 2 * action_dim),
    )

    self.outputs = dict()
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, log_std_min, log_std_max, num_layers,
             num_filters, lstm_num_layers, lstm_dropout):
    """Build a recurrent actor: encoder, LSTM, then a linear output layer.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; the output layer emits
            ``2 * action_shape[0]`` values.
        hidden_dim: Hidden size of the LSTM and input width of the output
            layer.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_dropout: Dropout value passed to ``nn.LSTM``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
    )
    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    lstm_input_size = self.encoder.feature_dim
    self.lstm = nn.LSTM(
        lstm_input_size, hidden_dim, lstm_num_layers, dropout=lstm_dropout
    )
    self.linear_layer = nn.Linear(hidden_dim, 2 * action_shape[0])

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
             encoder_feature_dim, log_std_min, log_std_max, num_layers,
             num_filters, num_mlp_layers):
    """Build an actor whose trunk is a ``D2RLNetwork``.

    Args:
        obs_shape: Observation shape forwarded to ``make_encoder``.
        action_shape: Action shape; the trunk emits ``2 * action_shape[0]``
            values.
        hidden_dim: Hidden width passed to ``D2RLNetwork``.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        log_std_min: Stored unchanged on the instance.
        log_std_max: Stored unchanged on the instance.
        num_layers: Forwarded to ``make_encoder``.
        num_filters: Forwarded to ``make_encoder``.
        num_mlp_layers: Layer count passed to ``D2RLNetwork``.
    """
    super().__init__()

    self.encoder = make_encoder(
        encoder_type,
        obs_shape,
        encoder_feature_dim,
        num_layers,
        num_filters,
        output_logits=True,
    )
    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    trunk_in = self.encoder.feature_dim
    trunk_out = 2 * action_shape[0]
    self.trunk = D2RLNetwork(trunk_in, hidden_dim, trunk_out, num_mlp_layers)

    self.outputs = {}
    self.apply(weight_init)
def __init__(self, obs_dim, action_dim, action_range, device, encoder_type,
             critic_cfg, actor_cfg, discount, init_temperature, alpha_lr,
             l2_regularizer_weight, alpha_betas, actor_lr, actor_betas,
             actor_update_frequency, critic_lr, critic_betas, critic_tau,
             num_envs, encoder_feature_dim, penalty_type,
             critic_target_update_frequency, encoder_batch_size,
             sac_batch_size, penalty_anneal_iters, penalty_weight):
    """Construct the agent: shared encoder, reward/dynamics models, SAC parts.

    Args:
        obs_dim: Observation shape forwarded to ``make_encoder``.
        action_dim: Action dimensionality; sizes the dynamics model and sets
            the target entropy to ``-action_dim``.
        action_range: Stored unchanged on the instance.
        device: Device spec; converted with ``torch.device`` and stored.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        critic_cfg: Hydra config instantiated twice (critic and target).
        actor_cfg: Hydra config instantiated once for the actor.
        discount, critic_tau, actor_update_frequency,
        critic_target_update_frequency, encoder_batch_size, sac_batch_size,
        num_envs, l2_regularizer_weight, penalty_anneal_iters,
        penalty_weight, penalty_type: Stored unchanged on the instance.
        init_temperature: Initial temperature; its log seeds ``log_alpha``.
        alpha_lr, alpha_betas: Adam settings for ``log_alpha``.
        actor_lr, actor_betas: Adam settings for the actor.
        critic_lr, critic_betas: Adam settings for the critic.
        encoder_feature_dim: Encoder feature size; also the input width of
            the reward and dynamics models.
    """
    super().__init__()

    # Plain hyper-parameters, kept for use by the rest of the agent.
    self.action_range = action_range
    self.device = torch.device(device)
    self.discount = discount
    self.critic_tau = critic_tau
    self.actor_update_frequency = actor_update_frequency
    self.critic_target_update_frequency = critic_target_update_frequency
    self.encoder_batch_size = encoder_batch_size
    self.sac_batch_size = sac_batch_size
    self.num_envs = num_envs
    self.l2_regularizer_weight = l2_regularizer_weight
    self.penalty_anneal_iters = penalty_anneal_iters
    self.penalty_weight = penalty_weight
    self.penalty_type = penalty_type

    # Shared encoder plus the reward and dynamics heads that sit on top of it.
    self.encoder = make_encoder(
        encoder_type, obs_dim, encoder_feature_dim, 2, 32
    ).to(self.device)
    reward_layers = [
        nn.Linear(encoder_feature_dim, 200),
        nn.ReLU(),
        nn.Linear(200, 200),
        nn.ReLU(),
        nn.Linear(200, 1),
    ]
    self.reward_model = nn.Sequential(*reward_layers).to(device)
    self.model = make_dynamics_model(
        encoder_feature_dim, 200, action_dim
    ).to(device)

    # The critic uses the shared encoder; the target critic gets its own
    # encoder instance and then copies the critic's weights.
    self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic.encoder = self.encoder
    self.critic_target = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic_target.encoder = make_encoder(
        encoder_type, obs_dim, encoder_feature_dim, 2, 32
    ).to(self.device)
    self.critic_target.load_state_dict(self.critic.state_dict())

    # The actor also reads observations through the shared encoder.
    self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
    self.actor.encoder = self.encoder

    # Learnable log-temperature.
    log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
    log_alpha.requires_grad = True
    self.log_alpha = log_alpha
    # set target entropy to -|A|
    self.target_entropy = -action_dim

    # optimizers
    self.actor_optimizer = torch.optim.Adam(
        self.actor.parameters(), lr=actor_lr, betas=actor_betas
    )
    self.critic_optimizer = torch.optim.Adam(
        list(self.critic.parameters()), lr=critic_lr, betas=critic_betas
    )
    self.log_alpha_optimizer = torch.optim.Adam(
        [self.log_alpha], lr=alpha_lr, betas=alpha_betas
    )
    # Dynamics model, reward model and encoder are trained together.
    model_params = [
        param
        for module in (self.model, self.reward_model, self.encoder)
        for param in module.parameters()
    ]
    self.decoder_optimizer = torch.optim.Adam(
        model_params, lr=1e-4, weight_decay=1e-5
    )

    self.train()
    self.critic_target.train()
def __init__(self, obs_dim, action_dim, action_range, device, encoder_type,
             encoder_feature_dim, critic_cfg, actor_cfg, discount,
             init_temperature, alpha_lr, alpha_betas, actor_lr, actor_betas,
             actor_update_frequency, critic_lr, critic_betas, critic_tau,
             critic_target_update_frequency, batch_size):
    """Construct the agent: one shared encoder, critic, target critic, actor.

    Args:
        obs_dim: Observation shape forwarded to ``make_encoder``.
        action_dim: Action dimensionality; the target entropy is set to
            ``-action_dim``.
        action_range: Stored unchanged on the instance.
        device: Device spec; converted with ``torch.device`` and stored.
        encoder_type: Encoder variant forwarded to ``make_encoder``.
        encoder_feature_dim: Feature size forwarded to ``make_encoder``.
        critic_cfg: Hydra config instantiated twice (critic and target).
        actor_cfg: Hydra config instantiated once for the actor.
        discount, critic_tau, actor_update_frequency,
        critic_target_update_frequency, batch_size: Stored unchanged.
        init_temperature: Initial temperature; its log seeds ``log_alpha``.
        alpha_lr, alpha_betas: Adam settings for ``log_alpha``.
        actor_lr, actor_betas: Adam settings for the actor.
        critic_lr, critic_betas: Adam settings for the critic.
    """
    super().__init__()

    # Plain hyper-parameters.
    self.action_range = action_range
    self.device = torch.device(device)
    self.discount = discount
    self.critic_tau = critic_tau
    self.actor_update_frequency = actor_update_frequency
    self.critic_target_update_frequency = critic_target_update_frequency
    self.batch_size = batch_size

    shared_encoder = make_encoder(
        encoder_type, obs_dim, encoder_feature_dim, 2, 32
    ).to(self.device)
    self.encoder = shared_encoder

    self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic.encoder = shared_encoder

    # NOTE(review): the target critic is handed the *same* encoder object as
    # the online critic, so it has no independent encoder weights -- confirm
    # this sharing is intended rather than a separate target encoder.
    self.critic_target = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic_target.encoder = shared_encoder
    self.critic_target.load_state_dict(self.critic.state_dict())

    self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
    self.actor.encoder = shared_encoder

    # Learnable log-temperature.
    log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
    log_alpha.requires_grad = True
    self.log_alpha = log_alpha
    # set target entropy to -|A|
    self.target_entropy = -action_dim

    # optimizers
    self.actor_optimizer = torch.optim.Adam(
        self.actor.parameters(), lr=actor_lr, betas=actor_betas
    )
    self.critic_optimizer = torch.optim.Adam(
        self.critic.parameters(), lr=critic_lr, betas=critic_betas
    )
    self.log_alpha_optimizer = torch.optim.Adam(
        [self.log_alpha], lr=alpha_lr, betas=alpha_betas
    )

    self.train()
    self.critic_target.train()
def __init__(self, obs_dim, action_dim, action_range, device, encoder_type,
             num_envs, c_ent, kld, critic_cfg, actor_cfg, discount,
             init_temperature, alpha_lr, encoder_lr, c_ent_iters,
             alpha_betas, actor_lr, actor_betas, actor_update_frequency,
             decoder_lr, decoder_weight_lambda, critic_lr, critic_betas,
             critic_tau, encoder_feature_dim, decoder_latent_lambda,
             critic_target_update_frequency, batch_size):
    """Construct the multi-environment agent and all of its optimizers.

    Builds a shared encoder and dynamics model, one task-specific encoder and
    dynamics model per environment, reward / decoder / classifier MLPs, and
    the SAC actor, critic and target critic.

    Args:
        obs_dim: Forwarded to ``make_encoder`` as the observation shape and
            also used as the decoder's output width.
        action_dim: Action dimensionality; sizes the dynamics models and sets
            the target entropy to ``-action_dim``.
        action_range: Stored unchanged on the instance.
        device: Device spec; converted with ``torch.device`` and stored.
        encoder_type: Encoder variant forwarded to ``make_encoder``; stored.
        num_envs: Number of environments; sets how many task-specific
            encoders/models are built and the classifier's output width.
        c_ent, c_ent_iters, kld, decoder_latent_lambda: Stored unchanged.
        critic_cfg: Hydra config instantiated twice (critic and target).
        actor_cfg: Hydra config instantiated once for the actor.
        discount, critic_tau, actor_update_frequency,
        critic_target_update_frequency, batch_size: Stored unchanged.
        init_temperature: Initial temperature; its log seeds ``log_alpha``.
        alpha_lr, alpha_betas: Adam settings for ``log_alpha``.
        encoder_lr: Learning rate of the encoder-only optimizer.
        actor_lr, actor_betas: Adam settings for the actor (also reused for
            the classifier optimizer).
        decoder_lr, decoder_weight_lambda: Learning rate and weight decay of
            the decoder optimizer.
        critic_lr, critic_betas: Adam settings for the critic.
        encoder_feature_dim: Encoder feature size; also sizes the MLP inputs.
    """

    def three_layer_mlp(in_dim, width, out_dim):
        # Linear-ReLU-Linear-ReLU-Linear stack shared by the MLP heads.
        return nn.Sequential(
            nn.Linear(in_dim, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, out_dim),
        )

    super().__init__()

    # Plain hyper-parameters.
    self.action_range = action_range
    self.device = torch.device(device)
    self.discount = discount
    self.critic_tau = critic_tau
    self.actor_update_frequency = actor_update_frequency
    self.critic_target_update_frequency = critic_target_update_frequency
    self.batch_size = batch_size
    self.num_envs = num_envs
    self.encoder_tau = 0.005
    self.decoder_latent_lambda = decoder_latent_lambda
    self.encoder_type = encoder_type
    self.c_ent = c_ent
    self.c_ent_iters = c_ent_iters
    self.kld = kld

    # Shared encoder plus one task-specific encoder per environment. The
    # task-specific modules live in plain Python lists, exactly as before.
    self.encoder = make_encoder(
        encoder_type, obs_dim, encoder_feature_dim, 2, 32
    ).to(self.device)
    self.task_specific_encoders = [
        make_encoder(encoder_type, obs_dim, encoder_feature_dim, 2, 32).to(device)
        for _ in range(self.num_envs)
    ]

    # Shared dynamics model plus one task-specific model per environment.
    self.model = make_dynamics_model(encoder_feature_dim, 200, action_dim).to(device)
    self.task_specific_models = [
        make_dynamics_model(encoder_feature_dim, 200, action_dim).to(device)
        for _ in range(self.num_envs)
    ]

    self.reward_model = three_layer_mlp(encoder_feature_dim, 200, 1).to(device)
    # The decoder input is twice the feature size (presumably shared and
    # task-specific features concatenated -- confirm against its call site).
    self.decoder = three_layer_mlp(encoder_feature_dim * 2, 200, obs_dim).to(device)
    self.classifier = three_layer_mlp(
        encoder_feature_dim, 512, self.num_envs
    ).to(device)

    # The critic uses the shared encoder; the target critic gets its own
    # encoder instance and then copies the critic's weights.
    self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic.encoder = self.encoder
    self.critic_target = hydra.utils.instantiate(critic_cfg).to(self.device)
    self.critic_target.encoder = make_encoder(
        encoder_type, obs_dim, encoder_feature_dim, 2, 32
    ).to(self.device)
    self.critic_target.load_state_dict(self.critic.state_dict())

    self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
    self.actor.encoder = self.encoder

    # Learnable log-temperature.
    self.log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
    self.log_alpha.requires_grad = True
    # set target entropy to -|A|
    self.target_entropy = -action_dim

    # optimizers
    self.actor_optimizer = torch.optim.Adam(
        self.actor.parameters(), lr=actor_lr, betas=actor_betas
    )

    # The encoder was attached to the critic above, so its parameters are
    # normally already part of ``critic.parameters()``. Listing them a second
    # time puts duplicates in one param group, which PyTorch warns about and
    # which makes Adam step those tensors twice per update. Only add encoder
    # parameters that are not already present.
    critic_params = list(self.critic.parameters())
    already_included = {id(param) for param in critic_params}
    critic_params.extend(
        param
        for param in self.encoder.parameters()
        if id(param) not in already_included
    )
    self.critic_optimizer = torch.optim.Adam(
        critic_params, lr=critic_lr, betas=critic_betas
    )

    self.log_alpha_optimizer = torch.optim.Adam(
        [self.log_alpha], lr=alpha_lr, betas=alpha_betas
    )
    self.classifier_optimizer = torch.optim.Adam(
        self.classifier.parameters(), lr=actor_lr, betas=actor_betas
    )

    # optimizer for critic encoder for reconstruction loss
    self.encoder_optimizer = torch.optim.Adam(
        self.critic.encoder.parameters(), lr=encoder_lr
    )

    # optimizer for decoder
    task_specific_parameters = [
        param
        for module in (self.task_specific_encoders + self.task_specific_models)
        for param in module.parameters()
    ]
    self.decoder_optimizer = torch.optim.Adam(
        list(self.decoder.parameters())
        + list(self.model.parameters())
        + list(self.reward_model.parameters())
        + task_specific_parameters
        + list(self.encoder.parameters()),
        lr=decoder_lr,
        weight_decay=decoder_weight_lambda,
    )

    self.train()
    self.critic_target.train()