def __init__(
    self,
    observation_space,
    hidden_size=512,
    backbone_name='efficientnet-b7',
    pretrained=False,
    finetune=False,
    normalize_visual_inputs=False,
):
    super().__init__()

    # Channel counts come from the (H, W, C) observation spaces.
    if "rgb" in observation_space.spaces:
        self._n_input_rgb = observation_space.spaces["rgb"].shape[2]
    else:
        self._n_input_rgb = 0

    if "depth" in observation_space.spaces:
        self._n_input_depth = observation_space.spaces["depth"].shape[2]
    else:
        self._n_input_depth = 0

    # Optionally whiten the stacked RGB-D input with running statistics.
    if normalize_visual_inputs:
        self.running_mean_and_var = RunningMeanAndVar(
            self._n_input_depth + self._n_input_rgb
        )
    else:
        self.running_mean_and_var = nn.Sequential()

    if not self.is_blind:
        input_channels = self._n_input_depth + self._n_input_rgb
        self.backbone = efficientnet(
            backbone_name, pretrained, input_channels, finetune, hidden_size
        )
        self.output_shape = hidden_size
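# Illustration (added, not from the original file): how the channel counts
# above are read from a gym-style Dict observation space. Assumes gym and
# numpy are installed; the space shapes here are arbitrary example values.
import numpy as np
from gym import spaces

example_obs_space = spaces.Dict(
    {
        "rgb": spaces.Box(low=0, high=255, shape=(256, 256, 3), dtype=np.uint8),
        "depth": spaces.Box(low=0.0, high=1.0, shape=(256, 256, 1), dtype=np.float32),
    }
)
n_input_rgb = example_obs_space.spaces["rgb"].shape[2]      # 3 channels
n_input_depth = example_obs_space.spaces["depth"].shape[2]  # 1 channel
assert n_input_rgb + n_input_depth == 4  # channels fed to the backbone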
def __init__(
    self,
    observation_space: spaces.Dict,
    baseplanes: int = 32,
    ngroups: int = 32,
    spatial_size: int = 128,
    make_backbone=None,
    normalize_visual_inputs: bool = False,
):
    super().__init__()

    if "rgb" in observation_space.spaces:
        self._n_input_rgb = observation_space.spaces["rgb"].shape[2]
        spatial_size = observation_space.spaces["rgb"].shape[0] // 2
    else:
        self._n_input_rgb = 0

    if "depth" in observation_space.spaces:
        self._n_input_depth = observation_space.spaces["depth"].shape[2]
        spatial_size = observation_space.spaces["depth"].shape[0] // 2
    else:
        self._n_input_depth = 0

    if normalize_visual_inputs:
        self.running_mean_and_var: nn.Module = RunningMeanAndVar(
            self._n_input_depth + self._n_input_rgb
        )
    else:
        self.running_mean_and_var = nn.Sequential()

    if not self.is_blind:
        input_channels = self._n_input_depth + self._n_input_rgb
        self.backbone = make_backbone(input_channels, baseplanes, ngroups)

        final_spatial = int(
            spatial_size * self.backbone.final_spatial_compress
        )
        after_compression_flat_size = 2048
        num_compression_channels = int(
            round(after_compression_flat_size / (final_spatial ** 2))
        )
        self.compression = nn.Sequential(
            nn.Conv2d(
                self.backbone.final_channels,
                num_compression_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.GroupNorm(1, num_compression_channels),
            nn.ReLU(True),
        )

        self.output_shape = (
            num_compression_channels,
            final_spatial,
            final_spatial,
        )
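# Worked example (added): the compression-head sizing above for a 256x256
# input. The 1/32 final_spatial_compress is an assumed value matching a
# typical stride-32 ResNet backbone, not something read from this file.
spatial_size = 256 // 2                                              # 128, as set above
final_spatial = int(spatial_size * (1.0 / 32))                       # 4
num_compression_channels = int(round(2048 / (final_spatial ** 2)))   # 128
assert (num_compression_channels, final_spatial, final_spatial) == (128, 4, 4)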
import torch
import torch.nn as nn
import torch.optim as optim

# RunningMeanAndVar, AuxLosses, and EPS_PPO are repo-internal helpers
# assumed to be in scope here.


class PPO(nn.Module):
    def __init__(
        self,
        actor_critic,
        clip_param,
        ppo_epoch,
        num_mini_batch,
        value_loss_coef,
        entropy_coef,
        use_aux_losses,
        lr=None,
        eps=None,
        max_grad_norm=None,
        use_clipped_value_loss=True,
        use_normalized_advantage=True,
    ):
        super().__init__()

        self.actor_critic = actor_critic

        self.clip_param = clip_param
        self.ppo_epoch = ppo_epoch
        self.num_mini_batch = num_mini_batch

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm
        self.use_clipped_value_loss = use_clipped_value_loss

        # Optimize only the parameters that require gradients, so frozen
        # modules are skipped.
        self.optimizer = optim.Adam(
            list(filter(lambda p: p.requires_grad, actor_critic.parameters())),
            lr=lr,
            eps=eps,
        )
        self.device = next(actor_critic.parameters()).device
        self.use_normalized_advantage = use_normalized_advantage

        # Running mean/variance tracker used for reward whitening.
        self.reward_whitten = RunningMeanAndVar(shape=(1,))
        self.reward_whitten.to(self.device)

        self.use_aux_losses = use_aux_losses

    def forward(self, *x):
        raise NotImplementedError

    def get_advantages(self, rollouts):
        advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
        if not self.use_normalized_advantage:
            return advantages

        # Normalize advantages to zero mean and unit variance.
        return (advantages - advantages.mean()) / (advantages.std() + EPS_PPO)

    def update(self, rollouts):
        advantages = self.get_advantages(rollouts)

        value_loss_epoch = 0
        action_loss_epoch = 0
        aux_losses_epoch = 0
        dist_entropy_epoch = 0

        AuxLosses.activate()
        for e in range(self.ppo_epoch):
            data_generator = rollouts.recurrent_generator(
                advantages, self.num_mini_batch
            )

            for sample in data_generator:
                (
                    obs_batch,
                    prev_obs_batch,
                    recurrent_hidden_states_batch,
                    actions_batch,
                    prev_actions_batch,
                    value_preds_batch,
                    return_batch,
                    masks_batch,
                    old_action_log_probs_batch,
                    adv_targ,
                ) = sample

                AuxLosses.clear()

                # Reshape to do in a single forward pass for all steps
                (
                    values,
                    action_log_probs,
                    dist_entropy,
                ) = self.actor_critic.evaluate_actions(
                    obs_batch,
                    prev_obs_batch,
                    recurrent_hidden_states_batch,
                    prev_actions_batch,
                    masks_batch,
                    actions_batch,
                )

                # PPO clipped surrogate objective.
                ratio = torch.exp(action_log_probs - old_action_log_probs_batch)
                surr1 = ratio * adv_targ
                surr2 = (
                    torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param)
                    * adv_targ
                )
                action_loss = -torch.min(surr1, surr2).mean()

                if self.use_clipped_value_loss:
                    # Clip the value update around the old value predictions
                    # and take the pessimistic (larger) of the two losses.
                    value_pred_clipped = value_preds_batch + (
                        values - value_preds_batch
                    ).clamp(-self.clip_param, self.clip_param)
                    value_losses = (values - return_batch).pow(2)
                    value_losses_clipped = (value_pred_clipped - return_batch).pow(2)
                    value_loss = (
                        0.5 * torch.max(value_losses, value_losses_clipped).mean()
                    )
                else:
                    value_loss = 0.5 * (return_batch - values).pow(2).mean()

                self.optimizer.zero_grad()
                total_loss = (
                    value_loss * self.value_loss_coef
                    + action_loss
                    - dist_entropy * self.entropy_coef
                )

                # Either reduce all registered auxiliary losses, or use only
                # the weighted "information" loss.
                use_aux_loss = self.use_aux_losses
                if use_aux_loss:
                    aux_losses = AuxLosses.reduce()
                else:
                    aux_losses = (
                        AuxLosses.get_loss("information")
                        * AuxLosses._loss_alphas["information"]
                    )
                aux_losses_epoch += aux_losses.item()
                total_loss = total_loss + aux_losses

                self.before_backward(total_loss)
                total_loss.backward()
                self.after_backward(total_loss)

                self.before_step()
                self.optimizer.step()
                self.after_step()

                value_loss_epoch += value_loss.item()
                action_loss_epoch += action_loss.item()
                dist_entropy_epoch += dist_entropy.item()

        num_updates = self.ppo_epoch * self.num_mini_batch

        value_loss_epoch /= num_updates
        action_loss_epoch /= num_updates
        dist_entropy_epoch /= num_updates
        aux_losses_epoch /= num_updates

        AuxLosses.deactivate()

        return (
            value_loss_epoch,
            action_loss_epoch,
            dist_entropy_epoch,
            aux_losses_epoch,
        )

    def before_backward(self, loss):
        pass

    def after_backward(self, loss):
        pass

    def before_step(self):
        nn.utils.clip_grad_norm_(
            self.actor_critic.parameters(), self.max_grad_norm
        )

    def after_step(self):
        pass
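# Illustration (added): the clipped surrogate objective from PPO.update
# evaluated on dummy tensors, to make the clipping behavior concrete.
# All values here are synthetic.
import torch

clip_param = 0.2
old_action_log_probs = torch.randn(8, 1)
action_log_probs = old_action_log_probs + 0.1 * torch.randn(8, 1)
adv_targ = torch.randn(8, 1)

ratio = torch.exp(action_log_probs - old_action_log_probs)
surr1 = ratio * adv_targ
surr2 = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * adv_targ
action_loss = -torch.min(surr1, surr2).mean()  # scalar policy loss, as above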
def __init__(
    self,
    observation_space,
    baseplanes=32,
    ngroups=32,
    spatial_size=128,
    make_backbone=None,
    normalize_visual_inputs=False,
    obs_transform=None,
):
    super().__init__()

    self.obs_transform = obs_transform
    if self.obs_transform is not None:
        observation_space = self.obs_transform.transform_observation_space(
            observation_space
        )

    if "rgb" in observation_space.spaces:
        self._n_input_rgb = observation_space.spaces["rgb"].shape[2]
        spatial_size = observation_space.spaces["rgb"].shape[0:2]
    else:
        self._n_input_rgb = 0

    if "depth" in observation_space.spaces:
        self._n_input_depth = observation_space.spaces["depth"].shape[2]
        spatial_size = observation_space.spaces["depth"].shape[0:2]
    else:
        self._n_input_depth = 0

    if normalize_visual_inputs:
        self.running_mean_and_var = RunningMeanAndVar(
            self._n_input_depth + self._n_input_rgb
        )
    else:
        self.running_mean_and_var = nn.Sequential()

    if not self.is_blind:
        # ceil_mode keeps the pooled output consistent with the
        # ceil(s / 3) bookkeeping below.
        self.initial_pool = nn.AvgPool2d(3, ceil_mode=True)
        input_channels = self._n_input_depth + self._n_input_rgb
        self.backbone = make_backbone(input_channels, baseplanes, ngroups)

        # Track the spatial extent through the pool and each stride-2
        # stage of the backbone: (s - 1) // k + 1 == ceil(s / k).
        spatial_size = tuple(int((s - 1) // 3 + 1) for s in spatial_size)
        for _ in range(self.backbone.spatial_compression_steps):
            spatial_size = tuple(int((s - 1) // 2 + 1) for s in spatial_size)

        self.output_shape = (
            self.backbone.final_channels,
            spatial_size[0],
            spatial_size[1],
        )

        # Size the compression head so the flattened output stays near 2048.
        after_compression_flat_size = 2048
        num_compression_channels = int(
            round(after_compression_flat_size / np.prod(spatial_size))
        )
        self.compression = nn.Sequential(
            nn.Conv2d(
                self.output_shape[0],
                num_compression_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.GroupNorm(1, num_compression_channels),
            nn.ReLU(True),
        )

        compression_shape = list(self.output_shape)
        compression_shape[0] = num_compression_channels
        self.compression_shape = tuple(compression_shape)
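# Worked example (added): the spatial bookkeeping above for a 240x320 input,
# assuming backbone.spatial_compression_steps == 5 (an assumed value; the
# real count comes from the backbone).
spatial_size = (240, 320)
spatial_size = tuple((s - 1) // 3 + 1 for s in spatial_size)  # ceil(s/3) -> (80, 107)
for _ in range(5):
    spatial_size = tuple((s - 1) // 2 + 1 for s in spatial_size)  # ceil(s/2) per step
assert spatial_size == (3, 4)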
def __init__(
    self,
    observation_space,
    baseplanes=32,
    ngroups=32,
    spatial_size=128,
    make_backbone=None,
    normalize_visual_inputs=False,
    obs_transform=ResizeCenterCropper(size=(256, 256)),  # noqa: B008
):
    super().__init__()

    self.obs_transform = obs_transform
    if self.obs_transform is not None:
        observation_space = self.obs_transform.transform_observation_space(
            observation_space
        )

    if "rgb" in observation_space.spaces:
        self._n_input_rgb = observation_space.spaces["rgb"].shape[2]
        spatial_size = observation_space.spaces["rgb"].shape[0] // 2
    else:
        self._n_input_rgb = 0

    if "depth" in observation_space.spaces:
        self._n_input_depth = observation_space.spaces["depth"].shape[2]
        spatial_size = observation_space.spaces["depth"].shape[0] // 2
    else:
        self._n_input_depth = 0

    if normalize_visual_inputs:
        self.running_mean_and_var = RunningMeanAndVar(
            self._n_input_depth + self._n_input_rgb
        )
    else:
        self.running_mean_and_var = nn.Sequential()

    if not self.is_blind:
        input_channels = self._n_input_depth + self._n_input_rgb
        self.backbone = make_backbone(input_channels, baseplanes, ngroups)

        final_spatial = int(
            spatial_size * self.backbone.final_spatial_compress
        )
        after_compression_flat_size = 2048
        num_compression_channels = int(
            round(after_compression_flat_size / (final_spatial ** 2))
        )
        self.compression = nn.Sequential(
            nn.Conv2d(
                self.backbone.final_channels,
                num_compression_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.GroupNorm(1, num_compression_channels),
            nn.ReLU(True),
        )

        self.output_shape = (
            num_compression_channels,
            final_spatial,
            final_spatial,
        )
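# Sketch (added): the compression head defined above, run on dummy backbone
# features. final_channels=256 and final_spatial=4 are illustrative assumed
# values, not read from this file.
import torch
import torch.nn as nn

final_channels, final_spatial = 256, 4
num_compression_channels = int(round(2048 / (final_spatial ** 2)))  # 128
compression = nn.Sequential(
    nn.Conv2d(
        final_channels, num_compression_channels, kernel_size=3, padding=1, bias=False
    ),
    nn.GroupNorm(1, num_compression_channels),
    nn.ReLU(True),
)
features = torch.randn(2, final_channels, final_spatial, final_spatial)
assert compression(features).shape == (2, 128, 4, 4)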