def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    n_critics: int = 2,
    film_critic: bool = False,
    num_env_params: int = 0,
    share_features_extractor: bool = True,
):
    """Build an ensemble of ``n_critics`` Q-networks over (features, action).

    Each critic is an MLP mapping the concatenated feature/action vector to a
    scalar Q-value, registered as a submodule named ``qf{i}``.
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )
    self.share_features_extractor = share_features_extractor
    self.n_critics = n_critics
    self.film = film_critic  # FiLM-conditioning flag, presumably read elsewhere
    # NOTE(review): num_env_params is accepted but never read in this method —
    # confirm against callers / subclasses.
    action_dim = get_action_dim(self.action_space)
    input_dim = features_dim + action_dim
    self.q_networks = []
    for critic_idx in range(n_critics):
        critic = nn.Sequential(*create_mlp(input_dim, 1, net_arch, activation_fn))
        self.add_module(f"qf{critic_idx}", critic)
        self.q_networks.append(critic)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    features_extractor: nn.Module,
    features_dim: int,
    net_arch: Optional[List[int]] = None,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """Action-value network for a discrete action space (DQN-style).

    Maps extracted features to one Q-value per discrete action.

    :param net_arch: hidden layer sizes; defaults to [64, 64] when omitted.
    """
    super(QNetwork, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )
    # Fall back to the default hidden architecture when none was supplied.
    self.net_arch = [64, 64] if net_arch is None else net_arch
    self.activation_fn = activation_fn
    self.features_extractor = features_extractor
    self.features_dim = features_dim
    self.normalize_images = normalize_images
    n_actions = self.action_space.n  # one output head per discrete action
    layers = create_mlp(self.features_dim, n_actions, self.net_arch, self.activation_fn)
    self.q_net = nn.Sequential(*layers)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    device: Union[th.device, str] = "auto",
):
    """Deterministic actor: features -> action, squashed to [-1, 1]."""
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        device=device,
        squash_output=True,
    )
    # Save constructor arguments so the object can be re-created at load time.
    self.features_extractor = features_extractor
    self.normalize_images = normalize_images
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    n_actions = get_action_dim(self.action_space)
    # Deterministic policy head; create_mlp appends the squashing layer.
    self.mu = nn.Sequential(
        *create_mlp(features_dim, n_actions, net_arch, activation_fn, squash_output=True)
    )
def __init__(self, observation_space: gym.spaces.Space, action_space: gym.spaces.Space,
             net_arch: List[int], features_extractor: nn.Module, features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU, use_sde: bool = False,
             log_std_init: float = -3, full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None, use_expln: bool = False,
             clip_mean: float = 2.0, normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    """Stochastic (SAC-style) actor with optional gSDE exploration.

    Builds a shared latent MLP, then either a state-dependent-noise
    distribution (``use_sde=True``) or independent linear mean/log-std heads
    for a squashed diagonal Gaussian.

    :param clip_mean: when > 0, the gSDE mean is clamped to
        [-clip_mean, clip_mean] with a Hardtanh to avoid numerical issues.
    """
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device, squash_output=True)
    # Save arguments to re-create object at loading.
    # FIX: the original assigned self.sde_net_arch twice; keep a single assignment.
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean
    action_dim = get_action_dim(self.action_space)
    # Shared trunk; output_dim=-1 means create_mlp adds no final layer.
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim
    if self.use_sde:
        latent_sde_dim = last_layer_dim
        # Separate feature extractor for gSDE, when requested.
        if sde_net_arch is not None:
            self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(
                features_dim, sde_net_arch, activation_fn)
        self.action_dist = StateDependentNoiseDistribution(
            action_dim, full_std=full_std, use_expln=use_expln,
            learn_features=True, squash_output=True)
        self.mu, self.log_std = self.action_dist.proba_distribution_net(
            latent_dim=last_layer_dim, latent_sde_dim=latent_sde_dim,
            log_std_init=log_std_init)
        # Avoid numerical issues by limiting the mean of the Gaussian
        # to be in [-clip_mean, clip_mean].
        if clip_mean > 0.0:
            self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean, max_val=clip_mean))
    else:
        self.action_dist = SquashedDiagGaussianDistribution(action_dim)
        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
def __init__(self, observation_space: gym.spaces.Space, action_space: gym.spaces.Space,
             net_arch: List[int], features_extractor: nn.Module, features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU, normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    """Twin Q-network critic: two independent MLPs over (features, action)."""
    super(Critic, self).__init__(observation_space, action_space,
                                 features_extractor=features_extractor,
                                 normalize_images=normalize_images,
                                 device=device)
    input_dim = features_dim + get_action_dim(self.action_space)
    # Two independent critics (clipped double-Q style).
    self.q1_net = nn.Sequential(*create_mlp(input_dim, 1, net_arch, activation_fn))
    self.q2_net = nn.Sequential(*create_mlp(input_dim, 1, net_arch, activation_fn))
    self.q_networks = [self.q1_net, self.q2_net]
def __init__(self, observation_space: gym.Space, obs_unwrapper_function: Callable,
             obs_space_dict: Dict[str, gym.Space], normalize_images: bool,
             features_dim: int = 96):
    """Feature extractor combining a CNN over 'pov' with small MLPs over the
    'cameraAngle' observation and the 'dirt' inventory count."""
    super().__init__(observation_space, obs_unwrapper_function, obs_space_dict,
                     normalize_images, features_dim)
    # Image branch: NatureCNN on the 'pov' observation, 80 output features.
    self.cnn_extractor = NatureCNN(obs_space_dict['pov'], features_dim=80)
    # Camera-angle branch: single hidden layer of 10 units, 8 output features.
    self.camera_angle_dim = get_first_dim_from_shape(obs_space_dict['cameraAngle'].shape)
    self.camera_angle_extractor = nn.Sequential(
        *create_mlp(input_dim=self.camera_angle_dim, output_dim=8, net_arch=[10]))
    # Inventory ('dirt') branch: same small MLP shape as the camera branch.
    self.dirt_inventory_dim = get_first_dim_from_shape(
        obs_space_dict['inventory']['dirt'].shape)
    self.dirt_inventory_extractor = nn.Sequential(
        *create_mlp(input_dim=self.dirt_inventory_dim, output_dim=8, net_arch=[10]))
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    features_extractor: nn.Module,
    features_dim: int,
    latent_dim: int,
    net_arch: Optional[List[int]] = None,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """Variational auto-encoder head over extracted features.

    Encoder: features -> net_arch MLP -> 64-d hidden -> (mu, log_sigma) of
    size ``latent_dim``. Decoder: latent -> [64, 64] MLP -> reconstructed
    features (squashed output).

    :param net_arch: hidden architecture of the encoder MLP; defaults to [64].
    """
    super(VAE, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )
    # FIX: the original `else` branch overwrote a user-supplied net_arch with
    # [64]; only fall back to the default when none is provided.
    if net_arch is None:
        net_arch = [64]
    self.net_arch = net_arch
    self.activation_fn = activation_fn
    self.features_extractor = features_extractor
    self.features_dim = features_dim
    self.latent_dim = latent_dim
    self.normalize_images = normalize_images
    encoder = create_mlp(self.features_dim, 64, self.net_arch, self.activation_fn)
    self.encoder = nn.Sequential(*encoder)
    # Gaussian posterior parameters.
    self.fc_mu = nn.Linear(64, self.latent_dim)
    self.fc_logsigma = nn.Linear(64, self.latent_dim)
    # FIX: keep the decoder architecture in a local instead of clobbering
    # self.net_arch, which is saved to re-create the object at load time.
    decoder_net_arch = [64, 64]
    decoder = create_mlp(self.latent_dim, self.features_dim, decoder_net_arch,
                         self.activation_fn, True)
    self.decoder = nn.Sequential(*decoder)
def create_sde_features_extractor(
    features_dim: int, sde_net_arch: List[int], activation_fn: Type[nn.Module]
) -> Tuple[nn.Sequential, int]:
    """
    Create the neural network used to extract features for gSDE exploration.

    :param features_dim: dimension of the input features
    :param sde_net_arch: hidden layer sizes (empty list = pass features through)
    :param activation_fn: activation used between hidden layers
    :return: the feature-extractor module and its output dimension
    """
    # Special case: when using states directly as features (empty architecture),
    # don't use any activation function.
    if len(sde_net_arch) > 0:
        sde_activation = activation_fn
        latent_sde_dim = sde_net_arch[-1]
    else:
        sde_activation = None
        latent_sde_dim = features_dim
    layers = create_mlp(features_dim, -1, sde_net_arch,
                        activation_fn=sde_activation, squash_output=False)
    return nn.Sequential(*layers), latent_sde_dim
def __init__(self, observation_space: gym.spaces.Space, action_space: gym.spaces.Space,
             net_arch: List[int], features_extractor: nn.Module, features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU, use_sde: bool = False,
             log_std_init: float = -3, full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None, use_expln: bool = False,
             clip_mean: float = 2.0, normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    """Gaussian actor with linear mean/log-std heads on a shared latent MLP.

    NOTE(review): the gSDE-related arguments (use_sde, sde_net_arch, use_expln,
    full_std, clip_mean, log_std_init) are stored for checkpoint re-creation
    but no gSDE distribution is built in this variant — confirm intended.
    """
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device, squash_output=True)
    # Save arguments to re-create object at loading.
    # FIX: the original assigned self.sde_net_arch twice; keep a single assignment.
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean
    action_dim = get_action_dim(self.action_space)
    # Shared trunk; output_dim=-1 means create_mlp adds no final layer.
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim
    self.mu = nn.Linear(last_layer_dim, action_dim)
    self.log_std = nn.Linear(last_layer_dim, action_dim)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    share_features_extractor: bool = True,
    action_dist_num=32,
):
    """Implicit-quantile-style critic: a head MLP over (features, action), a
    cosine-embedding branch for quantile fractions, and an output MLP.

    :param action_dist_num: number of quantile samples per forward pass.
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )
    self.share_features_extractor = share_features_extractor
    self.n_cos = 64  # number of cosine basis functions for the tau embedding
    self.action_dist_num = action_dist_num
    self.first_hidden_size = net_arch[0]
    # Fixed multipliers pi * i for the cosine embedding.
    # NOTE(review): stored as a plain attribute, not a registered buffer, so it
    # will not follow later .to(device) calls — confirm this is intended.
    self.pis = th.tensor(
        [np.pi * i for i in range(1, self.n_cos + 1)], dtype=th.float32
    ).view(1, 1, self.n_cos).to(self.device)
    action_dim = get_action_dim(self.action_space)
    net = create_mlp(features_dim + action_dim, 1, net_arch, activation_fn)
    # Split the MLP: first Linear+activation as the head, remainder as output.
    self.net_head = nn.Sequential(*net[0:2])
    self.net_cos_embedding = nn.Sequential(
        nn.Linear(self.n_cos, self.first_hidden_size), activation_fn())
    self.net_out = nn.Sequential(*net[2:])
    # FIX: removed the redundant add_module calls — assigning an nn.Module
    # attribute already registers it under the same name.
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    mask_policy: int = 0,
    normalize_images: bool = True,
):
    """Deterministic actor with a feature mask that zeroes out the last
    ``mask_policy`` input features (mask presumably applied in forward)."""
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    action_dim = get_action_dim(self.action_space)
    # Deterministic policy head, squashed to [-1, 1] by create_mlp.
    self.mu = nn.Sequential(
        *create_mlp(features_dim, action_dim, net_arch, activation_fn, squash_output=True)
    )
    # 1.0 for features the policy may see, 0.0 for the trailing mask_policy ones.
    mask = [True] * features_dim
    for offset in range(1, mask_policy + 1):
        mask[-offset] = False
    self.policy_mask = th.Tensor(mask)
def __init__(self, observation_space: gym.Space, obs_unwrapper_function: Callable, obs_space_dict: Dict[str, gym.Space], normalize_images: bool, features_dim: int = 20, cnn_extractor_class: BaseFeaturesExtractor = NatureCNN, cnn_feature_dim: int = 12, mlp_net_arch: Iterable = (4, ), mlp_feature_dim: int = 6, embedding_dim: int = 6):
    """Generic combined extractor: infers per-space extractor types from the
    observation dict, builds CNN / MLP / embedding branches accordingly, and
    projects their concatenated outputs down to ``features_dim``.

    :param obs_space_dict: (possibly nested) dict of gym spaces to extract from
    :param cnn_extractor_class: extractor class applied to each image space
    :param mlp_net_arch: hidden architecture of the shared MLP branch
    :param embedding_dim: embedding size used for every Discrete space
    """
    super().__init__(observation_space, obs_unwrapper_function, obs_space_dict, normalize_images, features_dim)
    # This gets the string obs spaces associated with each extractor
    # They're stored in a nested _ separated string
    self.split_chars = "__"
    # Mapping of extractor kind -> list of "__"-joined space paths.
    self.inferred_extractor_mapping = self.recursive_space_infer(obs_space_dict)
    self.cnn_spaces = self.inferred_extractor_mapping['CNN']
    self.mlp_spaces = self.inferred_extractor_mapping['MLP']
    self.embed_spaces = self.inferred_extractor_mapping['EMBED']
    _cnn_extractors = []
    # Running total of the concatenated branch outputs (input to projection).
    total_flattened_dim = 0
    # Create CNN extractors — one per image space, each emitting cnn_feature_dim.
    for space_designation in self.cnn_spaces:
        cnn_space = recursive_lookup_from_string(obs_space_dict, space_designation, self.split_chars)
        assert is_image_space(cnn_space)
        _cnn_extractors.append(cnn_extractor_class(cnn_space, cnn_feature_dim))
        total_flattened_dim += cnn_feature_dim
    self.cnn_extractors = nn.ModuleList(_cnn_extractors)
    # Create MLP Extractor — a single shared MLP over all Box spaces
    # concatenated (flattened) together.
    total_mlp_dim = 0
    if len(self.mlp_spaces) > 0:
        for space_designation in self.mlp_spaces:
            mlp_space = recursive_lookup_from_string(obs_space_dict, space_designation, self.split_chars)
            assert isinstance(mlp_space, gym.spaces.Box)
            # assume if the space is multi-dimensional, we'll flatten it
            # before sending it to a MLP
            n_dim = int(np.prod(mlp_space.shape))
            total_mlp_dim += n_dim
        self.mlp_extractor = nn.Sequential(*create_mlp(total_mlp_dim, mlp_feature_dim, mlp_net_arch))
        total_flattened_dim += mlp_feature_dim
    else:
        self.mlp_extractor = None
    # Create Embed tables — one nn.Embedding per Discrete space.
    if len(self.embed_spaces) > 0:
        _embedding_tables = []
        for space_designation in self.embed_spaces:
            embed_space = recursive_lookup_from_string(obs_space_dict, space_designation, self.split_chars)
            assert isinstance(embed_space, gym.spaces.Discrete)
            space_n = embed_space.n
            _embedding_tables.append(nn.Embedding(embedding_dim=embedding_dim, num_embeddings=space_n))
            total_flattened_dim += embedding_dim
        self.embedding_tables = nn.ModuleList(_embedding_tables)
    else:
        self.embedding_tables = None
    # Final linear projection from the concatenated branch outputs.
    self.projection_layer = nn.Linear(total_flattened_dim, features_dim)