def _get_convolution_net(
    in_channels: int,
    history_len: int = 1,
    channels: List = None,
    kernel_sizes: List = None,
    strides: List = None,
    use_bias: bool = False,
    use_groups: bool = False,
    use_normalization: bool = False,
    use_dropout: bool = False,
    activation: str = "ReLU",
) -> nn.Module:
    channels = channels or [32, 64, 32]
    kernel_sizes = kernel_sizes or [8, 4, 3]
    strides = strides or [4, 2, 1]
    activation_fn = torch.nn.__dict__[activation]
    assert len(channels) == len(kernel_sizes) == len(strides)

    def _get_block(**conv_params):
        layers = [nn.Conv2d(**conv_params)]
        if use_normalization:
            layers.append(nn.InstanceNorm2d(conv_params["out_channels"]))
        if use_dropout:
            layers.append(nn.Dropout2d(p=0.1))
        layers.append(activation_fn(inplace=True))
        return layers

    channels.insert(0, history_len * in_channels)
    params = []
    for i, (in_channels, out_channels) in enumerate(utils.pairwise(channels)):
        num_groups = 1
        if use_groups:
            num_groups = history_len if i == 0 else 4
        params.append({
            "in_channels": in_channels,
            "out_channels": out_channels,
            "bias": use_bias,
            "kernel_size": kernel_sizes[i],
            "stride": strides[i],
            "groups": num_groups,
        })

    layers = []
    for block_params in params:
        layers.extend(_get_block(**block_params))

    net = nn.Sequential(*layers)
    net.apply(utils.create_optimal_inner_init(activation_fn))

    # input_shape: tuple = (3, 84, 84)
    # conv_input = torch.Tensor(torch.randn((1,) + input_shape))
    # conv_output = net(conv_input)
    # torch.Size([1, 32, 7, 7]), 1568
    # print(conv_output.shape, conv_output.nelement())
    return net
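# Usage sketch (illustrative, not part of the library code): with the default
# channels/kernel_sizes/strides above, an Atari-style (3, 84, 84) observation is
# reduced to a 32 x 7 x 7 feature map (1568 values), matching the commented-out
# shape check inside the function. Assumes `torch` is imported and the function
# above (with its `utils` helpers) is available in scope.
#
# conv_net = _get_convolution_net(in_channels=3, history_len=1)
# conv_output = conv_net(torch.randn(1, 3, 84, 84))
# print(conv_output.shape, conv_output.nelement())  # torch.Size([1, 32, 7, 7]) 1568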
def _get_convolution_net(
    in_channels: int,
    history_len: int = 1,
    channels: List = None,
    use_bias: bool = False,
    use_groups: bool = False,
    use_normalization: bool = False,
    use_dropout: bool = False,
    activation: str = "ReLU",
) -> nn.Module:
    channels = channels or [12, 3, 12]
    activation_fn = torch.nn.__dict__[activation]

    def _get_block(**conv_params):
        layers = [nn.Conv2d(**conv_params)]
        if use_normalization:
            layers.append(nn.InstanceNorm2d(conv_params["out_channels"]))
        if use_dropout:
            layers.append(nn.Dropout2d(p=0.1))
        layers.append(activation_fn(inplace=True))
        layers.append(nn.MaxPool2d(2))
        return layers

    channels.insert(0, history_len * in_channels)
    params = []
    for i, (in_channels, out_channels) in enumerate(utils.pairwise(channels)):
        num_groups = 1
        if use_groups:
            num_groups = history_len if i == 0 else 4
        params.append({
            "in_channels": in_channels,
            "out_channels": out_channels,
            "bias": use_bias,
            "kernel_size": 3,
            "stride": 1,
            "padding": 1,
            "groups": num_groups,
        })

    layers = []
    for block_params in params:
        layers.extend(_get_block(**block_params))

    net = nn.Sequential(*layers)
    net.apply(utils.initialization.get_optimal_inner_init(activation_fn))
    return net
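# Usage sketch (illustrative): this variant keeps the spatial size with 3x3 convs
# (stride 1, padding 1) and halves it with MaxPool2d(2) after every block, so an
# 84 x 84 input shrinks 84 -> 42 -> 21 -> 10 across the three default blocks,
# ending with the default 12 output channels. Assumes `torch` and the
# `utils.initialization` helpers referenced above.
#
# conv_net = _get_convolution_net(in_channels=3, history_len=1, use_normalization=True)
# conv_output = conv_net(torch.randn(1, 3, 84, 84))
# print(conv_output.shape)  # torch.Size([1, 12, 10, 10])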
def get_convolution_net(
    in_channels: int,
    history_len: int = 1,
    channels: List = None,
    kernel_sizes: List = None,
    strides: List = None,
    groups: List = None,
    use_bias: bool = False,
    normalization: str = None,
    dropout_rate: float = None,
    activation: str = "ReLU",
) -> nn.Module:
    channels = channels or [32, 64, 64]
    kernel_sizes = kernel_sizes or [8, 4, 3]
    strides = strides or [4, 2, 1]
    groups = groups or [1, 1, 1]
    activation_fn = nn.__dict__[activation]
    assert len(channels) == len(kernel_sizes) == len(strides) == len(groups)

    def _get_block(**conv_params):
        layers = [nn.Conv2d(**conv_params)]
        if normalization is not None:
            normalization_fn = MODULES.get_if_str(normalization)
            layers.append(normalization_fn(conv_params["out_channels"]))
        if dropout_rate is not None:
            layers.append(nn.Dropout2d(p=dropout_rate))
        layers.append(activation_fn(inplace=True))
        return layers

    channels.insert(0, history_len * in_channels)
    params = []
    for i, (in_channels, out_channels) in enumerate(utils.pairwise(channels)):
        params.append({
            "in_channels": in_channels,
            "out_channels": out_channels,
            "bias": use_bias,
            "kernel_size": kernel_sizes[i],
            "stride": strides[i],
            "groups": groups[i],
        })

    layers = []
    for block_params in params:
        layers.extend(_get_block(**block_params))

    net = nn.Sequential(*layers)
    net.apply(utils.create_optimal_inner_init(activation_fn))
    return net
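# Usage sketch (illustrative): the public variant exposes normalization and
# dropout as parameters rather than boolean flags; string names are resolved
# through the MODULES registry, which is assumed here to contain the standard
# torch.nn modules (e.g. "InstanceNorm2d"). With the default [32, 64, 64]
# channels, an (3, 84, 84) input yields a 64 x 7 x 7 feature map.
#
# conv_net = get_convolution_net(
#     in_channels=3,
#     normalization="InstanceNorm2d",
#     dropout_rate=0.1,
# )
# conv_output = conv_net(torch.randn(1, 3, 84, 84))
# print(conv_output.shape, conv_output.nelement())  # torch.Size([1, 64, 7, 7]) 3136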
def _get_linear_net(
    in_features: int,
    history_len: int = 1,
    features: List = None,
    use_bias: bool = False,
    use_normalization: bool = False,
    use_dropout: bool = False,
    activation: str = "ReLU",
) -> nn.Module:
    features = features or [12, 128, 12]
    activation_fn = torch.nn.__dict__[activation]

    def _get_block(**linear_params):
        layers = [nn.Linear(**linear_params)]
        if use_normalization:
            layers.append(nn.LayerNorm(linear_params["out_features"]))
        if use_dropout:
            layers.append(nn.Dropout(p=0.1))
        layers.append(activation_fn(inplace=True))
        return layers

    features.insert(0, history_len * in_features)
    params = []
    for i, (in_features, out_features) in enumerate(utils.pairwise(features)):
        params.append({
            "in_features": in_features,
            "out_features": out_features,
            "bias": use_bias,
        })

    layers = []
    for block_params in params:
        layers.extend(_get_block(**block_params))

    net = nn.Sequential(*layers)
    net.apply(utils.initialization.get_optimal_inner_init(activation_fn))
    return net
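# Usage sketch (illustrative): the linear backbone consumes `history_len` frames
# of `in_features` each as one flat vector, so the first Linear layer takes
# history_len * in_features inputs and the default [12, 128, 12] head ends with
# 12 features. Assumes `torch` and the `utils.initialization` helpers above.
#
# linear_net = _get_linear_net(in_features=8, history_len=4, use_normalization=True)
# linear_output = linear_net(torch.randn(16, 32))  # batch of 16, 4 * 8 = 32 features
# print(linear_output.shape)  # torch.Size([16, 12])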
def __init__(
    self,
    hiddens,
    layer_fn: Union[str, Dict, List],
    norm_fn: Union[str, Dict, List] = None,
    dropout_fn: Union[str, Dict, List] = None,
    activation_fn: Union[str, Dict, List] = None,
    residual: Union[bool, str] = False,
    layer_order: List = None,
):
    super().__init__()
    assert len(hiddens) > 1, "No sequence found"

    # layer params
    layer_fn = _process_additional_params(layer_fn, hiddens[1:])
    # normalization params
    norm_fn = _process_additional_params(norm_fn, hiddens[1:])
    # dropout params
    dropout_fn = _process_additional_params(dropout_fn, hiddens[1:])
    # activation params
    activation_fn = _process_additional_params(activation_fn, hiddens[1:])

    if isinstance(residual, bool) and residual:
        residual = "hard"
        residual = _process_additional_params(residual, hiddens[1:])

    layer_order = layer_order or ["layer", "norm", "drop", "act"]

    def _layer_fn(layer_fn, f_in, f_out, **kwargs):
        layer_fn = MODULES.get_if_str(layer_fn)
        layer_fn = layer_fn(f_in, f_out, **kwargs)
        return layer_fn

    def _normalization_fn(normalization_fn, f_in, f_out, **kwargs):
        normalization_fn = MODULES.get_if_str(normalization_fn)
        normalization_fn = \
            normalization_fn(f_out, **kwargs) \
            if normalization_fn is not None \
            else None
        return normalization_fn

    def _dropout_fn(dropout_fn, f_in, f_out, **kwargs):
        dropout_fn = MODULES.get_if_str(dropout_fn)
        dropout_fn = dropout_fn(**kwargs) \
            if dropout_fn is not None \
            else None
        return dropout_fn

    def _activation_fn(activation_fn, f_in, f_out, **kwargs):
        activation_fn = MODULES.get_if_str(activation_fn)
        activation_fn = activation_fn(**kwargs) \
            if activation_fn is not None \
            else None
        return activation_fn

    name2fn = {
        "layer": _layer_fn,
        "norm": _normalization_fn,
        "drop": _dropout_fn,
        "act": _activation_fn,
    }
    name2params = {
        "layer": layer_fn,
        "norm": norm_fn,
        "drop": dropout_fn,
        "act": activation_fn,
    }

    net = []
    for i, (f_in, f_out) in enumerate(utils.pairwise(hiddens)):
        block = []
        for key in layer_order:
            sub_fn = name2fn[key]
            sub_params = deepcopy(name2params[key][i])

            if isinstance(sub_params, Dict):
                sub_module = sub_params.pop("module")
            else:
                sub_module = sub_params
                sub_params = {}

            sub_block = sub_fn(sub_module, f_in, f_out, **sub_params)
            if sub_block is not None:
                block.append((f"{key}", sub_block))

        block_ = OrderedDict(block)
        block = torch.nn.Sequential(block_)

        if block_.get("act", None) is not None:
            activation = block_["act"]
            activation_init = \
                utils.get_optimal_inner_init(nonlinearity=activation)
            block.apply(activation_init)

        if residual == "hard" or (residual == "soft" and f_in == f_out):
            block = ResidualWrapper(net=block)
        net.append((f"block_{i}", block))

    self.net = torch.nn.Sequential(OrderedDict(net))
def __init__(
    self,
    hiddens,
    layer_fn: Union[str, Dict, List],
    norm_fn: Union[str, Dict, List] = None,
    dropout_fn: Union[str, Dict, List] = None,
    activation_fn: Union[str, Dict, List] = None,
    residual: Union[bool, str] = False,
    layer_order: List = None,
):
    """@TODO: Docs. Contribution is welcome."""
    super().__init__()
    assert len(hiddens) > 1, "No sequence found"

    # layer params
    layer_fn = _process_additional_params(layer_fn, hiddens[1:])
    # normalization params
    norm_fn = _process_additional_params(norm_fn, hiddens[1:])
    # dropout params
    dropout_fn = _process_additional_params(dropout_fn, hiddens[1:])
    # activation params
    activation_fn = _process_additional_params(activation_fn, hiddens[1:])

    if isinstance(residual, bool) and residual:
        residual = "hard"
        residual = _process_additional_params(residual, hiddens[1:])

    layer_order = layer_order or ["layer", "norm", "drop", "act"]

    name2fn = {
        "layer": _layer_fn,
        "norm": _normalization_fn,
        "drop": _dropout_fn,
        "act": _activation_fn,
    }
    name2params = {
        "layer": layer_fn,
        "norm": norm_fn,
        "drop": dropout_fn,
        "act": activation_fn,
    }

    net = []
    for i, (f_in, f_out) in enumerate(utils.pairwise(hiddens)):
        block_list = []
        for key in layer_order:
            sub_fn = name2fn[key]
            sub_params = deepcopy(name2params[key][i])

            if isinstance(sub_params, Dict):
                sub_module = sub_params.pop("module")
            else:
                sub_module = sub_params
                sub_params = {}

            sub_block = sub_fn(sub_module, f_in, f_out, **sub_params)
            if sub_block is not None:
                block_list.append((f"{key}", sub_block))

        block_dict = OrderedDict(block_list)
        block_net = torch.nn.Sequential(block_dict)

        if block_dict.get("act", None) is not None:
            activation = block_dict["act"]
            activation_init = utils.get_optimal_inner_init(
                nonlinearity=activation)
            block_net.apply(activation_init)

        if residual == "hard" or (residual == "soft" and f_in == f_out):
            block_net = ResidualWrapper(net=block_net)
        net.append((f"block_{i}", block_net))

    self.net = torch.nn.Sequential(OrderedDict(net))
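# Usage sketch (illustrative; in Catalyst this __init__ belongs to the
# SequentialNet module -- the class name is an assumption here, since it is not
# shown in the excerpt). String names are resolved through the MODULES registry,
# and a dict value passes extra kwargs via its "module" key. The call below
# would build two blocks (32 -> 64 -> 16), each ordered
# Linear -> LayerNorm -> Dropout -> ReLU per the default layer_order.
#
# net = SequentialNet(
#     hiddens=[32, 64, 16],
#     layer_fn="Linear",
#     norm_fn="LayerNorm",
#     dropout_fn={"module": "Dropout", "p": 0.1},
#     activation_fn="ReLU",
# )
# output = net(torch.randn(8, 32))  # torch.Size([8, 16]), assuming forward(x) == self.net(x)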