def get_mlp(
    from_dim: int,
    to_dim: int,
    hidden_dims: List[int],
    layer_norm: bool,
    dropout: float,
    export_embedding: bool = False,
):
    layers = []
    for i in range(len(hidden_dims)):
        dim = hidden_dims[i]
        layers.append(nn.Linear(from_dim, dim, True))
        # Skip ReLU, LayerNorm, and dropout for the last layer if export_embedding
        if not (export_embedding and i == len(hidden_dims) - 1):
            layers.append(get_activation(Activation.RELU))
            if layer_norm:
                layers.append(nn.LayerNorm(dim))
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
        from_dim = dim
    if to_dim > 0:
        layers.append(nn.Linear(from_dim, to_dim, True))
    return nn.Sequential(*layers)

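# Illustrative usage sketch (not part of the original source): build a small
# two-hidden-layer projection head with get_mlp. Assumes the surrounding
# module's imports are in scope (torch.nn as nn, typing.List, and the
# get_activation / Activation helpers used above); the dimensions are
# arbitrary example values.
example_mlp = get_mlp(
    from_dim=128,
    to_dim=10,
    hidden_dims=[256, 64],
    layer_norm=True,
    dropout=0.1,
)
# example_mlp is an nn.Sequential of Linear -> ReLU -> LayerNorm -> Dropout
# per hidden dim, followed by a final Linear(64, 10) projection.
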
def __init__(self, config: Config, in_dim: int, out_dim: int = 0) -> None:
    super().__init__(config)
    layers = []
    for dim in config.hidden_dims or []:
        layers.append(nn.Linear(in_dim, dim, config.bias))
        layers.append(get_activation(config.activation))
        if config.layer_norm:
            layers.append(nn.LayerNorm(dim))
        if config.dropout > 0:
            layers.append(nn.Dropout(config.dropout))
        in_dim = dim
    if config.out_dim is not None:
        out_dim = config.out_dim
    if out_dim > 0:
        layers.append(nn.Linear(in_dim, out_dim, config.bias))
    assert len(layers) > 0
    if config.spectral_normalization:
        # Apply spectral normalization to the last (output) linear layer.
        layers[-1] = torch.nn.utils.spectral_norm(layers[-1])
    self.mlp = nn.Sequential(*layers)
    self.out_dim = out_dim if out_dim > 0 else config.hidden_dims[-1]
    self.temperature = config.temperature
    if config.load_model_path:
        # Optionally warm-start the MLP from a saved state dict, loaded onto CPU.
        with PathManager.open(config.load_model_path, "rb") as f:
            mlp_state = torch.load(
                f,
                map_location=lambda s, l: default_restore_location(s, "cpu"),
            )
        print("loaded mlp state")
        self.load_state_dict(mlp_state, strict=config.load_strict)
    log_class_usage(__class__)

def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)

    out_channels = config.cnn.kernel_num
    kernel_sizes = config.cnn.kernel_sizes
    weight_norm = config.cnn.weight_norm
    dilated = config.cnn.dilated
    causal = config.cnn.causal
    activation = config.activation
    separable = config.separable

    conv_layers = []
    trim_layers = []
    linear_layers = []
    in_channels = embed_dim

    for i, k in enumerate(kernel_sizes):
        assert (k - 1) % 2 == 0

        proj = (
            nn.Linear(in_channels, out_channels)
            if in_channels != out_channels
            else None
        )
        linear_layers.append(proj)

        dilation = 2 ** i if dilated else 1
        padding = (k - 1) * dilation if causal else ((k - 1) // 2) * dilation

        conv_layer = SeparableConv1d if separable else nn.Conv1d
        single_conv = conv_layer(
            in_channels,
            (out_channels * 2 if activation == Activation.GLU else out_channels),
            k,
            padding=padding,
            dilation=dilation,
        )
        single_conv = (
            nn.utils.weight_norm(single_conv) if weight_norm else single_conv
        )
        conv_layers.append(single_conv)

        # Non-causal convolutions are centered, so they will consume
        # ((k - 1) // 2) * d padding on both the left and the right of the
        # sequence. Causal convolutions are shifted to the left (to account
        # for temporal ordering), so they will only consume padding from the
        # left. Therefore, we pad this side with the full amount (k - 1) * d.
        trim = Trim1d(padding) if causal else None
        trim_layers.append(trim)

        in_channels = out_channels

    self.convs = nn.ModuleList(conv_layers)
    self.trims = nn.ModuleList(trim_layers)
    self.projections = nn.ModuleList(linear_layers)
    self.activation = get_activation(activation)

    self.representation_dim = out_channels
    self.dropout = nn.Dropout(p=config.dropout)

def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)

    out_channels = config.cnn.kernel_num
    kernel_sizes = config.cnn.kernel_sizes
    weight_norm = config.cnn.weight_norm
    dilated = config.cnn.dilated
    causal = config.cnn.causal
    activation = config.activation
    pooling_type = config.pooling_type
    separable = config.separable
    bottleneck = config.bottleneck

    conv_layers = []
    linear_layers = []
    in_channels = embed_dim

    for i, k in enumerate(kernel_sizes):
        assert (k - 1) % 2 == 0

        proj = (
            nn.Linear(in_channels, out_channels)
            if in_channels != out_channels
            else None
        )
        linear_layers.append(proj)

        single_conv = create_conv_package(
            index=i,
            activation=activation,
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=k,
            causal=causal,
            dilated=dilated,
            separable=separable,
            bottleneck=bottleneck,
            weight_norm=weight_norm,
        )
        conv_layers.append(single_conv)

        in_channels = out_channels

    self.convs = nn.ModuleList(conv_layers)
    self.projections = nn.ModuleList(linear_layers)
    self.activation = get_activation(activation)
    self.pooling_type = pooling_type
    self.representation_dim = out_channels
    self.dropout = nn.Dropout(p=config.dropout)

def __init__(
    self,
    in_dim: int,
    out_dim: int,
    bias: bool,
    hidden_dims: List[int] = None,
    activation: Activation = Activation.RELU,
) -> None:
    super().__init__()
    layers = []
    for dim in hidden_dims or []:
        layers.append(nn.Linear(in_dim, dim, bias))
        layers.append(get_activation(activation))
        in_dim = dim
    layers.append(nn.Linear(in_dim, out_dim, bias))
    self.mlp = nn.Sequential(*layers)

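# Illustrative sketch (not part of the original source): with in_dim=16,
# out_dim=4, bias=True, hidden_dims=[32] and the default RELU activation,
# the constructor above sets self.mlp to the equivalent of
#     nn.Sequential(
#         nn.Linear(16, 32, bias=True),
#         nn.ReLU(),
#         nn.Linear(32, 4, bias=True),
#     )
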
def __init__(self, config: Config, in_dim: int, out_dim: int = 0) -> None:
    super().__init__(config)
    layers = []
    for dim in config.hidden_dims or []:
        layers.append(nn.Linear(in_dim, dim))
        layers.append(get_activation(config.activation))
        if config.layer_norm:
            layers.append(nn.LayerNorm(dim))
        if config.dropout > 0:
            layers.append(nn.Dropout(config.dropout))
        in_dim = dim
    if config.out_dim:
        out_dim = config.out_dim
    if out_dim > 0:
        layers.append(nn.Linear(in_dim, out_dim))
    self.mlp = nn.Sequential(*layers)
    self.out_dim = out_dim if out_dim > 0 else config.hidden_dims[-1]

def __init__(
    self,
    embed_dim: int,
    conv_dim: int,
    max_target_positions: int,
    length_dropout: float,
    glu: bool,
    activation,
    pooling_type,
    conv_layers,
):
    super().__init__()
    self.length_dropout = length_dropout
    self.conv_layers = nn.ModuleList(conv_layers)
    self.glu = glu
    if glu:
        # GLU halves its input along the channel dimension, so project to
        # 2 * conv_dim when it is used.
        self.linear1 = nn.Linear(embed_dim, 2 * conv_dim)
    else:
        self.linear1 = nn.Linear(embed_dim, conv_dim)
    self.linear2 = nn.Linear(conv_dim, embed_dim)
    self.activation = get_activation(activation, dim=2)
    self.pooling_type = pooling_type
    self.lengths_pred = nn.Linear(embed_dim, max_target_positions)

def __init__(self, config: Config, in_dim: int, out_dim: int = 0) -> None:
    super().__init__(config)
    layers = []
    for dim in config.hidden_dims or []:
        layers.append(nn.Linear(in_dim, dim, config.bias))
        layers.append(get_activation(config.activation))
        if config.layer_norm:
            layers.append(nn.LayerNorm(dim))
        if config.dropout > 0:
            layers.append(nn.Dropout(config.dropout))
        in_dim = dim
    if config.out_dim is not None:
        out_dim = config.out_dim
    if out_dim > 0:
        layers.append(nn.Linear(in_dim, out_dim, config.bias))
    assert len(layers) > 0
    if config.spectral_normalization:
        layers[-1] = torch.nn.utils.spectral_norm(layers[-1])
    self.mlp = nn.Sequential(*layers)
    self.out_dim = out_dim if out_dim > 0 else config.hidden_dims[-1]
    self.temperature = config.temperature
    log_class_usage(__class__)