def __init__(self, latent_dim, input_dim, lstm_output_size=20,
             use_delta_t=True, device=torch.device("cpu")):
    super(Encoder_z0_RNN, self).__init__()
    self.gru_rnn_output_size = lstm_output_size
    self.latent_dim = latent_dim
    self.input_dim = input_dim
    self.device = device
    self.use_delta_t = use_delta_t
    # Map the last GRU state to the 2 * latent_dim parameters of the z0 distribution.
    self.hiddens_to_z0 = nn.Sequential(
        nn.Linear(self.gru_rnn_output_size, 50),
        nn.Tanh(),
        nn.Linear(50, latent_dim * 2),
    )
    utils.init_network_weights(self.hiddens_to_z0)
    if use_delta_t:
        # One extra input feature for the time delta between observations.
        self.input_dim += 1
    self.gru_rnn = GRU(self.input_dim, self.gru_rnn_output_size).to(device)
def __init__(self, input_size, hidden_size, layers=1, bidirectional=False,
             initpara=True, attn_decode=False, post_size=None):
    super(MyGRU, self).__init__()
    self.input_size, self.hidden_size, self.layers, self.bidirectional = \
        input_size, hidden_size, layers, bidirectional
    self.GRU = GRU(input_size, hidden_size, layers, bidirectional=bidirectional)
    self.initpara = initpara
    if initpara:
        # Learned initial hidden state, one slice per (direction x layer).
        if bidirectional:
            self.h_init = Parameter(torch.Tensor(2 * layers, 1, hidden_size))
        else:
            self.h_init = Parameter(torch.Tensor(layers, 1, hidden_size))
        self.reset_parameters()
    if attn_decode:
        self.attn_query = nn.Linear(hidden_size, post_size)
def __init__(self, input_size, hidden_size, post_size, initpara=True,
             gru_input_attn=False):
    super().__init__()
    self.input_size, self.hidden_size, self.post_size = \
        input_size, hidden_size, post_size
    self.gru_input_attn = gru_input_attn
    if self.gru_input_attn:
        # GRU input is the token input concatenated with a post_size-dim attention vector.
        self.GRU = GRU(input_size + post_size, hidden_size, 1)
    else:
        self.GRU = GRU(input_size, hidden_size, 1)
    self.attn_query = nn.Linear(hidden_size, post_size)
    if initpara:
        # Learned initial hidden state, initialized uniformly in [-stdv, stdv].
        self.h_init = Parameter(torch.Tensor(1, 1, hidden_size))
        stdv = 1.0 / math.sqrt(self.hidden_size)
        self.h_init.data.uniform_(-stdv, stdv)
def __init__(self, input_size, hidden_size, initpara=True):
    super().__init__()
    self.input_size, self.hidden_size = input_size, hidden_size
    self.GRU = GRU(input_size, hidden_size, 1)
    self.initpara = initpara
    if initpara:
        self.h_init = Parameter(torch.Tensor(1, 1, hidden_size))
        stdv = 1.0 / math.sqrt(self.hidden_size)
        self.h_init.data.uniform_(-stdv, stdv)
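# A hypothetical usage sketch (not taken from the snippets above): a learned
# initial state such as h_init is typically tiled across the batch dimension
# before being passed to the GRU. The sizes below are illustrative assumptions.
import torch
from torch.nn import GRU, Parameter

hidden_size, batch_size = 64, 32
gru = GRU(16, hidden_size, 1)
h_init = Parameter(torch.zeros(1, 1, hidden_size))      # (layers, 1, hidden)
h0 = h_init.expand(-1, batch_size, -1).contiguous()     # (layers, batch, hidden)
out, h_n = gru(torch.rand(10, batch_size, 16), h0)      # out: (10, batch, hidden)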
def __init__(self, latent_dim, input_dim, device, hidden_to_z0_units=20,
             bidirectional=False):
    super(Encoder_z0_RNN, self).__init__()
    self.device = device
    self.latent_dim = latent_dim  # latent dim for z0 and encoder rnn
    self.input_dim = input_dim
    # Map the (possibly bidirectional) GRU state to the 2 * latent_dim
    # parameters of the z0 distribution.
    self.hidden_to_z0 = nn.Sequential(
        nn.Linear(2 * latent_dim if bidirectional else latent_dim, hidden_to_z0_units),
        nn.Tanh(),
        nn.Linear(hidden_to_z0_units, 2 * latent_dim))
    self.rnn = GRU(input_dim, latent_dim, batch_first=True,
                   bidirectional=bidirectional).to(device)
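# A minimal sketch (an assumption, not shown in the encoders above): both
# encoders end in a layer of width 2 * latent_dim, which is commonly split into
# the mean and (log-)scale of a Gaussian posterior over z0 and reparameterized.
import torch

latent_dim = 10
h = torch.randn(32, 2 * latent_dim)                 # stand-in for hidden_to_z0(...)
mean_z0, logvar_z0 = h.split(latent_dim, dim=-1)    # split into mean and log-variance
z0 = mean_z0 + torch.randn_like(mean_z0) * torch.exp(0.5 * logvar_z0)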
def __init__(self, d_model, nhead, bidirectional=True, dropout=0, activation="relu"):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    # The position-wise feedforward block is replaced by a GRU.
    # self.linear1 = Linear(d_model, dim_feedforward)
    self.gru = GRU(d_model, d_model * 2, 1, bidirectional=bidirectional)
    self.dropout = Dropout(dropout)
    # self.linear2 = Linear(dim_feedforward, d_model)
    if bidirectional:
        self.linear2 = Linear(d_model * 2 * 2, d_model)
    else:
        self.linear2 = Linear(d_model * 2, d_model)
    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.activation = _get_activation_fn(activation)
def __init__(self, input_size, hidden_size, attn_wait=3, initpara=True):
    super().__init__()
    self.input_size, self.hidden_size = input_size, hidden_size
    self.attn_wait = attn_wait
    # Decoder input is the token input concatenated with a hidden_size-dim context vector.
    self.decoderGRU = GRU(input_size + hidden_size, hidden_size, 1)
    self.encoderGRU = GRU(input_size, hidden_size, 1)
    self.attn_query = nn.Linear(hidden_size, hidden_size)
    # self.attn_null = Parameter(torch.Tensor(1, 1, hidden_size))
    # stdv = 1.0 / math.sqrt(self.hidden_size)
    # self.attn_null.data.uniform_(-stdv, stdv)
    if initpara:
        # Learned initial hidden states for the encoder and decoder GRUs.
        self.eh_init = Parameter(torch.Tensor(1, 1, hidden_size))
        stdv = 1.0 / math.sqrt(self.hidden_size)
        self.eh_init.data.uniform_(-stdv, stdv)
        self.dh_init = Parameter(torch.Tensor(1, 1, hidden_size))
        self.dh_init.data.uniform_(-stdv, stdv)
def __init__(self, input_size, hidden_size, layers=1, bidirectional=False, initpara=True):
    super(MyGRU, self).__init__()
    self.input_size, self.hidden_size, self.layers, self.bidirectional = \
        input_size, hidden_size, layers, bidirectional
    self.GRU = GRU(input_size, hidden_size, layers, bidirectional=bidirectional)
    self.initpara = initpara
    if initpara:
        # Learned initial hidden state, one slice per (direction x layer).
        if bidirectional:
            self.h_init = Parameter(torch.Tensor(2 * layers, 1, hidden_size))
        else:
            self.h_init = Parameter(torch.Tensor(layers, 1, hidden_size))
        self.reset_parameters()
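# reset_parameters is not shown in the two MyGRU snippets above. A sketch that
# would be consistent with the uniform [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]
# initialization used by the other modules in this collection (an assumption):
import math

def reset_parameters(self):
    # Initialize the learned initial hidden state the same way the other wrappers do.
    stdv = 1.0 / math.sqrt(self.hidden_size)
    self.h_init.data.uniform_(-stdv, stdv)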
def __init__(self, n_mels, conv_dim, rnn_dim, dropout=0.5):
    super().__init__()
    self.convs = nn.ModuleList([
        BatchNormConv(n_mels, conv_dim, 5, activation=torch.tanh, dropout=dropout),
        BatchNormConv(conv_dim, conv_dim, 5, activation=torch.tanh, dropout=dropout),
        BatchNormConv(conv_dim, conv_dim, 5, activation=torch.tanh, dropout=dropout),
    ])
    self.gru = GRU(conv_dim, rnn_dim, bidirectional=True, batch_first=True)
    self.linear = nn.Linear(2 * rnn_dim, 1)
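# A minimal shape sketch (illustrative values, not from the snippet above): the
# bidirectional GRU consumes (batch, frames, conv_dim) features produced by the
# conv stack, and the linear head maps its 2 * rnn_dim outputs to one score per frame.
import torch
from torch.nn import GRU, Linear

conv_dim, rnn_dim = 256, 128
gru = GRU(conv_dim, rnn_dim, bidirectional=True, batch_first=True)
linear = Linear(2 * rnn_dim, 1)

x = torch.rand(8, 100, conv_dim)   # (batch, frames, conv_dim)
out, _ = gru(x)                    # (8, 100, 2 * rnn_dim)
scores = linear(out)               # (8, 100, 1)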
class TransformerEncoderLayer(Module):
    r"""TransformerEncoderLayer is made up of self-attention and a feedforward block.

    This standard encoder layer is based on the paper "Attention Is All You Need".
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
    Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all
    you need. In Advances in Neural Information Processing Systems, pages 6000-6010.
    Users may modify or implement it in a different way during application.
    In this variant the position-wise feedforward block is replaced by an
    (optionally bidirectional) GRU followed by a linear projection.

    Args:
        d_model: the number of expected features in the input (required).
        nhead: the number of heads in the multiheadattention models (required).
        bidirectional: whether the GRU replacing the feedforward block is
            bidirectional (default=True).
        dropout: the dropout value (default=0).
        activation: the activation function of the intermediate layer, relu or
            gelu (default=relu).

    Examples::
        >>> encoder_layer = TransformerEncoderLayer(d_model=512, nhead=8)
        >>> src = torch.rand(10, 32, 512)
        >>> out = encoder_layer(src)
    """

    def __init__(self, d_model, nhead, bidirectional=True, dropout=0, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # The position-wise feedforward block is replaced by a GRU.
        # self.linear1 = Linear(d_model, dim_feedforward)
        self.gru = GRU(d_model, d_model * 2, 1, bidirectional=bidirectional)
        self.dropout = Dropout(dropout)
        # self.linear2 = Linear(dim_feedforward, d_model)
        if bidirectional:
            self.linear2 = Linear(d_model * 2 * 2, d_model)
        else:
            self.linear2 = Linear(d_model * 2, d_model)
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerEncoderLayer, self).__setstate__(state)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor
        r"""Pass the input through the encoder layer.

        Args:
            src: the sequence to the encoder layer (required).
            src_mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                              key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        self.gru.flatten_parameters()
        out, h_n = self.gru(src)
        del h_n
        src2 = self.linear2(self.dropout(self.activation(out)))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
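# A minimal shape check (illustrative, not part of the class above): with
# bidirectional=True, the GRU that replaces the feedforward block emits
# 2 * (d_model * 2) = d_model * 4 features per position, which is exactly the
# input width linear2 expects before projecting back to d_model.
import torch
from torch.nn import GRU, Linear

d_model = 512
gru = GRU(d_model, d_model * 2, 1, bidirectional=True)
linear2 = Linear(d_model * 2 * 2, d_model)

src = torch.rand(10, 32, d_model)   # (seq_len, batch, d_model)
out, _ = gru(src)                   # (10, 32, d_model * 4)
assert linear2(out).shape == src.shape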