Example #1
    def __init__(self, input_size, hidden_size, num_layers):
        super(TorchRNN, self).__init__()

        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.softmax = nn.LogSoftmax(dim=1)
Example #2
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
Example #3
import torch
import torch.nn as nn
from torch.autograd import Variable

# One-hot encoding for each character in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

# One-cell RNN: input_dim (4) -> output_dim (2). Sequence length: 5
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)

# hidden: (num_layers * num_directions, batch, hidden_size)
# (batch_first only changes the layout of the input/output tensors, not of the hidden state)
hidden = (Variable(torch.randn(1, 1, 2)))

# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
inputs = Variable(torch.Tensor([h, e, l, l, o]))
for one in inputs:
    one = one.view(1, 1, -1)
    # Input: (batch, seq_len, input_size) when batch_first=True
    out, hidden = cell(one, hidden)
    print("one input size", one.size(), "out size", out.size())

# We can process the whole sequence at once
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
inputs = inputs.view(1, 5, -1)
out, hidden = cell(inputs, hidden)
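As a quick sanity check (not part of the original snippet), the whole-sequence call above should produce the following sizes, given batch_first=True and hidden_size=2:

# inputs: (1, 5, 4), out: (1, 5, 2), hidden: (1, 1, 2)
print("sequence input size", inputs.size(), "out size", out.size(), "hidden size", hidden.size())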
Example #4
 def __init__(self, hidden_size):
     super(RNN, self).__init__()
     self.rnn = nn.RNN(300, hidden_size, 1)
     self.linear = nn.Linear(hidden_size,1)
     self.sigmoid = nn.Sigmoid()
Example #5
import time
import math
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
import d2lzh_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

num_hiddens = 256
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)


# num_steps = 35
# batch_size = 2
# state = None
# X = torch.rand(num_steps, batch_size, vocab_size)
# Y, state_new = rnn_layer(X, state)
# print(Y.shape, len(state_new), state_new[0].shape)
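# Expected output if the check above is uncommented: torch.Size([35, 2, 256]) 1 torch.Size([2, 256])
# (for nn.RNN the state is a single tensor of shape (num_layers * num_directions, batch, num_hiddens))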
class RNNModel(nn.Module):
    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)
        self.state = None

    def forward(self, inputs, state):  # inputs:(batch,seq_len)
Example #6
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 rnn_type='lstm',
                 num_layers=1,
                 num_hidden_layers=2,
                 bias=True,
                 batch_first=True,
                 batch_size=32,
                 dropout=0,
                 bidirectional=False,
                 nr_cells=5,
                 read_heads=2,
                 cell_size=10,
                 nonlinearity='tanh',
                 gpu_id=-1,
                 independent_linears=False,
                 share_memory=True,
                 debug=False,
                 define_layers=True,
                 clip=20):
        super(DNC, self).__init__()
        # todo: separate weights and RNNs for the interface and output vectors

        self.input_size = input_size
        self.real_output_size = output_size
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type
        self.num_layers = num_layers
        self.num_hidden_layers = num_hidden_layers
        self.bias = bias
        self.batch_first = batch_first
        self.batch_size = batch_size
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.nr_cells = nr_cells
        self.read_heads = read_heads
        self.cell_size = cell_size
        self.nonlinearity = nonlinearity
        self.gpu_id = gpu_id
        self.independent_linears = independent_linears
        self.share_memory = share_memory
        self.debug = debug
        self.clip = clip
        self.define_layers = define_layers

        self.w = self.cell_size
        self.r = self.read_heads

        self.read_vectors_size = self.r * self.w
        self.output_size = self.hidden_size

        self.nn_input_size = self.input_size + self.read_vectors_size
        self.nn_output_size = self.output_size + self.read_vectors_size

        self.rnns = []
        self.memories = []

        if self.define_layers:
            for layer in range(self.num_layers):
                if self.rnn_type.lower() == 'rnn':
                    self.rnns.append(
                        nn.RNN((self.input_size
                                if layer == 0 else self.nn_output_size),
                               self.output_size,
                               bias=self.bias,
                               nonlinearity=self.nonlinearity,
                               batch_first=self.batch_first,
                               dropout=self.dropout,
                               num_layers=self.num_hidden_layers))
                elif self.rnn_type.lower() == 'gru':
                    self.rnns.append(
                        nn.GRU((self.input_size
                                if layer == 0 else self.nn_output_size),
                               self.output_size,
                               bias=self.bias,
                               batch_first=self.batch_first,
                               dropout=self.dropout,
                               num_layers=self.num_hidden_layers))
                elif self.rnn_type.lower() == 'lstm':
                    self.rnns.append(
                        nn.LSTM((self.input_size
                                 if layer == 0 else self.nn_output_size),
                                self.output_size,
                                bias=self.bias,
                                batch_first=self.batch_first,
                                dropout=self.dropout,
                                num_layers=self.num_hidden_layers))
                setattr(self,
                        self.rnn_type.lower() + '_layer_' + str(layer),
                        self.rnns[layer])

                # memories for each layer
                if not self.share_memory:
                    self.memories.append(
                        Memory(input_size=self.output_size,
                               mem_size=self.nr_cells,
                               cell_size=self.w,
                               read_heads=self.r,
                               gpu_id=self.gpu_id,
                               independent_linears=self.independent_linears))
                    setattr(self, 'rnn_layer_memory_' + str(layer),
                            self.memories[layer])

        # only one memory shared by all layers
        if self.share_memory:
            self.memories.append(
                Memory(input_size=self.output_size,
                       mem_size=self.nr_cells,
                       cell_size=self.w,
                       read_heads=self.r,
                       gpu_id=self.gpu_id,
                       independent_linears=self.independent_linears))
            setattr(self, 'rnn_layer_memory_shared', self.memories[0])

        # final output layer
        # self.output = nn.Linear(self.nn_output_size, self.real_output_size)
        # orthogonal(self.output.weight)
        if self.define_layers:
            self.output = nn.Sequential(
                nn.Linear(self.nn_output_size, self.hidden_size), nn.Tanh(),
                nn.Linear(self.hidden_size, self.real_output_size))

            if self.gpu_id != -1:
                [x.cuda(self.gpu_id) for x in self.rnns]
                [x.cuda(self.gpu_id) for x in self.memories]
                self.output.cuda()
Example #7
    def __init__(self,
                 eye_size,
                 eyebrow_size,
                 nose_size,
                 mouth_size,
                 h1_size,
                 h2_size,
                 h3_size,
                 h4_size,
                 h5_size,
                 h6_size,
                 total_length=30,
                 bidirectional=True,
                 bias=False,
                 num_classes=6):
        super(PHRNN, self).__init__()

        self.length = total_length

        self.rnn1_1 = nn.RNN(input_size=eye_size,
                             hidden_size=h1_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn1_2 = nn.RNN(input_size=eyebrow_size,
                             hidden_size=h1_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn1_3 = nn.RNN(input_size=nose_size,
                             hidden_size=h1_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn1_4 = nn.RNN(input_size=mouth_size,
                             hidden_size=h1_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        if bidirectional:
            h1_size = h1_size * 2

        self.rnn2_1 = nn.RNN(input_size=h1_size * 2,
                             hidden_size=h2_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn2_2 = nn.RNN(input_size=h1_size,
                             hidden_size=h2_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn2_3 = nn.RNN(input_size=h1_size,
                             hidden_size=h2_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)

        if bidirectional:
            h2_size = h2_size * 2

        self.rnn3_1 = nn.RNN(input_size=h2_size,
                             hidden_size=h3_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn3_2 = nn.RNN(input_size=h2_size,
                             hidden_size=h3_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn3_3 = nn.RNN(input_size=h2_size,
                             hidden_size=h3_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)

        if bidirectional:
            h3_size = h3_size * 2

        self.rnn4_1 = nn.RNN(input_size=h3_size * 2,
                             hidden_size=h4_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn4_2 = nn.RNN(input_size=h3_size * 2,
                             hidden_size=h4_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)

        if bidirectional:
            h4_size = h4_size * 2

        self.rnn5_1 = nn.RNN(input_size=h4_size,
                             hidden_size=h5_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)
        self.rnn5_2 = nn.RNN(input_size=h4_size,
                             hidden_size=h5_size,
                             bias=bias,
                             batch_first=True,
                             bidirectional=True)

        if bidirectional:
            h5_size = h5_size * 2

        self.lstm = nn.LSTM(input_size=h5_size * 2,
                            hidden_size=h6_size,
                            bias=bias,
                            batch_first=True,
                            bidirectional=True)

        if bidirectional:
            h6_size = h6_size * 2
        self.h6_size = h6_size

        self.fc1 = nn.Linear(h6_size * total_length, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, num_classes)
Example #8
 def test_rnn_single_layer(self):
     rnn = nn.RNN(10, 20, 1, nonlinearity='relu')
     input = Variable(torch.randn(5, 3, 10))
     h0 = Variable(torch.randn(1, 3, 20))
     self.assertONNX(rnn, input, h0)
Example #9
 def test_rnn(self):
     rnn = nn.RNN(10, 20, 2)
     input = Variable(torch.randn(5, 3, 10))
     h0 = Variable(torch.randn(2, 3, 20))
     self.assertONNX(rnn, input, h0)
Example #10
 def __init__(self):
     super().__init__()
     self.embed_layer = nn.Embedding(10, 32)
     self.dense1 = nn.Linear(64, 64)
     self.rnn = nn.RNN(64, 64, 2, batch_first=True, nonlinearity='relu')
     self.dense = nn.Linear(64, 10)
Example #11
    def __init__(self,
                 rnn_type,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 num_layers,
                 batch_first,
                 dropout,
                 bidirectional=False,
                 tied=False):
        """
        Parameters
        ----------
        rnn_type: str
            The type of RNN to use. Available options are "LSTM" for an LSTM,
            "GRU" for a GRU, "RNN_TANH" for an Elman RNN with tanh activation,
            and "RNN_RELU" for an Elman RNN with ReLU activation.

        vocab_size: int
            The number of types in the vocabulary.

        embedding_size: int
            The dimension of the embeddings.

        hidden_size: int
            The dimension of the hidden state vector.

        num_layers: int
            The number of layers to use in the RNN.

        batch_first: boolean
            If True, then the input and output tensors are
            provided as (batch, seq, feature).

        dropout: float
            The dropout proportion applied to RNN layers, the embedding,
            and the RNN output.

        bidirectional: boolean, optional (default=False)
            Whether to use a bidirectional RNN. Only valid if the class
            is a FixedContextRNNLM

        tied: boolean, optional (default=False)
            Whether or not to tie the embedding and the output embedding
            weights.
        """
        super(EmbeddingRNN, self).__init__()
        self.rnn_type = rnn_type
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.dropout = nn.Dropout(dropout)
        self.bidirectional = bidirectional

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)

        if rnn_type in ["LSTM", "GRU"]:
            # Create a LSTM or GRU
            self.rnn = getattr(nn, rnn_type)(input_size=self.embedding_size,
                                             hidden_size=self.hidden_size,
                                             num_layers=self.num_layers,
                                             batch_first=self.batch_first,
                                             dropout=dropout,
                                             bidirectional=self.bidirectional)
        elif rnn_type in ["RNN_TANH", "RNN_RELU"]:
            # Create an Elman RNN with the specified activation function.
            nonlinearity = {"RNN_TANH": "tanh", "RNN_RELU": "relu"}[rnn_type]
            self.rnn = nn.RNN(input_size=self.embedding_size,
                              hidden_size=hidden_size,
                              num_layers=num_layers,
                              nonlinearity=nonlinearity,
                              batch_first=self.batch_first,
                              dropout=dropout,
                              bidirectional=self.bidirectional)
        else:
            raise ValueError("An invalid rnn_type {} was specified. "
                             "Options are \"LSTM\", \"GRU\", \"RNN_TANH\", "
                             "or \"RNN_RELU\".".format(rnn_type))

        if self.bidirectional:
            self.rnn_output_size = self.hidden_size * 2
        else:
            self.rnn_output_size = self.hidden_size
        self.decoder = nn.Linear(self.rnn_output_size, self.vocab_size)

        self.tied = tied
        if self.tied:
            if self.hidden_size != self.embedding_size:
                raise ValueError("When using the tied flag, hidden "
                                 "size must equal embedding size.")
            self.decoder.weight = self.embedding.weight
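A hypothetical instantiation sketch (the class body above is only a fragment, and all sizes here are made up for illustration; the argument names follow the docstring):

model = EmbeddingRNN(rnn_type="RNN_TANH",
                     vocab_size=10000,
                     embedding_size=256,
                     hidden_size=256,
                     num_layers=2,
                     batch_first=True,
                     dropout=0.3,
                     tied=True)  # tying requires hidden_size == embedding_size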
Example #12
 def __init__(self, n_items, hidden_size=64, dim=128):
     super(RNN_rec, self).__init__()
     # Randomly initialize all item vectors
     self.items = nn.Embedding(n_items, dim, max_norm=1)
     self.rnn = nn.RNN(dim, hidden_size, batch_first=True)
     self.dense = self.dense_layer(hidden_size, 1)
Example #13
from d2l import torch as d2l
import torch
from torch import nn
from torch.nn import functional as F

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

num_hiddens = 256
rnn_layer = nn.RNN(len(vocab), num_hiddens)

state = torch.zeros(
    (1, batch_size, num_hiddens
     ))  # (number of hidden layers, batch size, number of hidden units).
state.shape
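# Expected: torch.Size([1, 32, 256]) for this single-layer, unidirectional nn.RNN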


class RNNModel(nn.Module):
    """The RNN model."""
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # If the RNN is bidirectional (to be introduced later),
        # `num_directions` should be 2, else it should be 1.
        if not self.rnn.bidirectional:
            self.num_directions = 1
            self.linear = nn.Linear(self.num_hiddens, self.vocab_size)
        else:
            self.num_directions = 2
            self.linear = nn.Linear(self.num_hiddens * 2, self.vocab_size)
Example #14
 def __init__(self, input_size, embedding_size, hidden_size, output_size):
     super(RNNClassifier, self).__init__()
     self.hidden_size = hidden_size
     self.embedding = nn.Embedding(input_size, embedding_size)
     self.rnn = nn.RNN(embedding_size, hidden_size, batch_first=True)
     self.o2o = nn.Linear(hidden_size, output_size)
Example #15
 def __init__(self, nhidden, dropout):
     super(BrnnStructure, self).__init__()
     self.rnn = nn.RNN(nhidden, nhidden, 1, bidirectional=True)
     self.compress = FC(3 * nhidden, nhidden, relu=True)
     self.compress_r = FC(3 * nhidden, nhidden, relu=True)
     self.dropout = dropout
Example #16
 def __init__(self):
     super(Decoder, self).__init__()
     self.dec_cell = nn.RNN(input_size=len_n,
                            hidden_size=n_hidden,
                            dropout=0.5)
Example #17
    def __init__(self, config):
        super(CoVeEncoder, self).__init__()
        print('---build batched CoVeEncoder---')
        self.gpu = config.gpu
        self.bidirectional = config.bid_flag
        self.batch_size = config.batch_size
        self.char_hidden_dim = 0

        self.use_char = config.use_char
        if self.use_char:
            self.char_hidden_dim = config.char_hidden_dim
            self.char_embedding_dim = config.char_emb_dim
            self.char = Char(config.char_features, config.char_alphabet.size(),
                             self.char_embedding_dim, self.char_hidden_dim,
                             config.dropout, self.gpu)

        self.rnn = nn.LSTM(300,
                           300,
                           num_layers=2,
                           bidirectional=True,
                           batch_first=True)
        self.rnn.load_state_dict(
            model_zoo.load_url(model_urls['wmt-lstm'], model_dir=model_cache))

        self.embedding_dim = config.word_emb_dim  # catnlp
        self.hidden_dim = config.hidden_dim
        self.hyper_hidden_dim = config.hyper_hidden_dim
        self.hyper_embedding_dim = config.hyper_embedding_dim
        self.layers = config.layers
        self.drop = nn.Dropout(config.dropout)
        self.word_embeddings = nn.Embedding(config.word_alphabet.size(),
                                            self.embedding_dim)
        if config.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(config.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(config.word_alphabet.size(),
                                          self.embedding_dim)))

        self.mode = config.word_features
        self.embedding_dim = 600
        if self.mode == 'BaseRNN':
            self.encoder = nn.RNN(self.char_hidden_dim + self.embedding_dim,
                                  self.hidden_dim,
                                  num_layers=self.layers,
                                  batch_first=True)
        elif self.mode == 'RNN':
            self.encoder = RNN(self.char_hidden_dim + self.embedding_dim,
                               self.hidden_dim,
                               num_layers=self.layers,
                               gpu=self.gpu)
        elif self.mode == 'MetaRNN':
            self.encoder = MetaRNN(self.char_hidden_dim + self.embedding_dim,
                                   self.hidden_dim,
                                   self.hyper_hidden_dim,
                                   self.hyper_embedding_dim,
                                   num_layers=self.layers,
                                   gpu=self.gpu)
        elif self.mode == 'BaseLSTM':
            self.encoder = nn.LSTM(self.char_hidden_dim + self.embedding_dim,
                                   self.hidden_dim // 2,
                                   num_layers=self.layers,
                                   batch_first=True,
                                   bidirectional=True)
        elif self.mode == 'LSTM':
            self.encoder = LSTM(self.char_hidden_dim + self.embedding_dim,
                                self.hidden_dim // 2,
                                num_layers=self.layers,
                                gpu=self.gpu,
                                bidirectional=self.bidirectional)
        elif self.mode == 'MetaLSTM':
            self.encoder = MetaLSTM(self.char_hidden_dim + self.embedding_dim,
                                    self.hidden_dim // 2,
                                    self.hyper_hidden_dim,
                                    self.hyper_embedding_dim,
                                    num_layers=self.layers,
                                    gpu=self.gpu,
                                    bidirectional=self.bidirectional)
        else:
            print(
                'Error word feature selection, please check config.word_features.'
            )
            exit(1)

        if config.gpu:
            self.rnn = self.rnn.cuda()
            self.encoder = self.encoder.cuda()

        self.hidden2tag = nn.Linear(self.hidden_dim,
                                    config.label_alphabet_size)
Example #18
    def __init__(self,
                 embed_size: int,
                 field_size: int,
                 padding_idx: int = 0,
                 rnn_method: str = "lstm",
                 output_method: str = "avg_pooling",
                 nn_embedding: nn.Parameter = None,
                 **kwargs):
        r"""Initialize SequenceIndicesEmbedding.
        
        Args:
            embed_size (int): Size of embedding tensor
            field_size (int): Size of inputs field
            padding_idx (int, optional): Padding index. 
                Defaults to 0.
            rnn_method (str, optional): Method of RNN.
                Allow: ["gru", "lstm", "rnn"]. 
                Defaults to "lstm".
            output_method (str, optional): Method of aggregation. 
                Allow: ["avg_pooling", "max_pooling", "mean", "none", "sum"]. 
                Defaults to "avg_pooling".
            nn_embedding (nn.Parameter, optional): Pretrained embedding values. 
                Defaults to None.
            
        Kwargs:
            num_layers (int): Number of layers of RNN.
                Default to 1.
            bias (bool): Whether bias is added to RNN or not.
                Default to True.
            dropout (float): Probability of Dropout in RNN.
                Default to 0.0.
            bidirectional (bool): Whether bidirectional is used in RNN or not.
                Default to False.
        
        Attributes:
            length (int): Size of embedding tensor.
            embedding (torch.nn.Module): Embedding layer.
            rnn_layers (torch.nn.Module): RNN layers.
            aggregation (Union[torch.nn.Module, callable]): Pooling layer or aggregation function.
            output_method (string): Type of output_method.
        
        Raises:
            ValueError: when rnn_method is not in ["gru", "lstm", "rnn"].
            ValueError: when output_method is not in ["avg_pooling", "max_pooling", "mean", "sum"].
        """
        # refer to parent class
        super(SequenceIndicesEmbedding, self).__init__()

        # bind embedding to pre-trained embedding module if nn_embedding is not None
        if nn_embedding is not None:
            self.length = nn_embedding.size("E")
            self.embedding = nn.Embedding.from_pretrained(nn_embedding)
        # else, create a embedding module with the given arguments
        else:
            self.length = embed_size
            self.embedding = nn.Embedding(field_size,
                                          embed_size,
                                          padding_idx=padding_idx,
                                          **kwargs)

        # parse bidirectional from kwargs
        bidirectional = kwargs.get("bidirectional", False)
        if bidirectional:
            # set hidden_size to embed_size // 2 if bidirectional is True
            hidden_size = embed_size // 2
        else:
            # else, set hidden_size to embed_size
            hidden_size = embed_size

        # parse arguments of RNN layers
        rnn_args = dict(input_size=embed_size,
                        hidden_size=hidden_size,
                        num_layers=kwargs.get("num_layers", 1),
                        bias=kwargs.get("bias", True),
                        batch_first=True,
                        dropout=kwargs.get("dropout", 0.0),
                        bidirectional=bidirectional)

        # initialize RNN layers
        if rnn_method == "rnn":
            self.rnn_layers = nn.RNN(**rnn_args)
        elif rnn_method == "lstm":
            self.rnn_layers = nn.LSTM(**rnn_args)
        elif rnn_method == "gru":
            self.rnn_layers = nn.GRU(**rnn_args)
        else:
            raise ValueError('rnn_method only allows ["rnn", "lstm", "gru"].')

        # initialize aggregation layer for outputs and bind output_method to output_method
        if output_method == "avg_pooling":
            self.aggregation = nn.AdaptiveAvgPool1d(1)
        elif output_method == "max_pooling":
            self.aggregation = nn.AdaptiveMaxPool1d(1)
        elif output_method == "mean":
            self.aggregation = partial(torch.mean, dim="N", keepdim=True)
        elif output_method == "none":
            self.aggregation = torch.Tensor
        elif output_method == "sum":
            self.aggregation = partial(torch.sum, dim="N", keepdim=True)
        else:
            raise ValueError(
                'output_method only allows ["avg_pooling", "max_pooling", "mean", "none", "sum"].'
            )
        self.output_method = output_method
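A hypothetical construction sketch, exercising only the __init__ shown above (the sizes are made up; no forward pass is implied):

emb = SequenceIndicesEmbedding(embed_size=64,
                               field_size=1000,
                               rnn_method="gru",
                               output_method="max_pooling")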
Example #19
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp,
                                             nhid,
                                             nlayers,
                                             dropout=dropout)
        elif rnn_type in ['python_LSTM']:
            self.rnn = NaiveLSTM(ninp, nhid)
        elif rnn_type in ['new_LSTM']:
            self.rnn = NewLSTM(ninp, nhid)

        else:
            try:
                nonlinearity = {
                    'RNN_TANH': 'tanh',
                    'RNN_RELU': 'relu'
                }[rnn_type]
            except KeyError:
                raise ValueError(
                    """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"""
                )
            self.rnn = nn.RNN(ninp,
                              nhid,
                              nlayers,
                              nonlinearity=nonlinearity,
                              dropout=dropout)

        if rnn_type in ['new_LSTM']:
            self.decoder = nn.Linear(nhid + nhid, ntoken)
        else:
            self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight
            # ???

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
Example #20
 def __init__(self, in_dim, n_hidden, n_layer, out_dim):
     super(RNN, self).__init__()
     self.layer1 = nn.RNN(in_dim, n_hidden, n_layer, nonlinearity='tanh', batch_first=True)
     self.layer2 = nn.Linear(n_hidden, out_dim)
Example #21
 def __init__(self):
     super(Model, self).__init__()
     self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
Example #22
    def __init__(self):
        super(Seq2Seq, self).__init__()

        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.fc = nn.Linear(n_hidden, n_class)
Example #23
# -*- coding: utf-8 -*-
""" Created on 11:42 AM 12/3/18
    @author: ngunhuconchocon
    @brief:
"""

from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchsummary.torchsummary import summary

rnn = nn.RNN(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
output, hn = rnn(input, h0)
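# output: (seq_len=5, batch=3, hidden_size=20); hn: (num_layers=2, batch=3, hidden_size=20)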

print(output)

# #In this example I have 2 different nets. The input is fed to the first net.
# #The output of the first net is fed to the second net.
# #I only want to train the second net in this example
#
# net1_input = get_data()
# net1_output = net1(net1_input)
# #This creates a computation graph from input to output (i.e. all operations performed on the input to acquire the output). This is used when we want to get gradients for the weights we want to improve.
# #As we only want to train the second net, we don't need the computation graph from the first net. Therefore we detach the output from the previous computation graph
# net2_input = net1_output.detach()
# net2_output = net2(net2_input)
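A minimal runnable sketch of the detach idea described in the commented block above (net1/net2 here are stand-in linear layers, not the nets from the original comment):

net1 = nn.Linear(10, 8)
net2 = nn.Linear(8, 1)
optimizer = torch.optim.SGD(net2.parameters(), lr=0.1)  # only net2 is trained

x = torch.randn(4, 10)
net2_input = net1(x).detach()          # cut the graph: no gradient flows back into net1
loss = net2(net2_input).pow(2).mean()
loss.backward()
optimizer.step()
print(net1.weight.grad is None)        # True: net1 received no gradients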
Example #24
import torch
import torch.nn as nn
rnn = nn.RNN(5, 6, 1)
input = torch.randn(1, 3, 5)
h0 = torch.randn(1, 3, 6)
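# input: (seq_len=1, batch=3, input_size=5); h0: (num_layers=1, batch=3, hidden_size=6)
# output and hn will both have shape (1, 3, 6)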

output, hn = rnn(input, h0)
print(output)
Example #25
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 rnn_hidden_size,
                 rnn_num_layers=1,
                 rnn_type="LSTM",
                 tie_weights=False):
        super(PoetryGenerator, self).__init__()

        self.rnn_type = rnn_type
        self.rnn_hidden_size = rnn_hidden_size
        self.rnn_num_layers = rnn_num_layers

        self.encoder = self.init_embedding(
            embedding_dim)  # use embedding layer as first layer

        # TODO: better CSM
        # ==============================================================================================
        # Convolutional Sentence Model (CSM) layers, compresses a line (sequence of vectors) to a vector
        # use full convolutional layers without pooling
        # ==============================================================================================
        self.csm_l1 = nn.Sequential(
            nn.Conv3d(1, 1, kernel_size=(1, 2, 1), stride=(1, 1, 1)),
            nn.Dropout2d())
        self.csm_l2 = nn.Sequential(
            nn.Conv3d(1, 1, kernel_size=(1, 2, 1), stride=(1, 1, 1)),
            nn.Dropout2d())
        self.csm_l3 = nn.Sequential(
            nn.Conv3d(1, 1, kernel_size=(1, 3, 1), stride=(1, 1, 1)),
            nn.Dropout2d())
        self.csm_l4 = nn.Sequential(
            nn.Conv3d(1, 1, kernel_size=(1, 3, 1), stride=(1, 1, 1)),
            nn.Dropout2d())

        # TODO: better context
        # ====================================================================================================
        # Context Model (CM) layers, compresses vectors of lines to one vector
        # for convenience, define 2 selectable layers, since the training data are quatrains (4 lines each)
        # ====================================================================================================
        # Compress 2 lines context into 1 vector
        self.cm_21 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=(2, 1), stride=(1, 1)), nn.Dropout2d())
        # Compress 3 lines context into 1 vector
        self.cm_31 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=(3, 1), stride=(1, 1)), nn.Dropout2d())

        # ==============================================================================================
        # Recurrent Generation Model (RGM) layers,
        # generates one word according to the previous words in the current line and the previous lines
        # ==============================================================================================
        # the input is the concatenation of the word embedding and the line vector (which has the same dimension as the word embedding)
        if self.rnn_type == "LSTM":
            self.rnn = nn.LSTM(self.rnn_hidden_size * 2,
                               self.rnn_hidden_size,
                               self.rnn_num_layers,
                               batch_first=True,
                               dropout=0.5)
        elif self.rnn_type == "GRU":
            self.rnn = nn.GRU(self.rnn_hidden_size * 2,
                              self.rnn_hidden_size,
                              self.rnn_num_layers,
                              batch_first=True,
                              dropout=0.5)
        else:
            self.rnn = nn.RNN(self.rnn_hidden_size * 2,
                              self.rnn_hidden_size,
                              self.rnn_num_layers,
                              batch_first=True,
                              dropout=0.5)

        self.decoder = nn.Linear(self.rnn_hidden_size, vocab_size)

        # tie weights, i.e. reuse the encoder weights for the decoder (a trick from the PyTorch examples)
        if tie_weights:
            self.decoder.weight = self.encoder.weight
Example #26
    def __init__(
        self,
        *,
        d_hid: int,
        n_hid_lyr: int,
        p_hid: float,
        **kwargs: Optional[Dict],
    ):
        super().__init__()

        # Create vanilla RNN layers and put in module list.
        # RNN in `self.recur` are treated as sequential RNN.
        # Input tensor : Output of `SAttnRNNModel.pre_hid`.
        # Input shape  : `(B, S, H)`.
        # Input dtype  : `torch.float32`.
        # Output tensor: Batch of recurrent token hidden states.
        # Output shape : `(B, S, H)`.
        # Output dtype : `torch.float32`.
        self.recur = nn.ModuleList([
            nn.RNN(input_size=d_hid, hidden_size=d_hid, batch_first=True)
            for _ in range(n_hid_lyr)
        ])

        # Create self attention query, key and value transformation layers and
        # put in module list.
        # Input tensor : Output of `self.recur`.
        # Input shape  : `(B, S, H)`.
        # Input dtype  : `torch.float32`.
        # Output tensor: Query, key and value token features.
        # Output shape : `(B, S, H)`.
        # Output dtype : `torch.float32`.
        self.query = nn.ModuleList([
            nn.Linear(in_features=d_hid, out_features=d_hid)
            for _ in range(n_hid_lyr)
        ])
        self.key = nn.ModuleList([
            nn.Linear(in_features=d_hid, out_features=d_hid)
            for _ in range(n_hid_lyr)
        ])
        self.value = nn.ModuleList([
            nn.Linear(in_features=d_hid, out_features=d_hid)
            for _ in range(n_hid_lyr)
        ])

        # Create self attention final output transformation layers and put in
        # module list.
        # Input tensor : Output of self attention weighted sum.
        # Input shape  : `(B, S, H)`.
        # Input dtype  : `torch.float32`.
        # Output tensor: Linear transformation on self attention weighted sum.
        # Output shape : `(B, S, H)`.
        # Output dtype : `torch.float32`.
        self.out = nn.ModuleList([
            nn.Linear(in_features=d_hid, out_features=d_hid)
            for _ in range(n_hid_lyr)
        ])

        # Create dropout layers.
        # Only need to create `n_hid_lyr - 1` since `SAttnRNNModel.post_hid`
        # drop output of `SAttnRNNModel.hid`.
        # Input tensor : Output of `self.out`.
        # Input shape  : `(B, S, H)`.
        # Input dtype  : `torch.float32`.
        # Output tensor: Sparse output of `self.out`.
        # Output shape : `(B, S, H)`.
        # Output dtype : `torch.float32`.
        dp: List[nn.Module] = [
            nn.Dropout(p=p_hid) for _ in range(n_hid_lyr - 1)
        ]
        dp.append(nn.Identity())
        self.dp = nn.ModuleList(dp)
Example #27
    def __init__(self,
                 device,
                 cnn_type='new',
                 cnn_freeze=False,
                 rnn_type='lstm',
                 bidirection='False',
                 regression='last_only',
                 input_sequence_length=2,
                 hidden_size=100,
                 num_layers=2,
                 learning_rate=0.0001):

        super().__init__()

        ### Deep Neural Network Setup ###
        self.cnn_type = cnn_type

        if cnn_type == 'new':
            self.CNN = nn.Sequential(
                nn.Conv2d(in_channels=3,
                          out_channels=64,
                          kernel_size=(7, 7),
                          stride=(2, 2),
                          padding=(3, 3),
                          bias=False),
                nn.BatchNorm2d(64),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=64,
                          out_channels=128,
                          kernel_size=(5, 5),
                          stride=(2, 2),
                          padding=(2, 2),
                          bias=False),
                nn.BatchNorm2d(128),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=(5, 5),
                          stride=(2, 2),
                          padding=(2, 2),
                          bias=False),
                nn.BatchNorm2d(256),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=256,
                          out_channels=256,
                          kernel_size=(3, 3),
                          stride=(1, 1),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(256),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(512),
                nn.LeakyReLU(0.1),
                nn.MaxPool2d(kernel_size=3, stride=1),
                nn.Conv2d(in_channels=512,
                          out_channels=512,
                          kernel_size=(3, 3),
                          stride=(1, 1),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(512),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=512,
                          out_channels=512,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(512),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=512,
                          out_channels=512,
                          kernel_size=(3, 3),
                          stride=(1, 1),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(512),
                nn.LeakyReLU(0.1),

                # nn.AvgPool2d(kernel_size=3, stride=1),
                nn.Conv2d(in_channels=512,
                          out_channels=1024,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(1024),
                nn.LeakyReLU(0.1),
                nn.Conv2d(in_channels=1024,
                          out_channels=1024,
                          kernel_size=(3, 3),
                          stride=(1, 1),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(1024),
                nn.LeakyReLU(0.1),
                nn.AvgPool2d(kernel_size=3, stride=1),
                nn.Conv2d(in_channels=1024,
                          out_channels=2048,
                          kernel_size=(3, 3),
                          stride=(1, 1),
                          padding=(1, 1),
                          bias=False),
                nn.BatchNorm2d(2048),
                nn.LeakyReLU(0.1),
            )

            CNN_flat_output_size = 16384
            self.CNN.to(device)

        elif cnn_type == 'mobilenetV3_large':

            self.CNN = models.mobilenet_v3_large(pretrained=True)

            CNN_flat_output_size = 1000

            if cnn_freeze == True:
                print('Freeze ' + cnn_type)
                for name, module in self.CNN.named_children():
                    for layer in module.children():
                        for param in layer.parameters():
                            param.requires_grad = False
                            # print(param)

        elif cnn_type == 'mobilenetV3_small':

            self.CNN = models.mobilenet_v3_small(pretrained=True)

            CNN_flat_output_size = 1000

            if cnn_freeze == True:
                print('Freeze ' + cnn_type)
                for name, module in self.CNN.named_children():
                    for layer in module.children():
                        for param in layer.parameters():
                            param.requires_grad = False
                            # print(param)

        elif cnn_type == 'vgg16':

            self.CNN = models.vgg16(pretrained=True)

            CNN_flat_output_size = 1000

            if cnn_freeze == True:
                print('Freeze ' + cnn_type)
                for name, module in self.CNN.named_children():
                    for layer in module.children():
                        for param in layer.parameters():
                            param.requires_grad = False
                            # print(param)

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn_type = rnn_type
        self.regression = regression

        if rnn_type == 'rnn':

            if num_layers > 1:
                self.RNN = nn.RNN(input_size=CNN_flat_output_size,
                                  hidden_size=hidden_size,
                                  num_layers=num_layers,
                                  batch_first=True,
                                  dropout=0.5)

            else:
                self.RNN = nn.RNN(input_size=CNN_flat_output_size,
                                  hidden_size=hidden_size,
                                  num_layers=num_layers,
                                  batch_first=True)

        elif rnn_type == 'lstm':

            if num_layers > 1:
                self.RNN = nn.LSTM(input_size=CNN_flat_output_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   batch_first=True,
                                   dropout=0.5)

            else:
                self.RNN = nn.LSTM(input_size=CNN_flat_output_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   batch_first=True)

        # self.linear = nn.Linear(in_features=hidden_size, out_features=6)

        if regression == 'last_only':
            self.linear = nn.Linear(
                in_features=hidden_size,
                out_features=3)  # For last sequence only case

        elif regression == 'full_sequence':
            in_features_num = input_sequence_length * hidden_size
            out_features_num = (num_layers * hidden_size) // 2
            self.linear1 = nn.Linear(
                in_features=in_features_num,
                out_features=out_features_num)  # For full sequence
            self.batchnorm_linear1 = nn.BatchNorm1d(out_features_num)
            self.leakyrelu_linear1 = nn.LeakyReLU(0.1)
            self.dropout_linear1 = nn.Dropout(p=0.5)

            in_features_num = out_features_num
            out_features_num = out_features_num // 2
            self.linear2 = nn.Linear(
                in_features=in_features_num,
                out_features=out_features_num)  # For full sequence
            self.batchnorm_linear2 = nn.BatchNorm1d(out_features_num)
            self.leakyrelu_linear2 = nn.LeakyReLU(0.1)
            self.dropout_linear2 = nn.Dropout(p=0.5)

            in_features_num = out_features_num
            out_features_num = 3
            self.linear3 = nn.Linear(
                in_features=in_features_num,
                out_features=out_features_num)  # For full sequence

        ### Training Setup ###
        self.device = device
        self.to(self.device)

        # self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate)
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

        self.translation_loss = nn.MSELoss()
Example #28
 def build_rnn(self):
     self.rnn = nn.RNN(input_size=512,
                       hidden_size=self.num_class,
                       num_layers=1,
                       batch_first=True)
Example #29
 def __init__(self, n_features):
     super(SequentialLayer, self).__init__()
     self.rnn = nn.RNN(n_features, 3, 1)
     signal = torch.tensor([[-1.], [0.], [1.]])
     self.register_buffer('signal', signal)
Example #30
    def __init__(self,
                 embedding_matrix=None,
                 hidden_dim=None,
                 num_layers=None,
                 dropout=None,
                 bidirectional=None,
                 useGlove=None,
                 trainable=None,
                 typeOfPadding="no_padding",
                 typeOfRNN="simple",
                 typeOfAttention="multiplicative"):
        super().__init__()
        # Create an embedding layer of dimension vocabulary size * 100
        self.embeddingLayer = nn.Embedding(len(embedding_matrix), 100)

        # Set embedding to GloVe representation if flag is True
        if useGlove:
            self.embeddingLayer.weight.data.copy_(embedding_matrix)

        # Set weights of the matrix to non-updatable if flag is False
        if not trainable:
            self.embeddingLayer.weight.requires_grad = False

        # Initialize the RNN structure
        self.typeOfRNN = typeOfRNN

        if typeOfRNN == "simple":
            self.rnnLayer = nn.RNN(input_size=100,
                                   hidden_size=hidden_dim,
                                   batch_first=True,
                                   num_layers=num_layers,
                                   dropout=dropout,
                                   bidirectional=bidirectional)
        elif typeOfRNN == "GRU":
            self.rnnLayer = nn.GRU(input_size=100,
                                   hidden_size=hidden_dim,
                                   batch_first=True,
                                   num_layers=num_layers,
                                   dropout=dropout,
                                   bidirectional=bidirectional)
        else:
            self.rnnLayer = nn.LSTM(input_size=100,
                                    hidden_size=hidden_dim,
                                    batch_first=True,
                                    num_layers=num_layers,
                                    dropout=dropout,
                                    bidirectional=bidirectional)

        # Initialize variables to define the fully-connected layer
        self.num_directions = 2 if bidirectional else 1
        self.typeOfPadding = typeOfPadding
        self.typeOfAttention = typeOfAttention
        self.num_layers = num_layers
        self.hidden_size = hidden_dim

        # Initialize the learnable attention weights
        if self.typeOfAttention == "multiplicative":
            self.attn_weight = nn.Linear(hidden_dim * self.num_directions,
                                         hidden_dim * self.num_directions)
        else:
            self.Wh = nn.Linear(self.hidden_size * self.num_directions,
                                self.hidden_size * self.num_directions)
            self.Ws = nn.Linear(self.hidden_size * self.num_directions,
                                self.hidden_size * self.num_directions)

        # Initialize a fully-connected layer
        self.fc = nn.Linear(hidden_dim * self.num_directions, 1)

        # Sigmoid activation function squashes the output between 0 and 1
        self.sigmoid = nn.Sigmoid()