Example No. 1
    def __init__(self,
                 features,
                 d_rnn = 50,
                 bidirectional = True,
                 n_layers = 1,
                 cell_type = 'LSTM', # LSTM, GRU, RNN or QRNN (if it's installed)
                 dropout=0.,
                 qrnn_use_cuda=False,  # TODO unfortunately QRNN needs to know this
                 *extra_rnn_args
                 ):
        # model is:
        #   run biLSTM backwards over e[n], get r[n] = biLSTM state
        # we need to know dimensionality for:
        #   d_emb     - word embedding e[]
        #   d_rnn     - dimensionality of the RNN hidden state
        #   n_layers  - how many layers of RNN
        #   bidirectional - is the RNN bidirectional?
        #   cell_type - RNN/GRU/LSTM?
        # we assume that state:Env defines state.N and state.{input_field}
        macarico.StaticFeatures.__init__(self, d_rnn * (2 if bidirectional else 1))

        self.features = features
        self.bidirectional = bidirectional
        self.d_emb = features.dim
        self.d_rnn = d_rnn
        
        assert cell_type in ['LSTM', 'GRU', 'RNN', 'QRNN']
        if cell_type == 'QRNN':
            assert qrnn_available, 'you asked for QRNN but torchqrnn is not installed'
            assert dropout == 0., 'QRNN does not support dropout' # TODO talk to @smerity
            #assert not bidirectional, 'QRNN does not support bidirections, talk to @smerity!'
            self.rnn = QRNN(self.d_emb,
                            self.d_rnn,
                            num_layers=n_layers,
                            use_cuda=qrnn_use_cuda, # TODO do this properly
                            *extra_rnn_args,
                           )
            if bidirectional:
                self.rnn2 = QRNN(self.d_emb,
                                 self.d_rnn,
                                 num_layers=n_layers,
                                 use_cuda=qrnn_use_cuda, # TODO do this properly
                                 *extra_rnn_args,
                                )
                self.rev = list(range(255, -1, -1))  # reversal indices for the backward QRNN (assumes sequences of length <= 256)
        else:
            self.rnn = getattr(nn, cell_type)(self.d_emb,
                                              self.d_rnn,
                                              num_layers=n_layers,
                                              bidirectional=bidirectional,
                                              dropout=dropout,
                                              batch_first=True,
                                              *extra_rnn_args)
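
torchqrnn's QRNN has no bidirectional mode, which is why the constructor above builds a second QRNN plus reversal indices in self.rev. Below is a minimal sketch of how a forward pass could combine the two; the method name and the (seq_len, batch, d_emb) input layout are assumptions, not part of the original class, and torch is assumed to be imported.

    def _bidirectional_qrnn_forward(self, e):
        # e: (seq_len, batch, d_emb) embeddings
        fwd, _ = self.rnn(e)                     # forward direction: (seq_len, batch, d_rnn)
        idx = self.rev[-e.size(0):]              # [n-1, ..., 0] for this sequence length
        bwd, _ = self.rnn2(e[idx])               # run the second QRNN over the reversed sequence
        bwd = bwd[idx]                           # undo the reversal so time steps line up
        return torch.cat([fwd, bwd], dim=2)      # (seq_len, batch, 2 * d_rnn)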
Example No. 2
def build_rnn_block(in_size,
                    rnn_size,
                    rnn_layers,
                    rnn_type,
                    bidirectional=True,
                    dropout=0,
                    use_cuda=True):
    if (rnn_type.lower() == 'qrnn') and QRNN is not None:
        if bidirectional:
            print('WARNING: QRNN ignores bidirectional flag')
            rnn_size = 2 * rnn_size
        rnn = QRNN(in_size,
                   rnn_size,
                   rnn_layers,
                   dropout=dropout,
                   window=2,
                   use_cuda=use_cuda)
    elif rnn_type.lower() == 'lstm' or rnn_type.lower() == 'gru':
        rnn = getattr(nn, rnn_type.upper())(in_size,
                                            rnn_size,
                                            rnn_layers,
                                            dropout=dropout,
                                            bidirectional=bidirectional)
    else:
        raise TypeError('Unrecognized rnn type: {}'.format(rnn_type))
    return rnn
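
A minimal usage sketch for build_rnn_block; the sizes are arbitrary, torch/nn imports are assumed as in the snippet, and both nn.LSTM/nn.GRU and torchqrnn's QRNN default to (seq_len, batch, features) input here.

rnn = build_rnn_block(in_size=40, rnn_size=256, rnn_layers=2,
                      rnn_type='lstm', bidirectional=True, dropout=0.1)
x = torch.randn(100, 8, 40)     # (seq_len, batch, in_size)
output, hidden = rnn(x)         # output: (100, 8, 512) for the bidirectional LSTM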
Example No. 3
	def __init__(self, n_z=256, layers=[3,4,6,3], block=PreActBottleneck, proj_size=0, ncoef=23, sm_type='none', delta=False):
		self.in_planes = 32
		super(ResNet_qrnn, self).__init__()

		self.conv1 = nn.Conv2d(3 if delta else 1, 32, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)

		self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
		self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
		self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

		from torchqrnn import QRNN

		self.qrnn = QRNN(block.expansion*512, 512, num_layers=2, dropout=0.3)

		self.fc = nn.Linear(1536,512)
		self.lbn = nn.BatchNorm1d(512)

		self.fc_mu = nn.Linear(512, n_z)

		self.initialize_params()

		self.attention = SelfAttention(512)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example No. 4
    def __init__(self, rnn_type: str, ntoken: int, ninp: int,
                 nhid: int, nlayers: int, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        elif rnn_type == 'QRNN':
            self.rnn = QRNN(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'QRNN', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
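
The forward pass is not shown above; in the stock PyTorch word-language-model layout it composes these layers roughly as follows. This is a sketch, not the original method, and the caller is assumed to supply a hidden state matching rnn_type (a tuple for LSTM, a tensor otherwise).

    def forward(self, input, hidden):
        # input: (seq_len, batch) token ids
        emb = self.drop(self.encoder(input))        # (seq_len, batch, ninp)
        output, hidden = self.rnn(emb, hidden)      # (seq_len, batch, nhid)
        decoded = self.decoder(self.drop(output))   # (seq_len, batch, ntoken)
        return decoded, hidden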
Example No. 5
    def __init__(self,
                 n_input=15,
                 n_output=6,
                 use_cuda=True,
                 batch=1,
                 hidden_nodes=HIDDEN_NODES,
                 lstm_layers=LSTM_LAYERS,
                 use_qrnn=False,
                 wdrop=0.,
                 dropouti=0.):
        super(LstmStriker, self).__init__()
        self.use_cuda = use_cuda
        self.batch = batch
        # self.idrop = nn.Dropout(dropouti)
        self.odrop = nn.Dropout(dropouti)
        self.lstm_layers = lstm_layers
        self.hidden_nodes = hidden_nodes

        self.linear1 = nn.Linear(n_input, hidden_nodes)
        # self.batch_norm = nn.BatchNorm1d(hidden_nodes)
        if use_qrnn:
            self.lstm1 = QRNN(hidden_nodes,
                              hidden_nodes,
                              num_layers=lstm_layers,
                              dropout=0.4)
        else:
            self.lstm1 = nn.LSTM(hidden_nodes, hidden_nodes, self.lstm_layers)
            if wdrop:
                self.lstm1 = WeightDrop(self.lstm1, ['weight_hh_l0'],
                                        dropout=wdrop)
        self.linear2 = nn.Linear(hidden_nodes, n_output)

        self.hidden = self.init_hidden()
Example No. 6
    def __init__(self,
                 embedding_dim,
                 hidden_dim,
                 vocab_size,
                 label_size,
                 batch_size,
                 num_layers=1,
                 dropout=0,
                 zoneout=0,
                 window=1,
                 save_prev_x=False):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.qrnn = QRNN(embedding_dim,
                         hidden_dim,
                         dropout=dropout,
                         zoneout=zoneout,
                         window=window,
                         save_prev_x=save_prev_x,
                         num_layers=num_layers)
        self.dropout = nn.Dropout(dropout)
        self.hidden_to_label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()
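
A minimal forward sketch for this classifier (assumed, not part of the original snippet): embed the token ids, run the QRNN, and classify from the final time step.

    def forward(self, sentence):
        # sentence: (seq_len, batch) token ids
        embeds = self.word_embeddings(sentence)                  # (seq_len, batch, embedding_dim)
        qrnn_out, self.hidden = self.qrnn(embeds, self.hidden)
        last = self.dropout(qrnn_out[-1])                        # (batch, hidden_dim)
        return self.hidden_to_label(last)                        # (batch, label_size)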
Example No. 7
    def __init__(
        self,
        b: int = 512,
        d: int = 64,
        fc_sizes: List[int] = None,
        output_size: int = 2,
        lr: float = 0.025,
        dropout: float = 0.5,
    ):
        super().__init__()
        if fc_sizes is None:
            fc_sizes = [128, 64]

        self.hparams = {
            "b": b,
            "d": d,
            "fc_size": fc_sizes,
            "lr": lr,
            "output_size": output_size,
            "dropout": dropout,
        }

        layers: List[nn.Module] = []
        for x, y in zip([d] + fc_sizes, fc_sizes + [output_size]):
            layers.append(nn.ReLU())
            layers.append(nn.Linear(x, y))

        self.tanh = nn.Hardtanh()
        self.qrnn = QRNN(b, d, num_layers=2, dropout=dropout)
        self.output = nn.ModuleList(layers)
        self.loss = nn.CrossEntropyLoss()
Example No. 8
    def __init__(self,
                 embedding_dim=None,
                 vocab_size=None,
                 hidden_dim=2400,
                 num_layers=3,
                 dropout_keep_prob=0.6,
                 pool_type='mean',
                 is_cuda=None):
        super(QRNNEncoder, self).__init__()

        assert pool_type in ['max', 'mean']

        self.pool_type = pool_type
        self.embedding_dim = embedding_dim or EMBEDDING_DIM
        self.vocab_size = vocab_size or MAX_NUM_WORDS
        self.dropout_keep_prob = dropout_keep_prob
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.is_cuda = is_cuda if is_cuda is not None else torch.cuda.is_available()

        self.qrnn = QRNN(self.embedding_dim,
                         self.hidden_dim,
                         self.num_layers,
                         dropout=1 - self.dropout_keep_prob)  # Outputs: output, h_n
Example No. 9
    def __init__(self, hidden_size=512, num_layers=2):
        super(QRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, 64)
        self.rnn = QRNN(64, hidden_size, num_layers=num_layers)

        self.proj = nn.Sequential(
            nn.Linear(hidden_size, vocab_size)
        )
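
A minimal usage sketch for QRNNModel (vocab_size must already be defined in the enclosing module; the shapes below are assumptions):

model = QRNNModel(hidden_size=512, num_layers=2)
tokens = torch.randint(0, vocab_size, (35, 16))        # (seq_len, batch) token ids
output, hidden = model.rnn(model.embedding(tokens))    # (35, 16, 512)
logits = model.proj(output)                            # (35, 16, vocab_size)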
Example No. 10
    def __init__(self, num_inputs, num_outputs, num_layers):
        super(LSTMController, self).__init__()

        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_layers = num_layers

        self.qrnn = QRNN(input_size=num_inputs,
                         hidden_size=num_outputs,
                         num_layers=num_layers)  #.cuda()

        # The hidden state is a learned parameter
        self.qrnn_h_bias = Parameter(
            torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  #.cuda()
        self.qrnn_c_bias = Parameter(
            torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  #.cuda()

        self.reset_parameters()
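
A minimal sketch of how the learned bias above typically becomes the initial controller state; the method name follows the usual NTM controller pattern and is an assumption, and torchqrnn's QRNN only takes a hidden state h, so the c bias goes unused here.

    def create_new_state(self, batch_size):
        # expand the learned (num_layers, 1, num_outputs) bias across the batch
        return self.qrnn_h_bias.clone().repeat(1, batch_size, 1)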
Example No. 11
def run_qrnn(batch_size=20,
             input_size=128,
             seq_len=20,
             warmup=10,
             benchmark=10,
             hidden_size=256,
             num_layers=10,
             use_kernel=False,
             jit=False,
             cuda=False):
    assert not (use_kernel and jit)
    if use_kernel:
        assert cuda

    benchmark_init(0, 0, True)
    name = 'qrnn{}{}{}'.format(tag(cuda=cuda), tag(jit=jit),
                               tag(kernel=use_kernel))
    iter_timer = Bench(name=name, cuda=cuda, warmup_iters=warmup)
    niters = warmup + benchmark

    size = (seq_len, batch_size, input_size)
    if cuda:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    batches = [
        torch.rand(size, requires_grad=True, device=device)
        for _ in range(niters)
    ]
    qrnn = QRNN(input_size,
                hidden_size,
                num_layers=num_layers,
                dropout=0.4,
                use_kernel=use_kernel,
                jit=jit).to(device)

    for X in batches:
        gc.collect()
        with iter_timer:
            output, hidden = qrnn(X)
            output.sum().backward()

    return iter_timer
Example No. 12
    def __init__(
        self,
        src_vocab: Vocabulary,
        hidden_size: int,
        num_layers: int,
        dropout: float,
    ):
        super(EncoderQRNN, self).__init__()
        self.input_size = len(src_vocab)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout

        self.embedding = nn.Embedding(
            len(src_vocab),
            hidden_size,
        )
        self.lstm = QRNN(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
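
A minimal forward sketch for the encoder (assumed, not from the source): embed the source token ids and run them through the QRNN.

    def forward(self, src_tokens, hidden=None):
        # src_tokens: (seq_len, batch) source token ids
        embedded = self.embedding(src_tokens)          # (seq_len, batch, hidden_size)
        outputs, hidden = self.lstm(embedded, hidden)  # outputs: (seq_len, batch, hidden_size)
        return outputs, hidden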
Example No. 13
    def __init__(
        self,
        trg_vocab: Vocabulary,
        hidden_size: int,
        num_layers: int,
        dropout: float,
        teacher_student_ratio: float,
    ):
        super(AttentionDecoderQRNN, self).__init__()

        self.hidden_size = hidden_size
        self.output_size = len(trg_vocab)
        self.num_layers = num_layers
        self.dropout = dropout
        self.teacher_student_ratio = teacher_student_ratio
        self.trg_vocab = trg_vocab

        # layers
        self.embedding = nn.Embedding(
            len(trg_vocab),
            hidden_size,
        )

        self.dropout = nn.Dropout(dropout)

        self.attn = AttentionModule('general', hidden_size)

        self.lstm = QRNN(
            input_size=hidden_size * 2,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )

        self.out = nn.Linear(
            hidden_size,
            len(trg_vocab),
        )
Example No. 14
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 pad_idx,
                 hidden_size,
                 num_layers=2,
                 dropout=0.20,
                 zoneout=.0):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size,
                                      embedding_dim,
                                      padding_idx=pad_idx)

        self.qrnn = QRNN(embedding_dim,
                         hidden_size,
                         num_layers=num_layers,
                         window=2,
                         dropout=dropout,
                         zoneout=zoneout)
        #self.rnn = cell_class(embedding_dim, hidden_size, batch_first=True)

        self.fc = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(dropout)
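
A minimal forward sketch (assumed, not part of the original snippet): embed, run the QRNN, and project back to the vocabulary.

    def forward(self, tokens, hidden=None):
        # tokens: (seq_len, batch) token ids
        emb = self.embedding(tokens)               # (seq_len, batch, embedding_dim)
        output, hidden = self.qrnn(emb, hidden)    # (seq_len, batch, hidden_size)
        logits = self.fc(self.dropout(output))     # (seq_len, batch, vocab_size)
        return logits, hidden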
Example No. 15
    def __init__(self,
                 embedding_dim=None,
                 vocab_size=None,
                 hidden_dim=2400,
                 num_layers=3,
                 is_cuda=None,
                 dropout_keep_prob=0.6):
        super(QRNNEncoderConcat, self).__init__()

        assert hidden_dim % num_layers == 0, 'Number of hidden dims must be divisible by number of layers'

        self.embedding_dim = embedding_dim or EMBEDDING_DIM
        self.vocab_size = vocab_size or MAX_NUM_WORDS
        self.dropout_keep_prob = dropout_keep_prob
        self.num_layers = num_layers
        self.hidden_dim = int(hidden_dim / self.num_layers)
        self.is_cuda = is_cuda if is_cuda is not None else torch.cuda.is_available()

        self.qrnn = QRNN(self.embedding_dim,
                         self.hidden_dim,
                         self.num_layers,
                         dropout=1 - self.dropout_keep_prob)  # Outputs: output, h_n
Example No. 16
import torch
from torchqrnn import QRNN

seq_len, batch_size, hidden_size = 7, 20, 256
size = (seq_len, batch_size, hidden_size)
X = torch.rand(size, requires_grad=True).cuda()  # random input on the GPU

qrnn = QRNN(hidden_size, hidden_size, num_layers=2, dropout=0.4)
qrnn.cuda()  # move the model to the GPU so it matches the input device
output, hidden = qrnn(X)

print(output.size(), hidden.size())
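
The returned hidden state can be fed back into the next call to process a long stream in chunks (a small extension of the snippet above, not part of the original):

# output: (seq_len, batch, hidden_size), hidden: (num_layers, batch, hidden_size)
X2 = torch.rand(size, requires_grad=True).cuda()
output2, hidden2 = qrnn(X2, hidden)
print(output2.size(), hidden2.size())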