def __init__(self,
             features,
             d_rnn=50,
             bidirectional=True,
             n_layers=1,
             cell_type='LSTM',  # LSTM, GRU, RNN or QRNN (if it's installed)
             dropout=0.,
             qrnn_use_cuda=False,  # TODO unfortunately QRNN needs to know this
             *extra_rnn_args
            ):
    # model is:
    #   run biLSTM backwards over e[n], get r[n] = biLSTM state
    # we need to know dimensionality for:
    #   d_emb         - word embedding e[]
    #   d_rnn         - dimensionality
    #   n_layers      - how many layers of RNN
    #   bidirectional - is the RNN bidirectional?
    #   cell_type     - RNN/GRU/LSTM?
    # we assume that state:Env defines state.N and state.{input_field}
    macarico.StaticFeatures.__init__(self, d_rnn * (2 if bidirectional else 1))

    self.features = features
    self.bidirectional = bidirectional
    self.d_emb = features.dim
    self.d_rnn = d_rnn

    assert cell_type in ['LSTM', 'GRU', 'RNN', 'QRNN']
    if cell_type == 'QRNN':
        assert qrnn_available, 'you asked for QRNN but torchqrnn is not installed'
        assert dropout == 0., 'QRNN does not support dropout'  # TODO talk to @smerity
        #assert not bidirectional, 'QRNN does not support being bidirectional, talk to @smerity!'
        self.rnn = QRNN(self.d_emb,
                        self.d_rnn,
                        num_layers=n_layers,
                        use_cuda=qrnn_use_cuda,  # TODO do this properly
                        *extra_rnn_args,
                       )
        if bidirectional:
            self.rnn2 = QRNN(self.d_emb,
                             self.d_rnn,
                             num_layers=n_layers,
                             use_cuda=qrnn_use_cuda,  # TODO do this properly
                             *extra_rnn_args,
                            )
            self.rev = list(range(255, -1, -1))
    else:
        self.rnn = getattr(nn, cell_type)(self.d_emb,
                                          self.d_rnn,
                                          num_layers=n_layers,
                                          bidirectional=bidirectional,
                                          dropout=dropout,
                                          batch_first=True,
                                          *extra_rnn_args)
class QRNNClassifier(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size,
                 batch_size, num_layers=1, dropout=0, zoneout=0, window=1,
                 save_prev_x=False):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.qrnn = QRNN(embedding_dim, hidden_dim, dropout=dropout,
                         zoneout=zoneout, window=window,
                         save_prev_x=save_prev_x, num_layers=num_layers)
        self.dropout = nn.Dropout(dropout)
        self.hidden_to_label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Initialize the hidden state.

        Unlike an LSTM, the QRNN carries only a hidden state h (no cell
        state c), so a single tensor is returned.
        """
        return autograd.Variable(
            torch.zeros(self.num_layers, self.batch_size, self.hidden_dim).cuda())

    def reset(self):
        self.qrnn.reset()

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        x = embeds.view(len(sentence), self.batch_size, -1)
        out, self.hidden = self.qrnn(x, self.hidden)
        out = self.dropout(out)
        y = self.hidden_to_label(out[-1])
        log_probs = F.log_softmax(y, dim=1)
        return log_probs
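# Minimal usage sketch for QRNNClassifier above. The dimensions and batch
# shape are assumed for illustration; since init_hidden() calls .cuda(), a
# CUDA device is assumed to be available.
model = QRNNClassifier(embedding_dim=100, hidden_dim=128, vocab_size=10000,
                       label_size=5, batch_size=4, num_layers=2).cuda()
sentence = torch.randint(0, 10000, (30, 4)).cuda()  # (seq_len, batch_size) token ids
log_probs = model(sentence)                          # (batch_size, label_size)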
def build_rnn_block(in_size, rnn_size, rnn_layers, rnn_type,
                    bidirectional=True, dropout=0, use_cuda=True):
    if (rnn_type.lower() == 'qrnn') and QRNN is not None:
        if bidirectional:
            print('WARNING: QRNN ignores bidirectional flag')
            rnn_size = 2 * rnn_size
        rnn = QRNN(in_size, rnn_size, rnn_layers,
                   dropout=dropout,
                   window=2,
                   use_cuda=use_cuda)
    elif rnn_type.lower() == 'lstm' or rnn_type.lower() == 'gru':
        rnn = getattr(nn, rnn_type.upper())(in_size, rnn_size, rnn_layers,
                                            dropout=dropout,
                                            bidirectional=bidirectional)
    else:
        raise TypeError('Unrecognized rnn type: {}'.format(rnn_type))
    return rnn
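# A hedged example call to build_rnn_block defined above (argument values are
# assumed): requests a 2-layer QRNN over 80-dimensional inputs without the
# fused CUDA kernel.
rnn = build_rnn_block(in_size=80, rnn_size=256, rnn_layers=2,
                      rnn_type='qrnn', bidirectional=False,
                      dropout=0.1, use_cuda=False)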
def __init__(self, embedding_dim=None, vocab_size=None, hidden_dim=2400,
             num_layers=3, dropout_keep_prob=0.6, pool_type='mean',
             is_cuda=None):
    super(QRNNEncoder, self).__init__()
    assert pool_type in ['max', 'mean']
    self.pool_type = pool_type
    self.embedding_dim = embedding_dim or EMBEDDING_DIM
    self.vocab_size = vocab_size or MAX_NUM_WORDS
    self.dropout_keep_prob = dropout_keep_prob
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.is_cuda = is_cuda if is_cuda is not None else torch.cuda.is_available()

    self.qrnn = QRNN(self.embedding_dim, self.hidden_dim, self.num_layers,
                     dropout=1 - self.dropout_keep_prob)  # Outputs: output, h_n
def __init__(self, n_z=256, layers=[3, 4, 6, 3], block=PreActBottleneck,
             proj_size=0, ncoef=23, sm_type='none', delta=False):
    self.in_planes = 32
    super(ResNet_qrnn, self).__init__()

    self.conv1 = nn.Conv2d(3 if delta else 1, 32, kernel_size=(ncoef, 3),
                           stride=(1, 1), padding=(0, 1), bias=False)
    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    from torchqrnn import QRNN
    self.qrnn = QRNN(block.expansion * 512, 512, num_layers=2, dropout=0.3)

    self.fc = nn.Linear(1536, 512)
    self.lbn = nn.BatchNorm1d(512)
    self.fc_mu = nn.Linear(512, n_z)
    self.initialize_params()
    self.attention = SelfAttention(512)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_input=15, n_output=6, use_cuda=True, batch=1,
             hidden_nodes=HIDDEN_NODES, lstm_layers=LSTM_LAYERS,
             use_qrnn=False, wdrop=0., dropouti=0.):
    super(LstmStriker, self).__init__()
    self.use_cuda = use_cuda
    self.batch = batch
    # self.idrop = nn.Dropout(dropouti)
    self.odrop = nn.Dropout(dropouti)
    self.lstm_layers = lstm_layers
    self.hidden_nodes = hidden_nodes
    self.linear1 = nn.Linear(n_input, hidden_nodes)
    # self.batch_norm = nn.BatchNorm1d(hidden_nodes)
    if use_qrnn:
        self.lstm1 = QRNN(hidden_nodes, hidden_nodes, num_layers=LSTM_LAYERS,
                          dropout=0.4)
    else:
        self.lstm1 = nn.LSTM(hidden_nodes, hidden_nodes, self.lstm_layers)
        if wdrop:
            self.lstm1 = WeightDrop(self.lstm1, ['weight_hh_l0'], dropout=wdrop)
    self.linear2 = nn.Linear(hidden_nodes, n_output)
    self.hidden = self.init_hidden()
def __init__(self, rnn_type: str, ntoken: int, ninp: int, nhid: int,
             nlayers: int, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    elif rnn_type == 'QRNN':
        self.rnn = QRNN(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'QRNN', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity,
                          dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
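# Hypothetical instantiation of RNNModel with a QRNN backbone (sizes are
# assumed; init_weights and forward are defined elsewhere in the class).
# tie_weights requires nhid == ninp, which holds here.
model = RNNModel(rnn_type='QRNN', ntoken=10000, ninp=256, nhid=256,
                 nlayers=2, dropout=0.5, tie_weights=True)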
def __init__(
    self,
    b: int = 512,
    d: int = 64,
    fc_sizes: List[int] = None,
    output_size: int = 2,
    lr: float = 0.025,
    dropout: float = 0.5,
):
    super().__init__()
    if fc_sizes is None:
        fc_sizes = [128, 64]
    self.hparams = {
        "b": b,
        "d": d,
        "fc_size": fc_sizes,
        "lr": lr,
        "output_size": output_size,
        "dropout": dropout,
    }
    layers: List[nn.Module] = []
    for x, y in zip([d] + fc_sizes, fc_sizes + [output_size]):
        layers.append(nn.ReLU())
        layers.append(nn.Linear(x, y))
    self.tanh = nn.Hardtanh()
    self.qrnn = QRNN(b, d, num_layers=2, dropout=dropout)
    self.output = nn.ModuleList(layers)
    self.loss = nn.CrossEntropyLoss()
def __init__(self, hidden_size=512, num_layers=2):
    super(QRNNModel, self).__init__()
    self.embedding = nn.Embedding(vocab_size, 64)
    self.rnn = QRNN(64, hidden_size, num_layers=num_layers)
    self.proj = nn.Sequential(
        nn.Linear(hidden_size, vocab_size)
    )
def run_qrnn(batch_size=20, input_size=128, seq_len=20, warmup=10,
             benchmark=10, hidden_size=256, num_layers=10,
             use_kernel=False, jit=False, cuda=False):
    assert not (use_kernel and jit)
    if use_kernel:
        assert cuda

    benchmark_init(0, 0, True)

    name = 'qrnn{}{}{}'.format(tag(cuda=cuda), tag(jit=jit), tag(kernel=use_kernel))
    iter_timer = Bench(name=name, cuda=cuda, warmup_iters=warmup)
    niters = warmup + benchmark
    size = (seq_len, batch_size, input_size)

    if cuda:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    batches = [
        torch.rand(size, requires_grad=True, device=device)
        for _ in range(niters)
    ]
    qrnn = QRNN(input_size, hidden_size, num_layers=num_layers, dropout=0.4,
                use_kernel=use_kernel, jit=jit).to(device)

    for X in batches:
        gc.collect()
        with iter_timer:
            output, hidden = qrnn(X)
            output.sum().backward()

    return iter_timer
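# Example invocation of run_qrnn above on CPU without the fused kernel
# (iteration counts are assumed); the returned Bench timer comes from the
# benchmark harness and holds the per-iteration timings.
timer = run_qrnn(batch_size=8, seq_len=16, warmup=2, benchmark=5,
                 num_layers=2, use_kernel=False, jit=False, cuda=False)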
class QRNNController(nn.Module):
    """An NTM controller based on a QRNN."""

    def __init__(self, num_inputs, num_outputs, num_layers):
        super(QRNNController, self).__init__()

        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_layers = num_layers

        self.qrnn = QRNN(input_size=num_inputs,
                         hidden_size=num_outputs,
                         num_layers=num_layers)  # .cuda()

        # The hidden state is a learned parameter
        self.qrnn_h_bias = Parameter(
            torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # .cuda()
        self.qrnn_c_bias = Parameter(
            torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # .cuda()

        self.reset_parameters()

    def create_new_state(self, batch_size):
        # Dimension: (num_layers * num_directions, batch, hidden_size)
        lstm_h = self.qrnn_h_bias.clone().repeat(1, batch_size, 1)  # .cuda()
        lstm_c = self.qrnn_c_bias.clone().repeat(1, batch_size, 1)  # .cuda()
        return lstm_h, lstm_c

    def reset_parameters(self):
        for p in self.qrnn.parameters():
            if p.dim() == 1:
                nn.init.constant_(p, 0)
            else:
                stdev = 5 / (np.sqrt(self.num_inputs + self.num_outputs))
                nn.init.uniform_(p, -stdev, stdev)

    def size(self):
        return self.num_inputs, self.num_outputs

    def forward(self, x, prev_state):
        x = x.unsqueeze(0)
        outp, state = self.qrnn(x, prev_state)
        return outp.squeeze(0), state
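# Minimal usage sketch for QRNNController (sizes are assumed). Note that the
# QRNN consumes only a hidden state h, so just the h half of create_new_state
# is passed to forward here.
controller = QRNNController(num_inputs=32, num_outputs=64, num_layers=1)
h, _c = controller.create_new_state(batch_size=4)
x = torch.rand(4, 32)            # one timestep of controller input
out, h = controller(x, h)        # out: (4, 64), h: (1, 4, 64)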
def __init__(
    self,
    src_vocab: Vocabulary,
    hidden_size: int,
    num_layers: int,
    dropout: float,
):
    super(EncoderQRNN, self).__init__()
    self.input_size = len(src_vocab)
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.dropout = dropout

    self.embedding = nn.Embedding(
        len(src_vocab),
        hidden_size,
    )
    self.lstm = QRNN(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
    )
def __init__(
    self,
    trg_vocab: Vocabulary,
    hidden_size: int,
    num_layers: int,
    dropout: float,
    teacher_student_ratio: float,
):
    super(AttentionDecoderQRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = len(trg_vocab)
    self.num_layers = num_layers
    self.dropout = dropout
    self.teacher_student_ratio = teacher_student_ratio
    self.trg_vocab = trg_vocab

    # layers
    self.embedding = nn.Embedding(
        len(trg_vocab),
        hidden_size,
    )
    self.dropout = nn.Dropout(dropout)
    self.attn = AttentionModule('general', hidden_size)
    self.lstm = QRNN(
        input_size=hidden_size * 2,
        hidden_size=hidden_size,
        num_layers=num_layers,
    )
    self.out = nn.Linear(
        hidden_size,
        len(trg_vocab),
    )
def __init__(self, vocab_size, embedding_dim, pad_idx, hidden_size,
             num_layers=2, dropout=0.20, zoneout=.0):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
    self.qrnn = QRNN(embedding_dim, hidden_size, num_layers=num_layers,
                     window=2, dropout=dropout, zoneout=zoneout)
    # self.rnn = cell_class(embedding_dim, hidden_size, batch_first=True)
    self.fc = nn.Linear(hidden_size, vocab_size)
    self.dropout = nn.Dropout(dropout)
def __init__(self, embedding_dim=None, vocab_size=None, hidden_dim=2400,
             num_layers=3, is_cuda=None, dropout_keep_prob=0.6):
    super(QRNNEncoderConcat, self).__init__()
    assert hidden_dim % num_layers == 0, \
        'Number of hidden dims must be divisible by number of layers'
    self.embedding_dim = embedding_dim or EMBEDDING_DIM
    self.vocab_size = vocab_size or MAX_NUM_WORDS
    self.dropout_keep_prob = dropout_keep_prob
    self.num_layers = num_layers
    self.hidden_dim = int(hidden_dim / self.num_layers)
    self.is_cuda = is_cuda if is_cuda is not None else torch.cuda.is_available()

    self.qrnn = QRNN(self.embedding_dim, self.hidden_dim, self.num_layers,
                     dropout=1 - self.dropout_keep_prob)  # Outputs: output, h_n
import torch
from torchqrnn import QRNN

seq_len, batch_size, hidden_size = 7, 20, 256
size = (seq_len, batch_size, hidden_size)
X = torch.autograd.Variable(torch.rand(size), requires_grad=True).cuda()

qrnn = QRNN(hidden_size, hidden_size, num_layers=2, dropout=0.4)
qrnn.cuda()  # X lives on the GPU, so the QRNN must be moved there as well

output, hidden = qrnn(X)
print(output.size(), hidden.size())