import torch.nn as nn

try:
    # Optional QRNN dependency; the package name is an assumption (e.g. Salesforce's
    # pytorch-qrnn) -- adjust the import to whichever QRNN implementation the project uses.
    from torchqrnn import QRNN
except ImportError:
    QRNN = None


def build_rnn_block(in_size, rnn_size, rnn_layers, rnn_type,
                    bidirectional=True, dropout=0, use_cuda=True):
    """Build a QRNN, LSTM, or GRU block with the given sizes."""
    if rnn_type.lower() == 'qrnn' and QRNN is not None:
        if bidirectional:
            # The QRNN here is unidirectional; double the hidden size to keep a
            # comparable number of output features.
            print('WARNING: QRNN ignores bidirectional flag')
            rnn_size = 2 * rnn_size
        rnn = QRNN(in_size, rnn_size, rnn_layers,
                   dropout=dropout, window=2)  # use_cuda=use_cuda)
    elif rnn_type.lower() in ('lstm', 'gru'):
        rnn = getattr(nn, rnn_type.upper())(in_size, rnn_size, rnn_layers,
                                            dropout=dropout, bidirectional=bidirectional)
    else:
        raise TypeError('Unrecognized rnn type: {}'.format(rnn_type))
    return rnn
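# A minimal usage sketch for build_rnn_block (not from the original source; the sizes are
# illustrative). It exercises the LSTM branch, which only needs torch.nn.
import torch

rnn = build_rnn_block(in_size=128, rnn_size=256, rnn_layers=2,
                      rnn_type='lstm', bidirectional=True, dropout=0.1)
x = torch.randn(20, 4, 128)        # (seq_len, batch, in_size) -- nn.LSTM defaults to batch_first=False
out, (h, c) = rnn(x)
assert out.shape == (20, 4, 512)   # 2 directions * rnn_size
assert h.shape == (4, 4, 256)      # num_layers * num_directions, batch, rnn_size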
import torch
from fastai.gen_doc.doctest import this_tests
from fastai.text.models.qrnn import QRNN


def test_qrnn_bidir():
    this_tests(QRNN)
    qrnn = QRNN(10, 20, 2, bidirectional=True, batch_first=True, window=2, output_gate=False)
    x = torch.randn(7, 5, 10)
    y, h = qrnn(x)
    assert y.size() == torch.Size([7, 5, 40])
    assert h.size() == torch.Size([4, 7, 20])
    # Without an output gate, the last timestep of the forward output equals the second-to-last
    # hidden state, and the first timestep of the backward output equals the last hidden state.
    assert torch.allclose(y[:, -1, :20], h[2])
    assert torch.allclose(y[:, 0, 20:], h[3])
def __init__(self, vocab_sz: int, emb_sz: int, n_hid: int, n_layers: int, pad_token: int = 1,
             hidden_p: float = 0.2, input_p: float = 0.6, embed_p: float = 0.1, weight_p: float = 0.5,
             qrnn: bool = False, bidir: bool = False, mixup: bool = False):
    super().__init__()
    self.bs, self.qrnn, self.emb_sz, self.n_hid, self.n_layers = 1, qrnn, emb_sz, n_hid, n_layers
    self.mixup = mixup
    self.n_dir = 2 if bidir else 1
    self.encoder = nn.Embedding(vocab_sz, emb_sz, padding_idx=pad_token)
    self.encoder_dp = EmbeddingDropout(self.encoder, embed_p)
    # Each layer has hidden size n_hid except the last, which returns to emb_sz (divided by the
    # number of directions) so a decoder can tie its weights to the embedding.
    if self.qrnn:
        # Using QRNN requires a CUDA installation.
        from fastai.text.models.qrnn import QRNN
        self.rnns = [QRNN(emb_sz if l == 0 else n_hid,
                          (n_hid if l != n_layers - 1 else emb_sz) // self.n_dir, 1,
                          save_prev_x=True, zoneout=0, window=2 if l == 0 else 1,
                          output_gate=True, bidirectional=bidir)
                     for l in range(n_layers)]
        for rnn in self.rnns:
            rnn.layers[0].linear = WeightDropout(rnn.layers[0].linear, weight_p, layer_names=['weight'])
    else:
        self.rnns = [nn.LSTM(emb_sz if l == 0 else n_hid,
                             (n_hid if l != n_layers - 1 else emb_sz) // self.n_dir, 1,
                             batch_first=True, bidirectional=bidir)
                     for l in range(n_layers)]
        self.rnns = [WeightDropout(rnn, weight_p) for rnn in self.rnns]
    self.rnns = nn.ModuleList(self.rnns)
    self.encoder.weight.data.uniform_(-self.initrange, self.initrange)
    self.input_dp = RNNDropout(input_p)
    self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for l in range(n_layers)])
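# A quick sanity sketch (not from the original source) of the layer sizes the list
# comprehension above produces; 400/1152/3 are the usual AWD-LSTM language-model defaults
# and are used here only as illustrative numbers.
emb_sz, n_hid, n_layers, n_dir = 400, 1152, 3, 1
for l in range(n_layers):
    n_in = emb_sz if l == 0 else n_hid
    n_out = (n_hid if l != n_layers - 1 else emb_sz) // n_dir
    print(f'layer {l}: {n_in} -> {n_out}')
# layer 0: 400 -> 1152
# layer 1: 1152 -> 1152
# layer 2: 1152 -> 400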
def __init__(self, input_size: int, hidden_size: int, bidirectional: bool = False,
             num_layers: int = 1, window: int = None):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bidirectional = bidirectional
    self._qrnn = QRNN(input_size, hidden_size, batch_first=True, bidirectional=bidirectional,
                      n_layers=num_layers, window=window)
def __init__(self, input_dim, hidden_dim, output_dim, batchsize=64, rnn_type='LSTM',
             num_layers=1, dropout=0., bidirectional=False, reduction=16):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.rnn_type = rnn_type
    self.bidirectional = 2 if bidirectional else 1   # number of directions
    dropout = 0. if num_layers == 1 else dropout     # dropout only applies between stacked layers
    assert rnn_type in ['LSTM', 'GRU', 'QRNN'], 'RNN type is not supported.'
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(input_dim, hidden_dim, num_layers=num_layers,
                                         batch_first=True, dropout=dropout,
                                         bidirectional=bidirectional)
    else:
        self.rnn = QRNN(input_dim, hidden_dim, n_layers=num_layers, batch_first=True,
                        dropout=dropout, bidirectional=bidirectional)
    # Option 1: use an activation function
    # self.fc = nn.Linear(hidden_dim * self.bidirectional, hidden_dim // 2)
    # self.ac = nn.Tanh()  # the prediction range is [-1, 1], so tanh is the right choice;
    #                      # the LSTM output gate already applies a tanh itself
    # self.fc1 = nn.Linear(hidden_dim // 2, output_dim)
    # Option 2: no activation function
    dim1 = hidden_dim * self.bidirectional
    self.bn = BatchNorm1dFlat(dim1)
    self.fc = nn.Linear(dim1, output_dim)
    # self.fc = nn.Linear(dim1, dim1 // reduction)
    # self.bn1 = BatchNorm1dFlat(dim1 // reduction)
    # self.fc1 = nn.Linear(dim1 // reduction, output_dim)
    self.hidden = self.initHidden(batchsize)
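# A minimal sketch (not from the original source) of how the head above is typically applied
# to the RNN output. It assumes fastai's BatchNorm1dFlat, which flattens the leading
# dimensions before running nn.BatchNorm1d; the sizes are illustrative.
import torch
import torch.nn as nn
from fastai.layers import BatchNorm1dFlat

out = torch.randn(64, 30, 2 * 128)    # (batch, seq_len, hidden_dim * num_directions)
bn = BatchNorm1dFlat(2 * 128)
fc = nn.Linear(2 * 128, 1)
pred = fc(bn(out))                    # shape (64, 30, 1): one prediction per timestep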
def _one_rnn(self, n_in, n_out, bidir, weight_p, l):
    "Build one QRNN layer, with weight dropout on its internal linear layer."
    from fastai.text.models.qrnn import QRNN
    rnn = QRNN(n_in, n_out, 1, save_prev_x=(not bidir), zoneout=0,
               window=2 if l == 0 else 1, output_gate=True, bidirectional=bidir)
    rnn.layers[0].linear = WeightDropout(rnn.layers[0].linear, weight_p, layer_names='weight')
    return rnn
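# A minimal sketch (not part of the original source; assumes torch.nn as nn is in scope) of how
# a helper like _one_rnn is usually called when stacking n_layers QRNNs. The sizing mirrors the
# __init__ shown earlier: the first layer takes emb_sz inputs, the last layer returns to emb_sz,
# and hidden sizes are divided across directions when bidirectional. The name _build_rnns is
# hypothetical.
def _build_rnns(self, emb_sz, n_hid, n_layers, bidir, weight_p):
    n_dir = 2 if bidir else 1
    return nn.ModuleList([
        self._one_rnn(emb_sz if l == 0 else n_hid,
                      (n_hid if l != n_layers - 1 else emb_sz) // n_dir,
                      bidir, weight_p, l)
        for l in range(n_layers)])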