def __init__(self, n_token, n_layer, n_head, d_model, d_head, d_inner,
             dropoute, dropouti, dropouta, dropoutf, dropouth, dropouto,
             tie_weight=True, tgt_len=None, ext_len=0, mem_len=0, clamp_len=-1):
    super(AWDTransformerXL, self).__init__()
    self.n_token = n_token
    self.d_model = d_model
    self.n_head = n_head
    self.d_head = d_head

    self.word_emb = nn.Embedding(n_token, d_model)
    self.emb_scale = d_model ** 0.5

    self.dropoute = dropoute
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropouto = dropouto
    self.drop_i = nn.Dropout(dropouti)
    self.locked_drop_i = LockedDropout(dropouti)
    self.locked_drop_h = LockedDropout(dropouth)
    self.locked_drop_o = LockedDropout(dropouto)

    self.n_layer = n_layer
    self.tgt_len = tgt_len
    self.ext_len = ext_len
    self.mem_len = mem_len
    self.clamp_len = clamp_len

    self.layers = nn.ModuleList()
    for i in range(n_layer):
        self.layers.append(
            RelDecoderLayer(n_head, d_model, d_head, d_inner,
                            dropoutf=dropoutf, dropouta=dropouta))

    self.out_layer = nn.Linear(d_model, n_token)
    if tie_weight:
        self.out_layer.weight = self.word_emb.weight

    self._create_params()
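# Most constructors in this file rely on a LockedDropout module defined elsewhere
# in their respective repos. A minimal sketch of the idea (variational dropout:
# one mask per sequence, reused at every time step). The signature varies between
# repos -- some, like AWDTransformerXL above, pass the rate at construction,
# others pass it per forward call; this variant takes it at construction and is
# illustrative only.
import torch
import torch.nn as nn

class LockedDropoutSketch(nn.Module):
    def __init__(self, dropout=0.5):
        super().__init__()
        self.dropout = dropout

    def forward(self, x):
        # x: (seq_len, batch, features)
        if not self.training or self.dropout == 0:
            return x
        # Sample a single Bernoulli mask over (batch, features), broadcast it
        # across the time dimension, and rescale to keep expected activations.
        mask = x.new_empty(1, x.size(1), x.size(2)).bernoulli_(1 - self.dropout)
        return x * mask / (1 - self.dropout)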
def __init__(self, input_size, hidden_size, output_size, numclass=0,
             dropouti=0.05, wdrop=0.2, dropouto=0.05):
    super(WeightDropBiLSTM, self).__init__()
    self.rnn1 = nn.LSTM(input_size, hidden_size, bidirectional=True)
    self.linear_rnn = nn.Linear(hidden_size * 2, hidden_size)
    self.rnn2 = nn.LSTM(hidden_size, hidden_size, bidirectional=True)
    self.linear = nn.Linear(hidden_size * 2 + hidden_size, numclass)

    self.lockdrop = LockedDropout()
    self.weight_drop1 = WeightDrop(self.rnn1, ['weight_hh_l0'], dropout=wdrop)
    self.weight_drop2 = WeightDrop(self.rnn2, ['weight_hh_l0'], dropout=wdrop)
    self.dropouti = dropouti
    self.dropouto = dropouto

    initrange = 0.1
    self.linear_rnn.weight.data.uniform_(-initrange, initrange)
    self.linear_rnn.bias.data.fill_(0)
    self.linear.weight.data.uniform_(-initrange, initrange)
    self.linear.bias.data.fill_(0)
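# WeightDrop (DropConnect on the recurrent weight_hh matrices) is also imported
# from elsewhere wherever it appears in this file. A rough, simplified sketch of
# the mechanism, not any repo's exact implementation: the wrapped module's
# weight_hh_l0 parameter is re-registered as weight_hh_l0_raw, and a dropped-out
# copy is written back before every forward pass. Real versions also handle the
# cuDNN flat-weights bookkeeping that this sketch ignores.
import torch
import torch.nn as nn
import torch.nn.functional as F

class WeightDropSketch(nn.Module):
    def __init__(self, module, weights=('weight_hh_l0',), dropout=0.5):
        super().__init__()
        self.module, self.weights, self.dropout = module, weights, dropout
        for name in self.weights:
            raw = getattr(self.module, name)
            del self.module._parameters[name]
            self.module.register_parameter(name + '_raw', nn.Parameter(raw.data))

    def forward(self, *args, **kwargs):
        for name in self.weights:
            raw = getattr(self.module, name + '_raw')
            # DropConnect: zero out recurrent connections, not activations.
            setattr(self.module, name,
                    F.dropout(raw, p=self.dropout, training=self.training))
        return self.module(*args, **kwargs)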
def __init__(self, ntoken, h_dim, emb_dim, nlayers, chunk_size, wdrop=0, dropouth=0.5):
    super(sentence_encoder, self).__init__()
    self.lockdrop = LockedDropout()
    self.hdrop = nn.Dropout(dropouth)
    self.encoder = nn.Embedding(ntoken, emb_dim)
    self.rnn = ONLSTMStack([emb_dim] + [h_dim] * nlayers,
                           chunk_size=chunk_size,
                           dropconnect=wdrop,
                           dropout=dropouth)
    initrange = 0.1
    self.encoder.weight.data.uniform_(-initrange, initrange)
    self.h_dim = h_dim
    self.emb_dim = emb_dim
    self.nlayers = nlayers
    self.ntoken = ntoken
    self.chunk_size = chunk_size
    self.wdrop = wdrop
    self.dropouth = dropouth
def __init__(self, config, TEXT):
    super(RNNLM, self).__init__()
    vocabSize = config.data.vocabSize
    nemd = config.model.rnn.nemd
    nhid = config.model.rnn.nhid
    self.nlayer = config.model.rnn.nlayer
    tie_weight = config.model.rnn.tie_weight
    rnn_type = config.model.rnn.rnn_type
    self.embed_drop_ratio = config.model.rnn.embed_drop_ratio
    self.locked_drope = config.model.rnn.locked_drope
    self.locked_droph = config.model.rnn.locked_droph
    self.locked_dropo = config.model.rnn.locked_dropo

    if config.model.rnn.pretrained_embedding:
        self.embedding = nn.Embedding.from_pretrained(TEXT.vocab.vectors, freeze=False)
    else:
        self.embedding = nn.Embedding(vocabSize, nemd)

    self.lockdrop = LockedDropout()
    rnns = [getattr(nn, rnn_type)(nemd if l == 0 else nhid,
                                  nhid if l != self.nlayer - 1 else nemd,
                                  dropout=0, batch_first=False)
            for l in range(self.nlayer)]
    self.rnns = nn.ModuleList(rnns)
    self.out = nn.Linear(nemd, vocabSize)

    if not config.model.rnn.pretrained_embedding:
        self.init_weights()
    if tie_weight:
        self.out.weight = self.embedding.weight
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, pooling=False):
    super(LMmodel, self).__init__()
    self.lockdrop = LockedDropout()
    self.ntoken = ntoken  # <---------------- Temporary, probably <NUM>, <MIX_NUM> in another dataset.
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(self.ntoken, ninp)
    # Pre-trained model doesn't use batch_first.
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.pooling = pooling
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, dropoutcomb=0.2, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    # emb_dim = int(ninp/2): this embedding is shared by the source and target
    # language, so that the concatenation of the input and the predicted target
    # has dimension ninp.
    self.emb_dim = int(ninp / 2)
    self.encoder = nn.Embedding(ntoken, self.emb_dim)
    assert rnn_type in ['LSTM'], 'use LSTM pls'
    if rnn_type == 'LSTM':
        # Input is the combined vector with dimension ninp; in the paper nhid == ninp.
        self.rnns = [torch.nn.LSTM(ninp, nhid, 1, dropout=0) for l in range(nlayers)]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(self.emb_dim, ntoken)
    self.combiner = nn.Linear(nhid, self.emb_dim)
    if tie_weights:
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
def __init__(self, layer_sizes, chunk_size, dropout=0., dropconnect=0.,
             greedy_eval=False, fut_window=5):
    """
    Args:
        greedy_eval: greedy future position, instead of sampling
        fut_window: window size over the future
    """
    super().__init__()
    self.cells = nn.ModuleList([
        nmONLSTMCell(layer_sizes[i], layer_sizes[i + 1], chunk_size,
                     greedy_eval=greedy_eval, dropconnect=dropconnect,
                     window=fut_window)
        for i in range(len(layer_sizes) - 1)
    ])
    self.lockdrop = LockedDropout()
    self.dropout = dropout
    self.sizes = layer_sizes
def __init__(self, d_model, d_inner, dropout):
    super(PositionwiseFF, self).__init__()
    self.d_model = d_model
    self.d_inner = d_inner
    self.dropout = dropout
    self.CoreNet = nn.Sequential(
        nn.Linear(d_model, d_inner),
        nn.ReLU(inplace=True),
        LockedDropout(dropout),
        nn.Linear(d_inner, d_model),
        LockedDropout(dropout),
    )
    self.layer_norm = nn.LayerNorm(d_model)
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, alpha=2, beta=1, bsz=20):
    super(RNNModel, self).__init__()
    self.bsz = bsz
    self.ntoken = ntoken
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.alpha = alpha
    self.beta = beta
    self.metrics = [self.acc, self.perplexity]

    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)

    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0, window=2 if l == 0 else 1,
                               output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)

    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    # Build the SplitCrossEntropyLoss criterion here
    self.build_criterion()

    self.hidden = None
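# Example instantiation of the constructor above, with hyperparameters in the
# ballpark of the published AWD-LSTM word-level defaults. The values (and the
# assumption that the class's other methods referenced here, e.g. acc,
# perplexity and build_criterion, are defined elsewhere) are illustrative only.
model = RNNModel('LSTM', ntoken=10000, ninp=400, nhid=1150, nlayers=3,
                 dropout=0.4, dropouth=0.25, dropouti=0.4, dropoute=0.1,
                 wdrop=0.5, tie_weights=True)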
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else ninp,
                               1, dropout=dropouth)
                 for l in range(nlayers)]
    print(self.rnns)
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
def __init__(self, nb_words, hidden_size=128, embedding_size=128, n_layers=1,
             wdrop=0.25, odrop=0.25, edrop=0.1, idrop=0.25, variational=False,
             standard_dropout=False, batch_first=True):
    super(Model, self).__init__()
    self.standard_dropout = standard_dropout
    self.lockdrop = LockedDropout(batch_first=batch_first)
    self.odrop = odrop
    self.idrop = idrop
    self.edrop = edrop
    self.n_layers = n_layers
    self.embedding = nn.Embedding(nb_words, embedding_size)
    self.rnns = [
        nn.LSTM(embedding_size if l == 0 else hidden_size, hidden_size,
                num_layers=1, batch_first=batch_first)
        for l in range(n_layers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop, variational=variational)
            for rnn in self.rnns
        ]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.output_layer = nn.Linear(hidden_size, 1)
    self.init_weights()
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers,
             dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0,
             tie_weights=False, ldropout=0.5, n_experts=10):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else nhidlast,
                               1, dropout=0)
                 for l in range(nlayers)]
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.head = MoShead(ntoken, ninp, nhid, nhidlast, self.encoder,
                        self.lockdrop, tie_weights, n_experts)
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.ntoken = ntoken

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('Param size: {}'.format(size))
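# The MoShead used above is defined elsewhere in that repo; this is only a rough
# sketch of the mixture-of-softmaxes computation such a head performs
# (Yang et al. 2017). The function name, argument names, and shapes below are
# illustrative assumptions, not the repo's actual API.
import torch
import torch.nn.functional as F

def mixture_of_softmaxes(hidden, prior, latent, decoder, n_experts, ninp):
    # hidden: (batch*seq_len, nhidlast); prior, latent, decoder: nn.Linear modules.
    latent_h = torch.tanh(latent(hidden)).view(-1, n_experts, ninp)
    logits = decoder(latent_h)                               # (N, n_experts, ntoken)
    prior_p = F.softmax(prior(hidden), dim=-1).unsqueeze(2)  # (N, n_experts, 1)
    # Mix the per-expert softmaxes with the prior weights to get word probabilities.
    return (F.softmax(logits, dim=-1) * prior_p).sum(dim=1)  # (N, ntoken)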
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers,
             dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0,
             tie_weights=False, ldropout=0.6, n_experts=10,
             num4embed=0, num4first=0, num4second=0):
    super(RNNModel, self).__init__()
    self.model_embeddings_source = ModelEmbeddings(ninp, vocab.src)
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else nhidlast,
                               1, dropout=0)
                 for l in range(nlayers)]
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)

    self.all_experts = n_experts + num4embed + num4first + num4second
    self.prior = nn.Linear(nhidlast, self.all_experts, bias=False)
    self.latent = nn.Linear(nhidlast, n_experts * ninp)
    if num4embed > 0:
        self.weight4embed = nn.Linear(ninp, num4embed * ninp)
    if num4first > 0:
        self.weight4first = nn.Linear(nhid, num4first * ninp)
    if num4second > 0:
        self.weight4second = nn.Linear(nhid, num4second * ninp)
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.num4embed = num4embed
    self.num4first = num4first
    self.num4second = num4second
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.ntoken = ntoken

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('param size: {}'.format(size))
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [
            torch.nn.LSTM(ninp if l == 0 else nhid,
                          nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                          1, dropout=0)
            for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0, window=2 if l == 0 else 1,
                               output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        # NOTE: Tying only overwrites the decoder's weight tensor. If the model
        # doesn't display as expected, this is why: the decoder's printed
        # dimensions keep their original values even though the weights are now
        # shared with the encoder.
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, layer_sizes, chunk_size, dropout=0., dropconnect=0.):
    super(ONLSTMStack, self).__init__()
    self.cells = nn.ModuleList([
        ONLSTMCell(layer_sizes[i], layer_sizes[i + 1], chunk_size, dropconnect=dropconnect)
        for i in range(len(layer_sizes) - 1)
    ])
    self.lockdrop = LockedDropout()
    self.dropout = dropout
    self.sizes = layer_sizes
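# ONLSTMCell above comes from the ON-LSTM model (Shen et al. 2019); its defining
# ingredient is the cumulative-softmax ("cumax") activation used for the master
# forget and input gates. A minimal sketch of that activation, assuming the
# standard formulation rather than this repo's exact helper:
import torch
import torch.nn.functional as F

def cumax(x, dim=-1):
    # Produces monotonically non-decreasing gate values in [0, 1] along `dim`.
    return torch.cumsum(F.softmax(x, dim=dim), dim=dim)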
def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type == 'onlstm':
        self.rnn = ONLSTMStack([ninp] + [nhid] * (nlayers - 1) + [ninp],
                               chunk_size=chunk_size, dropconnect=wdrop, dropout=dropouth)
    elif rnn_type == 'lstm':
        self.rnn = LSTMCellStack([ninp] + [nhid] * (nlayers - 1) + [ninp],
                                 dropconnect=wdrop, dropout=dropouth)
    else:
        raise ValueError('Unsupported rnn_type: {}'.format(rnn_type))
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, input_size, hidden_size, rrnn_size, context_size, drop=0., max_span_length=10):
    super(SpanScorer, self).__init__()
    self._input_size_ = input_size
    self._hidden_size_ = hidden_size
    self._rrnn_size_ = rrnn_size
    self._context_size_ = context_size
    self._droprate_ = drop
    self._lockdrop_ = LockedDropout()
    self._max_span_length_ = max_span_length
    self._nonlinearity_ = t.tanh
    self._softmax_ = t.nn.Softmax(dim=2)

    size = self._rrnn_size_ // 2
    self._w_gate_ = Parameter(t.randn(self._rrnn_size_, self._input_size_))
    self._b_gate_ = Parameter(t.randn(self._rrnn_size_))
    xavier_uniform(self._w_gate_.data, fan_in=self._input_size_, fan_out=size,
                   gain=nn.init.calculate_gain("sigmoid"))
    self._b_gate_.data.zero_()

    self._w1_span_ = Parameter(t.randn(self._hidden_size_, self._rrnn_size_))
    self._w1_token_ = Parameter(t.randn(self._hidden_size_, self._input_size_))
    self._b1_ = Parameter(t.randn(self._hidden_size_))
    self._w2_ = Parameter(t.randn(1, self._hidden_size_))
    xavier_uniform(self._w1_span_.data, fan_in=self._rrnn_size_ + self._input_size_,
                   fan_out=self._hidden_size_)
    xavier_uniform(self._w1_token_.data, fan_in=self._rrnn_size_ + self._input_size_,
                   fan_out=self._hidden_size_)
    xavier_uniform(self._w2_.data, fan_in=self._hidden_size_, fan_out=1)
    self._b1_.data.zero_()

    size = self._rrnn_size_ // 2
    self.rrnn_fw = RRNNCell(n_in=self._input_size_, n_out=size, dropout=0.,
                            rnn_dropout=0., nl="tanh", use_output_gate=False)
    self.rrnn_bw = RRNNCell(n_in=self._input_size_, n_out=size, dropout=0.,
                            rnn_dropout=0., nl="tanh", use_output_gate=False)
def __init__(self, model, vocsize, embsize, hiddensize, n_layers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=True,
             ldropout=0.5, n_experts=5, uncertain='gp', position=1):
    super(RNNLM, self).__init__()
    self.model = model.lower()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(vocsize, embsize)

    self.rnns = []
    for l in range(n_layers):
        if l == 0:
            if uncertain == 'gp':
                self.rnns.append(GPLSTM(embsize, hiddensize if l != n_layers - 1 else embsize, position))
            elif uncertain == 'bayes':
                self.rnns.append(BayesLSTM(embsize, hiddensize if l != n_layers - 1 else embsize, position))
            else:
                self.rnns.append(torch.nn.LSTM(embsize, hiddensize if l != n_layers - 1 else embsize, 1, dropout=0))
        else:
            self.rnns.append(torch.nn.LSTM(hiddensize, hiddensize if l != n_layers - 1 else embsize, 1, dropout=0))
    if wdrop:
        self.rnns = [WeightDrop(rnn, hiddensize if l != n_layers - 1 else embsize,
                                ['weight_hh_l0'], dropout=wdrop)
                     for l, rnn in enumerate(self.rnns)
                     if rnn.__class__.__name__ != "GPLSTM"]
    self.rnns = torch.nn.ModuleList(self.rnns)

    self.prior = nn.Linear(embsize, n_experts, bias=False)
    self.latent = nn.Sequential(nn.Linear(embsize, n_experts * embsize), nn.Tanh())
    self.decoder_bias = nn.Parameter(torch.empty(vocsize))
    if tie_weights:
        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        self.decoder_weight = self.encoder.weight
    else:
        self.decoder_weight = nn.Parameter(torch.empty(vocsize, embsize))

    self.vocsize = vocsize
    self.embsize = embsize
    self.hiddensize = hiddensize
    self.n_layers = n_layers
    self.tie_weights = tie_weights
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.init_parameters()
def __init__(self, H, char_arr=None, rel_arr=None, def_arr=None, dict=None):
    super(RNNModel, self).__init__()
    self.H = H
    self.dict = dict
    self.use_dropout = True
    self._lockdrop = LockedDropout()
    self.define_embedding(H, char_arr, rel_arr, def_arr)
    self.define_rnn(H)
    self.define_joint(H)
    self.define_bias(H)
    self.init_weights()
def __init__(self, rnn_type, ntoken, in_size, h_size, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, max_forget=0.875,
             use_buffers=False):
    super(RNNModel, self).__init__()
    self.rnn_type = rnn_type
    self.in_size = in_size
    self.h_size = h_size
    self.nlayers = nlayers
    self.use_buffers = use_buffers
    self.dropout = dropout
    self.dropouth = dropouth
    self.dropouti = dropouti
    self.dropoute = dropoute
    self.lockdrop = LockedDropout()
    self.embed = nn.Embedding(ntoken, in_size)

    # Construct RNN cells and apply weight dropping if specified.
    if rnn_type == 'revgru':
        rnn = RevGRU
        module_names = ['ih2_to_zr1', 'irh2_to_g1', 'ih1_to_zr2', 'irh1_to_g2']
    elif rnn_type == 'revlstm':
        rnn = RevLSTM
        module_names = ['ih2_to_zgfop1', 'ih1_to_zgfop2']

    self.rnns = [
        rnn(in_size if l == 0 else h_size,
            h_size if l != nlayers - 1 else in_size,
            max_forget)
        for l in range(nlayers)
    ]
    if wdrop:
        self.rnns = [WeightDrop(rnn, module_names, wdrop) for rnn in self.rnns]
    self.rnns = nn.ModuleList(self.rnns)

    # Initialize linear transform from hidden states to log probs.
    self.out = nn.Linear(in_size, ntoken)
    self.out.weight = self.embed.weight
    self.init_weights()
def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, args=None):
    super(GPTRNNModel, self).__init__()
    self.transformer = OpenAIGPTModel.from_pretrained('openai-gpt')
    config = OpenAIGPTConfig()
    self.lm_head = OpenAIGPTLMHead(self.transformer.tokens_embed.weight, config)
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Linear(768, ninp)
    self.args = args
    assert rnn_type in ['LSTM'], 'RNN type is not supported'
    self.rnn = ONLSTMStack([ninp] + [nhid] * (nlayers - 1) + [ninp],
                           chunk_size=chunk_size, dropconnect=wdrop, dropout=dropouth)
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    # if tie_weights:
    #     # if nhid != ninp:
    #     #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
    #     self.decoder.weight = self.encoder.weight

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.distance = None
    self.tie_weights = tie_weights
def __init__(self, n_head, d_model, d_head, dropout):
    super(MultiHeadAttn, self).__init__()
    self.n_head = n_head
    self.d_model = d_model
    self.d_head = d_head
    self.dropout = dropout

    self.qkv_net = nn.Sequential(
        nn.Linear(d_model, 3 * n_head * d_head, bias=False),
        LockedDropout(dropout))
    self.drop = nn.Dropout(dropout)
    self.locked_drop = LockedDropout(dropout)
    self.o_net = nn.Linear(n_head * d_head, d_model, bias=False)
    self.layer_norm = nn.LayerNorm(d_model)
    self.scale = 1 / (d_head ** 0.5)
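# The attention computation for MultiHeadAttn happens in its forward pass, which
# is not shown here. For reference, a self-contained sketch of the scaled
# dot-product core it is built around, using the same 1/sqrt(d_head) scale set
# above (an illustrative helper, not this class's actual forward):
import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v, scale, dropout_p=0.0, training=True):
    # q, k, v: (batch, n_head, seq_len, d_head)
    scores = torch.matmul(q, k.transpose(-2, -1)) * scale  # attention logits
    attn = F.softmax(scores, dim=-1)                       # attention weights
    attn = F.dropout(attn, p=dropout_p, training=training)
    return torch.matmul(attn, v)                           # weighted values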
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             ldropout=0.5, n_experts=10):
    super(RNNModel, self).__init__()
    self.use_dropout = True
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [ModifiedLSTMcell(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else nhidlast,
                                  dropout=wdrop if self.use_dropout else 0)
                 for l in range(nlayers)]
    # if wdrop:
    #     self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop if self.use_dropout else 0)
    #                  for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.prior = nn.Linear(nhidlast, n_experts, bias=False)
    self.latent = nn.Sequential(nn.Linear(nhidlast, n_experts * ninp), nn.Tanh())
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.ntoken = ntoken

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('param size: {}'.format(size))
def __init__(self, vocab_obj, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    embed_matrix_tensor = torch.from_numpy(vocab_obj.embed_matrix).cuda()
    self.encoder.load_state_dict({'weight': embed_matrix_tensor})

    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0, window=2 if l == 0 else 1,
                               output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.decoder = nn.Linear(nhid, ntoken)
    self.rnns = torch.nn.ModuleList(self.rnns)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropouto=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type == 'LSTM':
        self.rnns = [
            DropconnectCell(ninp if l == 0 else nhid,
                            nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                            wdrop=wdrop)
            for l in range(nlayers)
        ]
        if wdrop:
            print("Using weight drop {}".format(wdrop))
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights
    if tie_weights:
        print("Tie weights")
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.ntoken = ntoken
    self.nlayers = nlayers
    self.dropouto = dropouto
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.wdrop = wdrop
    self.tie_weights = tie_weights
def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    # variational dropout
    self.lockdrop = LockedDropout()
    # self.idrop = nn.Dropout(dropouti)
    # self.hdrop = nn.Dropout(dropouth)
    # self.edrop = nn.Dropout(dropoute)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [
        torch.nn.LSTM(ninp if layer == 0 else nhid,
                      nhid if layer != nlayers - 1 else (ninp if tie_weights else nhid),
                      1, dropout=0)
        for layer in range(nlayers)
    ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, dropoutcomb=0.2, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    print(ninp, nhid)
    self.encoder = nn.Embedding(ntoken, int(ninp / 2))
    assert rnn_type in ['LSTM'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp, nhid, 1, dropout=0) for l in range(nlayers)]
    # print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    self.combiner = nn.Linear(ninp, int(ninp / 2))

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.dropoutcomb = dropoutcomb
    self.tie_weights = tie_weights

    size = 0
    for p in self.parameters():
        size += p.nelement()
        print(p.size())
    print('Number of parameters: {:,}'.format(size))
    print('Small model')
def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(WeightDropLSTM, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [
        torch.nn.LSTM(ninp if l == 0 else nhid,
                      nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                      1, dropout=0, batch_first=True)
        for l in range(nlayers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
            for rnn in self.rnns
        ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    if tie_weights:
        # Optionally tie weights
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, nemoji, ninp, nhid, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [
        torch.nn.LSTM(ninp if l == 0 else nhid, nhid, 1, dropout=0)
        for l in range(nlayers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
            for rnn in self.rnns
        ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, nemoji)
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
def __init__(self, ntoken, ninp, nhid, nlayers, dropouto=0, dropouth=0, dropouti=0,
             dropoute=0, wdrop=0, tie_weights=True, num_hparams=1, device='cuda:0'):
    super(HyperLSTM, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = HyperEmbedding(ntoken, ninp, num_hparams)
    self.decoder = nn.Linear(nhid, ntoken)
    if tie_weights:
        print("Tie weights")
        self.decoder.weight = self.encoder.elem_embedding.weight
    self.rnns = [
        HyperLSTMCell(
            ninp if l == 0 else nhid,
            nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
            num_hparams=num_hparams,
            wdrop=wdrop,
        )
        for l in range(nlayers)
    ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.ninp = ninp
    self.nhid = nhid
    self.ntoken = ntoken
    self.nlayers = nlayers
    self.dropouto = dropouto
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.wdrop = wdrop
    self.tie_weights = tie_weights