def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, alpha=2, beta=1, bsz=20):
    super(RNNModel, self).__init__()
    self.bsz = bsz
    self.ntoken = ntoken
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.alpha = alpha
    self.beta = beta
    self.metrics = [self.acc, self.perplexity]

    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)

    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()
    # Build the SplitCrossEntropyLoss criterion here
    self.build_criterion()
    self.hidden = None
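# --- Reference sketch (not from the source): every constructor in this section
# instantiates a LockedDropout module without defining it. A minimal
# AWD-LSTM-style implementation is sketched below; the (seq_len, batch, features)
# input shape and the dropout-rate-at-call-time signature are assumptions based
# on how the constructors use it.
import torch
import torch.nn as nn

class LockedDropout(nn.Module):
    """Variational dropout: one mask sampled per batch, reused across time steps."""
    def forward(self, x, dropout=0.5):
        if not self.training or not dropout:
            return x
        # One mask for the whole sequence, rescaled to keep expectations unchanged.
        mask = x.new_empty(1, x.size(1), x.size(2)).bernoulli_(1 - dropout) / (1 - dropout)
        return mask.expand_as(x) * x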
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        # NOTE: Tying only overwrites the decoder's weight tensor, so a printed
        # module summary still shows the decoder's original dimensions even
        # though the weights are now shared with the encoder. If the model
        # display looks wrong, this is why.
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
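# --- Reference sketch (not from the source): WeightDrop, i.e. DropConnect on
# the recurrent weight matrices, as wrapped around every RNN above. A
# simplified version of the usual AWD-LSTM utility; the original also works
# around cuDNN's flatten_parameters, which this sketch omits.
import torch
import torch.nn as nn
import torch.nn.functional as F

class WeightDrop(nn.Module):
    def __init__(self, module, weights, dropout=0.0):
        super().__init__()
        self.module, self.weights, self.dropout = module, weights, dropout
        for name in self.weights:
            w = getattr(self.module, name)
            del self.module._parameters[name]
            # Keep the raw weight as the learnable parameter...
            self.module.register_parameter(name + '_raw', nn.Parameter(w.data))

    def _setweights(self):
        # ...and expose a dropped-out view of it under the original name.
        for name in self.weights:
            raw = getattr(self.module, name + '_raw')
            setattr(self.module, name, F.dropout(raw, p=self.dropout, training=self.training))

    def forward(self, *args):
        self._setweights()
        return self.module.forward(*args)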
def __init__(self, vocab_obj, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    # Load pretrained embeddings from the vocabulary object.
    embed_matrix_tensor = torch.from_numpy(vocab_obj.embed_matrix).cuda()
    self.encoder.load_state_dict({'weight': embed_matrix_tensor})
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.decoder = nn.Linear(nhid, ntoken)
    self.rnns = torch.nn.ModuleList(self.rnns)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, joint_emb=None,
             joint_emb_depth=0, joint_emb_dense=False, joint_emb_dual=True, joint_dropout=0.2,
             joint_emb_activation='Sigmoid', joint_locked_dropout=False,
             joint_residual_prev=False, joint_noresid=False):
    super(RNNModel, self).__init__()
    self.use_dropout = True
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti if self.use_dropout else 0)
    self.hdrop = nn.Dropout(dropouth if self.use_dropout else 0)
    self.drop = nn.Dropout(dropout if self.use_dropout else 0)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights or (joint_emb is not None) else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop if self.use_dropout else 0) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop if self.use_dropout else 0) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop if self.use_dropout else 0)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)

    if joint_emb is None:
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder = nn.Linear(ninp, ntoken)
            self.decoder.weight = self.encoder.weight
        else:
            self.decoder = nn.Linear(nhid, ntoken)
    else:
        self.dropjoint = nn.Dropout(joint_dropout if self.use_dropout else 0)
        # Define the first layer of the label encoder network
        if joint_emb_activation != "Linear":
            self.joint_encoder_proj_0 = nn.Sequential(nn.Linear(ninp, joint_emb, bias=True), eval("nn.%s()" % joint_emb_activation))
        else:
            # The source is truncated at this branch; a plain linear projection
            # (no activation) is the natural completion of the 'Linear' case.
            self.joint_encoder_proj_0 = nn.Linear(ninp, joint_emb, bias=True)
def __init__(self, rnn_type, ntoken, ninp, nhid, proplstm, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, params={}):
    super(RNNModel, self).__init__()
    self.params = params
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'MYLSTM', 'MYFASTLSTM', 'SIMPLEPLASTICLSTM',
                        'FASTPLASTICLSTM', 'PLASTICLSTM', 'SPLITLSTM', 'FWMRNNv2'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        # for rr in self.rnns:
        #     rr.flatten_parameters()
        if wdrop:
            print("Using WeightDrop!")
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'MYLSTM':
        self.rnns = [mylstm.MyLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid)) for l in range(nlayers)]
    elif rnn_type == 'MYFASTLSTM':
        self.rnns = [mylstm.MyFastLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid)) for l in range(nlayers)]
    elif rnn_type == 'PLASTICLSTM':
        self.rnns = [mylstm.PlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
    elif rnn_type == 'SIMPLEPLASTICLSTM':
        # Note that this one ignores the 'params' argument, which is only kept to
        # preserve an identical signature with PlasticLSTM.
        self.rnns = [mylstm.SimplePlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
    elif rnn_type == 'FASTPLASTICLSTM':
        self.rnns = [mylstm.MyFastPlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
    elif rnn_type == 'SPLITLSTM':
        # Not used
        self.rnns = [mylstm.SplitLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), proplstm, params) for l in range(nlayers)]
    elif rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    elif rnn_type == 'FWMRNNv2':
        self.rnns = [myfastweights_v2.FWMRNN(isize=ninp if l == 0 else nhid, hsize=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), withFWM=l == nlayers - 1, params=params, wdrop=wdrop) for l in range(nlayers)]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.proplstm = proplstm
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, use_pre=False,
             use_demo=False, useone=None, demoembs=None, demouse=None, mainmatrix=0,
             printfunc=None):  # , match_input_size=False
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    # self.match_input_size = match_input_size
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'

    ninp_mod = ninp  # make this bigger if we want to concatenate demographic embeddings
    if use_demo:
        assert len(demoembs[0][0][1]) == ninp
        if demouse == 'cat':
            if useone is not None:
                ninp_mod = ninp * 2
            else:
                ninp_mod = ninp * 5
    # Modify the size of the input for concatenated embeddings, but the output
    # size should stay the same as the word embedding size, ninp.
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp_mod if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp_mod if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp_mod if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.cprint = printfunc
    self.cprint(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    # self.age_decode = nn.Linear(nhid, len(demoembs[0]))
    # self.location_decode = nn.Linear(nhid, len(demoembs[1]))
    # self.religion_decode = nn.Linear(nhid, len(demoembs[2]))
    # self.gender_decode = nn.Linear(nhid, len(demoembs[3]))

    if useone is not None:
        self.cprint('Using one demographic input: ' + str(useone))
    elif use_demo:
        self.cprint('Using all four demographic inputs')
    if use_demo and useone in ['age', None]:
        self.age_embed = torch.nn.ModuleList([nn.Embedding(ntoken, ninp) for i in DEMOVARS['AGE']])
    if use_demo and useone in ['location', None]:
        self.location_embed = torch.nn.ModuleList([nn.Embedding(ntoken, ninp) for i in DEMOVARS['LOCATION']])
    if use_demo and useone in ['religion', None]:
        self.religion_embed = torch.nn.ModuleList([nn.Embedding(ntoken, ninp) for i in DEMOVARS['RELIGION']])
    if use_demo and useone in ['gender', None]:
        self.gender_embed = torch.nn.ModuleList([nn.Embedding(ntoken, ninp) for i in DEMOVARS['GENDER']])

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
        # self.age_decode.weight = self.age_embed.weight
        # self.location_decode.weight = self.location_embed.weight
        # self.religion_decode.weight = self.religion_embed.weight
        # self.gender_decode.weight = self.gender_embed.weight

    self.use_pre = use_pre
    self.use_demo = use_demo
    self.useone = useone
    self.demouse = demouse
    self.init_weights(demoembs, mainmatrix)

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, byte=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    if byte:
        if ninp != 256:
            raise ValueError('wrong embedding size for bytes: %d -> 256' % ninp)
        assert ninp == 256
        ntoken = 256
        # Frozen one-hot "embedding": the identity matrix maps each byte value
        # to its one-hot vector.
        self.encoder = nn.Embedding(ntoken, ninp)
        self.encoder.weight.data.copy_(torch.eye(256))
        self.encoder.weight.requires_grad = False
        print(self.encoder.weight.data)
    else:
        self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights(byte)

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
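# --- Quick check (not from the source) of the frozen one-hot byte "embedding"
# built above: with identity weights, nn.Embedding maps byte value b to the
# one-hot vector e_b, so the model consumes raw bytes without learned embeddings.
import torch
import torch.nn as nn

emb = nn.Embedding(256, 256)
emb.weight.data.copy_(torch.eye(256))
emb.weight.requires_grad = False
x = torch.tensor([65, 66])  # bytes 'A' and 'B'
out = emb(x)
assert out[0, 65] == 1 and out[0].sum() == 1  # each row is exactly one-hot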
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, mu=0.9, epsilon=0.1,
             mus=0.999, restart=0):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'MLSTM', 'NLSTM', 'ALSTM'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), bias=True) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'MLSTM':
        self.rnns = [MomentumLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), mu=mu, epsilon=epsilon, bias=True) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'ALSTM':
        self.rnns = [AdamLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), mu=mu, epsilon=epsilon, mus=mus, bias=True) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'NLSTM':
        self.rnns = [NesterovLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), epsilon=epsilon, restart=restart, bias=True) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, bias=True) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.mu = mu
    self.epsilon = epsilon
    self.mus = mus
    self.restart = restart
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, prior_numstates=5,
             use_fixed_uniform_prior=False, cuda=False, dictionary=None, latent_plot_typ='kw',
             infer_nw_arch_type='linear1', inference_pretrained_model_path=None,
             infer_nw_skip_first_token=False, infer_nw_ignore_token_type='default',
             infer_nw_share_encoder=True, inference_nw_frozen=False,
             inference_nw_uniform_distribution=False, emotion_type='basic',
             inference_pretrained_model_path_extractinference=False,
             inference_nw_first_word_distribution=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.use_cuda = cuda
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    self.inference_pretrained_model_path_extractinference = inference_pretrained_model_path_extractinference
    # self.decoder_nw_frozen = decoder_nw_frozen

    #################### WEIGHT TYING
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
        # self.decoder_prior.weight = self.encoder.weight
        print("******* tied weights ******")
        print("**self.decoder = ", self.decoder)

    #################### PRIOR
    self.latent_plot_typ = latent_plot_typ
    provided_prior_decoder = None
    emotion_vocab_list = None
    self.emotion_type = emotion_type
    if latent_plot_typ == 'kw':
        provided_prior_decoder = self.decoder
    elif latent_plot_typ == 'emotion':
        self.emotion_vocab_list = emotion_vocab_list = self.get_emotion_vocab_list(dictionary)
    self.prior_model = PriorModel(typ=latent_plot_typ,
                                  ntoken=ntoken,
                                  ninp=ninp,
                                  nhid=nhid,
                                  tie_weights=tie_weights,
                                  prior_numstates=None,
                                  decoder=provided_prior_decoder,
                                  cuda=False,
                                  emotion_vocab_list=emotion_vocab_list,
                                  use_fixed_uniform_prior=use_fixed_uniform_prior)

    #################### INFERENCE NETWORK
    self.infer_nw_share_encoder = infer_nw_share_encoder
    self.inference_nw_uniform_distribution = inference_nw_uniform_distribution
    self.inference_nw_first_word_distribution = inference_nw_first_word_distribution
    if infer_nw_share_encoder:
        infer_nw_encoder = self.encoder
    else:
        infer_nw_encoder = None
    self.inference_nw = InferenceNW(typ=latent_plot_typ,
                                    ninp=ninp,
                                    ntoken=ntoken,
                                    encoder=infer_nw_encoder,
                                    prior_numstates=prior_numstates,
                                    use_cuda=cuda,
                                    arch_type=infer_nw_arch_type,
                                    skip_first_token=infer_nw_skip_first_token,
                                    dictionary=dictionary,
                                    ignore_token_type=infer_nw_ignore_token_type,
                                    nw_frozen=inference_nw_frozen,
                                    emotion_vocab_list=emotion_vocab_list,
                                    uniform_distribution=inference_nw_uniform_distribution,
                                    first_word_distribution=inference_nw_first_word_distribution,
                                    emotion_type=emotion_type)
    self.inference_pretrained_model_path = inference_pretrained_model_path  # will call in init_weights
    self.inference_nw_frozen = inference_nw_frozen

    #################### INIT AND SAVE HYPERPARAMS
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.wdrop = wdrop
    self.use_fixed_uniform_prior = use_fixed_uniform_prior
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, binarized=False,
             collect_stats=False, no_md=False, split_cross=False):
    super(RNNModel, self).__init__()
    self.binarized = binarized
    self.collect_stats = collect_stats
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.ctx = ctx = candle.TernaryQuantizeContext()
    self.scale = nn.Parameter(torch.Tensor([0]))
    self.nout = ninp
    self.no_md = no_md
    self.se = split_cross
    # self.ternary = ctx.activation(k=8)
    self.encoder = ctx.bypass(nn.Embedding(ntoken, ninp))
    # self.mdC = []
    # self.mdH = []
    # for _ in range(nlayers):
    #     td = candle.UniformTiedGenerator()
    #     self.mdC.append(candle.LinearMarkovDropout(0.6, min_length=0.4, tied_generator=td, tied_root=True, tied=True, rescale=False))
    #     self.mdH.append(candle.LinearMarkovDropout(0.6, min_length=0.4, tied_generator=td, tied=True, rescale=False))
    if binarized:
        self.decode_bn = ctx.bypass(nn.BatchNorm1d(ninp))
    elif collect_stats:
        self.encode_bn = ctx.moment_stat(name="encoder")
    assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'LSTM-MD'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'LSTM-MD':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], ['weight_hh_l0', 'weight_ih_l0', 'bias_hh_l0', 'bias_ih_l0'], dropout=wdrop, md=(0.6, 0.4)) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True, binarized=binarized, ctx=ctx, collect_stats=collect_stats, no_md=no_md, scale=self.scale) for l in range(nlayers)]
        for rnn in self.rnns:
            if binarized:
                rnn.linear.hook_weight(candle.WeightDrop, p=wdrop)
                # rnn.linear.hook_weight(candle.SignFlip, p=wdrop)
            else:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    # print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    # self.decoder = ctx.wrap(nn.Linear(nhid, ntoken), soft=True, scale=self.scale) if binarized else ctx.bypass(nn.Linear(nhid, ntoken))
    self.decoder = ctx.bypass(nn.Linear(nhid, ntoken))

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        if binarized:
            self.decoder.weight = self.encoder.weight
            # self.decoder.tie_weight(self.encoder.weight)
        else:
            self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, use_pre=False,
             use_ind=False, indembs=None, induse=None, printfunc=None):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'

    ninp_mod = ninp
    if use_ind:
        if indembs[0] is not None:
            assert len(indembs[0][0]) == ninp
        if induse == 'cat':
            ninp_mod = ninp * 2
    # Modify the size of the input for concatenated embeddings, but the output
    # size should stay the same as the word embedding size, ninp.
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp_mod if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp_mod if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp_mod if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.cprint = printfunc
    self.cprint(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    if use_ind:
        self.user_embed = nn.Embedding(ntoken, ninp)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.use_pre = use_pre
    self.use_ind = use_ind
    self.induse = induse
    self.init_weights(indembs)

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, emsize, nhid, nlayers, dropoute=0.2, dropouti=0.2,
             dropoutrnn=0.2, dropout=0.2, wdrop=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, emsize)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = torch.nn.LSTM(emsize, nhid, nlayers, dropout=dropoutrnn)
    if rnn_type == 'GRU':
        self.rnns = torch.nn.GRU(emsize, nhid, nlayers, dropout=dropoutrnn)
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        # NOTE: this branch leaves self.rnns as a plain Python list, so the
        # WeightDrop wrapper below (which targets 'weight_hh_l{i}' names) only
        # matches the single stacked LSTM/GRU module case.
        self.rnns = [QRNNLayer(input_size=emsize if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (emsize if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
    if wdrop:
        self.rnns = WeightDrop(self.rnns, ['weight_hh_l{}'.format(i) for i in range(nlayers)], wdrop)
    print(self.rnns)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != emsize:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder = nn.Linear(nhid, ntoken, bias=False)
        self.decoder.weight = self.encoder.weight
    else:
        self.decoder = nn.Linear(nhid, ntoken)

    self.ninp = emsize
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropoute = dropoute
    self.dropouti = dropouti
    self.dropout = dropout
    self.tie_weights = tie_weights
    self.init_weights()
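# --- Hypothetical usage (not from the source) of the constructor above,
# assuming the enclosing class is RNNModel (as its super() call suggests) and
# that LockedDropout, WeightDrop and init_weights exist in the codebase.
# tie_weights requires nhid == emsize here, since the size check is active.
model = RNNModel('LSTM', ntoken=10000, emsize=400, nhid=400, nlayers=3,
                 wdrop=0.5, tie_weights=True)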
def __init__(self, config):
    super(BiRNNLanguageModel, self).__init__()
    self.config = config
    self.tie_weights = config.get('tie_weights', True)
    self.embedding_dim = config.get('embedding_dim', LM_HIDDEN_DIM)
    self.hidden_dim = self.embedding_dim if self.tie_weights else config.get('hidden_dim', LM_HIDDEN_DIM)
    self.dropout_emb = config.get('emb_dropout', .2)
    self.dropout_i = config.get('lock_drop', .5)
    self.dropout_h = config.get('h_dropout', .5)
    self.dropout_w = config.get('w_dropout', 0)
    self.num_words = config.get('num_words', LM_VOCAB_SIZE)
    self.rnn_type = config.get('rnn_type', 'SRU')
    self.n_layers = config.get('n_layers', 6)
    self.dropout_rnn = config.get('rnn_dropout', .2)
    self.highway_bias = config.get('highway_bias', -3)
    self.use_adasoft = config.get('use_adasoft', True)
    self.adasoft_cutoffs = config.get('adasoft_cutoffs', [LM_VOCAB_SIZE // 2, LM_VOCAB_SIZE // 2])

    assert self.rnn_type in ['LSTM', 'GRU', 'SRU', 'QRNN']

    self.encoder = nn.Embedding(self.num_words, self.embedding_dim)
    self.lockdrop = to_gpu(LockedDropout())

    # For the time being, weight drop is broken.
    if self.rnn_type == 'LSTM':
        self.rnns = [nn.LSTM(self.embedding_dim if layer_ix == 0 else self.hidden_dim, self.hidden_dim // 2, bidirectional=True, dropout=self.dropout_rnn) for layer_ix in range(self.n_layers)]
        if self.dropout_w:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=self.dropout_w) for rnn in self.rnns]
    elif self.rnn_type == 'GRU':
        self.rnns = [nn.GRU(self.embedding_dim if layer_ix == 0 else self.hidden_dim, self.hidden_dim // 2, bidirectional=True, dropout=self.dropout_rnn) for layer_ix in range(self.n_layers)]
        if self.dropout_w:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=self.dropout_w) for rnn in self.rnns]
    elif self.rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(self.embedding_dim if layer_ix == 0 else self.hidden_dim, self.hidden_dim // 2, bidirectional=True) for layer_ix in range(self.n_layers)]
        if self.dropout_w:
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=self.dropout_w)
    else:
        from sru import SRU
        self.rnns = [to_gpu(SRU(self.embedding_dim if layer_ix == 0 else self.hidden_dim,
                                self.hidden_dim // 2,
                                num_layers=1,
                                rnn_dropout=self.dropout_rnn,
                                dropout=self.dropout_w,
                                rescale=False,
                                highway_bias=self.highway_bias,
                                use_tanh=0,
                                bidirectional=True,
                                v1=True)) for layer_ix in range(self.n_layers)]
    self.rnns = nn.ModuleList(self.rnns)

    self.decoder = nn.Linear(self.embedding_dim if self.tie_weights else self.hidden_dim, self.num_words)

    # Adaptive softmax
    if self.use_adasoft:
        if 'adasoft_cutoffs' in config:
            splits = config['adasoft_cutoffs']
        else:
            if self.num_words >= 500000:
                # One Billion
                # This produces fairly even matrix mults for the buckets:
                # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
                splits = [4200, 35000, 180000]
            elif self.num_words >= 75000:
                # WikiText-103
                splits = [2800, 20000, 76000]
            elif self.num_words >= 20000:
                splits = [2000, 4000, 10000]
            else:
                splits = [self.num_words // 3, self.num_words // 3]
            config['adasoft_cutoffs'] = splits
        # print('Cross Entropy Splits: Using', splits)
        self.adasoft = SplitCrossEntropyLoss(self.hidden_dim, splits, ignore_index=0)
    else:
        self.adasoft = None

    # Weight tying
    if self.tie_weights:
        self.decoder.weight = self.encoder.weight

    self.init_weights()
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, nhlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             nr_cells=5, read_heads=2, sparse_reads=10, cell_size=10, gpu_id=-1,
             independent_linears=False, debug=True):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.debug = debug
    assert rnn_type in ['LSTM', 'QRNN', 'DNC', 'SDNC'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else ninp, save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    elif rnn_type.lower() == 'sdnc':
        self.rnns = [SDNC(input_size=ninp,
                          hidden_size=nhid,
                          num_layers=nlayers,
                          num_hidden_layers=nhlayers,
                          rnn_type='lstm',
                          nr_cells=nr_cells,
                          read_heads=read_heads,
                          sparse_reads=sparse_reads,
                          cell_size=cell_size,
                          gpu_id=gpu_id,
                          independent_linears=independent_linears,
                          debug=debug,
                          dropout=0)]
    elif rnn_type.lower() == 'dnc':
        self.rnns = [DNC(input_size=ninp,
                         hidden_size=nhid,
                         num_layers=nlayers,
                         num_hidden_layers=nhlayers,
                         rnn_type='lstm',
                         nr_cells=nr_cells,
                         read_heads=read_heads,
                         cell_size=cell_size,
                         gpu_id=gpu_id,
                         independent_linears=independent_linears,
                         debug=debug,
                         dropout=wdrop)]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
def __init__(self, rnn_type, ntoken, nemb, nhid, nhidlast, nlayer, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, spectrum_control=False):
    super(Dai, self).__init__()
    self.use_dropout = True
    self.lockdrop = LockedDropout()
    if spectrum_control:
        self.encoder = SvdEmbed(nemb, ntoken)
    else:
        self.encoder = nn.Embedding(ntoken, nemb)
    self.wdropped = True
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(nemb if l == 0 else nhid, nhid if l != nlayer - 1 else nhidlast, 1, dropout=0) for l in range(nlayer)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop if self.use_dropout else 0) for rnn in self.rnns]
        else:
            self.wdropped = False
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=nemb if l == 0 else nhid, hidden_size=nhid if l != nlayer - 1 else nhidlast, save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayer)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.rnns = torch.nn.ModuleList(self.rnns)

    self.rnn_type = rnn_type
    self.nemb = nemb
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayer = nlayer
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ntoken = ntoken
    self.spectrum_control = spectrum_control
    self.init_weights()
def __init__(self, rnn_type, input_dim, hidden_dim, nlayers, rnn_out_dropout=.5,
             dropouth=.5, wdrop=0, tie_weights=False):
    """
    Adapted from Salesforce awd-lstm-lm, with a few modifications.
    Aimed at time series, so embedding-related code was removed.

    Parameters
    ----------
    rnn_type : str
        Either of 'LSTM', 'GRU' or 'QRNN'
    input_dim : int
        input dimension
    hidden_dim : int
        hidden layer size
    nlayers : int
        number of layers
    rnn_out_dropout : float, optional
        locked dropout, i.e. variational dropout rate. Applied to the output
        of the last RNN layer.
    dropouth : float, optional
        variational dropout between RNN layers
    wdrop : float, optional
        weight dropout rate for recurrent weights inside an RNN layer.
    tie_weights : bool, optional
        Default False. If True, then for the last RNN layer the hidden
        size/dim is set to the input size/dim.
    """
    # super(AWDRNN, self).__init__()
    super().__init__()
    self.lockdrop = LockedDropout()
    # self.idrop = nn.Dropout(dropouti)
    # self.hdrop = nn.Dropout(dropouth)
    # self.drop = nn.Dropout(dropout)
    # self.encoder = nn.Embedding(ntoken, input_dim)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(input_dim if ll == 0 else hidden_dim, hidden_dim if ll != nlayers - 1 else (input_dim if tie_weights else hidden_dim), 1, dropout=0) for ll in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(input_dim if ll == 0 else hidden_dim, hidden_dim if ll != nlayers - 1 else (input_dim if tie_weights else hidden_dim), 1, dropout=0) for ll in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=input_dim if ll == 0 else hidden_dim, hidden_size=(hidden_dim if ll != nlayers - 1 else (input_dim if tie_weights else hidden_dim)), save_prev_x=True, zoneout=0, window=2 if ll == 0 else 1, output_gate=True) for ll in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = nn.ModuleList(self.rnns)

    # self.decoder = nn.Linear(hidden_dim, ntoken)
    #
    # # Optionally tie weights as in:
    # # "Using the Output Embedding to Improve Language Models"
    # # (Press & Wolf 2016)
    # # https://arxiv.org/abs/1608.05859
    # # and
    # # "Tying Word Vectors and Word Classifiers: A Loss Framework for
    # # Language Modeling" (Inan et al. 2016)
    # # https://arxiv.org/abs/1611.01462
    # if tie_weights:
    #     # if hidden_dim != input_dim:
    #     #     raise ValueError('When using the tied flag, hidden_dim must be equal to emsize')
    #     self.decoder.weight = self.encoder.weight
    # self.init_weights()

    self.rnn_type = rnn_type
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.nlayers = nlayers
    self.rnn_out_dropout = rnn_out_dropout
    # self.dropouti = dropouti
    self.dropouth = dropouth
    # self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]

        ## start edit: steps for multi-timescale (mts) model bias assignments
        ## STEP 1: make a list of hidden layer sizes - useful for the init step
        hid_dim = [nhid if l != nlayers - 1 else (ninp if tie_weights else nhid) for l in range(nlayers)]

        ## STEP 2: create bias values depending on the type of init we want
        chrono_bias = [np.zeros(hid_dim[l]) for l in range(nlayers)]
        multi_timescale = True
        if multi_timescale:
            # Layer 0: half the units with timescale 3, half with timescale 4.
            half_length = int(0.5 * hid_dim[0])
            timescale_first_half, timescale_second_half = 3, 4
            # Calculate bias values from the timescales and store them in an array.
            chrono_bias[0][:half_length] = -1 * np.log(np.exp(1 / timescale_first_half) - 1)
            chrono_bias[0][half_length:] = -1 * np.log(np.exp(1 / timescale_second_half) - 1)
            # Layer 1: timescales sampled from an inverse gamma distribution.
            timescale_invgamma = scipy.stats.invgamma.isf(np.linspace(0, 1, 1151), a=0.56, scale=1)[1:]
            # Calculate bias values from the timescales and store them in an array.
            chrono_bias[1] = -1 * np.log(np.exp(1 / timescale_invgamma) - 1)

        ## STEP 3: assign bias values to the layers - the first half is the input
        ## gate bias, the second half the forget gate, for both i-to-h and h-to-h
        for l in range(nlayers - 1):  # assign biases for only the first two layers
            self.rnns[l].bias_ih_l0.data[0:hid_dim[l] * 2] = torch.tensor(np.zeros(hid_dim[l] * 2), dtype=torch.float)
            self.rnns[l].bias_hh_l0.data[0:hid_dim[l] * 2] = torch.from_numpy(np.hstack((-1 * chrono_bias[l], chrono_bias[l])).astype(np.float32))

        ## STEP 4: fix the biases - if we want to freeze them instead of just initializing them
        fixed_weights = True
        if fixed_weights:
            for l in range(nlayers - 1):
                print(l)
                self.rnns[l].bias_ih_l0.requires_grad = False
                self.rnns[l].bias_hh_l0.requires_grad = False
        ## end edit ###

        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
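# --- Worked check (not from the source) of the bias formula used in the
# multi-timescale block above: b = -log(exp(1/tau) - 1) implies
# sigmoid(b) = exp(-1/tau), so a forget gate initialized with bias b retains
# state with characteristic timescale tau.
import numpy as np

def chrono_bias(tau):
    return -np.log(np.exp(1.0 / tau) - 1.0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

for tau in (3.0, 4.0):  # the two layer-0 timescales above
    b = chrono_bias(tau)
    assert np.isclose(sigmoid(b), np.exp(-1.0 / tau))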
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, num_features=0,
             feature_dim=0, feature_relu_bias=2.0):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.num_features = num_features
    self.feature_dims = feature_dim
    if self.num_features == 0:
        # self.encoder = nn.Embedding(ntoken, ninp)
        self.encoder = nn.Parameter(torch.FloatTensor(ntoken, ninp))
    else:
        self.word_emb = nn.Parameter(torch.FloatTensor(ntoken, feature_dim))
        self.feature_emb = nn.Parameter(torch.FloatTensor(num_features, feature_dim))
        self.feature_relu_bias = nn.Parameter(torch.FloatTensor([feature_relu_bias]), requires_grad=False)
        self.encoder = nn.Parameter(torch.FloatTensor(num_features, ninp))
        self.word_emb_cache = None
        self.feature_emb_cache = None
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    logging.info(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        assert self.num_features == 0, "It's not supported to tie weights and use feature models right now."
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights

    if self.num_features == 0:
        logging.info('Using normal encoder model')
        self._input_layer_fn = self.normal_encoder
    else:
        logging.info('Using feature encoder model %s %s', self.num_features, self.feature_dims)
        self._input_layer_fn = self.feature_encoder
def __init__(self, config, x_embed): super().__init__() self.num_layers_rnn = 1 self.x_embed = x_embed.x_embed self.wdrop = config.wdrop self.dropoute = config.dropoute self.encoder_out_size = config.rnn_cell_size self.rnn_cell_type = config.rnn_cell_type self.training = True import warnings warnings.filterwarnings("ignore") self.model = None if self.rnn_cell_type.lower() == "lstm": self.rnn = nn.LSTM(input_size=x_embed.embedding_dim, hidden_size=config.rnn_cell_size, num_layers=self.num_layers_rnn, bidirectional=False, dropout=config.dropout, batch_first=True, bias=True) self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop) elif self.rnn_cell_type.lower() == "gru": self.rnn = nn.GRU(input_size=x_embed.embedding_dim, hidden_size=config.rnn_cell_size, num_layers=self.num_layers_rnn, bidirectional=False, dropout=config.dropout, batch_first=True, bias=True) self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop) elif self.rnn_cell_type.lower() == "qrnn": from torchqrnn import QRNNLayer self.model = QRNNLayer(input_size=x_embed.embedding_dim, hidden_size=config.rnn_cell_size, save_prev_x=True, zoneout=0, window=1, output_gate=True, use_cuda=config.use_gpu) self.model.linear = WeightDrop(self.model.linear, ['weight'], dropout=self.wdrop) # self.encoders.reset() self.lockdrop = LockedDropout() self.dropouti = 0.1 # temporal averaging self.beta_ema = config.beta_ema if self.beta_ema > 0: self.avg_param = deepcopy(list(p.data for p in self.parameters())) if config.use_gpu: self.avg_param = [a.cuda() for a in self.avg_param] self.steps_ema = 0. return
def __init__(self, rnn_type, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, wdrop=0,
             tie_weights=False, class_num=4):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    # self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)

    # Whether to use a BatchNorm layer: LRP cannot be propagated through BN.
    # self.bn = nn.BatchNorm1d(nhid, momentum=0.5)

    # Whether to use a bias: using a bias affects the LRP relevance scores.
    # self.decoder = nn.Linear(nhid, class_num, bias=0)
    self.decoder = nn.Linear(nhid, class_num)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    # if tie_weights:
    #     # if nhid != ninp:
    #     #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
    #     self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouth = dropouth
    self.tie_weights = tie_weights