def test_dropout_WITH_PROB_ZERO(self):
    """With dropout_p=0, two forward passes must yield identical outputs."""
    decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, dropout_p=0)
    # Randomize the weights so the comparison is not trivially all-zeros.
    for weight in decoder.parameters():
        weight.data.uniform_(-1, 1)
    first_run, _, _ = decoder()
    second_run, _, _ = decoder()
    for step_a, step_b in zip(first_run, second_run):
        self.assertTrue(torch.equal(step_a.data, step_b.data))
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With dropout_p=0.5, repeated forward passes should eventually differ."""
    decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1,
                         n_layers=2, dropout_p=0.5)
    for weight in decoder.parameters():
        weight.data.uniform_(-1, 1)
    # Dropout is random, so one pair of runs could coincide; all 50 pairs
    # matching would indicate dropout is not being applied.
    all_equal = True
    for _ in range(50):
        run_a, _, _ = decoder()
        run_b, _, _ = decoder()
        if not torch.equal(run_a[0].data, run_b[0].data):
            all_equal = False
            break
    self.assertFalse(all_equal)
def setUp(self):
    """Build a small LSTM seq2seq model over the bundled eng-fra dataset."""
    here = os.path.dirname(os.path.realpath(__file__))
    src_field = SourceField()
    tgt_field = TargetField()
    self.dataset = torchtext.data.TabularDataset(
        path=os.path.join(here, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', src_field), ('tgt', tgt_field)],
    )
    src_field.build_vocab(self.dataset)
    tgt_field.build_vocab(self.dataset)
    encoder = EncoderRNN(len(src_field.vocab), 10, 10, 10, rnn_cell='lstm')
    decoder = DecoderRNN(len(tgt_field.vocab), 10, 10,
                         tgt_field.sos_id, tgt_field.eos_id, rnn_cell='lstm')
    self.seq2seq = Seq2seq(encoder, decoder)
    # Uniform init in [-0.08, 0.08] — the classic seq2seq initialization.
    for weight in self.seq2seq.parameters():
        weight.data.uniform_(-0.08, 0.08)
def test_k_1(self):
    """When k=1, the topk decoder must reproduce the greedy decoder exactly."""
    batch_size = 1
    eos = 1
    for _ in range(10):  # repeat the randomized comparison several times
        decoder = DecoderRNN(self.vocab_size, 50, 16, 0, eos)
        for weight in decoder.parameters():
            weight.data.uniform_(-1, 1)
        topk_decoder = TopKDecoder(decoder, 1)

        output, _, other = decoder()
        output_topk, _, other_topk = topk_decoder()

        self.assertEqual(len(output), len(output_topk))

        done = [False] * batch_size
        cum_scores = [0] * batch_size
        for step, step_output in enumerate(output):
            best_score, _ = step_output.topk(1)
            step_symbols = other['sequence'][step]
            for b in range(batch_size):
                cum_scores[b] += best_score[b].data[0]
                greedy_symbol = step_symbols[b].data[0]
                if not done[b] and greedy_symbol == eos:
                    # Sequence just ended: the reported length and the
                    # accumulated score must match the beam decoder's.
                    done[b] = True
                    self.assertEqual(other_topk['length'][b], step + 1)
                    self.assertTrue(
                        np.isclose(cum_scores[b], other_topk['score'][b][0]))
                if not done[b]:
                    beam_symbol = other_topk['topk_sequence'][step][b].data[0][0]
                    self.assertEqual(greedy_symbol, beam_symbol)
                    self.assertTrue(
                        torch.equal(step_output.data, output_topk[step].data))
            if sum(done) == batch_size:
                break
def setUpClass(self):
    """One-time fixture: build an LSTM seq2seq model and wrap it in a Predictor.

    NOTE(review): named like a class-level fixture yet takes ``self`` —
    presumably decorated with ``@classmethod`` outside this view; confirm.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    src_field = SourceField()
    trg_field = TargetField()
    dataset = torchtext.data.TabularDataset(
        path=os.path.join(here, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', src_field), ('trg', trg_field)],
    )
    src_field.build_vocab(dataset)
    trg_field.build_vocab(dataset)
    encoder = EncoderRNN(len(src_field.vocab), 5, 10, 10, rnn_cell='lstm')
    decoder = DecoderRNN(len(trg_field.vocab), 10, 10,
                         trg_field.sos_id, trg_field.eos_id, rnn_cell='lstm')
    self.predictor = Predictor(Seq2seq(encoder, decoder),
                               src_field.vocab, trg_field.vocab)
def test_k_greater_then_1(self):
    """ Implement beam search manually and compare results from topk decoder. """
    max_len = 50
    beam_size = 3
    batch_size = 1
    hidden_size = 8
    sos = 0
    eos = 1
    # Repeat the randomized comparison several times with fresh weights.
    for _ in range(10):
        decoder = DecoderRNN(self.vocab_size, max_len, hidden_size, sos, eos)
        for param in decoder.parameters():
            param.data.uniform_(-1, 1)
        topk_decoder = TopKDecoder(decoder, beam_size)
        encoder_hidden = torch.autograd.Variable(
            torch.randn(1, batch_size, hidden_size))
        # Run the decoder under test once; the rest of this test rebuilds
        # the same beam search by hand and compares the two result sets.
        _, _, other_topk = topk_decoder(encoder_hidden=encoder_hidden)

        # Queue state:
        # 1. time step
        # 2. symbol
        # 3. hidden state
        # 4. accumulated log likelihood
        # 5. beam number
        # Seed each batch entry with a pseudo-entry at t=-1 holding SOS and
        # the encoder hidden state; beam number None marks the root.
        batch_queue = [[(-1, sos, encoder_hidden[:, b, :].unsqueeze(1), 0,
                         None)] for b in range(batch_size)]
        # time_batch_queue[t+1] holds the candidates produced at time step t.
        time_batch_queue = [batch_queue]
        batch_finished_seqs = [list() for _ in range(batch_size)]
        for t in range(max_len):
            new_batch_queue = []
            for b in range(batch_size):
                new_queue = []
                # Expand at most beam_size surviving candidates per batch.
                for k in range(min(len(time_batch_queue[t][b]), beam_size)):
                    _, inputs, hidden, seq_score, _ = time_batch_queue[t][b][k]
                    if inputs == eos:
                        # Beam already emitted EOS: retire it, do not expand.
                        batch_finished_seqs[b].append(time_batch_queue[t][b][k])
                        continue
                    inputs = torch.autograd.Variable(
                        torch.LongTensor([[inputs]]))
                    # Single decoder step in log-space so scores add up.
                    decoder_outputs, hidden, _ = decoder.forward_step(
                        inputs, hidden, None, F.log_softmax)
                    topk_score, topk = decoder_outputs[0].data.topk(beam_size)
                    for score, sym in zip(topk_score.tolist()[0],
                                          topk.tolist()[0]):
                        # Record parent beam index k for backtracking later.
                        new_queue.append((t, sym, hidden, score + seq_score, k))
                # Keep only the beam_size best-scoring expansions.
                new_queue = sorted(new_queue, key=lambda x: x[3],
                                   reverse=True)[:beam_size]
                new_batch_queue.append(new_queue)
            time_batch_queue.append(new_batch_queue)

        # finished beams
        finalist = [l[:beam_size] for l in batch_finished_seqs]
        # unfinished beams
        # If fewer than beam_size sequences reached EOS, pad the finalists
        # with the best still-running candidates from the last time step.
        for b in range(batch_size):
            if len(finalist[b]) < beam_size:
                last_step = sorted(time_batch_queue[-1][b],
                                   key=lambda x: x[3], reverse=True)
                finalist[b] += last_step[:beam_size - len(finalist[b])]

        # back track
        # Follow parent beam indices from each finalist up to the SOS root
        # (beam number None), then reverse to get root-to-leaf order.
        topk = []
        for b in range(batch_size):
            batch_topk = []
            for k in range(beam_size):
                seq = [finalist[b][k]]
                prev_k = seq[-1][4]
                prev_t = seq[-1][0]
                while prev_k is not None:
                    seq.append(time_batch_queue[prev_t][b][prev_k])
                    prev_k = seq[-1][4]
                    prev_t = seq[-1][0]
                batch_topk.append([s for s in reversed(seq)])
            topk.append(batch_topk)

        # Order the reference beams by final accumulated score, best first.
        for b in range(batch_size):
            topk[b] = sorted(topk[b], key=lambda s: s[-1][3], reverse=True)

        topk_scores = other_topk['score']
        topk_lengths = other_topk['topk_length']
        topk_pred_symbols = other_topk['topk_sequence']
        for b in range(batch_size):
            # Skip comparison when two adjacent beams score (nearly) the
            # same: float precision could make the decoders order them
            # differently, which would be a spurious failure.
            precision_error = False
            for k in range(beam_size - 1):
                if np.isclose(topk_scores[b][k], topk_scores[b][k + 1]):
                    precision_error = True
                    break
            if precision_error:
                break
            for k in range(beam_size):
                # Reference length excludes the SOS root entry.
                self.assertEqual(topk_lengths[b][k], len(topk[b][k]) - 1)
                self.assertTrue(
                    np.isclose(topk_scores[b][k], topk[b][k][-1][3]))
                total_steps = topk_lengths[b][k]
                for t in range(total_steps):
                    self.assertEqual(topk_pred_symbols[t][b, k].data[0],
                                     topk[b][k][t + 1][1])  # topk includes SOS
def test_init(self):
    """Constructing a TopKDecoder around a DecoderRNN should not raise."""
    base = DecoderRNN(self.vocab_size, 50, 16, 0, 1, input_dropout_p=0)
    TopKDecoder(base, 3)
def __init__(self, opt, shared=None):
    """Set up model if shared params not set, otherwise no work to do.

    :param opt: options dict; keys read here include 'truncate',
        'batchsize', 'gpu', 'init_model', 'model_file', 'dict_file',
        RNN sizing/cell options, and optimizer settings.
    :param shared: optional dict of attributes shared from another copy of
        this agent; when present, the dictionary, token indices, answers,
        and possibly the model itself are reused instead of rebuilt.
    """
    super().__init__(opt, shared)
    opt = self.opt  # there is a deepcopy in the init

    # all instances may need some params
    self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
    self.metrics = {'loss': 0, 'num_tokens': 0}
    self.history = {}
    # NOTE(review): `shared and shared.get(...) or 0` also maps a stored
    # batchindex of 0 to 0, so the idiom is safe here.
    self.batch_idx = shared and shared.get('batchindex') or 0
    self.states = {}

    # check for cuda
    self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available()

    if shared:
        # set up shared properties
        self.dict = shared['dict']
        self.START_IDX = shared['START_IDX']
        self.END_IDX = shared['END_IDX']
        self.NULL_IDX = shared['NULL_IDX']
        # answers contains a batch_size list of the last answer produced
        self.answers = shared['answers']
        if 'model' in shared:
            # model is shared during hogwild
            self.model = shared['model']
    else:
        # this is not a shared instance of this class, so do full init
        # answers contains a batch_size list of the last answer produced
        self.answers = [None] * opt['batchsize']

        if self.use_cuda:
            torch.cuda.set_device(opt['gpu'])

        # check first for 'init_model' for loading model from file
        if opt.get('init_model') and os.path.isfile(opt['init_model']):
            init_model = opt['init_model']
        # next check for 'model_file'
        elif opt.get('model_file') and os.path.isfile(opt['model_file']):
            init_model = opt['model_file']
        else:
            init_model = None

        if init_model is not None:
            # load model parameters if available
            print('Loading existing model params from ' + init_model)
            new_opt, self.states = self.load(init_model)
            # override model-specific options with stored ones
            opt = self.override_opt(new_opt)

        if opt['dict_file'] is None:
            if init_model is not None and os.path.isfile(init_model + '.dict'):
                # check first to see if a dictionary exists
                opt['dict_file'] = init_model + '.dict'
            elif opt.get('model_file'):
                # otherwise, set default dict-file if it is not set
                opt['dict_file'] = opt['model_file'] + '.dict'

        # load dictionary and basic tokens & vectors
        self.dict = DictionaryAgent(opt)
        self.id = 'Seq2Seq'
        # we use START markers to start our output
        self.START_IDX = self.dict[self.dict.start_token]
        # we use END markers to end our output
        self.END_IDX = self.dict[self.dict.end_token]
        # get index of null token from dictionary (probably 0)
        self.NULL_IDX = self.dict[self.dict.null_token]

        encoder = EncoderRNN(
            len(self.dict), opt['maxlength_in'], opt['hiddensize'],
            dropout_p=opt['dropout'], input_dropout_p=opt['dropout'],
            n_layers=opt['numlayers'], rnn_cell=opt['rnncell'],
            bidirectional=opt['bidirectional'], variable_lengths=True,
        )
        # Bidirectional encoder doubles the state size the decoder receives.
        decoder = DecoderRNN(
            len(self.dict), opt['maxlength_out'],
            opt['hiddensize'] * 2 if opt['bidirectional'] else opt['hiddensize'],
            dropout_p=opt['dropout'], input_dropout_p=opt['dropout'],
            n_layers=opt['numlayers'], rnn_cell=opt['rnncell'],
            bidirectional=opt['bidirectional'],
            sos_id=self.START_IDX, eos_id=self.END_IDX,
            use_attention=opt['attention'],
        )
        self.model = Seq2seq(encoder, decoder)

        if self.states:
            # set loaded states if applicable
            self.model.load_state_dict(self.states['model'])

        if self.use_cuda:
            self.model.cuda()

    # set up criteria
    # NULL_IDX is ignored so padding does not contribute to the loss.
    self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX,
                                size_average=False)
    if self.use_cuda:
        self.criterion.cuda()

    if 'train' in opt.get('datatype', ''):
        # if model was built, do more setup
        self.clip = opt['gradient_clip']

        # set up tensors once
        self.START = torch.LongTensor([self.START_IDX])

        if self.use_cuda:
            # push to cuda
            self.START = self.START.cuda()

        # set up optimizer
        lr = opt['learningrate']
        optim_class = IbmSeq2seqAgent.OPTIM_OPTS[opt['optimizer']]
        kwargs = {'lr': lr}
        if opt['optimizer'] == 'sgd':
            kwargs['momentum'] = 0.95
            kwargs['nesterov'] = True
        # Only optimize parameters that require gradients.
        self.optimizer = optim_class(
            [p for p in self.model.parameters() if p.requires_grad], **kwargs
        )
        if self.states:
            # A saved optimizer state is only valid for the same optimizer
            # class; otherwise start the optimizer fresh.
            if self.states['optimizer_type'] != opt['optimizer']:
                print(
                    'WARNING: not loading optim state since optim class '
                    'changed.'
                )
            else:
                self.optimizer.load_state_dict(self.states['optimizer'])
        # Halve the learning rate after 3 epochs without improvement.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'min', factor=0.5, patience=3, verbose=True
        )

    self.reset()
# Initialize model input_dim = 38 + 10 hidden_dim = 128 bidirectional = False encoder = EncoderRNN(input_seq_len=seqence_len, input_dim=input_dim, hidden_dim=hidden_dim, bidirectional=bidirectional, n_layers=opt.rnn_layers, rnn_cell=opt.rnn_cell_type, dropout_p=opt.rnn_dropout) decoder = DecoderRNN(input_seq_len=seqence_len, output_seq_len=delay, output_dim=output_dim, hidden_dim=hidden_dim * 2 if bidirectional else hidden_dim, dropout_p=opt.rnn_dropout, bidirectional=bidirectional, n_layers=opt.rnn_layers, rnn_cell=opt.rnn_cell_type, use_attention=opt.use_attention) seq2seq = Seq2Seq(encoder, decoder, decode_function=torch.tanh).to(device) for param in seq2seq.parameters(): param.data.uniform_(-0.08, 0.08) print('model:', seq2seq) t = SupervisedTrainer( loss=loss, batch_size=opt.batch_size, checkpoint_every=1000, print_every=100,