示例#1
0
 def test_dropout_WITH_PROB_ZERO(self):
     """Check that two forward passes agree exactly when ``dropout_p=0``.

     Every parameter of the decoder is re-drawn uniformly from [-1, 1], the
     decoder is called twice without arguments, and each pair of
     corresponding entries of the two first return values must be equal.
     """
     decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, dropout_p=0)
     for weight in decoder.parameters():
         weight.data.uniform_(-1, 1)
     first_pass, _, _ = decoder()
     second_pass, _, _ = decoder()
     for lhs, rhs in zip(first_pass, second_pass):
         self.assertTrue(torch.equal(lhs.data, rhs.data))
示例#2
0
    def test_dropout_WITH_NON_ZERO_PROB(self):
        """Check that a non-zero dropout makes repeated forward passes differ.

        A decoder built with ``n_layers=2`` and ``dropout_p=0.5`` is called in
        pairs, at most 50 times. The test passes as soon as one pair
        disagrees on the first entry of the returned outputs, and fails if
        every pair was identical.
        """
        decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, n_layers=2, dropout_p=0.5)
        for weight in decoder.parameters():
            weight.data.uniform_(-1, 1)

        for _ in range(50):
            first_pass, _, _ = decoder()
            second_pass, _, _ = decoder()
            if not torch.equal(first_pass[0].data, second_pass[0].data):
                always_equal = False
                break
        else:
            # The loop never hit ``break``: all 50 pairs were identical.
            always_equal = True
        self.assertFalse(always_equal)
示例#3
0
    def test_k_1(self):
        """With k=1 the top-k decoder must reproduce the plain decoder's output."""
        batch_size = 1
        eos = 1

        # The weights are random, so the comparison is repeated several times.
        for _ in range(10):
            base_decoder = DecoderRNN(self.vocab_size, 50, 16, 0, eos)
            for weight in base_decoder.parameters():
                weight.data.uniform_(-1, 1)
            beam_decoder = TopKDecoder(base_decoder, 1)

            plain_out, _, plain_info = base_decoder()
            beam_out, _, beam_info = beam_decoder()

            self.assertEqual(len(plain_out), len(beam_out))

            done = [False] * batch_size
            running_scores = [0] * batch_size

            for step, step_output in enumerate(plain_out):
                best_score, _ = step_output.topk(1)
                step_symbols = plain_info['sequence'][step]
                for idx in range(batch_size):
                    # Accumulate the best score of every step for this item.
                    running_scores[idx] += best_score[idx].data[0]
                    plain_symbol = step_symbols[idx].data[0]
                    if not done[idx] and plain_symbol == eos:
                        # First EOS for this item: length and total score
                        # reported by the top-k decoder must match.
                        done[idx] = True
                        self.assertEqual(beam_info['length'][idx], step + 1)
                        self.assertTrue(
                            np.isclose(running_scores[idx],
                                       beam_info['score'][idx][0]))
                    if not done[idx]:
                        # Still decoding: symbols and raw step outputs of the
                        # two decoders must coincide.
                        beam_symbol = beam_info['topk_sequence'][step][
                            idx].data[0][0]
                        self.assertEqual(plain_symbol, beam_symbol)
                        self.assertTrue(
                            torch.equal(step_output.data,
                                        beam_out[step].data))
                if all(done):
                    break
示例#4
0
    def setUp(self):
        """Build the dataset, both vocabularies and an LSTM seq2seq model.

        Reads ``data/eng-fra.txt`` (located next to this test file) as a TSV
        dataset, builds the source and target vocabularies from it, and
        stores the dataset and a ``Seq2seq`` model on the test instance. All
        model parameters are re-drawn uniformly from [-0.08, 0.08].
        """
        test_dir = os.path.dirname(os.path.realpath(__file__))
        source_field = SourceField()
        target_field = TargetField()
        self.dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_dir, 'data/eng-fra.txt'),
            format='tsv',
            fields=[('src', source_field), ('tgt', target_field)],
        )
        for field in (source_field, target_field):
            field.build_vocab(self.dataset)

        lstm_encoder = EncoderRNN(
            len(source_field.vocab), 10, 10, 10, rnn_cell='lstm')
        lstm_decoder = DecoderRNN(
            len(target_field.vocab), 10, 10,
            target_field.sos_id, target_field.eos_id, rnn_cell='lstm')
        self.seq2seq = Seq2seq(lstm_encoder, lstm_decoder)

        for weight in self.seq2seq.parameters():
            weight.data.uniform_(-0.08, 0.08)
示例#5
0
    @classmethod
    def setUpClass(cls):
        """Build one ``Predictor`` shared by every test in the class.

        unittest invokes ``setUpClass`` on the class itself with no
        arguments, so the hook has to be a ``classmethod``; as a plain
        instance method the call would fail with a missing-argument
        ``TypeError``.

        Reads ``data/eng-fra.txt`` (located next to this test file) as a TSV
        dataset, builds the source and target vocabularies from it, wraps an
        LSTM encoder/decoder pair in a ``Seq2seq`` model and stores the
        resulting ``Predictor`` as the class attribute ``predictor``.
        """
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        trg = TargetField()
        dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'),
            format='tsv',
            fields=[('src', src), ('trg', trg)],
        )
        src.build_vocab(dataset)
        trg.build_vocab(dataset)

        encoder = EncoderRNN(len(src.vocab), 5, 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(trg.vocab),
                             10,
                             10,
                             trg.sos_id,
                             trg.eos_id,
                             rnn_cell='lstm')
        seq2seq = Seq2seq(encoder, decoder)
        # Set on the class so that every test method can read it via ``self``.
        cls.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
示例#6
0
    def test_k_greater_then_1(self):
        """Implement beam search manually and compare results from topk decoder.

        For ten randomly initialised decoders, a reference beam search is run
        step by step through ``decoder.forward_step`` and its best
        ``beam_size`` sequences (lengths, scores and symbols) are compared
        with what ``TopKDecoder`` reports for the same encoder hidden state.
        """
        max_len = 50
        beam_size = 3
        batch_size = 1
        hidden_size = 8
        sos = 0
        eos = 1

        for _ in range(10):
            # Fresh decoder with uniformly random weights for every repetition.
            decoder = DecoderRNN(self.vocab_size, max_len, hidden_size, sos,
                                 eos)
            for param in decoder.parameters():
                param.data.uniform_(-1, 1)
            topk_decoder = TopKDecoder(decoder, beam_size)

            # Both the top-k decoder and the reference search below start
            # from this same random hidden state.
            encoder_hidden = torch.autograd.Variable(
                torch.randn(1, batch_size, hidden_size))
            _, _, other_topk = topk_decoder(encoder_hidden=encoder_hidden)

            # Queue state:
            #   1. time step
            #   2. symbol
            #   3. hidden state
            #   4. accumulated log likelihood
            #   5. beam number
            # The seed entry uses time step -1, the SOS symbol, score 0 and
            # beam number None (it has no parent).
            batch_queue = [[(-1, sos, encoder_hidden[:, b, :].unsqueeze(1), 0,
                             None)] for b in range(batch_size)]
            # time_batch_queue[t][b] holds the candidates of batch item b that
            # are expanded at step t; index 0 is the seed queue.
            time_batch_queue = [batch_queue]
            batch_finished_seqs = [list() for _ in range(batch_size)]
            for t in range(max_len):
                new_batch_queue = []
                for b in range(batch_size):
                    new_queue = []
                    for k in range(min(len(time_batch_queue[t][b]),
                                       beam_size)):
                        _, inputs, hidden, seq_score, _ = time_batch_queue[t][
                            b][k]
                        # A candidate ending in EOS is recorded as finished
                        # and is not expanded any further.
                        if inputs == eos:
                            batch_finished_seqs[b].append(
                                time_batch_queue[t][b][k])
                            continue
                        inputs = torch.autograd.Variable(
                            torch.LongTensor([[inputs]]))
                        decoder_outputs, hidden, _ = decoder.forward_step(
                            inputs, hidden, None, F.log_softmax)
                        topk_score, topk = decoder_outputs[0].data.topk(
                            beam_size)
                        # Each expansion stores (t, k) so that its parent can
                        # later be found at time_batch_queue[t][b][k].
                        for score, sym in zip(topk_score.tolist()[0],
                                              topk.tolist()[0]):
                            new_queue.append(
                                (t, sym, hidden, score + seq_score, k))
                    # Keep only the beam_size best-scoring expansions.
                    new_queue = sorted(new_queue,
                                       key=lambda x: x[3],
                                       reverse=True)[:beam_size]
                    new_batch_queue.append(new_queue)
                time_batch_queue.append(new_batch_queue)

            # finished beams
            finalist = [l[:beam_size] for l in batch_finished_seqs]
            # unfinished beams
            # Top up with the best candidates of the last step when fewer than
            # beam_size sequences reached EOS.
            for b in range(batch_size):
                if len(finalist[b]) < beam_size:
                    last_step = sorted(time_batch_queue[-1][b],
                                       key=lambda x: x[3],
                                       reverse=True)
                    finalist[b] += last_step[:beam_size - len(finalist[b])]

            # back track
            # Follow the (time step, beam number) back-pointers until the seed
            # entry, whose beam number is None, then reverse into forward order.
            topk = []
            for b in range(batch_size):
                batch_topk = []
                for k in range(beam_size):
                    seq = [finalist[b][k]]
                    prev_k = seq[-1][4]
                    prev_t = seq[-1][0]
                    while prev_k is not None:
                        seq.append(time_batch_queue[prev_t][b][prev_k])
                        prev_k = seq[-1][4]
                        prev_t = seq[-1][0]
                    batch_topk.append([s for s in reversed(seq)])
                topk.append(batch_topk)

            # Order each item's sequences by final accumulated score, best first.
            for b in range(batch_size):
                topk[b] = sorted(topk[b], key=lambda s: s[-1][3], reverse=True)

            topk_scores = other_topk['score']
            topk_lengths = other_topk['topk_length']
            topk_pred_symbols = other_topk['topk_sequence']
            for b in range(batch_size):
                # When two adjacent scores are numerically indistinguishable
                # the ranking is ambiguous, so the comparison is skipped.
                precision_error = False
                for k in range(beam_size - 1):
                    if np.isclose(topk_scores[b][k], topk_scores[b][k + 1]):
                        precision_error = True
                        break
                # NOTE(review): this `break` leaves the whole batch loop, so
                # later batch items would go unchecked; harmless while
                # batch_size is 1 -- confirm whether `continue` was intended.
                if precision_error:
                    break
                for k in range(beam_size):
                    # The reference sequence includes the seed entry, hence -1.
                    self.assertEqual(topk_lengths[b][k], len(topk[b][k]) - 1)
                    self.assertTrue(
                        np.isclose(topk_scores[b][k], topk[b][k][-1][3]))
                    total_steps = topk_lengths[b][k]
                    for t in range(total_steps):
                        self.assertEqual(topk_pred_symbols[t][b, k].data[0],
                                         topk[b][k][t +
                                                    1][1])  # topk includes SOS
示例#7
0
 def test_init(self):
     """Smoke test: ``TopKDecoder(decoder, 3)`` must construct without raising."""
     base_decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, input_dropout_p=0)
     TopKDecoder(base_decoder, 3)
示例#8
0
    def __init__(self, opt, shared=None):
        """Set up model if shared params not set, otherwise no work to do.

        Args:
            opt: option mapping; after the parent initialiser runs, the copy
                kept in ``self.opt`` is used instead of the argument.
            shared: optional mapping of state handed over from another
                instance (dictionary, special-token indices, answers and,
                optionally, the model). When falsy, the dictionary and the
                encoder/decoder model are built here, optionally restoring
                parameters from 'init_model' or 'model_file'.
        """
        super().__init__(opt, shared)
        opt = self.opt  # there is a deepcopy in the init

        # all instances may need some params
        # a non-positive 'truncate' option is stored as None
        self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
        self.metrics = {'loss': 0, 'num_tokens': 0}
        self.history = {}
        # falls back to 0 when `shared` is falsy or carries no truthy
        # 'batchindex' entry
        self.batch_idx = shared and shared.get('batchindex') or 0
        self.states = {}

        # check for cuda
        self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available()

        if shared:
            # set up shared properties
            self.dict = shared['dict']
            self.START_IDX = shared['START_IDX']
            self.END_IDX = shared['END_IDX']
            self.NULL_IDX = shared['NULL_IDX']
            # answers contains a batch_size list of the last answer produced
            self.answers = shared['answers']

            # NOTE(review): when 'model' is absent from `shared`, self.model
            # is not assigned in this branch, yet the training setup below
            # reads self.model.parameters() -- confirm it is set elsewhere.
            if 'model' in shared:
                # model is shared during hogwild
                self.model = shared['model']
        else:
            # this is not a shared instance of this class, so do full init
            # answers contains a batch_size list of the last answer produced
            self.answers = [None] * opt['batchsize']

            if self.use_cuda:
                torch.cuda.set_device(opt['gpu'])

            # check first for 'init_model' for loading model from file
            if opt.get('init_model') and os.path.isfile(opt['init_model']):
                init_model = opt['init_model']
            # next check for 'model_file'
            elif opt.get('model_file') and os.path.isfile(opt['model_file']):
                init_model = opt['model_file']
            else:
                init_model = None

            if init_model is not None:
                # load model parameters if available
                print('Loading existing model params from ' + init_model)
                new_opt, self.states = self.load(init_model)
                # override model-specific options with stored ones
                opt = self.override_opt(new_opt)

            if opt['dict_file'] is None:
                if init_model is not None and os.path.isfile(init_model + '.dict'):
                    # check first to see if a dictionary exists
                    opt['dict_file'] = init_model + '.dict'
                elif opt.get('model_file'):
                    # otherwise, set default dict-file if it is not set
                    opt['dict_file'] = opt['model_file'] + '.dict'

            # load dictionary and basic tokens & vectors
            self.dict = DictionaryAgent(opt)
            self.id = 'Seq2Seq'
            # we use START markers to start our output
            self.START_IDX = self.dict[self.dict.start_token]
            # we use END markers to end our output
            self.END_IDX = self.dict[self.dict.end_token]
            # get index of null token from dictionary (probably 0)
            self.NULL_IDX = self.dict[self.dict.null_token]

            # encoder and decoder share the dictionary size, dropout, layer
            # count, cell type and directionality options
            encoder = EncoderRNN(
                len(self.dict),
                opt['maxlength_in'],
                opt['hiddensize'],
                dropout_p=opt['dropout'],
                input_dropout_p=opt['dropout'],
                n_layers=opt['numlayers'],
                rnn_cell=opt['rnncell'],
                bidirectional=opt['bidirectional'],
                variable_lengths=True,
            )
            decoder = DecoderRNN(
                len(self.dict),
                opt['maxlength_out'],
                # the decoder hidden size is doubled when 'bidirectional' is set
                opt['hiddensize'] * 2 if opt['bidirectional'] else opt['hiddensize'],
                dropout_p=opt['dropout'],
                input_dropout_p=opt['dropout'],
                n_layers=opt['numlayers'],
                rnn_cell=opt['rnncell'],
                bidirectional=opt['bidirectional'],
                sos_id=self.START_IDX,
                eos_id=self.END_IDX,
                use_attention=opt['attention'],
            )
            self.model = Seq2seq(encoder, decoder)

            if self.states:
                # set loaded states if applicable
                self.model.load_state_dict(self.states['model'])

            if self.use_cuda:
                self.model.cuda()

        # set up criteria
        # targets equal to NULL_IDX are ignored by the loss
        self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX, size_average=False)
        if self.use_cuda:
            self.criterion.cuda()

        # optimizer and scheduler are only created when 'train' appears in
        # the 'datatype' option
        if 'train' in opt.get('datatype', ''):
            # if model was built, do more setup
            self.clip = opt['gradient_clip']

            # set up tensors once
            self.START = torch.LongTensor([self.START_IDX])

            if self.use_cuda:
                # push to cuda
                self.START = self.START.cuda()

            # set up optimizer
            lr = opt['learningrate']
            optim_class = IbmSeq2seqAgent.OPTIM_OPTS[opt['optimizer']]
            kwargs = {'lr': lr}
            # only the 'sgd' optimizer receives momentum settings
            if opt['optimizer'] == 'sgd':
                kwargs['momentum'] = 0.95
                kwargs['nesterov'] = True

            # only parameters that require gradients are optimized
            self.optimizer = optim_class(
                [p for p in self.model.parameters() if p.requires_grad], **kwargs
            )
            if self.states:
                # restore the optimizer state only if the optimizer type
                # recorded in the loaded states matches the requested one
                if self.states['optimizer_type'] != opt['optimizer']:
                    print(
                        'WARNING: not loading optim state since optim class ' 'changed.'
                    )
                else:
                    self.optimizer.load_state_dict(self.states['optimizer'])
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer, 'min', factor=0.5, patience=3, verbose=True
            )

        self.reset()
        # Initialize model
        input_dim = 38 + 10
        hidden_dim = 128
        bidirectional = False
        encoder = EncoderRNN(input_seq_len=seqence_len,
                             input_dim=input_dim,
                             hidden_dim=hidden_dim,
                             bidirectional=bidirectional,
                             n_layers=opt.rnn_layers,
                             rnn_cell=opt.rnn_cell_type,
                             dropout_p=opt.rnn_dropout)
        decoder = DecoderRNN(input_seq_len=seqence_len,
                             output_seq_len=delay,
                             output_dim=output_dim,
                             hidden_dim=hidden_dim *
                             2 if bidirectional else hidden_dim,
                             dropout_p=opt.rnn_dropout,
                             bidirectional=bidirectional,
                             n_layers=opt.rnn_layers,
                             rnn_cell=opt.rnn_cell_type,
                             use_attention=opt.use_attention)
        seq2seq = Seq2Seq(encoder, decoder,
                          decode_function=torch.tanh).to(device)

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
    print('model:', seq2seq)
    t = SupervisedTrainer(
        loss=loss,
        batch_size=opt.batch_size,
        checkpoint_every=1000,
        print_every=100,