    def decode(self, src_sentence):
        """This is a generalization to NMT ensembles of 
        ``BeamSearch.search``.
        
        Args:
            src_sentence (list): List of source word ids without <S> or
                                 </S> which make up the source sentence
        
        Returns:
            list. A list of ``Hypothesis`` instances ordered by their
            score.
        """
        for search in self.beam_searches:
            if not search.compiled:
                search.compile()
        seq = self.src_sparse_feat_map.words2dense(
            utils.oov_to_unk(src_sentence,
                             self.src_vocab_size)) + [self.src_eos]
        if self.src_sparse_feat_map.dim > 1:  # sparse src feats
            input_ = np.transpose(np.tile(seq, (self.beam_size, 1, 1)),
                                  (2, 0, 1))
        else:  # word ids on the source side
            input_ = np.tile(seq, (self.beam_size, 1))

        contexts_and_states = []
        for sys_idx in range(self.n_networks):
            contexts, states, _ = \
                self.beam_searches[sys_idx].compute_initial_states_and_contexts(
                            {self.nmt_models[sys_idx].sampling_input: input_})
            contexts_and_states.append(
                (contexts, states, self.beam_searches[sys_idx]))

        # These arrays store all generated outputs, the corresponding
        # masks, and the cumulative costs, including entries from
        # previous steps and from already-finished sequences.
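        # Note: `states` is taken from the last network in the loop
        # above; the initial outputs are assumed identical across the
        # ensemble members.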
        all_outputs = states['outputs'][None, :]
        all_masks = np.ones_like(all_outputs, dtype=config.floatX)
        all_costs = np.zeros_like(all_outputs, dtype=config.floatX)

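        # Decode for at most 3 * len(src_sentence) steps, or until every
        # hypothesis in the beam has finished.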
        for i in range(3 * len(src_sentence)):
            if all_masks[-1].sum() == 0:
                break
            logprobs_lst = []
            for contexts, states, search in contexts_and_states:
                logprobs_lst.append(search.compute_logprobs(contexts, states))

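            # The per-model scores act as costs here (lower is better);
            # summing them across the ensemble and minimizing amounts to
            # maximizing the product of the individual model posteriors.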
            logprobs = np.sum(logprobs_lst, axis=0)
            next_costs = (all_costs[-1, :, None] +
                          logprobs * all_masks[-1, :, None])
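            # Finished hypotheses may only be extended with </S>: all
            # other word ids get infinite cost and are never selected.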
            (finished, ) = np.where(all_masks[-1] == 0)
            next_costs[finished, :utils.EOS_ID] = np.inf
            next_costs[finished, utils.EOS_ID + 1:] = np.inf

            # The `i == 0` is required because at the first step the beam
            # size is effectively only 1.
            (indexes, outputs), chosen_costs = BeamSearch._smallest(
                next_costs, self.beam_size, only_first_row=i == 0)
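            # `indexes` selects the surviving beam rows; `outputs` holds
            # the word ids chosen for each of them.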

            all_outputs = all_outputs[:, indexes]
            all_masks = all_masks[:, indexes]
            all_costs = all_costs[:, indexes]

            # Reorder each network's states to match the surviving beam
            # entries, then advance them one decoding step.
            for contexts, states, search in contexts_and_states:
                for name in states:
                    states[name] = states[name][indexes]
                states.update(
                    search.compute_next_states(contexts, states, outputs))

            all_outputs = np.vstack([all_outputs, outputs[None, :]])
            all_costs = np.vstack([all_costs, chosen_costs[None, :]])
            mask = outputs != utils.EOS_ID
            if i == 0:
                mask[:] = 1
            all_masks = np.vstack([all_masks, mask[None, :]])

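        # Drop the initial dummy output row, align the masks with the
        # generated outputs, and turn the cumulative costs into
        # per-step costs.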
        all_outputs = all_outputs[1:]
        all_masks = all_masks[:-1]
        all_costs = all_costs[1:] - all_costs[:-1]
        result = all_outputs, all_masks, all_costs
        trans, costs = BeamSearch.result_to_lists(result)
        hypos = []
        max_len = 0
        for idx in range(len(trans)):
            max_len = max(max_len, len(trans[idx]))
            hypo = Hypothesis(trans[idx], -costs[idx])
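            # Attribute the full ensemble score to the first position;
            # the remaining positions get neutral (0.0, 1.0) entries.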
            hypo.score_breakdown = len(trans[idx]) * [[(0.0, 1.0)]]
            hypo.score_breakdown[0] = [(-costs[idx], 1.0)]
            hypos.append(hypo)
        self.apply_predictors_count = max_len * self.beam_size
        return hypos
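
A minimal usage sketch, assuming a hypothetical ``EnsembleNMTDecoder`` class
that hosts this method (the class name and constructor arguments are
illustrative, not from the snippet). It also assumes the module-level imports
the method needs: ``numpy as np``, ``config`` from ``theano``, ``BeamSearch``
from ``blocks.search``, plus the project's ``utils`` and ``Hypothesis``.

# Hypothetical: the decoder class and its constructor are assumptions.
decoder = EnsembleNMTDecoder(nmt_models, beam_size=12)
src_sentence = [23, 7, 512, 4]    # source word ids, without <S> or </S>
hypos = decoder.decode(src_sentence)
best = hypos[0]                   # hypotheses come back ordered by score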
Example #2
import numpy

from blocks.search import BeamSearch


def test_beam_search_smallest():
    a = numpy.array([[3, 6, 4], [1, 2, 7]])
    ind, mins = BeamSearch._smallest(a, 2)
    assert numpy.all(numpy.array(ind) == numpy.array([[1, 1], [0, 1]]))
    assert numpy.all(mins == [1, 2])
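
The test pins down the behavior of ``BeamSearch._smallest``: it returns the
coordinates and the values of the ``k`` smallest entries of a cost matrix, in
ascending order of cost. A rough NumPy equivalent, reconstructed from the
asserted behavior rather than taken from the Blocks source (it reuses the
``numpy`` import above and omits the ``only_first_row`` option):

def smallest_sketch(matrix, k):
    # Flatten, pick the k smallest entries in ascending order, and map
    # the flat positions back to (row, column) coordinates.
    flat = matrix.flatten()
    best = numpy.argpartition(flat, k - 1)[:k]
    best = best[numpy.argsort(flat[best])]
    return numpy.unravel_index(best, matrix.shape), flat[best]

a = numpy.array([[3, 6, 4], [1, 2, 7]])
(rows, cols), mins = smallest_sketch(a, 2)
# rows == [1, 1], cols == [0, 1], mins == [1, 2]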