Example #1
 def _get_initial_hypos(self):
     """Get the list of initial ``PartialHypothesis``. """
     self.cur_fst = load_fst(
         utils.get_path(self.fst_path, self.current_sen_id + 1))
     init_hypo = PartialHypothesis(self.get_predictor_states())
     init_hypo.fst_node = self._find_start_node()
     return [init_hypo]
Example #2
 def decode(self, src_sentence):
     """Decodes a single source sentence using beam search. """
     self.initialize_predictors(src_sentence)
     hypos = [PartialHypothesis(self.get_predictor_states())]
     guard_hypo = PartialHypothesis(None)
     guard_hypo.score = utils.NEG_INF
     it = 0
     while self.stop_criterion(hypos):
         if it > self.max_len:  # prevent infinite loops
             break
         it = it + 1
         next_hypos = [guard_hypo]
         #print("HYPOS")
         #for hypo in hypos:
         #    print("it%d: %s (%f)" % (it, utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score))
         for hypo in hypos:
             #print("H: %s (%f: %f, %f, %f)" % (utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score, sum([s[0][0] for s in hypo.score_breakdown]), sum([s[1][0] for s in hypo.score_breakdown]), sum([s[2][0] for s in hypo.score_breakdown])))
             if hypo.get_last_word() == utils.EOS_ID:
                 next_hypos = self._rebuild_hypo_list(next_hypos, hypo)
             for s, cont in self._get_complete_continuations(
                     hypo, next_hypos[-1].score):
                 if hypo.score + s < next_hypos[-1].score:
                     break
                 next_hypos = self._rebuild_hypo_list(
                     next_hypos, cont.generate_expanded_hypo(self))
         hypos = [h for h in next_hypos if h.score > utils.NEG_INF]
     for hypo in hypos:
         if hypo.get_last_word() == utils.EOS_ID:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     if not self.full_hypos:
         logging.warning("No complete hypotheses found for %s" % src_sentence)
         for hypo in hypos:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     return self.get_full_hypos_sorted()
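The guard hypothesis is a sentinel: `next_hypos` is kept sorted with the worst entry last, so comparing a candidate against `next_hypos[-1].score` lets the inner loop break as soon as continuations (which arrive in descending score order) can no longer enter the beam. Below is a minimal self-contained sketch of this pattern; the `Hypo` class and `expansions` callback are hypothetical stand-ins for `PartialHypothesis` and the predictor scores, not SGNMT's API.

import math

NEG_INF = float("-inf")

class Hypo:
    """Minimal stand-in for PartialHypothesis: a token list and a score."""
    def __init__(self, tokens, score):
        self.tokens = tokens
        self.score = score

def beam_step(hypos, expansions, beam_size):
    """One beam step guarded by a NEG_INF sentinel hypothesis.

    expansions(hypo) must yield (token, log_prob) pairs in descending
    log_prob order; that is what makes the early break sound.
    """
    guard = Hypo([], NEG_INF)   # sentinel: comparable to anything, never wins
    next_hypos = [guard]        # kept sorted best-first, capped at beam_size
    for hypo in hypos:
        for token, log_prob in expansions(hypo):
            if hypo.score + log_prob < next_hypos[-1].score:
                break           # beam is full; nothing later can enter it
            next_hypos.append(Hypo(hypo.tokens + [token],
                                   hypo.score + log_prob))
            next_hypos.sort(key=lambda h: -h.score)
            next_hypos = next_hypos[:beam_size]
    return [h for h in next_hypos if h.score > NEG_INF]

# Toy usage: a fixed, pre-sorted three-word posterior.
def expansions(hypo):
    return [("a", math.log(0.5)), ("b", math.log(0.3)), ("c", math.log(0.2))]

beam = [Hypo([], 0.0)]
for _ in range(2):
    beam = beam_step(beam, expansions, beam_size=2)
print([(h.tokens, round(h.score, 2)) for h in beam])
# [(['a', 'a'], -1.39), (['a', 'b'], -1.9)]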
Example #3
 def decode(self, src_sentence):
     """Decodes a single source sentence using beam search. """
     self.initialize_predictors(src_sentence)
     hypos = [PartialHypothesis(self.get_predictor_states())]
     guard_hypo = PartialHypothesis(None)
     guard_hypo.score = utils.NEG_INF
     it = 0
     while self.stop_criterion(hypos):
         if it > self.max_len: # prevent infinite loops
             break
         it = it + 1
         next_hypos = [guard_hypo]
         for hypo in hypos:
             if hypo.get_last_word() == utils.EOS_ID:
                 next_hypos = self._rebuild_hypo_list(next_hypos, hypo)
             for s, cont in self._get_complete_continuations(
                                                     hypo,
                                                     next_hypos[-1].score):
                 if hypo.score + s < next_hypos[-1].score:
                     break
                 next_hypos = self._rebuild_hypo_list(
                                         next_hypos,
                                         cont.generate_expanded_hypo(self))
         hypos = [h for h in next_hypos if h.score > utils.NEG_INF]  
     for hypo in hypos:
         if hypo.get_last_word() == utils.EOS_ID:
             self.add_full_hypo(hypo.generate_full_hypothesis()) 
     if not self.full_hypos:
         logging.warning("No complete hypotheses found for %s" % src_sentence)
         for hypo in hypos:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     return self.get_full_hypos_sorted()
Example #4
 def create_initial_node(self):
     """Create the root node for the search tree. """
     init_hypo = PartialHypothesis()
     posterior, score_breakdown = self.apply_predictors()
     best_word = utils.argmax(posterior)
     init_hypo.predictor_states = self.get_predictor_states()
     init_node = BOWNode(init_hypo, posterior, score_breakdown, [])
     self._add_to_heap(init_node, best_word, 0.0) # Expected score irrelevant 
Example #5
 def create_initial_node(self):
     """Create the root node for the search tree. """
     init_hypo = PartialHypothesis()
     posterior, score_breakdown = self.apply_predictors()
     children = sorted([RestartingChild(w, posterior[w], score_breakdown[w])
                         for w in posterior],
                       key=lambda c: c.score, reverse=True)
     init_hypo.predictor_states = self.get_predictor_states()
     heappush(self.open_nodes, (0.0, RestartingNode(init_hypo, children)))
Example #7
File: bow.py Project: ml-lab/sgnmt
 def create_initial_node(self):
     """Create the root node for the search tree. """
     init_hypo = PartialHypothesis()
     posterior, score_breakdown = self.apply_predictors()
     best_word = utils.argmax(posterior)
     init_hypo.predictor_states = self.get_predictor_states()
     init_node = BOWNode(init_hypo, posterior, score_breakdown, [])
     self._add_to_heap(init_node, best_word,
                       0.0)  # Expected score irrelevant
Example #8
 def decode(self, src_sentence):
     """Decodes a single source sentence exhaustively using depth 
     first search under length constraints.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of ``Hypothesis`` instances ordered by their
         score.
     """
     if len(self.predictors) != 1:
         logging.fatal("SimpleDFS only works with a single predictor!")
     self.dfs_predictor = self.predictors[0][0]
     self.initialize_predictors(src_sentence)
     lower_bounds = self.all_lower_bounds[self.current_sen_id]
     self.max_len = max(int(el[0]) for el in lower_bounds)
     self.len_enabled = np.zeros((self.max_len + 1,), bool)
     self.len_lower_bounds = np.full((self.max_len + 1,), -np.inf)
     self.len_min_lower_bounds = np.zeros((self.max_len + 1,))
     self.len_best_hypos = [None] * (self.max_len + 1)
     for el in lower_bounds:
         l = int(el[0])
         self.len_enabled[l] = True
         self.len_lower_bounds[l] = float(el[1])
     self._update_min_lower_bounds()
     self._dfs(PartialHypothesis())
     for hypo in self.len_best_hypos:
         if hypo is not None:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     return self.get_full_hypos_sorted()
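`_update_min_lower_bounds` is not shown in this listing. One plausible reading, given how the arrays above are laid out, is a suffix minimum over the enabled length-specific bounds, so a partial hypothesis can be pruned once its score falls below every bound it could still reach. A hypothetical sketch of that computation follows; the function name and semantics are assumptions, not SGNMT's verified implementation.

import numpy as np

def suffix_min_lower_bounds(len_enabled, len_lower_bounds):
    """min_bounds[l] = smallest lower bound over enabled lengths >= l.

    A partial hypothesis of length l whose score is already below this
    value cannot beat the bound at any completion length it can still
    reach, so the corresponding DFS branch can be abandoned.
    """
    n = len(len_lower_bounds)
    min_bounds = np.full(n, np.inf)
    running = np.inf
    for l in range(n - 1, -1, -1):   # sweep from the longest length down
        if len_enabled[l]:
            running = min(running, len_lower_bounds[l])
        min_bounds[l] = running
    return min_bounds

# Toy usage: bounds only at lengths 2 and 4.
enabled = np.array([False, False, True, False, True])
bounds = np.array([-np.inf, -np.inf, -7.5, -np.inf, -3.0])
print(suffix_min_lower_bounds(enabled, bounds))
# [-7.5 -7.5 -7.5 -3.  -3. ]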
Example #9
 def _initialize_order_ds(self):
     self.queues = [MinMaxHeap() for _ in range(self.max_len + 1)]
     self.queues[0].insert(
         (0.0, PartialHypothesis(self.get_predictor_states())))
     self.queue_order = SortedDict({0.0: 0})
     self.score_by_t = [0.0]
     self.score_by_t.extend([None] * self.max_len)
     self.time_sync = defaultdict(lambda: self.beam
                                  if self.beam > 0 else utils.INF)
     self.time_sync[0] = 1
Example #10
 def _greedy_decode(self):
     """Performs greedy decoding from the start node. Used to obtain
     initial bigram statistics.
     """
     hypo = PartialHypothesis()
     hypos = []
     posteriors = []
     score_breakdowns = []
     bag = dict(self.full_bag)
     while bag:
         posterior, score_breakdown = self.apply_predictors()
         hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
         bag_posterior = {w: posterior[w] for w in self.full_bag_with_eos}
         bag_breakdown = {w: score_breakdown[w]
                          for w in self.full_bag_with_eos}
         posteriors.append(bag_posterior)
         score_breakdowns.append(bag_breakdown)
         hypos.append(hypo)
         best_word = utils.argmax({w: bag_posterior[w] for w in bag})
         bag[best_word] -= 1
         if bag[best_word] < 1:
             del bag[best_word]
         self.consume(best_word)
         hypo = hypo.expand(best_word,
                            None,
                            bag_posterior[best_word],
                            score_breakdown[best_word])
     posterior, score_breakdown = self.apply_predictors()
     hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
     bag_posterior = {w: posterior[w] for w in self.full_bag_with_eos}
     bag_breakdown = {w: score_breakdown[w] for w in self.full_bag_with_eos}
     posteriors.append(bag_posterior)
     score_breakdowns.append(bag_breakdown)
     hypos.append(hypo)
     
     hypo = hypo.cheap_expand(utils.EOS_ID,
                              bag_posterior[utils.EOS_ID],
                              score_breakdown[utils.EOS_ID])
     logging.debug("Greedy hypo (%f): %s" % (
                       hypo.score,
                       ' '.join([str(w) for w in hypo.trgt_sentence])))
     self._process_new_hypos(hypos, posteriors, score_breakdowns, hypo)
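The loop above is greedy bag-to-sequence decoding: the bag is a multiset of target word ids, and at each step the highest-scoring word still left in the bag is emitted and consumed; EOS is only appended once the bag is empty. A self-contained toy version follows; the `score` callback is a hypothetical stand-in for the predictor posterior.

EOS_ID = 2

def greedy_bag_decode(bag, score):
    """Greedily order a bag (word id -> count) by repeatedly emitting
    the word with the highest score in the current context.

    score(prefix) returns a dict mapping word id -> log probability.
    """
    bag = dict(bag)               # don't mutate the caller's bag
    prefix = []
    while bag:
        posterior = score(prefix)
        best = max(bag, key=lambda w: posterior[w])  # argmax within the bag
        prefix.append(best)
        bag[best] -= 1
        if bag[best] < 1:
            del bag[best]
    prefix.append(EOS_ID)
    return prefix

# Toy usage: uniform scores except a bonus for word 7 right after word 5.
def score(prefix):
    post = {5: -1.0, 7: -2.0, EOS_ID: -3.0}
    if prefix and prefix[-1] == 5:
        post[7] = -0.5
    return post

print(greedy_bag_decode({5: 1, 7: 2}, score))  # [5, 7, 7, 2]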
Example #11
 def decode(self, src_sentence):
     """Decodes a single source sentence using beam search. """
     self.initialize_predictors(src_sentence)
     hypos = [PartialHypothesis(self.get_predictor_states())]
     for _ in range(self.max_len):
         next_hypos = []
         for hypo in hypos:
             next_hypos.extend(self._expand_hypo(hypo))
         next_hypos.sort(key=lambda h: -h.score)
         hypos = next_hypos[:self.beam_size]
     # Collect hypotheses that ended with EOS (as in the other decode
     # examples); without this, no full hypotheses are ever registered.
     for hypo in hypos:
         if hypo.get_last_word() == utils.EOS_ID:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     return self.get_full_hypos_sorted()
Example #12
 def _get_initial_hypos(self):
     """Get the list of initial ``PartialHypothesis``. This is not
     a single empty hypo but one empty hypo for each predictor.
     """
     states = self.get_predictor_states()
     none_states = [None] * len(states)
     ret = []
     for idx, state in enumerate(states):
         pred_states = list(none_states)
         pred_states[idx] = state
         ret.append(PartialHypothesis(pred_states))
     return ret
Example #13
    def decode(self, src_sentence):
        """Decodes a single source sentence using A* search. """
        self.initialize_predictors(src_sentence)
        open_set = []
        best_score = self.get_lower_score_bound()
        print("Bound:", best_score)
        heappush(open_set,
                 (0.0, PartialHypothesis(self.get_predictor_states())))
        count = 0
        while open_set:
            c, hypo = heappop(open_set)
            count += 1
            if self.early_stopping and hypo.score < best_score:
                continue
            logging.debug(
                "Expand (est=%f score=%f exp=%d best=%f): sentence: %s" %
                (-c, hypo.score, self.apply_predictors_count, best_score,
                 hypo.trgt_sentence))
            if hypo.get_last_word() == utils.EOS_ID:  # Found best hypothesis
                if hypo.score > best_score:
                    logging.debug(
                        "New best hypo (score=%f exp=%d): %s" %
                        (hypo.score, self.apply_predictors_count, ' '.join(
                            [str(w) for w in hypo.trgt_sentence])))
                    best_score = hypo.score
                self.add_full_hypo(hypo.generate_full_hypothesis())
            if len(self.full_hypos) >= self.nbest:  # if we have enough hypos
                    return self.get_full_hypos_sorted(), count
                continue
            self.set_predictor_states(copy.deepcopy(hypo.predictor_states))
            if hypo.word_to_consume is not None:  # Consume if cheap expand
                self.consume(hypo.word_to_consume)
                hypo.word_to_consume = None
            posterior, score_breakdown = self.apply_predictors()
            hypo.predictor_states = self.get_predictor_states()
            for trgt_word in posterior:  # Estimate future cost, add to heap
                next_hypo = hypo.cheap_expand(trgt_word, posterior[trgt_word],
                                              score_breakdown[trgt_word])
                score = next_hypo.score
                if score > best_score:
                    # only push if hypothesis can beat lower bound. Saves memory...
                    heappush(open_set, (-score, next_hypo))

            # Limit heap capacity
            if self.capacity > 0 and len(open_set) > self.capacity:
                new_open_set = []
                for _ in range(self.capacity):
                    heappush(new_open_set, heappop(open_set))
                open_set = new_open_set

        return self.get_full_hypos_sorted(), count
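Two details of the heap usage above are worth noting. Scores are pushed negated because `heapq` is a min-heap, so the highest-scoring hypothesis is popped first. Also, `(priority, hypo)` tuples fall back to comparing the hypotheses themselves when priorities tie, which raises `TypeError` in Python 3 if `PartialHypothesis` defines no ordering; the usual remedy is an increasing counter as a tie-breaker. A sketch of that pattern follows; the class is illustrative, not SGNMT's API.

import heapq
import itertools

class BestFirstQueue:
    """Max-priority queue over unorderable items, built on heapq.

    Priorities are negated for the min-heap, and an increasing counter
    breaks ties so the items themselves are never compared.
    """
    def __init__(self):
        self._heap = []
        self._counter = itertools.count()

    def push(self, priority, item):
        heapq.heappush(self._heap, (-priority, next(self._counter), item))

    def pop(self):
        neg_priority, _, item = heapq.heappop(self._heap)
        return -neg_priority, item

    def __len__(self):
        return len(self._heap)

# Toy usage: items with equal priority no longer trigger a comparison.
q = BestFirstQueue()
q.push(-1.5, object())
q.push(-1.5, object())    # same priority: the counter breaks the tie
q.push(-0.2, object())
print(q.pop()[0])         # -0.2 (best score first)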
Example #14
 def _initialize_decoding(self, src_sentence):
     """Helper function for ``decode`` to which initializes all the
     class attributes
     """
     self.initialize_predictors(src_sentence)
     self.max_expansions = self.get_max_expansions(self.max_expansions_param,
                                                   src_sentence) 
     init_hypo = PartialHypothesis()
     init_hypo.predictor_states = self.get_predictor_states()
     init_hypo.scores = []
     init_hypo.parent_hypo_array_idx = 0 # point to guardian
     self.buckets = [[] for _ in range(self.max_len + 1)]
     self.expanded_hypos = [[] for _ in range(self.max_len + 1)]
     self.buckets[0].append((0.0, init_hypo))
     self.expand_counts = [0.0] # with guardian
     self.expand_backpointers = [0] # with guardian
     self.last_bucket = 0
     self.best_score = self.get_lower_score_bound()
     self.best_word_scores = [NEG_INF] * (self.max_len+1)
     self.compressed = [True] * (self.max_len+1)
     self.guaranteed_optimality = True
     self.cur_iter = 0
Example #15
 def _initialize_decoding(self, src_sentence):
     """Helper function for ``decode`` to which initializes all the
     class attributes
     """
     self.initialize_predictors(src_sentence)
     self.max_expansions = self.get_max_expansions(
         self.max_expansions_param, src_sentence)
     init_hypo = PartialHypothesis()
     init_hypo.predictor_states = self.get_predictor_states()
     init_hypo.scores = []
     init_hypo.parent_hypo_array_idx = 0  # point to guardian
     self.buckets = [[] for _ in range(self.max_len + 1)]
     self.expanded_hypos = [[] for _ in range(self.max_len + 1)]
     self.buckets[0].append((0.0, init_hypo))
     self.expand_counts = [0.0]  # with guardian
     self.expand_backpointers = [0]  # with guardian
     self.last_bucket = 0
     self.best_score = self.get_lower_score_bound()
     self.best_word_scores = [NEG_INF] * (self.max_len + 1)
     self.compressed = [True] * (self.max_len + 1)
     self.guaranteed_optimality = True
     self.cur_iter = 0
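The `buckets` array groups open hypotheses by target length, one bucket per length up to `max_len`, and the guardian entries let the backpointer arrays use index 0 instead of a None check. A toy sketch of the length-bucketed sweep such a layout supports is below; the names and control flow are assumptions for illustration, not the SGNMT implementation.

import math

def bucket_sweep(expand, max_len):
    """Expand hypotheses strictly in order of target length.

    expand(score, hypo) yields continuations that are exactly one token
    longer, so bucket t+1 is only filled while bucket t is drained.
    """
    buckets = [[] for _ in range(max_len + 1)]
    buckets[0].append((0.0, ""))        # the empty initial hypothesis
    for t in range(max_len):
        for score, hypo in buckets[t]:
            for new_score, new_hypo in expand(score, hypo):
                buckets[t + 1].append((new_score, new_hypo))
    return buckets[max_len]

# Toy usage: binary continuations with fixed per-symbol log probabilities.
def expand(score, hypo):
    return [(score + math.log(0.6), hypo + "a"),
            (score + math.log(0.4), hypo + "b")]

print(len(bucket_sweep(expand, 3)))     # 8 hypotheses of length 3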
Example #16
 def _greedy_decode(self):
     """Performs greedy decoding from the start node. Used to obtain
     the initial hypothesis.
     """
     hypo = PartialHypothesis()
     hypos = []
     posteriors = []
     score_breakdowns = []
     scores = []
     bag = dict(self.full_bag)
     while bag:
         posterior, score_breakdown = self.apply_predictors()
         hypo.predictor_states = copy.deepcopy(self.get_predictor_states())
         hypos.append(hypo)
         posteriors.append(posterior)
         score_breakdowns.append(score_breakdown)
         best_word = utils.argmax({w: posterior[w] for w in bag})
         bag[best_word] -= 1
         if bag[best_word] < 1:
             del bag[best_word]
         self.consume(best_word)
         hypo = hypo.expand(best_word,
                            None,
                            posterior[best_word],
                            score_breakdown[best_word])
         scores.append(posterior[best_word])
     posterior, score_breakdown = self.apply_predictors()
     hypo.predictor_states = self.get_predictor_states()
     hypos.append(hypo)
     posteriors.append(posterior)
     score_breakdowns.append(score_breakdown)
     hypo = hypo.expand(utils.EOS_ID,
                        None,
                        posterior[utils.EOS_ID],
                        score_breakdown[utils.EOS_ID])
     logging.debug("Greedy hypo (%f): %s" % (
                       hypo.score,
                       ' '.join([str(w) for w in hypo.trgt_sentence])))
     scores.append(posterior[utils.EOS_ID])
     self.best_score = hypo.score
     self.add_full_hypo(hypo.generate_full_hypothesis())
     self._process_new_hypos(FlipCandidate(hypo.trgt_sentence,
                                            scores,
                                            self._create_dummy_bigrams(),
                                            hypo.score),
                              len(hypo.trgt_sentence),
                              hypos,
                              posteriors,
                              score_breakdowns)
Example #17
 def decode(self, src_sentence):
     """Decodes a single source sentence using beam search. """
     self.initialize_predictors(src_sentence)
     hypos = [PartialHypothesis(self.get_predictor_states())]
     it = 0
     while self.stop_criterion(hypos):
         if it > self.max_len:  # prevent infinite loops
             break
         it = it + 1
         next_hypos = []
         next_scores = []
         self.min_score = utils.NEG_INF
         self.best_scores = []
         print("HYPOS")
         for hypo in hypos:
             print(
                 "it%d: %s (%f)" %
                 (it, utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score))
         for hypo in hypos:
             print("H: %s (%f)" %
                   (utils.apply_trg_wmap(hypo.trgt_sentence), hypo.score))
             if hypo.get_last_word() == utils.EOS_ID:
                 next_hypos.append(hypo)
                 next_scores.append(self._get_combined_score(hypo))
                 continue
             for next_hypo in self._expand_hypo(hypo):
                 next_score = self._get_combined_score(next_hypo)
                 if next_score > self.min_score:
                     next_hypos.append(next_hypo)
                     next_scores.append(next_score)
                     self._register_score(next_score)
         if self.hypo_recombination:
             hypos = self._filter_equal_hypos(next_hypos, next_scores)
         else:
             hypos = self._get_next_hypos(next_hypos, next_scores)
     for hypo in hypos:
         if hypo.get_last_word() == utils.EOS_ID:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     if not self.full_hypos:
         logging.warning("No complete hypotheses found for %s" % src_sentence)
         for hypo in hypos:
             self.add_full_hypo(hypo.generate_full_hypothesis())
     return self.get_full_hypos_sorted()
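`_register_score` and `min_score` are not shown in this listing. A plausible reading is that they track the `beam_size` best combined scores seen in the current iteration, with `min_score` as the admission threshold for new candidates. A hypothetical sketch of that bookkeeping follows; it is an assumption, not the confirmed SGNMT implementation.

import heapq

class ScoreThreshold:
    """Keep the k best scores seen; min_score is the admission bar."""
    def __init__(self, k):
        self.k = k
        self._heap = []                # min-heap holding the k best scores

    def register(self, score):
        if len(self._heap) < self.k:
            heapq.heappush(self._heap, score)
        elif score > self._heap[0]:
            heapq.heappushpop(self._heap, score)

    @property
    def min_score(self):
        # No threshold until k scores have been registered.
        return self._heap[0] if len(self._heap) == self.k else float("-inf")

# Toy usage: only candidates beating the 2nd-best score pass the bar.
thr = ScoreThreshold(2)
for s in [-1.5, -0.9, -0.2]:
    thr.register(s)
print(thr.min_score)   # -0.9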
Example #18
 def decode(self, src_sentence):
     """Decodes a single source sentence exhaustively using depth 
     first search.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of ``Hypothesis`` instances ordered by their
         score.
     """
     if len(self.predictors) != 1:
         logging.fatal("SimpleDFS only works with a single predictor!")
     self.dfs_predictor = self.predictors[0][0]
     if self._min_length_ratio > 0.0:
         self._min_length = int(
             math.ceil(self._min_length_ratio * len(src_sentence))) + 1
     self.initialize_predictors(src_sentence)
     self.best_score = self.get_lower_score_bound()
     self._dfs(PartialHypothesis())
     return self.get_full_hypos_sorted()
Example #19
 def decode(self, src_sentence):
     """Decodes a single source sentence using depth first search.
     If ``max_expansions`` equals 0, this corresponds to exhaustive
     search for the globally best scoring hypothesis. Note that with
     ``early_stopping`` enabled, the returned hypotheses are not
     necessarily the global n-best hypotheses. To create an exact
     n-best list, disable both ``max_expansions`` and
     ``early_stopping`` in the constructor.
     
     Args:
         src_sentence (list): List of source word ids without <S> or
                              </S> which make up the source sentence
     
     Returns:
         list. A list of ``Hypothesis`` instances ordered by their
         score. If ``max_expansions`` equals 0, the first element
         holds the global best scoring hypothesis
     """
     self.initialize_predictors(src_sentence)
     self.max_expansions = self.get_max_expansions(
         self.max_expansions_param, src_sentence)
     self.best_score = self.get_lower_score_bound()
     self._dfs(PartialHypothesis())
     return self.get_full_hypos_sorted()
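The `_dfs` helper itself is not part of this listing. The principle it relies on is branch-and-bound: log-probability scores only decrease as a hypothesis grows, so once a partial score drops below the best complete score found so far the whole branch can be abandoned, and with no expansion limit the search is exact. A self-contained sketch of that idea under toy scoring follows; it is not SGNMT's `_dfs`.

import math

EOS = "</s>"

def dfs_decode(expansions, max_len):
    """Exhaustive DFS for the best-scoring sequence, pruned by the best
    complete score found so far. Scores are log probabilities, so they
    are monotonically non-increasing along a path, which makes the
    pruning exact rather than heuristic.
    """
    best = {"score": float("-inf"), "tokens": None}

    def _dfs(tokens, score):
        if score < best["score"]:
            return                       # bound: this branch cannot improve
        if tokens and tokens[-1] == EOS:
            best["score"], best["tokens"] = score, list(tokens)
            return
        if len(tokens) >= max_len:
            return
        for token, log_prob in expansions(tokens):
            _dfs(tokens + [token], score + log_prob)

    _dfs([], 0.0)
    return best["tokens"], best["score"]

# Toy usage: a two-word vocabulary plus EOS with fixed probabilities.
def expansions(tokens):
    return [("x", math.log(0.5)), ("y", math.log(0.3)), (EOS, math.log(0.2))]

print(dfs_decode(expansions, max_len=4))  # (['</s>'], -1.609...)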
Example #21
 def _get_initial_hypos(self):
     """Get the list of initial ``PartialHypothesis``. """
     return [PartialHypothesis(self.get_predictor_states())]