def _combine_posteriors_simple(self, non_zero_words, posterior, unk_prob,
                               top_n=0, original_posterior=None):
    """Look up the scores of ``non_zero_words`` in a single predictor
    posterior, falling back to the UNK probability for missing words.

    Args:
        non_zero_words (set): All words with positive probability.
        posterior: Predictor posterior distribution calculated with
            ``predict_next()``.
        unk_prob: UNK probability of the predictor, calculated with
            ``get_unk_probability``.
        top_n (int): If positive, restrict ``non_zero_words`` to the
            ``top_n`` highest scoring entries of ``posterior``.
        original_posterior: Optional second posterior; if given, its
            scores for the same words are returned as well.

    Returns:
        Tuple (non_zero_words, scores, orig_scores). ``scores`` is a
        numpy array aligned with ``non_zero_words``; ``orig_scores``
        is None if ``original_posterior`` is None.
    """
    if top_n > 0:
        non_zero_words = utils.argmax_n(posterior, top_n)
    # Plain comprehensions instead of np.vectorize: vectorize raises
    # on size-0 inputs unless otypes is set, and would treat a `set`
    # argument as a single 0-d object rather than iterating it.
    scores = np.array(
        [utils.common_get(posterior, w, unk_prob) for w in non_zero_words])
    orig_scores = None
    if original_posterior is not None:
        orig_scores = np.array(
            [utils.common_get(original_posterior, w, unk_prob)
             for w in non_zero_words])
    return non_zero_words, scores, orig_scores
def _get_next_hypos(self, all_hypos, size, other_groups=None):
    """Select the ``size`` best hypotheses for the next iteration.

    Hypotheses are ranked by their adjusted score; if ``other_groups``
    is given, a weighted Hamming distance penalty against the
    hypotheses of the other groups is added before ranking.
    """
    scores = np.array([self.get_adjusted_score(h) for h in all_hypos])
    if other_groups:
        penalty = self.hamming_distance_penalty(
            all_hypos, utils.flattened(other_groups))
        scores = scores + self.lmbda * penalty
    best = utils.argmax_n(scores, size)
    return [all_hypos[i] for i in best]
def _scale_combine_non_zero_scores(non_zero_word_count,
                                   posteriors,
                                   unk_probs,
                                   pred_weights,
                                   top_n=0):
    """Weight each predictor posterior, sum them, and return the word
    indices with the ``top_n`` highest combined scores.

    Dict posteriors are densified to ``non_zero_word_count`` entries,
    with missing words filled by the predictor's UNK score; array
    posteriors are padded with the UNK score or truncated to match
    that length.
    """
    weighted = []
    for post, unk, weight in zip(posteriors, unk_probs, pred_weights):
        if isinstance(post, dict):
            dense = np.full(non_zero_word_count, unk)
            for word, score in post.items():
                if word < non_zero_word_count:
                    dense[word] = score
        else:
            missing = non_zero_word_count - len(post)
            if missing > 0:
                dense = np.concatenate((post, np.full(missing, unk)))
            elif missing < 0:
                dense = post[:missing]
            else:
                dense = post
        weighted.append(dense * weight)
    combined = np.sum(weighted, axis=0)
    return utils.argmax_n(combined, top_n)
def _get_next_hypos(self, all_hypos, all_scores):
    """Keep the ``beam_size`` highest scoring hypotheses for the
    next iteration.
    """
    best = utils.argmax_n(all_scores, self.beam_size)
    return [all_hypos[i] for i in best]
def _best_eos(self, hypos):
    """Returns true if the best hypothesis ends with </S>"""
    adjusted = [self.get_adjusted_score(h) for h in hypos]
    top = utils.argmax_n(adjusted, self.nbest)
    # Every one of the nbest-ranked hypotheses must end in </S>.
    return all(hypos[i].get_last_word() == utils.EOS_ID for i in top)