def hillclimb(self, alignment_info, j_pegged=None):
    """
    Greedily improve an alignment by repeatedly moving to a
    higher-scoring neighbor, where scores are Model 4 probabilities.

    Model 4 scoring is used rather than Model 5 because the latter
    is too expensive to compute. The search may stop at a local
    maximum, so the result is not guaranteed to be the best
    alignment in the whole alignment space.

    As a side effect, the ``score`` attribute of the returned
    alignment is set to its Model 4 probability.

    :param alignment_info: The alignment to start climbing from
    :type alignment_info: AlignmentInfo

    :param j_pegged: If specified, the search will be constrained to
        alignments where ``j_pegged`` remains unchanged
    :type j_pegged: int

    :return: The best alignment found from hill climbing
    :rtype: AlignmentInfo
    """
    model4_score = IBMModel4.model4_prob_t_a_given_s

    current = alignment_info
    best_probability = model4_score(current, self)

    while True:
        round_start = current

        # The neighborhood is generated once per round, from the
        # alignment that was current when the round began.
        for candidate in self.neighboring(current, j_pegged):
            candidate_probability = model4_score(candidate, self)
            # Only a strictly better neighbor replaces the incumbent
            if candidate_probability > best_probability:
                current, best_probability = candidate, candidate_probability

        # A full round without any improvement ends the climb
        if current == round_start:
            break

    current.score = best_probability
    return current
def prune(self, alignment_infos):
    """
    Discard alignments whose Model 4 score is substantially lower
    than that of the best-scoring alignment in ``alignment_infos``.

    An alignment is kept only if its score is strictly greater than
    ``IBMModel5.MIN_SCORE_FACTOR`` times the best score, where the
    best score is never taken to be lower than 0.

    :param alignment_infos: Alignments to filter
    :type alignment_infos: iterable of AlignmentInfo

    :return: Pruned alignments
    :rtype: set(AlignmentInfo)
    """
    # Score every candidate once, remembering the score next to it
    scored = [
        (candidate, IBMModel4.model4_prob_t_a_given_s(candidate, self))
        for candidate in alignment_infos
    ]

    # Track the highest score seen, starting from a floor of 0
    best_score = 0
    for _, score in scored:
        best_score = max(score, best_score)

    cutoff = IBMModel5.MIN_SCORE_FACTOR * best_score
    return {candidate for candidate, score in scored if score > cutoff}