Exemplo n.º 1
0
    def hillclimb(self, alignment_info, j_pegged=None):
        """
        Starting from the alignment in ``alignment_info``, look at
        neighboring alignments iteratively for the best one, according
        to Model 4

        Note that Model 4 scoring is used instead of Model 5 because the
        latter is too expensive to compute.

        There is no guarantee that the best alignment in the alignment
        space will be found, because the algorithm might be stuck in a
        local maximum.

        :param j_pegged: If specified, the search will be constrained to
            alignments where ``j_pegged`` remains unchanged
        :type j_pegged: int

        :return: The best alignment found from hill climbing
        :rtype: AlignmentInfo
        """
        alignment = alignment_info  # alias with shorter name
        max_probability = IBMModel4.model4_prob_t_a_given_s(alignment, self)

        while True:
            old_alignment = alignment
            for neighbor_alignment in self.neighboring(alignment, j_pegged):
                neighbor_probability = IBMModel4.model4_prob_t_a_given_s(
                    neighbor_alignment, self)

                if neighbor_probability > max_probability:
                    alignment = neighbor_alignment
                    max_probability = neighbor_probability

            if alignment == old_alignment:
                # Until there are no better alignments
                break

        alignment.score = max_probability
        return alignment
Exemplo n.º 2
0
    def prune(self, alignment_infos):
        """
        Removes alignments from ``alignment_infos`` that have
        substantially lower Model 4 scores than the best alignment

        :return: Pruned alignments
        :rtype: set(AlignmentInfo)
        """
        alignments = []
        best_score = 0

        for alignment_info in alignment_infos:
            score = IBMModel4.model4_prob_t_a_given_s(alignment_info, self)
            best_score = max(score, best_score)
            alignments.append((alignment_info, score))

        threshold = IBMModel5.MIN_SCORE_FACTOR * best_score
        alignments = [a[0] for a in alignments if a[1] > threshold]
        return set(alignments)