Пример #1
0
 def get_opcodes(self):
     """Return the opcodes for this object's string pair, caching the result.

     A truthy ``self._opcodes`` is returned as-is. Otherwise the opcodes are
     computed with ``opcodes`` and stored on ``self._opcodes``: from
     ``self._editops`` together with both strings when edit operations are
     available (truthy), or directly from the two strings when they are not.

     Note that the cache check is a truthiness test, so a falsy cached value
     (e.g. an empty list) is recomputed on every call.
     """
     cached = self._opcodes
     if cached:
         return cached

     if self._editops:
         # Derive the opcodes from the already-known edit operations.
         self._opcodes = opcodes(self._editops, self._str1, self._str2)
     else:
         # No edit operations available: compute from the raw strings.
         self._opcodes = opcodes(self._str1, self._str2)
     return self._opcodes
Пример #2
0
def alignments(data):
    """
    Turn an iterable of word pairs into a list of alignments, one per pair.

    An alignment is a list of ``(chunk_of_first_word, chunk_of_second_word)``
    tuples, for example::

        [('#pseud', '#pseud'), ('o', 'ó'), ('pod', 'pod'), ('i', ''),
         ('o#', 'o#')]

    The chunks come from the operations returned by ``opcodes`` for the two
    words: consecutive 'equal' operations form one chunk, and consecutive
    operations of any other kind are merged into one chunk as well. Chunks
    that are empty on both sides are never emitted.

    The original author notes that this relies on the Levenshtein distance
    and assumes the two words are close (few letters differ); for Russian
    and English another method would be needed.
    """
    result = []
    for source, target in data:
        pairs = []
        left_parts, right_parts = [], []
        # Runs start out as "equal"; an initial non-equal operation merely
        # flips this flag, because there is nothing to flush yet.
        in_equal_run = True

        for tag, src_start, src_end, dst_start, dst_end in opcodes(source, target):
            is_equal = tag == 'equal'
            if is_equal != in_equal_run:
                # Boundary between an equal and a non-equal run: emit the
                # chunk collected so far (if any) and start a new one.
                in_equal_run = is_equal
                left, right = ''.join(left_parts), ''.join(right_parts)
                if left or right:
                    pairs.append((left, right))
                    left_parts, right_parts = [], []

            left_parts.append(source[src_start:src_end])
            right_parts.append(target[dst_start:dst_end])

        # Emit whatever is left over from the final run.
        left, right = ''.join(left_parts), ''.join(right_parts)
        if left or right:
            pairs.append((left, right))

        result.append(pairs)
    return result