def fetch_alignment(basename, langs, backend='hunalign'):
    """Build an Alignment whose columns follow the order of ``langs``.

    Only the first two characters of every entry in ``langs`` are used
    to identify a text, so several entries may share one language code.

    * One distinct code: a straight alignment is created from the
      number of sentences fetched for that code, with ``len(langs)``
      columns.
    * Two distinct codes: the alignment is read from
      ``<basename>/<x>-<y>.<backend>``; if that raises IOError the
      two codes are swapped and the read is attempted once more.
    * Any other count (the code expects exactly pl, cu and el): the
      pl-cu, cu-el and pl-el files are read, turned into ladders and
      combined with ``merge_3_alignments``.

    In the last two cases the result holds one column per entry of
    ``langs`` followed by the third column of the loaded data.
    """
    assert langs
    codes = list(set(lang[:2] for lang in langs))

    def load_pair(first, second):
        # Read the alignment file stored for one ordered language pair.
        return Alignment.from_file(
            "%s/%s-%s.%s" % (basename, first, second, backend))

    # A single underlying text needs no alignment file at all.
    if len(codes) == 1:
        sentence_count = len(fetch_sentences(basename, codes[0]))
        return Alignment.create_straight(sentence_count, len(langs))

    if len(codes) == 2:
        try:
            merged = load_pair(codes[0], codes[1])
        except IOError:
            # The set gives no fixed order, so the file may be stored
            # under the opposite pair name. ``codes`` stays reversed so
            # that it matches the column order of the loaded data.
            codes.reverse()
            merged = load_pair(codes[0], codes[1])
    else:
        pl_cu = Alignment.from_file(
            '%s/pl-cu.%s' % (basename, backend)).as_ladder()
        cu_el = Alignment.from_file(
            '%s/cu-el.%s' % (basename, backend)).as_ladder()
        pl_el = Alignment.from_file(
            '%s/pl-el.%s' % (basename, backend)).as_ladder()
        # Swap every pair so the third ladder is passed as (el, pl).
        el_pl = [(right, left) for (left, right) in pl_el]
        merged = merge_3_alignments(pl_cu, cu_el, el_pl)
        # Column order used for the lookup table below.
        codes = ['pl', 'cu', 'el']

    # Shared tail for the two- and three-language cases.
    columns = _transpose(merged.data)
    column_for = {}
    for position, code in enumerate(codes):
        column_for[code] = columns[position]

    selected = [column_for[lang[:2]] for lang in langs]
    # The third column of the loaded data is always carried along.
    # NOTE(review): presumably a cost/score column in the two-language
    # case - confirm what it holds after merge_3_alignments.
    selected.append(columns[2])
    return Alignment(_transpose(selected))
def get_alignment(self, langs, backend=None):
    """Return an Alignment with one column per entry of ``langs``.

    Described by the original author as "like fetcher". Only the first
    two characters of every entry in ``langs`` identify a text.

    Args:
        langs: sequence of at least two language codes.
        backend: a member of ``possible_backends``, or a false value to
            try every entry of ``possible_backends`` in turn.

    Returns:
        An Alignment holding the column of each requested language, in
        the order of ``langs``, followed by the third column of the
        loaded alignment data.

    Raises:
        IOError: for two distinct languages, when no alignment could be
            loaded for the pair in either order with any candidate
            backend.
    """
    assert len(langs) >= 2
    assert not backend or backend in possible_backends
    real_langs = list(set(lang[:2] for lang in langs))

    if len(real_langs) == 1:
        # NOTE(review): `basename` is neither a parameter nor a local of
        # this method; unless a module-level `basename` exists this line
        # raises NameError. Confirm the intended source of the text name.
        text_len = len(fetch_sentences(basename, real_langs[0]))
        return Alignment.create_straight(text_len, len(langs))

    if len(real_langs) == 2:
        candidate_backends = [backend] if backend else possible_backends
        a = None
        # The set gives no fixed order, so try both "x-y" and "y-x".
        for _ in range(2):
            langs_string = '-'.join(str(code) for code in real_langs)
            for b in candidate_backends:
                try:
                    a = Alignment.from_file(self._p(langs_string + '.' + b))
                except IOError:
                    continue
                break
            if a:
                break
            real_langs.reverse()
        if not a:
            raise IOError(
                'no alignment found for languages %s (backends tried: %s)'
                % ('-'.join(str(code) for code in real_langs),
                   ', '.join(str(b) for b in candidate_backends)))
    else:  # len(real_langs) == 3 :(
        a1 = self.get_alignment(['pl', 'cu'], backend).as_ladder()
        a2 = self.get_alignment(['cu', 'el'], backend).as_ladder()
        a3 = self.get_alignment(['pl', 'el'], backend).as_ladder()
        # NOTE(review): fetch_alignment swaps each pair of the pl-el
        # ladder before merging; here the ladder is passed unswapped.
        # Confirm which orientation merge_3_alignments expects.
        a = merge_3_alignments(a1, a2, a3)
        real_langs = ['pl', 'cu', 'el']  # column order used below

    # Common part for the two- and three-language cases.
    columns = _transpose(a.data)
    columns_map = {code: columns[i] for i, code in enumerate(real_langs)}
    chosen_columns = [columns_map[lang[:2]] for lang in langs]
    # The third column of the loaded data is always carried along.
    chosen_columns.append(columns[2])
    return Alignment(_transpose(chosen_columns))