(lps,err) = apertium.test_apertium() assertion(err == '', "Apertium can't be found.\nPlease check the installation.") #Testing Input data assertion(s_sentence != "", "S should be there.\nSee -h for help") assertion(t_sentence != "", "T should be there.\nSee -h for help") assertion(len(pairs) == 2, "P should be of form 'a-b', eg 'en-eo'\nSee -h for help") #Checking Language pair Installation. check_installation(apertium, args.P, lps, l_dir) #Obtain Subsequences. subseq = get_subseqs(s_sentence, single_words_allowed) #Conversion and printing (out, err) = apertium.convert(subseq, l_dir) means = {} outp = get_pairs(subseq, out, t_sentence, means) #If -r Option is set then doing all above stuff in reverse. if reverse: apertium = Apertium(pairs[1], pairs[0]) check_installation(apertium, "{0}-{1}".format(pairs[1], pairs[0]), lps, l_dir) subseq = get_subseqs(t_sentence, single_words_allowed) (out, err) = apertium.convert(subseq, l_dir) outp += get_pairs(subseq, out, s_sentence, means, reverse) #Print pairs printed_pairs = [] for out in outp:
# Preprocess both sides of the translation unit before any matching is done.
src, tgt = preprocess(tmxu.getsource()), preprocess(tmxu.gettarget())

# Obtain subsequences.
out_locations = {}          # source span -> result of get_out_locations()
seqs_covered = []           # lower-cased segment texts already queued
seqs_covered_in_tgt = []    # every location found in tgt so far
# Spans whose text was actually queued for conversion, in queue order.
# The converted outputs must be paired with these, not with the full
# `subseq` list: repeated segments are skipped below, so pairing against
# `subseq` shifts every translation after the first repeat onto the wrong
# span.
converted_spans = []
subseq = get_subseq_locations(src, single_words_allowed)
for s in subseq:
    seg = src[s[0]:s[1]]
    if seg.lower() not in seqs_covered:
        converted_spans.append(s)
        seqs_covered.append(seg.lower())

# One batch for the converter; every segment is terminated by '.|' so the
# output can be split back into per-segment pieces.
sub_segments = ''.join(src[loc[0]:loc[1]] + '.|' for loc in converted_spans)

(outp, err) = apertium.convert(sub_segments, l_dir)
# zip() stops at the shorter input, so the empty piece that split() leaves
# after the final '.|' is ignored.
for s, out in zip(converted_spans, outp.split('.|')):
    out = preprocess(out)
    out_locs = get_out_locations(out, tgt)
    if out_locs != []:
        out_locations[s] = out_locs
        seqs_covered_in_tgt += out_locs

if reverse:
    # Start over with the target side as the text to segment.
    seqs_covered = []
    subseq = get_subseq_locations(tgt, single_words_allowed)
    for s in subseq:
        # if s in seqs_covered_in_tgt:
        #     continue
        # NOTE(review): the rest of this loop lies outside the visible fragment.
        seg = tgt[s[0]:s[1]]