Exemplo n.º 1
0
# Probe the Apertium wrapper first; a non-empty `err` aborts with an
# installation hint. `lps` is handed to check_installation() below
# (presumably the installed language pairs -- TODO confirm).
(lps,err) = apertium.test_apertium()
assertion(err == '', "Apertium can't be found.\nPlease check the installation.")

# Validate the input data: both sentences must be non-empty and the
# language pair must have exactly two components.
assertion(s_sentence != "", "S should be there.\nSee -h for help")
assertion(t_sentence != "", "T should be there.\nSee -h for help")
assertion(len(pairs) == 2, "P should be of form 'a-b', eg 'en-eo'\nSee -h for help")

# Check that the requested language pair (args.P) is installed.
check_installation(apertium, args.P, lps, l_dir)

# Obtain the subsequences of the source sentence.
subseq = get_subseqs(s_sentence, single_words_allowed)

# Convert the subsequences and pair the output against the target sentence.
# NOTE(review): `err` returned by convert() is not inspected in the visible code.
(out, err) = apertium.convert(subseq, l_dir)
# `means` is passed to get_pairs() for both directions, so whatever
# get_pairs() stores in it is shared between the forward and reverse passes.
means = {}
outp = get_pairs(subseq, out, t_sentence, means)

# If the -r option is set, repeat all of the above in the reverse direction.
if reverse:
	# Rebinds `apertium` to a wrapper for the swapped pair; the forward
	# instance is no longer reachable through this name afterwards.
	apertium = Apertium(pairs[1], pairs[0])
	check_installation(apertium, 
		"{0}-{1}".format(pairs[1], pairs[0]), lps, l_dir)
	subseq = get_subseqs(t_sentence, single_words_allowed)
	(out, err) = apertium.convert(subseq, l_dir)
	# Reverse results are appended to the forward ones; the extra `reverse`
	# argument is forwarded to get_pairs().
	outp += get_pairs(subseq, out, s_sentence, means, reverse)

# Print pairs
# Starts empty; presumably records pairs already printed by the loop that
# follows -- TODO confirm.
printed_pairs = []
for out in outp:
Exemplo n.º 2
0
	# Preprocessed source and target text of the current translation unit.
	src, tgt = preprocess(tmxu.getsource()), preprocess(tmxu.gettarget())
	
	# Obtain subsequences
	out_locations = {}         # source location -> locations found in tgt
	seqs_covered = []          # lower-cased source segments already queued
	sub_segments = ""          # queued segments, each terminated by '.|'
	seqs_covered_in_tgt = []   # every target location matched so far

	# Each `s` is indexed as s[0]:s[1] to slice `src` and is later used as a
	# dict key (presumably a (start, end) tuple -- TODO confirm).
	subseq = get_subseq_locations(src, single_words_allowed)
	for s in subseq:
		seg = src[s[0]:s[1]]
		# Queue each distinct segment only once, compared case-insensitively.
		if seg.lower() not in seqs_covered:
			sub_segments += seg + '.|'
			seqs_covered.append(seg.lower())

	# Convert all queued segments in a single call.
	(outp, err) = apertium.convert(sub_segments, l_dir)
	
	# NOTE(review): `subseq` still holds every location, including those whose
	# segment text was skipped as a duplicate above, while `sub_segments` has
	# one entry per distinct segment. If any duplicate was skipped, the pairs
	# produced by this zip after that point look misaligned (assuming convert()
	# returns one '.|'-terminated output per input segment, in order) -- confirm.
	for s, out in zip(subseq, outp.split('.|')):
		out = preprocess(out)
		out_locs = get_out_locations(out, tgt)
		# Record only the source locations whose output was found in tgt.
		if out_locs != []:
			out_locations[s] = out_locs
			seqs_covered_in_tgt += out_locs

	if reverse:
		seqs_covered = []
		subseq = get_subseq_locations(tgt, single_words_allowed)
		for s in subseq:
			# if s in seqs_covered_in_tgt:
			# 	continue
			seg = tgt[s[0]:s[1]]