Exemplo n.º 1
0
file2.close()

# Read args.SLFT and args.TLFT in lockstep (one line from each per iteration)
# and write every qualifying fuzzy-match group to args.OUT.
# The `with` statement guarantees all three handles are closed (and the output
# flushed) even if preprocess(), FMS or a write raises part-way through.
with open(args.SLFT) as file3, open(args.TLFT) as file4, open(args.OUT, 'w') as file5:
	while True:
		line = preprocess(file3.readline())
		line1 = preprocess(file4.readline())
		# Stop at the first pair where either preprocessed line is empty.
		# NOTE(review): readline() returns '' at EOF; whether a blank line in
		# the middle of a file also ends the loop depends on preprocess(),
		# which is not visible here -- confirm.
		if not line or not line1:
			break
		# Both files were already advanced above, so skipping keeps them aligned.
		if len(line.split()) > max_len:
			continue
		for s, t in zip(src_sentences, tgt_sentences):
			matcher = FMS(s, line)
			# Skip the full computation when even the maximum possible FMS
			# for this pair is below the threshold.
			if matcher.get_max_fms() < min_fms:
				continue
			score = matcher.calculate_using_wanger_fischer()	# Actual FMS
			# Scores of exactly 1.0 are excluded; only min_fms <= score < 1.0
			# is written out.
			if min_fms <= score < 1.0:
				# Four lines per hit: s, t, then the current line of
				# args.SLFT and of args.TLFT.
				file5.write("\n".join((s, t, line, line1)) + "\n")
Exemplo n.º 2
0
# Initiate and check Apertium
apertium = Apertium(lps[0], lps[1])
(out, err) = apertium.check_installations(lp_dir)
assertion(out, err)

tmxf = TMXFile(tmxfile, lps[0], lps[1])
tmunits = tmxf.getunits()

# Maps each preprocessed (source, target) translation unit to its FMS against
# s_sentence; only units scoring at least min_fms are kept.
fmses = {}

for tmxu in tmunits:
    src, tgt = preprocess(tmxu.getsource()), preprocess(tmxu.gettarget())
    matcher = FMS(s_sentence, src)
    # Skip the full computation when even the maximum possible FMS for this
    # unit is below the threshold.
    if matcher.get_max_fms() < min_fms:
        continue
    score = matcher.calculate_using_wanger_fischer()
    if score >= min_fms:
        fmses[(src, tgt)] = score

assertion(fmses != {}, "No proper match with FMS > {0} could be found".format(min_fms))

# Order the retained units from highest to lowest FMS. The previous ascending
# sort made index 0 the *lowest*-scoring retained unit, contradicting the
# "best match" selection below.
sorted_fms = sorted(fmses, key=fmses.get, reverse=True)

(src, tgt) = sorted_fms[0]  # Best match (highest FMS)

patcher = Patcher(apertium, src, s_sentence, tgt, use_caching, cache_db_file)
# `patches` is kept bound in case code past this excerpt reads it.
patches = patcher.patch(min_len, max_len, grounded, lp_dir)
best_patch = patcher.get_best_patch()

got_patches = print_patch(best_patch, cover_all, verbose, show_traces)