Exemplo n.º 1
0
	def _do_patching(self, t_app, tau, tau1, covered_pos, grounded_only):
		(a,b) = tau
		t_app = t_app.split()

		if(any(a<=c<=b for c in covered_pos)):
			return None, None

		seg = ' '.join(t_app[a:b+1])
		seg_left = ' '.join(t_app[:a])
		seg_right = ' '.join(t_app[b+1:])

		if grounded_only:
			pe = PhraseExtractor(seg.lower(), tau1.lower())
			aligns = pe.find_alignments()
			if aligns == []:
				return None, None
			p = min(a[0] for a in aligns)
			q = max(a[0] for a in aligns)
			r = min(a[1] for a in aligns)
			s = max(a[1] for a in aligns)
			if p == q or r ==s or p != 0 or q != (b-a) or r != 0 or s != len(tau1.split())-1 :
				return None, None
		
		seg = tau1.split()
		
		pe = PhraseExtractor(' '.join(t_app[a:b+1]).lower(), tau1.lower())
		aligns = pe.find_alignments()
		
		tg_aligns = [x for (_, x) in aligns]
		cp = [a+i for i in range(len(seg)) if i not in tg_aligns]
		cp += covered_pos
		# print(cp)

		if seg_left != '':
			tau1 = tau1.lower()
		return (seg_left + ' ' + tau1 + ' ' + seg_right).strip(), cp
Exemplo n.º 2
0
# The language pair must split into exactly two codes; they are used below
# as lps[0] and lps[1] when constructing Apertium.
# NOTE(review): `assertion` is defined outside this excerpt; judging by the
# "exit if low FMS" step below it presumably aborts with the given message
# when its first argument is false -- confirm.
assertion(len(lps) == 2, "LP should be of type a-b, eg, 'en-eo'")

# Read optional params.
# lp_dir is later handed to apertium.check_installations().
lp_dir = args.d
min_fms = float(args.min_fms)
min_len = int(args.min_len)
# Without an explicit max_len, fall back to the token count of the longer
# of the two sentences.
max_len = int(args.max_len) if args.max_len else max(len(s_sentence.split()), len(s1_sentence.split()))

# Calculate the fuzzy match score (FMS) between S and S1.
fms = FMS(s_sentence, s1_sentence).calculate()

# Exit if the FMS is below the requested minimum.
assertion(fms >= min_fms, "Sentences have low fuzzy match score of %.02f." %fms)

# Get the A set: the pairs extracted from the two sentences, with min_len
# and max_len passed through to PhraseExtractor.
phrase_extractor = PhraseExtractor(s_sentence, s1_sentence, min_len, max_len)
a_set = phrase_extractor.extract_pairs()

# Initiate Apertium for the language pair and check its installation.
# check_installations() returns a pair: `out` is used as the success
# condition and `err` as the message passed to assertion().
apertium = Apertium(lps[0], lps[1])
(out, err) = apertium.check_installations(lp_dir)
assertion(out, err)


# Prepare to generate the D set.
# Whitespace-tokenised forms of the two sentences.
S = s_sentence.split()
S1 = s1_sentence.split()

# Start from empty strings; presumably filled in by the code that follows
# this excerpt -- TODO confirm.
src = ""
src1 = ""
Exemplo n.º 3
0
	def _do_edit_distace_alignment(self, min_len, max_len):
		"""Run the edit-distance alignment of the two stored sentences.

		A PhraseExtractor is built from `self.s_sentence` and
		`self.s1_sentence` with the given length arguments. Its extracted
		pairs are stored in `self.phrases`, and the two values returned by
		`find_non_alignments()` are stored in `self.src_mismatches` and
		`self.tgt_mismatches`.

		Args:
			min_len: Passed through to PhraseExtractor unchanged.
			max_len: Passed through to PhraseExtractor unchanged.
		"""
		extractor = PhraseExtractor(
			self.s_sentence,
			self.s1_sentence,
			min_len,
			max_len,
		)
		self.phrases = extractor.extract_pairs()
		non_aligned = extractor.find_non_alignments()
		self.src_mismatches, self.tgt_mismatches = non_aligned