Exemplo n.º 1
0
	def map(self,phrase):
		"""Emit fuzzy-match candidates for the short terms of ``phrase``.

		Each term whose length is at most 4 is compared with every entry of
		``self.corpus``. An ``NGram`` index is built for the pair, the term
		itself is removed from it, and the index is searched for the term.
		The first hit is emitted through ``self.emit`` when it is longer
		than the term, has length of at least 4, and its ``fuzz.ratio``
		against the term is above 50%.

		:param phrase: iterable of terms (presumably word strings -- confirm
			against the caller).
		:returns: None; results are delivered as
			``self.emit(term, [match, Pz_, Px_, 1])``.
		"""
		for term in phrase:
			# Terms longer than 4 are skipped entirely.
			if len(term) > 4:
				continue
			for word in self.corpus:
				shared = Set(term) & Set(word)
				if 0 < len(shared) < len(term):
					# NOTE(review): `shared` is a subset of Set(term), so this
					# difference is always empty and the index built here holds
					# no items -- confirm what was meant to be indexed.
					g = NGram(shared - Set(term))
				else:
					# At this point we assume context is not informative;
					# in that case we resort to fuzzy lookup on the word.
					g = NGram(word)
				# Drop the term itself from the index before searching for it.
				g.remove(term)
				matches = g.search(term)
				if not matches:
					continue
				# Only the first hit is used, unpacked into a list.
				best = list(matches[0])
				candidate = best[0]
				# Float division on purpose: with integer operands Python 2
				# would truncate both ratios, leaving the 0.5 threshold
				# reachable only by a ratio of exactly 100.
				# NOTE(review): len(best) is the size of the unpacked hit,
				# not the number of hits -- confirm this is the intended
				# numerator.
				Pz_ = len(best) / float(self.size)
				Px_ = fuzz.ratio(term, candidate) / 100.0
				if Px_ > 0.5 and len(term) < len(candidate) and len(candidate) >= 4:
					self.emit(term, [candidate, Pz_, Px_, 1])
Exemplo n.º 2
0
 def test_set_operations(self):
     """Check item removal and in-place intersection on NGram indexes."""
     first_items = {"abcde", "cdefg", "fghijk", "ijklm"}
     second_items = {"cdefg", "lmnop"}
     first_index = NGram(first_items)
     second_index = NGram(second_items)

     def found(query):
         # Sorted first fields of the hits for `query` in the first index.
         return sorted(hit[0] for hit in first_index.search(query))

     # Removing an item takes it out of later search results.
     self.assertEqual(found('cde'), ["abcde", "cdefg"])
     first_index.remove('abcde')
     self.assertEqual(found('cde'), ["cdefg"])

     # The in-place intersection must agree with the plain-set intersection.
     first_items.remove('abcde')
     first_index.intersection_update(second_index)
     self.assertEqual(first_index, first_items & second_items)
     for query, expected in (('lmn', []), ('ijk', []), ('def', ['cdefg'])):
         self.assertEqual(found(query), expected)
Exemplo n.º 3
0
 def test_set_operations(self):
     """Verify that removal and intersection_update keep searches consistent."""
     left = {"abcde", "cdefg", "fghijk", "ijklm"}
     right = {"cdefg", "lmnop"}
     left_idx = NGram(left)
     right_idx = NGram(right)

     def names(hits):
         # Keep only the first field of every hit, in sorted order.
         return sorted(entry[0] for entry in hits)

     # Step 1: removal of a single item from the index.
     before_removal = names(left_idx.search('cde'))
     self.assertEqual(before_removal, ["abcde", "cdefg"])
     left_idx.remove('abcde')
     after_removal = names(left_idx.search('cde'))
     self.assertEqual(after_removal, ["cdefg"])

     # Step 2: mirror the removal on the plain set, then intersect in place.
     left.remove('abcde')
     left_idx.intersection_update(right_idx)
     expected_items = left.intersection(right)
     self.assertEqual(left_idx, expected_items)
     self.assertEqual(names(left_idx.search('lmn')), [])
     self.assertEqual(names(left_idx.search('ijk')), [])
     self.assertEqual(names(left_idx.search('def')), ['cdefg'])