Пример #1
0
	def add_synset(self, word):
		"""Register *word* in ``self.fdb`` and index its wiki backlinks as synonyms.

		The word is stored under ``ROOT + <md5 hex digest of word>`` unless an
		entry already exists at that key.  Each title returned by
		``Extractor.getWikiBacklinks(word)`` is then upper-cased and stored
		under ``SYN + <title>`` with the word's digest as the value.

		word -- the term to register; it is hashed exactly as passed in.
		"""
		ex = Extractor()
		word_id = md5.md5(word).hexdigest()
		root_key = ROOT + word_id
		# Check the same key that is written below. Looking up the bare digest
		# (without the ROOT prefix) would never see an entry stored by this
		# method, so the root would be rewritten on every call.
		if not self.fdb.get(root_key):
			self.fdb.set(root_key, word)
		synset = ex.getWikiBacklinks(word)
		# The backlink lookup may return nothing; in that case only the root
		# entry is stored.
		for synonym in synset or ():
			self.fdb.set(SYN + synonym.upper(), word_id)
Пример #2
0
	def getWikiDist(self, a, b):
		"""Return a backlink-overlap distance between wiki titles *a* and *b*.

		With A and B the non-redirect backlink sets reported by
		``Extractor.getWikiBacklinks``, the result is

		    (log(max(|A|, |B|)) - log(|A & B|)) / (log(10**7) - log(min(|A|, |B|)))

		NOTE(review): this has the shape of the Normalized Google Distance,
		with 10**7 presumably standing in for the total number of pages --
		confirm.

		a, b -- page titles; spaces are replaced with underscores before lookup.

		Returns ``self.INF`` when either title has no backlinks, when the two
		backlink sets have no page in common, or when the denominator is zero.
		"""
		a = a.replace(' ', '_')
		b = b.replace(' ', '_')
		extractor = Extractor()
		sa = extractor.getWikiBacklinks(a, filter = "nonredirects")
		sb = extractor.getWikiBacklinks(b, filter = "nonredirects")
		# An empty (or missing) backlink set would make log() fail on 0, so
		# treat "no evidence" as maximally distant instead of crashing.
		if not sa or not sb:
			return self.INF
		shared = set.intersection(sa, sb)
		# No common backlinks: log(0) is undefined and the distance is
		# unbounded, so report it with the same sentinel as other
		# degenerate cases.
		if not shared:
			return self.INF
		n1 = log(max(len(sa), len(sb)))
		n2 = log(len(shared))
		d1 = log(10 ** 7)
		d2 = log(min(len(sa), len(sb)))
		# A previously disabled adjustment for direct links between a and b
		# is intentionally not applied here.
		try:
			return (n1 - n2) / float(d1 - d2)
		except ZeroDivisionError as err:
			# Happens when the smaller backlink set has exactly 10**7 entries.
			# The parenthesized form prints the same text under Python 2 and
			# is also valid Python 3.
			print(err)
			return self.INF