Пример #1
0
	def similarity(self, response1, response2):
		"""Return the Jaccard overlap between the word sets of two responses.

		Both responses are converted with ``util.wordsFromResponse``; the
		results are combined with ``|`` and ``&``, so they are presumably
		sets (confirm against ``util``).

		Returns a float: the size of the intersection divided by the size of
		the union, or 0.0 when the union is empty.
		"""
		first = util.wordsFromResponse(response1)
		second = util.wordsFromResponse(response2)

		union_size = len(first | second)
		# Nothing to compare; also avoids dividing by zero below.
		if union_size == 0:
			return 0.0

		shared_size = len(first & second)
		# Multiplying by 1.0 forces float division even under Python 2.
		return (1.0 * shared_size) / union_size
Пример #2
0
	def responseToVec(self, resp):
		"""Build a vector marking which known n-grams occur in ``resp``.

		N-grams of every size from 1 through ``self.ngram_cap`` are collected
		with ``util.wordsFromResponse`` into a single set, so each distinct
		n-gram is counted at most once. N-grams that are not keys of
		``self.wordIndices`` are skipped.

		Returns a float NumPy array of shape ``self.vecDim`` in which the
		position looked up for each known n-gram has been incremented by one.
		"""
		tokens = set()
		for size in range(1, self.ngram_cap + 1):
			tokens |= util.wordsFromResponse(resp, size)

		vec = np.zeros(self.vecDim, dtype=float)
		index_of = self.wordIndices
		for token in tokens:
			# Skip vocabulary misses.
			if token not in index_of:
				continue
			vec[index_of[token]] += 1
		return vec