def similarity(self, response1, response2):
    """Score how much the word collections of two responses overlap.

    Each response is passed through ``util.wordsFromResponse``; the score is
    the size of the intersection of the two results divided by the size of
    their union.

    Args:
        response1: First response, forwarded to ``util.wordsFromResponse``.
        response2: Second response, forwarded to ``util.wordsFromResponse``.

    Returns:
        ``0.0`` when the union of the two word collections is empty,
        otherwise the float ratio ``len(intersection) / len(union)``.
    """
    # NOTE(review): relies on util.wordsFromResponse returning objects that
    # support ``|`` and ``&`` (presumably sets) -- confirm against util.
    first = util.wordsFromResponse(response1)
    second = util.wordsFromResponse(response2)

    combined = first | second
    if len(combined) == 0:
        # Nothing to compare; returning early also avoids dividing by zero.
        return 0.0

    shared = first & second
    # float(...) forces true division regardless of interpreter version.
    return float(len(shared)) / len(combined)
def responseToVec(self, resp):
    """Convert a response into a float vector of length ``self.vecDim``.

    Words are gathered by calling ``util.wordsFromResponse(resp, k)`` for
    every ``k`` from 1 through ``self.ngram_cap`` and merging the results
    into one set.  For each gathered word that is a key of
    ``self.wordIndices``, the vector entry at the mapped index is
    incremented by one; words without an entry are ignored.

    Args:
        resp: The response, forwarded to ``util.wordsFromResponse``.

    Returns:
        A NumPy array created with ``np.zeros(self.vecDim, dtype=float)``
        and then incremented as described above.
    """
    # NOTE(review): the second argument to util.wordsFromResponse is
    # presumably the n-gram size -- confirm against util.
    found = set()
    for size in range(1, self.ngram_cap + 1):
        found |= util.wordsFromResponse(resp, size)

    vec = np.zeros(self.vecDim, dtype=float)
    for token in found:
        if token not in self.wordIndices:
            # Words that have no assigned index do not contribute.
            continue
        # Increment rather than assign, so that several words mapped to the
        # same index accumulate.
        vec[self.wordIndices[token]] += 1
    return vec