def matchInputWithMemory(self, idea):
    """
    Score ``idea`` against every entry of the bot's memory.

    Resets ``self.matchRate``, ``self.matchedMemoryValues`` and
    ``self.allThoughts`` and then fills them according to
    ``self.associater``:

    * ``MatchingAlgorithm.CosineTFIDF`` -- the work is delegated to
      ``self.calculateCosine`` with the lower-cased input.
    * any other algorithm -- every value of ``self.botsMemory`` is
      lower-cased, split on whitespace and compared with the tokenised
      input by the matching ``calculateMatchRate*`` method.  A ``Thought``
      is appended to ``self.allThoughts`` for every entry, and entries with
      a non-zero rate are recorded in ``self.matchedMemoryValues`` under
      their memory key.

    Nothing is returned; the results are left on the instance.  After the
    loop ``self.matchRate`` holds the rate of the last entry visited.

    :param idea: raw input sentence.
    """
    self.matchRate = 0
    self.matchedMemoryValues = {}
    self.allThoughts = []

    # Experimental idea kept from the original notes (not implemented):
    # extend the memory with knowledge parsed from a web query.

    lower_case = idea.lower()
    inputSentenceTokenized = lower_case.split()

    algorithm = self.associater

    # Matrix-based matching works on the whole memory at once.
    if algorithm == MatchingAlgorithm.CosineTFIDF:
        self.calculateCosine(lower_case)
        return

    # The algorithm does not change while scanning the memory, so the
    # scoring method is picked once instead of once per entry.
    if algorithm == MatchingAlgorithm.Levensthein:
        scorer = self.calculateMatchRateLS
    elif algorithm == MatchingAlgorithm.Dice:
        scorer = self.calculateMatchRateDice
    elif algorithm == MatchingAlgorithm.Jaccard:
        scorer = self.calculateMatchRateJ
    elif algorithm == MatchingAlgorithm.Tanimoto:
        scorer = self.calculateMatchRateTanimoto
    elif algorithm == MatchingAlgorithm.Hamman:
        scorer = self.calculateMatchRateHamman
    elif algorithm == MatchingAlgorithm.Simpson:
        scorer = self.calculateMatchRateSimpson
    elif algorithm == MatchingAlgorithm.Kulczynski:
        scorer = self.calculateMatchRateKulczynski
    else:
        scorer = self.calculateMatchRate

    # items() and the single-argument print(...) below behave the same on
    # Python 2 and Python 3; iteritems() and the print statement do not
    # exist on Python 3.
    for cntr, (key, value) in enumerate(self.botsMemory.items(), 1):
        memoryTokens = value.lower().split()
        self.matchRate = scorer(inputSentenceTokenized, memoryTokens)

        tt = Thought()
        tt.MatchingMemory = value
        tt.MatchingRate = self.matchRate
        tt.PotentialResponse = key
        self.allThoughts.append(tt)

        if key not in self.matchedMemoryValues and self.matchRate != 0:
            self.matchedMemoryValues[key] = self.matchRate
            # NOTE(review): the debug trace is assumed to belong to the
            # non-zero-match branch -- confirm against the intended output.
            print("[" + str(cntr) + "] @" + str(self.matchRate) + " Matching: " + key)
def calculateCosine(self, idea):
    """
    Score every memory key against ``idea`` by cosine similarity.

    The keys of ``self.botsMemory`` form the corpus: a ``CountVectorizer``
    is fitted on them and the query is projected onto the same vocabulary.
    Only raw term counts are compared; no TF-IDF weighting is applied.

    For each key whose similarity is greater than zero a ``Thought`` is
    appended to ``self.allThoughts`` (the key as ``MatchingMemory``, the
    stored value as ``PotentialResponse``), the score is stored in
    ``self.matchedMemoryValues[key]`` and ``self.matchRate`` is set to it.

    ``self.matchedMemoryValues`` and ``self.allThoughts`` are always reset
    first.  An empty memory, or a query that shares no vocabulary with the
    memory, simply yields no matches.

    :param idea: query text to look up in the memory.
    """
    self.matchedMemoryValues = {}
    self.allThoughts = []

    train_set = list(self.botsMemory.keys())
    if not train_set:
        # Fitting the vectorizer on an empty corpus raises ValueError
        # (empty vocabulary); with nothing to compare there are no matches.
        return

    vectorizer = CountVectorizer(stop_words=None)
    memoryVectors = vectorizer.fit_transform(train_set).toarray()
    queryVector = vectorizer.transform([idea]).toarray()[0]

    # The query norm is the same for every comparison; a zero norm means
    # the query contains no known term, so every cosine would be 0/0.
    queryNorm = LA.norm(queryVector)
    if queryNorm == 0:
        return

    for key, memoryVector in zip(train_set, memoryVectors):
        memoryNorm = LA.norm(memoryVector)
        if memoryNorm == 0:
            # Key without any vocabulary term: similarity is undefined,
            # treat it as "no match" instead of dividing by zero.
            continue

        cosine = round(np.inner(memoryVector, queryVector) / (memoryNorm * queryNorm), 6)
        if cosine > 0.0:
            self.matchRate = cosine

            tt = Thought()
            tt.MatchingMemory = key
            tt.MatchingRate = cosine
            tt.PotentialResponse = self.botsMemory[key]
            self.allThoughts.append(tt)

            if key not in self.matchedMemoryValues:
                self.matchedMemoryValues[key] = self.matchRate