def AnalyzeResultforQuestionWithIDF(utterence, no):
    """Score every stored question against ``utterence`` and return the best match.

    The utterance is vectorized with ``wordvec.vectorize_query`` (using the
    module-level ``IDFset``) and compared to each entry of
    ``Question_vectors`` with cosine distance. All scores are written to an
    Excel workbook named ``TestSet<no>.xlsx`` for inspection.

    Args:
        utterence: The raw query text.
        no: Suffix used in the output workbook name. Any value that can be
            formatted into a string is accepted (e.g. ``"3"`` or ``3``).

    Returns:
        A tuple ``(query, question, answer, score)`` where ``query`` is
        ``Data_Cleaner(utterence)``, ``question`` / ``answer`` are the
        ``TrainingSet`` entries at the position of the smallest score, and
        ``score`` is that smallest score.

    Raises:
        ValueError: If ``Question_vectors`` is empty, so there is nothing to
            pick a best match from.
    """
    query_vector = wordvec.vectorize_query(utterence, IDFset)
    query = Data_Cleaner(utterence)

    scores = []
    processed_questions = []
    for i in range(len(Question_vectors)):
        score = spatial.distance.cosine(np.array(Question_vectors[i]), query_vector)
        processed_questions.append(Data_Cleaner(TrainingSet.Question[i]))
        # A NaN distance is recorded as 1 so it can still take part in the
        # minimum search below.
        scores.append(1 if math.isnan(score) else score)

    report = pd.DataFrame(
        list(zip(TrainingSet.Question, processed_questions, scores)),
        columns=['Question', 'Processed Question', 'Score'],
    )
    # The workbook name depends only on ``no``; the f-string also accepts
    # non-string values such as integers.
    report.to_excel(f"TestSet{no}.xlsx")

    if not scores:
        raise ValueError("Question_vectors is empty; no question to match against")

    best_score = np.min(scores)
    # list.index returns the first position holding the minimum score.
    index = scores.index(best_score)
    print("Index", index)
    answer = TrainingSet.Answer[index]
    question = TrainingSet.Question[index]
    return query, question, answer, best_score
def getAnswer(utterence):
    """Return the stored question and answer closest to ``utterence``.

    The utterance is vectorized with ``wordvec.vectorize_query`` (using the
    module-level ``IDFset``) and compared to every entry of
    ``Question_vectors`` with Euclidean distance. The entry with the smallest
    distance wins; ties go to the earliest position.

    Args:
        utterence: The raw query text.

    Returns:
        A tuple ``(question, answer)`` taken from ``TrainingSet`` at the
        position of the smallest distance.
    """
    query_vector = wordvec.vectorize_query(utterence, IDFset)

    raw_distances = [
        spatial.distance.euclidean(np.array(Question_vectors[pos]), query_vector)
        for pos in range(len(Question_vectors))
    ]
    # NaN distances are recorded as 1 so they can take part in the minimum.
    scores = [1 if math.isnan(dist) else dist for dist in raw_distances]

    index = scores.index(np.min(scores))
    print("Index", index)

    answer = TrainingSet.Answer[index]
    question = TrainingSet.Question[index]
    return question, answer