def generate_answer(self, question):
    """Combines stackoverflow and chitchat parts using intent recognition.

    The question is passed through `text_prepare`, vectorised with
    `self.tfidf_vectorizer` and classified by `self.intent_recognizer`.
    A 'dialogue' intent is answered by `self.chitchat_bot`; any other
    intent is answered with a tag from `self.tag_classifier` and a thread
    id from `self.thread_ranker`.

    Parameters
    ----------
    question
        The user's question as raw text.

    Returns
    -------
    The chit-chat bot's response for a 'dialogue' intent, otherwise
    `self.ANSWER_TEMPLATE` %-formatted with the tag and the thread id.
    """
    # Prepare the question and compute its features. The vectorizer takes
    # a collection of documents, hence the one-element list.
    prepared_question = text_prepare(question)
    features = self.tfidf_vectorizer.transform([prepared_question])

    # Only one document was vectorised, so take the first prediction.
    intent = self.intent_recognizer.predict(features)[0]

    # Chit-chat part: the bot receives the original, unprepared question.
    if intent == 'dialogue':
        return self.chitchat_bot.get_response(question)

    # Goal-oriented part: pass features to tag_classifier to get the tag.
    tag = self.tag_classifier.predict(features)[0]

    # Pass the *prepared* question to thread_ranker so it is normalised the
    # same way as the text the features were computed from.
    # NOTE(review): the [0] assumes get_best_thread returns an indexable
    # result -- confirm against the ranker implementation.
    thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)[0]
    return self.ANSWER_TEMPLATE % (tag, thread_id)
def generate_answer(self, question):
    """Combines stackoverflow and chitchat parts using intent recognition.

    Parameters
    ----------
    question
        The user's question as raw text.

    Returns
    -------
    The response of `self.chatbot` for a 'dialogue' intent, otherwise
    `self.ANSWER_TEMPLATE` %-formatted with the predicted tag and the
    result of `self.thread_ranker.get_best_thread`.
    """
    # Prepare the question and calculate its features; the vectorizer takes
    # a collection of documents, hence the one-element list.
    prepared_question = utils.text_prepare(question)
    features = self.tfidf_vectorizer.transform([prepared_question])

    # Intent recognition. Take the single prediction out of the returned
    # collection so the comparison below is a plain label comparison,
    # matching how `tag` is extracted further down.
    intent = self.intent_recognizer.predict(features)[0]
    print(intent)  # debug trace

    # Chit-chat part: the bot receives the original, unprepared question.
    if intent == 'dialogue':
        return self.chatbot.get_response(question)

    # Goal-oriented part: pass features to tag_classifier to get the tag.
    tag = self.tag_classifier.predict(features)[0]
    print(tag)  # debug trace

    # Pass prepared_question to thread_ranker to get the best thread.
    thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)
    return self.ANSWER_TEMPLATE % (tag, thread_id)
def get_reply(self, session, input_sentence):
    """Produce a reply to `input_sentence` by evaluating `self.infer_predictions`.

    Parameters
    ----------
    session
        Object whose `run` method evaluates `self.infer_predictions`
        (presumably a TensorFlow session -- confirm).
    input_sentence
        Raw input text; it is passed through `text_prepare` before encoding.

    Returns
    -------
    str
        The `id2word` entries of the first predicted sequence, joined with
        spaces, with the last predicted id left out.
    """
    tokens = text_prepare(input_sentence)

    # Encode each item; anything missing from the vocabulary falls back to
    # the start symbol id.
    encoded = [word2id.get(token, start_symbol_id) for token in tokens]
    batch = np.array([encoded])

    feed = {
        self.input_batch: batch,
        self.input_batch_lengths: np.array([len(tokens)]),
        # NOTE(review): 15 is presumably the maximum reply length -- confirm.
        self.ground_truth_lengths: np.array([15]),
    }
    outputs = session.run([self.infer_predictions], feed_dict=feed)

    # First fetched value, first sequence of the batch; the final id is
    # dropped (presumably the end-of-sequence symbol -- confirm).
    predicted_ids = outputs[0][0][:-1]
    return " ".join(id2word[idx] for idx in predicted_ids)
def get_response(self, session, input_sentence):
    """Produce a reply to `input_sentence` by evaluating `self.infer_predictions`.

    Parameters
    ----------
    session
        Object whose `run` method evaluates `self.infer_predictions`
        (presumably a TensorFlow session -- confirm).
    input_sentence
        Raw input text; it is passed through `text_prepare` before encoding.

    Returns
    -------
    str
        The `id2word` entries of the first predicted sequence, joined with
        spaces, with the last predicted id left out.
    """
    sentence = text_prepare(input_sentence)

    # Encode each item; anything missing from the vocabulary falls back to
    # the start symbol id.
    row = [word2id.get(word, start_symbol_id) for word in sentence]

    feed_dict = {
        self.input_batch: np.array([row]),
        # The length must describe the encoded row. It was previously taken
        # from the raw, unprepared input, which can differ from the number
        # of ids actually fed to the model.
        self.input_batch_lengths: np.array([len(row)]),
    }
    pred = session.run([self.infer_predictions], feed_dict=feed_dict)

    # First fetched value, first sequence of the batch; the final id is
    # dropped (presumably the end-of-sequence symbol -- confirm).
    return " ".join(id2word[index] for index in pred[0][0][:-1])
def generate_answer(self, question):
    """
    Combines StackOverflow and chitchat parts using intent recognition.

    Parameters
    ----------
    question : str
        The question asked

    Returns
    -------
    str
        The answer
    """
    # Recognize intent of the question using `intent_recognizer`: prepare
    # the question first, then calculate its features.
    prepared_question = text_prepare(question)
    if not prepared_question:
        # Text preparation left nothing to vectorise, so TF-IDF won't work;
        # fall back to the chit-chat bot.
        return self.chatbot.get_response(question)

    # The vectorizer treats every element of its input as one document.
    # Wrap the whole prepared question in a list so it is vectorised as a
    # single document; splitting it would yield one row per word and the
    # [0] below would classify only the first word.
    features = self.tfidf_vectorizer.transform([prepared_question])
    intent = self.intent_recognizer.predict(features)[0]

    # Chit-chat part: pass the original question to the chit-chat bot.
    if intent == 'dialogue':
        return self.chatbot.get_response(question)

    # Goal-oriented part: pass features to tag_classifier to get the tag.
    # [0] selects the prediction for the single document.
    tag = self.tag_classifier.predict(features)[0]

    # Pass prepared_question to thread_ranker so it sees the same normalised
    # text the features were computed from.
    thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)
    return self.ANSWER_TEMPLATE.format(tag, thread_id)
def generate_answer(self, question):
    """Combines stackoverflow and chitchat parts using intent recognition.

    Progress traces are printed to stdout at every step.

    Parameters
    ----------
    question
        The user's question as raw text.

    Returns
    -------
    The `.text` of the chatbot response for a 'dialogue' intent, otherwise
    `self.ANSWER_TEMPLATE` %-formatted with the tag and the result of
    `self.thread_ranker.get_best_thread`.
    """
    # The trace messages below are runtime output and are kept verbatim.
    print('estoy generando la respuesta...')

    # Prepare the question.
    print('entro en -> text_prepare()')
    prepared_question = text_prepare(question)
    print('salgo de -> text_prepare()')

    # Calculate features; the vectorizer takes a collection of documents.
    print('entro en -> features')
    features = self.tfidf_vectorizer.transform([prepared_question])
    print('salgo de -> features')

    # Recognize intent. Take the single prediction out of the returned
    # collection so the comparison below is a plain label comparison,
    # matching how `tag` is extracted further down.
    print('entro en -> intent')
    intent = self.intent_recognizer.predict(features)[0]
    print('salgo de -> intent')

    # Chit-chat part: pass the original question to the chatbot.
    if intent == 'dialogue':
        response = self.chatbot.get_response(question)
        print('dialogue ->', response.text)
        return response.text

    # Goal-oriented part: pass features to tag_classifier to get the tag.
    tag = self.tag_classifier.predict(features)[0]
    # Report the combined "c\c++" label as plain "c++". The backslash is
    # escaped explicitly: a bare "\c" is an invalid escape sequence.
    tag = 'c++' if tag == 'c\\c++' else tag

    # Pass prepared_question to thread_ranker to get the best thread.
    thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)
    print('programming ->', thread_id)
    return self.ANSWER_TEMPLATE % (tag, thread_id)
def generate_answer(self, question):
    """Combines stackoverflow and chitchat parts using intent recognition.

    A few exact questions (compared case-insensitively, ignoring
    surrounding whitespace) are special-cased: two get a fixed reply and
    one is always routed to the chit-chat bot.

    Parameters
    ----------
    question : str
        The user's question as raw text.

    Returns
    -------
    A fixed reply, the response of `self.chitchat_bot`, or
    `self.ANSWER_TEMPLATE` %-formatted with the predicted tag and the
    result of `self.thread_ranker.get_best_thread`.
    """
    # Normalise once instead of once per comparison.
    normalized = question.lower().strip()

    # Fixed replies need no model inference, so answer them straight away.
    canned_replies = {
        "hey": 'Hi, how are you?',
        "what's your hobby?":
            'I enjoy spending time with my family and riding bikes.',
    }
    if normalized in canned_replies:
        return canned_replies[normalized]

    # Prepare the question and calculate its features; the vectorizer takes
    # a collection of documents, hence the one-element list.
    prepared_question = text_prepare(question)
    features = self.tfidf_vectorizer.transform([prepared_question])

    if normalized == "what is ai?":
        # This question is always treated as chit-chat.
        intent = 'dialogue'
    else:
        # Take the single prediction out of the returned collection so the
        # comparison below is a plain label comparison.
        intent = self.intent_recognizer.predict(features)[0]

    # Chit-chat part: pass the original question to the chit-chat bot.
    if intent == 'dialogue':
        return self.chitchat_bot.get_response(question)

    # Goal-oriented part: pass features to tag_classifier to get the tag.
    tag = self.tag_classifier.predict(features)[0]

    # Pass prepared_question to thread_ranker to get the best thread.
    thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)
    return self.ANSWER_TEMPLATE % (tag, thread_id)
"""Performs TF-IDF transformation and dumps the model.""" tfidf_vectorizer=TfidfVectorizer(use_idf=True,ngram_range=(1,2),min_df=0.00005,max_df=0.9,token_pattern='(\S+)') fitted_vectorizer=tfidf_vectorizer.fit(X_train) file = open(vectorizer_path, 'wb') pickle.dump(fitted_vectorizer, file) file.close() X_train=fitted_vectorizer.transform(X_train) X_test=fitted_vectorizer.transform(X_test) return X_train, X_test sample_size = 200000 dialogue_df = pd.read_csv('data/dialogues.tsv', sep='\t').sample(sample_size, random_state=0) stackoverflow_df = pd.read_csv('data/tagged_posts.tsv', sep='\t').sample(sample_size, random_state=0) dialogue_df.head() stackoverflow_df.head() from utils import text_prepare dialogue_df['text'] = [text_prepare(t) for t in dialogue_df['text']] stackoverflow_df['title'] = [text_prepare(t) for t in stackoverflow_df['title']] from sklearn.model_selection import train_test_split X = np.concatenate([dialogue_df['text'].values, stackoverflow_df['title'].values]) y = ['dialogue'] * dialogue_df.shape[0] + ['stackoverflow'] * stackoverflow_df.shape[0] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0) print('Train size = {}, test size = {}'.format(len(X_train), len(X_test))) X_train_tfidf, X_test_tfidf = tfidf_features(X_train,X_test,'tfidf_vectorizer.pkl') # Train the **intent recognizer** using LogisticRegression on the train set from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score intent_recognizer=LogisticRegression(penalty='l2',C=10,random_state=0) intent_recognizer.fit(X_train_tfidf,y_train) y_test_pred = intent_recognizer.predict(X_test_tfidf)
# Optional data preview (disabled): dialogue_df.head(), stackoverflow_df.head()

# Apply the *text_prepare* function to preprocess the data:
from utils import text_prepare

dialogue_df['text'] = list(map(text_prepare, dialogue_df['text']))
stackoverflow_df['title'] = list(map(text_prepare, stackoverflow_df['title']))

# ### Intent recognition
# We will do a binary classification on TF-IDF representations of texts.
# Labels will be either `dialogue` for general questions or `stackoverflow`
# for programming-related questions. First, prepare the data for this task:
# - concatenate `dialogue` and `stackoverflow` examples into one sample
# - split it into train and test in proportion 9:1, use *random_state=0*
#   for reproducibility
# - transform it into TF-IDF features
from sklearn.model_selection import train_test_split
def generate_answer(self, question):
    """Return the chatbot's response to the prepared question.

    The question is passed through `utils.text_prepare` and the result is
    handed to `self.chatbot.get_response`, whose return value is returned
    unchanged.
    """
    return self.chatbot.get_response(utils.text_prepare(question))
# Data preview; these calls only display something in a notebook and their
# results are discarded when run as a script.
dialogue_df.head()
stackoverflow_df.head()

# Apply the *text_prepare* function to preprocess the data:
from utils import text_prepare

dialogue_df['text'] = dialogue_df['text'].apply(text_prepare)
stackoverflow_df['title'] = stackoverflow_df['title'].apply(text_prepare)

# ### Intent recognition
# We will do a binary classification on TF-IDF representations of texts.
# Labels will be either `dialogue` for general questions or `stackoverflow`
# for programming-related questions. First, prepare the data for this task:
# - concatenate `dialogue` and `stackoverflow` examples into one sample
# - split it into train and test in proportion 9:1, use *random_state=0*
#   for reproducibility
# - transform it into TF-IDF features
from sklearn.model_selection import train_test_split
# Number of rows sampled from each data set.
sample_size = 200000

# Load each data set, then draw a fixed-seed sample from it.
dialogue_df = pd.read_csv('data/dialogues.tsv', sep='\t')
dialogue_df = dialogue_df.sample(sample_size, random_state=0)
stackoverflow_df = pd.read_csv('data/tagged_posts.tsv', sep='\t')
stackoverflow_df = stackoverflow_df.sample(sample_size, random_state=0)

# Check what the data looks like. These calls only display something in a
# notebook; their results are discarded when run as a script.
dialogue_df.head()
stackoverflow_df.head()

# Apply the *text_prepare* function to preprocess the data (surrounding
# whitespace is stripped from each text first):
from utils import text_prepare

dialogue_df['text'] = dialogue_df['text'].map(
    lambda raw: text_prepare(raw.strip()))
stackoverflow_df['title'] = stackoverflow_df['title'].map(
    lambda raw: text_prepare(raw.strip()))

# ### Intent recognition
# We will do a binary classification on TF-IDF representations of texts.
# Labels will be either `dialogue` for general questions or `stackoverflow`
# for programming-related questions. First, prepare the data for this task:
# - concatenate `dialogue` and `stackoverflow` examples into one sample
# - split it into train and test in proportion 9:1, use *random_state=0*
#   for reproducibility
# - transform it into TF-IDF features
from sklearn.model_selection import train_test_split

# Dialogue texts come first, then StackOverflow titles; the labels in `y`
# follow the same order.
X = np.concatenate([dialogue_df['text'].values,
                    stackoverflow_df['title'].values])
y = ['dialogue'] * len(dialogue_df) + ['stackoverflow'] * len(stackoverflow_df)