Python text_prepare示例，utils.text_prepare Python示例

示例#1

0

显示文件

    def generate_answer(self, question):
        """Answer `question` via the chit-chat bot or a StackOverflow thread.

        The question is passed through `text_prepare`, turned into features
        by `self.tfidf_vectorizer` and classified by `self.intent_recognizer`.
        A 'dialogue' intent returns `self.chitchat_bot.get_response(question)`;
        any other intent returns `self.ANSWER_TEMPLATE` filled with the
        predicted tag and the first element of the thread ranker's result.
        """
        # Intent recognition runs on features of the prepared question.
        question_features = self.tfidf_vectorizer.transform(
            [text_prepare(question)])
        predicted_intent = self.intent_recognizer.predict(question_features)[0]

        # Chit-chat part: the bot receives the original, unprepared question.
        if predicted_intent == 'dialogue':
            return self.chitchat_bot.get_response(question)

        # Goal-oriented part: the tag is predicted from the same features.
        predicted_tag = self.tag_classifier.predict(question_features)[0]

        # NOTE(review): the raw question (not the prepared one) is handed to
        # the ranker here -- confirm that get_best_thread prepares it itself.
        best_thread = self.thread_ranker.get_best_thread(
            question, predicted_tag)[0]
        return self.ANSWER_TEMPLATE % (predicted_tag, best_thread)

示例#2

0

显示文件

    def generate_answer(self, question):
        """Route `question` to the chatbot or to the thread ranker.

        The question is prepared with `utils.text_prepare`, vectorised with
        `self.tfidf_vectorizer` and classified by `self.intent_recognizer`.
        The predicted intent and, on the goal-oriented path, the predicted
        tag are printed. A 'dialogue' intent returns the chatbot's response
        to the raw question; otherwise `self.ANSWER_TEMPLATE` is filled with
        the tag and the ranker's result for the prepared question.
        """
        cleaned_question = utils.text_prepare(question)
        tfidf_features = self.tfidf_vectorizer.transform([cleaned_question])

        # The whole prediction result (not its first element) is kept,
        # printed and compared below.
        predicted_intent = self.intent_recognizer.predict(tfidf_features)
        print(predicted_intent)

        # Chit-chat part.
        if predicted_intent == 'dialogue':
            return self.chatbot.get_response(question)

        # Goal-oriented part: first predicted tag, then the ranked thread.
        predicted_tag = self.tag_classifier.predict(tfidf_features)[0]
        print(predicted_tag)
        best_thread = self.thread_ranker.get_best_thread(
            cleaned_question, predicted_tag)
        return self.ANSWER_TEMPLATE % (predicted_tag, best_thread)

示例#3

0

显示文件

文件： debug.py 项目： kirilcvetkov92/Deep-Learning

 def get_reply(self, session, input_sentence):
     """Encode `input_sentence`, run inference and return the decoded reply.

     The sentence is passed through `text_prepare`; each resulting item is
     mapped to its id in `word2id`, falling back to `start_symbol_id` for
     unknown items. The single-row batch is fed to `self.infer_predictions`
     via `session.run`, and the predicted ids (minus the last one) are
     joined with spaces after lookup in `id2word`.
     """
     tokens = text_prepare(input_sentence)

     # Unknown tokens are replaced by the start-symbol id.
     token_ids = []
     for token in tokens:
         token_ids.append(
             word2id[token] if token in word2id else start_symbol_id)
     batch = np.array([token_ids])

     feed_dict = {
         self.input_batch: batch,
         self.input_batch_lengths: np.array([len(tokens)]),
         # NOTE(review): 15 is presumably the maximum reply length -- confirm.
         self.ground_truth_lengths: np.array([15]),
     }
     outputs = session.run([self.infer_predictions], feed_dict=feed_dict)

     # First fetched result, first batch row, final id dropped.
     predicted_ids = outputs[0][0][:-1]
     return " ".join(id2word[idx] for idx in predicted_ids)

示例#4

0

显示文件

    def get_response(self, session, input_sentence):
        """Encode `input_sentence`, run inference and return the decoded reply.

        The sentence is passed through `text_prepare`; each resulting item is
        mapped to its id in `word2id`, with `start_symbol_id` standing in for
        items missing from the vocabulary. The single-row batch is fed to
        `self.infer_predictions` via `session.run`, and the predicted ids
        (minus the last one) are looked up in `id2word` and joined with
        spaces.

        Args:
            session: Object whose `run` method evaluates the prediction op.
            input_sentence: Raw user text.

        Returns:
            The reply as a space-separated string.
        """
        sentence = text_prepare(input_sentence)

        # Unknown items fall back to the start-symbol id.
        row = [word2id[word] if word in word2id else start_symbol_id
               for word in sentence]
        X = np.array([row])

        feed_dict = {
            self.input_batch: X,
            # The length must describe the encoded row (the prepared
            # sentence), not the raw input string, otherwise it can disagree
            # with the width of X.
            self.input_batch_lengths: np.array([len(row)]),
        }
        pred = session.run([self.infer_predictions], feed_dict=feed_dict)

        # First fetched result, first batch row, final id dropped.
        return " ".join([id2word[index] for index in pred[0][0][:-1]])

示例#5

0

显示文件

文件： dialogue_manager.py 项目： crystosis/coursera_advanced_machine_learning

    def generate_answer(self, question):
        """
        Combines StackOverflow and chitchat parts using intent recognition.

        Parameters
        ----------
        question : str
            The question asked

        Returns
        -------
        str
            The answer: the chatbot's response for an empty prepared
            question or a 'dialogue' intent, otherwise `ANSWER_TEMPLATE`
            formatted with the predicted tag and the ranked thread.
        """
        prepared_question = text_prepare(question)

        if not prepared_question:
            # Nothing is left after text preparation, so there is nothing to
            # vectorise; fall back to the chatbot with the raw question.
            return self.chatbot.get_response(question)

        # The vectorizer takes an iterable of documents. Wrap the prepared
        # question in a list so it is vectorised as ONE document; splitting
        # it into words would yield one feature row per word, and the `[0]`
        # below would then classify only the first word.
        features = self.tfidf_vectorizer.transform([prepared_question])
        intent = self.intent_recognizer.predict(features)[0]

        # Chit-chat part: pass the raw question to the chatbot.
        if intent == 'dialogue':
            return self.chatbot.get_response(question)

        # Goal-oriented part: `[0]` because only the first tag is wanted.
        tag = self.tag_classifier.predict(features)[0]

        # NOTE(review): the raw question (not prepared_question) is passed to
        # the ranker -- confirm that get_best_thread prepares it itself.
        thread_id = self.thread_ranker.get_best_thread(question, tag)

        return self.ANSWER_TEMPLATE.format(tag, thread_id)

示例#6

0

显示文件

    def generate_answer(self, question):
        """Combines stackoverflow and chitchat parts using intent recognition.

        The question is prepared with `text_prepare`, vectorised with
        `self.tfidf_vectorizer` and classified by `self.intent_recognizer`;
        progress messages are printed around each step. For a 'dialogue'
        intent the `.text` of the chatbot's response is returned. Otherwise
        the first predicted tag (with the backslash-separated C/C++ tag
        rewritten to 'c++') and the ranker's result for the prepared question
        are substituted into `self.ANSWER_TEMPLATE`.
        """
        # Recognize intent of the question using `intent_recognizer`.
        print('estoy generando la respuesta...')
        print('entro en -> text_prepare()')
        prepared_question = text_prepare(question)
        print('salgo de -> text_prepare()')
        print('entro en -> features')
        features = self.tfidf_vectorizer.transform([prepared_question])
        print('salgo de -> features')
        print('entro en -> intent')
        intent = self.intent_recognizer.predict(features)
        print('salgo de -> intent')

        # Chit-chat part:
        if intent == 'dialogue':
            # Pass question to chitchat_bot to generate a response.
            response = self.chatbot.get_response(question)
            print('dialogue ->', response.text)
            return response.text

        # Goal-oriented part:
        else:
            # Pass features to tag_classifier to get predictions.
            tag = self.tag_classifier.predict(features)[0]
            # The tag literal is `c`, a backslash, then `c++`. The backslash
            # is escaped explicitly: a bare `\c` is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            tag = 'c++' if (tag == 'c\\c++') else tag
            # Pass prepared_question to thread_ranker to get predictions.
            thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)
            print('programming ->', thread_id)
            return self.ANSWER_TEMPLATE % (tag, thread_id)

示例#7

0

显示文件

文件： dialogue_manager.py 项目： schaber/natural-language-processing

    def generate_answer(self, question):
        """Answer `question` with a canned reply, the chit-chat bot or a thread.

        The question is prepared, vectorised and classified first. Two
        exact (case-insensitive, whitespace-stripped) inputs then get a
        hard-coded reply, and "what is ai?" is forced onto the dialogue
        path. A 'dialogue' intent returns the chit-chat bot's response to
        the raw question; otherwise `self.ANSWER_TEMPLATE` is filled with
        the first predicted tag and the ranker's result for the prepared
        question.
        """
        cleaned_question = text_prepare(question)
        question_features = self.tfidf_vectorizer.transform([cleaned_question])
        predicted_intent = self.intent_recognizer.predict(question_features)

        # Hard-coded overrides, matched on the lower-cased, stripped input.
        canned_replies = {
            "hey": 'Hi, how are you?',
            "what's your hobby?":
                'I enjoy spending time with my family and riding bikes.',
        }
        normalized = question.lower().strip()
        if normalized in canned_replies:
            return canned_replies[normalized]
        if normalized == "what is ai?":
            predicted_intent = 'dialogue'

        # Chit-chat part: the bot answers the raw question.
        if predicted_intent == 'dialogue':
            return self.chitchat_bot.get_response(question)

        # Goal-oriented part: tag from the features, thread from the ranker.
        predicted_tag = self.tag_classifier.predict(question_features)[0]
        best_thread = self.thread_ranker.get_best_thread(
            cleaned_question, predicted_tag)
        return self.ANSWER_TEMPLATE % (predicted_tag, best_thread)

示例#8

0

显示文件

文件： train_models.py 项目： julywater/stackoverflow_assistant_chatbot

    """Performs TF-IDF transformation and dumps the model."""
    tfidf_vectorizer=TfidfVectorizer(use_idf=True,ngram_range=(1,2),min_df=0.00005,max_df=0.9,token_pattern='(\S+)')
    fitted_vectorizer=tfidf_vectorizer.fit(X_train)
    file = open(vectorizer_path, 'wb')
    pickle.dump(fitted_vectorizer, file)
    file.close()
    X_train=fitted_vectorizer.transform(X_train) 
    X_test=fitted_vectorizer.transform(X_test)
    return X_train, X_test
# Draw the same number of rows from each corpus, with a fixed seed.
sample_size = 200000
dialogue_df = pd.read_csv('data/dialogues.tsv', sep='\t').sample(sample_size, random_state=0)
stackoverflow_df = pd.read_csv('data/tagged_posts.tsv', sep='\t').sample(sample_size, random_state=0)
# Notebook leftovers: outside a notebook these calls display nothing.
dialogue_df.head()
stackoverflow_df.head()
from utils import text_prepare
# Replace the raw text columns with their prepared versions.
dialogue_df['text'] = [text_prepare(t) for t in dialogue_df['text']] 
stackoverflow_df['title'] = [text_prepare(t) for t in stackoverflow_df['title']]
from sklearn.model_selection import train_test_split
# One sample: dialogue texts first, then StackOverflow titles, with labels
# built in the same order.
X = np.concatenate([dialogue_df['text'].values, stackoverflow_df['title'].values])
y = ['dialogue'] * dialogue_df.shape[0] + ['stackoverflow'] * stackoverflow_df.shape[0]

# Hold out 10% of the sample for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
print('Train size = {}, test size = {}'.format(len(X_train), len(X_test)))

# Vectorise both splits; the third argument is the path handed to
# tfidf_features (presumably where the fitted vectorizer is stored -- confirm).
X_train_tfidf, X_test_tfidf = tfidf_features(X_train,X_test,'tfidf_vectorizer.pkl') 
# Train the intent recognizer with LogisticRegression on the train set and
# predict labels for the test set.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
intent_recognizer=LogisticRegression(penalty='l2',C=10,random_state=0)
intent_recognizer.fit(X_train_tfidf,y_train)
y_test_pred = intent_recognizer.predict(X_test_tfidf)

示例#9

0

显示文件

文件： week5-project.py 项目： prannerta100/nlp-chatbot

#dialogue_df.head()

# In[6]:

#stackoverflow_df.head()

# Apply the *text_prepare* function to preprocess the data:

# In[7]:

from utils import text_prepare

# In[10]:

# Replace each raw text column with the list of its prepared values.
dialogue_df['text'] = [text_prepare(x) for x in dialogue_df['text']
                       ]  # prepared dialogue texts
stackoverflow_df['title'] = [
    text_prepare(x) for x in stackoverflow_df['title']
]  # prepared StackOverflow titles

# ### Intent recognition

# We will do a binary classification on TF-IDF representations of texts.
# Labels will be either `dialogue` for general questions or `stackoverflow`
# for programming-related questions. First, prepare the data for this task:
# - concatenate `dialogue` and `stackoverflow` examples into one sample
# - split it into train and test in proportion 9:1, use *random_state=0* for reproducibility
# - transform it into TF-IDF features

# In[11]:

from sklearn.model_selection import train_test_split

示例#10

0

显示文件

    def generate_answer(self, question):
        """Return the chatbot's response to the prepared form of `question`."""
        # The chatbot is given the output of utils.text_prepare, not the raw text.
        return self.chatbot.get_response(utils.text_prepare(question))

示例#11

0

显示文件

文件： week5-project.py 项目： nehatayade18/ChatBot

# Notebook leftover: outside a notebook this call displays nothing.
dialogue_df.head()

# In[ ]:

stackoverflow_df.head()

# Apply the *text_prepare* function to preprocess the data:

# In[ ]:

from utils import text_prepare

# In[ ]:

# Pass text_prepare directly to apply; wrapping it in a lambda that only
# forwards its argument adds a call per row without changing the result.
dialogue_df['text'] = dialogue_df['text'].apply(text_prepare)
stackoverflow_df['title'] = stackoverflow_df['title'].apply(text_prepare)

# ### Intent recognition

# We will do a binary classification on TF-IDF representations of texts.
# Labels will be either `dialogue` for general questions or `stackoverflow`
# for programming-related questions. First, prepare the data for this task:
# - concatenate `dialogue` and `stackoverflow` examples into one sample
# - split it into train and test in proportion 9:1, use *random_state=0* for reproducibility
# - transform it into TF-IDF features

# In[ ]:

from sklearn.model_selection import train_test_split

# In[ ]:

示例#12

0

显示文件

文件： week5_project.py 项目： tacitvenom/coursera_nlp

# Draw the same number of rows from each corpus, with a fixed seed.
sample_size = 200000

dialogue_df = pd.read_csv('data/dialogues.tsv', sep='\t').sample(sample_size, random_state=0)
stackoverflow_df = pd.read_csv('data/tagged_posts.tsv', sep='\t').sample(sample_size, random_state=0)

"""Check how the data look like:"""

# Notebook leftovers: outside a notebook these calls display nothing.
dialogue_df.head()

stackoverflow_df.head()

"""Apply *text_prepare* function to preprocess the data:"""

from utils import text_prepare

dialogue_df['text'] = dialogue_df['text'].map(lambda x: text_prepare(x.strip())) # strip surrounding whitespace, then prepare
stackoverflow_df['title'] = stackoverflow_df['title'].map(lambda x: text_prepare(x.strip())) # strip surrounding whitespace, then prepare

"""### Intent recognition

We will do a binary classification on TF-IDF representations of texts. Labels will be either `dialogue` for general questions or `stackoverflow` for programming-related questions. First, prepare the data for this task:
- concatenate `dialogue` and `stackoverflow` examples into one sample
- split it into train and test in proportion 9:1, use *random_state=0* for reproducibility
- transform it into TF-IDF features
"""

from sklearn.model_selection import train_test_split

# One sample: dialogue texts first, then StackOverflow titles, with labels
# built in the same order.
X = np.concatenate([dialogue_df['text'].values, stackoverflow_df['title'].values])
y = ['dialogue'] * dialogue_df.shape[0] + ['stackoverflow'] * stackoverflow_df.shape[0]