def get_statements(self):
    """
    Collect statements built from tweets that are replies to other tweets.

    A random word is produced by ``self.random_word`` and used as the
    search term for up to 50 tweets in ``self.lang``. For every tweet
    that replies to another status, the replied-to status is fetched and
    attached to the tweet's statement as a response. Tweets that are not
    replies, or whose parent status cannot be fetched, are left out.

    :returns: list of statements that each carry one response.
    """
    from twitter import TwitterError

    collected = []

    # The search term is a randomly generated word
    search_term = self.random_word(self.random_seed_word, self.lang)

    self.logger.info(
        u'Requesting 50 random tweets containing the word {}'.format(
            search_term))

    found_tweets = self.api.GetSearch(
        term=search_term, count=50, lang=self.lang)

    for tweet in found_tweets:
        statement = Statement(tweet.text)

        # Only tweets that reply to another status are of interest
        if not tweet.in_reply_to_status_id:
            continue

        try:
            parent = self.api.GetStatus(tweet.in_reply_to_status_id)
            statement.add_response(Response(parent.text))
            collected.append(statement)
        except TwitterError as error:
            # A failed lookup is logged and the tweet is skipped
            self.logger.warning(str(error))

    self.logger.info('Adding {} tweets with responses'.format(
        len(collected)))

    return collected
def train(self, *corpus_paths):
    """
    Train the chat bot from one or more corpus paths.

    The paths may be given as separate positional arguments or as a
    single list. Within each conversation, every statement is tagged with
    the corpus categories, linked to the statement before it (when there
    is one) and saved through ``self.storage``.
    """
    # A lone list argument is treated as the list of corpus paths
    if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
        corpus_paths = corpus_paths[0]

    for path in corpus_paths:
        loaded_corpora = self.corpus.load_corpus(path)
        file_names = self.corpus.list_corpus_files(path)

        for file_index, corpus in enumerate(loaded_corpora):
            for conversation_number, conversation in enumerate(corpus, 1):

                if self.show_training_progress:
                    # Label the progress bar with the corpus file name
                    label = str(
                        os.path.basename(file_names[file_index])
                    ) + ' Training'
                    utils.print_progress_bar(
                        label, conversation_number, len(corpus))

                last_text = None

                for text in conversation:
                    statement = self.get_or_create(text)
                    statement.add_tags(corpus.categories)

                    # Each statement responds to the one before it
                    if last_text:
                        statement.add_response(Response(last_text))

                    last_text = statement.text
                    self.storage.update(statement)
def learn_response(self, statement, previous_statement):
    """
    Record ``statement`` as a valid response to ``previous_statement``.

    When ``previous_statement`` is truthy, its text is added to
    ``statement`` as a response and the pairing is logged. The statement
    is written to storage in either case.
    """
    from chatter.chatterbot.conversation import Response

    if previous_statement:
        learned = Response(previous_statement.text)
        statement.add_response(learned)

        message = 'Adding "{}" as a response to "{}"'.format(
            statement.text, previous_statement.text)
        self.logger.info(message)

    # Persist the statement whether or not a response was attached
    self.storage.update(statement)
def deserialize_responses(self, response_list):
    """
    Takes the list of response items and returns
    the list converted to Response objects.

    :param response_list: iterable of dicts. Each dict must have a
        ``'text'`` key; every other key is passed to the response model
        as a keyword argument.
    :returns: the ``in_response_to`` attribute of a temporary empty
        statement after all responses have been added to it.
    :raises KeyError: if an item has no ``'text'`` key.

    The dicts in ``response_list`` are not modified.
    """
    Statement = self.get_model('statement')
    Response = self.get_model('response')

    # Throwaway statement used only to accumulate the responses
    proxy_statement = Statement('')

    for response in response_list:
        # Work on a copy so that removing 'text' does not alter the
        # caller's data (which would break a second deserialization).
        extra_fields = dict(response)
        text = extra_fields.pop('text')

        proxy_statement.add_response(Response(text, **extra_fields))

    return proxy_statement.in_response_to
def train(self):
    """
    Train the chat bot from the tab-separated files of the downloaded
    corpus.

    The corpus archive is downloaded via ``self.download`` and extracted
    unless ``self.is_extracted`` reports that the extracted directory is
    already present. Every ``*.tsv`` file two directory levels below
    ``self.extracted_data_directory`` is then read row by row. Column 3
    of a row is the statement text; columns 0, 1 and 2 are stored as the
    ``datetime``, ``speaker`` and ``addressing_speaker`` extra data (the
    last only when non-blank). Within a file, each statement is recorded
    as a response to the previous one and saved through ``self.storage``.

    Rows with fewer than four columns are skipped.
    """
    import csv
    import glob

    # Download and extract the Ubuntu dialog corpus if needed
    corpus_download_path = self.download(self.data_download_url)

    # Extract if the directory does not already exist
    if not self.is_extracted(self.extracted_data_directory):
        self.extract(corpus_download_path)

    extracted_corpus_path = os.path.join(
        self.extracted_data_directory,
        '**', '**', '*.tsv'
    )

    for tsv_path in glob.iglob(extracted_corpus_path):
        self.logger.info('Training from: {}'.format(tsv_path))

        # newline='' lets the csv module do its own line-ending handling
        with open(tsv_path, 'r', encoding='utf-8', newline='') as tsv:
            reader = csv.reader(tsv, delimiter='\t')

            # Conversations do not carry over from one file to the next
            previous_statement_text = None

            for row in reader:
                # Blank or truncated rows have no text column to read
                if len(row) < 4:
                    continue

                text = row[3]
                statement = self.get_or_create(text)

                statement.add_extra_data('datetime', row[0])
                statement.add_extra_data('speaker', row[1])

                if row[2].strip():
                    statement.add_extra_data('addressing_speaker', row[2])

                if previous_statement_text:
                    statement.add_response(
                        Response(previous_statement_text)
                    )

                previous_statement_text = statement.text
                self.storage.update(statement)
def train(self, conversation):
    """
    Train the chat bot from a list of statement texts that together
    form one conversation.

    Every text is turned into a statement, recorded as a response to the
    text before it (when there is one), and saved through
    ``self.storage``. A progress bar is printed when
    ``self.show_training_progress`` is set.
    """
    last_text = None

    for position, text in enumerate(conversation, 1):
        if self.show_training_progress:
            utils.print_progress_bar(
                'List Trainer', position, len(conversation))

        statement = self.get_or_create(text)

        # Link this statement to the one that preceded it
        if last_text:
            statement.add_response(Response(last_text))

        last_text = statement.text
        self.storage.update(statement)