def run(self, execution_context):
    """Predict a word for the event's text, forward it to the bot, and
    finish with both a human-readable summary and the bot response.

    Raises:
        ValueError: if the event carries no 'text' entry.
    """
    filtered_word_types = self.get_config('train.filtered_word_types')
    data_name = execution_context.event.get('data_name', 'default')
    text = execution_context.event.get('text')
    if text is None:
        raise ValueError('text cannot be null')

    config = persist.get_data_config(data_name)
    tokenizer = TokenizerFactory.get_tokenizer(config.get('tokenizer'))

    # clean text if requested by the data config
    if config.get('clean_text'):
        text = pre_process.clean_text(text, filtered_word_types)

    # tokenize words and predict
    result_word, result_proba = classifier.predict(text, data_name)
    result_proba *= 100

    # POS-tag the tokenized text (tag_text presumably yields
    # (word, tag) pairs -- only indices 0 and 1 are read below)
    tagged_text = self.tag_text(tokenizer(text))

    _, content = self.send_msg(result_word, result_proba, tagged_text)
    content_obj = json.loads(content.decode('utf-8'))

    # was: str((int)(result_proba)) -- C-style cast replaced with the
    # Pythonic int() call; output text is unchanged, including the
    # double space after 'tagged words:' kept from the original format.
    tagged_words = ' '.join('(' + w[0] + ' ' + w[1] + ')'
                            for w in tagged_text)
    result = ('predict: ' + result_word
              + ' with probability: ' + str(int(result_proba))
              + '%. response from bot: ' + str(content_obj.get('msg'))
              + ' [tagged words:  ' + tagged_words + ']')
    execution_context.finish({
        'raw': result,
        'bot_response': content_obj.get('msg'),
    })
def run(execution_context):
    """run the action

    Predict a word for the event's text, log the result, POS-tag the
    text, drop filtered word types, and hand everything to send_msg.

    Raises:
        ValueError: if the event carries no 'text' entry.
    """
    filtered_word_type = app.get_config('predict.filtered_word_types')
    remove_stop_words = app.get_config('train.remove_stop_words')
    data_name = execution_context.event.get('data_name', 'default')
    text = execution_context.event.get('text')
    if text is None:
        raise ValueError('text cannot be null')
    config = persist.get_data_config(data_name)
    tokenizer = TokenizerFactory.get_tokenizer(config.get('tokenizer'))
    if config.get('clean_text'):
        text = pre_process.clean_text(text, remove_stop_words)
    tokenized_text = tokenizer(text)
    result_word, result_proba = pre_process.predict(tokenized_text, data_name)
    # BUG FIX: '%2f' is min-width 2 with the default 6 decimals;
    # '%.2f' gives the intended two-decimal percentage.
    LOGGER.warning('predict %s with probability %.2f %%',
                   result_word, result_proba * 100)
    pos_tagged_text = nltk.pos_tag(nltk.word_tokenize(text))
    # keep only words whose POS tag is not in the filtered set
    filtered_text = [(w, word_type) for w, word_type in pos_tagged_text
                     if word_type not in filtered_word_type]
    send_msg(result_word, result_proba, filtered_text)
def run(self, execution_context):
    """Run the test action for the event's data set and report the outcome."""
    event = execution_context.event
    data_name = event.get('data_name', 'default')
    config = persist.get_data_config(data_name)
    test_reader = self.open_test_file(data_name)
    outcome = self.test_data(test_reader, data_name, config)
    execution_context.finish(outcome)
def run(execution_context):
    """run the action"""
    data_name = execution_context.event.get('data_name', 'default')
    config = persist.get_data_config(data_name)
    raw_path = 'cache/data/' + data_name + '/raw.csv'
    # stream the raw CSV rows straight into training
    with open(raw_path) as raw_file:
        train_data(csv.reader(raw_file), data_name, config)
def run(self, execution_context, app_context):
    """run the action"""
    data_name = execution_context.event.get('data_name', 'default')
    config = persist.get_data_config(data_name)
    source_path = 'cache/data/' + data_name + '/raw.csv'
    # train from the raw CSV, then signal completion to the caller
    with open(source_path) as source_file:
        rows = csv.reader(source_file)
        self.train_data(rows, data_name, config, app_context)
        execution_context.finish('train done')
def run(self, execution_context, app_context):
    """run the action"""
    data_name = execution_context.event.get('data_name', 'default')
    config = persist.get_data_config(data_name)
    filtered_word_types = app_context.get_config('train.filtered_word_types')
    test_path = 'cache/data/' + data_name + '/test.csv'
    # evaluate the test CSV and report the outcome to the caller
    with open(test_path) as test_file:
        rows = csv.reader(test_file)
        outcome = self.test_data(rows, data_name, config,
                                 filtered_word_types)
        execution_context.finish(outcome)
def run(self, execution_context, app_context):
    """run the action"""
    event = execution_context.event
    data_name = event.get('data_name', 'default')
    text = event.get('text')
    self.validate(text, app_context)
    data_config = persist.get_data_config(data_name)
    # teaching must be explicitly enabled for this data set
    if not data_config.get('allow_teaching'):
        raise ValueError('data_name ' + data_name + ' is not allowed to teach')
    # append the taught line to both the training and the test corpus
    base_path = "cache/data/" + data_name
    for file_name in ('/raw.csv', '/test.csv'):
        with open(base_path + file_name, 'a') as data_file:
            data_file.write(text + '\n')
    execution_context.finish('teach done')