def __init__(self):
    Controller.__init__(self)
    # Concatenate every processed speech corpus into one lowercase blob,
    # collapsing runs of newlines so paragraphs are separated by a single
    # blank line.
    texts = ''
    for path in constants.PROCESSED_SPEECHES_PATHS:
        with open(path, 'r') as corpus:
            texts += re.sub(pattern='\n+', repl='\n\n',
                            string=corpus.read().lower()).strip() + '\n\n'
    # Build the Markov-chain speech generator from the combined corpus.
    self.mcc_speech_generator = MccSpeechGenerator().preprocess(texts)
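# Illustrative only: a minimal, self-contained sketch (not part of the
# controller) showing the effect of the whitespace normalisation applied to
# each speech corpus above: lowercase the text, collapse runs of newlines
# into one blank line, and trim the ends. The helper name is hypothetical.
import re

def _normalize_speech_corpus(raw):
    return re.sub(pattern='\n+', repl='\n\n', string=raw.lower()).strip()

# _normalize_speech_corpus('First paragraph.\n\n\nSECOND Paragraph.\n')
# -> 'first paragraph.\n\nsecond paragraph.'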
def __init__(self):
    Controller.__init__(self)
    self._qr = QueryResponder()

    # Load the preprocessed question/answer pairs (pickle requires binary mode).
    with open(constants.PROCESSED_QNA_PATH, 'rb') as f:
        qna_list = pickle.load(f)

    # Combine quotes, Facebook posts and speeches into one lowercase text,
    # collapsing runs of newlines in the speech corpora.
    with open(constants.PROCESSED_QUOTES_PATH) as f:
        text = f.read().lower().strip()
    with open(constants.PROCESSED_FB_POSTS_PATH) as f:
        text += '\n\n' + f.read().lower().strip()
    for path in constants.PROCESSED_SPEECHES_PATHS:
        with open(path, 'r') as corpus:
            text += re.sub(pattern='\n+', repl='\n\n',
                           string=corpus.read().lower()).strip() + '\n\n'

    # Split the combined text into sentences with NLTK's punkt tokenizer.
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = tokenizer.tokenize(text)

    # Only (re)build the engine when the cached InferSent model is missing.
    if not os.path.exists(constants.CHAT_INFERSENT_MODEL_PATH):
        self._engine = self._qr.preprocess(qna_list, sentences)
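# Illustrative only: a self-contained sketch of the punkt sentence splitting
# used above. It assumes the punkt model has already been downloaded
# (nltk.download('punkt')); the helper name is hypothetical.
import nltk

def _split_sentences(raw_text):
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    return tokenizer.tokenize(raw_text.strip())

# _split_sentences('i like trains. trains are great.')
# -> ['i like trains.', 'trains are great.']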
def __init__(self):
    Controller.__init__(self)
    self._api = tweeter_utils.TweeterUtils()
    os.chdir(DEEP_BB_PREFIX_PATH)

    # Normalise each corpus: lowercase and collapse runs of newlines.
    with open(constants.PROCESSED_TWEETS_PATH, 'r') as corpus:
        tweets_text = re.sub(pattern='\n+', repl='\n',
                             string=corpus.read().lower())
    with open(constants.PROCESSED_QUOTES_PATH, 'r') as corpus:
        quotes_text = re.sub(pattern='\n+', repl='\n',
                             string=corpus.read().lower())
    # Facebook posts: additionally put each sentence on its own line by
    # breaking after every literal period (the dot must be escaped, otherwise
    # the pattern matches any character).
    with open(constants.PROCESSED_FB_POSTS_PATH, 'r') as corpus:
        fb_text = re.sub(pattern=r'\.', repl='.\n',
                         string=re.sub(pattern='\n+', repl='\n',
                                       string=corpus.read().lower()))

    # Build the Markov-chain tweet generator from the three corpora, weighting
    # Facebook posts and quotes more heavily than raw tweets.
    self.mcc_tweets_generator = MccTweetGenerator().preprocess(
        [tweets_text, fb_text, quotes_text], weights=[0.4, 2, 1])
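# Illustrative only: MccTweetGenerator is project-specific, so this is a
# hypothetical sketch of one way the weighted corpus combination above could
# be built with the markovify library; it is an assumption, not the project's
# actual implementation.
import markovify

def _build_weighted_tweet_model(tweets_text, fb_text, quotes_text):
    # One newline-delimited Markov model per corpus.
    models = [markovify.NewlineText(t) for t in (tweets_text, fb_text, quotes_text)]
    # Same relative weights as above: tweets 0.4, Facebook posts 2, quotes 1.
    return markovify.combine(models, [0.4, 2, 1])

# model = _build_weighted_tweet_model(tweets_text, fb_text, quotes_text)
# model.make_short_sentence(280)  # generate one tweet-length sentence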
def __init__(self, server):
    Controller.__init__(self, server)