class WordsGenerator():
    """Generate pseudo-random text from a second-order Markov chain.

    The chain is built from the texts of the parsed log: every pair of
    consecutive words maps to the list of words observed right after it.
    """

    def __init__(self):
        """Load the parsed log and build the word list and the chain.

        Reads the log at ``FilePath.ROOTPATH + FilePath.PARSED_LOG`` through
        ``LogParser`` and stores the results in ``self.wordlist`` and
        ``self.markov``.
        """
        self.log_parser = LogParser()
        self.log_parser.loadParsedLog(FilePath.ROOTPATH + FilePath.PARSED_LOG)
        textlist = self.log_parser.outputAllTexts()
        self.wordlist = self.makeWordList(textlist)
        self.markov = self.makeMarkov()

    def makeWordList(self, textlist):
        """Split every text in ``textlist`` into words with MeCab.

        Args:
            textlist: iterable of text lines to tokenize.

        Returns:
            A flat list of the words of all lines, in order.  A line that
            tokenizes to nothing contributes one empty string, which
            ``makeMarkov`` treats as a break in the chain.
        """
        tagger = MeCab.Tagger("-Owakati")
        wordlist = []
        # Parse line by line: handing MeCab too many lines at once (roughly
        # 100k?) makes parse() fail and return None.
        for line in textlist:
            parsed = tagger.parse(line)
            if parsed is None:
                # MeCab could not parse this line; skip it rather than crash
                # on None.rstrip().
                continue
            # split(" ") is required: extending the list with the raw string
            # would store it one character at a time, spaces included.
            wordlist.extend(parsed.rstrip(" \n").split(" "))
        return wordlist

    def makeMarkov(self):
        """Build the transition table from ``self.wordlist``.

        Returns:
            A dict mapping ``(w1, w2)`` to the list of words seen directly
            after that pair.  Duplicates are kept so that ``random.choice``
            picks followers in proportion to their frequency.
        """
        markov = {}
        w1 = ""
        w2 = ""
        for word in self.wordlist:
            # Pairs containing an empty word (the initial state, or an empty
            # token in the word list) are never used as keys.
            if w1 and w2:
                markov.setdefault((w1, w2), []).append(word)
            w1, w2 = w2, word
        return markov

    def makeSentence(self, l):
        """Generate a sentence of at most ``l`` words.

        A random key of the chain is used as the starting state; the two
        words of that key are not part of the output.  Generation stops
        early when the current pair has no known follower.

        Args:
            l: maximum number of words to emit.

        Returns:
            The generated words concatenated without separators, or an
            empty string when the chain is empty.
        """
        if not self.markov:
            # Nothing was learned; random.choice would raise IndexError.
            return ""
        # list(...) keeps this working on Python 3, where dict views are not
        # indexable; on Python 2 it is equivalent to .keys().
        w1, w2 = random.choice(list(self.markov))
        words = []
        count = 0
        while count < l:
            if (w1, w2) not in self.markov:
                break
            tmp = random.choice(self.markov[(w1, w2)])
            words.append(tmp)
            w1, w2 = w2, tmp
            count += 1
        return "".join(words)
log_srcfilename = FilePath.ROOTPATH + FilePath.SOURCE_LOG log_parser.getLog(log_srcfilename) parsed_logfilename = FilePath.ROOTPATH + FilePath.PARSED_LOG elif (argc == 2): print "error : 出力ファイル名を指定してください\n" exit() elif (argc >= 3): srcfile = FilePath.ROOTPATH + FilePath.LOGFILES + params[2] log_parser.getLog(srcfile) parsed_logfilename = FilePath.ROOTPATH + FilePath.LOGFILES + params[3] log_parser.saveParsedLog(parsed_logfilename,"w+") elif mode == "-learn": log_parser = LogParser() log_parser.loadParsedLog(FilePath.ROOTPATH + FilePath.PARSED_LOG) learning_engine = RoleInferenceEngine(log_parser) learning_engine.learnFromLog() learning_engine.checkEvaluations() learning_engine.saveParameters() elif mode == "-infer": role = params[2] role_inference_engine = RoleInferenceEngine() role_inference_engine.loadLearnedParameters() role_inference_engine.roleInferenceTest(role) #log_parser = LogParser() #log_parser.loadParsedLog(FilePath.ROOTPATH + FilePath.PARSED_LOG) #learning_engine =LogLearningEngine(log_parser) #learning_engine.learnFromLog() #learning_engine.roleInferenceTest(role)