def tagger(self, text_list):
    hmm = HiddenMarkovModel(text_list, self.tags, self.transitions, self.cslm)
    hmmtags = hmm.generateTags()  # generate list of hmm tags
    words = hmm.words             # generate list of words
    taggedTokens = []
    prevLang = "Eng"
    engTags = []
    spnTags = []
    engTag = ""
    spanTag = ""
    token = re.compile(ur'[^\w\s]', re.UNICODE)
    print "Tagging {} words".format(len(words))
    for k, word in enumerate(words):
        # check if punctuation else use hmmtag
        lang = 'Punct' if re.match(token, word) and not word[-1].isalpha() else hmmtags[k]
        lang = 'Num' if word.isdigit() else lang
        # check if word is NE
        if lang != "Punct":
            index = k % 1000
            if index == 0:
                engTags = self.engClassifier.tag(words[k:k + 1000])
                spnTags = self.spanClassifier.tag(words[k:k + 1000])
            engTag = engTags[index][1]
            spanTag = spnTags[index][1]
        else:
            engTag = "O"
            spanTag = "O"
        # mark as NE if either classifier identifies it
        if engTag != 'O' or spanTag != 'O':
            NE = "{}/{}".format(engTag, spanTag)
        else:
            NE = "O"
        # record probabilities
        if lang in ("Eng", "Spn"):
            hmmProb = round(hmm.transitions[prevLang][lang], 2)
            engProb = round(self.cslm.prob("Eng", word), 2)
            spnProb = round(self.cslm.prob("Spn", word), 2)
            totalProb = (hmmProb + engProb) if lang == "Eng" else (hmmProb + spnProb)
            prevLang = lang
        else:
            hmmProb = "N/A"
            engProb = "N/A"
            spnProb = "N/A"
            totalProb = "N/A"
        taggedTokens.append((word, lang, NE, str(engProb), str(spnProb),
                             str(hmmProb), str(totalProb)))
        # taggedTokens.append((word, lang, NE))
        # print word, lang, NE
    return taggedTokens

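# A minimal, standalone sketch of the token-classification rule tagger() applies
# before falling back to the HMM tag: a token matching the punctuation regex whose
# last character is not alphabetic becomes 'Punct', digit-only tokens become 'Num',
# and everything else keeps the (placeholder) HMM tag. Written for Python 3, so it
# uses a plain r'' pattern instead of the ur'' prefix above.
import re

def classify_token(word, hmm_tag="Eng"):
    token = re.compile(r'[^\w\s]', re.UNICODE)
    lang = 'Punct' if re.match(token, word) and not word[-1].isalpha() else hmm_tag
    return 'Num' if word.isdigit() else lang

if __name__ == "__main__":
    for w in ["hola", "...", "42", "'s"]:
        print(w, classify_token(w))
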
def initML_pw():
    training_file = ['UPDOWN', 'DOWNUP', 'DOWNRIGHT', 'DOWNLEFT', 'UPRIGHT', 'UPLEFT']
    all_models = []
    for file in training_file:
        file_name = 'training_sourse/' + file + '.pickle'
        with open(file_name, 'rb') as f:
            obs = pickle.load(f)
        temp = {'X': list(), 'Y': list()}
        for j in range(0, 100):
            temp['X'].append(obs['X'][j])
            temp['Y'].append(obs['Y'][j])
        if file == 'CIRCLE':
            all_models.append(HiddenMarkovModel(4, file, temp))
        else:
            all_models.append(HiddenMarkovModel(2, file, temp))
    return all_models

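# A hedged sketch of the pickle layout initML_pw() appears to expect: each
# training_sourse/<GESTURE>.pickle holds a dict with 'X' and 'Y' lists of at
# least 100 entries. The helper name, file path, and the toy random contents
# below are illustrative assumptions; the real files come from recorded gestures.
import os
import pickle
import random

def write_dummy_training_file(path="training_sourse/UPDOWN.pickle", n=100):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    obs = {'X': [[random.random() for _ in range(10)] for _ in range(n)],
           'Y': [[random.random() for _ in range(10)] for _ in range(n)]}
    with open(path, 'wb') as f:
        pickle.dump(obs, f)
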
def angList(self, text_list):
    hmm = HiddenMarkovModel(text_list, self.cslm)
    ang = ""
    ang_list = []
    for token, tag in zip(text_list, hmm.ang):
        if tag == "Yes":
            ang = " ".join([ang, token])
            continue
        else:
            if ang != "":
                ang_list.append(ang.strip())
            ang = ""
    return ang_list

def initML_main(dict):
    # convert keys of gesture dictionary to a list
    ges_list = []
    for key, value in dict.items():
        if value and value > 0:
            ges_list.append(key.text().upper())
    all_models = []
    for file in ges_list:
        file_name = 'training_sourse/' + file + '.pickle'
        with open(file_name, 'rb') as f:
            obs = pickle.load(f)
        temp = {'X': list(), 'Y': list()}
        for j in range(0, 100):
            temp['X'].append(obs['X'][j])
            temp['Y'].append(obs['Y'][j])
        if file == 'CIRCLE':
            all_models.append(HiddenMarkovModel(4, file, temp))
        else:
            all_models.append(HiddenMarkovModel(2, file, temp))
    return dict, all_models

def compute_probs(user_df):
    dayGrouping = pd.Grouper(key="date", freq="1D")
    weekGrouping = pd.Grouper(key="date", freq="1W")
    timeGrouping = user_df.groupby(weekGrouping)
    # NOTE: a span here was redacted in the source (a "Starting on user: ..." print
    # and, presumably, the loop header and accumulator setup). The initialisations
    # and loop below are assumptions inferred from how the variables are used later;
    # model, trainingPeriod, inert and mm are assumed to be defined at module level.
    logProbScores = []
    matrices = []
    timesTrained = 0
    for week, timeGroup in timeGrouping:
        seq = timeGroup["feature"].values
        if len(seq) < 1:  # if there is no activity for this week
            logProbScores.append(0)
            continue
        if timesTrained > trainingPeriod:
            logProb = model.sequence_log_probability(seq)
            logProbScores.append(-logProb)
        # train the model on the sequence we have just seen
        model.learn(seq, max_iters=20, threshold=0.01,
                    restart_threshold=0.1, max_restarts=5, inertia=inert)
        matrices.append(mm(model.transitions, model.emissions, model.starts))
        timesTrained += 1
    return (logProbScores, matrices)

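# A self-contained illustration (not part of the original module) of how the
# pd.Grouper(key="date", freq="1W") call above slices a user's activity into the
# weekly observation sequences that compute_probs() scores and trains on. The
# column names and values here are made up for the demo.
import numpy as np
import pandas as pd

if __name__ == "__main__":
    toy_df = pd.DataFrame({
        "date": pd.date_range("2021-01-01", periods=21, freq="D"),
        "feature": np.arange(21) % 5,
    })
    for week, group in toy_df.groupby(pd.Grouper(key="date", freq="1W")):
        seq = group["feature"].values  # one observation sequence per week
        print(week.date(), seq)
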
def tag(self, text_list):
    # annotation_lists = []
    hmm = HiddenMarkovModel(text_list, self.cslm)
    annotation_lists = zip(text_list, hmm.lemmas, hmm.lang, hmm.NE,
                           hmm.ang, hmm.engProbs, hmm.spnProbs)
    return annotation_lists

# Ari Chadda
# PA6 CS76 - 11/10/20

from HiddenMarkovModel import HiddenMarkovModel
from Maze import Maze

if __name__ == "__main__":
    # maze options for test
    # test_maze = Maze("maze1.maz")
    test_maze = Maze("maze2.maz")
    maze_solver = HiddenMarkovModel(test_maze)  # instantiating solver object
    maze_solver.particle_filtering()  # calling filtering algorithm

import os
import json

import numpy as np
import matplotlib.pyplot as plt

from HiddenMarkovModel import HiddenMarkovModel

PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
DATA_FILE = os.path.join(PROJECT_ROOT,
                         "../simulation/data/20200304_192852.json.labeled")

with open(DATA_FILE, 'r') as f:
    string = f.read()
data = json.loads(string)

# Initialize A and B matrices
states = {'left': 0, 'none': 1, 'right': 2}
model = HiddenMarkovModel(states)
model.initialize()

# fig, axs = plt.subplots(7, 7, sharex='col', sharey='row')
# for k in data.keys():
#     key_list = [i for i in list(data[k].keys())
#                 if i != 'speed' and i != 'label']
#     for idx, e in enumerate(key_list):
#         for idxx, ee in enumerate(key_list):
#             axs[idx][idxx].plot(data[k][ee], data[k][e], '.')
#             axs[idx][idxx].set_xlabel(ee)
#             axs[idx][idxx].set_ylabel(e)
#     fig.suptitle(k)
#     # plt.show()

training_infrequent_words = dataPreProcessor.identify_infrequent_words()
trainSet = dataPreProcessor.tag_capital_words(training_infrequent_words, trainSet)
trainSet = dataPreProcessor.tag_UNI_ing_words(training_infrequent_words, trainSet)
trainSet = dataPreProcessor.tag_numbers(training_infrequent_words, trainSet)

testing_infrequent_words = dataPreProcessor.identify_infrequent_words_in_testing_corpus()
testSet = dataPreProcessor.tag_capital_words(testing_infrequent_words, testSet)
testSet = dataPreProcessor.tag_UNI_ing_words(testing_infrequent_words, testSet)
testSet = dataPreProcessor.tag_numbers(testing_infrequent_words, testSet)

# create an instance of the HMM and pass the training set to generate its parameters
hiddenMarkovModel = HiddenMarkovModel(testSet)
hiddenMarkovModel.calculate_transition_prob_for_POS_tags()
hiddenMarkovModel.calculate_emission_prob()

unified_test_set = [tup for sent in testSet for tup in sent]
test_set_tags = [t for (_, t) in unified_test_set]

viterbi = Viterbi(hiddenMarkovModel)
viterbi_tags = []
for test in testSet:
    if len(test) < 100:
        test_observations = [w for (w, _) in test]
        viterbi_tags += viterbi.tag_words(test_observations)

check = [v_tag for v_tag, t_tag in zip(viterbi_tags, test_set_tags)
         if v_tag == t_tag]  # assumed completion of a line truncated in the source

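# A hedged continuation (not in the source): if `check` keeps the correctly
# predicted tags as assumed above, per-token accuracy over the tagged tokens
# could be reported roughly like this. zip() truncates to the shorter list, so
# viterbi_tags is used as the denominator.
if viterbi_tags:
    accuracy = len(check) / float(len(viterbi_tags))
    print("Viterbi tagging accuracy: {:.2%}".format(accuracy))
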
# Build the input matrices in standard form
emi_mat = emission_matrix.values
trans_mat = trans_matrix_reverse.values

# In[20]:

# Initial probabilities; only the best-performing choice is shown, i.e. uniform
allNumber = len(trans_mat)
p0 = [1.0 / allNumber for i in range(allNumber)]

# In[22]:

# Define the model
model = HiddenMarkovModel(trans_mat, emi_mat, p0)

# In[23]:

# Run the model
states_seq, state_prob = model.run_viterbi(
    [i for i in range(len(emission_matrix))], summary=True)

# In[56]:

# Trace back and match states to grid cells
grid = emission_matrix.columns[states_seq]
predict = Grid_ID.loc[grid]  # .values

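# A generic, standalone Viterbi decode over toy matrices, as an illustration of
# what model.run_viterbi() above conceptually does; the real class's signature and
# internals are not shown here, and the row/column conventions are assumptions:
# A[i, j] = P(state j | state i), B[i, t] = P(observation t | state i).
import numpy as np

def viterbi_decode(p0, A, B):
    N, T = B.shape
    delta = np.log(p0) + np.log(B[:, 0])        # best log-score ending in each state
    back = np.zeros((T, N), dtype=int)          # backpointers
    for t in range(1, T):
        scores = delta[:, None] + np.log(A)     # scores[i, j]: come from i, go to j
        back[t] = np.argmax(scores, axis=0)
        delta = scores[back[t], np.arange(N)] + np.log(B[:, t])
    path = [int(np.argmax(delta))]
    for t in range(T - 1, 0, -1):               # follow backpointers from the end
        path.append(int(back[t, path[-1]]))
    return path[::-1]

if __name__ == "__main__":
    p0 = np.array([0.5, 0.5])
    A = np.array([[0.8, 0.2], [0.3, 0.7]])
    B = np.array([[0.9, 0.1, 0.2], [0.2, 0.8, 0.7]])  # states x timesteps
    print(viterbi_decode(p0, A, B))
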
import numpy as np

from HiddenMarkovModel import HiddenMarkovModel

print("********** Test Initialization **********")
# states = {0: 'sunny', 1: 'rain', 2: 'cloudy'}
states = {0: 'default', 1: 'other'}
hmm = HiddenMarkovModel(states, method='gmm', data_dim=2, gmm_k=1)
print(hmm)

print("********** Test Emission Probability **********")
# obs = [[0, 0], [0, 0], [10, 10], [10, 10], [0, 0], [10, 10], [10, 10], [0, 0], [0, 0]]
# obs = [[-1, -1], [-2, 2], [3, 3], [-1, -1], [0, 0], [-1, 1], [3, 3], [-2, 2], [1, 0]]
obs = [[1, 1], [1, 1], [1.1, 1], [0.9, 1], [1, 1],
       [1, 1], [1.2, 1], [0.8, 1], [1, 1]]
# np.random.seed(0)
# obs = np.ones((100000, 2)) + np.random.rand(100000, 2)
print(obs)
hmm._init_param(np.array(obs))
print(hmm.weights)
print(hmm.means)
print(hmm.sigmas)
emitlogprob = hmm._log_emission(np.array(obs))
print(emitlogprob)

print("********** Test Forward Probability **********")
logA = np.log(hmm.A)

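# A generic, self-contained sketch of the log-space forward recursion that the
# "Test Forward Probability" section above presumably exercises next with logA and
# emitlogprob. It uses toy matrices and scipy's logsumexp rather than the class's
# own (unshown) forward method, so treat it as a reference computation, not the
# project's API.
import numpy as np
from scipy.special import logsumexp

def log_forward(log_pi, log_A, log_B):
    """log_pi: (N,) initial log-probs; log_A: (N, N) transition log-probs;
    log_B: (T, N) per-step emission log-probs. Returns the total log-likelihood."""
    T, N = log_B.shape
    alpha = log_pi + log_B[0]
    for t in range(1, T):
        alpha = logsumexp(alpha[:, None] + log_A, axis=0) + log_B[t]
    return logsumexp(alpha)

if __name__ == "__main__":
    log_pi = np.log([0.6, 0.4])
    log_A = np.log([[0.7, 0.3], [0.4, 0.6]])
    log_B = np.log([[0.9, 0.2], [0.1, 0.8], [0.5, 0.5]])
    print(log_forward(log_pi, log_A, log_B))
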
# trans.loc[key, dic_concat[key]] = e
# trans.loc[dic[key], key] = e
trans.loc[key, key] = 1

# In[44]:

# Take the emission matrix and transition matrix as inputs
emi_mat = emission_matrix.values
tran_mat = trans.values
# Initial probabilities
allNumber = len(tran_mat)
p0 = [1.0 / allNumber for i in range(allNumber)]

# In[45]:

model = HiddenMarkovModel(tran_mat, emi_mat, p0)

# In[46]:

states_seq, state_probs = model.run_viterbi([i for i in range(len(emi_mat))],
                                            summary=True)

# In[47]:

states_seq

# In[ ]:

main = states_seq[0]
ls = [main]
for i in range(len(states_seq)):