def main(model_file):
    '''
    Run visualizations for the provided model.

    Loads the model with `load(model_file)` and then, in order: shows the
    sparsity plots, prints the common words for every state, builds the
    per-state word clouds, creates the emission animation and finally
    calls `plt.show()`.

    Arguments:
        model_file: Path of the model file; it is concatenated into the
                    status message, so it must be a string.
    '''
    # Load the model.
    print('Visulaizations for the model ' + model_file)
    model = load(model_file)

    # Visualize the transition and observation matrices.
    visualize_sparsities(model, O_max_cols=10000, O_vmax=0.003)

    # Get the most common output words from each state.
    _, detoken, _ = get_corpus("data/shakespeare.txt", split_by_line=False)
    for state in range(model.L):
        print(f'state {state} -> ', end='')
        print_common_words(model.O[state], detoken, nwords=10)

    # Make word cloud for each state. The corpus is read inside a `with`
    # block so the file handle is closed even if reading fails.
    corpus_path = os.path.join(os.getcwd(), 'data/shakespeare.txt')
    with open(corpus_path) as corpus_file:
        corpus_text = corpus_file.read()
    _, obs_map = parse_observations(corpus_text)
    states_to_wordclouds(model, obs_map)

    # Make an animation.
    # NOTE(review): `anim` is deliberately kept bound until plt.show()
    # returns - presumably this is a matplotlib animation, which stops
    # running once its last reference is garbage collected. Confirm
    # against animate_emission before removing the name.
    anim = animate_emission(model, obs_map, M=8)
    plt.show()
from HMM import unsupervised_HMM
from HMM_helper import (
    parse_observations,
    sample_sentence,
    visualize_sparsities,
)
from Utility import Utility

# Script entry point: parse the Shakespeare corpus, train a new HMM or load
# the pickled one from disk, then read the syllable dictionary.
if __name__ == '__main__':
    train = False   # True: retrain and overwrite hmm_10.txt; False: load it.
    n_states = 10
    N_iters = 50

    # Read the corpus through a context manager so the handle is closed.
    corpus_path = os.path.join(os.getcwd(), '../data/shakespeare.txt')
    with open(corpus_path) as corpus_file:
        text = corpus_file.read()
    obs, obs_map, stress_dic = parse_observations(text)

    # Train the HMM.
    if train:
        HMM = unsupervised_HMM(obs, n_states, N_iters)
        # `with` closes the file even if pickling raises part-way through.
        with open('hmm_10.txt', 'wb') as file:
            pickle.dump(HMM, file)
    else:
        # SECURITY: pickle.load can execute arbitrary code. Only load a
        # hmm_10.txt that was produced by this script from a trusted source.
        with open("hmm_10.txt", "rb") as file:
            HMM = pickle.load(file)

    #######
    # Read the syllable dictionary and split every non-blank line into its
    # whitespace-separated fields.
    dic_path = os.path.join(os.getcwd(), '../data/Syllable_dictionary.txt')
    with open(dic_path) as dic_file:
        dic = dic_file.read()
    lines = [fields for fields in map(str.split, dic.split('\n')) if fields]
# ## Visualization of the dataset
# We will be using the Constitution as our dataset. First, we visualize the
# entirety of the Constitution as a wordcloud:

# In[7]:


# Read the dataset inside a `with` block so the file handle is closed as soon
# as the text has been loaded.
constitution_path = os.path.join(os.getcwd(), 'data/constitution.txt')
with open(constitution_path) as constitution_file:
    text = constitution_file.read()
wordcloud = text_to_wordcloud(text, title='Constitution')


# ## Training an HMM
# Now we train an HMM on our dataset. We use 10 hidden states and train over
# 100 iterations:

# In[8]:


obs, obs_map = parse_observations(text)
hmm8 = unsupervised_HMM(obs, 10, 100)


# ## Part G: Visualization of the sparsities of A and O
# We can visualize the sparsities of the A and O matrices by treating the
# matrix entries as intensity values and showing them as images. What patterns
# do you notice?

# In[9]:


visualize_sparsities(hmm8, O_max_cols=50)


# ## Generating a sample sentence
# As you have already seen, an HMM can be used to generate sample sequences
# based on the given dataset. Run the cell below to show a sample sentence
# based on the Constitution.

# In[5]: