def main(model_file):
    '''
    Run visualizations for the provided model.

    Loads the model from `model_file`, shows the sparsity plots, prints the
    ten most common output words for every hidden state, builds one word
    cloud per state, and finally displays an emission animation.

    Arguments:
        model_file: Handed unchanged to `load()`; also echoed in the
                    progress message.

    Returns:
        None. All results are printed or shown through matplotlib.
    '''

    # Load the model.
    print('Visulaizations for the model ' + model_file)
    model = load(model_file)

    # Visualize the transition and observation matrices.
    visualize_sparsities(model, O_max_cols=10000, O_vmax=0.003)

    # Get the most common output words from each state.
    _, detoken, _ = get_corpus("data/shakespeare.txt", split_by_line=False)

    for state in range(model.L):
        print(f'state {state} -> ', end='')
        print_common_words(model.O[state], detoken, nwords=10)

    # Make a word cloud for each state. The corpus is read inside a context
    # manager so the file handle is closed as soon as the text is in memory
    # instead of lingering until garbage collection.
    corpus_path = os.path.join(os.getcwd(), 'data/shakespeare.txt')
    with open(corpus_path) as corpus_file:
        corpus_text = corpus_file.read()
    _, obs_map = parse_observations(corpus_text)
    wordclouds = states_to_wordclouds(model, obs_map)

    # Make an animation.
    # NOTE(review): the result is kept bound on purpose — presumably this is a
    # matplotlib animation, which must stay referenced until plt.show()
    # returns; confirm against animate_emission.
    anim = animate_emission(model, obs_map, M=8)
    plt.show()
# Example #2
# 0
from HMM import unsupervised_HMM
from HMM_helper import (
    parse_observations,
    sample_sentence,
    visualize_sparsities,
)
from Utility import Utility

# Print the transition matrix.

if __name__ == '__main__':
    # Script entry point: parse the Shakespeare corpus, obtain an HMM (either
    # freshly trained and pickled, or restored from an earlier pickle), then
    # read the syllable dictionary into a list of whitespace-split rows.
    train = False   # True: train and overwrite hmm_10.txt; False: load it.
    n_states = 10   # Second argument to unsupervised_HMM.
    N_iters = 50    # Third argument to unsupervised_HMM.

    # Every file below is opened in a `with` block so its handle is released
    # deterministically, even if reading or (un)pickling raises.
    with open(os.path.join(os.getcwd(),
                           '../data/shakespeare.txt')) as corpus_file:
        text = corpus_file.read()
    obs, obs_map, stress_dic = parse_observations(text)

    # Train the HMM.
    if train:
        HMM = unsupervised_HMM(obs, n_states, N_iters)
        with open('hmm_10.txt', 'wb') as file:
            pickle.dump(HMM, file)
    else:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load hmm_10.txt if it was produced by a trusted run.
        with open("hmm_10.txt", "rb") as file:
            HMM = pickle.load(file)

    #######
    with open(os.path.join(os.getcwd(),
                           '../data/Syllable_dictionary.txt')) as dict_file:
        dic = dict_file.read()
    # Split each line once and keep only the non-empty results, which drops
    # blank and whitespace-only lines.
    lines = [fields
             for fields in (line.split() for line in dic.split('\n'))
             if fields]
# ## Visualization of the dataset

# We will be using the Constitution as our dataset. First, we visualize the entirety of the Constitution as a wordcloud:

# In[7]:

# Read the Constitution text. The context manager closes the file handle as
# soon as the contents are in memory rather than leaving it to the garbage
# collector.
with open(os.path.join(os.getcwd(),
                       'data/constitution.txt')) as constitution_file:
    text = constitution_file.read()
wordcloud = text_to_wordcloud(text, title='Constitution')

# ## Training an HMM

# Now we train an HMM on our dataset. We use 10 hidden states and train over 100 iterations:

# In[8]:

obs, obs_map = parse_observations(text)
hmm8 = unsupervised_HMM(obs, 10, 100)

# ## Part G: Visualization of the sparsities of A and O

# We can visualize the sparsities of the A and O matrices by treating the matrix entries as intensity values and showing them as images. What patterns do you notice?

# In[9]:

visualize_sparsities(hmm8, O_max_cols=50)

# ## Generating a sample sentence

# As you have already seen, an HMM can be used to generate sample sequences based on the given dataset. Run the cell below to show a sample sentence based on the Constitution.

# In[5]: