Example #1
def sequence_generator(n, k, M):
    '''
    Generates k emissions of length M using the HMM stored in the file
    'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
        k:          Number of sequences to generate.
        M:          Length of emission to generate.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}".format('Generated Emission'))
    print('#' * 70)

    # Generate k input sequences.
    for i in range(k):
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Generate a single emission of length M.
        emission, states = HMM.generate_emission(M)
        x = ''.join(str(e) for e in emission)

        # Print the results.
        print("{:30}".format(x))

    print('')
    print('')
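HiddenMarkovModel.generate_emission is not shown above. For reference, here is a minimal standalone sketch of the sampling it presumably performs, assuming A and O are row-stochastic lists of lists and a uniform initial state distribution (both assumptions; the real class may store its own start probabilities):

import random

def sample_emission(A, O, M):
    '''Sample a state path and an emission of length M from HMM (A, O).'''
    L = len(A)
    state = random.randrange(L)     # assumed uniform initial distribution
    states, emission = [], []
    for _ in range(M):
        states.append(state)
        # Draw an observation from the current state's emission row.
        emission.append(random.choices(range(len(O[state])),
                                       weights=O[state])[0])
        # Draw the next state from the current state's transition row.
        state = random.choices(range(L), weights=A[state])[0]
    return emission, states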
Example #2
def sequence_prediction(n):
    '''
    Runs sequence prediction on the five sequences at the end of the file
    'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}{:30}".format('Emission Sequence',
                              'Max Probability State Sequence'))
    print('#' * 70)

    # For each input sequence:
    for seq in seqs:
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Make predictions.
        x = ''.join([str(xi) for xi in seq])
        y = HMM.viterbi(seq)

        # Print the results.
        print("{:30}{:30}".format(x, y))

    print('')
    print('')
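HMM.viterbi(seq) is printed as a fixed-width string, so it presumably returns the most likely hidden state sequence joined into a single string. A minimal standalone sketch of that computation, assuming row-stochastic A and O and a uniform initial distribution (assumptions; the class may store its own start probabilities):

import numpy as np

def viterbi_path(A, O, seq, pi=None):
    '''Most likely state path for seq under HMM (A, O), in log space.'''
    A, O = np.asarray(A, float), np.asarray(O, float)
    L = A.shape[0]
    pi = np.full(L, 1.0 / L) if pi is None else np.asarray(pi, float)
    with np.errstate(divide='ignore'):   # zero probabilities become -inf
        logA, logO, logpi = np.log(A), np.log(O), np.log(pi)
    delta = logpi + logO[:, seq[0]]      # best log-prob ending in each state
    back = []
    for obs in seq[1:]:
        scores = delta[:, None] + logA   # scores[i, j]: best path into j via i
        back.append(scores.argmax(axis=0))
        delta = scores.max(axis=0) + logO[:, obs]
    path = [int(delta.argmax())]         # backtrack from the best final state
    for ptrs in reversed(back):
        path.append(int(ptrs[path[-1]]))
    return ''.join(str(s) for s in reversed(path))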
Example #3
def sequence_probability(n):
    '''
    Determines the probability of emitting the five sequences at the end of
    the file 'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}{:10}".format('Emission Sequence',
                              'Probability of Emitting Sequence'))
    print('#' * 70)

    # For each input sequence:
    for seq in seqs:
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Compute the probability of the input sequence.
        x = ''.join([str(xi) for xi in seq])
        p = HMM.probability_betas(seq)

        # Print the results.
        print("{:30}{:<10.3e}".format(x, p))

    print('')
    print('')
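The name probability_betas suggests the backward algorithm. A minimal standalone sketch of that computation, again assuming a uniform initial distribution (an assumption; the class may differ):

import numpy as np

def probability_backward(A, O, seq, pi=None):
    '''P(seq) under HMM (A, O) via the backward recursion.'''
    A, O = np.asarray(A, float), np.asarray(O, float)
    L = A.shape[0]
    pi = np.full(L, 1.0 / L) if pi is None else np.asarray(pi, float)
    betas = np.ones(L)                   # beta_T(i) = 1 for every state i
    for obs in reversed(seq[1:]):
        # beta_t(i) = sum_j A[i][j] * O[j][obs_{t+1}] * beta_{t+1}(j)
        betas = A @ (O[:, obs] * betas)
    return float(pi @ (O[:, seq[0]] * betas))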
Example #4
def supervised_learning(tokenized_lines):
    '''
    Generates a sonnet by training an HMM with supervised learning and then
    using the HMM to generate 14 lines of 10 words each. The dataset is
    labeled with part-of-speech tags, which serve as the hidden states for
    supervised learning.

    Arguments:
        tokenized_lines: a list of lines tokenized as words
    '''

    # Build all of the maps needed (states to part-of-speech tags, words to
    # observation indices, and vice versa). These maps are used both for
    # training the supervised model and for generating the poem.
    states, state_POS_map, POS_state_map = convert_POS_to_states(
        tokenized_lines)
    observations, observation_word_map, word_observation_map = convert_lines_observations(
        tokenized_lines)

    # Initialize transition and observation matrices.
    A = [[0. for j in range(len(state_POS_map))]
         for i in range(len(state_POS_map))]
    O = [[0. for j in range(len(observation_word_map))]
         for i in range(len(state_POS_map))]

    # Create HMM that will be trained. X is a list of lines tokenized as words, and
    # Y is the corresponding part of speech tag labels for every word.
    hmm = HiddenMarkovModel(A, O)
    X = []
    Y = []

    # For each tokenized line in the training data, find the part of speech
    # of each word and append the corresponding states to Y; likewise fill X
    # with the observation indices of the words in the line.
    for line in tokenized_lines:
        words_and_tags = tag_POS(line)
        x = []
        y = []
        for word, POS in words_and_tags:
            x.append(word_observation_map[word])
            y.append(POS_state_map[POS])
        X.append(x)
        Y.append(y)

    # Train HMM using supervised learning with X and Y, where Y contains the part of speech
    # labels.
    hmm.supervised_learning(X, Y)

    # Generate 14 lines with 10 words each and print them out.
    for i in range(14):
        obs = hmm.preethi_generate_emission(10)
        line = ''
        for j in obs:
            line += observation_word_map[j]
            line += " "
        print(line)
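hmm.supervised_learning(X, Y) is not shown. With fully labeled data, maximum-likelihood training reduces to counting transitions and emissions and normalizing each row, roughly as in this hypothetical sketch:

def supervised_mle(X, Y, n_states, n_obs):
    '''Estimate row-stochastic A and O from paired observation/state lists.'''
    A = [[0.0] * n_states for _ in range(n_states)]
    O = [[0.0] * n_obs for _ in range(n_states)]
    for x, y in zip(X, Y):
        for t, (obs, state) in enumerate(zip(x, y)):
            O[state][obs] += 1              # emission count
            if t + 1 < len(y):
                A[state][y[t + 1]] += 1     # transition count
    for matrix in (A, O):                   # normalize rows to distributions
        for row in matrix:
            total = sum(row)
            if total > 0:
                row[:] = [c / total for c in row]
    return A, O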
Example #5
def unsupervised_generation(D, n_states, N_iters, k, sylls, endsylls,
                            rhymedict):
    '''
    Loads two HMMs previously trained with unsupervised learning on the poems
    and then calls generations() to generate k emissions for each HMM,
    processing the emissions and printing them as strings.

    Arguments:
        D: the number of "words" in the dataset.
        n_states: the number of hidden states that each HMM has.
        N_iters: the number of iterations the HMMs were trained for.
        k: the number of generations for each HMM.
        sylls, endsylls: syllable data passed through to generations().
        rhymedict: rhyme dictionary passed through to generations().
    '''
    # Print a banner showing which HMM we are generating from. This isn't
    # strictly necessary, but is nice for now.
    print('')
    print('')
    print('#' * 70)
    print("{:^70}".format("Generating Emissions From HMM with %d States"
                          % n_states))
    print('#' * 70)
    print('')
    print('')
    rhymefile = str(n_states) + '_' + str(N_iters) + '_1.txt'
    regularfile = str(n_states) + '_' + str(N_iters) + '_2.txt'

    A, O = processing.read_saved_HMM(rhymefile)
    HMMrhyme = HiddenMarkovModel(A, O)
    A, O = processing.read_saved_HMM(regularfile)
    HMMreg = HiddenMarkovModel(A, O)
    # generates and prints "poems"
    print("RHYMING!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, True)
    print("10 SYLLABLES!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, False)
    print("REGULAR")
    generations(HMMreg, k, sylls, endsylls, False, rhymedict, False)
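processing.read_saved_HMM is not shown. A minimal sketch of one plausible save/load pair, assuming a plain-text format of the state count followed by the rows of A and then the rows of O (the real file format may well differ):

def save_HMM(fname, A, O):
    with open(fname, 'w') as f:
        f.write(str(len(A)) + '\n')
        for row in A + O:                    # A's rows, then O's rows
            f.write(' '.join(str(p) for p in row) + '\n')

def read_saved_HMM(fname):
    with open(fname) as f:
        L = int(f.readline())
        rows = [[float(p) for p in line.split()] for line in f]
    return rows[:L], rows[L:]                # (A, O)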
Example #6
"""
@ Description:    Implement HMM_TEST
"""

from HMM import HiddenMarkovModel
import numpy as np
import time

Q = np.array([0, 1])                             # hidden states: hot = 0, cold = 1
V = np.array([0, 1, 2])                          # observation alphabet
O = np.array([[2, 2, 1], [0, 0, 1], [0, 1, 2]])  # observation sequences
I = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 0]])  # state labels per sequence
test = np.array([0, 1, 2])                       # sequence to decode

# supervised learning algorithm
time_start1 = time.time()
clf1 = HiddenMarkovModel(Q, V)
clf1.train(O, I)
time_end1 = time.time()
print("Supervised learning parameters:")
print("Transition probability matrix\n", clf1.A)
print("Observation probability matrix\n", clf1.B)
print("Initial state probability\n", clf1.Pi)
print("Prediction of supervised learning:", clf1.predict(test))
print("Runtime of supervised learning:", time_end1 - time_start1)
print("________________BOUNDARY_______________________________________")
# unsupervised learning algorithm
time_start2 = time.time()
clf2 = HiddenMarkovModel(Q, V)
clf2.train(O)
time_end2 = time.time()
print("Unsupervised learning parameters:")
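# The snippet is cut off here; by symmetry with the supervised block above,
# it presumably continues along these lines (an assumption):
print("Transition probability matrix\n", clf2.A)
print("Observation probability matrix\n", clf2.B)
print("Initial state probability\n", clf2.Pi)
print("Prediction of unsupervised learning:", clf2.predict(test))
print("Runtime of unsupervised learning:", time_end2 - time_start2)

With no state labels, clf2.train(O) presumably runs Baum-Welch (EM). A minimal single-sequence sketch of one update step, assuming NumPy arrays A (L x L), B (L x V), and pi (L); the names and shapes are assumptions, not this class's API:

def baum_welch_step(A, B, pi, obs):
    '''One EM re-estimation of (A, B, pi) on a single observation sequence.'''
    T, L = len(obs), A.shape[0]
    # Forward pass.
    alpha = np.zeros((T, L))
    alpha[0] = pi * B[:, obs[0]]
    for t in range(1, T):
        alpha[t] = (alpha[t - 1] @ A) * B[:, obs[t]]
    # Backward pass.
    beta = np.ones((T, L))
    for t in range(T - 2, -1, -1):
        beta[t] = A @ (B[:, obs[t + 1]] * beta[t + 1])
    # Posterior state occupancies and transition counts.
    gamma = alpha * beta
    gamma /= gamma.sum(axis=1, keepdims=True)
    xi = (alpha[:-1, :, None] * A[None]
          * (B[:, obs[1:]].T * beta[1:])[:, None, :])
    xi /= xi.sum(axis=(1, 2), keepdims=True)
    # Re-estimate the parameters from the expected counts.
    new_A = xi.sum(axis=0) / gamma[:-1].sum(axis=0)[:, None]
    new_B = np.zeros_like(B)
    for t in range(T):
        new_B[:, obs[t]] += gamma[t]
    new_B /= gamma.sum(axis=0)[:, None]
    return new_A, new_B, gamma[0]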
Example #7

Tokenizer = keras.preprocessing.text.Tokenizer(num_words=None,
                                               filters=filters,
                                               lower=True,
                                               split=' ',
                                               char_level=False,
                                               oov_token=None,
                                               document_count=0)

# fit Tokenizer
Tokenizer.fit_on_texts(word_sequence)

# initialize the HMM from the saved parameters
HMM = HiddenMarkovModel(hmm_param['A'], hmm_param['O'])

poem_list = ""
for i in range(5):
    poem, syll_list = poem_that_rhymes(HMM, Tokenizer, r2w_dict, w2s_dict)
    poem_list += poem
    poem_list += "\n\n"
    for syll in syll_list:
        poem_list += str(syll) + ', '

    poem_list += "\n\n"

# save poems as text
fname_write = 'hmm_poems_k' + str(k) + '.txt'

with open(dataPath + fname_write, 'w') as f:
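    # The snippet is cut off here; presumably the block writes out the
    # accumulated poems (an assumption):
    f.write(poem_list)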
Example #8
import numpy as np
from nltk.corpus import brown

TopElem = 100
Folds = 10
Tags = 10
Obs = list(set(brown.words()))
Sentences = brown.sents()
alphabetReverseMap = {Obs[t]: t for t in range(len(Obs))}
number_obs = len(Obs)
number_sents = len(Sentences)
foldsize = number_sents // Folds
for fold in range(Folds):
    # Hold out the current fold and train on the remaining sentences.
    train = Sentences[0:foldsize * fold] + Sentences[foldsize * (fold + 1):]
    train = [alphabetReverseMap[word] for sentence in train for word in sentence]
    hmm = HiddenMarkovModel(Tags, len(train), number_obs)
    # Randomly initialize the initial, transition, and emission
    # distributions, then normalize each into a stochastic row.
    I = np.random.rand(Tags)
    A = np.random.rand(Tags, Tags)
    B = np.random.rand(Tags, number_obs)
    I /= I.sum()
    for i in range(Tags):
        A[i, :] /= A[i, :].sum()
        B[i, :] /= B[i, :].sum()
    I, A, B = hmm.forward_backward(I, A, B, train)

    print("#Fold:", fold)
    for i in range(Tags):
        # Indices of the TopElem most probable observations for tag i.
        top = B[i].argsort()[-TopElem:][::-1]
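        # The snippet is cut off here; presumably the loop goes on to print
        # the most probable words for each tag (an assumption):
        print("Tag", i, [Obs[t] for t in top])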
Example #9
import argparse

from Viterbi import viterbi

if __name__ == "__main__":

    # Handle command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbosity",
                        help="Increase output verbosity",
                        action="store_true")
    parser.add_argument("string_to_decode", help="The string to be decoded")
    args = parser.parse_args()
    input_string = args.string_to_decode

    # Define the HMM
    h = HiddenMarkovModel(['HOT', 'COLD'], ['1', '2', '3'])

    # Initial probabilities
    initial_probabilities = [0.8, 0.2]

    # Transition probabilities
    transition_probabilities = [[0.7, 0.3], [0.6, 0.4]]

    # Emission probabilities
    emission_probabilities = [[0.2, 0.4, 0.4], [0.5, 0.4, 0.1]]

    # Set up the probabilities
    h.set_probabilities(initial_probabilities, transition_probabilities,
                        emission_probabilities)

    # Run Viterbi algorithm
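    # The snippet ends here; presumably the imported viterbi function is now
    # applied to the model and the input string, along the lines below (the
    # exact signature of viterbi is an assumption):
    best_path = viterbi(h, input_string)
    print(best_path)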