def estMaxSequence(self, filename):
    """Load the test set from ``filename`` and decode it with Viterbi.

    The loaded ``DataSet`` is kept on ``self.dataset`` and the value
    returned by ``Viterbi.mostLikelySequence`` on ``self.maxSequence``.
    """
    print("Reading testing data from %s" % filename)

    # Load the testing data from the file.
    self.dataset = DataSet(filename)
    self.dataset.readFile(200, "test")

    # Decode the test output with a Viterbi instance built on self.hmm.
    self.maxSequence = Viterbi(self.hmm).mostLikelySequence(
        self.dataset.testOutput)
def predict_tag(tags, vocab, A, B):
    """Tag the test corpus with Viterbi and write ``word<TAB>tag`` lines.

    The test file named by ``Config.TEST`` is read and preprocessed, decoded
    with ``Viterbi.Viterbi(vocab, tags, prep, A, B).decode()`` and the result
    is written to ``Config.TEST_OUT`` (UTF-8).

    Args:
        tags: Tag set handed to the decoder.
        vocab: Vocabulary used for preprocessing and decoding.
        A: Handed to the decoder (presumably the transition matrix -- confirm).
        B: Handed to the decoder (presumably the emission matrix -- confirm).
    """
    # `original` holds the raw tokens; `prep` is the preprocessed word list
    # in which an empty line is marked by <n>.
    original, prep = read_preprocess_test_data(vocab, Config.TEST)

    # Decode the optimal tag sequence for the sentences with Viterbi.
    decoder = Viterbi.Viterbi(vocab, tags, prep, A, B)
    predicted_tags = decoder.decode()

    with open(Config.TEST_OUT, 'w', encoding='utf-8') as out:
        for word, tag in zip(original, predicted_tags):
            if word:
                out.write("{0}\t{1}\n".format(word, tag))
            else:
                # Previously only these empty tokens were written and every
                # real word was dropped (inverted condition).
                # NOTE(review): an empty token presumably marks a sentence
                # boundary, so it is emitted as a blank line -- confirm.
                out.write("\n")

 def Decode(self):
     """Decode the graph held in ``self.Nodes`` / ``self.Edges``.

     Prints a message and returns ``[]`` when ``self.Nodes`` is empty or
     ``self.Edges`` is ``None``.

     NOTE(review): in every other case the method falls off the end and
     returns ``None``; the step that computes ``self.cls`` is not present
     here -- confirm against the full implementation.
     """
     if self.Nodes==[] or self.Edges==None:
         print("No graph for decoding")
         return []
         # NOTE(review): unreachable -- this follows `return []` inside the
         # same branch, so `self.cls` is never returned.
         return self.cls
 def _findPath(self, sequence):
     """Ask the C extension (``Viterbi.findPath``) for the most likely
     sequence of states that would generate ``sequence``."""
     alphabet = sorted(set(sequence))
     encoded = self._sequenceToInts(alphabet, sequence)
     emissions = self._setupEmissions(alphabet)
     # Drop the local references before handing off to the extension.
     del sequence, alphabet
     return Viterbi.findPath(list(encoded), emissions, self.columnProbs)
def findingHiddenStates(sentences):
    """Print and return the list ``foundHiddenWords`` built from ``sentences``.

    NOTE(review): the body of the ``for line in sentences`` loop is missing
    here (only a comment remains), so the function does not parse as shown
    and ``foundHiddenWords`` is never filled.
    """
    foundHiddenWords = []
    # NOTE(review): `f` is never written to or closed in the visible code.
    f = open("compFile.txt",
             "w+")  # Opening File pointer to store the joined lines
    for line in sentences:
        # S
    # Echo every collected word (Python 2 print statement).
    for hiddenWord in foundHiddenWords:
        print hiddenWord
    return foundHiddenWords
 def Decode(self):
     """Decode the graph held in ``self.Nodes`` / ``self.Edges``.

     NOTE(review): this block does not parse as shown -- the
     ``Viterbi.Viterbi(`` call below is never closed and the branch that
     should contain it (presumably an ``else:``) is missing.
     """
     # No edges: report it; this branch has no return, so the result is None.
     if self.Edges == None:
         print("No graph for decoding")
     # No nodes: nothing to label.
     elif self.Nodes == []:
         return []
     # Single node: return the index of its largest entry.
     elif len(self.Nodes) == 1:
         return self.Nodes[0].index(max(self.Nodes[0]))
         # NOTE(review): the lines below sit after the `return` above at the
         # same indentation and are therefore unreachable as written.
         self.cls = []
         self.cls = Viterbi.Viterbi([self.Edges] * (len(self.Nodes) - 1),
         return self.cls
def getParametrs(dirPath, test):
    """Build HMM parameters from ``dirPath`` and decode sequences from ``test``.

    The training data is parsed with ``getParssedData``, emission and
    transition parameters are created from it, and up to the first 100 test
    sequences are decoded with both ``vt.viterbi_round`` and ``fb.fb_round``.
    Each result is printed next to the reference annotation.

    Args:
        dirPath: Location of the training data, passed to ``getParssedData``.
        test: Location of the test data, passed to ``getParssedData``.

    Returns:
        tuple: ``(testseqList, seqList, e, tau)``.
    """
    seqList = getParssedData(dirPath)

    e, segLen = createEmission(seqList)  # segLen is not used in this function
    pAlpha = [0.86, 0.89]
    pBeta = [0.89, 0.78]
    tau = createTransition(seqList, pAlpha, pBeta)  # TODO: change to real vals

    testseqList = getParssedData(test)
    # Single-argument print() behaves the same under Python 2 and Python 3;
    # the guard avoids an IndexError when the test set is empty.
    if len(testseqList) > 0:
        print(testseqList[0][0])

    # Decode at most 100 sequences; a shorter test set no longer raises
    # IndexError.
    for i in range(min(100, len(testseqList))):
        cur_seq = testseqList[i][0]

        seq_viterbi = vt.viterbi_round(e, tau, cur_seq)
        seq_fb = fb.fb_round(e, tau, cur_seq)

        print("viterbi\t" + seq_viterbi)
        # Index 2 of the forward-backward result is the decoded sequence.
        print("fd  \t" + seq_fb[2])
        print("real\t" + testseqList[i][1])

    return testseqList, seqList, e, tau

    # for i in range(91, 300):
    #     print(i)
    #     cur_seq = seqList[i][0]
    #     seq_viterbi = vt.viterbi_round(e, tau, cur_seq)
    #     seq_fb = fb.fb_round(e, tau, cur_seq)
    #     print("viterbi \n", seq_viterbi)
    #     print("forward backward\n", seq_fb)
    #     print("real\n", seqList[i][1])

    # print(segLen)
def runNaive(ObsMat, kmer_size, num_state, event_data_test, write_fasta):
    """Decode every test event with ``Viterbi.decodeNaive`` and report accuracy.

    For each event the per-event accuracy is printed; after the loop the
    accuracy over all positions of all events is printed.

    Args:
        ObsMat: Observation matrix handed to ``Viterbi.Viterbi``.
        kmer_size: k-mer length, used for the k-mer map and the decoder.
        num_state: Number of states handed to ``Viterbi.Viterbi``.
        event_data_test: Iterable of events to decode.
        write_fasta: When equal to ``1`` each estimated sequence is passed
            to ``write_to_file``.
    """
    kmer_map, _ = Util.getKmerMap(kmer_size)
    total_correct = 0.0
    T = 0.0
    for event in event_data_test:
        currentSeq, state_label = DataInput.getData_event(event, kmer_map)
        t = len(currentSeq)
        Vit = Viterbi.Viterbi([], ObsMat, num_state, t, kmer_size)
        Y_hat, seq_est = Vit.decodeNaive(currentSeq)
        Y_test = np.array(state_label).reshape(-1, 1)

        # Count the matching positions once and reuse the value.
        correct = float(np.sum(Y_hat == Y_test))
        # A zero-length event would otherwise raise ZeroDivisionError.
        acc = correct / t if t else 0.0
        total_correct += correct
        T += t
        print("Accuracy = %f" % acc)
        if write_fasta == 1:
            # T is the running total of decoded positions at this point.
            write_to_file(seq_est, T, kmer_size)

    # No events (or only empty ones): report 0 instead of dividing by zero.
    total_acc = total_correct / T if T else 0.0
    print("Total Accuracy = %f" % total_acc)
def perform_prediction(data_to_predict, e, tau, test):
    """Compare Viterbi and forward-backward predictions on every record.

    For each entry of ``data_to_predict`` both decoders are run, their errors
    against ``data[SOL]`` are computed with ``calc_error`` and the prediction
    with the smaller error is kept.  Finally the error lists and the share of
    wins of each method are passed to ``plot_error_analysis``.

    Args:
        data_to_predict: Sized iterable of records indexed by ``SEQ`` (the
            sequence) and ``SOL`` (the reference structure).
        e: Emission parameters passed to both decoders.
        tau: Transition parameters passed to both decoders.
        test: When truthy, ``plot_structures_probability`` is called for
            every record.

    NOTE(review): ``final_predictions`` and ``fb_penalties`` are collected
    but not returned or used in the visible code -- confirm whether the
    function was meant to return them.
    """
    final_predictions = []
    viterbi_errors, fb_errors, fb_penalties = [], [], []
    viterbi_wins = 0
    fb_wins = 0
    for data in data_to_predict:
        seq = data[SEQ]
        # Predict the secondary structure using Viterbi and the posterior.
        viterbi_structure = Viterbi.viterbi_round(e, tau, seq)
        f, b, fb_structure = fb.fb_round(e, tau, seq)

        # Error of each prediction against the reference.
        cur_viterbi_error = calc_error(viterbi_structure, data[SOL])
        cur_fb_error = calc_error(fb_structure, data[SOL])
        # The error lists were never filled before, so the final plot always
        # received empty lists.
        viterbi_errors.append(cur_viterbi_error)
        fb_errors.append(cur_fb_error)

        # Keep the path with the smallest error.  The `else` was missing, so
        # both predictions were appended and both counters incremented.
        if cur_viterbi_error < cur_fb_error:
            final_predictions.append((seq, viterbi_structure))
            viterbi_wins += 1
        else:
            final_predictions.append((seq, fb_structure))
            fb_wins += 1

        # Posterior table and its penalties (kept for a later graph).
        s1, s2, s3 = posterior_table(f, b)
        fb_penalties.append(
            calc_fb_penalty([s1[1:], s2[1:], s3[1:]], data[SOL]))
        if test:
            # Plot the probability for each state to emit E, H, or O.
            plot_structures_probability(s1[1:], s2[1:], s3[1:], len(seq))

    data_size = len(data_to_predict)
    if data_size == 0:
        # Nothing was predicted; avoid dividing by zero below.
        return

    # Plot Viterbi vs. posterior by error percentage.
    v_total_wins = float(viterbi_wins) / float(data_size)
    fb_total_wins = 1 - v_total_wins
    plot_error_analysis(viterbi_errors, fb_errors, v_total_wins, fb_total_wins)
def viterbiAlgorithm(seq, a, e):
    """Run the Viterbi algorithm and save its matrices in the output folder.

    The Viterbi matrix and the back-trace matrix of the first input sequence
    are written to ``output/ViterbiMatrix.tsv`` and
    ``output/BackTraceMatrix.tsv``; the decoded state path is written to
    ``ViterbiPath.txt`` and also printed together with its probability.

    Args:
        seq: Sequence source handed to ``Sequences.readSeq``.
        a: Handed to ``AEMatrices.init`` as its second argument.
        e: Handed to ``AEMatrices.init`` as its first argument.
    """
    # Read the A and E matrices
    AEMatrices.init(e, a)

    # Read the input sequence(s) and store them into setX
    setX = Sequences.readSeq(seq, "X")

    # Perform the Viterbi algorithm on the first sequence of the set setX;
    # vi is the Viterbi matrix and backTrace the back-trace matrix.
    (vi, backTrace, probability) = Viterbi.viterbi(setX[0])

    # Write the output matrices of the Viterbi algorithm
    Viterbi.writePathMatrix(vi, setX[0], "output/ViterbiMatrix.tsv")
    Viterbi.writePathMatrix(backTrace, setX[0], "output/BackTraceMatrix.tsv")

    # Write the Viterbi path instead of the Viterbi back trace.  The print
    # call was left unclosed; it now targets the opened file.
    state_seq = Viterbi.generateStateSeq(backTrace, setX[0])
    with open('ViterbiPath.txt', 'w') as text_file:
        print('Path: {}.'.format(state_seq), file=text_file)

    # Print the most likely state path of x according to the Viterbi algorithm
    print('Most likely state is {0}, with probability of {1}'.format(
        ''.join(state_seq), probability))
    # Run the Forward and the Viterbi algorithm on two sample DNA strings and
    # print the results (Python 2 print statements).
    for seq in ["aactgcacatgcggcgcgcccgcgctaat", "gggcgcgggcgccccgcg"]:
        # NB. Book and Lio's notes use integrated transition and initial
        # distribution matrix (initial step is transition from dummy state 0)
        # This is confusing, so I will separate them out.
        # Wiki has non-integrated Viterbi algorithm implementation

        # 1.1. Implement Forward algorithm
        fwd = Forward(TransitionP, EmissionP, InitialP)
        p = fwd.prob(seq)
        print "**************************************"
        print "Probability of", seq, ":", p
        # NOTE(review): the value printed under "Log probability" is -log(p),
        # i.e. the negative log.
        print "Log probability:", -log(p)
        print "**************************************"

        # 1.2. Implement Viterbi algorithm
        vtb = Viterbi(TransitionP, EmissionP, InitialP)
        (prob, path) = vtb.maxSeq(seq)
        print "**************************************"
        print "Viterbi path:"
        print "P =", prob
        print seq
        # The path entries are printed as one string, directly under the
        # sequence.
        print ''.join(str(i) for i in path)
        print "**************************************"

        # 1.3. Length distribution
        # Suppose we have a string of only G-C (with equal emission probili-
        # ties for each state).
        # Once HMM enters state 1 (detect G-C islands), modify the probability
        # of going out of this state to 1/200, and staying to 199/200. Then on
        # average HMM will stay in that state for 200 characters.
    # NOTE(review): the P0, P1 and P2 printed here are assigned before this
    # point, outside the visible code.
    print("P0 + P1 + P2 = %0.8f" % (P0 + P1 + P2))

    print(25 * "-")
    # Evaluate H.calculate with a={L + 1: k} for k = 0, 1, 2 given signal S;
    # the labels below describe the results as P(x_{L+1}=k | S_L).
    P0 = H.calculate(a={L + 1: 0}, signal=S)
    P1 = H.calculate(a={L + 1: 1}, signal=S)
    P2 = H.calculate(a={L + 1: 2}, signal=S)
    print("P0 = P(x%s=0 | S%s) = %0.8f" % (L + 1, L, P0))
    print("P1 = P(x%s=1 | S%s) = %0.8f" % (L + 1, L, P1))
    print("P2 = P(x%s=2 | S%s) = %0.8f" % (L + 1, L, P2))
    # Printed as a check on the three values.
    print("P0 + P1 + P2 = %0.8f" % (P0 + P1 + P2))

    print(25 * "-")
    # Same three queries with ameans='signal'; the labels below describe the
    # results as P(s_{L+1}=k | S_L).
    P0 = H.calculate(a={L + 1: 0}, signal=S, ameans='signal')
    P1 = H.calculate(a={L + 1: 1}, signal=S, ameans='signal')
    P2 = H.calculate(a={L + 1: 2}, signal=S, ameans='signal')
    print("P0 = P(s%s=0 | S%s) = %0.8f" % (L + 1, L, P0))
    print("P1 = P(s%s=1 | S%s) = %0.8f" % (L + 1, L, P1))
    print("P2 = P(s%s=2 | S%s) = %0.8f" % (L + 1, L, P2))
    print("P0 + P1 + P2 = %0.8f" % (P0 + P1 + P2))

    print(25 * "-")
    # H.calculate with the signal alone, labelled P(S_L).
    print("P(S%s) = %0.8f" % (L, H.calculate(signal=S)))

    # Viterbi
    print(25 * "-")
    print("Viterbi algorithm")
    # v.viterbi returns the state sequence X and its probability P.
    X, P = v.viterbi(A, B, p, S)
    print("Argmax P(X | S) = ", X, ". P = %0.8f" % P)
# File / project: elnygren/el-HMM
    u"αββαααβα": u"AHAAAHAA"

# Hidden states and the observation sequence used by the demo below.
states = (u'α', u'β')
observations = ('A', 'H', 'H', 'A', 'A')

# Model parameters derived from `dna` (defined before this point).
start_probability = HMM.start_P(dna, states)
transition_probability = HMM.transition_P(dna, states)
emission_probability = HMM.emission_P(dna, states)

# NOTE(review): this call is truncated -- its remaining arguments and the
# closing parenthesis are missing, so the script does not parse as shown.
posteriors = ForwardBackward.forward_backward(observations, states,

viterbi = Viterbi.viterbi(observations, states, start_probability,
                          transition_probability, emission_probability)

# Report the model parameters (Python 2 print statements).
print 'Probabilities:'
print start_probability
print transition_probability
print emission_probability

# One line per entry of `posteriors`, showing the α and β values with four
# decimals.
print 'Posteriors:'
for line in posteriors:
    print u'α: ' + str("%.4f" % line[u'α']) + u'   ' + u'β: ' + str(
        "%.4f" % line[u'β'])

# Items of viterbi[1], printed on one line (trailing comma suppresses the
# newline in Python 2).
print 'Viterbi:'
for item in viterbi[1]:
    print item,
            HMM.array_A[line_state[j]][line_state[j+1]] += 1  # array_A is used to compute the state-transition probabilities
        for p in range(len(line_state)):
            HMM.count_dic[line_state[p]] += 1  # record the number of occurrences of each state
            for state in HMM.STATES:
                if word_list[p] not in HMM.array_B[state]:
                    HMM.array_B[state][word_list[p]] = 0.0  # make sure every character is in the dictionary of each state in STATES
            # if word_list[p] not in array_B[line_state[p]]:
            #     # print(word_list[p])
            #     array_B[line_state[p]][word_list[p]] = 0
            # else:
            HMM.array_B[line_state[p]][word_list[p]] += 1  # array_B is used to compute the emission probabilities
    HMM.Prob_Array()    # take the logarithm of the probabilities to preserve precision
    # Segment every line of the test set and collect the result in `output`,
    # one segmented line per row.
    output = ''
    for line in testSet:
        line = line.strip()
        tag = Viterbi.Viterbi(line, HMM.array_Pi, HMM.array_A, HMM.array_B)
        # print(tag)
        seg = wordSplit.tag_seg(line, tag)
        # print(seg)
        # Join the segments with a trailing space after each one.
        # NOTE(review): the local name `list` shadows the builtin.
        list = ''
        for i in range(len(seg)):
            list = list + seg[i] + ' '
        # print(list)
        output = output + list + '\n'
    # NOTE(review): the file is opened here, but writing `output` and closing
    # the file are not part of the visible code.
    outputfile = open('output.txt', mode='w', encoding='utf-8')
        # Calculate state and path probabilities by normalising each row of
        # counts by its row sum.
        for i in range(0, self.len_a):
            # Smoothing to prevent divide by zero error: if a row contains a
            # zero, add one to every entry of that row.
            # NOTE(review): `.sum()` below assumes the row is still an object
            # with a `sum` method (e.g. a NumPy row) after this assignment --
            # confirm the type of self.paths / self.states.
            if (0 in self.paths[i]):
                self.paths[i] = [a + 1 for a in self.paths[i]]
            sum_paths = self.paths[i].sum()

            if (0 in self.states[i]):
                self.states[i] = [a + 1 for a in self.states[i]]
            sum_states = self.states[i].sum()

            # Row-normalise the counts into probabilities.
            for j in range(0, self.len_a):
                self.prob_paths[i][j] = float(self.paths[i][j]) / sum_paths
                self.prob_states[i][j] = float(self.states[i][j]) / sum_states

# Corruption percentages to report on.
percentages = [0.1, 0.2]
# Create the WordCorrector object.
wc = WordCorrector()
# Run the model once per percentage (Python 2 print statement).
# NOTE(review): `percent` is only printed in the visible part of the loop; the
# Viterbi object is built from the same `wc` attributes on every iteration.
for percent in percentages:
    print "Results for corruption percentage: ", percent

    # Create the Viterbi object.
    viterbi = Viterbi.Viterbi(wc.prob_states, wc.prob_paths, wc.c_test_data)
    # Invoke the execution process of Viterbi.
    # NOTE(review): the call itself is not part of the visible code.
            # Smoothing: if the row contains a zero, add one to every entry so
            # the row sum cannot be zero.
            if (0 in self.Eis[i]):
                self.Eis[i] = [x + 1 for x in self.Eis[i]]
            # NOTE(review): the local name `sum` shadows the builtin.
            sum = self.Eis[i].sum()
            # print self.alphabets[i], sum
            # Row-normalise the counts into probabilities.
            for j in range(0, len(self.alphabets)):
                self.probEis[i][j] = float(self.Eis[i][j]) / sum

    def getEmissionProbabilities(self):
        """Return the emission probability table stored in ``self.probEis``."""
        emission_table = self.probEis
        return emission_table

    def getTransitionProbabilities(self):
        """Return the transition probability table stored in ``self.probAij``."""
        transition_table = self.probAij
        return transition_table

    def trainHMModel(self):
        """Corrupt the training and the test split via ``self.corruptText``.

        The results are stored on ``self.corruptedTrainingSet`` (flag
        ``True``) and ``self.corruptedTestSet`` (flag ``False``), in that
        order.
        """
        # Training split first, then the test split.
        training_source = self.trainingSet
        self.corruptedTrainingSet = self.corruptText(training_source, True)

        test_source = self.testSet
        self.corruptedTestSet = self.corruptText(test_source, False)


# Build the spelling-correction model and hand its emission probabilities to
# the Viterbi decoder.
# NOTE(review): the Viterbi.Viterbi( call is truncated -- its remaining
# arguments and the closing parenthesis are missing.
objSC = SpellingCorrection()
objViterbi = Viterbi.Viterbi(objSC.getEmissionProbabilities(),

    # Calculate Start Bit Probability: s_bits[i] is the fraction of the SEQS
    # sequences in rx whose first symbol equals i (16 possible symbols).
    # NOTE(review): `s_sum/SEQS` truncates to an integer under Python 2 when
    # SEQS is an int -- confirm the interpreter version.
    for i in range(16):
        s_sum = 0
        for j in range(SEQS):
            if (rx[j][0] == i):
                s_sum += 1
        s_bits[i] = s_sum/SEQS

    # Calculate State Transition Probabilities: every observed transition
    # a -> b at position i adds 1/SEQS (rounded to 6 decimals) to tran[i][a][b].
    for i in range(l-1):
        for j in range(SEQS):
            a = rx[j][i]
            b = rx[j][i+1]
            tran[i][a][b] += round(1/SEQS,6)

    ## Find most probable path
    # Find the most probable starting path: the last index holding the maximum
    # of s_bits.
    # NOTE(review): `start` is not used by the visible code that follows.
    mx = max(s_bits)
    start = 0
    for i in range(len(s_bits)):
        if (s_bits[i] == mx):
            start = i

    path = Viterbi.viterbi_pathfind(rx,1)

    # Print the decoded path next to the original sequence, both as hex.
    print('-'*(l+2) + '\n' + '-'*(l+2))
    print('  Result: ' + arr_2_hex(path))
    print('Original: ' + arr_2_hex(seq))
class ProcessingFile:
    # Script-style class body: every statement below runs once, when the
    # class is defined (Python 2 print statements are used).
    # NOTE(review): several statements are truncated as shown (see the notes
    # below), so this block does not parse in its current form.

    # Read PrideAndPrejudiceChapter3.txt and store each of its lines
    #  in a list (sentences).
    # NOTE(review): fp is never closed in the visible code.
    fp = open("PrideAndPrejudiceChapter3.txt")
    sentences = fp.readlines()

    # Joins the extracted words and inserts spaces at
    #  the correct locations.
    # Writes the formatted text into an output file
    #  after the output is encoded into UTF8 format
    f = open("opFile.txt", "w+")
    for line in HiddenStates.findingHiddenStates(FormatText.removeSpaces(sentences)):
        for x in line[0]:
            utf8string = x.encode("UTF-8")
            # NOTE(review): the punctuation branch below is truncated -- only
            # the tail of a call taking os.SEEK_CUR remains, and the body of
            # the `elif` is missing.
            if utf8string == "." or utf8string == "," or utf8string == "-" \
            or utf8string == '"':
      , os.SEEK_CUR) 
            elif utf8string == "\n":
            f.write(utf8string + " ")

    # Comparing the original text with the output using the proposed comparison
    #   technique
    # NOTE(review): truncated call -- the second argument and the closing
    # parenthesis are missing.
    ratio = FormatText.percentageMatching(FormatText.getOriginalText(sentences),

    # Reading the output file
    # NOTE(review): `f` above is not closed or flushed in the visible code
    # before opFile.txt is reopened here.
    opFile = open("opFile.txt")
    sentences2 = opFile.readlines()

    # Checking the percentage accuracy of our output file after applying Viterbi
    # NOTE(review): truncated call, as for `ratio` above.
    ratio2 = FormatText.percentageMatching(FormatText.getOriginalText(sentences),
    # Check the accuracy ratio of the output file versus original text
    #   considering the cosine similarity of the words in the original text
    cosSimilarity = accuracyChecks.cosineSimilarity\
        ('PrideAndPrejudiceChapter3.txt', 'opFile.txt')

    # Returns the ratio of the spaces wrong placed in the output file
    #  to the placement of spaces in the original text
    spacesError = accuracyChecks.calculateSpaces\
        ('PrideAndPrejudiceChapter3.txt', 'opFile.txt')

    # Report the four accuracy figures.
    print "---=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=---"
    print 'ratiocheck : ', ratio
    print 'ratiocheck2 : ', ratio2
    print 'cosine similarity : ', cosSimilarity
    print 'spaces error percentage : ', spacesError

    # Run viterbiAlgorithm on two space-free sample strings; only the second
    # result is printed.
    applyVertibi2 = Viterbi.viterbiAlgorithm('Iamfast')
    applyVertibi3 = Viterbi.viterbiAlgorithm('Letusmeetafternoon')
    # applyVertibi = Viterbi.viterbi_segment('itseasyformetosplitlongruntogetherblocks')
    # print applyVertibi
    # print applyVertibi
    print applyVertibi3
    # print Virtebi.word_prob("therefore")
    # print Virtebi.word_prob("attacked")
    # print Virtebi.word_prob("proudest")
    # print len(Virtebi.dictionary)
    # print applyVertibi3
import numpy as np
import math
import copy
from Viterbi import *

# Paths of the prediction and ground-truth CSV files for the chosen network.
network_type = 'original'
predictions = './predictions/prediction_' + network_type + '_prob.csv'
actual = './predictions/actual_' + network_type + '_prob.csv'

# NOTE(review): the list literal is truncated -- its entries and the closing
# bracket are missing, so the script does not parse as shown.
states = [
numOfAct = len(states)

# Build the decoder for these states and load the ground-truth labels.
v = Viterbi(states)
actual_labels = v.load_actual_labels(actual)

#transMatrix={'STANDING': {'STANDING': 82.0, 'BENDING': 3.0, 'WALKING': 7.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 2.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'BENDING': {'STANDING': 23.0, 'BENDING': 69.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'WALKING': {'STANDING': 14.0, 'BENDING': 1.0, 'WALKING': 78.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'CYCLING (SITTING)': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING':1.0, 'CYCLING (SITTING)': 89.0, 'SITTING': 3.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'SITTING': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 91.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'CYCLING (STANDING)': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 91.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'RUNNING': {'STANDING': 2.0, 'BENDING': 1.0, 'WALKING': 6.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 85.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'STAIRS (UP)': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 91.0, 'STAIRS (DOWN)': 1.0, 'LYING': 1.0},
#	'STAIRS (DOWN)': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 91.0, 'LYING': 1.0},
#	'LYING': {'STANDING': 1.0, 'BENDING': 1.0, 'WALKING': 1.0, 'CYCLING (SITTING)': 1.0, 'SITTING': 1.0, 'CYCLING (STANDING)': 1.0, 'RUNNING': 1.0, 'STAIRS (UP)': 1.0, 'STAIRS (DOWN)': 1.0, 'LYING': 91.0}}
# NOTE(review): the list literal is truncated -- only two entries remain and
# the closing bracket is missing, so the script does not parse as shown.
states = [
    "STAIRS (UP)",
    "STAIRS (DOWN)",
numOfAct = len(states)

# Build the decoder for these states and load the ground-truth labels.
v = Viterbi(states)
actual_labels = v.load_actual_labels(actual)

