def calculate_posterior(sentence, label):
    """Return the log of the joint score of ``sentence`` tagged with ``label``.

    The score is the product, over every position, of
    ``Probabilities.get_posterior_word_probability(word, tag)`` times a
    sequence term: ``Probabilities.get_first_speech_prob(tag)`` for the first
    position and ``Probabilities.get_transition_prob(tag, previous_tag)`` for
    every later position.

    :param sentence: sequence of words.
    :param label: sequence of tags, indexed in parallel with ``sentence``.
    :return: ``log`` of the accumulated product (``log(1.0)`` for an empty
        sentence).
    """
    posterior_prob = 1.0
    for index in range(len(sentence)):
        posterior_prob *= Probabilities.get_posterior_word_probability(sentence[index], label[index])

        if index == 0:
            # The first tag has no predecessor: use the start probability only.
            posterior_prob *= Probabilities.get_first_speech_prob(label[index])
        else:
            # Every later tag is conditioned on the tag immediately before it.
            posterior_prob *= Probabilities.get_transition_prob(label[index], label[index - 1])

    return log(posterior_prob)
# Example #2
def get_part_of_speech(sentence):
    """Pick a tag for every word using its neighbouring words and previous tag.

    Before tagging, the cached naive, max-marginal and Viterbi results are
    gathered into ``result_cache.results`` and handed to
    ``Probabilities.convert_algo_results`` together with ``sentence``.

    For each word, every tag in ``Probabilities.speech_prob`` is scored as the
    word probability multiplied by position-dependent terms:

    * single-word sentence: the first-tag probability;
    * first word: next-word probability and first-tag probability;
    * last word: transition from the previous tag and previous-word
      probability;
    * any other word: transition, previous-word and next-word probabilities.

    :param sentence: sequence of words.
    :return: list with the best-scoring tag for each word, in order.
    """
    result_cache.results = [result_cache.naive_result] + [result_cache.max_marginal] + [result_cache.viterbi_result]
    Probabilities.convert_algo_results(result_cache.results, sentence)

    pos = []
    previous_word = ""
    previous_speech = ""
    single_word = len(sentence) == 1
    last_index = len(sentence) - 1

    for index, word in enumerate(sentence):
        # float("-inf") replaces -sys.maxint, which does not exist on Python 3.
        best_prob = float("-inf")
        best_speech = None

        for speech in Probabilities.speech_prob.keys():
            prob = Probabilities.get_word_probability(word, speech)

            # The single-word case must be tested first: such a word is also
            # at index 0 but has no following word to look at.
            if single_word:
                prob *= Probabilities.get_first_speech_prob(speech)
            elif index == 0:
                next_word = sentence[index + 1]
                prob *= (Probabilities.get_next_word_speech_probability(next_word, speech)
                         * Probabilities.get_first_speech_prob(speech))
            elif index == last_index:
                prob *= (Probabilities.get_transition_prob(speech, previous_speech)
                         * Probabilities.get_prev_word_speech_probability(previous_word, speech))
            else:
                next_word = sentence[index + 1]
                prob *= (Probabilities.get_transition_prob(speech, previous_speech)
                         * Probabilities.get_prev_word_speech_probability(previous_word, speech)
                         * Probabilities.get_next_word_speech_probability(next_word, speech))

            if best_prob < prob:
                best_prob = prob
                best_speech = speech

        pos.append(best_speech)

        # Remember the tag actually chosen for this word; updating this inside
        # the candidate loop would make later candidates transition from the
        # previously *tried* tag rather than the previous word's tag.
        previous_speech = best_speech
        previous_word = word

    return pos
def calculate_posterior(sentence, label):
    """Return the log of the joint score of ``sentence`` tagged with ``label``.

    Each position contributes
    ``Probabilities.get_posterior_word_probability(word, tag)``. The first
    position is additionally multiplied by
    ``Probabilities.get_first_speech_prob(tag)``; every later position by
    ``Probabilities.get_transition_prob(tag, previous_tag)``.

    :param sentence: sequence of words.
    :param label: sequence of tags, indexed in parallel with ``sentence``.
    :return: ``log`` of the accumulated product (``log(1.0)`` for an empty
        sentence).
    """
    posterior_prob = 1.0
    for index in range(len(sentence)):
        posterior_prob *= Probabilities.get_posterior_word_probability(
            sentence[index], label[index])

        if index == 0:
            # No previous tag exists yet, so only the start probability applies.
            posterior_prob *= Probabilities.get_first_speech_prob(label[index])
        else:
            # Condition the current tag on the one right before it.
            posterior_prob *= Probabilities.get_transition_prob(
                label[index], label[index - 1])

    return log(posterior_prob)
def get_part_of_speech(sentense):
    """Tag ``sentense`` with the best-scoring tag sequence (Viterbi search).

    The candidate tags are the keys of ``Probabilities.speech_prob``. The
    score of a path multiplies the first-tag probability, the word
    probabilities, the tag-to-tag transition probabilities and, for the final
    tag, ``Probabilities.get_last_speech_prob``.

    The winning sequence is also stored (as a copy) in
    ``result_cache.viterbi_result``.

    :param sentense: non-empty sequence of words.
    :return: ``[[solution], []]`` where ``solution`` is the list of tags, one
        per word, in sentence order.
    :raises IndexError: if ``sentense`` is empty.
    """
    # list() keeps the tags indexable on Python 3, where keys() is a view.
    speeches = list(Probabilities.speech_prob.keys())
    tag_position = dict((tag, i) for i, tag in enumerate(speeches))
    no_words = len(sentense)
    no_speech = len(speeches)

    # back_tracks[t][w]: best tag for word w - 1 given tag t at word w.
    back_tracks = [[""] * no_words for _ in range(no_speech)]

    # max_probabilities[t][w]: best path score ending in tag t at word w.
    max_probabilities = [[0] * no_words for _ in range(no_speech)]

    # basic step: score every tag for the first word
    first_word = sentense[0]
    for i, tag in enumerate(speeches):
        max_probabilities[i][0] = Probabilities.get_first_speech_prob(
            tag) * Probabilities.get_word_probability(first_word, tag)

    # recursive step
    for word_index in range(1, no_words):
        curr_word = sentense[word_index]
        for tag_index, curr_tag in enumerate(speeches):
            # float("-inf") replaces -sys.maxint (absent on Python 3).
            arg_max = float("-inf")
            arg_bt = ""
            max_total_prob = float("-inf")
            word_prob = Probabilities.get_word_probability(curr_word, curr_tag)

            for prev_tag_index, prev_tag in enumerate(speeches):
                # best score of reaching prev_tag, then moving to curr_tag
                transition_prob = Probabilities.get_transition_prob(
                    curr_tag, prev_tag) * max_probabilities[prev_tag_index][word_index - 1]

                if transition_prob > arg_max:
                    arg_max = transition_prob
                    arg_bt = prev_tag

                # total probability including the emission of the current word
                total_prob = transition_prob * word_prob
                if total_prob > max_total_prob:
                    max_total_prob = total_prob

            back_tracks[tag_index][word_index] = arg_bt
            max_probabilities[tag_index][word_index] = max_total_prob

    # terminal step: choose the tag of the last word
    best_last_prob = -1
    last_tag = ""
    for i, tag in enumerate(speeches):
        last_prob = max_probabilities[i][
            no_words - 1] * Probabilities.get_last_speech_prob(tag)
        if best_last_prob < last_prob:
            best_last_prob = last_prob
            last_tag = tag

    # backtrack: walk the stored predecessors from the last word to the first
    solution = [last_tag]
    for word_index in range(no_words - 1, 0, -1):
        last_tag = back_tracks[tag_position[last_tag]][word_index]
        solution.append(last_tag)

    # The path was collected last-to-first; put it in sentence order.
    solution.reverse()

    # store result in result cache for future use
    result_cache.viterbi_result = solution[:]

    return [[solution], []]
# Example #5
def get_samples(sentence, sample_count):
    ''' Gibbs sampler

    1. Generate an initial sample with ``generateInitSamples(sentence)``.
    2. For each iteration in ``range(1, Constants.gibbs_max_iteration)``:
       a. start from a copy of the previous sample;
       b. for every word, weight each tag in ``Probabilities.speech_prob``
          by the word probability times the first-tag / transition
          probabilities to the neighbouring tags (all other tags held fixed),
          normalise the weights and draw a new tag for that word;
       c. record the resulting sample.
    3. Drop the first ``Constants.gibbs_burn_in_count`` samples.

    :param sentence: sequence of words.
    :param sample_count: not used by this implementation; the number of
        iterations comes from ``Constants.gibbs_max_iteration``.
    :return: ``(samples, sampleMap)`` where ``samples`` is the list of tag
        sequences kept after burn-in and ``sampleMap`` is a ``Counter`` keyed
        by the ``'_'``-joined tag sequence, counting every iteration
        (burn-in included).
    '''
    sampleMap = Counter()
    samples = []
    last_index = len(sentence) - 1

    # step 1 - generate initial samples
    previousSample = generateInitSamples(sentence)

    for _ in range(1, Constants.gibbs_max_iteration):
        # step 2 - work on a copy so samples already stored are not mutated
        modifiedSample = previousSample[:]

        # step 3 - resample every word in turn
        for j in range(len(sentence)):
            speechTags = []
            probWeights = []
            sumProbWeights = 0

            # step 4 - unnormalised posterior of each tag for word j
            for speech in Probabilities.speech_prob.keys():
                word_prob = Probabilities.get_word_probability(sentence[j], speech)

                if len(sentence) == 1:
                    prob1 = Probabilities.get_first_speech_prob(speech)
                elif j == 0:  # first word, nothing prior
                    prob1 = Probabilities.get_first_speech_prob(speech) * Probabilities.get_transition_prob(
                        modifiedSample[j + 1], speech)
                elif j == last_index:  # last word, nothing after
                    prob1 = Probabilities.get_transition_prob(speech, modifiedSample[j - 1])
                else:
                    prob1 = Probabilities.get_transition_prob(speech, modifiedSample[
                        j - 1]) * Probabilities.get_transition_prob(modifiedSample[j + 1], speech)

                prob = word_prob * prob1
                speechTags.append(speech)
                probWeights.append(prob)
                sumProbWeights += prob

            probWeights = [x / sumProbWeights for x in probWeights]

            # Draw a tag: first position whose cumulative weight reaches the
            # random draw. Default to the last tag in case rounding leaves the
            # cumulative total slightly below the draw.
            randomWeight = random()
            cumsum = 0
            randomIndex = len(probWeights) - 1
            for k, weight in enumerate(probWeights):
                cumsum += weight
                if cumsum >= randomWeight:
                    randomIndex = k
                    break

            modifiedSample[j] = speechTags[randomIndex]

        # step 5 - add to sample list
        previousSample = modifiedSample[:]
        samples.append(previousSample)
        sampleMap['_'.join(previousSample)] += 1

    samples = samples[Constants.gibbs_burn_in_count:]
    return samples, sampleMap
def get_part_of_speech(sentense):
    """Return the best-scoring tag sequence for ``sentense`` (Viterbi search).

    Candidate tags are the keys of ``Probabilities.speech_prob``. A path is
    scored by multiplying the first-tag probability, each word probability,
    each transition probability and ``Probabilities.get_last_speech_prob`` of
    the final tag.

    A copy of the winning sequence is stored in
    ``result_cache.viterbi_result``.

    :param sentense: non-empty sequence of words.
    :return: ``[[solution], []]`` where ``solution`` lists one tag per word in
        sentence order.
    :raises IndexError: if ``sentense`` is empty.
    """
    # list() keeps the tags indexable on Python 3, where keys() is a view.
    speeches = list(Probabilities.speech_prob.keys())
    index_of_tag = dict((tag, i) for i, tag in enumerate(speeches))
    no_words = len(sentense)
    no_speech = len(speeches)

    # Holds, per tag and word, the best predecessor tag for trace back
    back_tracks = [[""] * no_words for _ in range(no_speech)]

    # maximum path score for each tag at each word
    max_probabilities = [[0] * no_words for _ in range(no_speech)]

    # basic step: initial score of every tag for the first word
    first_word = sentense[0]
    for i, tag in enumerate(speeches):
        max_probabilities[i][0] = (
            Probabilities.get_first_speech_prob(tag)
            * Probabilities.get_word_probability(first_word, tag)
        )

    # recursive step
    for word_index in range(1, no_words):
        curr_word = sentense[word_index]
        for tag_index, curr_tag in enumerate(speeches):
            # float("-inf") replaces -sys.maxint (absent on Python 3).
            arg_max = float("-inf")
            arg_bt = ""
            max_total_prob = float("-inf")
            word_prob = Probabilities.get_word_probability(curr_word, curr_tag)

            for prev_tag_index, prev_tag in enumerate(speeches):
                # score of the best path into prev_tag extended to curr_tag
                transition_prob = (
                    Probabilities.get_transition_prob(curr_tag, prev_tag)
                    * max_probabilities[prev_tag_index][word_index - 1]
                )

                if transition_prob > arg_max:
                    arg_max = transition_prob
                    arg_bt = prev_tag

                # total probability including the current word's emission
                total_prob = transition_prob * word_prob
                if total_prob > max_total_prob:
                    max_total_prob = total_prob

            back_tracks[tag_index][word_index] = arg_bt
            max_probabilities[tag_index][word_index] = max_total_prob

    # terminal step, calculate speech for last word
    best_last_prob = -1
    last_tag = ""
    for i, tag in enumerate(speeches):
        last_prob = max_probabilities[i][no_words - 1] * Probabilities.get_last_speech_prob(tag)
        if best_last_prob < last_prob:
            best_last_prob = last_prob
            last_tag = tag

    # backtrack, get best path
    solution = [last_tag]
    for word_index in range(no_words - 1, 0, -1):
        last_tag = back_tracks[index_of_tag[last_tag]][word_index]
        solution.append(last_tag)

    # Due to backtrack the path is last-to-first; restore sentence order.
    solution.reverse()

    # store result in result cache for future use
    result_cache.viterbi_result = solution[:]

    return [[solution], []]