Exemplo n.º 1
0
def _laplace_context_product(ngrams, ngram_counts, context_counts,
                             context_size, vocab_size):
    """Return the product of smoothed conditional estimates for ``ngrams``.

    Each n-gram is a space-separated string. Its context is its first
    ``context_size`` words (an n-gram with fewer words uses what it has).

    Per n-gram the factor is:
      * ``count / (context_count + vocab_size)`` when both the context and
        the n-gram have been seen,
      * ``1 / (context_count + vocab_size)`` when only the context is seen,
      * ``1 / vocab_size`` when the context itself is unseen.

    Returns the integer ``1`` for an empty ``ngrams``.
    """
    product = 1
    for ngram in ngrams:
        context = " ".join(ngram.split(" ")[:context_size])
        try:
            denominator = float(context_counts[context]) + vocab_size
        except KeyError:
            # Unknown context: back off to a uniform estimate.
            product = product * (1 / vocab_size)
            continue
        try:
            count = ngram_counts[ngram]
        except KeyError:
            count = 1
        # NOTE(review): seen n-grams use the raw count, not count + 1, so a
        # count of 1 scores the same as an unseen n-gram. Kept as-is to
        # preserve existing output -- confirm whether this is intended.
        product = product * (count / denominator)
    return product


def laplace_interpolation():
    """Print an interpolated, smoothed score for each line of English.txt.

    For every line, three products are computed from the n-grams returned
    by ``add_tri``, ``add_bi`` and ``add_uni`` (only the first item of each
    two-item result is used) and combined as::

        0.5 * trigram_product + 0.3 * bigram_product + 0.2 * unigram_product

    Counts are read from the module-level ``trigram_dict``, ``bigram_dict``
    and ``unigram_dict``; ``unigram_len`` is added to every denominator.
    One score is printed per input line. Nothing is returned.
    """
    vocab_size = float(unigram_len)

    # `with` guarantees the file is closed; iterating the handle streams
    # lines instead of loading the whole file with readlines().
    with open("English.txt", "r") as corpus:
        for line in corpus:
            trigrams, _ = add_tri(line)
            tri_prob = _laplace_context_product(
                trigrams, trigram_dict, bigram_dict, 2, vocab_size)

            bigrams, _ = add_bi(line)
            bi_prob = _laplace_context_product(
                bigrams, bigram_dict, unigram_dict, 1, vocab_size)

            unigrams, _ = add_uni(line)
            uni_prob = 1
            for unigram in unigrams:
                try:
                    count = unigram_dict[unigram]
                except KeyError:
                    count = 1
                uni_prob = uni_prob * (count / (2 * vocab_size))

            prob_add = 0.5 * tri_prob + 0.3 * bi_prob + 0.2 * uni_prob
            # Single-argument print(...) prints the same on Python 2 and 3.
            print(prob_add)
Exemplo n.º 2
0
def _count_of_counts(ngram_counts):
    """Return ``{count: number of n-grams having that count}``.

    A plain dict is returned (not a ``collections.Counter``) so that lookups
    of absent counts behave exactly as they did before for whatever
    consumes the bins.
    """
    bins = {}
    for count in ngram_counts.values():
        bins[count] = bins.get(count, 0) + 1
    return bins


def _good_turing_product(ngrams, bins, total, ngram_counts):
    """Multiply ``calculate_good_next`` over every n-gram in ``ngrams``.

    Each result is converted with ``float`` before multiplying. Returns the
    integer ``1`` for an empty ``ngrams``.
    """
    product = 1
    for ngram in ngrams:
        product = product * float(
            calculate_good_next(bins, ngram, total, ngram_counts))
    return product


def good_turing_interpolation():
    """Print an interpolated score for each line of English.txt.

    Count-of-count bins are built once from the module-level
    ``trigram_dict``, ``bigram_dict`` and ``unigram_dict``. For every line,
    the n-grams returned by ``add_tri``, ``add_bi`` and ``add_uni`` (only the
    first item of each two-item result is used) are scored with
    ``calculate_good_next`` and combined as::

        0.5 * trigram_product + 0.3 * bigram_product + 0.2 * unigram_product

    One score is printed per input line. Nothing is returned.
    """
    # Only the values are tallied, so sorting the items first is not needed.
    bins_tri = _count_of_counts(trigram_dict)
    bins_bi = _count_of_counts(bigram_dict)
    bins_uni = _count_of_counts(unigram_dict)

    # `with` guarantees the file is closed; iterating the handle streams
    # lines instead of loading the whole file with readlines().
    with open("English.txt", "r") as corpus:
        for line in corpus:
            trigrams, _ = add_tri(line)
            tri_prob = _good_turing_product(
                trigrams, bins_tri, trigram_len, trigram_dict)

            bigrams, _ = add_bi(line)
            bi_prob = _good_turing_product(
                bigrams, bins_bi, bigram_len, bigram_dict)

            unigrams, _ = add_uni(line)
            uni_prob = _good_turing_product(
                unigrams, bins_uni, unigram_len, unigram_dict)

            prob_add = 0.5 * tri_prob + 0.3 * bi_prob + 0.2 * uni_prob
            # Single-argument print(...) prints the same on Python 2 and 3.
            print(prob_add)