Пример #1
0
def correctWord_sc(input_word):
    """Return normalized scores for the ten best correction candidates.

    The word is upper-cased, candidates are fetched with
    ``trie.search(input_word, 4)``, and every candidate whose
    ``nc.editDistance`` distance is below 4 is scored as the product of the
    second value returned by ``nc.editDistance`` (presumably a channel
    probability -- confirm against ``nc``) and ``prior[candidate]``.

    Args:
        input_word: The word to correct; it is upper-cased before lookup.

    Returns:
        A dict mapping at most ten candidate words (the highest scoring
        ones) to their score divided by the sum of those top scores. The
        dict is empty when no candidate qualifies. When the top scores sum
        to zero, every candidate maps to 0.0 instead of the call failing
        with ZeroDivisionError.

    Raises:
        KeyError: If a qualifying candidate is missing from ``prior``.
    """
    input_word = input_word.upper()

    # Keyed by (word, pro, prior) so identical candidates collapse to one entry.
    scores = {}
    for cand in trie.search(input_word, 4):
        word = cand[0]
        dis, pro = nc.editDistance(input_word, word)
        if dis < 4:
            word_prior = prior[word]
            scores[(word, pro, word_prior)] = pro * word_prior

    # Only the ten best candidates take part in the normalization.
    top = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)[:10]
    normalization = sum(score for _, score in top)

    if not normalization:
        # All top scores are zero (or there are no candidates): dividing
        # would raise, so report the candidates with a zero score instead.
        return {key[0]: 0.0 for key, _ in top}

    return {key[0]: score / normalization for key, score in top}
Пример #2
0
		        
		if input_ph[1]!= '' and word_ph[0]!='':
		    phonetic_val = lev.distance(input_ph[1],word_ph[0])
		    if maxphval<phonetic_val:
		        maxphval = phonetic_val
		        
		if input_ph[1]!= '' and word_ph[1]!= '':
		    phonetic_val = lev.distance(input_ph[1],word_ph[1])
		    if maxphval<phonetic_val:
		        maxphval = phonetic_val
		#print(maxphval)
		if maxphval==-1:
		    maxphval=1
		maxphval = pow(10,-3*maxphval)
		
		dis, pro = nc.editDistance(input, i[0])
		
		if dis < 4:
			edit[i[0]] = dis
			prob[(i[0], pro, prior[i[0]])] = pro * prior[i[0]] * maxphval

	# print "Got the probs"
	sorted_x = sorted(prob.items(), key=operator.itemgetter(1), reverse = True)
	end = time.time()
	print input + '\t',
	normalization = 0
	for i in range(min(10, len(sorted_x))):
		normalization += sorted_x[i][1]

	for i in range(min(10, len(sorted_x))):
		print sorted_x[i][0][0] + '\t' + str(sorted_x[i][1]/ normalization) + '\t',