Пример #1
0
def findMatch(line):
    """Greedily pair the English and katakana phrases found in ``line``.

    Every phrase from ``phrases.get_english_phrase`` is scored against the
    ``to_romaji`` form of every phrase from
    ``phrases.generate_katakana_phrase``.  The score is
    ``Levenshtein.distance`` divided by (length of the shorter string + 1),
    so a lower score means the two strings are more alike.  The
    lowest-scoring pair still available is then taken repeatedly, and each
    phrase ends up in at most one pair.

    Args:
        line: Text handed to ``phrases.splitWord``; it is also echoed back
            in every result tuple.

    Returns:
        A list of ``(english, katakana, score, line)`` tuples in the order
        the pairs were picked.  It holds as many entries as the shorter of
        the two phrase lists.
    """
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)
    romanized = [to_romaji(nonalphabet) for nonalphabet in nonalphabets]

    dim = (len(alphabets), len(romanized))
    similarity = numpy.zeros(dim)
    for i, alphabet in enumerate(alphabets):
        for j, roman in enumerate(romanized):
            shorter = min(len(alphabet), len(roman))
            similarity[i][j] = Levenshtein.distance(alphabet, roman) / (
                float(shorter) + 1)

    ans = []
    for _ in range(min(dim)):
        # argmin() indexes the flattened matrix; divmod turns that into
        # integer (row, column) coordinates.  Plain "/" produces a float on
        # Python 3, which cannot be used as a list index.
        row_index, col_index = divmod(
            int(similarity.argmin()), similarity.shape[1])
        ans.append((alphabets[row_index], nonalphabets[col_index],
                    similarity[row_index, col_index], line))
        # Retire both phrases and their row/column so neither is paired again.
        del alphabets[row_index]
        del nonalphabets[col_index]
        similarity = numpy.delete(similarity, row_index, 0)
        similarity = numpy.delete(similarity, col_index, 1)
    return ans
def findMatch(line):
    """Match English phrases in ``line`` to katakana phrases, best pair first.

    A score matrix is built with one row per phrase from
    ``phrases.get_english_phrase`` and one column per phrase from
    ``phrases.generate_katakana_phrase`` (compared through ``to_romaji``).
    Each cell is ``Levenshtein.distance`` divided by (length of the shorter
    string + 1); smaller is a closer match.  The smallest remaining cell is
    picked repeatedly, and its row and column are removed after each pick.

    Args:
        line: Text passed to ``phrases.splitWord`` and copied into every
            result tuple.

    Returns:
        A list of ``(english, katakana, score, line)`` tuples, one per
        pick, with ``min(number of rows, number of columns)`` entries.
    """
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)
    romanized = [to_romaji(nonalphabet) for nonalphabet in nonalphabets]

    dim = (len(alphabets), len(romanized))
    similarity = numpy.zeros(dim)
    for i, alphabet in enumerate(alphabets):
        for j, roman in enumerate(romanized):
            similarity[i][j] = Levenshtein.distance(alphabet, roman) / (
                float(min(len(alphabet), len(roman))) + 1)

    ans = []
    for _ in range(min(dim)):
        # argmin() returns a position in the flattened matrix.  Floor
        # division and modulo recover integer coordinates; true division
        # ("/") would give a float on Python 3 and break list indexing.
        flat_index = int(similarity.argmin())
        n_cols = similarity.shape[1]
        row_index = flat_index // n_cols
        col_index = flat_index % n_cols
        ans.append((alphabets[row_index], nonalphabets[col_index],
                    similarity[row_index, col_index], line))
        # Drop the matched phrases and their row/column so they are not reused.
        del alphabets[row_index]
        del nonalphabets[col_index]
        similarity = numpy.delete(similarity, row_index, 0)
        similarity = numpy.delete(similarity, col_index, 1)
    return ans
Пример #3
0
def findMatch(line):
    """Pair each English phrase in ``line`` with its nearest katakana phrase.

    Phrases from ``phrases.get_english_phrase`` are compared with the
    ``romkan.to_roma`` form of phrases from
    ``phrases.generate_katakana_phrase`` using ``distance.euclidean`` on
    their ``vectorize`` output.  An English phrase is reported only when
    its smallest distance is below 0.5.

    Args:
        line: Text passed to ``phrases.splitWord`` and copied into every
            result tuple.

    Returns:
        A list of ``(english, katakana, line)`` tuples; empty when there
        are no katakana phrases or no distance is below the threshold.
    """
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)
    romanized = [romkan.to_roma(kana) for kana in nonalphabets]

    dim = (len(alphabets), len(romanized))
    similarity = numpy.zeros(dim)
    for i, english in enumerate(alphabets):
        for j, roman in enumerate(romanized):
            similarity[i, j] = distance.euclidean(
                vectorize(english), vectorize(roman))

    ans = []
    # Without any katakana candidate there is nothing to take a minimum over.
    if not dim[1] > 0:
        return ans
    for i in range(dim[0]):
        row = similarity[i, :]
        if min(row) < 0.5:
            j = numpy.argmin(row)
            ans.append((alphabets[i], nonalphabets[j], line))
    return ans
Пример #4
0
def findMatch(line):
    """Return close English/katakana phrase pairs found in ``line``.

    The distance between every phrase from ``phrases.get_english_phrase``
    and every ``romkan.to_roma``-converted phrase from
    ``phrases.generate_katakana_phrase`` is measured with
    ``distance.euclidean`` over their ``vectorize`` results.  For each
    English phrase, the closest katakana phrase is kept when that distance
    is under 0.5.

    Args:
        line: Text passed to ``phrases.splitWord``; also included in each
            result tuple.

    Returns:
        A list of ``(english, katakana, line)`` tuples, possibly empty.
    """
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)

    romanized = []
    for kana in nonalphabets:
        romanized.append(romkan.to_roma(kana))

    n_rows, n_cols = len(alphabets), len(romanized)
    dim = (n_rows, n_cols)
    similarity = numpy.zeros(dim)
    for row_idx in range(n_rows):
        for col_idx in range(n_cols):
            left = vectorize(alphabets[row_idx])
            right = vectorize(romanized[col_idx])
            similarity[row_idx][col_idx] = distance.euclidean(left, right)

    ans = []
    if n_cols > 0:
        for row_idx in range(n_rows):
            scores = similarity[row_idx, :]
            # Skip English phrases with no katakana candidate under 0.5.
            if not min(scores) < 0.5:
                continue
            best = numpy.argmin(scores)
            ans.append((alphabets[row_idx], nonalphabets[best], line))
    return ans