def test_get_likelihood(self):
    # Compares phrase.get_likelihood with scores worked out by hand from
    # phrase.error_penalization, phrase.get_edits and phrase.space_edit_cost
    # for four query/suggestion pairs.

    # Case 1: a one-word query against a different one-word suggestion.
    query = Suggestion(['foo'])
    suggestion = Suggestion(['bar'])
    word_edit_cost = phrase.get_edits(suggestion[0], query[0])[0]
    expected = phrase.error_penalization * -(word_edit_cost / len(str(query)))
    # Only this case is compared to two decimal places.
    self.assertAlmostEqual(
        phrase.get_likelihood(query, suggestion), expected, places=2)

    # Case 2: the query carries two extra spaces that the suggestion removes;
    # the letters are otherwise identical.
    split_str = 'fo o bar roc ks'
    joined_str = 'foo bar rocks'
    query = Suggestion(suggestion_str=split_str)
    suggestion = Suggestion(suggestion_str=joined_str)
    space_cost = 2 * phrase.space_edit_cost
    expected = phrase.error_penalization * -(space_cost / len(split_str))
    self.assertAlmostEqual(phrase.get_likelihood(query, suggestion), expected)

    # Case 3: the same pair with the roles swapped, so the suggestion adds
    # the two spaces and the cost is divided by the joined query's length.
    split_str = 'fo o bar roc ks'
    joined_str = 'foo bar rocks'
    query = Suggestion(suggestion_str=joined_str)
    suggestion = Suggestion(suggestion_str=split_str)
    space_cost = 2 * phrase.space_edit_cost
    expected = phrase.error_penalization * -(space_cost / len(joined_str))
    self.assertAlmostEqual(phrase.get_likelihood(query, suggestion), expected)

    # Case 4: two extra spaces plus two wrong letters in the query.
    split_str = 'fo o bcr rxc ks'
    joined_str = 'foo bar rocks'
    query = Suggestion(suggestion_str=split_str)
    suggestion = Suggestion(suggestion_str=joined_str)
    # Letter edits are measured with all whitespace stripped; the two space
    # edits are then added on top.
    joined_letters = ''.join(joined_str.split())
    split_letters = ''.join(split_str.split())
    letter_cost = phrase.get_edits(joined_letters, split_letters)[0]
    edit_distance = letter_cost + 2 * phrase.space_edit_cost
    expected = phrase.error_penalization * -(edit_distance / len(split_str))
    self.assertAlmostEqual(phrase.get_likelihood(query, suggestion), expected)
def test_get_likelihood(self):
    # Verifies phrase.get_likelihood(query, suggestion) for four scenarios:
    # a plain word substitution, splits that get joined, joins that get
    # split, and splits combined with letter errors.
    #
    # NOTE(review): another definition named test_get_likelihood sits next to
    # this one in the file; if both live in the same class the later one
    # shadows the earlier - confirm and remove the duplicate.
    penalty = phrase.error_penalization
    two_space_edits = 2 * phrase.space_edit_cost

    # --- single word replaced by another word ---
    query = Suggestion(['foo'])
    suggestion = Suggestion(['bar'])
    substitution_cost = phrase.get_edits(suggestion[0], query[0])[0]
    query_length = len(str(query))
    expected = penalty * -(substitution_cost / query_length)
    actual = phrase.get_likelihood(query, suggestion)
    # This scenario is only checked to two decimal places.
    self.assertAlmostEqual(actual, expected, 2)

    # --- phrase with splits joined up ---
    split_str = 'fo o bar roc ks'
    joined_str = 'foo bar rocks'
    query = Suggestion(suggestion_str=split_str)
    suggestion = Suggestion(suggestion_str=joined_str)
    expected = penalty * -(two_space_edits / len(split_str))
    actual = phrase.get_likelihood(query, suggestion)
    self.assertAlmostEqual(actual, expected)

    # --- phrase with joins split up (same strings, roles reversed) ---
    query = Suggestion(suggestion_str=joined_str)
    suggestion = Suggestion(suggestion_str=split_str)
    expected = penalty * -(two_space_edits / len(joined_str))
    actual = phrase.get_likelihood(query, suggestion)
    self.assertAlmostEqual(actual, expected)

    # --- phrase with splits and letter errors ---
    split_str = 'fo o bcr rxc ks'
    joined_str = 'foo bar rocks'
    query = Suggestion(suggestion_str=split_str)
    suggestion = Suggestion(suggestion_str=joined_str)
    # The letter-level cost is computed on the whitespace-free strings, then
    # the cost of the two space edits is added.
    letters_only_cost = phrase.get_edits(
        ''.join(joined_str.split()),
        ''.join(split_str.split()),
    )[0]
    edit_distance = letters_only_cost + two_space_edits
    expected = penalty * -(edit_distance / len(str(split_str)))
    actual = phrase.get_likelihood(query, suggestion)
    self.assertAlmostEqual(actual, expected)
def test_get_edits(self):
    # Checks that phrase.get_edits('foo', 'bar') yields the expected
    # (cost, edit description) pair.
    word1 = 'foo'
    word2 = 'bar'
    expected_cost = 0.61411943319838058
    expected_edits = 'sfb soa do ir'

    cost, edits = phrase.get_edits(word1, word2)

    # The cost is a float, so compare it with a tolerance (7 decimal places
    # by default) instead of requiring a bit-exact match, which can break
    # when the arithmetic order or platform changes.
    self.assertAlmostEqual(cost, expected_cost)
    # The edit description is a string and must match exactly.
    self.assertEqual(edits, expected_edits)