def test_single_step_generalization(self):
    """Two postcodes differing only in their last character should
    become equal after a single call to reduce_string on each."""
    first = 'HP2 7PW'
    second = 'HP2 7PF'

    first_reduced = reduce_string(first)
    second_reduced = reduce_string(second)

    # The raw inputs are distinct; only their reduced forms coincide.
    self.assertNotEqual(first, second)
    self.assertEqual(first_reduced, second_reduced)
def test_multistep_generalization(self):
    """Reduce two postcodes that share the 'HP2 ' prefix until they are
    equal, and check that this takes exactly six reduction steps."""
    left = 'HP2 7PW'
    right = 'HP2 4DY'
    steps = 0

    while left != right:
        # Reduce the strictly longer value; on equal lengths the
        # second value is the one that gets reduced.
        if len(right) >= len(left):
            right = reduce_string(right)
        else:
            left = reduce_string(left)
        steps += 1

    self.assertEqual(left, right)
    self.assertEqual(steps, 6)
def test_total_generalization(self):
    """Reduce two postcodes with no common prefix until they are equal,
    and check that they meet at '*' after exactly fourteen steps."""
    left = 'HP2 7PW'
    right = 'CF470JD'
    steps = 0

    while left != right:
        # Reduce the strictly longer value; on equal lengths the
        # second value is the one that gets reduced.
        if len(right) >= len(left):
            right = reduce_string(right)
        else:
            left = reduce_string(left)
        steps += 1

    self.assertEqual(left, right)
    self.assertEqual(steps, 14)
    self.assertEqual(left, '*')
def recode_strings(series):
    """
    Collapse a series of strings to one value by repeated reduction.

    The distinct values of the series are kept in a set. While more than
    one value remains, the element chosen by ``next_string_to_reduce`` is
    taken out, passed through ``reduce_string``, and the result is put
    back into the set.

    Parameters
    ----------
    series: Series
        Series to be recoded.

    Returns
    -------
    str
        The single value left once all distinct values have merged.
    """
    remaining = set(series.unique())

    while True:
        if len(remaining) <= 1:
            break
        candidate = next_string_to_reduce(remaining)
        remaining.remove(candidate)
        remaining.add(reduce_string(candidate))

    return list(remaining)[0]
def test_generalization(self):
    """A single call to reduce_string must change the postcode."""
    original = 'NE9 5YE'
    reduced = reduce_string(original)
    self.assertNotEqual(original, reduced)