def test_clean_phone_numbers_valid_numbers_with_incorrect_regionfield_but_correct_region_string(self):
    """Verify the fallback ``region_string`` rescues rows with a missing region.

    The numbers are valid, but the region field either has no value for
    them or names the wrong country (in this case for Canadian numbers).
    With a fallback default region string supplied, rows whose region
    value is missing are still expected to be cleaned; a row whose region
    value is present but wrong is expected to come back as NaN.
    """
    mydf = self.mydf.copy(deep=True)
    expected = pd.DataFrame({
        'key': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
        'phones_by_country': [
            # No country code, so it would not be viewed as valid, except
            # that in this case we specify the default region.
            self.starbucks_vancouver[2],
            self.london_eye_ticketoff[2],
            np.nan,  # input phone was NaN
            self.boston_mikes_pastry[2],
            # No country code, so it would not be viewed as valid, except
            # that in this case we specify the default region.
            self.starbucks_toronto[2],
            # Has an incorrect country code (GB), so the (in this case
            # correct) substitute region_string is not used.
            np.nan,
            self.madrid_laMallorquina_bakery[2],
            self.london_eye_ticketoff2[2],
        ],
    }).set_index('key')

    # No newField is passed; the assertion below reads the result back
    # from the input column, i.e. the values are replaced in place.
    clean_phone_numbers(mydf,
                        phonenum_field='phones_by_country',
                        region_string='CA',
                        region_field='regions_valid_but_missing_or_invalid_canada',
                        use_orig_on_error=False)

    # The original phone field itself should now hold the expected values.
    assert_series_equal(mydf['phones_by_country'], expected['phones_by_country'])
def test_clean_phone_numbers_some_bad_phonenums_and_put_into_new_field_using_region_string_use_orig_on_error(self):
    """Clean ``badphones`` into ``correctedPhones`` with ``use_orig_on_error=True``.

    Region 'CA' is given as a plain string and no region field is used.
    Rows 'a' and 'b' are expected to hold ``self.starbucks_toronto[2]``;
    the remaining rows are expected to hold NaN or the literal values
    listed below (e.g. 'BILL_TO', 'SHIP_TO', blank strings).
    """
    untouched = self.mydf
    frame = self.mydf.copy(deep=True)

    toronto = self.starbucks_toronto[2]
    row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    corrected = [toronto, toronto, np.nan, 'BILL_TO', ' ', '', 'SHIP_TO', 'BILL_TO']
    want = pd.DataFrame({'key': row_keys, 'correctedPhones': corrected}).set_index('key')

    options = dict(phonenum_field='badphones',
                   newField='correctedPhones',
                   region_string='CA',
                   region_field=None,
                   use_orig_on_error=True)
    clean_phone_numbers(frame, **options)

    # The new field should hold the expected values.
    assert_series_equal(frame['correctedPhones'], want['correctedPhones'])
    # A column that was not the input must be left as it was.
    # NOTE(review): the input field here is 'badphones', but only
    # 'goodphones' is compared against the original -- confirm whether
    # 'badphones' should be checked as well.
    assert_series_equal(frame['goodphones'], untouched['goodphones'])
def test_clean_phone_numbers_all_valid_phonenums_and_replace_orig_field_using_region_field(self):
    """Valid phones with valid per-row regions overwrite the original field.

    No ``newField`` is passed, so the result is read back from the
    ``phones_by_country`` column itself. Regions come from the
    ``regions_all_valid`` column.
    """
    frame = self.mydf.copy(deep=True)

    row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    cleaned_phones = [
        self.starbucks_vancouver[2],
        self.london_eye_ticketoff[2],
        np.nan,
        self.boston_mikes_pastry[2],
        self.starbucks_toronto[2],
        self.ontario_tim_hortons[2],
        self.madrid_laMallorquina_bakery[2],
        self.london_eye_ticketoff2[2],
    ]
    want = pd.DataFrame({'key': row_keys, 'phones_by_country': cleaned_phones}).set_index('key')

    options = dict(phonenum_field='phones_by_country',
                   region_field='regions_all_valid',
                   use_orig_on_error=False)
    clean_phone_numbers(frame, **options)

    # The original phone number field should now hold the expected values.
    assert_series_equal(frame['phones_by_country'], want['phones_by_country'])
def test_clean_phone_numbers_valid_numbers_but_no_region_field_values_are_nullified(self):
    """Valid numbers with a missing or wrong region value come back as NaN.

    The region field either has no value for some numbers or names the
    wrong country (in this case for Canadian numbers), and no fallback
    ``region_string`` is supplied, so those rows are expected to be NaN
    in the new ``correctedPhones`` field.
    """
    untouched = self.mydf
    frame = self.mydf.copy(deep=True)

    row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    corrected = [
        np.nan,  # no country code, so not viewed as valid
        self.london_eye_ticketoff[2],
        np.nan,  # input phone was NaN
        self.boston_mikes_pastry[2],
        np.nan,  # has no country code
        np.nan,  # has incorrect country code (GB)
        self.madrid_laMallorquina_bakery[2],
        self.london_eye_ticketoff2[2],
    ]
    want = pd.DataFrame({'key': row_keys, 'correctedPhones': corrected}).set_index('key')

    # region_string (e.g. 'CA') is deliberately not passed: this test
    # covers the case with no fallback region.
    options = dict(phonenum_field='phones_by_country',
                   newField='correctedPhones',
                   region_field='regions_valid_but_missing_or_invalid_canada',
                   use_orig_on_error=False)
    clean_phone_numbers(frame, **options)

    # The new field should hold the expected values.
    assert_series_equal(frame['correctedPhones'], want['correctedPhones'])
    # A column that was not the input must be left as it was.
    # NOTE(review): the input field here is 'phones_by_country', but only
    # 'goodphones' is compared against the original -- confirm whether
    # 'phones_by_country' should be checked as well.
    assert_series_equal(frame['goodphones'], untouched['goodphones'])
def test_input_df_works_with_pandas_generated_keys(self):
    """Smoke test: cleaning a frame with pandas-generated keys must not raise.

    Any dataframe with pandas-generated keys will work because they are
    unique. There are no assertions; the test passes as long as the call
    completes.
    """
    frame = self.mydf_generatedkeys.copy(deep=True)
    options = dict(phonenum_field='goodphones',
                   newField='correctedPhones',
                   region_string='CA',
                   region_field=None,
                   use_orig_on_error=False)
    clean_phone_numbers(frame, **options)