def test_validate_parens_group_parsed(self):
    """Check validate_parens_groups_parsed on a clean and a bad line.

    A line without parentheses must be returned unchanged; a line that
    still contains a parenthesized group must raise
    AmbiguousAddressError.
    """
    # No parenthesized group: the value is expected back untouched.
    clean_line1 = '6000 SW 1000TH AVE'
    self.assertEqual(
        clean_line1, validate_parens_groups_parsed(clean_line1)
    )

    # A leftover parenthesized group must be rejected.
    parens_addr = '10000 NE 8TH (ROW HOUSE)'
    self.assertRaises(
        AmbiguousAddressError, validate_parens_groups_parsed, parens_addr
    )
def normalize_addr_str(
        addr_str,  # type: str
        line2=None,  # type: Optional[str]
        city=None,  # type: Optional[str]
        state=None,  # type: Optional[str]
        zipcode=None,  # type: Optional[str]
        addtl_funcs=None  # type: Sequence[Callable[str, (str, str)]]  # noqa
):  # noqa
    # type (...) -> Mapping[str, str]
    """Parse and normalize a full or partial address string.

    The string is pre-cleaned, parsed into usaddress components and
    rebuilt into a flat address record.  When parsing fails with
    ``usaddress.RepeatedLabelError`` or ``AmbiguousAddressError`` and no
    ``line2`` was supplied, each callable in ``addtl_funcs`` is tried in
    order; the first one that yields a ``(line1, line2)`` pair is fed
    back through this function (without ``addtl_funcs``) and that result
    is returned.

    :param addr_str: str containing address data.
    :type addr_str: str
    :param line2: optional occupancy / sub-address str (eg: Unit, Apt,
        Lot).  When truthy it is used as-is instead of being parsed out
        of addr_str.
    :type line2: str
    :param city: optional city name that does not need to be parsed
        from addr_str.
    :type city: str
    :param state: optional state name that does not need to be parsed
        from addr_str.
    :type state: str
    :param zipcode: optional postal code that does not need to be
        parsed from addr_str.
    :type zipcode: str
    :param addtl_funcs: optional sequence of callables that take the
        cleaned address string and return a (line1, line2) pair; a
        callable signals "not applicable" by raising ValueError.
    :type addtl_funcs: Sequence[Callable[str, (str, str)]]
    :return: dict with the keys address_line_1, address_line_2, city,
        state and postal_code.
    :rtype: Mapping[str, str]
    :raises UnParseableAddressError: when parsing failed and no
        additional function produced a usable result; the exception is
        given the partially filled address record.
    """
    parse_failure = None
    components = None
    addr_str = pre_clean_addr_str(addr_str, normalize_state(state))

    # Parse the cleaned string into usaddress components.
    try:
        components = parse_address_string(addr_str)
    except (usaddress.RepeatedLabelError, AmbiguousAddressError) as exc:
        parse_failure = exc
        if not line2 and addtl_funcs:
            for fallback in addtl_funcs:
                try:
                    line1, line2 = fallback(addr_str)
                    parse_failure = False
                    # Run the reworked line 1 / line 2 through the normal
                    # pipeline again.
                    return normalize_addr_str(
                        line1,
                        line2=line2,
                        city=city,
                        state=state,
                        zipcode=zipcode,
                    )
                except ValueError:
                    # This fallback did not apply; move on to the next one.
                    continue

    if not components:
        # Keep the cleaned input as line 1 so the error raised below
        # carries a complete record.
        line1 = addr_str
    else:
        components = normalize_address_components(components)
        zipcode = get_parsed_values(components, zipcode, 'ZipCode', addr_str)
        city = get_parsed_values(components, city, 'PlaceName', addr_str)
        state = normalize_state(
            get_parsed_values(components, state, 'StateName', addr_str)
        )

        # A caller-supplied line2 is trusted and not re-parsed from
        # addr_str; this is what lets the additional functions rescue
        # otherwise unparsable addresses.
        line2 = line2 or get_normalized_line_segment(
            components, LINE2_USADDRESS_LABELS
        )
        line2 = post_clean_addr_str(line2)

        # Line 1 is already post-cleaned by get_normalized_line_segment.
        line1 = get_normalized_line_segment(
            components, LINE1_USADDRESS_LABELS
        )
        validate_parens_groups_parsed(line1)

    addr_rec = {
        'address_line_1': line1,
        'address_line_2': line2,
        'city': city,
        'state': state,
        'postal_code': zipcode,
    }
    if parse_failure:
        raise UnParseableAddressError(None, None, addr_rec)
    return addr_rec