def test_validate_parens_group_parsed(self):
        """Exercise validate_parens_groups_parsed with two sample lines.

        The sample without parentheses is expected to come back unchanged;
        the sample containing '(ROW HOUSE)' is expected to raise
        AmbiguousAddressError.
        """
        plain_line = '6000 SW 1000TH AVE'
        self.assertEqual(plain_line,
                         validate_parens_groups_parsed(plain_line))

        parens_line = '10000 NE 8TH (ROW HOUSE)'
        self.assertRaises(AmbiguousAddressError,
                          validate_parens_groups_parsed,
                          parens_line)
# Example #2
# 0
def normalize_addr_str(
    addr_str,  # type: str
    line2=None,  # type: Optional[str]
    city=None,  # type: Optional[str]
    state=None,  # type: Optional[str]
    zipcode=None,  # type: Optional[str]
    addtl_funcs=None  # type: Sequence[Callable[str, (str, str)]]  # noqa
):  # noqa
    # type: (...) -> Mapping[str, str]
    """Normalize a complete or partial address string.

    The string is pre-cleaned and parsed; on success the parsed components
    are normalized and split into line 1 / line 2 segments. If parsing fails
    and no ``line2`` was supplied, each callable in ``addtl_funcs`` is given
    the cleaned string in turn and the first one that yields a result is fed
    back through this function.

    :param addr_str: str containing address data.
    :type addr_str: str
    :param line2: optional str containing occupancy or sub-address data
        (eg: Unit, Apt, Lot).
    :type line2: str
    :param city: optional str city name that does not need to be parsed from
        addr_str.
    :type city: str
    :param state: optional str state name that does not need to be parsed from
        addr_str.
    :type state: str
    :param zipcode: optional str postal code that does not need to be parsed
        from addr_str.
    :type zipcode: str
    :param addtl_funcs: optional sequence of funcs that take string for further
        processing and return line1 and line2 strings. A func signals that it
        cannot handle the string by raising ValueError.
    :type addtl_funcs: Sequence[Callable[str, (str, str)]]
    :return: address dict with uppercase parsed and normalized address values,
        keyed by ``address_line_1``, ``address_line_2``, ``city``, ``state``
        and ``postal_code``.
    :rtype: Mapping[str, str]
    :raises UnParseableAddressError: if parsing raised
        usaddress.RepeatedLabelError or AmbiguousAddressError and no
        additional func produced a usable result; the partially filled
        address dict is passed to the error.
    :raises AmbiguousAddressError: may propagate from
        validate_parens_groups_parsed, which is called outside the parse
        try block.
    """
    # get address parsed into usaddress components.
    error = None
    parsed_addr = None
    addr_str = pre_clean_addr_str(addr_str, normalize_state(state))
    try:
        parsed_addr = parse_address_string(addr_str)
    except (usaddress.RepeatedLabelError, AmbiguousAddressError) as err:
        # keep a reference: the ``as`` name is unbound when the block exits.
        error = err
        if not line2 and addtl_funcs:
            for func in addtl_funcs:
                try:
                    # separate names so a failed attempt cannot clobber the
                    # caller supplied ``line2`` used further down.
                    alt_line1, alt_line2 = func(addr_str)
                    # send refactored line_1 and line_2 back through
                    # processing; addtl_funcs is deliberately not forwarded.
                    return normalize_addr_str(alt_line1,
                                              line2=alt_line2,
                                              city=city,
                                              state=state,
                                              zipcode=zipcode)
                except ValueError:
                    # try a different additional processing function
                    continue

    if parsed_addr:
        parsed_addr = normalize_address_components(parsed_addr)
        zipcode = get_parsed_values(parsed_addr, zipcode, 'ZipCode', addr_str)
        city = get_parsed_values(parsed_addr, city, 'PlaceName', addr_str)
        state = get_parsed_values(parsed_addr, state, 'StateName', addr_str)
        state = normalize_state(state)

        # assumes if line2 is passed in that it need not be parsed from
        # addr_str. Primarily used to allow advanced processing of otherwise
        # unparsable addresses.
        if not line2:
            line2 = get_normalized_line_segment(parsed_addr,
                                                LINE2_USADDRESS_LABELS)
        line2 = post_clean_addr_str(line2)
        # line 1 is fully post cleaned in get_normalized_line_segment.
        line1 = get_normalized_line_segment(parsed_addr,
                                            LINE1_USADDRESS_LABELS)
        validate_parens_groups_parsed(line1)
    else:
        # line1 is set to addr_str so complete dict can be passed to error.
        line1 = addr_str

    addr_rec = dict(address_line_1=line1,
                    address_line_2=line2,
                    city=city,
                    state=state,
                    postal_code=zipcode)
    if error:
        raise UnParseableAddressError(None, None, addr_rec)
    return addr_rec