Exemplo n.º 1
0
                address_full += '-' + str(address_high)

        # Get unit
        unit_full = None
        if unit_num:
            unit_full = '# {}'.format(unit_num)
        
        address = None
        
        if address_full and street_full:
            source_address_comps = [address_full, street_full, unit_full]
            source_address = ' '.join([x for x in source_address_comps if x])

            # Try to parse
            try:
                address = Address(source_address)

                # QC: check for miscellaneous parcel modifications
                street_address = address.street_address
                if should_check_street_full and source_address != street_address:
                    note = 'Parser changes: {} => {}'.format(source_address, street_address)
                    had_warning('Parser changes', note=note)

                # QC: check for duplicate address
                address_counts.setdefault(street_address, 0)
                address_counts[street_address] += 1
            
            except Exception as e:
                print(source_address)
                had_error('Could not parse')
Exemplo n.º 2
0
    def get_address_response_relationships(self, address=None, **kwargs):
        """Return a label describing how ``address`` relates to the reference address.

        The reference address is built from ``self.ref_addr`` and the candidate
        from ``address.street_address``; both are re-parsed with ``Address`` and
        then compared on street address, unit type/number, address range
        (``address_low``/``address_high``) and low-number suffix.

        Args:
            address: Object exposing a ``street_address`` attribute. Required
                despite the ``None`` default: the attribute is read
                unconditionally, so passing ``None`` raises ``AttributeError``.
            **kwargs: Accepted but not used by this method.

        Returns:
            ``self.match_type`` when both street addresses are equal; otherwise
            one of the relationship label strings assigned below (for example
            ``'unit_child'``, ``'in_range'``, ``'range_parent'``,
            ``'has_base_no_suffix'``). ``None`` is returned when no rule
            assigns a label, which can happen when both addresses carry a unit
            type with different unit numbers, their base addresses differ
            (with and without suffix) and the reference has no high number.
        """
        # TODO: assign in include_units fct?

        #print("normalized: ", self.normalized_address)
        #print(self.ref_addr)
        ref_address = Address(self.ref_addr)
        address = Address(address.street_address)
        # print(ref_address, address)
        # "Base" address strings: house number (with / without suffix) plus
        # the full street, i.e. the address without its unit part.
        ref_base_address = ' '.join(
            [ref_address.address_full, ref_address.street_full])
        ref_base_address_no_suffix = '{} {}'.format(
            ref_address.address_full_num, ref_address.street_full)
        base_address = ' '.join([address.address_full, address.street_full])
        base_address_no_suffix = '{} {}'.format(address.address_full_num,
                                                address.street_full)
        match_type = None
        # Unit designators treated as interchangeable ("generic").
        unit_type_variations = ["APT", "UNIT", "#"]
        # The candidate's street address spelled with each generic unit type.
        # NOTE(review): str.replace substitutes every occurrence of the unit
        # type text, not only the unit designator - confirm that is acceptable.
        street_address_variations = [
            address.street_address,
            address.street_address.replace(address.unit_type, "APT"),
            address.street_address.replace(address.unit_type, "UNIT"),
            address.street_address.replace(address.unit_type, "#")
        ] if address.unit_type else []

        #print(address.unit_type, ref_address.unit_type)
        if address.street_address == ref_address.street_address:
            # Address is same as reference address
            match_type = self.match_type
        # elif address.base_address == ref_address.base_address:
        #     # 1769 FRANKFORD AVE UNIT 99
        #     match_type = 'has_base'
        # elif address.base_address_no_suffix == ref_address.base_address or address.base_address == ref_address.base_address_no_suffix:
        #     #6037 N 17TH ST #A or 6037B N 17TH ST #A
        #     match_type = 'has_base_no_suffix'
        elif address.unit_type not in ('', None):
            # Address is different from ref address and has unit type
            if address.address_high is None:
                # Address also doesn't have a high num
                if ref_address.unit_type not in ('', None):
                    # Reference address has unit type
                    if ref_address.unit_num == address.unit_num:
                        if ref_address.unit_type == address.unit_type:
                            match_type = 'exact' if not ref_address.address_high else 'in_range'
                        # Reference and address have same unit num
                        elif all(unit_type in unit_type_variations
                                 for unit_type in
                                 [ref_address.unit_type, address.unit_type]):
                            match_type = 'generic_unit_sibling' if not ref_address.address_high else 'in_range_generic_unit_sibling'
                        else:
                            match_type = 'unit_sibling' if not ref_address.address_high else 'in_range_unit_sibling'
                    elif base_address == ref_base_address:
                        match_type = 'has_base_unit_child'
                    elif base_address_no_suffix == ref_base_address_no_suffix:
                        match_type = 'has_base_no_suffix_unit_child'
                    elif ref_address.address_high:
                        if ref_address.address_low_suffix == address.address_low_suffix:
                            # NOTE(review): equal unit numbers were already
                            # handled by the first branch of this elif chain,
                            # so this comparison appears to be always False
                            # here - confirm before relying on 'in_range'.
                            if address.unit_num == ref_address.unit_num:
                                match_type = 'in_range'
                            elif not address.unit_num:
                                match_type = 'has_base_in_range'
                            else:
                                # 1769-71 FRANKFORD AVE UNIT 8?include_units
                                match_type = 'has_base_in_range_unit_child'
                        elif address.address_low_suffix:
                            if ref_address.address_low_suffix:
                                match_type = 'has_base_no_suffix_in_range_suffix_child_unit_child'
                            else:
                                match_type = 'has_base_in_range'
                        else:
                            if ref_address.address_low_suffix:
                                match_type = 'has_base_no_suffix_in_range_unit_child'
                            else:
                                match_type = 'has_base_in_range_unit_child'
                    # No else: if none of the conditions above hold,
                    # match_type stays None.
                elif ref_address.address_high is not None:
                    # Ref address has no unit type but has high_num:
                    if ref_address.address_low_suffix == address.address_low_suffix:
                        match_type = 'in_range_unit_child'
                    elif ref_address.address_low_suffix:
                        if address.address_low_suffix:
                            match_type = 'has_base_no_suffix_in_range_suffix_child_unit_child'
                        else:
                            match_type = 'has_base_no_suffix_in_range_unit_child'
                    else:
                        match_type = 'in_range_suffix_child_unit_child'

                else:
                    # ref address has no unit type or address high num
                    if address.address_low_suffix is not None and address.base_address_no_suffix == ref_address.base_address:
                        match_type = 'has_base_no_suffix_unit_child'
                    else:
                        match_type = 'unit_child'
            else:
                # Address is different from ref address and has unit type and high num (is ranged unit address)
                # NOTE(review): this test uses "is not None" while the
                # non-ranged path above uses "not in ('', None)"; an empty
                # string unit type is treated differently here.
                if ref_address.unit_type is not None:
                    # Unit type is a generic unit type
                    if ref_address.street_address in street_address_variations:
                        match_type = 'generic_unit_sibling'
                    else:
                        if ref_address.address_high is None:
                            if all(unit_type in unit_type_variations
                                   for unit_type in
                                   [ref_address.unit_type, address.unit_type]):
                                match_type = 'range_parent_unit_sibling'
                            else:
                                match_type = 'range_parent_unit_child'
                        else:
                            match_type = 'unit_sibling' if ref_address.address_low == address.address_low and ref_address.address_high == address.address_high else 'overlapping_unit_sibling'
                else:
                    if ref_address.address_high is None:
                        match_type = 'range_parent_unit_child'
                    else:
                        match_type = 'unit_child'
                    # match_type = 'range_parent'
        else:
            # Address is different from ref address but has no unit type
            if address.address_high:
                if not ref_address.address_high:
                    match_type = 'range_parent'
                else:
                    # Both are ranges: differing bounds mean an overlap,
                    # identical bounds mean the same base range.
                    if ref_address.address_high != address.address_high or ref_address.address_low != address.address_low:
                        if not ref_address.unit_type and ref_address.address_low_suffix == address.address_low_suffix:
                            match_type = 'overlaps'
                        elif ref_base_address == base_address:
                            match_type = 'has_base_overlaps'
                        else:
                            if address.address_low_suffix:
                                # 4923-49 N 16TH ST
                                match_type = 'overlapping_suffix_child'
                            else:
                                match_type = 'has_base_no_suffix_overlaps'
                    else:
                        if ref_address.address_low_suffix == address.address_low_suffix:
                            match_type = 'has_base'
                        else:
                            # 4923-47 N 16TH ST
                            match_type = 'has_base_no_suffix'
            elif ref_address.address_high:
                # no address.address_high but ref_address.address_high
                if ref_address.unit_type:
                    if ref_address.address_low_suffix == address.address_low_suffix:
                        if not ref_address.unit_num:
                            match_type = 'in_range'
                        else:
                            # 902A-4 N 3RD ST UNIT 2
                            match_type = 'has_base_in_range'

                    elif address.address_low_suffix:
                        # 902-4 N 3RD ST UNIT 2
                        if not ref_address.address_low_suffix:
                            match_type = 'in_range_suffix_child'
                        else:
                            match_type = 'has_base_no_suffix_in_range_suffix_child'
                    else:
                        # 902R-4 N 3RD ST UNIT 2
                        match_type = 'has_base_no_suffix_in_range'
                else:
                    if ref_address.address_low_suffix == address.address_low_suffix:
                        if not ref_address.unit_num:
                            # 901A-4 N 3RD ST
                            match_type = 'in_range'
                        else:
                            match_type = 'in_range_unit_sibling'
                    elif address.address_low_suffix:
                        # 902-4 N 3RD ST UNIT 2
                        match_type = 'in_range_suffix_child'
                    else:
                        # 902R-4 N 3RD ST
                        match_type = 'has_base_no_suffix_in_range'
            # Neither address is a range from here on; compare the candidate
            # against the reference's base address forms.
            elif ref_address.base_address == address.street_address:
                # 1769 FRANKFORD AVE UNIT 8
                match_type = 'has_base'
            elif ref_address.base_address_no_suffix == address.street_address:
                # 1769R FRANKFORD AVE
                match_type = 'has_base_no_suffix'
            elif address.base_address_no_suffix == ref_address.base_address:
                # 902 N 3RD ST UNIT 2
                match_type = 'has_base_suffix_child'
            elif address.base_address_no_suffix == ref_address.base_address_no_suffix:
                # 902 N 3RD ST UNIT 2
                match_type = 'has_base_no_suffix_suffix_child'
            else:
                # Fallback when none of the base-address comparisons match.
                match_type = 'has_base'

        return match_type
Exemplo n.º 3
0
        try:
            # Try parsing
            parsed_address = parsed_addresses.get(source_address)
            if parsed_address is None:
                # Passyunk no longer raising errors
                try:
                    parsed_address = parser.parse(source_address)
                    parsed_addresses[source_address] = parsed_address

                except:
                    raise ValueError('Could not parse')

            if parsed_address['type'] == "none":
                raise ValueError('Unknown address type')

            address = Address(parsed_address)

            # Get street address and map to source address
            street_address = address.street_address
            _source_addresses = source_address_map.setdefault(street_address, [])
            if not source_address in _source_addresses:
                _source_addresses.append(source_address)

            # Check for zero address
            if address.address_low == 0:
                raise ValueError('Low number is zero')

            # Add address
            if not street_address in street_addresses_seen:
                addresses.append(address)
                street_addresses_seen.add(street_address)
Exemplo n.º 4
0
    zip_range.pop('zip_4_high')
    zip_range['zip_4'] = zip_4

    zip_ranges.append(zip_range)

# Persist the accumulated zip ranges only when output is enabled.
if WRITE_OUT:
    print('Writing zip ranges to AIS...')
    zip_range_table.write(zip_ranges)

    print('Creating indexes...')
    zip_range_table.create_index('usps_id')

print('\n** RELATE TO ADDRESSES**')
print('Reading addresses...')
# Read only the street_address field of each address row, then wrap each
# value in an Address object (the name `addresses` is rebound to the new list).
addresses = db['address'].read(fields=['street_address'])
addresses = [Address(x['street_address']) for x in addresses]

# Clear the address-zip table before it is repopulated; indexes are dropped
# first. NOTE(review): presumably delete() with no arguments removes all
# rows - confirm against the table API.
if WRITE_OUT:
    print('Dropping indexes...')
    address_zip_table.drop_index('street_address')
    address_zip_table.drop_index('usps_id')
    print('Dropping address-zips...')
    address_zip_table.delete()

# index zip ranges by street_full
# Component field names listed in the order predir, name, suffix, postdir.
street_full_fields = [
    'street_predir',
    'street_name',
    'street_suffix',
    'street_postdir',
]
Exemplo n.º 5
0
    def get_address_response_relationships(self, address=None, **kwargs):
        """Return a label describing how ``address`` relates to the reference address.

        The reference address is built from ``self.ref_addr`` and the candidate
        from ``address.street_address``; both are re-parsed with ``Address`` and
        compared on street address, unit type/number, address range
        (``address_low``/``address_high``) and low-number suffix.

        Args:
            address: Object exposing a ``street_address`` attribute. Required
                despite the ``None`` default: the attribute is read
                unconditionally, so passing ``None`` raises ``AttributeError``.
            **kwargs: Accepted but not used by this method.

        Returns:
            ``self.match_type`` when both street addresses are equal; otherwise
            one of the relationship label strings assigned below (for example
            ``'unit_child'``, ``'in_range'``, ``'range_parent'``,
            ``'has_base'``). ``None`` is returned when no rule assigns a
            label: both addresses have a unit, the unit numbers differ, the
            base addresses differ and the reference has no high number.
        """
        # TODO: assign in include_units fct?
        ref_address = Address(self.ref_addr)
        address = Address(address.street_address)

        # "Base" address strings: house number plus full street, without unit.
        ref_base_address = ' '.join(
            [ref_address.address_full, ref_address.street_full])
        base_address = ' '.join([address.address_full, address.street_full])

        match_type = None
        # Unit designators treated as interchangeable ("generic").
        unit_type_variations = ["APT", "UNIT", "#"]
        # The candidate's street address spelled with each generic unit type.
        street_address_variations = [
            address.street_address,
            address.street_address.replace(address.unit_type, "APT"),
            address.street_address.replace(address.unit_type, "UNIT"),
            address.street_address.replace(address.unit_type, "#")
        ] if address.unit_type else []

        if address.street_address == ref_address.street_address:
            # Address is same as reference address
            match_type = self.match_type
        elif address.unit_type not in ('', None):
            # Address is different from ref address and has unit type
            if address.address_high is None:
                # Address also doesn't have a high num
                if ref_address.unit_type is not None:
                    # Reference address has unit type
                    if ref_address.unit_num == address.unit_num:
                        # Reference and address have same unit num
                        if ref_address.unit_type == address.unit_type:
                            match_type = 'exact' if not ref_address.address_high else 'in_range'
                        elif all(unit_type in unit_type_variations
                                 for unit_type in
                                 [ref_address.unit_type, address.unit_type]):
                            match_type = 'generic_unit_sibling' if not ref_address.address_high else 'in_range_generic_unit_sibling'
                        else:
                            match_type = 'unit_sibling' if not ref_address.address_high else 'in_range_unit_sibling'
                    elif base_address == ref_base_address:
                        match_type = 'has_base_unit_child'
                    elif ref_address.address_high:
                        match_type = 'in_range_unit_sibling'
                    # No else: otherwise match_type stays None.
                elif ref_address.address_high is not None:
                    # Ref address has no unit type but has high_num. The
                    # reference unit type is known to be None on this path, so
                    # the only possible label is the unit-child one.
                    match_type = 'in_range_unit_child'
                else:
                    # ref address has no unit type or address high num
                    if address.address_low_suffix is not None and address.base_address_no_suffix == ref_address.base_address:
                        match_type = 'has_base_no_suffix_unit_child'
                    else:
                        match_type = 'unit_child'
            else:
                # Address is different from ref address and has unit type and high num (is ranged unit address)
                if ref_address.unit_type is not None:
                    if ref_address.street_address in street_address_variations:
                        # Same address apart from a generic unit type spelling
                        if ref_address.address_high is None:
                            match_type = 'unit_sibling'
                        else:
                            match_type = 'unit_child'
                    else:
                        if ref_address.address_high is None:
                            if all(unit_type in unit_type_variations
                                   for unit_type in
                                   [ref_address.unit_type, address.unit_type]):
                                match_type = 'range_parent_unit_sibling'
                            else:
                                match_type = 'range_parent_unit_child'
                        else:
                            match_type = 'unit_sibling' if ref_address.address_low == address.address_low and ref_address.address_high == address.address_high else 'overlapping_unit_sibling'
                else:
                    if ref_address.address_high is None:
                        match_type = 'range_parent_unit_child'
                    else:
                        match_type = 'unit_child'
        else:
            # Address is different from ref address but has no unit type
            if address.address_high:
                if not ref_address.address_high:
                    match_type = 'range_parent'
                else:
                    # Both are ranges: differing bounds mean an overlap,
                    # identical bounds mean the same base range.
                    if ref_address.address_high != address.address_high or ref_address.address_low != address.address_low:
                        match_type = 'overlaps' if not ref_address.unit_type else "has_base_overlaps"
                    else:
                        match_type = 'has_base'
            elif ref_address.address_high:
                # no address.address_high but ref_address.address_high
                match_type = 'in_range' if not ref_address.unit_type else 'has_base_in_range'
            elif address.base_address_no_suffix == ref_address.street_address:
                match_type = 'has_base_no_suffix'
            else:
                match_type = 'has_base'

        return match_type
Exemplo n.º 6
0
        if source_address is None:
            # TODO: it might be helpful to log this, but right now we aren't
            # logging object IDs so there would be no way to identify the
            # null address in the source dataset. Just skipping for now.
            continue

        source_map.setdefault(source_address, []).append(source_name)

        # Make sure this is reset on each run (also for logging)
        street_address = None

        try:
            # Try parsing
            try:
                address = Address(source_address)
            except:
                raise ValueError('Could not parse')

            # Get street address and map to source address
            street_address = address.street_address
            _source_addresses = source_address_map.setdefault(
                street_address, [])
            if not source_address in _source_addresses:
                _source_addresses.append(source_address)

            # Check for zero address
            if address.address_low == 0:
                raise ValueError('Low number is zero')

            # Add address
			if not re.match('\d+[A-Z]', source_address):
				# FEEDBACK: missing suffix
				if '-' in source_address:
					insert_i = source_address.index('-')
				else:
					insert_i = source_address.index(' ')
				source_address = source_address[:insert_i] + address_suffix + \
					source_address[insert_i:]

		# Parse
		try:
			parsed = parser.parse(source_address)
			comps = parsed['components']
		except:
			raise ValueError('Could not parse')
		address = Address(comps)
		street_address = comps['street_address']

		# Owners
		try:
			owners = owner_map[account_num]
		except KeyError:
			owners = ''

		prop = {
			'account_num': account_num,
			'source_address': source_address,
			'tencode': tencode,
			'owners': owners,
			'address_low': comps['address']['low_num'],
			# 'address_low_suffix': comps['address']['low_suffix'] or '',
Exemplo n.º 8
0
            if not re.match('\d+[A-Z]', source_address):
                # FEEDBACK: missing suffix
                if '-' in source_address:
                    insert_i = source_address.index('-')
                else:
                    insert_i = source_address.index(' ')
                source_address = source_address[:insert_i] + address_suffix + \
                 source_address[insert_i:]

        # Parse
        try:
            parsed = parser.parse(source_address)
            comps = parsed['components']
        except:
            raise ValueError('Could not parse')
        address = Address(parsed)
        street_address = comps['output_address']

        # Owners
        try:
            owners = owner_map[account_num]
        except KeyError:
            owners = ''

        prop = {
            'account_num': account_num,
            'source_address': source_address,
            'tencode': tencode,
            'owners': owners,
            'address_low': comps['address']['low_num'],
            # 'address_low_suffix': comps['address']['low_suffix'] or '',