示例#1
0
    def have_hash_in_common(self, address1, address2, **kw):
        """Assert that two addresses yield at least one identical near-dupe hash.

        Each address is a mapping; its keys() are passed as the labels and
        its values() as the values to ``near_dupe_hashes``. Extra keyword
        arguments are forwarded unchanged to both calls.
        """
        hashes_a = near_dupe_hashes(address1.keys(), address1.values(), **kw)
        hashes_b = near_dupe_hashes(address2.keys(), address2.values(), **kw)

        # A non-empty intersection means the two addresses share a hash.
        shared = set(hashes_a).intersection(set(hashes_b))
        self.assertTrue(shared)
示例#2
0
    def contained_in_hashes(self, address, output, **kw):
        """Assert that ``output`` is one of the near-dupe hashes of ``address``.

        ``address`` is a mapping; its keys() are passed as the labels and its
        values() as the values to ``near_dupe_hashes``. Extra keyword
        arguments are forwarded unchanged.

        Two assertions are made: the hash list must be non-empty, and
        ``output`` must be a member of it.
        """
        hashes = near_dupe_hashes(address.keys(), address.values(), **kw)
        self.assertTrue(hashes)

        # Previously referenced an undefined name (`expansions`), which raised
        # NameError before the membership check could ever run.
        hashes = set(hashes)
        self.assertTrue(output in hashes)
示例#3
0
文件: dedupe.py 项目: riordan/lieu
    def near_dupe_hashes(cls,
                         address,
                         languages=None,
                         with_address=True,
                         with_unit=False,
                         with_city_or_equivalent=False,
                         with_small_containing_boundaries=False,
                         with_postal_code=False,
                         with_latlon=True,
                         geohash_precision=DEFAULT_GEOHASH_PRECISION,
                         name_and_address_keys=None,
                         name_only_keys=None,
                         address_only_keys=None):
        """Compute near-duplicate hashes for ``address``.

        Thin wrapper around the module-level ``near_dupe_hashes`` that fills
        in class-level defaults before delegating.

        Args:
            address: mapping of address components; read with ``.get`` for
                the latitude/longitude keys and passed to the class helpers.
            languages: languages to hash with. When None they are derived
                via ``cls.combined_place_languages``.
            with_address, with_unit, with_city_or_equivalent,
            with_small_containing_boundaries, with_postal_code:
                forwarded unchanged to the underlying hash function.
            with_latlon: forwarded unchanged, except that it is forced to
                False when the address lacks a latitude or a longitude.
            geohash_precision: forwarded unchanged.
            name_and_address_keys, name_only_keys, address_only_keys:
                when None, the class attribute of the same name is used.

        Returns:
            Whatever the underlying ``near_dupe_hashes`` function returns.
        """
        lat = address.get(Coordinates.LATITUDE)
        lon = address.get(Coordinates.LONGITUDE)
        if lat is None or lon is None:
            # No usable coordinates: pass placeholder values and disable
            # lat/lon-based hashing so the placeholders are not used.
            lat = 0.0
            lon = 0.0
            with_latlon = False

        if languages is None:
            address_minus_name = cls.address_minus_name(address)
            languages = cls.combined_place_languages(address,
                                                     address_minus_name)

        labels, values = cls.address_labels_and_values(address)

        if name_only_keys is None:
            name_only_keys = cls.name_only_keys

        if name_and_address_keys is None:
            name_and_address_keys = cls.name_and_address_keys

        if address_only_keys is None:
            address_only_keys = cls.address_only_keys

        return near_dupe_hashes(
            labels,
            values,
            languages=languages,
            with_name=cls.with_name,
            with_address=with_address,
            with_unit=with_unit,
            with_city_or_equivalent=with_city_or_equivalent,
            with_small_containing_boundaries=with_small_containing_boundaries,
            with_postal_code=with_postal_code,
            with_latlon=with_latlon,
            latitude=lat,
            longitude=lon,
            geohash_precision=geohash_precision,
            name_and_address_keys=name_and_address_keys,
            name_only_keys=name_only_keys,
            # Pass the resolved local so a caller-supplied override is
            # honoured (previously cls.address_only_keys was always used).
            address_only_keys=address_only_keys)
示例#4
0
文件: dedupe.py 项目: jayredgun/lieu
    def near_dupe_hashes(cls, address, languages=None,
                         with_address=True,
                         with_unit=False,
                         with_city_or_equivalent=False,
                         with_small_containing_boundaries=False,
                         with_postal_code=False,
                         with_zip5=False,
                         with_latlon=True,
                         geohash_precision=None,
                         name_and_address_keys=None,
                         name_only_keys=None,
                         address_only_keys=None):
        """Compute near-duplicate hashes for ``address``.

        Wraps the module-level ``near_dupe_hashes``, filling in class-level
        defaults. If the address carries a
        ``AddressComponents.HOUSE_NUMBER_BASE`` entry, that key is removed
        and a second variant of the address is hashed as well, in which
        ``AddressComponents.HOUSE_NUMBER`` is replaced by the base value.

        Args:
            address: mapping of address components.
            languages: languages to hash with. When None they are derived
                via ``cls.address_languages``.
            with_address, with_unit, with_city_or_equivalent,
            with_small_containing_boundaries, with_postal_code:
                forwarded unchanged to the underlying hash function.
            with_zip5: forwarded as ``use_zip5`` to
                ``cls.address_labels_and_values``.
            with_latlon: forwarded unchanged, except that it is forced to
                False when the address lacks a latitude or a longitude.
            geohash_precision: when None,
                ``cls.DEFAULT_GEOHASH_PRECISION`` is used.
            name_and_address_keys, name_only_keys, address_only_keys:
                when None, the class attribute of the same name is used.

        Returns:
            A list of hashes in first-seen order; a hash already produced by
            an earlier address variant is not appended again. An empty list
            is returned if any variant yields empty or mismatched
            labels/values.
        """
        lat = address.get(Coordinates.LATITUDE)
        lon = address.get(Coordinates.LONGITUDE)
        if lat is None or lon is None:
            # No usable coordinates: pass placeholder values and disable
            # lat/lon-based hashing so the placeholders are not used.
            lat = 0.0
            lon = 0.0
            with_latlon = False

        if geohash_precision is None:
            geohash_precision = cls.DEFAULT_GEOHASH_PRECISION

        if languages is None:
            languages = cls.address_languages(address)

        base_house_number = None
        address_with_base_house_number = None
        if AddressComponents.HOUSE_NUMBER_BASE in address:
            base_house_number = address[AddressComponents.HOUSE_NUMBER_BASE]
            # Work on a copy without the base key so the caller's mapping is
            # not mutated.
            address = {k: v for k, v in six.iteritems(address) if k != AddressComponents.HOUSE_NUMBER_BASE}
            address_with_base_house_number = address.copy()
            address_with_base_house_number[AddressComponents.HOUSE_NUMBER] = base_house_number

        if name_only_keys is None:
            name_only_keys = cls.name_only_keys

        if name_and_address_keys is None:
            name_and_address_keys = cls.name_and_address_keys

        if address_only_keys is None:
            address_only_keys = cls.address_only_keys

        all_hashes = []
        all_hashes_set = set()

        for variant in (address, address_with_base_house_number):
            if variant is None:
                continue

            labels, values = cls.address_labels_and_values(variant, use_zip5=with_zip5)
            if not (labels and values and len(labels) == len(values)):
                # NOTE(review): this discards hashes already collected from
                # an earlier variant -- confirm that is intended.
                return []

            hashes = near_dupe_hashes(labels, values, languages=languages,
                                      with_name=cls.with_name,
                                      with_address=with_address,
                                      with_unit=with_unit,
                                      with_city_or_equivalent=with_city_or_equivalent,
                                      with_small_containing_boundaries=with_small_containing_boundaries,
                                      with_postal_code=with_postal_code,
                                      with_latlon=with_latlon,
                                      latitude=lat,
                                      longitude=lon,
                                      geohash_precision=geohash_precision,
                                      name_and_address_keys=name_and_address_keys,
                                      name_only_keys=name_only_keys,
                                      # Pass the resolved local so a
                                      # caller-supplied override is honoured
                                      # (previously cls.address_only_keys was
                                      # always used).
                                      address_only_keys=address_only_keys)

            # Only hashes not seen in a previous variant are appended.
            all_hashes.extend([h for h in hashes if h not in all_hashes_set])
            all_hashes_set |= set(hashes)

        return all_hashes