예제 #1
0
def _add_website_and_provider(site: schema.NormalizedLocation,
                              entry: dict) -> None:
    """Attach the website contact from `entry` to `site` and set its provider.

    A contact is built from ``entry["link"]`` via `_make_website_contact`.
    When that helper returns ``None`` the site is left untouched. Otherwise
    the contact is appended to ``site.contact`` and
    ``site.parent_organization`` is set to whatever `_lookup_provider`
    returns for that contact.
    """
    # A new contact object is built on every call, even though many sites
    # may share the same website.
    web_contact = _make_website_contact(entry["link"])
    if web_contact is None:
        return

    # `site.contact` may be unset, so make sure there is a list to append to.
    site.contact = site.contact or []
    site.contact.append(web_contact)

    # Try to recognise a well-known provider from the website URL.
    site.parent_organization = _lookup_provider(web_contact)
예제 #2
0
def _add_id(site: schema.NormalizedLocation) -> None:
    """Generate source and site IDs for `site` and store them on it in place.

    The source ID (``site.source.id``) is produced by `_md5_hash` from the
    site's name and address fields. When none of those are set, the website
    or phone of each contact is hashed instead. The site ID (``site.id``) is
    then derived from ``site.source`` by `_make_site_id`.
    """
    # We don't have a stable site ID or name from the source document, so
    # derive one by hashing whatever name and address info we do have.
    # These are likely to be more stable than the phone or website info.
    # The `page` and `provider` numbers of the source entry are deliberately
    # not used, because they are sensitive to layout changes in the document.
    address = site.address  # may be None, hence the getattr() defaults below
    candidate_data: List[Optional[Text]] = [
        value
        for value in (
            site.name,
            getattr(address, "street1", None),
            getattr(address, "city", None),
            getattr(address, "state", None),
            getattr(address, "zip", None),
        )
        if value
    ]

    # Fall back to website or phone info if we don't have concrete name or
    # location info.
    if not candidate_data:
        # `site.contact` can be None when no contact was ever attached;
        # treat that as "no contacts" instead of raising a TypeError.
        candidate_data.extend(
            c.website or c.phone for c in (site.contact or [])
        )

    site.source.id = _md5_hash(candidate_data)
    site_id = _make_site_id(site.source)
    logger.debug("Site ID: %s", site_id)
    site.id = site_id
예제 #3
0
def _process_location(
    normalized_location: location.NormalizedLocation,
) -> Optional[location.NormalizedLocation]:
    """Enrich a copy of `normalized_location` and return it, or ``None``.

    The input object itself is not modified; all enrichment steps are applied
    to a copy. ``None`` is returned (after logging a warning) when the
    enriched copy fails `_valid_address` or has no ``location`` value.
    """
    enriched = normalized_location.copy()

    # The enrichment steps run in this fixed order, each mutating the copy.
    enrichment_steps = (
        _add_provider_from_name,
        _add_source_link,
        _add_provider_tag,
        _normalize_phone_format,
    )
    for step in enrichment_steps:
        step(enriched)

    address_ok = _valid_address(enriched)
    if not address_ok:
        logger.warning(
            "Skipping source location %s because its address could not be validated: %s",
            normalized_location.id,
            normalized_location.address,
        )
        return None

    if enriched.location:
        return enriched

    logger.warning(
        "Skipping source location %s because it does not have a location (lat/lng)",
        normalized_location.id,
    )
    return None
예제 #4
0
def _add_provider_from_name(loc: location.NormalizedLocation) -> None:
    """Fill in provider link and parent organization derived from the name.

    `normalize.provider_id_from_name` is asked to resolve ``loc.name`` into
    an ``(authority, id)`` pair. Nothing happens when the name is empty or
    no pair is returned. Otherwise a link for that authority is appended
    unless `_generate_link_map` already has an entry for it, and
    ``loc.parent_organization`` is set only when it is currently empty.
    """
    name = loc.name
    resolved = normalize.provider_id_from_name(name) if name else None
    if not resolved:
        return

    authority, authority_id = resolved

    # Only add the link when this authority is not already present.
    known_authorities = _generate_link_map(loc)
    if str(authority) not in known_authorities:
        current_links = loc.links or []
        loc.links = [
            *current_links,
            location.Link(authority=authority, id=authority_id),
        ]

    # An existing parent organization is never overwritten.
    if loc.parent_organization:
        return
    loc.parent_organization = location.Organization(id=authority)
예제 #5
0
def _process_location(
    normalized_location: location.NormalizedLocation,
) -> Optional[location.NormalizedLocation]:
    """Enrich a copy of `normalized_location` and return it, or ``None``.

    The input object itself is not modified. ``None`` is returned when the
    enriched copy fails `_valid_address` or has no ``location`` value.
    """
    enriched = normalized_location.copy()

    # Each step mutates the copy in place; order is preserved.
    for step in (_add_provider_from_name, _add_source_link):
        step(enriched)

    # The address check runs first; the location check only runs if it passes.
    if not _valid_address(enriched) or not enriched.location:
        return None

    return enriched
예제 #6
0
def _add_source_link(loc: location.NormalizedLocation) -> None:
    """Append a link built from ``loc.source`` unless one already exists.

    Nothing is done when ``loc.source`` is missing, when its ``source`` or
    ``id`` is empty, or when `_generate_link_map` already contains an entry
    for that source. Otherwise ``loc.links`` is replaced with a new list
    that ends with the source link.
    """
    src = loc.source
    if not src or not src.source or not src.id:
        return

    already_linked = str(src.source) in _generate_link_map(loc)
    if not already_linked:
        source_link = location.Link(authority=src.source, id=src.id)
        loc.links = [*(loc.links or []), source_link]
예제 #7
0
def _add_provider_tag(loc: location.NormalizedLocation) -> None:
    """Append a `PROVIDER_TAG` link carrying the parent organization id.

    Nothing is done when ``loc.parent_organization`` or its ``id`` is empty,
    or when `_generate_link_map` already has a `PROVIDER_TAG` entry.
    Otherwise ``loc.links`` is replaced with a new list ending in a link
    whose id is the stringified parent organization id.
    """
    org = loc.parent_organization
    if not org or not org.id:
        return

    if PROVIDER_TAG in _generate_link_map(loc):
        return

    tag_link = location.Link(authority=PROVIDER_TAG, id=str(org.id))
    loc.links = [*(loc.links or []), tag_link]
예제 #8
0
def calculate_content_hash(loc: location.NormalizedLocation) -> str:
    """Return the MD5 hex digest of a location's content, ignoring its source.

    The location is dumped to a dict without ``None`` values and without the
    ``source`` field, serialized with `orjson` using sorted keys, and the
    resulting bytes are hashed.
    """
    content = loc.dict(exclude={"source"}, exclude_none=True)
    # Sorted keys make the serialized bytes independent of dict key order.
    serialized = orjson.dumps(content, option=orjson.OPT_SORT_KEYS)

    digest = hashlib.md5()
    digest.update(serialized)
    return digest.hexdigest()