def _get_lat_lng(site: dict) -> Optional[schema.LatLng]:
    """Return the site's coordinates, swapping lat/lng when only the swap is in bounds.

    Returns ``None`` (after logging a warning) when neither orientation passes
    ``_in_bounds`` or when pydantic rejects the values.
    """
    try:
        as_given = schema.LatLng(latitude=site["lat"], longitude=site["lng"])
        if _in_bounds(as_given):
            return as_given

        # In the CT data source, some lat/lng pairs are flipped, so try the
        # swapped orientation before giving up on the point.
        swapped = schema.LatLng(
            latitude=as_given.longitude,
            longitude=as_given.latitude,
        )
        if _in_bounds(swapped):
            return swapped

        logger.warning(
            "Out of bounds and unflippable lat/lng for %s (%s)",
            site["_id"],
            as_given,
        )
        return None
    except pydantic.ValidationError as err:
        logger.warning("Invalid or missing lat/lng for %s: %s", site["_id"], str(err))
        return None
def test_bounding_boxes_contains():
    """Check ``BoundingBoxes.contains`` using two boxes that meet at latitude 0."""
    lower_box = validation.BoundingBox(
        latitude=validation.MinMax(minimum=-10.0, maximum=0.0),
        longitude=validation.MinMax(minimum=-20.1, maximum=20.1),
    )
    upper_box = validation.BoundingBox(
        latitude=validation.MinMax(minimum=-0.0, maximum=10.0),
        longitude=validation.MinMax(minimum=-20.1, maximum=20.1),
    )
    boxes = validation.BoundingBoxes(boxes=[lower_box, upper_box])

    def is_inside(latitude, longitude):
        return boxes.contains(location.LatLng(latitude=latitude, longitude=longitude))

    # One point inside each box, and one outside both.
    assert is_inside(5, 5)
    assert is_inside(-5, 5)
    assert not is_inside(50, 50)

    # Bounding box boundaries are exclusive
    assert not is_inside(0, 0)
def test_bounding_box_contains():
    """Check ``BoundingBox.contains`` for points inside and outside a single box."""
    latitude_range = validation.MinMax(minimum=-10.0, maximum=0.0)
    longitude_range = validation.MinMax(minimum=-20.0, maximum=20.0)
    box = validation.BoundingBox(latitude=latitude_range, longitude=longitude_range)

    inside = location.LatLng(latitude=-5, longitude=10)
    assert box.contains(inside)

    # Latitude is outside the range while longitude is inside.
    bad_latitude = location.LatLng(latitude=10, longitude=10)
    assert not box.contains(bad_latitude)

    # Longitude is outside the range while latitude is inside.
    bad_longitude = location.LatLng(latitude=-5, longitude=100)
    assert not box.contains(bad_longitude)
def _get_location(site: dict): if site["latitude"] == "" or site["longitude"] == "": return None return schema.LatLng( latitude=float(site["latitude"]), longitude=float(site["longitude"]), )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``immunizenevada_org`` site.

    Args:
        site: Raw site record; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    name = _get_title(site["title"])
    address = _get_address(site["address"])
    coordinates = schema.LatLng(latitude=site["lat"], longitude=site["lng"])
    contacts = _get_contacts(site)
    links = _get_links(site)
    provenance = schema.Source(
        source="immunizenevada_org",
        id=site["id"],
        fetched_at=timestamp,
        fetched_from_uri="https://www.immunizenevada.org/covid-19-vaccine-locator",
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=links,
        notes=None,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``mo_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["USER_Provider_Name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    provenance = schema.Source(
        source="mo_arcgis",
        id=attributes["GlobalID"],
        fetched_from_uri="https://www.arcgis.com/apps/webappviewer/index.html?id=ab04156a03584e31a14ae2eb36110c20",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``nyc_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members.
        timestamp: Passed to ``_get_source`` together with the site.
    """
    attributes = site["attributes"]
    location_id = f"nyc_arcgis:{attributes['LocationID']}"
    name = attributes["FacilityName"]
    address = schema.Address(
        street1=attributes["Address"],
        street2=attributes["Address2"],
        city=attributes["Borough"],
        state="NY",
        zip=attributes["Zipcode"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    inventory = _get_inventory(site)

    # Any value other than the exact string "Yes" is reported as "no".
    if attributes["ADA_Compliant"] == "Yes":
        wheelchair = "yes"
    else:
        wheelchair = "no"
    access = schema.Access(wheelchair=wheelchair)

    parent_organization = _get_parent_organization(site)
    notes = _get_notes(site)
    provenance = _get_source(site, timestamp)

    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        opening_hours=opening_hours,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent_organization,
        notes=notes,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``ri_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["USER_Name"]
    address = schema.Address(
        street1=attributes["USER_Address"],
        street2=None,
        city=attributes["USER_City_Town"],
        state="RI",
        zip=attributes["ZIPCODE"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    provenance = schema.Source(
        source="ri_arcgis",
        id=attributes["OBJECTID"],
        fetched_from_uri="https://rihealth.maps.arcgis.com/apps/instant/nearby/index.html?appid=a25f35833533498bac3f724f92a84b4e",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``pa:arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["Facility_Name"]
    address = schema.Address(
        street1=attributes["Address"],
        street2=attributes["Address_2"],
        city=attributes["City"],
        state="PA",
        zip=attributes["ZIP_Code"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    provenance = schema.Source(
        source="pa:arcgis",
        id=attributes["Clinic_ID"],
        fetched_from_uri="https://padoh.maps.arcgis.com/apps/webappviewer/index.html?id=e6f78224c6fe4313a1f70b56f553c357",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=provenance,
    )
def _get_location(site: dict) -> Optional[schema.LatLng]: # Sometimes geometry is not included in the site data lat = site["attributes"].get("lat") lon = site["attributes"].get("lon") if lat and lon: return schema.LatLng( latitude=lat, longitude=lon, ) elif site.get("geometry", None): return schema.LatLng( latitude=site["geometry"]["y"], longitude=site["geometry"]["x"], ) else: return None
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``md_arcgis`` feature.

    The source id is the part of the location id after its last ``:``.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    id_ = _get_id(site)
    name = site["attributes"]["name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    access = _get_access(site)
    notes = _get_notes(site)

    fetched_from_uri = f"https://adhsgis.maps.arcgis.com/apps/opsdashboard/index.html#/{site['attributes']['service_item_id']}"  # noqa: E501
    source_id = id_.split(":")[-1]
    published_at = _get_published_at(site)
    provenance = schema.Source(
        data=site,
        fetched_at=timestamp,
        fetched_from_uri=fetched_from_uri,
        id=source_id,
        published_at=published_at,
        source="md_arcgis",
    )

    return schema.NormalizedLocation(
        id=id_,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        opening_hours=opening_hours,
        availability=availability,
        access=access,
        notes=notes,
        source=provenance,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``fl_state`` site.

    Args:
        site: Raw site record; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    source_name = "fl_state"
    location_id = f"{source_name}:{_get_id(site)}"
    name = site["title"]

    # The postal code is looked up with .get(), so it may be absent.
    address = schema.Address(
        street1=site["address"],
        street2=None,
        city=site["location"]["city"],
        state="FL",
        zip=site["location"].get("postal_code", None),
    )

    latitude = convert_lat_lng(site["location"]["lat"])
    longitude = convert_lat_lng(site["location"]["lng"])
    coordinates = schema.LatLng(latitude=latitude, longitude=longitude)

    contacts = _get_contacts(site)
    notes = _get_notes(site)
    provenance = schema.Source(
        source=source_name,
        id=site["id"],
        fetched_from_uri="https://floridahealthcovid19.gov/vaccines/vaccine-locator/",
        fetched_at=timestamp,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``in_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["Name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    provenance = schema.Source(
        source="in_arcgis",
        id=attributes["GlobalID"],
        fetched_from_uri="https://experience.arcgis.com/experience/24159814f1dd4f69b6c22e7e87bca65b",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``sc_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["loc_name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    active = _get_activated(site)
    provenance = schema.Source(
        source="sc_arcgis",
        id=attributes["GlobalID"],
        fetched_from_uri="https://opendata.arcgis.com/datasets/bbd8924909264baaa1a5a1564b393063_0.geojson",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        # There is an "Appointments" field in the data, but it is unclear
        # whether it means "an appointment is required" or "an appointment is
        # available". Left blank: this will likely need phone bankers and/or
        # the web team to find availability.
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=active,
        source=provenance,
    )
def _get_location(site: dict) -> Optional[schema.LatLng]: latitude = site["lat"] longitude = site["long"] if latitude == "" or longitude == "": return None return schema.LatLng( latitude=float(latitude), longitude=float(longitude), )
def _get_location(site: dict) -> Optional[schema.LatLng]: if site["geometry"]["x"] == "NaN": return None x, y = transformer.transform(site["geometry"]["x"], site["geometry"]["y"]) return schema.LatLng( latitude=x, longitude=y, )
def _get_lat_lng(site: dict) -> Optional[schema.LatLng]:
    """Return the site's geometry as a ``schema.LatLng`` if it is inside ``BOUNDING_BOX``.

    Returns ``None`` when either the latitude or the longitude falls outside
    the corresponding ``BOUNDING_BOX`` range.
    """
    geometry = site["geometry"]
    candidate = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])

    latitude_range = BOUNDING_BOX.latitude
    if latitude_range.contains(candidate.latitude) and BOUNDING_BOX.longitude.contains(
        candidate.longitude
    ):
        return candidate
    return None
def try_get_lat_long(site):
    """Return the site's geometry as a ``schema.LatLng``, or ``None`` if a key is missing.

    Only ``KeyError`` is handled; any other exception propagates.
    """
    try:
        return schema.LatLng(
            latitude=site["geometry"]["y"],
            longitude=site["geometry"]["x"],
        )
    except KeyError:
        # A missing "geometry", "y" or "x" key means there is no location.
        return None
def _get_lat_lng(site: dict) -> Optional[schema.LatLng]:
    """Return the site's geometry as a ``schema.LatLng`` if it is inside ``BOUNDING_BOX``.

    Returns ``None`` when either the latitude or the longitude falls outside
    the corresponding ``BOUNDING_BOX`` range.
    """
    geometry = site["geometry"]
    candidate = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])

    # Some locations in the AZ data set have lat/lng near the south pole. Only
    # values inside the bounding box are kept.
    latitude_range = BOUNDING_BOX.latitude
    if latitude_range.contains(candidate.latitude) and BOUNDING_BOX.longitude.contains(
        candidate.longitude
    ):
        return candidate
    return None
def _get_location(site: dict) -> Optional[schema.LatLng]: float_pattern = r"-?\d+\.\d+" match = re.search(f"(?P<lng>{float_pattern}) (?P<lat>{float_pattern})", site["location"]) if match: """ "POINT (-73.04276 41.55975)" """ return schema.LatLng( latitude=float(match.group("lat")), longitude=float(match.group("lng")), ) else: return None
def _get_lat_lng(loc: GMVLocation) -> Optional[location.LatLng]:
    """Return the location's position as a ``location.LatLng``.

    Returns ``None`` when ``loc.position`` is falsy (logged at debug level) or
    when either coordinate is falsy (logged as a warning).
    """
    if not loc.position:
        logger.debug("No lat-lng for location %s (%s)", loc.id, loc.name)
        return None

    if loc.position.latitude and loc.position.longitude:
        return location.LatLng(
            latitude=loc.position.latitude,
            longitude=loc.position.longitude,
        )

    # Skip positions that are missing a value
    logger.warning("Skipping position with missing coordinates")
    return None
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one site of the ``SOURCE_NAME`` source.

    Args:
        site: Raw site record; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    source_name = SOURCE_NAME

    # NOTE: we use `get` where the field is optional in our data source, and
    # ["key"] access where it is not.
    location_id = f"{source_name}:{_get_id(site)}"
    name = site["locationName"]
    address = schema.Address(
        street1=site.get("addressLine1"),
        street2=site.get("addressLine2"),
        city=site.get("city"),
        state=_get_state(site),
        zip=_get_good_zip(site),
    )
    coordinates = schema.LatLng(latitude=site["latitude"], longitude=site["longitude"])
    contacts = _get_contacts(site)
    notes = site.get("description")

    # Since this could be nullable we make sure to only provide it if it's
    # True or False
    walk_in = site.get("walkIn")
    if walk_in is None:
        availability = None
    else:
        availability = schema.Availability(drop_in=walk_in)

    access = schema.Access(
        walk=site.get("walkupSite"),
        drive=site.get("driveupSite"),
        wheelchair=_get_wheelchair(site),
    )

    # IF supply_level is UNKNOWN, don't say anything about it
    if _get_supply_level(site):
        inventory = []
        for vaccine in site["vaccineTypes"]:
            # Vaccines without a recognised type are left out.
            if _get_vaccine_type(vaccine) is None:
                continue
            inventory.append(
                schema.Vaccine(
                    vaccine=_get_vaccine_type(vaccine),
                    supply_level=_get_supply_level(site),
                )
            )
    else:
        inventory = None

    parent_organization = schema.Organization(
        id=site.get("providerId"),
        name=site.get("providerName"),
    )
    provenance = schema.Source(
        source=source_name,
        id=site["locationId"],
        fetched_from_uri="https://apim-vaccs-prod.azure-api.net/web/graphql",
        fetched_at=timestamp,
        published_at=site["updatedAt"],
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        availability=availability,
        access=access,
        inventory=inventory,
        parent_organization=parent_organization,
        source=provenance,
    )
def _get_normalized_location(
    site: dict, timestamp: str
) -> Optional[schema.NormalizedLocation]:
    """Build the normalized location for one ``sc_arcgis`` feature.

    Returns ``None`` when the site's ``loc_name`` is longer than 256
    characters. A ``ValidationError`` raised while parsing contacts is logged
    and the contact is left as ``None``.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    if len(site["attributes"]["loc_name"]) > 256:
        return None

    # Contact parsing for this site is a little flaky. Ensure that a bug for
    # a single entry does not halt overall scraping.
    contacts = None
    try:
        contacts = _get_contacts(site)
    except ValidationError:
        logger.warning(
            "Errored while trying to parse contact from %s, %s, or %s",
            site["attributes"]["SitePhone"],
            site["attributes"]["Contact"],
            site["attributes"]["URL"],
        )

    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["loc_name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    inventory = _get_inventory(site)
    active = _get_activated(site)
    provenance = schema.Source(
        source="sc_arcgis",
        id=attributes["GlobalID"],
        fetched_from_uri="https://opendata.arcgis.com/datasets/bbd8924909264baaa1a5a1564b393063_0.geojson",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        # There is an "Appointments" field in the data, but it is unclear
        # whether it means "an appointment is required" or "an appointment is
        # available". Left blank: this will likely need phone bankers and/or
        # the web team to find availability.
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=active,
        source=provenance,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``il_sfsites`` record.

    The location id is derived by ``_id`` from the coordinates, name and
    address, and is used both in the normalized id and as the source id.

    Args:
        site: Raw site record; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    source_id = "il_sfsites"
    name = site["Testing_Center__c"]
    coordinates = schema.LatLng(
        latitude=site["Geolocation__Latitude__s"],
        longitude=site["Geolocation__Longitude__s"],
    )
    address = _get_address(site)
    location_id = _id(coordinates, name, address)

    # The location type, when present, is the only note recorded.
    notes = [site["Location_Type__c"]] if "Location_Type__c" in site else []

    contact = _get_contact(site)
    availability = schema.Availability(appointments=True)
    parent_organization = _get_parent_organization(name)
    provenance = schema.Source(
        source=source_id,
        id=location_id,
        fetched_from_uri="https://coronavirus.illinois.gov/s/vaccination-location",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=f"{source_id}:{location_id}",
        name=name,
        address=address,
        location=coordinates,
        contact=contact,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=None,
        access=None,
        parent_organization=parent_organization,
        links=None,
        notes=notes,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``mn_gov`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["SiteName"]
    address = schema.Address(
        street1=attributes["AddrLine1"],
        street2=None,
        city=attributes["City"],
        state=attributes["State"],  # a few WI locations are included
        zip=attributes["Zip"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    opening_dates = _get_opening_dates(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    notes = _get_notes(site)

    # The is_Active field only ever says "Active" in the current data, so any
    # other value is treated as unknown rather than inactive.
    active = None
    if attributes["is_Active"] == "Active":
        active = True

    provenance = schema.Source(
        source="mn_gov",
        id=attributes["globalid"],
        fetched_from_uri="https://services.arcgis.com/9OIuDHbyhmH91RfZ/arcgis/rest/services/CovidVacLocations_view_prod/FeatureServer/0",  # noqa: E501
        fetched_at=timestamp,
        published_at=attributes["UpdateDate"],
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        availability=availability,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=active,
        source=provenance,
    )
def _get_lat_lng(geometry: dict, id: str) -> Optional[schema.LatLng]:
    """Return the geometry's coordinates as a ``schema.LatLng`` if inside ``BOUNDING_BOX``.

    ``geometry["coordinates"]`` is read as ``[longitude, latitude]``. Returns
    ``None`` (after logging a warning) when the point is out of bounds or when
    pydantic rejects the values.

    Args:
        geometry: Mapping with a ``coordinates`` sequence.
        id: Identifier used only in the warning messages.
    """
    coordinates = geometry["coordinates"]
    raw_latitude = coordinates[1]
    raw_longitude = coordinates[0]

    try:
        candidate = schema.LatLng(latitude=raw_latitude, longitude=raw_longitude)
        in_bounds = BOUNDING_BOX.latitude.contains(
            candidate.latitude
        ) and BOUNDING_BOX.longitude.contains(candidate.longitude)
        if in_bounds:
            return candidate
        logger.warning(
            "Out of bounds lat/lng for %s (%s). Returning None",
            id,
            f"lat={raw_latitude}, lng={raw_longitude}",
        )
    except pydantic.ValidationError:
        logger.warning(
            "Invalid lat/lng for %s (%s). Returning None",
            id,
            f"lat={raw_latitude}, lng={raw_longitude}",
        )

    return None
def convert_to_schema(record, timestamp):
    """Build the normalized location for one provider record of ``SOURCE_ID``.

    Args:
        record: Raw provider record; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    provider_id = record["ProviderID"]
    location_id = f"{SOURCE_ID}:{provider_id}"
    name = record["ProviderName"]
    address = location.Address(
        street1=record["Address1"],
        city=record["City"],
        state=record["State"],
        zip=record["Zipcode"],
    )
    coordinates = location.LatLng(
        latitude=record["LatCoord"],
        longitude=record["LngCoord"],
    )
    active = record["ActiveFlag"]

    # A falsy result from _get_contacts is recorded as None.
    contacts = _get_contacts(record)
    if not contacts:
        contacts = None

    provenance = location.Source(
        id=record["ProviderID"],
        source=SOURCE_ID,
        fetched_from_uri="https://vaccinate.iowa.gov/providers/SearchProviders",
        fetched_at=timestamp,
        data=record,
    )

    return location.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        active=active,
        contact=contacts,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one ``ak_arcgis`` feature.

    Args:
        site: Raw feature with ``attributes`` and ``geometry`` members; it is
            stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    location_id = _get_id(site)
    attributes = site["attributes"]
    name = attributes["vaccinationSite"]
    address = schema.Address(
        street1=attributes["address"],
        street2=None,
        city=attributes["city"],
        state="AK",
        zip=attributes["zipcode"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    availability = _get_availability(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    provenance = schema.Source(
        source="ak_arcgis",
        id=attributes["globalid"],
        fetched_from_uri="https://services1.arcgis.com/WzFsmainVTuD5KML/ArcGIS/rest/services/COVID19_Vaccine_Site_Survey_API/FeatureServer/0",  # noqa: E501
        fetched_at=timestamp,
        published_at=_get_published_at(site),
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=provenance,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build the normalized location for one feature of the ``SOURCE_NAME`` source.

    The location is ``None`` when the site has no (or a falsy) ``geometry``.

    Args:
        site: Raw feature with an ``attributes`` member and an optional
            ``geometry`` member; it is stored unchanged as the source data.
        timestamp: Value recorded as the source's ``fetched_at``.
    """
    # Sometimes geometry is not included in the site data
    geometry = site.get("geometry", None)
    coordinates = (
        schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
        if geometry
        else None
    )

    location_id = f"{SOURCE_NAME}:{_get_id(site)}"
    name = site["attributes"]["NAME"]
    address = _get_address(site)
    contacts = _get_contacts(site)
    availability = _get_availability(site)
    inventory = _get_inventory(site)
    provenance = schema.Source(
        source=SOURCE_NAME,
        id=_get_id(site),
        fetched_from_uri="https://tdem.maps.arcgis.com/apps/webappviewer/index.html?id=3700a84845c5470cb0dc3ddace5c376b",  # noqa: E501
        fetched_at=timestamp,
        published_at=_get_published_at(site),
        data=site,
    )

    # Fields with no value are still passed explicitly as None.
    return schema.NormalizedLocation(
        id=location_id,
        name=name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=provenance,
    )
def normalize(site: dict, timestamp: str) -> dict:
    """Build the normalized location for one ``il_juvare`` site and return it as a dict.

    The return value is the result of calling ``.dict()`` on the
    ``NormalizedLocation``, not a string.

    input keys:
    - "organizer": Always "IL-IDPH".
    - "slug": Unique ID, may be a UUID or something like "7vbvl-29-31". Used in URLs.
    - "name": Name, often includes dates and vaccine type but no consistent format.
    - "location": Street address, no consistent format.
    - "dateFrom": Start time, always formatted like "4/24/2021, 9:30 AM".
    - "dateTo": End time, may be null, otherwise always formatted like "4/24/2021, 4:30 PM".
    - "lat": Latitude, may be null.
    - "lon": Longitude, may be null.
    - "search": Not interesting, just a bunch of the other fields joined together.

    Args:
        site: Raw site record with the keys listed above.
        timestamp: Passed to ``_get_source`` together with the site.
    """
    building, address = _get_building_and_address(site)

    # The location is only set when both coordinates are truthy.
    if site["lat"] and site["lon"]:
        latlng: Optional[location.LatLng] = location.LatLng(
            latitude=site["lat"], longitude=site["lon"])
    else:
        latlng = None

    normalized = location.NormalizedLocation(
        # Dashes in the slug are replaced with underscores for the id.
        id=f"il_juvare:{site['slug'].replace('-','_')}",
        name=_filter_name(building, site),
        address=address,
        location=latlng,
        contact=_get_contact(site),
        opening_dates=_get_opening_dates(site),
        opening_hours=_get_opening_hours(site),
        availability=location.Availability(appointments=True),
        inventory=_get_inventory(site),
        access=_get_access(site),
        active=True,
        source=_get_source(site, timestamp),
    ).dict()
    return normalized