def _get_normalized_site(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw site record.

    Args:
        site: Raw record; its ``"attributes"`` mapping supplies the name and
            address fields, and the whole record is stored as source data.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location, with an id of the form
        ``"<SOURCE_NAME>:<generated id>"``.
    """
    attrs = site["attributes"]
    site_name = attrs["SITE_NAME"].title()
    coordinates = _get_lat_lng(site)
    address = schema.Address(
        street1=attrs["Match_addr"],
        street2=None,
        city=attrs["CITY"].title(),
        state=STATE,
        zip=str(attrs["ID_ZIPCODE"]),
    )
    # Named `site_id` (not `id`) so the builtin is not shadowed.
    site_id = _id(
        "51d4c310f1fe4d83a63e2b47acb77898", coordinates, site_name, address
    )
    location_id = f"{SOURCE_NAME}:{site_id}"
    source = schema.Source(
        source=SOURCE_NAME,
        id=site_id,
        fetched_from_uri=FETCHED_FROM_URI,
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` and ``"geometry"`` mappings.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The normalized location with id ``"nyc_arcgis:<LocationID>"``.
    """
    attrs = site["attributes"]
    location_id = f"nyc_arcgis:{attrs['LocationID']}"
    facility_name = attrs["FacilityName"]
    address = schema.Address(
        street1=attrs["Address"],
        street2=attrs["Address2"],
        city=attrs["Borough"],
        state="NY",
        zip=attrs["Zipcode"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    inventory = _get_inventory(site)

    # Only the exact string "Yes" counts as wheelchair accessible.
    if attrs["ADA_Compliant"] == "Yes":
        wheelchair = "yes"
    else:
        wheelchair = "no"
    access = schema.Access(wheelchair=wheelchair)

    parent_organization = _get_parent_organization(site)
    notes = _get_notes(site)
    source = _get_source(site, timestamp)

    return schema.NormalizedLocation(
        id=location_id,
        name=facility_name,
        address=address,
        location=coordinates,
        contact=contacts,
        opening_hours=opening_hours,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent_organization,
        notes=notes,
        source=source,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one raw site record into a ``schema.NormalizedLocation``.

    Args:
        site: Raw record; reads the ``"title"``, ``"address"``, ``"lat"``,
            ``"lng"`` and ``"id"`` keys directly and passes the whole record
            to the id, contact and link helpers.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    display_name = _get_title(site["title"])
    address = _get_address(site["address"])
    coordinates = schema.LatLng(latitude=site["lat"], longitude=site["lng"])
    contacts = _get_contacts(site)
    links = _get_links(site)
    source = schema.Source(
        source="immunizenevada_org",
        id=site["id"],
        fetched_at=timestamp,
        fetched_from_uri="https://www.immunizenevada.org/covid-19-vaccine-locator",
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=links,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record; ``site["attributes"]`` supplies the name and the
            source id, and the whole record is handed to the field helpers.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location, or ``None`` when the record's top-level
        ``"offers_vaccine"`` value is exactly ``"No"``.

    NOTE(review): the return annotation does not mention ``None`` even though
    the guard below returns it; callers must handle that case.
    """
    offers_vaccine = site.get("offers_vaccine")
    if offers_vaccine == "No":
        return None

    location_id = f"{SOURCE_NAME}:{_get_id(site)}"
    attrs = site["attributes"]
    site_name = attrs["name"]
    address = _get_address(site)
    coordinates = try_get_lat_long(site)
    contacts = _get_contacts(site)
    opening_dates = _get_opening_dates(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    access = _get_access(site)
    notes = _get_notes(site)
    active = _get_active(site)
    source = schema.Source(
        source=SOURCE_NAME,
        id=attrs["GlobalID"],
        fetched_from_uri="https://services.arcgis.com/8ZpVMShClf8U8dae/arcgis/rest/services/Covid19_Vaccination_Locations/FeatureServer/0",  # noqa: E501
        fetched_at=timestamp,
        published_at=_get_published_at(site),
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        availability=availability,
        inventory=None,
        access=access,
        parent_organization=None,
        links=None,  # TODO
        notes=notes,
        active=active,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` and ``"geometry"`` mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. The source id is the text after the last
        ``":"`` of the id returned by ``_get_id``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    access = _get_access(site)
    notes = _get_notes(site)

    dashboard_uri = f"https://adhsgis.maps.arcgis.com/apps/opsdashboard/index.html#/{attrs['service_item_id']}"  # noqa: E501
    # Keep only the trailing component of the "<prefix>:<id>" location id.
    source_id = location_id.rsplit(":", 1)[-1]
    source = schema.Source(
        data=site,
        fetched_at=timestamp,
        fetched_from_uri=dashboard_uri,
        id=source_id,
        published_at=_get_published_at(site),
        source="md_arcgis",
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        opening_hours=opening_hours,
        availability=availability,
        access=access,
        notes=notes,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record; ``site["attributes"]["location"]`` is used as the
            name and the whole record is handed to the field helpers.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location with id ``"<SOURCE_NAME>:<_get_id(site)>"``.
    """
    location_id = f"{SOURCE_NAME}:{_get_id(site)}"
    site_name = site["attributes"]["location"]
    address = _get_address(site)
    coordinates = _get_location(site)
    contacts = _get_contacts(site)
    opening_dates = _get_opening_dates(site)
    opening_hours = _get_opening_hours(site)
    inventory = _get_inventory(site)
    parent_organization = _get_parent(site)
    notes = _get_notes(site)
    source = schema.Source(
        source=SOURCE_NAME,
        id=_get_id(site),
        fetched_from_uri="https://publichealth.harriscountytx.gov/Resources/2019-Novel-Coronavirus/Register-for-COVID-19-Vaccine",  # noqa: E501
        fetched_at=timestamp,
        published_at=_get_published_at(site),
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=parent_organization,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one raw site record into a ``schema.NormalizedLocation``.

    Args:
        site: Raw record; reads ``"title"``, ``"address"``, ``"id"`` and the
            nested ``"location"`` mapping (``city``, ``lat``, ``lng`` and the
            optional ``postal_code``).
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location with id ``"fl_state:<_get_id(site)>"``.
    """
    source_name = "fl_state"
    location_id = f"{source_name}:{_get_id(site)}"
    title = site["title"]
    geo = site["location"]
    address = schema.Address(
        street1=site["address"],
        street2=None,
        city=geo["city"],
        state="FL",
        # The postal code may be absent; fall back to None.
        zip=geo.get("postal_code"),
    )
    coordinates = schema.LatLng(
        latitude=convert_lat_lng(geo["lat"]),
        longitude=convert_lat_lng(geo["lng"]),
    )
    contacts = _get_contacts(site)
    notes = _get_notes(site)
    source = schema.Source(
        source=source_name,
        id=site["id"],
        fetched_from_uri="https://floridahealthcovid19.gov/vaccines/vaccine-locator/",
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=title,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record; ``site["attributes"]`` supplies ``NAME``,
            ``ADDRESS``, ``CITY``, ``STATE``, ``ZIP`` and ``OBJECTID``.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["NAME"]
    address = schema.Address(
        street1=attrs["ADDRESS"],
        street2=None,
        city=attrs["CITY"],
        state=attrs["STATE"],
        zip=attrs["ZIP"],
    )
    coordinates = _get_location(site)
    contacts = _get_contacts(site)
    source = schema.Source(
        source="wi_arcgis_map",
        id=attrs["OBJECTID"],
        fetched_from_uri="https://dhsgis.wi.gov/server/rest/services/DHS_COVID19/COVID19_Vaccine_Provider_Sites/MapServer/0",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def normalize(site_blob: dict, timestamp: str) -> dict:
    """Convert one raw provider record into a normalized-location dict.

    Sample entry:
        {"providerId": 1013,
         "providerName": "\u1d42**York College - Health and Physical Education Complex - Queens",
         "vaccineBrand": "Pfizer", "address": "Jamaica, NY",
         "availableAppointments": "Y", "isShowable": true,
         "lastUpdated": "2021-04-23T20:04:24"}

    Args:
        site_blob: Raw provider record shaped like the sample above.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the ``schema.NormalizedLocation``.
    """  # noqa: E501
    provider_id = site_blob["providerId"]

    # Strip whatever NAME_CLEAN_RE matches, then surrounding whitespace.
    cleaned_name = NAME_CLEAN_RE.sub("", site_blob["providerName"]).strip()

    city_match = CITY_RE.search(site_blob["address"])
    city_name = city_match.group(1)

    # Only the exact flag "Y" means appointments are available.
    has_appointments = site_blob["availableAppointments"] == "Y"

    normalized = schema.NormalizedLocation(
        id=f"am_i_eligible_covid19vaccine_gov:{provider_id}",
        name=cleaned_name,
        address=schema.Address(city=city_name, state="NY"),
        availability=schema.Availability(appointments=has_appointments),
        inventory=_get_inventory(site_blob["vaccineBrand"]),
        links=[
            schema.Link(
                authority="am_i_eligible_covid19vaccine_gov",
                id=provider_id,
            ),
        ],
        source=_get_source(site_blob, timestamp),
    )
    return normalized.dict()
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` (``USER_Name``,
            ``USER_Address``, ``USER_City_Town``, ``ZIPCODE``, ``OBJECTID``)
            and ``"geometry"`` (``x``, ``y``) mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["USER_Name"]
    address = schema.Address(
        street1=attrs["USER_Address"],
        street2=None,
        city=attrs["USER_City_Town"],
        state="RI",
        zip=attrs["ZIPCODE"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    source = schema.Source(
        source="ri_arcgis",
        id=attrs["OBJECTID"],
        fetched_from_uri="https://rihealth.maps.arcgis.com/apps/instant/nearby/index.html?appid=a25f35833533498bac3f724f92a84b4e",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def _get_normalized_site(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw site record.

    Args:
        site: Raw record; ``site["attributes"]`` fields ``f2``, ``f3`` and
            ``f4`` are used as name, street and city respectively.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location, with an id of the form
        ``"<SOURCE_NAME>:<generated id>"``.
    """
    attrs = site["attributes"]
    site_name = attrs["f2"]
    coordinates = _get_lat_lng(site)
    address = schema.Address(
        street1=attrs["f3"],
        street2=None,
        city=attrs["f4"].title(),
        state=STATE,
    )
    # Named `site_id` (not `id`) so the builtin is not shadowed.
    site_id = _id(
        "8f23e1c3b5c54198ab60d2f729cb787d", coordinates, site_name, address
    )
    location_id = f"{SOURCE_NAME}:{site_id}"
    source = schema.Source(
        source=SOURCE_NAME,
        id=site_id,
        fetched_from_uri=FETCHED_FROM_URI,
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` (``Name``, ``GlobalID``) and
            ``"geometry"`` (``x``, ``y``) mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["Name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    source = schema.Source(
        source="in_arcgis",
        id=attrs["GlobalID"],
        fetched_from_uri="https://experience.arcgis.com/experience/24159814f1dd4f69b6c22e7e87bca65b",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` (``loc_name``, ``GlobalID``)
            and ``"geometry"`` (``x``, ``y``) mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Availability is deliberately left as
        ``None`` (see the inline comment).
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["loc_name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    active = _get_activated(site)
    source = schema.Source(
        source="sc_arcgis",
        id=attrs["GlobalID"],
        fetched_from_uri="https://opendata.arcgis.com/datasets/bbd8924909264baaa1a5a1564b393063_0.geojson",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        # The data has an "Appointments" field, but it is unclear whether it
        # means "an appointment is required" or "an appointment is
        # available". Left blank: phone bankers and/or the web team will
        # likely need to determine availability.
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=active,
        source=source,
    )
def normalize(site: dict) -> schema.NormalizedLocation:
    """Convert the parsed `site` into a normalized site location.

    Every field is read with ``dict.get``, so missing keys become ``None``
    before being handed to the schema objects and normalizing helpers.

    Args:
        site: Parsed site record.

    Returns:
        The normalized location, always marked ``active=True``.
    """
    site_name = site.get("name")
    site_address = schema.Address(
        street1=site.get("street1"),
        street2=site.get("street2"),
        city=site.get("city"),
        state=site.get("state"),
        zip=normalize_zip(site.get("zip")),
    )
    site_source = schema.Source(
        source=_SOURCE_NAME,
        id=_generate_id(site_name, site_address),
        fetched_from_uri=normalize_url(site.get("fetched_from_uri")),
        published_at=_normalize_date_string(site.get("published_at")),
        data=site,
    )
    county = site.get("county")
    opening_times = _normalize_opening_times(site.get("opening_times"))

    location_id = _make_site_id(site_source)

    # `opening_times`, when truthy, is indexed as (dates, hours).
    if opening_times:
        opening_dates = opening_times[0]
        opening_hours = opening_times[1]
    else:
        opening_dates = None
        opening_hours = None

    notes = [county] if county else None

    return schema.NormalizedLocation(
        name=site_name,
        id=location_id,
        source=site_source,
        address=site_address,
        active=True,  # this source updates weekly
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        notes=notes,
    )
def normalize_location(
    loc: GMVLocation, timestamp: datetime.datetime
) -> location.NormalizedLocation:
    """Convert a ``GMVLocation`` into a ``location.NormalizedLocation``.

    Args:
        loc: Parsed source location; its ``id``, ``name`` and ``updated_at``
            attributes are read directly, and ``loc.dict(exclude_none=True)``
            is stored as the source data.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location with id ``"<SOURCE_NAME>:<loc.id>"``.
    """
    location_id = f"{SOURCE_NAME}:{loc.id}"
    address = _get_address(loc)
    coordinates = _get_lat_lng(loc)
    contacts = _get_contacts(loc)
    availability = _get_availability(loc)
    inventory = _get_inventory(loc)
    parent_organization = _get_parent_organization(loc)
    links = _get_links(loc)
    source = location.Source(
        source=SOURCE_NAME,
        id=loc.id,
        fetched_from_uri=LOCATIONS_URL,
        fetched_at=timestamp,
        published_at=loc.updated_at,
        data=loc.dict(exclude_none=True),
    )
    return location.NormalizedLocation(
        id=location_id,
        name=loc.name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=inventory,
        access=None,
        parent_organization=parent_organization,
        links=links,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_site(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw site record.

    Args:
        site: Raw record; ``site["attributes"]["f3"]`` is the name and
            ``site["attributes"]["f5"]`` becomes the single note.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location, with no address and availability fixed to
        appointments-only (``drop_in=False``, ``appointments=True``).
    """
    attrs = site["attributes"]
    site_name = attrs["f3"]
    coordinates = _get_lat_lng(site)
    # No address exists for this source, so None is passed to the id helper.
    site_id = _id("d1a799c7f98e41fb8c6b4386ca6fe014", coordinates, site_name, None)
    location_id = f"{SOURCE_NAME}:{site_id}"
    contacts = _get_contact(site)
    availability = schema.Availability(drop_in=False, appointments=True)
    notes = [attrs["f5"]]
    source = schema.Source(
        source=SOURCE_NAME,
        id=site_id,
        fetched_from_uri=FETCHED_FROM_URI,
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=None,
        location=coordinates,
        contact=contacts,
        availability=availability,
        notes=notes,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` (``USER_Provider_Name``,
            ``GlobalID``) and ``"geometry"`` (``x``, ``y``) mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    provider_name = attrs["USER_Provider_Name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    source = schema.Source(
        source="mo_arcgis",
        id=attrs["GlobalID"],
        fetched_from_uri="https://www.arcgis.com/apps/webappviewer/index.html?id=ab04156a03584e31a14ae2eb36110c20",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=provider_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` (``Facility_Name``,
            ``Address``, ``Address_2``, ``City``, ``ZIP_Code``, ``Clinic_ID``)
            and ``"geometry"`` (``x``, ``y``) mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. Fields this source does not provide are
        passed explicitly as ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    facility_name = attrs["Facility_Name"]
    address = schema.Address(
        street1=attrs["Address"],
        street2=attrs["Address_2"],
        city=attrs["City"],
        state="PA",
        zip=attrs["ZIP_Code"],
    )
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    contacts = _get_contacts(site)
    source = schema.Source(
        # NOTE(review): this source name contains a colon, unlike the
        # underscore-separated names used by similar sources; confirm it is
        # intentional and accepted by the schema.
        source="pa:arcgis",
        id=attrs["Clinic_ID"],
        fetched_from_uri="https://padoh.maps.arcgis.com/apps/webappviewer/index.html?id=e6f78224c6fe4313a1f70b56f553c357",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=facility_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record; ``site["attributes"]`` supplies ``loc_name``,
            ``addr1``, ``addr2``, ``city``, ``zip``, ``prereg_comments`` and
            ``globalid``.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location. ``notes`` holds the pre-registration comment
        as a one-element list when it is truthy, otherwise ``None``.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]
    site_name = attrs["loc_name"]
    address = schema.Address(
        street1=attrs["addr1"],
        street2=attrs["addr2"],
        city=attrs["city"],
        state="AZ",
        zip=attrs["zip"],
    )
    coordinates = _get_lat_lng(site)
    contacts = _get_contacts(site)
    languages = _get_languages(site)
    opening_dates = _get_opening_dates(site)
    opening_hours = _get_opening_hours(site)
    inventory = _get_inventory(site)

    prereg_comments = attrs["prereg_comments"]
    notes = [prereg_comments] if prereg_comments else None

    source = schema.Source(
        source="az_arcgis",
        id=attrs["globalid"],
        fetched_from_uri="https://adhsgis.maps.arcgis.com/apps/opsdashboard/index.html#/5d636af4d5134a819833b1a3b906e1b6",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=languages,
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw provider record into a normalized-location dict.

    Two links (``ct_gov`` and ``ct_gov_network_id``) are always emitted. When
    ``provider_id_from_name`` recognizes the site name, a third link is added
    and the parent organization's id is set to that link's authority.

    Args:
        site: Raw provider record.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The ``.dict()`` form of the ``schema.NormalizedLocation``.
    """
    site_id = site["_id"]

    all_links = [
        schema.Link(authority="ct_gov", id=site_id),
        schema.Link(authority="ct_gov_network_id", id=site["networkId"]),
    ]
    parent_org = schema.Organization(name=site["networks"][0]["name"])

    provider_link = provider_id_from_name(site["name"])
    if provider_link is not None:
        # provider_link is indexed as (authority, id).
        provider_authority = provider_link[0]
        all_links.append(
            schema.Link(authority=provider_authority, id=provider_link[1])
        )
        parent_org.id = provider_authority

    location_id = f"ct_gov:{site['_id']}"
    display_name = site["displayName"]
    address = schema.Address(
        street1=site["addressLine1"],
        street2=site["addressLine2"],
        city=site["city"],
        state="CT",
        zip=site["zip"],
    )
    coordinates = _get_lat_lng(site)
    booking_contact = schema.Contact(
        contact_type="booking", phone=site["phone"], website=site["link"]
    )
    availability = schema.Availability(appointments=site["availability"])
    inventory = []
    for vaccine in site["providerVaccines"]:
        inventory.append(schema.Vaccine(vaccine=vaccine["name"]))
    access = schema.Access(drive=site["isDriveThru"])
    source = schema.Source(
        source="covidvaccinefinder_gov",
        id=site["_id"],
        fetched_from_uri="https://covidvaccinefinder.ct.gov/api/HttpTriggerGetProvider",  # noqa: E501
        fetched_at=timestamp,
        published_at=site["lastModified"],
        data=site,
    )

    normalized = schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=[booking_contact],
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent_org,
        links=all_links,
        notes=None,
        active=None,
        source=source,
    )
    return normalized.dict()
def minimal_location():
    """Return a ``location.NormalizedLocation`` with only ``id`` and ``source`` set.

    The location id ``"source:id"`` is the source name and source id joined
    by a colon.
    """
    minimal_source = location.Source(
        source="source",
        id="id",
        data={"id": "id"},
    )
    return location.NormalizedLocation(id="source:id", source=minimal_source)
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    The return annotation is ``dict`` because the function returns the result
    of ``schema.NormalizedLocation(...).dict()``, not a string.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is
        ``"me_maine_gov:"`` followed by ``_get_id(site)``.
    """
    return schema.NormalizedLocation(
        id=("me_maine_gov:" + _get_id(site)),
        name=_get_name(site),
        contact=_get_contacts(site),
        source=_get_source(site, timestamp),
        parent_organization=_get_organization(site),
        notes=_get_notes(site),
    ).dict()
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is
        ``"<SOURCE_NAME>:<_get_id(site)>"``.
    """
    location_id = f"{SOURCE_NAME}:{_get_id(site)}"
    location = schema.NormalizedLocation(
        id=location_id,
        name=_get_name(site),
        address=_get_address(site),
        contact=_get_contacts(site),
        opening_hours=_get_open_hours(site),
        notes=_get_notes(site),
        source=_get_source(site, timestamp),
    )
    return location.dict()
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    The return annotation is ``dict`` because the function returns the result
    of ``schema.NormalizedLocation(...).dict()``, not a string.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is
        ``"<RUNNER_ID>:<_get_id(site)>"``.
    """
    return schema.NormalizedLocation(
        id=f"{RUNNER_ID}:{_get_id(site)}",
        name=_get_name(site),
        address=_get_address(site),
        contact=_get_contacts(site),
        inventory=_get_inventories(site),
        parent_organization=_get_organization(site),
        source=_get_source(site, timestamp),
    ).dict()
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is
        ``"<SOURCE_NAME>:<_get_id(site)>"``.
    """
    location_id = f"{SOURCE_NAME}:{_get_id(site)}"
    location = schema.NormalizedLocation(
        id=location_id,
        name=_get_name(site),
        address=_get_address(site),
        location=_get_location(site),
        contact=_get_contacts(site),
        inventory=_get_inventories(site),
        source=_get_source(site, timestamp),
    )
    return location.dict()
def test_invalid_location_id(invalid_id):
    """Check that an invalid location id is rejected with a ValidationError.

    When ``invalid_id`` splits into exactly two ``":"``-separated parts they
    are used as the source name and source id; otherwise the source name
    falls back to ``"test_source"`` and the whole string is the source id.
    """
    pieces = invalid_id.split(":")
    if len(pieces) == 2:
        source_name, source_id = pieces
    else:
        source_name, source_id = "test_source", invalid_id

    with pytest.raises(pydantic.error_wrappers.ValidationError):
        location.NormalizedLocation(
            id=invalid_id,
            source=location.Source(source=source_name, id=source_id, data={}),
        )
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    The return annotation is ``dict`` because the function returns the result
    of ``schema.NormalizedLocation(...).dict()``, not a string.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is whatever
        ``_get_id(site)`` returns.
    """
    return schema.NormalizedLocation(
        id=_get_id(site),
        name=_get_name(site),
        address=_get_address(site),
        location=_get_location(site),
        contact=_get_contacts(site),
        opening_dates=_get_opening_dates(site),
        inventory=_get_inventories(site),
        parent_organization=_get_organization(site),
        notes=_get_notes(site),
        source=_get_source(site, timestamp),
    ).dict()
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    Args:
        site: Raw site record, handed to each field helper.
        timestamp: Passed through to ``_get_source``.

    Returns:
        The ``.dict()`` form of the normalized location, whose id is
        ``"tx_memorialhermann:"`` followed by ``_get_id(site)`` and whose
        availability is always ``drop_in=True``.
    """
    location_id = "tx_memorialhermann:" + _get_id(site)
    site_name = _get_name(site)
    address = _get_address(site)
    opening_hours = _get_hours(site)
    inventory = _get_vaccine(site)
    notes = _get_notes(site)
    # The webpage is for walk-in clinics
    walk_in = schema.Availability(drop_in=True)
    source = _get_source(site, timestamp)

    location = schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        opening_hours=opening_hours,
        inventory=inventory,
        notes=notes,
        availability=walk_in,
        source=source,
    )
    return location.dict()
def _get_normalized_location(
    site: dict, timestamp: str
) -> Optional[schema.NormalizedLocation]:
    """Build a ``schema.NormalizedLocation`` from one raw feature record.

    Args:
        site: Raw record with ``"attributes"`` and ``"geometry"`` mappings.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location, or ``None`` when the ``loc_name`` attribute
        is longer than 256 characters. If contact parsing raises
        ``ValidationError``, a warning is logged and ``contact`` is ``None``.
    """
    attrs = site["attributes"]
    if len(attrs["loc_name"]) > 256:
        return None

    # Contact parsing for this site is a little flaky. Ensure that a bug for
    # a single entry does not halt overall scraping.
    try:
        parsed_contacts = _get_contacts(site)
    except ValidationError:
        logger.warning(
            "Errored while trying to parse contact from %s, %s, or %s",
            attrs["SitePhone"],
            attrs["Contact"],
            attrs["URL"],
        )
        parsed_contacts = None

    location_id = _get_id(site)
    site_name = attrs["loc_name"]
    address = _get_address(site)
    geometry = site["geometry"]
    coordinates = schema.LatLng(latitude=geometry["y"], longitude=geometry["x"])
    inventory = _get_inventory(site)
    active = _get_activated(site)
    source = schema.Source(
        source="sc_arcgis",
        id=attrs["GlobalID"],
        fetched_from_uri="https://opendata.arcgis.com/datasets/bbd8924909264baaa1a5a1564b393063_0.geojson",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=site_name,
        address=address,
        location=coordinates,
        contact=parsed_contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        # The data has an "Appointments" field, but it is unclear whether it
        # means "an appointment is required" or "an appointment is
        # available". Left blank: phone bankers and/or the web team will
        # likely need to determine availability.
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=active,
        source=source,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one raw site record into a ``schema.NormalizedLocation``.

    Args:
        site: Raw site record. Optional fields are read with ``get``;
            required ones (``locationName``, ``latitude``, ``longitude``,
            ``vaccineTypes``, ``locationId``, ``updatedAt``) by key.
        timestamp: Value recorded as ``fetched_at`` on the source.

    Returns:
        The normalized location with id ``"<SOURCE_NAME>:<_get_id(site)>"``.
    """
    source_name = SOURCE_NAME

    # NOTE: we use `get` where the field is optional in our data source, and
    # ["key"] access where it is not.
    location_id = f"{source_name}:{_get_id(site)}"
    location_name = site["locationName"]
    address = schema.Address(
        street1=site.get("addressLine1"),
        street2=site.get("addressLine2"),
        city=site.get("city"),
        state=_get_state(site),
        zip=_get_good_zip(site),
    )
    coordinates = schema.LatLng(
        latitude=site["latitude"], longitude=site["longitude"]
    )
    contacts = _get_contacts(site)

    # NOTE(review): this passes the raw description value, whereas other
    # normalizers pass `notes` as a list; confirm the schema accepts it.
    notes = site.get("description")

    # Since this could be nullable we make sure to only provide it if it's
    # True or False.
    availability = None
    if site.get("walkIn") is not None:
        availability = schema.Availability(drop_in=site.get("walkIn"))

    access = schema.Access(
        walk=site.get("walkupSite"),
        drive=site.get("driveupSite"),
        wheelchair=_get_wheelchair(site),
    )

    # If the supply level is falsy (e.g. unknown), don't say anything about
    # inventory at all; otherwise keep only vaccines with a known type.
    inventory = None
    if _get_supply_level(site):
        inventory = []
        for vaccine in site["vaccineTypes"]:
            if _get_vaccine_type(vaccine) is not None:
                inventory.append(
                    schema.Vaccine(
                        vaccine=_get_vaccine_type(vaccine),
                        supply_level=_get_supply_level(site),
                    )
                )

    parent_organization = schema.Organization(
        id=site.get("providerId"), name=site.get("providerName")
    )
    source = schema.Source(
        source=source_name,
        id=site["locationId"],
        fetched_from_uri="https://apim-vaccs-prod.azure-api.net/web/graphql",
        fetched_at=timestamp,
        published_at=site["updatedAt"],
        data=site,
    )
    return schema.NormalizedLocation(
        id=location_id,
        name=location_name,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        availability=availability,
        access=access,
        inventory=inventory,
        parent_organization=parent_organization,
        source=source,
    )