def _get_address(site: dict) -> Optional[schema.Address]:
    """Build a Texas ``schema.Address`` from ``site["attributes"]``.

    Returns ``None`` when the ``ADDRESS`` attribute is ``None``.

    When ``CITY`` is ``None`` the whole address is assumed to live in the
    ``ADDRESS`` field, and street/city are split heuristically from its
    space-separated tokens. Otherwise ``ADDRESS``, ``CITY`` and ``ZIP`` are
    used directly, with implausible zip values discarded.
    """
    attributes = site["attributes"]
    raw_address = attributes["ADDRESS"]
    if raw_address is None:
        return None

    tokens = raw_address.replace(",", "").split(" ")

    if attributes["CITY"] is not None:
        # Sometimes the zip can be None, even though the rest of the address
        # has been entered.
        raw_zip = attributes["ZIP"]
        postal_code = None if raw_zip is None else str(raw_zip)
        # Remove typos: too short, six or more leading digits, or a leading
        # letter.
        if postal_code is not None and (
            len(postal_code) < 5
            or re.match(r"\d{6,}", postal_code)
            or re.match(r"[a-zA-Z]", postal_code)
        ):
            postal_code = None
        return schema.Address(
            street1=raw_address.strip(),
            street2=None,
            city=attributes["CITY"].strip(),
            state=schema.State.TEXAS,
            zip=postal_code,
        )

    # Some sites put all address data in a single field.
    # In this case, the data seems to uppercase all characters in the address,
    # but the city only capitalizes the first letter.
    city_first = 0
    city_stop = 0
    last_position = len(tokens) - 1
    for position, token in enumerate(tokens):
        try:
            second_char_is_lower = len(token) > 1 and token[1].islower()
            if second_char_is_lower and city_first == 0:
                city_first = position
            if token == "TX":
                city_stop = position
            # Nothing looked like a city: fall back to the next-to-last token.
            if position == last_position and city_first == 0:
                city_first = position - 1
                city_stop = position
        except IndexError as ie:
            logger.error("Unable to parse address: %s", ie)
            return None

    trailing = tokens[-1]
    postal_code = None if len(trailing) < 5 else trailing

    return schema.Address(
        street1=" ".join(tokens[0:city_first]),
        street2=None,
        city=" ".join(tokens[city_first:city_stop]),
        state=schema.State.TEXAS,
        zip=postal_code,
    )
def test_normalize_address():
    """Check ``normalize_address(parse_address(...))`` on sample inputs."""
    cases = [
        (
            "qwertyuiop",
            schema.Address(
                street1="qwertyuiop",
                street2=None,
                city=None,
                state=None,
                zip=None,
            ),
        ),
        (
            "1600 Pennsylvania Ave NW, Washington DC, 20500",
            schema.Address(
                street1="1600 Pennsylvania Ave NW",
                city="Washington",
                state="DC",
                zip="20500",
            ),
        ),
        # Superfluous ", USA" removal.
        (
            "1060 W Addison St, Chicago, IL, USA 60613",
            schema.Address(
                street1="1060 W Addison St",
                city="Chicago",
                state="IL",
                zip="60613",
            ),
        ),
        # Newline handling.
        (
            "Yosemite Falls\nYosemite Village, CA\n95389\n",
            schema.Address(
                street1="Yosemite Falls",
                street2=None,  # This is a weird (but OK) artifact.
                city="Yosemite Village",
                state="CA",
                zip="95389",
            ),
        ),
        (
            "3720 S Las Vegas Blvd\nSpace 265\nLas Vegas, NV 89158",
            schema.Address(
                street1="3720 S Las Vegas Blvd",
                street2="Space 265",
                city="Las Vegas",
                state="NV",
                zip="89158",
            ),
        ),
    ]
    for raw_text, expected in cases:
        assert normalize_address(parse_address(raw_text)) == expected
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one parsed ``fl_state`` site record into a normalized location.

    Args:
        site: Parsed site record; read for ``title``, ``address``,
            ``location`` (``city``, ``lat``, ``lng``, optional
            ``postal_code``) and ``id``.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    source_name = "fl_state"

    location_id = f"{source_name}:{_get_id(site)}"
    display_name = site["title"]

    address = schema.Address(
        street1=site["address"],
        street2=None,
        city=site["location"]["city"],
        state="FL",
        # The postal code is optional in the input.
        zip=site["location"].get("postal_code", None),
    )
    coordinates = schema.LatLng(
        latitude=convert_lat_lng(site["location"]["lat"]),
        longitude=convert_lat_lng(site["location"]["lng"]),
    )
    contacts = _get_contacts(site)
    notes = _get_notes(site)
    source = schema.Source(
        source=source_name,
        id=site["id"],
        fetched_from_uri="https://floridahealthcovid19.gov/vaccines/vaccine-locator/",
        fetched_at=timestamp,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``az_arcgis`` feature into a normalized location.

    Args:
        site: Feature dict; its ``attributes`` mapping supplies the name,
            address fields, pre-registration comments and ``globalid``.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]

    display_name = attrs["loc_name"]
    address = schema.Address(
        street1=attrs["addr1"],
        street2=attrs["addr2"],
        city=attrs["city"],
        state="AZ",
        zip=attrs["zip"],
    )
    coordinates = _get_lat_lng(site)
    contacts = _get_contacts(site)
    languages = _get_languages(site)
    opening_dates = _get_opening_dates(site)
    opening_hours = _get_opening_hours(site)
    inventory = _get_inventory(site)

    # Only emit a note when there is a non-empty pre-registration comment.
    comments = attrs["prereg_comments"]
    notes = [comments] if comments else None

    source = schema.Source(
        source="az_arcgis",
        id=attrs["globalid"],
        fetched_from_uri="https://adhsgis.maps.arcgis.com/apps/opsdashboard/index.html#/5d636af4d5134a819833b1a3b906e1b6",  # noqa: E501
        fetched_at=timestamp,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=languages,
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def normalize(site: dict) -> schema.NormalizedLocation:
    """Convert the parsed `site` into a normalized site location.

    Every field is read with ``dict.get``, so absent keys become ``None``.
    The county, when present, is recorded as the single note.
    """
    site_name = site.get("name")

    site_address = schema.Address(
        street1=site.get("street1"),
        street2=site.get("street2"),
        city=site.get("city"),
        state=site.get("state"),
        zip=normalize_zip(site.get("zip")),
    )

    site_source = schema.Source(
        source=_SOURCE_NAME,
        id=_generate_id(site_name, site_address),
        fetched_from_uri=normalize_url(site.get("fetched_from_uri")),
        published_at=_normalize_date_string(site.get("published_at")),
        data=site,
    )

    county_name = site.get("county")
    times = _normalize_opening_times(site.get("opening_times"))

    site_id = _make_site_id(site_source)
    if times:
        opening_dates = times[0]
        opening_hours = times[1]
    else:
        opening_dates = None
        opening_hours = None
    notes = [county_name] if county_name else None

    return schema.NormalizedLocation(
        name=site_name,
        id=site_id,
        source=site_source,
        address=site_address,
        active=True,  # this source updates weekly
        opening_dates=opening_dates,
        opening_hours=opening_hours,
        notes=notes,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``mo:arcgis`` feature into a normalized location.

    Args:
        site: Feature dict with ``attributes`` (``USER_*`` fields and
            ``GlobalID``) and ``geometry`` (``x``/``y``).
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]

    display_name = attrs["USER_Provider_Name"]
    address = schema.Address(
        street1=attrs["USER_Address"],
        street2=attrs["USER_Address_2"],
        city=attrs["USER_City"],
        state=attrs["USER_State"],
        zip=attrs["USER_Zip_Code"],
    )
    # Geometry ``y`` is used as latitude and ``x`` as longitude.
    coordinates = schema.LatLng(
        latitude=site["geometry"]["y"],
        longitude=site["geometry"]["x"],
    )
    contacts = _get_contacts(site)
    source = schema.Source(
        source="mo:arcgis",
        id=attrs["GlobalID"],
        fetched_from_uri="https://www.arcgis.com/apps/webappviewer/index.html?id=ab04156a03584e31a14ae2eb36110c20",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_site(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one feature into a normalized location.

    The name, street and city come from the ``f2``, ``f3`` and ``f4``
    attributes (city is title-cased). The identifier is produced by ``_id``
    from a fixed key, the coordinates, the name and the address.

    Args:
        site: Feature dict with an ``attributes`` mapping.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    attrs = site["attributes"]
    site_name = attrs["f2"]
    coordinates = _get_lat_lng(site)

    address = schema.Address(
        street1=attrs["f3"],
        street2=None,
        city=attrs["f4"].title(),
        state=STATE,
    )

    site_id = _id("8f23e1c3b5c54198ab60d2f729cb787d", coordinates, site_name, address)

    source = schema.Source(
        source=SOURCE_NAME,
        id=site_id,
        fetched_from_uri=FETCHED_FROM_URI,
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=f"{SOURCE_NAME}:{site_id}",
        name=site_name,
        address=address,
        location=coordinates,
        source=source,
    )
def _get_address(site: dict) -> schema.Address:
    """Parse the comma-separated ``site["address"]`` into a Colorado address.

    Example addresses:

        "No 6 Town Plaza Shopping Ctr, Durango, CO"
        "1900 18th Street, 1st Floor, Denver, CO"
        "2121 S Oneida St Suite 200, Denver, CO, 80224"

    Assumes ``street1 "," city "," state ["," zip]``, where street1 may
    contain commas. The address is consumed from the right: an optional
    5-digit zip, then the state, then the city; whatever remains is the
    street.

    Args:
        site: Site record; ``address`` is optional.

    Returns:
        A ``schema.Address`` whose street, city and zip are ``None`` when the
        record has no ``address`` key.

    Raises:
        IndexError: If the address has too few comma-separated parts to
            supply both a state and a city.
    """
    street1 = city = zipcode = None

    if "address" in site:
        parts = [part.strip() for part in site["address"].split(",")]

        # Detect the zip by looking at the LAST part, whatever the part count.
        # Keying on "exactly four parts" misparsed addresses whose street
        # contains a comma and that also carry a zip (five or more parts).
        if re.match(r"\d{5}$", parts[-1]):
            zipcode = parts.pop()

        parts.pop()  # "CO"
        city = parts.pop()
        street1 = ", ".join(parts)

    return schema.Address(
        street1=street1,
        city=city,
        state=schema.State.COLORADO,
        zip=zipcode,
    )
def add_address_details(
    self,
    street_address: Optional[Text] = None,
    city: Optional[Text] = None,
    zip: Optional[Text] = None,
) -> None:
    """Record address information on the current site.

    If the current site's address already holds a value for any field being
    supplied, ``next_site()`` is called first so the details land on a fresh
    site. An address with state "AL" is created when the site has none.
    The zip is passed through ``normalize_zip`` before being stored.
    """
    existing = self._current_site.address
    if existing:
        # A clash is a field that is both supplied now and already populated.
        clash = (
            (street_address and existing.street1)
            or (city and existing.city)
            or (zip and existing.zip)
        )
        if clash:
            self.next_site()

    # Re-read the current site: next_site() may have replaced it.
    target = self._current_site
    target.address = target.address or schema.Address(state="AL")

    if street_address is not None:
        target.address.street1 = street_address
    if city is not None:
        target.address.city = city
    if zip is not None:
        target.address.zip = normalize_zip(zip)
def _get_address(site: dict):
    """Build a ``schema.Address`` from ``site["address"]``.

    Street, zip and state are read from the nested ``address`` mapping; the
    city is obtained from ``_get_city(site)``.
    """
    raw = site["address"]
    street = raw["street1"]
    city = _get_city(site)
    postal_code = raw["zip"]
    state = raw["state"]
    return schema.Address(street1=street, city=city, zip=postal_code, state=state)
def normalize(site_blob: dict, timestamp: str) -> dict:
    """Convert one provider entry into a normalized-location dict.

    sample entry:
    {"providerId": 1013, "providerName": "\u1d42**York College - Health and Physical Education Complex - Queens", "vaccineBrand": "Pfizer", "address": "Jamaica, NY", "availableAppointments": "Y", "isShowable": true, "lastUpdated": "2021-04-23T20:04:24"} # noqa: E501

    The provider name is cleaned with ``NAME_CLEAN_RE`` and stripped, the
    city is the first group of ``CITY_RE`` applied to ``address``, and
    appointments are available exactly when ``availableAppointments`` is
    "Y".
    """
    provider_id = site_blob["providerId"]

    cleaned_name = NAME_CLEAN_RE.sub("", site_blob["providerName"]).strip()
    city_name = CITY_RE.search(site_blob["address"]).group(1)
    has_appointments = site_blob["availableAppointments"] == "Y"

    normalized = schema.NormalizedLocation(
        id=f"am_i_eligible_covid19vaccine_gov:{provider_id}",
        name=cleaned_name,
        address=schema.Address(city=city_name, state="NY"),
        availability=schema.Availability(appointments=has_appointments),
        inventory=_get_inventory(site_blob["vaccineBrand"]),
        links=[
            schema.Link(authority="am_i_eligible_covid19vaccine_gov", id=provider_id),
        ],
        source=_get_source(site_blob, timestamp),
    )
    return normalized.dict()
def _get_normalized_site(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one feature into a normalized location.

    ``SITE_NAME`` and ``CITY`` are title-cased, ``Match_addr`` is the street,
    and ``ID_ZIPCODE`` is converted with ``str``. The identifier is produced
    by ``_id`` from a fixed key, the coordinates, the name and the address.

    Args:
        site: Feature dict with an ``attributes`` mapping.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    attrs = site["attributes"]
    site_name = attrs["SITE_NAME"].title()
    coordinates = _get_lat_lng(site)

    address = schema.Address(
        street1=attrs["Match_addr"],
        street2=None,
        city=attrs["CITY"].title(),
        state=STATE,
        zip=str(attrs["ID_ZIPCODE"]),
    )

    site_id = _id("51d4c310f1fe4d83a63e2b47acb77898", coordinates, site_name, address)

    source = schema.Source(
        source=SOURCE_NAME,
        id=site_id,
        fetched_from_uri=FETCHED_FROM_URI,
        fetched_at=timestamp,
        data=site,
    )
    return schema.NormalizedLocation(
        id=f"{SOURCE_NAME}:{site_id}",
        name=site_name,
        address=address,
        location=coordinates,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``wi_arcgis_map`` feature into a normalized location.

    Args:
        site: Feature dict whose ``attributes`` supply ``NAME``, ``ADDRESS``,
            ``CITY``, ``STATE``, ``ZIP`` and ``OBJECTID``.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]

    display_name = attrs["NAME"]
    address = schema.Address(
        street1=attrs["ADDRESS"],
        street2=None,
        city=attrs["CITY"],
        state=attrs["STATE"],
        zip=attrs["ZIP"],
    )
    coordinates = _get_location(site)
    contacts = _get_contacts(site)
    source = schema.Source(
        source="wi_arcgis_map",
        id=attrs["OBJECTID"],
        fetched_from_uri="https://dhsgis.wi.gov/server/rest/services/DHS_COVID19/COVID19_Vaccine_Provider_Sites/MapServer/0",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def _get_address(site: dict) -> schema.Address: CITY_RE = re.compile(r"^(.+?),") ZIP_RE = re.compile(r"\s+(\d{5})") raw_address = site["Description"] sections = raw_address.split("\r\n") # sometimes they just use \n instead of \r\n if len(sections) == 1: sections = raw_address.split("\n") # if there still isn't more than one section, it's likely there isn't an address for this loc if len(sections) == 1: return None adr2 = sections[1] if len(sections) == 3 else None csz_sec = sections[1] if adr2 is None else sections[2] adr1 = sections[0] city = CITY_RE.search(csz_sec).group(1) zip_search = ZIP_RE.search(csz_sec) zipc = None if zip_search is None else zip_search.group(1) # no zip, no valid address if zipc is None: return None else: return schema.Address(street1=adr1, street2=adr2, city=city, state="OK", zip=zipc)
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``nyc_arcgis`` feature into a normalized location.

    The borough is used as the city, and wheelchair access is "yes" only
    when ``ADA_Compliant`` equals "Yes" (otherwise "no").

    Args:
        site: Feature dict with ``attributes`` and ``geometry`` (``x``/``y``).
        timestamp: Passed to ``_get_source``.
    """
    attrs = site["attributes"]

    location_id = f"nyc_arcgis:{attrs['LocationID']}"
    display_name = attrs["FacilityName"]
    address = schema.Address(
        street1=attrs["Address"],
        street2=attrs["Address2"],
        city=attrs["Borough"],
        state="NY",
        zip=attrs["Zipcode"],
    )
    # Geometry ``y`` is used as latitude and ``x`` as longitude.
    coordinates = schema.LatLng(
        latitude=site["geometry"]["y"],
        longitude=site["geometry"]["x"],
    )
    contacts = _get_contacts(site)
    opening_hours = _get_opening_hours(site)
    availability = _get_availability(site)
    inventory = _get_inventory(site)

    if attrs["ADA_Compliant"] == "Yes":
        wheelchair = "yes"
    else:
        wheelchair = "no"
    access = schema.Access(wheelchair=wheelchair)

    parent_organization = _get_parent_organization(site)
    notes = _get_notes(site)
    source = _get_source(site, timestamp)

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        opening_hours=opening_hours,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent_organization,
        notes=notes,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``pa:arcgis`` feature into a normalized location.

    Args:
        site: Feature dict with ``attributes`` (facility name, address
            fields, ``Clinic_ID``) and ``geometry`` (``x``/``y``).
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]

    display_name = attrs["Facility_Name"]
    address = schema.Address(
        street1=attrs["Address"],
        street2=attrs["Address_2"],
        city=attrs["City"],
        state="PA",
        zip=attrs["ZIP_Code"],
    )
    # Geometry ``y`` is used as latitude and ``x`` as longitude.
    coordinates = schema.LatLng(
        latitude=site["geometry"]["y"],
        longitude=site["geometry"]["x"],
    )
    contacts = _get_contacts(site)
    source = schema.Source(
        source="pa:arcgis",
        id=attrs["Clinic_ID"],
        fetched_from_uri="https://padoh.maps.arcgis.com/apps/webappviewer/index.html?id=e6f78224c6fe4313a1f70b56f553c357",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=None,
        access=None,
        parent_organization=None,
        links=None,
        notes=None,
        active=None,
        source=source,
    )
def _get_normalized_location(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one ``ri_arcgis`` feature into a normalized location.

    Args:
        site: Feature dict with ``attributes`` (``USER_*`` fields,
            ``ZIPCODE``, ``OBJECTID``) and ``geometry`` (``x``/``y``).
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    location_id = _get_id(site)
    attrs = site["attributes"]

    display_name = attrs["USER_Name"]
    address = schema.Address(
        street1=attrs["USER_Address"],
        street2=None,
        city=attrs["USER_City_Town"],
        state="RI",
        zip=attrs["ZIPCODE"],
    )
    # Geometry ``y`` is used as latitude and ``x`` as longitude.
    coordinates = schema.LatLng(
        latitude=site["geometry"]["y"],
        longitude=site["geometry"]["x"],
    )
    contacts = _get_contacts(site)
    inventory = _get_inventory(site)
    notes = _get_notes(site)
    source = schema.Source(
        source="ri_arcgis",
        id=attrs["OBJECTID"],
        fetched_from_uri="https://rihealth.maps.arcgis.com/apps/instant/nearby/index.html?appid=a25f35833533498bac3f724f92a84b4e",  # noqa: E501
        fetched_at=timestamp,
        published_at=None,
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=None,
        inventory=inventory,
        access=None,
        parent_organization=None,
        links=None,
        notes=notes,
        active=None,
        source=source,
    )
def _get_address(site: dict) -> schema.Address:
    """Build a ``schema.Address`` by copying five required keys from ``site``.

    Raises:
        KeyError: If any of the source keys is missing.
    """
    # Address field -> key in the site record (looked up in this order).
    key_for_field = {
        "street1": "address_1",
        "street2": "address_2",
        "city": "city",
        "state": "state",
        "zip": "postal_code",
    }
    values = {field: site[key] for field, key in key_for_field.items()}
    return schema.Address(**values)
def _get_address(site: dict):
    """Build a ``schema.Address`` by copying five required keys from ``site``.

    Raises:
        KeyError: If any of the source keys is missing.
    """
    # Address field -> column name in the site record (looked up in order).
    column_for_field = {
        "street1": "Street Address",
        "street2": "Street Address 2",
        "city": "City",
        "state": "State",
        "zip": "Postal Code",
    }
    values = {field: site[column] for field, column in column_for_field.items()}
    return schema.Address(**values)
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one provider record into a normalized-location dict.

    Two links (``ct_gov`` and ``ct_gov_network_id``) are always emitted. When
    ``provider_id_from_name`` recognises the site name, a third link is added
    and the parent organization's ``id`` is set to that link's authority.

    Args:
        site: Provider record as returned by the source API.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    site_id = site["_id"]

    links = [
        schema.Link(authority="ct_gov", id=site_id),
        schema.Link(authority="ct_gov_network_id", id=site["networkId"]),
    ]

    # The first listed network names the parent organization.
    parent_organization = schema.Organization(name=site["networks"][0]["name"])

    provider_link = provider_id_from_name(site["name"])
    if provider_link is not None:
        authority = provider_link[0]
        links.append(schema.Link(authority=authority, id=provider_link[1]))
        parent_organization.id = authority

    address = schema.Address(
        street1=site["addressLine1"],
        street2=site["addressLine2"],
        city=site["city"],
        state="CT",
        zip=site["zip"],
    )
    coordinates = _get_lat_lng(site)
    booking_contact = schema.Contact(
        contact_type="booking", phone=site["phone"], website=site["link"]
    )
    availability = schema.Availability(appointments=site["availability"])

    inventory = []
    for vaccine in site["providerVaccines"]:
        inventory.append(schema.Vaccine(vaccine=vaccine["name"]))

    access = schema.Access(drive=site["isDriveThru"])
    source = schema.Source(
        source="covidvaccinefinder_gov",
        id=site_id,
        fetched_from_uri="https://covidvaccinefinder.ct.gov/api/HttpTriggerGetProvider",  # noqa: E501
        fetched_at=timestamp,
        published_at=site["lastModified"],
        data=site,
    )

    normalized = schema.NormalizedLocation(
        id=f"ct_gov:{site_id}",
        name=site["displayName"],
        address=address,
        location=coordinates,
        contact=[booking_contact],
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent_organization,
        links=links,
        notes=None,
        active=None,
        source=source,
    )
    return normalized.dict()
def _get_address(site: dict) -> Optional[schema.Address]:
    """Build a South Carolina ``schema.Address`` from ``site["attributes"]``.

    The zip is passed through ``normalize_zip``; the other fields are copied
    as-is.
    """
    attrs = site["attributes"]
    cleaned_zip = normalize_zip(attrs["SiteZip"])
    street = attrs["SiteAddress"]
    street_detail = attrs["SiteAddressDetail"]
    city = attrs["SiteCity"]
    return schema.Address(
        street1=street,
        street2=street_detail,
        city=city,
        state="SC",
        zip=cleaned_zip,
    )
def _get_address(site: dict) -> Optional[location.Address]:
    """Build a New York ``location.Address`` from ``site``.

    Returns ``None`` when the record has no ``address`` key; ``city`` and
    ``zip`` are required whenever ``address`` is present.
    """
    if "address" in site:
        street = site["address"]
        city = site["city"]
        postal_code = site["zip"]
        return location.Address(
            street1=street,
            street2=None,
            city=city,
            zip=postal_code,
            state="NY",
        )
    return None
def _get_address(site: dict) -> Optional[schema.Address]: if "address" not in site or "lines" not in site: return None return schema.Address( street1=site["address"]["lines"][0], street2=None if len(site["address"]["lines"]) < 2 else " / ".join( site["address"]["lines"][1:]), city=_get_city(site), zip=site["address"]["zip"], state=site["address"]["state"], )
def _get_building_and_address(
        site: dict) -> Tuple[str, Optional[location.Address]]:
    """Split ``site["location"]`` into a building name and a parsed address.

    Returns:
        A ``(building, address)`` tuple. When the text does not match the
        address pattern the result is ``("", None)``. Otherwise ``address``
        is an Illinois ``location.Address`` built from the matched street,
        city and optional zip, and ``building`` is the text found before the
        street number or, if that is empty, the text after the state/zip.
    """
    # Input has no consistent format. Examples:
    # - ""
    # - "100 E. Jeffery St. Kankakee, IL 60901"
    # - "100 S Main St, Crystal Lake, IL 60014 (Enter off Crystal Lake Ave.)"
    # - "3330 W 177th St \r\nSuite 3F\r\nHazel Crest IL\r\nEmerge MedStaffing"
    # - "Bushnell-Prairie City High School\r\n845 N Walnut St\r\nBushnell, IL 61422"
    # The street name often runs together with the city name, like this:
    # - "3601 W 183rd St Hazel Crest IL 60429".
    # We look for punctuation or a street type (like "St") at the end of the
    # street name.
    # If there's extra text before or after the address, like "Emerge
    # MedStaffing" or "Bushnell-Prairie City High School", we remove it from
    # the address and return it separately. It's probably a building name, and
    # the _filter_name() function may use it as the name of the location.
    match = re.match(
        r"""
        (?P<building>[^0-9]*) # building name
        (?P<street>[0-9].* # street address, must end with punctuation or street type
            (\b(ave|avenue|blvd|boulevard|cir|circle|ct|court|dr|drive|hwy|highway|ln|lane|pkwy|parkway|st|street|way)\b\s*|[.,0-9]\s*|[\r\n])
        )
        (?P<city>(\b\w+[ ]*)+) # city
        [,]?\s+(IL|Illinois)\b[.,]? # state
        (\s+(?P<zip>\d{5}(-\d{4})?))? # zip
        (?P<extra>.*) # building name, instructions, county name, "united states", etc.
        """,
        site["location"],
        re.DOTALL | re.IGNORECASE | re.VERBOSE,
    )
    if not match:
        return ("", None)
    address = location.Address(
        # Trim separators left at either end of the greedy street match.
        street1=match.group("street").strip(" ,\r\n"),
        city=match.group("city"),
        state=location.State.ILLINOIS,
        zip=match.group("zip"),
    )
    # Prefer the text before the street number; fall back to the trailing text
    # when that prefix is empty.
    building = match.group("building") or match.group("extra")
    # NOTE(review): this check does not use re.DOTALL, so ".*" stops at the
    # first newline and only the first line of `building` is examined --
    # confirm that is intended.
    if re.match(
        r".*\b(county|united states|enter off crystal lake)\b",
        building,
        flags=re.IGNORECASE,
    ):
        # Not a building name.
        building = ""
    building = building.strip(" ,\r\n@.")
    return (building, address)
def _get_address(site: dict) -> Optional[schema.Address]: if (parsed_site["attributes"]["SiteAddress"] is None or parsed_site["attributes"]["SiteZip"] is None): return None return schema.Address( street1=site["attributes"]["SiteAddress"], street2=site["attributes"]["SiteAddressDetail"], city=site["attributes"]["SiteCity"], state="SC", zip=site["attributes"]["SiteZip"], )
def _get_address(site: dict) -> schema.Address:
    """Parse the ", "-separated ``site["address"]`` into a ``schema.Address``.

    The first piece is the street and the last is the zip. The next-to-last
    piece is the city, with a trailing " <STATE>" (the upper-cased
    ``config["state"]``) removed. Unless there are exactly three pieces, the
    second piece is used as ``street2``.
    """
    pieces = site["address"].split(", ")

    if len(pieces) == 3:
        second_line = None
    else:
        second_line = pieces[1]

    street = pieces[0]
    state_code = config["state"].upper()
    city = pieces[-2].replace(f" {state_code}", "")
    postal_code = pieces[-1]

    return schema.Address(
        street1=street,
        street2=second_line,
        city=city,
        state=state_code,
        zip=postal_code,
    )
def _get_address(site: dict) -> schema.Address:
    """Parse the ", "-separated ``site["address"]`` into a Washington address.

    The first piece is the street and the last is the zip. The next-to-last
    piece is the city, with " WA" removed. Unless there are exactly three
    pieces, the second piece is used as ``street2``.
    """
    pieces = site["address"].split(", ")

    if len(pieces) == 3:
        second_line = None
    else:
        second_line = pieces[1]

    street = pieces[0]
    city = pieces[-2].replace(" WA", "")
    postal_code = pieces[-1]

    return schema.Address(
        street1=street,
        street2=second_line,
        city=city,
        state="WA",
        zip=postal_code,
    )
def _get_address(address: str) -> schema.Address:
    """Parse a ","-separated address string into a ``schema.Address``.

    The first piece is the street and the last three pieces are city, state
    and zip. With exactly five pieces, the second is used as ``street2``.
    Pieces are used exactly as split; no whitespace is trimmed.
    """
    pieces = address.split(",")
    second_line = pieces[1] if len(pieces) == 5 else None

    street = pieces[0]
    city, state, postal_code = pieces[-3], pieces[-2], pieces[-1]

    return schema.Address(
        street1=street,
        street2=second_line,
        city=city,
        state=state,
        zip=postal_code,
    )
def normalize(site: dict, timestamp: str) -> schema.NormalizedLocation:
    """Convert one location record into a normalized location.

    Args:
        site: Location record from the source.
        timestamp: Value recorded as ``fetched_at`` on the source.
    """
    source_name = SOURCE_NAME

    # NOTE: `get` is used where the field is optional in the data source, and
    # ["key"] access where it is not.
    location_id = f"{source_name}:{_get_id(site)}"
    display_name = site["locationName"]

    address = schema.Address(
        street1=site.get("addressLine1"),
        street2=site.get("addressLine2"),
        city=site.get("city"),
        state=_get_state(site),
        zip=_get_good_zip(site),
    )
    coordinates = schema.LatLng(
        latitude=site["latitude"], longitude=site["longitude"]
    )
    contacts = _get_contacts(site)
    notes = site.get("description")

    # walkIn is nullable: only report availability when it is True or False.
    walk_in = site.get("walkIn")
    availability = None if walk_in is None else schema.Availability(drop_in=walk_in)

    access = schema.Access(
        walk=site.get("walkupSite"),
        drive=site.get("driveupSite"),
        wheelchair=_get_wheelchair(site),
    )

    # If the supply level is unknown (falsy), say nothing about inventory.
    if _get_supply_level(site):
        inventory = [
            schema.Vaccine(
                vaccine=_get_vaccine_type(vaccine),
                supply_level=_get_supply_level(site),
            )
            for vaccine in site["vaccineTypes"]
            if _get_vaccine_type(vaccine) is not None
        ]
    else:
        inventory = None

    parent_organization = schema.Organization(
        id=site.get("providerId"), name=site.get("providerName")
    )
    source = schema.Source(
        source=source_name,
        id=site["locationId"],
        fetched_from_uri="https://apim-vaccs-prod.azure-api.net/web/graphql",
        fetched_at=timestamp,
        published_at=site["updatedAt"],
        data=site,
    )

    return schema.NormalizedLocation(
        id=location_id,
        name=display_name,
        address=address,
        location=coordinates,
        contact=contacts,
        notes=notes,
        availability=availability,
        access=access,
        inventory=inventory,
        parent_organization=parent_organization,
        source=source,
    )
def _get_address(site: dict) -> schema.Address:
    """Build an Illinois ``schema.Address`` from ``site``.

    ``Postal_Code__c`` is kept only when ``schema.ZIPCODE_RE`` matches it;
    otherwise the zip is ``None``.
    """
    street = site["Street__c"]
    town = site["City__c"]
    raw_zip = site["Postal_Code__c"]

    postal_code = raw_zip if schema.ZIPCODE_RE.match(raw_zip) else None

    return schema.Address(
        street1=street,
        street2=None,
        city=town,
        state=schema.State.ILLINOIS,
        zip=postal_code,
    )