def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the general contacts for a site from its ``contact`` mapping.

    A phone is kept only when it already has the ``(NNN) NNN-NNNN`` form.
    A ``website`` value that begins with ``mailto:`` is recorded as an email
    address (with the prefix removed); any other value is recorded as a
    website.

    Returns the list of contacts, or ``None`` when nothing was collected.
    """
    found = []

    if "phone" in site["contact"]:
        phone = site["contact"]["phone"]
        if re.match(r"^\(\d{3}\) \d{3}-\d{4}$", phone):
            found.append(schema.Contact(contact_type="general", phone=phone))

    if "website" in site["contact"]:
        link = site["contact"]["website"]
        if link[:7] == "mailto:":
            found.append(schema.Contact(contact_type="general", email=link[7:]))
        else:
            found.append(schema.Contact(contact_type="general", website=link))

    return found or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect general contacts from the pre-registration attributes.

    Every phone number found in ``prereg_phone`` is re-formatted as
    ``(NNN) NNN-NNNN``. The ``prereg_website`` value is cleaned up (typo'd
    scheme, missing scheme, embedded spaces) before being recorded.

    Args:
        site: Raw record; ``site["attributes"]`` must contain the keys
            ``prereg_phone`` and ``prereg_website``.

    Returns:
        The list of contacts, or ``None`` when none could be built.
    """
    contacts = []
    attributes = site["attributes"]

    raw_phone = attributes["prereg_phone"]
    if raw_phone:
        matches = list(
            re.finditer(
                r"(?P<area_code>\d\d\d)\)?-? ?(?P<rest_of_number>\d\d\d-\d\d\d\d)",
                raw_phone,
            )
        )
        if not matches:
            # Only the phone is unusable: warn and carry on so that a valid
            # website is not thrown away together with the bad phone number.
            logger.warning("unparseable phone number: '%s'", raw_phone)
        for match in matches:
            phone = f"({match.group('area_code')}) {match.group('rest_of_number')}"
            contacts.append(schema.Contact(contact_type="general", phone=phone))

    website = attributes["prereg_website"]
    if website:
        # The source data contains at least one "htttp" typo.
        website = website.replace("htttp", "http")
        if "http" not in website:
            website = "https://" + website
        website = website.replace(" ", "")
        contacts.append(schema.Contact(contact_type="general", website=website))

    return contacts or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect contacts from the pre-registration attributes.

    Every phone number found in ``prereg_phone`` is re-formatted as
    ``(NNN) NNN-NNNN``; ``prereg_website`` is recorded unchanged.

    Args:
        site: Raw record; ``site["attributes"]`` must contain the keys
            ``prereg_phone`` and ``prereg_website``.

    Returns:
        The list of contacts, or ``None`` when none could be built.
    """
    contacts = []
    attributes = site["attributes"]

    raw_phone = attributes["prereg_phone"]
    if raw_phone:
        matches = list(
            re.finditer(
                r"(?P<area_code>\d\d\d)\)?-? ?(?P<rest_of_number>\d\d\d-\d\d\d\d)",
                raw_phone,
            )
        )
        if not matches:
            # Only the phone is unusable: warn and carry on so that the
            # website is not discarded together with the bad phone number.
            logger.warning("unparseable phone number: '%s'", raw_phone)
        for match in matches:
            phone = f"({match.group('area_code')}) {match.group('rest_of_number')}"
            contacts.append(schema.Contact(phone=phone))

    website = attributes["prereg_website"]
    if website:
        contacts.append(schema.Contact(website=website))

    return contacts or None
def _get_contacts(site: dict): ret = [] if site["Appointment Phone"] != "": raw_phone = str(site["Appointment Phone"]).lstrip("1") if raw_phone[3] == "-" or raw_phone[7] == "-": phone = "(" + raw_phone[0:3] + ") " + raw_phone[ 4:7] + "-" + raw_phone[8:12] phone_notes = raw_phone[12:] elif len(raw_phone) == 10: phone = "(" + raw_phone[0:3] + ") " + raw_phone[ 3:6] + "-" + raw_phone[6:10] phone_notes = "" else: phone = raw_phone[0:14] phone_notes = raw_phone[14:] if phone_notes == "": ret.append(schema.Contact(phone=phone)) else: phone_notes = phone_notes.lstrip(",") phone_notes = phone_notes.lstrip(";") phone_notes = phone_notes.lstrip(" ") ret.append( schema.Contact(phone=phone, other=f"phone_notes:{phone_notes}")) if site["Web Address"] != "": ret.append(schema.Contact(website=site["Web Address"])) return ret
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build general contacts from ``SitePhone``, ``Contact`` and ``URL``.

    Phone contacts come from ``normalize_phone``. The ``Contact`` attribute is
    stored as an email when it contains ``@`` and as ``other`` otherwise. The
    URL gets an ``https://`` prefix when it does not contain ``http`` and is
    kept only if it matches the URL pattern below.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    attrs = site["attributes"]
    collected = []

    if attrs["SitePhone"]:
        collected.extend(normalize_phone(attrs["SitePhone"]))

    # Contact seems to be a free text field where people usually enter emails
    # but also sometimes other stuff like numbers, hours of operation, etc.
    if attrs["Contact"]:
        free_text = attrs["Contact"]
        if "@" in free_text:
            collected.append(schema.Contact(contact_type="general", email=free_text))
        else:
            collected.append(schema.Contact(contact_type="general", other=free_text))

    link = attrs["URL"]
    if link:
        if "http" not in link:
            link = "https://" + link
        url_pattern = re.compile(
            r"^((https?):\/\/)(www.)?[a-z0-9]+\.[a-z]+(\/?[a-zA-Z0-9#]+\/?)*$"
        )
        if url_pattern.match(link):
            collected.append(schema.Contact(contact_type="general", website=link))

    return collected or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from the ``Site_*`` and ``Promote_Name`` attributes.

    Phone contacts come from ``normalize_phone(Site_Phone)``. The website is
    ``Site_Zotec_Link`` when present; otherwise the first URL found inside
    ``Promote_Name`` is used. ``Site_Location_Info`` is stored as ``other``.

    Args:
        site: Raw record; ``site["attributes"]`` must contain the keys
            ``Site_Phone``, ``Site_Zotec_Link``, ``Promote_Name`` and
            ``Site_Location_Info``.

    Returns:
        The list of contacts, or ``None`` when none could be built.
    """
    contacts = []
    attributes = site["attributes"]

    if attributes["Site_Phone"]:
        contacts.extend(normalize_phone(attributes["Site_Phone"]))

    if attributes["Site_Zotec_Link"]:
        contacts.append(schema.Contact(website=attributes["Site_Zotec_Link"]))
    elif attributes["Promote_Name"]:
        # Sometimes Promote_Name also contains URLs. These are probably worse
        # than Site_Zotec_Link, but if they're all that we have we might as
        # well use them.
        promote_name = attributes["Promote_Name"]
        # Copied from SO: https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
        promote_url_match = re.search(
            r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)",
            promote_name,
        )
        if promote_url_match:
            # group(0) is the matched URL; Match.string would be the whole
            # Promote_Name text, including any words around the URL.
            contacts.append(schema.Contact(website=promote_url_match.group(0)))

    if attributes["Site_Location_Info"]:
        contacts.append(schema.Contact(other=attributes["Site_Location_Info"]))

    return contacts or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from the ``SitePhone``, ``Contact`` and ``URL`` attributes.

    The phone is lower-cased, cut at the first ``ext``, reduced to digits,
    relieved of its first digit when eleven digits long, and formatted as
    ``(NNN) NNN-NNNN``. ``Contact`` is stored as an email when it contains
    ``@`` and as ``other`` otherwise.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    attrs = site["attributes"]
    collected = []

    if attrs["SitePhone"]:
        # Some numbers in the data have extensions (e.g. 1-855-222-0083 ext 513)
        # which are currently not captured: only the text before "ext" is kept.
        before_ext = attrs["SitePhone"].lower().partition("ext")[0]
        digits = re.sub("[^0-9]", "", before_ext)
        if len(digits) == 11:
            digits = digits[1:]
        collected.append(
            schema.Contact(phone=f"({digits[0:3]}) {digits[3:6]}-{digits[6:]}")
        )

    # Contact seems to be a free text field where people usually enter emails
    # but also sometimes other stuff like numbers, hours of operation, etc.
    if attrs["Contact"]:
        free_text = attrs["Contact"]
        if "@" in free_text:
            collected.append(schema.Contact(email=free_text))
        else:
            collected.append(schema.Contact(other=free_text))

    if attrs["URL"]:
        collected.append(schema.Contact(website=attrs["URL"]))

    return collected or None
def test_valid_contact():
    """A Contact carrying a single email, website or phone can be built."""
    booking_email = location.Contact(
        contact_type=location.ContactType.BOOKING,
        email="*****@*****.**",
    )
    assert booking_email

    website_only = location.Contact(website="https://example.com")
    assert website_only

    phone_only = location.Contact(phone="(510) 555-5555")
    assert phone_only
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build general contacts from the ``USER_Contact_*`` attributes.

    Phone contacts come from ``normalize_phone``. Spaces are removed from the
    email field; when it lists several addresses separated by ``/`` only the
    first one is kept. An email field without any ``.`` is ignored.

    Args:
        site: Raw record; ``site["attributes"]`` must contain the keys
            ``USER_Contact_Phone``, ``USER_Contact_Email`` and
            ``USER_Contact_Website``.

    Returns:
        The list of contacts, or ``None`` when none could be built.
    """
    contacts = []
    attributes = site["attributes"]

    if attributes["USER_Contact_Phone"]:
        contacts.extend(
            normalize_phone(attributes["USER_Contact_Phone"], contact_type="general")
        )

    if attributes["USER_Contact_Email"]:
        email = attributes["USER_Contact_Email"].replace(" ", "")
        # A value without a dot cannot be an address. Skip only the email
        # instead of returning early, so phone and website are still reported.
        if "." in email:
            # Spaces are already gone, so "a@x.org / b@x.org" is now
            # "a@x.org/b@x.org"; keep the first address.
            email = email.split("/")[0]
            contacts.append(schema.Contact(contact_type="general", email=email))

    if attributes["USER_Contact_Website"]:
        contacts.append(
            schema.Contact(
                contact_type="general",
                website=attributes["USER_Contact_Website"],
            )
        )

    return contacts or None
def _get_contacts(site: dict): ret = [] if "register_phone" in site: raw_phone = site["register_phone"] if raw_phone: raw_phone = raw_phone.lstrip("tel:") raw_phone = raw_phone.lstrip(" ") raw_phone = raw_phone.lstrip("1") raw_phone = raw_phone.lstrip("-") raw_phone = raw_phone.lstrip(" ") if raw_phone[3] == "-" or raw_phone[7] == "-": phone = f"({raw_phone[0:3]}) {raw_phone[4:7]}-{raw_phone[8:12]}" phone_notes = raw_phone[12:] elif len(raw_phone) == 10: phone = f"({raw_phone[0:3]}) {raw_phone[3:6]}-{raw_phone[6:10]}" phone_notes = "" else: phone = raw_phone[0:14] phone_notes = raw_phone[14:] phone_notes = phone_notes.lstrip(",") phone_notes = phone_notes.lstrip(";") phone_notes = phone_notes.lstrip(" ") ret.append(schema.Contact(phone=phone, contact_type="booking")) if "register_online_url" in site: website = site["register_online_url"] if website: ret.append(schema.Contact(website=website, contact_type="booking")) return ret
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from the ``phone`` and ``vaccine_url`` attributes.

    The phone is reduced to digits, loses its first digit when eleven digits
    long, and is kept only if exactly ten digits remain. The URL is passed
    through ``sanitize_url`` and kept only if the result is truthy.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    attrs = site["attributes"]
    collected = []

    if attrs["phone"]:
        digits = re.sub("[^0-9]", "", attrs["phone"])
        if len(digits) == 11:
            digits = digits[1:]
        # TODO: handle 3-digit phone numbers like 211, 411 .etc
        if len(digits) == 10:
            collected.append(
                schema.Contact(phone=f"({digits[0:3]}) {digits[3:6]}-{digits[6:]}")
            )

    # NOTE: the "publicEmail" attribute is currently not recorded as a contact.

    # there are multiple urls, vaccine, agency, health dept. etc
    if attrs["vaccine_url"]:
        cleaned = sanitize_url(attrs["vaccine_url"])
        if cleaned:
            collected.append(schema.Contact(website=cleaned))

    return collected or None
def _get_contacts(loc: GMVLocation) -> Optional[List[location.Contact]]:
    """Turn the info/booking phone and URL fields of ``loc`` into contacts.

    Contacts are emitted in the order info phone, info URL, booking phone,
    booking URL; falsy fields are skipped.

    Returns the list of contacts, or ``None`` when every field was falsy.
    """
    found = []

    if loc.info_phone:
        found.append(
            location.Contact(
                phone=loc.info_phone,
                contact_type=location.ContactType.GENERAL,
            )
        )
    if loc.info_url:
        found.append(
            location.Contact(
                website=loc.info_url,
                contact_type=location.ContactType.GENERAL,
            )
        )
    if loc.booking_phone:
        found.append(
            location.Contact(
                phone=loc.booking_phone,
                contact_type=location.ContactType.BOOKING,
            )
        )
    if loc.booking_url:
        found.append(
            location.Contact(
                website=loc.booking_url,
                contact_type=location.ContactType.BOOKING,
            )
        )

    return found if found else None
def test_raises_on_invalid_contact():
    """Invalid Contact argument combinations raise a pydantic ValidationError."""
    # A contact type with no contact detail at all.
    with pytest.raises(pydantic.error_wrappers.ValidationError):
        location.Contact(contact_type=location.ContactType.GENERAL)

    # A contact type that is not a known value.
    with pytest.raises(pydantic.error_wrappers.ValidationError):
        location.Contact(
            contact_type="invalid",
            email="*****@*****.**",
        )

    # Two contact details on the same Contact.
    with pytest.raises(pydantic.error_wrappers.ValidationError):
        location.Contact(
            email="*****@*****.**",
            website="https://example.com",
        )
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from ``ExtendedData`` or, failing that, ``description``.

    With ``ExtendedData``: a ten-digit phone (optionally preceded by ``1``)
    becomes a general phone contact, each normalised ``website`` a general
    website contact and each normalised ``vaccine sign up`` a booking website
    contact. Without it, the ``description`` lines are joined into a general
    ``other`` contact. If nothing was collected, the provider store page is
    tried as a last resort.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    found = []

    if "ExtendedData" not in site:
        # Community vaccination sites have no "ExtendedData"; the contact info
        # is in free-form notes
        found.append(
            schema.Contact(
                contact_type=schema.ContactType.GENERAL,
                other="\n".join(site["description"]),
            )
        )
    else:
        extended = site["ExtendedData"]

        raw_phone = extended.get("phone")
        if raw_phone:
            digits = re.sub(r"\D", "", raw_phone)
            if re.match(r"1?\d{10}$", digits):
                national = digits[1:] if len(digits) == 11 else digits
                found.append(
                    schema.Contact(
                        contact_type=schema.ContactType.GENERAL,
                        phone=f"({national[0:3]}) {national[3:6]}-{national[6:10]}",
                    )
                )

        general_sites = extended.get("website")
        if general_sites:
            for address in _normalize_websites(general_sites):
                found.append(
                    schema.Contact(
                        contact_type=schema.ContactType.GENERAL, website=address
                    )
                )

        booking_sites = extended.get("vaccine sign up")
        if booking_sites:
            for address in _normalize_websites(booking_sites):
                found.append(
                    schema.Contact(
                        contact_type=schema.ContactType.BOOKING, website=address
                    )
                )

    if not found:
        # Try to guess the website for major brands
        store_page = _get_provider_store_page(site)
        if store_page:
            found.append(
                schema.Contact(
                    contact_type=schema.ContactType.GENERAL, website=store_page
                )
            )

    return found or None
def _get_contact(site: dict) -> List[schema.Contact]:
    """Return booking contacts for the site's ``phone`` and ``link`` values.

    Falsy values are skipped, so the result may be an empty list.
    """
    number = site["phone"]
    link = site["link"]

    booking = []
    if number:
        booking.append(schema.Contact(phone=number, contact_type="booking"))
    if link:
        booking.append(schema.Contact(website=link, contact_type="booking"))
    return booking
def _get_contacts(site: dict) -> List[schema.Contact]:
    """Build booking and general contacts from the site's nullable fields.

    ``phone``, ``email`` and ``schedulingLink`` become booking contacts when
    they are not ``None``; ``infoLink`` becomes a general website contact when
    it is truthy. A value that fails schema validation is logged and skipped.

    Returns a (possibly empty) list, in the order phone, email, scheduling
    link, info link.
    """

    def _build(label, value, **contact_kwargs):
        # Create one Contact; log and return None when validation rejects it.
        try:
            return schema.Contact(**contact_kwargs)
        except ValidationError:
            logger.warning(f"Invalid {label} contact {value}")
            return None

    # From the docs for this data source:
    # These are nullable fields, but one is "guaranteed" to be non nullable
    phone = site.get("phone")
    email = site.get("email")
    scheduling_link = site.get("schedulingLink")
    info_link = site.get("infoLink")

    built = []
    if phone is not None:
        built.append(_build("phone", phone, contact_type="booking", phone=phone))
    if email is not None:
        built.append(_build("email", email, contact_type="booking", email=email))
    if scheduling_link is not None:
        built.append(
            _build(
                "scheduling_link",
                scheduling_link,
                contact_type="booking",
                website=scheduling_link,
            )
        )
    if info_link:
        built.append(
            _build("info_link", info_link, contact_type="general", website=info_link)
        )

    return [entry for entry in built if entry is not None]
def _get_contacts(site: dict): ret = [] if site["Appointment Phone"] != "": ret.extend(_parse_phone_numbers(site["Appointment Phone"])) url = site["Web Address"] # Some URLs have multiple schemes. valid_url = re.match(r"(https?:\/\/)*(.+)", url) if ( url == "http://" or url == "https://" or url == "none" or url == "" or url.startswith("Please email") ): return ret elif valid_url is not None: if valid_url.group(1) is None: url = valid_url.group(2) else: url = f"{valid_url.group(1)}{valid_url.group(2)}" url = normalize_url(url) ret.append(schema.Contact(website=url)) else: logger.warning(f"Unknown, invalid URL: {url}") return ret
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Return the site's website contact, or ``None`` when there is none.

    Records whose id is on the bad-URL list are skipped. A URL that does not
    start with ``http://`` or ``https://`` has whatever scheme-like prefix it
    carries replaced by ``http://``. The URL is then percent-quoted, leaving
    ``:`` and ``/`` untouched.
    """
    bad_url_record_ids = [
        "38327",
        "38316",
        "38328",
        "38465",
        "38852",
        "39047",
        "39048",
        "39072",
        "39519",
        "39520",
        "40071",
    ]

    if site["id"] in bad_url_record_ids:
        return None

    address = site["location"]["extra_fields"].get("website", None)
    if not address:
        return None

    if not address.startswith("http://") and not address.startswith("https://"):
        # stuff a scheme at the beginning if one is missing or typo-ed
        address = re.sub(r"^(.*?:(//)?)?", "http://", address)

    # some URLs have spaces in them
    address = urllib.parse.quote(address, safe=":/")

    return [schema.Contact(website=address)]
def _get_contact(site: dict) -> Optional[List[schema.Contact]]:
    """Extract a general phone contact from the site's ``Description``.

    The description is split on CRLF and only its last section is searched.
    The phone is the text found between ``"o: "`` and ``" <"`` in that
    section; it is re-formatted as ``(NNN) NNN-NNNN``.

    Args:
        site: Raw record containing a ``Description`` string.

    Returns:
        A one-element list holding the phone contact, or ``None`` when no
        phone marker is present or the number has fewer than ten digits.
        (The annotation used to claim a bare ``schema.Contact``, which never
        matched what is actually returned.)
    """
    PHONE_RE = re.compile(r"(?<=o: )(.*)(?= <)")
    PHONE_FORMAT_RE = re.compile(r"([0-9]{3}).{0,1}([0-9]{3}).{0,1}([0-9]{4})")

    raw_address = site["Description"]
    sections = raw_address.split("\r\n")
    contact_section = sections[-1]

    phone_num_search = PHONE_RE.search(contact_section)
    if phone_num_search is None:
        return None

    # Sometimes the numbers come formatted in some way - the replacements undo
    # that formatting: parentheses are dropped, non-breaking spaces become
    # plain spaces, and spaces become dashes.
    phone_num_string = (
        phone_num_search.group(1)
        .replace("(", "")
        .replace(")", "")
        .replace("\xa0", " ")
        .replace(" ", "-")
    )

    phone_num_format = PHONE_FORMAT_RE.search(phone_num_string)
    # less than 10 digit number
    if phone_num_format is None:
        return None

    phone_num = (
        f"({phone_num_format.group(1)}) "
        f"{phone_num_format.group(2)}-{phone_num_format.group(3)}"
    )
    return [schema.Contact(contact_type="general", phone=phone_num)]
def _get_contacts(record): contacts = [] try: if record.get("websiteurl"): contacts.append(location.Contact(website=record["websiteurl"])) except ValidationError as e: logger.error( f"Ignoring invalid website '{record['websiteurl']}':\n{str(e)}") try: if record.get("Phone"): contacts.append(location.Contact(phone=record["Phone"])) except ValidationError as e: logger.error( f"Ignoring invalid phone number '{record['Phone']}':\n{str(e)}") return contacts
def _get_contact(site: dict) -> List[schema.Contact]:
    """Build the booking website contact from the site's ``Website__c`` field.

    When the raw value contains a space, the first group of ``URL_RE`` is used
    to pull the URL out of the surrounding text. A value that does not start
    with ``http`` gets an ``http://`` prefix.

    Args:
        site: Raw record containing ``Website__c`` and ``Id``.

    Returns:
        A one-element list with the website contact, or an empty list when
        the field is empty or the URL fails schema validation.
    """
    contacts = []
    url = site["Website__c"]

    # Nothing to parse; also avoids a TypeError on `" " in None`.
    if not url:
        return contacts

    formatted_url = url
    if " " in url:
        match = URL_RE.search(url)
        if match and match.group(1):
            formatted_url = match.group(1)

    if not formatted_url.startswith("http"):
        # Prefix the extracted URL. Prefixing the raw `url` here would throw
        # away the regex extraction above and bring the surrounding text back.
        formatted_url = "http://" + formatted_url

    try:
        contacts.append(
            schema.Contact(
                website=formatted_url, contact_type=schema.ContactType.BOOKING
            )
        )
    except pydantic.ValidationError:
        logger.warning(
            "Invalid website for id: %s, value: %s. Returning empty Contact",
            site["Id"],
            url,
        )

    return contacts
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from the scheduling phone and sign-up link attributes.

    The phone is reduced to digits and kept, formatted as ``(NNN) NNN-NNNN``,
    only when exactly ten digits remain. The sign-up link is recorded as a
    website unchanged.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    attrs = site["attributes"]
    collected = []

    if attrs["USER_Scheduling_by_Phone"]:
        digits = re.sub("[^0-9]", "", attrs["USER_Scheduling_by_Phone"])
        if len(digits) == 10:
            collected.append(
                schema.Contact(phone=f"({digits[0:3]}) {digits[3:6]}-{digits[6:]}")
            )

    if attrs["USER_Link_to_Sign_Up"]:
        collected.append(schema.Contact(website=attrs["USER_Link_to_Sign_Up"]))

    return collected or None
def _get_contact(site: dict) -> Optional[List[schema.Contact]]:
    """Return a booking website contact for the first URL in attribute ``f6``.

    The URL runs from ``http://`` or ``https://`` up to the next whitespace
    or quote character. Returns ``None`` when the field contains no URL.
    """
    found = re.search(r"(?P<url>https?://[^\s'\"]+)", site["attributes"]["f6"])
    if not found:
        return None
    return [schema.Contact(contact_type="booking", website=found.group("url"))]
def _get_contact(config: dict, site: dict) -> List[schema.Contact]:
    """Return the booking website contact for a clinic.

    The registration URL is built from ``config["url"]`` and the site's
    ``clinic_id``.
    """
    registration_url = f"{config['url']}/appointment/en/client/registration?clinic_id={site['clinic_id']}"
    booking = schema.Contact(contact_type="booking", website=registration_url)
    return [booking]
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build contacts from the ``phone``, ``email`` and ``agencyurl`` attributes.

    Phone contacts come from ``normalize_phone``; the email and agency URL
    are recorded unchanged.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    attrs = site["attributes"]
    collected = []

    if attrs["phone"]:
        collected.extend(normalize_phone(attrs["phone"]))

    if attrs["email"]:
        collected.append(schema.Contact(email=attrs["email"]))

    if attrs["agencyurl"]:
        collected.append(schema.Contact(website=attrs["agencyurl"]))

    return collected if collected else None
def _get_contacts(site: dict) -> List[location.Contact]:
    """Northwell provides a program_url, which we'll store as the contact website.

    Returns a one-element list with a booking website contact, or an empty
    list when ``program_url`` is missing or falsy.
    """
    program_url = site.get("program_url")
    if not program_url:
        return []
    return [location.Contact(contact_type="booking", website=str(program_url))]
def _get_contacts(site: dict) -> List[schema.Contact]:
    """Build general and booking contacts for a site.

    General contacts come from the ``phoneNumber`` and ``website`` lists.
    Booking contacts are mined from the ``schedulingInfo`` text: the first
    ``href`` link, then a ``tel:`` number or, failing that, a plain
    ``NNN-NNN-NNNN`` number or the 1-800-Walgreens vanity number. The raw
    scheduling text is always appended as a booking ``other`` contact.

    Normalised phones that are ``None`` or contain ``?`` are skipped.
    """
    contacts = []

    for listed_phone in site["phoneNumber"]:
        normalized = _normalize_phone(listed_phone)
        if normalized is None or "?" in normalized:
            continue
        contacts.append(schema.Contact(phone=normalized, contact_type="general"))

    for listed_site in site["website"]:
        contacts.append(schema.Contact(website=listed_site, contact_type="general"))

    scheduling_text = site["schedulingInfo"]

    booking_site = None
    link_match = re.search('href="(http.*)"', scheduling_text)
    if link_match:
        booking_site = link_match.group(1).split(" ")[0]
        booking_site = booking_site.replace('"', "")  # remove quote marks from urls
        booking_site = normalize_url(booking_site)

    # Replace the en dash with ASCII '-' so the phone patterns can match.
    tel_match = re.search(
        "tel:([-() \\d]*)", scheduling_text.replace("\u2013", "-")
    )
    if tel_match:
        booking_raw = tel_match.group(1)
    else:
        plain_match = re.search(
            "(\\d\\d\\d-\\d\\d\\d-\\d\\d\\d\\d)",
            scheduling_text.replace("\u2013", "-"),
        )
        if plain_match:
            booking_raw = plain_match.group(1)
        elif "1-800-Walgreens" in scheduling_text:
            booking_raw = "(800) 925-4733"
        else:
            booking_raw = ""

    booking_phone = _normalize_phone(booking_raw)
    if booking_phone is not None and "?" not in booking_phone:
        contacts.append(schema.Contact(contact_type="booking", phone=booking_phone))

    if booking_site is not None:
        contacts.append(schema.Contact(contact_type="booking", website=booking_site))

    contacts.append(schema.Contact(contact_type="booking", other=scheduling_text))
    return contacts
def _get_contact(site: dict) -> List[schema.Contact]:
    """Return the booking website contact for a clinic.

    The registration URL embeds the site's ``clinic_id``.
    """
    registration_url = f"https://prepmod.doh.wa.gov/client/registration?clinic_id={site['clinic_id']}"
    booking = schema.Contact(contact_type="booking", website=registration_url)
    return [booking]
def normalize(site: dict, timestamp: str) -> dict:
    """Convert one raw site record into a normalized-location dict.

    Args:
        site: Raw record from the provider feed.
        timestamp: Fetch time, stored as ``source.fetched_at``.

    Returns:
        The ``schema.NormalizedLocation`` for the site, converted with
        ``.dict()``.
    """
    links = [
        schema.Link(authority="ct_gov", id=site["_id"]),
        schema.Link(authority="ct_gov_network_id", id=site["networkId"]),
    ]

    parent_organization = schema.Organization(name=site["networks"][0]["name"])

    # When the provider can be recognised from the name, link to it and use
    # its authority as the parent organization id.
    parsed_provider_link = provider_id_from_name(site["name"])
    if parsed_provider_link is not None:
        links.append(
            schema.Link(authority=parsed_provider_link[0], id=parsed_provider_link[1])
        )
        parent_organization.id = parsed_provider_link[0]

    # One Contact per detail: the schema appears to reject a Contact that
    # carries more than one of phone/website/email (confirm against the
    # Contact validator). Empty values are skipped.
    contacts = []
    if site["phone"]:
        contacts.append(schema.Contact(contact_type="booking", phone=site["phone"]))
    if site["link"]:
        contacts.append(schema.Contact(contact_type="booking", website=site["link"]))

    return schema.NormalizedLocation(
        id=f"ct_gov:{site['_id']}",
        name=site["displayName"],
        address=schema.Address(
            street1=site["addressLine1"],
            street2=site["addressLine2"],
            city=site["city"],
            state="CT",
            zip=site["zip"],
        ),
        location=_get_lat_lng(site),
        contact=contacts or None,
        languages=None,
        opening_dates=None,
        opening_hours=None,
        availability=schema.Availability(
            appointments=site["availability"],
        ),
        inventory=[
            schema.Vaccine(vaccine=vaccine["name"])
            for vaccine in site["providerVaccines"]
        ],
        access=schema.Access(
            drive=site["isDriveThru"],
        ),
        parent_organization=parent_organization,
        links=links,
        notes=None,
        active=None,
        source=schema.Source(
            source="covidvaccinefinder_gov",
            id=site["_id"],
            fetched_from_uri="https://covidvaccinefinder.ct.gov/api/HttpTriggerGetProvider",  # noqa: E501
            fetched_at=timestamp,
            published_at=site["lastModified"],
            data=site,
        ),
    ).dict()
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build general contacts from the ``PublicPhone`` and ``WEBSITE`` attributes.

    Each value is passed through its fix-up helper; a ``None`` result means
    the value is skipped.

    Returns the list of contacts, or ``None`` when it would be empty.
    """
    collected = []

    fixed_phone = _phone_fixup(site["attributes"]["PublicPhone"])
    if fixed_phone is not None:
        collected.append(
            schema.Contact(
                contact_type=schema.ContactType.GENERAL, phone=fixed_phone
            )
        )

    fixed_website = _website_fixup(site["attributes"]["WEBSITE"])
    if fixed_website is not None:
        collected.append(
            schema.Contact(
                contact_type=schema.ContactType.GENERAL, website=fixed_website
            )
        )

    return collected or None