def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect the contact entries for a site record.

    Reads ``SitePhone``, ``Contact`` and ``URL`` from ``site["attributes"]``.

    Args:
        site: Record whose ``"attributes"`` mapping holds the raw fields.

    Returns:
        The collected contacts, or ``None`` when nothing usable was found.
    """
    attrs = site["attributes"]
    contacts = []

    if attrs["SitePhone"]:
        contacts.extend(normalize_phone(attrs["SitePhone"]))

    # Contact seems to be a free text field where people usually enter emails
    # but also sometimes other stuff like numbers, hours of operation, etc.
    free_text = attrs["Contact"]
    if free_text:
        if "@" in free_text:
            contacts.append(
                schema.Contact(contact_type="general", email=free_text))
        else:
            contacts.append(
                schema.Contact(contact_type="general", other=free_text))

    url = attrs["URL"]
    if url:
        # Only a *leading* scheme counts. Testing for "http" anywhere in the
        # value left hosts such as "www.http.com" without a scheme, so they
        # failed the validation below and were dropped.
        if not url.startswith(("http://", "https://")):
            url = "https://" + url
        url_is_valid = re.match(
            r"^((https?):\/\/)(www.)?[a-z0-9]+\.[a-z]+(\/?[a-zA-Z0-9#]+\/?)*$",
            url,
        )
        if url_is_valid:
            contacts.append(schema.Contact(contact_type="general", website=url))

    return contacts or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect the contact entries for a site record.

    Reads ``Site_Phone``, ``Site_Zotec_Link``, ``Promote_Name`` and
    ``Site_Location_Info`` from ``site["attributes"]``.

    Args:
        site: Record whose ``"attributes"`` mapping holds the raw fields.

    Returns:
        The collected contacts, or ``None`` when none were found.
    """
    attrs = site["attributes"]
    contacts = []

    if attrs["Site_Phone"]:
        contacts.extend(normalize_phone(attrs["Site_Phone"]))

    if attrs["Site_Zotec_Link"]:
        contacts.append(schema.Contact(website=attrs["Site_Zotec_Link"]))
    elif attrs["Promote_Name"]:
        # Sometimes Promote_Name also contains URLs. These are probably worse
        # than Site_Zotec_Link, but if they're all that we have we might as
        # well use them.
        # Copied from SO: https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
        promote_url_match = re.search(
            "https?://(www\\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)",
            attrs["Promote_Name"],
        )
        if promote_url_match:
            # group(0) is the matched URL; Match.string would be the entire
            # Promote_Name text, including whatever surrounds the URL.
            contacts.append(schema.Contact(website=promote_url_match.group(0)))

    if attrs["Site_Location_Info"]:
        contacts.append(schema.Contact(other=attrs["Site_Location_Info"]))

    return contacts or None
def _get_contact(site: dict) -> List[schema.Contact]:
    """Return the phone contacts parsed from the site's ``Description``.

    The description is passed to ``normalize_phone`` with the ``"general"``
    contact type. Note that ``None`` (not an empty list) is returned when no
    phone entries come back, despite the declared return type.
    """
    found = list(normalize_phone(site["Description"], "general"))
    return found if found else None
def add_phone(self, phone: Text) -> None:
    """Append the given phone number to the current site's contacts.

    Existing contacts are kept: a site may carry several phone numbers, so
    the list is only created when there is none yet. Entries are produced
    by ``normalize_phone`` with the ``"booking"`` contact type.
    """
    current = self._current_site
    existing = current.contact
    current.contact = existing if existing else []
    booking_phones = normalize_phone(phone, contact_type="booking")
    current.contact.extend(booking_phones)
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect the contact entries for a site record.

    Reads ``USER_Contact_Phone``, ``USER_Contact_Email`` and
    ``USER_Contact_Website`` from ``site["attributes"]``.

    An unusable email value only causes the email to be skipped; phone and
    website contacts are still returned.

    Args:
        site: Record whose ``"attributes"`` mapping holds the raw fields.

    Returns:
        The collected contacts, or ``None`` when none were found.
    """
    attrs = site["attributes"]
    contacts = []

    if attrs["USER_Contact_Phone"]:
        contacts.extend(
            normalize_phone(attrs["USER_Contact_Phone"], contact_type="general")
        )

    if attrs["USER_Contact_Email"]:
        # The field may hold several addresses separated by "/" (with or
        # without surrounding spaces); spaces are removed first, so a plain
        # "/" split covers both spellings. Only the first address is kept.
        email = attrs["USER_Contact_Email"].replace(" ", "").split("/")[0]
        # A value without a dot is not an address. Skip it instead of
        # returning early, which used to discard the phones gathered above
        # and never reached the website below.
        if "." in email:
            contacts.append(schema.Contact(contact_type="general", email=email))

    if attrs["USER_Contact_Website"]:
        contacts.append(
            schema.Contact(
                contact_type="general",
                website=attrs["USER_Contact_Website"],
            )
        )

    return contacts or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from the ``phone`` and ``website`` attributes.

    Phone entries come from ``normalize_phone``; the website is passed through
    ``normalize_url`` and added as a general contact unless that yields
    ``None``. Returns ``None`` when no contact was produced.
    """
    attrs = site["attributes"]
    found = list(normalize_phone(attrs["phone"]))

    site_url = normalize_url(attrs["website"])
    if site_url is not None:
        found.append(
            schema.Contact(
                website=site_url,
                contact_type=schema.ContactType.GENERAL,
            )
        )

    return found if found else None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from ``phone``, ``email`` and ``agencyurl``.

    Each attribute is only used when it is truthy. Returns ``None`` when no
    contact was produced.
    """
    attrs = site["attributes"]
    found = []

    raw_phone = attrs["phone"]
    if raw_phone:
        found.extend(normalize_phone(raw_phone))

    raw_email = attrs["email"]
    if raw_email:
        found.append(schema.Contact(email=raw_email))

    agency_url = attrs["agencyurl"]
    if agency_url:
        found.append(schema.Contact(website=agency_url))

    return found if found else None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect the contact entries for a site record.

    Reads ``USER_Scheduling_by_Phone`` and ``USER_Link_to_Sign_Up`` from
    ``site["attributes"]``.

    Args:
        site: Record whose ``"attributes"`` mapping holds the raw fields.

    Returns:
        The collected contacts, or ``None`` when none were found.
    """
    attrs = site["attributes"]
    contacts = []

    if attrs["USER_Scheduling_by_Phone"]:
        contacts.extend(normalize_phone(attrs["USER_Scheduling_by_Phone"]))

    if attrs["USER_Link_to_Sign_Up"]:
        url = attrs["USER_Link_to_Sign_Up"].strip()
        # Placeholder values seen in this field: a lone backspace character
        # ("\x08") or a dash. A value that is only whitespace is empty after
        # strip() and is skipped as well.
        if url and url not in ("\x08", "-"):
            normalized = normalize_url(url)
            # Do not emit a contact when normalization yields nothing.
            if normalized is not None:
                contacts.append(schema.Contact(website=normalized))

    return contacts or None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from ``Phone_Number`` and the site's website.

    The website contact is whatever ``_get_website(site)`` returns, added only
    when truthy. Returns ``None`` when no contact was produced.
    """
    found = []

    phone_number = site["attributes"]["Phone_Number"]
    if phone_number:
        found.extend(normalize_phone(phone_number))

    # Email contacts are currently disabled:
    # if site["attributes"]["publicEmail"]:
    #     contacts.append(schema.Contact(email=site["attributes"]["publicEmail"]))

    web = _get_website(site)
    if web:
        found.append(web)

    return found if found else None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Collect contacts from a site's ``phoneNumber`` and ``locationUrl``.

    ``locationUrl`` is looked up through ``_get_filter`` with ``""`` as the
    fallback argument.

    NOTE(review): the visible text of this function ends after the
    ``locationUrl`` special cases, without calling ``cleanup_url`` or
    returning ``contacts``; the remainder is presumably outside this
    excerpt -- confirm against the full file.
    """
    location_url = _get_filter(site, "locationUrl", "")
    contacts = []
    if site["phoneNumber"]:
        for phone in normalize_phone(site["phoneNumber"]):
            contacts.append(phone)

    def cleanup_url(url):
        """Repair common typos in a raw URL value; return None if unusable."""
        # Empty or whitespace-only values carry no URL.
        if not url or not url.strip():
            return None
        # Placeholder texts that appear in place of a real link.
        if url in ["No website", "Website link", "Booking Registration link"]:
            return None
        if "@" in url:
            # Some of these are email addresses.
            # Skipping those for now.
            return None
        # "http:/host" -> "http://host" (single slash after the scheme).
        url = re.sub(r"^(http|https):/(\w+)", r"\1://\2", url)
        # Drop a stray "www." placed in front of the scheme.
        url = re.sub(r"^www.https://", "https://", url)
        # "https//host" -> "https://host" (missing colon).
        url = re.sub(r"^https//", "https://", url)
        # "https:host" -> "https://host" (missing slashes).
        url = re.sub(r"^https:(\w+)", r"https://\1", url)
        # One specific value with a space inside the host name.
        url = re.sub(r"^https://wynne apothecary.com",
                     "https://wynneapothecary.com", url)
        # workaround until samuelcolvin/pydantic#2778 is merged
        url = url.rstrip("#")
        # Anything still lacking a scheme gets plain http.
        if not url.startswith("http"):
            url = "http://" + url
        return url

    # One specific value lists two sites in a single field; emit both.
    if location_url == "healthyguilford.com, conehealth.com/vaccine":
        contacts.append(
            schema.Contact.create(website="http://healthyguilford.com"))
        contacts.append(
            schema.Contact.create(website="https://conehealth.com/vaccine"))
    # A few sites have "locationUrl" set to something like this:
    # `https://myvaccine.fl.gov/ or 866-201-6313`
    elif match := re.match(r"^(https://\S+) or (\d\d\d-\d\d\d-\d\d\d\d)$",
                           location_url):
        # Group 2 is the phone number, group 1 the URL.
        contacts.append(schema.Contact.create(phone=match[2]))
        contacts.append(schema.Contact.create(website=match[1]))
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from ``contact-phone`` and ``url``.

    ``contact-phone`` is optional; when present it is normalized with the
    ``"general"`` contact type. The ``url`` is only kept when it starts with
    ``"http"``. Returns ``None`` when no contact was produced.
    """
    if "contact-phone" in site:
        found = list(
            normalize_phone(site["contact-phone"], contact_type="general")
        )
    else:
        found = []

    # Filter out sites with a url of "/"
    if site["url"].startswith("http"):
        found.append(
            schema.Contact(
                contact_type="general",
                website=site["url"],
            )
        )

    return found if found else None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from the ``phone`` and ``vaccine_url`` attributes.

    The URL goes through ``sanitize_url`` and is only added when the result is
    truthy. Returns ``None`` when no contact was produced.
    """
    attrs = site["attributes"]
    found = []

    raw_phone = attrs["phone"]
    if raw_phone:
        found.extend(normalize_phone(raw_phone))

    # Email contacts are currently disabled:
    # if site["attributes"]["publicEmail"]:
    #     contacts.append(schema.Contact(email=site["attributes"]["publicEmail"]))

    # there are multiple urls, vaccine, agency, health dept. etc
    vaccine_url = attrs["vaccine_url"]
    if vaccine_url:
        cleaned = sanitize_url(vaccine_url)
        if cleaned:
            found.append(schema.Contact(website=cleaned))

    return found if found else None
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]:
    """Build the contact list from the ``PublicPhone`` and ``WEBSITE`` attributes.

    Each entry from ``normalize_phone`` is passed through ``_phone_fixup``;
    the website goes through ``_website_fixup``. Values for which the fixup
    returns ``None`` are dropped. Everything is tagged as a general contact.
    Returns ``None`` when no contact was produced.
    """
    attrs = site["attributes"]
    found = []

    for candidate in normalize_phone(attrs["PublicPhone"]):
        fixed_phone = _phone_fixup(candidate)
        if fixed_phone is None:
            continue
        found.append(
            schema.Contact(
                phone=fixed_phone,
                contact_type=schema.ContactType.GENERAL,
            )
        )

    fixed_site_url = _website_fixup(attrs["WEBSITE"])
    if fixed_site_url is not None:
        found.append(
            schema.Contact(
                website=fixed_site_url,
                contact_type=schema.ContactType.GENERAL,
            )
        )

    return found if found else None
def _get_contacts(site: dict): ret = [] if site["Appointment Phone"]: for phone in normalize_phone(site["Appointment Phone"]): ret.append(phone) url = site["Web Address"] # Some URLs have multiple schemes. valid_url = re.match(r"(https?:\/\/)*(.+)", url) if (url == "http://" or url == "https://" or url == "none" or url == "" or url.startswith("Please email")): return ret elif valid_url is not None: if valid_url.group(1) is None: url = valid_url.group(2) else: url = f"{valid_url.group(1)}{valid_url.group(2)}" url = normalize_url(url) ret.append(schema.Contact(website=url)) else: logger.warning(f"Unknown, invalid URL: {url}") return ret
def test_normalize_phone():
    """Check ``normalize_phone`` against raw inputs and expected string lists."""
    cases = [
        ("", []),
        ("abc", []),
        ("1234", []),
        ("1234567890", []),  # Not a valid phone-like number.
        ("212 555 1212", ["(212) 555-1212"]),
        ("(212) 555 1212", ["(212) 555-1212"]),
        ("212 555 1212 x17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 OPTION 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212, option 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 PRESS 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212, press 17 to schedule", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 / 212 555 1213", ["(212) 555-1212", "(212) 555-1213"]),
    ]
    for raw, expected in cases:
        assert normalize_phone(raw) == expected
def test_normalize_phone():
    """Check ``normalize_phone`` against raw inputs and expected contacts.

    Each case pairs a raw input with the formatted phone strings expected
    back; the strings are wrapped in ``schema.Contact`` for the comparison.
    """
    cases = [
        ("", []),
        ("abc", []),
        ("1234", []),
        ("1234567890", []),  # Not a valid phone-like number.
        ("212 555 1212", ["(212) 555-1212"]),
        ("(212) 555 1212", ["(212) 555-1212"]),
        ("212 555 1212 ext17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 x17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 OPTION 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212, option 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 PRESS 17", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 press #18", ["(212) 555-1212 ext. 18"]),
        ("212 555 1212, press 17 to schedule", ["(212) 555-1212 ext. 17"]),
        ("212 555 1212 / 212 555 1213", ["(212) 555-1212", "(212) 555-1213"]),
    ]
    for raw, expected_phones in cases:
        expected = [schema.Contact(phone=number) for number in expected_phones]
        assert normalize_phone(raw) == expected
def _get_contacts(site: dict) -> Optional[List[schema.Contact]]: ret = [] if phone := site["attributes"]["phone"]: ret.extend(normalize_phone(phone))