def test_opening_hours():
    """OpenHour accepts a well-formed entry and rejects malformed ones."""
    well_formed = location.OpenHour(
        day="monday",
        opens="08:00",
        closes="14:00",
    )
    assert well_formed

    rejected_inputs = (
        # opening time is not a clock time
        {"day": "monday", "opens": "8h", "closes": "14:00"},
        # abbreviated day name
        {"day": "mon", "opens": "08:00", "closes": "14:00"},
        # closing time earlier than opening time
        {"day": "monday", "opens": "20:00", "closes": "06:00"},
        # full datetime string instead of a time
        {"day": "monday", "opens": "2021-01-01T08:00:00", "closes": "14:00"},
    )
    for kwargs in rejected_inputs:
        with pytest.raises(pydantic.error_wrappers.ValidationError):
            location.OpenHour(**kwargs)
def _get_opening_hours(site: dict) -> Optional[List[schema.OpenHour]]:
    """Turn the free-text hours attributes of a site into OpenHour entries.

    Reads ``operationalhours`` and ``WalkUpHours`` from ``site["attributes"]``.
    Each non-empty value is lower-cased and stripped. The literal
    ``"24 hours"`` yields a 00:00-23:59 entry for every day in ``ALL_DAYS``;
    any other value goes through ``_strip_flavor_text``, is split by
    ``_pieces``, and each piece is handed to ``_parse_days_and_hours``.

    Returns the collected entries, or ``None`` when nothing was produced.

    Example raw values (carried over from the original notes):

        "24 Hours"
        "Mon - Fri 7 a.m. - 11 a.m."
        "Mon - Fri 7 a.m. - 7 p.m."
        "Mon - Fri 7 am - 7 pm"
        "Mon - Fri 7am - 7 pm"
        "Mon - Fri 8 a.m. - 4:30 p.m."
        "Mon - Fri 8 a.m. - 5 p.m. "
        "Mon - Fri 8 a.m. - 5 p.m. covid testing Tues. 9 a.m. - 3 p.m. covid vaccine Wed and Fri 9 a.m. - 3 p.m."
        "Mon - Fri 8 a.m. - 5 p.m. covid testing Wed 9 a.m. - 3 p.m. this week. Tues. 9 a.m. - 3 p.m. next week. covid vaccine Mon, Wed, Thurs, Fri and Saturday this week"
        "Mon - Fri 8 a.m. - 5 p.m."
        "Mon - Fri 8:30 a.m. - 4:30 p.m.covid hotline Mon - Sat 8:30 a.m. - 4:30 p.m. and Sun 8:30 a.m. - 2 p.m."
        "Mon - Fri 8:30 a.m. - 6:30 p.m. Sat. 9 a.m. - 1 p.m."
        "Mon, Tues, Thurs, Fri: 7:30am - 3pm, Wed: 3pm - 7pm"
        "Mon- Fri 7 a.m. - 11 a.m. and 2 PM-4 PM "
        "Mon- Fri 7 a.m. - 11 a.m."
        "Sun 10am - 4pm, Mon - Fri 10am - 7pm, Sat 10am - 5pm"
        "Sun 10am - 5pm, Mon - Fri 8am - 9pm, Sat 9am - 6pm"
        "Sun 10am - 5pm, Mon - Fri 9am - 9pm, Sat 9am - 6pm"
        "Sun 10am - 6pm, Mon - Fri 8am - 9pm, Sat 9am - 6pm"
        "Sun 9am - 5pm, Mon - Fri 8am - 10pm, Sat 9am - 7pm"
        "Sun 9am - 5pm, Mon - Fri 8am - 9pm, Sat 9am - 7pm"
        "Varies"
        "Wed - Sat 10am to 6pm"
        "A limited number of walk-up appointments are available Monday through Saturday, from 10 a.m. - 12 p.m."
        "Limited walk-up, no appointment slots available Mon - Tues 9 a.m. - 1 p.m., Wed - Thurs 2 p.m. - 6 p.m., Fri - Sat 11 a.m. - 3 p.m."
        "Limited walk-up, no appointment slots available Weds -Sun 10 a.m. - 2 p.m."
        "Limited walk-up, no appointment slots available each day 10 a.m. - 2 p.m."
        null
    """
    collected = []

    # TODO: try to merge operationalhours and WalkUpHours ranges that overlap?
    for field in ("operationalhours", "WalkUpHours"):
        text = site["attributes"][field]
        if not text:
            continue

        text = text.lower().strip()

        if text == "24 hours":
            for weekday in ALL_DAYS:
                collected.append(
                    schema.OpenHour(day=weekday, opens="00:00", closes="23:59")
                )
            continue

        for fragment in _pieces(_strip_flavor_text(text)):
            parsed = _parse_days_and_hours(fragment)
            if not parsed:
                continue
            weekdays, span = parsed
            for weekday in weekdays:
                collected.append(
                    schema.OpenHour(day=weekday, opens=span[0], closes=span[1])
                )

    if collected:
        return collected
    return None
def _normalize_hours(human_readable_hours: Optional[str],
                     day: str) -> List[schema.OpenHour]:
    """Convert a human-readable hours string for one day into OpenHour entries.

    Args:
        human_readable_hours: Raw text such as "9AM - 5PM", "9-5",
            "8 TO 4" or several ranges joined by " AND " / ";".
            ``None`` yields an empty list.
        day: Day value passed straight through to ``schema.OpenHour``.

    Returns:
        A list of ``schema.OpenHour`` (one per range). Ranges that cannot be
        parsed are logged with a warning and contribute nothing.
    """
    if human_readable_hours is None:
        return []

    processed_hours = human_readable_hours.upper()

    # Known malformed value with no separator between the two times.
    if processed_hours == "8:00AM7:00PM":
        return [schema.OpenHour(day=day, opens="08:00", closes="19:00")]

    processed_hours = re.sub("^BY APPOINTMENT", "", processed_hours).strip()

    # Multiple ranges: normalize each one separately and concatenate.
    for separator in (" AND ", ";"):
        if separator in processed_hours:
            entries: List[schema.OpenHour] = []
            for hours_range in processed_hours.split(separator):
                entries.extend(_normalize_hours(hours_range, day))
            return entries

    if " TO " in processed_hours:
        processed_hours = processed_hours.replace(" TO ", "-")

    if processed_hours.count("-") != 1:
        logger.warning("unparseable hours: '%s'", human_readable_hours)
        return []

    open_time, close_time = [
        x.strip() for x in re.split(r"\s*-\s*", processed_hours)
    ]

    # Everything that can raise ValueError lives inside the try: parsing the
    # two times, shifting an hour by 12 (time.replace rejects hours outside
    # 0..23, e.g. "11PM-12" would ask for hour 24), and building the model.
    try:
        # NOTE(review): _normalize_time is used here as if it returns a
        # datetime.time (``.hour``, ``.replace``, ``.isoformat``) - confirm.
        opens = _normalize_time(open_time)
        closes = _normalize_time(close_time)

        if opens > closes:
            if not re.search(r"[AP]\.?M\.?$", close_time):
                # handle the "9-5" case, where the AM/PM is implied
                closes = closes.replace(hour=closes.hour + 12)
            elif len(re.findall(r"P\.?M\.?", processed_hours)) == 2:
                # handle the "10PM - 5PM" typo cases
                opens = opens.replace(hour=opens.hour - 12)

        return [
            schema.OpenHour(
                day=day,
                opens=opens.isoformat("minutes"),
                closes=closes.isoformat("minutes"),
            )
        ]
    except ValueError:
        logger.warning("unparseable hours: '%s'", human_readable_hours)
        return []
def _normalize_hours(human_readable_hours: Optional[str],
                     day: str) -> List[schema.OpenHour]:
    """Convert a human-readable hours string for one day into OpenHour entries.

    Args:
        human_readable_hours: Raw text such as "9AM-5PM", "8 TO 4" or
            several ranges joined by " AND " / ";". ``None`` yields ``[]``.
        day: Day value passed straight through to ``schema.OpenHour``.

    Returns:
        A list of ``schema.OpenHour`` (one per range). Ranges that cannot be
        parsed are logged with a warning and contribute nothing.
    """
    # NOTE(review): this block passes ``open=`` to schema.OpenHour while other
    # blocks in this file pass ``opens=`` - confirm which field name the
    # schema in use expects. Left unchanged here.
    processed_hours = human_readable_hours
    if processed_hours is None:
        return []

    # Hard-coded values that the generic parsing below cannot handle.
    if processed_hours == "8-4":
        return [schema.OpenHour(day=day, open="08:00", closes="16:00")]
    if processed_hours == "8:00AM7:00PM":
        # 7:00PM is 19:00 (this previously returned 16:00).
        return [schema.OpenHour(day=day, open="08:00", closes="19:00")]

    processed_hours = processed_hours.upper()
    # Drop a leading "BY APPOINTMENT" as a prefix. str.lstrip() would treat
    # the argument as a set of characters and also eat the leading letters of
    # real values (e.g. "NOON-5PM" -> "-5PM").
    appointment_prefix = "BY APPOINTMENT"
    if processed_hours.startswith(appointment_prefix):
        processed_hours = processed_hours[len(appointment_prefix):]
    processed_hours = processed_hours.strip()

    # Multiple ranges: normalize each one separately and concatenate.
    for separator in (" AND ", ";"):
        if separator in processed_hours:
            entries: List[schema.OpenHour] = []
            for hours_range in processed_hours.split(separator):
                entries.extend(_normalize_hours(hours_range, day))
            return entries

    if " TO " in processed_hours:
        processed_hours = processed_hours.replace(" TO ", "-")

    if processed_hours.count("-") != 1:
        logger.warning("unparseable hours: '%s'", human_readable_hours)
        return []

    open_time, close_time = processed_hours.split("-")
    try:
        return [
            schema.OpenHour(
                day=day,
                open=_normalize_time(open_time.strip().upper()),
                closes=_normalize_time(close_time.strip().upper()),
            )
        ]
    except ValueError:
        logger.warning("unparseable hours: '%s'", human_readable_hours)
        return []
def _get_opening_hours(site: dict) -> Optional[List[location.OpenHour]]:
    """Derive opening hours from a site's ``dateFrom`` / ``dateTo`` values.

    Both values are run through ``_parse_datetime``. When both parse and fall
    on the same calendar date, a single-entry list is returned for that
    weekday with the two clock times. Otherwise ``None`` is returned.

    The return annotation previously claimed a bare ``location.OpenHour``;
    the function has always returned a list or ``None``.
    """
    opens = _parse_datetime(site["dateFrom"])
    closes = _parse_datetime(site["dateTo"])

    if not opens or not closes or opens.date() != closes.date():
        # If the location is open more than one day, we can't figure out which
        # days and times it's open from the source we have. We would need to
        # scrape more information.
        return None

    return [
        location.OpenHour(
            day=opens.strftime("%A").lower(),
            opens=opens.time().isoformat("minutes"),
            closes=closes.time().isoformat("minutes"),
        )
    ]
def _get_opening_hours(site: dict) -> Optional[List[schema.OpenHour]]:
    """Build OpenHour entries from per-day numeric HHMM attributes.

    For each weekday, reads ``HoursStart_<Day>`` and ``ClosingHour_<Day>``
    from ``site["attributes"]``. A value of ``-1`` on either side means the
    day is skipped. Returns the entries, or ``None`` if there are none.
    """
    attributes = site["attributes"]
    entries = []

    for weekday in (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
    ):
        start = attributes[f"HoursStart_{weekday}"]
        end = attributes[f"ClosingHour_{weekday}"]

        if start == -1 or end == -1:
            continue

        # Midnight written as 2400: clamp a closing time to 23:59 and treat
        # an opening time as 00:00.
        if end == 2400:
            end = 2359
        if start == 2400:
            start = 0

        # A closing time before the opening time is shifted by twelve hours.
        if end < start:
            end += 1200

        # at this point, we'd be guessing. they could be flipped, could be an
        # error, who knows.
        # NOTE(review): an ``end`` of exactly 2400 produced by the shift above
        # still passes this check and is formatted as "24:00" - confirm that
        # is intended.
        if end < start or end > 2400 or start > 2400:
            continue

        # Left-pad to four characters so the value splits into HH and MM.
        start_text = str(start).rjust(4, "0")
        end_text = str(end).rjust(4, "0")

        entries.append(
            schema.OpenHour(
                day=weekday.lower(),
                opens=f"{start_text[0:2]}:{start_text[2:4]}",
                closes=f"{end_text[0:2]}:{end_text[2:4]}",
            ))

    return entries if len(entries) > 0 else None
def _normalize_opening_times(
    opening_times: Optional[Dict[Text, List[Tuple[Text, Text]]]],
) -> Optional[Tuple[List[schema.OpenDate], List[schema.OpenHour]]]:
    """Normalize the opening times of one vaccine site.

    ``opening_times`` maps a YYYY-MM-DD date string to a list of
    ``[open time, close time]`` pairs.

    Returns ``(opening_dates, opening_hours)`` for the normalized schema, or
    ``None`` when the input is empty or not a dict.
    """
    if not opening_times or not isinstance(opening_times, dict):
        return None

    # The normalized schema ties opening hours to a weekday, not to a date.
    # ASSUMPTION: the parsed data won't contain, for example, different
    # opening hours for the same site on two different Mondays. The DHHR data
    # is produced for one week at a time, so is expected to satisfy this.
    # If the assumption breaks, several conflicting entries for the same
    # weekday will be emitted.
    dates = []
    hours = []

    for date_string, time_windows in opening_times.items():
        parsed_date = _normalize_date(date_string)
        if parsed_date is None:
            # Can't do much with an invalid date, and it was logged already.
            continue

        dates.append(parsed_date)
        weekday = parsed_date.strftime("%A").lower()

        # A single day can have several opening windows; this relies on
        # downstream processing accepting multiple OpenHour objects with the
        # same weekday but different open/close times.
        for window in time_windows:
            open_time, close_time = map(_normalize_time, window)
            if not (open_time and close_time):
                # Shouldn't happen, log and carry on.
                logger.warning(
                    "Invalid time window %s, normalized as %s",
                    window,
                    [open_time, close_time],
                )
                continue
            hours.append(
                schema.OpenHour(day=weekday,
                                opens=open_time,
                                closes=close_time))

    return _make_opening_dates_contiguous(dates), hours
def _get_opening_hours(site: dict) -> List[schema.OpenHour]:
    """Build a single OpenHour from a site's ``date`` and ``hours`` strings.

    ``site["date"]`` is parsed as ``%m/%d/%Y`` and ``site["hours"]`` as two
    ``%I:%M %p`` times separated by ``" - "``.

    Returns:
        A one-element list whose day is the lowercase weekday name of the
        date and whose times are formatted as ``HH:MM``.

    Raises:
        ValueError: If the date or either time does not match its format.
        IndexError: If ``hours`` does not contain the ``" - "`` separator.
    """
    # Locals are named so they don't shadow the ``date``/``time`` modules.
    date_text = site["date"]
    hours_parts = site["hours"].split(" - ")

    event_date = datetime.datetime.strptime(date_text, "%m/%d/%Y")
    time_start = datetime.datetime.strptime(hours_parts[0], "%I:%M %p")
    time_end = datetime.datetime.strptime(hours_parts[1], "%I:%M %p")

    # Day names are lowercase everywhere else in this file ("monday", ...),
    # so lowercase calendar.day_name's capitalized value to match.
    day_name = calendar.day_name[event_date.weekday()].lower()

    # NOTE(review): this block passes ``open=`` while other blocks in this
    # file pass ``opens=`` - confirm which field name the schema expects.
    return [
        schema.OpenHour(
            day=day_name,
            open=time_start.strftime("%H:%M"),
            closes=time_end.strftime("%H:%M"),
        )
    ]
def _get_open_hours(site: dict) -> List[schema.OpenHour]:
    """Parse ``site["WIC Hours"]`` into one OpenHour per listed day.

    The last space-separated token is taken as ``<open>-<close>`` with
    whole-hour am/pm times (e.g. ``9am-5pm``). The text before it names the
    days in one of three shapes:

    * ends with "of the Month": the day is the fourth word from the end;
    * contains " to ": an inclusive Sunday-first range between the two days;
    * anything else: a single day, with trailing "s" characters removed.

    Returns:
        A list of ``schema.OpenHour`` sharing the same open/close times.
    """

    def _to_24_hour(token: str) -> str:
        """Convert a whole-hour token like "9am" / "5pm" to "H:00:00"."""
        hour_text, meridiem = token[:-2], token[-2:]
        if meridiem == "am":
            # 12am is midnight, not noon.
            return "00:00:00" if hour_text == "12" else f"{hour_text}:00:00"
        hour = int(hour_text)
        # 12pm is noon; adding 12 would give the invalid hour 24.
        return f"{hour if hour == 12 else hour + 12}:00:00"

    raw_hours = site["WIC Hours"]
    words = raw_hours.split(" ")

    time_bounds = words[-1].split("-")
    opens = _to_24_hour(time_bounds[0])
    closes = _to_24_hour(time_bounds[1])

    day_text = " ".join(words[:-1])

    days_of_week = []
    if day_text.endswith("of the Month"):
        days_of_week = [day_text.split(" ")[-4].lower()]
    elif " to " in day_text:
        # Test for the same " to " separator that is split on below.
        range_bounds = day_text.split(" to ")
        start_day_of_week = range_bounds[0].lower()
        end_day_of_week = range_bounds[1].lower()

        in_range = False
        for day in [
                schema.DayOfWeek.SUNDAY,
                schema.DayOfWeek.MONDAY,
                schema.DayOfWeek.TUESDAY,
                schema.DayOfWeek.WEDNESDAY,
                schema.DayOfWeek.THURSDAY,
                schema.DayOfWeek.FRIDAY,
                schema.DayOfWeek.SATURDAY,
        ]:
            if day == start_day_of_week:
                in_range = True
            if in_range:
                days_of_week.append(day)
            if day == end_day_of_week:
                in_range = False
    else:
        days_of_week = [day_text.rstrip("s").lower()]

    return [
        schema.OpenHour(day=day, opens=opens, closes=closes)
        for day in days_of_week
    ]
def _get_hours(site: dict) -> List[schema.OpenHour]:
    """Expand ``site["hours"]`` rows into individual OpenHour entries.

    Each row is indexed as ``row[0]`` (a day or an en-dash separated day
    range) and ``row[1]`` (an iterable of time strings). Every time string is
    parsed with ``_parse_time_string`` and emitted once per day in the row.
    """
    entries = []

    for row in site["hours"]:
        day_text = row[0]
        time_ranges = row[1]
        day_parts = day_text.split("–")

        if len(day_parts) == 2:
            first = DAYS.index(day_parts[0].strip())
            last = DAYS.index(day_parts[-1].strip())
            if first <= last:
                positions = list(range(first, last + 1))
            else:
                # The range wraps past the end of DAYS: run to the end, then
                # continue from the beginning.
                positions = list(range(first, len(DAYS)))
                positions += list(range(0, last + 1))
            schema_days = [_get_schema_day(DAYS[i]) for i in positions]
        elif len(day_parts) == 1:
            schema_days = [_get_schema_day(day_parts[0].strip())]
        else:
            # More than one en dash: no days are produced for this row.
            schema_days = []

        for time_range in time_ranges:
            start_time, end_time = _parse_time_string(time_range)
            entries.extend(
                schema.OpenHour(day=schema_day,
                                opens=start_time,
                                closes=end_time)
                for schema_day in schema_days)

    return entries
def _get_opening_hours(site: dict) -> Optional[List[schema.OpenHour]]:
    """Build OpenHour entries from per-day ``<Day>Start`` / ``<Day>End`` text.

    For each weekday, both attributes are read from ``site["attributes"]``.
    Days where either value is empty or equals ``"Closed"`` are skipped. Days
    whose values cannot be parsed or validated are logged and skipped too, so
    one malformed day does not discard the rest of the site.

    Returns:
        The list of entries, or ``None`` when no day produced one.
    """

    def _parse_time(t: str) -> datetime.time:
        # example: "8:30am"
        hh, mmxx = t.split(":")
        # 12am -> 0 and 12pm -> 0 + 12
        if hh == "12":
            hh = "0"
        return datetime.time(int(hh) + (12 if mmxx[2] == "p" else 0),
                             int(mmxx[:2]))

    attributes = site["attributes"]
    open_hours = []

    for day in (
            "Sunday",
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
    ):
        start = attributes[f"{day}Start"]
        end = attributes[f"{day}End"]

        if not start or not end or start == "Closed" or end == "Closed":
            continue

        try:
            open_hours.append(
                schema.OpenHour(
                    day=day.lower(),
                    opens=_parse_time(start),
                    closes=_parse_time(end),
                ))
        except (ValidationError, ValueError, IndexError) as e:
            # ValueError / IndexError come from _parse_time on text that is
            # not shaped like "8:30am" (no colon, missing am/pm suffix,
            # non-numeric or out-of-range parts).
            logger.info("ignoring incorrect open hours: %r", e)

    return open_hours or None
def _get_opening_hours(site: dict) -> Optional[List[schema.OpenHour]]:
    """Parse ``site["Normal Days / Hours"]`` into OpenHour entries.

    The text is lower-cased, optionally split on parenthesized vaccine names,
    and each piece is matched against two day shapes: a slash-separated list
    of days, or a dash-separated day range. Every hours range found in the
    piece is emitted once per matched day.

    Returns the collected entries, or ``None`` when the text contains no day
    of the week or nothing could be parsed.
    """
    opening_hours = []
    days_hours = site["Normal Days / Hours"].lower()

    # short-circuit if it's an entry with dates, not days
    if not re.search(DAYS_OF_WEEK_PATTERN, days_hours):
        return None

    # there's "."s after abbreviated days of the week in the one with
    # different vaccines on different days. just nuke 'em.
    # The same pass turns " & " into "/" so the slash-list branch below
    # can match it.
    if re.search(VACCINES_PATTERN, days_hours):
        days_hours = days_hours.replace(".", "").replace(" & ", "/")

    # split on the parenthesized vaccine if it's there, otherwise the whole
    # of `days_hours` is a single `piece`
    for piece in re.split(fr"\s*\((?:{VACCINES_PATTERN})\)\s*", days_hours):
        if not piece:
            continue

        # Shape 1: two or more days joined by "/".
        if re.search(
                f"({DAYS_OF_WEEK_PATTERN})(/({DAYS_OF_WEEK_PATTERN}))+",
                piece,
        ):
            """
            "Tues/Wed/Fri \n10am-4pm"
            "Tues/Wed/Fri 10am - 4pm"
            """
            # Same pattern again, this time with a named group to capture
            # the whole slash-separated list.
            days_match = re.search(
                f"(?P<days>(?:{DAYS_OF_WEEK_PATTERN})(?:/(?:{DAYS_OF_WEEK_PATTERN}))+)",
                piece,
            )
            days = _normalize_days(
                re.split(r"\s*/\s*", days_match.group("days")))

            for hours_range in HOURS_RANGE_RE.findall(piece):
                # Unpacking expects exactly two HOURS_RE matches per range
                # (open and close).
                opens, closes = [
                    _normalize_time(*m) for m in HOURS_RE.findall(hours_range)
                ]
                opening_hours.extend([
                    schema.OpenHour(day=d,
                                    opens=opens.isoformat(),
                                    closes=closes.isoformat()) for d in days
                ])
        # Shape 2: a "start - end" range of days.
        elif re.search(
                rf"({DAYS_OF_WEEK_PATTERN})\s*-\s*({DAYS_OF_WEEK_PATTERN})",
                piece):
            """
            "Thursday-Sunday \n8am-12pm, 1pm-5pm"
            "Monday-Thursday \n9am-3pm"
            "Monday-Saturday \n9am-5pm"
            "Tuesday - Sunday\n10am - 2pm"
            """
            days_match = re.search(
                rf"(?P<days>({DAYS_OF_WEEK_PATTERN})\s*-\s*({DAYS_OF_WEEK_PATTERN}))",
                piece,
            )
            start_day, end_day = _normalize_days(
                re.split(r"\s*-\s*", days_match.group("days")))

            # The end day is searched for at or after the start day's index.
            # NOTE(review): presumably DOUBLE_DAYS lists the week twice so a
            # range that wraps past the end of the week still slices
            # contiguously - confirm against its definition.
            start_idx = DOUBLE_DAYS.index(start_day)
            end_idx = DOUBLE_DAYS.index(end_day, start_idx)
            days = DOUBLE_DAYS[start_idx:end_idx + 1]

            for hours_range in HOURS_RANGE_RE.findall(piece):
                opens, closes = [
                    _normalize_time(*m) for m in HOURS_RE.findall(hours_range)
                ]
                opening_hours.extend([
                    schema.OpenHour(day=d,
                                    opens=opens.isoformat(),
                                    closes=closes.isoformat()) for d in days
                ])
        else:
            # Neither day shape matched this piece.
            logger.info(f'Unable to parse opening hours from "{piece}"')

    return opening_hours or None
def full_location():
    """Return a NormalizedLocation with every field populated.

    Enum-typed fields are given as ``location`` enum members rather than raw
    strings.
    """
    address = location.Address(
        street1="1991 Mountain Boulevard",
        city="Oakland",
        state="CA",
        zip="94611",
    )
    coordinates = location.LatLng(
        latitude=37.8273167,
        longitude=-122.2105179,
    )
    contacts = [
        location.Contact(
            contact_type=location.ContactType.BOOKING,
            phone="(916) 445-2841",
        ),
        location.Contact(
            contact_type=location.ContactType.GENERAL,
            phone="(510) 339-2215",
        ),
    ]
    dates = [
        location.OpenDate(
            opens="2021-04-01",
            closes="2021-04-01",
        ),
    ]
    hours = [
        location.OpenHour(
            day=location.DayOfWeek.MONDAY,
            opens="08:00",
            closes="14:00",
        ),
    ]
    availability = location.Availability(
        drop_in=False,
        appointments=True,
    )
    inventory = [
        location.Vaccine(
            vaccine=location.VaccineType.MODERNA,
            supply_level=location.VaccineSupply.IN_STOCK,
        ),
    ]
    access = location.Access(
        walk=True,
        drive=False,
        wheelchair=location.WheelchairAccessLevel.PARTIAL,
    )
    parent = location.Organization(
        id=location.VaccineProvider.RITE_AID,
        name="Rite Aid Pharmacy",
    )
    links = [
        location.Link(
            authority=location.LocationAuthority.GOOGLE_PLACES,
            id="ChIJA0MOOYWHj4ARW8M-vrC9yGA",
        ),
    ]
    source = location.Source(
        source="source",
        id="dad13365-2202-4dea-9b37-535288b524fe",
        fetched_from_uri="https://example.org",
        fetched_at="2020-04-04T04:04:04.4444",
        published_at="2020-04-04T04:04:04.4444",
        data={"id": "dad13365-2202-4dea-9b37-535288b524fe"},
    )

    return location.NormalizedLocation(
        id="source:dad13365-2202-4dea-9b37-535288b524fe",
        name="Rite Aid Pharmacy 5952",
        address=address,
        location=coordinates,
        contact=contacts,
        languages=["en"],
        opening_dates=dates,
        opening_hours=hours,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent,
        links=links,
        notes=["long note goes here"],
        active=True,
        source=source,
    )
def test_valid_location():
    """NormalizedLocation builds from minimal and full input and round-trips.

    The full record uses plain strings for enum fields and is checked through
    dict, JSON, and dict-then-``json.dumps`` serialization.
    """
    # Minimal record
    minimal = location.NormalizedLocation(
        id="source:id",
        source=location.Source(
            source="source",
            id="id",
            data={"id": "id"},
        ),
    )
    assert minimal

    # Full record with str enums
    address = location.Address(
        street1="1991 Mountain Boulevard",
        street2="#1",
        city="Oakland",
        state="CA",
        zip="94611",
    )
    coordinates = location.LatLng(
        latitude=37.8273167,
        longitude=-122.2105179,
    )
    contacts = [
        location.Contact(
            contact_type="booking",
            phone="(916) 445-2841",
        )
    ]
    dates = [
        location.OpenDate(
            opens="2021-04-01",
            closes="2021-04-01",
        ),
    ]
    hours = [
        location.OpenHour(
            day="monday",
            opens="08:00",
            closes="14:00",
        ),
    ]
    availability = location.Availability(
        drop_in=False,
        appointments=True,
    )
    inventory = [
        location.Vaccine(
            vaccine="moderna",
            supply_level="in_stock",
        ),
    ]
    access = location.Access(
        walk=True,
        drive=False,
        wheelchair="partial",
    )
    parent = location.Organization(
        id="rite_aid",
        name="Rite Aid",
    )
    links = [
        location.Link(
            authority="google_places",
            id="abc123",
        ),
    ]
    source = location.Source(
        source="source",
        id="id",
        fetched_from_uri="https://example.org",
        fetched_at="2020-04-04T04:04:04.4444",
        published_at="2020-04-04T04:04:04.4444",
        data={"id": "id"},
    )
    record = location.NormalizedLocation(
        id="source:id",
        name="name",
        address=address,
        location=coordinates,
        contact=contacts,
        languages=["en"],
        opening_dates=dates,
        opening_hours=hours,
        availability=availability,
        inventory=inventory,
        access=access,
        parent_organization=parent,
        links=links,
        notes=["note"],
        active=True,
        source=source,
    )
    assert record

    # Verify dict serde
    as_dict = record.dict()
    assert as_dict
    round_tripped = location.NormalizedLocation.parse_obj(as_dict)
    assert round_tripped
    assert round_tripped == record

    # Verify json serde
    as_json = record.json()
    assert as_json
    round_tripped = location.NormalizedLocation.parse_raw(as_json)
    assert round_tripped
    assert round_tripped == record

    # Verify dict->json serde
    dumped = json.dumps(as_dict)
    assert dumped
    assert dumped == as_json
    round_tripped = location.NormalizedLocation.parse_raw(dumped)
    assert round_tripped
    assert round_tripped == record