def convert_csv_data_to_center_info(data: dict) -> CenterInfo:
    """Build a ``CenterInfo`` from one CSV row given as a dict.

    Keys read from ``data``: ``nom``, ``rdv_site_web``, ``com_insee``,
    ``iterator``, ``rdv_tel``, ``phone_number`` and, only for the error log,
    ``gid``.

    Rows whose ``iterator`` is ``'ordoclic'`` are handed to
    ``convert_ordoclic_to_center_info``; for every other row the location and
    metadata (address, phone number, business hours) are filled here.

    A ``ValueError`` raised while resolving the departement is logged and the
    departement is left as an empty string.
    """
    name = data.get('nom', None)
    url = data.get('rdv_site_web', None)
    departement = ''
    try:
        departement = departementUtils.to_departement_number(
            data.get('com_insee', None))
    except ValueError as e:
        # Use .get() here: the handler must not raise KeyError itself when
        # the row has no 'gid' or 'com_insee' key.
        logger.error(
            f"erreur lors du traitement de la ligne avec le gid {data.get('gid')}, com_insee={data.get('com_insee')} : {e}"
        )

    center = CenterInfo(departement, name, url)
    if data.get('iterator', '') == 'ordoclic':
        return convert_ordoclic_to_center_info(data, center)

    center.fill_localization(convert_csv_data_to_location(data))
    center.metadata = dict()
    center.metadata['address'] = convert_csv_address(data)
    if data.get('rdv_tel'):
        center.metadata['phone_number'] = format_phone_number(
            data.get('rdv_tel'))
    # 'phone_number' is checked last, so it overrides 'rdv_tel' when both
    # are present.
    if data.get('phone_number'):
        center.metadata['phone_number'] = format_phone_number(
            data.get('phone_number'))
    center.metadata['business_hours'] = convert_csv_business_hours(data)
    return center
def convert_csv_data_to_center_info(data: dict) -> CenterInfo:
    """Build a ``CenterInfo`` from one CSV row given as a dict.

    Keys read from ``data``: ``nom``, ``rdv_site_web``, ``com_insee``,
    ``iterator``, ``rdv_tel``, ``phone_number`` and, only for the error log,
    ``gid``.

    Rows whose ``iterator`` is ``"ordoclic"`` are handed to
    ``convert_ordoclic_to_center_info``; for every other row the location and
    metadata (address, phone number, business hours) are filled here.

    A ``ValueError`` raised while resolving the departement is logged and the
    departement is left as an empty string.
    """
    name = data.get("nom", None)
    url = data.get("rdv_site_web", None)
    departement = ""
    try:
        departement = departementUtils.to_departement_number(
            data.get("com_insee", None))
    except ValueError as e:
        # Use .get() here: the handler must not raise KeyError itself when
        # the row has no "gid" or "com_insee" key.
        logger.error(
            f"erreur lors du traitement de la ligne avec le gid {data.get('gid')}, com_insee={data.get('com_insee')} : {e}"
        )

    center = CenterInfo(departement, name, url)
    if data.get("iterator", "") == "ordoclic":
        return convert_ordoclic_to_center_info(data, center)

    center.fill_localization(convert_csv_data_to_location(data))
    center.metadata = dict()
    center.metadata["address"] = convert_csv_address(data)
    if data.get("rdv_tel"):
        center.metadata["phone_number"] = format_phone_number(
            data.get("rdv_tel"))
    # "phone_number" is checked last, so it overrides "rdv_tel" when both
    # are present.
    if data.get("phone_number"):
        center.metadata["phone_number"] = format_phone_number(
            data.get("phone_number"))
    center.metadata["business_hours"] = convert_csv_business_hours(data)
    return center
def _metadata_from_csv_data(data: dict) -> dict:
    """Assemble the metadata dict (address, hours, phone) for a CSV row.

    ``address`` and ``business_hours`` are always present. ``phone_number``
    is added only when ``rdv_tel`` or ``phone_number`` holds a truthy value
    in ``data``; when both do, ``phone_number`` wins.
    """
    result = {}
    result["address"] = convert_csv_address(data)
    result["business_hours"] = convert_csv_business_hours(data)

    # Later keys overwrite earlier ones, hence the fixed ordering.
    for source_key in ("rdv_tel", "phone_number"):
        raw_number = data.get(source_key)
        if raw_number:
            result["phone_number"] = format_phone_number(raw_number)

    return result
def get_dict_infos_center_page(url_path: str) -> dict:
    """Fetch the booking API for a center URL and extract one place's info.

    The last path segment of ``url_path`` is substituted into ``BOOKING_URL``
    and fetched with ``requests``. HTTP errors propagate through
    ``raise_for_status()``.

    Returns:
        A dict with ``gid``, ``address``, ``long_coor1``, ``lat_coor1``,
        ``com_insee``, ``business_hours`` and, when a number is available,
        ``phone_number``; an empty dict when the response lists no places.
    """
    internal_api_url = BOOKING_URL.format(
        parse.urlsplit(url_path).path.split("/")[-1])
    logger.info(f"> Parsing {internal_api_url}")
    data = requests.get(internal_api_url)
    data.raise_for_status()
    output = data.json().get("data", {})

    places = output.get("places", {})
    if not places:
        return {}

    place = find_place(places, url_path)

    # Parse place location
    infos_page = {}
    infos_page["gid"] = "d{0}".format(output.get("profile", {}).get("id", ""))
    infos_page["address"] = place["full_address"]
    infos_page["long_coor1"] = place.get("longitude")
    infos_page["lat_coor1"] = place.get("latitude")
    # Remove whitespace from the zipcode before the INSEE lookup, as the
    # other place parsers in this code base do.
    infos_page["com_insee"] = departementUtils.cp_to_insee(
        place["zipcode"].replace(" ", "").strip())

    # Prefer the landline number; fall back to the generic phone number.
    if place.get("landline_number"):
        phone_number = place.get("landline_number")
    else:
        phone_number = place.get("phone_number")
    if phone_number:
        infos_page["phone_number"] = format_phone_number(phone_number)

    infos_page["business_hours"] = parse_doctolib_business_hours(place)
    return infos_page
def test_format_phone_number():
    """Check ``format_phone_number`` against a table of raw/expected pairs."""
    expectations = (
        ("+331204312", "+331204312"),
        ("+569492392", "+569492392"),
        ("0123456789", "+33123456789"),
        ("01.20.43.12", "+331204312"),
        ("3975", "+333975"),
    )
    for phone_number, expected in expectations:
        assert format_phone_number(phone_number) == expected
def get_center_details(center):
    """Fetch and normalise the Bimedoc details of one pharmacy.

    Queries ``SLOTS_URL`` for ``center["id"]`` over a window starting today
    and spanning ``NUMBER_OF_SCRAPED_DAYS`` days, then rewrites the JSON
    payload in place: adds the scraper keys (``rdv_site_web``, ``gid``,
    ``nom``, ``com_insee``, ``address``, coordinates, ``type``, ...),
    formats the phone number, maps the vaccine names and drops the raw keys
    that are no longer needed.

    Returns:
        The transformed payload dict, or ``None`` when the request raised an
        ``httpx.HTTPError``. If the status is not 200 but no error was
        raised, the error is logged and the untransformed payload is
        returned.
    """
    start_date = datetime.date.today()
    end_date = start_date + datetime.timedelta(NUMBER_OF_SCRAPED_DAYS)
    request_url = SLOTS_URL.format(
        pharmacy_id=f'{center["id"]}/',
        start_date=start_date,
        end_date=end_date,
    )
    try:
        # The context manager closes the client's connections once the
        # response body has been read.
        with httpx.Client() as client:
            r = client.get(request_url, headers=BIMEDOC_HEADERS)
        r.raise_for_status()
        center_details = r.json()

        if r.status_code != 200:
            logger.error(
                f"Can't access API center details - {r.status_code} => {json.loads(r.text)}"
            )
        else:
            useless_keys = [
                "slots",
                "id",
                "postcode",
                "coordinates",
                "city",
                "street",
                "building_number",
                "name",
            ]
            logger.info(
                f'[Bimedoc] Found Center {center_details["name"]} ({center_details["postcode"]})'
            )
            center_details["rdv_site_web"] = APPOINTMENT_URL.format(
                pharmacy_id=center_details["id"])
            center_details["platform_is"] = PLATFORM
            center_details["gid"] = f'bimedoc{center_details["id"]}'
            center_details["nom"] = center_details["name"]
            center_details["com_insee"] = departementUtils.cp_to_insee(
                center_details["postcode"])
            long_coor1, lat_coor1 = get_coordinates(center_details)
            address = f'{center_details["street"]}, {center_details["postcode"]} {center_details["city"]}'
            center_details["address"] = address
            center_details["long_coor1"] = long_coor1
            center_details["lat_coor1"] = lat_coor1
            center_details["type"] = set_center_type("pharmacie")
            center_details["phone_number"] = format_phone_number(
                center_details["phone_number"])
            center_details["vaccine_names"] = [
                get_vaccine_name(vaccine).value
                for vaccine in center_details["vaccine_names"]
                if vaccine
            ]
            # Drop the raw fields only after everything derived from them
            # has been computed.
            for key in useless_keys:
                center_details.pop(key, None)
    except httpx.HTTPError as exc:
        logger.error(
            f"Can't access API center details for URL {exc.request.url} - {exc}"
        )
        return None
    return center_details
def maiia_center_to_csv(center: dict, root_center: dict) -> dict:
    """Flatten a Maiia center payload into the scraper's CSV-style dict.

    Identity fields (``gid``, ``nom``, ``rdv_site_web``, ``type``) come from
    ``center``; ``vaccine_type`` is the de-duplicated list of vaccine names
    recognised among ``root_center["consultationReasons"]``. When
    ``publicInformation`` is present, its address (postcode, INSEE code,
    full address, coordinates) and office information (phone number,
    opening schedules) are copied over as well.

    A missing ``url`` key is logged as a warning; the function then still
    reads ``center["url"]``.
    """
    if "url" not in center:
        logger.warning(f"url not found - {center}")

    row = {
        "gid": center.get("id")[:8],
        "nom": center.get("name"),
        "rdv_site_web": f'{MAIIA_URL}{center["url"]}?centerid={center["id"]}',
    }
    row["type"] = DRUG_STORE if "pharmacie" in center["url"] else VACCINATION_CENTER

    vaccines = []
    row["vaccine_type"] = vaccines
    for reason in root_center["consultationReasons"]:
        vaccine = get_vaccine_name(reason.get("name"))
        if vaccine and vaccine not in vaccines:
            vaccines.append(vaccine)

    if "publicInformation" not in center:
        return row
    public_info = center["publicInformation"]

    if "address" in public_info:
        address = public_info["address"]
        zip_code = address.get("zipCode")
        row["com_cp"] = zip_code
        row["com_insee"] = address.get("inseeCode", "")
        # An INSEE code shorter than 5 characters is replaced by one
        # derived from the postcode.
        if len(row["com_insee"]) < 5:
            row["com_insee"] = departementUtils.cp_to_insee(zip_code)
        row["address"] = address.get("fullAddress")

        if "location" in address:
            coordinates = address["location"]["coordinates"]
            row["long_coor1"] = coordinates[0]
            row["lat_coor1"] = coordinates[1]
        elif "locality" in address and "location" in address["locality"]:
            point = address["locality"]["location"]
            row["long_coor1"] = point["x"]
            row["lat_coor1"] = point["y"]

    if "officeInformation" in public_info:
        office = public_info["officeInformation"]
        row["phone_number"] = format_phone_number(office.get("phoneNumber", ""))
        if "openingSchedules" in office:
            row["business_hours"] = maiia_schedule_to_business_hours(
                office["openingSchedules"])

    return row
def convert_ordoclic_to_center_info(data: dict, center: CenterInfo) -> CenterInfo:
    """Fill ``center`` with the location and metadata of an Ordoclic row.

    The location is set only when the longitude or the latitude found under
    ``data["location"]["coordinates"]`` is truthy. The metadata always gets
    an ``address`` and a ``business_hours`` of ``None``; ``phone_number`` is
    added only when the raw number is longer than three characters.

    Returns the same ``center`` object that was passed in.
    """
    place = data.get("location")
    coords = place.get("coordinates")

    longitude = coords["lon"]
    if longitude or coords["lat"]:
        city_slug = urlify(place.get("city"))
        center.fill_localization(
            CenterLocation(longitude, coords["lat"], city_slug))

    center.metadata = {}
    meta = center.metadata
    meta["address"] = f'{place["address"]}, {place["zip"]} {place["city"]}'

    raw_phone = data.get("phone_number", "")
    if len(raw_phone) > 3:
        meta["phone_number"] = format_phone_number(raw_phone)

    meta["business_hours"] = None
    return center
def convert_ordoclic_to_center_info(data: dict, center: CenterInfo) -> CenterInfo:
    """Fill ``center`` with the location and metadata of an Ordoclic row.

    The location is set only when the longitude or the latitude found under
    ``data['location']['coordinates']`` is truthy. The metadata always gets
    an ``address`` and a ``business_hours`` of ``None``; ``phone_number`` is
    added only when the raw number is longer than three characters.

    Returns the same ``center`` object that was passed in.
    """
    place = data.get('location')
    coords = place.get('coordinates')

    longitude = coords['lon']
    if longitude or coords['lat']:
        city_slug = urlify(place.get('city'))
        center.fill_localization(
            CenterLocation(longitude, coords['lat'], city_slug))

    center.metadata = {}
    meta = center.metadata
    meta['address'] = f'{place["address"]}, {place["zip"]} {place["city"]}'

    raw_phone = data.get('phone_number', '')
    if len(raw_phone) > 3:
        meta['phone_number'] = format_phone_number(raw_phone)

    meta['business_hours'] = None
    return center
def maiia_center_to_csv(center: dict, root_center: dict) -> dict:
    """Flatten a Maiia center payload into the scraper's CSV-style dict.

    Identity fields (``gid``, ``nom``, ``rdv_site_web``, ``type``) come from
    ``center``; ``vaccine_type`` is the de-duplicated list of vaccine names
    recognised among ``root_center['consultationReasons']``. When
    ``publicInformation`` is present, its address (INSEE code, full address,
    coordinates) and office information (phone number, opening schedules)
    are copied over as well.

    A missing ``url`` key is logged as a warning; the function then still
    reads ``center['url']``.
    """
    if 'url' not in center:
        logger.warning(f'url not found - {center}')

    row = {
        'gid': center.get('id')[:8],
        'nom': center.get('name'),
        'rdv_site_web': f'{MAIIA_URL}{center["url"]}?centerid={center["id"]}',
    }
    row['type'] = DRUG_STORE if 'pharmacie' in center['url'] else VACCINATION_CENTER

    vaccines = []
    row['vaccine_type'] = vaccines
    for reason in root_center['consultationReasons']:
        vaccine = get_vaccine_name(reason.get('name'))
        if vaccine and vaccine not in vaccines:
            vaccines.append(vaccine)

    if 'publicInformation' not in center:
        return row
    public_info = center['publicInformation']

    if 'address' in public_info:
        address = public_info['address']
        row['com_insee'] = address.get('inseeCode', '')
        # An INSEE code shorter than 5 characters is replaced by one
        # derived from the postcode.
        if len(row['com_insee']) < 5:
            zip_code = address.get('zipCode')
            row['com_insee'] = departementUtils.cp_to_insee(zip_code)
        row['address'] = address.get('fullAddress')

        if 'location' in address:
            coordinates = address['location']['coordinates']
            row['long_coor1'] = coordinates[0]
            row['lat_coor1'] = coordinates[1]
        elif 'locality' in address and 'location' in address['locality']:
            point = address['locality']['location']
            row['long_coor1'] = point['x']
            row['lat_coor1'] = point['y']

    if 'officeInformation' in public_info:
        office = public_info['officeInformation']
        row['phone_number'] = format_phone_number(office.get('phoneNumber', ''))
        if 'openingSchedules' in office:
            row['business_hours'] = maiia_schedule_to_business_hours(
                office['openingSchedules'])

    return row
def get_dict_infos_center_page(url_path: str) -> list:
    """Fetch the booking API for a center URL and describe each of its places.

    The last path segment of ``url_path`` is substituted into ``BOOKING_URL``
    (as ``centre``) and fetched with ``requests``.

    Returns:
        A list with one dict per place found under ``data.places``. Each dict
        carries ``gid``, ``place_id``, ``address``, ``ville``, coordinates,
        ``com_insee``, the whole ``booking`` payload, ``business_hours``,
        ``visit_motives`` and, when a number is available, ``phone_number``.
        The list is empty when the request or the JSON decoding fails.
    """
    internal_api_url = BOOKING_URL.format(
        centre=parse.urlsplit(url_path).path.split("/")[-1])
    logger.info(f"> Parsing {internal_api_url}")
    liste_infos_page = []

    try:
        req = requests.get(internal_api_url)
        req.raise_for_status()
        data = req.json()
        output = data.get("data", {})
    except Exception:
        # Best effort: any retrieval or decoding failure yields an empty
        # result. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        logger.warning(f"> Could not retrieve data from {internal_api_url}")
        return liste_infos_page

    # Parse place
    places = output.get("places", {})
    for place in places:
        infos_page = {}

        # Parse place location
        infos_page["gid"] = "d{0}".format(
            output.get("profile", {}).get("id", ""))
        infos_page["place_id"] = place["id"]
        infos_page["address"] = place["full_address"]
        infos_page["ville"] = place["city"]
        infos_page["long_coor1"] = place.get("longitude")
        infos_page["lat_coor1"] = place.get("latitude")
        infos_page["com_insee"] = departementUtils.cp_to_insee(
            place["zipcode"].replace(" ", "").strip())
        infos_page["booking"] = output

        # Prefer the landline number; fall back to the generic phone number.
        phone_number = place.get("landline_number") or place.get("phone_number")
        if phone_number:
            infos_page["phone_number"] = format_phone_number(phone_number)

        infos_page["business_hours"] = parse_doctolib_business_hours(place)

        # Visit motives live on the booking payload, not on the place, so
        # every place receives its own copy of the same list of names.
        infos_page["visit_motives"] = [
            vm.get("name") for vm in output.get("visit_motives", [])
        ]
        liste_infos_page.append(infos_page)

    # Returns a list with data for each place
    return liste_infos_page
def parse_mesoigner_centers():
    """Scrape Mesoigner centers and normalise them, one entry per booking URL.

    Every center returned by ``scrap_centers()`` is logged. The first center
    seen for each ``rdv_site_web`` value is rewritten in place (``gid``,
    ``nom``, ``com_insee``, ``address``, coordinates, ``business_hours``,
    ``type``, formatted ``phone_number``), stripped of its raw fields and
    kept; later centers sharing that URL are skipped.

    Returns:
        The list of normalised center dicts, or ``None`` when
        ``scrap_centers()`` returned ``None``.
    """
    centers_list = scrap_centers()
    if centers_list is None:
        return None

    useless_keys = (
        "id",
        "zipcode",
        "position",
        "opening_hours",
        "adress_city",
        "adress_street",
    )
    unique_centers = []
    # Track the URLs already kept so each membership test is O(1).
    # Assumes ``rdv_site_web`` values are hashable (URL strings).
    seen_urls = set()

    for centre in centers_list:
        logger.info(
            f'[Mesoigner] Found Center {centre["name"]} ({centre["zipcode"]})')
        booking_url = centre["rdv_site_web"]
        if booking_url in seen_urls:
            continue
        seen_urls.add(booking_url)

        centre["gid"] = centre["id"]
        centre["nom"] = centre["name"]
        centre["com_insee"] = departementUtils.cp_to_insee(centre["zipcode"])
        long_coor1, lat_coor1 = get_coordinates(centre)
        address = f'{centre["adress_street"]}, {centre["zipcode"]} {centre["adress_city"]}'
        centre["address"] = address
        centre["long_coor1"] = long_coor1
        centre["lat_coor1"] = lat_coor1
        centre["business_hours"] = parse_mesoigner_business_hours(centre)
        centre["type"] = set_center_type(centre["center_type"])
        centre["phone_number"] = format_phone_number(centre["phone_number"])

        # Drop the raw fields only after everything derived from them has
        # been computed.
        for key in useless_keys:
            centre.pop(key, None)
        unique_centers.append(centre)

    return unique_centers
def get_dict_infos_center_page(url_path: str) -> dict:
    """Fetch the booking API for a center URL and extract one place's info.

    The last path segment of ``url_path`` is substituted into ``BOOKING_URL``
    and fetched with ``requests``; HTTP errors propagate through
    ``raise_for_status()``. The place is chosen by ``find_place``.

    Returns a dict with ``gid``, ``address``, coordinates, ``com_insee``,
    ``business_hours``, ``visit_motives`` and, when a number is available,
    ``phone_number``; an empty dict when the response lists no places.
    """
    centre_slug = parse.urlsplit(url_path).path.split("/")[-1]
    internal_api_url = BOOKING_URL.format(centre_slug)
    logger.info(f"> Parsing {internal_api_url}")

    response = requests.get(internal_api_url)
    response.raise_for_status()
    output = response.json().get("data", {})

    places = output.get("places", {})
    if not places:
        return {}

    place = find_place(places, url_path)

    # Location fields, in the same order the result dict exposes them.
    infos_page = {
        "gid": f'd{output.get("profile", {}).get("id", "")}',
        "address": place["full_address"],
        "long_coor1": place.get("longitude"),
        "lat_coor1": place.get("latitude"),
        "com_insee": departementUtils.cp_to_insee(
            place["zipcode"].replace(" ", "").strip()),
    }

    # Landline first, generic phone number otherwise.
    phone_number = place.get("landline_number") or place.get("phone_number")
    if phone_number:
        infos_page["phone_number"] = format_phone_number(phone_number)

    infos_page["business_hours"] = parse_doctolib_business_hours(place)

    # Visit motives are read from the booking payload; this may not be the
    # best place for it and the function is a candidate for refactoring.
    infos_page["visit_motives"] = [
        motive.get("name") for motive in output.get("visit_motives", [])
    ]
    return infos_page
def parse_place(place: Dict) -> Dict:
    """Extract the scraper fields from one place of a booking payload.

    ``id``, ``full_address``, ``city`` and ``zipcode`` are required keys;
    coordinates and phone numbers are optional. Spaces are removed from the
    zipcode before it is converted to an INSEE code.

    Returns:
        A dict with ``place_id``, ``address``, ``ville``, ``long_coor1``,
        ``lat_coor1``, ``com_insee``, ``phone_number`` (``None`` when no
        number is available) and ``business_hours``.
    """
    # Fall back to the generic number when the landline is missing *or*
    # empty/None; dict.get's default only covers the missing-key case.
    phone_number = place.get("landline_number") or place.get("phone_number")
    return {
        "place_id": place["id"],
        "address": place["full_address"],
        "ville": place["city"],
        "long_coor1": place.get("longitude"),
        "lat_coor1": place.get("latitude"),
        "com_insee": departementUtils.cp_to_insee(
            place["zipcode"].replace(" ", "").strip()),
        "phone_number": format_phone_number(phone_number) if phone_number else None,
        "business_hours": parse_doctolib_business_hours(place),
    }