def get_last_scans(centres):
    """Attach a "last scan with availabilities" timestamp to each centre.

    Fetches the public `last_scans` JSON (configured input), builds a
    url -> timestamp mapping from all listed centres, then for every centre
    without an upcoming appointment (`prochain_rdv` falsy) sets
    `centre.last_scan_with_availabilities` from that mapping — falling back
    to the current Paris time when the url is unknown.

    Returns the centres as a list.
    """
    url = get_conf_inputs().get("last_scans")
    last_scans = {}
    # Materialize once up front: `centres` may be a one-shot iterable and we
    # iterate it after the HTTP call. (Replaces a manual append loop.)
    liste_centres = list(centres)
    try:
        response = requests.get(url)
        response.raise_for_status()
        info_centres = response.json()
    except Exception as e:
        # Best-effort: fall back to an empty mapping so processing continues.
        logger.warning(f"Impossible de récupérer le fichier info_centres: {e}")
        info_centres = {}

    for last_centres in info_centres.values():
        for centre in last_centres["centres_disponibles"] + last_centres["centres_indisponibles"]:
            if "last_scan_with_availabilities" in centre:
                last_scans[centre["url"]] = centre["last_scan_with_availabilities"]

    for centre in liste_centres:
        if not centre.prochain_rdv:
            if centre.url in last_scans:
                centre.last_scan_with_availabilities = last_scans[centre.url]
            else:
                # No known scan with availabilities: stamp "now" (Paris time).
                centre.last_scan_with_availabilities = dt.datetime.now(
                    tz=pytz.timezone("Europe/Paris")).isoformat()
    return liste_centres
def to_departement_number(insee_code: str) -> str:
    """Return the departement number matching a commune's INSEE code.

    An INSEE code is a 5-digit identifier, usually different from the postal
    code, although it (generally) also starts with the two digits of the
    departement.

    >>> to_departement_number('59350')  # Lille
    '59'
    >>> to_departement_number('75106')  # Paris 6e arr
    '75'
    >>> to_departement_number('97701')  # Saint-Barthélémy
    '971'
    """
    insee_code = insee_code.strip()
    if len(insee_code) == 4:
        # Spreadsheet editors (e.g. Excel) may strip the leading zero when the
        # column is parsed as a number (02401 becomes 2401): restore it.
        insee_code = insee_code.zfill(5)
    if len(insee_code) != 5:
        raise ValueError(f"Code INSEE non-valide : {insee_code}")

    with open(get_conf_inputs().get("insee_to_postalcode_and_dep")) as json_file:
        insee_table = json.load(json_file)

    if insee_code not in insee_table:
        raise ValueError(f"Code INSEE absent de la base des codes INSEE : {insee_code}")
    return insee_table[insee_code]["departement"]
def get_blocklist_urls() -> set:
    """Return the set of center URLs that must not be displayed.

    Reads the blocklist JSON file configured under the "blocklist" input key
    and collects the `url` of each entry in `centers_not_displayed`.
    """
    path_blocklist = get_conf_inputs().get("blocklist")
    # Context manager closes the handle deterministically (the original
    # bare open() leaked it); set comprehension replaces set([...]).
    with open(path_blocklist) as blocklist_file:
        blocklist = json.load(blocklist_file)
    return {center["url"] for center in blocklist["centers_not_displayed"]}
def generate_stats_date(centres_stats):
    """Append the current hour's aggregate stats to the by-date history file.

    Fetches the existing history JSON (falling back to an empty template on
    failure), appends one data point for the current Paris hour unless that
    hour is already recorded, and writes the result under data/output/.
    """
    stats_path = get_conf_inputs().get("from_gitlab_public").get("by_date")
    stats_data = {
        "dates": [],
        "total_centres_disponibles": [],
        "total_centres": [],
        "total_appointments": [],
    }
    try:
        history = requests.get(f"{DATA_AUTO}{stats_path}").json()
        if history:
            stats_data = history
    except Exception:
        logger.warning(
            f"Unable to fetch {DATA_AUTO}{stats_path}: generating a template file."
        )

    def _write():
        # Persist the (possibly updated) history to the output directory.
        with open(Path("data", "output", stats_path), "w") as stat_graph_file:
            json.dump(stats_data, stat_graph_file)

    paris_tz = pytz.timezone("Europe/Paris")
    current_time = datetime.now(tz=paris_tz).strftime("%Y-%m-%d %H:00:00")
    if current_time in stats_data["dates"]:
        # Already recorded for this hour: rewrite the file as-is and stop.
        _write()
        logger.info(f"Stats file already updated: {stats_path}")
        return

    totals = centres_stats["tout_departement"]
    stats_data["dates"].append(current_time)
    stats_data["total_centres_disponibles"].append(totals["disponibles"])
    stats_data["total_centres"].append(totals["total"])
    stats_data["total_appointments"].append(totals["creneaux"])
    _write()
    logger.info(f"Updated stats file: {stats_path}")
def parse_atlas():
    """Build a mapping gid -> {url_end, id_adresse} for Keldoc centers
    listed in the data.gouv atlas of vaccination centers.

    Professional-only centers and entries without a booking URL or gid are
    skipped; keldoc redirect links are resolved to the canonical booking URL.
    """
    atlas_url = get_conf_inputs().get("from_data_gouv_website").get("centers_gouv")
    data = requests.get(atlas_url).json()
    keldoc_gouv_centers = {}
    for center in data["features"]:
        props = center["properties"]
        centre_pro = props.get("c_reserve_professionels_sante", False)
        # Distinct name: the original shadowed the atlas URL with the
        # per-center booking URL.
        url = props.get("c_rdv_site_web", None)
        id_adresse = props.get("c_id_adr", None)
        gid = props.get("c_gid", None)
        # Skip professional-only centers and entries without URL/gid.
        if centre_pro or not url or not gid:
            continue
        if "keldoc" not in url:  # idiomatic form of `not "keldoc" in url`
            continue
        if "redirect" in url:
            # Resolve keldoc redirect links (?dom=...&inst=...&user=...)
            # into the canonical booking URL.
            parsed = parse.parse_qs(
                parse.urlparse(props["c_rdv_site_web"]).query,
                keep_blank_values=True,
            )
            url = f'http://keldoc.com/{parsed["dom"][0]}/{parsed["inst"][0]}/{parsed["user"][0]}'
        # Third path segment identifies the practice on keldoc.com.
        # NOTE(review): raises IndexError on a malformed keldoc URL with
        # fewer path segments — behavior kept from the original.
        end_url = parse.urlsplit(url).path.split("/")[3]
        keldoc_gouv_centers[gid] = {
            "url_end": end_url,
            "id_adresse": id_adresse,
        }
    return keldoc_gouv_centers
def parse_atlas():
    """Build a mapping gid -> {url_end, id_adresse} for Doctolib centers
    listed in the data.gouv atlas of vaccination centers.

    Professional-only centers and entries without a booking URL or gid are
    skipped.
    """
    atlas_url = get_conf_inputs().get("from_data_gouv_website").get("centers_gouv")
    data = requests.get(atlas_url).json()
    doctolib_gouv_centers = {}
    for center in data["features"]:
        props = center["properties"]
        centre_pro = props.get("c_reserve_professionels_sante", False)
        url = props.get("c_rdv_site_web", None)
        id_adresse = props.get("c_id_adr", None)
        gid = props.get("c_gid", None)
        # Skip professional-only centers and entries without URL/gid.
        if centre_pro or not url or not gid:
            continue
        if "doctolib" not in url:  # idiomatic form of `not "doctolib" in url`
            continue
        # Last path segment is the doctolib practice slug.
        end_url = parse.urlsplit(url).path.split("/")[-1]
        doctolib_gouv_centers[gid] = {
            "url_end": end_url,
            "id_adresse": id_adresse,
        }
    return doctolib_gouv_centers
def get_departements(excluded_departments: List[str] = None) -> List[str]:
    """Return departement names from the configured CSV.

    Args:
        excluded_departments: names to drop from the result. Defaults to no
            exclusions. (Default is None rather than `[]` to avoid the
            mutable-default-argument pitfall; annotation is effectively
            Optional[List[str]].)
    """
    excluded = excluded_departments if excluded_departments is not None else []
    with open(get_conf_inputs()["from_main_branch"]["departements"],
              encoding="utf8", newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        return [
            row["nom_departement"]
            for row in reader
            if row["nom_departement"] not in excluded
        ]
def generate_stats_center_types(centres_info):
    """Append the current hour's per-platform and per-center-type stats to
    the center_types history file.

    Fetches the existing history JSON (falling back to an empty template on
    failure), appends one data point per platform and per center type for the
    current Paris hour unless that hour is already recorded, and writes the
    result under data/output/.
    """
    stats_path = get_conf_inputs().get("from_gitlab_public").get("center_types")
    stats_data = {"dates": [], "plateformes": {}, "center_types": {}}
    try:
        data = requests.get(f"{DATA_AUTO}{stats_path}").json()
        if data:
            stats_data = data
    except Exception:
        logger.warning(f"Unable to fetch {DATA_AUTO}{stats_path}: generating a template file.")
    ctz = pytz.timezone("Europe/Paris")
    current_time = datetime.now(tz=ctz).strftime("%Y-%m-%d %H:00:00")
    if current_time in stats_data["dates"]:
        # Already recorded for this hour: rewrite the file as-is and stop.
        with open(f"data/output/{stats_path}", "w") as stat_graph_file:
            json.dump(stats_data, stat_graph_file)
        logger.info(f"Stats file already updated: {stats_path}")
        return
    # Older history files may predate the "center_types" series.
    if "center_types" not in stats_data:
        stats_data["center_types"] = {}
    stats_data["dates"].append(current_time)
    current_calc = compute_plateforme_data(centres_info)
    # current_calc[0]: per-platform figures; current_calc[1]: per-center-type.
    _append_stat_series(stats_data["plateformes"], current_calc[0])
    _append_stat_series(stats_data["center_types"], current_calc[1])
    with open(f"data/output/{stats_path}", "w") as stat_graph_file:
        json.dump(stats_data, stat_graph_file)
    logger.info(f"Updated stats file: {stats_path}")


def _append_stat_series(history, current):
    """Append one disponible/total/creneaux data point per key of `current`
    into the per-key series of `history`, creating new series as needed.

    Replaces the two duplicated append loops of the original implementation.
    """
    for key, point in current.items():
        if key not in history:
            history[key] = {
                "disponible": [point["disponible"]],
                "total": [point["total"]],
                "creneaux": [point["creneaux"]],
            }
            continue
        series = history[key]
        series["disponible"].append(point["disponible"])
        series["total"].append(point["total"])
        series["creneaux"].append(point["creneaux"])
def get_departements():
    """Return departement names, excluding those without doctolib pages."""
    import csv

    # Guyane uses Maiia and does not have doctolib pages
    NOT_INCLUDED_DEPARTEMENTS = ["Guyane"]
    with open(get_conf_inputs().get("departements"), encoding="utf8",
              newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        # Filter during iteration instead of the original side-effect
        # comprehension calling list.remove — which raised ValueError when an
        # excluded name was absent from the CSV.
        return [
            str(row["nom_departement"])
            for row in reader
            if row["nom_departement"] not in NOT_INCLUDED_DEPARTEMENTS
        ]
def get_departements() -> List[str]:
    """Return keldoc-style departement slugs ("name-code"), applying
    keldoc-specific renames and appending the extra departements keldoc
    lists but the CSV does not."""
    with open(get_conf_inputs()["from_main_branch"]["departements"],
              encoding="utf8", newline="\n") as csvfile:
        rows = list(csv.DictReader(csvfile, delimiter=","))
    slugs = []
    for row in rows:
        name = row["nom_departement"]
        # Some departements go by a different name on keldoc.
        if name in KELDOC_WEIRD_DEPS:
            name = KELDOC_WEIRD_DEPS[name]
        slugs.append(f'{department_urlify(name)}-{row["code_departement"]}')
    return slugs + KELDOC_MISSING_DEPS
def import_departements() -> List[str]:
    """Return the list of departement codes.

    >>> departements = import_departements()
    >>> len(departements)
    101
    >>> departements[:3]
    ['01', '02', '03']
    >>> departements[83]
    '83'
    >>> departements.index('2A')
    28
    >>> sorted(departements) == departements
    True
    """
    with open(get_conf_inputs().get("departements"), newline="\n") as csvfile:
        return [str(row["code_departement"]) for row in csv.DictReader(csvfile)]
def load_cedex_to_insee() -> dict:
    """Load the CEDEX -> INSEE mapping from the configured JSON file."""
    mapping_path = get_conf_inputs().get("cedex_to_insee")
    with open(mapping_path) as json_file:
        return json.load(json_file)
def load_insee() -> dict:
    """Load the postal-code -> INSEE mapping from the configured JSON file."""
    mapping_path = get_conf_inputs().get("postalcode_to_insee")
    with open(mapping_path) as json_file:
        return json.load(json_file)
from utils.vmd_config import get_conf_inputs
from utils.vmd_logger import enable_logger_for_debug
from utils.vmd_utils import get_departements_numbers

# Shared HTTP client with generous timeouts for slow upstream endpoints.
timeout = httpx.Timeout(30.0, connect=30.0)
DEFAULT_CLIENT = httpx.Client(timeout=timeout)

logger = logging.getLogger("scraper")

# Color palettes for the generated maps.
PALETTE_FB = ["#ffffff", "#eaeaea", "#cecece", "#80bdf4", "#2d8dfe"]
PALETTE_FB_RDV = [
    "#eaeaea", "#F44848", "#FF9255", "#FFD84F",
    "#FEE487", "#7DF0AE", "#27DF76", "#00B94F",
]
ECHELLE_STROKE = "#797979"
ECHELLE_FONT = "#424242"

# Configured input locations (map template, population CSV, official RDV
# data, last-scans JSON).
MAP_SRC_PATH = Path(get_conf_inputs().get("from_main_branch").get("map"))
CSV_POP_URL = get_conf_inputs().get("from_main_branch").get("dep_pop")
CSV_RDV_URL = get_conf_inputs().get("from_data_gouv_website").get("rdv_gouv")
JSON_INFO_CENTRES_URL = get_conf_inputs().get("from_gitlab_public").get("last_scans")


def get_pop():
    """Return a dict mapping departement code -> population value, as read
    from the population CSV (values kept as strings)."""
    dept_pop = {}
    with open(CSV_POP_URL, encoding="utf-8", newline="") as file:
        for row in csv.DictReader(file, delimiter=";"):
            dept_pop[row["dep"]] = row["departmentPopulation"]
    return dept_pop
from utils.vmd_config import get_conf_inputs
from utils.vmd_logger import enable_logger_for_production, enable_logger_for_debug

# Shared HTTP client with generous timeouts for slow upstream endpoints.
timeout = httpx.Timeout(30.0, connect=30.0)
DEFAULT_CLIENT = httpx.Client(timeout=timeout)

logger = logging.getLogger("scraper")

# Color palettes for the generated maps.
PALETTE_FB = ["#ffffff", "#eaeaea", "#cecece", "#80bdf4", "#2d8dfe"]
PALETTE_FB_RDV = [
    "#eaeaea", "#F44848", "#FF9255", "#FFD84F", "#FEE487", "#7DF0AE",
    "#27DF76", "#00B94F"
]
ECHELLE_STROKE = "#797979"
ECHELLE_FONT = "#424242"

# Configured input locations. NOTE(review): keys here are flat ("map",
# "dep_pop", ...) while a sibling module uses nested keys
# ("from_main_branch" -> "map") — confirm which config layout applies.
MAP_SRC_PATH = Path(get_conf_inputs().get("map"))
CSV_POP_URL = get_conf_inputs().get("dep_pop")
CSV_RDV_URL = get_conf_inputs().get("rdv_gouv")
JSON_INFO_CENTRES_URL = get_conf_inputs().get("last_scans")


def get_csv(url: str,
            header=True,
            delimiter=";",
            encoding="utf-8",
            client: httpx.Client = DEFAULT_CLIENT):
    # Fetch a CSV resource over HTTP; on an HTTP error status it only logs
    # a warning. NOTE(review): the body appears truncated in this chunk —
    # no success-path handling (parsing/return) is visible, and the
    # `header`/`delimiter`/`encoding` parameters are unused here; confirm
    # against the full file.
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
def get_departements():
    """Return the list of departement names from the configured CSV."""
    with open(get_conf_inputs()["departements"], encoding="utf8",
              newline="\n") as csvfile:
        return [str(row["nom_departement"]) for row in csv.DictReader(csvfile)]