class FrameX:
    """
    Utility class to access the FrameX API
    """

    BASE_URL = API_BASE

    def __init__(self):
        self.client = Client()

    def video(self, video: Text) -> Video:
        """
        Fetches information about a video
        """
        r = self.client.get(urljoin(self.BASE_URL, f"video/{quote(video)}/"))
        r.raise_for_status()
        return Video(**r.json())

    def video_frame(self, video: Text, frame: int) -> bytes:
        """
        Fetches the JPEG data of a single frame
        """
        r = self.client.get(
            urljoin(self.BASE_URL, f'video/{quote(video)}/frame/{quote(f"{frame}")}/')
        )
        r.raise_for_status()
        return r.content

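# Minimal usage sketch for the FrameX client above. It assumes API_BASE, Client,
# Video, urljoin and quote are available in the surrounding module; the video name
# "falcon-heavy" and the output filename are illustrative only.
def demo_framex() -> None:
    api = FrameX()
    meta = api.video("falcon-heavy")                    # video metadata
    first_frame = api.video_frame("falcon-heavy", 0)    # raw JPEG bytes of frame 0
    with open("frame_0.jpg", "wb") as fp:
        fp.write(first_frame)
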
def get_mapharma_opendata(
    client: httpx.Client = DEFAULT_CLIENT,
    opendata_url: str = MAPHARMA_OPEN_DATA_URL,
    opendata_url_fallback: str = MAPHARMA_OPEN_DATA_URL_FALLBACK,
) -> dict:
    try:
        request = client.get(opendata_url, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        # Let's update opendata file
        with open(MAPHARMA_OPEN_DATA_FILE, "w", encoding="utf-8") as f:
            f.write(
                json.dumps(
                    {
                        "artifact_date": datetime.today().strftime("%Y-%m-%d %H:%M:%S"),
                        "data": request.json(),
                    },
                    indent=2,
                )
            )
        return request.json()
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url} returned error {hex.response.status_code}")
    try:
        request = client.get(opendata_url_fallback, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()["data"]
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url_fallback} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url_fallback} returned error {hex.response.status_code}")
    return None

def get_mapharma_opendata(
    client: httpx.Client = DEFAULT_CLIENT,
    opendata_url: str = MAPHARMA_OPEN_DATA_URL,
    opendata_url_fallback: str = MAPHARMA_OPEN_DATA_URL_FALLBACK,
) -> dict:
    try:
        request = client.get(opendata_url, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url} returned error {hex.response.status_code}")
    try:
        request = client.get(opendata_url_fallback, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url_fallback} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url_fallback} returned error {hex.response.status_code}")
    return None

def test_sync_digest_auth_raises_protocol_error_on_malformed_header(
    auth_header: bytes,
) -> None:
    url = "https://example.org/"
    auth = DigestAuth(username="******", password="******")
    client = Client(
        transport=SyncMockTransport(auth_header=auth_header, status_code=401)
    )
    with pytest.raises(ProtocolError):
        client.get(url, auth=auth)

def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = "https://api.ordoclic.fr/v1/public/search"
    # all pharmacies
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true'}
    # all pharmacies offering vaccination
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true'}
    # all pharmacies offering vaccination with online booking calendars
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true', 'in.covidOnlineBookingAvailabilities.covidInjection1': 'true' }
    # all pharmacies offering Pfizer or AstraZeneca
    payload = {
        "page": "1",
        "per_page": "10000",
        "in.isPublicProfile": "true",
        "in.isCovidVaccineSupported": "true",
        "or.covidOnlineBookingAvailabilities.Vaccination Pfizer": "true",
        "or.covidOnlineBookingAvailabilities.Vaccination AstraZeneca": "true",
    }
    try:
        r = client.get(base_url, params=payload)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url} (search)")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()

def find_urls(client: httpx.Client, url: str):
    if url in parsed_urls:
        return
    if 'https://l-tike' not in url:
        print('not found:', url)
        return
    if not url.startswith('http'):
        return
    print('next: ', url)
    client.cookies = None
    try:
        r = client.get(url)
    except Exception as e:
        find_urls(client, url)
        print('error:', url)
        return
    parsed_urls.add(url)
    dom = pq(r.text)
    for a in dom('main a').items():
        href = a.attr('href')
        href = urllib.parse.urljoin(base_url, href)
        if '/order/' in href:
            order_urls.add(href)
            print('order:', len(order_urls))
            continue
        find_urls(client, href)

class HTTPClient:
    def __init__(
        self,
        base_url: str,
        default_headers: Optional[dict] = None,
        default_params: Optional[dict] = None,
    ):
        self.base_url = base_url
        self.default_headers = default_headers or {}
        self.default_params = default_params or {}
        self.http_client = Client(
            base_url=self.base_url, headers=default_headers, params=self.default_params
        )

    def get(self, url: str, params: dict, headers: dict = None):
        custom_headers = headers or {}
        if not params.get("_rticket"):
            params["_rticket"] = int(round(time() * 1000))
        response = self.http_client.get(url=url, params=params, headers=custom_headers)
        return response

    def post(self, url: str, data: dict, headers: dict = None):
        custom_headers = headers or {}
        rticket = int(round(time() * 1000))
        response = self.http_client.post(
            url=url, params={"_rticket": rticket}, data=data, headers=custom_headers
        )
        return response

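# Minimal usage sketch for HTTPClient above. The base URL, endpoint path and query
# parameters are illustrative assumptions, not part of the original code.
def demo_http_client() -> None:
    api = HTTPClient(
        base_url="https://api.example.com",
        default_headers={"User-Agent": "demo/1.0"},
    )
    # `_rticket` (a millisecond timestamp) is added automatically when missing.
    resp = api.get("/status", params={"verbose": "1"})
    print(resp.status_code)
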
def get_method(url: str, headers: dict = None, timeout=5, max_retries=5, c: httpx.Client = None):
    """
    timeout: request timeout in seconds; defaults to 5, pass `None` to disable
    max_retries: maximum number of attempts; defaults to 5, pass 0 to disable the limit
    """
    k = 1
    while (k <= max_retries) or (max_retries == 0):
        try:
            if c is not None:
                res = c.get(url, headers=headers, timeout=timeout)
            else:
                res = httpx.get(url, headers=headers, timeout=timeout)
        except Exception as e:
            k = k + 1
            print(sys._getframe().f_code.co_name + ": " + str(e))
            time.sleep(1)
            continue
        else:
            break
    try:
        return res
    except Exception:
        sys.exit(sys._getframe().f_code.co_name + ": " + "Max retries exceeded")

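# Short usage sketch for get_method above; the URL is an illustrative placeholder.
# Passing a shared httpx.Client reuses connections; otherwise a one-off httpx.get is made.
def demo_get_method() -> None:
    with httpx.Client() as shared_client:
        res = get_method("https://example.org/", timeout=3, max_retries=3, c=shared_client)
        print(res.status_code)
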
def _req(client: httpx.Client, url: URL, headers: Dict):
    logger.debug(f'request url is {url}')
    res = client.get(url.human_repr(), headers=headers)
    if res.status_code != 200:
        res.raise_for_status()
    return res

def _get_token(self, client: Client, concourse_url: str) -> str:
    concourse_login = f"{concourse_url}/sky/login"
    r = client.get(concourse_login, follow_redirects=True)
    ldap_url = expect(
        re.search(_LDAP_URL_REGEX, r.text), "BUG: no ldap url found"
    ).group(0)
    ldap_login_url = f"{concourse_url}{ldap_url}"
    print("Concourse login")
    username = input("Username: ")
    password = getpass("Password: ")  # assumption: the password prompt was masked in the source
    data = {"login": username, "password": password}
    r = client.post(ldap_login_url, data=data, follow_redirects=True)
    token = expect(
        re.search(_BEARER_REGEX, r.text), "BUG: no bearer found"
    ).group(1)
    return token

def get_next_slot_date(
    self,
    center_id: str,
    consultation_reason_name: str,
    start_date: str,
    client: httpx.Client = DEFAULT_CLIENT,
    request: ScraperRequest = None,
) -> Optional[str]:
    url = PLATFORM_API.get("next_slot").format(
        center_id=center_id,
        consultation_reason_name=consultation_reason_name,
        start_date=start_date,
    )
    if request:
        request.increase_request_count("next-slots")
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        if request:
            request.increase_request_count("error")
        return None
    result = r.json()
    if "firstPhysicalStartDateTime" in result:
        return result["firstPhysicalStartDateTime"]
    return None

def download_and_save_filing(
    client: httpx.Client,
    download_folder: Path,
    ticker_or_cik: str,
    accession_number: str,
    filing_type: str,
    download_url: str,
    save_filename: str,
    *,
    resolve_urls: bool = False,
) -> None:
    resp = client.get(download_url)
    resp.raise_for_status()
    filing_text = resp.content

    # Only resolve URLs in HTML files
    if resolve_urls and Path(save_filename).suffix == ".html":
        base_url = f"{download_url.rsplit('/', 1)[0]}/"
        filing_text = resolve_relative_urls_in_filing(filing_text, base_url)

    # Create all parent directories as needed and write content to file
    save_path = (
        download_folder
        / ROOT_SAVE_FOLDER_NAME
        / ticker_or_cik
        / filing_type
        / accession_number
        / save_filename
    )
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(filing_text)

    # Prevent rate limiting
    time.sleep(SEC_EDGAR_RATE_LIMIT_SLEEP_INTERVAL)

def get_paged(url: str, limit: MAIIA_LIMIT, client: httpx.Client = DEFAULT_CLIENT) -> dict:
    result = dict()
    result["items"] = []
    result["total"] = 0
    page = 0
    loops = 0
    while loops <= result["total"]:
        base_url = f"{url}&limit={limit}&page={page}"
        try:
            r = client.get(base_url)
            r.raise_for_status()
        except httpx.HTTPStatusError as hex:
            logger.warning(f"{base_url} returned error {hex.response.status_code}")
            break
        try:
            payload = r.json()
        except json.decoder.JSONDecodeError as jde:
            logger.warning(f'{base_url} raised {jde}')
            break
        result["total"] = payload["total"]
        if not payload["items"]:
            break
        for item in payload.get("items"):
            result["items"].append(item)
        if len(result["items"]) >= result["total"]:
            break
        page += 1
        loops += 1
    return result

def get_paged(url: str, limit: MAIIA_LIMIT, client: httpx.Client = DEFAULT_CLIENT) -> dict:
    result = dict()
    result['items'] = []
    result['total'] = 0
    page = 0
    loops = 0
    while loops <= result['total']:
        base_url = f'{url}&limit={limit}&page={page}'
        try:
            r = client.get(base_url)
            r.raise_for_status()
        except httpx.HTTPStatusError as hex:
            logger.warning(f'{base_url} returned error {hex.response.status_code}')
            break
        payload = r.json()
        result['total'] = payload['total']
        if not payload['items']:
            break
        for item in payload.get('items'):
            result['items'].append(item)
        if len(result['items']) >= result['total']:
            break
        page += 1
        loops += 1
    return result

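# Minimal usage sketch for the get_paged helpers above. The endpoint URL (which must
# already carry a query string, since `&limit=...&page=...` is appended) and the limit
# value are illustrative assumptions; DEFAULT_CLIENT comes from the surrounding module.
def demo_get_paged() -> None:
    url = "https://example.org/api/items?speciality=example"  # assumed placeholder endpoint
    paged = get_paged(url, limit=50)
    print(f'{len(paged["items"])} of {paged["total"]} items fetched')
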
def test_sync_too_many_redirects_calling_next():
    client = Client(transport=SyncMockTransport())
    url = "https://example.org/multiple_redirects?count=21"
    response = client.get(url, allow_redirects=False)
    with pytest.raises(TooManyRedirects):
        while response.is_redirect:
            response = response.next()

def search(client: httpx.Client = DEFAULT_CLIENT) -> Optional[list]:
    url = AVECMONDOC_API.get("search", "")
    limit = AVECMONDOC_API.get("search_page_size", 10)
    page = 1
    result = {"data": [], "hasNextPage": True}
    while result["hasNextPage"]:
        payload = {"limit": limit, "page": page}
        try:
            r = client.get(url, params=payload)
            r.raise_for_status()
        except httpx.TimeoutException as hex:
            logger.warning(f"{url} timed out (search)")
            return None
        except httpx.HTTPStatusError as hex:
            logger.warning(f"{url} returned error {hex.response.status_code}")
            logger.warning(r.content)
            return None
        try:
            paged_result = r.json()
        except json.decoder.JSONDecodeError as jde:
            logger.warning(f"{url} raised {jde}")
            break
        page += 1
        if result["data"] == []:
            result = paged_result
            continue
        result["hasNextPage"] = paged_result["hasNextPage"]
        for item in paged_result["data"]:
            result["data"].append(item)
        # logger.info(f"Downloaded {j['page']}/{j['pages']}")
    return result

def get_json(url: str, client: httpx.Client = DEFAULT_CLIENT):
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        return None
    return r.json()

def fetch_topic(client: Client, url: str, pbar: tqdm = None) -> None:
    """Fetch and save each topic's original HTML."""
    path = DATA_PATH / "htmls" / Path(url).relative_to("/")
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.with_suffix(".html").open("w") as f:
        f.write(client.get(f"{URL_BASE}{url}").text)
    if pbar:
        pbar.update()

def get_slots(campagneId: str, optionId: str, start_date: str, client: httpx.Client = DEFAULT_CLIENT) -> dict:
    base_url = f'https://mapharma.net/api/public/calendar/{campagneId}/{start_date}/{optionId}'
    client.headers.update({'referer': 'https://mapharma.net/'})
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f'{base_url} returned error {hex.response.status_code}')
        return {}
    return r.json()

def getProfile(request: ScraperRequest, client: httpx.Client = DEFAULT_CLIENT):
    slug = request.get_url().rsplit('/', 1)[-1]
    prof = request.get_url().rsplit('/', 2)[-2]
    if prof in ['pharmacien', 'medecin']:
        base_url = f'https://api.ordoclic.fr/v1/professionals/profile/{slug}'
    else:
        base_url = f'https://api.ordoclic.fr/v1/public/entities/profile/{slug}'
    r = client.get(base_url)
    r.raise_for_status()
    return r.json()

def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = 'https://api.ordoclic.fr/v1/public/search'
    # all pharmacies
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true'}
    # all pharmacies offering vaccination
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true'}
    # all pharmacies offering vaccination with online booking calendars
    payload = {'page': '1',
               'per_page': '10000',
               'in.isPublicProfile': 'true',
               'in.isCovidVaccineSupported': 'true',
               'in.covidOnlineBookingAvailabilities.covidInjection1': 'true'}
    r = client.get(base_url, params=payload)
    r.raise_for_status()
    return r.json()

def get_csv(url: str, header=True, delimiter=";", encoding="utf-8", client: httpx.Client = DEFAULT_CLIENT):
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        return None
    reader = io.StringIO(r.content.decode(encoding))
    csvreader = csv.DictReader(reader, delimiter=delimiter)
    return csvreader

def get_next_slot_date(center_id: str, consultation_reason_name: str, start_date: str,
                       client: httpx.Client = DEFAULT_CLIENT) -> Optional[str]:
    url = f'{MAIIA_URL}/api/pat-public/availability-closests?centerId={center_id}&consultationReasonName={consultation_reason_name}&from={start_date}'
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f'{url} returned error {hex.response.status_code}')
        return None
    result = r.json()
    if 'firstPhysicalStartDateTime' in result:
        return result['firstPhysicalStartDateTime']
    return None

def test_sync_auth_reads_response_body() -> None:
    """
    Test that we can read the response body in an auth flow
    if `requires_response_body` is set.
    """
    url = "https://example.org/"
    auth = ResponseBodyAuth("xyz")
    client = Client(transport=SyncMockTransport())
    response = client.get(url, auth=auth)
    assert response.status_code == 200
    assert response.json() == {"auth": '{"auth": "xyz"}'}

def get_one(client: httpx.Client, obj_path: str, obj_class, obj_uid: str, path_tail: str = None):
    path = f'api/{obj_path}/{obj_uid}'
    if path_tail:
        path = f'{path}/{path_tail}'
    response = client.get(f'/{path}/')
    check_response(response)
    obj = obj_class.parse_obj(response.json())
    return obj

def get_reasons(entityId, client: httpx.Client = DEFAULT_CLIENT):
    base_url = f"https://api.ordoclic.fr/v1/solar/entities/{entityId}/reasons"
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url}")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()

def keys(client: httpx.Client, jwks_uri: str) -> str:
    try:
        response = client.get(jwks_uri)
    except httpx.HTTPError as e:
        raise exceptions.JOSEError(
            f"{type(e).__name__} error while retrieving keys: {str(e)}"
        )
    if response.is_error:
        raise exceptions.JOSEError(
            f"HTTP {response.status_code} error while retrieving keys: {response.text}"
        )
    return response.text

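# Minimal usage sketch for keys() above. The JWKS URI is an illustrative assumption;
# in practice it typically comes from the provider's OpenID Connect discovery document.
def demo_keys() -> None:
    with httpx.Client() as client:
        jwks_json = keys(client, "https://example.org/.well-known/jwks.json")
        print(jwks_json[:200])  # raw JWKS document as text
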
def _test_chunk(client: httpx.Client, config: Config, file: ResumableFile, chunk: FileChunk) -> bool:
    """Check if the chunk exists on the server.

    Returns
    -------
    bool
        True if the chunk exists on the server
    """
    response = client.get(
        config.target, params=_build_query(file, chunk, config.additional_query_params)
    )
    return response.status_code == 200

def get_reasons(entityId, client: httpx.Client = DEFAULT_CLIENT, request: ScraperRequest = None):
    base_url = ORDOCLIC_API.get("motives").format(entityId=entityId)
    if request:
        request.increase_request_count("motives")
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url}")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()

def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = ORDOCLIC_API.get("scraper")
    payload = ORDOCLIC_CONF.get("scraper_payload")
    try:
        r = client.get(base_url, params=payload)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url} (search)")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()