Example #1
class FrameX:
    """
    Utility class to access the FrameX API
    """

    BASE_URL = API_BASE

    def __init__(self):
        self.client = Client()

    def video(self, video: Text) -> Video:
        """
        Fetches information about a video
        """

        r = self.client.get(urljoin(self.BASE_URL, f"video/{quote(video)}/"))
        r.raise_for_status()
        return Video(**r.json())

    def video_frame(self, video: Text, frame: int) -> bytes:
        """
        Fetches the JPEG data of a single frame
        """

        r = self.client.get(
            urljoin(self.BASE_URL, f'video/{quote(video)}/frame/{quote(f"{frame}")}/')
        )
        r.raise_for_status()
        return r.content
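A minimal usage sketch for the class above, assuming API_BASE points at a live FrameX server; the video name and output path are illustrative placeholders, not part of the original snippet.

# Hypothetical usage of FrameX; "falcon-heavy" is a made-up video name.
api = FrameX()
info = api.video("falcon-heavy")
jpeg = api.video_frame("falcon-heavy", frame=0)
with open("frame0.jpg", "wb") as out:
    out.write(jpeg)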
Example #2
def get_mapharma_opendata(
    client: httpx.Client = DEFAULT_CLIENT,
    opendata_url: str = MAPHARMA_OPEN_DATA_URL,
    opendata_url_fallback: str = MAPHARMA_OPEN_DATA_URL_FALLBACK,
) -> Optional[dict]:
    try:
        request = client.get(opendata_url, headers=MAPHARMA_HEADERS)
        request.raise_for_status()

        # Cache the fresh payload to the local opendata file.
        data = request.json()
        with open(MAPHARMA_OPEN_DATA_FILE, "w", encoding="utf-8") as f:
            f.write(
                json.dumps(
                    {"artifact_date": datetime.today().strftime("%Y-%m-%d %H:%M:%S"), "data": data},
                    indent=2,
                )
            )

        return data

    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url} returned error {hex.response.status_code}")
    # Fall back to the mirrored artifact; its payload is wrapped under "data".
    try:
        request = client.get(opendata_url_fallback, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()["data"]
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url_fallback} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{opendata_url_fallback} returned error {hex.response.status_code}")
    return None
Example #3
def get_mapharma_opendata(
    client: httpx.Client = DEFAULT_CLIENT,
    opendata_url: str = MAPHARMA_OPEN_DATA_URL,
    opendata_url_fallback: str = MAPHARMA_OPEN_DATA_URL_FALLBACK,
) -> Optional[dict]:
    try:
        request = client.get(opendata_url, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(
            f"{opendata_url} returned error {hex.response.status_code}")
    try:
        request = client.get(opendata_url_fallback, headers=MAPHARMA_HEADERS)
        request.raise_for_status()
        return request.json()
    except httpx.TimeoutException as hex:
        logger.warning(f"{opendata_url_fallback} timed out {hex}")
    except httpx.HTTPStatusError as hex:
        logger.warning(
            f"{opendata_url_fallback} returned error {hex.response.status_code}"
        )
    return None
Example #4
def test_sync_digest_auth_raises_protocol_error_on_malformed_header(
    auth_header: bytes,
) -> None:
    url = "https://example.org/"
    auth = DigestAuth(username="******", password="******")
    client = Client(
        transport=SyncMockTransport(auth_header=auth_header, status_code=401))

    with pytest.raises(ProtocolError):
        client.get(url, auth=auth)
Example #5
def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = "https://api.ordoclic.fr/v1/public/search"
    # all pharmacies
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true'}
    # all pharmacies offering vaccines
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true'}
    # all pharmacies offering vaccines with online booking calendars
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true', 'in.covidOnlineBookingAvailabilities.covidInjection1': 'true' }
    # all pharmacies offering Pfizer or AstraZeneca vaccines
    payload = {
        "page": "1",
        "per_page": "10000",
        "in.isPublicProfile": "true",
        "in.isCovidVaccineSupported": "true",
        "or.covidOnlineBookingAvailabilities.Vaccination Pfizer": "true",
        "or.covidOnlineBookingAvailabilities.Vaccination AstraZeneca": "true",
    }
    try:
        r = client.get(base_url, params=payload)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url} (search)")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()
Example #6
def find_urls(client: httpx.Client, url: str):
    if url in parsed_urls:
        return
    if 'https://l-tike' not in url:
        print('not found:', url)
        return
    if not url.startswith('http'):
        return
    print('next: ', url)
    client.cookies = None  # clear cookies before each request
    try:
        r = client.get(url)
    except Exception as e:
        print('error:', url, e)
        # retry the failed URL; this can recurse indefinitely if it keeps failing
        find_urls(client, url)
        return

    parsed_urls.add(url)
    dom = pq(r.text)

    for a in dom('main a').items():
        href = a.attr('href')
        href = urllib.parse.urljoin(base_url, href)
        if '/order/' in href:
            order_urls.add(href)
            print('order:', len(order_urls))
            continue

        find_urls(client, href)
Example #7
class HTTPClient:
    def __init__(
        self,
        base_url: str,
        default_headers: Optional[dict] = None,
        default_params: Optional[dict] = None,
    ):
        self.base_url = base_url
        self.default_headers = default_headers or {}
        self.default_params = default_params or {}

        self.http_client = Client(
            base_url=self.base_url, headers=self.default_headers, params=self.default_params
        )

    def get(self, url: str, params: dict, headers: Optional[dict] = None):
        custom_headers = headers or {}

        # "_rticket" is a millisecond request timestamp; inject it if the caller didn't
        if not params.get("_rticket"):
            params["_rticket"] = int(round(time() * 1000))

        response = self.http_client.get(url=url, params=params, headers=custom_headers)

        return response

    def post(self, url: str, data: dict, headers: Optional[dict] = None):
        custom_headers = headers or {}

        rticket = int(round(time() * 1000))

        response = self.http_client.post(
            url=url, params={"_rticket": rticket}, data=data, headers=custom_headers
        )

        return response
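A hedged usage sketch for the wrapper above; the base URL, header, and endpoint are placeholders, not part of the original snippet.

# Hypothetical usage; get() injects "_rticket" when the caller omits it.
api = HTTPClient(
    base_url="https://api.example.com",
    default_headers={"User-Agent": "demo/1.0"},
)
resp = api.get("/status", params={})
print(resp.status_code, resp.request.url)  # the sent URL now carries _rticket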
Example #8
def get_method(url: str,
               headers: dict = None,
               timeout=5,
               max_retries=5,
               c: httpx.Client = None):
    """
    timeout: 超时时间,单位秒(s), 默认为 5 秒, 为 `None` 时禁用
    max_retries: 最大尝试次数, 默认为 5 次, 为 0 时禁用
    """
    k = 1
    while (k <= max_retries) or (max_retries == 0):
        try:
            if c is not None:
                res = c.get(url, headers=headers, timeout=timeout)
            else:
                res = httpx.get(url, headers=headers, timeout=timeout)
        except Exception as e:
            k = k + 1
            print(sys._getframe().f_code.co_name + ": " + str(e))
            time.sleep(1)
            continue
        else:
            break
    try:
        # `res` is unbound (NameError) here if every attempt failed
        return res
    except Exception:
        sys.exit(sys._getframe().f_code.co_name + ": " +
                 "Max retries exceeded")
Example #9
    def _req(client: httpx.Client, url: URL, headers: Dict):
        logger.debug(f'request url is {url}')
        res = client.get(url.human_repr(), headers=headers)
        if res.status_code != 200:
            res.raise_for_status()

        return res
Example #10
    def _get_token(self, client: Client, concourse_url: str) -> str:
        concourse_login = f"{concourse_url}/sky/login"

        r = client.get(concourse_login, follow_redirects=True)

        ldap_url = expect(
            re.search(_LDAP_URL_REGEX, r.text),
            "BUG: no ldap url found"
        ).group(0)

        ldap_login_url = f"{concourse_url}{ldap_url}"

        print("Concourse login")
        username = input("Username: "******"login": username, "password": password}

        r = client.post(ldap_login_url, data=data, follow_redirects=True)

        token = expect(
            re.search(_BEARER_REGEX, r.text),
            "BUG: no bearer found"
        ).group(1)

        return token
Example #11
def get_next_slot_date(
    self,
    center_id: str,
    consultation_reason_name: str,
    start_date: str,
    client: httpx.Client = DEFAULT_CLIENT,
    request: ScraperRequest = None,
) -> Optional[str]:
    url = PLATFORM_API.get("next_slot").format(
        center_id=center_id,
        consultation_reason_name=consultation_reason_name,
        start_date=start_date)
    if request:
        request.increase_request_count("next-slots")
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        if request:  # request may be None, as the guard above shows
            request.increase_request_count("error")
        return None
    result = r.json()
    if "firstPhysicalStartDateTime" in result:
        return result["firstPhysicalStartDateTime"]
    return None
Example #12
def download_and_save_filing(
    client: httpx.Client,
    download_folder: Path,
    ticker_or_cik: str,
    accession_number: str,
    filing_type: str,
    download_url: str,
    save_filename: str,
    *,
    resolve_urls: bool = False,
) -> None:
    resp = client.get(download_url)
    resp.raise_for_status()
    filing_text = resp.content

    # Only resolve URLs in HTML files
    if resolve_urls and Path(save_filename).suffix == ".html":
        base_url = f"{download_url.rsplit('/', 1)[0]}/"
        filing_text = resolve_relative_urls_in_filing(filing_text, base_url)

    # Create all parent directories as needed and write content to file
    save_path = (download_folder / ROOT_SAVE_FOLDER_NAME / ticker_or_cik /
                 filing_type / accession_number / save_filename)
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(filing_text)

    # Prevent rate limiting
    time.sleep(SEC_EDGAR_RATE_LIMIT_SLEEP_INTERVAL)
Example #13
def get_paged(url: str,
              limit: int = MAIIA_LIMIT,
              client: httpx.Client = DEFAULT_CLIENT) -> dict:
    result = dict()
    result["items"] = []
    result["total"] = 0
    page = 0
    loops = 0
    while loops <= result["total"]:
        base_url = f"{url}&limit={limit}&page={page}"
        try:
            r = client.get(base_url)
            r.raise_for_status()
        except httpx.HTTPStatusError as hex:
            logger.warning(
                f"{base_url} returned error {hex.response.status_code}")
            break
        try:
            payload = r.json()
        except json.decoder.JSONDecodeError as jde:
            logger.warning(f'{base_url} raised {jde}')
            break
        result["total"] = payload["total"]
        if not payload["items"]:
            break
        for item in payload.get("items"):
            result["items"].append(item)
        if len(result["items"]) >= result["total"]:
            break
        page += 1
        loops += 1
    return result
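A hedged usage sketch; the endpoint is a placeholder. Note that get_paged appends &limit= and &page=, so the URL must already carry a query string.

# Hypothetical usage; MAIIA_LIMIT and DEFAULT_CLIENT come from the snippet's module.
data = get_paged("https://example.org/api/centers?speciality=vaccination", limit=50)
print(data["total"], len(data["items"]))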
Example #14
def get_paged(url: str,
              limit: int = MAIIA_LIMIT,
              client: httpx.Client = DEFAULT_CLIENT) -> dict:
    result = dict()
    result['items'] = []
    result['total'] = 0
    page = 0
    loops = 0
    while loops <= result['total']:
        base_url = f'{url}&limit={limit}&page={page}'
        try:
            r = client.get(base_url)
            r.raise_for_status()
        except httpx.HTTPStatusError as hex:
            logger.warning(
                f'{base_url} returned error {hex.response.status_code}')
            break
        payload = r.json()
        result['total'] = payload['total']
        if not payload['items']:
            break
        for item in payload.get('items'):
            result['items'].append(item)
        if len(result['items']) >= result['total']:
            break
        page += 1
        loops += 1
    return result
Example #15
def test_sync_too_many_redirects_calling_next():
    client = Client(transport=SyncMockTransport())
    url = "https://example.org/multiple_redirects?count=21"
    response = client.get(url, allow_redirects=False)
    with pytest.raises(TooManyRedirects):
        while response.is_redirect:
            response = response.next()
Example #16
def search(client: httpx.Client = DEFAULT_CLIENT) -> Optional[list]:
    url = AVECMONDOC_API.get("search", "")
    limit = AVECMONDOC_API.get("search_page_size", 10)
    page = 1
    result = {"data": [], "hasNextPage": True}
    while result["hasNextPage"]:
        payload = {"limit": limit, "page": page}
        try:
            r = client.get(url, params=payload)
            r.raise_for_status()
        except httpx.TimeoutException as hex:
            logger.warning(f"{url} timed out (search)")
            return None
        except httpx.HTTPStatusError as hex:
            logger.warning(f"{url} returned error {hex.response.status_code}")
            logger.warning(r.content)
            return None
        try:
            paged_result = r.json()
        except json.decoder.JSONDecodeError as jde:
            logger.warning(f"{url} raised {jde}")
            break
        page += 1
        if result["data"] == []:
            result = paged_result
            continue
        result["hasNextPage"] = paged_result["hasNextPage"]
        for item in paged_result["data"]:
            result["data"].append(item)
        # logger.info(f"Downloaded {j['page']}/{j['pages']}")
    return result
Example #17
def get_json(url: str, client: httpx.Client = DEFAULT_CLIENT):
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        return None
    return r.json()
Example #18
def fetch_topic(client: Client, url: str, pbar: tqdm = None) -> None:
    """Fetch and save each topic original html."""
    path = DATA_PATH / "htmls" / Path(url).relative_to("/")
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.with_suffix(".html").open("w") as f:
        f.write(client.get(f"{URL_BASE}{url}").text)
    if pbar:
        pbar.update()
Example #19
def get_slots(campagneId: str, optionId: str, start_date: str, client: httpx.Client = DEFAULT_CLIENT) -> dict:
    base_url = f'https://mapharma.net/api/public/calendar/{campagneId}/{start_date}/{optionId}'
    client.headers.update({'referer': 'https://mapharma.net/'})
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f'{base_url} returned error {hex.response.status_code}')
        return {}
    return r.json()
Example #20
def getProfile(request: ScraperRequest, client: httpx.Client = DEFAULT_CLIENT):
    slug = request.get_url().rsplit('/', 1)[-1]
    prof = request.get_url().rsplit('/', 2)[-2]
    if prof in ['pharmacien', 'medecin']:
        base_url = f'https://api.ordoclic.fr/v1/professionals/profile/{slug}'
    else:
        base_url = f'https://api.ordoclic.fr/v1/public/entities/profile/{slug}'
    r = client.get(base_url)
    r.raise_for_status()
    return r.json()
Example #21
def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = 'https://api.ordoclic.fr/v1/public/search'
    # all pharmacies
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true'}
    # all pharmacies offering vaccines
    # payload = {'page': '1', 'per_page': '10000', 'in.isPublicProfile': 'true', 'in.isCovidVaccineSupported': 'true'}
    # all pharmacies offering vaccines with online booking calendars
    payload = {
        'page': '1',
        'per_page': '10000',
        'in.isPublicProfile': 'true',
        'in.isCovidVaccineSupported': 'true',
        'in.covidOnlineBookingAvailabilities.covidInjection1': 'true',
    }
    r = client.get(base_url, params=payload)
    r.raise_for_status()
    return r.json()
Example #22
def get_csv(url: str, header=True, delimiter=";", encoding="utf-8", client: httpx.Client = DEFAULT_CLIENT):
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{url} returned error {hex.response.status_code}")
        return None

    reader = io.StringIO(r.content.decode(encoding))
    csvreader = csv.DictReader(reader, delimiter=delimiter)
    return csvreader
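A minimal usage sketch; the CSV URL is a placeholder. get_csv yields a csv.DictReader on success and None on HTTP errors.

# Hypothetical usage; DEFAULT_CLIENT comes from the snippet's module.
rows = get_csv("https://example.org/centres.csv", delimiter=";")
if rows is not None:
    for row in rows:
        print(row)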
Example #23
def get_next_slot_date(center_id: str, consultation_reason_name: str, start_date: str, client: httpx.Client = DEFAULT_CLIENT) -> Optional[str]:
    url = f'{MAIIA_URL}/api/pat-public/availability-closests?centerId={center_id}&consultationReasonName={consultation_reason_name}&from={start_date}'
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        logger.warning(f'{url} returned error {hex.response.status_code}')
        return None
    result = r.json()
    if 'firstPhysicalStartDateTime' in result:
        return result['firstPhysicalStartDateTime']
    return None
Example #24
def test_sync_auth_reads_response_body() -> None:
    """
    Test that we can read the response body in an auth flow if `requires_response_body`
    is set.
    """
    url = "https://example.org/"
    auth = ResponseBodyAuth("xyz")
    client = Client(transport=SyncMockTransport())

    response = client.get(url, auth=auth)
    assert response.status_code == 200
    assert response.json() == {"auth": '{"auth": "xyz"}'}
Example #25
def get_one(client: httpx.Client,
            obj_path: str,
            obj_class,
            obj_uid: str,
            path_tail: str = None):
    path = f'api/{obj_path}/{obj_uid}'
    if path_tail:
        path = f'{path}/{path_tail}'
    response = client.get(f'/{path}/')
    check_response(response)
    obj = obj_class.parse_obj(response.json())
    return obj
Example #26
def get_reasons(entityId, client: httpx.Client = DEFAULT_CLIENT):
    base_url = f"https://api.ordoclic.fr/v1/solar/entities/{entityId}/reasons"
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url}")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()
Example #27
def keys(client: httpx.Client, jwks_uri: str) -> str:
    try:
        response = client.get(jwks_uri)
    except httpx.HTTPError as e:
        raise exceptions.JOSEError(
            f"{type(e).__name__} error while retrieving keys: {str(e)}")

    if response.is_error:
        raise exceptions.JOSEError(
            f"HTTP {response.status_code} error while retrieving keys: {response.text}"
        )

    return response.text
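A usage sketch under stated assumptions; the JWKS URL is a placeholder and exceptions.JOSEError comes from the snippet's package.

# Hypothetical usage; keys() returns the raw JWKS document as text.
import httpx

with httpx.Client() as c:
    jwks = keys(c, "https://example.org/.well-known/jwks.json")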
Example #28
def _test_chunk(client: httpx.Client, config: Config, file: ResumableFile,
                chunk: FileChunk) -> bool:
    """Check if the chunk exists on the server.

    Returns
    -------
    bool
        True if the chunk exists on the server
    """
    response = client.get(config.target,
                          params=_build_query(file, chunk,
                                              config.additional_query_params))
    return response.status_code == 200
Example #29
def get_reasons(entityId, client: httpx.Client = DEFAULT_CLIENT, request: ScraperRequest = None):
    base_url = ORDOCLIC_API.get("motives").format(entityId=entityId)
    if request:
        request.increase_request_count("motives")
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url}")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()
Example #30
def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = ORDOCLIC_API.get("scraper")

    payload = ORDOCLIC_CONF.get("scraper_payload")
    try:
        r = client.get(base_url, params=payload)
        r.raise_for_status()
    except httpx.TimeoutException as hex:
        logger.warning(f"request timed out for center: {base_url} (search)")
        return None
    except httpx.HTTPStatusError as hex:
        logger.warning(f"{base_url} returned error {hex.response.status_code}")
        return None
    return r.json()