def getUrl(url: str, timeout) -> str:
    """Fetch the resource at `url` via HTTP GET and return its body as text.

    Follows up to 10 consecutive 3xx redirects, resolving each Location
    header through `altlink` (mirror selection).

    :param url: URL to fetch.
    :param timeout: timeout value passed to requests (seconds or tuple).
    :raises ArchiveConnectionError: on connection reset/failure or timeout.
    :raises ArchiveDownloadError: when the final response is not HTTP 200.
    """
    logger = getLogger("aqt.helper")
    with requests.Session() as session:
        # Mount a retry-aware adapter so transient connection errors are
        # retried with exponential backoff (per Settings).
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            # BUG FIX: the original used requests.get(), which creates its own
            # one-shot session and silently bypasses the retry adapter mounted
            # above. Use the configured session for the initial request too.
            r = session.get(url, allow_redirects=False, timeout=timeout)
            num_redirects = 0
            # Cap redirect-following at 10 hops to avoid redirect loops.
            while 300 < r.status_code < 309 and num_redirects < 10:
                num_redirects += 1
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except (
            ConnectionResetError,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ) as e:
            raise ArchiveConnectionError(
                f"Failure to connect to {url}: {type(e).__name__}") from e
        else:
            if r.status_code != 200:
                msg = f"Failed to retrieve file at {url}\nServer response code: {r.status_code}, reason: {r.reason}"
                raise ArchiveDownloadError(msg)
        result = r.text
    return result
def getUrl(url: str, timeout, expected_hash: Optional[bytes] = None) -> str:
    """ Gets a file from `url` via HTTP GET.

    No caller should call this function without providing an expected_hash, unless the
    caller is `get_hash`, which cannot know what the expected hash should be.

    :param url: URL to fetch.
    :param timeout: timeout value passed to requests (seconds or tuple).
    :param expected_hash: SHA-256 digest the downloaded text must match,
        or None to skip verification.
    :raises ArchiveConnectionError: on connection reset/failure or timeout.
    :raises ArchiveDownloadError: when the final response is not HTTP 200.
    :raises ArchiveChecksumError: when the SHA-256 of the body differs from
        `expected_hash`.
    """
    logger = getLogger("aqt.helper")
    with requests.sessions.Session() as session:
        # Mount a retry-aware adapter so transient connection errors are
        # retried with exponential backoff (per Settings).
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            r = session.get(url, allow_redirects=False, timeout=timeout)
            num_redirects = 0
            # Cap redirect-following at 10 hops to avoid redirect loops.
            while 300 < r.status_code < 309 and num_redirects < 10:
                num_redirects += 1
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except (
            ConnectionResetError,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ) as e:
            raise ArchiveConnectionError(
                f"Failure to connect to {url}: {type(e).__name__}") from e
        else:
            if r.status_code != 200:
                msg = f"Failed to retrieve file at {url}\nServer response code: {r.status_code}, reason: {r.reason}"
                raise ArchiveDownloadError(msg)
        result = r.text
        filename = url.split("/")[-1]
        actual_hash = hashlib.sha256(bytes(result, "utf-8")).digest()
        if expected_hash is not None and expected_hash != actual_hash:
            # BUG FIX: `filename` was computed but never used; the error
            # message contained the literal placeholder "(unknown)" instead.
            raise ArchiveChecksumError(
                f"Downloaded file {filename} is corrupted! Detect checksum error.\n"
                f"Expect {expected_hash.hex()}: {url}\n"
                f"Actual {actual_hash.hex()}: {filename}")
    return result
def downloadBinaryFile(url: str, out: str, hash_algo: str, exp: bytes, timeout):
    """Download the file at `url` to path `out`, verifying its checksum.

    Streams the response to disk in chunks while updating a running hash,
    following a single 3xx redirect through `altlink` (mirror selection).

    :param url: URL of the binary file to download.
    :param out: destination file path.
    :param hash_algo: hashlib algorithm name (e.g. "sha256").
    :param exp: expected digest, or None to skip verification.
    :param timeout: timeout value passed to requests (seconds or tuple).
    :raises ArchiveConnectionError: on connection failure or timeout.
    :raises ArchiveDownloadError: when writing the stream to disk fails.
    :raises ArchiveChecksumError: when the digest differs from `exp`.
    """
    logger = getLogger("aqt.helper")
    filename = Path(url).name
    with requests.Session() as session:
        # Mount a retry-aware adapter so transient connection errors are
        # retried with exponential backoff (per Settings).
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            r = session.get(url, allow_redirects=False, stream=True, timeout=timeout)
            if 300 < r.status_code < 309:
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except requests.exceptions.ConnectionError as e:
            raise ArchiveConnectionError(f"Connection error: {e.args}") from e
        except requests.exceptions.Timeout as e:
            raise ArchiveConnectionError(
                f"Connection timeout: {e.args}") from e
        else:
            # Renamed from `hash`, which shadowed the builtin.
            hasher = hashlib.new(hash_algo)
            try:
                with open(out, "wb") as fd:
                    for chunk in r.iter_content(chunk_size=8196):
                        fd.write(chunk)
                        hasher.update(chunk)
                    fd.flush()
            except Exception as e:
                # BUG FIX: `filename` was computed but never used; the error
                # messages contained the literal placeholder "(unknown)".
                raise ArchiveDownloadError(
                    f"Download of {filename} has error: {e}") from e
            if exp is not None and hasher.digest() != exp:
                raise ArchiveChecksumError(
                    f"Downloaded file {filename} is corrupted! Detect checksum error.\n"
                    f"Expect {exp.hex()}: {url}\n"
                    f"Actual {hasher.digest().hex()}: {out}")
def mock_get_url(url, *args, **kwargs):
    """Test double for getUrl: serve a fixture file, or raise a 404-style error."""
    if xml_file:
        fixture = Path(__file__).parent / "data" / xml_file
        return fixture.read_text("utf-8")
    raise ArchiveDownloadError(
        f"Failed to retrieve file at {url}\nServer response code: 404, reason: Not Found"
    )
def _mock(url, **kwargs):
    """Test double: record each requested URL; fail the first request only."""
    urls_requested.add(url)
    if len(urls_requested) > 1:
        return "some_html_content"
    raise ArchiveDownloadError()
def _mock(url, **kwargs):
    """Test double: record the requested URL, then always fail the download."""
    urls_requested.add(url)
    raise ArchiveDownloadError()
def mock_get_url(url, *args, **kwargs):
    """Test double for getUrl: return canned XML, or raise a 404-style error."""
    if xml_file:
        return xml
    raise ArchiveDownloadError(
        f"Failed to retrieve file at {url}\nServer response code: 404, reason: Not Found"
    )