Пример #1
0
def download_file(url, filename):
    """Stream the resource at ``url`` into ``filename`` and return ``filename``.

    The request is sent with a fixed ``User-Agent`` header and the body is
    written to disk chunk by chunk as it arrives.
    """
    request_headers = {
        "User-Agent": "Audible/671 CFNetwork/1240.0.4 Darwin/20.6.0"
    }
    with httpx.stream("GET", url, headers=request_headers) as response, \
            open(filename, 'wb') as out:
        for chunk in response.iter_bytes():
            out.write(chunk)
    return filename
Пример #2
0
def downloadFileSync(url: str, target: Path, apikey: str) -> None:
    """Download ``url`` into ``target``, authenticating with ``apikey``.

    The key is stripped and sent ASCII-encoded in an ``apikey`` header.
    Every status other than 200 is converted into one of the dedicated
    exceptions before any body bytes are written. Errors raised by httpx
    itself are re-raised as ``RequestError``.
    """
    try:
        with target.open('wb') as file:
            auth_headers = {
                'apikey'.encode('ascii'):
                apikey.strip().encode('ascii', 'backslashreplace')
            }
            with stream('GET', url, headers=auth_headers,
                        timeout=250.0) as download:
                status = download.status_code
                # Known failure codes first, then the generic fallback.
                if status == 429:
                    raise RequestLimitReachedError()
                if status == 404:
                    raise NotFoundError(f'No file with URL {url} found')
                if status == 403:
                    raise NoPremiumMembershipException()
                if status == 401:
                    raise UnauthorizedError()
                if status != 200:
                    raise ResponseError(
                        f'Unexpected response: Status {status}')
                for block in download.iter_bytes():
                    file.write(block)
    except HTTPStatusError as err:
        raise RequestError(request=err.request,
                           response=err.response,
                           kind=str(err))
    except HTTPError as err:
        raise RequestError(request=err.request, response=None, kind=str(err))
Пример #3
0
    def _get_antismash_zip_data(self, accession_id, filename, local_path):
        """Try to download an antiSMASH zip archive to ``local_path``.

        Each known base URL is formatted with ``accession_id`` and
        ``filename`` and tried in turn. A 404 response or any exception
        raised during the download moves on to the next URL.

        Returns:
            True as soon as one URL has been streamed to ``local_path``
            without an exception, False if every URL failed. After a failed
            attempt the empty or partial file is removed (best effort), so
            a False result does not leave a bogus archive behind.
        """
        import os  # function-scope import: only needed for the cleanup below

        for base_url in (ANTISMASH_DB_DOWNLOAD_URL,
                         ANTISMASH_DBV2_DOWNLOAD_URL):
            zipfile_url = base_url.format(accession_id, filename)
            downloaded = False
            with open(local_path, 'wb') as f:
                try:
                    with httpx.stream('GET', zipfile_url) as r:
                        if r.status_code == 404:
                            logger.debug('antiSMASH download URL was a 404')
                        else:
                            logger.info(
                                'Downloading from antiSMASH: {}'.format(
                                    zipfile_url))
                            filesize = int(r.headers['content-length'])
                            bar = Bar(filename,
                                      max=filesize,
                                      suffix='%(percent)d%%')
                            for data in r.iter_bytes():
                                f.write(data)
                                bar.next(len(data))
                            bar.finish()
                            downloaded = True
                except Exception as e:
                    # Deliberately broad: any failure just means "try the
                    # next URL".
                    logger.warning(
                        'antiSMASH zip download failed: {}'.format(e))

            if downloaded:
                return True

            # Opening the file in 'wb' mode already truncated it; do not leave
            # the empty/partial result around to be mistaken for an archive.
            try:
                os.remove(local_path)
            except OSError:
                pass

        return False
Пример #4
0
def pull_github(source: dict,
                path: Optional[str] = None,
                secrets: dict = {},
                **kwargs) -> Files:
    """
    Pull a GitHub repo/subpath.

    If a user token is provided in `secrets` it will be used to authenticate
    as that user.

    The repository's zipball archive is streamed into a temporary ``.zip``
    file which is then handed to `pull_zip` together with the normalised
    `subpath` (trailing slash removed) and `path` (defaults to ".").
    `secrets` is only read, never modified.
    """
    assert source.get("repo"), "GitHub source must have a repo"

    subpath = source.get("subpath") or ""
    if subpath.endswith("/"):
        subpath = subpath[:-1]

    path = path or "."

    # Get the possibly token protected link for the repo archive
    # See https://developer.github.com/v3/repos/contents/#download-a-repository-archive
    client = github_client(secrets.get("token"))
    repo_resource = client.get_repo(source["repo"])
    archive_link = repo_resource.get_archive_link("zipball")

    # Get the archive. To avoid it filling up memory, stream directly to file,
    # Increase timeout over the default of 5s.
    # The context manager guarantees the file handle is closed even when the
    # download raises; with delete=False the file itself stays on disk so
    # that pull_zip can read it by name afterwards.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as zip_file:
        with httpx.stream("GET", archive_link, timeout=60) as response:
            for data in response.iter_bytes():
                zip_file.write(data)

    return pull_zip(zip_file.name, subpath=subpath, path=path)
Пример #5
0
def download(path, *args, **kwargs):
    """Stream a GET response into the file at ``path``.

    ``args`` and ``kwargs`` are forwarded to ``httpx.stream`` after the
    method name. A non-success status raises before the file is opened.
    """
    with httpx.stream('GET', *args, **kwargs) as response:
        response.raise_for_status()

        # Unbuffered binary output, filled in 16 KiB pieces.
        with path.open(mode='wb', buffering=0) as sink:
            for block in response.iter_bytes(16384):
                sink.write(block)
Пример #6
0
 def __init__(self,
              name: str,
              icon: str,
              id: str,
              author: str,
              version: str,
              config: str,
              description: str,
              tmpdir: str,
              parent=None):
     """Build the list entry: set its text, download its icon, keep metadata.

     The icon at URL ``icon`` is streamed into ``tmpdir`` under the last
     path segment of the URL and then used as the item's icon. ``tmpdir``
     is joined by plain string concatenation, so it is expected to end
     with a path separator.
     """
     super(item, self).__init__(parent)
     self.tmpdir = tmpdir
     self.setText(name)

     # Local file name is whatever follows the last "/" in the icon URL.
     icon_path = self.tmpdir + icon.split("/")[-1]
     with httpx.stream("GET", icon) as response, \
             open(icon_path, "wb+") as icon_file:
         for block in response.iter_bytes():
             icon_file.write(block)
     self.setIcon(QIcon(icon_path))
     self.setToolTip(f"标题:{name}\nID:{id}\n作者:{author}\n版本:{version}")

     self.config = config
     self.description = description
     self.name = name
     self.version = version
     self.author = author
Пример #7
0
def links(timeout: int = 10, delay: float = 0.1):
    """Check that every known link answers with HTTP 200.

    The links are those extracted from ``SRC_DIR / "index.html"`` plus a
    fixed set of sante.fr URLs. Each link is printed and requested in
    sorted order. A 429 response prints a warning and skips the link
    (without the pause); any other non-200 status raises ``Exception``.
    After each successful check the function sleeps ``delay`` seconds.
    """
    dynamically_generated_links_via_injection = {
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-22-cotes-d'armor.html",
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-20A-corse-du-sud.html",
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-01-ain.html",
        "https://www.sante.fr/cf/centres-vaccination-covid.html",
        "https://www.sante.fr/cf/centres-depistage-covid/departement-01.html",
        "https://www.sante.fr/cf/centres-depistage-covid/departement-2A.html",
        "https://www.sante.fr/cf/centres-depistage-covid.html",
    }
    parser = LinkExtractor()
    parser.feed((SRC_DIR / "index.html").read_text())
    all_links = parser.links.union(dynamically_generated_links_via_injection)

    # verify=False: ignore SSL certificate validation errors
    request_options = {"timeout": timeout, "verify": False}
    for link in sorted(all_links):
        print(link)
        with httpx.stream("GET", link, **request_options) as response:
            status = response.status_code
            if status == HTTPStatus.TOO_MANY_REQUESTS:
                print("Warning: we’re being throttled, skipping link (429)")
                continue
            if status != HTTPStatus.OK:
                raise Exception(f"{link} is broken! ({status})")
        time.sleep(delay)  # avoid being throttled
Пример #8
0
async def test_stream(server):
    """A response obtained via ``httpx.stream`` stays usable once read."""
    async with httpx.stream("GET", server.url) as resp:
        # Load the whole body while the stream is still open.
        await resp.aread()

    assert resp.status_code == 200
    assert resp.reason_phrase == "OK"
    assert resp.text == "Hello, world!"
    assert resp.http_version == "HTTP/1.1"
Пример #9
0
def fetch_tornado_reports():
    """Download the text at ``URL`` into ``PATH`` unless ``PATH`` exists.

    An existing file at ``PATH`` is treated as a valid cache and nothing is
    fetched. Because of that, a failed download must not leave a file
    behind: a non-success HTTP status raises, and on any error the partial
    file is removed before the exception propagates.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
        Any other exception raised while connecting, reading or writing.
    """
    if os.path.isfile(PATH):
        print(f'Using cached {PATH}')
        return
    print(f'Fetchng {PATH}')
    try:
        with open(PATH, 'w') as file:
            with httpx.stream('GET', URL) as r:
                # Do not cache an error page as if it were the report data.
                r.raise_for_status()
                for data in r.iter_text():
                    # Each item is one decoded text chunk; write it as-is.
                    file.write(data)
    except BaseException:
        # Includes KeyboardInterrupt: an interrupted download would
        # otherwise be picked up as "cached" on the next call.
        if os.path.isfile(PATH):
            os.remove(PATH)
        raise
Пример #10
0
async def download_file(url: str, filename: Optional[str] = None) -> str:
    """Asynchronously stream ``url`` to disk and return the file name used.

    When ``filename`` is empty or omitted, the last path segment of ``url``
    is used. A non-success status raises before the file is opened.
    """
    if not filename:
        filename = url.split("/")[-1]
    async with httpx.stream("GET", url) as resp:
        resp.raise_for_status()
        async with aiofiles.open(filename, "wb") as out:
            async for block in resp.aiter_bytes():
                if not block:
                    # Skip empty chunks instead of issuing a no-op write.
                    continue
                await out.write(block)
    return filename
def _download_file(url, local_path, timeout):
    """Fetch ``url`` and store the body at ``local_path``.

    Raises ``Exception`` for any status other than 200; otherwise the open
    response is passed to ``_save_binary_response`` for writing.
    """
    # verify=False: ignore SSL certificate validation errors
    request_options = {"timeout": timeout, "verify": False}
    with httpx.stream("GET", url, **request_options) as response:
        status = response.status_code
        if status != HTTPStatus.OK:
            raise Exception(f"{url} is broken! ({status})")
        _save_binary_response(local_path, response)
Пример #12
0
def sync_detailed(
    *,
    client: AuthenticatedClient,
    name: str,
    snapshot_id: str,
    limit: Optional[int] = None,
) -> Response[Union[None, HTTPValidationError]]:
    """Issue the streaming GET request and wrap the result.

    The request arguments come from ``_get_kwargs``; the open response is
    converted by ``build_stream_response`` before the stream is closed.
    """
    request_kwargs = _get_kwargs(
        client=client,
        name=name,
        snapshot_id=snapshot_id,
        limit=limit,
    )

    with httpx.stream("GET", **request_kwargs) as response:
        result = build_stream_response(response=response)
    return result
Пример #13
0
    def DownloadImage(self: Any, url: str) -> Image.Image:
        """Download the specified image file and return it as an RGBA image.

        The body is read fully into memory and decoded from a ``BytesIO``
        buffer: ``Image.open`` needs a seekable file object (which an httpx
        response is not), and its second argument is the file mode, not
        the desired pixel format, so the conversion is done with
        ``convert("RGBA")``.

        Returns None, after logging an error, when the server does not
        answer with HTTP 200.
        """
        from io import BytesIO  # function-scope import keeps the block self-contained

        with httpx.stream("GET", url, timeout=30.0) as res:
            if res.status_code != 200:
                log.error(f"Failed to download image (HTTP {res.status_code})")
                return None
            payload = res.read()

        return Image.open(BytesIO(payload)).convert("RGBA")
Пример #14
0
 def __enter__(self):
     """Download and unpack the archive at ``self.url``.

     The archive is stored in a fresh temporary directory
     (``self.archive_dir``) and unpacked into a second one (``self.root``).
     Returns the path inside ``self.root`` named after the archive file's
     stem.
     """
     self.archive_dir = Path(tempfile.mkdtemp())
     archive_name = self.url.split('/')[-1]
     self.archive_file = self.archive_dir / archive_name
     logger.debug(f'Downloading fontawesome to {self.archive_file}')
     with httpx.stream('GET', self.url) as response, \
             self.archive_file.open('wb') as out:
         for block in response.iter_bytes():
             out.write(block)

     self.root = Path(tempfile.mkdtemp())
     logger.debug(f'Unpacking to {self.root}')
     shutil.unpack_archive(self.archive_file, self.root)
     return self.root / self.archive_file.stem
Пример #15
0
def sign_remote_file(url: str, **signer_kwargs):
    """Download ``url`` into a spooled temporary file and sign it.

    Up to 10 MiB is kept in memory before the temporary file spills to
    disk. Progress is shown with tqdm, sized from the ``content-length``
    header. Returns whatever ``sign_file`` returns for the downloaded file
    and ``signer_kwargs``.
    """
    spool_limit = 10 * 1024 * 1024
    with tempfile.SpooledTemporaryFile(spool_limit) as spooled:
        with httpx.stream("GET", url) as response:
            expected_size = int(response.headers["content-length"])
            progress = tqdm(total=expected_size,
                            unit="B",
                            unit_scale=True,
                            desc=str(response.url))
            for block in response.iter_bytes():
                spooled.write(block)
                progress.update(len(block))
            progress.close()

        # NOTE(review): the file position is at the end of the written data
        # here; confirm that sign_file rewinds before reading.
        return sign_file(spooled, **signer_kwargs)
Пример #16
0
def sync_detailed(
    *,
    client: AuthenticatedClient,
    name: str,
    request: Optional[TextClassificationQuery] = None,
    limit: Optional[int] = None,
) -> Response[Union[None, HTTPValidationError]]:
    """Issue the streaming POST request and wrap the result.

    ``request`` is passed to ``_get_kwargs`` as ``json_body``; the open
    response is converted by ``build_stream_response`` before the stream
    is closed.
    """
    request_kwargs = _get_kwargs(
        client=client,
        name=name,
        limit=limit,
        json_body=request,
    )

    with httpx.stream("POST", **request_kwargs) as response:
        result = build_stream_response(response=response)
    return result
def download_and_unpack(url: str, out_path: Path) -> None:
    """Download the zip archive at ``url`` and extract it into ``out_path``.

    The archive is streamed into a temporary file while a progress display
    (sized from the ``Content-Length`` header) tracks the downloaded byte
    count. The temporary file is discarded when the function returns.
    """
    with NamedTemporaryFile() as archive:
        with httpx.stream("GET", url) as response:
            expected_size = int(response.headers["Content-Length"])

            with wk.utils.get_rich_progress() as progress:
                task = progress.add_task("Download Image Data",
                                         total=expected_size)
                for block in response.iter_bytes():
                    archive.write(block)
                    progress.update(
                        task, completed=response.num_bytes_downloaded)

        # Extract from the same open file object the data was written to.
        with ZipFile(archive, "r") as zipped:
            zipped.extractall(out_path)
Пример #18
0
    def get_sbml(self, model_id: str) -> bytes:
        """
        Attempt to download an SBML document from the repository.

        The file listing for the model is fetched first; the first main file
        whose name ends in ``xml`` is then downloaded with a progress display
        and gzip-compressed before being returned.

        Parameters
        ----------
        model_id : str
            The identifier of the desired metabolic model. This is typically repository
            specific.

        Returns
        -------
        bytes
            A gzip-compressed, UTF-8 encoded SBML document.

        Raises
        ------
        httpx.HTTPError
            In case there are any connection problems.
        RuntimeError
            If none of the model's main files has a name ending in ``xml``.

        """
        data = BytesIO()
        response = httpx.get(
            url=self._url.join(f"files/{model_id}"),
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
        files = BioModelsFilesResponse.parse_obj(response.json())
        for model in files.main:
            if model.name.endswith("xml"):
                break
        else:
            # Without `raise` the error was silently discarded and the code
            # went on to download the last listed file (or failed with a
            # NameError when the list was empty).
            raise RuntimeError(
                f"Could not find an SBML document for '{model_id}'.")
        with self._progress, httpx.stream(
                method="GET",
                url=self._url.join(f"download/{model_id}"),
                params={"filename": model.name},
        ) as response:
            response.raise_for_status()
            task_id = self._progress.add_task(
                description="download",
                total=model.size,
                model_id=model_id,
            )
            for chunk in response.iter_bytes():
                data.write(chunk)
                self._progress.update(task_id=task_id, advance=len(chunk))
        return gzip.compress(data.getvalue())
Пример #19
0
    def _download(self, url, dirname, filename):
        """Download ``url`` to ``dirname / filename`` and return that path.

        ``dirname`` is created when missing. Next to the download, a
        companion file with the suffix ``.url`` records the source URL.
        """
        if not dirname.exists():
            dirname.mkdir()

        file = dirname / filename

        # Record where the file came from before fetching it.
        url_note = file.with_suffix('.url')
        with open(url_note, 'wt', encoding='utf8') as note:
            note.write(url)

        with open(file, 'wb') as out, httpx.stream('GET', url) as response:
            for block in response.iter_bytes():
                out.write(block)

        return file
Пример #20
0
    def html_to_bytes(self, html):
        """POST ``html`` to ``self.url`` and return the response body as bytes.

        The request authenticates as user ``api`` with the configured
        PDFShift API key and passes the configured sandbox flag. A
        non-success status raises via ``raise_for_status``.
        """
        credentials = ("api", settings.PDFSHIFT_API_KEY)
        payload = {
            "source": html,
            "sandbox": settings.PDFSHIFT_SANDBOX_MODE,
        }
        with httpx.stream("POST",
                          self.url,
                          timeout=10.0,
                          auth=credentials,
                          json=payload) as response:
            response.raise_for_status()
            # Collect the body in 1 KiB pieces and glue them together.
            return b"".join(response.iter_bytes(1024))
Пример #21
0
def _download_url(url: str, dest: Path) -> None:
    """Download archive from URL to file `dest`.

    Raise `WorkbenchModuleImportError` on HTTP error. The original httpx
    exception is attached as the cause.

    NOTE(review): `raise_for_status()` is never called, so error status
    codes are not turned into exceptions here and their bodies are written
    to `dest` — confirm whether that is intended.
    """
    try:
        with dest.open("wb") as w:
            with httpx.stream("GET", url) as r:
                for chunk in r.iter_bytes():
                    w.write(chunk)
    except httpx.HTTPError as err:
        # Format exactly once: a second `%` pass would re-interpret any "%"
        # contained in the URL (e.g. percent-encoding) or in the error text
        # as a format directive and fail while building the message.
        raise WorkbenchModuleImportError(
            "HTTP error downloading %(url)s: %(message)s"
            % {"url": url, "message": str(err)}
        ) from err
Пример #22
0
def cli(url, output, etags, verbose):
    """
    Fetch data using HTTP conditional get
    """
    # The docstring above is left untouched on purpose: it may be shown to
    # users as command help text.
    #
    # url:     resource to fetch.
    # output:  destination file; when empty it is derived from the URL path
    #          or, failing that, from the response content type.
    # etags:   path of the JSON file mapping URLs to their last seen ETag.
    # verbose: echo diagnostics to stderr.
    headers = {}
    try:
        # `with` closes the ETag store promptly instead of leaking the handle.
        with open(etags) as etags_file:
            existing_etags = json.load(etags_file)
    except IOError:
        existing_etags = {}
    if url in existing_etags:
        headers["If-None-Match"] = existing_etags[url]
        if verbose:
            click.echo("Existing ETag: {}".format(existing_etags[url]),
                       err=True)
    with httpx.stream("GET", url, headers=headers) as response:
        if verbose:
            click.echo("Response status code: {}".format(response.status_code),
                       err=True)
        if not output:
            # Detect output from URL and content_type
            bits = urlparse(url)
            output = bits.path.split("/")[-1]
            if not output:
                # Use index.filetype
                content_type = response.headers["content-type"].split()[0]
                ext = CONTENT_TYPE_TO_EXT.get(content_type,
                                              content_type.split("/")[-1])
                output = "index.{}".format(ext)

        if 304 == response.status_code:
            # Not modified: keep the existing output file as it is.
            return
        elif 200 == response.status_code:
            etag = response.headers.get("etag")
            if etag:
                existing_etags[url] = etag
            bar = None
            if verbose and response.headers.get("content-length"):
                bar = click.progressbar(
                    length=int(response.headers["content-length"]))
            with open(output, "wb") as fp:
                for b in response.iter_bytes():
                    fp.write(b)
                    if bar:
                        bar.update(len(b))
            # Persist the ETag map only after the body was written completely.
            with open(etags, "w") as etags_file:
                etags_file.write(json.dumps(existing_etags, indent=4))
Пример #23
0
def get_test_file() -> Path:
    """Download a FITS file from e-Callisto to use during the tests.

    The file is written to the current working directory under its archive
    name, using the raw (undecoded) response bytes.

    :returns: Path object of the downloaded FITS file.
    """
    fitsfile = "BLEN7M_20110809_080004_25.fit.gz"
    archive_root = ("http://soleil80.cs.technik.fhnw.ch/"
                    "solarradio/data/2002-20yy_Callisto/")
    date_dir = "2011/08/09/"
    fitsurl = "".join((archive_root, date_dir, fitsfile))

    with open(fitsfile, "wb") as out, httpx.stream("GET", fitsurl) as response:
        for block in response.iter_raw():
            out.write(block)

    return Path(fitsfile)
Пример #24
0
def download(url, local_destination, recursing = 0):
    '''Download the 'url' to the file 'local_destination'.

    A 202 response is retried after a short wait by calling this function
    again, up to _MAX_RECURSIVE_CALLS times ('recursing' counts the
    attempts made so far). Other codes in [200, 400) cause the body to be
    written to 'local_destination'. Every remaining status is mapped to one
    of the dedicated exception types below.
    '''

    def addurl(text):
        return f'{text} for {url}'

    timeout = httpx.Timeout(15, connect = 15, read = 15, write = 15)
    with httpx.stream('get', url, verify = False, timeout = timeout,
                      follow_redirects = True) as response:
        status = response.status_code

        if status == 202:
            # Code 202 = Accepted, "received but not yet acted upon."
            wait(2)                     # Sleep a short time and try again.
            raise_for_interrupts()
            attempt = recursing + 1
            if attempt > _MAX_RECURSIVE_CALLS:
                raise ServiceFailure(addurl('Exceeded max retries for code 202'))
            if __debug__: log('calling download(url) recursively for code 202')
            download(url, local_destination, attempt)
            return

        if 200 <= status < 400:
            with open(local_destination, 'wb') as out:
                for chunk in response.iter_bytes():
                    raise_for_interrupts()
                    out.write(chunk)
            response.close()
            size = stat(local_destination).st_size
            if __debug__: log(f'wrote {size} bytes to file {local_destination}')
            return

        # Everything below is a failure; the checks are mutually exclusive.
        if status in (401, 402, 403, 407, 451, 511):
            raise AuthenticationFailure(addurl('Access is forbidden'))
        if status in (404, 410):
            raise NoContent(addurl('No content found'))
        if status in (405, 406, 409, 411, 412, 414, 417, 428, 431, 505, 510):
            raise InternalError(addurl(f'Server returned code {status}'))
        if status in (415, 416):
            raise ServiceFailure(addurl('Server rejected the request'))
        if status == 429:
            raise RateLimitExceeded('Server blocking further requests due to rate limits')
        if status == 503:
            raise ServiceFailure('Server is unavailable -- try again later')
        if status in (500, 501, 502, 506, 507, 508):
            raise ServiceFailure(addurl(f'Internal server error (HTTP code {status})'))
        raise NetworkFailure(f'Unable to resolve {url}')
Пример #25
0
def download_and_extract_mibig_json(download_path, output_path, version='1.4'):
    """Ensure the MiBIG JSON archive is downloaded and extracted.

    The archive is cached at ``download_path`` as
    ``mibig_json_<version>.tar.gz``. A cached file that cannot be opened as
    a tar archive is deleted and downloaded again. Extraction into
    ``output_path`` is skipped when a ``completed`` marker file already
    exists there; the marker is written after a successful extraction.

    Returns:
        Always True; failures surface as exceptions.
    """
    archive_path = os.path.join(download_path,
                                'mibig_json_{}.tar.gz'.format(version))
    logger.debug(
        'Checking for existing MiBIG archive at {}'.format(archive_path))
    cached = False
    if os.path.exists(archive_path):
        logger.info('Found cached file at {}'.format(archive_path))
        try:
            # Opening is only a validity probe; close the handle right away.
            with tarfile.open(archive_path):
                cached = True
        except Exception:
            # Still broad on purpose (any unreadable archive triggers a fresh
            # download), but no longer swallows KeyboardInterrupt/SystemExit.
            logger.info('Invalid MiBIG archive found, will download again')
            os.unlink(archive_path)

    if not cached:
        url = MIBIG_JSON_URL.format(version)
        with open(archive_path, 'wb') as f:
            with httpx.stream('GET', url) as r:
                filesize = int(r.headers['content-length'])
                bar = Bar(url, max=filesize, suffix='%(percent)d%%')
                for data in r.iter_bytes():
                    f.write(data)
                    bar.next(len(data))
                bar.finish()

    logger.debug('Extracting MiBIG JSON data')

    if os.path.exists(os.path.join(output_path, 'completed')):
        return True

    # extract and rename to "mibig_json"
    # TODO annoyingly the 2.0 version has been archived with a subdirectory, while
    # 1.4 just dumps all the files into the current directory, so if/when 2.0 support
    # is required this will need to handle both cases
    # NOTE(review): extractall() trusts the member paths stored in the
    # downloaded archive; consider validating or filtering them.
    with tarfile.open(archive_path, 'r:gz') as mibig_gz:
        mibig_gz.extractall(path=os.path.join(output_path))

    # Marker file: lets later calls skip the extraction step.
    open(os.path.join(output_path, 'completed'), 'w').close()

    return True
Пример #26
0
def event_hook_tqdm():
    """
    Show file download progress by combining a streamed download with tqdm.

    A fixed 100 MB test file is streamed into a temporary file; after each
    chunk the progress bar advances by the growth of the response's
    ``num_bytes_downloaded`` counter.
    """
    url = "https://speed.hetzner.de/100MB.bin"
    with tempfile.NamedTemporaryFile() as download_file:
        with httpx.stream("GET", url) as response:
            total = int(response.headers["Content-Length"])
            bar_options = dict(total=total,
                               unit_scale=True,
                               unit_divisor=1024,
                               unit="B")

            with tqdm(**bar_options) as progress:
                seen = response.num_bytes_downloaded
                for chunk in response.iter_bytes():
                    download_file.write(chunk)
                    current = response.num_bytes_downloaded
                    progress.update(current - seen)
                    seen = current
Пример #27
0
async def get_container_file(  # pylint: disable=too-many-arguments
    cluster: str,
    environ: str,
    container: str,
    path: str,
    topology_name: str = Query(..., alias="topology"),
    role: Optional[str] = None,
):
    """Return a given raw file."""
    # Locate the stream manager of the requested container and build the
    # download URL served on its shell port.
    topology = state.tracker.get_topology(cluster, role, environ,
                                          topology_name)
    stmgr_key = f"stmgr-{container}"
    stmgr = topology.info.physical_plan.stmgrs[stmgr_key]
    url = f"http://{stmgr.host}:{stmgr.shell_port}/download/{path}"

    # Offer the file under the last path segment.
    filename = path.rpartition("/")[2]
    disposition = {"Content-Disposition": f"attachment; filename={filename}"}

    # NOTE(review): the StreamingResponse is created and awaited inside the
    # `async with`; confirm the upstream response is still open when the
    # body is actually consumed, and that awaiting the response object is
    # intended.
    async with httpx.stream("GET", url) as response:
        return await StreamingResponse(
            content=response.iter_bytes(),
            headers=disposition,
        )
Пример #28
0
def pull_http(source: dict,
              path: Optional[str] = None,
              secrets: dict = {},
              **kwargs) -> Files:
    """
    Pull a file from a HTTP source.

    The body of `source["url"]` is streamed into `path`. When `path` is not
    given it defaults to the basename of the URL; when it has no file
    extension one is guessed from the response's `Content-Type` (falling
    back to ".txt"). `secrets` and `kwargs` are accepted but not used here.

    Returns a mapping from the written path to its `file_info`.

    Raises `RuntimeError` when the response status is not 200 or when the
    advertised `Content-Length` exceeds `MAX_SIZE` gigabytes.
    """
    url = source.get("url")
    assert url, "Source must have a URL"

    with httpx.stream("GET", url) as response:
        if response.status_code != 200:
            raise RuntimeError(
                f"Error when fetching {url}: {response.status_code}")

        # A missing Content-Length counts as size 0 and passes the check.
        size = int(response.headers.get("Content-Length", 0)) / GIGABYTE
        if size > MAX_SIZE:
            # This error used to be constructed but never raised, so the size
            # limit was not enforced.
            raise RuntimeError(
                f"Size of file is greater than {MAX_SIZE}GB maximum: {size}GB")

        if not path:
            path = str(os.path.basename(url))
        if not file_ext(path):
            content_type = response.headers.get("Content-Type",
                                                "text/html").split(";")[0]
            ext = mimetypes.guess_extension(content_type, strict=False)
            path += ext or ".txt"
        ensure_parent(path)
        remove_if_dir(path)

        with open(path, "wb") as file:
            for data in response.iter_bytes():
                file.write(data)

        return {path: file_info(path)}
Пример #29
0
def test_httpcore_exception_mapping(server) -> None:
    """
    Exceptions raised by httpcore surface as their httpx counterparts.
    """

    # A failed connection must map to `ConnectError`, not merely to the
    # broader `NetworkError`.
    with pytest.raises(httpx.ConnectError):
        httpx.get("http://doesnotexist")

    # Streaming methods map exceptions as well: a very short read timeout
    # against the slow endpoint must raise `ReadTimeout`.
    slow_url = server.url.copy_with(path="/slow_stream_response")
    short_read = httpx.Timeout(None, read=0.1)
    with httpx.stream("GET", slow_url, timeout=short_read) as response:
        with pytest.raises(httpx.ReadTimeout):
            response.read()

    # The mapping also applies to custom transports.
    class MockTransport(httpcore.SyncHTTPTransport):
        def request(self, *args: Any, **kwargs: Any) -> Any:
            raise httpcore.ProtocolError()

    mock_client = httpx.Client(transport=MockTransport())
    with pytest.raises(httpx.ProtocolError):
        mock_client.get("http://testserver")
Пример #30
0
    def get_sbml(self, model_id: str) -> bytes:
        """
        Attempt to download an SBML document from the repository.

        The file ``<model_id>.xml.gz`` is streamed from the repository URL
        while a progress task, sized from the ``Content-Length`` header,
        tracks the received bytes. The bytes are returned as received.

        Parameters
        ----------
        model_id : str
            The identifier of the desired metabolic model. This is typically repository
            specific.

        Returns
        -------
        bytes
            A gzip-compressed, UTF-8 encoded SBML document.

        Raises
        ------
        httpx.HTTPError
            In case there are any connection problems.

        """
        filename = f"{model_id}.xml.gz"
        received = []
        with self._progress, httpx.stream(
                method="GET", url=self._url.join(filename)) as response:
            response.raise_for_status()
            expected_size = int(response.headers["Content-Length"])
            task_id = self._progress.add_task(
                description="download",
                total=expected_size,
                model_id=model_id,
            )
            for chunk in response.iter_bytes():
                received.append(chunk)
                self._progress.update(task_id=task_id, advance=len(chunk))
        return b"".join(received)