def download_file(self, uri, *, local_path=None, base_url=None, cache=True, cloud_only=False): """ Downloads a single file based on the data URI Parameters ---------- uri : str The product dataURI, e.g. mast:JWST/product/jw00736-o039_t001_miri_ch1-long_x1d.fits local_path : str Directory in which the files will be downloaded. Defaults to current working directory. base_url: str A base url to use when downloading. Default is the MAST Portal API cache : bool Default is True. If file is found on disk it will not be downloaded again. cloud_only : bool, optional Default False. If set to True and cloud data access is enabled (see `enable_cloud_dataset`) files that are not found in the cloud will be skipped rather than downloaded from MAST as is the default behavior. If cloud access is not enables this argument as no affect. Returns ------- status: str download status message. Either COMPLETE, SKIPPED, or ERROR. msg : str An error status message, if any. url : str The full url download path """ # create the full data URL base_url = base_url if base_url else self._portal_api_connection.MAST_DOWNLOAD_URL data_url = base_url + "?uri=" + uri # create a local file path if none is input. Use current directory as default. if not local_path: filename = os.path.basename(uri) local_path = os.path.join(os.path.abspath('.'), filename) # recreate the data_product key for cloud connection check data_product = {'dataURI': uri} status = "COMPLETE" msg = None url = None try: if self._cloud_connection is not None and self._cloud_connection.is_supported(data_product): try: self._cloud_connection.download_file(data_product, local_path, cache) except Exception as ex: log.exception("Error pulling from S3 bucket: {}".format(ex)) if cloud_only: log.warn("Skipping file...") local_path = "" status = "SKIPPED" else: log.warn("Falling back to mast download...") self._download_file(data_url, local_path, cache=cache, head_safe=True, continuation=False) else: self._download_file(data_url, local_path, cache=cache, head_safe=True, continuation=False) # check if file exists also this is where would perform md5, # and also check the filesize if the database reliably reported file sizes if (not os.path.isfile(local_path)) and (status != "SKIPPED"): status = "ERROR" msg = "File was not downloaded" url = data_url except HTTPError as err: status = "ERROR" msg = "HTTPError: {0}".format(err) url = data_url return status, msg, url
def _download_file(self, url, local_filepath, timeout=None, auth=None, continuation=True, cache=False, method="GET", head_safe=False, **kwargs): """ Download a file. Resembles `astropy.utils.data.download_file` but uses the local ``_session`` Parameters ---------- url : string local_filepath : string timeout : int auth : dict or None continuation : bool If the file has already been partially downloaded *and* the server supports HTTP "range" requests, the download will be continued where it left off. cache : bool method : "GET" or "POST" head_safe : bool """ if head_safe: response = self._session.request("HEAD", url, timeout=timeout, stream=True, auth=auth, **kwargs) else: response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() if 'content-length' in response.headers: length = int(response.headers['content-length']) if length == 0: log.warn('URL {0} has length=0'.format(url)) else: length = None if ((os.path.exists(local_filepath) and ('Accept-Ranges' in response.headers) and continuation)): open_mode = 'ab' existing_file_length = os.stat(local_filepath).st_size if length is not None and existing_file_length >= length: # all done! log.info( "Found cached file {0} with expected size {1}.".format( local_filepath, existing_file_length)) return elif existing_file_length == 0: open_mode = 'wb' else: log.info("Continuing download of file {0}, with {1} bytes to " "go ({2}%)".format( local_filepath, length - existing_file_length, (length - existing_file_length) / length * 100)) # bytes are indexed from 0: # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header end = "{0}".format(length - 1) if length is not None else "" self._session.headers['Range'] = "bytes={0}-{1}".format( existing_file_length, end) response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() del self._session.headers['Range'] elif cache and os.path.exists(local_filepath): if length is not None: statinfo = os.stat(local_filepath) if statinfo.st_size != length: log.warning("Found cached file {0} with size {1} that is " "different from expected size {2}".format( local_filepath, statinfo.st_size, length)) open_mode = 'wb' else: log.info( "Found cached file {0} with expected size {1}.".format( local_filepath, statinfo.st_size)) response.close() return else: log.info("Found cached file {0}.".format(local_filepath)) response.close() return else: open_mode = 'wb' if head_safe: response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() blocksize = astropy.utils.data.conf.download_block_size log.debug( f"Downloading URL {url} to {local_filepath} with size {length} " f"by blocks of {blocksize}") bytes_read = 0 # Only show progress bar if logging level is INFO or lower. if log.getEffectiveLevel() <= 20: progress_stream = None # Astropy default else: progress_stream = io.StringIO() with ProgressBarOrSpinner( length, ('Downloading URL {0} to {1} ...'.format(url, local_filepath)), file=progress_stream) as pb: with open(local_filepath, open_mode) as f: for block in response.iter_content(blocksize): f.write(block) bytes_read += blocksize if length is not None: pb.update( bytes_read if bytes_read <= length else length) else: pb.update(bytes_read) response.close() return response