def test_is_log_to_file_configed():
    """
    Test to make sure that the logger follows the config:
    log_to_file, log_file_level, log_file_path
    """
    if config.get('logger', 'log_to_file') == 'True':
        # There must be two handlers, one streaming and one to file.
        assert len(log.handlers) == 2
        # One of the handlers must be a FileHandler.
        assert (isinstance(log.handlers[0], logging.FileHandler) or
                isinstance(log.handlers[1], logging.FileHandler))
        fh = None
        if isinstance(log.handlers[0], logging.FileHandler):
            fh = log.handlers[0]
        if isinstance(log.handlers[1], logging.FileHandler):
            fh = log.handlers[1]
        if fh is not None:
            log_file_level = config.get('logger', 'log_file_level')
            assert level_to_numeric.get(log_file_level) == fh.level
            log_file_path = config.get('logger', 'log_file_path')
            assert os.path.basename(fh.baseFilename) == os.path.basename(log_file_path)

def _download(uri, kwargs, err="Unable to download data at specified URL"): """Attempts to download data at the specified URI""" _filename = os.path.basename(uri).split("?")[0] # user specifies a download directory if "directory" in kwargs: download_dir = os.path.expanduser(kwargs["directory"]) else: download_dir = config.get("downloads", "download_dir") # overwrite the existing file if the keyword is present if "overwrite" in kwargs: overwrite = kwargs["overwrite"] else: overwrite = False # If the file is not already there, download it filepath = os.path.join(download_dir, _filename) if not (os.path.isfile(filepath)) or (overwrite and os.path.isfile(filepath)): try: response = urllib2.urlopen(uri) except (urllib2.HTTPError, urllib2.URLError): raise urllib2.URLError(err) with open(filepath, "wb") as fp: shutil.copyfileobj(response, fp) else: warnings.warn( "Using existing file rather than downloading, use overwrite=True to override.", RuntimeWarning ) return filepath
def test_is_level_configed():
    """
    Test to make sure that the logger follows the config: log_level
    """
    config_level_numeric = level_to_numeric.get(config.get('logger', 'log_level'))
    assert log.getEffectiveLevel() == config_level_numeric

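# The logging tests above rely on a ``level_to_numeric`` mapping that is not
# shown in these snippets; a minimal sketch of what it is assumed to look
# like, since it must map the config's level names onto the standard
# library's numeric levels to compare against ``fh.level``:
import logging

level_to_numeric = {
    'DEBUG': logging.DEBUG,        # 10
    'INFO': logging.INFO,          # 20
    'WARNING': logging.WARNING,    # 30
    'ERROR': logging.ERROR,        # 40
    'CRITICAL': logging.CRITICAL,  # 50
}
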
def get(self, query_response, path=None, methods=('URL-FILE',), downloader=None):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Methods acceptable to user.
    downloader : sunpy.net.downloader.Downloader
        Downloader used to download the data.

    Returns
    -------
    out : :py:class:`Results`
        Object that supplies a list of filenames with meta attributes
        containing the respective QueryResponse.

    Examples
    --------
    >>> res = get(qr).wait()
    """
    if downloader is None:
        downloader = download.Downloader()
        thread = threading.Thread(target=downloader.reactor.run)
        thread.daemon = True
        thread.start()
        res = Results(
            lambda _: downloader.reactor.stop(), 1,
            lambda mp: self.link(query_response, mp)
        )
    else:
        res = Results(
            lambda _: None, 1, lambda mp: self.link(query_response, mp)
        )

    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        res.poke()
        return res

    self.download_all(
        self.api.service.GetData(
            self.make_getdatarequest(query_response, methods)
        ),
        methods, downloader, path, fileids, res
    )
    res.poke()
    return res

def from_url(cls, url):
    """
    Return object read from URL.

    Parameters
    ----------
    url : str
        URL to retrieve the data from.
    """
    default_dir = config.get("downloads", "download_dir")
    path = download_file(url, default_dir)
    return cls.read(path)

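# Usage sketch (doctest style): from_url is written as a classmethod, so it
# would be called on the reading class itself. The class name and URL below
# are assumptions for illustration, not from the snippet above:
# >>> obj = SomeReader.from_url("http://example.com/observation.fits")  # doctest: +SKIP
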
def fetch(self, query_results: QueryResponseTable, *, path: os.PathLike = None,
          downloader: parfive.Downloader, **kwargs):
    """
    Fetch asdf files describing the datasets.

    Parameters
    ----------
    query_results:
        Results to download.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory.
    downloader : `parfive.Downloader`
        The download manager to use.
    """
    # This logic is being upstreamed into Fido hopefully in 2.1rc4
    if path is None:
        path = Path(config.get('downloads', 'download_dir')) / '{file}'  # pragma: no cover
    elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
        path = Path(path) / '{file}'  # pragma: no cover
    else:
        path = Path(path)  # pragma: no cover
    path = path.expanduser()

    if not len(query_results):
        return

    for row in query_results:
        url = f"{self._BASE_DOWNLOAD_URL}/asdf?datasetId={row['Dataset ID']}"
        # Set max_splits here as the metadata streamer doesn't like
        # accept-range at the moment.
        downloader.enqueue_file(url, filename=partial(self._make_filename, path, row),
                                max_splits=1)

def fetch(self, query_response, path=None, methods=None, site=None,
          progress=True, overwrite=False, downloader=None, wait=True):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. ``PREFIX-SUFFIX_SUFFIX2_SUFFIX3``.
        The full list of
        `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and the
        path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if `'unique'`
        the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.

    Returns
    -------
    out : `parfive.Results`
        Object that supplies a list of filenames and any errors.

    Examples
    --------
    >>> files = fetch(qr)  # doctest:+SKIP
    """
    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        return downloader.download() if wait else Results()

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")
    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

    err_results = self.download_all(data_response, methods, downloader,
                                    path, fileids)

    if dl_set and not wait:
        return err_results

    results = downloader.download()
    results += err_results
    results._errors += err_results.errors
    return results

def get_request(self, requestIDs, path=None, overwrite=False, progress=True,
                max_conn=5, downloader=None, results=None):
    """
    Query JSOC to see if request_id is ready for download.
    If the request is ready for download, download it.

    Parameters
    ----------
    requestIDs : list or string
        One or many requestID strings.
    path : string
        Path to save data to, defaults to SunPy download dir.
    overwrite : bool
        Replace files with the same name if True.
    progress : bool
        Print progress info to terminal.
    max_conn : int
        Maximum number of download connections.
    downloader : sunpy.download.Downloader instance
        A custom downloader to use.
    results : Results instance
        A Results manager to use.

    Returns
    -------
    res : Results
        A Results instance or None if no URLs to download.
    """
    # Convert IDs to a list if not already
    if not astropy.utils.misc.isiterable(requestIDs) or isinstance(requestIDs, basestring):
        requestIDs = [requestIDs]

    if path is None:
        path = config.get('downloads', 'download_dir')
    path = os.path.expanduser(path)

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, max_total=max_conn)

    # A Results object tracks the number of downloads requested and the
    # number that have been completed.
    if results is None:
        results = Results(lambda x: None)

    urls = []
    for request_id in requestIDs:
        u = self._request_status(request_id)

        if u.status_code == 200 and u.json()['status'] == '0':
            for ar in u.json()['data']:
                if overwrite or not os.path.isfile(os.path.join(path, ar['filename'])):
                    urls.append(urlparse.urljoin(BASE_DL_URL + u.json()['dir'] + '/',
                                                 ar['filename']))
            if progress:
                print("{0} URLs found for download. Totalling {1}MB".format(
                    len(urls), u.json()['size']))
        else:
            if progress:
                self.check_request(request_id)

    if urls:
        for url, rcall in list(zip(urls, list(map(lambda x: results.require([x]), urls)))):
            downloader.download(url, callback=rcall, path=path)
    else:
        # Make Results think it has finished.
        results.require([])
        results.poke()

    return results

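# Usage sketch (doctest style) for the JSOC client method above; the client
# variable and the request ID are made-up placeholders:
# >>> res = client.get_request('JSOC_20140101_123', path='~/sunpy/data')  # doctest: +SKIP
# >>> files = res.wait()  # doctest: +SKIP
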
def get_request(self, requests, path=None, overwrite=False, progress=True,
                downloader=None, wait=True):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.ExportRequest`, `str`, `list`
        `~drms.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.
    path : `str`
        Path to save data to, defaults to SunPy download dir.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and the
        path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if `'unique'`
        the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.

    Returns
    -------
    res : `~sunpy.net.download.Results`
        A `~sunpy.net.download.Results` instance or `None` if no URLs to
        download.
    """
    c = drms.Client()

    # Convert Responses to a list if not already
    if isinstance(requests, str) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    for i, request in enumerate(requests):
        if isinstance(request, str):
            r = c.export_from_id(request)
            requests[i] = r

    # We only download if all are finished
    if not all([r.has_succeeded() for r in requests]):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        for filename in request.data['filename']:
            # Ensure we don't duplicate the file extension
            ext = os.path.splitext(filename)[1]
            if path.endswith(ext):
                fname = path.strip(ext)
            else:
                fname = path
            fname = fname.format(file=filename)
            fname = os.path.expanduser(fname)
            paths.append(fname)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite)

    urls = []
    for request in requests:
        if request.status == 0:
            for index, data in request.data.iterrows():
                url_dir = request.request_url + '/'
                urls.append(urllib.parse.urljoin(url_dir, data['filename']))

    if urls:
        if progress:
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            print(print_message.format(len(urls), request._d['size']))
        for aurl, fname in zip(urls, paths):
            downloader.enqueue_file(aurl, filename=fname)

    if dl_set and not wait:
        return Results()

    results = downloader.download()
    return results

def get(self, query_response, path=None, methods=('URL-FILE',),
        downloader=None, site=None):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Methods acceptable to user.
    downloader : sunpy.net.downloader.Downloader
        Downloader used to download the data.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        NSO : National Solar Observatory, Tucson (US)
        SAO (aka CFA) : Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) : Solar Data Analysis Center, NASA/GSFC (US)
        ROB : Royal Observatory of Belgium (Belgium)
        MPS : Max Planck Institute for Solar System Research (Germany)
        UCLan : University of Central Lancashire (UK)
        IAS : Institut Aeronautique et Spatial (France)
        KIS : Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU : New Mexico State University (US)

    Returns
    -------
    out : :py:class:`Results`
        Object that supplies a list of filenames with meta attributes
        containing the respective QueryResponse.

    Examples
    --------
    >>> res = get(qr).wait()
    """
    if downloader is None:
        downloader = download.Downloader()
        thread = threading.Thread(target=downloader.reactor.run)
        thread.daemon = True
        thread.start()
        res = Results(
            lambda _: downloader.reactor.stop(), 1,
            lambda mp: self.link(query_response, mp)
        )
    else:
        res = Results(
            lambda _: None, 1, lambda mp: self.link(query_response, mp)
        )

    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        res.poke()
        return res

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    self.download_all(
        self.api.service.GetData(
            self.make_getdatarequest(query_response, methods, info)
        ),
        methods, downloader, path, fileids, res
    )
    res.poke()
    return res

def get(self, requestIDs, path=None, overwrite=False, progress=True,
        max_conn=5, downloader=None, results=None):
    """
    Query JSOC to see if request_id is ready for download.
    If the request is ready for download, download it.

    Parameters
    ----------
    requestIDs : list or string
        One or many requestID strings.
    path : string
        Path to save data to, defaults to SunPy download dir.
    overwrite : bool
        Replace files with the same name if True.
    progress : bool
        Print progress info to terminal.
    max_conn : int
        Maximum number of download connections.
    downloader : sunpy.download.Downloader instance
        A custom downloader to use.
    results : Results instance
        A Results manager to use.

    Returns
    -------
    res : Results
        A Results instance or None if no URLs to download.
    """
    # Convert IDs to a list if not already
    if not astropy.utils.misc.isiterable(requestIDs) or isinstance(requestIDs, basestring):
        requestIDs = [requestIDs]

    if path is None:
        path = config.get('downloads', 'download_dir')

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, max_total=max_conn)

    # A Results object tracks the number of downloads requested and the
    # number that have been completed.
    if results is None:
        results = Results(lambda x: None)

    urls = []
    for request_id in requestIDs:
        u = self._request_status(request_id)

        if u.status_code == 200 and u.json()['status'] == '0':
            for ar in u.json()['data']:
                if overwrite or not os.path.isfile(os.path.join(path, ar['filename'])):
                    urls.append(urlparse.urljoin(BASE_DL_URL + u.json()['dir'] + '/',
                                                 ar['filename']))
            if progress:
                print("{0} URLs found for download. Totalling {1}MB".format(
                    len(urls), u.json()['size']))
        else:
            if progress:
                self.check_request(request_id)

    if urls:
        for url, rcall in list(zip(urls, list(map(lambda x: results.require([x]), urls)))):
            downloader.download(url, callback=rcall, path=path)
    else:
        # Make Results think it has finished.
        results.require([])
        results.poke()

    return results

import socket

from sunpy.net import hek
from sunpy.time import parse_time
from sunpy import config
from sunpy import lightcurve
from sunpy.util.net import check_download_file

__all__ = ['get_goes_event_list', 'temp_em', 'goes_chianti_tem']

# Check required data files are present in user's default download dir.
# Define location where GOES data files are stored.
# Manually resolve the hostname.
HOST = socket.gethostbyname_ex('hesperia.gsfc.nasa.gov')[-1][0]
GOES_REMOTE_PATH = "http://{0}/ssw/gen/idl/synoptic/goes/".format(HOST)
# Define location where data files should be downloaded to.
DATA_PATH = config.get("downloads", "download_dir")
# Define variables for file names
FILE_TEMP_COR = "goes_chianti_temp_cor.csv"
FILE_TEMP_PHO = "goes_chianti_temp_pho.csv"
FILE_EM_COR = "goes_chianti_em_cor.csv"
FILE_EM_PHO = "goes_chianti_em_pho.csv"


def get_goes_event_list(timerange, goes_class_filter=None):
    """
    Retrieve list of flares detected by GOES within a given time range.

    Parameters
    ----------
    timerange : sunpy.time.TimeRange
        The time range to download the event list for.

""" This module provies a object that can handle a time range. """ from datetime import timedelta import astropy.units as u from astropy.time import Time, TimeDelta from sunpy import config from sunpy.time import is_time_equal, parse_time from sunpy.time.time import _variables_for_parse_time_docstring from sunpy.util.decorators import add_common_docstring TIME_FORMAT = config.get('general', 'time_format') __all__ = ['TimeRange'] @add_common_docstring(**_variables_for_parse_time_docstring()) class TimeRange: """ A class to create and handle time ranges. .. note:: Regardless of how a `sunpy.time.TimeRange` is constructed it will always provide a positive time range where the start time is before the end time. Parameters ---------- a : {parse_time_types}
import fnmatch
import os
from itertools import imap

from astropy.units import Unit, nm, equivalencies
from sqlalchemy import (
    Column, Integer, Float, String, DateTime, Boolean, Table, ForeignKey)
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base

from sunpy.time import parse_time
from sunpy.io import fits, file_tools as sunpy_filetools
from sunpy.util import print_table
from sunpy import config

TIME_FORMAT = config.get("general", "time_format")

__all__ = [
    "WaveunitNotFoundError", "WaveunitNotConvertibleError", "JSONDump",
    "FitsHeaderEntry", "FitsKeyComment", "Tag", "DatabaseEntry",
    "entries_from_query_result", "entries_from_file", "entries_from_dir",
    "display_entries",
]

from os import remove
import os.path
from zipfile import ZipFile
from urllib2 import URLError
from shutil import move

from astropy.utils.data import download_file

from sunpy.util.net import url_exists
from sunpy import config

__author__ = "Steven Christe"
__email__ = "*****@*****.**"

sampledata_dir = config.get("downloads", "sample_dir")

# URLs to search for the sample data
_base_urls = (
    'http://data.sunpy.org/sample-data/',
    'http://hesperia.gsfc.nasa.gov/~schriste/sunpy-sample-data/',
    'https://github.com/ehsteve/sunpy-sample-data/raw/master/')

# Keys are file shortcuts.
# Values consist of a filename as well as an optional file extension if the
# files are hosted compressed. This extension is removed after download.
_files = {
    "AIA_171_IMAGE": ("AIA20110319_105400_0171.fits", ""),
    "RHESSI_IMAGE": ("hsi_image_20101016_191218.fits", ""),
    "EIT_195_IMAGE": ("eit_l1_20020625_100011.fits", ""),
    "CALLISTO_IMAGE": ("BIR_20110922_103000_01.fit", ""),
    "RHESSI_EVENT_LIST":

from os import remove
import os.path
from zipfile import ZipFile
from urllib2 import URLError
from shutil import move

from astropy.utils.data import download_file

from sunpy.util.net import url_exists
from sunpy import config

__author__ = "Steven Christe"
__email__ = "*****@*****.**"

sampledata_dir = config.get("downloads", "sample_dir")

# URLs to search for the sample data
_base_urls = (
    'http://data.sunpy.org/sample-data/',
    'http://hesperia.gsfc.nasa.gov/~schriste/sunpy-sample-data/')

# Keys are file shortcuts.
# Values consist of a filename as well as an optional file extension if the
# files are hosted compressed. This extension is removed after download.
_files = {
    "AIA_171_IMAGE": ("AIA20110319_105400_0171.fits", ""),
    "RHESSI_IMAGE": ("hsi_image_20101016_191218.fits", ""),
    "EIT_195_IMAGE": ("eit_l1_20020625_100011.fits", ""),
    "CALLISTO_IMAGE": ("BIR_20110922_103000_01.fit", ""),
    "RHESSI_EVENT_LIST": ("hsi_calib_ev_20020220_1106_20020220_1106_25_40.fits", ""),

def fetch(self, query_response, path=None,
          methods=('URL-FILE_Rice', 'URL-FILE'), downloader=None, site=None):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. ``PREFIX-SUFFIX_SUFFIX2_SUFFIX3``.
        The full list of
        `PREFIXES <http://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <http://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.
    downloader : sunpy.net.downloader.Downloader
        Downloader used to download the data.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    Returns
    -------
    out : :py:class:`Results`
        Object that supplies a list of filenames with meta attributes
        containing the respective QueryResponse.

    Examples
    --------
    >>> res = fetch(qr).wait()  # doctest:+SKIP
    """
    if downloader is None:
        downloader = download.Downloader()
        downloader.init()
        res = download.Results(lambda _: downloader.stop(), 1,
                               lambda mp: self.link(query_response, mp))
    else:
        res = download.Results(lambda _: None, 1,
                               lambda mp: self.link(query_response, mp))

    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        res.poke()
        return res

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    self.download_all(
        self.api.service.GetData(
            self.make_getdatarequest(query_response, methods, info)),
        methods, downloader, path, fileids, res)
    res.poke()
    return res

import astropy.units as u

import sunpy
from sunpy import config
from sunpy.data._sample import download_sample_data
from sunpy.data.data_manager.cache import Cache
from sunpy.data.data_manager.downloader import ParfiveDownloader
from sunpy.data.data_manager.manager import DataManager
from sunpy.data.data_manager.storage import SqliteStorage
from sunpy.util.config import CACHE_DIR

_download_dir = config.get('downloads', 'remote_data_manager_dir')

manager = DataManager(
    Cache(
        ParfiveDownloader(),
        SqliteStorage(_download_dir + '/data_manager.db'),
        _download_dir
    )
)

cache = Cache(
    ParfiveDownloader(),
    SqliteStorage(CACHE_DIR + '/cache.db'),
    CACHE_DIR,
    expiry=int(config.get('downloads', 'cache_expiry')) * u.day
)

__all__ = ["download_sample_data", "manager", "cache"]

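# A usage sketch (doctest style) of the ``manager`` defined above, based on
# the DataManager decorator API; the name, URL and SHA-256 hash below are
# illustrative placeholders, not values from the code above:
# >>> @manager.require('test_file',
# ...                  ['http://example.com/test_file.txt'],
# ...                  '<sha256-hash-of-the-file>')
# ... def uses_test_file():
# ...     return manager.get('test_file')  # doctest: +SKIP
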
def get(self, query_response, path=None, methods=('URL-FILE',),
        downloader=None, site=None):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Methods acceptable to user.
    downloader : sunpy.net.downloader.Downloader
        Downloader used to download the data.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    Returns
    -------
    out : :py:class:`Results`
        Object that supplies a list of filenames with meta attributes
        containing the respective QueryResponse.

    Examples
    --------
    >>> res = get(qr).wait()  # doctest: +SKIP
    """
    if downloader is None:
        downloader = download.Downloader()
        downloader.init()
        res = Results(
            lambda _: downloader.stop(), 1,
            lambda mp: self.link(query_response, mp)
        )
    else:
        res = Results(
            lambda _: None, 1, lambda mp: self.link(query_response, mp)
        )

    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        res.poke()
        return res

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    self.download_all(
        self.api.service.GetData(
            self.make_getdatarequest(query_response, methods, info)
        ),
        methods, downloader, path, fileids, res
    )
    res.poke()
    return res

import copy
import csv
import urllib
import sqlite3

import numpy as np
from astropy.io import fits
import pandas

from sunpy.time import parse_time
from sunpy import config
from sunpy.util.net import check_download_file
from sunpy import lightcurve

LYTAF_REMOTE_PATH = "http://proba2.oma.be/lyra/data/lytaf/"
LYTAF_PATH = config.get("downloads", "download_dir")


def remove_lytaf_events_from_lightcurve(lc, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a LYRALightCurve.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`
    artifacts : list of strings
        Contain the artifact types to be removed. For list of artifact types

import csv
import sqlite3

import numpy as np
from astropy.io import fits
import pandas

from sunpy.time import parse_time
from sunpy import config
from sunpy.util.net import check_download_file
from sunpy import lightcurve
from sunpy.extern.six.moves import urllib

LYTAF_REMOTE_PATH = "http://proba2.oma.be/lyra/data/lytaf/"
LYTAF_PATH = config.get("downloads", "download_dir")


def remove_lytaf_events_from_lightcurve(lc, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a LYRALightCurve.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`
    artifacts : list of strings
        Contain the artifact types to be removed. For list of artifact types

def get_request(self, requestIDs, path=None, overwrite=False, progress=True,
                max_conn=5, downloader=None, results=None):
    """
    Query JSOC to see if request_id is ready for download.
    If the request is ready for download, download it.

    Parameters
    ----------
    requestIDs : list or string
        One or many requestID strings.
    path : string
        Path to save data to, defaults to SunPy download dir.
    overwrite : bool
        Replace files with the same name if True.
    progress : bool
        Print progress info to terminal.
    max_conn : int
        Maximum number of download connections.
    downloader : `sunpy.download.Downloader` instance
        A custom downloader to use.
    results : Results instance
        A Results manager to use.

    Returns
    -------
    res : Results
        A Results instance or None if no URLs to download.
    """
    # Convert IDs to a list if not already
    if not isiterable(requestIDs) or isinstance(requestIDs, six.string_types):
        requestIDs = [requestIDs]

    if path is None:
        path = config.get('downloads', 'download_dir')
    path = os.path.expanduser(path)

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, max_total=max_conn)

    # A Results object tracks the number of downloads requested and the
    # number that have been completed.
    if results is None:
        results = Results(lambda _: downloader.stop())

    urls = []
    for request_id in requestIDs:
        u = self._request_status(request_id)

        if u.status_code == 200 and u.json()['status'] == '0':
            for ar in u.json()['data']:
                is_file = os.path.isfile(os.path.join(path, ar['filename']))
                if overwrite or not is_file:
                    url_dir = BASE_DL_URL + u.json()['dir'] + '/'
                    urls.append(urllib.parse.urljoin(url_dir, ar['filename']))
                else:
                    print_message = "Skipping download of file {} as it " \
                                    "has already been downloaded"
                    print(print_message.format(ar['filename']))
                    # Add the file on disk to the output
                    results.map_.update({
                        ar['filename']: {'path': os.path.join(path, ar['filename'])}
                    })
            if progress:
                print_message = "{0} URLs found for download. Totalling {1}MB"
                print(print_message.format(len(urls), u.json()['size']))
        else:
            if progress:
                self.check_request(request_id)

    if urls:
        for url in urls:
            downloader.download(url, callback=results.require([url]),
                                errback=lambda x: print(x), path=path)
    else:
        # Make Results think it has finished.
        results.require([])
        results.poke()

    return results

def fetch(self, query_response, path=None, methods=None, site=None,
          progress=True, overwrite=False, downloader=None, wait=True):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. ``PREFIX-SUFFIX_SUFFIX2_SUFFIX3``.
        The full list of
        `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and the
        path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if `'unique'`
        the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.

    Returns
    -------
    out : `parfive.Results`
        Object that supplies a list of filenames and any errors.

    Examples
    --------
    >>> files = fetch(qr)  # doctest:+SKIP
    """
    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        return downloader.download()

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")
    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

    err_results = self.download_all(data_response, methods, downloader,
                                    path, fileids)

    if dl_set and not wait:
        return err_results

    results = downloader.download()
    results += err_results
    results._errors += err_results.errors
    return results

def get_request(self, requests, path=None, overwrite=False, progress=True,
                max_conn=5, downloader=None, results=None):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.ExportRequest`, `str`, `list`
        `~drms.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.
    path : `str`
        Path to save data to, defaults to SunPy download dir.
    overwrite : `bool`
        Replace files with the same name if True.
    progress : `bool`
        Print progress info to terminal.
    max_conn : `int`
        Maximum number of download connections.
    downloader : `~sunpy.net.download.Downloader`
        A custom downloader to use.
    results : `~sunpy.net.download.Results`
        A `~sunpy.net.download.Results` manager to use.

    Returns
    -------
    res : `~sunpy.net.download.Results`
        A `~sunpy.net.download.Results` instance or `None` if no URLs to
        download.
    """
    c = drms.Client()

    # Convert Responses to a list if not already
    if isinstance(requests, str) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    for i, request in enumerate(requests):
        if isinstance(request, str):
            r = c.export_from_id(request)
            requests[i] = r

    # We only download if all are finished
    if not all([r.has_succeeded() for r in requests]):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        for filename in request.data['filename']:
            # Ensure we don't duplicate the file extension
            ext = os.path.splitext(filename)[1]
            if path.endswith(ext):
                fname = path.strip(ext)
            else:
                fname = path
            fname = fname.format(file=filename)
            fname = os.path.expanduser(fname)
            fname = partial(simple_path, fname)
            paths.append(fname)

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, max_total=max_conn)

    # A Results object tracks the number of downloads requested and the
    # number that have been completed.
    if results is None:
        results = Results(
            lambda _: downloader.stop(),
            done=lambda maps: [v['path'] for v in maps.values()])

    urls = []
    for request in requests:
        if request.status == 0:
            for index, data in request.data.iterrows():
                is_file = os.path.isfile(paths[index].args[0])
                if overwrite or not is_file:
                    url_dir = request.request_url + '/'
                    urls.append(urllib.parse.urljoin(url_dir, data['filename']))
                if not overwrite and is_file:
                    print_message = "Skipping download of file {} as it " \
                                    "has already been downloaded. " \
                                    "If you want to redownload the data, " \
                                    "please set overwrite to True"
                    print(print_message.format(data['filename']))
                    # Add the file on disk to the output
                    results.map_.update(
                        {data['filename']: {'path': paths[index].args[0]}})

    if urls:
        if progress:
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            print(print_message.format(len(urls), request._d['size']))
        for i, url in enumerate(urls):
            downloader.download(url, callback=results.require([url]),
                                errback=lambda x: print(x), path=paths[i])
    else:
        # Make Results think it has finished.
        results.require([])
        results.poke()

    return results

def fetch(self, query_response, path=None, methods=None, downloader=None,
          site=None):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.
    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file name of the downloaded file can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. ``PREFIX-SUFFIX_SUFFIX2_SUFFIX3``.
        The full list of
        `PREFIXES <http://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <http://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.
    downloader : sunpy.net.downloader.Downloader
        Downloader used to download the data.
    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    Returns
    -------
    out : :py:class:`Results`
        Object that supplies a list of filenames with meta attributes
        containing the respective QueryResponse.

    Examples
    --------
    >>> res = fetch(qr).wait()  # doctest:+SKIP
    """
    if downloader is None:
        downloader = download.Downloader()
        downloader.init()
        res = download.Results(
            lambda _: downloader.stop(), 1,
            lambda mp: self.link(query_response, mp)
        )
    else:
        res = download.Results(
            lambda _: None, 1, lambda mp: self.link(query_response, mp)
        )

    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'), '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        res.poke()
        return res

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")
    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

    self.download_all(data_response, methods, downloader, path, fileids, res)
    res.poke()
    return res

""" This module provies a object that can handle a time range. """ from datetime import timedelta import astropy.units as u from astropy.time import Time, TimeDelta from sunpy import config from sunpy.time import is_time_equal, parse_time from sunpy.time.time import _variables_for_parse_time_docstring from sunpy.util.decorators import add_common_docstring TIME_FORMAT = config.get('general', 'time_format') __all__ = ['TimeRange'] @add_common_docstring(**_variables_for_parse_time_docstring()) class TimeRange: """ A class to create and handle time ranges. .. note:: Regardless of how a `sunpy.time.TimeRange` is constructed it will always provide a positive time range where the start time is before the end time. `~sunpy.time.TimeRange.__contains__` has been implemented which means you can check if a time is within the time range you have created. Please see the example section below.
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.base_client.QueryResponseTable` or
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse` or `~sunpy.net.base_client.QueryResponseTable`
        Container returned by query method, or multiple.
    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `~sunpy.net.base_client.BaseQueryResponse.response_block_properties`
        via string formatting, moreover the file name of the downloaded file
        can be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and the
        path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4', '2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

    If any downloads fail, they can be retried by passing the
    `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP
    """
    if path is None:
        path = Path(config.get('downloads', 'download_dir')) / '{file}'
    elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
        path = Path(path) / '{file}'
    else:
        path = Path(path)
    path = path.expanduser()

    # Ensure we have write permissions to the path
    exists = list(filter(lambda p: p.exists(), Path(path).resolve().parents))
    if not os.access(exists[0], os.W_OK):
        raise PermissionError('You do not have permission to write'
                              f' to the directory {exists[0]}.')

    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    # TODO: Remove when parfive allows us to special case URLS.
    # Avoid more than one connection for JSOC only requests.
    from sunpy.net.jsoc import JSOCClient

    is_jsoc_only = False
    for query_result in query_results:
        if isinstance(query_result, UnifiedResponse):
            is_jsoc_only = all([isinstance(result.client, JSOCClient)
                                for result in query_result])
        elif isinstance(query_result, QueryResponseTable):
            is_jsoc_only = all([isinstance(result.table.client, JSOCClient)
                                for result in query_result])

    if downloader is None:
        if is_jsoc_only:
            max_conn = 1
            kwargs['max_splits'] = 1
        downloader = Downloader(max_conn=max_conn, progress=progress,
                                overwrite=overwrite)
    elif not isinstance(downloader, parfive.Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader instance.")

    # Handle retrying failed downloads
    retries = [isinstance(arg, Results) for arg in query_results]
    if all(retries):
        results = Results()
        for retry in query_results:
            dr = downloader.retry(retry)
            results.data += dr.data
            results._errors += dr._errors
        return results
    elif any(retries):
        raise TypeError("If any arguments to fetch are `parfive.Results` "
                        "objects, all arguments must be.")

    reslist = []
    for query_result in query_results:
        if isinstance(query_result, QueryResponseRow):
            responses = [query_result.as_table()]
        elif isinstance(query_result, QueryResponseTable):
            responses = [query_result]
        elif isinstance(query_result, UnifiedResponse):
            responses = query_result
        else:
            raise ValueError(f"Query result has an unrecognized type: {type(query_result)} "
                             "Allowed types are QueryResponseRow, QueryResponseTable or UnifiedResponse.")
        for block in responses:
            result = block.client.fetch(block, path=path,
                                        downloader=downloader,
                                        wait=False, **kwargs)
            if result not in (NotImplemented, None):
                reslist.append(result)

    results = downloader.download()

    # Combine the results objects from all the clients into one Results object.
    for result in reslist:
        if not isinstance(result, Results):
            raise TypeError(
                "If wait is False a client must return a parfive.Downloader "
                "and either None or a parfive.Results object.")
        results.data += result.data
        results._errors += result.errors

    return results

import astropy.nddata

from sunpy.image.transform import affine_transform
import sunpy.io as io
import sunpy.wcs as wcs
from sunpy.visualization import toggle_pylab
from sunpy.sun import constants
from sunpy.sun import sun
from sunpy.time import parse_time, is_time
from sunpy.image.rescale import reshape_image_to_4d_superpixel
from sunpy.image.rescale import resample as sunpy_image_resample

__all__ = ['GenericMap']

from sunpy import config
TIME_FORMAT = config.get("general", "time_format")

"""
Questions
---------
* Should we use Helioviewer or VSO's data model? (e.g. map.meas,
  map.wavelength or something else?)
* Should 'center' be renamed to 'offset' and crpix1 & 2 be used for
  'center'?
"""


class GenericMap(astropy.nddata.NDData):
    """
    A Generic spatially-aware 2D data array

    Parameters
    ----------

def get_request(self, requests, path=None, overwrite=False, progress=True,
                max_conn=5, downloader=None, results=None):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.ExportRequest`, `str`, `list`
        `~drms.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.
    path : `str`
        Path to save data to, defaults to SunPy download dir.
    overwrite : `bool`
        Replace files with the same name if True.
    progress : `bool`
        Print progress info to terminal.
    max_conn : `int`
        Maximum number of download connections.
    downloader : `~sunpy.net.download.Downloader`
        A custom downloader to use.
    results : `~sunpy.net.download.Results`
        A `~sunpy.net.download.Results` manager to use.

    Returns
    -------
    res : `~sunpy.net.download.Results`
        A `~sunpy.net.download.Results` instance or `None` if no URLs to
        download.
    """
    c = drms.Client()

    # Convert Responses to a list if not already
    if isinstance(requests, six.string_types) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    for i, request in enumerate(requests):
        if isinstance(request, six.string_types):
            r = c.export_from_id(request)
            requests[i] = r

    # We only download if all are finished
    if not all([r.has_succeeded() for r in requests]):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, six.string_types) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        for filename in request.data['filename']:
            # Ensure we don't duplicate the file extension
            ext = os.path.splitext(filename)[1]
            if path.endswith(ext):
                fname = path.strip(ext)
            else:
                fname = path
            fname = fname.format(file=filename)
            fname = os.path.expanduser(fname)
            fname = partial(simple_path, fname)
            paths.append(fname)

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, max_total=max_conn)

    # A Results object tracks the number of downloads requested and the
    # number that have been completed.
    if results is None:
        results = Results(lambda _: downloader.stop(),
                          done=lambda maps: [v['path'] for v in maps.values()])

    urls = []
    for request in requests:
        if request.status == 0:
            for index, data in request.data.iterrows():
                is_file = os.path.isfile(paths[index].args[0])
                if overwrite or not is_file:
                    url_dir = request.request_url + '/'
                    urls.append(urllib.parse.urljoin(url_dir, data['filename']))
                if not overwrite and is_file:
                    print_message = "Skipping download of file {} as it " \
                                    "has already been downloaded. " \
                                    "If you want to redownload the data, " \
                                    "please set overwrite to True"
                    print(print_message.format(data['filename']))
                    # Add the file on disk to the output
                    results.map_.update({data['filename']: {'path': paths[index].args[0]}})

    if urls:
        if progress:
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            print(print_message.format(len(urls), request._d['size']))
        for i, url in enumerate(urls):
            downloader.download(url, callback=results.require([url]),
                                errback=lambda x: print(x), path=paths[i])
    else:
        # Make Results think it has finished.
        results.require([])
        results.poke()

    return results