def get_request(self, requests, path=None, overwrite=False, progress=True,
                downloader=None, wait=True, max_conn=default_max_conn, **kwargs):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.client.ExportRequest`, `str`, `list`
        `~drms.client.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.
        The passed container is not modified; string IDs are resolved into
        export requests on an internal copy.
    path : `str`
        Path to save data to, defaults to SunPy download dir.
        If it does not contain ``{file}``, ``{file}`` is appended as the
        final path component.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if ``'unique'`` the filename
        will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.
    max_conn : `int`, optional
        Passed as ``max_conn`` to the `parfive.Downloader` that is created
        when no ``downloader`` is given.
    **kwargs : dict, optional
        Passed to ``downloader.enqueue_file``. ``max_splits`` defaults to 2
        when not supplied.

    Returns
    -------
    res: `~sunpy.net.download.Results`
        The value returned by ``downloader.download()``, or an empty
        ``Results`` instance when a ``downloader`` was supplied and ``wait``
        is `False`.

    Raises
    ------
    NotExportedError
        If at least one of the requests has not been exported yet.
    """
    c = drms.Client()

    kwargs['max_splits'] = kwargs.get('max_splits', 2)

    # Normalise to a list we own: this accepts any iterable (including tuples)
    # and guarantees the caller's container is never mutated below.
    if isinstance(requests, str) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    requests = [c.export_from_id(request) if isinstance(request, str) else request
                for request in requests]

    # We only download if all are finished
    if not all(r.has_succeeded() for r in requests):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, Path):
        path = str(path)

    if isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        if request.method == 'url-tar':
            fname = path.format(file=Path(request.tarfile).name)
            paths.append(os.path.expanduser(fname))
        else:
            for filename in request.data['filename']:
                # Ensure we don't duplicate the file extension.
                # ``str.strip(ext)`` would remove any of the extension's
                # *characters* from both ends of the template, so the suffix
                # is sliced off explicitly instead.
                ext = os.path.splitext(filename)[1]
                if ext and path.endswith(ext):
                    fname = path[:-len(ext)]
                else:
                    fname = path
                fname = fname.format(file=filename)
                fname = os.path.expanduser(fname)
                paths.append(fname)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite, max_conn=max_conn)

    if downloader.max_conn * kwargs['max_splits'] > 10:
        warnings.warn(("JSOC does not support more than 10 parallel connections. " +
                       f"Changing the number of parallel connections to {2 * self.default_max_conn}."),
                      SunpyUserWarning)
        kwargs['max_splits'] = 2
        downloader.max_conn = self.default_max_conn

    urls = []
    for request in requests:
        if request.status == 0:
            if request.protocol == 'as-is' or request.method == 'url-tar':
                urls.extend(list(request.urls.url))
            else:
                # Trailing slash so that urljoin appends the filename instead
                # of replacing the last path component.
                url_dir = request.request_url + '/'
                for _, data in request.data.iterrows():
                    urls.append(urllib.parse.urljoin(url_dir, data['filename']))

    if urls:
        if progress:
            # NOTE(review): ``request`` is the variable left over from the
            # loop above, so only the last request's size is reported here.
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            print(print_message.format(len(urls), request._d['size']))
        for aurl, fname in zip(urls, paths):
            downloader.enqueue_file(aurl, filename=fname, **kwargs)

    if dl_set and not wait:
        return Results()

    results = downloader.download()
    return results
def fetch(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Yearly ``*_SRS`` tar archives are downloaded once and the requested
    daily ``SRS.txt`` files are extracted from them next to the download
    location; other files are downloaded directly.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : optional
        Forwarded unchanged to ``self._get_full_filenames`` to build the
        target file paths.
    error_callback : optional
        Accepted but not used by this implementation.
    **kwargs : dict, optional
        Accepted but not used by this implementation.

    Returns
    -------
    Results Object
        The object returned by the downloader, with its ``data`` replaced by
        the paths (as `str`) of the per-day files.
    """
    urls = [qrblock['url'] for qrblock in qres]

    filenames = []
    local_filenames = []

    for i, (url, qre) in enumerate(zip(urls, qres)):
        name = url.split('/')[-1]

        # temporary fix !!! coz All QRBs have same start_time values
        day = Time(qre['Time'].start.strftime('%Y-%m-%d')) + TimeDelta(i * u.day)

        if name not in filenames:
            filenames.append(name)

        if name.endswith('.gz'):
            local_filenames.append('{}SRS.txt'.format(day.strftime('%Y%m%d')))
        else:
            local_filenames.append(name)

    # Files to be actually downloaded
    paths = self._get_full_filenames(qres, filenames, path)

    # Those files that will be present after get returns
    local_paths = self._get_full_filenames(qres, local_filenames, path)

    # remove duplicate urls. This will make paths and urls to have same number of elements.
    # OrderedDict is required to maintain ordering because it will be zipped with paths later
    urls = list(OrderedDict.fromkeys(urls))

    dobj = Downloader(max_conn=5)

    for aurl, fname in zip(urls, paths):
        dobj.enqueue_file(aurl, filename=fname)

    paths = dobj.download()

    outfiles = []
    for fname, srs_filename in zip(local_paths, local_filenames):
        name = fname.name

        for fname2 in paths:
            fname2 = pathlib.Path(fname2)

            # Plain text downloads need no extraction.
            if fname2.name.endswith('.txt'):
                continue

            year = fname2.name.split('_SRS')[0]

            # Anchor the match at the start of the daily filename: a plain
            # substring test also matches e.g. "2012" inside "20201201SRS.txt".
            if srs_filename.startswith(year):
                # The context manager closes the archive even if the member
                # lookup or the extraction raises.
                with tarfile.open(fname2) as open_tar:
                    try:
                        member = open_tar.getmember('SRS/' + srs_filename)
                    except KeyError:
                        # Some tars have a {year}_SRS when extracted, 2010 being one example
                        member = open_tar.getmember(f'{year}_SRS/' + srs_filename)
                    # Extract under the requested local name rather than the
                    # archive-internal path.
                    member.name = name
                    open_tar.extract(member, path=fname.parent)
                break

        # The local path is reported whether or not it came from an archive.
        outfiles.append(fname)

    paths.data = list(map(str, outfiles))
    return paths
def fetch(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Yearly ``*_SRS`` tar archives are downloaded once and the requested
    daily ``SRS.txt`` files are extracted from them next to the download
    location; other files are downloaded directly.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : `str` or `pathlib.Path`, optional
        Converted to `pathlib.Path` when given and forwarded to
        ``self._get_full_filenames`` to build the target file paths.
    error_callback : optional
        Accepted but not used by this implementation.
    **kwargs : dict, optional
        Accepted but not used by this implementation.

    Returns
    -------
    Results Object
        The object returned by the downloader, with its ``data`` replaced by
        the paths (as `str`) of the per-day files.
    """
    urls = [qrblock['url'] for qrblock in qres]

    filenames = []
    local_filenames = []

    for url, qre in zip(urls, qres):
        name = url.split('/')[-1]
        day = qre['Start Time']

        if name not in filenames:
            filenames.append(name)

        if name.endswith('.gz'):
            local_filenames.append('{}SRS.txt'.format(day.strftime('%Y%m%d')))
        else:
            local_filenames.append(name)

    if path is not None:
        path = pathlib.Path(path)

    # Files to be actually downloaded
    paths = self._get_full_filenames(qres, filenames, path)

    # Those files that will be present after get returns
    local_paths = self._get_full_filenames(qres, local_filenames, path)

    # remove duplicate urls. This will make paths and urls to have same number of elements.
    # OrderedDict is required to maintain ordering because it will be zipped with paths later
    urls = list(OrderedDict.fromkeys(urls))

    downloader = Downloader(max_conn=2)
    for aurl, fname in zip(urls, paths):
        downloader.enqueue_file(aurl, filename=fname)

    paths = downloader.download()

    outfiles = []
    for fname, srs_filename in zip(local_paths, local_filenames):
        name = fname.name

        for fname2 in paths:
            fname2 = pathlib.Path(fname2)

            # Plain text downloads need no extraction.
            if fname2.name.endswith('.txt'):
                continue

            year = fname2.name.split('_SRS')[0]

            # Anchor the match at the start of the daily filename: a plain
            # substring test also matches e.g. "2012" inside "20201201SRS.txt"
            # and would then look for the file in the wrong archive.
            if srs_filename.startswith(year):
                with tarfile.open(fname2) as open_tar:
                    try:
                        member = open_tar.getmember('SRS/' + srs_filename)
                    except KeyError:
                        # Some tars have a {year}_SRS when extracted, 2010 being one example
                        member = open_tar.getmember(f'{year}_SRS/' + srs_filename)
                    # Extract under the requested local name rather than the
                    # archive-internal path.
                    member.name = name
                    open_tar.extract(member, path=fname.parent)
                break

        # The local path is reported whether or not it came from an archive.
        outfiles.append(fname)

    paths.data = list(map(str, outfiles))
    return paths
def download_sample_data(overwrite=False):
    """
    Download all sample data at once.

    Files that already exist in the sample data directory are only fetched
    again when ``overwrite`` is true. Downloads that fail against the first
    base URL are retried against each remaining mirror in turn.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.

    Returns
    -------
    `list` or downloader results
        The list of already-present file paths when nothing had to be
        downloaded, otherwise the results object produced by the downloader
        (which may still contain errors if every mirror failed).
    """
    # Workaround for tox only. This is not supported as a user option
    env_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if env_dir:
        sampledata_dir = Path(env_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    downloader = Downloader(overwrite=overwrite)
    primary_url = _base_urls[0]

    present = []
    for sample_name in _sample_files.keys():
        target = sampledata_dir / sample_name
        # We have to avoid calling download if we already have all the files.
        if not target.exists() or overwrite:
            downloader.enqueue_file(urljoin(primary_url, sample_name), filename=target)
        else:
            present.append(target)

    if not downloader.queued_downloads:
        return present

    results = downloader.download()
    if not results.errors:
        return results

    log.info(
        'Failed to download one or more sample data files, retrying with a mirror.'
    )

    for mirror_url in _base_urls[1:]:
        for index, failure in enumerate(results.errors):
            failed_name = failure.filepath_partial().name
            log.debug(
                f"Failed to download {_sample_files[failed_name]} from {failure.url}: {failure.exception}"
            )
            # Overwrite the parfive error to change the url to a mirror
            results._errors[index] = _error(
                failure.filepath_partial,
                urljoin(mirror_url, failed_name),
                failure.exception,
            )

        results = downloader.retry(results)
        if not results.errors:
            return results

    # Every mirror has been tried; report what is still missing.
    for failure in results.errors:
        failed_name = failure.filepath_partial().name
        log.debug(
            f"Failed to download {_sample_files[failed_name]} from {failure.url}: {failure.exception}"
        )
        log.error(
            f"Failed to download {_sample_files[failed_name]} from all mirrors, the file will not be available."
        )

    return results
def fetch(self, qres, path=None, overwrite=False, progress=True,
          downloader=None, wait=True, **kwargs):
    """
    Download a set of results.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download. A single row is converted to a table first.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory, or file template including the
        ``{file}`` string which will be replaced with the filename.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if ``'unique'`` the filename
        will be modified to be unique.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.
    **kwargs : dict, optional
        Passed to `parfive.Downloader.enqueue_file`.

    Returns
    -------
    results: `parfive.Results`
        The value of ``downloader.download()``; `None` when a ``downloader``
        was supplied and ``wait`` is `False`.
    """
    path = None if path is None else Path(path)

    if isinstance(qres, QueryResponseRow):
        qres = qres.as_table()

    # An empty response has no 'url' column worth reading.
    urls = list(qres['url']) if len(qres) else []

    # The remote file name is whatever follows the last slash of the URL.
    remote_names = [link.rsplit('/', 1)[-1] for link in urls]
    targets = self._get_full_filenames(qres, remote_names, path)

    caller_supplied = bool(downloader)
    if not caller_supplied:
        downloader = Downloader(progress=progress, overwrite=overwrite)

    for link, target in zip(urls, targets):
        downloader.enqueue_file(link, filename=target, **kwargs)

    # Only defer the download when the caller owns the downloader and
    # explicitly asked not to wait.
    if wait or not caller_supplied:
        return downloader.download()
    return None