def retrieve_data_from_uid(self, uids, *, cache=True):
    """
    Stage & Download ALMA data.  Will print out the expected file size
    before attempting the download.

    Parameters
    ----------
    uids : list or str
        A list of valid UIDs or a single UID.
        UIDs should have the form: 'uid://A002/X391d0b/X7b'
    cache : bool
        Whether to cache the downloads.

    Returns
    -------
    downloaded_files : list
        A list of the downloaded file paths
    """
    if isinstance(uids, (str, bytes)):
        uids = [uids]
    if not isinstance(uids, (list, tuple, np.ndarray)):
        raise TypeError("Datasets must be given as a list of strings.")

    files = self.get_data_info(uids)
    file_urls = files['access_url']
    totalsize = files['content_length'].sum() * u.B

    # each_size, totalsize = self.data_size(files)
    log.info("Downloading files of size {0}...".format(totalsize.to(u.GB)))
    # TODO: Add cache=cache keyword here.  Currently would have no effect.
    downloaded_files = self.download_files(file_urls)

    return downloaded_files
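# Usage sketch (illustrative, not part of the method above): how this method
# would typically be reached through the public astroquery.alma client.  The
# UID is the example form quoted in the docstring; swap in a real UID from a
# query result.
def _example_alma_retrieve_by_uid():
    from astroquery.alma import Alma
    alma = Alma()
    # Stages the data, logs the total download size, and returns local paths.
    return alma.retrieve_data_from_uid('uid://A002/X391d0b/X7b')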
def get_artifact(self, artifact_id, filename=None, verbose=False):
    """
    Download artifacts from EHST.  An artifact is a single Hubble
    product file.

    Parameters
    ----------
    artifact_id : string
        id of the artifact to be downloaded, mandatory
        The identifier of the physical product (file) we want to retrieve.
    filename : string
        file name to be used to store the artifact, optional, default None
        File name for the artifact
    verbose : bool
        optional, default 'False'
        flag to display information about the process

    Returns
    -------
    None. It downloads the artifact indicated
    """
    params = {"ARTIFACT_ID": artifact_id}
    response = self._request('GET', self.data_url, save=True, cache=True,
                             params=params)
    if filename is None:
        filename = artifact_id

    if verbose:
        log.info(self.data_url + "?ARTIFACT_ID=" + artifact_id)
        log.info(self.copying_string.format(filename))
    shutil.move(response, filename)
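# Usage sketch (assumption: the public astroquery.esa.hubble client; the
# artifact id below is a hypothetical example, not taken from the code above).
def _example_ehst_get_artifact():
    from astroquery.esa.hubble import ESAHubble
    # Downloads the single product file into the working directory.
    ESAHubble.get_artifact(artifact_id='w0ji0v01t_c2f.fits')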
def __init__(self, provider="AWS", profile=None, verbose=False):
    """
    Initialize class to enable downloading public files from S3
    instead of STScI servers.
    Requires the boto3 and botocore libraries to function.

    Parameters
    ----------
    provider : str
        Which cloud data provider to use.  Currently only AWS S3 is
        supported, so at the moment this argument is ignored.
    profile : str
        Profile to use to identify yourself to the cloud provider
        (usually in ~/.aws/config).
    verbose : bool
        Default False.  Display extra info and warnings if true.
    """
    # Dealing with deprecated argument
    if profile is not None:
        warnings.warn(("MAST Open Data on AWS is now free to access and does "
                       "not require an AWS account"),
                      AstropyDeprecationWarning)

    import boto3
    import botocore

    self.supported_missions = ["mast:hst/product", "mast:tess/product",
                               "mast:kepler"]

    self.boto3 = boto3
    self.botocore = botocore
    self.config = botocore.client.Config(signature_version=botocore.UNSIGNED)

    self.pubdata_bucket = "stpubdata"

    if verbose:
        log.info("Using the S3 STScI public dataset")
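# Usage sketch (assumption): in astroquery.mast this cloud-access object is
# normally created indirectly, e.g. via Observations.enable_cloud_dataset(),
# rather than constructed by hand.
def _example_mast_enable_cloud():
    from astroquery.mast import Observations
    # After this call, downloads of supported products prefer the public
    # 'stpubdata' S3 bucket over the STScI servers.
    Observations.enable_cloud_dataset(provider="AWS")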
def test_esasky_get_spectra_obs_id(self):
    download_directory = "ESASkyRemoteTest"
    if not os.path.exists(download_directory):
        os.makedirs(download_directory)

    missions = ["ISO-IR", "Chandra", "IUE", "XMM-NEWTON", "HST-IR",
                "Herschel", "HST-UV", "HST-OPTICAL"]
    result = ESASkyClass.get_spectra(
        observation_ids=["02101201", "1005", "LWR13178", "0001730201",
                         "ibh706cqq", "1342253595", "z1ax0102t",
                         "oeik2s020"],
        missions=missions, download_dir=download_directory)

    for mission in missions:
        file_path = os.path.join(download_directory, mission)
        assert os.path.exists(file_path)
        log.info("Checking {} data.".format(mission))
        if mission.upper() == "HERSCHEL":
            assert isinstance(
                result[mission.upper()]["1342253595"]["WBS"]["WBS-V_USB_4b"],
                HDUList)
            assert isinstance(
                result[mission.upper()]["1342253595"]["HRS"]["HRS-H_LSB_4b"],
                HDUList)
        else:
            assert isinstance(result[mission.upper()][0], HDUList)

    result = None
    shutil.rmtree(download_directory)
def get_postcard(self, observation_id, calibration_level="RAW",
                 resolution=256, filename=None, verbose=False):
    """
    Download postcards from EHST.

    Parameters
    ----------
    observation_id : string
        id of the observation for which to download the postcard, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    calibration_level : string
        calibration level, optional, default 'RAW'
        The identifier of the data reduction/processing applied to the
        data.  By default, the most scientifically relevant level will be
        chosen.  RAW, CALIBRATED, PRODUCT or AUXILIARY
    resolution : integer
        postcard resolution, optional, default 256
        Resolution of the retrieved postcard.  256 or 1024
    filename : string
        file name to be used to store the postcard, optional, default None
        File name for the artifact
    verbose : bool
        optional, default 'False'
        Flag to display information about the process

    Returns
    -------
    None. It downloads the observation postcard indicated
    """
    params = {"RETRIEVAL_TYPE": "POSTCARD",
              "OBSERVATION_ID": observation_id,
              "CALIBRATION_LEVEL": calibration_level,
              "RESOLUTION": resolution}
    response = self._request('GET', self.data_url, save=True, cache=True,
                             params=params)
    if filename is None:
        filename = observation_id

    if verbose:
        log.info(self.data_url + "&".join([
            "?RETRIEVAL_TYPE=POSTCARD",
            "OBSERVATION_ID=" + observation_id,
            "CALIBRATION_LEVEL=" + calibration_level,
            "RESOLUTION=" + str(resolution)]))
        log.info(self.copying_string.format(filename))
    shutil.move(response, filename)
def test_esasky_get_images_obs_id(self):
    download_directory = "ESASkyRemoteTest"
    if not os.path.exists(download_directory):
        os.makedirs(download_directory)

    missions = ["SUZAKU", "ISO-IR", "Chandra", "XMM-OM-OPTICAL", "XMM",
                "XMM-OM-UV", "HST-IR", "Herschel", "Spitzer", "HST-UV",
                "HST-OPTICAL", "INTEGRAL"]
    result = ESASkyClass.get_images(
        observation_ids=["100001010", "01500403", "21171", "0852000101",
                         "0851180201", "0851180201", "n3tr01c3q",
                         "1342247257", "30002561-25100",
                         "hst_07553_3h_wfpc2_f160bw_pc", "ocli05leq",
                         "88600210001"],
        missions=missions, download_dir=download_directory)

    for mission in missions:
        file_path = os.path.join(download_directory, mission)
        assert os.path.exists(file_path)
        log.info("Checking {} data.".format(mission))
        if mission.upper() == "HERSCHEL":
            assert isinstance(result[mission.upper()][0]["250"], HDUList)
            assert isinstance(result[mission.upper()][0]["350"], HDUList)
            assert isinstance(result[mission.upper()][0]["500"], HDUList)
        else:
            assert isinstance(result[mission.upper()][0], HDUList)

    result = None
    shutil.rmtree(download_directory)
def make_finder_chart(target, radius, save_prefix,
                      service=SkyView.get_images,
                      service_kwargs={'survey': ['2MASS-K'], 'pixels': 500},
                      alma_kwargs={'public': False, 'science': False},
                      **kwargs):
    """
    Create a "finder chart" showing where ALMA has pointed in various
    bands, including different color coding for public/private data and
    each band.  Contours are set at various integration times.

    Parameters
    ----------
    target : `astropy.coordinates` or str
        A legitimate target name
    radius : `~astropy.units.Quantity`
        A degree-equivalent radius.
    save_prefix : str
        The prefix for the output files.  Both .reg and .png files will be
        written.  The .reg files will have the band numbers and
        public/private appended, while the .png file will be named
        prefix_almafinderchart.png
    service : function
        The ``get_images`` function of an astroquery service, e.g. SkyView.
    service_kwargs : dict
        The keyword arguments to pass to the specified service.  For
        example, for SkyView, you can give it the survey ID (e.g., 2MASS-K)
        and the number of pixels in the resulting image.  See the
        documentation for the individual services for more details.
    alma_kwargs : dict
        Keywords to pass to the ALMA archive when querying.
    private_band_colors / public_band_colors : tuple
        A tuple or list of colors to be associated with private/public
        observations in the various bands
    integration_time_contour_levels : list or np.array
        The levels at which to draw contours in units of seconds.  Default
        is log-spaced (2^n) seconds: [1., 2., 4., 8., 16., 32.]
    """
    log.info("Querying {0} for images".format(service))
    images = service(target, radius=radius, **service_kwargs)

    image0_hdu = images[0][0]

    return make_finder_chart_from_image(image0_hdu, target=target,
                                        radius=radius,
                                        save_prefix=save_prefix,
                                        alma_kwargs=alma_kwargs,
                                        **kwargs)
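# Usage sketch (illustrative; target and prefix are placeholders).  Fetches a
# 2MASS-K background image from SkyView (the default service above) and
# overlays ALMA pointings on it.
def _example_make_finder_chart():
    from astropy import units as u
    # Writes etacar_almafinderchart.png plus per-band .reg files.
    make_finder_chart('Eta Carinae', radius=3 * u.arcmin,
                      save_prefix='etacar')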
def wrapper(*args, **kwargs):
    ts = time.time()
    for i in range(num_tries):
        result = function(*args, **kwargs)
    te = time.time()
    tt = (te - ts) / num_tries
    if verbose:  # pragma: no cover
        log.info('{} took {} s on AVERAGE for {} call(s).'.format(
            function.__name__, tt, num_tries))
    return tt, result
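# Context sketch (assumption): `wrapper` closes over `function`, `num_tries`,
# and `verbose`, so it is presumably the inner function of a timing-decorator
# factory shaped roughly like this (named `timefunc` here for illustration).
import time
from functools import wraps
from astropy import log

def timefunc(num_tries=1, verbose=True):
    def real_decorator(function):
        @wraps(function)
        def wrapper(*args, **kwargs):
            # Time `num_tries` calls and report the average wall-clock time.
            ts = time.time()
            for _ in range(num_tries):
                result = function(*args, **kwargs)
            tt = (time.time() - ts) / num_tries
            if verbose:
                log.info('{} took {} s on AVERAGE for {} call(s).'.format(
                    function.__name__, tt, num_tries))
            return tt, result
        return wrapper
    return real_decorator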
def _get_product_filename(self, product_type, filename):
    if product_type == "PRODUCT":
        return filename
    elif product_type == "SCIENCE_PRODUCT":
        log.info("This is a SCIENCE_PRODUCT, the filename will be "
                 "renamed to " + filename + ".fits.gz")
        return filename + ".fits.gz"
    else:
        log.info("This is a POSTCARD, the filename will be "
                 "renamed to " + filename + ".jpg")
        return filename + ".jpg"
def login(self, token=None, store_token=False, reenter_token=False):
    """
    Log session into the MAST portal.

    Parameters
    ----------
    token : string, optional
        Default is None.
        The token to authenticate the user.
        This can be generated at
        https://auth.mast.stsci.edu/token?suggested_name=Astroquery&suggested_scope=mast:exclusive_access.
        If not supplied, it will be prompted for if not in the keyring
        or set via $MAST_API_TOKEN
    store_token : bool, optional
        Default False.
        If true, MAST token will be stored securely in your keyring.
    reenter_token : bool, optional
        Default False.
        Asks for the token even if it is already stored in the keyring
        or $MAST_API_TOKEN environment variable.
        This is the way to overwrite an already stored token on the
        keyring.
    """

    if token is None and "MAST_API_TOKEN" in os.environ:
        token = os.environ["MAST_API_TOKEN"]

    if token is None:
        token = keyring.get_password("astroquery:mast.stsci.edu.token",
                                     "masttoken")

    if token is None or reenter_token:
        info_msg = ("If you do not have an API token already, visit the "
                    "following link to create one: ")
        log.info(info_msg + self.AUTH_URL)
        token = getpass("Enter MAST API Token: ")

    # store token if desired
    if store_token:
        keyring.set_password("astroquery:mast.stsci.edu.token",
                             "masttoken", token)

    self.session.headers["Accept"] = "application/json"
    self.session.cookies["mast_token"] = token
    info = self.session_info()

    if not info["anon"]:
        log.info("MAST API token accepted, welcome {}".format(
            info["attrib"].get("display_name")))
    else:
        warn_msg = ("MAST API token invalid!\n"
                    "To create a new API token visit the following link: " +
                    self.AUTH_URL)
        warnings.warn(warn_msg, AuthenticationWarning)

    return not info["anon"]
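# Usage sketch (assumption: the public astroquery.mast interface).  No token
# is hard-coded here; login() falls back to $MAST_API_TOKEN, the keyring, or
# an interactive prompt, as described in the docstring above.
def _example_mast_login():
    from astroquery.mast import Observations
    # Prompts for a token if none is cached, then stores it in the keyring.
    Observations.login(store_token=True)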
def query_target(self, name, filename=None, output_format='votable',
                 verbose=False):
    """
    Executes a query against EHST and downloads the XML with the results.

    Parameters
    ----------
    name : string
        target name to be requested, mandatory
    filename : string
        file name to be used to store the metadata, optional, default None
    output_format : string
        optional, default 'votable'
        output format of the query
    verbose : bool
        optional, default 'False'
        Flag to display information about the process

    Returns
    -------
    Table with the result of the query. It downloads metadata as a file.
    """
    params = {"RESOURCE_CLASS": "OBSERVATION",
              "USERNAME": "ehst-astroquery",
              "SELECTED_FIELDS": "OBSERVATION",
              "QUERY": "(TARGET.TARGET_NAME=='" + name + "')",
              "RETURN_TYPE": str(output_format)}
    response = self._request('GET', self.metadata_url, save=True,
                             cache=True, params=params)

    if filename is None:
        filename = "target.xml"

    if verbose:
        log.info(self.metadata_url + "?RESOURCE_CLASS=OBSERVATION&"
                 "SELECTED_FIELDS=OBSERVATION&QUERY=(TARGET.TARGET_NAME"
                 "=='" + name + "')&USERNAME=ehst-astroquery&"
                 "RETURN_TYPE=" + str(output_format))
        log.info(self.copying_string.format(filename))

    shutil.move(response, filename)

    return modelutils.read_results_table_from_file(filename,
                                                   str(output_format))
def download_product(self, observation_id, calibration_level="RAW",
                     filename=None, verbose=False):
    """
    Download products from EHST.

    Parameters
    ----------
    observation_id : string
        id of the observation to be downloaded, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    calibration_level : string
        calibration level, optional, default 'RAW'
        The identifier of the data reduction/processing applied to the
        data.  By default, the most scientifically relevant level will be
        chosen.  RAW, CALIBRATED, PRODUCT or AUXILIARY
    filename : string
        file name to be used to store the artifact, optional, default None
        File name for the observation.
    verbose : bool
        optional, default 'False'
        flag to display information about the process

    Returns
    -------
    None. It downloads the observation indicated
    """
    params = {"OBSERVATION_ID": observation_id,
              "CALIBRATION_LEVEL": calibration_level}

    if filename is None:
        filename = observation_id + ".tar"

    response = self._request('GET', self.data_url, save=True, cache=True,
                             params=params)

    if verbose:
        log.info(self.data_url + "?OBSERVATION_ID=" + observation_id +
                 "&CALIBRATION_LEVEL=" + calibration_level)
        log.info(self.copying_string.format(filename))
    shutil.move(response, filename)
def get_files_from_tarballs(self, downloaded_files, *, regex=r'.*\.fits$',
                            path='cache_path', verbose=True):
    """
    Given a list of successfully downloaded tarballs, extract files
    with names matching a specified regular expression.  The default is
    to extract all FITS files.

    NOTE: ALMA now supports direct listing and downloads of tarballs.
    See ``get_data_info`` and ``download_and_extract_files``.

    Parameters
    ----------
    downloaded_files : list
        A list of downloaded files.  These should be paths on your local
        machine.
    regex : str
        A valid regular expression
    path : 'cache_path' or str
        If 'cache_path', will use the astroquery.Alma cache directory
        (``Alma.cache_location``), otherwise will use the specified path.
        Note that the subdirectory structure of the tarball will be
        maintained.

    Returns
    -------
    filelist : list
        A list of the extracted file locations on disk
    """
    if path == 'cache_path':
        path = self.cache_location
    elif not os.path.isdir(path):
        raise OSError("Specified an invalid path {0}.".format(path))

    fitsre = re.compile(regex)

    filelist = []

    for fn in downloaded_files:
        tf = tarfile.open(fn)
        for member in tf.getmembers():
            if fitsre.match(member.name):
                if verbose:
                    log.info("Extracting {0} to {1}".format(member.name,
                                                            path))
                tf.extract(member, path)
                filelist.append(os.path.join(path, member.name))

    return filelist
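# Usage sketch (assumption: the public astroquery.alma client).  Chains the
# two ALMA helpers above: stage/download by UID, then pull FITS files out of
# the returned tarballs.  The UID is the docstring's example form.
def _example_alma_extract_fits():
    from astroquery.alma import Alma
    alma = Alma()
    tarballs = alma.retrieve_data_from_uid('uid://A002/X391d0b/X7b')
    # Extract only FITS files (the default regex) into the cache directory.
    return alma.get_files_from_tarballs(tarballs)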
def make_finder_chart_from_image(image, target, radius, save_prefix,
                                 alma_kwargs={'public': False,
                                              'science': False,
                                              'cache': False},
                                 **kwargs):
    """
    Create a "finder chart" showing where ALMA has pointed in various
    bands, including different color coding for public/private data and
    each band.  Contours are set at various integration times.

    Parameters
    ----------
    image : fits.PrimaryHDU or fits.ImageHDU object
        The image to overlay onto
    target : `astropy.coordinates` or str
        A legitimate target name
    radius : `astropy.units.Quantity`
        A degree-equivalent radius
    save_prefix : str
        The prefix for the output files.  Both .reg and .png files will be
        written.  The .reg files will have the band numbers and
        public/private appended, while the .png file will be named
        prefix_almafinderchart.png
    alma_kwargs : dict
        Keywords to pass to the ALMA archive when querying.
    private_band_colors / public_band_colors : tuple
        A tuple or list of colors to be associated with private/public
        observations in the various bands
    integration_time_contour_levels : list or np.array
        The levels at which to draw contours in units of seconds.  Default
        is log-spaced (2^n) seconds: [1., 2., 4., 8., 16., 32.]
    """
    log.info("Querying ALMA around {0}".format(target))
    catalog = Alma.query_region(coordinate=target, radius=radius,
                                get_html_version=True, **alma_kwargs)

    return make_finder_chart_from_image_and_catalog(image, catalog=catalog,
                                                    save_prefix=save_prefix,
                                                    **kwargs)
def _parse_result(self, response, verbose=False):
    # if verbose is False then suppress any VOTable related warnings
    if not verbose:
        commons.suppress_vo_warnings()
    # try to parse the result into an astropy.Table, else
    # return the raw result with an informative error message.
    try:
        # do something with regex to get the result into
        # astropy.Table form. return the Table.
        data = BytesIO(response.content)
        table = Table.read(data)
        return table
    except ValueError as e:
        # catch common errors here, but never use bare excepts
        # return raw result/ handle in some way
        log.info("Failed to convert query result to table: {}".format(e))
        return response
def get_postcard(self, tdt, *, filename=None, verbose=False):
    """
    Download postcards from the ISO Data Archive.

    Parameters
    ----------
    tdt : string
        id of the observation for which to download the postcard, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    filename : string
        file name to be used to store the postcard, optional, default None
    verbose : bool
        optional, default 'False'
        Flag to display information about the process

    Returns
    -------
    The file name used to store the postcard
    """
    link = self.get_postcard_link(tdt, filename, verbose)
    local_filepath = self._request('GET', link, cache=True, save=True)

    if filename is None:
        response = self._request('HEAD', link)
        response.raise_for_status()
        filename = re.findall('filename="(.+)"',
                              response.headers["Content-Disposition"])[0]
    else:
        filename = filename + ".png"

    if verbose:
        log.info("Copying file to {0}...".format(filename))

    shutil.move(local_filepath, filename)

    if verbose:
        log.info("Wrote {0} to {1}".format(link, filename))

    return filename
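# Usage sketch (assumption: the public astroquery.esa.iso client; the TDT is
# the 8-digit example quoted in the related ISO helpers below).
def _example_iso_get_postcard():
    from astroquery.esa.iso import ISO
    # Saves the postcard under the name reported by the archive and
    # returns that filename.
    return ISO.get_postcard('40001501')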
def _format_output(self, raw_output):
    if self.FORMAT == 'csv':
        # Remove any HTML tags
        fixed_raw_output = re.sub('<[^<]+?>', '', raw_output)
        split_output = fixed_raw_output.splitlines()

        columns = list(csv.reader([split_output[0]], delimiter=',',
                                  quotechar='"'))[0]
        rows = split_output[1:]

        # Quick test to see if the API returned a valid CSV file.
        # If not, try to return a JSON-compliant dictionary.
        test_row = list(csv.reader([rows[0]], delimiter=',',
                                   quotechar='"'))[0]
        if len(columns) != len(test_row):
            log.info("The API did not return a valid CSV output! \n"
                     "Outputting JSON-compliant dictionary instead.")
            output = json.loads(raw_output)
            return output

        # Initialize and populate dictionary
        output_dict = {key: [] for key in columns}

        for row in rows:
            split_row = list(csv.reader([row], delimiter=',',
                                        quotechar='"'))[0]
            for ct, key in enumerate(columns):
                output_dict[key].append(split_row[ct])

        # Convert dictionary to Astropy Table.
        output = Table(output_dict, names=columns)
    else:
        # Server response is JSON compliant.  Simply
        # convert from raw text to dictionary.
        output = json.loads(raw_output)

    return output
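# Standalone sketch of the parsing trick used above: csv.reader with
# quotechar='"' keeps commas inside quoted fields intact, which a plain
# str.split(',') would break.  The row is purely illustrative data.
def _example_quoted_csv_row():
    import csv
    row = 'M31,"Andromeda, spiral galaxy",3.4'
    fields = list(csv.reader([row], delimiter=',', quotechar='"'))[0]
    # -> ['M31', 'Andromeda, spiral galaxy', '3.4']
    return fields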
def logout(self, verbose=False):
    """Performs a logout.

    Parameters
    ----------
    verbose : bool, optional, default 'False'
        flag to display information about the process
    """
    try:
        TapPlus.logout(self, verbose=verbose)
    except HTTPError:
        log.error("Error logging out TAP server")
        return
    log.info("Gaia TAP server logout OK")
    try:
        TapPlus.logout(self.__gaiadata, verbose=verbose)
        log.info("Gaia data server logout OK")
    except HTTPError:
        log.error("Error logging out data server")
def species_lookuptable(cache=True):
    """
    Get a lookuptable from chemical name + OrdinaryStructuralFormula
    to VAMDC id.
    """
    if not os.path.exists(Conf.cache_location):
        os.makedirs(Conf.cache_location)

    lut_path = os.path.join(Conf.cache_location,
                            'species_lookuptable.json')
    if os.path.exists(lut_path) and cache:
        log.info("Loading cached molecular line ID database")
        with open(lut_path, 'r') as fh:
            lutdict = json.load(fh)
        lookuptable = SpeciesLookuptable(lutdict)
    else:
        log.info("Loading molecular line ID database")
        from vamdclib import nodes as vnodes
        from vamdclib import request as vrequest

        nl = vnodes.Nodelist()
        nl.findnode('cdms')
        cdms = nl.findnode('cdms')

        request = vrequest.Request(node=cdms)

        # Retrieve all species from CDMS
        result = request.getspecies()
        molecules = result.data['Molecules']

        lutdict = {"{0} {1}".format(molecules[key].ChemicalName,
                                    molecules[key].OrdinaryStructuralFormula):
                   molecules[key].VAMDCSpeciesID
                   for key in molecules}
        lookuptable = SpeciesLookuptable(lutdict)
        if cache:
            with open(lut_path, 'w') as fh:
                json.dump(lookuptable, fh)

    return lookuptable
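# Usage sketch (assumption): SpeciesLookuptable behaves like a dict keyed by
# "ChemicalName OrdinaryStructuralFormula" strings (as built above), so a
# substring scan is one simple way to find a VAMDC species id.
def _example_find_species_id(name_fragment="CO"):
    lut = species_lookuptable()
    # Return every (key, VAMDC id) pair whose key mentions the fragment.
    return {key: vamdc_id for key, vamdc_id in lut.items()
            if name_fragment in key}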
def get_download_link(self, tdt, retrieval_type, filename, verbose,
                      **kwargs):
    """
    Get the download link for ISO.

    Parameters
    ----------
    tdt : string
        id of the Target Dedicated Time (observation identifier) to be
        downloaded, mandatory
        The identifier of the observation we want to retrieve, 8 digits
        example: 40001501
    retrieval_type : string
        type of retrieval: OBSERVATION for full observation or STANDALONE
        for single files
    filename : string
        file name to be used to store the file
    verbose : bool
        optional, default 'False'
        flag to display information about the process
    **kwargs
        Additional parameters appended to the link, e.g. product_level:
        the level to download; by default everything is selected.
        Values: DEFAULT_DATA_SET, FULLY_PROC, RAW_DATA, BASIC_SCIENCE,
        QUICK_LOOK, HPDP, ALL

    Returns
    -------
    The download link. If verbose, the link is also logged.
    """
    link = self.data_url
    link = link + "retrieval_type=" + retrieval_type
    link = link + "&DATA_RETRIEVAL_ORIGIN=astroquery"
    link = link + "&tdt=" + tdt
    link = link + "".join("&{0}={1}".format(key, val)
                          for key, val in kwargs.items())
    if verbose:
        log.info(link)
    return link
def login(self, user=None, password=None, credentials_file=None,
          verbose=False):
    """Performs a login.
    User and password arguments can be used or a file that contains
    username and password (2 lines: one for username and the following
    one for the password).  If no arguments are provided, a prompt asking
    for username and password will appear.

    Parameters
    ----------
    user : str, default None
        login name
    password : str, default None
        user password
    credentials_file : str, default None
        file containing user and password in two lines
    verbose : bool, optional, default 'False'
        flag to display information about the process
    """
    try:
        log.info("Login to gaia TAP server")
        TapPlus.login(self, user=user, password=password,
                      credentials_file=credentials_file,
                      verbose=verbose)
    except HTTPError:
        log.error("Error logging in TAP server")
        return
    u = self._TapPlus__user
    p = self._TapPlus__pwd
    try:
        log.info("Login to gaia data server")
        TapPlus.login(self.__gaiadata, user=u, password=p,
                      verbose=verbose)
    except HTTPError:
        log.error("Error logging in data server")
        log.error("Logging out from TAP server")
        TapPlus.logout(self, verbose=verbose)
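# Usage sketch (assumption: the public astroquery.gaia interface).  With no
# arguments, login() prompts interactively for credentials; no username or
# password is hard-coded here.
def _example_gaia_login():
    from astroquery.gaia import Gaia
    # Alternatively: Gaia.login(credentials_file='gaia_credentials.txt'),
    # where the file holds the username and password on two lines.
    Gaia.login()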
def _run_job(self, job_location, verbose, poll_interval=20):
    """
    Start an async job (e.g. TAP or SODA) and wait for it to be completed.

    Parameters
    ----------
    job_location: str
        The url to query the job status and details
    verbose: bool
        Should progress be logged periodically
    poll_interval: int, optional
        The number of seconds to wait between checks on the status of the
        job.

    Returns
    -------
    The single word final status of the job. Normally COMPLETED or ERROR
    """
    # Start the async job
    if verbose:
        log.info("Starting the retrieval job...")
    self._request('POST', job_location + "/phase",
                  data={'phase': 'RUN'}, cache=False)

    # Poll until the async job has finished
    prev_status = None
    count = 0
    job_details = self._get_job_details_xml(job_location)
    status = self._read_job_status(job_details, verbose)
    while status in ('EXECUTING', 'QUEUED', 'PENDING'):
        count += 1
        if verbose and (status != prev_status or count > 10):
            log.info("Job is %s, polling every %d seconds." %
                     (status, poll_interval))
            count = 0
            prev_status = status
        time.sleep(poll_interval)
        job_details = self._get_job_details_xml(job_location)
        status = self._read_job_status(job_details, verbose)
    return status
def get_postcard_link(self, tdt, filename=None, verbose=False):
    """
    Get the postcard link for ISO.

    Parameters
    ----------
    tdt : string
        id of the Target Dedicated Time (observation identifier) to be
        downloaded, mandatory
        The identifier of the observation we want to retrieve, 8 digits
        example: 40001501
    filename : string
        file name to be used to store the file
    verbose : bool
        optional, default 'False'
        flag to display information about the process

    Returns
    -------
    The postcard link
    """
    link = self.data_url
    link = link + "retrieval_type=POSTCARD"
    link = link + "&DATA_RETRIEVAL_ORIGIN=astroquery"
    link = link + "&tdt=" + tdt
    if verbose:
        log.info(link)
    return link
def login_gui(self, verbose=False):
    """Performs a login using a GUI dialog.

    Parameters
    ----------
    verbose : bool, optional, default 'False'
        flag to display information about the process
    """
    try:
        log.info("Login to gaia TAP server")
        TapPlus.login_gui(self, verbose=verbose)
    except HTTPError:
        log.error("Error logging in TAP server")
        return
    u = self._TapPlus__user
    p = self._TapPlus__pwd
    try:
        log.info("Login to gaia data server")
        TapPlus.login(self.__gaiadata, user=u, password=p,
                      verbose=verbose)
    except HTTPError:
        log.error("Error logging in data server")
        log.error("Logging out from TAP server")
        TapPlus.logout(self, verbose=verbose)
def get_postcard(self, observation_id, *, image_type="OBS_EPIC",
                 filename=None, verbose=False):
    """
    Download postcards from XSA.

    Parameters
    ----------
    observation_id : string
        id of the observation for which to download the postcard, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    image_type : string
        image type, optional, default 'OBS_EPIC'
        The image_type to be returned.  It can be: OBS_EPIC,
        OBS_RGS_FLUXED, OBS_RGS_FLUXED_2, OBS_RGS_FLUXED_3, OBS_EPIC_MT,
        OBS_RGS_FLUXED_MT, OBS_OM_V, OBS_OM_B, OBS_OM_U, OBS_OM_L,
        OBS_OM_M, OBS_OM_S, OBS_OM_W
    filename : string
        file name to be used to store the postcard, optional, default None
    verbose : bool
        optional, default 'False'
        Flag to display information about the process

    Returns
    -------
    The file name used to store the postcard
    """
    params = {'RETRIEVAL_TYPE': 'POSTCARD',
              'OBSERVATION_ID': observation_id,
              'OBS_IMAGE_TYPE': image_type,
              'PROTOCOL': 'HTTP'}

    link = self.data_url + "".join("&{0}={1}".format(key, val)
                                   for key, val in params.items())
    if verbose:
        log.info(link)

    local_filepath = self._request('GET', link, cache=True, save=True)

    if filename is None:
        response = self._request('HEAD', link)
        response.raise_for_status()
        filename = re.findall('filename="(.+)"',
                              response.headers["Content-Disposition"])[0]
    else:
        filename = filename + ".png"

    log.info(f"Copying file to {filename}...")
    shutil.move(local_filepath, filename)

    if verbose:
        log.info(f"Wrote {link} to {filename}")

    return filename
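# Usage sketch (assumption: the public astroquery.esa.xmm_newton client; the
# observation id is a placeholder ten-digit XMM-Newton ObsID).
def _example_xsa_get_postcard():
    from astroquery.esa.xmm_newton import XMMNewton
    # Saves the EPIC postcard and returns the local filename.
    return XMMNewton.get_postcard('0505720401')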
def _read_job_status(self, job_details_xml, verbose):
    """
    Read the job status from the job details XML.

    Parameters
    ----------
    job_details_xml: `xml.etree.ElementTree`
        The SODA job details
    verbose: bool
        Should additional information be logged for errors

    Returns
    -------
    The single word status of the job. e.g. COMPLETED, EXECUTING, ERROR
    """
    status_node = job_details_xml.find(
        "{http://www.ivoa.net/xml/UWS/v1.0}phase")
    if status_node is None:
        if verbose:
            log.info("Unable to find status in status xml:")
            ElementTree.dump(job_details_xml)
        raise ValueError('Invalid job status xml received.')
    status = status_node.text
    return status
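# Standalone sketch of the UWS XML this helper parses: the <uws:phase>
# element carries the job status, in the namespace used by the find() call
# above.  The sample document is illustrative, not a real server response.
def _example_parse_uws_phase():
    from xml.etree import ElementTree
    sample = ('<uws:job xmlns:uws="http://www.ivoa.net/xml/UWS/v1.0">'
              '<uws:phase>EXECUTING</uws:phase></uws:job>')
    root = ElementTree.fromstring(sample)
    # Namespaced lookup, as in _read_job_status above -> 'EXECUTING'
    return root.find("{http://www.ivoa.net/xml/UWS/v1.0}phase").text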
def _check_existing_files(self, datasets, continuation=False,
                          destination=None):
    """Detect already downloaded datasets."""

    datasets_to_download = []
    files = []

    for dataset in datasets:
        ext = os.path.splitext(dataset)[1].lower()
        if ext in ('.fits', '.tar'):
            local_filename = dataset
        elif ext == '.fz':
            local_filename = dataset[:-3]
        elif ext == '.z':
            local_filename = dataset[:-2]
        else:
            local_filename = dataset + ".fits"

        if destination is not None:
            local_filename = os.path.join(destination, local_filename)
        elif self.cache_location is not None:
            local_filename = os.path.join(self.cache_location,
                                          local_filename)

        if os.path.exists(local_filename):
            log.info("Found {0}.fits...".format(dataset))
            if continuation:
                datasets_to_download.append(dataset)
            else:
                files.append(local_filename)
        elif os.path.exists(local_filename + ".Z"):
            log.info("Found {0}.fits.Z...".format(dataset))
            if continuation:
                datasets_to_download.append(dataset)
            else:
                files.append(local_filename + ".Z")
        elif os.path.exists(local_filename + ".fz"):  # RICE-compressed
            log.info("Found {0}.fits.fz...".format(dataset))
            if continuation:
                datasets_to_download.append(dataset)
            else:
                files.append(local_filename + ".fz")
        else:
            datasets_to_download.append(dataset)

    return datasets_to_download, files
def _print_query_help(self, url, cache=True):
    """
    Download a form and print it in a quasi-human-readable way.
    """
    log.info("List of accepted column_filters parameters.")
    log.info("The presence of a column in the result table can be "
             "controlled if prefixed with a [ ] checkbox.")
    log.info("The default columns in the result table are shown as "
             "already ticked: [x].")

    result_string = []

    resp = self._request("GET", url, cache=cache)
    doc = BeautifulSoup(resp.content, 'html5lib')
    form = doc.select("html body form pre")[0]

    # Unwrap all paragraphs
    paragraph = form.find('p')
    while paragraph:
        paragraph.unwrap()
        paragraph = form.find('p')

    # For all sections
    for section in form.select("table"):
        section_title = "".join(section.stripped_strings)
        section_title = "\n".join(["", section_title,
                                   "-" * len(section_title)])
        result_string.append(section_title)

        checkbox_name = ""
        checkbox_value = ""
        for tag in section.next_siblings:
            if tag.name == u"table":
                break
            elif tag.name == u"input":
                if tag.get(u'type') == u"checkbox":
                    checkbox_name = tag['name']
                    checkbox_value = (u"[x]" if ('checked' in tag.attrs)
                                      else u"[ ]")
                    name = ""
                    value = ""
                else:
                    name = tag['name']
                    value = ""
            elif tag.name == u"select":
                options = []
                for option in tag.select("option"):
                    options += ["{0} ({1})"
                                .format(option['value'],
                                        "".join(option.stripped_strings))]
                name = tag[u"name"]
                value = ", ".join(options)
            else:
                name = ""
                value = ""

            if u"tab_" + name == checkbox_name:
                checkbox = checkbox_value
            else:
                checkbox = "   "

            if name != u"":
                result_string.append("{0} {1}: {2}"
                                     .format(checkbox, name, value))

    print("\n".join(result_string))
    return result_string
def retrieve_data(self, datasets, continuation=False, destination=None,
                  with_calib='none', request_all_objects=False,
                  unzip=True, request_id=None):
    """
    Retrieve a list of datasets from the ESO archive.

    Parameters
    ----------
    datasets : list of strings or string
        List of datasets strings to retrieve from the archive.
    destination: string
        Directory where the files are copied.
        Files already found in the destination directory are skipped,
        unless continuation=True.
        Defaults to astropy cache.
    continuation : bool
        Force the retrieval of data that are present in the destination
        directory.
    with_calib : string
        Retrieve associated calibration files: 'none' (default), 'raw' for
        raw calibrations, or 'processed' for processed calibrations.
    request_all_objects : bool
        When retrieving associated calibrations (``with_calib != 'none'``),
        this allows requesting all the objects, including the already
        downloaded ones, to be sure to retrieve all calibration files.
        This is useful when the download was interrupted. `False` by
        default.
    unzip : bool
        Unzip compressed files from the archive after download. `True` by
        default.
    request_id : str, int
        Retrieve from an existing request number rather than sending a new
        query, with the identifier from the URL in the email sent from
        the archive from the earlier request as in:
        https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]

    Returns
    -------
    files : list of strings or string
        List of files that have been locally downloaded from the archive.

    Examples
    --------
    >>> dptbl = Eso.query_instrument('apex', pi_coi='ginsburg')
    >>> dpids = [row['DP.ID'] for row in dptbl if 'Map' in row['Object']]
    >>> files = Eso.retrieve_data(dpids)
    """
    calib_options = {'none': '', 'raw': 'CalSelectorRaw2Raw',
                     'processed': 'CalSelectorRaw2Master'}

    if with_calib not in calib_options:
        raise ValueError("invalid value for 'with_calib', "
                         "it must be 'none', 'raw' or 'processed'")

    if isinstance(datasets, str):
        return_list = False
        datasets = [datasets]
    else:
        return_list = True
    if not isinstance(datasets, (list, tuple, np.ndarray)):
        raise TypeError("Datasets must be given as a list of strings.")

    # First: Detect datasets already downloaded
    if with_calib != 'none' and request_all_objects:
        datasets_to_download, files = list(datasets), []
    else:
        log.info("Detecting already downloaded datasets...")
        datasets_to_download, files = self._check_existing_files(
            datasets, continuation=continuation, destination=destination)

    # Second: Check that the datasets to download are in the archive
    if request_id is None:
        log.info("Checking availability of datasets to download...")
        valid_datasets = [self.verify_data_exists(ds)
                          for ds in datasets_to_download]
    else:
        # Assume all valid if a request_id was provided
        valid_datasets = [(ds, True) for ds in datasets_to_download]

    if not all(valid_datasets):
        invalid_datasets = [ds for ds, v in zip(datasets_to_download,
                                                valid_datasets) if not v]
        raise ValueError("The following data sets were not found on the "
                         "ESO servers: {0}".format(invalid_datasets))

    # Third: Download the other datasets
    log.info("Downloading datasets...")
    if datasets_to_download:
        if not self.authenticated():
            self.login()
        url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"

        with suspend_cache(self):  # Never cache staging operations
            if request_id is None:
                log.info("Contacting retrieval server...")
                retrieve_data_form = self._request("GET", url, cache=False)
                retrieve_data_form.raise_for_status()
                log.info("Staging request...")
                inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
                data_confirmation_form = self._activate_form(
                    retrieve_data_form, form_index=-1, inputs=inputs,
                    cache=False)

                data_confirmation_form.raise_for_status()
                root = BeautifulSoup(data_confirmation_form.content,
                                     'html5lib')
                login_button = root.select('input[value=LOGIN]')
                if login_button:
                    raise LoginError("Not logged in. "
                                     "You must be logged in to download "
                                     "data.")
                inputs = {}
                if with_calib != 'none':
                    inputs['requestCommand'] = calib_options[with_calib]

                # TODO: There may be another screen for Not Authorized;
                # that should be included too
                # form name is "retrieve"; no id
                data_download_form = self._activate_form(
                    data_confirmation_form, form_index=-1, inputs=inputs,
                    cache=False)
            else:
                # Build URL by hand
                request_url = 'https://dataportal.eso.org/rh/requests/'
                request_url += f'{self.USERNAME}/{request_id}'
                data_download_form = self._request("GET", request_url,
                                                   cache=False)

                _content = data_download_form.content.decode('utf-8')
                if 'Request Handler - Error' in _content:
                    # Likely a problem with the request_url
                    msg = (f"The form at {request_url} returned an error."
                           " See your recent requests at "
                           "https://dataportal.eso.org/rh/requests/"
                           f"{self.USERNAME}/recentRequests")
                    raise RemoteServiceError(msg)

            log.info("Staging form is at {0}"
                     .format(data_download_form.url))
            root = BeautifulSoup(data_download_form.content, 'html5lib')
            state = root.select('span[id=requestState]')[0].text
            t0 = time.time()
            while state not in ('COMPLETE', 'ERROR'):
                time.sleep(2.0)
                data_download_form = self._request(
                    "GET", data_download_form.url, cache=False)
                root = BeautifulSoup(data_download_form.content,
                                     'html5lib')
                state = root.select('span[id=requestState]')[0].text
                print("{0:20.0f}s elapsed"
                      .format(time.time() - t0), end='\r')
                sys.stdout.flush()
            if state == 'ERROR':
                raise RemoteServiceError("There was a remote service "
                                         "error; perhaps the requested "
                                         "file could not be found?")

            if with_calib != 'none':
                # When files are requested with calibrations, some
                # javascript is used to display the files, which prevents
                # retrieving the files directly.  So instead we retrieve
                # the download script provided in the web page, and use it
                # to extract the list of files.  The benefit of this is
                # also that in the download script the list of files is
                # de-duplicated, whereas on the web page the calibration
                # files would be duplicated for each exposure.
                link = root.select('a[href$="/script"]')[0]
                if 'downloadRequest' not in link.text:
                    # Make sure that we found the correct link
                    raise RemoteServiceError(
                        "A link was found in the download file for the "
                        "calibrations that is not a downloadRequest link "
                        "and therefore appears invalid.")

                href = link.attrs['href']
                script = self._request("GET", href, cache=False)
                fileLinks = re.findall(
                    r'"(https://dataportal\.eso\.org/dataPortal/api/requests/.*)"',
                    script.text)

                # Urls with api/ require using Basic Authentication,
                # though it's easier for us to reuse the existing requests
                # session (to avoid asking again for a username/password
                # if it is not stored).  So we remove api/ from the urls:
                fileLinks = [
                    f.replace(
                        'https://dataportal.eso.org/dataPortal/api/requests',
                        'https://dataportal.eso.org/dataPortal/requests')
                    for f in fileLinks]

                log.info("Detecting already downloaded datasets, "
                         "including calibrations...")
                fileIds = [f.rsplit('/', maxsplit=1)[1] for f in fileLinks]
                filteredIds, files = self._check_existing_files(
                    fileIds, continuation=continuation,
                    destination=destination)

                fileLinks = [f for f, fileId in zip(fileLinks, fileIds)
                             if fileId in filteredIds]
            else:
                fileIds = root.select('input[name=fileId]')
                fileLinks = ["http://dataportal.eso.org/dataPortal" +
                             fileId.attrs['value'].split()[1]
                             for fileId in fileIds]

            nfiles = len(fileLinks)
            log.info("Downloading {} files...".format(nfiles))
            log.debug("Files:\n{}".format('\n'.join(fileLinks)))
            for i, fileLink in enumerate(fileLinks, 1):
                fileId = fileLink.rsplit('/', maxsplit=1)[1]

                if request_id is not None:
                    # Since we fetched the script directly without sending
                    # a new request, check here that the file in the list
                    # is among those requested in the input list
                    if fileId.split('.fits')[0] not in datasets_to_download:
                        continue

                log.info("Downloading file {}/{}: {}..."
                         .format(i, nfiles, fileId))
                filename = self._request("GET", fileLink, save=True,
                                         continuation=True)

                if (filename.endswith(('.gz', '.7z', '.bz2', '.xz', '.Z'))
                        and unzip):
                    log.info("Unzipping file {0}...".format(fileId))
                    filename = system_tools.gunzip(filename)

                if destination is not None:
                    log.info("Copying file {0} to {1}...".format(
                        fileId, destination))
                    destfile = os.path.join(destination,
                                            os.path.basename(filename))
                    shutil.move(filename, destfile)
                    files.append(destfile)
                else:
                    files.append(filename)

    # Empty the redirect cache of this request session
    # Only available and needed for requests versions < 2.17
    try:
        self._session.redirect_cache.clear()
    except AttributeError:
        pass

    log.info("Done!")
    if (not return_list) and (len(files) == 1):
        files = files[0]
    return files
def _login(self, username=None, store_password=False,
           reenter_password=False):
    """
    Login to the ESO User Portal.

    Parameters
    ----------
    username : str, optional
        Username to the ESO Public Portal. If not given, it should be
        specified in the config file.
    store_password : bool, optional
        Stores the password securely in your keyring. Default is False.
    reenter_password : bool, optional
        Asks for the password even if it is already stored in the
        keyring. This is the way to overwrite an already stored password
        on the keyring. Default is False.
    """

    if username is None:
        if self.USERNAME != "":
            username = self.USERNAME
        elif self.username is not None:
            username = self.username
        else:
            raise LoginError("If you do not pass a username to login(), "
                             "you should configure a default one!")
    else:
        # store username as we may need it to re-authenticate
        self.username = username

    # Get password from keyring or prompt
    password, password_from_keyring = self._get_password(
        "astroquery:www.eso.org", username, reenter=reenter_password)

    # Authenticate
    log.info("Authenticating {0} on www.eso.org...".format(username))

    # Do not cache pieces of the login process
    login_response = self._request("GET",
                                   "https://www.eso.org/sso/login",
                                   cache=False)
    root = BeautifulSoup(login_response.content, 'html5lib')
    login_input = root.find(name='input', attrs={'name': 'execution'})
    if login_input is None:
        raise ValueError("ESO login page did not have the correct "
                         "attributes.")
    execution = login_input.get('value')

    login_result_response = self._request(
        "POST", "https://www.eso.org/sso/login",
        data={'username': username,
              'password': password,
              'execution': execution,
              '_eventId': 'submit',
              'geolocation': ''})
    login_result_response.raise_for_status()
    root = BeautifulSoup(login_result_response.content, 'html5lib')
    authenticated = root.find('h4').text == 'Login successful'

    if authenticated:
        log.info("Authentication successful!")
    else:
        log.exception("Authentication failed!")

    # When authenticated, save password in keyring if needed
    if authenticated and password_from_keyring is None and store_password:
        keyring.set_password("astroquery:www.eso.org", username, password)

    return authenticated