def logout(self, verbose=False):
    """Performs a logout

    Parameters
    ----------
    verbose : bool, optional, default False
        flag to display information about the process
    """
    try:
        TapPlus.logout(self, verbose=verbose)
    except HTTPError:
        log.error("Error logging out TAP server")
        return
    log.info("Gaia TAP server logout OK")
    try:
        TapPlus.logout(self.__gaiadata, verbose=verbose)
        log.info("Gaia data server logout OK")
    except HTTPError:
        log.error("Error logging out data server")
def _login(self, username, password):
    """
    Login to the Gemini Archive website.

    This method will authenticate the session as a particular user.  This may
    give you access to additional information or access based on your
    credentials

    Parameters
    ----------
    username : str
        The username to login as
    password : str
        The password for the given account
    """
    params = dict(username=username, password=password)
    r = self._session.request('POST', 'https://archive.gemini.edu/login/', params=params)
    # NB: "sucessfully" is misspelled in the archive's own response page, so
    # the check must match that exact byte string.
    if b'<P>Welcome, you are sucessfully logged in' not in r.content:
        log.error('Unable to login, please check your credentials')
        return False
    return True
def login(self, user=None, password=None, credentials_file=None, verbose=False):
    """Performs a login.

    User and password arguments can be used, or a file that contains the
    username and password (two lines: one for the username, the following one
    for the password).  If no arguments are provided, a prompt asking for
    username and password will appear.

    Parameters
    ----------
    user : str, default None
        login name
    password : str, default None
        user password
    credentials_file : str, default None
        file containing user and password in two lines
    verbose : bool, optional, default False
        flag to display information about the process
    """
    try:
        log.info("Login to gaia TAP server")
        TapPlus.login(self, user=user, password=password,
                      credentials_file=credentials_file, verbose=verbose)
    except HTTPError:
        log.error("Error logging in TAP server")
        return
    u = self._TapPlus__user
    p = self._TapPlus__pwd
    try:
        log.info("Login to gaia data server")
        TapPlus.login(self.__gaiadata, user=u, password=p, verbose=verbose)
    except HTTPError:
        log.error("Error logging in data server")
        log.error("Logging out from TAP server")
        TapPlus.logout(self, verbose=verbose)
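# Usage sketch (hedged): the credentials below are placeholders for a real ESA
# Gaia archive account.
#
#     from astroquery.gaia import Gaia
#     Gaia.login(user="myuser", password="mypassword")
#     # ... authenticated queries and data retrieval ...
#     Gaia.logout()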
def login_gui(self, verbose=False):
    """Performs a login using a GUI dialog

    Parameters
    ----------
    verbose : bool, optional, default False
        flag to display information about the process
    """
    try:
        log.info("Login to gaia TAP server")
        TapPlus.login_gui(self, verbose=verbose)
    except HTTPError:
        log.error("Error logging in TAP server")
        return
    u = self._TapPlus__user
    p = self._TapPlus__pwd
    try:
        log.info("Login to gaia data server")
        TapPlus.login(self.__gaiadata, user=u, password=p, verbose=verbose)
    except HTTPError:
        log.error("Error logging in data server")
        log.error("Logging out from TAP server")
        TapPlus.logout(self, verbose=verbose)
def api_key(self):
    """ Return the Astrometry.net API key. """
    if not conf.api_key:
        log.error("Astrometry.net API key not in configuration file")
    return conf.api_key
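# Usage sketch (hedged): the key string is a placeholder, and this assumes the
# `api_key` attribute is settable on astroquery's AstrometryNet class (it can
# also be set in the astroquery configuration file).
#
#     from astroquery.astrometry_net import AstrometryNet
#     ast = AstrometryNet()
#     ast.api_key = 'XXXXXXXXXXXXXXXX'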
def get_epic_lightcurve(self, filename, source_number, *,
                        instrument=[], path=""):
    """Extracts the EPIC sources light curve products from a given TAR file

    For a given TAR file obtained with ``XMMNewton.download_data``, this
    function extracts the EPIC sources light curve products for a given
    instrument (or instruments) from said TAR file

    The result is a dictionary containing the paths to the extracted EPIC
    sources light curve products, with the key being the instrument

    If the instrument is not specified, this function will return all
    available instruments

    Parameters
    ----------
    filename : string, mandatory
        The name of the tarfile to be processed
    source_number : integer, mandatory
        The source number, in decimal, in the observation
    instrument : array of strings, optional, default []
        An array of strings indicating the desired instruments
    path : string, optional
        If set, extracts the EPIC light curves in the indicated path

    Returns
    -------
    A dictionary with the full paths of the extracted EPIC sources light
    curve products.  The key is the instrument

    Notes
    -----
    The filenames will contain the source number in hexadecimal, as this is
    the convention used by the pipeline.

    The structure and the content of the extracted compressed FITS files are
    described in detail in the Pipeline Products Description
    [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).
    """
    _instrument = ["M1", "M2", "PN", "EP"]
    _band = [8]
    _product_type = ["SRCTSR", "FBKTSR"]
    _path = ""

    ret = None
    if instrument == []:
        instrument = _instrument
    else:
        # Filter with a list comprehension: removing items from the list
        # while iterating over it would skip elements.
        for inst in instrument:
            if inst not in _instrument:
                log.warning("Invalid instrument %s" % inst)
        instrument = [inst for inst in instrument if inst in _instrument]

    if path != "" and os.path.exists(path):
        _path = path
    try:
        with tarfile.open(filename, "r") as tar:
            ret = {}
            for member in tar.getmembers():
                paths = os.path.split(member.name)
                fname = paths[1]
                paths = os.path.split(paths[0])
                if paths[1] != "pps":
                    continue
                fname_info = self._parse_filename(fname)
                if fname_info["X"] != "P":
                    continue
                if fname_info["I"] not in instrument:
                    continue
                if int(fname_info["S"]) not in _band:
                    continue
                if fname_info["T"] not in _product_type:
                    continue
                if int(fname_info["X-"], 16) != source_number:
                    continue
                tar.extract(member, _path)
                key = fname_info["I"]
                path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                if fname_info["T"] == "FBKTSR":
                    key = fname_info["I"] + "_bkg"

                if ret.get(key) and isinstance(ret.get(key), str):
                    log.warning("More than one file found with the "
                                "instrument: %s" % key)
                    ret[key] = [ret[key], path_inst_name]
                elif ret.get(key) and isinstance(ret.get(key), list):
                    ret[key].append(path_inst_name)
                else:
                    ret[key] = path_inst_name
    except FileNotFoundError:
        log.error("File %s not found" % (filename))
        return None

    if ret is None or ret == {}:
        log.info("Nothing to extract with the given parameters:\n"
                 "  PPS: %s\n"
                 "  Source Number: %u\n"
                 "  Instrument: %s\n" % (filename, source_number, instrument))

    return ret
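# Usage sketch (hedged): the tarball name and source number are illustrative;
# the tarball is assumed to come from XMMNewton.download_data with level="PPS".
#
#     from astroquery.esa.xmm_newton import XMMNewton
#     lc = XMMNewton.get_epic_lightcurve("0405320501.tar", 83, instrument=["PN"])
#     pn_source = lc.get("PN")          # path(s) to the PN source time series
#     pn_background = lc.get("PN_bkg")  # background time series, if present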
def get_epic_images(self, filename, band=[], instrument=[],
                    get_detmask=False, get_exposure_map=False, path="",
                    **kwargs):
    """Extracts the EPIC images from a given TAR file

    This function extracts the EPIC images in a given band (or bands) and
    instrument (or instruments) from it

    The result is a dictionary containing the paths to the extracted EPIC
    images, with keys being the band and the instrument

    If the band or the instrument are not specified, this function will
    return all the available bands and instruments

    Additionally, ``get_detmask`` and ``get_exposure_map`` can be set to
    True.  If so, this function will also extract the exposure maps and
    detector masks within the specified bands and instruments

    Parameters
    ----------
    filename : string, mandatory
        The name of the tarfile to be processed
    band : array of integers, optional, default []
        An array of integers indicating the desired bands
    instrument : array of strings, optional, default []
        An array of strings indicating the desired instruments
    get_detmask : bool, optional
        If True, also extracts the detector masks
    get_exposure_map : bool, optional
        If True, also extracts the exposure maps
    path : string, optional
        If set, extracts the EPIC images in the indicated path

    Returns
    -------
    A dictionary of dictionaries with the full paths of the extracted EPIC
    images.  The keys of each dictionary are the band for the first level
    dictionary and the instrument for the second level dictionaries

    Notes
    -----
    The structure and the content of the extracted compressed FITS files are
    described in detail in the Pipeline Products Description
    [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).
    """
    _product_type = ["IMAGE_"]
    _instrument = ["M1", "M2", "PN", "EP"]
    _band = [1, 2, 3, 4, 5, 8]
    _path = ""
    if get_detmask:
        _product_type.append("DETMSK")
    if get_exposure_map:
        _product_type.append("EXPMAP")
    if path != "" and os.path.exists(path):
        _path = path

    ret = None
    if band == []:
        band = _band
    else:
        # Filter with list comprehensions: removing items from a list while
        # iterating over it would skip elements.
        for b in band:
            if b not in _band:
                log.warning("Invalid band %u" % b)
        band = [b for b in band if b in _band]

    if instrument == []:
        instrument = _instrument
    else:
        for inst in instrument:
            if inst not in _instrument:
                log.warning("Invalid instrument %s" % inst)
        instrument = [inst for inst in instrument if inst in _instrument]
    try:
        with tarfile.open(filename, "r") as tar:
            ret = {}
            for member in tar.getmembers():
                paths = os.path.split(member.name)
                fname = paths[1]
                paths = os.path.split(paths[0])
                if paths[1] != "pps":
                    continue
                fname_info = self._parse_filename(fname)
                if fname_info["X"] != "P":
                    continue
                if fname_info["I"] not in instrument:
                    continue
                if int(fname_info["S"]) not in band:
                    continue
                if fname_info["T"] not in _product_type:
                    continue
                tar.extract(member, _path)
                if not ret.get(int(fname_info["S"])):
                    ret[int(fname_info["S"])] = {}
                b = int(fname_info["S"])
                ins = fname_info["I"]
                path_member_name = os.path.abspath(os.path.join(_path, member.name))
                if fname_info["T"] == "DETMSK":
                    ins = fname_info["I"] + "_det"
                elif fname_info["T"] == "EXPMAP":
                    ins = fname_info["I"] + "_expo"

                if ret[b].get(ins) and isinstance(ret[b].get(ins), str):
                    log.warning("More than one file found with the "
                                "band %u and "
                                "the instrument: %s" % (b, ins))
                    ret[b][ins] = [ret[b][ins], path_member_name]
                elif ret[b].get(ins) and isinstance(ret[b].get(ins), list):
                    ret[b][ins].append(path_member_name)
                else:
                    ret[b][ins] = path_member_name
    except FileNotFoundError:
        log.error("File %s not found" % (filename))
        return None
    return ret
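# Usage sketch (hedged, filename illustrative): extract the PN image in band 8
# together with its exposure map, then index the nested
# {band: {instrument: path}} dictionary.
#
#     images = XMMNewton.get_epic_images("0405320501.tar", band=[8],
#                                        instrument=["PN"],
#                                        get_exposure_map=True)
#     pn_image = images[8]["PN"]
#     pn_expo = images[8]["PN_expo"]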
def get_epic_spectra(self, filename, source_number, *,
                     instrument=[], path="", verbose=False):
    """Extracts in path (when set) the EPIC sources spectral products from a
    given TAR file.

    This function extracts the EPIC sources spectral products for a given
    instrument (or instruments) from it

    The result is a dictionary containing the paths to the extracted EPIC
    sources spectral products, with the key being the instrument

    If the instrument is not specified, this function will return all the
    available instruments

    Parameters
    ----------
    filename : string, mandatory
        The name of the tarfile to be processed
    source_number : integer, mandatory
        The source number, in decimal, in the observation
    instrument : array of strings, optional, default []
        An array of strings indicating the desired instruments
    path : string, optional
        If set, extracts the EPIC spectra in the indicated path
    verbose : bool, optional, default False
        flag to display information about the process

    Returns
    -------
    A dictionary with the full paths of the extracted EPIC sources spectral
    products.  The key is the instrument

    Notes
    -----
    The filenames will contain the source number in hexadecimal, as this is
    the convention used by the pipeline.

    The structure and the content of the extracted compressed FITS files are
    described in detail in the Pipeline Products Description
    [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).
    """
    _instrument = ["M1", "M2", "PN", "EP"]
    _product_type = ["SRSPEC", "BGSPEC", "SRCARF"]
    _path = ""

    ret = None
    if instrument == []:
        instrument = _instrument
    else:
        # Filter with a list comprehension: removing items from the list
        # while iterating over it would skip elements.
        for inst in instrument:
            if inst not in _instrument:
                log.warning(f"Invalid instrument {inst}")
        instrument = [inst for inst in instrument if inst in _instrument]
    if path != "" and os.path.exists(path):
        _path = path
    try:
        with tarfile.open(filename, "r") as tar:
            ret = {}
            for member in tar.getmembers():
                paths = os.path.split(member.name)
                fname = paths[1]
                paths = os.path.split(paths[0])
                if paths[1] != "pps":
                    continue
                fname_info = self._parse_filename(fname)
                if fname_info["X"] != "P":
                    continue
                if fname_info["I"] not in instrument:
                    continue
                if fname_info["T"] not in _product_type:
                    continue
                if int(fname_info["X-"], 16) != source_number:
                    continue
                tar.extract(member, _path)
                key = fname_info["I"]
                path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                if fname_info["T"] == "BGSPEC":
                    key = fname_info["I"] + "_bkg"
                elif fname_info["T"] == "SRCARF":
                    key = fname_info["I"] + "_arf"
                else:
                    # process the source spectrum
                    with fits.open(path_inst_name) as hdul:
                        # pick up the SAS version, needed for the pn RMF
                        sasver = hdul[0].header["SASVERS"].split("-")[1][:-2]
                        # old RMF folders, going back two versions; the last
                        # bulk reprocessing was with SAS 18.0
                        hardcoded_old_pn_rmfs = ['2020-10-28_sas19.0.0',
                                                 '2019-07-31_sas18.0.0']
                        old_rmf = False
                        for irmf in hardcoded_old_pn_rmfs:
                            if sasver in irmf:
                                rmf_path = irmf
                                if verbose:
                                    log.info(f"Picking up an old RMF file from the {rmf_path} folder")
                                old_rmf = True
                                break
                        for ext in hdul:
                            if ext.name != "SPECTRUM":
                                continue
                            rmf_fname = ext.header["RESPFILE"]
                            if fname_info["I"] == "M1" or fname_info["I"] == "M2":
                                inst = "MOS/" + str(ext.header["SPECDELT"]) + "eV/"
                            elif fname_info["I"] == "PN":
                                inst = "PN/"
                            file_name, file_ext = os.path.splitext(rmf_fname)
                            rmf_fname = file_name + "_v" + sasver + file_ext
                            if old_rmf:
                                inst = f"old/pn/{rmf_path}/"
                            link = self._rmf_ftp + inst + rmf_fname
                            if verbose:
                                log.info("rmf link is: %s" % link)
                            response = self._request('GET', link)
                            response.raise_for_status()
                            rsp_filename = os.path.join(_path, paths[0], paths[1],
                                                        ext.header["RESPFILE"])
                            with open(rsp_filename, 'wb') as f:
                                f.write(response.content)
                            ret[fname_info["I"] + "_rmf"] = rsp_filename

                if ret.get(key) and isinstance(ret.get(key), str):
                    log.warning("More than one file found with the instrument: %s" % key)
                    ret[key] = [ret[key], path_inst_name]
                elif ret.get(key) and isinstance(ret.get(key), list):
                    ret[key].append(path_inst_name)
                else:
                    ret[key] = path_inst_name
    except FileNotFoundError:
        log.error("File %s not found" % (filename))
        return

    if not ret:
        log.info("Nothing to extract with the given parameters:\n"
                 "  PPS: %s\n"
                 "  Source Number: %u\n"
                 "  Instrument: %s\n" % (filename, source_number, instrument))

    return ret
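# Usage sketch (hedged, filename and source number illustrative): extract the
# PN spectral products for source 83, including the downloaded response matrix.
#
#     spectra = XMMNewton.get_epic_spectra("0405320501.tar", 83, instrument=["PN"])
#     pn_spectrum = spectra.get("PN")   # source spectrum
#     pn_arf = spectra.get("PN_arf")    # ancillary response file
#     pn_rmf = spectra.get("PN_rmf")    # redistribution matrix, fetched remotely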
def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL',
              retrieval_type="ALL", valid_data=True, band=None,
              avoid_datatype_check=False, format="votable",
              output_file=None, overwrite_output_file=False, verbose=False):
    """Loads the specified data products
    TAP+ only

    Parameters
    ----------
    ids : str list, mandatory
        list of identifiers
    data_release : str, optional, default None
        data release from which data should be taken.  E.g. 'Gaia DR2'
        By default, it takes the current default one.
    data_structure : str, optional, default 'INDIVIDUAL'
        it can be 'INDIVIDUAL', 'COMBINED' or 'RAW':
        'INDIVIDUAL' means products are provided in separate files for each
        sourceId, all zipped in a single bundle
        'COMBINED' means products are provided in a single file
        concatenating the data of all sourceIds together
        'RAW' means products are provided following a Data Model similar to
        that used in the MDB, so parameters stored as arrays remain as such
    retrieval_type : str, optional, default 'ALL'
        retrieval type identifier.  It can be either 'epoch_photometry'
        for compatibility reasons or 'ALL' to retrieve all data from the
        list of sources.
    valid_data : bool, optional, default True
        By default, the epoch photometry service returns only valid data,
        that is, all data rows where flux is not null and the
        rejected_by_photometry flag is not true.  In order to retrieve all
        data associated to a given source without this filter, this request
        parameter should be included (valid_data=False)
    band : str, optional, default None, valid values: G, BP, RP
        By default, the epoch photometry service returns all the available
        photometry bands for the requested source.
        This parameter allows to filter the output lightcurve by its band.
    avoid_datatype_check : boolean, optional, default False
        By default, this value will be set to False.  If it is set to
        'true', the Datalink items tags will not be checked.
    format : str, optional, default 'votable'
        loading format
    output_file : string, optional, default None
        file where the results are saved.
        If it is not provided, the http response contents are returned.
    overwrite_output_file : boolean, optional, default False
        To overwrite the output_file if it already exists.
    verbose : bool, optional, default False
        flag to display information about the process

    Returns
    -------
    A table object
    """
    if retrieval_type is None:
        raise ValueError("Missing mandatory argument 'retrieval_type'")

    now = datetime.now()
    now_formatted = now.strftime("%Y%m%d_%H%M%S")
    temp_dirname = "temp_" + now_formatted
    downloadname_formated = "download_" + now_formatted

    output_file_specified = False
    if output_file is None:
        output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated)
    else:
        output_file_specified = True
        output_file = os.path.abspath(output_file)
        if not overwrite_output_file and os.path.exists(output_file):
            raise ValueError(f"{output_file} file already exists.  Please use "
                             f"overwrite_output_file=True to overwrite the output file.")

    path = os.path.dirname(output_file)

    if ids is None:
        raise ValueError("Missing mandatory argument 'ids'")

    if avoid_datatype_check is False:
        # we need to check params
        rt = str(retrieval_type).upper()
        if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
            raise ValueError(f"Invalid mandatory argument 'retrieval_type'.  Found {retrieval_type}, "
                             f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}")

    params_dict = {}

    if not valid_data or str(retrieval_type) == 'ALL':
        params_dict['VALID_DATA'] = "false"
    elif valid_data:
        params_dict['VALID_DATA'] = "true"

    if band is not None:
        if band not in ('G', 'BP', 'RP'):
            raise ValueError(f"Invalid band value '{band}' "
                             "(Valid values: 'G', 'BP' and 'RP')")
        else:
            params_dict['BAND'] = band

    if isinstance(ids, str):
        ids_arg = ids
    else:
        if isinstance(ids, int):
            ids_arg = str(ids)
        else:
            ids_arg = ','.join(str(item) for item in ids)
    params_dict['ID'] = ids_arg

    if data_release is not None:
        params_dict['RELEASE'] = data_release
    params_dict['DATA_STRUCTURE'] = data_structure
    params_dict['FORMAT'] = str(format)
    params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
    params_dict['USE_ZIP_ALWAYS'] = 'true'

    if path != '':
        try:
            os.mkdir(path)
        except FileExistsError:
            log.error("Path %s already exists" % path)
        except OSError:
            log.error("Creation of the directory %s failed" % path)

    try:
        self.__gaiadata.load_data(params_dict=params_dict,
                                  output_file=output_file, verbose=verbose)
        files = Gaia.__get_data_files(output_file=output_file, path=path)
    finally:
        if not output_file_specified:
            shutil.rmtree(path)

    if verbose:
        if output_file_specified:
            log.info("output_file = %s" % output_file)
        log.debug("List of products available:")
        for item in sorted(files.keys()):
            log.debug("Product = " + item)

    return files
def login(self, user=None, password=None, certificate_file=None):
    """
    login allows the user to authenticate to the service.  Both
    user/password and https client certificates are supported.

    Alternatively, the Cadc class can be instantiated with an
    authenticated session.

    Parameters
    ----------
    user : str, required if certificate_file is None
        username to login with
    password : str, required if user is set
        password to login with
    certificate_file : str, required if user is None
        path to certificate to use with logging in
    """
    # start with a new session
    if not isinstance(self.cadctap._session,
                      (requests.Session, authsession.AuthSession)):
        raise TypeError('Cannot login with user provided session that is '
                        'not a pyvo.authsession.AuthSession or '
                        'requests.Session')
    if not certificate_file and not (user and password):
        raise AttributeError('login credentials missing (user/password '
                             'or certificate)')
    if certificate_file:
        if isinstance(self.cadctap._session, authsession.AuthSession):
            self.cadctap._session.credentials.set_client_certificate(
                certificate_file)
        else:
            # if the session was already used to call CADC, requests caches
            # it without using the cert.  Therefore need to close all
            # existing https sessions first.
            https_adapter = self.cadctap._session.adapters['https://']
            if https_adapter:
                https_adapter.close()
            self.cadctap._session.cert = certificate_file
    if user and password:
        login_url = get_access_url(self.CADCLOGIN_SERVICE_URI,
                                   'ivo://ivoa.net/std/UMS#login-0.1')
        if login_url is None:
            raise RuntimeError("No login URL")
        # need to login and get a cookie
        args = {
            "username": str(user),
            "password": str(password)}
        header = {
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"}
        response = self._request(method='POST', url=login_url,
                                 data=args, headers=header, cache=False)
        try:
            response.raise_for_status()
        except Exception as e:
            log.error('Login error: {}'.format(e))
            raise
        # extract cookie
        cookie = '"{}"'.format(response.text)
        if isinstance(self.cadctap._session, authsession.AuthSession):
            self.cadctap._session.credentials.set_cookie(
                CADC_COOKIE_PREFIX, cookie)
        else:
            self.cadctap._session.cookies.set(CADC_COOKIE_PREFIX, cookie)
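# Usage sketch (hedged): the credentials and certificate path are placeholders.
#
#     from astroquery.cadc import Cadc
#     cadc = Cadc()
#     cadc.login(user="myuser", password="mypassword")
#     # or, with a proxy certificate instead of user/password:
#     cadc.login(certificate_file="/path/to/cadcproxy.pem")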
def download_files(self, files, *, savedir=None, cache=True,
                   continuation=True, skip_unauthorized=True,
                   verify_only=False):
    """
    Given a list of file URLs, download them

    Note: Given a list with repeated URLs, each will only be downloaded
    once, so the return may have a different length than the input list

    Parameters
    ----------
    files : list
        List of URLs to download
    savedir : None or str
        The directory to save to.  Default is the cache location.
    cache : bool
        Cache the download?
    continuation : bool
        Attempt to continue where the download left off (if it was broken)
    skip_unauthorized : bool
        If you receive "unauthorized" responses for some of the download
        requests, skip over them.  If this is False, an exception will be
        raised.
    verify_only : bool
        Option to go through the process of checking the files to see if
        they're the right size, but not actually download them.  This
        option may be useful if a previous download run failed partway.
    """
    if self.USERNAME:
        auth = self._get_auth_info(self.USERNAME)
    else:
        auth = None

    downloaded_files = []

    if savedir is None:
        savedir = self.cache_location

    for file_link in unique(files):
        log.debug("Downloading {0} to {1}".format(file_link, savedir))
        try:
            check_filename = self._request('HEAD', file_link, auth=auth)
            check_filename.raise_for_status()
        except requests.HTTPError as ex:
            if ex.response.status_code == 401:
                if skip_unauthorized:
                    log.info("Access denied to {url}.  Skipping to"
                             " next file".format(url=file_link))
                    continue
                else:
                    raise

        try:
            filename = re.search("filename=(.*)",
                                 check_filename.headers['Content-Disposition']).groups()[0]
        except KeyError:
            log.info(f"Unable to find filename for {file_link} "
                     "(missing Content-Disposition in header).  "
                     "Skipping to next file.")
            continue

        if savedir is not None:
            filename = os.path.join(savedir, filename)

        if verify_only:
            existing_file_length = os.stat(filename).st_size
            if 'content-length' in check_filename.headers:
                length = int(check_filename.headers['content-length'])
                if length == 0:
                    warnings.warn('URL {0} has length=0'.format(file_link))
                elif existing_file_length == length:
                    log.info(f"Found cached file {filename} with expected size {existing_file_length}.")
                elif existing_file_length < length:
                    log.info(f"Found cached file {filename} with size {existing_file_length} < expected "
                             f"size {length}.  The download should be continued.")
                elif existing_file_length > length:
                    warnings.warn(f"Found cached file {filename} with size {existing_file_length} > expected "
                                  f"size {length}.  The download is likely corrupted.",
                                  CorruptDataWarning)
            else:
                warnings.warn(f"Could not verify {file_link} because it has no 'content-length'")

        try:
            if not verify_only:
                self._download_file(file_link,
                                    filename,
                                    timeout=self.TIMEOUT,
                                    auth=auth,
                                    cache=cache,
                                    method='GET',
                                    head_safe=False,
                                    continuation=continuation)

            downloaded_files.append(filename)
        except requests.HTTPError as ex:
            if ex.response.status_code == 401:
                if skip_unauthorized:
                    log.info("Access denied to {url}.  Skipping to"
                             " next file".format(url=file_link))
                    continue
                else:
                    raise
            elif ex.response.status_code == 403:
                log.error("Access denied to {url}".format(url=file_link))
                if 'dataPortal' in file_link and 'sso' not in file_link:
                    log.error("The URL may be incorrect.  Try using "
                              "{0} instead of {1}"
                              .format(file_link.replace('dataPortal/',
                                                        'dataPortal/sso/'),
                                      file_link))
                raise ex
            elif ex.response.status_code == 500:
                # empirically, this works the second time most of the time...
                self._download_file(file_link,
                                    filename,
                                    timeout=self.TIMEOUT,
                                    auth=auth,
                                    cache=cache,
                                    method='GET',
                                    head_safe=False,
                                    continuation=continuation)

                downloaded_files.append(filename)
            else:
                raise ex

    return downloaded_files
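# Usage sketch (hedged): the URL is a placeholder for an ALMA dataPortal link,
# e.g. one taken from the 'access_url' column of Alma.get_data_info.
#
#     from astroquery.alma import Alma
#     alma = Alma()
#     paths = alma.download_files(
#         ["https://almascience.org/dataPortal/member.uid___A001_X_placeholder.tar"],
#         savedir="./data")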
def query_async(self, payload, *, public=True, science=True,
                legacy_columns=False, get_query_payload=None, **kwargs):
    """
    Perform a generic query with user-specified payload

    Parameters
    ----------
    payload : dictionary
        Please consult the `help` method
    public : bool
        True to return only public datasets, False to return private only,
        None to return both
    science : bool
        True to return only science datasets, False to return only
        calibration, None to return both
    legacy_columns : bool
        True to return the columns from the obsolete ALMA advanced query,
        otherwise return the current columns based on ObsCore model.
    get_query_payload : bool, optional
        If True, return the fully-built query payload instead of running
        the query.

    Returns
    -------
    Table with results.  Columns are those in the ALMA ObsCore model
    (see ``help_tap``) unless ``legacy_columns`` argument is set to True.
    """
    if payload is None:
        payload = {}
    for arg in kwargs:
        value = kwargs[arg]
        if 'band_list' == arg and isinstance(value, list):
            value = ' '.join([str(_) for _ in value])
        if arg in payload:
            payload[arg] = '{} {}'.format(payload[arg], value)
        else:
            payload[arg] = value

    if science is not None:
        payload['science_observation'] = science
    if public is not None:
        payload['public_data'] = public
    if get_query_payload:
        return payload

    query = _gen_sql(payload)
    result = self.query_tap(query, maxrec=payload.get('maxrec', None))

    if result is not None:
        result = result.to_table()
    else:
        # Should not happen
        raise RuntimeError('BUG: Unexpected result None')

    if legacy_columns:
        legacy_result = Table()
        # add 'Observation date' column
        for col_name in _OBSCORE_TO_ALMARESULT:
            if col_name in result.columns:
                if col_name == 't_min':
                    legacy_result['Observation date'] = \
                        [Time(_['t_min'], format='mjd').strftime(
                            ALMA_DATE_FORMAT) for _ in result]
                else:
                    legacy_result[_OBSCORE_TO_ALMARESULT[col_name]] = \
                        result[col_name]
            else:
                log.error("Invalid column mapping in OBSCORE_TO_ALMARESULT: "
                          "{}:{}.  Please "
                          "report this as an Issue."
                          .format(col_name, _OBSCORE_TO_ALMARESULT[col_name]))
        return legacy_result

    return result
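# Usage sketch (hedged): payload keys follow the `help` method; the key and
# value shown here are illustrative.
#
#     payload = {'source_name_alma': 'NGC253'}
#     results = alma.query_async(payload, public=True, science=True)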
def download_and_extract_files(self, urls, *, delete=True, regex=r'.*\.fits$',
                               include_asdm=False, path='cache_path',
                               verbose=True):
    """
    Given a list of tarball URLs, extract all the FITS files (or whatever
    matches the regex)

    Parameters
    ----------
    urls : str or list
        A single URL or a list of URLs
    delete : bool
        Delete the downloaded tarballs after the matching files have been
        extracted
    regex : str
        Regular expression that extracted filenames must match
    include_asdm : bool
        Only affects cycle 1+ data.  If set, the ASDM files will be
        downloaded in addition to the script and log files.  By default,
        though, this file will be downloaded and deleted without extracting
        any information: you must change the regex if you want to extract
        data from an ASDM tarball
    path : str
        Directory to download and extract into
    verbose : bool
        Flag to display information about the process
    """
    if isinstance(urls, str):
        urls = [urls]
    if not isinstance(urls, (list, tuple, np.ndarray)):
        raise TypeError("Datasets must be given as a list of strings.")

    filere = re.compile(regex)

    all_files = []
    tar_files = []
    expanded_files = []
    for url in urls:
        if url[-4:] != '.tar':
            raise ValueError("URLs should be links to tarballs.")

        tarfile_name = os.path.split(url)[-1]
        if tarfile_name in self._cycle0_tarfile_content['ID']:
            # It is a cycle 0 file: need to check if it contains FITS
            match = (self._cycle0_tarfile_content['ID'] == tarfile_name)
            if not any(re.match(regex, x) for x in
                       self._cycle0_tarfile_content['Files'][match]):
                log.info("No FITS files found in {0}".format(tarfile_name))
                continue
        else:
            if 'asdm' in tarfile_name and not include_asdm:
                log.info("ASDM tarballs do not contain FITS files; "
                         "skipping.")
                continue

        tar_file = url.split('/')[-1]
        files = self.get_data_info(tar_file)
        if files:
            expanded_files += [x for x in files['access_url'] if
                               filere.match(x.split('/')[-1])]
        else:
            tar_files.append(url)

    try:
        # get the tar files
        downloaded = self.download_files(tar_files, savedir=path)
        fitsfilelist = self.get_files_from_tarballs(downloaded, regex=regex,
                                                    path=path, verbose=verbose)

        if delete:
            for tarball_name in downloaded:
                log.info("Deleting {0}".format(tarball_name))
                os.remove(tarball_name)

        all_files += fitsfilelist

        # download the other files
        all_files += self.download_files(expanded_files, savedir=path)

    except requests.ConnectionError as ex:
        self.partial_file_list = all_files
        log.error("There was an error downloading the file.  "
                  "A partially completed download list is "
                  "in Alma.partial_file_list")
        raise ex
    except requests.HTTPError as ex:
        if ex.response.status_code == 401:
            log.info("Access denied to {url}.  Skipping to"
                     " next file".format(url=url))
        else:
            raise ex
    return all_files
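# Usage sketch (hedged): the URL is a placeholder tarball link.
#
#     fits_paths = alma.download_and_extract_files(
#         "https://almascience.org/dataPortal/placeholder_uid___A001_X.tar",
#         regex=r'.*\.fits$', path='./alma_data')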
def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL',
              retrieval_type="ALL", valid_data=False, band=None,
              avoid_datatype_check=False, format="votable",
              output_file=None, overwrite_output_file=False, verbose=False):
    """Loads the specified data products
    TAP+ only

    Parameters
    ----------
    ids : str list, mandatory
        list of identifiers
    data_release : str, optional, default None
        data release from which data should be taken.  E.g. 'Gaia DR2'
        By default, it takes the current default one.
    data_structure : str, optional, default 'INDIVIDUAL'
        it can be 'INDIVIDUAL', 'COMBINED' or 'RAW':
        'INDIVIDUAL' means products are provided in separate files for each
        sourceId.  All files are zipped in a single bundle, even if only
        one source/file is considered
        'COMBINED' means products are provided in a single file
        concatenating the data of all sourceIds together.  How this is
        organised depends on the chosen format
        'RAW' means products are provided following a Data Model similar
        to that used in the MDB, meaning in particular that parameters
        stored as arrays will remain as such.  Like in the COMBINED
        structure, a single file is provided for the data of all sourceIds
        together, but in this case there will always be one row per
        sourceId
    retrieval_type : str, optional, default 'ALL'
        retrieval type identifier; 'ALL' retrieves all data from the list
        of sources.  For Gaia DR2 the possible values are
        ['EPOCH_PHOTOMETRY'].
        For Gaia DR3 (once published), the possible values will be
        ['EPOCH_PHOTOMETRY', 'RVS', 'XP_CONTINUOUS', 'XP_SAMPLED',
        'MCMC_GSPPHOT', 'MCMC_MSC']
    valid_data : bool, optional, default False
        By default, the epoch photometry service returns all available
        data, including data rows where flux is null and/or the
        rejected_by_photometry flag is set to True.  In order to retrieve
        only valid data (data rows where flux is not null and the
        rejected_by_photometry flag is set to False), this request
        parameter should be included with valid_data=True.
    band : str, optional, default None, valid values: G, BP, RP
        By default, the epoch photometry service returns all the available
        photometry bands for the requested source.
        This parameter allows to filter the output lightcurve by its band.
    avoid_datatype_check : boolean, optional, default False
        By default, this value will be set to False.  If it is set to
        'true', the Datalink items tags will not be checked.
    format : str, optional, default 'votable'
        loading format.  Other available formats are 'csv', 'ecsv',
        'json', 'votable_plain' and 'fits'
    output_file : string, optional, default None
        file where the results are saved.
        If it is not provided, the http response contents are returned.
    overwrite_output_file : boolean, optional, default False
        To overwrite the output_file if it already exists.
    verbose : bool, optional, default False
        flag to display information about the process

    Returns
    -------
    A table object
    """
    if retrieval_type is None:
        raise ValueError("Missing mandatory argument 'retrieval_type'")

    now = datetime.now()
    now_formatted = now.strftime("%Y%m%d_%H%M%S")
    temp_dirname = "temp_" + now_formatted
    downloadname_formated = "download_" + now_formatted

    output_file_specified = False
    if output_file is None:
        output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated)
    else:
        output_file_specified = True
        output_file = os.path.abspath(output_file)
        if not overwrite_output_file and os.path.exists(output_file):
            raise ValueError(f"{output_file} file already exists.  Please use "
                             f"overwrite_output_file=True to overwrite the output file.")

    path = os.path.dirname(output_file)

    if ids is None:
        raise ValueError("Missing mandatory argument 'ids'")

    if avoid_datatype_check is False:
        # we need to check params
        rt = str(retrieval_type).upper()
        if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
            raise ValueError(f"Invalid mandatory argument 'retrieval_type'.  Found {retrieval_type}, "
                             f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}")

    params_dict = {}

    if not valid_data or str(retrieval_type) == 'ALL':
        params_dict['VALID_DATA'] = "false"
    elif valid_data:
        params_dict['VALID_DATA'] = "true"

    if band is not None:
        if band not in ('G', 'BP', 'RP'):
            raise ValueError(f"Invalid band value '{band}' "
                             "(Valid values: 'G', 'BP' and 'RP')")
        else:
            params_dict['BAND'] = band

    if isinstance(ids, str):
        ids_arg = ids
    else:
        if isinstance(ids, int):
            ids_arg = str(ids)
        else:
            ids_arg = ','.join(str(item) for item in ids)
    params_dict['ID'] = ids_arg

    if data_release is not None:
        params_dict['RELEASE'] = data_release
    params_dict['DATA_STRUCTURE'] = data_structure
    params_dict['FORMAT'] = str(format)
    params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
    params_dict['USE_ZIP_ALWAYS'] = 'true'

    if path != '':
        try:
            os.mkdir(path)
        except FileExistsError:
            log.error("Path %s already exists" % path)
        except OSError:
            log.error("Creation of the directory %s failed" % path)

    try:
        self.__gaiadata.load_data(params_dict=params_dict,
                                  output_file=output_file, verbose=verbose)
        files = Gaia.__get_data_files(output_file=output_file, path=path)
    finally:
        if not output_file_specified:
            shutil.rmtree(path)

    if verbose:
        if output_file_specified:
            log.info("output_file = %s" % output_file)
        log.debug("List of products available:")
        for item in sorted(files.keys()):
            log.debug("Product = " + item)

    return files
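# Usage sketch (hedged): the source id is illustrative; valid retrieval types
# depend on the data release, as described in the docstring above.
#
#     files = Gaia.load_data(ids=[1872046574983497216],
#                            data_release='Gaia DR2',
#                            retrieval_type='EPOCH_PHOTOMETRY',
#                            data_structure='INDIVIDUAL',
#                            format='votable')
#     for product in sorted(files):
#         print(product)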
def get_epic_spectra(self, filename, source_number, *,
                     instrument=[], path="", verbose=False):
    """Extracts in path (when set) the EPIC sources spectral products from a
    given TAR file.

    For a given TAR file obtained with:

        XMM.download_data(OBS_ID, level="PPS", extension="FTZ", filename=tarfile)

    this function extracts the EPIC sources spectral products for a given
    instrument (or instruments) from it

    The result is a dictionary containing the paths to the extracted EPIC
    sources spectral products, with the key being the instrument

    If the instrument is not specified, this function will return all the
    available instruments

    Examples
    --------
    Extracting all bands and instruments::

        result = XMM.get_epic_spectra(tarfile, 83, instrument=['M1', 'M2', 'PN'])

    To retrieve the source spectrum of the instrument PN::

        fits_image = result['PN']

    ``fits_image`` will be the full path to the extracted FTZ file

    Parameters
    ----------
    filename : string, mandatory
        The name of the tarfile to be processed
    source_number : integer, mandatory
        The source number, in decimal, in the observation
    instrument : array of strings, optional, default []
        An array of strings indicating the desired instruments
    path : string, optional
        If set, extracts the EPIC spectra in the indicated path
    verbose : bool, optional, default False
        flag to display information about the process

    Returns
    -------
    A dictionary with the full paths of the extracted EPIC sources spectral
    products.  The key is the instrument

    Notes
    -----
    The filenames will contain the source number in hexadecimal, as this is
    the convention used by the pipeline.

    The structure and the content of the extracted compressed FITS files are
    described in detail in the Pipeline Products Description
    [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).
    """
    _instrument = ["M1", "M2", "PN", "EP"]
    _product_type = ["SRSPEC", "BGSPEC", "SRCARF"]
    _path = ""

    ret = None
    if instrument == []:
        instrument = _instrument
    else:
        # Filter with a list comprehension: removing items from the list
        # while iterating over it would skip elements.
        for inst in instrument:
            if inst not in _instrument:
                log.warning(f"Invalid instrument {inst}")
        instrument = [inst for inst in instrument if inst in _instrument]
    if path != "" and os.path.exists(path):
        _path = path
    try:
        with tarfile.open(filename, "r") as tar:
            ret = {}
            for member in tar.getmembers():
                paths = os.path.split(member.name)
                fname = paths[1]
                paths = os.path.split(paths[0])
                if paths[1] != "pps":
                    continue
                fname_info = self._parse_filename(fname)
                if fname_info["X"] != "P":
                    continue
                if fname_info["I"] not in instrument:
                    continue
                if fname_info["T"] not in _product_type:
                    continue
                if int(fname_info["X-"], 16) != source_number:
                    continue
                tar.extract(member, _path)
                key = fname_info["I"]
                path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                if fname_info["T"] == "BGSPEC":
                    key = fname_info["I"] + "_bkg"
                elif fname_info["T"] == "SRCARF":
                    key = fname_info["I"] + "_arf"
                else:
                    with fits.open(path_inst_name) as hdul:
                        for ext in hdul:
                            if ext.name != "SPECTRUM":
                                continue
                            rmf_fname = ext.header["RESPFILE"]
                            if fname_info["I"] == "M1" or fname_info["I"] == "M2":
                                inst = "MOS/" + str(ext.header["SPECDELT"]) + "eV/"
                            elif fname_info["I"] == "PN":
                                inst = "PN/"
                            file_name, file_ext = os.path.splitext(rmf_fname)
                            rmf_fname = file_name + "_v18.0" + file_ext
                            link = self._rmf_ftp + inst + rmf_fname
                            if verbose:
                                log.info("rmf link is: %s" % link)
                            response = self._request('GET', link)
                            rsp_filename = os.path.join(_path, paths[0], paths[1],
                                                        ext.header["RESPFILE"])
                            with open(rsp_filename, 'wb') as f:
                                f.write(response.content)
                            ret[fname_info["I"] + "_rmf"] = rsp_filename

                if ret.get(key) and isinstance(ret.get(key), str):
                    log.warning("More than one file found with the instrument: %s" % key)
                    ret[key] = [ret[key], path_inst_name]
                elif ret.get(key) and isinstance(ret.get(key), list):
                    ret[key].append(path_inst_name)
                else:
                    ret[key] = path_inst_name
    except FileNotFoundError:
        log.error("File %s not found" % (filename))
        return

    if not ret:
        log.info("Nothing to extract with the given parameters:\n"
                 "  PPS: %s\n"
                 "  Source Number: %u\n"
                 "  Instrument: %s\n" % (filename, source_number, instrument))

    return ret