def fetch_field(self, sources, scale=1.25):
    """Fetch catalog sources for this field and save to database.

    Search radius and center are derived from the source list.

    Parameters
    ----------
    sources : SkyCoord
        Sources to be matched.

    scale : float, optional
        Search radius scale factor.

    """
    sr = max((sources.separation(c).max() for c in sources)) * scale / 2

    self.logger.debug(('Fetching SkyMapper catalog from ASVO over {:.2g}'
                       ' field-of-view.').format(sr))

    q = '''
    SELECT TOP {max} {columns}
    FROM dr{dr}.master
    WHERE 1=CONTAINS(POINT('ICRS', raj2000, dej2000),
                     CIRCLE('ICRS', {ra}, {dec}, {sr}))
    ORDER BY ngood DESC
    '''.format(dr=self.dr, max=self.max_records,
               columns=','.join(self.table.columns),
               ra=np.mean(sources.ra.deg),
               dec=np.mean(sources.dec.deg),
               sr=sr.deg)
    # self.logger.debug(q)

    skym = Tap(url='https://api.skymapper.nci.org.au/public/tap/')
    job = skym.launch_job(q)
    tab = job.get_results()

    self.logger.debug('Updating {} with {} sources.'.format(
        self.table.name, len(tab)))

    self.db.executemany('''
    INSERT OR IGNORE INTO {} VALUES({})
    '''.format(self.table.name, ','.join('?' * len(self.table.columns))),
        self._masked_to_null(tab))
    self.db.commit()
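
# A minimal usage sketch for fetch_field (hedged: `SkyMapper` is a stand-in
# for whatever catalog class defines the method above, and 'cat.db' is an
# illustrative database path, not one named in this module):
#
#   from astropy.coordinates import SkyCoord
#   import astropy.units as u
#
#   sources = SkyCoord([205.0, 205.2] * u.deg, [-27.2, -27.0] * u.deg)
#   cat = SkyMapper('cat.db')    # hypothetical constructor
#   cat.fetch_field(sources)     # cone search sized to the source footprint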
import os

import numpy

if not os.path.isdir('data'):
    os.mkdir('data')

# this is the basic interface for querying the Gaia archive
from astroquery.utils.tap.core import Tap
gaia = Tap(url="https://gea.esac.esa.int/tap-server/tap")

# parallax zero-point correction from Lindegren+2020
try:
    from zero_point import zpt
    zpt.load_tables()
except Exception as ex:
    print("Parallax zero-point correction not available: " + str(ex))
    zpt = None


def retrieve(ra, dec, radius, filename, parallax0):
    """
    Query the Gaia archive for all sources within a certain radius from the
    given point, which have parallax below the given limit (within 3 sigma),
    and save the result as a numpy zip archive.
    """
    job = gaia.launch_job(
        "select top 999999 " +
        "ra, dec, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, pmra_pmdec_corr, " +
        "phot_g_mean_mag, bp_rp, ruwe, astrometric_excess_noise, phot_bp_rp_excess_factor, " +
        "nu_eff_used_in_astrometry, pseudocolour, ecl_lat, astrometric_params_solved " +
        "FROM gaiaedr3.gaia_source WHERE " +
        # the WHERE clause was truncated in the original; the condition below
        # is a reconstruction from the docstring (cone search around the given
        # point plus a 3-sigma parallax cut)
        "1=CONTAINS(POINT('ICRS', ra, dec), " +
        "CIRCLE('ICRS', %g, %g, %g)) " % (ra, dec, radius) +
        "AND parallax < %g + 3 * parallax_error" % parallax0)
    tab = job.get_results()
    # store each column as an array in a numpy zip archive (per the docstring;
    # this save step is likewise a reconstruction of the truncated original)
    numpy.savez(filename, **{name: numpy.asarray(tab[name]) for name in tab.colnames})
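
# Example invocation (a sketch with illustrative values, roughly centered on
# the globular cluster 47 Tuc with a 1 mas parallax limit; guarded so that
# importing this module does not trigger a network query):
if __name__ == "__main__":
    retrieve(ra=6.02, dec=-72.08, radius=0.5,
             filename="data/example_field.npz", parallax0=1.0)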
class HSAClass(BaseQuery):

    data_url = conf.DATA_ACTION
    metadata_url = conf.METADATA_ACTION
    timeout = conf.TIMEOUT

    def __init__(self, tap_handler=None):
        super().__init__()
        if tap_handler is None:
            self._tap = Tap(url=self.metadata_url)
        else:
            self._tap = tap_handler

    def download_data(self, *, retrieval_type="OBSERVATION", observation_id=None,
                      instrument_name=None, filename=None, observation_oid=None,
                      instrument_oid=None, product_level=None, verbose=False,
                      download_dir="", cache=True, **kwargs):
        """
        Download data from Herschel

        Parameters
        ----------
        observation_id : string, optional
            The identifier of the observation to be downloaded, 10 digits
            example: 1342195355
        retrieval_type : string, optional, default 'OBSERVATION'
            The type of product that we want to retrieve
            values: OBSERVATION, PRODUCT, POSTCARD, POSTCARDFITS,
            REQUESTFILE_XML, STANDALONE, UPDP, HPDP
        instrument_name : string, optional, default 'PACS'
            The instrument name; defaults to 'PACS' if the retrieval_type
            is 'OBSERVATION'
            values: PACS, SPIRE, HIFI
        filename : string, optional, default None
            File name to be used to store the file; if not set, the
            observation_id is used as the filename
        verbose : bool, optional, default False
            flag to display information about the process
        observation_oid : string, optional
            Observation internal identifier (the database identifier)
        instrument_oid : string, optional
            The database identifier of the instrument
            values: 1, 2, 3
        product_level : string, optional
            level to download
            values: ALL, AUXILIARY, CALIBRATION, LEVEL0, LEVEL0_5, LEVEL1,
            LEVEL2, LEVEL2_5, LEVEL3, ALL-LEVEL3
        download_dir : string, optional
            The directory in which the file will be downloaded

        Returns
        -------
        File name of downloaded data
        """
        if filename is not None:
            filename = os.path.splitext(filename)[0]

        params = {'retrieval_type': retrieval_type}
        if observation_id is not None:
            params['observation_id'] = observation_id

        if retrieval_type == "OBSERVATION" and instrument_name is None:
            instrument_name = "PACS"

        if instrument_name is not None:
            params['instrument_name'] = instrument_name

        if observation_oid is not None:
            params['observation_oid'] = observation_oid

        if instrument_oid is not None:
            params['instrument_oid'] = instrument_oid

        if product_level is not None:
            params['product_level'] = product_level

        link = self.data_url + "".join(f"&{key}={val}"
                                       for key, val in params.items())
        link += "".join(f"&{key}={val}" for key, val in kwargs.items())

        if verbose:
            log.info(link)

        response = self._request('HEAD', link, save=False, cache=cache)
        if response.status_code == 401:
            error = ("Data protected by proprietary rights. "
                     "Please check your credentials")
            raise LoginError(error)

        response.raise_for_status()

        if filename is None:
            if observation_id is not None:
                filename = observation_id
            else:
                error = "Please set either 'observation_id' or 'filename' for the output"
                raise ValueError(error)

        _, res_params = cgi.parse_header(
            response.headers['Content-Disposition'])

        r_filename = res_params["filename"]
        suffixes = Path(r_filename).suffixes

        if len(suffixes) > 1 and suffixes[-1] == ".jpg":
            filename += suffixes[-1]
        else:
            filename += "".join(suffixes)

        filename = os.path.join(download_dir, filename)

        self._download_file(link, filename, head_safe=True, cache=cache)

        if verbose:
            log.info(f"Wrote {link} to {filename}")

        return filename

    def get_observation(self, observation_id, instrument_name, *, filename=None,
                        observation_oid=None, instrument_oid=None,
                        product_level=None, verbose=False, download_dir="",
                        cache=True, **kwargs):
        """
        Download observation from Herschel.
        This consists of a .tar file containing:

        - The auxiliary directory: contains all Herschel non-science spacecraft data
        - The calibration directory: contains the uplink and downlink calibration products
        - <obs_id> directory: contains the science data distributed in sub-directories called level0/0.5/1/2/2.5/3

        More information can be found here:
        https://www.cosmos.esa.int/web/herschel/data-products-overview

        Parameters
        ----------
        observation_id : string
            The identifier of the observation to be downloaded, 10 digits
            example: 1342195355
        instrument_name : string
            The instrument name
            values: PACS, SPIRE, HIFI
        filename : string, optional, default None
            File name to be used to store the file; if not set, the
            observation_id is used as the filename
        verbose : bool, optional, default False
            flag to display information about the process
        observation_oid : string, optional
            Observation internal identifier (the database identifier)
        instrument_oid : string, optional
            The database identifier of the instrument
            values: 1, 2, 3
        product_level : string, optional
            level to download
            values: ALL, AUXILIARY, CALIBRATION, LEVEL0, LEVEL0_5, LEVEL1,
            LEVEL2, LEVEL2_5, LEVEL3, ALL-LEVEL3
        download_dir : string, optional
            The directory in which the file will be downloaded

        Returns
        -------
        File name of downloaded data
        """
        if filename is not None:
            filename = os.path.splitext(filename)[0]

        params = {'retrieval_type': "OBSERVATION",
                  'observation_id': observation_id,
                  'instrument_name': instrument_name}

        if observation_oid is not None:
            params['observation_oid'] = observation_oid

        if instrument_oid is not None:
            params['instrument_oid'] = instrument_oid

        if product_level is not None:
            params['product_level'] = product_level

        link = self.data_url + "".join(f"&{key}={val}"
                                       for key, val in params.items())
        link += "".join(f"&{key}={val}" for key, val in kwargs.items())

        if verbose:
            log.info(link)

        response = self._request('HEAD', link, save=False, cache=cache)
        if response.status_code == 401:
            error = ("Data protected by proprietary rights. "
                     "Please check your credentials")
            raise LoginError(error)

        response.raise_for_status()

        _, res_params = cgi.parse_header(
            response.headers['Content-Disposition'])

        r_filename = res_params["filename"]
        suffixes = Path(r_filename).suffixes

        if filename is None:
            filename = observation_id

        filename += "".join(suffixes)
        filename = os.path.join(download_dir, filename)

        self._download_file(link, filename, head_safe=True, cache=cache)

        if verbose:
            log.info(f"Wrote {link} to {filename}")

        return filename

    def get_postcard(self, observation_id, instrument_name, *, filename=None,
                     verbose=False, download_dir="", cache=True, **kwargs):
        """
        Download postcard from Herschel

        Parameters
        ----------
        observation_id : string
            The identifier of the observation to be downloaded, 10 digits
            example: 1342195355
        instrument_name : string
            The instrument name
            values: PACS, SPIRE, HIFI
        filename : string, optional, default None
            File name to be used to store the file; if not set, the
            observation_id is used as the filename
        verbose : bool, optional, default False
            flag to display information about the process
        observation_oid : string, optional
            Observation internal identifier (the database identifier)
        instrument_oid : string, optional
            The database identifier of the instrument
            values: 1, 2, 3
        product_level : string, optional
            level to download
            values: ALL, AUXILIARY, CALIBRATION, LEVEL0, LEVEL0_5, LEVEL1,
            LEVEL2, LEVEL2_5, LEVEL3, ALL-LEVEL3
        postcard_single : string, optional
            'true' to retrieve one single postcard (main one)
            values: true, false
        download_dir : string, optional
            The directory in which the file will be downloaded

        Returns
        -------
        File name of downloaded data
        """
        if filename is not None:
            filename = os.path.splitext(filename)[0]

        params = {'retrieval_type': "POSTCARD",
                  'observation_id': observation_id,
                  'instrument_name': instrument_name}

        link = self.data_url + "".join(f"&{key}={val}"
                                       for key, val in params.items())
        link += "".join(f"&{key}={val}" for key, val in kwargs.items())

        if verbose:
            log.info(link)

        response = self._request('HEAD', link, save=False, cache=cache)
        response.raise_for_status()

        local_filepath = self._request('GET', link, cache=True, save=True)

        original_filename = re.findall(
            'filename="(.+)"', response.headers["Content-Disposition"])[0]
        _, ext = os.path.splitext(original_filename)

        if filename is None:
            filename = observation_id

        filename += ext
        filename = os.path.join(download_dir, filename)

        shutil.move(local_filepath, filename)

        if verbose:
            log.info(f"Wrote {link} to {filename}")

        return filename

    def query_hsa_tap(self, query, *, output_file=None,
                      output_format="votable", verbose=False):
        """
        Launches a synchronous job to query the HSA Table Access Protocol (TAP) service

        Parameters
        ----------
        query : string
            query (ADQL) to be executed
        output_file : string, optional, default None
            file name where the results are saved if dump_to_file is True.
            If this parameter is not provided, the jobid is used instead
        output_format : string, optional, default 'votable'
            values 'votable' or 'csv'
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A table object
        """
        job = self._tap.launch_job(query=query, output_file=output_file,
                                   output_format=output_format,
                                   verbose=verbose,
                                   dump_to_file=output_file is not None)
        table = job.get_results()
        return table

    def get_tables(self, *, only_names=True, verbose=False):
        """
        Get the available tables in the HSA TAP service

        Parameters
        ----------
        only_names : bool, optional, default True
            True to load table names only
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A list of tables
        """
        tables = self._tap.load_tables(verbose=verbose)
        if only_names:
            return [t.name for t in tables]
        else:
            return tables

    def get_columns(self, table_name, *, only_names=True, verbose=False):
        """
        Get the available columns for a table in the HSA TAP service

        Parameters
        ----------
        table_name : string
            table name for which columns will be returned
        only_names : bool, optional, default True
            True to load column names only
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A list of columns
        """
        tables = self._tap.load_tables(verbose=verbose)

        columns = None
        for t in tables:
            if str(t.name) == str(table_name):
                columns = t.columns
                break

        if columns is None:
            raise ValueError("table name specified was not found in "
                             "HSA TAP service")

        if only_names:
            return [c.name for c in columns]
        else:
            return columns

    def query_observations(self, coordinate, radius, *, n_obs=10, **kwargs):
        """
        Get the observation IDs from a given region

        Parameters
        ----------
        coordinate : string / `astropy.coordinates`
            the identifier or coordinates around which to query
        radius : int / `~astropy.units.Quantity`
            the radius of the region
        n_obs : int, optional
            the number of observations
        kwargs : dict
            passed to `query_hsa_tap`

        Returns
        -------
        A table object with the list of observations in the region
        """
        return self.query_region(coordinate, radius, n_obs=n_obs,
                                 columns="observation_id", **kwargs)

    def query_region(self, coordinate, radius, *, n_obs=10, columns='*', **kwargs):
        """
        Get the observation metadata from a given region

        Parameters
        ----------
        coordinate : string / `astropy.coordinates`
            the identifier or coordinates around which to query
        radius : int / `~astropy.units.Quantity`
            the radius of the region
        n_obs : int, optional
            the number of observations
        columns : str, optional
            the columns to retrieve from the data table
        kwargs : dict
            passed to `query_hsa_tap`

        Returns
        -------
        A table object with the list of observations in the region
        """
        r = radius
        if not isinstance(radius, u.Quantity):
            r = radius * u.deg
        coord = commons.parse_coordinates(coordinate).icrs

        query = (f"select top {n_obs} {columns} from hsa.v_active_observation "
                 f"where contains("
                 f"point('ICRS', hsa.v_active_observation.ra, hsa.v_active_observation.dec), "
                 f"circle('ICRS', {coord.ra.degree},{coord.dec.degree},{r.to(u.deg).value}))=1")

        return self.query_hsa_tap(query, **kwargs)
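
# A minimal usage sketch for HSAClass (illustrative: the coordinates point
# roughly at the Crab nebula and the observation id is the example from the
# docstrings above; network access to the HSA is required):
#
#   from astropy.coordinates import SkyCoord
#   import astropy.units as u
#
#   hsa = HSAClass()
#   crab = SkyCoord(ra=83.63 * u.deg, dec=22.01 * u.deg)
#   obs = hsa.query_observations(crab, 0.5 * u.deg, n_obs=5)   # astropy Table
#   tarball = hsa.download_data(retrieval_type="OBSERVATION",
#                               observation_id="1342195355",
#                               instrument_name="PACS")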
def main(
    args: Optional[list] = None, opts: Optional[argparse.Namespace] = None
):
    """Script Function.

    Retrieve the data from the Gaia archive (all sources satisfying the
    maximum distance from cluster center and a simple parallax cut).
    Source data for each cluster is stored in a separate numpy zip file:
    "data/[cluster_name].npz". Additionally, the table for computing the
    renormalized unit weight error (an astrometric quality flag) is
    retrieved from the Gaia website and stored in
    "DR2_RUWE_V1/table_u0_2D.txt".

    DEPENDENCIES: numpy, scipy, astropy, astroquery (astropy-affiliated
    package).
    RESOURCES: run time: a few minutes (depending on internet speed);
    disk space: a few tens of megabytes to store the downloaded data.

    Parameters
    ----------
    args : list, optional
        An optional single argument that holds the sys.argv list, except
        for the script name (e.g., argv[1:]).
    opts : Namespace, optional
        Pre-constructed results of parsed args; used ONLY if args is None.
    """
    if opts is not None and args is None:
        pass
    else:
        if opts is not None:
            warnings.warn("Not using `opts` because `args` are given")
        parser = make_parser()
        opts = parser.parse_args(args)

    if not os.path.isdir(DATA):
        os.mkdir(DATA)

    # download the file with renormalized unit weight error correction
    # tables from the Gaia website
    if not os.path.isdir(DATA + "DR2_RUWE_V1"):
        os.mkdir(DATA + "DR2_RUWE_V1")
    ruwefile = DATA + "DR2_RUWE_V1/table_u0_2D.txt"
    if not os.path.isfile(ruwefile):
        subprocess.call(
            (  # no , b/c combine into 1 string
                "curl https://www.cosmos.esa.int/documents/29201/1769576/"
                "DR2_RUWE_V1.zip/d90f37a8-37c9-81ba-bf59-dd29d9b1438f"
                " > temp.zip"
            ),
            shell=True,
        )
        subprocess.call(
            "unzip temp.zip DR2_RUWE_V1/table_u0_2D.txt", shell=True
        )
        os.remove("temp.zip")
        os.rename("DR2_RUWE_V1/table_u0_2D.txt", ruwefile)
        shutil.rmtree("DR2_RUWE_V1")

    if not os.path.isdir(DATA + "gczs/"):
        os.mkdir(DATA + "gczs/")

    # construct interpolator for renorm unit weight error correction table
    rtab = np.loadtxt(ruwefile, delimiter=",", skiprows=1)
    # correction factor as a function of g_mag and bp_rp
    rint = scipy.interpolate.RectBivariateSpline(
        x=rtab[:, 0], y=np.linspace(-1.0, 10.0, 111), z=rtab[:, 2:], kx=1, ky=1
    )
    # correction factor in case of no bp/rp, as a function of g_mag only
    rint0 = scipy.interpolate.UnivariateSpline(
        x=rtab[:, 0], y=rtab[:, 1], k=1, s=0
    )

    gaia = Tap(url="https://gea.esac.esa.int/tap-server/tap")

    # read the list of clusters and query the Gaia archive for each of them
    lst = np.genfromtxt(DATA + "input.txt", dtype=str)
    for l in tqdm.tqdm(lst):
        filename = DATA + "gczs/" + l[0] + '.npz'
        if not os.path.isfile(filename):
            retrieve(
                gaia=gaia,
                rint=rint,
                rint0=rint0,
                ra=float(l[1]),
                dec=float(l[2]),
                radius=float(l[7]) / 60,  # convert from arcmin to degrees
                filename=filename,
                parallax_limit=1.0 / float(l[3]),
            )
    return
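
# Entry-point guard (assumed; not part of the excerpt above) so the module
# can be run directly as a script. Note: the layout of data/input.txt is
# inferred from the indexing in the loop above, namely l[0]=name,
# l[1]=ra [deg], l[2]=dec [deg], l[3]=distance [kpc] (parallax limit =
# 1/distance), l[7]=radius [arcmin]; columns 4-6 are unused here.
if __name__ == "__main__":
    main()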