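# Shared imports for the loaders below. The URL constants (FILTER_URL,
# VEGA_URL, DATA_URL, ...) and the dtype/column constants used by
# fetch_sdss_S82standards are defined at module level in the original
# astroML source and are not reproduced here. The import path used for
# get_data_home and download_with_progress_bar is our assumption.
import os
from gzip import GzipFile
from io import BytesIO
from urllib.request import urlopen

import numpy as np

from astroML.datasets.tools import get_data_home, download_with_progress_bar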
def fetch_sdss_filter(fname, data_home=None, download_if_missing=True):
    """Loader for SDSS Filter profiles

    Parameters
    ----------
    fname : str
        filter name: must be one of 'ugriz'
    data_home : optional, default=None
        Specify another download and cache folder for the datasets.
        By default all astroML data is stored in '~/astroML_data'
        subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data is an array of shape (5, Nlam)
        first row: wavelength in angstroms
        second row: sensitivity to point source, airmass 1.3
        third row: sensitivity to extended source, airmass 1.3
        fourth row: sensitivity to extended source, airmass 0.0
        fifth row: assumed atmospheric extinction, airmass 1.0
    """
    # note: a bare substring test (fname in 'ugriz') would also accept
    # multi-character substrings such as 'gr', so check the length too
    if len(fname) != 1 or fname not in 'ugriz':
        raise ValueError("Unrecognized filter name '%s'" % fname)
    url = FILTER_URL % fname

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, '%s.dat' % fname)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading from %s" % url)
        F = urlopen(url)
        with open(archive_file, 'wb') as f:
            f.write(F.read())

    return np.loadtxt(archive_file, unpack=True)
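# A minimal usage sketch (the demo_sdss_filter name is ours, not part of
# astroML): fetch one filter curve and report its wavelength coverage.
# Assumes network access on the first call; later calls hit the cache.
def demo_sdss_filter(band='r'):
    data = fetch_sdss_filter(band)
    wavelength = data[0]      # angstroms
    point_source = data[1]    # point-source sensitivity, airmass 1.3
    print("%s band: %d samples spanning %.0f-%.0f angstroms, peak %.3f"
          % (band, wavelength.size, wavelength.min(), wavelength.max(),
             point_source.max()))
    return data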
def fetch_rrlyrae_mags(data_home=None, download_if_missing=True):
    """Loader for RR-Lyrae data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets.
        By default all astroML data is stored in '~/astroML_data'
        subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (483,)
        record array containing imaging data

    Examples
    --------
    >>> from astroML.datasets import fetch_rrlyrae_mags
    >>> data = fetch_rrlyrae_mags()  # doctest: +IGNORE_OUTPUT
    >>> data.shape  # number of objects in dataset
    (483,)

    Notes
    -----
    This data is from table 1 of Sesar et al 2010 ApJ 708:717
    """
    # fits is an optional dependency: don't import globally
    from astropy.io import fits

    data_home = get_data_home(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        fitsdata = download_with_progress_bar(DATA_URL)
        with open(archive_file, 'wb') as f:
            f.write(fitsdata)

    hdulist = fits.open(archive_file)
    return np.asarray(hdulist[1].data)
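# Illustrative only (demo_rrlyrae_mags is our name): load the Sesar et
# al. 2010 catalog and inspect its record-array columns without assuming
# any particular field names.
def demo_rrlyrae_mags():
    data = fetch_rrlyrae_mags()
    print("%d RR Lyrae stars" % data.shape[0])
    print("available columns:", data.dtype.names)
    return data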
def fetch_vega_spectrum(data_home=None, download_if_missing=True):
    """Loader for Vega reference spectrum

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets.
        By default all astroML data is stored in '~/astroML_data'
        subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data[0] is the array of wavelength in angstroms
        data[1] is the array of fluxes in Jy (F_nu, not F_lambda)
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_name = os.path.join(data_home, VEGA_URL.split('/')[-1])

    if not os.path.exists(archive_name):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading from %s" % VEGA_URL)
        F = urlopen(VEGA_URL)
        with open(archive_name, 'wb') as f:
            f.write(F.read())

    return np.loadtxt(archive_name, unpack=True)
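# Sketch (our helper, not astroML API): since the spectrum is returned
# as F_nu in Jy, AB magnitudes follow directly from the standard zero
# point, m_AB = -2.5 log10(F_nu / 3631 Jy).
def demo_vega_ab_mags():
    wavelength, flux_jy = fetch_vega_spectrum()
    mask = flux_jy > 0    # log10 requires positive flux
    m_ab = -2.5 * np.log10(flux_jy[mask] / 3631.0)
    return wavelength[mask], m_ab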
def fetch_sdss_S82standards(data_home=None, download_if_missing=True,
                            crossmatch_2mass=False):
    """Loader for SDSS stripe82 standard star catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets.
        By default all astroML data is stored in '~/astroML_data'
        subfolders.
    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    crossmatch_2mass : bool, optional, default=False
        If True, return the standard star catalog cross-matched with
        2MASS magnitudes

    Returns
    -------
    data : ndarray, shape = (313859,)
        record array containing sdss standard stars (see notes below)

    Notes
    -----
    Information on the data can be found at
    http://www.astro.washington.edu/users/ivezic/sdss/catalogs/stripe82.html

    Data is described in Ivezic et al. 2007 (Astronomical Journal, 134, 973).
    Columns are as follows:

      RA            right ascension of source (degrees)
      DEC           declination of source (degrees)
      RArms         rms of right ascension (arcsec)
      DECrms        rms of declination (arcsec)
      Ntot          total number of epochs
      A_r           SFD ISM extinction (mags)

    for each band in (u g r i z):

      Nobs_<band>   number of observations in this band
      mmed_<band>   median magnitude in this band
      mmu_<band>    mean magnitude in this band
      msig_<band>   standard error on the mean
                    (1.25 times larger for median)
      mrms_<band>   root-mean-square scatter
      mchi2_<band>  chi2 per degree of freedom for mean magnitude

    For 2-MASS, the following columns are added:

      ra2MASS       2-MASS right ascension
      dec2MASS      2-MASS declination
      J             J-band magnitude
      Jerr          J-band error
      H             H-band magnitude
      Herr          H-band error
      K             K-band magnitude
      Kerr          K-band error
      theta         difference between SDSS and 2MASS position (arcsec)

    Examples
    --------
    >>> data = fetch_sdss_S82standards()  # doctest: +IGNORE_OUTPUT
    >>> u_g = data['mmed_u'] - data['mmed_g']
    >>> print(u_g[:4])
    [-22.23500061   1.34900093   1.43799973   2.08200073]

    References
    ----------
    Ivezic et al. AJ 134:973 (2007)
    """
    data_home = get_data_home(data_home)

    if crossmatch_2mass:
        archive_file = os.path.join(data_home, ARCHIVE_FILE_2MASS)
        data_url = DATA_URL_2MASS
        kwargs = dict(dtype=DTYPE_2MASS)
    else:
        archive_file = os.path.join(data_home, ARCHIVE_FILE)
        data_url = DATA_URL
        kwargs = dict(usecols=COLUMNS, dtype=DTYPE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading SDSS stripe82 standard star catalog from %s to %s"
              % (data_url, data_home))

        zipped_buf = download_with_progress_bar(data_url, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print("uncompressing file...")
        extracted_buf = BytesIO(gzf.read())
        data = np.loadtxt(extracted_buf, **kwargs)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
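# Sketch (our helper): compute an optical-infrared color from the
# cross-matched catalog. The column names are taken from the docstring
# above; we have not verified them against the 2MASS dtype.
def demo_s82_colors():
    data = fetch_sdss_S82standards(crossmatch_2mass=True)
    g_minus_K = data['mmed_g'] - data['K']
    J_minus_K = data['J'] - data['K']
    print("median g-K = %.2f, median J-K = %.2f"
          % (np.median(g_minus_K), np.median(J_minus_K)))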
def fetch_sdss_sspp(data_home=None, download_if_missing=True, cleaned=False):
    """Loader for SDSS SEGUE Stellar Parameter Pipeline data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets.
        By default all astroML data is stored in '~/astroML_data'
        subfolders.
    download_if_missing : bool (optional) default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    cleaned : bool (optional) default=False
        if True, then return a cleaned catalog where objects with extreme
        values are removed.

    Returns
    -------
    data : recarray, shape = (327260,)
        record array containing pipeline parameters

    Notes
    -----
    Here are the comments from the fits file header:

    Imaging data and spectrum identifiers for a sample of 327,260
    stars with SDSS spectra, selected as:

      1) available SSPP parameters in SDSS Data Release 9
         (SSPP rerun 122, file from Y.S. Lee)
      2) 14 < r < 21 (psf magnitudes, uncorrected for ISM extinction)
      3) 10 < u < 25 & 10 < z < 25 (same as above)
      4) errors in ugriz well measured (>0) and <10
      5) 0 < u-g < 3 (all color cuts based on psf mags, dereddened)
      6) -0.5 < g-r < 1.5 & -0.5 < r-i < 1.0 & -0.5 < i-z < 1.0
      7) -200 < pmL < 200 & -200 < pmB < 200 (proper motion in mas/yr)
      8) pmErr < 10 mas/yr (proper motion error)
      9) 1 < log(g) < 5
      10) TeffErr < 300 K

    Teff and TeffErr are given in Kelvin, radVel and radVelErr in km/s.

    (ZI, Feb 2012, [email protected])

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_sspp
    >>> data = fetch_sdss_sspp()  # doctest: +IGNORE_OUTPUT
    >>> data.shape  # number of objects in dataset
    (327260,)
    >>> print(data.dtype.names[:5])  # names of the first five columns
    ('ra', 'dec', 'Ar', 'upsf', 'uErr')
    >>> print(data['ra'][:1])  # first RA value
    [49.6275024]
    >>> print(data['dec'][:1])  # first DEC value
    [-1.04175591]
    """
    # fits is an optional dependency: don't import globally
    from astropy.io import fits

    data_home = get_data_home(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        fitsdata = download_with_progress_bar(DATA_URL)
        with open(archive_file, 'wb') as f:
            f.write(fitsdata)

    hdulist = fits.open(archive_file)
    data = np.asarray(hdulist[1].data)

    if cleaned:
        # -1.1 < FeH < 0.1
        data = data[(data['FeH'] > -1.1) & (data['FeH'] < 0.1)]

        # -0.03 < alpha/Fe < 0.57
        data = data[(data['alphFe'] > -0.03) & (data['alphFe'] < 0.57)]

        # 5000 < Teff < 6500
        data = data[(data['Teff'] > 5000) & (data['Teff'] < 6500)]

        # 3.5 < log(g) < 5
        data = data[(data['logg'] > 3.5) & (data['logg'] < 5)]

        # 0 < error for FeH < 0.1
        data = data[(data['FeHErr'] > 0) & (data['FeHErr'] < 0.1)]

        # 0 < error for alpha/Fe < 0.05
        data = data[(data['alphFeErr'] > 0) & (data['alphFeErr'] < 0.05)]

        # 15 < g mag < 18
        data = data[(data['gpsf'] > 15) & (data['gpsf'] < 18)]

        # abs(radVel) < 100 km/s
        data = data[(abs(data['radVel']) < 100)]

    return data
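# Sketch (our helper): compare the full and cleaned SSPP samples and
# summarize the metallicity range, using the FeH column referenced in
# the cleaning cuts above.
def demo_sspp_cleaning():
    raw = fetch_sdss_sspp()
    cleaned = fetch_sdss_sspp(cleaned=True)
    print("full sample: %d stars; cleaned sample: %d stars"
          % (raw.shape[0], cleaned.shape[0]))
    print("cleaned [Fe/H] range: %.2f to %.2f"
          % (cleaned['FeH'].min(), cleaned['FeH'].max()))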