def load_astro_dataset(stride=5):
    """Build a quasar-vs-star classification set from photometric colors.

    Quasars are read with ``fetch_dr7_quasar()`` and stars with
    ``fetch_sdss_sspp()``.  Both catalogs are thinned by keeping every
    ``stride``-th record, and each object is described by the four
    adjacent-band colors u-g, g-r, r-i and i-z.

    Parameters
    ----------
    stride : int, optional
        Subsampling step applied to both catalogs.  The default (5) is the
        value that used to be hard-coded.

    Returns
    -------
    X : ndarray of float, shape (n_quasars + n_stars, 4)
        Color features; quasar rows come first, star rows follow.
    y : ndarray of int, shape (n_quasars + n_stars,)
        Class labels: +1 for quasars, -1 for stars.
    """
    quasars = fetch_dr7_quasar()[::stride]
    stars = fetch_sdss_sspp()[::stride]

    Nqso = len(quasars)
    Nstars = len(stars)

    X = empty((Nqso + Nstars, 4), dtype=float)

    # Quasar magnitudes are stored under 'mag_<band>' ...
    X[:Nqso, 0] = quasars['mag_u'] - quasars['mag_g']
    X[:Nqso, 1] = quasars['mag_g'] - quasars['mag_r']
    X[:Nqso, 2] = quasars['mag_r'] - quasars['mag_i']
    X[:Nqso, 3] = quasars['mag_i'] - quasars['mag_z']

    # ... while the stellar catalog uses '<band>psf'.
    X[Nqso:, 0] = stars['upsf'] - stars['gpsf']
    X[Nqso:, 1] = stars['gpsf'] - stars['rpsf']
    X[Nqso:, 2] = stars['rpsf'] - stars['ipsf']
    X[Nqso:, 3] = stars['ipsf'] - stars['zpsf']

    # Labels follow the row layout of X: quasars first (+1), then stars (-1).
    y = zeros(Nqso + Nstars, dtype=int)
    y[:Nqso] = 1
    y[Nqso:] = -1

    return X, y
------------------------------------------ This example shows how to fetch the data from the Segue Stellar Parameter Pipeline (SSPP), and plot the temperature and surface gravity of a selection of stars. """ # Author: Jake VanderPlas <*****@*****.**> # License: BSD # The figure produced by this code is published in the textbook # "Statistics, Data Mining, and Machine Learning in Astronomy" (2013) # For more information, see http://astroML.github.com from matplotlib import pyplot as plt from astroML.datasets import fetch_sdss_sspp #------------------------------------------------------------ # Fetch the data data = fetch_sdss_sspp() # select the first 10000 points data = data[:10000] # do some reasonable magnitude cuts rpsf = data['rpsf'] data = data[(rpsf > 15) & (rpsf < 19)] # get the desired data logg = data['logg'] Teff = data['Teff'] #------------------------------------------------------------ # Plot the data ax = plt.axes()
# -*- coding: utf-8 -*-
"""
Fetch the quasar and stellar catalogs through astroML and store each one as
a NumPy ``.npy`` file in the current working directory.

Created on Tue Aug 7 15:30:22 2018

@author: zyv57124
"""
import numpy as np
from astroML.datasets import fetch_LINEAR_geneva
from astroML.datasets import fetch_dr7_quasar
from astroML.datasets import fetch_sdss_sspp

# Both catalogs are fetched before anything is written to disk.
quasars = fetch_dr7_quasar()
stars = fetch_sdss_sspp()

# Write each catalog under its fixed file name, quasars first.
for _path, _catalog in (('quasars.npy', quasars), ('stars.npy', stars)):
    np.save(_path, _catalog)
from astroML.datasets import fetch_sdss_sspp
from astroML.decorators import pickle_results
from astroML.plotting.tools import draw_ellipse

#----------------------------------------------------------------------
# This function adjusts matplotlib settings for a uniform feel in the textbook.
# Note that with usetex=True, fonts are rendered with LaTeX.  This may
# result in an error if LaTeX is not installed on your system.  In that case,
# you can set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Get the Segue Stellar Parameters Pipeline data
data = fetch_sdss_sspp(cleaned=True)
# Stack the two columns and transpose: one row per catalog entry,
# columns are (FeH, alphFe).
X = np.vstack([data['FeH'], data['alphFe']]).T

# truncate dataset for speed (keeps every fifth row)
X = X[::5]

#------------------------------------------------------------
# Compute GMM models & AIC/BIC
# Integers 1 through 13.
# NOTE(review): presumably the component counts to try -- confirm against
# the full body of compute_GMM.
N = np.arange(1, 14)


# NOTE(review): pickle_results is expected to cache the function's return
# value in "GMM_metallicity.pkl" -- confirm in the astroML.decorators docs.
@pickle_results("GMM_metallicity.pkl")
def compute_GMM(N, covariance_type='full', n_iter=1000):
    """Compute GMM models for each entry of ``N``.

    NOTE(review): ``covariance_type`` and ``n_iter`` are presumably passed
    on to the mixture-model fit -- confirm against the full function body.
    """
    # One placeholder slot per entry of N.
    models = [None for n in N]
    for i in range(len(N)):
        # Progress output (Python 2 print statement).
        print N[i]
#----------------------------------------------------------------------
# Apply the textbook's common matplotlib settings.  With usetex=True the
# text is rendered through LaTeX, which can raise an error when LaTeX is
# not installed; set usetex to False in that case.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Fetch data and split into training and test samples
from astroML.datasets import fetch_dr7_quasar
from astroML.datasets import fetch_sdss_sspp

# Keep only every fifth record of each catalog for speed
quasars = fetch_dr7_quasar()[::5]
stars = fetch_sdss_sspp()[::5]

# Color matrix X: the first Nqso rows hold the quasars, and Nstars further
# rows are allocated after them
Nqso, Nstars = len(quasars), len(stars)
X = np.empty((Nqso + Nstars, 4), dtype=float)

# Quasar colors u-g, g-r, r-i, i-z fill columns 0..3
for _col, (_blue, _red) in enumerate(zip('ugri', 'griz')):
    X[:Nqso, _col] = quasars['mag_' + _blue] - quasars['mag_' + _red]
from sklearn import preprocessing
from astroML.datasets import fetch_sdss_sspp

#----------------------------------------------------------------------
# Apply the textbook's common matplotlib settings.  With usetex=True the
# text is rendered through LaTeX, which can raise an error when LaTeX is
# not installed; set usetex to False in that case.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Load the cleaned SSPP catalog (the random seed is fixed first)
np.random.seed(0)
data = fetch_sdss_sspp(cleaned=True)

# Drop the extra outliers: rows with alphFe > 0.4 and FeH > -0.3 together
_outlier = (data['alphFe'] > 0.4) & (data['FeH'] > -0.3)
data = data[~_outlier]

# Two-column matrix with FeH in column 0 and alphFe in column 1
X = np.transpose(np.vstack((data['FeH'], data['alphFe'])))

#----------------------------------------------------------------------
# Compute clustering with MeanShift
#
# We'll work with the scaled data, because MeanShift finds circular clusters
X_scaled = preprocessing.scale(X)

# The following bandwidth can be automatically detected using
# the routine estimate_bandwidth().  Because bandwidth estimation