# License: BSD
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   For more information, see http://astroML.github.com
import numpy as np
from matplotlib import pyplot as plt

from sklearn import manifold, neighbors

from astroML.datasets import sdss_corrected_spectra
from astroML.datasets import fetch_sdss_corrected_spectra
from astroML.decorators import pickle_results
from astroML.plotting.tools import discretize_cmap

#------------------------------------------------------------
# Fetch the data
data = fetch_sdss_corrected_spectra()
spec = sdss_corrected_spectra.reconstruct_spectra(data)
color = data['lineindex_cln']


#------------------------------------------------------------
# Compute the LLE projection; save the results
@pickle_results("spec_LLE.pkl")
def compute_spec_LLE(n_neighbors=10, out_dim=3):
    """Fit a modified locally linear embedding to the module-level ``spec``.

    Parameters
    ----------
    n_neighbors : int, optional
        Forwarded to ``LocallyLinearEmbedding`` as ``n_neighbors``.
    out_dim : int, optional
        Forwarded to ``LocallyLinearEmbedding`` as ``n_components``.

    Notes
    -----
    The ``pickle_results`` decorator presumably stores the result in
    "spec_LLE.pkl" -- verify against ``astroML.decorators``.
    """
    # Parameters are passed by keyword: current scikit-learn releases make
    # the estimator's constructor arguments keyword-only, so the former
    # positional call fails there.
    LLE = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                          n_components=out_dim,
                                          method='modified',
                                          eigen_solver='dense')
    Y_LLE = LLE.fit_transform(spec)
    # Call form of print works on both Python 2 and Python 3.
    print(" - finished LLE projection")
    # NOTE(review): the visible excerpt ends here without a return
    # statement -- confirm the rest of the function against the full file.
# https://groups.google.com/forum/#!forum/astroml-general
import numpy as np
from matplotlib import pyplot as plt

from astroML import datasets

#----------------------------------------------------------------------
# Apply the textbook's shared matplotlib settings so figures look uniform.
# usetex=True renders fonts through LaTeX and may raise an error when
# LaTeX is not installed; pass usetex=False in that case.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Load the SDSS corrected spectra and reconstruct the spectra from them
data = datasets.fetch_sdss_corrected_spectra()
spectra = datasets.sdss_corrected_spectra.reconstruct_spectra(data)

# The eigenvalues could instead be obtained from a PCA fit:
#   from sklearn.decomposition import PCA
#   pca = PCA()
#   pca.fit(spectra)
#   evals = pca.explained_variance_ratio_
#   evals_cs = evals.cumsum()
# That is not exactly correct here because the spectra were reconstructed
# from masked values, so the values computed in compute_sdss_pca.py are
# used instead.
evals = np.square(data['evals'])
evals_cs = np.cumsum(evals)
# https://groups.google.com/forum/#!forum/astroml-general
import numpy as np
from matplotlib import pyplot as plt

from astroML import datasets

#----------------------------------------------------------------------
# Apply the textbook's shared matplotlib settings so figures look uniform.
# usetex=True renders fonts through LaTeX and may raise an error when
# LaTeX is not installed; pass usetex=False in that case.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Load the SDSS corrected spectra and reconstruct the spectra from them
data = datasets.fetch_sdss_corrected_spectra()
spectra = datasets.sdss_corrected_spectra.reconstruct_spectra(data)

# The eigenvalues could instead be obtained from a PCA fit:
#   from sklearn.decomposition import PCA
#   pca = PCA()
#   pca.fit(spectra)
#   evals = pca.explained_variance_ratio_
#   evals_cs = evals.cumsum()
# That is not exactly correct here because the spectra were reconstructed
# from masked values, so the values computed in compute_sdss_pca.py are
# used instead.
evals = np.square(data['evals'])
evals_cs = np.cumsum(evals)
# Sample-count dataset for the digits: stack six 2x2 pixel windows taken
# from every image, flatten each window to 4 features, and keep the first
# 10000 rows.
digits_N = np.vstack([
    digits.images[:, 2:4, 2:4],
    digits.images[:, 2:4, 4:6],
    digits.images[:, 4:6, 2:4],
    digits.images[:, 4:6, 4:6],
    digits.images[:, 4:6, 5:7],
    digits.images[:, 5:7, 4:6],
]).reshape((-1, 4))[:10000]

# For the dimensionality test, we need up to 128 dimensions, so
# we'll combine some of the images.
# NOTE(review): stacking rows [:1000] followed by rows [1000:] reproduces
# the original row order, so the right-hand half equals the left-hand
# half -- confirm this duplication is intended.
# The edge pixels are all basically zero.  For the dimensionality tests
# to be reasonable, we want the low-dimension case to probe interior
# pixels, so the columns are rotated left by 28.
digits_D = np.roll(
    np.hstack((digits.data,
               np.vstack((digits.data[:1000], digits.data[1000:])))),
    -28, axis=1)

spectra = fetch_sdss_corrected_spectra()['spectra']

# Take sections of spectra and stack them to reach N=10000 samples
spectra_N = np.vstack([
    spectra[:, 500:504],
    spectra[:, 504:508],
    spectra[:2000, 508:512],
])

# Take a central region of the spectra for the dimensionality study
spectra_D = spectra[:1797, 400:528]

titles = ['Uniform', 'Digits', 'Spectra']
datasets_D = [uniform_D, digits_D, spectra_D]
datasets_N = [uniform_N, digits_N, spectra_N]

# One panel per dataset: scatter column 1 against column 2 of the
# dimensionality-study arrays.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.5))
for axi, title, dataset in zip(ax, titles, datasets_D):
    axi.plot(dataset[:, 1], dataset[:, 2], '.k')