def cannon_copy(model):
    """
    Build an independent duplicate of a Cannon model.

    A placeholder model with the same number of pixels and labels is
    constructed first (with copies of the vectorizer, censors, dispersion
    and regularization), then every remaining instance attribute of
    `model` is deep-copied onto it.  An attached continuum model, if
    present, is duplicated recursively.

    Parameters
    ----------
    model : Cannon model
       The Cannon model to duplicate.

    Returns
    -------
    omodel : Cannon model
       The new Cannon model.

    Examples
    --------
    omodel = cannon_copy(model)

    """
    # Attributes that are supplied through the constructor (or, for the
    # continuum, copied recursively below) and so skipped in the bulk copy.
    handled = ('_vectorizer', '_censors', '_regularization',
               '_dispersion', 'continuum')

    # Placeholder training data of the right dimensions
    n_pixels, _ = model._theta.shape
    n_labels = len(model.vectorizer.label_names)
    dummy_labels = np.zeros([2, n_labels])
    dummy_flux = np.zeros([2, n_pixels])
    dummy_ivar = dummy_flux.copy() * 0

    # New vectorizer of the same class
    old_vec = model.vectorizer
    new_vec = type(old_vec)(
        label_names=copy.deepcopy(old_vec._label_names),
        terms=copy.deepcopy(old_vec._terms))

    # New censors
    old_censors = model.censors
    new_censors = censoring.Censors(
        label_names=copy.deepcopy(old_censors._label_names),
        num_pixels=copy.deepcopy(old_censors._num_pixels))

    # The duplicate itself
    duplicate = tc.CannonModel(
        dummy_labels, dummy_flux, dummy_ivar,
        vectorizer=new_vec,
        dispersion=copy.deepcopy(model.dispersion),
        regularization=copy.deepcopy(model.regularization),
        censors=new_censors)

    # Bulk copy of everything not handled above
    for attr, val in vars(model).items():
        if attr in handled:
            continue
        setattr(duplicate, attr, copy.deepcopy(val))

    # Continuum model, if there is one
    if hasattr(model, 'continuum'):
        duplicate.continuum = cannon_copy(model.continuum)

    return duplicate
def retrieve_standards(idl):
    """Generate synthetic spectra for the cool-dwarf standards and build a Cannon model.

    Reads "standards.tsv", keeps rows with teff < 5500 and logg > 4.0,
    requests one spectrum per remaining row via get_idl_spectrum, writes the
    spectra and the wavelength grid to CSV files, and constructs a quadratic
    tc.CannonModel on (teff, logg, feh).

    :param idl: NOTE(review): this argument is overwritten by idl_init()
        before it is ever read, so the value passed in is ignored - confirm
        whether that is intended.

    NOTE(review): nothing is returned in the code visible here; the model
    built at the end is a local variable only.
    """
    standards = pd.read_csv("standards.tsv", sep="\t", header=0, dtype={"source_id":str})
    # Element-wise product of two boolean columns acts as a logical AND
    mask = (standards["teff"] < 5500) * (standards["logg"] > 4.0)
    # Labels for the selected stars only
    training_set = standards[mask][["teff","logg","feh"]]
    spectra = []
    # Replaces the `idl` argument (see NOTE in the docstring)
    idl = idl_init()
    standards = standards[mask].copy()
    for star_i, row in standards.iterrows():
        print(star_i)
        # wl_min, wl_max, resolution and wl_per_pixel are not defined in this
        # function - presumably module-level settings; verify
        wave, spec = get_idl_spectrum(idl, row["teff"], row["logg"], row["feh"], wl_min, wl_max, resolution, 1, True, wl_per_pixel)
        spectra.append(spec)
    spectra = np.array(spectra)
    # Constant inverse variance of 0.01 for every pixel
    normalized_ivar = np.ones_like(spectra) * 0.01
    np.savetxt("spectra_standards.csv", spectra)
    # `wave` is whatever the last loop iteration returned; it is undefined if
    # no star passes the mask
    np.savetxt("spectra_wavelengths.csv", wave)
    import thecannon as tc
    vectorizer = tc.vectorizer.PolynomialVectorizer(("teff", "logg", "feh"), 2)
    model = tc.CannonModel(training_set, spectra, normalized_ivar, vectorizer=vectorizer)
def rebin_cannon_model(model, binsize):
    """
    Bin down a Cannon model (or every model in a list) by an integer factor.

    Trailing pixels that do not fill a complete bin are dropped before
    rebinning.

    Parameters
    ----------
    model : Cannon model or list
       The Cannon model, or list of Cannon models, to rebin.
    binsize : int
       How many input pixels go into one output pixel.

    Returns
    -------
    omodel : Cannon model or list
       The rebinned Cannon model, or a list of them when a list was input.

    Examples
    --------
    omodel = rebin_cannon_model(model,4)

    """
    # List input: rebin each element separately
    if type(model) is list:
        return [rebin_cannon_model(single, binsize) for single in model]

    n_in, n_terms = model.theta.shape
    n_out = np.round(n_in // binsize).astype(int)
    n_used = n_out * binsize          # input pixels that fill whole bins

    # Placeholder training data with the output dimensions
    n_labels = len(model.vectorizer.label_names)
    dummy_labels = np.zeros([2, n_labels])
    dummy_flux = np.zeros([2, n_out])
    dummy_ivar = dummy_flux.copy() * 0
    omodel = tc.CannonModel(dummy_labels, dummy_flux, dummy_ivar,
                            model.vectorizer)

    # Per-pixel quantities are rebinned
    omodel._s2 = dln.rebin(model._s2[0:n_used], n_out)
    omodel._scales = model._scales
    omodel._theta = np.zeros((n_out, n_terms), np.float64)
    for term in range(n_terms):
        omodel._theta[:, term] = dln.rebin(model._theta[0:n_used, term],
                                           n_out)

    # Pixel-independent quantities are passed through as they are
    omodel._design_matrix = model._design_matrix
    omodel._fiducials = model._fiducials
    if model.dispersion is not None:
        omodel.dispersion = dln.rebin(model.dispersion[0:n_used], n_out)
    omodel.regularization = model.regularization
    if hasattr(model, 'ranges') is True:
        omodel.ranges = model.ranges

    # Continuum model is copied, not rebinned
    if hasattr(model, 'continuum'):
        omodel.continuum = cannon_copy(model.continuum)

    return omodel
def interp_cannon_model(model, xout=None, wout=None):
    """
    Resample a Cannon model (or every model in a list) onto a new pixel
    or wavelength grid using cubic interpolation.

    At least one of xout and wout has to be given.  When only wout is
    given it is first converted to pixel positions using the dispersion
    of the model.

    Parameters
    ----------
    model : Cannon model or list
       Cannon model or list of Cannon models to interpolate.
    xout : array, optional
       The output pixel positions.
    wout : array, optional
       The output wavelengths.  Requires the model to have a dispersion.

    Returns
    -------
    omodel : Cannon model or list
       The interpolated Cannon model, or a list of them when a list was input.
       Scatter and theta are NaN for positions outside of the input pixel
       range, while the dispersion is extrapolated.

    Examples
    --------
    omodel = interp_cannon_model(model,wout=wout)

    """
    if xout is None and wout is None:
        raise Exception('xout or wout must be input')

    # List input: interpolate each element separately
    if type(model) is list:
        return [interp_cannon_model(single, xout=xout, wout=wout)
                for single in model]

    has_wave = model.dispersion is not None
    if wout is not None and not has_wave:
        raise Exception(
            'wout input but no dispersion information in model')

    n_in, n_terms = model.theta.shape
    pix = np.arange(n_in)

    # Only wavelengths given: translate them to pixel positions
    if xout is None:
        xout = interp1d(model.dispersion, pix, kind='cubic',
                        bounds_error=False, fill_value='extrapolate',
                        assume_sorted=True)(wout)

    def _resample(values, fill):
        # Cubic interpolation of one per-pixel array onto xout
        return interp1d(pix, values, kind='cubic', bounds_error=False,
                        fill_value=fill, assume_sorted=True)(xout)

    # Placeholder training data with the output dimensions
    n_out = len(xout)
    n_labels = len(model.vectorizer.label_names)
    dummy_labels = np.zeros([2, n_labels])
    dummy_flux = np.zeros([2, n_out])
    dummy_ivar = dummy_flux.copy() * 0
    omodel = tc.CannonModel(dummy_labels, dummy_flux, dummy_ivar,
                            model.vectorizer)

    # Per-pixel quantities are interpolated
    omodel._s2 = _resample(model._s2, (np.nan, np.nan))
    omodel._scales = model._scales
    omodel._theta = np.zeros((n_out, n_terms), np.float64)
    for term in range(n_terms):
        omodel._theta[:, term] = _resample(model._theta[:, term],
                                           (np.nan, np.nan))

    # Pixel-independent quantities are passed through as they are
    omodel._design_matrix = model._design_matrix
    omodel._fiducials = model._fiducials
    if has_wave:
        omodel.dispersion = _resample(model.dispersion, 'extrapolate')
    omodel.regularization = model.regularization
    if hasattr(model, 'ranges') is True:
        omodel.ranges = model.ranges

    # Continuum model is copied, not interpolated
    if hasattr(model, 'continuum'):
        omodel.continuum = cannon_copy(model.continuum)

    return omodel
def trim_cannon_model(model, x0=None, x1=None, w0=None, w1=None):
    """
    Trim a Cannon model (or list of Cannon models) to a smaller pixel
    or wavelength range.

    Each end of the range can be given either as a pixel (x0, x1) or as a
    wavelength (w0, w1).  A wavelength is converted to the pixel whose
    dispersion value is closest to it.  Both end pixels are included in
    the output.

    Parameters
    ----------
    model : Cannon model or list
       Cannon model or list of Cannon models.
    x0 : int, optional
       The starting pixel to trim to.
    x1 : int, optional
       The ending pixel to trim to (inclusive).
    w0 : float, optional
       The starting wavelength to trim to.  Used only if x0 is not input.
    w1 : float, optional
       The ending wavelength to trim to.  Used only if x1 is not input.

    Returns
    -------
    omodel : Cannon model(s)
       The trimmed Cannon model(s).

    Raises
    ------
    ValueError
       If the start or the end of the range is not specified, or if a
       wavelength limit is needed but the model has no dispersion.

    Examples
    --------
    omodel = trim_cannon_model(model,100,1000)

    """
    # Fail early with a clear message instead of a TypeError further down
    if x0 is None and w0 is None:
        raise ValueError('x0 or w0 must be input')
    if x1 is None and w1 is None:
        raise ValueError('x1 or w1 must be input')

    # List input: trim each element separately
    if isinstance(model, list):
        return [trim_cannon_model(single, x0=x0, x1=x1, w0=w0, w1=w1)
                for single in model]

    dispersion = model.dispersion
    if dispersion is None and (x0 is None or x1 is None):
        raise ValueError(
            'w0/w1 input but no dispersion information in model')

    # Convert wavelength limits to the nearest pixels
    if x0 is None:
        x0 = np.argmin(np.abs(dispersion - w0))
    if x1 is None:
        x1 = np.argmin(np.abs(dispersion - w1))

    # Placeholder training data with the output dimensions
    npix = x1 - x0 + 1
    nlabels = len(model.vectorizer.label_names)
    labelled_set = np.zeros([2, nlabels])
    normalized_flux = np.zeros([2, npix])
    normalized_ivar = normalized_flux.copy() * 0
    omodel = tc.CannonModel(labelled_set, normalized_flux, normalized_ivar,
                            model.vectorizer)

    # Per-pixel quantities are sliced
    omodel._s2 = model._s2[x0:x1 + 1]
    omodel._scales = model._scales
    omodel._theta = model._theta[x0:x1 + 1, :]

    # Pixel-independent quantities are passed through as they are
    omodel._design_matrix = model._design_matrix
    omodel._fiducials = model._fiducials
    # A model without dispersion can still be trimmed by pixel
    if dispersion is not None:
        omodel.dispersion = dispersion[x0:x1 + 1]
    omodel.regularization = model.regularization
    if hasattr(model, 'ranges'):
        omodel.ranges = model.ranges

    # Continuum model is copied, not trimmed
    if hasattr(model, 'continuum'):
        omodel.continuum = cannon_copy(model.continuum)

    return omodel
def hstack(models):
    """
    Stack Cannon models.  Basically combine all of the pixels right
    next to each other, in the order in which the models are given.

    Parameters
    ----------
    models : list
       The Cannon models to stack.  They must all share the same scales,
       design matrix and fiducials (and ranges, if the first model has them).

    Returns
    -------
    omodel : Cannon model
       The stacked Cannon model.  If only one model is input then the
       input is returned unchanged.

    Raises
    ------
    ValueError
       If any model disagrees with the first one in scales, design matrix,
       fiducials or (when present) ranges.

    Examples
    --------
    omodel = hstack(models)

    """
    nmodels = dln.size(models)
    if nmodels == 1:
        return models

    first = models[0]
    has_ranges = hasattr(first, 'ranges')

    # scales, design_matrix, fiducials should be identical or we have problems.
    # Every model is checked against the first one, not just the second.
    for i in range(1, nmodels):
        other = models[i]
        ndiff = (np.sum(first._scales != other._scales) +
                 np.sum(first._design_matrix != other._design_matrix) +
                 np.sum(first._fiducials != other._fiducials))
        if has_ranges:
            ndiff += np.sum(first.ranges != other.ranges)
        if ndiff > 0:
            if has_ranges:
                raise ValueError(
                    'scales, design_matrix, fiducials, and ranges must be identical in the Cannon models'
                )
            raise ValueError(
                'scales, design_matrix, and fiducials must be identical in the Cannon models'
            )

    # Number of combined pixels
    nfpix = sum(len(models[i].dispersion) for i in range(nmodels))

    # Placeholder training data with the output dimensions
    npix, ntheta = first._theta.shape
    nlabels = len(first.vectorizer.label_names)
    labelled_set = np.zeros([2, nlabels])
    normalized_flux = np.zeros([2, nfpix])
    normalized_ivar = normalized_flux.copy() * 0

    # Vectorizer
    vclass = type(first.vectorizer)
    vec = vclass(label_names=copy.deepcopy(first.vectorizer._label_names),
                 terms=copy.deepcopy(first.vectorizer._terms))

    # Censors
    # NOTE(review): num_pixels is taken from the first model, not nfpix
    censors = censoring.Censors(
        label_names=copy.deepcopy(first.censors._label_names),
        num_pixels=copy.deepcopy(first.censors._num_pixels))

    # Make new cannon model
    omodel = tc.CannonModel(labelled_set, normalized_flux, normalized_ivar,
                            vectorizer=vec,
                            regularization=copy.deepcopy(first.regularization),
                            censors=censors)
    omodel._s2 = np.zeros(nfpix, np.float64)
    omodel._scales = first._scales.copy()
    omodel._theta = np.zeros((nfpix, ntheta), np.float64)
    omodel._design_matrix = first._design_matrix.copy()
    omodel._fiducials = first._fiducials.copy()
    omodel.dispersion = np.zeros(nfpix, np.float64)
    omodel.regularization = first.regularization
    if has_ranges:
        omodel.ranges = first.ranges

    # Fill in the per-pixel information, one model after the other
    off = 0  # offset
    for i in range(nmodels):
        model = models[i]
        npix, ntheta = model._theta.shape
        omodel._s2[off:off + npix] = model._s2
        omodel._theta[off:off + npix, :] = model._theta
        omodel.dispersion[off:off + npix] = model.dispersion
        off += npix

    # Stack the continuum models as well.  The presence test is made on
    # the last model, but the continuum of every model is then used.
    if hasattr(models[nmodels - 1], 'continuum'):
        cmodels = [models[i].continuum for i in range(nmodels)]
        omodel.continuum = hstack(cmodels)

    return omodel
def convolve_cannon_model(model, lsf):
    """
    Smooth a Cannon model (or every model in a list) with a line spread
    function.

    Parameters
    ----------
    model : Cannon model or list
       Input Cannon model or list of Cannon models to convolve, with Npix
       pixels.
    lsf : array
       The line spread function (LSF).  A 1D array is applied as a single
       kernel to all pixels (with reflection at the edges).  Anything else
       is handed to utils.convolve_sparse, for which the shape must be
       [Npix,Nlsf] with one kernel per pixel.

    Returns
    -------
    omodel : Cannon model or list
       The convolved Cannon model, or a list of them when a list was input.

    Examples
    --------
    omodel = convolve_cannon_model(model,lsf)

    """
    # List input: convolve each element separately with the same LSF
    if type(model) is list:
        return [convolve_cannon_model(single, lsf) for single in model]

    n_pix, n_terms = model.theta.shape

    # Placeholder training data with the same dimensions
    n_labels = len(model.vectorizer.label_names)
    dummy_labels = np.zeros([2, n_labels])
    dummy_flux = np.zeros([2, n_pix])
    dummy_ivar = dummy_flux.copy() * 0
    omodel = tc.CannonModel(dummy_labels, dummy_flux, dummy_ivar,
                            model.vectorizer)
    omodel._theta = model._theta * 0

    # Pick the smoothing operation once: single kernel or per-pixel kernels
    if lsf.ndim == 1:
        def _smooth(values):
            return convolve(values, lsf, mode="reflect")
    else:
        def _smooth(values):
            return utils.convolve_sparse(values, lsf)

    # Per-pixel quantities are convolved
    omodel._s2 = _smooth(model._s2)
    for term in range(n_terms):
        omodel._theta[:, term] = _smooth(model._theta[:, term])

    # Everything else is passed through as it is
    omodel._scales = model._scales
    omodel._design_matrix = model._design_matrix
    omodel._fiducials = model._fiducials
    if model.dispersion is not None:
        omodel.dispersion = model.dispersion
    omodel.regularization = model.regularization
    if hasattr(model, 'ranges') is True:
        omodel.ranges = model.ranges

    # Continuum model is copied, not convolved
    if hasattr(model, 'continuum'):
        omodel.continuum = cannon_copy(model.continuum)

    return omodel
#tag = '3000_18000_whitedwarfs'
print(tag)

# Import the spectra and labels
normalized_flux = fits.getdata('cannongrid_' + tag + '_synth_data_norm.fits.gz')
labelled_set = Table.read('cannongrid_' + tag + '_synth_pars.fits')

# Add wavelengths: 0.10 per pixel starting at 3000.0
nspec, npix = normalized_flux.shape
wave = np.arange(npix) * 0.10 + 3000.0
#model3.dispersion = wave

# Same inverse variance (1e4) for every pixel
normalized_ivar = normalized_flux.copy() * 0 + 1e4

# Cubic polynomial vectorizer over all columns of the label table
vec3 = tc.vectorizer.PolynomialVectorizer(labelled_set.colnames, 3)
model3 = tc.CannonModel(labelled_set, normalized_flux, normalized_ivar, vec3,
                        wave)
model3.regularization = 0  # no regularization for now

# Train the model
nr_theta, nr_s2, nr_metadata = model3.train()

# Write the model, replacing any previous output file
modelfile = 'cannongrid_' + tag + '_norm_cubic_model.pkl'
if os.path.exists(modelfile):
    os.remove(modelfile)
model3.write(modelfile)

# Check if there is a continuum model to add
contfile = 'cannongrid_' + tag + '_cont_cubic_model_logflux.pkl'
if os.path.exists(contfile):
    print('Continuum model found. Adding it')
    # The context manager closes the file even if unpickling fails.
    # NOTE: pickle.load executes code from the file; only load files
    # produced by this pipeline.
    with open(contfile, 'rb') as f:
        cont = pickle.load(f)
# Exploratory script fragment: trains a linear Cannon model on the loaded
# spectra and spot-checks it on one star.
# NOTE(review): S, labels, all_paths and blue_paths are not defined in the
# visible code - presumably set up earlier in the script; verify.

# Two sample spectra; the length of the first is used as the second
# dimension P of the arrays below
S1 = load_spectra(blue_paths[45])
S2 = load_spectra(blue_paths[597])
P = len(S1)

# One row of flux per path in all_paths
fluxes = np.zeros((S,P))
for i,path in enumerate(all_paths):
    fluxes[i] = load_spectra(path)['flux']

# Same inverse variance (10000) for every pixel
ivars = 10000*np.ones_like(fluxes)

# First-order polynomial vectorizer in (teff, logg, fe_h)
vectoriser = tc.vectorizer.PolynomialVectorizer(('teff','logg','fe_h'),1)
model = tc.CannonModel(labels,fluxes,ivars,vectoriser)
model.train()

# Run the test step on the same spectra that were used for training
test_labels,test_cov,test_meta=model.test(fluxes,ivars)

# Bare expressions (results are discarded unless run interactively):
# the labels of entry 57, and the model evaluated at those labels
labels[57]
model([labels[ln][57] for ln in labels.dtype.names])

# One row of the 'lambda' field per path in blue_paths
# NOTE(review): this loops over blue_paths while the flux loop above uses
# all_paths - confirm both have S entries
wavelengths = np.zeros((S,P))
for i,path in enumerate(blue_paths):
    wavelengths[i] = load_spectra(path)['lambda']
np.arange(4715.94, 4896.00, 0.046), # ab lines 4716.3 - 4892.3 np.arange(5650.06, 5868.25, 0.055), # ab lines 5646.0 - 5867.8 np.arange(6480.52, 6733.92, 0.064), # ab lines 6481.6 - 6733.4 np.arange(7693.50, 7875.55, 0.074), # ab lines 7691.2 - 7838.5 ]) # Create a cannon model. print("Constructing model") vectorizer = tc.vectorizer.PolynomialVectorizer(label_names.astype('str'), 2) training_set_flux, training_set_err = spectra.T training_set_ivar = 1.0 / training_set_err**2 model = tc.CannonModel(training_set_labels, training_set_flux, training_set_ivar, vectorizer, dispersion=dispersion) model._s2 = scatters**2 model._theta = coeffs model._fiducials = offsets model._scales = np.ones_like(offsets) # Check a random spectrum. idx = np.random.choice(len(training_set_flux)) fig, ax = plt.subplots() ax.plot(dispersion, training_set_flux[idx], c='k') model_flux = model(training_set_labels[idx]).flatten() ax.plot(dispersion, model_flux, c='r')
def __init__(self, training_set, label_names, wavelength_arms=None,
             censors=None, progress_bar=False, threads=None, tolerance=None,
             polynomial_order=2, load_from_file=None, debugging=False):
    """
    Instantiate the Cannon and train it on the spectra contained within a SpectrumArray.

    :param training_set:
        A SpectrumArray containing the spectra to train the Cannon on. Note that bad pixels (negative flux or
        non-finite inverse variance) are overwritten in place in this array.

    :param label_names:
        A list of the names of the labels the Cannon is to estimate. We require that all of the training spectra
        have metadata fields defining all of these labels.

    :param wavelength_arms:
        A list of the wavelength break-points between arms which should have continuum fitted separately. For
        compatibility we accept this argument, but it is not used for continuum-normalised spectra.

    :param censors:
        Censoring information, passed unchanged to the CannonModel constructor.

    :param progress_bar:
        Passed to suppress_stdout() around the training step.

    :param threads:
        The number of CPU cores we should use. If None, we look up how many cores this computer has.

    :param tolerance:
        The convergence tolerance passed to the Cannon's optimiser as both xtol and ftol. If None, the
        optimiser's defaults are used.

    :param polynomial_order:
        The order of polynomials to use as fitting functions within the Cannon.

    :param load_from_file:
        The filename of the internal state of a pre-trained Cannon, which we should load rather than doing
        training from scratch.

    :param debugging:
        Boolean flag determining whether we produce debugging output

    :type debugging:
        bool
    """

    self._debugging_output_counter = 0
    self._debugging = debugging
    self.cannon_version = tc.__version__
    self._wavelength_arms = wavelength_arms
    logger.info("Wavelength arm breakpoints: {}".format(self._wavelength_arms))

    assert isinstance(training_set, fourgp_speclib.SpectrumArray), \
        "Training set for the Cannon should be a SpectrumArray."

    # Hook for normalising input spectra
    training_set = self.normalise(training_set)

    self._training_set = training_set
    self._progress_bar = progress_bar

    # Work out how many CPUs we should allow the Cannon to use
    if threads is None:
        threads = cpu_count()

    # Turn error bars on fluxes into inverse variances
    inverse_variances = training_set.value_errors ** (-2)

    # Flag bad data points (boolean addition acts as a logical OR); they get
    # zero weight and a flux of one
    ignore = (training_set.values < 0) + ~np.isfinite(inverse_variances)
    inverse_variances[ignore] = 0
    training_set.values[ignore] = 1

    # Check that labels are correctly set in metadata
    for index in range(len(training_set)):
        metadata = training_set.get_metadata(index)
        for label in label_names:
            assert label in metadata, "Label <{}> not set on training spectrum number {}. " \
                                      "Labels on this spectrum are: {}.".format(
                label, index, ", ".join(list(metadata.keys())))
            assert np.isfinite(metadata[label]), "Label <{}> is not finite on training spectrum number {}. " \
                                                 "Labels on this spectrum are: {}.".format(
                label, index, metadata)

    # Compile table of training values of labels from metadata contained in SpectrumArray
    training_label_values = Table(
        names=label_names,
        rows=[[training_set.get_metadata(index)[label] for label in label_names]
              for index in range(len(training_set))])

    # Use the requested polynomial order rather than a fixed order of 2
    self._model = tc.CannonModel(
        training_set_labels=training_label_values,
        training_set_flux=training_set.values,
        training_set_ivar=inverse_variances,
        dispersion=training_set.wavelengths,
        vectorizer=tc.vectorizer.PolynomialVectorizer(label_names=label_names,
                                                      order=polynomial_order),
        censors=censors)

    if load_from_file is None:
        logger.info("Starting to train the Cannon")
        with suppress_stdout(self._progress_bar):
            if tolerance is not None:
                op_kwds = {'xtol': tolerance, 'ftol': tolerance}
            else:
                op_kwds = {}
            self._model.train(op_kwds=op_kwds, op_bfgs_kwargs={}, threads=threads)
        logger.info("Cannon training completed")
    else:
        # The model built above is replaced by the one stored on disk
        logger.info("Loading Cannon from disk")
        self._model = self._model.read(path=load_from_file)
        logger.info("Cannon loaded successfully")
# large positive values handling: clip everything above 1.2 down to 1.2
idx_gros = spectral_data > 1.2
if np.sum(idx_gros) > 0:
    spectral_data[idx_gros] = 1.2

# run Cannon learning procedure
# Load the table containing the training set labels, and the spectra.
list_cols_fit = ['Teff_cannon', 'Logg_cannon', 'Fe_H_cannon', 'Vsini_cannon']
training_set = general_data[list_cols_fit][idx_rows_read]
# Same inverse variance for every pixel, from an assumed flux error of 0.02
normalized_ivar = np.full_like(spectral_data, 1 / 0.02**2)

# Create the model that will run in parallel using all available cores.
vectorizer = tc.vectorizer.polynomial.PolynomialVectorizer(
    label_names=list_cols_fit, order=2)
model = tc.CannonModel(training_set, spectral_data, normalized_ivar,
                       vectorizer, dispersion=wvl_data)

# Train the model!
# Parenthesised form prints the same text on Python 2 and also parses on Python 3
print('Model training')
model.train(threads=10)
model.write(
    'model_cannon181221_DR3_ccd1234_noflat_red0_cannon0_oksnr_vsiniparam_dwarfs_002.dat',
    include_training_set_spectra=False,
    overwrite=True,
    protocol=-1)
# NOTE(review): the next two statements are the tail of a function whose
# header lies above this excerpt (it keeps only the masked pixel columns of
# ivar and returns wave, flux, ivar) - confirm against the full file.
    ivar = ivar[:,mask]
    return wave, flux, ivar

# Script entry point: train a quadratic 3-label Cannon model, save it, and
# write the theta and one-to-one diagnostic plots.
if __name__=="__main__":
    mtab = rd.load_roed_data()
    # Label sets; only training_labels is used in the code visible here
    training_labels = ["Teff", "logg", "[M/H]"]
    training_labels_2 = ["Teff", "logg", "[M/H]", "Vt", "[Ca I/Fe]"]
    training_labels_3 = ["Teff", "logg", "[M/H]", "Vt", "[Mg I/Fe]", "[Ca I/Fe]"]

    ### 3 param, ivar0, star cut
    # mtab is replaced here by the table returned together with the wavelengths
    (wave, mtab), flux, ivar = load_wave_flux_ivar(remove_hb=True, remove_0708=True)
    wave, flux, ivar = cut_pixels_1(wave, flux, ivar)
    # NOTE(review): Python 2 print statement - a syntax error on Python 3
    print len(wave)
    model = tc.CannonModel(
        mtab, flux, ivar, dispersion=wave,
        vectorizer=tc.vectorizer.PolynomialVectorizer(training_labels, 2))
    theta, s2, metadata = model.train(threads=4)
    model.write("initial_naive_train_starcut.model", overwrite=True)

    fig_theta = tc.plot.theta(model)
    fig_theta.savefig("theta_starcut.png",dpi=600)

    # The test step is run on the training spectra themselves; note that
    # `metadata` from the training step is overwritten here
    test_labels, cov, metadata = model.test(flux,ivar)
    fig_comparison = tc.plot.one_to_one(model, test_labels)
    fig_comparison.savefig("one-to-one_starcut.png", dpi=300)

    ### 3 param, ivar0, wl cut, star cut
    #(wave, mtab), flux, ivar = load_wave_flux_ivar(remove_hb=True, remove_0708=True)
    #wave, flux, ivar = cut_pixels_4000_6800(wave, flux, ivar)
    #print len(wave)
    #model = tc.CannonModel(
    #    mtab, flux, ivar,
# Load the training set labels. training_set_labels = Table.read("apogee-dr14-giants.fits") # Load the training set spectra. pkl_kwds = dict(encoding="latin-1") if version_info[0] >= 3 else {} with open("apogee-dr14-giants-flux-ivar.pkl", "rb") as fp: training_set_flux, training_set_ivar = pickle.load(fp, **pkl_kwds) # Specify the labels that we will use to construct this model. label_names = ("TEFF", "LOGG", "FE_H", "MG_FE") #, "NA_FE", "TI_FE", "NI_FE") # Construct a CannonModel object using a quadratic (O=2) polynomial vectorizer. model = tc.CannonModel(training_set_labels, training_set_flux, training_set_ivar, vectorizer=tc.vectorizer.PolynomialVectorizer( label_names, 2)) print(model) #<thecannon.model.CannonModel of 6 labels with a training set of 1624 stars each with 8575 pixels> print(model.vectorizer.human_readable_label_vector) #1 + TEFF + LOGG + FE_H + NA_FE + TI_FE + NI_FE + TEFF^2 + LOGG*TEFF + FE_H*TEFF + NA_FE*TEFF + TEFF*TI_FE + NI_FE*TEFF + LOGG^2 + FE_H*LOGG + LOGG*NA_FE + LOGG*TI_FE + LOGG*NI_FE + FE_H^2 + FE_H*NA_FE + FE_H*TI_FE + FE_H*NI_FE + NA_FE^2 + NA_FE*TI_FE + NA_FE*NI_FE + TI_FE^2 + NI_FE*TI_FE + NI_FE^2 # This model has no regularization. print(model.regularization) #None # This model includes no censoring. print(model.censors)