def mock_trained_emulator(mock_emulator):
    filename = os.path.join(test_base, "data", "emu.hdf5")
    if os.path.exists(filename):
        yield Emulator.load(filename)
    else:
        mock_emulator.train()
        mock_emulator.save(filename)
        yield mock_emulator
def test_save_load(self, mock_emulator, tmpdir):
    init = mock_emulator.get_param_dict()
    filename = tmpdir.join("emu.hdf5")
    mock_emulator.save(filename)
    emulator = Emulator.load(filename)
    final = emulator.get_param_dict()
    assert init == final
    assert emulator._trained == mock_emulator._trained
def __init__(
    self,
    emulator: Union[str, Emulator],
    data: Union[str, Spectrum],
    grid_params: Sequence[float],
    max_deque_len: int = 100,
    name: str = "SpectrumModel",
    **params,
):
    if isinstance(emulator, str):
        emulator = Emulator.load(emulator)
    if isinstance(data, str):
        data = Spectrum.load(data)

    if len(data) > 1:
        raise ValueError(
            "Multiple orders detected in data, please use EchelleModel"
        )

    self.emulator: Emulator = emulator
    self.data_name = data.name
    self.data = data[0]
    dv = calculate_dv(self.data.wave)
    self.min_dv_wave = create_log_lam_grid(
        dv, self.emulator.wl.min(), self.emulator.wl.max()
    )["wl"]
    self.bulk_fluxes = resample(
        self.emulator.wl, self.emulator.bulk_fluxes, self.min_dv_wave
    )

    self.residuals = deque(maxlen=max_deque_len)

    # Manually handle the Chebyshev coefficients to offset the index by 1
    chebs = params.pop("cheb", [])
    cheb_idxs = [str(i) for i in range(1, len(chebs) + 1)]
    params["cheb"] = dict(zip(cheb_idxs, chebs))
    # Load the rest of the params into a FlatterDict
    self.params = FlatterDict(params)
    self.frozen = []
    self.name = name

    # Unpack the grid parameters
    self.n_grid_params = len(grid_params)
    self.grid_params = grid_params

    # None means "yet to be calculated"; do not use NaN
    self._lnprob = None
    self._glob_cov = None
    self._loc_cov = None

    self.log = logging.getLogger(self.__class__.__name__)

    self.flux_scalar_func = LinearNDInterpolator(
        self.emulator.grid_points, self.emulator.flux_scalar
    )
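# A minimal usage sketch of the constructor above: both arguments may be passed
# as paths, which are loaded via Emulator.load and Spectrum.load. The file names
# and grid-parameter values here are hypothetical placeholders for a trained
# emulator and a single-order spectrum.
model = SpectrumModel(
    "emu.hdf5",
    "example_spec.hdf5",
    grid_params=[6000, 4.0, 0.0],  # e.g. (Teff, logg, [Fe/H]) in the grid's units
    name="Example model",
)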
def initialize(self, key):
    '''
    Initialize to the correct chunk of data (echelle order).

    :param key: (spectrum_id, order_key)
    :param type: (int, int)

    This method should only be called after all subprocesses have been forked.
    '''
    self.id = key
    spectrum_id, self.order_key = self.id
    # Make sure these are ints
    self.spectrum_id = int(spectrum_id)

    self.instrument = Instruments[self.spectrum_id]
    self.dataSpectrum = DataSpectra[self.spectrum_id]
    self.wl = self.dataSpectrum.wls[self.order_key]
    self.fl = self.dataSpectrum.fls[self.order_key]
    self.sigma = self.dataSpectrum.sigmas[self.order_key]
    self.ndata = len(self.wl)
    self.mask = self.dataSpectrum.masks[self.order_key]
    self.order = int(self.dataSpectrum.orders[self.order_key])

    self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
    if self.debug:
        self.logger.setLevel(logging.DEBUG)
    else:
        self.logger.setLevel(logging.INFO)

    self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

    self.npoly = Starfish.config["cheb_degree"]
    self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

    # If the file exists, optionally initialize to the Chebyshev values
    fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
    if os.path.exists(fname):
        self.logger.debug("Loading stored Chebyshev parameters.")
        phi = PhiParam.load(fname)
        self.chebyshevSpectrum.update(phi.cheb)

    # self.resid_deque = deque(maxlen=500)  # Deque that stores the last residual spectra, for averaging
    self.counter = 0

    self.emulator = Emulator.open()
    self.emulator.determine_chunk_log(self.wl)

    self.pca = self.emulator.pca
    self.wl_FFT = self.pca.wl

    # The raw eigenspectra and mean flux components
    self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis, :], self.pca.flux_std[np.newaxis, :], self.pca.eigenspectra))

    self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
    self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

    # Holders to store the convolved and resampled eigenspectra
    self.eigenspectra = np.empty((self.pca.m, self.ndata))
    self.flux_mean = np.empty((self.ndata,))
    self.flux_std = np.empty((self.ndata,))
    self.flux_scalar = None

    self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
    self.mus, self.C_GP, self.data_mat = None, None, None
    self.Omega = None

    self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

    # self.nregions = 0
    # self.exceptions = []

    # Update the outdir based upon id
    self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
def initialize(self, key):
    '''
    Initialize to the correct chunk of data (echelle order).

    :param key: (spectrum_id, order_key)
    :param type: (int, int)

    This method should only be called after all subprocesses have been forked.
    '''
    self.id = key
    spectrum_id, self.order_key = self.id
    # Make sure these are ints
    self.spectrum_id = int(spectrum_id)

    self.instrument = Instruments[self.spectrum_id]
    self.dataSpectrum = DataSpectra[self.spectrum_id]
    self.wl = self.dataSpectrum.wls[self.order_key]
    self.fl = self.dataSpectrum.fls[self.order_key]
    self.sigma = self.dataSpectrum.sigmas[self.order_key]
    self.ndata = len(self.wl)
    self.mask = self.dataSpectrum.masks[self.order_key]
    self.order = int(self.dataSpectrum.orders[self.order_key])

    self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
    if self.debug:
        self.logger.setLevel(logging.DEBUG)
    else:
        self.logger.setLevel(logging.INFO)

    self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

    self.npoly = Starfish.config["cheb_degree"]
    self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

    # If the file exists, optionally initialize to the Chebyshev values
    fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
    if os.path.exists(fname):
        self.logger.debug("Loading stored Chebyshev parameters.")
        phi = PhiParam.load(fname)
        self.chebyshevSpectrum.update(phi.cheb)

    self.resid_deque = deque(maxlen=500)  # Deque that stores the last residual spectra, for averaging
    self.counter = 0

    self.emulator = Emulator.open()
    self.emulator.determine_chunk_log(self.wl)

    self.pca = self.emulator.pca
    self.wl_FFT = self.pca.wl

    # The raw eigenspectra and mean flux components
    self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis, :], self.pca.flux_std[np.newaxis, :], self.pca.eigenspectra))

    self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
    self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

    # Holders to store the convolved and resampled eigenspectra
    self.eigenspectra = np.empty((self.pca.m, self.ndata))
    self.flux_mean = np.empty((self.ndata,))
    self.flux_std = np.empty((self.ndata,))

    self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
    self.mus, self.C_GP, self.data_mat = None, None, None

    self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

    # self.nregions = 0
    # self.exceptions = []

    # Update the outdir based upon id
    self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
elif args.params == "emcee":
    eparams = np.median(np.load("eparams_emcee.npy"), axis=0)
    print("Using emcee median")
else:
    import sys
    sys.exit()

# Print out the emulator parameters in an easily-readable format
lambda_xi = eparams[0]
hparams = eparams[1:].reshape((my_pca.m, -1))

print("Emulator parameters are:")
print("lambda_xi", lambda_xi)
for row in hparams:
    print(row)

emulator = Emulator(my_pca, eparams)

# We will want to produce interpolated plots spanning each parameter dimension,
# for each eigenspectrum.

# Create a list of parameter blocks.
# Go through each parameter, and create a list of all parameter combinations of
# the other two parameters.
unique_points = [
    np.unique(my_pca.gparams[:, i]) for i in range(len(Starfish.parname))
]

blocks = []
for ipar, pname in enumerate(Starfish.parname):
    upars = unique_points.copy()
    dim = upars.pop(ipar)
    ndim = len(dim)
wl = dataSpec.wls[0]

# Truncate these to our shorter range to make it faster
# ind = (wl > 5165.) & (wl < 5185.)
# wl = wl[ind]
#
fl = dataSpec.fls[0]  # [ind]
sigma = dataSpec.sigmas[0]  # [ind]
# mask = dataSpec.masks[0][ind]

ndata = len(wl)
print("ndata", ndata)
print("Data wl range", wl[0], wl[-1])

# Set up the emulator for this chunk
emulator = Emulator.open()
emulator.determine_chunk_log(wl)

pca = emulator.pca

wl_FFT_orig = pca.wl
print("FFT length", len(wl_FFT_orig))
print(wl_FFT_orig[0], wl_FFT_orig[-1])

# The raw eigenspectra and mean flux components
EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :], pca.flux_std[np.newaxis, :], pca.eigenspectra))

ss = np.fft.rfftfreq(pca.npix, d=emulator.dv)
ss[0] = 0.01  # junk so we don't get a divide-by-zero error
def initialize(self, key):
    '''
    Initialize the OrderModel to the correct chunk of data (echelle order).

    :param key: (spectrum_id, order_id)
    :param type: (int, int)

    This should only be called after all subprocesses have been forked.
    '''
    self.id = key
    self.spectrum_id, self.order_id = self.id
    self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_id))

    self.instrument = Instruments[self.spectrum_id]
    self.DataSpectrum = DataSpectra[self.spectrum_id]
    self.wl = self.DataSpectrum.wls[self.order_id]
    self.fl = self.DataSpectrum.fls[self.order_id]
    self.sigma = self.DataSpectrum.sigmas[self.order_id]
    self.npoints = len(self.wl)
    self.mask = self.DataSpectrum.masks[self.order_id]
    self.order = self.DataSpectrum.orders[self.order_id]

    self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
    if self.debug:
        self.logger.setLevel(logging.DEBUG)
    else:
        self.logger.setLevel(logging.INFO)

    self.npoly = config["cheb_degree"]
    self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum, self.order_id, npoly=self.npoly)

    self.resid_deque = deque(maxlen=500)  # Deque that stores the last residual spectra, for averaging
    self.counter = 0

    self.Emulator = Emulator.open(config["PCA_path"])  # Returns mu and var vectors
    self.Emulator.determine_chunk_log(self.wl)  # Truncates the grid to this wl format, power of 2

    pg = self.Emulator.PCAGrid

    self.wl_FFT = pg.wl
    self.ncomp = pg.ncomp

    self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis, :], pg.flux_std[np.newaxis, :], pg.pcomps))

    self.min_v = self.Emulator.min_v
    self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
    self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

    self.pcomps = np.empty((self.ncomp, self.npoints))
    self.flux_mean = np.empty((self.npoints,))
    self.flux_std = np.empty((self.npoints,))
    self.mus, self.vars = None, None
    self.C_GP = None
    self.data_mat = None

    self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

    self.prior = 0.0  # Modified and set by NuisanceSampler.lnprob
    self.nregions = 0
    self.exceptions = []

    # TODO: perturb
    # if args.perturb:
    #     perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

    cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
    cheb_tuple = ("logc0",)
    # add in new coefficients
    for i in range(1, self.npoly):
        cheb_tuple += ("c{}".format(i),)
    # set starting position to 0
    cheb_Starting = {k: 0.0 for k in cheb_tuple}

    # Design cov starting
    cov_Starting = config['cov_params']
    cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
    cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in cov_tuple])**2

    nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
    nuisance_starting = {"cheb": cheb_Starting, "cov": cov_Starting, "regions": {}}

    # Because this initialization is happening on the subprocess, I think
    # the random state should be fine.

    # Update the outdir based upon id
    self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

    # Create the nuisance parameter sampler to run independently
    self.sampler = NuisanceSampler(
        OrderModel=self,
        starting_param_dict=nuisance_starting,
        cov=nuisance_MH_cov,
        debug=True,
        outdir=self.noutdir,
        order=self.order,
    )
    self.p0 = self.sampler.p0

    # Update the nuisance parameters to the starting values so that we at
    # least have a self.data_mat
    self.logger.info("Updating nuisance parameter data products to starting values.")
    self.update_nuisance(nuisance_starting)
    self.lnprob = None
myInstrument = TRES()
# myHDF5Interface = HDF5Interface(config['HDF5_path'])

# Somehow parse the list parameters, vz and logOmega into secondary parameters.
stellar_Starting = config['stellar_params']
stellar_tuple = C.dictkeys_to_tuple(stellar_Starting)
# go for each item in stellar_tuple, and assign the appropriate covariance to it
# stellar_MH_cov = np.array([float(config["stellar_jump"][key]) for key in stellar_tuple])**2 \
#     * np.identity(len(stellar_Starting))
stellar_MH_cov = np.array([float(config["stellar_jump"][key]) for key in stellar_tuple])**2

temulator = Emulator.open(config['PCA_path'])
# Call the emulator at the starting stellar parameters
pp = np.array([stellar_Starting["temp"], stellar_Starting["logg"], stellar_Starting["Z"]])
starting_Weights = temulator.draw_weights(pp)
stellar_Starting["weights"] = starting_Weights

weight_mu, weight_cov = temulator(pp)
weight_cov = weight_cov * config["frac_weight"]
stellar_MH_cov = np.concatenate((stellar_MH_cov, weight_cov))
stellar_MH_cov = stellar_MH_cov * np.identity(len(stellar_MH_cov))

print(len(stellar_MH_cov))

fix_logg = config.get("fix_logg", None)

# Updating specific correlations to speed mixing
# Set up an HDF5 interface to allow much quicker reading and writing than
# loading FITS files over and over again.
from Starfish.grid_tools.instruments import SPEX
from Starfish.grid_tools import HDF5Creator

creator = HDF5Creator(
    grid, "F_SPEX_grid.hdf5", instrument=SPEX(), wl_range=(0.9e4, np.inf), ranges=ranges
)
creator.process_grid()

#%%
# Use the HDF5 interface to construct the spectral emulator
from Starfish.emulator import Emulator

emu = Emulator.from_grid("F_SPEX_grid.hdf5")
print(emu)

#%%
# Train the emulator (PCA)
emu.train(options=dict(maxiter=1e5))
print(emu)

# Check that it trained properly; the GPs should have smooth lines with small
# errors connecting the weights
from Starfish.emulator.plotting import plot_emulator

plot_emulator(emu)

#%%
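# A minimal follow-on sketch, assuming the trained emulator is written to disk for
# later reuse; the filename "F_SPEX_emu.hdf5" is a hypothetical placeholder, and
# the save/load round trip mirrors test_save_load above.
emu.save("F_SPEX_emu.hdf5")
emu = Emulator.load("F_SPEX_emu.hdf5")
print(emu)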
def test_creation_from_string(self, mock_hdf5):
    emu = Emulator.from_grid(mock_hdf5)
    assert emu._trained is False
    assert np.allclose(emu._grid_sep, [100, 0.5, 0.5])  # issue 134
def mock_emulator(mock_hdf5_interface):
    yield Emulator.from_grid(mock_hdf5_interface)
# than loading FITS files over and over again.
from Starfish.grid_tools.instruments import IGRINS_H_custom
from Starfish.grid_tools import HDF5Creator

creator = HDF5Creator(
    grid, "IGRINS_grid.hdf5", instrument=IGRINS_H_custom(), wl_range=(16600, 16700), ranges=ranges
)
creator.process_grid()

#%%
from Starfish.emulator import Emulator

emu = Emulator.from_grid("IGRINS_grid.hdf5")
print(emu)

#%%
emu.train(options=dict(maxiter=1e5))
print(emu)

from Starfish.emulator.plotting import plot_emulator

plot_emulator(emu)

#%%
emu.save("IGRINS_emu.hdf5")
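#%%
# A minimal sketch of reusing the saved emulator, assuming the same "IGRINS_emu.hdf5"
# file written above: Emulator.load restores the trained object, so the trained flag
# and GP hyperparameters (get_param_dict) survive the round trip.
emu = Emulator.load("IGRINS_emu.hdf5")
print(emu._trained)
print(emu.get_param_dict())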