# Module-level accumulator referenced by residual(); assumed here so the
# function runs standalone -- each call appends its Simulation for later
# inspection after the fit.
return_sims = []

def residual(params, x, obs, mol0, ll0, ul0, line_profile0, res0, units, continuum):
    """Residual function for a least-squares fit: observed minus simulated
    intensities for the current parameter values. `x` is unused, but kept
    for compatibility with minimizer calling conventions."""
    parvals = params.valuesdict()
    size = parvals['size']
    dV = parvals['dV']
    velocity = parvals['velocity']
    Tex = parvals['Tex']
    column = parvals['column']

    # generate a source object
    source = Source(
        continuum=continuum,
        size=size,
        dV=dV,
        velocity=velocity,
        Tex=Tex,
        column=column,
    )

    # create a simulation
    sim = Simulation(
        mol=mol0,
        ll=ll0,
        ul=ul0,
        observation=obs,
        source=source,
        line_profile=line_profile0,
        res=res0,
        use_obs=True,
        units=units,
    )
    return_sims.append(sim)

    return np.array(obs.spectrum.Tb - sim.spectrum.int_profile)
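
# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of driving residual() through lmfit's least-squares
# minimizer. Everything below is an assumption for demonstration: the
# parameter names must match those read inside residual(), and `obs`,
# `mol`, the fit window (ll/ul), and `continuum` would come from the
# surrounding pipeline. lmfit's minimize is aliased to avoid clashing
# with scipy.optimize.minimize used elsewhere in this module.
from lmfit import Parameters
from lmfit import minimize as lmfit_minimize

def fit_single_source(obs, mol, ll, ul, continuum):
    params = Parameters()
    params.add('size', value=30.0, min=1.0)      # source size (arcsec); placeholder guess
    params.add('dV', value=0.5, min=0.01)        # linewidth (km/s)
    params.add('velocity', value=5.8)            # source velocity (km/s)
    params.add('Tex', value=8.0, min=2.725)      # excitation temperature (K)
    params.add('column', value=1e12, min=1e9)    # column density (cm^-2)
    # lmfit calls residual(params, *args); `x` is unused, so pass None
    return lmfit_minimize(
        residual,
        params,
        args=(None, obs, mol, ll, ul, 'Gaussian', 0.0014, 'K', continuum),
    )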
def make_simulation(self, molecule, ll, ul, obs, res: float = 0.0014):
    """Convenience wrapper: simulate this object's `source` for the given
    molecule and observation over the [ll, ul] frequency window."""
    return Simulation(
        mol=molecule,
        ll=ll,
        ul=ul,
        observation=obs,
        source=self.source,
        line_profile="Gaussian",
        res=res,
    )
def process_mcmc_json(json_file, molecule, observation, ll=0, ul=float('inf'),
                      line_profile='Gaussian', res=0.0014, stack_params=None,
                      stack_plot_params=None, make_plots=True, return_json=False):
    """Load per-component posterior means from a JSON file, simulate each
    velocity component, and optionally velocity-stack and matched-filter
    the result against the observation."""
    from molsim.classes import Source, Simulation
    from molsim.functions import sum_spectra, velocity_stack, matched_filter
    from molsim.plotting import plot_stack, plot_mf
    from molsim.utils import find_nearest  # assumed location of find_nearest

    # renamed from `input` to avoid shadowing the builtin
    with open(json_file) as infile:
        json_dict = json.load(infile)

    n_sources = len(json_dict['SourceSize']['mean'])  # number of velocity components

    # one Source per velocity component in the posterior means
    sources = []
    for size, vlsr, col, tex, dv in zip(
            json_dict['SourceSize']['mean'],
            json_dict['VLSR']['mean'],
            json_dict['NCol']['mean'],
            json_dict['Tex']['mean'],
            json_dict['dV']['mean']):
        sources.append(Source(size=size, velocity=vlsr, column=col, Tex=tex, dV=dv))

    sims = [Simulation(mol=molecule, ll=ll, ul=ul, observation=observation,
                       source=x, line_profile=line_profile, res=res)
            for x in sources]
    sum1 = sum_spectra(sims)

    if make_plots is False:
        if return_json is True:
            return sources, sims, sum1, json_dict
        return sources, sims, sum1

    mean_dV = np.mean(json_dict['dV']['mean'])
    mean_vlsr = np.mean(json_dict['VLSR']['mean'])
    internal_stack_params = {
        'selection': 'lines',
        'freq_arr': observation.spectrum.frequency,
        'int_arr': observation.spectrum.Tb,
        'freq_sim': sum1.freq_profile,
        'int_sim': sum1.int_profile,
        'res_inp': res,
        'dV': mean_dV,
        'dV_ext': 40,
        'vlsr': mean_vlsr,
        'vel_width': 40,
        'v_res': 0.02,
        'blank_lines': True,
        'blank_keep_range': [-5 * mean_dV, 5 * mean_dV],
        'flag_lines': False,
        'flag_sigma': 5,
    }
    if stack_params is not None:
        internal_stack_params.update(stack_params)

    stack = velocity_stack(internal_stack_params)

    internal_stack_plot_params = {'xlimits': [-10, 10]}
    if stack_plot_params is not None:
        internal_stack_plot_params.update(stack_plot_params)
    plot_stack(stack, params=internal_stack_plot_params)

    # matched filter over the +/- 2 km/s core of the stacked simulation
    mf = matched_filter(
        stack.velocity,
        stack.snr,
        stack.int_sim[find_nearest(stack.velocity, -2):find_nearest(stack.velocity, 2)],
    )
    plot_mf(mf)

    if return_json is True:
        return sources, sims, sum1, json_dict
    return sources, sims, sum1
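
# --- Usage sketch (illustrative; not part of the original module) ---
# process_mcmc_json() expects a JSON file whose top-level keys include
# 'SourceSize', 'VLSR', 'NCol', 'Tex', and 'dV', each holding a 'mean'
# list with one entry per velocity component. The file names below are
# placeholders, and the loaders are assumed to be molsim's standard ones.
from molsim.file_handling import load_mol, load_obs

mol = load_mol('molecule.cat', type='SPCAT')
obs = load_obs('spectrum.npz', type='molsim')
sources, sims, summed = process_mcmc_json(
    'posterior_means.json', mol, obs,
    ll=8000., ul=30000.,   # frequency window in MHz
    make_plots=False,      # just return the Sources, Simulations, and sum
)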
class SingleComponent(AbstractModel):
    """
    Simplest concrete implementation of an `AbstractModel`, corresponding to
    a single value for each modeling parameter. Each model parameter expects
    an `AbstractDistribution` object, which corresponds to the prior
    distribution over parameters.
    """

    source_size: AbstractDistribution
    vlsr: AbstractDistribution
    Ncol: AbstractDistribution
    Tex: AbstractDistribution
    dV: AbstractDistribution
    observation: Observation
    molecule: Molecule

    def __post_init__(self):
        self._distributions = [
            self.source_size,
            self.vlsr,
            self.Ncol,
            self.Tex,
            self.dV,
        ]

    def __len__(self) -> int:
        return len(self._distributions)

    def _get_components(self):
        return self._distributions

    def get_names(self) -> List[str]:
        return ["SourceSize", "VLSR", "NCol", "Tex", "dV"]

    def __repr__(self) -> str:
        output = f"Model: {type(self).__name__}\n"
        for dist in self._distributions:
            output += f"{dist}\n"
        return output

    def sample_prior(self) -> np.ndarray:
        """
        Draw samples from each respective prior distribution to return an
        array of parameters.

        Returns
        -------
        np.ndarray
            NumPy 1D array of parameter values drawn from the respective
            prior.
        """
        initial = np.array([param.sample() for param in self._distributions])
        return initial

    def simulate_spectrum(self, parameters: np.ndarray, scale: float = 3.0) -> np.ndarray:
        """
        Wraps `molsim` functionality to simulate the spectrum, given a set
        of input parameters as a NumPy 1D array. On the first pass, this
        generates a `Simulation` instance and stores it, which has some
        overhead associated with figuring out which catalog entries to
        simulate. After the first pass, the instance is re-used with the
        `Source` object updated with the new parameters.

        The nuance in this function is with `scale`: during the preprocess
        step, we assume that the observation frequency is not shifted to the
        source reference. To simulate with molsim, we identify where the
        catalog overlaps with our frequency windows, and because it is
        unshifted this causes molsim to potentially ignore a lot of lines
        (particularly high frequency ones). The `scale` parameter scales the
        input VLSR so as to make sure that we cover everything as best we
        can.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing parameters for the simulation.
        scale : float, optional
            Modifies the window to consider catalog overlap, by default 3.

        Returns
        -------
        np.ndarray
            NumPy 1D array corresponding to the simulated spectrum
        """
        size, vlsr, ncol, Tex, dV = parameters
        # Assume that the value is in log space if it's below 1000
        if ncol <= 1e3:
            ncol = 10**ncol
        source = Source("", vlsr, size, column=ncol, Tex=Tex, dV=dV)
        if not hasattr(self, "simulation"):
            min_freq, max_freq = find_limits(self.observation.spectrum.frequency)
            # there's a buffer here just to make sure we don't go out of
            # bounds and suddenly stop simulating lines
            min_offsets = compute.calculate_dopplerwidth_frequency(min_freq, vlsr * scale)
            max_offsets = compute.calculate_dopplerwidth_frequency(max_freq, vlsr * scale)
            min_freq -= min_offsets
            max_freq += max_offsets
            self.simulation = Simulation(
                mol=self.molecule,
                ll=min_freq,
                ul=max_freq,
                observation=self.observation,
                source=source,
                line_profile="gaussian",
                use_obs=True,
            )
        else:
            # re-use the cached Simulation; swap in the new Source and
            # re-run only the steps that depend on it
            self.simulation.source = source
            self.simulation._apply_voffset()
            self.simulation._calc_tau()
            self.simulation._make_lines()
            self.simulation._beam_correct()
        intensity = self.simulation.spectrum.int_profile
        return intensity

    def prior_constraint(self, parameters: np.ndarray) -> float:
        """
        Apply a constraint on the prior.

        This function should be overwritten in child models, say for example
        in the TMC-1 four component case, where we want to constrain
        parameter space to certain regions.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing parameter values

        Returns
        -------
        float
            Zero if the parameters pass the constraint, otherwise -np.inf
        """
        return 0.0

    def compute_prior_likelihood(self, parameters: np.ndarray) -> float:
        """
        Calculate the total prior log likelihood. The calculation is handed
        off to the individual distributions.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing the model parameters

        Returns
        -------
        float
            The total prior log likelihood
        """
        lnlikelihood = self.prior_constraint(parameters)
        lnlikelihood += sum(
            dist.ln_likelihood(value)
            for dist, value in zip(self._distributions, parameters)
        )
        return lnlikelihood

    def compute_log_likelihood(self, parameters: np.ndarray) -> float:
        """
        Calculate the log likelihood, given a set of parameters and our
        observed data.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing the model parameters

        Returns
        -------
        float
            Log likelihood of the model
        """
        obs = self.observation.spectrum
        simulation = self.simulate_spectrum(parameters)
        # Gaussian log likelihood of the observed intensities given the
        # simulation, up to an additive constant
        lnlike = np.sum(
            np.log(1.0 / np.sqrt(obs.noise**2.0))
            - (obs.Tb - simulation)**2.0 / (2.0 * obs.noise**2.0)
        )
        return lnlike

    def nll(self, parameters: np.ndarray) -> float:
        """
        Calculate the negative log likelihood. This is functionally the same
        as `compute_log_likelihood`, except that the sign of the likelihood
        is flipped for use in maximum likelihood estimation.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing the model parameters

        Returns
        -------
        float
            Negative log likelihood of the model
        """
        return -self.compute_log_likelihood(parameters)

    def mle_optimization(
        self,
        initial: Union[None, np.ndarray] = None,
        bounds: Union[None, List[Tuple[float, float]]] = None,
        **kwargs,
    ):
        """
        Obtain a maximum likelihood estimate, given an initial starting
        point in parameter space. Because models are often highly covariant,
        supplying `bounds` for a constrained optimization is strongly
        recommended.

        Additional kwargs are passed into `scipy.optimize.minimize`, and can
        be used to overwrite things like the optimization method.

        The `Result` object from `scipy.optimize` is returned, which holds
        the MLE parameters as the attribute `x`, and the likelihood value as
        `fun`.

        Parameters
        ----------
        initial : Union[None, np.ndarray], optional
            Initial parameters for optimization, by default None, which will
            take the mean of the prior.
        bounds : Union[None, List[Tuple[float, float]]], optional
            Bounds for constrained optimization. By default None, which
            imposes no constraints (highly not recommended!). See the
            `scipy.optimize.minimize` page for how `bounds` is specified.

        Returns
        -------
        `scipy.optimize.Result`
            A fit `Result` object that contains the final state of the
            minimization
        """
        if initial is None:
            # approximate the prior mean by averaging many prior draws
            initial = np.array([self.sample_prior() for _ in range(3000)]).mean(axis=0)
        opt_kwargs = {
            "fun": self.nll,
            "x0": initial,
            "method": "Powell",
            "bounds": bounds,
        }
        opt_kwargs.update(**kwargs)
        result = minimize(**opt_kwargs)
        return result

    @classmethod
    def from_yml(cls, yml_path: str):
        input_dict = load_yaml(yml_path)
        cls_dict = dict()
        for key in input_dict.keys():
            if key not in ["observation", "molecule", "nominal_vlsr"]:
                # parameter priors: Gaussian when a `mu` is given, else uniform
                if hasattr(input_dict[key], "mu"):
                    dist = GaussianLikelihood
                else:
                    dist = UniformLikelihood
                cls_dict[key] = dist.from_values(**input_dict[key])
            else:
                # the two stragglers: observation and molecule are loaded
                # from disk, while nominal_vlsr is not used by this model
                if key != "nominal_vlsr":
                    # load in the observed data
                    cls_dict[key] = load(input_dict[key])
                else:
                    logger.warning(f"{key} is not recognized, and therefore ignored.")
        return cls(**cls_dict)
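
# --- Usage sketch (illustrative; not part of the original module) ---
# Builds a SingleComponent model with uniform priors and runs a bounded
# maximum-likelihood fit. The UniformLikelihood.from_values() arguments
# and all numerical values are assumptions for demonstration; in practice
# the model is typically constructed from a YAML file via from_yml().
model = SingleComponent(
    source_size=UniformLikelihood.from_values(name="SourceSize", min=0.0, max=400.0),
    vlsr=UniformLikelihood.from_values(name="VLSR", min=0.0, max=10.0),
    Ncol=UniformLikelihood.from_values(name="NCol", min=9.0, max=15.0),  # log10 space
    Tex=UniformLikelihood.from_values(name="Tex", min=2.725, max=30.0),
    dV=UniformLikelihood.from_values(name="dV", min=0.1, max=2.0),
    observation=obs,   # a molsim Observation
    molecule=mol,      # a molsim Molecule
)
theta = model.sample_prior()                  # one draw from the priors
print(model.compute_prior_likelihood(theta))  # finite if theta is in bounds
result = model.mle_optimization(
    bounds=[(0.0, 400.0), (0.0, 10.0), (9.0, 15.0), (2.725, 30.0), (0.1, 2.0)],
)
print(result.x)                               # MLE parameter estimates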