def merge_blocks(var, models_dict, all_variances):
    """Stack the scaled data of one variable from every model side by side.

    For each model the attribute ``var`` is passed through ``convert``,
    reduced to the columns named in that model's variances (every key except
    ``'device'``), transposed, and divided by the model's ``'device'``
    variance. The per-model arrays are then joined with ``np.hstack`` in the
    iteration order of ``models_dict``.

    :param str var: The model variable
    :param dict models_dict: The models, keyed by name
    :param dict all_variances: Per-model variance dicts keyed like
        ``models_dict``; each one needs a ``'device'`` entry

    :return: s_mat: the array of model data (None when there are no models)
    :rtype: numpy.ndarray

    """
    # TODO: Make sure blocks of different sizes (components) can be handled

    def _scaled_block(name, exp):
        # One model's contribution: selected columns, transposed and scaled.
        variances = all_variances[name]
        columns = [key for key in variances if key != 'device']
        frame = convert(getattr(exp, var))
        return frame.loc[:, columns].T.values / variances['device']

    s_mat = None
    for name, exp in models_dict.items():
        block = _scaled_block(name, exp)
        s_mat = block if s_mat is None else np.hstack((s_mat, block))
    return s_mat
def model_fit(parameter_estimator):
    """Runs basic post-processing lack of fit analysis

    Two kinds of model are supported:

    * models with a ``C`` attribute: the measured data is ``D`` and the
      prediction is ``C @ S.T`` using only the columns of C that also appear
      as columns of S;
    * models with a ``Cm`` attribute: the measured data is ``Cm`` and the
      prediction is ``Z`` taken at the index values of the measurements.

    :param ParameterEstimator parameter_estimator: The parameter estimator
        object after solving

    :return: Various model fit values (the result of ``diagnostic_terms``)
    :rtype: dict

    :raises ValueError: If the model has neither a ``C`` nor a ``Cm``
        attribute, so there is nothing to compare

    """
    model = parameter_estimator.model
    num_params = len(parameter_estimator.param_names)

    if hasattr(model, 'C'):
        C = convert(model.C)
        S = convert(model.S)
        # Only the components that have a column in S enter the prediction
        C_red = C.loc[:, S.columns]
        exp = convert(model.D)
        pred = C_red.dot(S.T)
    elif hasattr(model, 'Cm'):
        exp = convert(model.Cm)
        raw_pred = convert(model.Z)
        # Compare the prediction only at the measured index values
        pred = raw_pred.loc[exp.index]
    else:
        # Previously this fell through and failed with an UnboundLocalError
        raise ValueError(
            "model_fit requires a model with either a 'C' or a 'Cm' attribute"
        )

    return diagnostic_terms(exp, pred, num_params)
def D_from_SC(model, results, sigma_d=0):
    """Compute the D matrix as the product of C and the transpose of S.

    C is restricted to the columns listed in ``model.abs_components`` before
    the product is formed. When ``sigma_d`` is positive, zero-mean normal
    noise is added to D; ``sigma_d`` is handed to ``np.random.normal`` as its
    ``scale`` argument. The resulting DataFrame is also stored on
    ``results.D``.

    :param ConcreteModel model: A Pyomo model
    :param ResultsObject results: The results from a solved model
    :param float sigma_d: The device noise level (no noise is added unless
        it is greater than zero)

    :return: D-matrix
    :rtype: np.ndarray

    """
    conc = convert(model.C)
    conc = conc.loc[:, list(model.abs_components)]
    absorbance = convert(model.S)

    D = conc @ absorbance.T
    if sigma_d > 0:
        D += np.random.normal(np.zeros(D.shape), sigma_d)

    results.D = D
    return D.values
def make_Vd_matrix(models_dict, all_variances):
    """Builds d covariance matrix

    This method is not intended to be used by users directly

    For every model a square matrix is filled with a repeated diagonal block
    ``M = (component variances * S) @ S.T`` whose diagonal is increased by
    the model's ``'device'`` variance. With more than one model the per-model
    matrices are placed one after another on the diagonal of a combined
    matrix.

    NOTE(review): each per-model matrix is allocated with
    ``n_models * n_times * n_waves`` rows, as in the original code; confirm
    that the ``n_models`` factor is intended.

    :param dict models_dict: The models, keyed by name
    :param dict all_variances: Per-model variance dicts keyed like
        ``models_dict``; each one needs a ``'device'`` entry

    :return: The covariance matrix
    :rtype: scipy.sparse.coo_matrix

    :raises ValueError: If ``models_dict`` is empty

    """
    if not models_dict:
        # Previously this ended in an UnboundLocalError at the return
        raise ValueError("models_dict must contain at least one model")

    from kipet.model_tools.pyomo_model_tools import convert

    Vd_dict = {}
    total_shape = 0
    n_models = len(models_dict)

    for name, model in models_dict.items():
        variances = all_variances[name]
        n_times = len(model.allmeas_times.ordered_data())
        n_waves = len(model.meas_lambdas.ordered_data())

        size = n_models * n_times * n_waves
        Vd = np.zeros((size, size))

        comp_vars = [k for k in variances if k != 'device']
        S = convert(model.S).loc[:, comp_vars]
        comp_variances = np.array([variances[k] for k in comp_vars])

        # Evaluated left to right: (variances * S) @ S.T
        M = comp_variances * S.values @ S.values.T
        # 'ii->i' yields a writable view of the diagonal, so this updates M
        M_diag = np.einsum('ii->i', M)
        M_diag += variances['device']

        for t in range(n_models * n_times):
            block = slice(t * n_waves, (t + 1) * n_waves)
            Vd[block, block] = M

        total_shape += Vd.shape[0]
        Vd_dict[name] = Vd

    if n_models > 1:
        Vd_combined = np.zeros((total_shape, total_shape))
        start_index = 0
        for Vd in Vd_dict.values():
            stop_index = start_index + Vd.shape[0]
            Vd_combined[start_index:stop_index, start_index:stop_index] = Vd
            # Accumulate the offset; assigning only the last block's size
            # misplaced every block after the second one.
            start_index = stop_index
        return coo_matrix(Vd_combined)

    return coo_matrix(Vd)
def _plot_D_parity(self):
    """Plot the measured spectral data (D) against the model prediction.

    The prediction is ``C.dot(S.T)`` built from the results' C and S, with C
    restricted to the columns of S. Measured and predicted values are
    flattened and handed to ``_parity_plot``; a dashed gray line from the
    origin to the largest predicted value is drawn as the reference.

    :return: The value returned by ``_fig_finishing`` (presumably the name
        of the written figure file - confirm against ``_fig_finishing``)

    """
    fig = go.Figure()

    exp = convert(self.reaction_model.p_model.D)
    C = self.results.C
    S = self.results.S
    C = C.loc[:, S.columns]
    pred = C.dot(S.T)

    exp = exp.values.flatten()
    pred = pred.values.flatten()

    # Reference line on which prediction and measurement are equal
    upper = np.max(pred)
    fig.add_trace(
        go.Scatter(
            x=[0, upper],
            y=[0, upper],
            line=dict(color='gray', width=2, dash='dash'),
        )
    )

    self.color_num = 0
    self._parity_plot(fig, 'D', pred, exp)

    fig.update_layout(
        title=f'Model: {self.reaction_model.name} | Spectral Parity',
        xaxis_title='Model Prediction',
        yaxis_title='Measured',
        autosize=False,
        width=550,
        height=550,
    )
    return self._fig_finishing(
        fig, pred, plot_name='spectral-parity', use_index=False, exp=exp
    )
def load_from_pyomo_model(self, model, to_load=None):
    """Copy the variables of a pyomo model onto this object.

    Every name returned by ``get_vars(model)``, except
    ``'init_conditions'``, is read from the model, passed through
    ``convert`` and stored as an attribute of the same name.

    :param ConcreteModel model: Model of the reaction system
    :param to_load: Not used by this method

    :return: None

    """
    for name in get_vars(model):
        if name != 'init_conditions':
            setattr(self, name, convert(getattr(model, name)))
def run_param_est_with_subset_lambdas(self, builder_clone, end_time, subset, nfe, ncp, sigmas, solver='ipopt'):
    """Run the parameter estimation on a chosen subset of wavelengths.

    The D data of ``self.model`` is reduced to the requested columns, a new
    model is built from the reduced data with
    ``construct_model_from_reduced_set`` and solved with ``run_param_est``.
    Initialization strategies for the new problem are not handled here.

    :param TemplateBuilder builder_clone: Template builder class of complete
        model without the data added yet
    :param float end_time: the end time for the data and simulation
    :param subset: selected wavelengths, either as a list or as a dict whose
        keys are the wavelengths (the keys are used in sorted order)
    :param int nfe: number of finite elements
    :param int ncp: number of collocation points
    :param dict sigmas: dictionary containing the variances, as used in the
        ParameterEstimator class
    :param str solver: solver name passed on to ``run_param_est``

    :return ResultsObject results: The solved pyomo model results

    :raises RuntimeError: If ``subset`` is neither a list nor a dict

    """
    if isinstance(subset, dict):
        # Only the keys (wavelengths) are needed, in sorted order
        ordered_items = sorted(subset.items())
        wavelengths, _ = zip(*ordered_items)
        subset = list(wavelengths)
    elif not isinstance(subset, list):
        raise RuntimeError("subset must be of type list or dict!")

    # Filter the measured data down to the selected wavelengths
    full_D = convert(self.model.D)
    reduced_D = full_D.loc[:, subset]
    print(end_time, reduced_D)

    # A completely new estimation problem is built from the reduced data
    reduced_template = construct_model_from_reduced_set(builder_clone, end_time, reduced_D)

    # The lack of fit returned alongside the results is not used here
    results, _ = run_param_est(reduced_template, nfe, ncp, sigmas, solver=solver)
    return results
def _plot_D_residuals(self):
    """Plot the residuals of the spectral data (D), one trace per column.

    The prediction is ``C.dot(S.T)`` built from the results' C and S, with C
    restricted to the columns of S. For every column of the measured data
    the matching predicted and measured columns are handed to
    ``_residual_plot`` with ``use_spectral_format=True``.

    :return: The value returned by ``_fig_finishing`` (presumably the name
        of the written figure file - confirm against ``_fig_finishing``)

    """
    fig = go.Figure()

    measured = convert(self.reaction_model.p_model.D)
    conc = self.results.C
    absorbance = self.results.S
    predicted = conc.loc[:, absorbance.columns].dot(absorbance.T)

    for column in measured.columns:
        self._residual_plot(
            fig,
            column,
            predicted[column],
            measured[column],
            use_spectral_format=True,
        )

    self.color_num = 0

    fig.update_layout(
        title=f'Model: {self.reaction_model.name} | Spectral Residuals',
        xaxis_title=f'Time [{self.reaction_model.unit_base.time}]',
        yaxis_title='Residuals',
        showlegend=False,
    )
    return self._fig_finishing(fig, predicted, plot_name='spectral-residuals')
def run_lof_analysis(self, builder_before_data, end_time, correlations, lof_full_model, nfe, ncp, sigmas,
                     step_size=0.2, search_range=(0, 1)):
    """Runs the lack of fit minimization problem used in the Michael's
    Reaction paper from Chen et al. (submitted).

    To use this function, the full parameter estimation problem should be
    solved first and the correlations for wavelengths from this optimization
    need to be supplied to the function as an option.

    Starting from zero, the correlation threshold is raised in increments of
    ``step_size``. For each threshold a wavelength subset is selected, a
    reduced model is built and solved, and its lack of fit is recorded. The
    search stops once the threshold exceeds ``search_range[1]`` or reaches 1.
    The collected thresholds and lack-of-fit values are printed.

    NOTE(review): the search always starts at a threshold of zero;
    ``search_range[0]`` is validated but not used as the starting point.

    :param TemplateBuilder builder_before_data: Template builder class of
        complete model without the data added yet
    :param int end_time: the end time for the data and simulation
    :param dict correlations: dictionary containing the wavelengths and
        their correlations to the concentration profiles
    :param int lof_full_model: the value of the lack of fit of the full
        model (with all wavelengths)
    :param int nfe: number of finite elements
    :param int ncp: number of collocation points
    :param dict sigmas: dictionary containing the variances, as used in the
        ParameterEstimator class
    :param float step_size: The spacing used in correlation thresholds
    :param tuple search_range: correlation bounds within to search

    :return: None

    :raises RuntimeError: If ``step_size`` or ``search_range`` is invalid

    """
    import numbers

    if not isinstance(step_size, float):
        raise RuntimeError("step_size must be a float between 0 and 1")
    if step_size >= 1 or step_size <= 0:
        # This used to *return* the exception object instead of raising it
        raise RuntimeError("step_size must be a float between 0 and 1")

    if not isinstance(search_range, tuple):
        raise RuntimeError("search range must be a tuple")
    for label, position in (("lower", 0), ("upper", 1)):
        bound = search_range[position]
        # Check the bound itself; the old check tested the type of the tuple
        if not isinstance(bound, numbers.Real) or bound < 0 or bound > 1:
            raise RuntimeError(
                f"search range {label} value must be between 0 and 1 and must be type float")
    if search_range[1] <= search_range[0]:
        raise RuntimeError("search_range[1] must be bigger than search_range[0]!")

    # The lack of fit at a threshold of zero is the full model's value, so
    # the search starts with the first increment above zero.
    initial_solutions = [(0, lof_full_model)]
    filt = 0.0
    while filt < search_range[1]:
        filt += step_size
        if filt > search_range[1] or filt == 1:
            break

        new_subs = wavelength_subset_selection(correlations=correlations, n=filt)
        # Index with the sorted list of wavelengths rather than the dict
        # itself, as run_param_est_with_subset_lambdas does.
        wavelengths = sorted(new_subs)

        old_D = convert(self.model.D)
        new_D = old_D.loc[:, wavelengths]

        # An entirely new estimation problem is built from the reduced data,
        # based on the model already developed by the user.
        new_template = construct_model_from_reduced_set(builder_before_data, end_time, new_D)

        # The variance estimator is not re-run for the reduced problem.
        results, lof = run_param_est(new_template, nfe, ncp, sigmas)
        initial_solutions.append((filt, lof))

    for threshold, lof in initial_solutions:
        print(f'When wavelengths of less than {threshold:0.3f} correlation are removed')
        print(f'The lack of fit is {lof:0.6f} %')
def S_from_DC(model, C_dataFrame, tee=False, with_bounds=False, max_iter=200):
    """Estimate S from the D data of the model and given C data.

    Without bounds, C is interpolated onto the index of D and S is obtained
    from the normal equations, ``inv(C.T @ C) @ (C.T @ D)``, transposed.
    With bounds, a sparse linear system is assembled from C and solved with
    ``scipy.optimize.least_squares`` using zero as the lower bound.

    :param ConcreteModel model: The model providing the D data
    :param pandas.DataFrame C_dataFrame: data frame with concentration values
    :param bool tee: Option to output least_squares results
    :param bool with_bounds: Option to set lower bound to zero
    :param int max_iter: The maximum number of iterations used in
        least_squares

    :return: The values of the estimated S (``S.values``)
    :rtype: numpy.ndarray

    """
    D_data = convert(model.D)
    D_vector = D_data.values.flatten()

    if with_bounds:
        # Use least_squares to get S using zero as the lower bound
        # Imports only used here
        from scipy.optimize import least_squares
        from scipy.sparse import coo_matrix
        import pandas as pd

        lambdas = list(D_data.columns)
        times = list(D_data.index)
        components = C_dataFrame.columns
        num_times, num_lambdas = D_data.shape[0], D_data.shape[1]
        num_components = len(components)

        # Row index: one per (time, wavelength) entry of the flattened D.
        # Column index: one per (wavelength, component) unknown of S.
        rows = []
        cols = []
        values = []
        for t_pos, time in enumerate(times):
            for l_pos in range(num_lambdas):
                for c_pos, comp in enumerate(components):
                    rows.append(t_pos * num_lambdas + l_pos)
                    cols.append(l_pos * num_components + c_pos)
                    values.append(C_dataFrame[comp][time])

        Bd = coo_matrix(
            (values, (rows, cols)),
            shape=(num_times * num_lambdas, num_components * num_lambdas),
        )
        x0 = np.zeros(num_lambdas * num_components) + 1e-2
        M = Bd.tocsr()

        def residual(x, M, rhs):
            return rhs - M.dot(x)

        def jacobian(x, M, rhs):
            return -M

        res_lsq = least_squares(
            residual,
            x0,
            jac=jacobian,
            bounds=(0.0, np.inf),
            max_nfev=max_iter,
            verbose=2 if tee else 0,
            args=(M, D_vector),
        )
        s_shaped = res_lsq.x.reshape((num_lambdas, num_components))
        S = pd.DataFrame(s_shaped, columns=components, index=lambdas)
    else:
        # Use simple matrix multiplication to get S
        # Imports used only here
        from kipet.calculation_tools.interpolation import interpolate_trajectory2

        C = interpolate_trajectory2(list(D_data.index), C_dataFrame)

        # Replace the index of D by its values rounded to six decimals
        D_data.index = [round(ind, 6) for ind in D_data.index]

        assert C.shape[0] == D_data.values.shape[0]

        M1 = np.linalg.inv(C.T @ C)
        M2 = C.T @ D_data.values
        S = (M1 @ M2).T
        S.columns = C.columns
        S = S.set_index(D_data.columns)

    return S.values