def sort_parameter_info(self, f_par_info, simulation_info, fittedData):
    """
    Split the fitted parameters of a simulation into flux rows and MS
    fragment rows

    Entries whose parameter type is "Net flux" become rows of the flux
    table. Entries of type "Norm" become rows of the fragment table, but
    only when the label at the start of their id is found in the
    "experiment_id" or "sample_name_abbreviation" column of
    ``simulation_info``; otherwise they are skipped silently. Any other
    parameter type prints "type not recognized".

    Parameters
    ----------
    f_par_info : dict
        output of the get_fitted_parameters(f, simulation_info) function;
        the keys "rxn_id", "flux", "flux_stdev", "par_type", "flux_lb",
        "flux_ub", "flux_units", "fit_alf" and "free" are read and
        indexed by position
    simulation_info : pandas.DataFrame
        the MS fragment file corresponding to the simulation
    fittedData : pandas.DataFrame
        the output of the "extract_base_stats(f, simulation_id, info)"
        function; only the first unique "simulation_id" and
        "simulation_dateAndTime" are used

    Returns
    -------
    fittedFluxes : pandas.DataFrame
        info about the parameters of the fluxes used as an input
        for the simulation
    fittedFragments : pandas.DataFrame
        info about the parameters of the MS data used as an input
        for the simulation
    """
    # Read every input column up front so that a missing key fails early
    rxn_id = f_par_info["rxn_id"]
    flux = f_par_info["flux"]
    flux_stdev = f_par_info["flux_stdev"]
    par_type = f_par_info["par_type"]
    flux_lb = f_par_info["flux_lb"]
    flux_ub = f_par_info["flux_ub"]
    flux_units = f_par_info["flux_units"]
    fit_alf = f_par_info["fit_alf"]
    free = f_par_info["free"]
    simulation_id = fittedData["simulation_id"].unique()[0]
    simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[0]

    # Escaped tokens are substituted before splitting on "_" so that their
    # own underscores do not produce extra fields.
    # NOTE(review): the bracketed replacements are inserted literally
    # ("[(]" and "[)]"), not as bare parentheses - confirm this is intended.
    unescape_rules = (
        ("_DASH_", "-"),
        ("_LPARANTHES_", "[(]"),
        ("_RPARANTHES_", "[)]"),
    )
    scan_types = ("MRM", "EPI")

    flux_rows = {}
    fragment_rows = {}
    for cnt, p_type in enumerate(par_type):
        if p_type == "Net flux":
            flux_rows[cnt] = {
                "simulation_id": simulation_id,
                "simulation_dateAndTime": simulation_dateAndTime,
                "rxn_id": rxn_id[cnt],
                "flux": flux[cnt],
                "flux_stdev": flux_stdev[cnt],
                "flux_lb": flux_lb[cnt],
                "flux_ub": flux_ub[cnt],
                "flux_units": flux_units[cnt],
                "fit_alf": fit_alf[cnt],
                "fit_chi2s": None,
                "fit_cor": None,
                "fit_cov": None,
                "free": free[cnt],
                "used_": True,
                "comment_": None,
            }
            continue

        if p_type != "Norm":
            print("type not recognized")
            continue

        # The id holds four space-separated parts:
        # label, fragment id, fragment string, units
        id_parts = rxn_id[cnt].split(" ")
        label = id_parts[0]
        fragment_id = id_parts[1]
        encoded_fragment = id_parts[2]
        units = id_parts[3]

        for pattern, replacement in unescape_rules:
            encoded_fragment = re.sub(pattern, replacement, encoded_fragment)
        fields = encoded_fragment.split("_")

        # When a scan type marker is present in a string of more than five
        # fields, the mass offset and the time point sit one field later
        has_scan_type = len(fields) > 5 and any(
            tag in fields for tag in scan_types)
        shift = 1 if has_scan_type else 0
        fragment_mass = Formula(fields[2]).mass + float(fields[3 + shift])
        fragment_time_point = fields[4 + shift]

        # The label is either an experiment id or a sample name abbreviation;
        # the missing counterpart is taken from row 0 of simulation_info
        if label in list(simulation_info["experiment_id"]):
            experiment = label
            sample = simulation_info["sample_name_abbreviation"][0]
        elif label in list(simulation_info["sample_name_abbreviation"]):
            experiment = simulation_info["experiment_id"][0]
            sample = label
        else:
            continue

        fragment_rows[cnt] = {
            "simulation_id": simulation_id,
            "simulation_dateAndTime": simulation_dateAndTime,
            "experiment_id": experiment,
            "sample_name_abbreviation": sample,
            "time_point": fragment_time_point,
            "fragment_id": fragment_id,
            "fragment_mass": fragment_mass,
            "fit_val": flux[cnt],
            "fit_stdev": flux_stdev[cnt],
            "fit_units": units,
            "fit_alf": fit_alf[cnt],
            "fit_cor": None,
            "fit_cov": None,
            "free": free[cnt],
            "used_": True,
            "comment_": None,
        }

    fittedFluxes = pd.DataFrame.from_dict(flux_rows, orient="index")
    fittedFragments = pd.DataFrame.from_dict(fragment_rows, orient="index")
    return fittedFluxes, fittedFragments
def _parse_json_sbml_cobra_model(cobra_model, model_id, date,
                                 model_file_name, filetype):
    """
    Helper function for parse_cobra_model(); turns an already loaded cobra
    model into three tables describing the model, its reactions and its
    metabolites

    Parameters
    ----------
    cobra_model : cobra.Model
        Cobra metabolic model as loaded by the file type specific
        import function
    model_id : str
        Name of the model (for downstream reference)
    date : str
        Date of model processing (for downstream reference)
    model_file_name : str or path + str
        Filename or path to file + filename of the cobra metabolic model;
        the file is read as UTF-8 text and stored verbatim
    filetype : str
        Extension of the provided file

    Returns
    -------
    model_data : pandas.DataFrame
        Single-row table with general information about the processed
        metabolic model, including the raw file content
    reaction_data : pandas.DataFrame
        One row per reaction in the metabolic model
    metabolite_data : pandas.DataFrame
        One row per metabolite in the metabolic model
    """

    def reaction_row(reaction):
        """Collect the stored attributes of a single reaction"""
        return {
            "model_id": model_id,
            "rxn_id": reaction.id,
            "rxn_name": reaction.name,
            "equation": reaction.build_reaction_string(),
            "subsystem": reaction.subsystem,
            "gpr": reaction.gene_reaction_rule,
            "genes": [gene.id for gene in reaction.genes],
            "reactants_stoichiometry": [
                reaction.get_coefficient(reactant.id)
                for reactant in reaction.reactants
            ],
            "reactants_ids": [
                reactant.id for reactant in reaction.reactants
            ],
            "products_stoichiometry": [
                reaction.get_coefficient(product.id)
                for product in reaction.products
            ],
            "products_ids": [product.id for product in reaction.products],
            "lower_bound": reaction.lower_bound,
            "upper_bound": reaction.upper_bound,
            "objective_coefficient": reaction.objective_coefficient,
            "flux_units": "mmol*gDW-1*hr-1",
            "reversibility": reaction.reversibility,
            "used_": True,
        }

    def metabolite_row(metabolite):
        """Collect the stored attributes of a single metabolite"""
        # The formula is only normalised through Formula when is_valid()
        # accepts the metabolite; otherwise no formula is stored
        formula = (
            str(Formula(metabolite.formula))
            if is_valid(metabolite) else None
        )
        return {
            "model_id": model_id,
            "met_name": metabolite.name,
            "met_id": metabolite.id,
            "formula": formula,
            "charge": metabolite.charge,
            "compartment": metabolite.compartment,
            "bound": metabolite._bound,
            "annotations": metabolite.annotation,
            "used_": True,
        }

    # Keep the raw text of the model file next to the parsed information
    with open(model_file_name, "r", encoding="utf-8") as handle:
        model_file = handle.read()

    # General model information (a single row with index 0)
    model_record = {
        "model_id": model_id,
        "date": date,
        "model_description": cobra_model.description,
        "model_file": model_file,
        "file_type": filetype,
    }
    model_data = pd.DataFrame(model_record, index=[0])

    # Reactions, keyed by their position in the model
    reaction_rows = {
        position: reaction_row(reaction)
        for position, reaction in enumerate(cobra_model.reactions)
    }
    reaction_data = pd.DataFrame.from_dict(reaction_rows, orient="index")

    # Metabolites, keyed by their position in the model
    metabolite_rows = {
        position: metabolite_row(metabolite)
        for position, metabolite in enumerate(cobra_model.metabolites)
    }
    metabolite_data = pd.DataFrame.from_dict(metabolite_rows, orient="index")

    return model_data, reaction_data, metabolite_data
def sort_residual_info(self, f_mnt_res_info, simulation_info, fittedData):
    """
    Separate the information from the original input, the
    "get_residuals_info(f)" function, the "extract_file_info(filename)"
    function and the "extract_base_stats(f, simulation_id, info)" function
    into appropriate rows

    Entries of experiment type "Flux" become rows of the flux residual
    table and entries of type "MS" become rows of the fragment residual
    table. In both cases a row is only written when the entry's experiment
    label is found in the "experiment_id" or "sample_name_abbreviation"
    column of ``simulation_info``; otherwise the entry is skipped silently.
    Any other experiment type prints "type not recognized".

    Flux rows take their time point from ``f_mnt_res_info["time_point"]``;
    MS rows take the time point parsed out of their fragment id.

    Parameters
    ----------
    f_mnt_res_info : dict
        the output of the "get_residuals_info(f)" function; the keys
        "expt_type", "experiment_id", "time_point", "rxn_id", "res_data",
        "res_fit", "res_peak", "res_stdev" and "res_val" are read and
        indexed by position
    simulation_info : pandas.DataFrame
        the MS fragment file corresponding to the simulation
    fittedData : pandas.DataFrame
        the output of the "extract_base_stats(f, simulation_id, info)"
        function; only the first unique "simulation_id" and
        "simulation_dateAndTime" are used

    Returns
    -------
    fittedMeasuredFluxResiduals : pandas.DataFrame
        info about the residuals of the fluxes used as an input
        for the simulation
    fittedMeasuredFragmentResiduals : pandas.DataFrame
        info about the residuals of the fragments in the MS data
        used as an input for the simulation
    """
    expt_type = f_mnt_res_info["expt_type"]
    experiment_id = f_mnt_res_info["experiment_id"]
    time_point = f_mnt_res_info["time_point"]
    rxn_id = f_mnt_res_info["rxn_id"]
    res_data = f_mnt_res_info["res_data"]
    res_fit = f_mnt_res_info["res_fit"]
    res_peak = f_mnt_res_info["res_peak"]
    res_stdev = f_mnt_res_info["res_stdev"]
    res_val = f_mnt_res_info["res_val"]
    simulation_id = fittedData["simulation_id"].unique()[0]
    simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[0]

    def resolve_sample(label):
        """Return (experiment_id, sample_name_abbreviation) for a label,
        or None when the label is unknown to simulation_info"""
        # The label is either an experiment id or a sample name
        # abbreviation; the missing counterpart is taken from row 0
        if label in list(simulation_info["experiment_id"]):
            return label, simulation_info["sample_name_abbreviation"][0]
        if label in list(simulation_info["sample_name_abbreviation"]):
            return simulation_info["experiment_id"][0], label
        return None

    def build_row(cnt, sample, row_time_point, identity):
        """Assemble one residual row; `identity` holds the columns that
        differ between flux rows and fragment rows"""
        experiment, abbreviation = sample
        row = {
            "simulation_id": simulation_id,
            "simulation_dateAndTime": simulation_dateAndTime,
            "experiment_id": experiment,
            "sample_name_abbreviation": abbreviation,
            "time_point": row_time_point,
        }
        row.update(identity)
        row.update({
            "res_data": float(res_data[cnt]),
            "res_fit": float(res_fit[cnt]),
            "res_peak": res_peak[cnt],
            "res_stdev": float(res_stdev[cnt]),
            "res_val": float(res_val[cnt]),
            "res_msens": None,
            "res_esens": None,
            "used_": True,
            "comment_": None,
        })
        return row

    fittedMeasuredFluxResiduals = {}
    fittedMeasuredFragmentResiduals = {}
    for cnt, x_type in enumerate(expt_type):
        if x_type == "Flux":
            sample = resolve_sample(experiment_id[cnt])
            if sample is not None:
                fittedMeasuredFluxResiduals[cnt] = build_row(
                    cnt, sample, time_point[cnt], {"rxn_id": rxn_id[cnt]})
        elif x_type == "MS":
            # parse the id into fragment_id and mass; escaped tokens are
            # substituted first so their underscores do not split fields
            # NOTE(review): "[(]" / "[)]" are inserted literally here, and
            # the reverse step below only rewrites the bare parenthesis, so
            # such ids come back as "[_LPARANTHES_]" / "[_RPARANTHES_]".
            # Kept as-is; confirm whether plain parentheses were intended.
            fragment_string = rxn_id[cnt]
            fragment_string = re.sub("_DASH_", "-", fragment_string)
            fragment_string = re.sub("_LPARANTHES_", "[(]", fragment_string)
            fragment_string = re.sub("_RPARANTHES_", "[)]", fragment_string)
            fragment_list = fragment_string.split("_")

            # With a scan type marker in an id of more than five fields the
            # fragment id spans four fields instead of three, which moves
            # the mass offset and the time point one field to the right
            has_scan_type = len(fragment_list) > 5 and (
                "MRM" in fragment_list or "EPI" in fragment_list)
            id_width = 4 if has_scan_type else 3
            fragment_id = "_".join(fragment_list[:id_width])
            fragment_mass = Formula(fragment_list[2]).mass + float(
                fragment_list[id_width])
            # Stored under its own name: assigning it to `time_point` would
            # overwrite the per-row list that the "Flux" rows index into
            fragment_time_point = fragment_list[id_width + 1]

            fragment_id = re.sub("-", "_DASH_", fragment_id)
            fragment_id = re.sub("[(]", "_LPARANTHES_", fragment_id)
            fragment_id = re.sub("[)]", "_RPARANTHES_", fragment_id)

            sample = resolve_sample(experiment_id[cnt])
            if sample is not None:
                fittedMeasuredFragmentResiduals[cnt] = build_row(
                    cnt, sample, fragment_time_point,
                    {"fragment_id": fragment_id,
                     "fragment_mass": fragment_mass})
        else:
            print("type not recognized")

    fittedMeasuredFluxResiduals = pd.DataFrame.from_dict(
        fittedMeasuredFluxResiduals, orient="index")
    fittedMeasuredFragmentResiduals = pd.DataFrame.from_dict(
        fittedMeasuredFragmentResiduals, orient="index")
    return fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals