Exemplo n.º 1
0
    def sort_parameter_info(self, f_par_info, simulation_info, fittedData):
        """
        Separate the fitted parameters into flux rows and MS fragment rows

        Combines the information from the original input, the
        "get_fitted_parameters(f, simulation_info)"
        function, the "extract_file_info(filename)" function and the
        "extract_base_stats(f, simulation_id, info)" function
        into appropriate rows. Entries whose "par_type" is "Net flux"
        become flux rows and entries of type "Norm" become fragment rows;
        for any other type "type not recognized" is printed and the entry
        is skipped. A "Norm" entry is also skipped (silently) when the
        first token of its id matches neither an "experiment_id" nor a
        "sample_name_abbreviation" of simulation_info.

        Parameters
        ----------
        f_par_info : dict
            output of the
            get_fitted_parameters(f, simulation_info) function; the keys
            read here are "rxn_id", "flux", "flux_stdev", "par_type",
            "flux_lb", "flux_ub", "flux_units", "fit_alf" and "free",
            each indexed by position
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation; the columns "experiment_id" and
            "sample_name_abbreviation" are read (only for "Norm" entries)
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function; only
            the first unique value of "simulation_id" and of
            "simulation_dateAndTime" is used

        Returns
        -------
        fittedFluxes : pandas.DataFrame
            info about the parameters of the fluxes used
            as an input for the simulation
        fittedFragments : pandas.DataFrame
            info about the parameters of the MS data
            used as an input for the simulation

        Notes
        -----
        Both frames are indexed by the position of the entry in
        f_par_info, so the two indexes interleave.
        """
        fittedFluxes = {}
        fittedFragments = {}

        rxn_id = f_par_info["rxn_id"]
        flux = f_par_info["flux"]
        flux_stdev = f_par_info["flux_stdev"]
        par_type = f_par_info["par_type"]
        flux_lb = f_par_info["flux_lb"]
        flux_ub = f_par_info["flux_ub"]
        flux_units = f_par_info["flux_units"]
        fit_alf = f_par_info["fit_alf"]
        free = f_par_info["free"]
        # "fit_chi2s", "fit_cor" and "fit_cov" are not read from the input;
        # the corresponding output fields are stored as None

        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique(
        )[0]

        for cnt, p_type in enumerate(par_type):
            if p_type == "Net flux":
                fittedFluxes[cnt] = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "rxn_id": rxn_id[cnt],
                    "flux": flux[cnt],
                    "flux_stdev": flux_stdev[cnt],
                    "flux_lb": flux_lb[cnt],
                    "flux_ub": flux_ub[cnt],
                    "flux_units": flux_units[cnt],
                    "fit_alf": fit_alf[cnt],
                    "fit_chi2s": None,
                    "fit_cor": None,
                    "fit_cov": None,
                    "free": free[cnt],
                    "used_": True,
                    "comment_": None,
                }
            elif p_type == "Norm":
                # the id holds four space-separated tokens:
                # experiment/sample label, fragment id, fragment string, units
                id_list = rxn_id[cnt].split(" ")
                expt = id_list[0]
                fragment_id = id_list[1]
                fragment_string = id_list[2]
                units = id_list[3]

                # decode the placeholders so that the underscores they
                # contain do not take part in the split below
                # NOTE(review): re.sub inserts the replacement text
                # literally, so the parenthesis placeholders become "[(]"
                # and "[)]" rather than "(" and ")"; left unchanged because
                # the consumers of these strings are not visible here
                fragment_string = re.sub("_DASH_", "-", fragment_string)
                fragment_string = re.sub("_LPARANTHES_", "[(]",
                                         fragment_string)
                fragment_string = re.sub("_RPARANTHES_", "[)]",
                                         fragment_string)
                fragment_list = fragment_string.split("_")

                # ids that carry an "MRM"/"EPI" token have one extra field
                # in front of the mass offset and the time point
                has_scan_type = len(fragment_list) > 5 and (
                    "MRM" in fragment_list or "EPI" in fragment_list)
                mass_pos = 4 if has_scan_type else 3
                fragment_mass = Formula(fragment_list[2]).mass + float(
                    fragment_list[mass_pos])
                time_point = fragment_list[mass_pos + 1]

                # the label may be an experiment id or a sample name; the
                # other of the two is taken from the first row of
                # simulation_info (.iloc so that a non 0-based index works)
                if expt in list(simulation_info["experiment_id"]):
                    experiment = expt
                    sample_name = simulation_info[
                        "sample_name_abbreviation"].iloc[0]
                elif expt in list(simulation_info["sample_name_abbreviation"]):
                    experiment = simulation_info["experiment_id"].iloc[0]
                    sample_name = expt
                else:
                    # label unknown to this simulation: no row is written
                    continue

                fittedFragments[cnt] = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "experiment_id": experiment,
                    "sample_name_abbreviation": sample_name,
                    "time_point": time_point,
                    "fragment_id": fragment_id,
                    "fragment_mass": fragment_mass,
                    "fit_val": flux[cnt],
                    "fit_stdev": flux_stdev[cnt],
                    "fit_units": units,
                    "fit_alf": fit_alf[cnt],
                    "fit_cor": None,
                    "fit_cov": None,
                    "free": free[cnt],
                    "used_": True,
                    "comment_": None,
                }
            else:
                print("type not recognized")
        fittedFluxes = pd.DataFrame.from_dict(fittedFluxes, "index")
        fittedFragments = pd.DataFrame.from_dict(fittedFragments, "index")
        return fittedFluxes, fittedFragments
Exemplo n.º 2
0
def _parse_json_sbml_cobra_model(cobra_model, model_id, date, model_file_name,
                                 filetype):
    """
    Build the model, reaction and metabolite tables for a loaded cobra model

    Helper function for parse_cobra_model(). The model file is read back
    as text and stored alongside the general model information; one row
    per reaction and one row per metabolite is then collected from the
    already loaded cobra model.

    Parameters
    ----------
    cobra_model : cobra.Model
        Cobra metabolic model as loaded by the file type specific import
        function
    model_id : str
        Name of the model (for downstream reference)
    date : str
        Date of model processing (for downstream reference)
    model_file_name : str or path + str
        Filename or path to file + filename of the cobra metabolic model;
        the file is opened as UTF-8 text
    filetype : str
        Extension of the provided file

    Returns
    -------
    model_data : pandas.DataFrame
        General information about the processed metabolic model
        (a single row with index 0)
    reaction_data : pandas.DataFrame
        Information about the reactions in the metabolic model, indexed
        by the position of the reaction in cobra_model.reactions
    metabolite_data : pandas.DataFrame
        Information about the metabolites in the metabolic model, indexed
        by the position of the metabolite in cobra_model.metabolites
    """
    # The raw file content is stored verbatim in the model table
    with open(model_file_name, "r", encoding="utf-8") as handle:
        raw_model_text = handle.read()

    model_row = {
        "model_id": model_id,
        "date": date,
        "model_description": cobra_model.description,
        "model_file": raw_model_text,
        "file_type": filetype,
    }
    model_data = pd.DataFrame(model_row, index=[0])

    def reaction_row(rxn):
        # One table row describing a single reaction
        return {
            "model_id": model_id,
            "rxn_id": rxn.id,
            "rxn_name": rxn.name,
            "equation": rxn.build_reaction_string(),
            "subsystem": rxn.subsystem,
            "gpr": rxn.gene_reaction_rule,
            "genes": [gene.id for gene in rxn.genes],
            "reactants_stoichiometry": [
                rxn.get_coefficient(species.id) for species in rxn.reactants
            ],
            "reactants_ids": [species.id for species in rxn.reactants],
            "products_stoichiometry": [
                rxn.get_coefficient(species.id) for species in rxn.products
            ],
            "products_ids": [species.id for species in rxn.products],
            "lower_bound": rxn.lower_bound,
            "upper_bound": rxn.upper_bound,
            "objective_coefficient": rxn.objective_coefficient,
            # the unit string is fixed; it is not read from the model
            "flux_units": "mmol*gDW-1*hr-1",
            "reversibility": rxn.reversibility,
            "used_": True,
        }

    def metabolite_row(species):
        # One table row describing a single metabolite; the formula is
        # passed through Formula only when is_valid() accepts the
        # metabolite, otherwise it is stored as None
        if is_valid(species):
            normalised_formula = str(Formula(species.formula))
        else:
            normalised_formula = None
        return {
            "model_id": model_id,
            "met_name": species.name,
            "met_id": species.id,
            "formula": normalised_formula,
            "charge": species.charge,
            "compartment": species.compartment,
            "bound": species._bound,
            "annotations": species.annotation,
            "used_": True,
        }

    reaction_rows = {
        position: reaction_row(rxn)
        for position, rxn in enumerate(cobra_model.reactions)
    }
    reaction_data = pd.DataFrame.from_dict(reaction_rows, orient="index")

    metabolite_rows = {
        position: metabolite_row(species)
        for position, species in enumerate(cobra_model.metabolites)
    }
    metabolite_data = pd.DataFrame.from_dict(metabolite_rows, orient="index")

    return model_data, reaction_data, metabolite_data
Exemplo n.º 3
0
    def sort_residual_info(self, f_mnt_res_info, simulation_info, fittedData):
        """
        Separate the residuals into flux rows and MS fragment rows

        Combines the information from the original input, the
        "get_residuals_info(f)" function, the "extract_file_info(filename)"
        function and the "extract_base_stats(f, simulation_id, info)" function
        into appropriate rows. Entries whose "expt_type" is "Flux" become
        flux residual rows and entries of type "MS" become fragment
        residual rows; for any other type "type not recognized" is printed
        and the entry is skipped. An entry is also skipped (silently) when
        its "experiment_id" matches neither an "experiment_id" nor a
        "sample_name_abbreviation" of simulation_info.

        Parameters
        ----------
        f_mnt_res_info : dict
            the output of the "get_residuals_info(f)" function; the keys
            read here are "expt_type", "experiment_id", "time_point",
            "rxn_id", "res_data", "res_fit", "res_peak", "res_stdev" and
            "res_val", each indexed by position
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation; the columns "experiment_id" and
            "sample_name_abbreviation" are read
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function; only
            the first unique value of "simulation_id" and of
            "simulation_dateAndTime" is used

        Returns
        -------
        fittedMeasuredFluxResiduals : pandas.DataFrame
            info about the residuals of the
            fluxes used as an input for the simulation
        fittedMeasuredFragmentResiduals : pandas.DataFrame
            info about the residuals of
            the fragments in the MS data used as an input for
            the simulation

        Notes
        -----
        For "MS" entries the time point is the one parsed out of the
        fragment id ("rxn_id"), not the entry of
        f_mnt_res_info["time_point"]. Both frames are indexed by the
        position of the entry in f_mnt_res_info.
        """

        fittedMeasuredFluxResiduals = {}
        fittedMeasuredFragmentResiduals = {}

        expt_type = f_mnt_res_info["expt_type"]
        experiment_id = f_mnt_res_info["experiment_id"]
        time_point = f_mnt_res_info["time_point"]
        rxn_id = f_mnt_res_info["rxn_id"]
        res_data = f_mnt_res_info["res_data"]
        res_fit = f_mnt_res_info["res_fit"]
        res_peak = f_mnt_res_info["res_peak"]
        res_stdev = f_mnt_res_info["res_stdev"]
        res_val = f_mnt_res_info["res_val"]
        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique(
        )[0]

        def resolve_sample(label):
            # Return (experiment_id, sample_name_abbreviation) for a label
            # that is either an experiment id or a sample name, or None if
            # it is neither. The missing half comes from the first row of
            # simulation_info; .iloc keeps that working when the frame
            # does not carry a 0-based index.
            if label in list(simulation_info["experiment_id"]):
                return (label,
                        simulation_info["sample_name_abbreviation"].iloc[0])
            if label in list(simulation_info["sample_name_abbreviation"]):
                return simulation_info["experiment_id"].iloc[0], label
            return None

        for cnt, x_type in enumerate(expt_type):
            if x_type == "Flux":
                resolved = resolve_sample(experiment_id[cnt])
                if resolved is None:
                    continue
                rows = fittedMeasuredFluxResiduals
                specific = {
                    "time_point": time_point[cnt],
                    "rxn_id": rxn_id[cnt],
                }
            elif x_type == "MS":
                # decode the placeholders so that the underscores they
                # contain do not take part in the split below
                # NOTE(review): re.sub inserts the replacement text
                # literally, so the parenthesis placeholders become "[(]"
                # and "[)]"; re-encoding below then yields
                # "[_LPARANTHES_]" / "[_RPARANTHES_]". Left unchanged
                # because the consumers of fragment_id are not visible here
                fragment_string = rxn_id[cnt]
                fragment_string = re.sub("_DASH_", "-", fragment_string)
                fragment_string = re.sub("_LPARANTHES_", "[(]",
                                         fragment_string)
                fragment_string = re.sub("_RPARANTHES_", "[)]",
                                         fragment_string)
                fragment_list = fragment_string.split("_")

                # ids that carry an "MRM"/"EPI" token have one extra field
                # that belongs to the fragment id
                has_scan_type = len(fragment_list) > 5 and (
                    "MRM" in fragment_list or "EPI" in fragment_list)
                id_parts = 4 if has_scan_type else 3
                fragment_id = "_".join(fragment_list[:id_parts])
                fragment_mass = Formula(fragment_list[2]).mass + float(
                    fragment_list[id_parts])
                # kept under its own name: re-using "time_point" here
                # would replace the per-entry sequence read above and
                # break every later time_point[cnt] lookup
                fragment_time_point = fragment_list[id_parts + 1]

                # re-encode the fragment id
                fragment_id = re.sub("-", "_DASH_", fragment_id)
                fragment_id = re.sub("[(]", "_LPARANTHES_", fragment_id)
                fragment_id = re.sub("[)]", "_RPARANTHES_", fragment_id)

                resolved = resolve_sample(experiment_id[cnt])
                if resolved is None:
                    continue
                rows = fittedMeasuredFragmentResiduals
                specific = {
                    "time_point": fragment_time_point,
                    "fragment_id": fragment_id,
                    "fragment_mass": fragment_mass,
                }
            else:
                print("type not recognized")
                continue

            matched_experiment, sample_name = resolved
            row = {
                "simulation_id": simulation_id,
                "simulation_dateAndTime": simulation_dateAndTime,
                "experiment_id": matched_experiment,
                "sample_name_abbreviation": sample_name,
            }
            # flux rows add time_point/rxn_id, fragment rows add
            # time_point/fragment_id/fragment_mass at this position
            row.update(specific)
            row.update({
                "res_data": float(res_data[cnt]),
                "res_fit": float(res_fit[cnt]),
                "res_peak": res_peak[cnt],
                "res_stdev": float(res_stdev[cnt]),
                "res_val": float(res_val[cnt]),
                "res_msens": None,
                "res_esens": None,
                "used_": True,
                "comment_": None,
            })
            rows[cnt] = row
        fittedMeasuredFluxResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFluxResiduals, "index")
        fittedMeasuredFragmentResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFragmentResiduals, "index")
        return fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals