def make_13CEnsemble(self,formula_str_I):
        '''Build the ensemble of formulas carrying 0 up to all-13C carbons'''
        # input:
        #       formula_str_I = formula string; every '+' and '-' character
        #                       is stripped before parsing
        # output:
        #       mass_ensemble_O = {number of 13C carbons: Formula}
        #                         (only key 0 when the formula has no carbon)
        neutral_str = re.sub('[+-]', '', formula_str_I)
        unlabeled = Formula(neutral_str)

        # no carbon: the ensemble is just the formula itself
        if 'C' not in unlabeled._elements:
            return {0: unlabeled}

        # carbon count is read from isotope key 0
        # (presumably the unlabeled entry -- confirm against Formula internals)
        n_carbon = unlabeled._elements['C'][0]
        mass_ensemble_O = {}
        for n_heavy in range(n_carbon + 1):
            n_light = n_carbon - n_heavy
            # an isotope with a zero count is left out of the mapping
            if n_heavy == 0:
                carbon_isotopes = {0: n_light}
            elif n_light == 0:
                carbon_isotopes = {13: n_heavy}
            else:
                carbon_isotopes = {0: n_light, 13: n_heavy}
            labeled = Formula(neutral_str)
            labeled._elements['C'] = carbon_isotopes
            # re-parse the rendered string so the stored object is built
            # from the labelled formula text
            mass_ensemble_O[n_heavy] = Formula(labeled.formula)
        return mass_ensemble_O
    def execute_scheduledMRMPro_quant(self,met_ids_I):
        '''Create the Light and uniformly 13C-labelled (Heavy) quantification
        MRMs for each compound and store them through add_MSComponents'''
        # input:
        #       met_ids_I = [{'met_id': , 'precursor_formula':, 'product_formula':},]
        # output:
        #       none returned; the generated rows are passed to
        #       self.add_MSComponents

        # (formula prefix, mass column) pairs, handled identically
        formula_and_mass_keys = (('precursor', 'q1_mass'), ('product', 'q3_mass'))

        mrms_O = []
        for met in met_ids_I:
            met_id = met['met_id']
            # the tuning-method transition is the template for both rows
            trans = self.get_row_MSComponents_metIDAndFormula(
                met_id, met['precursor_formula'], met['product_formula'], 'tuning')
            transUC13 = trans.copy()

            # component/group naming for the Light row
            name_stem = met_id + '.' + met_id + '_' + str(trans['quantifier'])
            trans['component_name'] = name_stem + '.Light'
            trans['ms_group'] = met_id
            trans['ms_methodtype'] = 'quantification'

            # component/group naming for the Heavy (UC13) row
            transUC13['component_name'] = name_stem + '.Heavy'
            transUC13['met_ID'] = met_id + '-UC13'
            transUC13['met_name'] = transUC13['met_name'] + '-UC13'
            transUC13['ms_group'] = met_id + '-UC13'
            transUC13['ms_methodtype'] = 'quantification'

            # UC13 equivalents of the formula, exact mass and q1/q3 mass
            for prefix, mass_key in formula_and_mass_keys:
                formula_key = prefix + '_formula'
                exactmass_key = prefix + '_exactmass'
                if not trans[formula_key]:
                    continue
                # parse without the '+'/'-' characters; ms_mode is appended
                # again when the formula is written back
                light = Formula(re.sub('[+-]', '', trans[formula_key]))
                trans[formula_key] = light.formula + trans['ms_mode']
                trans[exactmass_key] = light.isotope.mass
                if 'C' in light._elements:
                    # move the whole carbon count (isotope key 0) to 13C
                    n_carbon = light._elements['C'][0]
                    relabeled = Formula(light.formula)
                    relabeled._elements['C'] = {13: n_carbon}
                    heavy = Formula(relabeled.formula)
                else:
                    heavy = light
                transUC13[formula_key] = heavy.formula + trans['ms_mode']
                transUC13[exactmass_key] = heavy.isotope.mass
                # shift the measured mass by the exact-mass difference so each
                # q1_mass/q3_mass stays unique for a given mode in ms_components
                transUC13[mass_key] = trans[mass_key] + heavy.isotope.mass - light.isotope.mass

            mrms_O.append(trans)
            mrms_O.append(transUC13)

        self.add_MSComponents(mrms_O)
    def _parse_model_sbml(self,model_id_I,date_I,filename_I):
        '''Load an SBML model and flatten it into model, reaction and
        metabolite rows'''
        # input:
        #       model_id_I = identifier written into every row
        #       date_I = date stored with the model row
        #       filename_I = path of the sbml file
        # output:
        #       model_data, reaction_data, metabolite_data = lists of dicts
        cobra_model = create_cobra_model_from_sbml_file(filename_I, print_time=True)

        # model metadata: a single row that also carries the raw file text
        model_row = {
            'model_id': model_id_I,
            'model_name': None,
            'date': date_I,
            'model_description': cobra_model.description,
        }
        with open(filename_I, 'r') as sbml_file:
            model_row['model_file'] = sbml_file.read()
        model_row['file_type'] = 'sbml'
        model_data = [model_row]

        def reaction_row(rxn):
            # one dict per reaction; the tracked-stoichiometry, mapping and
            # rxn_equation columns are not populated here
            reactants = rxn.reactants
            products = rxn.products
            return {
                'model_id': model_id_I,
                'rxn_id': rxn.id,
                'rxn_name': rxn.name,
                'equation': rxn.build_reaction_string(),
                'subsystem': rxn.subsystem,
                'gpr': rxn.gene_reaction_rule,
                'genes': [gene.id for gene in rxn.genes],
                'reactants_stoichiometry': [rxn.get_coefficient(m.id) for m in reactants],
                'reactants_ids': [m.id for m in reactants],
                'products_stoichiometry': [rxn.get_coefficient(m.id) for m in products],
                'products_ids': [m.id for m in products],
                'lower_bound': rxn.lower_bound,
                'upper_bound': rxn.upper_bound,
                'objective_coefficient': rxn.objective_coefficient,
                'flux_units': 'mmol*gDW-1*hr-1',
                'reversibility': rxn.reversibility,
                'fixed': None,
                'free': None,
                'weight': None,
                'used_': True,
                'comment_': None,
            }

        def metabolite_row(met):
            # one dict per metabolite; element/atom-position, balance and
            # symmetry columns are not populated here
            # every element count is stored under isotope key 0 before the
            # formula string is rendered
            isotope_map = {element: {0: count}
                           for element, count in met.formula.elements.items()}
            rendered = Formula()
            rendered._elements = isotope_map
            return {
                'model_id': model_id_I,
                'met_name': met.name,
                'met_id': met.id,
                'formula': rendered.formula,
                'charge': met.charge,
                'compartment': met.compartment,
                'bound': met._bound,
                'constraint_sense': met._constraint_sense,
                'used_': True,
                'comment_': None,
            }

        reaction_data = [reaction_row(rxn) for rxn in cobra_model.reactions]
        metabolite_data = [metabolite_row(met) for met in cobra_model.metabolites]

        return model_data,reaction_data,metabolite_data
Пример #4
0
def _parse_json_sbml_cobra_model(cobra_model, model_id, date, model_file_name,
                                 filetype):
    """
    Flatten an already loaded cobra model into three data frames

    Helper for parse_cobra_model(): collects the model metadata together
    with the raw file text, one row per reaction and one row per metabolite.

    Parameters
    ----------
    cobra_model : cobra.Model
        Metabolic model as returned by the file type specific import
        function
    model_id : str
        Identifier written into every row (for downstream reference)
    date : str
        Date of model processing (for downstream reference)
    model_file_name : str or path + str
        File the model was loaded from; it is read as UTF-8 text and stored
        in the model row
    filetype : str
        Extension of the provided file

    Returns
    -------
    model_data : pandas.DataFrame
        Single row with the model metadata and file content
    reaction_data : pandas.DataFrame
        One row per reaction, indexed by position in the model
    metabolite_data : pandas.DataFrame
        One row per metabolite, indexed by position in the model
    """

    def _reaction_record(rxn):
        # Stoichiometry and id lists are parallel, in the order the
        # reaction reports its reactants / products.
        return {
            "model_id": model_id,
            "rxn_id": rxn.id,
            "rxn_name": rxn.name,
            "equation": rxn.build_reaction_string(),
            "subsystem": rxn.subsystem,
            "gpr": rxn.gene_reaction_rule,
            "genes": [gene.id for gene in rxn.genes],
            "reactants_stoichiometry": [
                rxn.get_coefficient(m.id) for m in rxn.reactants
            ],
            "reactants_ids": [m.id for m in rxn.reactants],
            "products_stoichiometry": [
                rxn.get_coefficient(m.id) for m in rxn.products
            ],
            "products_ids": [m.id for m in rxn.products],
            "lower_bound": rxn.lower_bound,
            "upper_bound": rxn.upper_bound,
            "objective_coefficient": rxn.objective_coefficient,
            "flux_units": "mmol*gDW-1*hr-1",
            "reversibility": rxn.reversibility,
            "used_": True,
        }

    def _metabolite_record(met):
        # The formula is normalised through Formula only when is_valid(met)
        # is truthy; otherwise it is stored as None.
        formula_text = str(Formula(met.formula)) if is_valid(met) else None
        return {
            "model_id": model_id,
            "met_name": met.name,
            "met_id": met.id,
            "formula": formula_text,
            "charge": met.charge,
            "compartment": met.compartment,
            "bound": met._bound,
            "annotations": met.annotation,
            "used_": True,
        }

    # Raw model file content is kept next to the metadata
    with open(model_file_name, "r", encoding="utf-8") as handle:
        raw_model_text = handle.read()

    model_data = pd.DataFrame(
        {
            "model_id": model_id,
            "date": date,
            "model_description": cobra_model.description,
            "model_file": raw_model_text,
            "file_type": filetype,
        },
        index=[0],
    )

    # Rows are keyed by enumeration position, which becomes the frame index
    reaction_rows = {
        position: _reaction_record(rxn)
        for position, rxn in enumerate(cobra_model.reactions)
    }
    reaction_data = pd.DataFrame.from_dict(reaction_rows, orient="index")

    metabolite_rows = {
        position: _metabolite_record(met)
        for position, met in enumerate(cobra_model.metabolites)
    }
    metabolite_data = pd.DataFrame.from_dict(metabolite_rows, orient="index")

    return model_data, reaction_data, metabolite_data
Пример #5
0
    def sort_parameter_info(self, f_par_info, simulation_info, fittedData):
        """
        Separate the fitted parameters into flux rows and MS fragment rows

        Combines the output of the
        "get_fitted_parameters(f, simulation_info)" function with the
        simulation information and the simulation id / timestamp taken from
        the "extract_base_stats(f, simulation_id, info)" output.

        Parameters
        ----------
        f_par_info : dict
            output of the
            get_fitted_parameters(f, simulation_info) function; parallel
            sequences under the keys "rxn_id", "flux", "flux_stdev",
            "par_type", "flux_lb", "flux_ub", "flux_units", "fit_alf"
            and "free"
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation; the columns "experiment_id" and
            "sample_name_abbreviation" are read (only for "Norm"
            parameters)
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function; the first
            unique "simulation_id" and "simulation_dateAndTime" are used

        Returns
        -------
        fittedFluxes : pandas.DataFrame
            one row per "Net flux" parameter, indexed by its position in
            the input
        fittedFragments : pandas.DataFrame
            one row per "Norm" parameter whose experiment label is found in
            simulation_info, indexed by its position in the input

        Notes
        -----
        Parameters of any other type only print "type not recognized".
        "Norm" parameters whose label matches neither column of
        simulation_info are skipped silently.
        """
        rxn_id = f_par_info["rxn_id"]
        flux = f_par_info["flux"]
        flux_stdev = f_par_info["flux_stdev"]
        par_type = f_par_info["par_type"]
        flux_lb = f_par_info["flux_lb"]
        flux_ub = f_par_info["flux_ub"]
        flux_units = f_par_info["flux_units"]
        fit_alf = f_par_info["fit_alf"]
        free = f_par_info["free"]

        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData[
            "simulation_dateAndTime"].unique()[0]

        fittedFluxes = {}
        fittedFragments = {}
        for cnt, p_type in enumerate(par_type):
            if p_type == "Net flux":
                fittedFluxes[cnt] = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "rxn_id": rxn_id[cnt],
                    "flux": flux[cnt],
                    "flux_stdev": flux_stdev[cnt],
                    "flux_lb": flux_lb[cnt],
                    "flux_ub": flux_ub[cnt],
                    "flux_units": flux_units[cnt],
                    "fit_alf": fit_alf[cnt],
                    "fit_chi2s": None,
                    "fit_cor": None,
                    "fit_cov": None,
                    "free": free[cnt],
                    "used_": True,
                    "comment_": None,
                }
            elif p_type == "Norm":
                # the id is "<experiment> <fragment id> <fragment string>
                # <units>", separated by single spaces
                id_list = rxn_id[cnt].split(" ")
                expt = id_list[0]
                fragment_id = id_list[1]
                fragment_string = id_list[2]
                units = id_list[3]
                # Undo the escaping of "-", "(" and ")". The replacement is
                # plain text, so it must be the bare character; a regex
                # class such as "[(]" would be inserted literally.
                fragment_string = (fragment_string
                                   .replace("_DASH_", "-")
                                   .replace("_LPARANTHES_", "(")
                                   .replace("_RPARANTHES_", ")"))
                fragment_list = fragment_string.split("_")
                # ids longer than five tokens that contain "MRM" or "EPI"
                # carry one extra token at index 3, which shifts the mass
                # offset and the time point by one position
                has_extra_token = len(fragment_list) > 5 and (
                    "MRM" in fragment_list or "EPI" in fragment_list)
                shift = 1 if has_extra_token else 0
                fragment_mass = Formula(fragment_list[2]).mass + float(
                    fragment_list[3 + shift])
                time_point = fragment_list[4 + shift]
                # the leading label is either an experiment id or a sample
                # name abbreviation; the other one is taken from the first
                # row of simulation_info
                if expt in list(simulation_info["experiment_id"]):
                    experiment_id = expt
                    sample_name_abbreviation = simulation_info[
                        "sample_name_abbreviation"][0]
                elif expt in list(simulation_info["sample_name_abbreviation"]):
                    experiment_id = simulation_info["experiment_id"][0]
                    sample_name_abbreviation = expt
                else:
                    # unknown label: no row is produced
                    continue
                fittedFragments[cnt] = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "experiment_id": experiment_id,
                    "sample_name_abbreviation": sample_name_abbreviation,
                    "time_point": time_point,
                    "fragment_id": fragment_id,
                    "fragment_mass": fragment_mass,
                    "fit_val": flux[cnt],
                    "fit_stdev": flux_stdev[cnt],
                    "fit_units": units,
                    "fit_alf": fit_alf[cnt],
                    "fit_cor": None,
                    "fit_cov": None,
                    "free": free[cnt],
                    "used_": True,
                    "comment_": None,
                }
            else:
                print("type not recognized")
        fittedFluxes = pd.DataFrame.from_dict(fittedFluxes, orient="index")
        fittedFragments = pd.DataFrame.from_dict(fittedFragments,
                                                 orient="index")
        return fittedFluxes, fittedFragments
Пример #6
0
    def sort_residual_info(self, f_mnt_res_info, simulation_info, fittedData):
        """
        Separate the measurement residuals into flux rows and MS fragment
        rows

        Combines the output of the "get_residuals_info(f)" function with the
        simulation information and the simulation id / timestamp taken from
        the "extract_base_stats(f, simulation_id, info)" output.

        Parameters
        ----------
        f_mnt_res_info : dict
            the output of the "get_residuals_info(f)" function; parallel
            sequences under the keys "expt_type", "experiment_id",
            "time_point", "rxn_id", "res_data", "res_fit", "res_peak",
            "res_stdev" and "res_val"
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation; the columns "experiment_id" and
            "sample_name_abbreviation" are read
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function; the first
            unique "simulation_id" and "simulation_dateAndTime" are used

        Returns
        -------
        fittedMeasuredFluxResiduals : pandas.DataFrame
            one row per "Flux" residual, indexed by its position in the
            input
        fittedMeasuredFragmentResiduals : pandas.DataFrame
            one row per "MS" residual, indexed by its position in the
            input; its time point is the one parsed from the fragment id

        Notes
        -----
        Residuals of any other type only print "type not recognized".
        Residuals whose experiment label matches neither column of
        simulation_info are skipped silently.
        """
        expt_type = f_mnt_res_info["expt_type"]
        experiment_id = f_mnt_res_info["experiment_id"]
        time_point = f_mnt_res_info["time_point"]
        rxn_id = f_mnt_res_info["rxn_id"]
        res_data = f_mnt_res_info["res_data"]
        res_fit = f_mnt_res_info["res_fit"]
        res_peak = f_mnt_res_info["res_peak"]
        res_stdev = f_mnt_res_info["res_stdev"]
        res_val = f_mnt_res_info["res_val"]
        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData[
            "simulation_dateAndTime"].unique()[0]

        def _resolve_labels(label):
            # The label is either an experiment id or a sample name
            # abbreviation; the other one is taken from the first row of
            # simulation_info. Returns None when it matches neither column.
            if label in list(simulation_info["experiment_id"]):
                return label, simulation_info["sample_name_abbreviation"][0]
            if label in list(simulation_info["sample_name_abbreviation"]):
                return simulation_info["experiment_id"][0], label
            return None

        def _residual_columns(position):
            # Residual statistics shared by flux and fragment rows.
            return {
                "res_data": float(res_data[position]),
                "res_fit": float(res_fit[position]),
                "res_peak": res_peak[position],
                "res_stdev": float(res_stdev[position]),
                "res_val": float(res_val[position]),
                "res_msens": None,
                "res_esens": None,
                "used_": True,
                "comment_": None,
            }

        fittedMeasuredFluxResiduals = {}
        fittedMeasuredFragmentResiduals = {}
        for cnt, x_type in enumerate(expt_type):
            if x_type == "Flux":
                labels = _resolve_labels(experiment_id[cnt])
                if labels is None:
                    continue
                row = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "experiment_id": labels[0],
                    "sample_name_abbreviation": labels[1],
                    "time_point": time_point[cnt],
                    "rxn_id": rxn_id[cnt],
                }
                row.update(_residual_columns(cnt))
                fittedMeasuredFluxResiduals[cnt] = row
            elif x_type == "MS":
                # Undo the escaping of "-", "(" and ")". The replacement is
                # plain text, so it must be the bare character; a regex
                # class such as "[(]" would be inserted literally and then
                # be re-encoded below as "[_LPARANTHES_]".
                fragment_string = (rxn_id[cnt]
                                   .replace("_DASH_", "-")
                                   .replace("_LPARANTHES_", "(")
                                   .replace("_RPARANTHES_", ")"))
                fragment_list = fragment_string.split("_")
                # ids longer than five tokens that contain "MRM" or "EPI"
                # carry one extra token that belongs to the fragment id and
                # shifts the mass offset and the time point by one position
                has_extra_token = len(fragment_list) > 5 and (
                    "MRM" in fragment_list or "EPI" in fragment_list)
                shift = 1 if has_extra_token else 0
                fragment_id = "_".join(fragment_list[i]
                                       for i in range(3 + shift))
                fragment_mass = Formula(fragment_list[2]).mass + float(
                    fragment_list[3 + shift])
                # kept in its own name so the per-row time_point sequence
                # stays intact for later "Flux" rows
                fragment_time_point = fragment_list[4 + shift]
                # restore the escaped form for the stored fragment id
                fragment_id = (fragment_id
                               .replace("-", "_DASH_")
                               .replace("(", "_LPARANTHES_")
                               .replace(")", "_RPARANTHES_"))
                labels = _resolve_labels(experiment_id[cnt])
                if labels is None:
                    continue
                row = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "experiment_id": labels[0],
                    "sample_name_abbreviation": labels[1],
                    "time_point": fragment_time_point,
                    "fragment_id": fragment_id,
                    "fragment_mass": fragment_mass,
                }
                row.update(_residual_columns(cnt))
                fittedMeasuredFragmentResiduals[cnt] = row
            else:
                print("type not recognized")
        fittedMeasuredFluxResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFluxResiduals, orient="index")
        fittedMeasuredFragmentResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFragmentResiduals, orient="index")
        return fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals