def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Build a multivariate formation-energy prior from equilibrator.

    Each metabolite in ``mi.stan_coords.metabolites`` is resolved to an
    equilibrator compound through its ``inchi_key`` and passed to
    ``ComponentContribution.standard_dg_formation``.

    :param mi: A MaudInput object
    :return: a pandas Series of prior means and a pandas DataFrame of
        covariances, both indexed by metabolite id and rounded to 10
        decimal places
    :raises ValueError: if a metabolite has no inchi key, or if equilibrator
        does not return a ``Compound`` for it
    """
    cc = ComponentContribution()

    # Lookup table: metabolite id -> identifier understood by equilibrator.
    inchi_key_by_id = {}
    for metabolite in mi.kinetic_model.metabolites:
        inchi_key_by_id[metabolite.id] = metabolite.inchi_key

    metabolite_index = pd.Index(mi.stan_coords.metabolites, name="metabolite")

    means = []
    finite_rows = []
    infinite_rows = []
    for met_id in metabolite_index:
        external_id = inchi_key_by_id[met_id]
        if external_id is None:
            raise ValueError(f"metabolite {met_id} has no external id.")
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(
                f"cannot find compound for metabolite {met_id}"
                f" with external id {external_id}."
            )
        mean, finite_row, infinite_row = cc.standard_dg_formation(compound)
        means.append(mean)
        finite_rows.append(finite_row)
        infinite_rows.append(infinite_row)

    finite = np.array(finite_rows)
    infinite = np.array(infinite_rows)
    # The second uncertainty component is weighted by a fixed factor of 1e6.
    covariance = finite @ finite.T + 1e6 * infinite @ infinite.T

    prior_mean = pd.Series(
        means, index=metabolite_index, name="prior_mean_dgf"
    ).round(10)
    prior_cov = pd.DataFrame(
        covariance, index=metabolite_index, columns=metabolite_index
    ).round(10)
    return prior_mean, prior_cov
def search_equilibrator_compound(
    cc: ComponentContribution,
    id: str = None,
    inchikey: str = None,
    inchi: str = None,
    smiles: str = None,
    logger: Logger = getLogger(__name__)
) -> Dict[str, str]:
    """Look a compound up in eQuilibrator from any of its identifiers.

    The identifiers are tried in the order ``id``, ``inchikey``, ``inchi``,
    ``smiles`` with ``cc.get_compound``; the first one that yields a compound
    wins. If none matches and ``inchikey`` is given, a last-resort search is
    made with only the first block of the InChIKey
    (``cc.search_compound_by_inchi_key``) and the first hit is used.

    :param cc: object exposing ``get_compound`` and
        ``search_compound_by_inchi_key``
    :param id: compound identifier to try first
    :param inchikey: InChIKey of the compound
    :param inchi: InChI of the compound
    :param smiles: SMILES of the compound
    :param logger: logger (currently unused by this function)
    :return: a dict with the keys ``id``, ``inchi_key``, ``inchi``,
        ``smiles`` and ``cc_key``, where ``cc_key`` names the field whose
        value eQuilibrator recognises; an empty dict if nothing was found.
        On an exact match, caller-supplied values are kept and only empty
        fields are filled from eQuilibrator. On the partial-InChIKey
        fallback, every field is taken from the eQuilibrator compound.
    """

    def copy_data(
        compound,
        data: Dict,
        cc_key: str,
        overwrite: bool = False,
    ) -> Dict:
        # The matching key is passed in explicitly. Reading it from the
        # enclosing loop would see a stale key/value once the loop is over.
        _compound = dict(data)
        # fill with eQuilibrator data for empty fields (or all, if overwrite)
        for k, v in data.items():
            if (
                overwrite
                or v is None
                or v == ''
            ):
                _compound[k] = getattr(compound, k)
        # keep the key known by eQuilibrator
        _compound['cc_key'] = cc_key
        return _compound

    data = {
        'id': id,
        'inchi_key': inchikey,
        'inchi': inchi,
        'smiles': smiles
    }

    for key, val in data.items():
        if val:
            compound = cc.get_compound(val)
            # If compound is found in eQuilibrator, the searched value is
            # non-empty and therefore preserved as-is under `key`.
            if compound is not None:
                return copy_data(compound, data, key)

    if inchikey:
        # In last resort, search only with the first part of the inchikey
        compounds = cc.search_compound_by_inchi_key(
            inchikey.split('-')[0]
        )
        # eQuilibrator returns a list of compounds
        if compounds:
            # The first compound is taken, hoping the list is sorted by
            # decreasing relevance. Its own inchi_key becomes the ID key.
            return copy_data(
                compounds[0], data, 'inchi_key', overwrite=True
            )

    return {}
def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Build a multivariate formation-energy prior from equilibrator.

    Every metabolite of ``mi.kinetic_model`` is looked up in equilibrator by
    its ``inchi_key``, or by its ``id`` when no inchi key is set. The mean
    returned by ``standard_dg_formation`` is shifted by the compound's
    ``transform`` term evaluated at the pH, ionic strength, temperature and
    pMg of the ``ComponentContribution`` object, expressed in kJ/mol.

    :param mi: A MaudInput object
    :return: a pandas Series of prior means and a pandas DataFrame of
        covariances, both reordered to ``mi.stan_coords.metabolites`` and
        rounded to 10 decimal places
    :raises ValueError: if equilibrator does not return a ``Compound`` for a
        metabolite
    """
    cc = ComponentContribution()
    means = []
    finite_rows = []
    infinite_rows = []
    target_index = pd.Index(mi.stan_coords.metabolites, name="metabolite")
    # Values are collected in kinetic-model order and reindexed at the end.
    model_order = [met.id for met in mi.kinetic_model.metabolites]

    for m in mi.kinetic_model.metabolites:
        external_id = m.inchi_key if m.inchi_key is not None else m.id
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(
                f"cannot find compound for metabolite {m.id}"
                f" with external id {external_id}."
                "\nConsider setting the field metabolite_inchi_key"
                " if you haven't already."
            )
        mean, finite_row, infinite_row = cc.standard_dg_formation(compound)
        shift = compound.transform(
            cc.p_h, cc.ionic_strength, cc.temperature, cc.p_mg
        )
        mean += shift.m_as("kJ/mol")
        means.append(mean)
        finite_rows.append(finite_row)
        infinite_rows.append(infinite_row)

    finite = np.array(finite_rows)
    infinite = np.array(infinite_rows)
    # The second uncertainty component is weighted by a fixed factor of 1e6.
    raw_cov = finite @ finite.T + 1e6 * infinite @ infinite.T

    cov_frame = pd.DataFrame(raw_cov, index=model_order, columns=model_order)
    cov = cov_frame.loc[target_index, target_index].round(10)

    mean_series = pd.Series(means, index=model_order, name="prior_mean_dgf")
    mu = mean_series.loc[target_index].round(10)

    return mu, cov
def runThermo(
    pathway: rpPathway,
    cc: ComponentContribution=None,
    ph: float=DEFAULT_pH,
    ionic_strength: float=DEFAULT_ionic_strength,
    pMg: float=DEFAULT_pMg,
    compound_substitutes: Dict = None,
    logger: Logger = getLogger(__name__)
) -> Dict:
    """Run the eQuilibrator thermodynamics analysis on a pathway.

    Steps, in order: log the pathway reactions; remove intermediate
    compounds where possible (``remove_compounds``); resolve each species to
    an identifier known by eQuilibrator; collect the formation energy of each
    species; compute thermodynamics for each reaction and for the net
    reaction; write the results into the pathway.

    :param pathway: The pathway to analyse
    :param cc: eQuilibrator object to use. If None, one is created with
        ``initThermo(ph, ionic_strength, pMg, logger)``
    :param ph: pH passed to ``initThermo`` (only used when ``cc`` is None)
    :param ionic_strength: Ionic strength passed to ``initThermo`` (only used
        when ``cc`` is None)
    :param pMg: pMg passed to ``initThermo`` (only used when ``cc`` is None)
    :param compound_substitutes: Mapping from a species id to a dict with the
        keys ``id``, ``inchikey`` and ``inchi`` to search eQuilibrator with
        instead of the species' own data. If None, it is read from
        ``data/compound_substitutes.csv`` next to this module
    :param logger: Logger object

    :type pathway: rpPathway
    :type cc: ComponentContribution
    :type ph: float
    :type ionic_strength: float
    :type pMg: float
    :type compound_substitutes: Dict
    :type logger: Logger

    :rtype: Dict
    :return: The results, with the keys ``net_reaction``,
        ``optimized_net_reaction``, ``reactions``, ``optimized_reactions``,
        ``species`` and ``substituted_species``
    """

    print_title(
        txt='Pathway Reactions',
        logger=logger,
        waiting=False
    )
    for rxn in pathway.get_list_of_reactions():
        print_reaction(
            rxn=rxn,
            logger=logger
        )

    ## INTERMEDIATE COMPOUNDS
    # Optimise the production of target
    # and remove (if possible) intermediate compounds
    reactions = remove_compounds(
        compounds=pathway.get_intermediate_species(),
        reactions=pathway.get_list_of_reactions(),
        rxn_target_id=pathway.get_target_rxn_id(),
        logger=logger
    )

    ## eQuilibrator
    if cc is None:
        cc = initThermo(
            ph,
            ionic_strength,
            pMg,
            logger
        )

    if compound_substitutes is None:
        compound_substitutes = read_compound_substitutes(
            os_path.join(
                os_path.dirname(os_path.realpath(__file__)),
                'data',
                'compound_substitutes.csv'
            )
        )

    # Search for the key ID known by eQuilibrator
    cc_species = {}
    substituted_species = {}
    sep = '__64__'
    for spe in pathway.get_species():
        spe_id = spe.get_id()
        spe_split = spe_id.split(sep)
        # Species ids may look like CMPD_NAME__64__COMPID while the
        # substitutes table is keyed by CMPD_NAME. A substitute key `k`
        # matches when k + sep + spe_split[1] == spe_id, so the candidate
        # key is derived directly rather than rebuilding the whole table
        # for every species.
        if len(spe_split) > 1:
            suffix = sep + spe_split[1]
            if spe_id.endswith(suffix):
                substitute_key = spe_id[:-len(suffix)]
            else:
                substitute_key = None
        else:
            substitute_key = spe_id

        if (
            substitute_key is not None
            and substitute_key in compound_substitutes
        ):
            # The species is listed in the substitutes: search with them
            substitute = compound_substitutes[substitute_key]
            cc_spe = search_equilibrator_compound(
                cc=cc,
                id=substitute['id'],
                inchikey=substitute['inchikey'],
                inchi=substitute['inchi'],
                logger=logger
            )
        else:
            # Else, take search values from the species itself
            cc_spe = search_equilibrator_compound(
                cc=cc,
                id=spe_id,
                inchikey=spe.get_inchikey(),
                inchi=spe.get_inchi(),
                smiles=spe.get_smiles(),
                logger=logger
            )
        cc_species[spe_id] = cc_spe

        if cc_spe != {}:
            # Record species resolved under a different id
            if spe_id != cc_spe['id']:
                substituted_species[spe_id] = cc_spe[cc_spe['cc_key']]
        else:
            logger.warning(f'Compound {spe_id} has not been found within eQuilibrator cache')

    # Store thermo values for the net reactions
    # and for each of the reactions within the pathway
    results = {
        'net_reaction': {},
        'optimized_net_reaction': Reaction.sum_stoichio(reactions),
        'reactions': {},
        'optimized_reactions': {
            rxn.get_id(): rxn
            for rxn in reactions
        },
        'species': {},
        'substituted_species': substituted_species
    }

    # Get the formation energy for each compound
    for spe_id, cc_spe in cc_species.items():
        value = None
        # Species unknown to eQuilibrator have no key to look up
        if cc_spe:
            try:
                value = cc.standard_dg_formation(
                    cc.get_compound(
                        cc_spe[cc_spe['cc_key']]
                    )
                )[0]  # first element of the returned tuple
            except Exception as e:
                # Best effort: a failing compound is reported as 'NaN'
                logger.debug(e)
        if value is None:
            value = 'NaN'
        results['species'][spe_id] = {
            'standard_dg_formation': {
                'value': value,
                'units': 'kilojoule / mole'
            }
        }

    # Build the list of IDs known by eQuilibrator
    # (species that were not found keep their own id)
    species_cc_ids = {
        spe_id: cc_spe[cc_spe['cc_key']] if cc_spe else spe_id
        for spe_id, cc_spe in cc_species.items()
    }

    ## REACTIONS
    # Compute thermo for each reaction
    for rxn in pathway.get_list_of_reactions():
        results['reactions'][rxn.get_id()] = eQuilibrator(
            species_stoichio=rxn.get_species(),
            species_ids=species_cc_ids,
            cc=cc,
            logger=logger
        )

    ## THERMO
    print_title(
        txt='Computing thermodynamics (eQuilibrator)...',
        logger=logger,
        waiting=True
    )

    results['net_reaction'] = eQuilibrator(
        species_stoichio=Reaction.sum_stoichio(reactions),
        species_ids=species_cc_ids,
        cc=cc,
        logger=logger
    )

    print_OK(logger)

    # Write results into the pathway
    write_results_to_pathway(pathway, results, logger)

    return results