def i_NOD(self, i):
    '''
    Set the nitrogenous oxygen demand content (stored as ``_i_NOD``).

    When `i` is None, a default is derived from ``self.i_N``:

    - components whose degradability is 'Readily' or 'Slowly', or whose
      formula is an ammonia/ammonium form, use the mass of 4 O per N;
    - nitrite forms ('NO2-', 'HNO2') use the mass of 1 O per N;
    - everything else gets 0.

    The final value always goes through ``check_return_property``.
    '''
    # Identity comparison: `== None` would dispatch to a custom `__eq__`.
    if i is None:
        if (self.degradability in ('Readily', 'Slowly')
                or self.formula in ('H3N', 'NH4', 'NH3', 'NH4+')):
            i = self.i_N * molecular_weight({'O':4}) / molecular_weight({'N':1})
        elif self.formula in ('NO2-', 'HNO2'):
            i = self.i_N * molecular_weight({'O':1}) / molecular_weight({'N':1})
        else:
            i = 0.
    self._i_NOD = check_return_property('i_NOD', i)
def get_bmp_stoichiometry(component, **replace):
    r'''
    Compute the theoretical biochemical methane potential (BMP) in
    mol :math:`CH_4`/mol chemical of a given component as in:

    .. math::
        C_vH_wO_xN_yS_z + \frac{4v-w-2x+3y+2z}{4}H2O -> \frac{4v+w-2x-3y-2z}{8}CH4 + \frac{(4v-w+2x+3y+2z)}{8}CO2 + yNH_3 + zH_2S

    For organic components, elements other than "C", "H", "O", "N", "S", and "P" will
    be turned into "Ash" with a molecular weight of 1 g/mol.

    For inorganic components, components without atoms, and methane itself
    (formula 'CH4'), all dict values will be 0.

    Parameters
    ----------
    component : obj
        The component whose BMP stoichiometry will be calculated.
    replace : dict
        Alternative IDs of the reactant/product components, given as
        keyword arguments mapping the default ID to the new ID,
        e.g., if S_CH4 is the ID of dissolved methane instead of CH4,
        then can pass ``CH4='S_CH4'`` (or ``**{'CH4': 'S_CH4'}``).
        Every default ID given must be a key of the returned dict,
        otherwise a KeyError is raised.

    Returns
    -------
    dict
        Stoichiometric coefficients keyed by ID; negative values are
        consumed. Replaced IDs are moved to the end of the dict.

    Examples
    --------
    >>> from qsdsan import Component
    >>> from qsdsan.utils import get_bmp_stoichiometry
    >>> Glucose = Component('Glucose', organic=True, particle_size='Soluble',
    ...                     degradability='Readily')
    >>> get_bmp_stoichiometry(Glucose)
    {'Glucose': -1.0, 'H2O': -0.0, 'CH4': 3.0, 'CO2': 3.0, 'NH3': 0.0, 'H2S': 0.0}
    '''
    cmp_ID = component.ID
    atoms = component.atoms

    keys = (cmp_ID, 'H2O', 'CH4', 'CO2', 'NH3', 'H2S')
    dct = dict.fromkeys(keys, 0.)

    if atoms and component.organic and component.formula != 'CH4':
        nC, nH, nO, nN, nS, nP = get_CHONSP(atoms)

        dct[cmp_ID] = -1.
        dct['H2O'] = -(nC - 0.25 * nH - 0.5 * nO + 0.75 * nN + 0.5 * nS)
        dct['CH4'] = 0.5 * nC + 0.125 * nH - 0.25 * nO - 0.375 * nN - 0.25 * nS
        dct['CO2'] = 0.5 * nC - 0.125 * nH + 0.25 * nO + 0.375 * nN + 0.25 * nS
        dct['NH3'] = nN
        dct['H2S'] = nS

        # Whatever mass is not carried by the reacting elements is lumped
        # into "Ash"; a relative tolerance filters out rounding noise.
        bmp_atoms = {i: atoms.get(i, 0) for i in dichromate_oxidizing_elements}
        MW = component.MW or molecular_weight(atoms)
        Ash = MW - molecular_weight(bmp_atoms)
        if Ash / MW > 0.0001:
            dct['Ash'] = Ash

    for old_ID, new_ID in replace.items():
        dct[new_ID] = dct.pop(old_ID)

    return dct
def i_COD(self, i):
    '''
    Set the COD content (stored as ``_i_COD``).

    An explicit `i` is validated through ``check_return_property`` and stored.
    With `i` as None the value is derived:

    - 0 for components that are neither organic nor one of
      'H2', 'O2', 'N2', 'NO2-', 'NO3-';
    - 1 when the component is measured as COD;
    - otherwise computed from the formula, using ``electron_acceptor_cod``
      for 'O2', 'N2', 'NO2-', 'NO3-' and the dichromate demand from
      ``cod_test_stoichiometry`` for everything else, then scaled by
      ``self.i_mass``.

    Raises AttributeError when a value must be computed but the component
    has no atoms.
    '''
    if i is not None:
        self._i_COD = check_return_property('i_COD', i)
        return

    acceptors = ('O2', 'N2', 'NO2-', 'NO3-')
    carries_cod = self.organic or self.formula in ('H2',) + acceptors
    if not carries_cod:
        self._i_COD = 0.
        return

    if self.measured_as == 'COD':
        self._i_COD = 1.
        return

    if not self.atoms:
        raise AttributeError(f"Must specify `i_COD` for organic component {self.ID}, "
                             f"which is not measured as COD and has no formula.")

    formula_MW = molecular_weight(self.atoms)
    formula_charge = charge_from_formula(self.formula)
    if self.formula in acceptors:
        o2_equiv = electron_acceptor_cod(self.atoms, formula_charge)
        cod = o2_equiv * molecular_weight({'O':2})
    else:
        stoichiometry = cod_test_stoichiometry(self.atoms, formula_charge)
        dichromate = - stoichiometry['Cr2O7-2']
        # 1 mol dichromate is counted as 1.5 mol O2
        cod = dichromate * 1.5 * molecular_weight({'O':2})
    self._i_COD = check_return_property('i_COD', cod/formula_MW * self.i_mass)
def i_charge(self, i):
    '''
    Set the charge content (stored as ``_i_charge``).

    The given `i` is validated and stored first. If the stored value is
    falsy (None or 0), it is replaced by a value computed from the formula
    (formula charge per formula mass, scaled by ``self.i_mass``), or by 0
    when the component has no formula.
    '''
    self._i_charge = check_return_property('i_charge', i)
    if self._i_charge:
        return

    if not self.formula:
        self._i_charge = 0.
        return

    net_charge = charge_from_formula(self.formula)
    formula_MW = molecular_weight(self.atoms)
    computed = net_charge/formula_MW * self.i_mass
    self._i_charge = check_return_property('i_charge', computed)
def measured_as(self, measured_as):
    '''
    Set the basis the component is measured as.

    A truthy `measured_as` must be 'COD', one of the component's atoms, or
    a name for which 'i_<name>' is a known numeric component property;
    ``_MW`` is then reset to the molecular weight of that basis
    (O2 for COD). Anything else raises AttributeError.

    When the new value differs from the current one, all i_{} values are
    converted through ``_convert_i_attr`` before the new value is stored.
    '''
    if measured_as:
        if measured_as == 'COD':
            basis_atoms = {'O':2}
        elif measured_as in self.atoms or 'i_'+measured_as in _num_component_properties:
            basis_atoms = {measured_as:1}
        else:
            raise AttributeError(f"Component {self.ID} must be measured as "
                                 f"either COD or one of its constituent atoms, "
                                 f"if not as itself.")
        self._MW = molecular_weight(basis_atoms)

    basis_changed = self._measured_as != measured_as
    if basis_changed:
        self._convert_i_attr(measured_as)

    self._measured_as = measured_as
def i_mass(self, i):
    '''
    Set the mass content (stored as ``_i_mass``).

    For components with atoms the value is always calculated, so passing a
    truthy `i` raises AttributeError. The calculated value is:

    - the inverse mass fraction of the atom the component is measured as;
    - formula mass per unit of dichromate-based COD when measured as COD;
    - AttributeError for any other truthy ``measured_as``.

    For components measured as themselves (``measured_as`` is None) the
    value is forced to 1; any other non-zero `i` raises AttributeError.

    The final value goes through ``check_return_property``.
    '''
    if self.atoms:
        if i:
            raise AttributeError(f'Component {self.ID} has formula, i_mass '
                                 f'is calculated, cannot be set.')
        if self.measured_as in self.atoms:
            i = 1/get_mass_frac(self.atoms)[self.measured_as]
        elif self.measured_as == 'COD':
            chem_MW = molecular_weight(self.atoms)
            chem_charge = charge_from_formula(self.formula)
            Cr2O7 = - cod_test_stoichiometry(self.atoms, chem_charge)['Cr2O7-2']
            # 1 mol dichromate is counted as 1.5 mol O2
            cod = Cr2O7 * 1.5 * molecular_weight({'O':2})
            i = chem_MW/cod
        elif self.measured_as:
            raise AttributeError(f'Must specify i_mass for component {self.ID} '
                                 f'measured as {self.measured_as}.')
    # Identity comparison: `== None` would dispatch to a custom `__eq__`.
    if self.measured_as is None:
        if i and i != 1:
            raise AttributeError(f'Component {self.ID} is measured as itself, '
                                 f'i_mass cannot be set to values other than 1.')
        i = 1
    self._i_mass = check_return_property('i_mass', i)
def test_organic_user_db():
    """Load the example organic user database and check every index against lookups."""
    tsv_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=None, user_dbs=[tsv_path])

    # Every record can be found again through its own CAS string
    for record in db.CAS_index.values():
        assert CAS_from_any(record.CASs) == record.CASs

    # Something must actually have been loaded
    assert len(db.CAS_index) > 100

    # SMILES strings are unique and usable for lookups; empty ones are skipped
    for smiles, record in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == record.CASs

    # Formulas are stored in serialized form
    formula_checks = [rec.formula == serialize_formula(rec.formula)
                      for rec in db.CAS_index.values()]
    assert all(formula_checks)

    # CAS numbers are valid
    cas_checks = [check_CAS(rec.CASs) for rec in db.CAS_index.values()]
    assert all(cas_checks)

    # Stored molecular weights agree with the ones computed from the formula
    for rec in db.CAS_index.values():
        parsed = nested_formula_parser(serialize_formula(rec.formula), check=False)
        assert_allclose(molecular_weight(parsed), rec.MW, atol=0.05)

    # Lookups by InChI and by InChI key resolve to the same CAS
    for cas_int, rec in db.CAS_index.items():
        assert CAS_from_any('InChI=1S/' + rec.InChI) == int_to_CAS(cas_int)

    for cas_int, rec in db.CAS_index.items():
        assert CAS_from_any('InChIKey=' + rec.InChI_key) == int_to_CAS(cas_int)

    # PubChem ids are only checked when they are set (-1 means missing)
    for cas_int, rec in db.CAS_index.items():
        if rec.pubchemid == -1:
            continue
        assert CAS_from_any('PubChem=' + str(rec.pubchemid)) == int_to_CAS(cas_int)

    # All indexes hold the same number of entries
    n_records = len(db.CAS_index)
    assert n_records == len(db.smiles_index)
    assert n_records == len(db.InChI_index)
    assert n_records == len(db.InChI_key_index)
def test_component():
    """Check derived i_{} properties of components and loading of the defaults."""
    import thermosteam as tmo
    from qsdsan import Component, Components
    from chemicals.elements import molecular_weight
    from math import isclose

    # Ammonium measured as N, then as itself
    ammonium = Component('S_NH4', formula='NH4+', measured_as='N',
                         f_BOD5_COD=0, f_uBOD_COD=0, f_Vmass_Totmass=0,
                         description="Ammonium",
                         particle_size="Soluble",
                         degradability="Undegradable",
                         organic=False)
    assert ammonium.i_N == 1
    assert ammonium.i_NOD == molecular_weight({'O':4})/molecular_weight({'N':1})
    ammonium.measured_as = None
    assert ammonium.i_mass == 1

    # Acetate measured as COD, then as itself
    acetate = Component('S_Ac', formula='CH3COO-', measured_as='COD',
                        f_BOD5_COD=0.717, f_uBOD_COD=0.863, f_Vmass_Totmass=1,
                        description="Acetate",
                        particle_size="Soluble",
                        degradability="Readily",
                        organic=True)
    assert acetate.i_COD == 1
    acetate.measured_as = None
    assert acetate.i_mass == 1
    assert acetate.i_COD == molecular_weight({'O':4})/molecular_weight({'C':2, 'H':3, 'O':2})

    # Hydrosulfide built from an existing chemical
    hydrosulfide = Component.from_chemical('S_HS', tmo.Chemical('Hydrosulfide'),
                                           particle_size="Soluble",
                                           degradability="Undegradable",
                                           organic=False)
    assert hydrosulfide.i_charge < 0
    hydrosulfide.measured_as = 'S'
    assert hydrosulfide.i_mass > 1

    default_cmps = Components.load_default(default_compile=False)

    #!!! Should we allow None for particle_size, degradability, and organic?
    # with pytest.raises(AssertionError):
    #     H2O = Component.from_chemical('H2O', tmo.Chemical('H2O'))

    water = Component.from_chemical('H2O', tmo.Chemical('H2O'),
                                    particle_size='Soluble',
                                    degradability='Undegradable',
                                    organic=False)
    with pytest.raises(ValueError):
        default_cmps.append(water)

    default_cmps = Components.load_default()
    hydrogen = default_cmps.S_H2
    assert hydrogen.measured_as == 'COD'
    assert hydrogen.i_COD == 1

    expected_N2 = - molecular_weight({'O':1.5})/molecular_weight({'N':1})
    assert isclose(default_cmps.S_N2.i_COD, expected_N2, rel_tol=1e-3)
    expected_NO3 = - molecular_weight({'O':4})/molecular_weight({'N':1})
    assert isclose(default_cmps.S_NO3.i_COD, expected_NO3, rel_tol=1e-3)

    tmo.settings.set_thermo(default_cmps)
def compute_stream_COD(stream, units='mg/L'):
    r'''
    Compute the chemical oxygen demand (COD) of a given stream
    by summing the COD of each component in the stream using:

    .. math::
        COD [\frac{kg}{m^3}] = mol_{component} [\frac{kmol}{m^3}] * \frac{g O_2}{mol\ component}

    Parameters
    ----------
    stream : obj
        The stream whose COD will be calculated. If it has a `COD`
        attribute, that value is used directly and taken to be in mg/L;
        otherwise the COD is calculated from the molar flows and
        :func:`get_cod_stoichiometry` of its components.
    units : str
        Units of the returned COD.

    Returns
    -------
    COD of the stream converted from mg/L to `units`.
    '''
    try:
        COD = stream.COD
    except AttributeError: # not a WasteStream
        cmps = stream.components
        mol = stream.mol
        iCOD = np.array([-get_cod_stoichiometry(i)['O2'] for i in cmps])
        # Per the equation above this is in kg/m3 (assuming `mol` and `F_vol`
        # share the kmol/hr and m3/hr time basis)
        COD = (mol * iCOD).sum() * molecular_weight({'O': 2}) / stream.F_vol
        # kg/m3 -> mg/L, so both branches feed the same base unit into the
        # conversion below (previously the kg/m3 value was treated as mg/L)
        COD = COD * 1e3
    return auom('mg/L').convert(COD, units)
def test_inorganic_db():
    """Load the inorganic user database and check its indexes against lookups."""
    tsv_path = os.path.join(folder, 'Inorganic db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=None, user_dbs=[tsv_path])

    # Every record can be found again through its own CAS string
    for record in db.CAS_index.values():
        assert CAS_from_any(record.CASs) == record.CASs

    # Formula lookups, except for formulas which are not unique by design
    ambiguous_formulas = {'H2MgO2', 'F2N2'}
    for formula, record in db.formula_index.items():
        if formula in ambiguous_formulas:
            continue
        assert CAS_from_any(formula) == record.CASs

    # SMILES strings are unique and usable for lookups; empty ones are skipped
    for smiles, record in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == record.CASs

    # Formulas are stored in serialized form
    formula_checks = [rec.formula == serialize_formula(rec.formula)
                      for rec in db.CAS_index.values()]
    assert all(formula_checks)

    # CAS numbers are valid
    cas_checks = [check_CAS(rec.CASs) for rec in db.CAS_index.values()]
    assert all(cas_checks)

    # Stored molecular weights agree with the ones computed from the formula
    for rec in db.CAS_index.values():
        parsed = nested_formula_parser(serialize_formula(rec.formula), check=False)
        assert_allclose(molecular_weight(parsed), rec.MW, atol=0.05)
def test_component():
    """Check derived i_{} properties, compiling of components, and cached default groups."""
    import pytest
    from qsdsan import Chemical, Component, Components, set_thermo, \
        _waste_stream as ws_module
    from chemicals.elements import molecular_weight
    from math import isclose

    # Ammonium measured as N, then as itself
    ammonium = Component('S_NH4', formula='NH4+', measured_as='N',
                         f_BOD5_COD=0, f_uBOD_COD=0, f_Vmass_Totmass=0,
                         description="Ammonium",
                         particle_size="Soluble",
                         degradability="Undegradable",
                         organic=False)
    assert ammonium.i_N == 1
    assert ammonium.i_NOD == molecular_weight({'O': 4}) / molecular_weight({'N': 1})
    ammonium.measured_as = None
    assert ammonium.i_mass == 1

    # Acetate measured as COD, then as itself
    acetate = Component('S_Ac', formula='CH3COO-', measured_as='COD',
                        f_BOD5_COD=0.717, f_uBOD_COD=0.863, f_Vmass_Totmass=1,
                        description="Acetate",
                        particle_size="Soluble",
                        degradability="Readily",
                        organic=True)
    assert acetate.i_COD == 1
    acetate.measured_as = None
    assert acetate.i_mass == 1
    assert acetate.i_COD == molecular_weight({'O': 4}) / molecular_weight({'C': 2, 'H': 3, 'O': 2})

    # Hydrosulfide built from an existing chemical
    hydrosulfide = Component.from_chemical('S_HS', Chemical('Hydrosulfide'),
                                           particle_size="Soluble",
                                           degradability="Undegradable",
                                           organic=False)
    assert hydrosulfide.i_charge < 0
    hydrosulfide.measured_as = 'S'
    assert hydrosulfide.i_mass > 1

    # Default components, not compiled yet
    uncompiled = Components.load_default(default_compile=False)
    bare_water = Component.from_chemical('H2O', Chemical('H2O'))
    with pytest.raises(ValueError):
        # H2O already in default components
        uncompiled.append(bare_water)
    with pytest.raises(RuntimeError):
        # key chemical-related properties missing
        uncompiled.compile()
    # Can compile with default-filling those missing properties
    uncompiled.default_compile(lock_state_at='', particulate_ref='NaCl')

    # Swap the default H2O for a fully specified one
    without_water = Components((cmp for cmp in uncompiled if cmp.ID != 'H2O'))
    water = Component.from_chemical('H2O', Chemical('H2O'),
                                    particle_size='Soluble',
                                    degradability='Undegradable',
                                    organic=False)
    without_water.append(water)
    without_water.default_compile(lock_state_at='', particulate_ref='NaCl')

    defaults = Components.load_default()
    assert defaults.S_H2.measured_as == 'COD'
    assert defaults.S_H2.i_COD == 1

    expected_N2 = -molecular_weight({'O': 1.5}) / molecular_weight({'N': 1})
    assert isclose(defaults.S_N2.i_COD, expected_N2, rel_tol=1e-3)
    expected_NO3 = -molecular_weight({'O': 4}) / molecular_weight({'N': 1})
    assert isclose(defaults.S_NO3.i_COD, expected_NO3, rel_tol=1e-3)

    set_thermo(defaults)

    # Check if the default groups are up-to-date
    assert set(defaults.IDs) == ws_module._default_cmp_IDs
    for group, cached_IDs in ws_module._specific_groups.items():
        current_IDs = {cmp.ID for cmp in getattr(defaults, group)}
        assert cached_IDs == current_IDs
def cod_test_stoichiometry(atoms, charge=0, MW=None, missing_handling='elemental'):
    r'''
    Return a dictionary of stoichiometric coefficients of the oxidation reaction
    by dichromate, given a dictionary of a molecule's constituent atoms and their
    counts, as well as the charge of the molecule, if any.

    This function is based on the oxidation of organic materials by dichromate in
    an acid solution, as in a typical COD test of water or wastewater samples.
    Only C, H, O, N, S, P are considered active in the reaction.

    Parameters
    ----------
    atoms : dict[str, int or float]
        Dictionary of atoms and their counts, [-].
    charge : int or float
        Charge of the molecule (negative for anions).
    MW : float, optional
        Molecular weight of chemical, used only if `missing_handling` is 'ash',
        [g/mol]
    missing_handling : str, optional
        How to handle atoms which do not appear in the stoichiometric
        reaction below (case-insensitive). If 'elemental', return those
        atoms in the monatomic state; if 'ash', converts all missing atoms
        to 'Ash' in the output at a `MW` of 1 g/mol, [-]

    Returns
    -------
    stoichiometry : dict[str, float]
        Stoichiometric coefficients of the redox reaction. May include the following
        keys for complete oxidation: 'H+', 'Cr+3', 'H2O', 'CO2', 'NH4+', 'SO4-2',
        'PO4-3'; if `missing_handling` is 'elemental' can include the other elements;
        if `missing_handling` is 'ash', 'Ash' will be present in the output
        when the molecule contains elements that are not part of the reaction.
        'Cr2O7-2' is always present, with negative values indicating
        dichromate is required/consumed. [-]

    Raises
    ------
    ValueError
        If `missing_handling` is neither 'elemental' nor 'ash' (only checked
        when the reaction is actually computed).

    .. note::
        The stoichiometry is given by:

        .. math::
            C_n H_a O_b N_c S_d P_e^{f-} + xCr_2O_7^{2-} + (8x+c-2d-3e+f)H^{+}
                -> nCO_2 + 2xCr^{3+} + cNH_4^{+} + dSO_4^{2-} + ePO_4^{3-} + (b+7x-2n-4d-4e)H_2O

        .. math::
            x = \frac{4n+a-2b-3c+6d+5e+f}{6}

        Also included in the results is the moles of Cr2O7-2 required per mole of
        the mixture of the molecule.

        All products are in aqueous solution.

        With 'elemental' handling, atoms not in ['C', 'H', 'N', 'O', 'S', 'P']
        are returned as pure species.

        Molecules without C or without H (other than H2) are treated as
        exerting no demand and only ``{'Cr2O7-2': 0.}`` is returned.

    Examples
    --------
    >>> # Acetate in COD test:
    >>> cod_test_stoichiometry({'C': 2, 'H':3, 'O':2}, -1)
    {'Cr2O7-2': -1.3333333333333333, 'H+': -11.666666666666666, 'Cr+3': 2.6666666666666665, 'CO2': 2, 'H2O': 7.333333333333332}
    '''
    nC, nH, nO, nN, nS, nP = get_CHONSP(atoms)
    ne = -charge # number of negative charges, `f` in the equations above

    if nC <= 0 or nH <= 0:
        if not (len(atoms) == 1 and nH == 2): # H2
            return {'Cr2O7-2': 0.}

    nCr2O7 = -(4 * nC + nH - 2 * nO - 3 * nN + 6 * nS + 5 * nP + ne) / 6
    nCr = -2 * nCr2O7
    nH2O = nO - 7 * nCr2O7 - 2 * nC - 4 * nS - 4 * nP
    n_proton = 8 * nCr2O7 - nN + 2 * nS + 3 * nP - ne

    # 'Cr2O7-2' is part of the documented contract and callers index it
    # directly, so it is kept even when the demand is exactly zero
    # (`or 0.` also normalizes a negative zero).
    products = {'Cr2O7-2': nCr2O7 or 0.}
    other_species = (
        ('H+', n_proton),
        ('Cr+3', nCr),
        ('CO2', nC),
        ('SO4-2', nS),
        ('NH4+', nN),
        ('PO4-3', nP),
        ('H2O', nH2O),
    )
    for species, coefficient in other_species:
        if coefficient != 0.0:
            products[species] = coefficient

    missing_handling = missing_handling.lower()
    if missing_handling == 'elemental':
        for atom, value in atoms.items():
            if atom not in dichromate_oxidizing_elements_set:
                products[atom] = value
    elif missing_handling == 'ash':
        # Mass not carried by the reacting elements is lumped into "Ash";
        # a relative tolerance filters out rounding noise.
        cod_atoms = {i: atoms.get(i, 0) for i in dichromate_oxidizing_elements}
        MW = MW or molecular_weight(atoms)
        Ash = MW - molecular_weight(cod_atoms)
        if Ash / MW > 0.0001:
            products['Ash'] = Ash
    else:
        raise ValueError(
            "Allowed values for `missing_handling` are 'elemental' and 'ash'.")
    return products
def get_digestion_rxns(components, X_biogas, X_growth, biomass_ID, biodegradability=1.):
    '''
    Generate anaerobic digestion (AD) and biomass growth reactions
    for a given set of components.

    AD stoichiometry is based on :func:`qsdsan.utils.get_bmp_stoichiometry`
    and biodegradabilities of the components as indicated in `biodegradability`.

    Biomass growth is purely based on mass balance, thus can potentially result
    in loss of atom balance.

    No reactions will be generated for inorganic components.

    Parameters
    ----------
    components : Iterable(obj)
        Set of components.
    X_biogas : float
        Fraction of the organic components that is used for AD.
    X_growth : float
        Fraction of the organic components that is used for biomass growth.
    biomass_ID : str
        ID of the biomass (should be included in the `components`).
    biodegradability : float or dict
        Biodegradabilities of the components.
        When given as a number, all organic components will be assumed to have the
        same biodegradability;
        when given as a dict, the keys should be the IDs of components and
        values the corresponding biodegradabilities,
        components without corresponding biodegradabilities will be assumed unbiodegradable.

    Returns
    -------
    A parallel reaction holding the biogas reactions followed by the growth
    reactions when more than one reaction was generated, otherwise an empty list.

    Raises
    ------
    ValueError
        If `X_biogas` + `X_growth` is larger than 1.

    Examples
    --------
    >>> from qsdsan import Component, Components, set_thermo
    >>> from qsdsan.utils import load_example_cmps, get_digestion_rxns
    >>> example_cmps = load_example_cmps()
    >>> NH3 = Component('NH3', phase='g', organic=False, particle_size='Dissolved gas',
    ...                 degradability='Undegradable')
    >>> H2S = Component('H2S', phase='g', organic=False, particle_size='Dissolved gas',
    ...                 degradability='Undegradable')
    >>> P4O10 = Component('P4O10', phase='s',
    ...                   organic=False, particle_size='Particulate',
    ...                   degradability='Undegradable')
    >>> Biomass = Component('Biomass', phase='s', formula='CH1.8O0.5N0.2',
    ...                     organic=True, particle_size='Particulate',
    ...                     degradability='Slowly')
    >>> Ash = Component('Ash', phase='s', MW=1,
    ...                 organic=False, particle_size='Particulate',
    ...                 degradability='Undegradable')
    >>> for i in (P4O10, Biomass, Ash):
    ...     i.copy_models_from(example_cmps.NaCl, ('V',))
    >>> for i in (Biomass, Ash): i.default() # doctest:+ELLIPSIS
    {...
    >>> cmps = Components([*example_cmps, NH3, H2S, P4O10, Biomass, Ash])
    >>> cmps.compile()
    >>> set_thermo(cmps)
    >>> cmps
    CompiledComponents([H2O, CO2, N2O, NaCl, H2SO4, CH4, Methanol, Ethanol, NH3, H2S, P4O10, Biomass, Ash])
    >>> rxns = get_digestion_rxns(cmps, X_biogas=0.9, X_growth=0.07,
    ...                           biomass_ID='Biomass', biodegradability=0.87)
    >>> rxns # doctest: +SKIP
    ParallelReaction (by mol):
    index  stoichiometry                                    reactant    X[%]
    [0]    Methanol -> 0.5 H2O + 0.25 CO2 + 0.75 CH4        Methanol   78.30
    [1]    Ethanol -> 0.5 CO2 + 1.5 CH4                     Ethanol    78.30
    [2]    Methanol -> 1.3 Biomass                          Methanol    6.09
    [3]    Ethanol -> 1.87 Biomass                          Ethanol     6.09
    '''
    biomass = getattr(components, biomass_ID)
    biomass_MW = biomass.MW or molecular_weight(biomass.atoms)

    # Accept any plain number (e.g. the integer 1), not only floats;
    # everything else is used as a mapping from ID to biodegradability.
    if isinstance(biodegradability, (int, float)):
        BD = dict.fromkeys(components.IDs, biodegradability)
    else:
        BD = biodegradability

    if X_biogas + X_growth > 1:
        raise ValueError('Sum of `X_biogas` and `X_growth` is '
                         f'{X_biogas+X_growth}, larger than 100%.')

    biogas_rxns, growth_rxns = [], []
    for i in components:
        ID = i.ID

        if ID == biomass_ID:
            continue

        X = BD.get(ID)
        if not X:
            continue # assume no entry means not biodegradable

        biogas_stoyk = get_bmp_stoichiometry(i)
        if not biogas_stoyk.get(ID): # no conversion of this chemical
            continue

        iX_biogas = X * X_biogas # the amount of component used for biogas production
        iX_growth = X * X_growth # the amount of component used for cell growth

        if iX_biogas:
            biogas_rxn = Rxn(reaction=biogas_stoyk, reactant=ID, X=iX_biogas,
                             check_atomic_balance=True)
            biogas_rxns.append(biogas_rxn)

        if iX_growth:
            # Growth conserves mass only, so the atomic balance is not checked
            growth_rxn = Rxn(f'{ID} -> {i.MW/biomass_MW}{biomass_ID}',
                             reactant=ID, X=iX_growth,
                             check_atomic_balance=False)
            growth_rxns.append(growth_rxn)

    # NOTE(review): a single generated reaction is discarded by the `> 1`
    # threshold below — confirm whether `> 0` was intended.
    if len(biogas_rxns) + len(growth_rxns) > 1:
        return PRxn(biogas_rxns + growth_rxns)

    return []
def get_cod_stoichiometry(component, aqueous=False, **replace):
    r'''
    Get the molar stoichiometry for the theoretical
    chemical oxygen demand (COD) of a given component.

    COD stoichiometry is consistent with :func:`qsdsan.utils.cod_test_stoichiometry`
    except that the oxidant is O2 rather than Cr2O7-2.

    For organic components, elements other than "C", "H", "O", "N", "S", and "P" will
    be turned into "Ash" with a molecular weight of 1 g/mol.

    For inorganic components, all dict values will be 0.

    If `aqueous` == False, the stoichiometry is given by:

    .. math::
        C_nH_aO_bN_cS_dP_e + \frac{2n+0.5a-b-1.5c+3d+2.5e}{2}O_2
        -> nCO_2 + \frac{a-3c-2d}{2}H_2O + cNH_3 + dH_2SO_4 + \frac{e}{4}P_4O_{10}

    otherwise:

    .. math::
        C_nH_aO_bN_cS_dP_e + \frac{2n+0.5a-b-1.5c+3d+2.5e}{2}O_2 + (c-2d-3e)H^+
        -> nCO_2 + \frac{a-3c-2d-3e}{2}H_2O + cNH_4^+ + dSO_4^{2-} + ePO_4^{3-}

    Parameters
    ----------
    component : obj
        The component whose COD will be calculated.
    aqueous : bool
        Whether the reaction will happen in aqueous phase.
    replace : dict
        Alternative IDs of the reactant/product components, given as
        keyword arguments mapping the default ID to the new ID,
        e.g., if S_O2 is the ID of dissolved oxygen instead of O2,
        then can pass ``O2='S_O2'`` (or ``**{'O2': 'S_O2'}``).
        Every default ID given must be a key of the returned dict.

    Examples
    --------
    >>> from qsdsan import Component
    >>> from qsdsan.utils import get_cod_stoichiometry
    >>> Glucose = Component('Glucose', organic=True, particle_size='Soluble',
    ...                     degradability='Readily')
    >>> get_cod_stoichiometry(Glucose)
    {'Glucose': -1.0, 'O2': -6.0, 'CO2': 6, 'H2O': 6.0, 'NH3': 0.0, 'H2SO4': 0.0, 'P4O10': 0.0}
    '''
    cmp_ID = component.ID
    atoms = component.atoms

    # The component itself always comes first, followed by the
    # phase-specific reactants/products.
    if aqueous:
        species = ('O2', 'H+', 'CO2', 'H2O', 'NH4+', 'SO42-', 'PO43-')
    else:
        species = ('O2', 'CO2', 'H2O', 'NH3', 'H2SO4', 'P4O10')
    dct = dict.fromkeys((cmp_ID,) + species, 0.)

    if atoms and component.organic:
        nC, nH, nO, nN, nS, nP = get_CHONSP(atoms)

        dct[cmp_ID] = -1.
        dct['O2'] = -(nC + 0.25 * nH - 0.5 * nO - 0.75 * nN + 1.5 * nS + 1.25 * nP)
        dct['CO2'] = nC
        if aqueous:
            dct['H+'] = -(nN - 2 * nS - 3 * nP)
            dct['H2O'] = 0.5 * nH - 1.5 * nN - nS - 1.5 * nP
            dct['NH4+'] = nN
            dct['SO42-'] = nS
            dct['PO43-'] = nP
        else:
            dct['H2O'] = 0.5 * nH - 1.5 * nN - nS
            dct['NH3'] = nN
            dct['H2SO4'] = nS
            dct['P4O10'] = 0.25 * nP

        # Mass not carried by the reacting elements is lumped into "Ash";
        # a relative tolerance filters out rounding noise.
        reacting = {el: atoms.get(el, 0) for el in dichromate_oxidizing_elements}
        total_MW = component.MW or molecular_weight(atoms)
        ash_mass = total_MW - molecular_weight(reacting)
        if ash_mass / total_MW > 0.0001:
            dct['Ash'] = ash_mass

    for default_ID, alias in replace.items():
        dct[alias] = dct.pop(default_ID)

    return dct
def process(init_data, use_cache=True):
    '''
    Assemble one identifier record for a chemical from user-supplied data,
    the common chemistry lookup, PubChem, and a structure parsed by rdkit.

    The structure is taken from the first source that parses: the user's
    mol file, SMILES or InChI, then the common chemistry SMILES/InChI,
    then the PubChem SMILES/InChI. Values given explicitly in `init_data`
    ('pubchem', 'formula', 'MW', 'smiles', 'inchi', 'inchikey', 'name')
    override the derived ones in the output.

    Parameters
    ----------
    init_data : dict
        Known data about the chemical; a copy is made, the caller's dict is
        not modified. Keys read here are 'CAS', 'CASRN', 'mol', 'pubchem',
        'smiles', 'inchi', 'inchikey', 'formula', 'MW', 'name' and 'synonyms'.
    use_cache : bool
        Passed on to the PubChem lookup.

    Returns
    -------
    dict
        Keys 'cid', 'CAS', 'formula', 'MW', 'smiles', 'inchi', 'inchikey',
        'name' and 'synonyms'; 'cid' is -1 when no PubChem id is known and
        'synonyms' is sorted by length, then case-insensitively.

    Raises
    ------
    ValueError
        If no structure, CAS or name can be determined, or if any of the
        final smiles/inchi/inchikey is '*'.

    Examples
    --------
    >>> res = process({'CAS': '10170-69-1', 'synonyms': ['14267-36-8', 'NSC 22319'], 'name': 'Manganese, decacarbonyldi-, (Mn-Mn)'})
    >>> res['inchi'], res['smiles'], res['cid'], res['CAS']
    ('InChI=1S/10CO.2Mn/c10*1-2;;', '[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[Mn].[Mn]', 517769, '10170-69-1')
    '''
    def mol_from_inchi(inchi):
        # InChI strings may be stored with or without the standard prefix
        if not inchi.startswith("InChI=1S/"):
            inchi = "InChI=1S/" + inchi
        return MolFromInchi(inchi)

    init_data = init_data.copy()

    cc_CAS = cc_name = cc_inchi = cc_inchikey = cc_smiles = cc_synonyms = cc_deprecated_CASs = None
    if 'CAS' in init_data:
        try:
            (cc_CAS, cc_name, cc_inchi, cc_inchikey, cc_smiles, cc_synonyms,
             cc_deprecated_CASs) = common_chemistry_data(init_data['CAS'])
        except ValueError:
            # Compound is not in common chemistry; this is OK
            pass

    cid = iupac_name = p_MW = p_inchi = p_inchikey = p_smiles = p_formula = p_synonyms = None

    # Parse the user's mol file once: it provides the inchi/inchikey for the
    # pubchem lookup and is the first choice for the structure further down.
    user_mol = None
    if init_data.get('mol') is not None:
        user_mol = Chem.MolFromMolFile(init_data['mol'])
        if user_mol is not None:
            init_data['inchi'] = MolToInchi(user_mol)
            init_data['inchikey'] = InchiToInchiKey(init_data['inchi'])

    # NOTE(review): the lookup below reads the key 'CASRN' while the rest of
    # this function reads 'CAS' — confirm which key callers supply.
    pubchem_ids = {
        'pubchem': init_data.get('pubchem'),
        'CASRN': init_data.get('CASRN', cc_CAS),
        'inchi': init_data.get('inchi', cc_inchi),
        'inchikey': init_data.get('inchikey', cc_inchikey),
        'smiles': init_data.get('smiles', cc_smiles),
    }
    can_search_pubchem = any(v is not None for v in pubchem_ids.values())
    if can_search_pubchem:
        try:
            p = find_pubchem_from_ids(use_cache=use_cache, **pubchem_ids)
        except Exception as e:
            # Best effort: a failed lookup must not stop the processing
            p = None
            print(e, 'exception')
        if p is not None:
            cid, iupac_name, p_MW, p_inchi, p_inchikey, p_smiles, p_formula, p_synonyms = p

    # Be aware some smiles descriptions are wrong
    # Start with user overriding
    mol = user_mol
    if mol is None and 'smiles' in init_data:
        mol = Chem.MolFromSmiles(init_data['smiles'])
    if mol is None and 'inchi' in init_data:
        mol = mol_from_inchi(init_data['inchi'])
    # Trust common chemistry next
    if mol is None and cc_smiles is not None:
        mol = Chem.MolFromSmiles(cc_smiles)
    if mol is None and cc_inchi is not None:
        mol = mol_from_inchi(cc_inchi)
    # Did we pull up the structure from pubchem??
    if mol is None and p_smiles is not None:
        mol = Chem.MolFromSmiles(p_smiles)
    if mol is None and p_inchi is not None:
        mol = mol_from_inchi(p_inchi)
    if mol is None:
        raise ValueError("No structure found")

    smiles = Chem.MolToSmiles(mol, True)
    inchi = MolToInchi(mol)
    inchikey = InchiToInchiKey(inchi)
    formula = CalcMolFormula(mol, True, True)
    formula = serialize_formula(formula)
    MW = molecular_weight(nested_formula_parser(formula))

    # output values
    if 'pubchem' in init_data:
        cid = init_data['pubchem']
    elif cid is None:
        cid = -1

    if cc_CAS is not None:
        CAS = cc_CAS
    elif 'CAS' in init_data:
        CAS = init_data['CAS']
    else:
        raise ValueError("CAS culd not be found")

    # Anything given explicitly overrides what rdkit derived
    if 'formula' in init_data:
        formula = init_data['formula']
    if 'MW' in init_data:
        MW = init_data['MW']
    if 'smiles' in init_data:
        smiles = init_data['smiles']
    if 'inchi' in init_data:
        inchi = init_data['inchi']
    if 'inchikey' in init_data:
        inchikey = init_data['inchikey']

    if inchikey == '*' or smiles == '*' or inchi == '*':
        raise ValueError("Failure in rdkit")

    # Do we have a name specified in the settings?
    if 'name' in init_data:
        name = init_data['name']
    elif cc_name is not None:
        name = cc_name
    elif iupac_name is not None:
        name = iupac_name
    else:
        raise ValueError("There is no name for this compound")

    synonyms = []
    if cc_synonyms is not None:
        synonyms += cc_synonyms
    if cc_deprecated_CASs is not None:
        synonyms += cc_deprecated_CASs
    if p_synonyms is not None:
        synonyms += p_synonyms
    if 'synonyms' in init_data:
        synonyms += init_data['synonyms']

    # Deduplicate, and never list the name itself as a synonym
    synonyms = list(set(synonyms))
    if name in synonyms:
        synonyms.remove(name)
    if synonyms:
        # Shortest first, ties broken case-insensitively
        synonyms = sorted(synonyms, key=lambda s: (len(s), s.lower()))

    return {
        'cid': cid,
        'CAS': CAS,
        'formula': formula,
        'MW': MW,
        'smiles': smiles,
        'inchi': inchi,
        'inchikey': inchikey,
        'name': name,
        'synonyms': synonyms
    }