def test_McCleskey_data():
    """Check that the McCleskey conductivity table agrees with the database.

    Every CAS number in the index of ``cond_data_McCleskey`` must be returned
    unchanged by ``pubchem_db.search_CAS``, and every entry of the
    ``'formula'`` column must resolve through ``CAS_from_any`` to the CAS
    number of its own row.
    """
    cas_numbers = cond_data_McCleskey.index

    # CAS lookups: the database record must report the same CAS number
    for cas in cas_numbers:
        record = pubchem_db.search_CAS(cas)
        assert record.CASs == cas

    # Formula lookups: each formula must lead back to the row's CAS number
    formulas = cond_data_McCleskey['formula']
    for cas, formula in zip(cas_numbers, formulas):
        resolved = CAS_from_any(formula)
        assert resolved == cas
def test_CRC_ion_conductivities():
    """Check that the CRC ion conductivity table agrees with the database.

    Each index entry must produce a truthy result from ``CAS_from_any`` (the
    result itself is not compared against the entry), and each ``'Formula'``
    value must resolve to a database record whose CAS number equals the index
    entry of the same row.
    """
    index = CRC_ion_conductivities.index

    # Every index entry has to be resolvable
    for identifier in index:
        assert CAS_from_any(identifier)

    # Searching by formula must land on the CAS number of the same row
    formulas = CRC_ion_conductivities['Formula'].tolist()
    for formula, expected_cas in zip(formulas, index):
        found_cas = CAS_from_any(formula)
        record = pubchem_db.search_CAS(found_cas)
        assert record.CASs == expected_cas
def test_CRC_aqueous_thermodynamics():
    """Check the CRC aqueous thermodynamics table against the database.

    Verifies, in order: the index entries pass ``check_CAS``; they resolve to
    themselves through ``CAS_from_any``; the ``'Formula'`` column resolves to
    the CAS number of the same row; the ``'MW'`` column agrees with the
    database molecular weight to within 0.05; the index is unique; the table
    shape is (173, 7); and the sums of absolute values of the four property
    columns match their recorded totals.
    """
    table = CRC_aqueous_thermodynamics
    index = table.index

    cas_valid = [check_CAS(cas) for cas in index]
    assert all(cas_valid)

    # The index entries must resolve to themselves
    cas_round_trip = [CAS_from_any(cas) == cas for cas in index]
    assert all(cas_round_trip)

    # Searching by formula must land on the CAS number of the same row
    for formula, expected_cas in zip(table['Formula'], index):
        found_cas = CAS_from_any(formula)
        assert pubchem_db.search_CAS(found_cas).CASs == expected_cas

    # Molecular weights in the table must agree with the database
    for cas, tabulated_mw in zip(index, table['MW']):
        record = pubchem_db.search_CAS(cas)
        assert_close(record.MW, tabulated_mw, atol=0.05)

    # Names could be checked as well, but only 162 of the 173 are unique and
    # many of the rest look ambiguous for ions that can carry more than one
    # charge, so they are left out.

    assert index.is_unique
    assert table.shape == (173, 7)

    # Sum of absolute values of each property column, as a regression check
    expected_abs_sums = (
        ('Hf(aq)', 70592500.0),
        ('Gf(aq)', 80924000.0),
        ('S(aq)', 17389.9),
        ('Cp(aq)', 2111.5),
    )
    for column, expected_total in expected_abs_sums:
        total = table[column].abs().sum()
        assert_close(total, expected_total)
def test_db_vs_ChemSep():
    """Cross-check the ChemSep component file against ``pubchem_db``.

    The CAS numbers are checked, as are most of the chemical formulas.
    Formulas containing a ``-`` are skipped, as are two entries that are
    special-cased in the final loop; a formula that ``serialize_formula``
    cannot handle is compared in its raw form.

    Do not try to optimize this function - it has already been tried without
    success. The run time is taken almost entirely by the XML parse step.
    """
    # ``xml.etree.cElementTree`` was removed in Python 3.9; ``ElementTree``
    # picks up the C accelerator on its own, so nothing is lost by using it.
    import xml.etree.ElementTree as ET

    folder = os.path.join(os.path.dirname(__file__), 'Data')
    tree = ET.parse(os.path.join(folder, 'chemsep1.xml'))
    root = tree.getroot()

    wanted_tags = ('CAS', 'CompoundID', 'Smiles', 'StructureFormula')
    data = {}
    for child in root:
        # Keep only the first occurrence of each tag of interest
        values = {}
        for node in child:
            tag = node.tag
            if tag in wanted_tags and tag not in values:
                values[tag] = node.attrib['value']

        CAS = values.get('CAS')
        name = values.get('CompoundID')
        smiles = values.get('Smiles')
        formula = values.get('StructureFormula')

        if formula is not None:
            if '-' in formula:
                # Structural formulas with dashes are not compared
                formula = None
            else:
                try:
                    formula = serialize_formula(formula)
                except Exception:
                    # Best effort, as before: when the parser rejects the
                    # formula the raw string is left in place.
                    pass

        # Store the full SMILES string; an empty one is recorded as None
        if not smiles:
            smiles = None

        data[CAS] = {'name': name, 'smiles': smiles, 'formula': formula}

    # Every ChemSep CAS number must be found in the database
    for CAS in data:
        hit = pubchem_db.search_CAS(CAS)
        assert hit.CASs == CAS

    # ...and must resolve to itself through the generic lookup
    for CAS in data:
        assert CAS_from_any(CAS) == CAS

    # Formulas must match, except for the entries that cannot be compared
    for CAS, d in data.items():
        f = d['formula']
        if f is None or f == '1,4-COOH(C6H4)COOH' or d['name'] == 'Air':
            continue
        assert pubchem_db.search_CAS(CAS).formula == f
def test_dissociation_reactions():
    """Check the dissociation reaction table for internal and database consistency.

    Verifies, in order: each electrolyte formula appears only once; the
    electrolytes, anions and cations match the database; the ion charges
    times their counts sum to zero for every row; and the atoms of the
    product ions add up to exactly the atoms of the electrolyte.
    """
    # There must be only one dissociation reaction for each electrolyte
    electrolyte_formulas = df['Electrolyte Formula']
    distinct_formulas = set(electrolyte_formulas.values.tolist())
    assert len(electrolyte_formulas) == len(distinct_formulas)

    # The electrolytes must match up with the database
    electrolyte_rows = zip(df['Electrolyte name'], df['Electrolyte CAS'],
                           df['Electrolyte Formula'])
    for _name, cas, formula in electrolyte_rows:
        assert CAS_from_any(cas) == cas
        assert pubchem_db.search_CAS(cas).formula == serialize_formula(formula)

    # The anions, then the cations, must match up with the database
    ion_columns = (
        ('Anion formula', 'Anion CAS', 'Anion charge'),
        ('Cation formula', 'Cation CAS', 'Cation charge'),
    )
    for formula_col, cas_col, charge_col in ion_columns:
        for formula, cas, charge in zip(df[formula_col], df[cas_col],
                                        df[charge_col]):
            assert CAS_from_any(cas) == cas
            assert CAS_from_any(formula) == cas
            record = pubchem_db.search_CAS(cas)
            assert record.charge == charge
            assert record.formula == serialize_formula(formula)

    # The charges weighted by the ion counts must sum to zero
    charge_rows = zip(df['Anion charge'].tolist(),
                      df['Anion count'].tolist(),
                      df['Cation charge'].tolist(),
                      df['Cation count'].tolist())
    for anion_charge, anion_count, cation_charge, cation_count in charge_rows:
        net_charge = anion_charge * anion_count + cation_charge * cation_count
        assert net_charge == 0

    # The atoms of the products must equal the atoms of the electrolyte
    reaction_rows = zip(df['Electrolyte Formula'].tolist(),
                        df['Cation formula'].tolist(),
                        df['Cation count'].tolist(),
                        df['Anion formula'].tolist(),
                        df['Anion count'].tolist())
    for electrolyte, cation, cation_count, anion, anion_count in reaction_rows:
        electrolyte_atoms = nested_formula_parser(electrolyte)
        cation_atoms = nested_formula_parser(cation)
        anion_atoms = nested_formula_parser(anion)

        # Add each ion's atoms once per ion produced
        product_atoms = Counter()
        for atoms, count in ((cation_atoms, cation_count),
                             (anion_atoms, anion_count)):
            for _ in range(count):
                product_atoms.update(atoms)

        assert dict(product_atoms) == electrolyte_atoms