示例#1
0
def test_McCleskey_data():
    """Cross-check the McCleskey conductivity table against the database.

    Every CAS number in the table index is looked up with
    ``pubchem_db.search_CAS``, and every entry of the ``'formula'`` column is
    resolved with ``CAS_from_any``; both must lead back to the row's CAS.
    """
    cas_numbers = cond_data_McCleskey.index

    # A lookup by CAS must return a record carrying that same CAS.
    for cas in cas_numbers:
        record = pubchem_db.search_CAS(cas)
        assert record.CASs == cas

    # A lookup by formula must resolve to the CAS of the same row.
    formulas = cond_data_McCleskey['formula']
    for cas, formula in zip(cas_numbers, formulas):
        resolved = CAS_from_any(formula)
        assert resolved == cas
示例#2
0
def test_CRC_ion_conductivities():
    """Cross-check the CRC ion conductivity table against the database.

    The table index entries must be resolvable by ``CAS_from_any``, and the
    ``'Formula'`` column must resolve to a database record whose CAS equals
    the index entry of the same row.
    """
    index = CRC_ion_conductivities.index

    # NOTE: this only requires a truthy result from the lookup; it does not
    # compare the returned CAS with the index entry.
    for identifier in index:
        resolved = CAS_from_any(identifier)
        assert resolved

    # Formula -> CAS -> database record must round-trip to the row's CAS.
    formulas = CRC_ion_conductivities['Formula'].tolist()
    for formula, cas in zip(formulas, index):
        cas_from_formula = CAS_from_any(formula)
        record = pubchem_db.search_CAS(cas_from_formula)
        assert record.CASs == cas
示例#3
0
def test_CRC_aqueous_thermodynamics():
    """Validate the CRC aqueous thermodynamics table.

    Checks, in order: CAS validity and round-tripping of the index, formula
    lookups, molecular weights against the database (``atol=0.05``), index
    uniqueness and table shape, and the absolute sums of the four property
    columns as a regression guard on the stored values.
    """
    table = CRC_aqueous_thermodynamics

    # Every index entry must pass the CAS validity check.
    validity = [check_CAS(cas) for cas in table.index]
    assert all(validity)

    # Every index entry must resolve to itself.
    round_trips = [CAS_from_any(cas) == cas for cas in table.index]
    assert all(round_trips)

    # Searching by formula must land on the record of the same row.
    for formula, cas in zip(table['Formula'], table.index):
        cas_from_formula = CAS_from_any(formula)
        record = pubchem_db.search_CAS(cas_from_formula)
        assert record.CASs == cas

    # Tabulated molecular weights must agree with the database.
    for cas, tabulated_mw in zip(table.index, table['MW']):
        record = pubchem_db.search_CAS(cas)
        assert_close(record.MW, tabulated_mw, atol=0.05)

    # Checking names is an option too, but of 173 only 162 are unique, and
    # many of the others have names that seem ambiguous for ions which can
    # have more than one charge.

    assert table.index.is_unique
    assert table.shape == (173, 7)

    # Absolute column sums act as checksums on the stored data.
    expected_abs_sums = (
        ('Hf(aq)', 70592500.0),
        ('Gf(aq)', 80924000.0),
        ('S(aq)', 17389.9),
        ('Cp(aq)', 2111.5),
    )
    for column, expected in expected_abs_sums:
        total = table[column].abs().sum()
        assert_close(total, expected)
def test_db_vs_ChemSep():
    """Cross-check the database against the ChemSep compound XML file.

    The CAS numbers are checked, as are most of the chemical formulas. Some
    chemical structural formulas aren't supported by the current formula
    parser and are ignored; otherwise it is a very effective test.

    Do not try to optimize this function - it has already been tried without
    success. The time is only taken by the XML parse step.

    ``xml.etree.ElementTree`` is used rather than ``xml.etree.cElementTree``:
    the latter was removed in Python 3.9, and ``ElementTree`` has used the C
    accelerator automatically since Python 3.3.
    """
    import xml.etree.ElementTree as ET
    folder = os.path.join(os.path.dirname(__file__), 'Data')

    tree = ET.parse(os.path.join(folder, 'chemsep1.xml'))
    root = tree.getroot()

    data = {}
    for child in root:
        # Take the first occurrence of each tag of interest in one pass over
        # the compound's children.
        CAS, name, smiles, formula = None, None, None, None
        for i in child:
            tag = i.tag
            if CAS is None and tag == 'CAS':
                CAS = i.attrib['value']
            elif name is None and tag == 'CompoundID':
                name = i.attrib['value']
            elif smiles is None and tag == 'Smiles':
                smiles = i.attrib['value']
            elif formula is None and tag == 'StructureFormula':
                formula = i.attrib['value']

        if formula is not None:
            if '-' in formula:
                # Structural formulas containing '-' are skipped.
                formula = None
            else:
                try:
                    formula = serialize_formula(formula)
                except Exception:
                    # Best effort: if the formula cannot be serialized, keep
                    # the raw text so the comparison below still sees it.
                    pass

        # Keep the whole SMILES string (an empty string becomes None).
        # Indexing with [0] would keep only its first character.
        smiles = smiles or None

        data[CAS] = {'name': name, 'smiles': smiles, 'formula': formula}

    # Each CAS must be found in the database under the same CAS.
    for CAS in data:
        hit = pubchem_db.search_CAS(CAS)
        assert hit.CASs == CAS

    # Each CAS must resolve to itself through the generic lookup.
    for CAS in data:
        assert CAS_from_any(CAS) == CAS

    # Formulas must match, except for entries without a usable formula and
    # the explicitly excluded compounds.
    for CAS, d in data.items():
        f = d['formula']
        if f is None or f == '1,4-COOH(C6H4)COOH' or d['name'] == 'Air':
            continue
        assert pubchem_db.search_CAS(CAS).formula == f
示例#5
0
def test_dissociation_reactions():
    """Validate the electrolyte dissociation reaction table ``df``.

    ``df`` is defined outside this function. The checks are: electrolyte
    formulas are unique; electrolytes, anions and cations agree with the
    database; each reaction is charge-neutral; and the atoms of the product
    ions add up to the atoms of the electrolyte.
    """
    # There must be only one dissociation reaction per electrolyte formula.
    electrolyte_formulas = df['Electrolyte Formula']
    distinct_formulas = set(electrolyte_formulas.values.tolist())
    assert len(electrolyte_formulas) == len(distinct_formulas)

    # Electrolytes: the CAS round-trips and the database formula matches.
    electrolyte_rows = zip(df['Electrolyte name'],
                           df['Electrolyte CAS'],
                           df['Electrolyte Formula'])
    for _name, cas, formula in electrolyte_rows:
        assert CAS_from_any(cas) == cas
        record = pubchem_db.search_CAS(cas)
        assert record.formula == serialize_formula(formula)

    # Anions first, then cations: identical checks on CAS, charge and formula.
    ion_columns = (
        ('Anion formula', 'Anion CAS', 'Anion charge'),
        ('Cation formula', 'Cation CAS', 'Cation charge'),
    )
    for formula_col, cas_col, charge_col in ion_columns:
        ion_rows = zip(df[formula_col], df[cas_col], df[charge_col])
        for formula, cas, charge in ion_rows:
            assert CAS_from_any(cas) == cas
            assert CAS_from_any(formula) == cas
            record = pubchem_db.search_CAS(cas)
            assert record.charge == charge
            assert record.formula == serialize_formula(formula)

    # The ion charges weighted by their counts must sum to zero.
    charge_rows = zip(
        df['Anion charge'].tolist(), df['Anion count'].tolist(),
        df['Cation charge'].tolist(), df['Cation count'].tolist())
    for anion_charge, anion_count, cation_charge, cation_count in charge_rows:
        net_charge = anion_charge * anion_count + cation_charge * cation_count
        assert net_charge == 0

    # The atoms in the products must equal the atoms in the electrolyte, so
    # the reaction conserves moles of every element.
    balance_rows = zip(
        df['Electrolyte Formula'].tolist(), df['Cation formula'].tolist(),
        df['Cation count'].tolist(), df['Anion formula'].tolist(),
        df['Anion count'].tolist())
    for elec_text, cation_text, n_cations, anion_text, n_anions in balance_rows:
        electrolyte_atoms = nested_formula_parser(elec_text)
        cation_atoms = nested_formula_parser(cation_text)
        anion_atoms = nested_formula_parser(anion_text)

        # Add each ion's atoms once per stoichiometric count.
        product_atoms = Counter()
        for _ in range(n_cations):
            product_atoms.update(cation_atoms)
        for _ in range(n_anions):
            product_atoms.update(anion_atoms)
        assert dict(product_atoms) == electrolyte_atoms