Пример #1
0
def test_organic_user_db():
    """Build a database from the example user file and cross-check its indexes.

    Each identifier stored on an entry (CAS, SMILES, InChI, InChI key and
    PubChem id) is passed through ``CAS_from_any`` and compared with the
    expected CAS; formulas, CAS validity and molecular weights are checked too.
    """
    db_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=db_path, user_dbs=[])

    # A CAS must resolve back to itself.
    for entry in db.CAS_index.values():
        assert CAS_from_any(entry.CASs) == entry.CASs

    # Make sure the file actually produced entries.
    assert len(db.CAS_index) > 100

    # Non-empty SMILES keys must resolve to the CAS of the entry they index.
    for smiles, entry in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == entry.CASs

    # Stored formulas must already equal their serialized form.
    formula_checks = [
        entry.formula == serialize_formula(entry.formula)
        for entry in db.CAS_index.values()
    ]
    assert all(formula_checks)

    # Every CAS must pass checkCAS.
    cas_checks = [checkCAS(entry.CASs) for entry in db.CAS_index.values()]
    assert all(cas_checks)

    # The stored MW must agree with the MW computed from the formula.
    for entry in db.CAS_index.values():
        atoms = nested_formula_parser(serialize_formula(entry.formula),
                                      check=False)
        assert_allclose(molecular_weight(atoms), entry.MW, atol=0.05)

    # Lookup by InChI.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChI=1S/' + entry.InChI)
        assert resolved == int2CAS(cas_key)

    # Lookup by InChI key.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChIKey=' + entry.InChI_key)
        assert resolved == int2CAS(cas_key)

    # Lookup by PubChem id, skipping entries whose id is -1.
    for cas_key, entry in db.CAS_index.items():
        if entry.pubchemid == -1:
            continue
        resolved = CAS_from_any('PubChem=' + str(entry.pubchemid))
        assert resolved == int2CAS(cas_key)

    # All four indexes must hold the same number of entries.
    entry_count = len(db.CAS_index)
    assert len(db.smiles_index) == entry_count
    assert len(db.InChI_index) == entry_count
    assert len(db.InChI_key_index) == entry_count
Пример #2
0
def test_TSCA_data():
    """Check TSCA_data's per-column sums, index uniqueness, shape and CAS validity."""
    column_names = ['UV', 'E', 'F', 'N', 'P', 'S', 'R', 'T', 'XU', 'SP', 'TP',
                    'Y1', 'Y2']
    expected_totals = [16829, 271, 3, 713, 8371, 1173, 13, 151, 19035, 74, 50,
                       352, 9]
    column_totals = [TSCA_data[name].sum() for name in column_names]
    assert column_totals == expected_totals

    assert TSCA_data.index.is_unique
    assert TSCA_data.shape == (67635, 13)

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in TSCA_data.index]
    assert all(cas_checks)
Пример #3
0
def test_TSCA_data():
    """Check TSCA_data's per-column sums, index uniqueness, shape and CAS validity."""
    column_names = ['UV', 'E', 'F', 'N', 'P', 'S', 'R', 'T', 'XU', 'SP', 'TP',
                    'Y1', 'Y2']
    expected_totals = [16829, 271, 3, 713, 8371, 1173, 13, 151, 19035, 74, 50,
                       352, 9]
    column_totals = [TSCA_data[name].sum() for name in column_names]
    assert column_totals == expected_totals

    assert TSCA_data.index.is_unique
    assert TSCA_data.shape == (67635, 13)

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in TSCA_data.index]
    assert all(cas_checks)
Пример #4
0
def test_organic_user_db():
    """Build a database from the example user file and cross-check its indexes.

    Each identifier stored on an entry (CAS, SMILES, InChI, InChI key and
    PubChem id) is passed through ``CAS_from_any`` and compared with the
    expected CAS; formulas, CAS validity and molecular weights are checked too.
    """
    db_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=db_path, user_dbs=[])

    # A CAS must resolve back to itself.
    for entry in db.CAS_index.values():
        assert CAS_from_any(entry.CASs) == entry.CASs

    # Make sure the file actually produced entries.
    assert len(db.CAS_index) > 100

    # Non-empty SMILES keys must resolve to the CAS of the entry they index.
    for smiles, entry in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == entry.CASs

    # Stored formulas must already equal their serialized form.
    formula_checks = [
        entry.formula == serialize_formula(entry.formula)
        for entry in db.CAS_index.values()
    ]
    assert all(formula_checks)

    # Every CAS must pass checkCAS.
    cas_checks = [checkCAS(entry.CASs) for entry in db.CAS_index.values()]
    assert all(cas_checks)

    # The stored MW must agree with the MW computed from the formula.
    for entry in db.CAS_index.values():
        atoms = nested_formula_parser(serialize_formula(entry.formula),
                                      check=False)
        assert_allclose(molecular_weight(atoms), entry.MW, atol=0.05)

    # Lookup by InChI.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChI=1S/' + entry.InChI)
        assert resolved == int2CAS(cas_key)

    # Lookup by InChI key.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChIKey=' + entry.InChI_key)
        assert resolved == int2CAS(cas_key)

    # Lookup by PubChem id, skipping entries whose id is -1.
    for cas_key, entry in db.CAS_index.items():
        if entry.pubchemid == -1:
            continue
        resolved = CAS_from_any('PubChem=' + str(entry.pubchemid))
        assert resolved == int2CAS(cas_key)

    # All four indexes must hold the same number of entries.
    entry_count = len(db.CAS_index)
    assert len(db.smiles_index) == entry_count
    assert len(db.InChI_index) == entry_count
    assert len(db.InChI_key_index) == entry_count
Пример #5
0
def test_HPV_data():
    """Check HPV_data's index uniqueness, shape, index sum and CAS validity."""
    assert HPV_data.index.is_unique
    assert HPV_data.shape == (5067, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(HPV_data.index)
    assert sum(index_values) == 176952023632

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in HPV_data.index]
    assert all(cas_checks)
Пример #6
0
def test_NLP_data():
    """Check NLP_data's index uniqueness, shape, index sum and CAS validity."""
    assert NLP_data.index.is_unique
    assert NLP_data.shape == (698, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(NLP_data.index)
    assert sum(index_values) == 83268755392

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in NLP_data.index]
    assert all(cas_checks)
Пример #7
0
def test_SPIN_data():
    """Check SPIN_data's index uniqueness, shape, index sum and CAS validity."""
    assert SPIN_data.index.is_unique
    assert SPIN_data.shape == (26023, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(SPIN_data.index)
    assert sum(index_values) == 1666688770043

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in SPIN_data.index]
    assert all(cas_checks)
Пример #8
0
def test_EINECS_data():
    """Check EINECS_data's index uniqueness, shape, index sum and CAS validity."""
    assert EINECS_data.index.is_unique
    assert EINECS_data.shape == (100203, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(EINECS_data.index)
    assert sum(index_values) == 4497611272838

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in EINECS_data.index]
    assert all(cas_checks)
Пример #9
0
def test_DSL_data():
    """Check DSL_data's index uniqueness, 'Registry' sum, shape and CAS validity."""
    assert DSL_data.index.is_unique

    registry_total = DSL_data['Registry'].sum()
    assert registry_total == 48363
    assert DSL_data.shape == (73036, 1)

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in DSL_data.index]
    assert all(cas_checks)
Пример #10
0
 def CASs(self):
     """Return ``self.CAS`` converted with ``int2CAS``."""
     cas_value = self.CAS
     return int2CAS(cas_value)
Пример #11
0
 def CASs(self):
     """Return ``self.CAS`` converted with ``int2CAS``."""
     cas_value = self.CAS
     return int2CAS(cas_value)
Пример #12
0
def test_HPV_data():
    """Check HPV_data's index uniqueness, shape, index sum and CAS validity."""
    assert HPV_data.index.is_unique
    assert HPV_data.shape == (5067, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(HPV_data.index)
    assert sum(index_values) == 176952023632

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in HPV_data.index]
    assert all(cas_checks)
Пример #13
0
def test_NLP_data():
    """Check NLP_data's index uniqueness, shape, index sum and CAS validity."""
    assert NLP_data.index.is_unique
    assert NLP_data.shape == (698, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(NLP_data.index)
    assert sum(index_values) == 83268755392

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in NLP_data.index]
    assert all(cas_checks)
Пример #14
0
def test_SPIN_data():
    """Check SPIN_data's index uniqueness, shape, index sum and CAS validity."""
    assert SPIN_data.index.is_unique
    assert SPIN_data.shape == (26023, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(SPIN_data.index)
    assert sum(index_values) == 1666688770043

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in SPIN_data.index]
    assert all(cas_checks)
Пример #15
0
def test_EINECS_data():
    """Check EINECS_data's index uniqueness, shape, index sum and CAS validity."""
    assert EINECS_data.index.is_unique
    assert EINECS_data.shape == (100203, 0)

    # The sum of the index values acts as a checksum of the loaded data.
    index_values = list(EINECS_data.index)
    assert sum(index_values) == 4497611272838

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in EINECS_data.index]
    assert all(cas_checks)
Пример #16
0
def test_DSL_data():
    """Check DSL_data's index uniqueness, 'Registry' sum, shape and CAS validity."""
    assert DSL_data.index.is_unique

    registry_total = DSL_data['Registry'].sum()
    assert registry_total == 48363
    assert DSL_data.shape == (73036, 1)

    # Each index value, converted with int2CAS, must pass checkCAS.
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in DSL_data.index]
    assert all(cas_checks)