def test_mol_formula(storage_socket):
    """
    Test Molecule lookup by molecular formula

    A single water dimer is inserted and then queried through the
    ``molecular_formula`` filter of ``get_molecules``:

    * "H4O2" and "O2H4" are both expected to return the stored molecule;
    * "H4o2" is expected to return nothing.

    The inserted molecule is deleted again even if an assertion fails.
    """

    assert len(storage_socket.get_molecules()["data"]) == 0
    mol_names = [
        "water_dimer_minima.psimol",
    ]
    total = len(mol_names)
    molecules = [ptl.data.get_molecule(mol_name) for mol_name in mol_names]

    inserted = storage_socket.add_molecules(molecules)

    try:
        assert inserted["meta"]["n_inserted"] == total

        # (formula passed to the query, number of molecules expected back)
        expectations = (("H4O2", 1), ("O2H4", 1), ("H4o2", 0))
        for formula, n_expected in expectations:
            ret = storage_socket.get_molecules(molecular_formula=formula)
            assert len(ret["data"]) == n_expected, formula
            assert ret["meta"]["n_found"] == n_expected, formula

    finally:
        # cleanup
        storage_socket.del_molecules(inserted["data"])
def test_mol_pagination(storage_socket):
    """
    Test Molecule pagination

    Inserts four molecules and checks that ``skip`` shortens the returned
    ``data`` list while ``meta['n_found']`` keeps reporting the full count.
    Cleanup runs in a ``finally`` block so a failing assertion does not leave
    molecules behind for tests that start by asserting an empty table.
    """

    assert len(storage_socket.get_molecules()['data']) == 0
    mol_names = [
        'water_dimer_minima.psimol',
        'water_dimer_stretch.psimol',
        'water_dimer_stretch2.psimol',
        'neon_tetramer.psimol',
    ]

    total = len(mol_names)
    molecules = [ptl.data.get_molecule(mol_name) for mol_name in mol_names]

    inserted = storage_socket.add_molecules(molecules)

    try:
        assert inserted['meta']['n_inserted'] == total

        # Skipping one entry returns everything but the first
        ret = storage_socket.get_molecules(skip=1)
        assert len(ret['data']) == total - 1
        assert ret['meta']['n_found'] == total

        # Skipping past the end returns no data, but the count is unchanged
        ret = storage_socket.get_molecules(skip=total + 1)
        assert len(ret['data']) == 0
        assert ret['meta']['n_found'] == total

    finally:
        # cleanup
        storage_socket.del_molecules(inserted['data'])
def test_mol_pagination(storage_socket):
    """
    Check the ``skip`` argument of ``get_molecules``.

    Four molecules are stored; the test then verifies that the number of
    returned entries shrinks with ``skip`` while ``n_found`` stays at the
    total. The stored molecules are always removed afterwards.
    """

    # The molecule table has to be empty before we begin
    assert len(storage_socket.get_molecules()["data"]) == 0

    file_names = [
        "water_dimer_minima.psimol",
        "water_dimer_stretch.psimol",
        "water_dimer_stretch2.psimol",
        "neon_tetramer.psimol",
    ]
    expected_count = len(file_names)

    loaded = [ptl.data.get_molecule(file_name) for file_name in file_names]
    added = storage_socket.add_molecules(loaded)

    try:
        assert added["meta"]["n_inserted"] == expected_count

        # Drop the first entry only
        page = storage_socket.get_molecules(skip=1)
        assert len(page["data"]) == expected_count - 1
        assert page["meta"]["n_found"] == expected_count

        # Skip beyond the last entry
        page = storage_socket.get_molecules(skip=expected_count + 1)
        assert len(page["data"]) == 0
        assert page["meta"]["n_found"] == expected_count

    finally:
        # cleanup
        storage_socket.del_molecules(added["data"])
def test_molecule_sql(storage_socket, session):
    """
    Query stored molecules directly through the MoleculeORM class.

    Molecules are created via ``storage_socket.add_molecules`` and then read
    back with ``session.query(MoleculeORM)`` using several ``filter_by``
    combinations.
    """

    # The table is expected to be empty at the start
    initial_count = session.query(MoleculeORM).count()
    assert initial_count == 0

    dimer_a = ptl.data.get_molecule("water_dimer_minima.psimol")
    dimer_b = ptl.data.get_molecule("water_dimer_stretch.psimol")

    # Store both molecules through the socket
    added = storage_socket.add_molecules([dimer_a, dimer_b])
    added_meta = added["meta"]
    assert added_meta["success"] is True
    assert added_meta["n_inserted"] == 2

    listing = storage_socket.get_molecules()
    assert listing["meta"]["n_found"] == 2

    # Read one row back through the ORM class
    first_row = session.query(MoleculeORM).first()
    assert first_row.molecular_formula == "H4O2"
    assert first_row.molecular_charge == 0

    # Filter on the molecular formula: both dimers share it
    by_formula = session.query(MoleculeORM).filter_by(molecular_formula="H4O2").all()
    assert len(by_formula) == 2
    assert by_formula[0].molecular_multiplicity == 1

    # Filter on the molecular charge
    by_charge = session.query(MoleculeORM).filter_by(molecular_charge=0).all()
    assert len(by_charge) == 2

    # Hash plus formula together identify exactly one row
    unique_query = session.query(MoleculeORM).filter_by(
        molecule_hash=first_row.molecule_hash,
        molecular_formula=first_row.molecular_formula,
    )
    assert len(unique_query.all()) == 1

    # Clean up
    hashes = [dimer_a.get_hash(), dimer_b.get_hash()]
    storage_socket.del_molecules(molecule_hash=hashes)
def test_molecules_add(storage_socket):
    """
    Insert the same molecule twice and check how the duplicate is reported.

    The second insert must add nothing, list the first insert's id under
    ``meta['duplicates']`` and return that same id in ``data``.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    # First insert creates the record
    first = storage_socket.add_molecules([dimer])
    first_meta = first["meta"]
    assert first_meta["success"] is True
    assert first_meta["n_inserted"] == 1

    # Second insert of the same molecule is a duplicate
    second = storage_socket.add_molecules([dimer])
    second_meta = second["meta"]
    assert second_meta["success"] is True
    assert second_meta["n_inserted"] == 0
    assert second_meta["duplicates"][0] == first["data"][0]

    # Both calls hand back the same id
    assert first["data"][0] == second["data"][0]

    # Fetch the stored molecule by hash and compare it with the local one
    stored = storage_socket.get_molecules(molecule_hash=dimer.get_hash())["data"][0]
    # NOTE(review): the result of compare() is discarded here - confirm whether
    # it raises on mismatch or returns a bool that should be asserted.
    dimer.compare(stored)

    # Cleanup adds
    n_deleted = storage_socket.del_molecules(molecule_hash=dimer.get_hash())
    assert n_deleted == 1
def test_procedure_pagination(storage_socket):
    """
    Check ``limit``/``skip`` handling of ``get_procedures``.

    Ten optimization records that differ only in ``hash_index`` are stored;
    a paged query must report the full count in ``n_found`` while returning
    only ``limit`` entries.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")
    mol = storage_socket.add_molecules([dimer])['data'][0]

    # No optimization procedures may exist yet
    existing = storage_socket.get_procedures(procedure='optimization')['data']
    assert len(existing) == 0

    proc_template = {
        "initial_molecule": mol,
        "program": "something",
        "qc_spec": {
            "driver": "gradient",
            "method": "HF",
            "basis": "sto-3g",
            "keywords": None,
            "program": "psi4",
        },
    }

    total = 10
    limit = 5
    skip = 4

    # One record per index; hash_index is the only field that varies
    procedures = [
        ptl.models.OptimizationRecord(**proc_template, hash_index=str(index))
        for index in range(total)
    ]

    inserted = storage_socket.add_procedures(procedures)
    assert inserted['meta']['n_inserted'] == total

    page = storage_socket.get_procedures(
        procedure='optimization',
        status=None,
        limit=limit,
        skip=skip,
    )

    # count is total, but actual data size is the limit
    assert page['meta']['n_found'] == total
    assert len(page['data']) == limit

    # cleanup
    storage_socket.del_procedures(inserted['data'])
    storage_socket.del_molecules(mol)
def test_molecules_add_many(storage_socket):
    """
    Insert two molecules at once and delete them, first by hash, then by id.
    """

    dimers = [
        ptl.data.get_molecule("water_dimer_minima.psimol"),
        ptl.data.get_molecule("water_dimer_stretch.psimol"),
    ]

    # Round one: remove by molecule hash
    added = storage_socket.add_molecules(dimers)
    assert added["meta"]["n_inserted"] == 2

    hashes = [mol.get_hash() for mol in dimers]
    n_deleted = storage_socket.del_molecules(molecule_hash=hashes)
    assert n_deleted == 2

    # Round two: remove by the ids returned from the insert
    added = storage_socket.add_molecules(dimers)
    assert added["meta"]["n_inserted"] == 2

    n_deleted = storage_socket.del_molecules(id=added["data"])
    assert n_deleted == 2
def molecules_H4O2(storage_socket):
    """
    Store two water dimers, yield their ids, then delete them again.

    Written as a generator with teardown after the ``yield``; presumably
    registered as a pytest fixture by a decorator that is not visible here.
    """

    dimers = [
        ptl.data.get_molecule("water_dimer_minima.psimol"),
        ptl.data.get_molecule("water_dimer_stretch.psimol"),
    ]

    added = storage_socket.add_molecules(dimers)

    yield list(added["data"])

    # Teardown: both molecules must be removed by hash
    hashes = [mol.get_hash() for mol in dimers]
    n_deleted = storage_socket.del_molecules(molecule_hash=hashes)
    assert n_deleted == 2
def test_molecules_mixed_add_get(storage_socket):
    """
    Pass a mix of ids and a molecule object to ``get_add_molecules_mixed``.

    ``bad_id1`` and ``bad_id2`` are defined outside this function. The test
    expects positions 0, 2 and 3 to be reported as missing and the molecule
    at position 1 to be returned.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    mixed_input = [bad_id1, dimer, bad_id2, "bad_id"]
    result = storage_socket.get_add_molecules_mixed(mixed_input)
    entries = result["data"]

    assert entries[0] is None
    assert entries[1].identifiers.molecule_hash == dimer.get_hash()
    assert entries[2] is None
    assert set(result["meta"]["missing"]) == {0, 2, 3}

    # Cleanup adds
    n_deleted = storage_socket.del_molecules(id=entries[1].id)
    assert n_deleted == 1
def test_molecules_get(storage_socket):
    """
    Store one molecule and fetch it back by id.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Add once
    added = storage_socket.add_molecules([dimer])
    assert added["meta"]["n_inserted"] == 1
    dimer_id = added["data"][0]

    # Fetch the stored copy by id and compare it with the local molecule
    fetched = storage_socket.get_molecules(id=dimer_id)["data"][0]
    # NOTE(review): the result of compare() is discarded here - confirm whether
    # it raises on mismatch or returns a bool that should be asserted.
    fetched.compare(dimer)

    # Cleanup adds
    n_deleted = storage_socket.del_molecules(id=dimer_id)
    assert n_deleted == 1
def test_molecules_bad_get(storage_socket):
    """
    Query by id with one real id and two ids expected not to exist.

    ``bad_id1`` and ``bad_id2`` are defined outside this function. Only the
    stored molecule should be found.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Add once
    dimer_id = storage_socket.add_molecules([dimer])["data"][0]

    # Ask for the real id together with the two bad ones
    wanted_ids = [dimer_id, bad_id1, bad_id2]
    found = storage_socket.get_molecules(id=wanted_ids)

    assert found["data"][0].id == dimer_id
    assert found["meta"]["n_found"] == 1

    # Cleanup adds
    n_deleted = storage_socket.del_molecules(id=dimer_id)
    assert n_deleted == 1
def test_molecules_bad_get(storage_socket):
    """
    Query by id with one real id and three malformed ids.

    The test expects a single error entry for the ``"id"`` field listing the
    three rejected values, and exactly one molecule found.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Add once
    dimer_id = storage_socket.add_molecules([dimer])["data"][0]

    # A string, an int and a tuple stand in for invalid ids
    wanted_ids = [dimer_id, "something", 5, (3, 2)]
    found = storage_socket.get_molecules(id=wanted_ids)
    errors = found["meta"]["errors"]

    assert len(errors) == 1
    assert errors[0][0] == "id"
    assert len(errors[0][1]) == 3
    assert found["meta"]["n_found"] == 1

    # Cleanup adds
    n_deleted = storage_socket.del_molecules(id=dimer_id)
    assert n_deleted == 1
def test_identical_mol_insert(storage_socket):
    """
    Tests an edge case where two identical molecules are added in one call.

    Only one record may be inserted, and both returned ids must be equal.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Submit the same molecule twice in a single request
    added = storage_socket.add_molecules([dimer, dimer])
    assert added["meta"]["success"] is True
    assert added["meta"]["n_inserted"] == 1

    first_id, second_id = added["data"][0], added["data"][1]
    assert first_id == second_id

    # The hash lookup should find a single molecule
    lookup = storage_socket.get_molecules(molecule_hash=[dimer.get_hash()])
    assert lookup["meta"]["n_found"] == 1

    n_deleted = storage_socket.del_molecules(molecule_hash=dimer.get_hash())
    assert n_deleted == 1
def test_molecules_duplicate_insert(storage_socket):
    """
    Re-insert already stored molecules and check that nothing new is added.

    Three inserts are made: both molecules (new), both molecules again
    (duplicates), and the first molecule twice in one call (duplicates).
    Every duplicate insert must report ``n_inserted == 0`` and return the ids
    from the original insert.
    """

    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")

    ret = storage_socket.add_molecules([water, water2])
    assert ret["meta"]["n_inserted"] == 2

    # Same pair again: nothing inserted, same ids returned
    ret2 = storage_socket.add_molecules([water, water2])
    assert ret2["meta"]["n_inserted"] == 0
    assert ret["data"][0] == ret2["data"][0]
    assert ret["data"][1] == ret2["data"][1]

    # Same molecule twice in one call: check ret3's own insert count
    # (the previous version re-asserted ret2 here by mistake)
    ret3 = storage_socket.add_molecules([water, water])
    assert ret3["meta"]["n_inserted"] == 0
    assert ret["data"][0] == ret3["data"][0]
    assert ret["data"][0] == ret3["data"][1]

    # Cleanup adds
    ret = storage_socket.del_molecules(id=ret["data"])
    assert ret == 2
def test_molecules_mixed_add_get(storage_socket):
    """
    Pass bad ids, a new molecule object and an existing id together.

    ``bad_id1`` and ``bad_id2`` are defined outside this function. The two
    bad ids must be reported as missing; the molecule object and the stored
    id must both resolve to molecules.
    """

    dimer = ptl.data.get_molecule("water_dimer_minima.psimol")
    stretched = ptl.data.get_molecule("water_dimer_stretch.psimol")

    # Store the second molecule up front so it can be requested by id
    stretched_id = storage_socket.add_molecules([stretched])["data"][0]
    ids_to_delete = [stretched_id]

    mixed_input = [bad_id1, dimer, bad_id2, stretched_id]
    result = storage_socket.get_add_molecules_mixed(mixed_input)
    entries = result["data"]

    assert entries[0] is None
    assert entries[1].identifiers.molecule_hash == dimer.get_hash()
    assert entries[2] is None
    assert entries[3].id == stretched_id
    assert set(result["meta"]["missing"]) == {0, 2}

    # Cleanup adds
    ids_to_delete.append(entries[1].id)
    n_deleted = storage_socket.del_molecules(id=ids_to_delete)
    assert n_deleted == 2
def test_results_pagination(storage_socket):
    """
    Test results pagination

    Stores 50 result records on one molecule: the first half with method
    "M1", the second half with method "M2", each with its index as the
    ``basis`` string. It then checks ``limit``/``skip`` on the "M2" half and
    the short final page of the "M1" half.
    """

    assert len(storage_socket.get_results()['data']) == 0

    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    mol = storage_socket.add_molecules([water])['data'][0]

    result_template = {
        "molecule": mol,
        "method": "M1",
        "basis": "B1",
        "keywords": None,
        "program": "P1",
        "driver": "energy",
    }

    total_results = 50
    # Floor division keeps all counts as ints
    first_half = total_results // 2
    limit = 10
    skip = 5

    results = []
    for i in range(total_results):
        tmp = result_template.copy()
        tmp['basis'] = str(i)
        if i >= first_half:
            # second half of the records uses a different method
            tmp['method'] = 'M2'
        results.append(ptl.models.ResultRecord(**tmp))

    inserted = storage_socket.add_results(results)
    assert inserted['meta']['n_inserted'] == total_results

    ret = storage_socket.get_results(method='M2', status=None, limit=limit, skip=skip)

    # count is total, but actual data size is the limit
    assert ret['meta']['n_found'] == total_results - first_half
    assert len(ret['data']) == limit

    # The first returned record is expected to be the (skip)-th "M2" record
    assert int(ret['data'][0]['basis']) == first_half + skip

    # get the last page when fewer than limit are remaining
    half_page = limit // 2
    ret = storage_socket.get_results(method='M1', skip=first_half - half_page, status=None)
    assert len(ret['data']) == half_page

    # cleanup
    storage_socket.del_results(inserted['data'])
    storage_socket.del_molecules(mol)
def storage_results(storage_socket):
    """
    Populate the storage with four molecules and six COMPLETE result records.

    Yields the populated ``storage_socket``; after the ``yield`` all queued
    tasks, the six results and the four molecules are deleted again.
    Presumably registered as a pytest fixture by a decorator that is not
    visible here.
    """

    # Start from an empty molecule table
    assert len(storage_socket.get_molecules()['data']) == 0

    mol_names = [
        'water_dimer_minima.psimol',
        'water_dimer_stretch.psimol',
        'water_dimer_stretch2.psimol',
        'neon_tetramer.psimol',
    ]
    molecules = [ptl.data.get_molecule(mol_name) for mol_name in mol_names]
    mol_insert = storage_socket.add_molecules(molecules)
    mol_ids = mol_insert["data"]

    kw1 = ptl.models.KeywordSet(values={})
    kwid1 = storage_socket.add_keywords([kw1])["data"][0]

    # (molecule index, method, keywords, program, driver, return_result)
    # The position in this list becomes the record's hash_index.
    record_specs = [
        (0, "M1", kwid1, "P1", "energy", 5),
        (1, "M1", kwid1, "P1", "energy", 10),
        (0, "M1", kwid1, "P2", "gradient", 15),
        (0, "M2", kwid1, "P2", "gradient", 15),
        (1, "M2", kwid1, "P1", "gradient", 20),
        (1, "M3", None, "P1", "gradient", 20),
    ]

    pages = [
        ptl.models.ResultRecord(
            molecule=mol_ids[mol_index],
            method=method,
            basis="B1",
            keywords=keywords,
            program=program,
            driver=driver,
            return_result=return_result,
            hash_index=position,
            status='COMPLETE',
        )
        for position, (mol_index, method, keywords, program, driver, return_result)
        in enumerate(record_specs)
    ]

    results_insert = storage_socket.add_results(pages)
    assert results_insert["meta"]["n_inserted"] == 6

    yield storage_socket

    # Cleanup: tasks first, then results, then molecules
    queued = storage_socket.get_queue()['data']
    storage_socket.del_tasks(id=[task.id for task in queued])

    n_results_deleted = storage_socket.del_results(list(results_insert["data"]))
    assert n_results_deleted == results_insert["meta"]["n_inserted"]

    n_mols_deleted = storage_socket.del_molecules(id=mol_insert["data"])
    assert n_mols_deleted == mol_insert["meta"]["n_inserted"]
def test_results_add(storage_socket):
    """
    Insert result records, then insert again with two duplicates included.

    The second insert carries three records of which only the third is new;
    the test expects ``n_inserted == 1``, three entries in ``data`` and two
    entries under ``meta['duplicates']``.
    """

    # Two water dimers serve as the molecules for the records
    dimers = [
        ptl.data.get_molecule("water_dimer_minima.psimol"),
        ptl.data.get_molecule("water_dimer_stretch.psimol"),
    ]
    mol_insert = storage_socket.add_molecules(dimers)
    mol_ids = mol_insert["data"]

    kw1 = ptl.models.KeywordSet(comments="a", values={})
    kwid1 = storage_socket.add_keywords([kw1])["data"][0]

    # (molecule index, method, keywords); position becomes hash_index
    record_specs = [
        (0, "M1", kwid1),
        (1, "M1", kwid1),
        (1, "M22", None),
    ]
    page1, page2, page3 = [
        ptl.models.ResultRecord(
            molecule=mol_ids[mol_index],
            method=method,
            basis="B1",
            keywords=keywords,
            program="P1",
            driver="energy",
            hash_index=position,
        )
        for position, (mol_index, method, keywords) in enumerate(record_specs)
    ]

    # First insert: both records are new
    first = storage_socket.add_results([page1, page2])
    assert first["meta"]["n_inserted"] == 2
    ids = list(first['data'])

    # add with duplicates:
    second = storage_socket.add_results([page1, page2, page3])
    assert second["meta"]["n_inserted"] == 1
    assert len(second['data']) == 3  # original note: first 2 found are None
    assert len(second["meta"]['duplicates']) == 2

    # Collect whatever ids came back from the second insert
    ids.extend(res_id for res_id in second['data'] if res_id is not None)

    n_deleted = storage_socket.del_results(ids)
    assert n_deleted == 3

    n_deleted = storage_socket.del_molecules(id=mol_insert["data"])
    assert n_deleted == 2
def test_collections_include_exclude(storage_socket):
    """
    Check the ``include`` and ``exclude`` projections of ``get_collections``.

    Two "Dataset" collections are stored: one with two records, one with
    none. With ``include`` the returned keys must equal the requested set;
    with ``exclude`` none of the excluded keys may be present.
    """

    collection = "Dataset"
    name = "Dataset123"
    name2 = name + "_2"

    # Two water dimers are referenced by the first dataset's records
    dimers = [
        ptl.data.get_molecule("water_dimer_minima.psimol"),
        ptl.data.get_molecule("water_dimer_stretch.psimol"),
    ]
    mol_insert = storage_socket.add_molecules(dimers)
    mol_ids = mol_insert["data"]

    records = [
        {"name": "He1", "molecule_id": mol_ids[0], "comment": None, "local_results": {}},
        {"name": "He2", "molecule_id": mol_ids[1], "comment": None, "local_results": {}},
    ]

    db = {
        "collection": collection,
        "name": name,
        "visibility": True,
        "view_available": False,
        "group": "default",
        "records": records,
    }

    db2 = {
        "collection": collection,
        "name": name2,
        "visibility": True,
        "view_available": False,
        "records": [],
        "group": "default",
    }

    for payload in (db, db2):
        added = storage_socket.add_collection(payload)
        assert added["meta"]["n_inserted"] == 1

    # Plain lookup, no projection
    found = storage_socket.get_collections(collection=collection, name=name)
    assert found["meta"]["success"] is True
    assert len(found["data"]) == 1

    # include: only the requested keys come back
    include = {"records", "name"}
    found = storage_socket.get_collections(collection=collection, name=name, include=include)
    assert found["meta"]["success"] is True
    assert len(found["data"]) == 1
    assert set(found["data"][0].keys()) == include
    assert len(found["data"][0]["records"]) == 2

    # include on a name that does not exist: success with no data
    include = {"records", "name"}
    found = storage_socket.get_collections(collection=collection, name="none_existing", include=include)
    assert found["meta"]["success"] is True
    assert len(found["data"]) == 0

    # include with "id" on the dataset that has no records
    include = {"records", "name", "id"}
    found = storage_socket.get_collections(collection=collection, name=name2, include=include)
    assert found["meta"]["success"] is True
    assert len(found["data"]) == 1
    assert set(found["data"][0].keys()) == include
    assert len(found["data"][0]["records"]) == 0

    # exclude: none of the listed keys may be returned
    exclude = {"records", "name"}
    found = storage_socket.get_collections(collection=collection, name=name, exclude=exclude)
    assert found["meta"]["success"] is True
    assert len(found["data"]) == 1
    assert len(set(found["data"][0].keys()) & exclude) == 0

    # cleanup
    storage_socket.del_collection(collection=collection, name=name)
    storage_socket.del_collection(collection=collection, name=name2)
    storage_socket.del_molecules(mol_insert["data"])
def test_dataset_add_delete_cascade(storage_socket):
    """
    Add, overwrite and delete datasets that reference stored molecules.

    Steps exercised:

    1. add a "dataset" with two records and one contributed value set;
    2. overwrite it with two contributed value sets (numpy values);
    3. add the same payload, minus its records, as a "reactiondataset";
    4. check that deleting the molecules raises ``IntegrityError`` while a
       dataset still references them, and succeeds once both collections
       are deleted.
    """

    collection = "dataset"
    collection2 = "reactiondataset"
    name = "Dataset123"
    name2 = name + "_2"

    # Two water dimers are referenced by the dataset records
    dimers = [
        ptl.data.get_molecule("water_dimer_minima.psimol"),
        ptl.data.get_molecule("water_dimer_stretch.psimol"),
    ]
    mol_insert = storage_socket.add_molecules(dimers)
    mol_ids = mol_insert["data"]

    db = {
        "collection": collection,
        "name": name,
        "visibility": True,
        "view_available": False,
        "group": "default",
        "records": [
            {"name": "He1", "molecule_id": mol_ids[0], "comment": None, "local_results": {}},
            {"name": "He2", "molecule_id": mol_ids[1], "comment": None, "local_results": {}},
        ],
        "contributed_values": {
            "contrib1": {
                "name": "contrib1",
                "theory_level": "PBE0",
                "units": "kcal/mol",
                "values": [5, 10],
                "index": ["He2", "He1"],
                "values_structure": {},
            }
        },
    }

    # 1. initial insert
    added = storage_socket.add_collection(db.copy())
    error_description = added["meta"]["error_description"]
    print(error_description)
    assert added["meta"]["n_inserted"] == 1, error_description

    found = storage_socket.get_collections(collection=collection, name=name)
    assert found["meta"]["success"] is True
    assert len(found["data"][0]["records"]) == 2

    found = storage_socket.get_collections(collection=collection, name=name, include=["records"])
    assert found["meta"]["success"] is True

    # 2. overwrite with two contributed value sets
    contrib1 = {
        "name": "contrib1",
        "theory_level": "PBE0 FHI-AIMS",
        "units": "kcal/mol",
        "values": np.array([5, 10], dtype=np.int16),
        "index": ["He2", "He1"],
        "values_structure": {},
    }
    contrib2 = {
        "name": "contrib2",
        "theory_level": "PBE0 FHI-AIMS tight",
        "units": "kcal/mol",
        "values": [np.random.rand(2, 3), np.random.rand(2, 3)],
        "index": ["He2", "He1"],
        "values_structure": {},
    }
    db["contributed_values"] = {"contrib1": contrib1, "contrib2": contrib2}

    added = storage_socket.add_collection(db.copy(), overwrite=True)
    assert added["meta"]["n_inserted"] == 1

    found = storage_socket.get_collections(collection=collection, name=name)
    assert found["meta"]["success"] is True
    assert len(found["data"][0]["contributed_values"].keys()) == 2

    # 3. reactiondataset: same payload under a new name/type, without records
    db.update(name=name2, collection=collection2)
    del db["records"]

    added = storage_socket.add_collection(db.copy())
    assert added["meta"]["n_inserted"] == 1

    found = storage_socket.get_collections(collection=collection2, name=name2)
    assert found["meta"]["success"] is True
    stored = found["data"][0]
    assert len(stored["contributed_values"].keys()) == 2
    assert len(stored["records"]) == 0

    # 4. cleanup
    # Can't delete molecules while datasets reference them (no cascade)
    with pytest.raises(sqlalchemy.exc.IntegrityError):
        storage_socket.del_molecules(mol_insert["data"])

    # should cascade delete entries and records when dataset is deleted
    assert storage_socket.del_collection(collection=collection, name=name) == 1
    assert storage_socket.del_collection(collection=collection2, name=name2) == 1

    # Now okay to delete molecules
    storage_socket.del_molecules(mol_insert["data"])