def test_ligand_set():
    """Exercise the ``LigandSet`` accessors on the ``mcl1_sample`` target.

    Verifies that every name returned by ``get_list()`` is a key of the set
    and resolves to a ``ligands.Ligand``, that requesting an unknown ligand
    raises ``ValueError``, that rows of ``get_dataframe()`` agree with each
    ligand's ``_data``, and that ``get_molecules()`` entries are isomorphic
    to the molecule of the matching ligand.  ``get_html()`` is only called,
    its output is not inspected.
    """
    lig_set = ligands.LigandSet("mcl1_sample")

    names = lig_set.get_list()
    for lig_name in names:
        assert lig_name in lig_set.keys()
        assert isinstance(lig_set.get_ligand(lig_name), ligands.Ligand)

    with pytest.raises(ValueError, match="Ligand xxx is not part of set."):
        lig_set.get_ligand("xxx")

    # Full dataframe: a row restricted to the ligand's own index must equal
    # the ligand's data (series names are ignored).
    full_df = lig_set.get_dataframe()
    for _, row in full_df.iterrows():
        ligand = lig_set[row.loc["name"][0]]
        row_data = row.loc[ligand._data.index]
        pd.testing.assert_series_equal(ligand._data, row_data, check_names=False)

    # Column-restricted dataframe: compare name and SMILES cell by cell.
    slim_df = lig_set.get_dataframe(columns=["name", "smiles"])
    for _, row in slim_df.iterrows():
        ligand = lig_set[row.loc["name"][0]]
        assert row["name"][0] == ligand.get_name()
        assert row["smiles"][0] == ligand._data["smiles"][0]

    molecules = lig_set.get_molecules()
    for lig_name, ligand in lig_set.items():
        assert Molecule.are_isomorphic(ligand.get_molecule(), molecules[lig_name])

    # ToDo: proper test for get_html()
    lig_set.get_html()
    lig_set.get_html(columns=["name", "smiles"])
def test_ligand_class():
    """Check basic ``Ligand`` accessors for every ligand of every target.

    For each key of ``targets.target_dict`` a ``LigandSet`` is built; each
    ligand must report the name it is stored under, both via ``get_name()``
    and via the ``"name"`` column of its dataframe.  ``find_links()``,
    ``get_image()`` and ``get_html()`` are only called, not validated.
    """
    for target_name in targets.target_dict.keys():
        for lig_name, ligand in ligands.LigandSet(target_name).items():
            assert ligand.get_name() == lig_name

            full_df = ligand.get_dataframe()
            assert full_df["name"][0] == lig_name

            name_df = ligand.get_dataframe(columns=["name"])
            assert name_df["name"][0] == lig_name

            # ToDo: make proper tests (?)
            ligand.find_links()
            ligand.get_image()
            ligand.get_html()
            ligand.get_html(columns=["name", "smiles"])
def test_target_class():
    """Exercise ``targets.Target`` for the ``mcl1_sample`` target.

    Checks, in order:

    * a new ``Target`` reports the expected name and has ``ligand_data``,
      ``html_data``, ``_ligands`` and ``_edges`` all set to ``None``;
    * ``get_ligand_set()`` yields the same keys as a directly constructed
      ``LigandSet`` and populates ``_ligands``;
    * ``add_ligand_data()`` and ``get_ligand_data()`` both leave a
      ``pd.Series`` in ``ligand_data``, with ``numLigands`` equal to 15;
    * ligand/edge dataframes, dicts and HTML obtained through the target
      equal those from standalone ``LigandSet`` / ``EdgeSet`` objects.

    ``find_links()``, ``get_html_data()`` and ``get_graph()`` are only
    called; their results are not validated.
    """
    target = targets.target_dict["mcl1_sample"]
    tgt = targets.Target(target["name"])
    assert tgt.get_name() == target["name"]

    # Identity checks: ``== None`` would be evaluated element-wise if one of
    # these attributes ever held a pandas object.
    assert tgt.ligand_data is None
    assert tgt.html_data is None
    assert tgt._ligands is None
    assert tgt._edges is None

    ligand_set = ligands.LigandSet("mcl1_sample")
    assert tgt.get_ligand_set().keys() == ligand_set.keys()
    assert tgt._ligands is not None

    tgt.add_ligand_data()
    assert isinstance(tgt.ligand_data, pd.Series)
    pd.testing.assert_series_equal(tgt.ligand_data, tgt.get_ligand_data())

    # Reset the attribute so that get_ligand_data() has to fill it again.
    tgt.ligand_data = None
    ligand_data = tgt.get_ligand_data()
    assert isinstance(tgt.ligand_data, pd.Series)
    assert ligand_data["numLigands"] == 15

    # cannot compare ROMol column (SVG image), that's why we only compare these columns
    columns = ["name", "smiles", "docked", "measurement", "DerivedMeasurement"]
    df1 = tgt.get_ligand_set_dataframe(columns=columns)
    df2 = ligand_set.get_dataframe(columns=columns)
    pd.testing.assert_frame_equal(df1, df2)

    assert tgt.get_ligand_set_html() == ligand_set.get_html()

    edge_set = edges.EdgeSet("mcl1_sample")
    columns = [
        0,
        1,
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    ]
    pd.testing.assert_frame_equal(
        tgt.get_edge_set().get_dataframe(columns=columns),
        edge_set.get_dataframe(columns=columns),
    )

    assert tgt.get_edge_set().get_dict() == edge_set.get_dict()

    assert tgt.get_edge_set_html() == edge_set.get_html()

    # TODO: this actually does not test anything, only checks if it works
    tgt.find_links()
    tgt.get_html_data()
    tgt.get_graph()
def test_derive_observables():
    """Check ``Ligand.derive_observables`` for every ligand of every target.

    Each ligand is converted to ``dg``, ``ki``, ``ic50`` and ``pic50``; for
    every measured observable present in the ligand's data the derived value
    must equal ``utils.convert_value`` applied to the measurement.  The test
    then expects ``NotImplementedError`` for an unknown derived type and
    ``ValueError`` once the measured observables have been renamed to an
    unknown type.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm that the two exception checks belong inside the
    # per-ligand loop.
    derived_types = ["dg", "ki", "ic50", "pic50"]

    for target_name in targets.target_dict.keys():
        lig_set = ligands.LigandSet(target_name)
        for _, lig in lig_set.items():
            for idx, derived_type in enumerate(derived_types):
                destination = f"DerivedMeasurement{idx}"
                lig.derive_observables(
                    derived_type=derived_type, destination=destination
                )
                for original_type in lig._observables:
                    if ("measurement", original_type) not in list(lig._data.index):
                        continue
                    derived = lig._data[(destination, derived_type)]
                    expected = utils.convert_value(
                        lig._data[("measurement", original_type)],
                        original_type=original_type,
                        final_type=derived_type,
                    )
                    assert derived == expected

            # Test expected exception when trying to convert to unknown observable
            unknown_target_msg = (
                "Conversion to observable xxx not possible. "
                "Observable must be any of: dg, ki, ic50 or pic50."
            )
            with pytest.raises(NotImplementedError, match=unknown_target_msg):
                lig.derive_observables(
                    derived_type="xxx", destination="DerivedMeasurement"
                )

            # Test expected exception when trying to convert from unknown observable
            for original_type in lig._observables:
                if ("measurement", original_type) not in list(lig._data.index):
                    continue
                lig._data.rename({original_type: "xxx"}, inplace=True, level=1)

            unknown_source_msg = (
                "No known measured observable found. "
                "Measured observable should be any of: dg, ki, ic50 or pic50."
            )
            with pytest.raises(ValueError, match=unknown_source_msg):
                # ``idx`` still holds the last index of the conversion loop above.
                lig.derive_observables(
                    derived_type="pic50", destination=f"DerivedMeasurement{idx}"
                )
def test_edge():
    """Exercise ``edges.Edge`` naming, dict export and ligand-data enrichment.

    An edge built from two ligand ids must be named ``edge_<a>_<b>`` and map
    to ``["lig_<a>", "lig_<b>"]``.  Before ``add_ligand_data`` its dataframe
    is just the two ids; afterwards it must differ from that bare series, and
    the experimental DeltaG / error columns must match values recomputed from
    the ``mcl1_sample`` ligand set.
    """
    eps = 0.0001

    # Edge between ligands that do not exist in any set: only naming is checked.
    dummy_edge = edges.Edge(["xxx", "yyy"])
    assert dummy_edge.get_name() == "edge_xxx_yyy"
    assert dummy_edge.get_dict() == {"edge_xxx_yyy": ["lig_xxx", "lig_yyy"]}

    ligand_set = ligands.LigandSet("mcl1_sample")
    edg = edges.Edge(["30", "27"])
    assert edg.get_name() == "edge_30_27"
    assert edg.get_dict() == {"edge_30_27": ["lig_30", "lig_27"]}

    bare_edge = pd.Series({0: "30", 1: "27"})
    pd.testing.assert_series_equal(edg.get_dataframe(), bare_edge)

    # After enrichment the dataframe must no longer equal the bare id series.
    edg.add_ligand_data(ligand_set)
    with pytest.raises(AssertionError):
        pd.testing.assert_series_equal(edg.get_dataframe(), bare_edge)

    columns = [
        0,
        1,
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    ]
    df = edg.get_dataframe(columns=columns)

    data_0 = ligand_set[f"lig_{edg._data[0]}"]._data
    data_1 = ligand_set[f"lig_{edg._data[1]}"]._data

    # Difference of the derived dg values, second ligand minus first.
    ddg = data_1[("DerivedMeasurement", "dg")] - data_0[("DerivedMeasurement", "dg")]
    # Individual errors combined in quadrature.
    e_ddg = np.sqrt(
        data_1[("DerivedMeasurement", "e_dg")] ** 2
        + data_0[("DerivedMeasurement", "e_dg")] ** 2
    )

    assert pytest.approx(df["exp. DeltaG [kcal/mol]"].magnitude, eps) == ddg.magnitude
    assert pytest.approx(df["exp. Error [kcal/mol]"].magnitude, 0.5) == e_ddg.magnitude
# "lig_68": -7.69, } eps = 0.01 for key, item in jacs_data.items(): assert ( pytest.approx(item, eps) == df[df.name == key][("DerivedMeasurement", "dg")] .values[0] .to(utils.unit_registry("kcal / mole")) .magnitude ) test_set = [] ligand_set = ligands.LigandSet("mcl1_sample") for name, lig in ligand_set.items(): test_set.append(("mcl1_sample", name, lig)) @pytest.mark.parametrize("target, ligand_name, lig", test_set) def test_ligand_data(target, ligand_name, lig): m1 = Chem.MolFromSmiles(lig._data["smiles"][0]) m1 = Chem.AddHs(m1) m2 = Chem.SDMolSupplier( os.path.join( targets.data_path, targets.get_target_dir(target), "02_ligands", ligand_name, "crd",
def _assert_edge_matches_ligands(edge_data, lig_set, eps):
    """Check one edge record against the two ligands it connects.

    Parameters
    ----------
    edge_data
        Object indexable by ``0``, ``1``, ``"exp. DeltaG [kcal/mol]"`` and
        ``"exp. Error [kcal/mol]"`` (an edge's ``_data`` or a dataframe row).
    lig_set
        Ligand set in which ``lig_<id>`` entries are looked up.
    eps
        Tolerance handed to ``pytest.approx`` for the DeltaG comparison.

    Raises
    ------
    AssertionError
        If a ligand is missing from ``lig_set`` or a stored value does not
        match the value recomputed from the ligand data.
    """
    name_0 = f"lig_{edge_data[0]}"
    name_1 = f"lig_{edge_data[1]}"
    assert name_0 in lig_set.keys()
    assert name_1 in lig_set.keys()

    data_0 = lig_set[name_0]._data
    data_1 = lig_set[name_1]._data

    # Difference of the derived dg values, second ligand minus first.
    ddg = data_1[("DerivedMeasurement", "dg")] - data_0[("DerivedMeasurement", "dg")]
    assert (
        pytest.approx(edge_data["exp. DeltaG [kcal/mol]"].magnitude, eps)
        == ddg.magnitude
    )

    # Individual errors combined in quadrature; compared with a loose tolerance.
    e_ddg = np.sqrt(
        data_1[("DerivedMeasurement", "e_dg")] ** 2
        + data_0[("DerivedMeasurement", "e_dg")] ** 2
    )
    assert (
        pytest.approx(edge_data["exp. Error [kcal/mol]"].magnitude, 0.5)
        == e_ddg.magnitude
    )


def test_edge_set():
    """Exercise ``edges.EdgeSet`` for the ``mcl1_sample`` target.

    Every edge is validated against the ligand set three ways: through the
    edge objects themselves, through the rows of the full dataframe, and
    through the rows of a column-restricted dataframe.  ``get_html()`` is
    only called (output not inspected), and ``get_dict()`` keys must follow
    the ``edge_<a>_<b>`` naming derived from their ligand names.
    """
    eps = 0.01
    columns = (
        0,
        1,
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    )

    lig_set = ligands.LigandSet("mcl1_sample")
    edg_set = edges.EdgeSet("mcl1_sample")

    for _, edg in edg_set.items():
        _assert_edge_matches_ligands(edg._data, lig_set, eps)

    for _, row in edg_set.get_dataframe().iterrows():
        _assert_edge_matches_ligands(row, lig_set, eps)

    # A fresh list is passed on each call, as separate literals were before.
    for _, row in edg_set.get_dataframe(columns=list(columns)).iterrows():
        _assert_edge_matches_ligands(row, lig_set, eps)

    # Smoke calls only: the generated HTML is not checked.
    edg_set.get_html()
    edg_set.get_html(columns=list(columns))

    for edge_name, ligs in edg_set.get_dict().items():
        id_0 = ligs[0].replace("lig_", "")
        id_1 = ligs[1].replace("lig_", "")
        assert edge_name == f"edge_{id_0}_{id_1}"
        assert ligs[0] in lig_set.keys()
        assert ligs[1] in lig_set.keys()