def substructure(mol, query, largest_only=True, ignore_hydrogen=True):
    """Return True if mol is a substructure of the query.

    Args:
      mol: Compound
      query: Compound
      largest_only: compare only the largest graph molecule of each input
      ignore_hydrogen: make hydrogens implicit on both inputs before matching

    Returns:
      False when either input has length 0 or when the filter_ pre-check
      does not pass; otherwise the result of the subgraph isomorphism test.
    """
    def has_negative_count(cnt1, cnt2):
        # cnt1 is subtracted from cnt2 in place, so cnt2 is modified.
        # Returns True if any resulting count is negative; otherwise falls
        # off the end and returns None (falsy).
        # NOTE(review): how filter_ calls this and interprets the result is
        # not visible from here -- confirm against filter_.
        cnt2.subtract(cnt1)
        for remaining in cnt2.values():
            if remaining < 0:
                return True

    # A blank molecule on either side never matches.
    if not len(mol) or not len(query):
        return False

    mol_copy = molutil.clone(mol)
    query_copy = molutil.clone(query)
    if largest_only:
        mol_copy = molutil.largest_graph(mol_copy)
        query_copy = molutil.largest_graph(query_copy)
    if ignore_hydrogen:
        mol_copy = molutil.make_Hs_implicit(mol_copy)
        query_copy = molutil.make_Hs_implicit(query_copy)

    # Only run the graph matcher when the pre-check passes.
    if not filter_(mol_copy, query_copy, f=has_negative_count):
        return False
    matcher = GraphMatcher(
        query_copy.graph, mol_copy.graph, node_match=atom_match)
    return matcher.subgraph_is_isomorphic()
def equal(mol, query, largest_only=True, ignore_hydrogen=True):
    """Return True if mol is exactly the same structure as the query.

    Args:
      mol: Compound
      query: Compound
      largest_only: compare only the largest graph molecule of each input
      ignore_hydrogen: make hydrogens implicit on both inputs before matching

    Returns:
      False when the two molutil.mw values differ; otherwise the result of
      the graph isomorphism test.
    """
    mol_copy, query_copy = molutil.clone(mol), molutil.clone(query)
    if largest_only:
        mol_copy, query_copy = (
            molutil.largest_graph(mol_copy),
            molutil.largest_graph(query_copy),
        )
    if ignore_hydrogen:
        mol_copy, query_copy = (
            molutil.make_Hs_implicit(mol_copy),
            molutil.make_Hs_implicit(query_copy),
        )

    # Molecular weights must agree before the graph matcher is run.
    # NOTE(review): this is an exact == comparison; whether molutil.mw
    # returns a rounded value is not visible here -- confirm that float
    # round-off cannot make two identical structures compare unequal.
    same_weight = molutil.mw(mol_copy) == molutil.mw(query_copy)
    if not same_weight:
        return False
    matcher = GraphMatcher(
        query_copy.graph, mol_copy.graph, node_match=atom_match)
    return matcher.is_isomorphic()
def ready_to_draw(mol):
    """Shortcut function to prepare a molecule for drawing.

    Overwrite this function for customized appearance.

    The drawing helpers are described by the original author as
    destructive, so they are applied to a clone made with molutil.clone
    and that clone is returned.

    Args:
      mol: molecule to prepare

    Returns:
      The clone after all drawing helpers have been applied to it.
    """
    drawable = molutil.clone(mol)
    # Applied in this order. display_terminal_carbon and
    # spine_to_terminal_wedge are currently disabled.
    drawing_steps = (
        equalize_terminal_double_bond,
        scale_and_center,
        format_ring_double_bond,
    )
    for apply_step in drawing_steps:
        apply_step(drawable)
    return drawable
def comparison_array(molecule, diameter=10, size=30, ignore_hydrogen=True):
    """Generate the comparison array of a molecule.

    The comparison array consists of node pairs of the line graph of the
    molecule graph (one node per bond) and a collater.

    42 bit collater
      6 bit of distance (0-31)
      18 bit of bond attribute x 2
        9 bit of atom attribute x 2
          7 bit of atom number (0-127)
          2 bit of atom pi (0-3)

    [Sheridan, R.P. and Miller, M.D., J. Chem. Inf. Comput. Sci. 38 (1998) 915]

    Using possible_path_length instead of shortest_path_length and taking
    the intersection of the distance sets would make it possible to detect
    roundabout matching paths, and therefore to determine exact graph
    isomorphism. However, set intersection is too costly for such a trivial
    improvement.

    Args:
      molecule: Compound. molecule.require("Valence") is called on it; all
        other work is done on a clone.
      diameter: passed through to _reachables. Presumably the maximum
        distance between bonds still used for the comparison matrix
        (limited for performance reasons) -- confirm against _reachables.
      size: passed through to _reachables. Presumably a cap on the number
        of reachable nodes -- confirm against _reachables.
      ignore_hydrogen: make hydrogens implicit before building the array.

    Returns:
      arr(list): comparison array. list of tuple (node1, node2, collater)
      max_mcs(int): size of the largest clique of the graph built from the
        (node1, node2) pairs, 0 if there are none. Described by the original
        author as the maximum size of possible mcs.
      int_to_node(dict): int -> node index pair reconverter dict

    NOTE(review): the previous docstring listed "ValueError: if len(mol) < 3"
    but nothing in this function raises it explicitly -- confirm whether a
    callee does.
    """
    molecule.require("Valence")
    mol = molutil.clone(molecule)
    if ignore_hydrogen:
        mol = molutil.make_Hs_implicit(mol)
    # Ignore salt, water
    remover.remove_salt(mol)
    remover.remove_water(mol)
    # multivalent coordinated metals notably affect the performance
    remover.remove_coordinated_metal(mol)
    g = nx.line_graph(mol.graph)
    node_to_int = {}
    # Iterate over a snapshot because the graph is updated inside the loop.
    for i, e in enumerate(list(g.nodes())):
        node_to_int[e] = i
        a1 = mol.atom(e[0])
        a2 = mol.atom(e[1])
        # 9 bit atom attribute: number in the high bits, pi in the low 2.
        # Values are not masked; this relies on the ranges given in the
        # docstring layout.
        a1t = a1.number << 2 | a1.pi
        a2t = a2.number << 2 | a2.pi
        # Sorted so the bond attribute does not depend on atom order.
        low, high = sorted((a1t, a2t))
        # Graph.node was removed in NetworkX 2.4. add_node() on an existing
        # node only updates its attributes and works on every NetworkX
        # version, so the "type" attribute is still stored on the node.
        g.add_node(e, type=low << 9 | high)
    # convert node index pair to integer expression
    g = nx.relabel_nodes(g, node_to_int)
    # integer -> index pair reconverter
    int_to_node = {v: k for k, v in node_to_int.items()}
    # Version-independent replacement for the g.node[vi]["type"] lookup.
    node_type = {n: attrs["type"] for n, attrs in g.nodes(data=True)}
    arr = []
    matrix = nx.Graph()
    for ui, ua in g.nodes(data=True):
        r = _reachables(g, ui, diameter, size)
        for vi, d in r.items():
            if not d:
                # Skip entries with a falsy distance.
                continue
            matrix.add_edge(ui, vi)
            # distance | bond attribute of ui | bond attribute of vi
            code = (d << 18 | ua["type"]) << 18 | node_type[vi]
            arr.append((ui, vi, code))
    max_size = len(max(find_cliques(matrix), key=len, default=[]))
    return arr, max_size, int_to_node
def test_clone(self):
    # A clone must have the same length as the molecule it was made from.
    # Original author's timing note: this takes 0.003 ms
    source = reader.mol_from_text(MOL["Indinavir"])
    duplicate = molutil.clone(source)
    self.assertEqual(len(source), len(duplicate))