def remove_decorator(self, smirks):
    """
    Choose an atom or bond in the input SMIRKS pattern
    and then remove one decorator from it.

    Parameters
    -----------
    smirks : str
        SMIRKS pattern to be reduced

    Returns
    --------
    new_smirks : str
        the reduced pattern, or the input pattern when nothing was removed
    is_changed : bool
        True only when the chosen removal actually happened
    """
    environment = CE(smirks)
    target, options = self._get_item_and_remove_options(environment)

    # note this should be impossible
    if target is None or len(options) == 0:
        return smirks, False

    # one removal strategy is drawn at random from those offered for the target
    choice = np.random.choice(options)

    if choice == 'remove_ors':
        # remove_or is told whether the target is a bond
        reduced, succeeded = self.remove_or(target.getORtypes(),
                                            isinstance(target, CE.Bond))
        if succeeded:
            target.setORtypes(reduced)
    elif choice == 'remove_ands':
        reduced, succeeded = self.remove_and(target.getANDtypes())
        if succeeded:
            target.setANDtypes(reduced)
    else:  # choice == 'remove_atom'
        succeeded = environment.removeAtom(target)

    # every branch reports failure the same way: hand back the input untouched
    if not succeeded:
        return smirks, False

    return environment.asSMIRKS(), True
def get_smirks_matches(mol, smirks):
    """
    Gets atom indices for a smirks string in a given molecule

    Parameters
    ----------
    mol : a chemper Mol
    smirks : str
        SMIRKS pattern being matched to the molecule

    Returns
    --------
    matches : dictionary keys
        the keys of the match dictionary; each key is a tuple with the
        atom indices for the labeled atoms in the smirks, ordered by
        their SMIRKS index
    """
    from chemper.graphs.environment import ChemicalEnvironment

    env_type = ChemicalEnvironment(smirks).getType()
    if env_type is None:
        # The environment could not name a type for this pattern; store the
        # matches in a plain dictionary instead of failing on None.lower()
        matches = dict()
    elif env_type.lower() == 'impropertorsion':
        matches = ImproperDict()
    else:
        matches = ValenceDict()

    for match in mol.smirks_search(smirks):
        # match maps SMIRKS index -> matched atom; walk it in SMIRKS index order
        atom_indices = tuple(match[s].get_index() for s in sorted(match.keys()))
        # only the keys are used, the stored value is a placeholder
        matches[atom_indices] = ''

    return matches.keys()
def test_create_environments(smirks, frag_type):
    """
    Build a ChemicalEnvironment from the given SMIRKS and check that
    the type it reports is the expected fragment type
    """
    found_type = ChemicalEnvironment(smirks).getType()
    assert found_type == frag_type
def test_get_component_list(comp, option, expected_len):
    """
    Check that getComponentList returns the expected number of
    components for the requested component kind and option
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]"
    )
    found = angle.getComponentList(comp, option)
    n_found = len(found)
    assert n_found == expected_len
def test_selection_by_descriptor(descriptor, is_none):
    """
    Check that selecting an atom and a bond by descriptor returns
    None exactly when `is_none` says it should
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]"
    )
    selected_atom = angle.selectAtom(descriptor)
    selected_bond = angle.selectBond(descriptor)

    # the same expectation applies to the atom and to the bond
    for selected in (selected_atom, selected_bond):
        assert (selected is None) == is_none
def test_ring_parsing(decorator):
    """
    Check that a decorator is parsed correctly both when it is part
    of an OR type and when it is an AND type
    """
    # the decorator is written twice: directly after the '#1' base
    # and again as its own ';' separated entry
    pattern = "[#1%s;a;%s:1]" % (decorator, decorator)
    first_atom = ChemicalEnvironment(pattern).getIndexedAtoms()[0]

    # decorators stored with the first OR type
    or_decorators = first_atom.getORtypes()[0][1]
    assert decorator in or_decorators

    # AND types of the same atom
    and_types = first_atom.getANDtypes()
    assert decorator in and_types
def test_wrong_smirks_error():
    """
    Check that an unparseable SMIRKS raises SMIRKSParsingError
    """
    from chemper.graphs.environment import SMIRKSParsingError

    # pattern the parser is expected to reject
    smirks = "[*;m:1]"

    # pytest.raises already fails the test when nothing is raised, so no
    # hand written failure message or result binding is needed
    with pytest.raises(SMIRKSParsingError):
        ChemicalEnvironment(smirks)
def remove_decorator(self, smirks):
    """
    Choose an atom or bond in the input SMIRKS pattern
    and then remove one decorator from it.

    Parameters
    -----------
    smirks : str
        A SMIRKS string which should be reduced

    Returns
    --------
    new_smirks : str
        The reduced SMIRKS pattern, or the input pattern
        when nothing could be removed
    is_changed : bool
        True if the chosen removal was successful
    """
    # result handed back whenever the pattern is left as it was
    unchanged = (smirks, False)

    env = CE(smirks)
    item, choices = self._get_item_and_remove_options(env)

    # note this should be impossible
    if item is None or len(choices) == 0:
        return unchanged

    # one removal strategy is drawn at random from those offered for the item
    picked = np.random.choice(choices)

    if picked == 'remove_ors':
        # remove_or is told whether the item is a bond
        fewer_ors, did_change = self.remove_or(item.or_types,
                                               isinstance(item, CE.Bond))
        if not did_change:
            return unchanged
        item.or_types = fewer_ors

    elif picked == 'remove_ands':
        fewer_ands, did_change = self.remove_and(item.and_types)
        if not did_change:
            return unchanged
        item.and_types = fewer_ands

    else:  # picked == 'remove_atom'
        if not env.remove_atom(item):
            return unchanged

    return env.as_smirks(), True
def make_smirks_attribute_graph(chem_env: ChemicalEnvironment) -> nx.Graph:
    """
    Make a new nx.Graph from the environment with attributes.

    Every node of the environment graph is copied over together with its
    instance attributes (``node.__dict__``) as node data, so the attributes
    can be read with dictionary access on the new graph.
    """
    attribute_graph = nx.Graph()

    # edges are taken over together with the data the environment reports for them
    edges = chem_env._graph_edges(data=True)

    attribute_graph.add_nodes_from(
        (atom, atom.__dict__) for atom in chem_env._graph.nodes()
    )
    attribute_graph.add_edges_from(edges)
    return attribute_graph
def test_complicated_torsion():
    """
    Build up a decorated torsion step by step and check the result.

    Starting from "[*:1]-[#6:2]-[#6:3]-[*:4]", unindexed atoms and extra
    OR and AND types are added to the environment. The resulting SMARTS
    and SMIRKS must be valid, and atom removal is then checked for an
    indexed atom and for two of the added atoms.

    This is the SMIRKS for the final torsion
    "[*:1] - [#6:2](=[#8,#7;H0]) - [#6:3](-[#7X3,#8X2;+0]-[#1])(-[#1]) - [*:4]"
    """
    torsion = ChemicalEnvironment("[*:1]-[#6:2]-[#6:3]-[*:4]")

    # indexed atoms, looked up by their SMIRKS index
    first = torsion.selectAtom(1)
    second = torsion.selectAtom(2)
    third = torsion.selectAtom(3)

    # neighbor of atom 2; the returned atom is not needed again
    torsion.addAtom(second, [('=', [])], None, [('#8', []), ('#7', [])], ['H0'])
    # atoms hanging off atom 3: a bare neighbor, an atom attached to that
    # neighbor, and a second direct neighbor (its return value is not needed)
    branch_root = torsion.addAtom(third)
    branch_tip = torsion.addAtom(branch_root, [('-', [])], None, [('#1', [])])
    torsion.addAtom(third, [('-', [])], None, [('#1', [])])

    # the bond between atom 3 and its bare neighbor gets an OR type
    branch_bond = torsion.getBond(third, branch_root)
    assert branch_bond is not None
    branch_bond.addORtype('-', [])

    # OR types and an AND type for the bare neighbor itself
    branch_root.addORtype('#7', ['X3'])
    branch_root.addORtype('#8', ['X2'])
    branch_root.addANDtype('+0')

    # these are only called to make sure they run
    torsion.getAtoms()
    torsion.getBonds()

    # both string forms of the enlarged torsion have to be valid
    as_smarts = torsion.asSMIRKS(smarts=True)
    assert is_valid_smirks(as_smarts)
    as_smirks = torsion.asSMIRKS()
    assert is_valid_smirks(as_smirks)

    # removing an indexed atom is refused
    was_removed = torsion.removeAtom(first)
    assert not was_removed
    # removing the neighbor is refused as well
    # (presumably because another atom is still attached to it)
    was_removed = torsion.removeAtom(branch_root)
    assert not was_removed
    # the atom at the end of the branch can be removed
    was_removed = torsion.removeAtom(branch_tip)
    assert was_removed
def test_other_env_methods():
    """
    Exercise the smaller ChemicalEnvironment methods: the is* descriptors,
    getBond for unbonded atoms, getValence, and the bond order methods
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]"
    )

    # components used for the is* descriptor checks
    atom2 = angle.selectAtom(2)
    bond1 = angle.selectBond(1)
    alpha_atom = angle.selectAtom('Alpha')
    beta_atom = angle.addAtom(alpha_atom)
    alpha_bond = angle.getBond(atom2, alpha_atom)
    beta_bond = angle.getBond(alpha_atom, beta_atom)

    # each entry pairs a group of components with the
    # (method, expected result) checks that apply to all of them
    expectations = (
        ((atom2, bond1),
         ((angle.isAlpha, False), (angle.isBeta, False),
          (angle.isIndexed, True), (angle.isUnindexed, False))),
        ((alpha_atom, alpha_bond),
         ((angle.isAlpha, True), (angle.isIndexed, False),
          (angle.isUnindexed, True))),
        ((beta_atom, beta_bond),
         ((angle.isBeta, True),)),
    )
    # flatten to one (component, method, expected) triple per check
    cases = [
        (component, method, expected)
        for components, checks in expectations
        for component in components
        for method, expected in checks
    ]
    for component, method, expected in cases:
        outcome = method(component)
        assert outcome == expected

    # getBond gives None when the two atoms are not bonded
    atom1 = angle.selectAtom(1)
    unbonded = angle.getBond(beta_atom, atom1)
    assert unbonded is None

    # valence: should be 3 for atom2
    valence = angle.getValence(atom2)
    assert valence == 3

    # For bond1 =,:;@ it should be 1.5 because order returns lowest possible
    single_bond_order = bond1.getOrder()
    assert single_bond_order == 1.5

    # bond order reported for atom2 as a whole
    atom_bond_order = angle.getBondOrder(atom2)
    assert atom_bond_order == 3.5
def __init__(self, molecules, cluster_list, max_layers=5, verbose=True, strict_smirks=True):
    """
    Store the input clusters, build SMIRKS patterns for them and check
    that those patterns reproduce the clusters.

    Parameters
    ----------
    molecules : list of Mols
        These can be chemper Mols or molecules from any supported toolkit
        (currently OpenEye or RDKit)

    cluster_list : list of (label, smirks_atom_lists) pairs
        For each label the user should provide, for every molecule,
        a list of tuples with the atom indices in that molecule
        which should be included in that cluster.

        For example, if you wanted all atoms with indices (0,1) and (1,2)
        to be in cluster 'c1' and atoms (2,3) in cluster 'c2' for each of
        two molecules then cluster_list would be

        [ ('c1', [ [ (0,1), (1,2) ], [ (0,1), (1,2) ] ]),
          ('c2', [ [ (2,3) ], [ (2,3) ] ]) ]

        To see an example of this in action checkout
        https://github.com/MobleyLab/chemper/tree/master/examples

    max_layers : int (optional)
        default = 5
        how many atoms away from the indexed atoms should
        we consider at the maximum

    verbose : boolean (optional)
        default = True
        If True the generated SMIRKS are printed to the command line

    strict_smirks : boolean (optional)
        default = True
        If False it will not raise an error when incapable of making SMIRKS
        This setting is not recommended unless you are a master user
        or developer trying to test current behavior.
        The variable SMIRKSifier.checks will tell you if the SMIRKS
        generation failed when strict_smirks = False

    Raises
    ------
    ClusteringError
        when the generated SMIRKS do not pass the check against the input
        clusters and strict_smirks is True
    """
    # plain storage of the inputs; every molecule is wrapped in a chemper Mol
    self.molecules = list(map(mol_toolkit.Mol, molecules))
    self.intermediate_smirks = {}
    self.cluster_list = cluster_list
    self.verbose = verbose
    self.max_layers = max_layers
    self.strict_smirks = strict_smirks

    # determine the type of SMIRKS for symmetry in indices purposes
    # This is done by making a test SMIRKS from the first cluster
    test_graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
    test_env = CE(test_graph.as_smirks(compress=True))
    env_type = test_env.get_type()
    if env_type is None:
        # corresponds to an unknown chemical pattern
        self.dict_type = dict
    elif env_type.lower() == 'impropertorsion':
        self.dict_type = ImproperDict
    else:
        self.dict_type = ValenceDict

    # Convert input "smirks_atom_list" into a dictionary with the form:
    # {mol_idx: {(atom indices): label, ...}, ... }
    self.cluster_dict = {}
    self.ref_labels = set()
    self.total = 0
    # form of cluster_list is [(label, [for each mol [ (tuples of atom indices)] ) ]
    for label, per_molecule in self.cluster_list:
        self.ref_labels.add(label)
        for mol_idx, index_tuples in enumerate(per_molecule):
            # one dictionary per molecule, created the first time it is seen
            if mol_idx not in self.cluster_dict:
                self.cluster_dict[mol_idx] = self.dict_type()
            labels_for_mol = self.cluster_dict[mol_idx]
            for index_tuple in index_tuples:
                self.total += 1
                labels_for_mol[index_tuple] = label

    # make SMIRKS patterns for input clusters
    self.current_smirks, self.layers = self.make_smirks()
    if self.verbose:
        print_smirks(self.current_smirks)

    # check SMIRKS and save the matches to input clusters
    self.type_matches, self.checks = self.types_match_reference()
    if self.checks:
        return

    # the SMIRKS do not reproduce the clusters: raise or warn, as requested
    msg = """ SMIRKSifier was not able to create SMIRKS for the provided clusters with %i layers. Try increasing the number of layers or changing your clusters """ % self.max_layers
    if self.strict_smirks:
        raise ClusteringError(msg)
    print("WARNING!", msg)
def compare_smirks_graphs(smirks1: str, smirks2: str) -> bool:
    """
    Compare two smirks schema based on the types of smirks they cover.

    Identical strings are equal without any parsing. Otherwise both patterns
    are turned into attribute graphs and tested for isomorphism. Two nodes
    are matched when their "_or_types", "_and_types", "ring" and "is_atom"
    attributes agree and their "index" values are compatible.

    Parameters
    ----------
    smirks1 : str
        first SMIRKS pattern
    smirks2 : str
        second SMIRKS pattern

    Returns
    -------
    bool
        True when the strings are identical or their attribute graphs are
        isomorphic under the node match described above, False otherwise
        (including when they tag a different number of atoms)

    Raises
    ------
    KeyError
        raised from the node comparison when the patterns tag a number of
        atoms other than 1 to 4
    """
    if smirks1 == smirks2:
        return True

    # Number of tagged atoms -> differences between two tagged "index" values
    # which are accepted in addition to the indices being equal
    # (presumably so that a pattern listed in the opposite direction matches).
    # 1: atom, 2: bond, 3: angle, 4: dihedral
    allowed_index_shifts = {
        1: (),
        2: (1,),
        3: (2,),
        4: (3, 1),
    }

    # first work out the type of graph, atom, angle, dihedral
    # based on the number of tagged atoms
    env1 = ChemicalEnvironment(smirks1)
    env2 = ChemicalEnvironment(smirks2)
    smirks_type = len(env1.get_indexed_atoms())
    # make sure they tag the same number of atoms
    if smirks_type != len(env2.get_indexed_atoms()):
        return False

    def attributes_match(x, y):
        """Check the attributes which must agree for every kind of smirks."""
        return all(
            x[key] == y[key]
            for key in ("_or_types", "_and_types", "ring", "is_atom")
        )

    def index_match(x, y, shifts):
        """Check the "index" values against the accepted differences."""
        index1, index2 = x["index"], y["index"]
        if index1 == index2:
            return True
        if not shifts:
            # a single tagged atom only ever matches on an equal index
            return False
        # other differences only count between two tagged (index > 0) atoms
        return index1 > 0 and index2 > 0 and abs(index1 - index2) in shifts

    def node_match(x, y):
        """A networkx node matching function for two smirks atoms."""
        # looked up on every comparison so that an unsupported number of
        # tagged atoms surfaces as a KeyError instead of a silent mismatch
        shifts = allowed_index_shifts[smirks_type]
        return attributes_match(x, y) and index_match(x, y, shifts)

    # now do the check
    env1_graph = make_smirks_attribute_graph(env1)
    env2_graph = make_smirks_attribute_graph(env2)
    gm = nx.algorithms.isomorphism.GraphMatcher(
        env1_graph, env2_graph, node_match=node_match
    )
    return gm.is_isomorphic()