示例#1
0
    def remove_decorator(self, smirks):
        """
        Strip a single decorator from one atom or bond of a SMIRKS pattern.

        The item to edit and the removal options available for it are
        supplied by ``self._get_item_and_remove_options``; one of those
        options is then drawn with ``np.random.choice``.

        Returns a ``(smirks, changed)`` pair: the reduced SMIRKS and True
        when something was removed, otherwise the input SMIRKS and False.
        """
        environment = CE(smirks)
        target, options = self._get_item_and_remove_options(environment)

        # what gets handed back whenever nothing could be removed
        untouched = (smirks, False)

        # not expected to happen, kept as a safety net
        if target is None or len(options) == 0:
            return untouched

        selected = np.random.choice(options)
        if selected == 'remove_ors':
            reduced, success = self.remove_or(target.getORtypes(),
                                              isinstance(target, CE.Bond))
            if success:
                target.setORtypes(reduced)
        elif selected == 'remove_ands':
            reduced, success = self.remove_and(target.getANDtypes())
            if success:
                target.setANDtypes(reduced)
        else:  # the only option left is 'remove_atom'
            success = environment.removeAtom(target)

        if not success:
            return untouched
        return environment.asSMIRKS(), True
示例#2
0
def get_smirks_matches(mol, smirks):
    """
    Gets atom indices for a smirks string in a given molecule

    Parameters
    ----------
    mol : a chemper Mol
    smirks : str
             SMIRKS pattern being matched to the molecule

    Returns
    --------
    matches: keys of the match dictionary
        one tuple of atom indices per match, ordered by SMIRKS index
    """
    from chemper.graphs.environment import ChemicalEnvironment

    env_type = ChemicalEnvironment(smirks).getType()
    if env_type is None:
        # the pattern is not one of the recognised environment types, so
        # no index symmetry is assumed and a plain dictionary is used
        matches = dict()
    elif env_type.lower() == 'impropertorsion':
        matches = ImproperDict()
    else:
        matches = ValenceDict()

    for match in mol.smirks_search(smirks):
        # walk the SMIRKS indices in ascending order so that the tuple
        # positions line up with the labels :1, :2, ...
        atom_indices = tuple(match[idx].get_index()
                             for idx in sorted(match.keys()))
        # only the keys matter, the value is a placeholder
        matches[atom_indices] = ''

    return matches.keys()
示例#3
0
def test_create_environments(smirks, frag_type):
    """
    Build a ChemicalEnvironment from the given SMIRKS and check that the
    type it reports is the expected fragment type
    """
    assert ChemicalEnvironment(smirks).getType() == frag_type
示例#4
0
def test_get_component_list(comp, option, expected_len):
    """
    Check that getComponentList returns the expected number of components
    for the requested component kind and descriptor option
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]")
    assert len(angle.getComponentList(comp, option)) == expected_len
示例#5
0
def test_selection_by_descriptor(descriptor, is_none):
    """
    Check that selecting an atom and a bond by descriptor yields None
    exactly when that is expected
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]")
    selected_atom = angle.selectAtom(descriptor)
    selected_bond = angle.selectBond(descriptor)

    atom_missing = selected_atom is None
    bond_missing = selected_bond is None
    assert atom_missing == is_none
    assert bond_missing == is_none
示例#6
0
def test_ring_parsing(decorator):
    """
    Check that a decorator is picked up by the parser both when it is
    attached to an OR type and when it is listed as an AND type
    """
    # the same decorator goes on the OR base and into the AND section
    env = ChemicalEnvironment("[#1%s;a;%s:1]" % (decorator, decorator))
    first_atom = env.getIndexedAtoms()[0]

    # second entry of the first OR type holds that type's decorators
    or_decorators = first_atom.getORtypes()[0][1]
    assert decorator in or_decorators

    and_types = first_atom.getANDtypes()
    assert decorator in and_types
示例#7
0
def test_wrong_smirks_error():
    """
    Check that an unparseable SMIRKS raises SMIRKSParsingError
    """
    from chemper.graphs.environment import SMIRKSParsingError

    smirks = "[*;m:1]"
    msg = "SMIRKS (%s) should not be parseable, but an environment was successfully created"
    with pytest.raises(SMIRKSParsingError):
        ChemicalEnvironment(smirks)
        # only reached when parsing unexpectedly succeeded; pytest.fail
        # raises an outcome exception that pytest.raises lets through,
        # so the test fails with the explanatory message
        pytest.fail(msg % smirks)
示例#8
0
    def remove_decorator(self, smirks):
        """
        Strip a single decorator from one atom or bond of a SMIRKS pattern.

        The item to edit and its removal options are supplied by
        ``self._get_item_and_remove_options``; one option is then drawn
        with ``np.random.choice``.

        Parameters
        -----------
        smirks : str
            The SMIRKS string to reduce

        Returns
        --------
        new_smirks : str
            The reduced SMIRKS pattern, or the input when nothing changed
        is_changed : bool
            True if a decorator (or atom) was successfully removed
        """
        environment = CE(smirks)
        target, options = self._get_item_and_remove_options(environment)

        # not expected to happen, kept as a safety net
        nothing_to_remove = target is None or len(options) == 0
        if nothing_to_remove:
            return smirks, False

        selected = np.random.choice(options)
        if selected == 'remove_ors':
            reduced, success = self.remove_or(target.or_types,
                                              isinstance(target, CE.Bond))
            if success:
                target.or_types = reduced
        elif selected == 'remove_ands':
            reduced, success = self.remove_and(target.and_types)
            if success:
                target.and_types = reduced
        else:  # the only option left is 'remove_atom'
            success = environment.remove_atom(target)

        if success:
            return environment.as_smirks(), True
        return smirks, False
示例#9
0
def make_smirks_attribute_graph(chem_env: ChemicalEnvironment) -> nx.Graph:
    """
    Build a fresh nx.Graph from the environment's graph in which every node
    carries the contents of that node object's ``__dict__`` as attributes.

    The edges are whatever ``chem_env._graph_edges(data=True)`` returns,
    passed on unchanged.
    """
    edges_with_data = chem_env._graph_edges(data=True)

    # pair each node object with its own attribute dictionary
    attributed_nodes = []
    for node in chem_env._graph.nodes():
        attributed_nodes.append((node, node.__dict__))

    attribute_graph = nx.Graph()
    attribute_graph.add_nodes_from(attributed_nodes)
    attribute_graph.add_edges_from(edges_with_data)
    return attribute_graph
示例#10
0
def test_complicated_torsion():
    """
    Assemble a complicated torsion ChemicalEnvironment step by step,
    exercising the methods that add atoms, remove atoms and attach
    ORtypes and ANDtypes to existing atoms and bonds

    The torsion being built is
    "[*:1] - [#6:2](=[#8,#7;H0]) - [#6:3](-[#7X3,#8X2;+0]-[#1])(-[#1]) - [*:4]"
    """
    torsion = ChemicalEnvironment("[*:1]-[#6:2]-[#6:3]-[*:4]")
    # keep handles on the indexed atoms (through selectAtom)
    first = torsion.selectAtom(1)
    second = torsion.selectAtom(2)
    third = torsion.selectAtom(3)

    # Grow the environment; handles are kept for the atoms removed later on
    torsion.addAtom(second, [('=',[])], None, [('#8',[]),('#7',[])], ['H0'])
    third_alpha = torsion.addAtom(third)
    third_beta = torsion.addAtom(third_alpha, [('-',[])], None, [('#1',[])])
    torsion.addAtom(third, [('-',[])], None, [('#1',[])])

    # The bond between atom 3 and its alpha atom gets an ORtype
    alpha_bond = torsion.getBond(third, third_alpha)
    assert alpha_bond is not None
    alpha_bond.addORtype('-', [])

    # Decorate the alpha atom of atom 3 with ORtypes and an ANDtype
    third_alpha.addORtype('#7', ['X3'])
    third_alpha.addORtype('#8', ['X2'])
    third_alpha.addANDtype('+0')

    # getAtoms and getBonds are called only to make sure they run
    torsion.getAtoms()
    torsion.getBonds()

    # both the SMARTS and the SMIRKS form of the large torsion must be valid
    as_smarts = torsion.asSMIRKS(smarts=True)
    assert is_valid_smirks(as_smarts)
    as_smirks = torsion.asSMIRKS()
    assert is_valid_smirks(as_smirks)

    # Removal is refused for the labeled atom and for the alpha atom,
    # but succeeds for the beta atom
    assert not torsion.removeAtom(first)
    assert not torsion.removeAtom(third_alpha)
    assert torsion.removeAtom(third_beta)
示例#11
0
def test_other_env_methods():
    """
    Exercise the smaller ChemicalEnvironment helpers: the is* classifiers,
    getBond, getValence and the bond order queries
    """
    angle = ChemicalEnvironment(
        "[#6X3;R1:1]=,:;@[#6X3;R1;a:2](-,:;@[#7])-;!@[#8X2H1;!R:3]")

    # one atom and one bond of each flavour: indexed, alpha and beta
    atom2 = angle.selectAtom(2)
    bond1 = angle.selectBond(1)
    alpha_atom = angle.selectAtom('Alpha')
    beta_atom = angle.addAtom(alpha_atom)
    alpha_bond = angle.getBond(atom2, alpha_atom)
    beta_bond = angle.getBond(alpha_atom, beta_atom)

    # each entry pairs a group of components with the
    # (classifier, expected answer) checks that apply to all of them
    expectations = (
        ((atom2, bond1),
         ((angle.isAlpha, False), (angle.isBeta, False),
          (angle.isIndexed, True), (angle.isUnindexed, False))),
        ((alpha_atom, alpha_bond),
         ((angle.isAlpha, True), (angle.isIndexed, False),
          (angle.isUnindexed, True))),
        ((beta_atom, beta_bond),
         ((angle.isBeta, True),)),
    )

    for components, checks in expectations:
        for component in components:
            for classifier, expected in checks:
                assert classifier(component) == expected

    # getBond gives None for a pair of atoms that are not bonded
    atom1 = angle.selectAtom(1)
    assert angle.getBond(beta_atom, atom1) is None

    # atom2 is expected to have a valence of 3
    assert angle.getValence(atom2) == 3

    # bond1 is "=,:;@" and its order is expected to be 1.5,
    # the lowest of the possibilities
    assert bond1.getOrder() == 1.5

    # the bond order summed around atom2
    assert angle.getBondOrder(atom2) == 3.5
示例#12
0
    def __init__(self,
                 molecules,
                 cluster_list,
                 max_layers=5,
                 verbose=True,
                 strict_smirks=True):
        """
        Parameters
        ----------
        molecules : list of Mols
            These can be chemper Mols or molecules from any supported toolkit
            (currently OpenEye or RDKit)

        cluster_list : list of (label, smirks_atom_lists) pairs
            Each label is paired with one list per molecule; that list holds
            the tuples of atom indices in that molecule which belong to
            the cluster.

            For example, to put atoms with indices (0,1) and (1,2) in cluster 'c1'
            and atoms (2,3) in cluster 'c2' for each of two molecules,
            cluster_list would be

            [ ('c1', [ [ (0,1), (1,2) ], [ (0,1), (1,2) ] ]),
              ('c2', [ [ (2,3)        ], [ (2,3)        ] ]) ]

            To see an example of this in action checkout
            https://github.com/MobleyLab/chemper/tree/master/examples

        max_layers : int (optional)
            default = 5
            the maximum number of atoms away from the indexed atoms
            that should be considered

        verbose : boolean (optional)
            default = True
            If true the SMIRKS are printed once they have been generated

        strict_smirks : boolean (optional)
            default = True
            If False a warning is printed instead of raising an error
            when the generated SMIRKS do not pass the check against the
            input clusters.
            This setting is not recommended unless you are a master user
            or developer trying to test current behavior.
            The variable SMIRKSifier.checks tells you if the SMIRKS
            generation failed when strict_smirks = False
        """
        self.molecules = [mol_toolkit.Mol(m) for m in molecules]
        self.intermediate_smirks = dict()
        self.cluster_list = cluster_list
        self.verbose = verbose
        self.max_layers = max_layers
        self.strict_smirks = strict_smirks

        # The kind of SMIRKS decides which dictionary handles the symmetry
        # of atom index tuples; it is read off a trial SMIRKS built from
        # the first cluster
        trial_graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
        trial_env = CE(trial_graph.as_smirks(compress=True))
        env_type = trial_env.get_type()
        if env_type is None:
            # unknown chemical pattern, no symmetry handling
            self.dict_type = dict
        elif env_type.lower() == 'impropertorsion':
            self.dict_type = ImproperDict
        else:
            self.dict_type = ValenceDict

        # Reshape the input clusters into
        # {mol_idx: {(atom indices): label, ...}, ... }
        self.cluster_dict = dict()
        self.ref_labels = set()
        self.total = 0
        # cluster_list looks like [(label, [for each mol [tuples of atom indices]]), ...]
        for label, per_mol_tuples in self.cluster_list:
            self.ref_labels.add(label)
            for mol_idx, atom_tuples in enumerate(per_mol_tuples):
                if mol_idx not in self.cluster_dict:
                    self.cluster_dict[mol_idx] = self.dict_type()
                mol_clusters = self.cluster_dict[mol_idx]
                for atom_tuple in atom_tuples:
                    self.total += 1
                    mol_clusters[atom_tuple] = label

        # generate SMIRKS patterns for the input clusters
        self.current_smirks, self.layers = self.make_smirks()
        if self.verbose:
            print_smirks(self.current_smirks)
        # compare the SMIRKS against the input clusters and keep the matches
        self.type_matches, self.checks = self.types_match_reference()

        if self.checks:
            return

        msg = """
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with %i layers. Try increasing the number of layers
                      or changing your clusters
                      """ % self.max_layers
        if not self.strict_smirks:
            print("WARNING!", msg)
        else:
            raise ClusteringError(msg)
示例#13
0
def compare_smirks_graphs(smirks1: str, smirks2: str):
    """
    Decide whether two SMIRKS patterns describe the same labelled graph.

    Identical strings are accepted immediately. Otherwise both patterns are
    parsed into ChemicalEnvironments, which must tag the same number of
    atoms, and their attribute graphs are tested for isomorphism.

    Two nodes match when their "_or_types", "_and_types", "ring" and
    "is_atom" attributes are equal and their indices are compatible:
    either equal, or both positive and differing by an amount allowed for
    the number of tagged atoms (none for 1 tagged atom, 1 for 2 tagged
    atoms, 2 for 3 tagged atoms, 3 or 1 for 4 tagged atoms).

    Parameters
    ----------
    smirks1 : str
    smirks2 : str
        The two SMIRKS patterns to compare

    Returns
    -------
    bool
        True if the strings are identical or the graphs are isomorphic
        under the node matching described above
    """
    if smirks1 == smirks2:
        return True

    # index differences tolerated between two matched tagged atoms,
    # keyed by the number of tagged atoms in the pattern
    allowed_index_shifts = {
        1: (),
        2: (1,),
        3: (2,),
        4: (3, 1),
    }
    compared_attributes = ("_or_types", "_and_types", "ring", "is_atom")

    env1 = ChemicalEnvironment(smirks1)
    env2 = ChemicalEnvironment(smirks2)
    # the number of tagged atoms decides which index shifts are acceptable,
    # and both patterns have to tag the same number of atoms
    smirks_type = len(env1.get_indexed_atoms())
    if smirks_type != len(env2.get_indexed_atoms()):
        return False

    def node_match(x, y):
        """
        A networkx node matching function working on node attribute dicts.
        """
        # looked up first so that an unsupported number of tagged atoms
        # still surfaces as a KeyError
        shifts = allowed_index_shifts[smirks_type]
        if not all(x[key] == y[key] for key in compared_attributes):
            return False
        index1, index2 = x["index"], y["index"]
        if index1 == index2:
            return True
        if index1 > 0 and index2 > 0:
            return abs(index1 - index2) in shifts
        return False

    # now do the check
    env1_graph = make_smirks_attribute_graph(env1)
    env2_graph = make_smirks_attribute_graph(env2)
    gm = nx.algorithms.isomorphism.GraphMatcher(env1_graph,
                                                env2_graph,
                                                node_match=node_match)
    return gm.is_isomorphic()