Пример #1
0
def test_optional_name_parser():
    """Check that each optional name shows up under an ``atom_symbol`` node."""
    optional_names = ['_C', '_CH2', '_CH']
    parser = SMARTS(optional_names=optional_names)
    tree = parser.parse('_CH2_C_CH')
    # First child of every `atom_symbol` subtree found in the parse tree.
    symbols = [node.children[0] for node in tree.find_data('atom_symbol')]
    assert all(name in symbols for name in optional_names)
Пример #2
0
def test_optional_name_parser():
    """Check that each optional name shows up under an ``atom_symbol`` node."""
    optional_names = ['_C', '_CH2', '_CH']
    parser = SMARTS(optional_names=optional_names)
    tree = parser.parse('_CH2_C_CH')
    # First tail entry of every subtree selected as `atom_symbol`.
    matched = tree.select('atom_symbol').strees
    symbols = [subtree.tail[0] for subtree in matched]
    assert all(name in symbols for name in optional_names)
Пример #3
0
def test_optional_name_parser():
    """Parse a string made of optional names and verify all of them are found."""
    optional_names = ['_C', '_CH2', '_CH']
    tree = SMARTS(optional_names=optional_names).parse('_CH2_C_CH')
    symbols = []
    for symbol_node in tree.find_data('atom_symbol'):
        # The symbol text is the first child of the `atom_symbol` subtree.
        symbols.append(symbol_node.children[0])
    assert all(name in symbols for name in optional_names)
Пример #4
0
 def test_optional_name_parser(self):
     """Check that each optional name shows up under an ``atom_symbol`` node."""
     optional_names = ["_C", "_CH2", "_CH"]
     parser = SMARTS(optional_names=optional_names)
     tree = parser.parse("_CH2_C_CH")
     # First child of every `atom_symbol` subtree found in the parse tree.
     symbols = [node.children[0] for node in tree.find_data("atom_symbol")]
     assert all(name in symbols for name in optional_names)
Пример #5
0
def test_optional_name_parser():
    """Parse a string made of optional names and verify all of them are found."""
    optional_names = ['_C', '_CH2', '_CH']
    tree = SMARTS(optional_names=optional_names).parse('_CH2_C_CH')
    symbols = []
    for subtree in tree.select('atom_symbol').strees:
        # The symbol text is the first tail entry of the selected subtree.
        symbols.append(subtree.tail[0])
    assert all(name in symbols for name in optional_names)
Пример #6
0
    def __init__(self,
                 smarts_string,
                 parser=None,
                 name=None,
                 overrides=None,
                 typemap=None,
                 *args,
                 **kwargs):
        """Parse ``smarts_string`` and build the graph from the resulting AST.

        Parameters
        ----------
        smarts_string : str
            The SMARTS pattern to parse.
        parser : optional
            Object exposing ``parse(smarts_string)``. A new ``SMARTS()`` is
            created when this is None.
        name, overrides, typemap : optional
            Stored unchanged on the instance.
        *args, **kwargs
            Forwarded to the parent class initializer.
        """
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides
        self.typemap = typemap

        # Only build a default parser when the caller did not supply one.
        active_parser = SMARTS() if parser is None else parser
        self.ast = active_parser.parse(smarts_string)

        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        # No matcher yet; starts out as None.
        self._graph_matcher = None
Пример #7
0
def element_by_smarts_string(smarts_string):
    """Search for an element by a given SMARTS string.

    The string is parsed with the Foyer SMARTS parser; the first
    ``atom_symbol`` node met in a top-down walk of the parse tree is taken as
    the central atom and looked up with ``element_by_symbol``.

    Parameters
    ----------
    smarts_string : str
        SMARTS string representation of an atom type or its local chemical
        context. Note that some SMARTS grammar may not be parsed properly.
        For details, see https://github.com/mosdef-hub/foyer/issues/63

    Returns
    -------
    matched_element : element.Element
        The element found for the central atom symbol

    Raises
    ------
    GMSOError
        If no atom symbol is found in the parse tree, or the symbol found
        does not correspond to an element
    """
    from foyer.smarts import SMARTS

    parse_tree = SMARTS().parse(smarts_string)

    # Lazily scan the tree and stop at the first atom symbol; None when the
    # tree holds no `atom_symbol` node at all.
    first_symbol = next(
        (subtree.children[0]
         for subtree in parse_tree.iter_subtrees_topdown()
         if subtree.data == 'atom_symbol'),
        None)

    if first_symbol is None:
        matched_element = None
    else:
        matched_element = element_by_symbol(first_symbol)

    if matched_element is not None:
        return matched_element

    raise GMSOError(
        f'Failed to find an element from SMARTS string {smarts_string}. The '
        f'parser detected a central node with name {first_symbol}')
Пример #8
0
    def __init__(self, smarts_str):
        """Parse ``smarts_str`` and run the graph-building steps on its AST.

        Parameters
        ----------
        smarts_str : str
            The SMARTS string handed to the ``SMARTS`` parser.
        """
        self.PARSER = SMARTS()
        tree = self.PARSER.parse(smarts_str)
        self.AST = tree
        # The return value is discarded; presumably called for a side effect
        # on the tree -- TODO confirm before removing.
        tree.select('start')

        # Empty containers populated by the helper methods below.
        self.atom_with_id, self.atom_name, self.atom_with_label = {}, {}, {}
        self.NetworkX = nx.Graph()

        # Initialisation helpers.
        self._assign_id()
        self._set_atoms_with_label()

        # Conversion helpers that generate the graph.
        self._add_nodes()
        self._add_edges(tree)
        self._add_label_edges()
Пример #9
0
 def __init__(
     self,
     atomtype_definitions,
     atomtype_overrides,
     non_element_types,
     parser=None,
 ):
     """Store the atom-typing inputs and choose the SMARTS parser to use.

     A truthy ``parser`` is kept as-is; otherwise a ``SMARTS`` parser is
     created from the stored ``non_element_types``.
     """
     self.atomtype_definitions = atomtype_definitions
     self.atomtype_overrides = atomtype_overrides
     self.non_element_types = non_element_types
     if parser:
         self.parser = parser
     else:
         self.parser = SMARTS(self.non_element_types)
Пример #10
0
def element_by_smarts_string(smarts_string):
    """Search for an element by a given SMARTS string.

    Look up an element from a list of known elements by SMARTS string. The
    string is parsed with the Foyer SMARTS parser and the first
    ``atom_symbol`` node returned by the parse tree's ``find_data`` is looked
    up with ``element_by_symbol``.

    Parameters
    ----------
    smarts_string : str
        SMARTS string representation of an atom type or its local chemical
        context. The Foyer SMARTS parser will be used to find the central atom
        and look up an Element. Note that this means some SMARTS grammar may
        not be parsed properly. For details, see
        https://github.com/mosdef-hub/foyer/issues/63

    Returns
    -------
    matched_element : element.Element
        Return an element from the periodic table if we find a match

    Raises
    ------
    GMSOError
        If the SMARTS string contains no atom symbol, or no matching element
        is found for the symbol
    """
    from foyer.smarts import SMARTS

    PARSER = SMARTS()

    # `next` with a default avoids leaking StopIteration to the caller when
    # the parse tree holds no `atom_symbol` node.
    first_atom = next(
        PARSER.parse(smarts_string).find_data('atom_symbol'), None)
    symbol = first_atom.children[0] if first_atom is not None else None

    matched_element = None
    if symbol is not None:
        matched_element = element_by_symbol(symbol)

    if matched_element is None:
        # Both fragments must be f-strings for the placeholders to be filled.
        raise GMSOError(
            f'Failed to find an element from SMARTS string {smarts_string}. The '
            f'parser detected a central node with name {symbol}')

    return matched_element
Пример #11
0
    def __init__(self, smarts_string, parser=None, name=None, overrides=None,
                 *args, **kwargs):
        """Parse ``smarts_string`` and build the graph from the resulting AST.

        Parameters
        ----------
        smarts_string : str
            The SMARTS pattern to parse.
        parser : optional
            Object exposing ``parse(smarts_string)``. A new ``SMARTS()`` is
            created when this is None.
        name, overrides : optional
            Stored unchanged on the instance.
        *args, **kwargs
            Forwarded to the parent class initializer.
        """
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides

        # Only build a default parser when the caller did not supply one.
        active_parser = SMARTS() if parser is None else parser
        self.ast = active_parser.parse(smarts_string)

        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        # No matcher yet; starts out as None.
        self._graph_matcher = None
Пример #12
0
    def __init__(self, smarts_str):
        """Parse ``smarts_str`` and run the graph-building steps on its AST.

        Parameters
        ----------
        smarts_str : str
            The SMARTS string handed to the ``SMARTS`` parser.
        """
        self.PARSER = SMARTS()
        tree = self.PARSER.parse(smarts_str)
        self.AST = tree
        # The return value is discarded; presumably called for a side effect
        # on the tree -- TODO confirm before removing.
        tree.select('start')

        # Empty containers populated by the helper methods below.
        self.atom_with_id, self.atom_name, self.atom_with_label = {}, {}, {}
        self.NetworkX = nx.Graph()

        # Initialisation helpers.
        self._assign_id()
        self._set_atoms_with_label()

        # Conversion helpers that generate the graph.
        self._add_nodes()
        self._add_edges(tree)
        self._add_label_edges()
Пример #13
0
class SMARTSGraph(nx.Graph):
    """A networkx graph built from the parse tree of a SMARTS pattern.

    Every ``atom`` subtree of the AST becomes one node, numbered in the order
    of a top-down traversal. Edges are added between consecutive atoms,
    between a branch and its trunk atom, and between atoms sharing a ring
    label digit.

    Parameters
    ----------
    smarts_string : str
        The SMARTS string outlined in the force field
    parser : foyer.smarts.SMARTS, optional
        The parser whose grammar rules convert the SMARTS string into the
        AST; a new ``SMARTS()`` is created when omitted
    name : str, optional
    overrides : set, optional
        Rules or SMARTSGraph over which this SMARTSGraph takes precedence
    typemap : dict, optional
        Stored on the instance and consulted when evaluating label and ring
        expressions
    *args, **kwargs
        Passed on to the ``networkx.Graph`` initializer

    Attributes
    ----------
    ast
        Parse tree produced by the parser for ``smarts_string``
    _graph_matcher : smarts_graph.SMARTSMatcher or None
        implementation of VF2 that handles subgraph matching; created on the
        first call to ``find_matches``

    Notes
    -----
    SMARTSGraph inherits from networkx.Graph, available features can be found
    at networkx.org/documentation/stable/reference/classes/graph.html
    """

    # Because the first atom in a SMARTS string is always the one we want to
    # type, the graph's nodes needs to be ordered.

    def __init__(self,
                 smarts_string,
                 parser=None,
                 name=None,
                 overrides=None,
                 typemap=None,
                 *args,
                 **kwargs):
        """Parse ``smarts_string`` and populate nodes and edges from its AST."""
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides
        self.typemap = typemap

        # Only build a default parser when the caller did not supply one.
        active_parser = SMARTS() if parser is None else parser
        self.ast = active_parser.parse(smarts_string)

        # Maps id(atom subtree) -> node index; filled by `_add_nodes`.
        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        self._graph_matcher = None

    def _add_nodes(self):
        """Create one node per ``atom`` subtree and record its index."""
        atom_subtrees = [
            subtree for subtree in self.ast.iter_subtrees_topdown()
            if subtree.data == "atom"
        ]
        for index, atom_subtree in enumerate(atom_subtrees):
            self.add_node(index, atom=atom_subtree)
            self._atom_indices[id(atom_subtree)] = index

    def _add_edges(self, ast_node, trunk=None):
        """Create an edge for every bond implied by the AST below ``ast_node``.

        ``trunk`` is the most recent atom seen at this level; each new atom is
        bonded to it and branches are walked recursively with it as anchor.
        """
        index_of = self._atom_indices
        for child in ast_node.children:
            kind = child.data
            if kind == "atom":
                child_idx = index_of[id(child)]
                if trunk is not None:
                    self.add_edge(child_idx, index_of[id(trunk)])
                # Subsequent atoms and branches attach to this atom.
                trunk = child
            elif kind == "branch":
                self._add_edges(child, trunk)

    def _add_label_edges(self):
        """Connect the two atoms that carry the same ring-label digit."""
        # An atom with several ring labels yields e.g. the string '12', so
        # every character is treated as its own label.
        atoms_by_digit = defaultdict(list)
        for _, attributes in self.nodes(data=True):
            labelled_atom = attributes["atom"]
            for label_node in labelled_atom.find_data("atom_label"):
                for digit in label_node.children[0]:
                    atoms_by_digit[digit].append(labelled_atom)

        # Unpacking requires each digit to have been seen on exactly two atoms.
        for first_atom, second_atom in atoms_by_digit.values():
            first_idx = self._atom_indices[id(first_atom)]
            second_idx = self._atom_indices[id(second_atom)]
            self.add_edge(first_idx, second_idx)

    def _node_match(self, host, pattern):
        """Return whether a host node satisfies a pattern node's expression."""
        expression = pattern["atom"].children[0]
        host_atom = host["atom_data"]
        partners = host["bond_partners"]
        return self._atom_expr_matches(expression, host_atom, partners)

    def _atom_expr_matches(self, atom_expr, atom, bond_partners):
        """Recursively evaluate a SMARTS atom expression against ``atom``.

        Raises
        ------
        TypeError
            If ``atom_expr.data`` is not one of the handled expression kinds.
        """
        def evaluate(sub_expression):
            return self._atom_expr_matches(sub_expression, atom, bond_partners)

        kind = atom_expr.data
        if kind == "not_expression":
            return not evaluate(atom_expr.children[0])
        if kind in ("and_expression", "weak_and_expression"):
            return (evaluate(atom_expr.children[0])
                    and evaluate(atom_expr.children[1]))
        if kind == "or_expression":
            return (evaluate(atom_expr.children[0])
                    or evaluate(atom_expr.children[1]))
        if kind == "atom_id":
            return self._atom_id_matches(atom_expr.children[0], atom,
                                         bond_partners, self.typemap)
        if kind == "atom_symbol":
            return self._atom_id_matches(atom_expr, atom, bond_partners,
                                         self.typemap)
        raise TypeError("Expected atom_id, atom_symbol, and_expression, "
                        "or_expression, or not_expression. "
                        "Got {}".format(atom_expr.data))

    @staticmethod
    def _atom_id_matches(atom_id, atom, bond_partners, typemap):
        """Test one primitive (number, symbol, label, neighbors, rings).

        Returns None when ``atom_id.data`` is not a handled primitive.
        """
        atomic_num = atom.atomic_number
        atom_name = atom.name
        atom_idx = atom.index

        kind = atom_id.data
        if kind == "atomic_num":
            return atomic_num == int(atom_id.children[0])
        if kind == "atom_symbol":
            symbol = str(atom_id.children[0])
            if symbol == "*":
                return True
            if symbol.startswith("_"):
                # Store non-element elements in .name
                return atom_name == symbol
            return atomic_num == pt.AtomicNum[symbol]
        if kind == "has_label":
            # Strip the % sign from the beginning.
            label = atom_id.children[0][1:]
            return label in typemap[atom_idx]["whitelist"]
        if kind == "neighbor_count":
            return len(bond_partners) == int(atom_id.children[0])
        if kind == "ring_size":
            wanted_len = int(atom_id.children[0])
            return any(
                len(cycle) == wanted_len
                for cycle in typemap[atom_idx]["cycles"])
        if kind == "ring_count":
            n_cycles = len(typemap[atom_idx]["cycles"])
            return n_cycles == int(atom_id.children[0])
        if kind == "matches_string":
            raise NotImplementedError("matches_string is not yet implemented")
        return None

    def find_matches(self, topology_graph, typemap):
        """Yield the atoms of a topology that match this SMARTS pattern.

        Parameters
        ----------
        topology_graph : TopologyGraph
            The topology that we are trying to atomtype.
        typemap : dict
            The target typemap being used/edited

        Yields
        ------
        The host-graph node mapped onto pattern node 0, once per distinct
        node.

        Notes
        -----
        When this function gets used in atomtyper.py, we actively modify the
        white- and blacklists of the atoms in `topology` after finding a match.
        This means that between every successive call of
        `subgraph_isomorphisms_iter()`, the topology against which we are
        matching may have actually changed. Currently, we take advantage of this
        behavior in some edges cases (e.g. see `test_hexa_coordinated` in
        `test_smarts.py`).

        """
        # Note: Needs to be updated in sync with the grammar in `smarts.py`.
        ring_tokens = ("ring_size", "ring_count")
        needs_cycles = any(
            list(self.ast.find_data(token)) for token in ring_tokens)
        topology_graph.add_bond_partners()
        _prepare_atoms(topology_graph, typemap, compute_cycles=needs_cycles)

        if self._graph_matcher is None:
            first_atom = nx.get_node_attributes(self, name="atom")[0]
            element = None
            # An element is passed to the matcher only when the first atom
            # has exactly one symbol and no negation.
            if (len(list(first_atom.find_data("atom_symbol"))) == 1
                    and not list(first_atom.find_data("not_expression"))):
                try:
                    element = next(
                        first_atom.find_data("atom_symbol")).children[0]
                except IndexError:
                    try:
                        atomic_num = next(
                            first_atom.find_data("atomic_num")).children[0]
                        element = pt.Element[int(atomic_num)]
                    except IndexError:
                        element = None
            self._graph_matcher = SMARTSMatcher(
                topology_graph,
                self,
                node_match=self._node_match,
                element=element,
                typemap=typemap,
            )

        already_yielded = set()
        for mapping in self._graph_matcher.subgraph_isomorphisms_iter():
            # Invert host->pattern into pattern->host.
            pattern_to_host = {
                pattern_node: host_node
                for host_node, pattern_node in mapping.items()
            }
            # The first node in the smarts graph always corresponds to the atom
            # that we are trying to match.
            atom_index = pattern_to_host[0]
            # Don't yield duplicate matches found via matching the pattern in a
            # different order.
            if atom_index in already_yielded:
                continue
            already_yielded.add(atom_index)
            yield atom_index
Пример #14
0
import parmed as pmd
import plyplus
import pytest

from foyer.forcefield import generate_topology, Forcefield
from foyer.rule import Rule
from foyer.smarts import SMARTS
from foyer.tests.utils import get_fn

# Module-level parser instance, created once at import time and referenced by
# the helpers and tests in this module.
PARSER = SMARTS()


def _rule_match(atom, smart, result):
    """Assert that a ``Rule`` built from ``smart`` gives ``result`` for ``atom``.

    The comparison uses identity (``is``), so ``result`` is expected to be the
    very object returned by ``rule.matches``.
    """
    outcome = Rule('test', parser=PARSER, smarts_string=smart).matches(atom)
    assert outcome is result


def test_ast():
    """Walk the first branch of the parse tree: start -> atom -> atom_symbol -> 'O'."""
    root = PARSER.parse('O([H&X1])(H)')
    assert root.head == "start"

    first_atom = root.tail[0]
    assert first_atom.head == "atom"

    symbol_node = first_atom.tail[0]
    assert symbol_node.head == "atom_symbol"
    assert str(symbol_node.tail[0]) == "O"


def test_parse():
    """Smoke test: the listed SMARTS patterns parse without raising."""
    for pattern in ('[#6][#1](C)H', '[O;X2]([C;X4](F)(*)(*))[C;X4]'):
        PARSER.parse(pattern)
Пример #15
0
def test_optional_names_bad_syntax():
    """Building a parser from this list of optional names must raise FoyerError."""
    with pytest.raises(FoyerError):
        SMARTS(optional_names=['_C', 'XXX', 'C'])
Пример #16
0
class SMARTSGraph(nx.Graph):
    """A networkx graph built from the parse tree of a SMARTS pattern.

    Every ``atom`` subtree of the AST becomes one node, numbered in the order
    of a top-down traversal. Edges are added between consecutive atoms,
    between a branch and its trunk atom, and between atoms sharing a ring
    label digit.

    Parameters
    ----------
    smarts_string : str
        The SMARTS string outlined in the force field
    parser : foyer.smarts.SMARTS, optional
        The parser whose grammar rules convert the SMARTS string into the
        AST; a new ``SMARTS()`` is created when omitted
    name : str, optional
    overrides : set, optional
        Rules or SMARTSGraph over which this SMARTSGraph takes precedence
    typemap : dict, optional
        Stored on the instance and consulted when evaluating label and ring
        expressions

    Other Parameters
    ----------------
    args
    kwargs
        Passed on to the ``networkx.Graph`` initializer

    Attributes
    ----------
    ast
        Parse tree produced by the parser for ``smarts_string``
    _graph_matcher : smarts_graph.SMARTSMatcher or None
        implementation of VF2 that handles subgraph matching; created on the
        first call to ``find_matches``
    """

    # Because the first atom in a SMARTS string is always the one we want to
    # type, the graph's nodes needs to be ordered.

    def __init__(self,
                 smarts_string,
                 parser=None,
                 name=None,
                 overrides=None,
                 typemap=None,
                 *args,
                 **kwargs):
        """Parse ``smarts_string`` and populate nodes and edges from its AST."""
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides
        self.typemap = typemap

        # Only build a default parser when the caller did not supply one.
        active_parser = SMARTS() if parser is None else parser
        self.ast = active_parser.parse(smarts_string)

        # Maps id(atom subtree) -> node index; filled by `_add_nodes`.
        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        self._graph_matcher = None

    def _add_nodes(self):
        """Create one node per ``atom`` subtree and record its index."""
        atom_subtrees = [
            subtree for subtree in self.ast.iter_subtrees_topdown()
            if subtree.data == 'atom'
        ]
        for index, atom_subtree in enumerate(atom_subtrees):
            self.add_node(index, atom=atom_subtree)
            self._atom_indices[id(atom_subtree)] = index

    def _add_edges(self, ast_node, trunk=None):
        """Create an edge for every bond implied by the AST below ``ast_node``.

        ``trunk`` is the most recent atom seen at this level; each new atom is
        bonded to it and branches are walked recursively with it as anchor.
        """
        index_of = self._atom_indices
        for child in ast_node.children:
            kind = child.data
            if kind == 'atom':
                child_idx = index_of[id(child)]
                if trunk is not None:
                    self.add_edge(child_idx, index_of[id(trunk)])
                # Subsequent atoms and branches attach to this atom.
                trunk = child
            elif kind == 'branch':
                self._add_edges(child, trunk)

    def _add_label_edges(self):
        """Connect the two atoms that carry the same ring-label digit."""
        # An atom with several ring labels yields e.g. the string '12', so
        # every character is treated as its own label.
        atoms_by_digit = defaultdict(list)
        for _, attributes in self.nodes(data=True):
            labelled_atom = attributes["atom"]
            for label_node in labelled_atom.find_data("atom_label"):
                for digit in label_node.children[0]:
                    atoms_by_digit[digit].append(labelled_atom)

        # Unpacking requires each digit to have been seen on exactly two atoms.
        for first_atom, second_atom in atoms_by_digit.values():
            first_idx = self._atom_indices[id(first_atom)]
            second_idx = self._atom_indices[id(second_atom)]
            self.add_edge(first_idx, second_idx)

    def _node_match(self, host, pattern):
        """Return whether a host node satisfies a pattern node's expression."""
        expression = pattern['atom'].children[0]
        host_atom = host['atom']
        return self._atom_expr_matches(expression, host_atom)

    def _atom_expr_matches(self, atom_expr, atom):
        """Recursively evaluate a SMARTS atom expression against ``atom``.

        Raises
        ------
        TypeError
            If ``atom_expr.data`` is not one of the handled expression kinds.
        """
        evaluate = self._atom_expr_matches
        kind = atom_expr.data
        if kind == 'not_expression':
            return not evaluate(atom_expr.children[0], atom)
        if kind in ('and_expression', 'weak_and_expression'):
            return (evaluate(atom_expr.children[0], atom)
                    and evaluate(atom_expr.children[1], atom))
        if kind == 'or_expression':
            return (evaluate(atom_expr.children[0], atom)
                    or evaluate(atom_expr.children[1], atom))
        if kind == 'atom_id':
            return self._atom_id_matches(atom_expr.children[0], atom,
                                         self.typemap)
        if kind == 'atom_symbol':
            return self._atom_id_matches(atom_expr, atom, self.typemap)
        raise TypeError('Expected atom_id, atom_symbol, and_expression, '
                        'or_expression, or not_expression. '
                        'Got {}'.format(atom_expr.data))

    @staticmethod
    def _atom_id_matches(atom_id, atom, typemap):
        """Test one primitive (number, symbol, label, neighbors, rings).

        Returns None when ``atom_id.data`` is not a handled primitive.
        """
        atomic_num = atom.element
        kind = atom_id.data
        if kind == 'atomic_num':
            return atomic_num == int(atom_id.children[0])
        if kind == 'atom_symbol':
            symbol = str(atom_id.children[0])
            if symbol == '*':
                return True
            if symbol.startswith('_'):
                # Store non-element elements in .name
                return atom.name == symbol
            return atomic_num == pt.AtomicNum[symbol]
        if kind == 'has_label':
            # Strip the % sign from the beginning.
            label = atom_id.children[0][1:]
            return label in typemap[atom.idx]['whitelist']
        if kind == 'neighbor_count':
            return len(atom.bond_partners) == int(atom_id.children[0])
        if kind == 'ring_size':
            wanted_len = int(atom_id.children[0])
            return any(
                len(cycle) == wanted_len
                for cycle in typemap[atom.idx]['cycles'])
        if kind == 'ring_count':
            n_cycles = len(typemap[atom.idx]['cycles'])
            return n_cycles == int(atom_id.children[0])
        if kind == 'matches_string':
            raise NotImplementedError('matches_string is not yet implemented')
        return None

    def find_matches(self, structure, typemap):
        """Yield the atoms of a structure that match this SMARTS pattern.

        Parameters
        ----------
        structure
            Object exposing ``atoms`` (each with ``idx``) and ``bonds`` (each
            with ``atom1``/``atom2``); a host graph is built from it.
        typemap : dict
            Passed to ``_prepare_atoms`` and to the matcher.

        Yields
        ------
        The host-graph node mapped onto pattern node 0, once per distinct
        node.

        Notes
        -----
        When this function gets used in atomtyper.py, we actively modify the
        white- and blacklists of the atoms in `topology` after finding a match.
        This means that between every successive call of
        `subgraph_isomorphisms_iter()`, the topology against which we are
        matching may have actually changed. Currently, we take advantage of this
        behavior in some edges cases (e.g. see `test_hexa_coordinated` in
        `test_smarts.py`).

        """
        # Note: Needs to be updated in sync with the grammar in `smarts.py`.
        ring_tokens = ('ring_size', 'ring_count')
        needs_cycles = any(
            list(self.ast.find_data(token)) for token in ring_tokens)
        _prepare_atoms(structure, typemap, compute_cycles=needs_cycles)

        # Host graph: one node per atom index, one edge per bond.
        host_graph = nx.Graph()
        host_graph.add_nodes_from(
            (host_atom.idx, {'atom': host_atom})
            for host_atom in structure.atoms)
        host_graph.add_edges_from(
            (bond.atom1.idx, bond.atom2.idx) for bond in structure.bonds)

        if self._graph_matcher is None:
            first_atom = nx.get_node_attributes(self, name='atom')[0]
            element = None
            # An element is passed to the matcher only when the first atom
            # has exactly one symbol and no negation.
            if (len(list(first_atom.find_data('atom_symbol'))) == 1
                    and not list(first_atom.find_data('not_expression'))):
                try:
                    element = next(
                        first_atom.find_data('atom_symbol')).children[0]
                except IndexError:
                    try:
                        atomic_num = next(
                            first_atom.find_data('atomic_num')).children[0]
                        element = pt.Element[int(atomic_num)]
                    except IndexError:
                        element = None
            self._graph_matcher = SMARTSMatcher(host_graph,
                                                self,
                                                node_match=self._node_match,
                                                element=element,
                                                typemap=typemap)

        already_yielded = set()
        for mapping in self._graph_matcher.subgraph_isomorphisms_iter():
            # Invert host->pattern into pattern->host.
            pattern_to_host = {
                pattern_node: host_node
                for host_node, pattern_node in mapping.items()
            }
            # The first node in the smarts graph always corresponds to the atom
            # that we are trying to match.
            atom_index = pattern_to_host[0]
            # Don't yield duplicate matches found via matching the pattern in a
            # different order.
            if atom_index in already_yielded:
                continue
            already_yielded.add(atom_index)
            yield atom_index
Пример #17
0
def test_optional_names_good_syntax():
    """Building a parser from these optional names must not raise."""
    SMARTS(optional_names=['_C', '_CH2', '_CH'])
Пример #18
0
 def test_optional_names_good_syntax(self):
     """Building a parser from these optional names must not raise."""
     SMARTS(optional_names=["_C", "_CH2", "_CH"])
Пример #19
0
 def test_optional_names_bad_syntax(self):
     """Building a parser from this list of optional names must raise FoyerError."""
     with pytest.raises(FoyerError):
         SMARTS(optional_names=["_C", "XXX", "C"])
Пример #20
0
class SMARTSGraph(nx.Graph):
    """A graph representation of a SMARTS pattern.

    The SMARTS string is parsed into an AST; every ``atom`` subtree selected
    from the AST becomes a node and edges are added for chains, branches and
    shared ring labels.

    Attributes
    ----------
    smarts_string : str
        The SMARTS string this graph was built from
    parser : foyer.smarts.SMARTS
        Parser used to produce the AST (a new ``SMARTS()`` when None)
    name : str
    overrides : set

    Other Parameters
    ----------
    args
    kwargs
        Forwarded to the parent class initializer
    """
    # Because the first atom in a SMARTS string is always the one we want to
    # type, the graph's nodes needs to be ordered.
    node_dict_factory = OrderedDict

    def __init__(self, smarts_string, parser=None, name=None, overrides=None,
                 *args, **kwargs):
        """Parse ``smarts_string`` and populate nodes and edges from its AST."""
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides

        # Only build a default parser when the caller did not supply one.
        if parser is None:
            self.ast = SMARTS().parse(smarts_string)
        else:
            self.ast = parser.parse(smarts_string)

        # Maps id(atom subtree) -> node index; filled by `_add_nodes`.
        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        # Created on the first call to `find_matches`.
        self._graph_matcher = None

    def _add_nodes(self):
        """Add all atoms in the SMARTS string as nodes in the graph."""
        for n, atom in enumerate(self.ast.select('atom')):
            self.add_node(n, atom=atom)
            self._atom_indices[id(atom)] = n

    def _add_edges(self, ast_node, trunk=None):
        """Add all bonds in the SMARTS string as edges in the graph.

        Walks ``ast_node.tail``; ``trunk`` is the atom a branch hangs off and
        is passed down when recursing into ``branch`` children.
        """
        atom_indices = self._atom_indices
        for atom in ast_node.tail:
            if atom.head == 'atom':
                atom_idx = atom_indices[id(atom)]
                # The first atom inside a branch is bonded to the trunk atom.
                if atom.is_first_kid and atom.parent().head == 'branch':
                    trunk_idx = atom_indices[id(trunk)]
                    self.add_edge(atom_idx, trunk_idx)
                if not atom.is_last_kid:
                    if atom.next_kid.head == 'atom':
                        # Consecutive atoms are bonded to each other.
                        next_idx = atom_indices[id(atom.next_kid)]
                        self.add_edge(atom_idx, next_idx)
                    elif atom.next_kid.head == 'branch':
                        # Remember this atom as the anchor of the next branch.
                        trunk = atom
                else:  # We traveled through the whole branch.
                    return
            elif atom.head == 'branch':
                self._add_edges(atom, trunk)

    def _add_label_edges(self):
        """Add edges between all atoms with the same atom_label in rings."""
        labels = self.ast.select('atom_label')
        if not labels:
            return

        # We need each individual label and atoms with multiple ring labels
        # would yield e.g. the string '12' so split those up.
        label_digits = defaultdict(list)
        for label in labels:
            digits = list(label.tail[0])
            for digit in digits:
                label_digits[digit].append(label.parent())

        # Unpacking requires each digit to have been seen on exactly two atoms.
        for label, (atom1, atom2) in label_digits.items():
            atom1_idx = self._atom_indices[id(atom1)]
            atom2_idx = self._atom_indices[id(atom2)]
            self.add_edge(atom1_idx, atom2_idx)

    def _node_match(self, host, pattern):
        """Return whether a host node satisfies a pattern node's expression."""
        atom_expr = pattern['atom'].tail[0]
        atom = host['atom']
        return self._atom_expr_matches(atom_expr, atom)

    def _atom_expr_matches(self, atom_expr, atom):
        """Recursively evaluate a SMARTS atom expression against ``atom``.

        Raises TypeError when ``atom_expr.head`` is not a handled kind.
        """
        if atom_expr.head == 'not_expression':
            return not self._atom_expr_matches(atom_expr.tail[0], atom)
        elif atom_expr.head in ('and_expression', 'weak_and_expression'):
            return (self._atom_expr_matches(atom_expr.tail[0], atom) and
                    self._atom_expr_matches(atom_expr.tail[1], atom))
        elif atom_expr.head == 'or_expression':
            return (self._atom_expr_matches(atom_expr.tail[0], atom) or
                    self._atom_expr_matches(atom_expr.tail[1], atom))
        elif atom_expr.head == 'atom_id':
            return self._atom_id_matches(atom_expr.tail[0], atom)
        elif atom_expr.head == 'atom_symbol':
            return self._atom_id_matches(atom_expr, atom)
        else:
            raise TypeError('Expected atom_id, atom_symbol, and_expression, '
                            'or_expression, or not_expression. '
                            'Got {}'.format(atom_expr.head))

    @staticmethod
    def _atom_id_matches(atom_id, atom):
        """Test one primitive (number, symbol, label, neighbors, rings).

        Falls through and returns None when ``atom_id.head`` is not one of
        the handled primitives.
        """
        atomic_num = atom.element.atomic_number
        if atom_id.head == 'atomic_num':
            return atomic_num == int(atom_id.tail[0])
        elif atom_id.head == 'atom_symbol':
            if str(atom_id.tail[0]) == '*':
                # Wildcard matches any atom.
                return True
            elif str(atom_id.tail[0]).startswith('_'):
                # Underscore-prefixed symbols are compared by element name.
                return atom.element.name == str(atom_id.tail[0])
            else:
                return atomic_num == pt.AtomicNum[str(atom_id.tail[0])]
        elif atom_id.head == 'has_label':
            label = atom_id.tail[0][1:]  # Strip the % sign from the beginning.
            return label in atom.whitelist
        elif atom_id.head == 'neighbor_count':
            return len(atom.bond_partners) == int(atom_id.tail[0])
        elif atom_id.head == 'ring_size':
            # True if any cycle through the atom has the requested length.
            cycle_len = int(atom_id.tail[0])
            for cycle in atom.cycles:
                if len(cycle) == cycle_len:
                    return True
            return False
        elif atom_id.head == 'ring_count':
            n_cycles = len(atom.cycles)
            if n_cycles == int(atom_id.tail[0]):
                return True
            return False
        elif atom_id.head == 'matches_string':
            raise NotImplementedError('matches_string is not yet implemented')

    def find_matches(self, topology):
        """Return sets of atoms that match this SMARTS pattern in a topology.

        This is a generator: it yields the index of each host atom mapped
        onto pattern node 0, once per distinct index.

        Notes:
        ------
        When this function gets used in atomtyper.py, we actively modify the
        white- and blacklists of the atoms in `topology` after finding a match.
        This means that between every successive call of
        `subgraph_isomorphisms_iter()`, the topology against which we are
        matching may have actually changed. Currently, we take advantage of this
        behavior in some edges cases (e.g. see `test_hexa_coordinated` in
        `test_smarts.py`).

        """
        # Note: Needs to be updated in sync with the grammar in `smarts.py`.
        ring_tokens = ['ring_size', 'ring_count']
        has_ring_rules = any(self.ast.select(token)
                             for token in ring_tokens)
        _prepare_atoms(topology, compute_cycles=has_ring_rules)

        # Host graph: one node per atom index, one edge per bond.
        top_graph = nx.Graph()
        top_graph.add_nodes_from(((a.index, {'atom': a})
                                  for a in topology.atoms()))
        top_graph.add_edges_from(((b[0].index, b[1].index)
                                  for b in topology.bonds()))

        # The matcher is built once and reused on later calls.
        if self._graph_matcher is None:
            atom = nx.get_node_attributes(self, name='atom')[0]
            # An element is passed to the matcher only when the first atom
            # has exactly one symbol and no negation.
            if len(atom.select('atom_symbol')) == 1 and not atom.select('not_expression'):
                try:
                    element = atom.select('atom_symbol').strees[0].tail[0]
                except IndexError:
                    try:
                        atomic_num = atom.select('atomic_num').strees[0].tail[0]
                        element = pt.Element[int(atomic_num)]
                    except IndexError:
                        element = None
            else:
                element = None
            self._graph_matcher = SMARTSMatcher(top_graph, self,
                                                node_match=self._node_match,
                                                element=element)

        matched_atoms = set()
        for mapping in self._graph_matcher.subgraph_isomorphisms_iter():
            # Invert the mapping so it is keyed by pattern node.
            mapping = {node_id: atom_id for atom_id, node_id in mapping.items()}
            # The first node in the smarts graph always corresponds to the atom
            # that we are trying to match.
            atom_index = mapping[0]
            # Don't yield duplicate matches found via matching the pattern in a
            # different order.
            if atom_index not in matched_atoms:
                matched_atoms.add(atom_index)
                yield atom_index
Пример #21
0
class Ast2Nx:
    """Parse a SMARTS string into an AST and convert it to a NetworkX graph.

    Supports graph/subgraph isomorphism using the VF2 algorithm: ``a == b``
    tests whole-graph isomorphism and ``b in a`` tests whether a subgraph of
    ``a`` is isomorphic to ``b``.

    Parameters
    ----------
    smarts_str : SMARTS string
        The SMARTS string used to generate the NetworkX graph.

    Attributes
    ----------
    PARSER : SMARTS
        The SMARTS parser.
    AST : STree
        The abstract syntax tree generated from the SMARTS string.
    atom_with_id : dict, {id(atom): (atom, unique_id)}
        Assigns each atom a unique id. The ``(atom, unique_id)`` tuple is the
        object used as the node in the graph.
    atom_name : dict, {id(atom): name}
        Assigns each atom a name, stored as the ``name`` node attribute. The
        name can be repeated in order to make graph/sub-graph isomorphism.
    atom_with_label : dict, {id(atom): [label, ...]}
        In SMARTS, atom_label is used to mark the joint points in rings. Each
        value holds the individual characters of the atom's label.
    NetworkX : networkx.Graph
        The graph built from the AST.
    """

    def __init__(self, smarts_str):
        self.PARSER = SMARTS()
        self.AST = self.PARSER.parse(smarts_str)
        # NOTE(review): the result is discarded; presumably this call is kept
        # for a side effect of `select` on the tree -- confirm before removing.
        self.AST.select('start')
        self.atom_with_id = {}
        self.atom_name = {}
        self.atom_with_label = {}
        self.NetworkX = nx.Graph()

        # Populate the lookup tables first ...
        self._assign_id()
        self._set_atoms_with_label()

        # ... then build the graph from them.
        self._add_nodes()
        self._add_edges(self.AST)
        self._add_label_edges()

    def _assign_id(self):
        """Assign a unique, consecutive id and a name to each atom."""
        for atom_id, atom in enumerate(self.AST.select('atom')):
            self.atom_with_id[id(atom)] = (atom, atom_id)
            self.atom_name[id(atom)] = self._set_atom_name(atom)

    def _set_atoms_with_label(self):
        """Record the label characters of every labelled atom."""
        for label in self.AST.select('atom_label'):
            owner = label.parent()
            assert owner.head == 'atom', \
                "the parent of atom_label has to be atom."
            # A label such as '12' is split into its characters ['1', '2'].
            self.atom_with_label[id(owner)] = list(label.tail[0])

    def _set_atom_name(self, atom):
        """Build the name assigned to `atom`.

        The name is designed for isomorphism checks: the matcher first checks
        the structure and then the identity of each node, and the name can be
        used for that identity check. ``atom_label`` children are ignored.

        Returns
        -------
        str
            ``'atom(<child>, <child>, ...)'`` built from ``str()`` of every
            child of `atom` that is not an ``atom_label``.
        """
        # ToDo: Need more details to specifically handle the atom name.
        parts = [str(child) for child in atom.tail
                 if child.head != 'atom_label']
        return 'atom(' + ', '.join(parts) + ')'

    def _get_atom_with_id(self, atom):
        """Return the ``(atom, unique_id)`` entry for `atom`.

        `atom` may be the atom object itself or the ``id(atom)`` key under
        which it was registered.
        """
        key = id(atom)
        if key in self.atom_with_id:
            return self.atom_with_id[key]
        return self.atom_with_id[atom]

    def _get_atom_name(self, atom):
        """Return the name for `atom` (an atom object or its ``id()`` key)."""
        key = id(atom)
        if key in self.atom_name:
            return self.atom_name[key]
        return self.atom_name[atom]

    def _get_edge_name(self, atom1, atom2):
        """Return the edge name ``'<name of atom1>-<name of atom2>'``."""
        return self._get_atom_name(atom1) + '-' + self._get_atom_name(atom2)

    def _add_nodes(self):
        """Add every atom of the AST as a node carrying its name."""
        for atom in self.AST.select('atom'):
            self.NetworkX.add_node(self._get_atom_with_id(atom),
                                   name=self._get_atom_name(atom))

    def _add_edges(self, ASTtree, trunk=None):
        """Add the edges implied by atom adjacency and branching.

        Parameters
        ----------
        ASTtree : STree
            The (sub)tree whose direct children are walked.
        trunk : STree, optional
            The atom that the first atom of a branch gets connected to.
        """
        for atom in ASTtree.tail:
            if atom.head == 'atom':
                if atom.is_first_kid and (atom.parent().head == 'branch'):
                    # The first atom of a branch connects back to the trunk.
                    assert trunk is not None, "can't add branch to a None root!"
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom),
                        self._get_atom_with_id(trunk),
                        name=self._get_edge_name(atom, trunk))
                if atom.is_last_kid:
                    return  # we already travelled through the whole branch
                follower = atom.next_kid
                if follower.head == 'atom':
                    # Consecutive atoms are connected to each other.
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom),
                        self._get_atom_with_id(follower),
                        name=self._get_edge_name(atom, follower))
                elif follower.head == 'branch':
                    # This atom becomes the trunk of the upcoming branch.
                    trunk = atom
            elif atom.head == 'branch':
                # A new branch appeared, so recurse into it.
                self._add_edges(atom, trunk)

    def _add_label_edges(self):
        """Connect all atoms that share an atom_label character."""
        for atom_id, labels in self.atom_with_label.items():
            for atom_id_inner, labels_inner in self.atom_with_label.items():
                # The keys are integer ids: compare by value, not identity.
                if atom_id_inner == atom_id:
                    continue
                if any(label in labels for label in labels_inner):
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom_id_inner),
                        self._get_atom_with_id(atom_id),
                        name=self._get_edge_name(atom_id_inner, atom_id))

    def to_file(self, name_graphml='AST2NX.graphml'):
        """Write the graph to a GraphML file.

        GraphML can be read by visualization tools such as Cytoscape. The
        ``.graphml`` extension is appended when `name_graphml` lacks it.
        """
        if not name_graphml.endswith('.graphml'):
            name_graphml = name_graphml + '.graphml'
        nx.write_graphml(self.NetworkX, name_graphml)

    # ===== isomorphism functions ===== #
    def foyer_node_match(self, G1_node, G2_node):
        """Return True if the two node descriptions compare equal.

        For example, atomic_num(6) ?= atom(C), how to deal with
        and_expression, and how to deal with wildcard (*)?
        """
        # ToDo: need a completed atom matching rules here.
        return bool(G1_node == G2_node)

    def foyer_edge_match(self, G1_edge, G2_edge):
        """Return True if the two edge descriptions compare equal."""
        # ToDo: need a completed bond matching rules here.
        return bool(G1_edge == G2_edge)

    def __eq__(self, other):
        """Whole graph isomorphism.

        Returns ``NotImplemented`` when `other` carries no ``NetworkX`` graph,
        so comparing against unrelated objects yields False instead of
        raising.
        """
        other_graph = getattr(other, 'NetworkX', None)
        if other_graph is None:
            return NotImplemented
        matcher = isomorphism.GraphMatcher(self.NetworkX,
                                           other_graph,
                                           node_match=self.foyer_node_match,
                                           edge_match=self.foyer_edge_match)
        return matcher.is_isomorphic()

    def __contains__(self, item):
        """Subgraph isomorphism: is a subgraph of this graph isomorphic to
        `item`'s graph?
        """
        # ToDo: iterator for sub-graph isomorphism.
        matcher = isomorphism.GraphMatcher(self.NetworkX,
                                           item.NetworkX,
                                           node_match=self.foyer_node_match,
                                           edge_match=self.foyer_edge_match)
        return matcher.subgraph_is_isomorphic()
Пример #22
0
 def smarts_parser(self):
     """Return a newly constructed ``SMARTS`` parser."""
     parser = SMARTS()
     return parser
Пример #23
0
class SMARTSGraph(nx.Graph):
    """A graph representation of a SMARTS pattern.

    Every ``atom`` node of the parsed SMARTS string becomes a graph node
    (numbered in the order the parser's ``select('atom')`` returns them) and
    every bond, branch connection and ring closure becomes an edge.

    Parameters
    ----------
    smarts_string : str
    parser : foyer.smarts.SMARTS, optional
        Parser used for `smarts_string`; a new ``SMARTS()`` is created when
        omitted.
    name : str
    overrides : set

    Attributes
    ----------
    smarts_string : str
    name : str
    overrides : set
    ast
        The tree returned by the parser for `smarts_string`.

    Other Parameters
    ----------------
    args
    kwargs
        Forwarded to ``networkx.Graph.__init__``.
    """
    # Because the first atom in a SMARTS string is always the one we want to
    # type, the graph's nodes needs to be ordered.
    node_dict_factory = OrderedDict

    def __init__(self, smarts_string, parser=None, name=None, overrides=None,
                 *args, **kwargs):
        super(SMARTSGraph, self).__init__(*args, **kwargs)

        self.smarts_string = smarts_string
        self.name = name
        self.overrides = overrides

        if parser is None:
            parser = SMARTS()
        self.ast = parser.parse(smarts_string)

        # Maps id(atom AST node) -> node index in this graph.
        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges(self.ast)
        self._add_label_edges()
        # Built lazily on the first call to `find_matches`.
        self._graph_matcher = None

    def _add_nodes(self):
        """Add all atoms in the SMARTS string as nodes in the graph."""
        for index, atom in enumerate(self.ast.select('atom')):
            self.add_node(index, atom=atom)
            self._atom_indices[id(atom)] = index

    def _add_edges(self, ast_node, trunk=None):
        """Add all bonds in the SMARTS string as edges in the graph.

        Parameters
        ----------
        ast_node
            The (sub)tree whose direct children are walked.
        trunk : optional
            The atom that the first atom of a branch gets bonded to.
        """
        atom_indices = self._atom_indices
        for atom in ast_node.tail:
            if atom.head == 'atom':
                atom_idx = atom_indices[id(atom)]
                if atom.is_first_kid and atom.parent().head == 'branch':
                    # The first atom of a branch bonds back to the trunk.
                    trunk_idx = atom_indices[id(trunk)]
                    self.add_edge(atom_idx, trunk_idx)
                if atom.is_last_kid:
                    # We traveled through the whole branch.
                    return
                follower = atom.next_kid
                if follower.head == 'atom':
                    self.add_edge(atom_idx, atom_indices[id(follower)])
                elif follower.head == 'branch':
                    # This atom is the trunk of the upcoming branch.
                    trunk = atom
            elif atom.head == 'branch':
                self._add_edges(atom, trunk)

    def _add_label_edges(self):
        """Add edges between all atoms with the same atom_label in rings.

        Raises
        ------
        ValueError
            If a label digit does not occur on exactly two atoms (raised by
            the two-element unpacking below).
        """
        labels = self.ast.select('atom_label')
        if not labels:
            return

        # We need each individual label and atoms with multiple ring labels
        # would yield e.g. the string '12' so split those up.
        label_digits = defaultdict(list)
        for label in labels:
            for digit in label.tail[0]:
                label_digits[digit].append(label.parent())

        for atom1, atom2 in label_digits.values():
            self.add_edge(self._atom_indices[id(atom1)],
                          self._atom_indices[id(atom2)])

    def _node_match(self, host, pattern):
        """Return whether the topology atom in `host` satisfies `pattern`.

        Both arguments are node attribute dicts holding an ``'atom'`` entry:
        the topology atom for `host`, the SMARTS atom AST node for `pattern`.
        """
        atom_expr = pattern['atom'].tail[0]
        return self._atom_expr_matches(atom_expr, host['atom'])

    def _atom_expr_matches(self, atom_expr, atom):
        """Recursively evaluate a SMARTS atom expression against `atom`.

        Raises
        ------
        TypeError
            If the head of `atom_expr` is not a supported expression type.
        """
        head = atom_expr.head
        if head == 'not_expression':
            return not self._atom_expr_matches(atom_expr.tail[0], atom)
        elif head in ('and_expression', 'weak_and_expression'):
            return (self._atom_expr_matches(atom_expr.tail[0], atom) and
                    self._atom_expr_matches(atom_expr.tail[1], atom))
        elif head == 'or_expression':
            return (self._atom_expr_matches(atom_expr.tail[0], atom) or
                    self._atom_expr_matches(atom_expr.tail[1], atom))
        elif head == 'atom_id':
            return self._atom_id_matches(atom_expr.tail[0], atom)
        elif head == 'atom_symbol':
            return self._atom_id_matches(atom_expr, atom)
        else:
            raise TypeError('Expected atom_id, atom_symbol, and_expression, '
                            'or_expression, or not_expression. '
                            'Got {}'.format(atom_expr.head))

    @staticmethod
    def _atom_id_matches(atom_id, atom):
        """Return whether `atom` satisfies the single primitive `atom_id`.

        Unrecognised primitives fall through and return None (falsy).

        Raises
        ------
        NotImplementedError
            For ``matches_string`` primitives.
        """
        atomic_num = atom.element.atomic_number
        head = atom_id.head
        if head == 'atomic_num':
            return atomic_num == int(atom_id.tail[0])
        elif head == 'atom_symbol':
            symbol = str(atom_id.tail[0])
            if symbol == '*':
                # Wildcard: matches any atom.
                return True
            elif symbol.startswith('_'):
                # Underscore-prefixed symbols are compared by element name.
                return atom.element.name == symbol
            else:
                return atomic_num == pt.AtomicNum[symbol]
        elif head == 'has_label':
            label = atom_id.tail[0][1:]  # Strip the % sign from the beginning.
            return label in atom.whitelist
        elif head == 'neighbor_count':
            return len(atom.bond_partners) == int(atom_id.tail[0])
        elif head == 'ring_size':
            cycle_len = int(atom_id.tail[0])
            return any(len(cycle) == cycle_len for cycle in atom.cycles)
        elif head == 'ring_count':
            return len(atom.cycles) == int(atom_id.tail[0])
        elif head == 'matches_string':
            raise NotImplementedError('matches_string is not yet implemented')

    def find_matches(self, topology):
        """Yield indices of atoms in `topology` that match this pattern.

        Each yielded value is the topology index of an atom that can take the
        place of the first atom of the SMARTS pattern; every index is yielded
        at most once.

        Notes:
        ------
        When this function gets used in atomtyper.py, we actively modify the
        white- and blacklists of the atoms in `topology` after finding a match.
        This means that between every successive call of
        `subgraph_isomorphisms_iter()`, the topology against which we are
        matching may have actually changed. Currently, we take advantage of this
        behavior in some edges cases (e.g. see `test_hexa_coordinated` in
        `test_smarts.py`).

        """
        # Note: Needs to be updated in sync with the grammar in `smarts.py`.
        ring_tokens = ['ring_size', 'ring_count']
        has_ring_rules = any(self.ast.select(token)
                             for token in ring_tokens)
        _prepare_atoms(topology, compute_cycles=has_ring_rules)

        top_graph = nx.Graph()
        top_graph.add_nodes_from(((a.index, {'atom': a})
                                  for a in topology.atoms()))
        top_graph.add_edges_from(((b[0].index, b[1].index)
                                  for b in topology.bonds()))

        if self._graph_matcher is None:
            # NOTE(review): the matcher is created once with the `top_graph`
            # of the first call and reused afterwards -- confirm that an
            # instance is never reused across different topologies.
            atom = nx.get_node_attributes(self, 'atom')[0]
            if (len(atom.select('atom_symbol')) == 1 and
                    not atom.select('not_expression')):
                try:
                    element = atom.select('atom_symbol').strees[0].tail[0]
                except IndexError:
                    try:
                        atomic_num = atom.select('atomic_num').strees[0].tail[0]
                        element = pt.Element[int(atomic_num)]
                    except IndexError:
                        element = None
            else:
                element = None
            self._graph_matcher = SMARTSMatcher(top_graph, self,
                                                node_match=self._node_match,
                                                element=element)

        # The first node in the smarts graph always corresponds to the atom
        # that we are trying to match. `nodes_iter()` was removed in
        # NetworkX 2.0; iterating `nodes()` works on both 1.x and 2.x.
        first_atom = next(iter(self.nodes()))
        matched_atoms = set()
        for mapping in self._graph_matcher.subgraph_isomorphisms_iter():
            # Invert to {pattern node: topology atom index}.
            mapping = {node_id: atom_id for atom_id, node_id in mapping.items()}
            atom_index = mapping[first_atom]
            # Don't yield duplicate matches found via matching the pattern in a
            # different order.
            if atom_index not in matched_atoms:
                matched_atoms.add(atom_index)
                yield atom_index
Пример #24
0
class Ast2Nx:
    """Parse a SMARTS string into an AST and convert it to a NetworkX graph.

    Supports graph/subgraph isomorphism using the VF2 algorithm: ``a == b``
    tests whole-graph isomorphism and ``b in a`` tests whether a subgraph of
    ``a`` is isomorphic to ``b``.

    Parameters
    ----------
    smarts_str : SMARTS string
        The SMARTS string used to generate the NetworkX graph.

    Attributes
    ----------
    PARSER : SMARTS
        The SMARTS parser.
    AST : STree
        The abstract syntax tree generated from the SMARTS string.
    atom_with_id : dict, {id(atom): (atom, unique_id)}
        Assigns each atom a unique id. The ``(atom, unique_id)`` tuple is the
        object used as the node in the graph.
    atom_name : dict, {id(atom): name}
        Assigns each atom a name, stored as the ``name`` node attribute. The
        name can be repeated in order to make graph/sub-graph isomorphism.
    atom_with_label : dict, {id(atom): [label, ...]}
        In SMARTS, atom_label is used to mark the joint points in rings. Each
        value holds the individual characters of the atom's label.
    NetworkX : networkx.Graph
        The graph built from the AST.
    """

    def __init__(self, smarts_str):
        self.PARSER = SMARTS()
        self.AST = self.PARSER.parse(smarts_str)
        # NOTE(review): the result is discarded; presumably this call is kept
        # for a side effect of `select` on the tree -- confirm before removing.
        self.AST.select('start')
        self.atom_with_id = {}
        self.atom_name = {}
        self.atom_with_label = {}
        self.NetworkX = nx.Graph()

        # Populate the lookup tables first ...
        self._assign_id()
        self._set_atoms_with_label()

        # ... then build the graph from them.
        self._add_nodes()
        self._add_edges(self.AST)
        self._add_label_edges()

    def _assign_id(self):
        """Assign a unique, consecutive id and a name to each atom."""
        for atom_id, atom in enumerate(self.AST.select('atom')):
            self.atom_with_id[id(atom)] = (atom, atom_id)
            self.atom_name[id(atom)] = self._set_atom_name(atom)

    def _set_atoms_with_label(self):
        """Record the label characters of every labelled atom."""
        for label in self.AST.select('atom_label'):
            owner = label.parent()
            assert owner.head == 'atom', \
                "the parent of atom_label has to be atom."
            # A label such as '12' is split into its characters ['1', '2'].
            self.atom_with_label[id(owner)] = list(label.tail[0])

    def _set_atom_name(self, atom):
        """Build the name assigned to `atom`.

        The name is designed for isomorphism checks: the matcher first checks
        the structure and then the identity of each node, and the name can be
        used for that identity check. ``atom_label`` children are ignored.

        Returns
        -------
        str
            ``'atom(<child>, <child>, ...)'`` built from ``str()`` of every
            child of `atom` that is not an ``atom_label``.
        """
        # ToDo: Need more details to specifically handle the atom name.
        parts = [str(child) for child in atom.tail
                 if child.head != 'atom_label']
        return 'atom(' + ', '.join(parts) + ')'

    def _get_atom_with_id(self, atom):
        """Return the ``(atom, unique_id)`` entry for `atom`.

        `atom` may be the atom object itself or the ``id(atom)`` key under
        which it was registered.
        """
        key = id(atom)
        if key in self.atom_with_id:
            return self.atom_with_id[key]
        return self.atom_with_id[atom]

    def _get_atom_name(self, atom):
        """Return the name for `atom` (an atom object or its ``id()`` key)."""
        key = id(atom)
        if key in self.atom_name:
            return self.atom_name[key]
        return self.atom_name[atom]

    def _get_edge_name(self, atom1, atom2):
        """Return the edge name ``'<name of atom1>-<name of atom2>'``."""
        return self._get_atom_name(atom1) + '-' + self._get_atom_name(atom2)

    def _add_nodes(self):
        """Add every atom of the AST as a node carrying its name."""
        for atom in self.AST.select('atom'):
            self.NetworkX.add_node(self._get_atom_with_id(atom),
                                   name=self._get_atom_name(atom))

    def _add_edges(self, ASTtree, trunk=None):
        """Add the edges implied by atom adjacency and branching.

        Parameters
        ----------
        ASTtree : STree
            The (sub)tree whose direct children are walked.
        trunk : STree, optional
            The atom that the first atom of a branch gets connected to.
        """
        for atom in ASTtree.tail:
            if atom.head == 'atom':
                if atom.is_first_kid and (atom.parent().head == 'branch'):
                    # The first atom of a branch connects back to the trunk.
                    assert trunk is not None, "can't add branch to a None root!"
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom),
                        self._get_atom_with_id(trunk),
                        name=self._get_edge_name(atom, trunk))
                if atom.is_last_kid:
                    return  # we already travelled through the whole branch
                follower = atom.next_kid
                if follower.head == 'atom':
                    # Consecutive atoms are connected to each other.
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom),
                        self._get_atom_with_id(follower),
                        name=self._get_edge_name(atom, follower))
                elif follower.head == 'branch':
                    # This atom becomes the trunk of the upcoming branch.
                    trunk = atom
            elif atom.head == 'branch':
                # A new branch appeared, so recurse into it.
                self._add_edges(atom, trunk)

    def _add_label_edges(self):
        """Connect all atoms that share an atom_label character."""
        for atom_id, labels in self.atom_with_label.items():
            for atom_id_inner, labels_inner in self.atom_with_label.items():
                # The keys are integer ids: compare by value, not identity.
                if atom_id_inner == atom_id:
                    continue
                if any(label in labels for label in labels_inner):
                    self.NetworkX.add_edge(
                        self._get_atom_with_id(atom_id_inner),
                        self._get_atom_with_id(atom_id),
                        name=self._get_edge_name(atom_id_inner, atom_id))

    def to_file(self, name_graphml='AST2NX.graphml'):
        """Write the graph to a GraphML file.

        GraphML can be read by visualization tools such as Cytoscape. The
        ``.graphml`` extension is appended when `name_graphml` lacks it.
        """
        if not name_graphml.endswith('.graphml'):
            name_graphml = name_graphml + '.graphml'
        nx.write_graphml(self.NetworkX, name_graphml)

    # ===== isomorphism functions ===== #
    def foyer_node_match(self, G1_node, G2_node):
        """Return True if the two node descriptions compare equal.

        For example, atomic_num(6) ?= atom(C), how to deal with
        and_expression, and how to deal with wildcard (*)?
        """
        # ToDo: need a completed atom matching rules here.
        return bool(G1_node == G2_node)

    def foyer_edge_match(self, G1_edge, G2_edge):
        """Return True if the two edge descriptions compare equal."""
        # ToDo: need a completed bond matching rules here.
        return bool(G1_edge == G2_edge)

    def __eq__(self, other):
        """Whole graph isomorphism.

        Returns ``NotImplemented`` when `other` carries no ``NetworkX`` graph,
        so comparing against unrelated objects yields False instead of
        raising.
        """
        other_graph = getattr(other, 'NetworkX', None)
        if other_graph is None:
            return NotImplemented
        matcher = isomorphism.GraphMatcher(self.NetworkX,
                                           other_graph,
                                           node_match=self.foyer_node_match,
                                           edge_match=self.foyer_edge_match)
        return matcher.is_isomorphic()

    def __contains__(self, item):
        """Subgraph isomorphism: is a subgraph of this graph isomorphic to
        `item`'s graph?
        """
        # ToDo: iterator for sub-graph isomorphism.
        matcher = isomorphism.GraphMatcher(self.NetworkX,
                                           item.NetworkX,
                                           node_match=self.foyer_node_match,
                                           edge_match=self.foyer_edge_match)
        return matcher.subgraph_is_isomorphic()