def bonds_of_type(gra, symb1, symb2, mbond=1):
    """ Determine the indices of all bonds of a specific type, where the
        type is specified by the two atom symbols and the bond order.

        The symbols are matched in either order, so (symb1, symb2) and
        (symb2, symb1) describe the same bond type.

        :param gra: molecular graph
        :type gra: molecular graph data structure
        :param symb1: symbol of atom 1 in the bond
        :type symb1: str
        :param symb2: symbol of atom 2 in the bond
        :type symb2: str
        :param mbond: bond order of desired bond type
        :type mbond: int
        :returns: pairs of atom indices, one pair per matching bond
        :rtype: tuple(tuple(int))
    """

    # Get the dict that relates atom indices to symbols
    idx_symb_dct = atom_symbols(gra)

    # Both orderings of the requested symbols count as a match. This is built
    # once, from the *arguments*; the symbols read from each bond are kept in
    # a separate expression so that they cannot overwrite the requested ones.
    wanted_symbs = ((symb1, symb2), (symb2, symb1))

    # Loop over all the bonds of the requested order and keep the ones whose
    # atom symbols match
    return tuple(
        (idx1, idx2)
        for idx1, idx2 in bonds_of_order(gra, mbond=mbond)
        if (idx_symb_dct[idx1], idx_symb_dct[idx2]) in wanted_symbs)
def rotational_bond_keys(gra, lin_keys=None, with_h_rotors=True):
    """ Get the keys of all rotational bonds in a graph

        A bond is kept when its dominant-resonance bond orders never exceed
        one, both of its atoms have at least one other neighbor, and it is not
        part of a ring. Bonds touching a linear segment are then pruned.

        :param gra: the graph
        :param lin_keys: keys to linear atoms in the graph
        :param with_h_rotors: keep bonds for which one end carries only
            hydrogen neighbors?
        :type with_h_rotors: bool
        :returns: the rotational bond keys
        :rtype: frozenset
    """
    gra = explicit(gra)
    symb_by_key = atom_symbols(gra)
    nbrs_by_key = atoms_neighbor_atom_keys(gra)
    ords_by_bond = resonance_dominant_bond_orders(gra)
    ring_bonds = list(itertools.chain(*rings_bond_keys(gra)))

    def _only_hydrogens(keys):
        """ are the atoms with these keys all hydrogens (and non-empty)? """
        return {symb_by_key[key] for key in keys} == {'H'}

    def _is_rotational_bond(bond):
        # Neighbors of each end of the bond, excluding the bond partner
        side_nbrs = [nbrs_by_key[atm] - bond for atm in bond]

        single = max(ords_by_bond[bond]) <= 1
        flanked = all(side_nbrs)
        acyclic = bond not in ring_bonds
        h_rotor = any(_only_hydrogens(keys) for keys in side_nbrs)

        if not (single and flanked and acyclic):
            return False
        return with_h_rotors or not h_rotor

    rot_bonds = frozenset(
        bond for bond in bond_keys(gra) if _is_rotational_bond(bond))

    segments = linear_segments_atom_keys(gra, lin_keys=lin_keys)
    dummies = tuple(atom_keys(gra, sym='X'))
    for seg in segments:
        seg_atoms = set(seg)
        seg_bonds = sorted(
            (bond for bond in rot_bonds if bond & seg_atoms), key=sorted)

        # Find the atoms capping each end of the linear segment. The first
        # cap is added to the exclusion set *before* the second is looked up.
        excluded = seg_atoms | set(dummies)
        cap1 = atom_neighbor_atom_key(gra, seg[0], excl_atm_keys=excluded)
        excluded.add(cap1)
        cap2 = atom_neighbor_atom_key(gra, seg[-1], excl_atm_keys=excluded)

        # Check whether both caps have further neighbors of their own
        cap_nbrs = [nbrs_by_key[cap] - excluded for cap in {cap1, cap2}]

        # With neighbors on both sides, one bond (the last in sorted order)
        # is retained for the segment; otherwise all of its bonds are dropped
        dropped = seg_bonds[:-1] if all(cap_nbrs) else seg_bonds
        rot_bonds -= set(dropped)

    return rot_bonds
def equivalent_bonds(gra, bnd_key, stereo=True, dummy=True):
    """ Identify the bonds that are isomorphically equivalent to a given one

        Two bonds are equivalent if they transform into each other under an
        automorphism. Bonds are handled as ordered pairs, so both orientations
        of every bond in the graph are considered.

        :param gra: A graph
        :param bnd_key: A bond key for the graph, which may be sorted or
            unsorted
        :param stereo: Passed on to the equivalence check (consider stereo?)
        :type stereo: bool
        :param dummy: Passed on to the equivalence check (consider dummy
            atoms?)
        :type dummy: bool
        :returns: Keys to equivalent bonds, as ordered tuples
        :rtype: frozenset
    """
    bnd_key = tuple(bnd_key)

    # Every bond in both orientations: sorted pairs first, then reversed ones
    oriented_keys = [tuple(sorted(key)) for key in bond_keys(gra)]
    oriented_keys = oriented_keys + [
        tuple(reversed(key)) for key in oriented_keys]
    assert bnd_key in oriented_keys, f"{bnd_key} not in {oriented_keys}"

    symb_of = atom_symbols(gra)
    nbrs_of = atoms_neighbor_atom_keys(gra)

    def _end_symbols(pair):
        return [symb_of[key] for key in pair]

    def _end_neighbor_symbols(pair):
        first, second = pair
        return (sorted(symb_of[nbr] for nbr in nbrs_of[first]),
                sorted(symb_of[nbr] for nbr in nbrs_of[second]))

    # Cheap pre-screening: the same atom types on the bond (in order), and
    # the same neighboring atom types on each end
    ref_symbs = _end_symbols(bnd_key)
    ref_nbr_symbs = _end_neighbor_symbols(bnd_key)
    candidates = [
        pair for pair in oriented_keys
        if _end_symbols(pair) == ref_symbs
        and _end_neighbor_symbols(pair) == ref_nbr_symbs]

    # Full check on the surviving candidates.
    # Strategy: Change the atom symbols to 'Lv' and 'Ts' and check for
    # isomorphism. Assumes none of the compounds have element 116 or 117.
    return frozenset(
        pair for pair in candidates
        if are_equivalent_bonds(gra, bnd_key, pair, stereo=stereo,
                                dummy=dummy))
def from_graph(gra):
    """ Build a networkx graph object from a molecular graph

        Atoms become nodes and bonds become edges; the atom and bond
        properties are attached as node and edge attributes.

        :param gra: molecular graph
        :returns: the networkx graph
        :rtype: networkx.Graph
    """
    nxg = networkx.Graph()
    nxg.add_nodes_from(atom_keys(gra))
    nxg.add_edges_from(bond_keys(gra))

    # (attribute name, function returning the per-atom values)
    node_attributes = (
        ('symbol', atom_symbols),
        ('implicit_hydrogen_valence', atom_implicit_hydrogen_valences),
        ('stereo_parity', atom_stereo_parities),
    )
    for attr_name, values_of in node_attributes:
        networkx.set_node_attributes(nxg, values_of(gra), attr_name)

    # (attribute name, function returning the per-bond values)
    edge_attributes = (
        ('order', bond_orders),
        ('stereo_parity', bond_stereo_parities),
    )
    for attr_name, values_of in edge_attributes:
        networkx.set_edge_attributes(nxg, values_of(gra), attr_name)

    return nxg
def equivalent_atoms(gra, atm_key, stereo=True, dummy=True):
    """ Identify the atoms that are isomorphically equivalent to a given one

        Two atoms are equivalent if they transform into each other under an
        automorphism

        :param gra: A graph
        :param atm_key: An atom key for the graph
        :param stereo: Consider stereo?
        :type stereo: bool
        :param dummy: Consider dummy atoms?
        :type dummy: bool
        :returns: Keys to equivalent atoms
        :rtype: frozenset
    """
    assert atm_key in atom_keys(gra), (
        f"{atm_key} not in {atom_keys(gra)}")

    symb_of = atom_symbols(gra)
    nbrs_of = atoms_neighbor_atom_keys(gra)

    def _neighbor_symbols(key):
        return sorted(symb_of[nbr] for nbr in nbrs_of[key])

    # Cheap pre-screening: the same symbol, then the same neighboring atom
    # types
    same_symbol_keys = atom_keys(gra, sym=symb_of[atm_key])
    ref_nbr_symbs = _neighbor_symbols(atm_key)
    candidates = [key for key in same_symbol_keys
                  if _neighbor_symbols(key) == ref_nbr_symbs]

    # Full check on the surviving candidates.
    # Strategy: Change the atom symbol to 'Ts' and check for isomorphism.
    # Assumes none of the compounds have element 117.
    return frozenset(
        key for key in candidates
        if are_equivalent_atoms(gra, atm_key, key, stereo=stereo,
                                dummy=dummy))
def neighbors_of_type(gra, aidx, symb):
    """ For a given atom, find the indices of its neighboring atoms that
        have the requested symbol.

        :param gra: molecular graph
        :type gra: molecular graph data structure
        :param aidx: index of atom for which to find neighbors
        :type aidx: int
        :param symb: symbol of the desired atom type for the neighbors
        :type symb: str
        :returns: indices of the matching neighbors
        :rtype: tuple(int)
    """
    symb_by_idx = atom_symbols(gra)
    nbr_idxs = atoms_neighbor_atom_keys(gra)[aidx]
    nbr_symbs = _atom_idx_to_symb(nbr_idxs, symb_by_idx)

    # Pair each neighbor with its symbol and keep the ones that match
    return tuple(
        nbr_idx for nbr_idx, nbr_symb in zip(nbr_idxs, nbr_symbs)
        if nbr_symb == symb)
def two_bond_idxs(gra, symb1, cent, symb2):
    """ Find the index triplets of atoms of the specified types that are
        connected in a chain by two bonds: (symb1_idx, cent_idx, symb2_idx).

        A central atom only matches when the symbols of its neighbors, taken
        together, equal (symb1, symb2) or (symb2, symb1).

        :param gra: molecular graph
        :type gra: molecular graph data structure
        :param symb1: symbol of atom at one end of chain
        :type symb1: str
        :param cent: symbol of atom in the middle of a chain
        :type cent: str
        :param symb2: symbol of atom at other end of chain
        :type symb2: str
        :returns: one index triplet per matching central atom
        :rtype: tuple(tuple(int))
    """
    nbrs_by_idx = atoms_neighbor_atom_keys(gra)
    symb_by_idx = atom_symbols(gra)
    idxs_by_symb = atom_symbol_keys(gra)

    triplets = []
    for cent_idx in idxs_by_symb.get(cent, tuple()):
        nbr_idxs = tuple(nbrs_by_idx[cent_idx])
        nbr_symbs = _atom_idx_to_symb(nbr_idxs, symb_by_idx)

        # Order the triplet so that the symb1 atom always comes first
        if nbr_symbs == (symb1, symb2):
            triplets.append((nbr_idxs[0], cent_idx, nbr_idxs[1]))
        elif nbr_symbs == (symb2, symb1):
            triplets.append((nbr_idxs[1], cent_idx, nbr_idxs[0]))

    return tuple(triplets)
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

        The string is built by a depth-first walk over a dominant resonance
        structure of the implicit graph, starting from the lowest-keyed
        terminal atom (or the lowest-keyed atom, if there are none).

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo?
        :type stereo: bool
        :param local_stereo: Is the graph using local stereo assignments? That
            is, are they based on atom keys rather than canonical keys?
        :type local_stereo: bool
        :param res_stereo: allow resonant double-bond stereo? Only `False` is
            currently supported.
        :type res_stereo: bool
        :returns: the SMILES string
        :rtype: str
        :raises AssertionError: if the graph is not connected
        :raises NotImplementedError: if `res_stereo` is true
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(atom_stereo_parities(rgr),
                                        lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(bond_stereo_parities(rgr),
                                        lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, restrict the bond
    # parities to bonds that have order 2 in this resonance structure.
    # (No diagnostic output here: this is library code and must not write to
    # stdout.)
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(bnd_par_dct,
                                          lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        """ Atom string for `key`: the bare symbol, or a bracketed form with
            parity and hydrogen count where brackets are required.
        """
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        # Radical sites and symbols outside the organic subset are bracketed
        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # Neighbors in the order they appear in the SMILES string:
            # previous atom, implicit hydrogen, ring closures, then branches
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                # The implicit hydrogen gets a placeholder key that sorts
                # before every real atom key
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # Flip the stored parity if the SMILES ordering is an odd
            # permutation of the key-sorted ordering
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        """ Bond string for the bond leading from `just_seen` to `key`: the
            order symbol followed by a direction symbol, where one applies.
        """
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # Second, handle directionality (bond stereo). A direction may
        # already have been assigned to this bond by an earlier stereo bond.
        drep = drep_dct[(key0, key1)] if (key0, key1) in drep_dct else ''

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                # The direction goes on the bond being written right now
                drep = drep1
                flip = not smi_par
            else:
                # The direction goes on a bond that will be written later
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        """ Ring-closure tags for `key`, along with its remaining neighbor
            keys (ring-closure partners removed) and the closure keys.
        """
        nkeys = nkeys.copy()

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is modified while it is being iterated, so
        # the ring following a removed one is not examined in this pass.
        # Left as-is because the rest of the traversal may depend on it --
        # confirm before changing.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            # This atom closes the ring
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            # This atom opens the ring
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys. Entries are popped from this pool as atoms
    # are visited.
    nkeys_dct_pool = dict_.transform_values(
        atoms_neighbor_atom_keys(rgr), sorted)

    def _recurse_smiles(smi, lst, key, just_seen=None):
        """ Build the SMILES string and the nested key list for the part of
            the graph reached from `key`, coming from `just_seen`. The
            incoming `smi` and `lst` values are not used.
        """
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles('', [], nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1])) +
                        sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles('', [], start_key)

    return smi