def linear_segments_atom_keys(gra, lin_keys=None):
    """ Atom keys for the linear segments in a graph, one tuple per segment.

    Linear atoms with more than two neighbors (ignoring dummy atoms) are
    discarded, and the remaining linear atoms are grouped into connected
    segments. The keys of a multi-atom segment are ordered by walking along
    the chain from its lower-keyed end atom to its higher-keyed end atom.

    :param gra: the graph
    :param lin_keys: keys of the linear atoms; if None, they are obtained from
        `linear_atom_keys(gra, dummy=True)`
    :returns: the atom keys of each linear segment
    :rtype: tuple[tuple]
    """
    nodummy_ngb_keys_dct = atoms_neighbor_atom_keys(without_dummy_atoms(gra))
    if lin_keys is None:
        lin_keys = linear_atom_keys(gra, dummy=True)

    # Only atoms with at most two neighbors can be part of a linear chain
    chain_keys = [k for k in lin_keys if len(nodummy_ngb_keys_dct[k]) <= 2]

    segs = []
    for seg_gra in connected_components(subgraph(gra, chain_keys)):
        seg_keys = atom_keys(seg_gra)
        if len(seg_keys) == 1:
            only_key, = seg_keys
            segs.append((only_key,))
            continue

        # The two chain ends are the atoms with a single in-segment neighbor
        seg_ngb_keys_dct = atoms_neighbor_atom_keys(seg_gra)
        first_key, last_key = sorted(
            k for k, nkeys in seg_ngb_keys_dct.items() if len(nkeys) == 1)

        # Walk from one end to the other, always stepping to the one
        # neighbor that has not been visited yet
        walk = [first_key]
        while walk[-1] != last_key:
            nxt_key, = seg_ngb_keys_dct[walk[-1]] - set(walk)
            walk.append(nxt_key)
        segs.append(tuple(walk))

    return tuple(segs)
def _cumulene_chains(rgr):
    """ Find chains of the form sp2 - sp1 - ... - sp1 - sp2 in a graph.

    Hybridizations are taken from
    `resonance_dominant_atom_hybridizations()`. Every sp2 atom is used as a
    starting point, so a chain is reported once for each sp2 end it can be
    started from.

    :param rgr: the graph
    :returns: the chains, each as a tuple of atom keys
    :rtype: tuple[tuple]
    """
    hyb_dct = resonance_dominant_atom_hybridizations(rgr)
    sp1_keys = dict_.keys_by_value(hyb_dct, lambda x: x == 1)
    sp2_keys = dict_.keys_by_value(hyb_dct, lambda x: x == 2)
    ngb_keys_dct = atoms_neighbor_atom_keys(rgr)

    def _grow(chain):
        """ Extend `chain` in place through sp1 atoms; return it once an sp2
        atom closes it, or None if it dead-ends.
        """
        while True:
            ahead_keys = ngb_keys_dct[chain[-1]] - {chain[-2]}
            if not ahead_keys:
                return None

            assert len(ahead_keys) == 1
            ahead_key, = ahead_keys

            if ahead_key in sp1_keys:
                chain.append(ahead_key)
                continue

            if ahead_key in sp2_keys:
                chain.append(ahead_key)
                return chain

            return None

    found = []
    for sp2_key in sp2_keys:
        for sp1_key in ngb_keys_dct[sp2_key] & sp1_keys:
            grown = _grow([sp2_key, sp1_key])
            if grown is not None:
                found.append(tuple(grown))

    return tuple(found)
def ring_atom_chirality(gra, atm, ring_atms, stereo=False):
    """ Split the graph around a ring atom and return the nearby fragments.

    All bonds from `atm` to its neighbors are cut; for each neighbor that is
    in `ring_atms`, its bonds to other atoms in `ring_atms` are cut as well.
    The connected components that contain `atm` or one of its neighbors are
    returned.

    NOTE(review): despite the name, this returns a list of graphs rather
    than a boolean -- presumably the caller compares the fragments.

    :param gra: the graph
    :param atm: the key of the ring atom
    :param ring_atms: the keys of the atoms in the ring
    :param stereo: keep stereo parities in the graph?
    :type stereo: bool
    :returns: the component graphs containing `atm` or one of its neighbors
    :rtype: list
    """
    if not stereo:
        gra = without_stereo_parities(gra)

    adj_atms = atoms_neighbor_atom_keys(gra)

    cut_bnd_keys = []
    for atmi in adj_atms[atm]:
        cut_bnd_keys.append(frozenset({atm, atmi}))
        if atmi in ring_atms:
            cut_bnd_keys.extend(
                frozenset({atmj, atmi})
                for atmj in adj_atms[atmi] if atmj in ring_atms)

    cut_gra = remove_bonds(gra, cut_bnd_keys)

    def _is_near_atom(key):
        return key in adj_atms[atm] or key == atm

    return [
        comp_gra for comp_gra in connected_components(cut_gra)
        if any(_is_near_atom(key) for key in atom_keys(comp_gra))
    ]
def atom_longest_chain(gra, atm_key):
    """ Longest chain of atoms (simple path) starting from a given atom.

    All paths from `atm_key` are extended one atom at a time, never
    revisiting an atom already on the path, until none can be extended. The
    first of the surviving paths is returned.

    :param gra: the graph
    :param atm_key: the key of the starting atom
    :returns: the atom keys along the chain, starting with `atm_key`
    :rtype: tuple
    """
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)
    start_ngb_keys = ngb_keys_dct[atm_key]

    # An isolated atom is its own longest chain
    if not start_ngb_keys:
        return (atm_key,)

    frontier = [[atm_key, ngb_key] for ngb_key in start_ngb_keys]
    while True:
        grown = [
            chain + [nxt_key]
            for chain in frontier
            for nxt_key in sorted(ngb_keys_dct[chain[-1]] - set(chain))
        ]
        if not grown:
            break
        frontier = grown

    return tuple(frontier[0])
def atoms_stereo_sorted_neighbor_atom_keys(sgr):
    """ Neighbor atom keys of every stereo atom, sorted by stereo priority.

    Covers both stereo atoms and the two atoms of each stereo bond. For the
    atoms of a stereo bond, the other atom of the bond is left out of the
    neighbors. Stereo bonds are processed after stereo atoms, so an atom
    that appears in both ends up with its bond entry.

    :param sgr: the graph
    :returns: Neighbor atom keys, sorted by stereo priority, keyed by atom.
    :rtype: dict
    """
    atm_ste_keys = atom_stereo_keys(sgr)
    bnd_ste_keys = bond_stereo_keys(sgr)
    ngb_keys_dct = atoms_neighbor_atom_keys(sgr)

    srt_ngb_keys_dct = {
        key: atom_stereo_sorted_neighbor_atom_keys(
            sgr, key, ngb_keys_dct[key])
        for key in atm_ste_keys
    }

    for bnd_key in bnd_ste_keys:
        key1, key2 = sorted(bnd_key)
        for key in (key1, key2):
            srt_ngb_keys_dct[key] = atom_stereo_sorted_neighbor_atom_keys(
                sgr, key, ngb_keys_dct[key] - bnd_key)

    return srt_ngb_keys_dct
def rotational_symmetry_number(gra, key1, key2, lin_keys=None):
    """ get the rotational symmetry number along a given rotational axis

    The result is 3 if one of the two atoms at the ends of the axis has an
    implicit hydrogen count of 3 and exactly 3 neighbors that are not on the
    axis; otherwise it is 1.

    :param gra: the graph
    :param key1: the first atom key
    :param key2: the second atom key
    :param lin_keys: keys to linear atoms in the graph; passed on to
        `linear_segments_atom_keys()`
    :returns: the symmetry number, either 1 or 3
    :rtype: int
    """
    # Neighbors are looked up with dummy atoms removed; hydrogen counts are
    # taken from the implicit graph
    ngb_keys_dct = atoms_neighbor_atom_keys(without_dummy_atoms(gra))
    imp_hyd_vlc_dct = atom_implicit_hydrogen_valences(implicit(gra))

    axis_keys = {key1, key2}
    # If the keys are part of a linear chain, use the ends of that for the
    # symmetry number calculation
    lin_keys_lst = linear_segments_atom_keys(gra, lin_keys=lin_keys)
    for keys in lin_keys_lst:
        if key1 in keys or key2 in keys:
            if len(keys) == 1:
                # A single linear atom: the axis ends are its two neighbors
                key1, key2 = sorted(ngb_keys_dct[keys[0]])
            else:
                # A longer segment: the axis ends are the atoms just beyond
                # each end of the segment, and the whole segment is on the
                # axis
                key1, = ngb_keys_dct[keys[0]] - {keys[1]}
                key2, = ngb_keys_dct[keys[-1]] - {keys[-2]}
                axis_keys |= set(keys)
                break

    sym_num = 1
    for key in (key1, key2):
        if key in imp_hyd_vlc_dct:
            ngb_keys = ngb_keys_dct[key] - axis_keys
            # Chained comparison: three off-axis neighbors AND three
            # implicit hydrogens
            if len(ngb_keys) == imp_hyd_vlc_dct[key] == 3:
                sym_num = 3
                break
    return sym_num
def stereogenic_atom_keys(gra, assigned=False):
    """ Find stereogenic atoms in this graph.

    An atom is considered stereogenic here if it has a bond valence of 4
    (with bond orders removed and hydrogens made explicit) and no two of
    its neighbors have equal stereo priority vectors.

    If the `assigned` flag is set to `False`, only unassigned stereogenic
    atoms will be detected.

    :param gra: the graph
    :param assigned: Include atoms that already have stereo assignments?
    :type assigned: bool
    :returns: the stereogenic atom keys
    :rtype: frozenset
    """
    gra = without_bond_orders(gra)
    # for simplicity, add the explicit hydrogens back in
    gra = explicit(gra)

    cand_keys = dict_.keys_by_value(atom_bond_valences(gra), lambda x: x == 4)
    if not assigned:
        # Remove assigned stereo keys
        cand_keys -= atom_stereo_keys(gra)

    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    def _is_stereogenic(atm_key):
        pri_vecs = [
            stereo_priority_vector(gra, atm_key, ngb_key)
            for ngb_key in ngb_keys_dct[atm_key]
        ]
        # Any pair of equivalent branches rules the atom out
        for pri_vec1, pri_vec2 in itertools.combinations(pri_vecs, r=2):
            if pri_vec1 == pri_vec2:
                return False
        return True

    return frozenset(key for key in cand_keys if _is_stereogenic(key))
def bond_stereo_parity_from_geometry(gra, bnd_key, geo, geo_idx_dct):
    """ Determine the current stereo parity of a bond from a geometry.

    On each side of the bond, the first neighbor in stereo-sorted order is
    selected. The parity is True if the two vectors pointing from the bond
    atoms to those neighbors have a positive dot product, False if negative.

    :param gra: the graph
    :param bnd_key: the bond key (a pair of atom keys)
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys in the graph to indices in the geometry
    :returns: the parity
    :rtype: bool
    """
    atm1_key, atm2_key = bnd_key
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    side1_keys = ngb_keys_dct[atm1_key] - {atm2_key}
    side2_keys = ngb_keys_dct[atm2_key] - {atm1_key}
    side1_keys = atom_stereo_sorted_neighbor_atom_keys(
        gra, atm1_key, side1_keys)
    side2_keys = atom_stereo_sorted_neighbor_atom_keys(
        gra, atm2_key, side2_keys)

    # get the top priority neighbor keys on each side
    top1_key, top2_key = side1_keys[0], side2_keys[0]

    # determine the parity based on the coordinates
    xyzs = automol.geom.base.coordinates(geo)

    def _xyz(key):
        return xyzs[geo_idx_dct[key]]

    atm1_xyz = _xyz(atm1_key)
    atm2_xyz = _xyz(atm2_key)
    vec1 = numpy.subtract(_xyz(top1_key), atm1_xyz)
    vec2 = numpy.subtract(_xyz(top2_key), atm2_xyz)

    dot_val = numpy.vdot(vec1, vec2)
    assert dot_val != 0.    # for now, assume no collinear
    return dot_val > 0.
def rotational_bond_keys(gra, lin_keys=None, with_h_rotors=True):
    """ get all rotational bonds for a graph

    A bond is rotational if its largest resonance-dominant bond order is at
    most 1, both of its atoms have at least one other neighbor, and it is
    not in a ring. For each linear segment, the rotational bonds touching
    the segment are then reduced to at most one.

    :param gra: the graph
    :param lin_keys: keys to linear atoms in the graph
    :param with_h_rotors: Include bonds for which all other neighbors on one
        side are hydrogens?
    :type with_h_rotors: bool
    :returns: the rotational bond keys
    :rtype: frozenset
    """
    gra = explicit(gra)
    sym_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)
    bnd_ord_dct = resonance_dominant_bond_orders(gra)
    rng_bnd_keys = list(itertools.chain(*rings_bond_keys(gra)))

    def _is_rotational_bond(bnd_key):
        # The other neighbors of each of the two bond atoms
        ngb_keys_lst = [ngb_keys_dct[k] - bnd_key for k in bnd_key]

        is_single = max(bnd_ord_dct[bnd_key]) <= 1
        has_neighbors = all(ngb_keys_lst)
        not_in_ring = bnd_key not in rng_bnd_keys

        # An H rotor has only hydrogens on at least one side
        is_h_rotor = any(
            set(map(sym_dct.__getitem__, ks)) == {'H'}
            for ks in ngb_keys_lst)

        return is_single and has_neighbors and not_in_ring and (
            not is_h_rotor or with_h_rotors)

    rot_bnd_keys = frozenset(filter(_is_rotational_bond, bond_keys(gra)))

    lin_keys_lst = linear_segments_atom_keys(gra, lin_keys=lin_keys)
    dum_keys = tuple(atom_keys(gra, sym='X'))
    for keys in lin_keys_lst:
        # The rotational bonds touching this segment, in a fixed order
        bnd_keys = sorted((k for k in rot_bnd_keys if k & set(keys)),
                          key=sorted)

        # Check whether there are neighboring atoms on either side of the
        # linear segment
        excl_keys = set(keys) | set(dum_keys)

        end_key1 = atom_neighbor_atom_key(gra, keys[0],
                                          excl_atm_keys=excl_keys)

        # Exclude the first end atom so the second lookup cannot return it
        excl_keys |= {end_key1}

        end_key2 = atom_neighbor_atom_key(gra, keys[-1],
                                          excl_atm_keys=excl_keys)

        end_keys = {end_key1, end_key2}
        ngb_keys_lst = [ngb_keys_dct[k] - excl_keys for k in end_keys]
        has_neighbors = all(ngb_keys_lst)

        if not has_neighbors:
            # Nothing to rotate on at least one side: drop all of the bonds
            rot_bnd_keys -= set(bnd_keys)
        else:
            # Keep only the last bond as the rotor for the whole segment
            rot_bnd_keys -= set(bnd_keys[:-1])

    return rot_bnd_keys
def bond_stereo_sorted_neighbor_atom_keys(gra, atm1_key, atm2_key,
                                          atm1_ngb_keys=None,
                                          atm2_ngb_keys=None):
    """ get the neighbor keys of a bond's atoms sorted by stereo priority

    The two bond atoms themselves are excluded from the neighbors on either
    side. Neighbor keys passed in by the caller are never modified.

    :param gra: the graph
    :param atm1_key: the first atom in the bond
    :param atm2_key: the second atom in the bond
    :param atm1_ngb_keys: optionally, the neighbor keys to use for the first
        atom (any iterable); defaults to all of its neighbors in the graph
    :param atm2_ngb_keys: optionally, the neighbor keys to use for the second
        atom (any iterable); defaults to all of its neighbors in the graph
    :returns: the sorted neighbor keys of the first and second atoms
    :rtype: (tuple, tuple)
    """
    bnd_key = frozenset({atm1_key, atm2_key})

    # Look the neighbors up once, and only if they are actually needed
    if atm1_ngb_keys is None or atm2_ngb_keys is None:
        atm_ngb_keys_dct = atoms_neighbor_atom_keys(gra)
        if atm1_ngb_keys is None:
            atm1_ngb_keys = atm_ngb_keys_dct[atm1_key]
        if atm2_ngb_keys is None:
            atm2_ngb_keys = atm_ngb_keys_dct[atm2_key]

    # Build new sets instead of using `-=`: the augmented form would modify
    # a caller's mutable set in place and fails for lists and tuples
    atm1_ngb_keys = frozenset(atm1_ngb_keys) - bnd_key
    atm2_ngb_keys = frozenset(atm2_ngb_keys) - bnd_key

    ste_atm1_ngb_keys = atom_stereo_sorted_neighbor_atom_keys(
        gra, atm1_key, atm1_ngb_keys)
    ste_atm2_ngb_keys = atom_stereo_sorted_neighbor_atom_keys(
        gra, atm2_key, atm2_ngb_keys)
    return (ste_atm1_ngb_keys, ste_atm2_ngb_keys)
def stereogenic_bond_keys(gra, assigned=False):
    """ Find stereogenic bonds in this graph.

    Candidates are the sp2 bonds that are not part of a ring with fewer
    than 8 bonds. A candidate is stereogenic if neither of its atoms is
    symmetric with respect to the bond.

    If the `assigned` flag is set to `False`, only unassigned stereogenic
    bonds will be detected.

    :param gra: the graph
    :param assigned: Include bonds that already have stereo assignments?
    :type assigned: bool
    :returns: the stereogenic bond keys
    :rtype: frozenset
    """
    gra = without_bond_orders(gra)
    # for simplicity, add the explicit hydrogens back in
    gra = explicit(gra)

    # get candidates: planar bonds
    cand_keys = sp2_bond_keys(gra)

    if not assigned:
        # remove bonds that already have stereo assignments
        cand_keys -= bond_stereo_keys(gra)

    # remove double bonds in small rings
    small_rng_bnd_keys = frozenset()
    for rng_bnd_keys in rings_bond_keys(gra):
        if len(rng_bnd_keys) < 8:
            small_rng_bnd_keys = small_rng_bnd_keys.union(rng_bnd_keys)
    cand_keys -= small_rng_bnd_keys

    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    def _is_symmetric_on_bond(atm_key, atm_ngb_key):
        side_keys = list(ngb_keys_dct[atm_key] - {atm_ngb_key})

        if not side_keys:               # C=:O:
            return True

        if len(side_keys) == 1:         # C=N:-X
            return False

        assert len(side_keys) == 2      # C=C(-X)-Y
        pri_vec1 = stereo_priority_vector(gra, atm_key, side_keys[0])
        pri_vec2 = stereo_priority_vector(gra, atm_key, side_keys[1])
        return pri_vec1 == pri_vec2

    def _is_stereogenic(bnd_key):
        atm1_key, atm2_key = bnd_key
        if _is_symmetric_on_bond(atm1_key, atm2_key):
            return False
        return not _is_symmetric_on_bond(atm2_key, atm1_key)

    return frozenset(key for key in cand_keys if _is_stereogenic(key))
def equivalent_bonds(gra, bnd_key, stereo=True, dummy=True):
    """ Identify sets of isomorphically equivalent bonds

    Two bonds are equivalent if they transform into each other under an
    automorphism

    Candidates are considered in both atom orderings, so the result consists
    of ordered pairs of atom keys.

    :param gra: A graph
    :param bnd_key: A bond key for the graph, which may be sorted or unsorted
    :param stereo: Consider stereo?
    :type stereo: bool
    :param dummy: Consider dummy atoms?
    :type dummy: bool
    :returns: Keys to equivalent bonds
    :rtype: frozenset
    """
    bnd_key = tuple(bnd_key)

    # Every bond, as an ordered pair, in both directions
    fwd_keys = [tuple(sorted(key)) for key in bond_keys(gra)]
    all_keys = fwd_keys + [tuple(reversed(key)) for key in fwd_keys]
    assert bnd_key in all_keys, f"{bnd_key} not in {all_keys}"

    symb_dct = atom_symbols(gra)
    ngbs_dct = atoms_neighbor_atom_keys(gra)

    def _symbols(key):
        return [symb_dct[atm_key] for atm_key in key]

    def _neighbor_symbols(key):
        key1, key2 = key
        return (sorted(symb_dct[k] for k in ngbs_dct[key1]),
                sorted(symb_dct[k] for k in ngbs_dct[key2]))

    ref_symbs = _symbols(bnd_key)
    ref_ngb_symbs = _neighbor_symbols(bnd_key)

    # 1. Find bonds with the same atom types
    # 2. Of those, find bonds with the same neighboring atom types
    cand_keys = [
        key for key in all_keys
        if _symbols(key) == ref_symbs
        and _neighbor_symbols(key) == ref_ngb_symbs
    ]

    # 3. Find the equivalent bonds from the list of candidates.
    # Strategy: Change the atom symbols to 'Lv' and 'Ts' and check for
    # isomorphism.  Assumes none of the compounds have element 116 or 117.
    return frozenset(
        key for key in cand_keys
        if are_equivalent_bonds(gra, bnd_key, key, stereo=stereo,
                                dummy=dummy))
def stereo_priority_vector(gra, atm_key, atm_ngb_key):
    """ generates a sortable one-to-one representation of the branch extending
    from `atm_key` through its bonded neighbor `atm_ngb_key`

    If the neighbor is not a backbone atom (it must then be an explicit
    hydrogen), the vector is the empty tuple. Otherwise, the vector is a
    nested tuple built by walking the implicit graph outward from the
    neighbor, recording the bond and atom values met along the way.

    :param gra: the graph
    :param atm_key: the key of the atom the branch extends from
    :param atm_ngb_key: the key of the neighbor the branch passes through
    :returns: the priority vector
    :rtype: tuple
    """
    bbn_keys = backbone_keys(gra)
    exp_hyd_keys = explicit_hydrogen_keys(gra)

    if atm_ngb_key not in bbn_keys:
        assert atm_ngb_key in exp_hyd_keys
        assert frozenset({atm_key, atm_ngb_key}) in bond_keys(gra)
        pri_vec = ()
    else:
        gra = implicit(gra)
        atm_dct = atoms(gra)
        bnd_dct = bonds(gra)
        assert atm_key in bbn_keys
        assert frozenset({atm_key, atm_ngb_key}) in bnd_dct

        # here, switch to an implicit graph
        atm_ngb_keys_dct = atoms_neighbor_atom_keys(gra)

        def _priority_vector(atm1_key, atm2_key, seen_keys):
            # we keep a list of seen keys to cut off cycles, avoiding infinite
            # loops
            # (`seen_keys` starts out as a set that is shared with, and
            # updated in place by, every recursive call)

            bnd_val = bnd_dct[frozenset({atm1_key, atm2_key})]
            atm_val = atm_dct[atm2_key]
            bnd_val = _replace_nones_with_negative_infinity(bnd_val)
            atm_val = _replace_nones_with_negative_infinity(atm_val)

            if atm2_key in seen_keys:
                # Already visited: record the bond only and stop here
                ret = (bnd_val, )
            else:
                seen_keys.update({atm1_key, atm2_key})
                atm3_keys = atm_ngb_keys_dct[atm2_key] - {atm1_key}
                if atm3_keys:
                    # The list is fully evaluated with the shared set before
                    # `seen_keys` is rebound here to the tuple of values
                    # returned by the sub-calls; from this point on it is
                    # only used as the second return value
                    next_vals, seen_keys = zip(*[
                        _priority_vector(atm2_key, atm3_key, seen_keys)
                        for atm3_key in atm3_keys
                    ])
                    ret = (bnd_val, atm_val) + next_vals
                else:
                    ret = (bnd_val, atm_val)

            return ret, seen_keys

        pri_vec, _ = _priority_vector(atm_key, atm_ngb_key, set())

    return pri_vec
def atom_groups(gra, atm, stereo=False):
    """ Split a graph into the groups attached to one atom.

    Every bond between `atm` and its neighbors is removed, and the connected
    components of what remains are returned (`atm` itself is left as a
    component of its own).

    TODO: MERGE WITH BRANCH FUNCTIONS OR MAKE NAMING CONSISTENT SOMEHOW

    :param gra: the graph
    :param atm: the key of the atom
    :param stereo: keep stereo parities in the graph?
    :type stereo: bool
    :returns: the connected components after cutting the bonds to `atm`
    """
    if not stereo:
        gra = without_stereo_parities(gra)

    adj_atms = atoms_neighbor_atom_keys(gra)
    cut_bnd_keys = [frozenset({atm, ngb_atm}) for ngb_atm in adj_atms[atm]]
    return connected_components(remove_bonds(gra, cut_bnd_keys))
def _atom_stereo_corrected_geometry(gra, atm_ste_par_dct, geo, geo_idx_dct):
    """ correct the atom stereo parities of a geometry, for a subset of atoms

    For each atom whose parity in the geometry differs from the requested
    one, two of its non-ring neighbors are swapped by rotating their branches
    by pi about the bisector of the two bonds. The requested parity is then
    recorded in the graph.

    :param gra: the graph
    :param atm_ste_par_dct: the requested parities, keyed by atom
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys in the graph to indices in the geometry
    :returns: the corrected geometry and the graph with the parities set
    """
    ring_atm_keys = set(itertools.chain(*rings_atom_keys(gra)))
    atm_ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    atm_keys = list(atm_ste_par_dct.keys())
    for atm_key in atm_keys:
        par = atm_ste_par_dct[atm_key]
        curr_par = atom_stereo_parity_from_geometry(
            gra, atm_key, geo, geo_idx_dct)

        if curr_par != par:
            atm_ngb_keys = atm_ngb_keys_dct[atm_key]
            # for now, we simply exclude rings from the pivot keys
            # (will not work for stereo atom at the intersection of two rings)
            atm_piv_keys = list(atm_ngb_keys - ring_atm_keys)[:2]
            assert len(atm_piv_keys) == 2
            atm3_key, atm4_key = atm_piv_keys

            # get coordinates
            xyzs = automol.geom.base.coordinates(geo)
            atm_xyz = xyzs[geo_idx_dct[atm_key]]
            atm3_xyz = xyzs[geo_idx_dct[atm3_key]]
            atm4_xyz = xyzs[geo_idx_dct[atm4_key]]

            # do the rotation
            rot_axis = util.vec.unit_bisector(
                atm3_xyz, atm4_xyz, orig_xyz=atm_xyz)

            # rotate everything on the two pivot branches
            rot_atm_keys = (
                atom_keys(branch(gra, atm_key, {atm_key, atm3_key})) |
                atom_keys(branch(gra, atm_key, {atm_key, atm4_key})))
            rot_idxs = list(map(geo_idx_dct.__getitem__, rot_atm_keys))

            geo = automol.geom.rotate(
                geo, rot_axis, numpy.pi, orig_xyz=atm_xyz, idxs=rot_idxs)

        # the geometry must now agree with the requested parity
        assert atom_stereo_parity_from_geometry(
            gra, atm_key, geo, geo_idx_dct) == par
        gra = set_atom_stereo_parities(gra, {atm_key: par})

    return geo, gra
def radical_dissociation_prods(gra, pgra1):
    """ Given one dissociation product, determine the other product.

    The groups attached to each atom adjacent to a radical site are compared
    against `pgra1`. When a group is isomorphic to it (backbone only), the
    other product is the graph with that group's atoms removed. If several
    groups match, the last match is used.

    :param gra: the graph of the dissociating species
    :param pgra1: the graph of the known product
    :returns: the known product and the other product (None if not found)
    :rtype: tuple
    """
    gra = without_fractional_bonds(gra)

    pgra2 = None
    rad_keys = sing_res_dom_radical_atom_keys(gra)
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    # Lazily enumerate the candidate groups, in the original nested order
    cand_groups = (
        group
        for rad_key in rad_keys
        for adj_key in ngb_keys_dct[rad_key]
        for group in atom_groups(gra, adj_key, stereo=False)
    )

    for group in cand_groups:
        if not isomorphism(group, pgra1, backbone_only=True):
            continue

        pgra2 = remove_atoms(gra, atom_keys(group))
        # NOTE(review): this tests whether the collection of bond keys is
        # itself an element of the graph object -- presumably never true;
        # confirm whether a per-bond check was intended
        if bond_keys(group) in pgra2:
            pgra2 = remove_bonds(pgra2, bond_keys(group))

    return (pgra1, pgra2)
def atom_stereo_sorted_neighbor_atom_keys(gra, atm_key, atm_ngb_keys=None):
    """ Neighbor keys of an atom, sorted by stereo priority.

    :param gra: the graph
    :param atm_key: the key of the atom
    :param atm_ngb_keys: the neighbor keys to sort; defaults to all of the
        atom's neighbors in the graph
    :returns: the neighbor keys, in order of ascending priority vector
    :rtype: tuple
    """
    if atm_ngb_keys is None:
        atm_ngb_keys = atoms_neighbor_atom_keys(gra)[atm_key]

    ngb_keys = list(atm_ngb_keys)

    # explicitly create an object array because otherwise the argsort
    # interprets [()] as []
    pri_vecs = numpy.empty(len(ngb_keys), dtype=numpy.object_)
    pri_vecs[:] = [
        stereo_priority_vector(gra, atm_key, ngb_key) for ngb_key in ngb_keys
    ]

    return tuple(ngb_keys[idx] for idx in numpy.argsort(pri_vecs))
def equivalent_atoms(gra, atm_key, stereo=True, dummy=True):
    """ Identify sets of isomorphically equivalent atoms

    Two atoms are equivalent if they transform into each other under an
    automorphism

    :param gra: A graph
    :param atm_key: An atom key for the graph
    :param stereo: Consider stereo?
    :type stereo: bool
    :param dummy: Consider dummy atoms?
    :type dummy: bool
    :returns: Keys to equivalent atoms
    :rtype: frozenset
    """
    assert atm_key in atom_keys(gra), (
        f"{atm_key} not in {atom_keys(gra)}")

    symb_dct = atom_symbols(gra)
    ngbs_dct = atoms_neighbor_atom_keys(gra)

    def _neighbor_symbols(key):
        return sorted(symb_dct[ngb_key] for ngb_key in ngbs_dct[key])

    # 1. Find atoms with the same symbols
    same_symb_keys = atom_keys(gra, sym=symb_dct[atm_key])

    # 2. Of those, find atoms with the same neighboring atom types
    ref_ngb_symbs = _neighbor_symbols(atm_key)
    cand_keys = [
        key for key in same_symb_keys
        if _neighbor_symbols(key) == ref_ngb_symbs
    ]

    # 3. Find the equivalent atoms from the list of candidates.
    # Strategy: Change the atom symbol to 'Ts' and check for isomorphism.
    # Assumes none of the compounds have element 117.
    return frozenset(
        key for key in cand_keys
        if are_equivalent_atoms(gra, atm_key, key, stereo=stereo,
                                dummy=dummy))
def atom_stereo_parity_from_geometry(gra, atm_key, geo, geo_idx_dct):
    """ Determine the current stereo parity of an atom from a geometry.

    The coordinates of the atom's four neighbors, in stereo-sorted order,
    are placed in the first three columns of a 4x4 matrix whose last column
    is all ones. The parity is True if the determinant is positive, False if
    negative.

    :param gra: the graph
    :param atm_key: the key of the atom
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys in the graph to indices in the geometry
    :returns: the parity
    :rtype: bool
    """
    # sort the neighbor keys by stereo priority
    srt_ngb_keys = atom_stereo_sorted_neighbor_atom_keys(
        gra, atm_key, atoms_neighbor_atom_keys(gra)[atm_key])

    # determine the parity based on the coordinates
    xyzs = automol.geom.base.coordinates(geo)
    ngb_xyzs = [
        xyzs[idx] for idx in dict_.values_by_key(geo_idx_dct, srt_ngb_keys)
    ]

    det_mat = numpy.ones((4, 4))
    det_mat[:, :3] = ngb_xyzs
    det_val = numpy.linalg.det(det_mat)
    assert det_val != 0.  # for now, assume no four-atom planes
    return det_val > 0.
def _insert_stereo_hydrogens(gra):
    """ Insert hydrogens necessary for bond stereo into an implicit graph.

    For each atom of a stereo bond that has no neighbor other than its bond
    partner, the atom's single implicit hydrogen is replaced by an explicit
    one. Hydrogens are given negative keys for proper stereo sorting.

    :param gra: the implicit graph
    :returns: the graph with the extra explicit hydrogens
    """
    bnd_keys = bond_stereo_keys(gra)
    nkeys_dct = atoms_neighbor_atom_keys(gra)
    nhyd_dct = atom_implicit_hydrogen_valences(gra)

    # Start below the negative of the largest key and count downward
    next_key = -max(atom_keys(gra)) - 1

    for bnd_key in bnd_keys:
        key1, key2 = bnd_key
        for key, partner_key in ((key1, key2), (key2, key1)):
            if nkeys_dct[key] - {partner_key}:
                continue

            assert nhyd_dct[key] == 1
            gra = add_bonded_atom(gra, 'H', key, next_key)
            gra = set_atom_implicit_hydrogen_valences(gra, {key: 0})
            next_key -= 1

    return gra
def neighbors_of_type(gra, aidx, symb):
    """ For a given atom, find the indices of all neighboring atoms that
        have the specified atomic symbol.

        :param gra: molecular graph
        :type gra: molecular graph data structure
        :param aidx: index of atom for which to find neighbors
        :type aidx: int
        :param symb: symbol of the desired atom type for neighbors
        :type symb: str
        :returns: indices of the neighbors with a matching symbol
        :rtype: tuple
    """
    idx_symb_dct = atom_symbols(gra)
    neighs = atoms_neighbor_atom_keys(gra)[aidx]
    neigh_symbs = _atom_idx_to_symb(neighs, idx_symb_dct)

    return tuple(
        nidx for nidx, nsymb in zip(neighs, neigh_symbs) if nsymb == symb)
def radical_dissociation_products(gra, pgra1):
    """ For a given species, determine the products of a dissociation
        occurring around a radical site. We assume one of the
        dissociation products is known, and we attempt to find the
        corresponding product.

        Currently, we assume that the input pgra1 is appropriately
        stereolabeled.

        :param gra: species undergoing dissociation
        :type gra: automol.graph object
        :param pgra1: one of the known products of dissociation
        :type pgra1: automol.graph object
        :returns: the known product and the other product (None if not found)
        :rtype: tuple(automol.graph.object)
    """
    # Remove fractional bonds for functions to work
    gra = without_fractional_bonds(gra)

    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    # Attempt to find a graph of the product corresponding to pgra1; if
    # several groups match, the last match is used
    pgra2 = None
    for rad_key in sing_res_dom_radical_atom_keys(gra):
        for adj_key in ngb_keys_dct[rad_key]:
            for group in atom_groups(gra, adj_key, stereo=False):
                if not isomorphism(group, pgra1, backbone_only=True):
                    continue

                pgra2 = remove_atoms(gra, atom_keys(group))
                # NOTE(review): this tests whether the collection of bond
                # keys is itself an element of the graph object --
                # presumably never true; confirm the intent
                if bond_keys(group) in pgra2:
                    pgra2 = remove_bonds(pgra2, bond_keys(group))

    if pgra2 is None:
        return pgra1, pgra2

    # pgra2 was ID'd: rebuild it from the reactant with stereo labels
    idx_gra = to_index_based_stereo(gra)
    idx_pgra2 = subgraph(idx_gra, atom_keys(pgra2), stereo=True)
    pgra2 = from_index_based_stereo(idx_pgra2)

    return pgra1, pgra2
def two_bond_idxs(gra, symb1, cent, symb2):
    """ Determine the triplets of indices of atoms of specified types that
        are connected in a chain by two bonds:
        (symb1_idx, cent_idx, symb2_idx).

        A central atom only matches if the symbols of all of its neighbors
        are exactly (symb1, symb2) or (symb2, symb1).

        :param gra: molecular graph
        :type gra: molecular graph data structure
        :param symb1: symbol of atom at one end of chain
        :type symb1: str
        :param cent: symbol of atom in the middle of a chain
        :type cent: str
        :param symb2: symbol of atom at other end of chain
        :type symb2: str
        :returns: the index triplets found
        :rtype: tuple[tuple]
    """
    neigh_dct = atoms_neighbor_atom_keys(gra)
    idx_symb_dct = atom_symbols(gra)
    symb_idx_dct = atom_symbol_keys(gra)

    found = []
    for cent_idx in symb_idx_dct.get(cent, tuple()):
        neighs = tuple(neigh_dct[cent_idx])
        neigh_symbs = _atom_idx_to_symb(neighs, idx_symb_dct)

        if neigh_symbs == (symb1, symb2):
            found.append((neighs[0], cent_idx, neighs[1]))
        elif neigh_symbs == (symb2, symb1):
            found.append((neighs[1], cent_idx, neighs[0]))

    return tuple(found)
def bond_symmetry_numbers(gra, frm_bnd_key=None, brk_bnd_key=None):
    """ symmetry numbers, by bond

    TODO: DEPRECATE -- I think this function can be replaced with
    rotational_symmetry_number(). Passing in formed and broken keys is
    unnecessary if one passes in a TS graph, which is stored in the reaction
    object.

    the (approximate) symmetry number of the torsional potential for this
    bond, based on the hydrogen counts for each atom

    A bond gets symmetry number 3 if one of its atoms has 3 implicit
    hydrogens, and 1 otherwise. If a transferring atom exists (the atom
    shared by the forming and breaking bonds) and one of its neighbors has
    exactly three hydrogen neighbors, all symmetry numbers are reduced to 1.

    :param gra: the graph
    :param frm_bnd_key: the key of the forming bond, if any
    :param brk_bnd_key: the key of the breaking bond, if any
    :returns: the symmetry numbers, keyed by bond
    :rtype: dict
    """
    imp_gra = implicit(gra)
    atm_imp_hyd_vlc_dct = atom_implicit_hydrogen_valences(imp_gra)

    bnd_keys = bond_keys(imp_gra)

    # The transferring atom is the one in both the forming and breaking bonds
    tfr_atm = None
    if frm_bnd_key and brk_bnd_key:
        for atm_f in frm_bnd_key:
            for atm_b in brk_bnd_key:
                if atm_f == atm_b:
                    tfr_atm = atm_f

    # Compare against None rather than testing truthiness: atom key 0 is a
    # valid key and must not be mistaken for "no transferring atom"
    reduce_sym = False
    if tfr_atm is not None:
        neighbor_dct = atoms_neighbor_atom_keys(gra)
        atms = gra[0]
        all_hyds = {atm for atm in atms if atms[atm][0] == 'H'}

        # This does not depend on the bond, so it is determined only once
        reduce_sym = any(
            sum(nei in all_hyds for nei in neighbor_dct[atm]) == 3
            for atm in neighbor_dct[tfr_atm])

    bnd_symb_num_dct = {}
    for bnd_key in bnd_keys:
        vlc = max(map(atm_imp_hyd_vlc_dct.__getitem__, bnd_key))
        bnd_symb_num_dct[bnd_key] = 3 if vlc == 3 and not reduce_sym else 1

    # fill in the rest of the bonds for completeness
    bnd_symb_num_dct = dict_.by_key(
        bnd_symb_num_dct, bond_keys(gra), fill_val=1)

    return bnd_symb_num_dct
def from_index_based_stereo(sgr):
    """ Convert a graph from index-based stereo assignments back to absolute
    stereo assignments, where parities are independent of atom ordering.

    The graph must be explicit. Starting from a graph without parities, the
    stereogenic atoms and bonds that are in the input's stereo keys are
    assigned, and this is repeated until the graph stops changing.

    :param sgr: a graph with index-based stereo assignments
    :returns: a graph with absolute stereo assignments
    """
    assert sgr == explicit(sgr), (
        f"Not an explicit graph:\n{string(sgr, one_indexed=False)}")

    gra = without_stereo_parities(sgr)

    if has_stereo(sgr):
        # The stereo keys of the input: what must be assigned in the end
        atm_keys_pool = atom_stereo_keys(sgr)
        bnd_keys_pool = bond_stereo_keys(sgr)

        idx_atm_ste_par_dct = atom_stereo_parities(sgr)
        idx_bnd_ste_par_dct = bond_stereo_parities(sgr)

        atm_ngb_keys_dct = atoms_neighbor_atom_keys(sgr)

        # The keys found so far; these only grow from pass to pass
        atm_keys = set()
        bnd_keys = set()

        last_gra = None

        # Do the assignments iteratively to handle higher-order stereo
        while last_gra != gra:
            last_gra = gra

            abs_atm_ste_par_dct = {}
            abs_bnd_ste_par_dct = {}

            atm_keys.update(stereogenic_atom_keys(gra) & atm_keys_pool)
            bnd_keys.update(stereogenic_bond_keys(gra) & bnd_keys_pool)

            # Determine absolute stereo assignments for atoms
            for atm_key in atm_keys:
                abs_srt_keys = atom_stereo_sorted_neighbor_atom_keys(
                    gra, atm_key, atm_ngb_keys_dct[atm_key])
                idx_srt_keys = sorted(abs_srt_keys)

                # Keep the parity if the priority order is an even
                # permutation of the key order; flip it otherwise
                if util.is_even_permutation(idx_srt_keys, abs_srt_keys):
                    abs_atm_ste_par_dct[atm_key] = (
                        idx_atm_ste_par_dct[atm_key])
                else:
                    abs_atm_ste_par_dct[atm_key] = (
                        not idx_atm_ste_par_dct[atm_key])

            # Determine absolute stereo assignments for bonds
            for bnd_key in bnd_keys:
                atm1_key, atm2_key = sorted(bnd_key)

                atm1_abs_srt_keys = atom_stereo_sorted_neighbor_atom_keys(
                    gra, atm1_key, atm_ngb_keys_dct[atm1_key] - bnd_key)
                atm2_abs_srt_keys = atom_stereo_sorted_neighbor_atom_keys(
                    gra, atm2_key, atm_ngb_keys_dct[atm2_key] - bnd_key)
                atm1_idx_srt_keys = sorted(atm1_abs_srt_keys)
                atm2_idx_srt_keys = sorted(atm2_abs_srt_keys)

                # Flip the parity only if the first neighbor differs between
                # the two orderings on exactly one side of the bond
                if not ((atm1_idx_srt_keys[0] != atm1_abs_srt_keys[0]) ^
                        (atm2_idx_srt_keys[0] != atm2_abs_srt_keys[0])):
                    abs_bnd_ste_par_dct[bnd_key] = (
                        idx_bnd_ste_par_dct[bnd_key])
                else:
                    abs_bnd_ste_par_dct[bnd_key] = (
                        not idx_bnd_ste_par_dct[bnd_key])

            gra = set_atom_stereo_parities(gra, abs_atm_ste_par_dct)
            gra = set_bond_stereo_parities(gra, abs_bnd_ste_par_dct)

        # Every stereo key of the input must have been assigned
        atm_ste_keys = atom_stereo_keys(gra)
        bnd_ste_keys = bond_stereo_keys(gra)
        assert atm_ste_keys == atm_keys_pool, (
            "Index-based to absolute stereo conversion failed:\n"
            f"{str(atm_ste_keys)} != {str(atm_keys_pool)}")
        assert bnd_ste_keys == bnd_keys_pool, (
            "Index-based to absolute stereo conversion failed:\n"
            f"{str(bnd_ste_keys)} != {str(bnd_keys_pool)}")

    return gra
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo?
        :type stereo: bool
        :param local_stereo: Is the graph using local stereo assignments? That
            is, are they based on atom keys rather than canonical keys?
        :type local_stereo: bool
        :param res_stereo: allow resonant double-bond stereo?
        :type res_stereo: bool
        :returns: the SMILES string
        :rtype: str
        :raises NotImplementedError: if `res_stereo` is True
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(atom_stereo_parities(rgr),
                                        lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(bond_stereo_parities(rgr),
                                        lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, keep only the parities
    # of bonds that are double bonds in this resonance
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(bnd_par_dct,
                                          lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # The neighbors in the order they appear in the SMILES string:
            # preceding atom, hydrogen, ring closures, following atoms
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # Flip the parity if the SMILES order is an odd permutation of
            # the sorted key order
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # A direction may already have been stored for this bond when an
        # earlier stereo bond was processed
        drep = drep_dct[(key0, key1)] if (key0, key1) in drep_dct else ''

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                drep = drep1
                flip = not smi_par
            else:
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        # Second, handle directionality (bond stereo)
        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        nkeys = nkeys.copy()

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is modified while it is being iterated
        # over, so the ring following a removed one is skipped on this pass
        # -- confirm whether that is intended.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys
    nkeys_dct_pool = dict_.transform_values(
        atoms_neighbor_atom_keys(rgr), sorted)

    def _recurse_smiles(smi, lst, key, just_seen=None):
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles('', [], nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1])) +
                        sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles('', [], start_key)

    return smi
def _connection_layer_and_list(gra):
    """ AMChI connection layer and list from graph

        The graph is walked depth-first from a starting atom. Every atom is
        written with a one-indexed key; linear runs are joined with '-' and
        branch points are written as 'k(branch1,branch2,...)last_branch'.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :returns: the connection layer, without prefix, and connection list
        :rtype: str, list
    """
    # One-indexed copy of the neighbor table. Entries are popped as atoms are
    # visited, so each atom is only ever expanded once.
    unvisited = {}
    for atm_key, ngb_keys in atoms_neighbor_atom_keys(gra).items():
        unvisited[atm_key + 1] = [ngb_key + 1 for ngb_key in ngb_keys]

    def _walk(key, just_seen=None):
        """ Build the layer string and nested list for the branch rooted at
            `key`, having arrived from `just_seen`.
        """
        # An atom that was already expanded has no table entry left, and so
        # contributes only its own key.
        todo = unvisited.pop(key, [])

        # Don't double back onto the atom we just came from.
        if just_seen in todo:
            todo.remove(just_seen)

        # The list is kept alongside the string because it is what the
        # branches get sorted by.
        layer = f'{key}'
        lst = [key]

        # Nothing further to follow from this atom.
        if not todo:
            return layer, lst

        # Recurse into each remaining neighbor, collecting (layer, list)
        # pairs in the order they are visited.
        branches = []
        while todo:
            branch = _walk(todo.pop(0), just_seen=key)
            branches.append(branch)

            # If the branch came back around to `key`, it was a ring: drop the
            # neighbor on the far side of `key`, so that the ring isn't
            # traversed a second time in the opposite direction.
            branch_lst = branch[1]
            if branch_lst[-1] == key:
                todo.remove(branch_lst[-2])

        # A single branch is joined linearly: k-n-...
        if len(branches) == 1:
            (branch_layer, branch_lst), = branches
            layer += f'-{branch_layer}'
            lst.extend(branch_lst)
            return layer, lst

        # Several branches are ordered by list length, then by list contents
        # (the sort is stable, so ties keep their visiting order), and joined
        # as: k(n1-...,n2-...)n3-...
        branches.sort(key=lambda pair: (len(pair[1]), pair[1]))
        branch_layers = [pair[0] for pair in branches]
        branch_lsts = [pair[1] for pair in branches]

        layer += f"({','.join(branch_layers[:-1])}){branch_layers[-1]}"
        lst.append(branch_lsts)
        return layer, lst

    # Start from the lowest-numbered terminal heavy atom if there is one,
    # and otherwise from atom 1.
    term_keys = terminal_heavy_atom_keys(gra)
    if term_keys:
        start_key = min(term_keys) + 1
    else:
        start_key = 1

    return _walk(start_key)