def fix_bond_order(mol: Chem.Mol) -> Chem.Mol:
    """Guess bond orders on a molecule whose hydrogens are present.

    An atom is treated as unsaturated purely from its element and its number
    of bonds. For every unsaturated atom, the bonds to its unsaturated
    neighbours are retyped: a single such neighbour gets a double bond,
    several get aromatic bonds. An unsaturated atom without any unsaturated
    neighbour is only reported on stdout.

    :param mol: molecule to edit; it is modified in place.
    :return: the same molecule object.
    :raises ValueError: if no bond is found between an atom and its neighbour.
    """
    # An element counts as unsaturated while it has fewer bonds than this.
    full_valence = {'N': 3, 'C': 4, 'O': 2}

    def is_unsaturated(atom: Chem.Atom) -> bool:
        limit = full_valence.get(atom.GetSymbol())
        return limit is not None and len(atom.GetBonds()) < limit

    def partner_of(bond: Chem.Bond, atom: Chem.Atom) -> Chem.Atom:
        """Return the atom at the other end of ``bond``."""
        # Indices are compared: per the original note, atom == itself is False.
        if bond.GetEndAtomIdx() == atom.GetIdx():
            return bond.GetBeginAtom()
        return bond.GetEndAtom()

    def unsaturated_neighbours(atom: Chem.Atom) -> List[Chem.Atom]:
        others = [partner_of(bond, atom) for bond in atom.GetBonds()]
        return [other for other in others if is_unsaturated(other)]

    def label(atom: Chem.Atom) -> str:
        return f'{atom.GetSymbol()}{atom.GetIdx()}'

    for atom in mol.GetAtoms():
        if not is_unsaturated(atom):
            continue
        candidates = unsaturated_neighbours(atom)
        if not candidates:
            print(label(atom), ' is underbonded!')
            continue
        several = len(candidates) > 1
        for partner in candidates:
            bond = mol.GetBondBetweenAtoms(atom.GetIdx(), partner.GetIdx())
            if not bond:
                raise ValueError('Issue with:', label(atom), label(partner))
            if several:
                bond.SetBondType(Chem.rdchem.BondType.AROMATIC)
                bond.SetIsAromatic(True)
            else:
                bond.SetBondType(Chem.rdchem.BondType.DOUBLE)
    return mol
def make_pair_by_split(self, conjoined: Chem.Mol, atom_idx: int) -> Tuple[Chem.Mol, Chem.Mol]:
    """Split one molecule into two overlapping pieces around ``atom_idx``.

    The ``fore`` piece is the first fragment obtained by cutting the bond
    ``atom_idx``--``atom_idx + 1``; the ``aft`` piece is the second fragment
    obtained by cutting the bond ``atom_idx - 1``--``atom_idx``.

    Per the original note, this is done instead of relying on
    ``Chem.rdMolAlign.AlignMol`` (which may overlap other atoms, and for
    which negative weights do not work).

    :param conjoined: the single molecule to split.
    :param atom_idx: index of the atom around which the two cuts are made.
    :return: ``(fore, aft)``.
    """

    def fragments_after_cut(first: int, second: int):
        bond = conjoined.GetBondBetweenAtoms(first, second)
        severed = Chem.FragmentOnBonds(conjoined, [bond.GetIdx()], addDummies=False)
        return Chem.GetMolFrags(severed, asMols=True)

    fore = fragments_after_cut(atom_idx, atom_idx + 1)[0]
    aft = fragments_after_cut(atom_idx - 1, atom_idx)[1]
    return fore, aft
def substructure_to_feature(mol: Chem.Mol,
                            substructure: FrozenSet[int],
                            fg_features: List[List[int]] = None) -> str:
    """
    Build an order-independent feature string for a substructure.

    The atom feature vectors of the substructure and the bond feature vectors
    of every bond between two of its atoms are stringified, sorted and
    rendered as one string.

    :param mol: A molecule.
    :param substructure: A set of atom indices representing a substructure.
    :param fg_features: Per-atom k-hot vectors of functional-group membership,
        indexed by atom index. ``None`` passes ``None`` for every atom.
    :return: A string representing the featurization of the substructure.
    """
    if fg_features is None:
        fg_features = [None] * mol.GetNumAtoms()

    members = list(substructure)
    atoms = [mol.GetAtomWithIdx(idx) for idx in members]

    # Collect every bond whose two ends both lie inside the substructure.
    bonds = []
    for position, first in enumerate(members):
        for second in members[position + 1:]:
            bond = mol.GetBondBetweenAtoms(first, second)
            if bond is not None:
                bonds.append(bond)

    pieces = [str(atom_features(atom, fg_features[atom.GetIdx()])) for atom in atoms]
    pieces.extend(str(bond_features(bond)) for bond in bonds)

    # Sorting gives the same string whatever the atom/bond ordering was.
    return str(sorted(pieces))
def _are_rings_bonded(self, mol: Chem.Mol, ringA: Tuple[int], ringB: Tuple[int]):
    """Tell whether any atom of ``ringA`` is bonded to any atom of ``ringB``.

    :param mol: molecule that both rings belong to.
    :param ringA: atom indices of the first ring.
    :param ringB: atom indices of the second ring.
    :return: ``True`` on the first bonded pair found, ``False`` if none exists.
    """
    return any(mol.GetBondBetweenAtoms(i, j) is not None
               for i in ringA
               for j in ringB)
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix and return its eigenvalues.

    Hydrogens are added first. The matrix B is then filled as follows:

    * B[i, i] is the relative atomic property of atom i (selected by
      ``propertylabel``), rounded to 3 decimals;
    * B[i, j] for bonded atoms is the square root of the bond order
      (1, 2, 3, or 1.5 for aromatic), rounded to 3 decimals; other bond
      types keep the adjacency value;
    * B[i, j] for non-bonded atom pairs is 0.001.

    :param mol: molecule to describe.
    :param propertylabel: property name handed to ``GetRelativeAtomicProperty``.
    :return: the real parts of the eigenvalues of B (as given by
        ``numpy.linalg.eigvals``).
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: per-atom weights.
    for idx in range(mol.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(idx).GetSymbol()
        weight = GetRelativeAtomicProperty(element=symbol,
                                           propertyname=propertylabel)
        burden[idx, idx] = round(weight, 3)

    # Off-diagonal, bonded pairs: square root of the bond order.
    bond_order = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5}
    for row, col in numpy.argwhere(adjacency):
        kind = mol.GetBondBetweenAtoms(int(row), int(col)).GetBondType().name
        if kind in bond_order:
            burden[row, col] = round(numpy.sqrt(bond_order[kind]), 3)

    # Off-diagonal, non-bonded pairs: small constant.
    for row, col in numpy.argwhere(adjacency == 0):
        if row != col:
            burden[row, col] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
def _categorise(self, mol: Chem.Mol, uniques: set) -> Dict[str, Union[set, Dict]]:
    """
    Classify the novel atoms of a molecule by their connectivity.

    Besides returning the classification (called ``categories`` in the
    methods), every atom of ``mol`` gets a ``_Category`` string property.

    * ``uniques`` are set of atoms to classify on
    * ``internals`` are unique atoms that are connected solely to unique atoms
    * ``attachments`` are non-unique atoms to which a unique atom connects
    * ``pairs`` is a dict of unique atom idx --> list of dicts, one per
      attachment, with ``idx`` (attachment idx) and ``type`` (bond type)
    * ``dummies`` are unique atoms whose symbol is ``self.dummy_symbol``

    :param mol: molecule to describe
    :param uniques: set of indices that are new to this molecule
    :return: dict with keys ``uniques``, ``internals``, ``attachments``,
        ``pairs`` and ``dummies``
    """
    pairs = {}
    internals = set()
    attachments = set()
    dummies = set()
    for i in uniques:  # novel atoms
        unique_atom = mol.GetAtomWithIdx(i)
        if unique_atom.GetSymbol() == self.dummy_symbol:
            dummies.add(i)
        neighbours = {n.GetIdx() for n in unique_atom.GetNeighbors()}
        # Neighbours that are not novel themselves; computed once.
        i_attached = neighbours - uniques
        if not i_attached:
            internals.add(i)
            continue
        attachments |= i_attached
        pairs[i] = [{'idx': j,
                     'type': mol.GetBondBetweenAtoms(i, j).GetBondType()}
                    for j in i_attached]
    # Store the category on each atom for safekeeping.
    for atom in mol.GetAtoms():
        i = atom.GetIdx()
        if i in internals:  # novel and connected only to novel atoms
            category = 'internal'
        elif i in attachments:  # not novel, but a novel atom connects to it
            category = 'overlapping-attachment'
        elif i in pairs:  # novel and connected to a non-novel atom
            category = 'internal-attachment'
        else:
            category = 'overlapping'
        atom.SetProp('_Category', category)
    return dict(uniques=uniques,
                internals=internals,
                attachments=attachments,
                pairs=pairs,
                dummies=dummies)
def get_conjugate_group_with_halogen(m: Mol):
    """Extract the conjugated groups of ``m``, each extended by adjacent halogens.

    Atoms are grouped by the conjugated-group index reported by
    ``ResonanceMolSupplier.GetAtomConjGrpIdx``; atoms whose index is not below
    1e5 are left out. Each group then absorbs every I/F/Cl/Br atom bonded to
    one of its members, and is turned into a sub-molecule with
    ``RdFunc.get_sub_rdmol``.

    :param m: molecule to analyse.
    :return: list of ``[sub_molecule, old_id_2_new_id]`` entries, largest
        sub-molecule first.
    """
    size = len(m.GetAtoms())
    bonded = np.zeros((size, size), dtype=bool)
    for first in range(size):
        for second in range(first + 1, size):
            if isinstance(m.GetBondBetweenAtoms(first, second), Bond):
                bonded[first][second] = True
                bonded[second][first] = True

    supplier = ResonanceMolSupplier(m, )

    # atom index -> conjugated group index, for atoms that belong to a group
    group_of = {}
    for member in m.GetAtoms():
        member_idx = member.GetIdx()
        group_idx = supplier.GetAtomConjGrpIdx(member_idx)
        if group_idx < 1e5:
            group_of[member_idx] = group_idx

    collected = []
    for group_idx in set(group_of.values()):
        group = [idx for idx in group_of.keys() if group_of[idx] == group_idx]
        for candidate in m.GetAtoms():
            candidate_idx = candidate.GetIdx()
            if candidate_idx in group:
                continue
            touches_group = any(bonded[candidate_idx][idx] for idx in group)
            if touches_group and candidate.GetSymbol() in ("I", "F", "Cl", "Br"):
                group.append(candidate_idx)
        submol, old_id_2_new_id = RdFunc.get_sub_rdmol(m, group)
        collected.append([submol, old_id_2_new_id])

    return sorted(collected, key=lambda entry: entry[0].GetNumAtoms(), reverse=True)
def get_edge_infos(molecule: Chem.Mol, graph: Graph):
    """Describe every edge of ``graph`` as an ``EdgeInfo``.

    All edges carry their atom distance, atom ids and ``kind``. Edges of kind
    1 are additionally described by the bond between the two atoms (stereo,
    bond type, aromaticity, conjugation and ring-size membership for each
    size in ``RING_SIZES``).

    :param molecule: molecule the graph nodes index into.
    :param graph: graph whose edges have a ``'kind'`` attribute.
    :return: list of ``EdgeInfo``, in the iteration order of ``graph.edges``.
    """
    edge_infos = []
    for source, sink in graph.edges:
        kind = graph.edges[(source, sink)]['kind']
        fields = {
            'distance': tools.get_atom_distance(molecule, source, sink),
            'atom_ids': (source, sink),
            'kind': kind,
        }
        if kind == 1:
            bond = molecule.GetBondBetweenAtoms(source, sink)
            fields.update(
                stereo=bond.GetStereo(),
                bond_type=bond.GetBondType(),
                is_aromatic=bond.GetIsAromatic(),
                is_conjugated=bond.GetIsConjugated(),
                is_in_ring_size=tuple(
                    int(bond.IsInRingSize(size)) for size in RING_SIZES),
            )
        edge_infos.append(EdgeInfo(**fields))
    return edge_infos
def construct_discrete_edge_matrix(mol: Chem.Mol):
    """Build a one-hot, per-bond-type adjacency tensor for ``mol``.

    The result has shape ``(4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM)`` and dtype
    float32. Channel 0 marks single bonds, 1 double, 2 triple and 3 aromatic;
    each bond is written symmetrically at ``[i, j]`` and ``[j, i]``.

    Bonds are visited once via ``mol.GetBonds()`` rather than probing every
    atom pair.

    :param mol: molecule to encode, or ``None``.
    :return: the tensor, or ``None`` when ``mol`` is ``None``.
    :raises AssertionError: if a bond has a type other than the four above.
        Raised explicitly so the check also holds under ``python -O``.
    """
    if mol is None:
        return None

    size = MAX_NUMBER_ATOM
    adjs = numpy.zeros((4, size, size), dtype=numpy.float32)
    channel_of = {'SINGLE': 0, 'DOUBLE': 1, 'TRIPLE': 2, 'AROMATIC': 3}

    for bond in mol.GetBonds():
        bond_type = str(bond.GetBondType())
        channel = channel_of.get(bond_type)
        if channel is None:
            print("[ERROR] Unknown bond type", bond_type)
            raise AssertionError(f"Unknown bond type {bond_type}")
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        adjs[channel, i, j] = 1.0
        adjs[channel, j, i] = 1.0
    return adjs
def _prevent_allene(self, mol: Chem.Mol) -> Chem.Mol:
    """Downgrade cumulated double/triple bonds on light atoms.

    Only atoms with atomic number below 14 are considered. For each, the
    bonds typed DOUBLE or TRIPLE are counted:

    * more than two: all of them are set to SINGLE;
    * exactly two: the bond towards the neighbour with the highest summed
      bond order is set to SINGLE;
    * fewer: nothing is done.

    :param mol: molecule to fix; wrapped in a ``Chem.RWMol`` if it is not one.
    :return: the (read-write) molecule that was edited.
    """
    if not isinstance(mol, Chem.RWMol):
        mol = Chem.RWMol(mol)
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() >= 14:
            continue
        multiple = [bond for bond in atom.GetBonds()
                    if bond.GetBondType().name in ('DOUBLE', 'TRIPLE')]
        if len(multiple) > 2:
            # Report the count, not the repr of the bond objects.
            log.info(f'Allene issue: {len(multiple)} double bonds on '
                     f'{atom.GetSymbol()} atom {atom.GetIdx()}!')
            for bond in multiple:
                bond.SetBondType(Chem.BondType.SINGLE)
        elif len(multiple) == 2:
            centre_idx = atom.GetIdx()
            # The atoms at the far end of the two multiple bonds.
            others = [end
                      for bond in multiple
                      for end in (bond.GetBeginAtom(), bond.GetEndAtom())
                      if end.GetIdx() != centre_idx]
            # Ascending by total bond order, so the busiest neighbour is last.
            others = sorted(
                others,
                key=lambda neighbour: sum(b.GetBondTypeAsDouble()
                                          for b in neighbour.GetBonds()))
            log.info(f'Allene removed between {centre_idx} and {[a.GetIdx() for a in others]}')
            # Downgrade the bond to the more highly bonded neighbour.
            mol.GetBondBetweenAtoms(centre_idx, others[-1].GetIdx()).SetBondType(Chem.BondType.SINGLE)
    return mol
def _categorise(self, mol: Chem.Mol, uniques: set) -> Dict[str, Union[set, Dict]]:
    """
    Classify the novel atoms of a molecule by their connectivity.

    The returned dict (called ``categories`` in the methods) holds:

    * ``uniques``: the set of atoms that were classified
    * ``internals``: unique atoms that are connected solely to unique atoms
    * ``attachments``: non-unique atoms to which a unique atom connects
    * ``pairs``: unique atom idx --> list of dicts, one per attachment, with
      ``idx`` (attachment idx) and ``type`` (bond type)
    * ``dummies``: unique atoms whose symbol is ``self.dummy_symbol``

    When ``self._debug_draw`` is set the molecule is drawn with internals,
    attachments and anchors (uniques that are not internal) highlighted.

    :param mol: molecule to describe
    :param uniques: set of indices that are new to this molecule
    :return: the dict described above
    """
    pairs = {}
    internals, attachments, dummies = set(), set(), set()

    for i in uniques:
        unique_atom = mol.GetAtomWithIdx(i)
        if unique_atom.GetSymbol() == self.dummy_symbol:
            dummies.add(i)
        outside = {n.GetIdx() for n in unique_atom.GetNeighbors()} - uniques
        if not outside:
            internals.add(i)
            continue
        attachments.update(outside)
        pairs[i] = [{'idx': j,
                     'type': mol.GetBondBetweenAtoms(i, j).GetBondType()}
                    for j in outside]

    anchors = uniques - internals
    if self._debug_draw:
        # Later groups override earlier ones for atoms present in several.
        color = {}
        for group, shade in ((internals, (0, 0.8, 0)),
                             (attachments, (0, 0, 0.8)),
                             (anchors, (0.8, 0, 0.8))):
            color.update({i: shade for i in group})
        high = [*internals, *attachments, *anchors]
        self.draw_nicely(mol, highlightAtoms=high, highlightAtomColors=color)

    return {
        'uniques': uniques,
        'internals': internals,
        'attachments': attachments,
        'pairs': pairs,
        'dummies': dummies,
    }
def process(mol: Mol, device: torch.device, **kwargs):
    """Convert a molecule into ``MPNNData`` with an extra node 0.

    Node 0 is an added node; atom ``k`` of the molecule becomes node ``k + 1``.
    Edges are, in order: one edge from every atom node to node 0 (with an
    all-zero bond feature), then both directions of every bond (sharing the
    bond's feature vector).

    :param mol: molecule to convert.
    :param device: device on which the tensors are created.
    :param kwargs: accepted but not used.
    :return: ``MPNNData(n_nodes, node_features, edge_index, n_edges, edge_attr)``.
    """
    n = mol.GetNumAtoms() + 1

    # Edges from each atom node to the extra node 0.
    sources = list(range(1, n))
    targets = [0 for _ in sources]
    f_bonds = [[0] * feature.BOND_FDIM for _ in sources]

    # Both directions of every chemical bond.
    for bond in mol.GetBonds():
        u = bond.GetBeginAtomIdx() + 1
        v = bond.GetEndAtomIdx() + 1
        sources += [u, v]
        targets += [v, u]
        f_bond = feature.bond_features(bond)
        f_bonds += [f_bond, f_bond]

    cnt = len(sources)
    edge_index = torch.tensor([sources, targets], dtype=torch.long, device=device)

    atom_vectors, width = feature.mol_feature(mol)
    # Prepend a zero row as the feature vector of node 0.
    vec = torch.cat([torch.zeros((1, width)), atom_vectors]).to(device)

    edge_attr = torch.tensor(f_bonds, dtype=torch.float32, device=device)
    return MPNNData(n, vec, edge_index, cnt, edge_attr)
def _prevent_weird_rings(self, mol: Chem.Mol):
    """
    Inspect every pair of rings and repair or remove unusual ring fusions.

    Ring pairs are classified by the number of atoms they share:

    * 0 (separate) or 1 (spiro): only logged.
    * 2 (fused): if the two shared atoms are bonded and the smaller ring has
      4 or 3 atoms, ``self._place_between`` is called to expand it; other
      sizes are left alone. If they are not bonded, two six-membered rings
      raise, anything else is printed and left alone.
    * more, but below ``self.atoms_in_bridge_cutoff``: a bridge that is kept.
    * otherwise: ``self._prevent_bridge_ring`` is applied and the whole check
      restarts recursively on the result.

    :param mol: molecule to check; wrapped in a ``Chem.RWMol`` if it is not one.
    :return: ``mol.GetMol()`` once the loop completes without a bridge removal.
    :raises Exception: two six-membered rings share two non-bonded atoms.
    """
    if not isinstance(mol, Chem.RWMol):
        mol = Chem.RWMol(mol)
    # Computed once, before any edit made inside the loop below.
    ringatoms = self._get_ring_info(mol) #GetRingInfo().AtomRings()
    for ring_A, ring_B in itertools.combinations(ringatoms, r=2):
        shared = set(ring_A).intersection(set(ring_B))
        if len(shared) == 0:
            log.debug('This molecule has some separate rings')
            pass # separate rings
        elif len(shared) == 1:
            log.debug('This molecule has a spiro bicycle')
            pass # spiro ring.
        elif len(shared) == 2:
            log.debug('This molecule has a fused ring')
            # shared holds exactly two indices here, unpacked as the bond ends.
            if mol.GetBondBetweenAtoms(*shared) is not None:
                pass # indole/naphthalene
                small, big = sorted([ring_A, ring_B], key=lambda ring: len(ring))
                if len(small) == 4:
                    log.warning('This molecule has a benzo-azetine–kind-of-thing: expanding to indole')
                    # Chem.MolFromSmiles('C12CCCCC1CC2')
                    # benzo-azetine is likely an error: add an extra atom
                    # between the two atoms that belong to the small ring only.
                    a, b = set(small).difference(big)
                    self._place_between(mol, a, b)
                elif len(small) == 3:
                    log.warning('This molecule has a benzo-cyclopropane–kind-of-thing: expanding to indole')
                    # Chem.MolFromSmiles('C12CCCCC1C2')
                    # benzo-cyclopropane is actually impossible at this stage.
                    # a is the one atom that belongs to the small ring only.
                    a = list(set(small).difference(big))[0]
                    for b in shared:
                        self._place_between(mol, a, b)
                else:
                    pass # indole and naphthalene
            elif (len(ring_A), len(ring_B)) == (6, 6):
                raise Exception('This is utterly impossible')
            else:
                print(f'mysterious ring system {len(ring_A)} + {len(ring_B)}')
                pass # ????
        elif len(shared) < self.atoms_in_bridge_cutoff:
            #adamantane/norbornane/tropinone kind of thing
            log.warning('This molecule has a bridge: leaving')
            pass # ideally check if planar...
        else:
            log.warning('This molecule has a bridge that will be removed')
            mol = self._prevent_bridge_ring(mol, ring_A)
            # start from scratch.
            return self._prevent_weird_rings(mol)
    return mol.GetMol()
def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
          attachment_details: List[Dict]) -> Chem.Mol:
    """Graft the anchor-side piece of ``fragmentanda`` onto ``scaffold``.

    For every entry of ``attachment_details`` the bond between the anchor and
    the entry's ``idx_F`` atom is cut in ``fragmentanda``, the fragment that
    contains the anchor is combined with the current scaffold, and a bond of
    the entry's ``type`` is added between the anchor and the scaffold atom
    ``idx_S``. The combined molecule becomes the scaffold for the next entry.

    :param scaffold: molecule that receives the fragment.
    :param fragmentanda: molecule the fragment is cut from.
    :param anchor_index: index, in ``fragmentanda``, of the atom that attaches.
    :param attachment_details: dicts with keys ``idx_F``, ``idx_S`` and ``type``.
    :return: the merged molecule (``scaffold`` itself when there are no details).
    :raises Exception: if no fragment contains the anchor atom.
    """
    for detail in attachment_details:
        attachment_index = detail['idx_F']  # fragmentanda attachment_index
        scaffold_attachment_index = detail['idx_S']
        bond_type = detail['type']

        severed = fragmentanda.GetBondBetweenAtoms(anchor_index, attachment_index)
        f = Chem.FragmentOnBonds(fragmentanda, [severed.GetIdx()], addDummies=False)
        frag_split = []
        fragmols = Chem.GetMolFrags(f,
                                    asMols=True,
                                    fragsMolAtomMapping=frag_split,
                                    sanitizeFrags=False)
        if self._debug_draw:
            print(frag_split)

        # Get the fragment of interest: the first one holding the anchor.
        holders = [(number, members)
                   for number, members in enumerate(frag_split)
                   if anchor_index in members]
        if not holders:
            raise Exception
        mol_N, indices = holders[0]
        frag = fragmols[mol_N]
        frag_anchor_index = indices.index(anchor_index)
        if self._debug_draw:
            self.draw_nicely(frag)

        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # Fragment atoms are appended after the scaffold atoms.
        scaffold_anchor_index = frag_anchor_index + scaffold.GetNumAtoms()
        if self._debug_draw:
            print(scaffold_anchor_index, scaffold_attachment_index, anchor_index,
                  scaffold.GetNumAtoms())
            self.draw_nicely(combo)

        combo.AddBond(scaffold_anchor_index, scaffold_attachment_index, bond_type)
        flags = Chem.rdmolops.SanitizeFlags
        Chem.SanitizeMol(combo,
                         sanitizeOps=flags.SANITIZE_ADJUSTHS + flags.SANITIZE_SETAROMATICITY,
                         catchErrors=True)
        if self._debug_draw:
            self.draw_nicely(combo)
        scaffold = combo
    return scaffold
def total_bond_feature(mol: Mol) -> Tuple[torch.Tensor, int]:
    '''
    Extract the feature vectors of all bonds of a molecule.

    The first row is an all-zero vector; the bond rows follow, ordered by the
    (lower, higher) atom index pair of each bond.

    Returns:
        (feature_vec, feature_dim)
    '''
    n_atoms = mol.GetNumAtoms()
    atom_pairs = ((low, high)
                  for low in range(n_atoms)
                  for high in range(low + 1, n_atoms))

    rows = [[0] * BOND_FDIM]
    for low, high in atom_pairs:
        bond = mol.GetBondBetweenAtoms(low, high)
        if bond is not None:
            rows.append(bond_features(bond))
    return torch.tensor(rows), BOND_FDIM
def get_sub_rdmol(m: Mol, atomids: [int]):
    """Build a new molecule from a subset of the atoms of ``m``.

    The selected atoms are renumbered from 0 in the order they occur in
    ``m``. Atoms are recreated from their atomic numbers only (the first one
    through a SMARTS pattern), and every bond of ``m`` between two selected
    atoms is recreated with the same bond type.

    :param m: source molecule.
    :param atomids: indices of the atoms to keep; must not be empty.
    :return: ``(sub_molecule, old_id_2_new_id)``.
    """
    kept = [atom for atom in m.GetAtoms() if atom.GetIdx() in atomids]
    atomic_numbers = [atom.GetAtomicNum() for atom in kept]
    old_id_2_new_id = {atom.GetIdx(): new_id for new_id, atom in enumerate(kept)}

    first, *rest = atomic_numbers
    rwmol = Chem.RWMol(Chem.MolFromSmarts(f"[#{first}]"))
    for number in rest:
        rwmol.AddAtom(Chem.Atom(number))

    for old_i, old_j in combinations(atomids, 2):
        bond = m.GetBondBetweenAtoms(old_i, old_j)
        if not isinstance(bond, Bond):
            continue
        rwmol.AddBond(old_id_2_new_id[old_i], old_id_2_new_id[old_j],
                      bond.GetBondType())

    return rwmol.GetMol(), old_id_2_new_id
def _prevent_conjoined_ring(self, mol: Chem.Mol) -> Chem.Mol:
    """
    Remove bonds that directly join two atoms which each sit in three or more rings.

    Per the original note this kills bridging bonds with no atoms in the
    bridge, and is meant to be safe for bridged, fused and spiro systems.
    One bond is removed per pass (the bonded pair with the highest combined
    ring count); the rings are then re-evaluated until no such pair is left.

    :param mol: molecule to fix.
    :return: ``mol`` untouched if nothing needed removing, otherwise the
        edited molecule converted back with ``GetMol()``.
    """
    while True:
        ring_count = Counter(i for ring in self._get_ring_info(mol) for i in ring)
        nested = [idx for idx in ring_count if ring_count[idx] >= 3]
        pairs = [pair
                 for pair in itertools.combinations(nested, r=2)
                 if mol.GetBondBetweenAtoms(*pair) is not None]
        if not pairs:
            break
        # First pair with the highest combined ring membership.
        idx_a, idx_b = max(pairs, key=lambda pair: ring_count[pair[0]] + ring_count[pair[1]])
        if not isinstance(mol, Chem.RWMol):
            mol = Chem.RWMol(mol)
        mol.RemoveBond(idx_a, idx_b)  # SetBoolProp('_IsRingBond') is not important
        log.info(f'Zero-atom bridged ring issue: bond between {idx_a}-{idx_b} removed')

    if isinstance(mol, Chem.RWMol):
        return mol.GetMol()
    return mol
def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
                attachment_details: List[Dict], other_attachments: List[int],
                other_attachment_details: List[List[Dict]]) -> Chem.Mol:
    """
    This does the messy work for merge_pair.

    All bonds listed in the attachment details are cut in ``fragmentanda``;
    the fragment containing the anchor is combined with ``scaffold`` and
    bonded to it at the recorded scaffold positions.

    :param scaffold: the Chem.Mol molecule onto whose copy the fragmentanda Chem.Mol gets added
    :param fragmentanda: The other Chem.Mol molecule
    :param anchor_index: the fragment-to-added's internal atom that attaches (hit indexed)
    :param attachment_details: one dict per bond of the anchor, e.g.
        ``{'idx': 5, 'type': rdkit.Chem.rdchem.BondType.SINGLE, 'idx_F': 5, 'idx_S': 1}``
        where ``idx_F`` is the fragmentanda index and ``idx_S`` the scaffold index
    :type attachment_details: List[Dict]
    :param other_attachments: further fragmentanda atoms that attach to the scaffold
    :param other_attachment_details: for each of those, a list whose first
        entry is a dict of the same form
    :return: a new Chem.Mol molecule
    :raises Exception: if no fragment contains the anchor atom
    """
    # Bonds to cut: anchor bonds first, then the first bond of every other attachment.
    bonds_to_frag = [fragmentanda.GetBondBetweenAtoms(anchor_index, detail['idx_F']).GetIdx()
                     for detail in attachment_details]
    for oi, oad in zip(other_attachments, other_attachment_details):
        bonds_to_frag.append(fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx())

    f = Chem.FragmentOnBonds(fragmentanda, bonds_to_frag, addDummies=False)
    frag_split = []
    fragmols = Chem.GetMolFrags(f,
                                asMols=True,
                                fragsMolAtomMapping=frag_split,
                                sanitizeFrags=False)

    # Get the fragment of interest: the first one holding the anchor.
    holders = [(number, members)
               for number, members in enumerate(frag_split)
               if anchor_index in members]
    if not holders:
        raise Exception
    mol_N, indices = holders[0]
    frag = fragmols[mol_N]
    frag_anchor_index = indices.index(anchor_index)

    # pre-emptively fix atom ori_i; offset collapsed to avoid clashes.
    self.offset(frag)

    combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
    # Fragment atoms are appended after the scaffold atoms.
    shift = scaffold.GetNumAtoms()

    # The anchor is bonded to the scaffold atom that replaces its former
    # fragmentanda neighbour (which is not kept).
    for detail in attachment_details:
        combo.AddBond(frag_anchor_index + shift, detail['idx_S'], detail['type'])

    for oi, oad in zip(other_attachments, other_attachment_details):
        first = oad[0]
        combo.AddBond(indices.index(oi) + shift, first['idx_S'], first['type'])

    flags = Chem.rdmolops.SanitizeFlags
    Chem.SanitizeMol(combo,
                     sanitizeOps=flags.SANITIZE_ADJUSTHS + flags.SANITIZE_SETAROMATICITY,
                     catchErrors=True)
    self._prevent_two_bonds_on_dummy(combo)
    return combo.GetMol()
def collapse_ring(self, mol: Chem.Mol) -> Chem.Mol:
    """
    Collapses a ring(s) into a single dummy atom(s).
    Stores data as JSON in the atom.

    For every ring a carbon atom is added at the mean position of the ring
    atoms. It carries the properties ``_ori_name``, ``_ori_i`` (-1),
    ``_ori_is``, ``_neighbors``, ``_bonds``, ``_xs``, ``_ys``, ``_zs`` and
    ``_elements``. The new atoms are then bonded to what the ring atoms were
    bonded to, and the ring atoms are removed.

    :param mol: molecule with a conformer; a read-write copy is edited.
    :return: the collapsed molecule (``GetMol()`` of the edited copy).
    :raises ValueError: if a neighbouring ring atom has no other center atom.
    """
    self.store_positions(mol)
    mol = Chem.RWMol(mol)
    conf = mol.GetConformer()
    center_idxs = []  # index of the added atom, one per ring
    morituri = []  # ring atoms, to be removed at the end
    old2center = defaultdict(list)  # ring atom index -> added atoms of its rings
    for atomset in mol.GetRingInfo().AtomRings():
        morituri.extend(atomset)
        neighs = []
        neighbonds = []
        bonds = []
        xs = []
        ys = []
        zs = []
        elements = []
        # add elemental ring
        c = mol.AddAtom(Chem.Atom('C'))
        center_idxs.append(c)
        central = mol.GetAtomWithIdx(c)
        name = mol.GetProp('_Name') if mol.HasProp('_Name') else '???'
        central.SetProp('_ori_name', name),
        # get data for storage
        for i in atomset:
            old2center[i].append(c)
            atom = mol.GetAtomWithIdx(i)
            neigh_i = [a.GetIdx() for a in atom.GetNeighbors()]
            neighs.append(neigh_i)
            # bond type names, in the same order as neigh_i
            bond = [mol.GetBondBetweenAtoms(i, j).GetBondType().name for j in neigh_i]
            bonds.append(bond)
            pos = conf.GetAtomPosition(i)
            xs.append(pos.x)
            ys.append(pos.y)
            zs.append(pos.z)
            elements.append(atom.GetSymbol())
        # store data in elemental ring
        central.SetIntProp('_ori_i', -1)
        central.SetProp('_ori_is', json.dumps(atomset))
        central.SetProp('_neighbors', json.dumps(neighs))
        central.SetProp('_xs', json.dumps(xs))
        central.SetProp('_ys', json.dumps(ys))
        central.SetProp('_zs', json.dumps(zs))
        central.SetProp('_elements', json.dumps(elements))
        central.SetProp('_bonds', json.dumps(bonds))
        # place the new atom at the mean coordinate of the ring atoms
        conf.SetAtomPosition(c, Point3D(*[sum(axis) / len(axis) for axis in (xs, ys, zs)]))
    # NOTE(review): assumes AtomRings() yields the same rings in the same order
    # as in the loop above, so each ring pairs with its own center -- confirm.
    for atomset, center_i in zip(mol.GetRingInfo().AtomRings(), center_idxs):
        # bond to elemental ring
        central = mol.GetAtomWithIdx(center_i)
        neighss = json.loads(central.GetProp('_neighbors'))
        bondss = json.loads(central.GetProp('_bonds'))
        for neighs, bonds in zip(neighss, bondss):
            for neigh, bond in zip(neighs, bonds):
                if neigh not in atomset:
                    bt = getattr(Chem.BondType, bond)
                    if neigh not in morituri:
                        # neighbour is not a ring atom: bond to it directly
                        mol.AddBond(center_i, neigh, bt)
                    else:
                        # neighbour belongs to another ring: bond to that
                        # ring's center instead, unless already bonded
                        for other_center_i in old2center[neigh]:
                            if center_i != other_center_i:
                                if not mol.GetBondBetweenAtoms(center_i, other_center_i):
                                    mol.AddBond(center_i, other_center_i, bt)
                                break
                        else:
                            raise ValueError(f'Cannot find what {neigh} became')
    # remove the ring atoms, highest original index first
    for i in sorted(set(morituri), reverse=True):
        mol.RemoveAtom(self._get_new_index(mol, i))
    return mol.GetMol()
def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
                attachment_details: List[Dict], other_attachments: List[int],
                other_attachment_details: List[List[Dict]]) -> Chem.Mol:
    """
    This does the messy work for merge_pair.

    All bonds listed in the attachment details are cut in ``fragmentanda``;
    the fragment containing the anchor is combined with ``scaffold`` and
    bonded to it at the recorded scaffold positions. Intermediate states are
    printed and drawn when ``self._debug_draw`` is set.

    :param scaffold: molecule that receives the fragment
    :param fragmentanda: molecule the fragment is cut from
    :param anchor_index: index, in ``fragmentanda``, of the atom that attaches
    :param attachment_details: dicts with keys ``idx_F`` (fragmentanda index),
        ``idx_S`` (scaffold index) and ``type`` (bond type)
    :param other_attachments: further fragmentanda atoms that attach to the scaffold
    :param other_attachment_details: for each of those, a list whose first
        entry is a dict of the same form
    :return: the merged molecule
    :raises Exception: if no fragment contains the anchor atom
    """
    debug = self._debug_draw

    # Bonds to cut: anchor bonds first, then the first bond of every other attachment.
    bonds_to_frag = [fragmentanda.GetBondBetweenAtoms(anchor_index, detail['idx_F']).GetIdx()
                     for detail in attachment_details]
    for oi, oad in zip(other_attachments, other_attachment_details):
        bonds_to_frag.append(fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx())
    if debug and other_attachments:
        print('ring!', other_attachments)
        print('ring!', other_attachment_details)

    f = Chem.FragmentOnBonds(fragmentanda, bonds_to_frag, addDummies=False)
    frag_split = []
    fragmols = Chem.GetMolFrags(f,
                                asMols=True,
                                fragsMolAtomMapping=frag_split,
                                sanitizeFrags=False)
    if debug:
        print('Fragment splits')
        print(frag_split)

    # Get the fragment of interest: the first one holding the anchor.
    holders = [(number, members)
               for number, members in enumerate(frag_split)
               if anchor_index in members]
    if not holders:
        raise Exception
    mol_N, indices = holders[0]
    frag = fragmols[mol_N]
    frag_anchor_index = indices.index(anchor_index)

    # pre-emptively fix atom ori_i; offset collapsed to avoid clashes.
    self._offset_collapsed_ring(frag)
    self._offset_origins(frag)
    if debug:
        print('Fragment to add')
        self.draw_nicely(frag)

    combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
    # Fragment atoms are appended after the scaffold atoms.
    shift = scaffold.GetNumAtoms()
    scaffold_anchor_index = frag_anchor_index + shift
    if debug:
        print('Pre-merger')
        print(scaffold_anchor_index, attachment_details, anchor_index, scaffold.GetNumAtoms())
        self.draw_nicely(combo)

    for detail in attachment_details:
        combo.AddBond(scaffold_anchor_index, detail['idx_S'], detail['type'])

    for oi, oad in zip(other_attachments, other_attachment_details):
        bond_type = oad[0]['type']
        scaffold_attachment_index = oad[0]['idx_S']
        scaffold_anchor_index = indices.index(oi) + shift
        combo.AddBond(scaffold_anchor_index, scaffold_attachment_index, bond_type)
        if debug:
            print(
                f"Added additional {bond_type.name} bond between {scaffold_attachment_index} and {scaffold_anchor_index} " +
                f"(formerly {indices.index(oi)})")

    flags = Chem.rdmolops.SanitizeFlags
    Chem.SanitizeMol(combo,
                     sanitizeOps=flags.SANITIZE_ADJUSTHS + flags.SANITIZE_SETAROMATICITY,
                     catchErrors=True)
    if debug:
        print('Merged')
        self.draw_nicely(combo)
    self._prevent_two_bonds_on_dummy(combo)
    return combo.GetMol()