def edit_mol(rmol, edits, tatoms):
    """Apply bond edits to ``rmol`` and return the SMILES of the relevant fragments.

    Args:
        rmol: RDKit molecule whose atoms carry atom-map numbers; an atom is
            addressed in ``edits`` by its map number minus one.
        edits: iterable of 4-tuples ``(x, y, t, v)``. Any existing bond
            between atoms ``x`` and ``y`` is removed; when ``t > 0`` a bond
            of type ``BOND_FLOAT_TO_TYPE[t]`` is added. The fourth element is
            not used here.
        tatoms: set of (map number - 1) ids; only fragments sharing at least
            one id with this set are kept.

    Returns:
        The kept fragments' SMILES (atom maps cleared), sorted and joined
        with ``'.'``.
    """
    editable = Chem.RWMol(rmol)
    for atom in editable.GetAtoms():
        atom.SetNumExplicitHs(0)

    # Zero-based map number -> atom index in the molecule.
    idx_of = {
        atom.GetAtomMapNum() - 1: atom.GetIdx()
        for atom in rmol.GetAtoms()
    }

    for x, y, t, _ in edits:
        begin, end = idx_of[x], idx_of[y]
        if editable.GetBondBetweenAtoms(begin, end) is not None:
            editable.RemoveBond(begin, end)
        if t > 0:
            editable.AddBond(begin, end, BOND_FLOAT_TO_TYPE[t])

    # Round-trip each fragment through SMILES; unparseable ones are dropped.
    fragment_smiles = Chem.MolToSmiles(editable.GetMol()).split('.')
    kept = []
    for frag_smi in fragment_smiles:
        frag = Chem.MolFromSmiles(frag_smi)
        if frag is None:
            continue
        frag_ids = {atom.GetAtomMapNum() - 1 for atom in frag.GetAtoms()}
        if not (frag_ids & tatoms):
            continue
        for atom in frag.GetAtoms():
            atom.SetAtomMapNum(0)
        kept.append(frag)

    return '.'.join(sorted(Chem.MolToSmiles(frag) for frag in kept))
def __init__(self, smiles=None, rdk=None, conv_enabled=False):
    """Constructor

    Keyword Arguments:
        smiles {str} -- SMILES representation of a molecule (default: {None})
        rdk {rdkit Mol} -- molecule as an RDKit object (default: {None})
        conv_enabled {bool} -- whether to set both smiles and graph
            arguments here or lazily defer until called (default: {False})

    Raises:
        ValueError -- if neither a correct smiles string or a rdkit mol are
            provided (only checked when conv_enabled is True)
    """
    if conv_enabled:
        if isinstance(smiles, str):
            # Parsing doubles as validation: MolFromSmiles returns None for
            # a string it cannot parse.
            rdk = Chem.MolFromSmiles(smiles)
            if rdk is None:
                # Explicit raise instead of `assert`, which is stripped
                # under `python -O` and would let an invalid molecule through.
                raise ValueError(
                    "Could not parse SMILES: {!r}".format(smiles))
        elif rdk is not None:
            smiles = Chem.MolToSmiles(rdk)
        else:
            raise ValueError("Invalid arguments")

    self.smiles = smiles
    self.rdk = rdk
    self.graph = None  # should be obtained from rdk when needed
    self.synthesis_path = []  # list of Reactions
    self.begin_flag = True
def processMols(mols):
    """Print a tab-separated table with one row per molecule.

    A header row is printed first. ``None`` entries in ``mols`` are skipped.
    Each remaining molecule contributes its SMILES, its ``_Name`` property
    and the value returned by ``calculateScore`` (formatted with ``%3f``).
    """
    print('smiles\tName\tsa_score')
    for mol in mols:
        if mol is not None:
            score = calculateScore(mol)
            smi = Chem.MolToSmiles(mol)
            name = mol.GetProp('_Name')
            print(smi + "\t" + name + "\t%3f" % score)
def sanitize_smiles(smi, largest_fragment=False):
    """Return a standardized SMILES for ``smi`` on a best-effort basis.

    Args:
        smi: input SMILES string.
        largest_fragment: when true, also keep only the largest fragment
            (drops product counterions/salts/etc.).

    Returns:
        ``smi`` unchanged if it cannot be parsed. Otherwise the SMILES of
        the molecule after whichever standardization steps succeeded: a
        failure in any step is swallowed and the result of the last
        successful step is used.
    """
    parsed = Chem.MolFromSmiles(smi)
    if parsed is None:
        return smi

    cleaned = parsed
    try:
        # Standardize functional group representations.
        cleaned = standardizer.standardize(cleaned)
        if largest_fragment:
            # Remove product counterions/salts/etc.
            cleaned = standardizer.largest_fragment(cleaned)
        # Neutralize, e.g., carboxylic acids.
        cleaned = standardizer.uncharge(cleaned)
    except Exception:
        # Deliberate best effort: fall through with what we have so far.
        pass

    return Chem.MolToSmiles(cleaned)
def to_smiles(self):
    """Return the SMILES string of this molecule, computing it on first use.

    If ``self.smiles`` is ``None`` it is generated from ``self.rdk`` with
    ``Chem.MolToSmiles`` and stored on the instance, so later calls return
    the stored value.
    """
    if self.smiles is None:
        self.smiles = Chem.MolToSmiles(self.rdk)
    return self.smiles
def edit_mol(rmol, edits):
    """Apply predicted bond edits to ``rmol`` and return the product SMILES.

    Args:
        rmol: RDKit molecule whose atoms carry the integer property
            ``'molAtomMapNumber'``; edits address atoms by that number.
        edits: iterable of ``(x, y, t)`` triples. Any existing bond between
            atoms ``x`` and ``y`` is removed; when ``t > 0`` a new bond of
            type ``BOND_TYPE[t]`` is added between them.

    Returns:
        A list of SMILES strings, one per product fragment that could be
        parsed back from SMILES (unparseable fragments are dropped).
    """
    new_mol = Chem.RWMol(rmol)

    # Keep track of aromatic nitrogens, might cause explicit hydrogen issues
    aromatic_nitrogen_idx = set()
    # aromatic C (with a double bond) idx -> adjacent aromatic [nH] idx
    aromatic_carbonyl_adj_to_aromatic_nH = {}
    # aromatic C (exactly three bonds) idx -> adjacent aromatic n (no H) idx
    aromatic_carbondeg3_adj_to_aromatic_nH0 = {}
    for a in new_mol.GetAtoms():
        if a.GetIsAromatic() and a.GetSymbol() == 'N':
            aromatic_nitrogen_idx.add(a.GetIdx())
            for nbr in a.GetNeighbors():
                if (a.GetNumExplicitHs() == 1
                        and nbr.GetSymbol() == 'C'
                        and nbr.GetIsAromatic()
                        and any(b.GetBondTypeAsDouble() == 2
                                for b in nbr.GetBonds())):
                    aromatic_carbonyl_adj_to_aromatic_nH[
                        nbr.GetIdx()] = a.GetIdx()
                elif (a.GetNumExplicitHs() == 0
                      and nbr.GetSymbol() == 'C'
                      and nbr.GetIsAromatic()
                      and len(nbr.GetBonds()) == 3):
                    aromatic_carbondeg3_adj_to_aromatic_nH0[
                        nbr.GetIdx()] = a.GetIdx()
        else:
            # Every atom that is not an aromatic nitrogen loses its
            # explicit hydrogens.
            a.SetNumExplicitHs(0)
    new_mol.UpdatePropertyCache()

    # Atom-map number -> atom index.
    amap = {}
    for atom in rmol.GetAtoms():
        amap[atom.GetIntProp('molAtomMapNumber')] = atom.GetIdx()

    # Apply the edits as predicted
    for x, y, t in edits:
        ix, iy = amap[x], amap[y]
        bond = new_mol.GetBondBetweenAtoms(ix, iy)
        a1 = new_mol.GetAtomWithIdx(ix)
        a2 = new_mol.GetAtomWithIdx(iy)
        if bond is not None:
            # Read the bond order BEFORE removing the bond: once RemoveBond
            # has run, `bond` refers to a bond that is no longer part of the
            # molecule and must not be queried.
            old_order = bond.GetBondTypeAsDouble()
            new_mol.RemoveBond(ix, iy)

            # Are we losing a bond on an aromatic nitrogen?
            if old_order == 1.0:
                if ix in aromatic_nitrogen_idx:
                    if a1.GetTotalNumHs() == 0:
                        a1.SetNumExplicitHs(1)
                    elif a1.GetFormalCharge() == 1:
                        a1.SetFormalCharge(0)
                elif iy in aromatic_nitrogen_idx:
                    if a2.GetTotalNumHs() == 0:
                        a2.SetNumExplicitHs(1)
                    elif a2.GetFormalCharge() == 1:
                        a2.SetFormalCharge(0)

            # Are we losing a c=O bond on an aromatic ring?
            # If so, remove H from adjacent nH if appropriate
            if old_order == 2.0:
                if ix in aromatic_carbonyl_adj_to_aromatic_nH:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbonyl_adj_to_aromatic_nH[ix]
                    ).SetNumExplicitHs(0)
                elif iy in aromatic_carbonyl_adj_to_aromatic_nH:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbonyl_adj_to_aromatic_nH[iy]
                    ).SetNumExplicitHs(0)

        if t > 0:
            new_mol.AddBond(ix, iy, BOND_TYPE[t])

            # Special alkylation case?
            if t == 1:
                if ix in aromatic_nitrogen_idx:
                    if a1.GetTotalNumHs() == 1:
                        a1.SetNumExplicitHs(0)
                    else:
                        a1.SetFormalCharge(1)
                elif iy in aromatic_nitrogen_idx:
                    if a2.GetTotalNumHs() == 1:
                        a2.SetNumExplicitHs(0)
                    else:
                        a2.SetFormalCharge(1)

            # Are we getting a c=O bond on an aromatic ring?
            # If so, add H to adjacent nH0 if appropriate
            if t == 2:
                if ix in aromatic_carbondeg3_adj_to_aromatic_nH0:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbondeg3_adj_to_aromatic_nH0[ix]
                    ).SetNumExplicitHs(1)
                elif iy in aromatic_carbondeg3_adj_to_aromatic_nH0:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbondeg3_adj_to_aromatic_nH0[iy]
                    ).SetNumExplicitHs(1)

    pred_mol = new_mol.GetMol()

    # Clear formal charges to make molecules valid
    # Note: because S and P (among others) can change valence, be more flexible
    for atom in pred_mol.GetAtoms():
        atom.ClearProp('molAtomMapNumber')
        _fix_formal_charge(atom)

    # Bounce to/from SMILES to try to sanitize
    pred_smiles = Chem.MolToSmiles(pred_mol)  # <--- TODO: error occurs here
    pred_list = pred_smiles.split('.')
    pred_mols = [Chem.MolFromSmiles(frag_smiles) for frag_smiles in pred_list]

    for i, mol in enumerate(pred_mols):
        # Check if we failed/succeeded in previous step
        if mol is None:
            logging.debug('##### Unparseable mol: {}'.format(pred_list[i]))
            continue

        # Else, try post-sanitization fixes in structure
        mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
        if mol is None:
            # pred_mols[i] keeps the molecule from the first parse.
            continue
        for rxn in clean_rxns_postsani:
            # Each clean-up reaction is run on the same `mol`; a later
            # successful one overwrites pred_mols[i] set by an earlier one.
            out = rxn.RunReactants((mol, ))
            if out:
                try:
                    Chem.SanitizeMol(out[0][0])
                    pred_mols[i] = Chem.MolFromSmiles(
                        Chem.MolToSmiles(out[0][0]))
                except Exception as e:
                    print(e)
                    print('Could not sanitize postsani reaction product: {}'.
                          format(Chem.MolToSmiles(out[0][0])))
                    print('Original molecule was: {}'.format(
                        Chem.MolToSmiles(mol)))

    return [Chem.MolToSmiles(m) for m in pred_mols if m is not None]


def _bond_orders(atom):
    """Return the order (as a float) of every bond on ``atom``."""
    return [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]


def _fix_formal_charge(atom):
    """Adjust formal charge / explicit H count of ``atom`` from its bond orders."""
    symbol = atom.GetSymbol()
    charge = atom.GetFormalCharge()

    if symbol == 'N' and charge == 1:  # exclude negatively-charged azide
        if sum(_bond_orders(atom)) <= 3:
            atom.SetFormalCharge(0)

    elif symbol == 'N' and charge == -1:
        # handle negatively-charged azide addition
        if sum(_bond_orders(atom)) == 3 and any(
                nbr.GetSymbol() == 'N' for nbr in atom.GetNeighbors()):
            atom.SetFormalCharge(0)

    elif symbol == 'N':
        if sum(_bond_orders(atom)) == 4 and not atom.GetIsAromatic():
            atom.SetFormalCharge(1)

    elif symbol == 'C' and charge != 0:
        atom.SetFormalCharge(0)

    elif symbol == 'O' and charge != 0:
        if sum(_bond_orders(atom)) + atom.GetNumExplicitHs() == 2:
            atom.SetFormalCharge(0)

    elif symbol in ['Cl', 'Br', 'I', 'F'] and charge != 0:
        if sum(_bond_orders(atom)) == 1:
            atom.SetFormalCharge(0)

    elif symbol == 'S' and charge != 0:
        if sum(_bond_orders(atom)) in [2, 4, 6]:
            atom.SetFormalCharge(0)

    elif symbol == 'P':
        # quaternary phosphorus should be pos. charge with 0 H
        orders = _bond_orders(atom)
        if sum(orders) == 4 and len(orders) == 4:
            atom.SetFormalCharge(1)
            atom.SetNumExplicitHs(0)
        elif sum(orders) == 3 and len(orders) == 3:  # make sure neutral
            atom.SetFormalCharge(0)

    elif symbol == 'B':
        # quaternary boron should be neg. charge with 0 H
        orders = _bond_orders(atom)
        if sum(orders) == 4 and len(orders) == 4:
            atom.SetFormalCharge(-1)
            atom.SetNumExplicitHs(0)

    elif symbol in ['Mg', 'Zn']:
        orders = _bond_orders(atom)
        if sum(orders) == 1 and len(orders) == 1:
            atom.SetFormalCharge(1)

    elif symbol == 'Si':
        orders = _bond_orders(atom)
        # All single bonds: fill up to four substituents with explicit Hs.
        if sum(orders) == len(orders):
            atom.SetNumExplicitHs(max(0, 4 - len(orders)))
thisrow.append(r) thisrow.append(p) # Save pbond information pbonds = {} for bond in pmol.GetBonds(): a1 = idxfunc(bond.GetBeginAtom()) a2 = idxfunc(bond.GetEndAtom()) t = bond_types.index(bond.GetBondType()) pbonds[(a1, a2)] = pbonds[(a2, a1)] = t + 1 for atom in pmol.GetAtoms(): atom.ClearProp('molAtomMapNumber') psmiles = Chem.MolToSmiles(pmol) psmiles_sani = set(sanitize_smiles(psmiles, True).split('.')) psmiles = set(psmiles.split('.')) thisrow.append('.'.join(psmiles)) thisrow.append('.'.join(psmiles_sani)) ########### Use *true* edits to try to recover product if opts.bonds_as_doubles: cbonds = [] for gedit in gedits.split(';'): x, y, t = gedit.split('-') x, y, t = int(x), int(y), float(t) cbonds.append((x, y, bond_types_as_double[t])) else: