def process_by_folder(fd, inpath):
    """Load the ranked designs of one cycle folder and annotate them.

    Parameters
    ----------
    fd : str
        Name of the cycle folder, e.g. ``"cycle_3"``. The text after the
        ``"cycle_"`` prefix is stored on every molecule as the ``Cycle``
        property.
    inpath : str
        Directory that contains the cycle folder.

    Returns
    -------
    tuple or None
        ``(cir_mols, best_mol)``: the list of annotated ``PropertyMol``
        objects read from ``<inpath>/<fd>/ranked_designs.sd`` and the first
        readable molecule of that file. ``None`` when the file does not
        exist or contains no readable molecule.
    """
    prefix = "cycle_"
    # str.strip("cycle_") would remove any of the characters c/y/l/e/_ from
    # BOTH ends of the name; only the literal prefix must be dropped.
    cycle = fd[len(prefix):] if fd.startswith(prefix) else fd

    sd = inpath + '/' + fd + '/ranked_designs.sd'
    if not os.path.exists(sd):
        return None

    # SDMolSupplier yields None for records it cannot parse; skip those
    # instead of failing inside PropertyMol(None).
    cir_mols = [PropertyMol(m) for m in Chem.SDMolSupplier(sd) if m is not None]
    if not cir_mols:
        return None

    for m in cir_mols:
        # Calculate properties for each mol
        m.SetProp('Cycle', cycle)
        m.SetProp('MolWeight', str(MolWt(m)))
        m.SetProp('LogP', str(LogP(m)))
        m.SetProp('QED', str(QED(m)))
        m.SetProp('SAS', str(SAS(m)))

    # Select the highest score design in the cycle
    # (the first one in the ranked sd file)
    best_mol = cir_mols[0]
    return cir_mols, best_mol
def __read_sdf(fname, input_format, id_field_name=None, sanitize=True):
    """Yield ``(PropertyMol, title)`` pairs for the molecules of an SD file.

    Parameters
    ----------
    fname : str
        Path of the file to read.
    input_format : str
        ``'sdf'`` for a plain SD file or ``'sdf.gz'`` for a gzip-compressed
        one. Any other value makes the generator finish without yielding.
    id_field_name : str, optional
        Name of the SD field used as molecule title. When ``None`` the
        ``_Name`` property is used, falling back to the isomeric SMILES if
        that property is empty.
    sanitize : bool
        Passed through to the RDKit supplier.

    Yields
    ------
    tuple
        ``(PropertyMol(mol), mol_title)`` for every record the supplier
        could parse; records returned as ``None`` are skipped.
    """
    gz_handle = None
    try:
        if input_format == 'sdf':
            suppl = Chem.SDMolSupplier(fname, sanitize=sanitize)
        elif input_format == 'sdf.gz':
            # Keep a reference to the stream so it can be closed once the
            # generator is exhausted or closed early by the consumer.
            gz_handle = gzip.open(fname)
            suppl = Chem.ForwardSDMolSupplier(gz_handle, sanitize=sanitize)
        else:
            return

        for mol in suppl:
            if mol is None:
                continue
            if id_field_name is not None:
                mol_title = mol.GetProp(id_field_name)
            else:
                mol_title = mol.GetProp("_Name")
                if not mol_title:
                    mol_title = Chem.MolToSmiles(mol, isomericSmiles=True)
            yield PropertyMol(mol), mol_title
    finally:
        if gz_handle is not None:
            gz_handle.close()
def to_rdkit_mol(mol_repr, molid=None, instantiator=Chem.MolFromSmiles, to2D=False, to3D=False,
                 toPropertyMol=False, keep_Hs=True):
    """
    Converts a molecular representation (e.g. smiles string) into an RDKit molecule.
    Allows to perform common postprocessing operations on the resulting molecule.

    Parameters
    ----------
    mol_repr
        Either an existing ``Chem.Mol`` (used as is) or any representation
        that ``instantiator`` can turn into one.
    molid : optional
        Identifier used only in the warning logged when the molecule cannot
        be created.
    instantiator : callable
        Called as ``instantiator(mol_repr)`` when ``mol_repr`` is not already
        a ``Chem.Mol``.
    to2D : bool
        Compute 2D coordinates (ignored when ``to3D`` is set).
    to3D : bool
        Add hydrogens, embed a 3D conformer (``randomSeed=0``) and optimize
        it with UFF.
    toPropertyMol : bool
        Wrap the result in a ``PropertyMol``.
    keep_Hs : bool
        Only relevant with ``to3D``: when false the hydrogens added for
        embedding are removed again before returning.

    Returns
    -------
    The molecule, or ``None`` (after logging a warning) when it could not be
    created.

    Raises
    ------
    Exception
        When ``to3D`` is requested and the embedding fails.
    """
    if not isinstance(mol_repr, Chem.Mol):
        mol = instantiator(mol_repr)
    else:
        mol = mol_repr
    if mol is None:
        if molid is None:
            logger.warning('RDKit cannot create a molecule from %r', mol_repr)
        else:
            logger.warning('RDKit cannot create molecule %s from %r', molid, mol_repr)
        return mol
    if to3D:
        # Note: this fails often due to ETDG not able to digest properly stereochemistry
        # (use non isomeric smiles usually helps)
        # AddHs returns a NEW molecule; the result must be kept, otherwise the
        # embedding runs on the hydrogen-free input.
        mol = AllChem.AddHs(mol)
        if -1 == AllChem.EmbedMolecule(mol, randomSeed=0):
            raise Exception(
                'Could not embed molecule, blame stereochemistry? do not use isomeric smiles?'
            )
        AllChem.UFFOptimizeMolecule(mol)
        if not keep_Hs:
            # RemoveHs also returns a new molecule rather than editing in place.
            mol = AllChem.RemoveHs(mol)
    elif to2D:
        AllChem.Compute2DCoords(mol)
    if toPropertyMol:
        return PropertyMol(mol)
    return mol
print( "iter= " + str(iterCount) + " fullPopObj= " + str(round(fullPopObj, 3)) + " topPopObj= ", str(round(topPopObj, 3)) + " finalPopObj= " + str(round(finalPopObj, 3)) + " minObj= " + str(round(minObj, 3))) if (abs(finalPopObj) < abs(minObj)): minObj = finalPopObj if (args.statePickleFile != None): if (not os.path.isdir(os.path.dirname(args.statePickleFile))): os.makedirs(os.path.dirname(args.statePickleFile)) if (args.statePickleFile.endswith(".gz")): pickle.dump( ([PropertyMol(m) for m in actives], [PropertyMol(m) for m in inactives], aa_D_ref, ii_D_ref, ai_D_ref, iterCount, pop, minObj), gzip.open(args.statePickleFile, "wb"), protocol=pickle.HIGHEST_PROTOCOL) else: pickle.dump( ([PropertyMol(m) for m in actives], [PropertyMol(m) for m in inactives], aa_D_ref, ii_D_ref, ai_D_ref, iterCount, pop, minObj), open(args.statePickleFile, "wb"), protocol=pickle.HIGHEST_PROTOCOL) writePop(finalPop, actives, inactives, iterCount) if (abs(finalPopObj) < FINAL_POP_OBJ): break
def construct_ligand(fragment_ids, bond_ids, fragment_library):
    """
    Build a recombined ligand by joining fragments at their dummy atoms.

    Parameters
    ----------
    fragment_ids: list of str
        Fragment IDs of recombined ligand, e.g. `["SE_2", "AP_0", "FP_2"]`
        (`<subpocket>_<fragment index in subpocket pool>`).
    bond_ids : list of list of str
        Bond IDs of recombined ligand, e.g. `[["FP_6", "AP_10"], ["AP_11", "SE_13"]]`:
        Atom (`<subpocket>_<atom ID>`) pairs per fragment bond.
    fragment_library : dict of pandas.DataFrame
        SMILES and RDKit molecules for fragments (values) per subpocket (key).

    Returns
    -------
    ligand: rdkit.Chem.rdchem.Mol or None
        Recombined ligand with 2D coordinates. None if the two sides of a
        requested bond have different bond types, or if removing hydrogens
        from the assembled molecule raises ValueError.
    """

    def atom_by_id(mol, atom_id):
        # First atom of `mol` tagged with the given fragment atom ID.
        return next(a for a in mol.GetAtoms() if a.GetProp("fragment_atom_id") == atom_id)

    tagged_fragments = []
    for fragment_id in fragment_ids:
        # "<subpocket>_<index>": two-character subpocket, index after the underscore
        subpocket, index_in_pool = fragment_id[:2], int(fragment_id[3:])
        mol = Chem.RemoveHs(fragment_library[subpocket].ROMol_original[index_in_pool])

        # Tag every atom with an ID that stays resolvable after the fragments
        # have been merged into one molecule.
        for position, atom in enumerate(mol.GetAtoms()):
            atom.SetProp("fragment_atom_id", f"{subpocket}_{position}")
            atom.SetProp("fragment_id", mol.GetProp("complex_pdb"))

        tagged_fragments.append(PropertyMol(mol))

    # Fold all fragments into a single (still disconnected) molecule
    merged = reduce(Chem.CombineMols, tagged_fragments)
    editable = Chem.EditableMol(merged)

    dummy_indices = []
    for bond in bond_ids:
        dummy_a = atom_by_id(merged, bond[0])
        dummy_b = atom_by_id(merged, bond[1])
        anchor_a = dummy_a.GetNeighbors()[0]
        anchor_b = dummy_b.GetNeighbors()[0]

        order_a = merged.GetBondBetweenAtoms(dummy_a.GetIdx(), anchor_a.GetIdx()).GetBondType()
        order_b = merged.GetBondBetweenAtoms(dummy_b.GetIdx(), anchor_b.GetIdx()).GetBondType()
        if order_a != order_b:
            # Mismatching bond types: this ligand is not constructed
            return None

        editable.AddBond(anchor_a.GetIdx(), anchor_b.GetIdx(), order=order_a)
        dummy_indices += [dummy_a.GetIdx(), dummy_b.GetIdx()]

    # Drop the dummies that were bridged, highest index first (presumably so
    # the indices still waiting for removal stay valid).
    for idx in sorted(dummy_indices, reverse=True):
        editable.RemoveAtom(idx)
    ligand = editable.GetMol()

    # Any dummy atom left over becomes a hydrogen
    wildcard = Chem.MolFromSmiles("*")
    hydrogen = Chem.MolFromSmiles("[H]", sanitize=False)
    ligand = AllChem.ReplaceSubstructs(ligand, wildcard, hydrogen, replaceAll=True)[0]
    try:
        ligand = Chem.RemoveHs(ligand)
    except ValueError:
        print(Chem.MolToSmiles(ligand))
        return None

    # Strip molecule-level properties and the per-atom construction IDs
    for prop_name in ligand.GetPropNames():
        ligand.ClearProp(prop_name)
    for atom in ligand.GetAtoms():
        atom.ClearProp("fragment_atom_id")

    # Generate 2D coordinates
    AllChem.Compute2DCoords(ligand)
    return ligand