def create_dataset(args):
    """Build the fingerprint dataset from ``dataset/<args.dataset>.txt``.

    Each input line is split on a single space into a SMILES string and a
    property value.  Lines whose SMILES contains ``'.'`` are skipped.

    Args:
        args: object providing ``dataset`` (file stem) and ``radius``.

    Returns:
        list of ``(fingerprints, adjacency, molecular_size, property)``
        tuples, one per retained line.
    """
    from rdkit import Chem

    filename = os.path.join('dataset', '%s.pth' % (args.dataset))
    inputfile = os.path.join('dataset', '%s.txt' % (args.dataset))

    with open(inputfile, 'r') as handle:
        lines = handle.readlines()
    total = len(lines)

    dataset = []
    for index, line in enumerate(lines, 1):
        smiles, target = line.strip('\n').split(' ')

        # Multi-component SMILES are excluded.
        if '.' in smiles:
            continue

        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        atoms = create_atoms(mol, atom_dict)
        ij_bond_dict = create_ij_bond_dict(mol, bond_dict)
        fingerprints = extract_fingerprints(args.radius, atoms, ij_bond_dict,
                                            fingerprint_dict, edge_dict)
        adjacency = Chem.GetAdjacencyMatrix(mol)

        dataset.append((fingerprints, adjacency, len(atoms), target))
        # Progress indicator, rewritten in place on the same terminal line.
        print('\r%s: %5d/%5d' % (filename, index, total), end='')
    print()
    return dataset
def generate_conformations(m, n):
    """Embed conformers of ``m`` and compute a force-field energy for each.

    Hydrogens are added first, then up to ``n`` conformers are embedded.
    MMFF is used when the molecule has all MMFF parameters, otherwise UFF.
    Each conformer is optimized only when ``MAP_paths.MINI_Iterations`` is
    positive.

    Args:
        m: RDKit molecule.
        n: number of conformers to request.

    Returns:
        ``(mol, results)`` where ``mol`` is the hydrogen-added molecule and
        ``results`` maps conformer id to energy.  Returns ``None`` (after
        printing a message) when force-field parameters are missing or the
        force-field step raises.
    """
    mol = Chem.AddHs(m)
    ids = AllChem.EmbedMultipleConfs(
        mol, numConfs=n, useExpTorsionAnglePrefs=True, useBasicKnowledge=True)
    results = {}
    for i in ids:
        try:
            if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mol):
                ff = AllChem.MMFFGetMoleculeForceField(
                    mol, AllChem.MMFFGetMoleculeProperties(mol), confId=i)
                ff.Initialize()
                ff.CalcEnergy()
                if MAP_paths.MINI_Iterations > 0:
                    AllChem.MMFFOptimizeMolecule(mol, confId=i)
                results[i] = ff.CalcEnergy()
            elif Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mol):
                ff = AllChem.UFFGetMoleculeForceField(mol, confId=i)
                ff.Initialize()
                ff.CalcEnergy()
                if MAP_paths.MINI_Iterations > 0:
                    AllChem.UFFOptimizeMolecule(mol, confId=i)
                results[i] = ff.CalcEnergy()
            else:
                print(">> ERROR: missing force field parameters for atom(s) in your molecule.")
                return
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still
            # propagate instead of being reported as a force-field failure.
            print(">> ERROR: something went wrong in force field minimization.")
            return
    return mol, results
def generate_3d_structure(smi, obabel=1):
    """Generate 3D coordinates and a bond-order matrix from a SMILES string.

    OpenBabel is used when ``obabel`` is truthy (the default); otherwise
    RDKit is used.

    Returns:
        ``(mol, structure, bond)`` where ``mol`` is the toolkit molecule,
        ``structure`` is a flat list ``[sym, x, y, z, sym, x, y, z, ...]``
        and ``bond`` is an integer matrix of bond orders indexed by atom.
    """
    structure = []

    if obabel:
        # OpenBabel route.
        obmol = pybel.readstring('smi', smi)
        obmol.OBMol.AddHydrogens()
        obmol.make3D()

        natom = len(obmol.atoms)
        bond = np.zeros((natom, natom), dtype=int)
        for i in range(natom):
            for j in range(natom):
                # The OBMol.GetBond call here uses indices shifted by one.
                ob_bond = obmol.OBMol.GetBond(i + 1, j + 1)
                if ob_bond is not None:
                    bond[i][j] = ob_bond.GetBO()

        for at in obmol.atoms:
            pos = at.coords
            structure.extend([num_to_syms[at.atomicnum], pos[0], pos[1], pos[2]])
        return obmol, structure, bond

    # RDKit route.
    rdmol = Chem.AddHs(Chem.MolFromSmiles(smi))
    AllChem.EmbedMolecule(rdmol, AllChem.ETKDG())
    AllChem.MMFFOptimizeMolecule(rdmol)

    natom = len(rdmol.GetAtoms())
    bond = np.zeros((natom, natom), dtype=int)
    for i in range(natom):
        for j in range(natom):
            rd_bond = rdmol.GetBondBetweenAtoms(i, j)
            if rd_bond is not None:
                # int() truncates fractional bond types.
                bond[i][j] = int(rd_bond.GetBondTypeAsDouble())

    for i, atom in enumerate(rdmol.GetAtoms()):
        pos = rdmol.GetConformer(0).GetAtomPosition(i)
        structure.extend([atom.GetSymbol(), pos.x, pos.y, pos.z])
    return rdmol, structure, bond
def _run_obabel_xyz(command):
    """Run an ``obabel`` command line and parse the XYZ text it prints.

    The first two output lines are skipped.  For every remaining line with
    more than three space-separated fields, the non-empty fields after the
    first one are converted to floats.
    """
    process = subprocess.Popen(command.split(), stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, _ = process.communicate()
    coordinates = []
    for line in output.decode("utf-8").split("\n")[2:]:
        fields = line.split(' ')
        if len(fields) > 3:
            coordinates.append([float(x) for x in fields[1:] if len(x) > 0])
    return coordinates


def xyzfromsmi(smi):
    """Return atom labels and obabel-generated coordinates for a SMILES.

    The molecule is first written as a mol block to
    ``fragment_lookup/tmp.mol`` and converted with ``obabel``.  If anything
    in that path raises, the SMILES itself is written to ``tmp`` and
    converted instead.

    Returns:
        ``(atoms, xyz_coordinates)``: labels (via ``atom2label``) of the
        atoms of the hydrogen-added RDKit molecule, and the list of
        coordinate rows parsed from obabel's output.
    """
    mol = Chem.MolFromSmiles(smi)
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol)
    Chem.Kekulize(mol, clearAromaticFlags=True)
    try:
        mol_ = Chem.RemoveHs(mol)
        with open('fragment_lookup/tmp.mol', "w") as FILE:
            FILE.write(Chem.MolToMolBlock(mol_))
        xyz_coordinates = _run_obabel_xyz(
            'obabel -imol fragment_lookup/tmp.mol -oxyz --gen3d -xb')
    except Exception:
        # Fallback: let obabel read the SMILES directly.  Narrowed from a
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        with open('tmp', 'w') as FILE:
            FILE.write(smi)
        xyz_coordinates = _run_obabel_xyz('obabel -ismi tmp -oxyz --gen3d -xb ')
    atoms = [atom2label(atom.GetSymbol()) for atom in mol.GetAtoms()]
    return atoms, xyz_coordinates
def get_charge(mol, property_name, do_charge):
    """Attach per-atom weights/charges to ``mol``.

    When ``do_charge`` is False the values are read from the molecule
    property ``property_name`` (newline-separated numbers); if ``check_mol``
    reports a problem, equal weights ``1 / n_atoms`` are stored under the
    property name ``'equal_w'`` instead.  When ``do_charge`` is True,
    Gasteiger charges are computed and the molecule is checked afterwards.

    Returns:
        ``(mol, property_name, err)`` - the (possibly modified) molecule, the
        property name actually used, and the status from ``check_mol``
        (reset to 0 when the equal-weight fallback is used).
    """
    from rdkit.Chem import AllChem as Chem

    err = 0

    # partial charges supplied by the caller
    if do_charge is False:
        err = check_mol(mol, property_name, do_charge)
        if err == 0:
            # prepares molecule
            mol = Chem.RemoveHs(mol)
            n_at = mol.GetNumAtoms()
            # extracts the property according to the set name
            string_values = mol.GetPropsAsDict()[property_name].split("\n")
            # ``map`` is lazy on Python 3; materialise it so that the result
            # is a real 1-D float array that can be indexed per atom.
            w = np.asarray([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            n_at = mol.GetNumAtoms()
            # same 1-D format as the branch above
            w = np.ones(n_at) / n_at
            property_name = 'equal_w'
            err = 0

        # store the value on each atom
        for atom in range(n_at):
            mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, float(w[atom]))
        mol = Chem.RemoveHs(mol)

    # Gasteiger-Marsili charges
    elif do_charge is True and err == 0:
        Chem.ComputeGasteigerCharges(mol)
        err = check_mol(mol, property_name, do_charge)

    return mol, property_name, err
def calc_props_dekois(smiles):
    """Compute a fixed set of molecular properties for a SMILES string.

    Returns:
        ``(prop_dict, prop_array)`` where ``prop_dict`` maps property name to
        value and ``prop_array`` lists the same values in the order
        mol_wg, log_p, hba, hbd, ring_ct, rot_bnds, pos_charge, neg_charge.
        If any step raises, ``({}, [0, 0, 0, 0, 0, 0, 0, 0])`` is returned.
    """
    keys = ['mol_wg', 'log_p', 'hba', 'hbd', 'ring_ct', 'rot_bnds',
            'pos_charge', 'neg_charge']
    try:
        mol = Chem.MolFromSmiles(smiles)
        mol = Chem.AddHs(mol)

        prop_dict = {
            'mol_wg': Descriptors.MolWt(mol),
            'log_p': Chem.Crippen.MolLogP(mol),
            'hba': Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
            'hbd': Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
            # aromatic ring count
            'ring_ct': Chem.rdMolDescriptors.CalcNumAromaticRings(mol),
            'rot_bnds': Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
        }
        # formal charges
        prop_dict['pos_charge'], prop_dict['neg_charge'] = calc_charges(mol)

        prop_array = [prop_dict[key] for key in keys]
        return (prop_dict, prop_array)
    except Exception:
        # Best-effort by design: failures yield an all-zero row.  Narrowed
        # from a bare ``except`` so Ctrl-C / SystemExit still propagate.
        return ({}, [0, 0, 0, 0, 0, 0, 0, 0])
def getLDI(mol):
    """
    #################################################################
    Calculation of local dipole index (D)

    Gasteiger charges are computed on the hydrogen-added molecule; the
    result is the mean absolute charge difference over all bonds, rounded
    to 6 decimals.  Non-finite charges (NaN, +inf, -inf) count as 0.0.
    Returns 0.0 for a molecule without bonds.
    #################################################################
    """
    Hmol = Chem.AddHs(mol)
    GMCharge.ComputeGasteigerCharges(Hmol, iter_step)

    charges = []
    for atom in Hmol.GetAtoms():
        charge = float(atom.GetProp('_GasteigerCharge'))
        # isfinite also rejects -inf, which the former ``== numpy.inf``
        # comparison let through.
        charges.append(charge if math.isfinite(charge) else 0.0)

    bonds = Hmol.GetBonds()
    B = len(bonds)
    if B == 0:
        return 0.0

    differences = [
        numpy.absolute(charges[bond.GetBeginAtom().GetIdx()] -
                       charges[bond.GetEndAtom().GetIdx()])
        for bond in bonds
    ]
    return round(sum(differences) / B, 6)
def get_mol(xyz, smiles, with_conformer=True):
    """
    Build an RDKit mol (with explicit hydrogens) from a SMILES string and,
    optionally, attach a conformer built from ``xyz``.

    Note that this assumes the xyz is ordered the same way an RDKit object
    of the same smiles would be ordered, and that connectivity does not
    differ between the RDKit mol and the xyz.

    Args:
        xyz (torch.Tensor): atom type and xyz of geometry.
        smiles (str): SMILES string
        with_conformer (bool): also add conformer to the RDKit mol
    Returns:
        mol (rdkit.Chem.rdchem.Mol): RDKit mol object
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    if not with_conformer:
        return mol

    mol.AddConformer(get_conformer(xyz))
    return mol
def opt_mol_from_smi(smi, save_name):
    """
    Embed and MMFF-optimize a molecule from SMILES and write it as SDF.

    Hydrogens are added for embedding/optimization and removed again before
    writing.  A fixed set of placeholder properties is attached to the
    record.

    Args:
        smi: SMILES string.
        save_name: path of the SD file to write.

    Returns:
        ``save_name + ".mol"``.
        NOTE(review): the file is written to ``save_name`` itself, not to
        the returned path - confirm what callers expect.
    """
    mol = Chem.MolFromSmiles(smi)
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol, randomSeed=3)
    AllChem.MMFFOptimizeMolecule(mol)
    mol = Chem.rdmolops.RemoveHs(mol)

    mol.SetProp('_Name', 'Chemistry ' + save_name)
    for key in ('STEREOCHEM', 'EF', 'MOL_WEIGHT', 'COMPOUND_ID', 'SUPPLIER',
                'COMMEN'):
        mol.SetProp(key, str(1.00))

    writer = Chem.SDWriter(save_name)
    try:
        writer.write(mol)
    finally:
        # Close explicitly so the record is flushed to disk before returning.
        writer.close()
    return save_name + ".mol"
def create_3d_sdf_from_smiles(smiles_zincid_dict, decoy_sdf):
    '''
    Create a combined 3D SD file from SMILES using RDKit.

    Each molecule gets explicit hydrogens, is embedded with random starting
    coordinates, UFF-optimized and written with the name ``"ZIN" + id``.
    Molecules for which any step raises are reported on stdout and skipped.

    :param smiles_zincid_dict: dictionary mapping SMILES to molecule name
    :param decoy_sdf: path of the SD file to write
    :return: None (the result is the file at ``decoy_sdf``)
    '''
    writer = Chem.SDWriter(decoy_sdf)
    try:
        for k, v in smiles_zincid_dict.items():
            try:
                mol = Chem.MolFromSmiles(k)
                molH = Chem.AddHs(mol)
                AllChem.EmbedMolecule(molH, useRandomCoords=True)
                AllChem.UFFOptimizeMolecule(molH)
                molH.SetProp("_Name", "ZIN" + v)
                writer.write(molH)
            except Exception:
                # Narrowed from a bare ``except`` so Ctrl-C still aborts.
                print("no conformer for Zin{}".format(v))
    finally:
        # Always flush/close the output, even if iteration is interrupted.
        writer.close()
def build_from_smiles(smiles_pattern, protonate=False, openff_compatible=True):
    """Build an RDKit molecule from a SMILES pattern with custom sanitization.

    The molecule is parsed unsanitized, sanitized without the aromaticity
    step (and without the hydrogen-adjustment step unless ``protonate``),
    optionally given explicit hydrogens, then assigned MDL aromaticity.
    With ``openff_compatible`` the result is kekulized at the end.
    """
    ops = Chem.SanitizeFlags
    mol = Chem.MolFromSmiles(smiles_pattern, sanitize=False)

    base_flags = ops.SANITIZE_ALL ^ ops.SANITIZE_SETAROMATICITY
    if protonate:
        Chem.SanitizeMol(mol, base_flags)
        mol = Chem.AddHs(mol)
    else:
        Chem.SanitizeMol(mol, base_flags ^ ops.SANITIZE_ADJUSTHS)

    Chem.SetAromaticity(mol, Chem.AromaticityModel.AROMATICITY_MDL)
    Chem.SanitizeMol(mol, ops.SANITIZE_SETAROMATICITY)

    if openff_compatible:
        # Needed since the TK doesn't like float bond orders
        # This means it will essentially throw out the : specifier?
        Chem.Kekulize(mol)
    return mol
def add_prot_Hs(rdmol):
    """
    Add hydrogens (with coordinates) to a molecule read from PDB.

    Every added hydrogen that has no PDB residue info receives the residue
    info of the atom at the other end of its first bond.  Failures to copy
    the info are reported on stdout and do not abort the loop.

    Returns the hydrogen-added molecule.
    """
    retmol = Chem.AddHs(rdmol, addCoords=True)
    for atom in retmol.GetAtoms():
        if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
            bond = atom.GetBonds()[0]
            # ``GetIdx`` must be *called*; comparing against the bound method
            # was always False and could select the hydrogen itself.
            if bond.GetBeginAtom().GetIdx() == atom.GetIdx():
                connected_atom = bond.GetEndAtom()
            else:
                connected_atom = bond.GetBeginAtom()
            try:
                res_info = connected_atom.GetPDBResidueInfo()
                atom.SetMonomerInfo(res_info)
            except Exception:
                print(
                    "Hydrogen annotation failed:",
                    connected_atom.GetIdx(),
                    atom.GetIdx(),
                )
    return retmol
def multi_prods(mol_list, rxn, debug=False):
    """Run ``rxn`` on every molecule and collect its four products per outcome.

    Molecules whose property cache / ring info cannot be updated are
    reported on stdout and skipped.  Each remaining molecule is passed
    (with explicit hydrogens) to ``rxn.RunReactants``; for every product
    set, elements 0-3 are appended to the four result lists.

    Returns:
        ``(prod1_list, prod2_list, prod3_list, prod4_list)``.
    """
    prod1_list, prod2_list, prod3_list, prod4_list = [], [], [], []
    for mol in mol_list:
        if debug:
            print(MolToSmiles(mol))
        try:
            mol.UpdatePropertyCache()
            FastFindRings(mol)
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C still aborts the run.
            print('This mol fails! ' + MolToSmiles(mol))
            continue
        # An empty result simply yields no iterations.
        for prod in rxn.RunReactants((Chem.AddHs(mol), )):
            prod1_list.append(prod[0])
            prod2_list.append(prod[1])
            prod3_list.append(prod[2])
            prod4_list.append(prod[3])
    return prod1_list, prod2_list, prod3_list, prod4_list
def CalculateHydrogenNumber(mol):
    """
    #################################################################
    Count the hydrogen atoms of a molecule (descriptor ``nhyd``).

    Explicit hydrogens are added first, then atoms with atomic number 1
    are counted.

    Usage:
        result=CalculateHydrogenNumber(mol)
        Input: mol is a molecule object.
        Output: result is a numeric value.
    #################################################################
    """
    Hmol = Chem.AddHs(mol)
    return sum(1 for atom in Hmol.GetAtoms() if atom.GetAtomicNum() == 1)
def convert_sugar_forms(molecule):
    """Return ``molecule`` plus the new products of four sugar reactions.

    Each reaction SMARTS below is applied to ``molecule``.  Products are
    flattened with ``traverse`` and de-duplicated by isomeric SMILES (the
    hydrogen-stripped input is pre-registered as seen).  Each new product
    is re-parsed from its SMILES and given explicit hydrogens.

    Returns a list whose first element is the input molecule.
    """
    reaction_smarts = (
        '[O:1]1[C:2]([C:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([C:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([C:1])[C:3][C:4][C:5]1>>[C:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
        '[O:1]1[C:2]([H:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([H:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([H:1])[C:3][C:4][C:5]1>>[H:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
    )
    # All reactions are compiled before any of them is run.
    sugarrxns = [AllChem.ReactionFromSmarts(smarts) for smarts in reaction_smarts]

    rxnproducts = [molecule]
    seen = {Chem.MolToSmiles(Chem.RemoveHs(molecule), isomericSmiles=True)}

    for sugarrxn in sugarrxns:
        for product in traverse(sugarrxn.RunReactants((molecule,))):
            smilesprod = Chem.MolToSmiles(product, isomericSmiles=True)
            if smilesprod in seen:
                continue
            pmol = Chem.AddHs(Chem.MolFromSmiles(smilesprod))
            seen.add(smilesprod)
            rxnproducts.append(pmol)
    return rxnproducts
def prepare_smiles_and_mol(self, mol):
    """Prepare the `smiles` and `mol` used by the following preprocessing.

    The parser class calls this before `get_input_features`.  Override it
    to support custom `smiles`/`mol` extraction.

    The molecule is round-tripped through its canonical, non-isomeric
    SMILES; explicit hydrogens are added when `self.add_Hs` is set and the
    molecule is kekulized when `self.kekulize` is set.

    Args:
        mol (mol): mol instance

    Returns (tuple): (`smiles`, `mol`)
    """
    # A SMILES expression is not unique, so canonicalise it first.
    canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False,
                                        canonical=True)
    prepared = Chem.MolFromSmiles(canonical_smiles)
    if self.add_Hs:
        prepared = Chem.AddHs(prepared)
    if self.kekulize:
        Chem.Kekulize(prepared)
    return canonical_smiles, prepared
def calculate_and_write_fp(title, mol, outf, _type, n, nbits, compress):
    """Compute a fingerprint for ``mol`` and write one tab-separated line.

    Args:
        title: text written at the start of the line.
        mol: RDKit molecule.
        outf: writable text stream.
        _type: 1 = Morgan bit vector (ecfp), 2 = Morgan bit vector with
            ``useFeatures=True`` (fcfp), 3 = MACCS keys computed on the
            hydrogen-added molecule, with the first bit dropped.
        n: Morgan radius (unused for MACCS).
        nbits: Morgan bit-vector length (unused for MACCS).
        compress: if true, write the bit string as a single field;
            otherwise write every bit as its own tab-separated field.

    Raises:
        ValueError: if ``_type`` is not 1, 2 or 3.  Nothing is written in
            that case.
    """
    if _type == 1:  # ecfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, n, nBits=nbits)
        bit_string = fp.ToBitString()
    elif _type == 2:  # fcfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, n, nBits=nbits,
                                                   useFeatures=True)
        bit_string = fp.ToBitString()
    elif _type == 3:  # MACCS
        mol = Chem.AddHs(mol)
        fp = MACCSkeys.GenMACCSKeys(mol)
        bit_string = fp.ToBitString()[1:]
    else:
        # Previously fell through and failed later with an unrelated
        # UnboundLocalError on ``bit_string``.
        raise ValueError(
            "Unknown fingerprint type: %r (expected 1, 2 or 3)" % (_type,))

    outf.write(title)
    if compress:
        outf.write("\t" + bit_string)
    else:
        outf.write("".join("\t" + bit for bit in bit_string))
    outf.write("\n")
def _featurize(self, mol, smiles):
    """
    Featurize a compound as a ``GnnMol``.

    Hydrogens are added, then every disconnected fragment is fingerprinted
    separately and the per-fragment fingerprints are concatenated.  This is
    meant to handle SMILES containing '.' (disconnected structures) rather
    than dropping such samples.  The adjacency is built from the whole
    hydrogen-added molecule.

    :param mol: RDKit molecule
    :param smiles: SMILES string stored on the result
    :return: GnnMol(mol, fingerprints, adjacency, smiles)
    """
    mol = Chem.AddHs(mol)  # Consider hydrogens.

    def fragment_fingerprints(frag_mol):
        atoms = create_atoms(self, frag_mol)
        i_jbond_dict = create_ijbonddict(self, frag_mol)
        return extract_fingerprints(self, atoms, i_jbond_dict, self.radius)

    frag_fingerprints = [
        fragment_fingerprints(frag_mol)
        for frag_mol in Chem.GetMolFrags(mol, asMols=True)
    ]
    fingerprints = np.concatenate(frag_fingerprints)
    adjacency = create_adjacency(mol)
    return GnnMol(mol, fingerprints, adjacency, smiles)
def parse_smiles(smiles):
    """Return element symbols and embedded 3D coordinates for a SMILES.

    The molecule is given explicit hydrogens, embedded with RDKit, written
    to a mol block, and the atom records are read back from that text.

    Returns:
        ``(E, G)``: list of element symbols and an array of coordinates
        (one row of three floats per atom record).
    """
    # load in rdkit
    from rdkit import Chem
    from rdkit.Chem import AllChem

    # construct rdkit object
    m2 = Chem.AddHs(Chem.MolFromSmiles(smiles))
    AllChem.EmbedMolecule(m2)

    # parse mol block and obtain E & G
    E, G = [], []
    for line in Chem.MolToMolBlock(m2).split('\n'):
        fields = line.split()
        # Keep only records with more than five fields that are neither
        # 'M' lines nor the line ending in 'V2000'.
        if len(fields) <= 5 or fields[0] == 'M' or fields[-1] == 'V2000':
            continue
        E.append(fields[3])
        G.append([float(x) for x in fields[:3]])
    return E, np.array(G)
def make_conformers(smiles_path, output_dir, num_confs=300, rms_thresh=1.5):
    """Embed conformers for the SMILES in ``smiles_path`` and write PDB files.

    Conformers are embedded on the hydrogen-added molecule with RMS pruning
    at ``rms_thresh``; each one is then written from the hydrogen-stripped
    molecule to ``output_dir / "<i>.pdb"`` via ``output_conformer``.

    Returns:
        ``(number_of_conformers, output_dir)``.
    """
    smiles_string = smiles_from_path(smiles_path)
    with_hydrogens = Chem.AddHs(Chem.MolFromSmiles(smiles_string))
    cids = AllChem.EmbedMultipleConfs(
        with_hydrogens,
        numConfs=num_confs,
        pruneRmsThresh=rms_thresh,
    )
    without_hydrogens = Chem.RemoveHs(with_hydrogens)
    for i, conformer in enumerate(cids):
        target = output_dir / "{}.pdb".format(i)
        output_conformer(without_hydrogens, conformer, target)
    return len(cids), output_dir
def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1):
    """
    Generate PLAMS molecule(s) from a smarts string.

    This makes it possible, for example, to define hydrogens explicitly. It
    is less suitable for aromatic molecules (use from_smiles in that case).
    Only the first whitespace-separated token of ``smarts`` is used.

    :parameter str smarts: A smarts string
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None
        results in skipping of the geometry optimization step.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A molecule with hydrogens and 3D coordinates or a list of
        molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    pattern = str(smarts.split()[0])
    parsed = Chem.MolFromSmarts(pattern)
    Chem.SanitizeMol(parsed)
    hydrogenated = Chem.AddHs(parsed)
    hydrogenated.SetProp('smiles', pattern)
    return get_conformations(hydrogenated, nconfs, name, forcefield, rms)
def test0AddHds(self):
    """AddHs with coordinates places ethane's hydrogens at known positions."""
    mol = Chem.MolFromSmiles("CC")
    heavy_conf = Chem.Conformer(1)
    heavy_conf.SetAtomPosition(0, Point3D(-0.5, 0.0, 0.0))
    heavy_conf.SetAtomPosition(1, Point3D(1.0, 0.0, 0.0))
    mol.AddConformer(heavy_conf)

    conf2 = mol.GetConformer()
    self.assertTrue(conf2.GetNumAtoms() == 2)

    nmol = Chem.AddHs(mol, 0, 1)
    conf3 = nmol.GetConformer()
    self.assertTrue(conf3.GetNumAtoms() == 8)
    # The source molecule's conformer still has two atoms.
    self.assertTrue(conf2.GetNumAtoms() == 2)

    targetCoords = [
        [-0.5, 0.0, 0.0],
        [1.0, 0.0, 0.0],
        [-0.8667, 0.0, 1.03709],
        [-0.8667, 0.8981, -0.5185],
        [-0.8667, -0.8981, -0.5185],
        [1.3667, 0.0, -1.0371],
        [1.36667, 0.8981, 0.5185],
        [1.36667, -0.8981, 0.5185],
    ]
    for i, expected in enumerate(targetCoords):
        pt = conf3.GetAtomPosition(i)
        self.assertTrue(ptEq(pt, Point3D(*expected)))
def canon(df, idx):
    """Canonicalise the SMILES in row ``idx`` of ``df`` and store the results.

    On success the row gets ``c_smiles`` (SMILES of the hydrogen-added
    molecule), ``status = 0`` and ``Formula`` (molecular formula with the
    chlorine term, including its count, moved to the end).  On any failure
    ``status`` is set to -2 and the problem is printed.

    Args:
        df: DataFrame with at least ``smiles`` and ``Name`` columns.
        idx: index label of the row to process.

    Returns:
        The same DataFrame, modified in place.
    """
    import re

    print('trying to canonize smile for idx: {}'.format(idx))
    # Bound up front so the error report below works even when reading the
    # SMILES itself is what failed.
    smile = None
    try:
        smile = df.loc[idx]['smiles']
        m = Chem.MolFromSmiles(smile)
        m = Chem.AddHs(m)
        c_smile = Chem.MolToSmiles(m)
        df.loc[idx, 'c_smiles'] = c_smile
        df.loc[idx, 'status'] = 0
        formula = CalcMolFormula(m)
        # Move the whole chlorine term ("Cl" plus its count) to the end.
        # Removing only "Cl" left the count glued to the previous element
        # (e.g. "C2H4Cl2" became "C2H42Cl").
        chlorine = re.search(r'Cl\d*', formula)
        if chlorine:
            formula = (formula[:chlorine.start()] + formula[chlorine.end():]
                       + chlorine.group())
        df.loc[idx, 'Formula'] = formula
    except Exception as e:
        df.loc[idx, 'status'] = -2
        print("could not convert smile {} of molecule {} : {}".format(
            smile, idx, df.loc[idx]['Name']))
        print('Exception: {}'.format(e))
    return df
def generate_conformers(self, savefolder, savename="molecule_conformers", filetype="pdb",
                        savefolder_exist_ok=False, num_confs=400):
    """
    Generate ligand conformers and save each one to its own file.

    Parameters
    ----------
    savefolder: str
        Path to directory where the results will be saved
    savename: str
        Name of the generated files. example filename: <savename>_1.pdb
    filetype: str
        must be 'pdb' or 'sdf'
    savefolder_exist_ok: bool
        if false an error is raised if savefolder already exists
    num_confs: int
        Number of conformers to request from the embedding.

    Raises
    ------
    ValueError
        If ``filetype`` is not supported.  This is checked before any
        directory is created or conformer is generated.
    """
    # Fail fast on a bad file type instead of after the expensive embedding.
    if filetype not in ("pdb", "sdf"):
        raise ValueError("Unknown file format. Cannot save to format '{}'".format(filetype))

    from rdkit.Chem import AllChem

    os.makedirs(savefolder, exist_ok=savefolder_exist_ok)

    mol = deepcopy(self._mol)
    mol = Chem.AddHs(mol)
    ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, pruneRmsThresh=1., maxAttempts=10000)
    for conf_id in ids:
        AllChem.UFFOptimizeMolecule(mol, confId=conf_id)

    chemwrite = Chem.PDBWriter if filetype == "pdb" else Chem.SDWriter
    for index, conf_id in enumerate(ids):
        path = os.path.join(savefolder, '{}_{}.{}'.format(savename, index + 1, filetype))
        writer = chemwrite(path)
        try:
            writer.write(mol, confId=conf_id)
        finally:
            # Close each writer so the file is flushed and the handle freed.
            writer.close()
def to_conformers(rdm, nconfs):
    """ Generate molecular geometries for a set of conformers from an RDKit
        molecule object, ordered by MMFF result energy (lowest first).

        A single-atom molecule yields one geometry at the origin.
        Redundant conformers are currently not removed.

        :param rdm: molecule object
        :type rdm: RDKit molecule object
        :param nconfs: number of conformers to generate
        :type nconfs: int
        :rtype: automol geometry data structure
    """
    rdm = _rd_chem.AddHs(rdm)
    atms = rdm.GetAtoms()
    natms = len(rdm.GetAtoms())

    if natms == 1:
        syms = [str(atms[0].GetSymbol()).title()]
        xyzs = [(0., 0., 0.)]
        return [automol.create.geom.from_data(syms, xyzs, angstrom=True)]

    cids = _rd_all_chem.EmbedMultipleConfs(rdm, numConfs=nconfs)
    res = _rd_all_chem.MMFFOptimizeMoleculeConfs(rdm)
    # Second element of each result entry is used as the energy.
    energies = list(zip(*res))[1]

    geos = []
    for cid in cids:
        syms = tuple(str(rda.GetSymbol()).title() for rda in atms)
        xyzs = tuple(map(tuple, rdm.GetConformer(cid).GetPositions()))
        geos.append(automol.create.geom.from_data(syms, xyzs, angstrom=True))

    # Sort geometries using the energies
    ranked = sorted(zip(energies, geos), key=lambda pair: pair[0])
    return [geo for _, geo in ranked]
def mol_to_hg(mol, kekulize, add_Hs):
    """ Build a hypergraph from the bipartite representation of a molecule.

    Every node of the bipartite graph whose name starts with ``'atom_'``
    becomes a hyperedge over its adjacent bond nodes; bond nodes become
    hypergraph nodes.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        molecule object
    kekulize : bool
        kekulize or not
    add_Hs : bool
        add explicit hydrogens to the molecule or not.

    Returns
    -------
    Hypergraph
    """
    if add_Hs:
        mol = Chem.AddHs(mol)
    if kekulize:
        Chem.Kekulize(mol)

    bipartite_g = mol_to_bipartite(mol, kekulize)
    hg = Hypergraph()

    atom_nodes = [name for name in bipartite_g.nodes()
                  if name.startswith('atom_')]
    for atom_node in atom_nodes:
        bond_nodes = set()
        for bond_node in bipartite_g.adj[atom_node]:
            hg.add_node(bond_node,
                        attr_dict=bipartite_g.node[bond_node]['bond_attr'])
            bond_nodes.add(bond_node)
        hg.add_edge(bond_nodes,
                    attr_dict=bipartite_g.node[atom_node]['atom_attr'])
    return hg
def generate_structures(vae, smi, char_to_index, limit=1e4, write=False):
    """Sample SMILES from ``vae`` until no new structure appears for a while.

    Each iteration decodes a candidate at a random temperature in [0, 2),
    embeds and optimizes it, and records its SMILES when it has not been
    seen before.  Sampling stops once more than ``limit`` consecutive
    iterations produced nothing new.

    Args:
        vae, smi, char_to_index: forwarded to ``decode_smiles``.
        limit: number of consecutive unproductive iterations tolerated.
        write: if truthy, used as the file stem of a CSV ("<write>.csv")
            to which the result is saved.

    Returns:
        DataFrame with columns ``smiles``, ``temperature``, ``iteration``
        (empty when no candidate was ever accepted).
    """
    rdkit_mols = []
    temps = []
    iterations = []
    iteration = limit_counter = 0

    def _as_frame():
        frame = pd.DataFrame([rdkit_mols, temps, iterations]).T
        frame.columns = ['smiles', 'temperature', 'iteration']
        return frame

    while True:
        iteration += 1
        limit_counter += 1
        t = random.random() * 2
        candidate = decode_smiles(vae, smi, char_to_index, temp=t).split(" ")[0]
        try:
            # NOTE(review): EmbedMolecule/ETKDG/UFFOptimizeMolecule are looked
            # up on ``Chem``; confirm that name provides them in this file,
            # otherwise every candidate is silently rejected here.
            sampled = Chem.MolFromSmiles(candidate)
            cation = Chem.AddHs(sampled)
            Chem.EmbedMolecule(cation, Chem.ETKDG())
            Chem.UFFOptimizeMolecule(cation)
            cation = Chem.RemoveHs(cation)
            candidate = Chem.MolToSmiles(cation)
            if candidate not in rdkit_mols:
                temps.append(t)
                iterations.append(iteration)
                rdkit_mols.append(candidate)
                limit_counter = 0
                print(_as_frame())
        except Exception:
            # Invalid candidates are expected and skipped.  Narrowed from a
            # bare ``except`` so Ctrl-C can interrupt this open-ended loop.
            pass
        if limit_counter > limit:
            break

    # Always build the result here so it exists even when nothing was accepted.
    df = _as_frame()
    if write:
        pd.DataFrame.to_csv(df, path_or_buf='{}.csv'.format(write), index=False)
    return df
def from_smiles(cls, smiles_input, neutralize_molecule=True):
    """
    Create a molecule from a SMILES string: store its InChI and SMILES,
    embed 3D coordinates with RDKit, and record coordinate metadata.
    """
    started = time.perf_counter()
    mdh_mol = cls()

    parsed = Chem.MolFromSmiles(smiles_input)
    if neutralize_molecule:
        # neutralize atoms
        parsed = neutralize_atoms(parsed)

    # InChI (for finding entries in database) and SMILES of the molecule
    mdh_mol.inchi = AllChem.MolToInchi(
        parsed)  # TODO: producing warning, generate inchi's separately?
    mdh_mol.smiles = Chem.MolToSmiles(parsed)

    # 3D coordinates are embedded on the hydrogen-added molecule
    with_hydrogens = Chem.AddHs(parsed)
    AllChem.EmbedMolecule(with_hydrogens, randomSeed=0xF00D)
    molblock = Chem.MolToMolBlock(with_hydrogens)

    # metadata 5-tuple: [software, version, method, calc.-time, datetime]
    mdh_mol.coordinate_metadata[0] = rdkit.__name__
    mdh_mol.coordinate_metadata[1] = rdkit.__version__
    mdh_mol.coordinate_metadata[2] = "EKTG"  # TODO: change to ETKDG
    mdh_mol.coordinate_metadata[4] = datetime.datetime.utcnow()

    # extract cartesian coordinates from the mol block into the molecule
    mdh_mol.molblock_data_extraction(molblock, rdkit_molblock=True)

    elapsed = time.perf_counter() - started
    mdh_mol.coordinate_metadata[3] = round(elapsed, 3)
    return mdh_mol
def MinimizeMolecule(Mol, MolCount, Writer):
    """Minimize a molecule with the configured force field and write it out.

    Hydrogens are added/removed according to ``OptionsInfo``.  Returns
    False (after a warning) when the optimizer raises ``RuntimeError``;
    otherwise the molecule is written and True is returned, with a warning
    if the optimizer reported a non-zero status.
    """
    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    Status = 0
    try:
        if OptionsInfo["UseUFF"]:
            Status = AllChem.UFFOptimizeMolecule(
                Mol, maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseMMFF"]:
            Status = AllChem.MMFFOptimizeMolecule(
                Mol, maxIters=OptionsInfo["MaxIters"])
        else:
            MiscUtil.PrintError(
                "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
                % OptionsInfo["ForceField"])
    except RuntimeError as ErrMsg:
        FailedName = RDKitUtil.GetMolName(Mol, MolCount)
        MiscUtil.PrintWarning(
            "Minimization couldn't be performed for molecule %s:\n%s\n" %
            (FailedName, ErrMsg))
        return False

    if Status != 0:
        UnconvergedName = RDKitUtil.GetMolName(Mol, MolCount)
        MiscUtil.PrintWarning(
            "Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n"
            % (UnconvergedName, OptionsInfo["MaxIters"]))

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    Writer.write(Mol)
    return True
def fragment_mol(smi, smi_id=''):
    """Fragment a SMILES with rdMMPA and return the set of CSV output lines.

    Each line has the form ``"<smi>,<smi_id>,<core>,<chains>\\n"``.  Heavy-atom
    cuts are generated with up to 4 and up to 3 cuts; single hydrogen cuts
    are added when the hydrogen-added molecule has fewer than 60 hydrogens.
    An unparsable SMILES is reported on stderr and yields an empty set.
    """
    outlines = set()
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % smi)
        return outlines

    def as_line(core, chains):
        return '%s,%s,%s,%s\n' % (smi, smi_id, core, chains)

    # heavy atoms
    heavy_pattern = "[!#1]!@!=!#[!#1]"
    frags = rdMMPA.FragmentMol(mol, pattern=heavy_pattern, maxCuts=4,
                               resultsAsMols=False, maxCutBonds=30)
    frags += rdMMPA.FragmentMol(mol, pattern=heavy_pattern, maxCuts=3,
                                resultsAsMols=False, maxCutBonds=30)
    for core, chains in set(frags):
        outlines.add(as_line(core, chains))

    # hydrogen splitting
    mol = Chem.AddHs(mol)
    n = mol.GetNumAtoms() - mol.GetNumHeavyAtoms()
    if n < 60:
        frags = rdMMPA.FragmentMol(mol, pattern="[#1]!@!=!#[!#1]", maxCuts=1,
                                   resultsAsMols=False, maxCutBonds=100)
        for core, chains in frags:
            outlines.add(as_line(core, chains))
    return outlines