def _CDPLconfigForConformation(mol):  # TODO is this the right way to handle ligands for conf. generation?
    '''
    Private helper: prepare a CDPL BasicMolecule for conformation generation.

    The molecule is modified in place by running, in this order, the CDPL
    component/SSSR perception, ring flags, implicit hydrogen counts,
    hybridization states, aromaticity flags, CIP priorities and
    configurations, atom/bond stereo descriptors, the topological distance
    matrix, 2D coordinates and 2D bond stereo flags.

    Input:
        mol (CDPL BasicMolecule): the molecule to configure.
    Return:
        (CDPL BasicMolecule): the same ``mol`` object, after configuration.
    '''
    # NOTE(review): the trailing boolean is presumably CDPL's "overwrite"
    # flag (False = keep already-present property values) -- confirm.
    Chem.perceiveComponents(mol, False)
    Chem.perceiveSSSR(mol, False)
    Chem.setRingFlags(mol, False)
    Chem.calcImplicitHydrogenCounts(mol, False)
    Chem.perceiveHybridizationStates(mol, False)
    Chem.setAromaticityFlags(mol, False)
    Chem.calcCIPPriorities(mol, False)
    Chem.calcAtomCIPConfigurations(mol, False)
    Chem.calcBondCIPConfigurations(mol, False)
    Chem.calcAtomStereoDescriptors(mol, False)
    Chem.calcBondStereoDescriptors(mol, False)
    Chem.calcTopologicalDistanceMatrix(mol, False)
    Chem.generate2DCoordinates(mol, False)
    # Unlike the calls above, the 2D stereo flags are passed True.
    Chem.generateBond2DStereoFlags(mol, True)
    # The documented contract is to hand the configured molecule back; the
    # function previously fell off the end and returned None.
    return mol
def mol_to_sdf(molecules, path, multiconf=True):
    """Write one or more molecules to an SD file.

    Args:
        molecules: a single molecule or an iterable of molecules. Anything
            that is not an ``Iterable`` is wrapped in a one-element list.
        path: destination file path handed to the CDPL SDF writer.
        multiconf: value passed to ``Chem.setMultiConfExportParameter``.

    Implicit hydrogen counts are calculated on each molecule (with the flag
    ``False``) right before it is written, so the molecules are touched in
    place.
    """
    if not isinstance(molecules, Iterable):
        molecules = [molecules]
    w = Chem.FileSDFMolecularGraphWriter(path)
    try:
        Chem.setMultiConfExportParameter(w, multiconf)
        for mol in molecules:
            Chem.calcImplicitHydrogenCounts(mol, False)
            w.write(mol)
    finally:
        # Always release the writer, even if a molecule fails to write.
        w.close()
def setupMolecule(mol):
    """Run the basic CDPL perception routines on ``mol`` in place.

    Each routine below is called as ``routine(mol, False)``, in the listed
    order. Nothing is returned.
    """
    preparation_steps = (
        Chem.perceiveComponents,
        Chem.perceiveSSSR,
        Chem.setRingFlags,
        Chem.calcImplicitHydrogenCounts,
        Chem.perceiveHybridizationStates,
        Chem.setAromaticityFlags,
        Chem.calcCIPPriorities,
        Chem.calcAtomCIPConfigurations,
        Chem.calcBondCIPConfigurations,
    )
    for step in preparation_steps:
        step(mol, False)
def sanitize_mol(mol: Chem.BasicMolecule, makeHydrogenComplete: bool = False) -> Chem.BasicMolecule:
    """Recompute basic perceived properties of ``mol`` in place and return it.

    Args:
        mol: the molecule to sanitize; it is modified in place.
        makeHydrogenComplete: when true, explicit hydrogens are added via
            ``Chem.makeHydrogenComplete``, implicit hydrogen counts are
            recalculated and hydrogen 3D coordinates are generated.

    Returns:
        The same ``mol`` object that was passed in.
    """
    # Every perception call passes True as second argument
    # (presumably CDPL's overwrite flag -- TODO confirm).
    Chem.calcImplicitHydrogenCounts(mol, True)
    Chem.perceiveHybridizationStates(mol, True)
    Chem.perceiveComponents(mol, True)
    Chem.perceiveSSSR(mol, True)
    Chem.setRingFlags(mol, True)
    Chem.setAromaticityFlags(mol, True)
    if makeHydrogenComplete:
        Chem.makeHydrogenComplete(mol)
        # Counts are recalculated after the explicit hydrogens were added.
        Chem.calcImplicitHydrogenCounts(mol, True)
        # NOTE(review): hydrogen coordinates are only generated on this
        # branch -- confirm that is the intended scope.
        Chem.generateHydrogen3DCoordinates(mol, True)
    return mol
def loadCDFMolecule(fname):
    """Read one molecule from the CDF file ``fname``.

    Returns the loaded ``Chem.BasicMolecule`` after running a fixed set of
    perception routines on it (each called with ``False``), or ``None`` when
    the reader reports that nothing could be read.
    """
    loaded = Chem.BasicMolecule()
    reader = Chem.FileCDFMoleculeReader(fname)
    if reader.read(loaded):
        post_read_steps = (
            Chem.calcImplicitHydrogenCounts,
            Chem.perceiveHybridizationStates,
            Chem.setAtomSymbolsFromTypes,
            Chem.perceiveSSSR,
            Chem.setRingFlags,
            Chem.setAromaticityFlags,
        )
        for step in post_read_steps:
            step(loaded, False)
        return loaded
    return None
def cdfMol_pdb(pdb, output, name):
    """Convert a PDB file into a CDF molecule file plus a residue listing.

    Args:
        pdb: path of the PDB file to read.
        output: prefix (typically a directory ending in a separator) that is
            string-concatenated with ``name`` to build the output paths.
        name: base name of the generated files.

    Writes ``output + name + ".cdf"`` and
    ``output + name + "_residue_info.txt"``.

    Returns:
        ``None`` in all cases; an early ``None`` is returned when the PDB
        text cannot be parsed.

    Raises:
        Whatever the CDF writer raises; a short notice is printed first.
    """
    initial_time = time.time()

    # Read the file through a context manager so the handle is closed, and
    # rename residues WAT -> HOH and HIE -> HIS before parsing. The replace
    # is a plain text substitution over the whole file.
    with open(pdb, 'r') as pdb_file:
        pdb_str = pdb_file.read().replace('WAT', 'HOH').replace('HIE', 'HIS')

    pdb_mol = Chem.BasicMolecule()
    pdb_reader = Biomol.PDBMoleculeReader(Base.StringIOStream(pdb_str))
    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(pdb_reader, True)
    if not pdb_reader.read(pdb_mol):
        return None

    Chem.calcImplicitHydrogenCounts(pdb_mol, False)
    Chem.perceiveHybridizationStates(pdb_mol, False)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.perceiveSSSR(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, False)
    Chem.setAromaticityFlags(pdb_mol, False)

    cdf_mol = Chem.BasicMolecule()
    cdf_mol.assign(pdb_mol)

    # Give every atom a fresh coordinates array ...
    for atom in cdf_mol.atoms:
        Chem.set3DCoordinatesArray(atom, Math.Vector3DArray())

    # ... and store the PDB coordinates of the atom with the same index as
    # its single entry.
    for i in range(cdf_mol.numAtoms):
        Chem.get3DCoordinatesArray(cdf_mol.getAtom(i)).addElement(
            Chem.get3DCoordinates(pdb_mol.getAtom(i)))

    tmp_output = output + name + ".cdf"
    try:
        Chem.FileCDFMolecularGraphWriter(tmp_output).write(cdf_mol)
    except Exception:
        # Report and propagate; the caller decides how to recover.
        print('> Cdf_mol writing failure.')
        raise

    residues = Biomol.ResidueList(cdf_mol)
    tmp_output = output + name + "_residue_info.txt"
    with open(tmp_output, 'w') as txt_writer:
        txt_writer.write('residue name_resid_chain\n')
        for res in residues:
            res_id = getResidueID(res)
            txt_writer.write('{}: \n'.format(res_id))

    calc_time = time.time() - initial_time
    print('> Cdf and amino acid residue number list files generated in {}s'.
          format(int(calc_time)))
def _CDPLcalcProteinProperties(pdb_mol):  # TODO is this the right way to handle protein structures?
    '''
    Private helper: configure a CDPL BasicMolecule that holds a protein
    structure. According to the original note it is used by the
    _CDPLextractProteinFragments method.

    Input:
        pdb_mol (CDPL BasicMolecule): molecule representing the protein
            structure; it is modified in place. Nothing is returned.
    '''
    # (routine, extra positional arguments after the molecule), in call order.
    pipeline = (
        (Chem.calcImplicitHydrogenCounts, (True,)),
        (Chem.perceiveHybridizationStates, (True,)),
        (Chem.makeHydrogenComplete, ()),
        (Chem.setAtomSymbolsFromTypes, (False,)),
        (Chem.calcImplicitHydrogenCounts, (True,)),
        (Chem.setRingFlags, (True,)),
        (Chem.setAromaticityFlags, (True,)),
        (Chem.generateHydrogen3DCoordinates, (True,)),
        (Biomol.setHydrogenResidueSequenceInfo, (False,)),
    )
    for routine, extra_args in pipeline:
        routine(pdb_mol, *extra_args)
def calculate_molecule_hashcode(mol, stereo=True):
    """Return the CDPL hash code of ``mol``.

    The molecule is changed in place first: explicit hydrogens are removed
    via ``Chem.makeHydrogenDeplete`` and the implicit hydrogen counts are
    recalculated.

    Args:
        mol: molecule to hash (mutated, see above).
        stereo: when true, stereo descriptors and CIP data are recalculated
            and ``Chem.calcHashCode`` is called with its default flags.
            Otherwise the hash is restricted to the explicit atom/bond
            property flags listed below.
    """
    Chem.makeHydrogenDeplete(mol)
    Chem.calcImplicitHydrogenCounts(mol, True)

    if not stereo:
        atom_mask = (Chem.AtomPropertyFlag.TYPE
                     | Chem.AtomPropertyFlag.H_COUNT
                     | Chem.AtomPropertyFlag.FORMAL_CHARGE
                     | Chem.AtomPropertyFlag.AROMATICITY)
        bond_mask = (Chem.BondPropertyFlag.ORDER
                     | Chem.BondPropertyFlag.TOPOLOGY
                     | Chem.BondPropertyFlag.AROMATICITY)
        return Chem.calcHashCode(mol, atom_flags=atom_mask, bond_flags=bond_mask)

    stereo_steps = (
        Chem.calcAtomStereoDescriptors,
        Chem.calcBondStereoDescriptors,
        Chem.calcCIPPriorities,
        Chem.calcAtomCIPConfigurations,
        Chem.calcBondCIPConfigurations,
    )
    for step in stereo_steps:
        step(mol, True)
    return Chem.calcHashCode(mol)
def mol_to_smiles(mol,
                  kekulized=False,
                  canonical=True,
                  atom_stereo=True,
                  hydrogen_deplete=True,
                  bond_stereo=False):
    """Serialize ``mol`` with the CDPL SMILES writer and return the text.

    The keyword arguments are forwarded to the matching CDPL writer
    parameters; the record format is fixed to ``'S'`` and the
    "no organic subset" parameter to ``False``. Implicit hydrogen counts of
    ``mol`` are recalculated in place before writing.

    Returns:
        The ``value`` of the in-memory stream after the writer is closed.
    """
    buffer = Base.StringIOStream()
    writer = Chem.SMILESMolecularGraphWriter(buffer)

    # (parameter setter, value) pairs, applied in this order.
    writer_options = (
        (Chem.setSMILESWriteKekuleFormParameter, kekulized),
        (Chem.setSMILESWriteCanonicalFormParameter, canonical),
        (Chem.setSMILESRecordFormatParameter, 'S'),
        (Chem.setSMILESWriteAtomStereoParameter, atom_stereo),
        (Chem.setSMILESWriteBondStereoParameter, bond_stereo),
        (Chem.setSMILESNoOrganicSubsetParameter, False),
        (Chem.setOrdinaryHydrogenDepleteParameter, hydrogen_deplete),
    )
    for apply_option, setting in writer_options:
        apply_option(writer, setting)

    Chem.calcImplicitHydrogenCounts(mol, True)
    writer.write(mol)
    writer.close()
    return buffer.value
def generate_ph(pdb, key):
    # Build an interaction pharmacophore (ligand features that interact with
    # the binding-site environment, plus exclusion volumes) from a PDB file.
    #
    # pdb: path of the PDB file to read.
    # key: not referenced anywhere in this body.
    #
    # Returns the pharmacophore, with two extra attributes attached:
    # `fv` (list of feature key strings) and `path_to_pdb`. Returns None
    # (bare `return`) when the file cannot be read or the ligand is missing.
    #
    # NOTE(review): `self` is used but is not a parameter; this only works if
    # the function is defined where `self` is visible in an enclosing scope
    # -- confirm. The `print` statements are Python 2 syntax.
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = self.ligand_3_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()
    print '- Reading input: ', pdb, ' ...'
    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return
    print '- Processing macromolecule', pdb, ' ...'
    # Drop every bond that has a metal atom at either end. The index is only
    # advanced when nothing was removed, because removal shifts later bonds.
    i = 0
    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)
        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1
    # Prepare the structure: add explicit hydrogens, then recompute the
    # dependent properties and generate hydrogen coordinates.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    ligand = Chem.Fragment()
    print '- Extracting ligand ', tlc, ' ...'
    # Only the first atom whose residue code matches is used as the seed.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break
    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return
    Chem.perceiveSSSR(ligand, True)
    lig_env = Chem.Fragment()
    # 7.0 is presumably the distance cutoff around the ligand -- TODO confirm units.
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)
    print '- Constructing pharmacophore ...'
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)
    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)
    #------------------------- XVOLS
    # Judging by the variable names, False selects the environment-side
    # features of the mapping and True the ligand-side ones -- TODO confirm.
    int_env_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_env_ftrs, interactions, False)
    int_core_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_core_ftrs, interactions, True)
    int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)
    # H-bond donor/acceptor environment features get tolerance 1.0, all
    # other feature types 1.5.
    for ftr in int_env_ftrs:
        if Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
            Pharm.setTolerance(ftr, 1.0)
        else:
            Pharm.setTolerance(ftr, 1.5)
    # First set of exclusion volumes: derived from the interacting
    # environment features.
    Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1, False)
    int_env_ftr_atoms = Chem.Fragment()
    Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)
    int_residue_atoms = Chem.Fragment()
    Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env, int_residue_atoms, True)
    Chem.makeHydrogenDeplete(int_residue_atoms)

    # Predicate: true for atoms whose residue atom name is 'CA'.
    def isAlphaAtom(atom):
        return Biomol.getResidueAtomName(atom) == 'CA'

    # Second set of exclusion volumes: only the 'CA' atoms of the
    # interacting residues are kept and used.
    Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
    Pharm.createExclusionVolumes(int_pharm, int_residue_atoms, Chem.Atom3DCoordinatesFunctor(), 1.0, 2.0, False)
    # Collect a key for every feature that has a substructure and is not
    # named 'XV' in ftype_names; each key is also added to the shared
    # self.unique_feature_vector collection.
    features_in_ph = []
    for int_ftr in int_pharm:
        if Pharm.hasSubstructure(int_ftr) == False:
            continue
        elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
            continue
        feature_id = generate_key(int_ftr)
        features_in_ph.append(str(feature_id))
        self.unique_feature_vector.add(str(feature_id))
    int_pharm.fv = features_in_ph
    int_pharm.path_to_pdb = pdb
    return int_pharm
# Feed every molecule read from `struct_is` into the regression model,
# pairing it with one experimental logP value read from `exp_logp_is`.
# (`struct_is`, `exp_logp_is` and `mlr_model` are defined outside this span.)
sdf_reader = Chem.SDFMoleculeReader(struct_is)
mol = Chem.BasicMolecule()
xlogp_calc = Chem.XLogPCalculator()
# Running sum of the per-molecule XLogP feature vectors.
histo = Math.DVector()
histo.resize(Chem.XLogPCalculator.FEATURE_VECTOR_SIZE)
Chem.setMultiConfImportParameter(sdf_reader, False)
while sdf_reader.read(mol):
    # One line of the logP input is consumed per molecule; assumes both
    # inputs list the compounds in the same order -- TODO confirm.
    exp_logp = float(exp_logp_is.readline())
    Chem.perceiveComponents(mol, False)
    Chem.perceiveSSSR(mol, False)
    Chem.setRingFlags(mol, False)
    Chem.calcImplicitHydrogenCounts(mol, False)
    Chem.perceiveHybridizationStates(mol, False)
    Chem.setAromaticityFlags(mol, False)
    Chem.calcTopologicalDistanceMatrix(mol, False)
    xlogp_calc.calculate(mol)
    histo += xlogp_calc.getFeatureVector()
    # The feature vector is the X data, the experimental logP the Y value.
    mlr_model.addXYData(xlogp_calc.getFeatureVector(), exp_logp)
mlr_model.buildModel()
mlr_model.calcStatistics()
# Report header goes to stderr.
print('Model Statistics:', file=sys.stderr)
print('----------------------------------', file=sys.stderr)
def generate_ph(pdb, args, df_constructor, ts):
    # Generate ligand and environment pharmacophores from a PDB file, analyze
    # their interactions and pass the result to outputInteractions().
    #
    # pdb: path of the PDB file to read.
    # args: object providing `ligand_three_letter_code`.
    # df_constructor: handed to outputInteractions() and replaced by what it
    #     returns.
    # ts: only referenced by the commented-out debug writers below.
    #
    # Returns the pair (df_constructor, interaction_at_ts) from
    # outputInteractions(). On a read failure or a missing ligand it returns
    # None (bare `return`), so a caller that unpacks two values must handle
    # that case. The `print` statements are Python 2 syntax.
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()
    print '- Reading input: ', pdb, ' ...'
    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return
    print '- Processing macromolecule', pdb, ' ...'
    # Drop every bond that has a metal atom at either end. The index is only
    # advanced when nothing was removed, because removal shifts later bonds.
    i = 0
    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)
        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1
    # Reset the implicit hydrogen count of every atom to 0 before the counts
    # are recalculated for the whole molecule.
    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)
    ligand = Chem.Fragment()
    print '- Extracting ligand ', tlc, ' ...'
    # Only the first atom whose residue code matches is used as the seed.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break
    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return
    Chem.perceiveSSSR(ligand, True)
    lig_env = Chem.Fragment()
    # 7.0 is presumably the distance cutoff around the ligand -- TODO confirm units.
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)
    print '- Constructing pharmacophore ...'
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)
    #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm)
    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)
    df_constructor, interaction_at_ts = outputInteractions(lig_pharm, env_pharm, interactions, df_constructor)
    #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand)
    return df_constructor, interaction_at_ts
def processMolecule(mol, stats):
    """Apply the configured clean-up and filter rules to one molecule.

    Depending on the module-level switches the molecule is neutralized,
    reduced to its largest component, and rejected if it has too few heavy
    atoms, too many fluorine atoms, an atom type not matching
    ``VALID_ATOM_TYPES``, or (when carbon is mandatory) no carbon atom.

    Args:
        mol: molecule to process; perception routines modify it in place.
        stats: counter object; ``stats.read`` is used in the log message and
            ``stats.modified`` is incremented for accepted molecules that
            were changed.

    Returns:
        The accepted molecule (the largest component when components were
        stripped), or ``None`` when the molecule is rejected.
    """
    modified = Chem.neutralize(mol) if NEUTRALIZE else False

    for perceive in (Chem.perceiveComponents,
                     Chem.perceiveSSSR,
                     Chem.setRingFlags,
                     Chem.calcImplicitHydrogenCounts,
                     Chem.perceiveHybridizationStates,
                     Chem.setAromaticityFlags):
        perceive(mol, False)

    comps = Chem.getComponents(mol)
    if comps.getSize() > 1 and KEEP_ONLY_LARGEST_COMP:
        # max() keeps the first of several equally large components, exactly
        # like a strict ">" comparison while scanning front to back.
        largest_comp = max(comps, key=lambda fragment: fragment.getNumAtoms())

        Chem.perceiveComponents(largest_comp, False)
        Chem.perceiveSSSR(largest_comp, False)
        Chem.setName(largest_comp, Chem.getName(mol))

        notice = ('Removed Components from Molecule ' + str(stats.read) + ': '
                  + generateSMILES(mol) + ' ' + Chem.getName(mol))
        print(notice, file=sys.stderr)
        modified = True

        # Carry name and structure data over to the surviving component.
        if Chem.hasName(mol):
            Chem.setName(largest_comp, Chem.getName(mol))
        if Chem.hasStructureData(mol):
            Chem.setStructureData(largest_comp, Chem.getStructureData(mol))
        mol = largest_comp

    if Chem.getHeavyAtomCount(mol) < MIN_HEAVY_ATOM_COUNT:
        return None

    if REMOVE_FLUORINATED and Chem.getAtomCount(mol, Chem.AtomType.F) > FLUOR_ATOM_COUNT:
        return None

    carbon_seen = False
    for atom in mol.atoms:
        atom_type = Chem.getType(atom)
        type_allowed = any(Chem.atomTypesMatch(allowed, atom_type)
                           for allowed in VALID_ATOM_TYPES)
        if not type_allowed:
            return None
        if atom_type == Chem.AtomType.C:
            carbon_seen = True

    if CARBON_ATOMS_MANDATORY and not carbon_seen:
        return None

    if modified:
        stats.modified += 1
    return mol