def process():
    """Open the three files named on the command line for the logP workflow.

    Expects ``sys.argv`` to hold, after the program name: the training-set
    SD file, the experimental logP data file and the regression-coefficient
    output file.  With fewer than three arguments a usage message is written
    to stderr and the process exits with status 2.

    NOTE(review): the streams are opened but not used any further inside
    this block.
    """
    argv = sys.argv

    if len(argv) < 4:
        print('Usage:', argv[0], 'training-set.sdf logP-data regression-coeff-file', file=sys.stderr)
        sys.exit(2)

    # Inputs are opened for reading, the coefficient file for writing.
    struct_is = Base.FileIOStream(argv[1], 'r')
    exp_logp_is = Base.FileIOStream(argv[2], 'r')
    coeff_os = Base.FileIOStream(argv[3], 'w')
def process():
    """Open the three files named on the command line for the logS workflow.

    Expects ``sys.argv`` to hold, after the program name: the training-set
    SD file, the experimental logS data file and the regression-coefficient
    output file.  With fewer than three arguments a usage message is written
    to stderr and the process exits with status 2.

    NOTE(review): the streams are opened but not used any further inside
    this block.
    """
    argv = sys.argv

    if len(argv) < 4:
        print('Usage:', argv[0], '[training-set.sdf] [logS-data] [regression coeff. output file]', file=sys.stderr)
        sys.exit(2)

    # Inputs are opened for reading, the coefficient file for writing.
    struct_is = Base.FileIOStream(argv[1], 'r')
    exp_logs_is = Base.FileIOStream(argv[2], 'r')
    coeff_os = Base.FileIOStream(argv[3], 'w')
def process():
    """Copy an SD file, leaving out molecules that occur in an exclusion file.

    Command line: ``[input.sdf] [exclude-molecules.sdf] [output.sdf]``.
    Every molecule of the exclusion file is hashed with
    ``Chem.calcHashCode`` (after ``setupMolecule``); input molecules whose
    hash is in that set are reported on stderr and not written.  A summary
    with the processed/dropped counts is printed to stderr at the end.
    With too few arguments a usage message is printed and the process exits
    with status 2.
    """
    def report(text):
        # All diagnostics of this script go to stderr.
        print(text, file=sys.stderr)

    argv = sys.argv

    if len(argv) < 4:
        print('Usage:', argv[0], '[input.sdf] [exclude-molecules.sdf] [output.sdf]', file=sys.stderr)
        sys.exit(2)

    ifs = Base.FileIOStream(argv[1], 'r')
    xifs = Base.FileIOStream(argv[2], 'r')
    ofs = Base.FileIOStream(argv[3], 'w')

    reader = Chem.SDFMoleculeReader(ifs)
    xreader = Chem.SDFMoleculeReader(xifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    mol = Chem.BasicMolecule()

    # Multi-conformer handling is switched off for both readers and the writer.
    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfImportParameter(xreader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    # Pass 1: collect the hash codes of all molecules to exclude.
    excluded_hashes = set()

    while xreader.read(mol):
        setupMolecule(mol)
        excluded_hashes.add(Chem.calcHashCode(mol))

    # Pass 2: write every input molecule whose hash is not excluded.
    while reader.read(mol):
        setupMolecule(mol)

        if Chem.calcHashCode(mol) not in excluded_hashes:
            writer.write(mol)
        else:
            stats.dropped += 1
            report('Dropped Molecule ' + str(stats.read) + ': ' + Chem.generateSMILES(mol) + ' ' + Chem.getName(mol))

        stats.read += 1

        if stats.read % 10000 == 0:
            report('Processed ' + str(stats.read) + ' Molecules...')

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read))
    report('Molecules dropped: ' + str(stats.dropped))
def read_in_ph(ph_path, output_dir_path):
    """Read a pharmacophore from a PML file and tag it with two paths.

    Input:
        ph_path: path of the PML file to read
        output_dir_path: stored unchanged on the result as ``dir_path``
    Return:
        the ``Pharm.BasicPharmacophore`` that was read into, carrying the
        extra attributes ``pml_path`` and ``dir_path``

    The result of the reader's ``read`` call is not checked.
    """
    pharmacophore = Pharm.BasicPharmacophore()
    pml_reader = Pharm.PMLPharmacophoreReader(Base.FileIOStream(ph_path))
    pml_reader.read(pharmacophore)

    # Remember where the pharmacophore came from and where output belongs.
    pharmacophore.pml_path = ph_path
    pharmacophore.dir_path = output_dir_path
    return pharmacophore
def savePharmacophore(pha, path):
    '''
    Saves a particular pharmacophore at the target path.

    Input:
        pha (CDPL BasicPharmacophore): the pharmacophore to be saved as a pml file
        path (String): path where to save the pml file (includes the filename.pml)
    Return:
        (boolean): always True
    '''
    out_stream = Base.FileIOStream(path, 'w')
    pml_writer = Pharm.PMLFeatureContainerWriter(out_stream)
    pml_writer.write(pha)
    return True
def write_ph_for_rpms(self, rpm_maps, output_directory):
    """Write every pharmacophore stored in ``rpm_maps`` to its own PML file.

    Input:
        rpm_maps: two-level mapping; the outer key ``fv`` names a
            sub-directory, the inner key ``ph_key`` names the pharmacophore.
        output_directory: directory below which one sub-directory per
            ``fv`` is created (if missing).

    Files are written to ``<output_directory>/<fv>/ph_<fv>_<ph_key>.pml``.
    """
    for fv in rpm_maps:
        ph_by_key = rpm_maps[fv]
        directory = os.path.join(output_directory, str(fv))

        if not os.path.isdir(directory):
            os.makedirs(directory)

        for ph_key in ph_by_key:
            ph_to_write = os.path.join(
                directory, 'ph_' + str(fv) + '_' + str(ph_key) + '.pml')

            # Single-argument print behaves the same on Python 2 and 3.
            print('- Writing pharmacophore: ' + str(ph_to_write))

            # ph_key is a key of the inner mapping, so the pharmacophore has
            # to be looked up there; indexing the outer mapping with it (as
            # was done before) fetches the wrong entry or raises KeyError.
            Pharm.PMLFeatureContainerWriter(
                Base.FileIOStream(ph_to_write, 'w')).write(ph_by_key[ph_key])
def CDPLphaFromPML(pml_path):
    '''
    Reads a single CDPL BasicPharmacophore from a pml-file.

    Input:
        pml_path (string): path to the pml file
    Return:
        (CDPL BasicPharmacophore): the corresponding CDPL BasicPharmacophore,
        or False if the reader reported that nothing could be read
    '''
    pha = Pharm.BasicPharmacophore()
    ifs = Base.FileIOStream(pml_path, 'r')
    pml_reader = Pharm.PMLPharmacophoreReader(ifs)

    if not pml_reader.read(pha):
        # The path is folded into the message itself.  NOTE(review): `log`
        # is defined outside this block; if it is a stdlib logger, passing
        # the path as an extra positional argument without a %s placeholder
        # makes logging report a formatting error instead of the message.
        log.error("COULD NOT READ PML " + str(pml_path))
        return False

    return pha
def CDPLmolFromSdf(sdf_path, conformation):
    '''
    Generates a single CDPL Molecule from an sdf-file. If conformation is
    true, the molecule is passed through _CDPLgenerateConformation and that
    result is returned instead.

    Input:
        sdf_path (string): path to the sdf file
        conformation (boolean): generates one 3d conformation according to MMFF94
    Return:
        (CDPL BasicMolecule): the corresponding CDPL BasicMolecule, or False
        if the reader reported that nothing could be read
    '''
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # The path is folded into the message itself.  NOTE(review): `log`
        # is defined outside this block; if it is a stdlib logger, passing
        # the path as an extra positional argument without a %s placeholder
        # makes logging report a formatting error instead of the message.
        log.error("COULD NOT READ SDF " + str(sdf_path))
        return False

    if conformation:
        return _CDPLgenerateConformation(mol)

    return mol
def _CDPLreadFromPDBFile(pdb_file):
    '''
    PRIVATE METHOD
    Reads a pdb file and is used by the CDPLreadProteinFile method.

    Input:
        pdb_file (string): the path to the pdb file
    Return:
        (CDPL BasicMolecule): the corresponding pdb molecule, or False if
        the reader reported that nothing could be read
    '''
    ifs = Base.FileIOStream(pdb_file, 'r')
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    # TODO Should this be there for the pdb readin? or also in the config?
    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(pdb_reader, False)

    if not pdb_reader.read(pdb_mol):
        # The path is folded into the message itself.  NOTE(review): `log`
        # is defined outside this block; if it is a stdlib logger, passing
        # the path as an extra positional argument without a %s placeholder
        # makes logging report a formatting error instead of the message.
        log.error("COULD NOT READ PDB " + str(pdb_file))
        return False

    return pdb_mol
def process(sdf_file, psd_file_path):
    """Feed every molecule of an SD file into a new PSD screening database.

    Input:
        sdf_file: path of the SD file to read (multi-conformer import is
            switched on for the reader)
        psd_file_path: path handed to ``Pharm.PSDScreeningDBCreator``, which
            is opened in ``CREATE`` mode

    Each molecule is prepared with ``setupMolecule`` and passed to the
    creator.  Progress is printed every 100 molecules together with the time
    needed for that batch, and the creator's counters are printed as a
    summary.  The creator is closed even if processing fails part-way.
    """
    ifs = Base.FileIOStream(sdf_file, 'r')
    reader = Chem.SDFMoleculeReader(ifs)
    mol = Chem.BasicMolecule()

    Chem.setMultiConfImportParameter(reader, True)

    psd_creator = Pharm.PSDScreeningDBCreator(
        psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True)

    # time.clock() was removed in Python 3.8; perf_counter() is its
    # documented replacement.  Fall back to time.time() on interpreters
    # that predate perf_counter().
    timer = getattr(time, 'perf_counter', time.time)

    try:
        i = 0
        t0 = timer()

        while reader.read(mol):
            setupMolecule(mol)
            psd_creator.process(mol)
            i += 1

            if i % 100 == 0:
                # Single-argument prints behave the same on Python 2 and 3.
                print('Processed ' + str(i) + ' molecules (' + str(timer() - t0) + ' s elapsed)...')
                # The timer is restarted, so the figure is per batch of 100.
                t0 = timer()

            mol.clear()

        print('')
        print('-- Summary --')
        print('Molecules processed: ' + str(psd_creator.numProcessed))
        print('Molecules rejected: ' + str(psd_creator.numRejected))
        print('Molecules deleted: ' + str(psd_creator.numDeleted))
        print('Molecules inserted: ' + str(psd_creator.numInserted))
    finally:
        psd_creator.close()
def generate_ph(pdb, key):
    """Build a ligand/environment interaction pharmacophore from a PDB file.

    Input:
        pdb: path of the PDB file to read
        key: not used inside this function
    Return:
        the interaction pharmacophore (``Pharm.BasicPharmacophore``) with two
        extra attributes: ``fv`` (list of feature keys as strings) and
        ``path_to_pdb``; ``None`` if the file cannot be read or the ligand
        residue is not found.

    NOTE(review): ``self``, ``generate_key`` and ``ftype_names`` are not
    parameters; they must come from an enclosing scope that is not visible
    here (presumably this is nested inside a method) - confirm.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = self.ligand_3_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    # All messages are printed as one pre-built string so the call is valid
    # (and produces the same text, including the doubled spaces) under both
    # Python 2 and Python 3.
    print('- Reading input:  {}  ...'.format(pdb))

    if not pdb_reader.read(pdb_mol):
        print('!! Could not read input molecule')
        return

    print('- Processing macromolecule {}  ...'.format(pdb))

    # Remove every bond that involves a metal atom.  The index only advances
    # when nothing was removed, so the bond that takes the removed one's
    # place is examined as well.
    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Property perception / hydrogen completion; order kept as is.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)

    ligand = Chem.Fragment()

    print('- Extracting ligand  {}  ...'.format(tlc))

    # Only the first residue whose code matches is extracted.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print('!! Could not find ligand {} in input file'.format(tlc))
        return

    Chem.perceiveSSSR(ligand, True)

    # Residues around the ligand (cutoff 7.0) form the environment.
    lig_env = Chem.Fragment()

    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print('- Constructing pharmacophore ...')

    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)

    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()

    analyzer.analyze(lig_pharm, env_pharm, interactions)

    #------------------------- XVOLS
    int_env_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_env_ftrs, interactions, False)

    int_core_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_core_ftrs, interactions, True)

    int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

    # H-bond donor/acceptor environment features get a tolerance of 1.0,
    # all other environment features 1.5.
    for ftr in int_env_ftrs:
        if Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
            Pharm.setTolerance(ftr, 1.0)
        else:
            Pharm.setTolerance(ftr, 1.5)

    Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1, False)

    int_env_ftr_atoms = Chem.Fragment()
    Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)

    int_residue_atoms = Chem.Fragment()
    Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env, int_residue_atoms, True)
    Chem.makeHydrogenDeplete(int_residue_atoms)

    def isAlphaAtom(atom):
        return Biomol.getResidueAtomName(atom) == 'CA'

    # Keep only atoms named 'CA' of the interacting residues and place
    # further exclusion volumes on them.
    Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
    Pharm.createExclusionVolumes(int_pharm, int_residue_atoms, Chem.Atom3DCoordinatesFunctor(), 1.0, 2.0, False)

    # Collect a key for every feature that has a substructure and is not
    # of type 'XV'.
    features_in_ph = []

    for int_ftr in int_pharm:
        if Pharm.hasSubstructure(int_ftr) == False:
            continue
        elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
            continue

        feature_id = generate_key(int_ftr)
        features_in_ph.append(str(feature_id))
        self.unique_feature_vector.add(str(feature_id))

    int_pharm.fv = features_in_ph
    int_pharm.path_to_pdb = pdb

    return int_pharm
def generate_ph(pdb, args, df_constructor, ts):
    """Analyze ligand/environment interactions for one PDB file.

    Input:
        pdb: path of the PDB file to read
        args: object providing ``ligand_three_letter_code``
        df_constructor: passed through to ``outputInteractions``
        ts: only referenced by the disabled debug output below
    Return:
        the pair ``(df_constructor, interaction_at_ts)`` as returned by
        ``outputInteractions``.

    NOTE(review): when the file cannot be read or the ligand residue is not
    found, the function returns ``None`` rather than a pair - callers that
    unpack the result need to handle that.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    # All messages are printed as one pre-built string so the call is valid
    # (and produces the same text, including the doubled spaces) under both
    # Python 2 and Python 3.
    print('- Reading input:  {}  ...'.format(pdb))

    if not pdb_reader.read(pdb_mol):
        print('!! Could not read input molecule')
        return

    print('- Processing macromolecule {}  ...'.format(pdb))

    # Remove every bond that involves a metal atom.  The index only advances
    # when nothing was removed, so the bond that takes the removed one's
    # place is examined as well.
    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Reset the implicit hydrogen count of every atom before recalculating.
    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)

    # Property perception / hydrogen completion; order kept as is.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)

    ligand = Chem.Fragment()

    print('- Extracting ligand  {}  ...'.format(tlc))

    # Only the first residue whose code matches is extracted.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print('!! Could not find ligand {} in input file'.format(tlc))
        return

    Chem.perceiveSSSR(ligand, True)

    # Residues around the ligand (cutoff 7.0) form the environment.
    lig_env = Chem.Fragment()

    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print('- Constructing pharmacophore ...')

    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)

    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()

    analyzer.analyze(lig_pharm, env_pharm, interactions)

    df_constructor, interaction_at_ts = outputInteractions(
        lig_pharm, env_pharm, interactions, df_constructor)

    #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand)

    return df_constructor, interaction_at_ts
def process():
    """Collect torsion matches from an SD file and save the torsion library.

    Command line:
    ``[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]``.

    The torsion rules are loaded, every non-ring, non-hydrogen bond whose
    two atoms each have more than one explicit bond is matched against the
    rules, and each match is handed to ``processMatch``.  Afterwards
    ``processHistograms`` is called and the library is saved to the output
    file.  Progress and errors are written to stderr.

    Exits with status 2 on wrong usage and when loading the rules or
    writing the library fails.  Molecules that cannot be read or have no
    3D coordinates are reported and skipped.
    """
    if len(sys.argv) < 4:
        print('Usage:', sys.argv[0],
              '[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]',
              file=sys.stderr)
        sys.exit(2)

    tor_lib = ConfGen.TorsionLibrary()

    try:
        tor_lib.load(Base.FileIOStream(sys.argv[1], 'r'))
    except Exception as e:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; the message now carries the error text too.
        print('Error while loading input torsion rules:',
              '{}: {}'.format(type(e).__name__, e), file=sys.stderr)
        sys.exit(2)

    tor_matcher = ConfGen.TorsionRuleMatcher(tor_lib)

    tor_matcher.findAllRuleMappings(True)
    tor_matcher.findUniqueMappingsOnly(True)
    tor_matcher.stopAtFirstMatchingRule(True)

    mol = Chem.BasicMolecule()
    mol_reader = Chem.FileSDFMoleculeReader(sys.argv[2])

    Chem.setMultiConfImportParameter(mol_reader, False)

    print('- Analyzing input structures...', file=sys.stderr)

    i = 1  # 1-based index of the molecule currently being handled
    rule_to_angle_hists = {}
    coords = Math.Vector3DArray()

    while True:
        try:
            if not mol_reader.read(mol):
                break
        except IOError as e:
            print('Error while reading input molecule', i, ':', e, file=sys.stderr)
            i += 1
            continue

        if i % 500 == 0:
            print('   ... At input molecule', i, '...', file=sys.stderr)

        Chem.initSubstructureSearchTarget(mol, False)

        try:
            Chem.get3DCoordinates(mol, coords)
        except Base.ItemNotFound:
            print('Could not get 3D-coordinates for molecule', i, file=sys.stderr)
            i += 1
            continue

        for bond in mol.bonds:
            if Chem.getRingFlag(bond):
                continue

            if Chem.isHydrogenBond(bond):
                continue

            # Skip bonds to atoms that have no further explicit bonds.
            if Chem.getExplicitBondCount(bond.getBegin()) <= 1 or Chem.getExplicitBondCount(bond.getEnd()) <= 1:
                continue

            tor_matcher.findMatches(bond, mol, False)

            for match in tor_matcher:
                processMatch(i, match, mol, coords, rule_to_angle_hists)

        i += 1

    print('- Processing torsion angle histograms...', file=sys.stderr)

    processHistograms(tor_lib, rule_to_angle_hists)

    print('- Writing output torsion library...', file=sys.stderr)

    try:
        tor_lib.save(Base.FileIOStream(sys.argv[3], 'w+'))
    except Exception as e:
        print('Error while writing torsion library:',
              '{}: {}'.format(type(e).__name__, e), file=sys.stderr)
        sys.exit(2)

    print('DONE!', file=sys.stderr)
def readPDBFromFile(path: str) -> Chem.BasicMolecule:
    """Open ``path`` as a file stream and return what ``readPDBFromStream`` reads from it."""
    return readPDBFromStream(Base.FileIOStream(path))
def writePDB(path: str, protein: Chem.BasicMolecule) -> None:
    """Write ``protein`` to ``path`` in PDB format.

    Args:
        path: destination file.
        protein: molecule handed to ``Biomol.PDBMolecularGraphWriter``.
    """
    # The stream must be opened for writing explicitly, as every other
    # writer in this code base does.  NOTE(review): without a mode argument
    # FileIOStream presumably opens the file for reading - confirm against
    # the CDPL API reference.
    s = Base.FileIOStream(path, 'w')
    w = Biomol.PDBMolecularGraphWriter(s)

    try:
        w.write(protein)
    finally:
        # Close the writer even when writing fails.
        w.close()
def cleanStructures():
    """Run ``processMolecule`` over an SD file and split kept/dropped output.

    Command line:
    ``[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]``.

    Reading starts at record ``start_index``; if ``count`` is given and
    greater than zero, processing stops after that many molecules.  A
    molecule for which ``processMolecule`` returns a truthy result has that
    result written to the output file; otherwise the unmodified molecule is
    written to the "dropped" file and reported on stderr.  A summary is
    printed to stderr at the end.  With too few arguments a usage message
    is printed and the process exits with status 2.
    """
    def report(text):
        # All diagnostics of this script go to stderr.
        print(text, file=sys.stderr)

    argv = sys.argv

    if len(argv) < 5:
        print('Usage:', argv[0], '[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]', file=sys.stderr)
        sys.exit(2)

    ifs = Base.FileIOStream(argv[1], 'r')
    ofs = Base.FileIOStream(argv[2], 'w')
    dofs = Base.FileIOStream(argv[3], 'w')

    offset = int(argv[4])
    count = int(argv[5]) if len(argv) > 5 else 0

    reader = Chem.SDFMoleculeReader(ifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    dwriter = Chem.SDFMolecularGraphWriter(dofs)
    mol = Chem.BasicMolecule()

    stats = Stats()
    stats.read = 0
    stats.dropped = 0
    stats.modified = 0

    # Multi-conformer handling is switched off for the reader and both writers.
    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)
    Chem.setMultiConfExportParameter(dwriter, False)

    if offset > 0:
        report('Skipping Molecules to Start Index ' + str(offset))
        reader.setRecordIndex(offset)

    # stats.read holds the absolute record index; the number of molecules
    # handled in this run is always stats.read - offset.
    stats.read = offset

    while reader.read(mol):
        proc_mol = processMolecule(mol, stats)

        if not proc_mol:
            stats.dropped += 1
            dwriter.write(mol)
            report('Dropped Molecule ' + str(stats.read) + ': ' + generateSMILES(mol) + ' ' + Chem.getName(mol))
        else:
            writer.write(proc_mol)

        stats.read += 1

        if stats.read % 10000 == 0:
            report('Processed ' + str(stats.read - offset) + ' Molecules...')

        if count > 0 and (stats.read - offset) >= count:
            break

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read - offset))
    report('Molecules dropped: ' + str(stats.dropped))
    report('Molecules modified: ' + str(stats.modified))