def main():
    """Compute a contact descriptor for every ligand against one protein.

    Every ``*.mol2`` file found in ``options.inputligdir`` is paired with the
    protein structure at ``options.inputpdb``.  The descriptor selected by
    ``options.descriptor`` ('elements', 'sybyl' or 'credo') is computed for
    each pair and written as one CSV row to ``options.output``, or to
    standard output when no output path is given.  The header row (the
    descriptor labels followed by a 'ligand' column) is written once, before
    the first data row.

    Exits with status 1 if the protein file does not exist or if the
    descriptor name is not one of the three supported values.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # Python 3: the csv module needs a text handle opened with newline=''
        fh = open(options.output, 'w', newline='')
    else:
        # Python 2: the csv module needs a binary handle
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): `writer` is only created for the 'csv' format; any
        # other format fails with a NameError at the first header write.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True
        protein_path = options.inputpdb

        # iterate through all mol2 files in the ligand input directory
        for lig_path in glob.glob(os.path.join(options.inputligdir, '*.mol2')):
            if not os.path.isfile(protein_path):
                logger.fatal("The protein file {0} does not exist."
                             .format(protein_path))
                sys.exit(1)

            # the protein is re-read for every ligand, as in the original flow
            protein = get_molecule(protein_path)
            ligand = get_molecule(lig_path)

            if options.descriptor == 'elements':
                # element pair descriptor for this complex
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'sybyl':
                # SYBYL atom type descriptor for this complex
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'credo':
                # protein-ligand structural interaction fingerprint
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)
            else:
                # without this branch an unknown name surfaces as an
                # unbound-variable error further down
                logger.fatal("Unknown descriptor type {0}."
                             .format(options.descriptor))
                sys.exit(1)

            if write_header:
                # the ligand name is the last column of every row
                labels.append('ligand')
                writer.writerow(labels)
                write_header = False

            if options.format == 'csv':
                # file name up to the first dot identifies the ligand
                ligandname = os.path.basename(lig_path).split('.')[0]
                row = descriptor.tolist() + [ligandname]
                writer.writerow(row)
    finally:
        # never close stdout; only close a file this function opened
        if fh is not sys.stdout:
            fh.close()
def main():
    """Write one descriptor row per protein-ligand complex of a PDBbind index.

    The index is read with ``parse_index(options.pdbbind, options.index)``.
    For every PDB entry in it, the protein (``.pdb``) and ligand (``.mol2``)
    files are looked up inside ``<options.pdbbind>/<pdb>/`` using the file
    name labels from ``config['standard']``.  Entries whose directory,
    protein file or ligand file is missing are logged and skipped.

    Each CSV row holds the activity value formatted with two decimals, the
    descriptor values and the PDB code; the header row ('pKd/pKi', the
    descriptor labels, 'pdb') is written once before the first data row.
    Output goes to ``options.output`` or to standard output when no path is
    given.

    Exits with status 1 if ``options.descriptor`` is not 'credo', 'elements'
    or 'sybyl'.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    pdbbindconf = config['standard']
    data = parse_index(options.pdbbind, options.index)

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # Python 3: the csv module needs a text handle opened with newline=''
        fh = open(options.output, 'w', newline='')
    else:
        # Python 2: the csv module needs a binary handle
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): `writer` is only created for the 'csv' format; any
        # other format fails with a NameError at the first header write.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True

        # iterate through all protein-ligand complexes of the index
        for pdb in data:
            # get_pkd receives the numeric activity and its unit; presumably
            # it returns the pKd/pKi named in the header -- confirm
            pkd = get_pkd(float(data[pdb]['value']), data[pdb]['unit'])

            # the PDBbind directory containing the structures of this entry
            entry_dir = os.path.join(options.pdbbind, pdb)

            if not os.path.exists(entry_dir):
                logger.error(
                    "The PDBbind directory for PDB entry {0} does not exist."
                    .format(pdb))
                continue

            # file names follow the <pdb>_<label>.<ext> pattern, with the
            # labels taken from the configuration section
            prot_path = os.path.join(
                entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))
            lig_path = os.path.join(
                entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error(
                    "The protein pocket structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            if not os.path.exists(lig_path):
                logger.error(
                    "The ligand structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            protein = get_molecule(prot_path)
            ligand = get_molecule(lig_path)

            if options.descriptor == 'credo':
                # protein-ligand structural interaction fingerprint
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'elements':
                # element pair descriptor for this complex
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'sybyl':
                # SYBYL atom type descriptor for this complex
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)
            else:
                # without this branch an unknown name surfaces as an
                # unbound-variable error further down
                logger.fatal("Unknown descriptor type {0}."
                             .format(options.descriptor))
                sys.exit(1)

            if write_header:
                # activity goes first, the PDB code last
                labels.insert(0, 'pKd/pKi')
                labels.append('pdb')
                writer.writerow(labels)
                write_header = False

            if options.format == 'csv':
                # format the activity with two decimal places
                pkdstring = "{0:.2f}".format(pkd)
                row = [pkdstring] + descriptor.tolist() + [pdb]
                writer.writerow(row)
    finally:
        # never close stdout; only close a file this function opened
        if fh is not sys.stdout:
            fh.close()
def main():
    """Write one descriptor row per entry of the CSAR data set.

    Every entry of the directory named by ``config['csar']['directory']`` is
    expected to contain a ``kd.dat`` file holding three comma-separated
    fields (the second is used as the PDB code, the third as the activity
    value) and a ``*_complex.mol2`` file.  The complex is loaded with
    ``get_molecule`` and the ligand is taken from it with
    ``extract_ligand``.

    Each CSV row holds the activity value as read from ``kd.dat``, the
    descriptor values and the PDB code; the header row ('pKd/pKi', the
    descriptor labels, 'pdb') is written once before the first data row.
    Output goes to ``options.output`` or to standard output when no path is
    given.

    Exits with status 1 if an entry lacks ``kd.dat`` or a ``*_complex.mol2``
    file, or if ``options.descriptor`` is not 'elements', 'sybyl' or 'credo'.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # Python 3: the csv module needs a text handle opened with newline=''
        fh = open(options.output, 'w', newline='')
    else:
        # Python 2: the csv module needs a binary handle
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): `writer` is only created for the 'csv' format; any
        # other format fails with a NameError at the first header write.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True

        # iterate through all entries of the CSAR directory
        for directory in os.listdir(csarconf['directory']):
            entrydir = os.path.join(csarconf['directory'], directory)

            # kd.dat carries the PDB code and the activity of this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal(
                    "CSAR directory {} does not contain kd.dat file.".format(
                        directory))
                sys.exit(1)

            # read through a context manager so the handle is closed at once
            with open(kddat_path) as kddat:
                fields = kddat.read().strip().replace(' ', '').split(',')

            # three fields are required; the first one is not used
            _, pdb, pkd = fields

            complex_paths = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError from pop()
            if not complex_paths:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            protein_path = complex_paths.pop()

            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            if options.descriptor == 'elements':
                # element pair descriptor for this complex
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'sybyl':
                # SYBYL atom type descriptor for this complex
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'credo':
                # protein-ligand structural interaction fingerprint
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)
            else:
                # without this branch an unknown name surfaces as an
                # unbound-variable error further down
                logger.fatal("Unknown descriptor type {0}."
                             .format(options.descriptor))
                sys.exit(1)

            if write_header:
                # activity goes first, the PDB code last
                labels.insert(0, 'pKd/pKi')
                labels.append('pdb')
                writer.writerow(labels)
                write_header = False

            if options.format == 'csv':
                # the activity is written exactly as it appears in kd.dat
                row = [pkd] + descriptor.tolist() + [pdb]
                writer.writerow(row)
    finally:
        # never close stdout; only close a file this function opened
        if fh is not sys.stdout:
            fh.close()
def main():
    """Write one descriptor row per entry of the CSAR data set.

    Every entry of the directory named by ``config['csar']['directory']`` is
    expected to contain a ``kd.dat`` file holding three comma-separated
    fields (the second is used as the PDB code, the third as the activity
    value) and a ``*_complex.mol2`` file.  The complex is loaded with
    ``get_molecule`` and the ligand is taken from it with
    ``extract_ligand``.

    Each CSV row holds the activity value as read from ``kd.dat``, the
    descriptor values and the PDB code; the header row ('pKd/pKi', the
    descriptor labels, 'pdb') is written once before the first data row.
    Output goes to ``options.output`` or to standard output when no path is
    given.

    Exits with status 1 if an entry lacks ``kd.dat`` or a ``*_complex.mol2``
    file, or if ``options.descriptor`` is not 'elements', 'sybyl' or 'credo'.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # Python 3: the csv module needs a text handle opened with newline=''
        fh = open(options.output, 'w', newline='')
    else:
        # Python 2: the csv module needs a binary handle
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): `writer` is only created for the 'csv' format; any
        # other format fails with a NameError at the first header write.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True
        csar_root = csarconf['directory']

        # iterate through all entries of the CSAR directory
        for directory in os.listdir(csar_root):
            entrydir = os.path.join(csar_root, directory)

            # kd.dat carries the PDB code and the activity of this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal("CSAR directory {} does not contain kd.dat file."
                             .format(directory))
                sys.exit(1)

            # read through a context manager so the handle is closed at once
            with open(kddat_path) as kddat:
                fields = kddat.read().strip().replace(' ', '').split(',')

            # three fields are required; the first one is not used
            _, pdb, pkd = fields

            complex_paths = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError from pop()
            if not complex_paths:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            protein_path = complex_paths.pop()

            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            if options.descriptor == 'elements':
                # element pair descriptor for this complex
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'sybyl':
                # SYBYL atom type descriptor for this complex
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)
            elif options.descriptor == 'credo':
                # protein-ligand structural interaction fingerprint
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)
            else:
                # without this branch an unknown name surfaces as an
                # unbound-variable error further down
                logger.fatal("Unknown descriptor type {0}."
                             .format(options.descriptor))
                sys.exit(1)

            if write_header:
                # activity goes first, the PDB code last
                labels.insert(0, 'pKd/pKi')
                labels.append('pdb')
                writer.writerow(labels)
                write_header = False

            if options.format == 'csv':
                # the activity is written exactly as it appears in kd.dat
                row = [pkd] + descriptor.tolist() + [pdb]
                writer.writerow(row)
    finally:
        # never close stdout; only close a file this function opened
        if fh is not sys.stdout:
            fh.close()
def main():
    """Write one descriptor row per docking pose of every PDBbind complex.

    The index is read with ``parse_index(options.pdbbind, options.index)``.
    For every PDB entry the protein file is looked up inside
    ``<options.pdbbind>/<pdb>/`` using the labels from ``config['pdbbind']``;
    entries whose directory or protein file is missing are logged and
    skipped.  For every method in the module-level ``dockingMethods`` the
    poses found by ``listFiles`` under ``<posesDir>/<method>/<pdb>`` are
    processed, and ``calcRMSDPoses`` supplies a per-pose RMSD against the
    reference ligand file.

    Each CSV row holds the pose RMSD, the descriptor values and a pose
    identifier (pose file name up to the first dot, an underscore, and the
    docking method); the header row ('RMSD', the descriptor labels,
    'ligandID') is written once before the first data row.  Output goes to
    ``options.output`` or to standard output when no path is given.

    Exits with status 1 if ``options.descriptor`` is not 'credo', 'elements'
    or 'sybyl'.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    pdbbindconf = config['pdbbind']
    data = parse_index(options.pdbbind, options.index)

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # Python 3: the csv module needs a text handle opened with newline=''
        fh = open(options.output, 'w', newline='')
    else:
        # Python 2: the csv module needs a binary handle
        fh = open(options.output, 'wb')

    # TODO: add a pattern for each docking method, right now only works
    # with gold
    lig_pattern = "gold_soln"

    try:
        # choose how the output data will be written
        # NOTE(review): `writer` is only created for the 'csv' format; any
        # other format fails with a NameError at the first header write.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True

        # iterate through all protein-ligand complexes of the index
        for pdb in data:
            # the PDBbind directory containing the structures of this entry
            entry_dir = os.path.join(options.pdbbind, pdb)

            if not os.path.exists(entry_dir):
                logger.error(
                    "The PDBbind directory for PDB entry {0} does not exist."
                    .format(pdb))
                continue

            # file names follow the <pdb>_<label>.<ext> pattern, with the
            # labels taken from the configuration section
            prot_path = os.path.join(
                entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))
            ref_lig_path = os.path.join(
                entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error(
                    "The protein pocket structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            # each docking method contributes its own set of poses
            for score in dockingMethods:
                pose_path = os.path.join(posesDir, score, pdb)

                # progress goes to stderr so it cannot end up inside the CSV
                # when the rows themselves are written to stdout
                sys.stderr.write("Calculating RMSDs for ligand " + pdb +
                                 ", docking method " + score + "\n")

                # RMSD lookup for all poses, indexed by pose file name
                RMSDs = calcRMSDPoses(ref_lig_path, pose_path, lig_pattern)

                for pose in listFiles(pose_path, lig_pattern):
                    lig_path = os.path.join(posesDir, score, pdb, pose)
                    poseRMSD = RMSDs[pose]
                    poseID = pose.split('.')[0] + '_' + score

                    if not os.path.exists(lig_path):
                        logger.error(
                            "The ligand structure for PDB entry {0} cannot be found."
                            .format(pdb))
                        continue

                    # the protein is re-read for every pose, as in the
                    # original flow
                    protein = get_molecule(prot_path)
                    ligand = get_molecule(lig_path)

                    if options.descriptor == 'credo':
                        # protein-ligand structural interaction fingerprint
                        descriptor, labels = contacts.sift_descriptor(
                            protein, ligand, binsize=options.binsize)
                    elif options.descriptor == 'elements':
                        # element pair descriptor for this complex
                        descriptor, labels = contacts.element_descriptor(
                            protein, ligand, binsize=options.binsize)
                    elif options.descriptor == 'sybyl':
                        # SYBYL atom type descriptor for this complex
                        descriptor, labels = contacts.sybyl_atom_type_descriptor(
                            protein, ligand, binsize=options.binsize)
                    else:
                        # without this branch an unknown name surfaces as an
                        # unbound-variable error further down
                        logger.fatal("Unknown descriptor type {0}."
                                     .format(options.descriptor))
                        sys.exit(1)

                    if write_header:
                        # RMSD goes first, the pose identifier last
                        labels.insert(0, 'RMSD')
                        labels.append('ligandID')
                        writer.writerow(labels)
                        write_header = False

                    if options.format == 'csv':
                        row = [poseRMSD] + descriptor.tolist() + [poseID]
                        writer.writerow(row)
    finally:
        # never close stdout; only close a file this function opened
        if fh is not sys.stdout:
            fh.close()