def parse_options():
    """
    Build the option parser, parse the command line and validate the result.

    Returns the optparse values object carrying the attributes ``debug``,
    ``binsize``, ``format``, ``output`` and ``descriptor``.

    If the requested descriptor is not one of 'elements', 'credo' or 'sybyl',
    the problem is logged, the help text is printed and the process exits
    with status 1.
    """
    # (flags, keyword arguments) of every option, in the order they are
    # registered with the parser (this is also the order shown by --help)
    option_table = (
        (("--debug",),
         dict(action="store_true",
              dest="debug",
              default=False,
              help='Set logging level to debug and print more verbose output.')),
        (("-B", "--binsize"),
         dict(dest="binsize",
              type=float,
              default=0.0,
              help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")),
        (("-F", "--format"),
         dict(dest="format",
              default='csv',
              help="Format to use for writing the SIFt of the protein-ligand complex.")),
        (("-O", "--output"),
         dict(dest="output",
              default=None,
              help="File to which the data will be written (default=STDOUT).")),
        (("-D", "--descriptor"),
         dict(dest="descriptor",
              default='elements',
              help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")),
    )

    parser = OptionParser(usage="%prog [options]")
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    # positional arguments are accepted by optparse but not used here
    options, _positional = parser.parse_args()

    descriptor_is_valid = options.descriptor in ('elements', 'credo', 'sybyl')
    if not descriptor_is_valid:
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options
def parse_options():
    """
    Parse and validate the command line options of this script.

    Registers the options --debug, -B/--binsize, -F/--format, -O/--output
    and -D/--descriptor, parses ``sys.argv`` and returns the resulting
    optparse values object.

    An unknown descriptor name is logged as fatal, followed by the help
    text and an exit with status 1.
    """
    known_descriptors = ('elements', 'credo', 'sybyl')

    parser = OptionParser(usage="%prog [options]")
    add = parser.add_option

    add("--debug",
        action="store_true",
        dest="debug",
        default=False,
        help='Set logging level to debug and print more verbose output.')

    add("-B", "--binsize",
        dest="binsize",
        type=float,
        default=0.0,
        help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")

    add("-F", "--format",
        dest="format",
        default='csv',
        help="Format to use for writing the SIFt of the protein-ligand complex.")

    add("-O", "--output",
        dest="output",
        default=None,
        help="File to which the data will be written (default=STDOUT).")

    add("-D", "--descriptor",
        dest="descriptor",
        default='elements',
        help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    # only the option values are needed; leftover positional arguments
    # are ignored
    parsed = parser.parse_args()
    options = parsed[0]

    if options.descriptor not in known_descriptors:
        message = "Invalid descriptor: {0}.".format(options.descriptor)
        logger.fatal(message)
        parser.print_help()
        sys.exit(1)

    return options
def main():
    """
    Write one descriptor row per ligand found in the ligand directory.

    Every ``*.mol2`` file in ``options.inputligdir`` is paired with the
    protein structure ``options.inputpdb``. The descriptor selected with
    ``options.descriptor`` is computed for each pair and written as a CSV
    row to ``options.output`` (or STDOUT when no output file is given). The
    first row is a header made of the descriptor labels plus a trailing
    'ligand' column; each data row ends with the ligand file name up to its
    first dot.

    Exits with status 1 if the output format is not 'csv', if the
    descriptor is unknown or if the protein file does not exist.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    # only CSV output is implemented; fail early instead of running into an
    # undefined writer once the first row is about to be written
    if options.format != 'csv':
        logger.fatal("Unsupported output format: {0}.".format(options.format))
        sys.exit(1)

    # pick the descriptor function once, it does not change between ligands
    if options.descriptor == 'elements':
        # descriptor based on the sum of interacting element pairs
        calculate_descriptor = contacts.element_descriptor
    elif options.descriptor == 'sybyl':
        # descriptor based on the sum of interacting sybyl atom type pairs
        calculate_descriptor = contacts.sybyl_atom_type_descriptor
    elif options.descriptor == 'credo':
        # descriptor based on structural interaction fingerprints
        calculate_descriptor = contacts.sift_descriptor
    else:
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        sys.exit(1)

    # the protein is the same for every ligand, so check it only once
    protein_path = options.inputpdb
    if not os.path.isfile(protein_path):
        logger.fatal("The protein file {0} does not exist.".format(protein_path))
        sys.exit(1)

    if options.output:
        # the csv module wants a text file with newline='' on Python 3 and
        # a binary file on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        writer = csv.writer(fh, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        header_pending = True

        # iterate through all mol2 files of the ligand directory; sorted to
        # make the row order reproducible between runs
        ligand_pattern = os.path.join(options.inputligdir, '*.mol2')
        for lig_path in sorted(glob.glob(ligand_pattern)):
            # NOTE(review): the protein is re-read for every ligand as in the
            # original code; hoist it out of the loop only after confirming
            # that the descriptor functions do not modify the molecule
            protein = get_molecule(protein_path)
            ligand = get_molecule(lig_path)

            descriptor, labels = calculate_descriptor(
                protein, ligand, binsize=options.binsize)

            if header_pending:
                # column labels plus the trailing ligand name column
                writer.writerow(list(labels) + ['ligand'])
                header_pending = False

            # file name up to the first dot identifies the ligand
            ligandname = os.path.basename(lig_path).split('.')[0]
            writer.writerow(descriptor.tolist() + [ligandname])
    finally:
        # never close STDOUT, only files opened here
        if fh is not sys.stdout:
            fh.close()
def main():
    """
    Write one descriptor row per entry of the CSAR data set.

    Every entry of the directory configured in ``config['csar']['directory']``
    is expected to contain a ``kd.dat`` file holding three comma separated
    fields (entry, pdb, pKd) and a ``*_complex.mol2`` file. The ligand is
    extracted from the complex, the descriptor selected with
    ``options.descriptor`` is computed and a CSV row
    ``pKd, <descriptor values>, pdb`` is written to ``options.output`` (or
    STDOUT when no output file is given). The first row is a header made of
    'pKd/pKi', the descriptor labels and 'pdb'.

    Exits with status 1 if the output format is not 'csv', if the
    descriptor is unknown or if an entry lacks ``kd.dat`` or the complex
    file.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    # only CSV output is implemented; fail early instead of running into an
    # undefined writer once the first row is about to be written
    if options.format != 'csv':
        logger.fatal("Unsupported output format: {0}.".format(options.format))
        sys.exit(1)

    # pick the descriptor function once, it does not change between entries
    if options.descriptor == 'elements':
        # descriptor based on the sum of interacting element pairs
        calculate_descriptor = contacts.element_descriptor
    elif options.descriptor == 'sybyl':
        # descriptor based on the sum of interacting sybyl atom type pairs
        calculate_descriptor = contacts.sybyl_atom_type_descriptor
    elif options.descriptor == 'credo':
        # descriptor based on structural interaction fingerprints
        calculate_descriptor = contacts.sift_descriptor
    else:
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        sys.exit(1)

    if options.output:
        # the csv module wants a text file with newline='' on Python 3 and
        # a binary file on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        writer = csv.writer(fh, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        header_pending = True

        # iterate through all numbered directories; sorted to make the row
        # order reproducible between runs
        for directory in sorted(os.listdir(csarconf['directory'])):
            entrydir = os.path.join(csarconf['directory'], directory)

            # parse kd.dat to get the pKd, exit if kd.dat is missing
            kddat_path = os.path.join(entrydir, 'kd.dat')
            if not os.path.isfile(kddat_path):
                logger.fatal("CSAR directory {} does not contain kd.dat file."
                             .format(directory))
                sys.exit(1)

            # close the file right away instead of leaking one handle per
            # entry; the first field (entry id) is not needed
            with open(kddat_path) as kddat:
                _entry, pdb, pkd = (
                    kddat.read().strip().replace(' ', '').split(','))

            # report a missing complex explicitly instead of failing with an
            # IndexError from pop() on an empty list
            complexes = glob.glob(os.path.join(entrydir, '*_complex.mol2'))
            if not complexes:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            protein_path = complexes.pop()
            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            descriptor, labels = calculate_descriptor(
                protein, ligand, binsize=options.binsize)

            if header_pending:
                # affinity first, descriptor labels, PDB code last
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])
                header_pending = False

            writer.writerow([pkd] + descriptor.tolist() + [pdb])
    finally:
        # never close STDOUT, only files opened here
        if fh is not sys.stdout:
            fh.close()
def parse_options():
    """
    Parse and validate the command line options of the PDBbind script.

    Registers the options --debug, -B/--binsize, -F/--format, -O/--output,
    -P/--pdbbind-dir, -I/--index and -D/--descriptor, parses ``sys.argv``
    and returns the resulting optparse values object.

    The process exits with status 1 when the PDBbind directory or the index
    file is not given or does not exist, or when the descriptor is not one
    of 'elements', 'credo' or 'sybyl'.
    """
    parser = OptionParser(usage="%prog [options]")
    add = parser.add_option

    add("--debug",
        action="store_true",
        dest="debug",
        default=False,
        help='Set logging level to debug and print more verbose output.')
    add("-B", "--binsize",
        dest="binsize",
        type=float,
        default=0.0,
        help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")
    add("-F", "--format",
        dest="format",
        default='csv',
        help="Format to use for writing the SIFt of the protein-ligand complex.")
    add("-O", "--output",
        dest="output",
        default=None,
        help="File to which the data will be written (default=STDOUT).")
    add("-P", "--pdbbind-dir",
        dest="pdbbind",
        default=None,
        help="PDBbind directory.")
    add("-I", "--index",
        dest="index",
        default=None,
        help="PDBbind data index file for a specific data set (core,refined,general).")
    add("-D", "--descriptor",
        dest="descriptor",
        default='credo',
        help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    options, _positional = parser.parse_args()

    # (value, message if not given, message if path does not exist);
    # checked in this order: PDBbind directory first, index file second
    required_paths = (
        (options.pdbbind,
         "The PDBbind directory must be provided.",
         "The specified PDBbind directory does not exist."),
        (options.index,
         "A path to a PDBbind data index file must be provided.",
         "The specified PDBbind data index file does not exist."),
    )

    for path, not_given, not_found in required_paths:
        if not path:
            # a missing argument is a usage problem, so show the help as well
            logger.error(not_given)
            parser.print_help()
            sys.exit(1)
        elif not os.path.exists(path):
            logger.fatal(not_found)
            sys.exit(1)

    if options.descriptor not in ('elements', 'credo', 'sybyl'):
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options
def main():
    """
    Write one descriptor row per entry of the CSAR data set.

    Every entry of the directory configured in ``config['csar']['directory']``
    is expected to contain a ``kd.dat`` file holding three comma separated
    fields (entry, pdb, pKd) and a ``*_complex.mol2`` file. The ligand is
    extracted from the complex, the descriptor selected with
    ``options.descriptor`` is computed and a CSV row
    ``pKd, <descriptor values>, pdb`` is written to ``options.output`` (or
    STDOUT when no output file is given). The first row is a header made of
    'pKd/pKi', the descriptor labels and 'pdb'.

    Exits with status 1 if the output format is not 'csv', if the
    descriptor is unknown or if an entry lacks ``kd.dat`` or the complex
    file.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    # only CSV output is implemented; fail early instead of running into an
    # undefined writer once the first row is about to be written
    if options.format != 'csv':
        logger.fatal("Unsupported output format: {0}.".format(options.format))
        sys.exit(1)

    # pick the descriptor function once, it does not change between entries
    if options.descriptor == 'elements':
        # descriptor based on the sum of interacting element pairs
        calculate_descriptor = contacts.element_descriptor
    elif options.descriptor == 'sybyl':
        # descriptor based on the sum of interacting sybyl atom type pairs
        calculate_descriptor = contacts.sybyl_atom_type_descriptor
    elif options.descriptor == 'credo':
        # descriptor based on structural interaction fingerprints
        calculate_descriptor = contacts.sift_descriptor
    else:
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        sys.exit(1)

    if options.output:
        # the csv module wants a text file with newline='' on Python 3 and
        # a binary file on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        writer = csv.writer(fh, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        header_pending = True

        # iterate through all numbered directories; sorted to make the row
        # order reproducible between runs
        for directory in sorted(os.listdir(csarconf['directory'])):
            entrydir = os.path.join(csarconf['directory'], directory)

            # parse kd.dat to get the pKd, exit if kd.dat is missing
            kddat_path = os.path.join(entrydir, 'kd.dat')
            if not os.path.isfile(kddat_path):
                logger.fatal(
                    "CSAR directory {} does not contain kd.dat file.".format(
                        directory))
                sys.exit(1)

            # close the file right away instead of leaking one handle per
            # entry; the first field (entry id) is not needed
            with open(kddat_path) as kddat:
                _entry, pdb, pkd = (
                    kddat.read().strip().replace(' ', '').split(','))

            # report a missing complex explicitly instead of failing with an
            # IndexError from pop() on an empty list
            complexes = glob.glob(os.path.join(entrydir, '*_complex.mol2'))
            if not complexes:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            protein_path = complexes.pop()
            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            descriptor, labels = calculate_descriptor(
                protein, ligand, binsize=options.binsize)

            if header_pending:
                # affinity first, descriptor labels, PDB code last
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])
                header_pending = False

            writer.writerow([pkd] + descriptor.tolist() + [pdb])
    finally:
        # never close STDOUT, only files opened here
        if fh is not sys.stdout:
            fh.close()
def parse_options():
    """
    Parse and validate the command line options of the PDBbind script.

    Registers the options --debug, -B/--binsize, -F/--format, -O/--output,
    -P/--pdbbind-dir, -I/--index and -D/--descriptor, parses ``sys.argv``
    and returns the resulting optparse values object.

    The process exits with status 1 when the PDBbind directory or the index
    file is empty or does not exist, or when the descriptor is not one of
    'elements', 'credo' or 'sybyl'.
    """
    # NOTE(review): these defaults are absolute paths of one specific
    # machine (the original code carried a "#None" marker next to each of
    # them). They are kept so existing invocations behave as before, but
    # they should probably go back to None so that the "must be provided"
    # checks below become effective again - confirm with the author.
    default_output = "/home/dat/WORK/DB/DESCRIPTORS/CASF2014-refined_SIFt_RMSD.csv"
    default_pdbbind = "/home/dat/WORK/DB/PDBbind/v2014-refined/"
    default_index = "/home/dat/WORK/DB/PDBbind/v2014-refined/INDEX_refined_data.2014"

    # PARSE COMMAND LINE
    usage = "%prog [options]"
    parser = OptionParser(usage=usage)

    parser.add_option("--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help='Set logging level to debug and print more verbose output.')

    parser.add_option("-B", "--binsize",
                      dest="binsize",
                      type=float,
                      default=0.0,
                      help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")

    parser.add_option("-F", "--format",
                      dest="format",
                      default='csv',
                      help="Format to use for writing the SIFt of the protein-ligand complex.")

    # the help used to claim that STDOUT is the default although a file path
    # is; let optparse insert the real default instead
    parser.add_option("-O", "--output",
                      dest="output",
                      default=default_output,
                      help="File to which the data will be written (default=%default).")

    parser.add_option("-P", "--pdbbind-dir",
                      dest="pdbbind",
                      default=default_pdbbind,
                      help="PDBbind directory.")

    parser.add_option("-I", "--index",
                      dest="index",
                      default=default_index,
                      help="PDBbind data index file for a specific data set (core,refined,general).")

    parser.add_option("-D", "--descriptor",
                      dest="descriptor",
                      default='credo',
                      help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    # GET COMMAND LINE OPTIONS
    # positional arguments are accepted by optparse but not used here
    options, _positional = parser.parse_args()

    # with the defaults above these "not given" branches are only reached
    # when an empty string is passed explicitly
    if not options.pdbbind:
        logger.error("The PDBbind directory must be provided.")
        parser.print_help()
        sys.exit(1)

    elif not os.path.exists(options.pdbbind):
        logger.fatal("The specified PDBbind directory does not exist.")
        sys.exit(1)

    if not options.index:
        logger.error("A path to a PDBbind data index file must be provided.")
        parser.print_help()
        sys.exit(1)

    elif not os.path.exists(options.index):
        logger.fatal("The specified PDBbind data index file does not exist.")
        sys.exit(1)

    if options.descriptor not in ('elements', 'credo', 'sybyl'):
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options