def setUp(self): """ Instantiate local copy of Binana object. """ self.binana = Binana() ### 3zp9 comes from PDBBind-CN _3zp9_protein = PDB() _3zp9_protein_pdb = os.path.join(data_dir(), "3zp9_protein_hyd.pdb") _3zp9_protein_pdbqt = os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt") _3zp9_protein.load_from_files(_3zp9_protein_pdb, _3zp9_protein_pdbqt) # The ligand is also specified by pdbbind _3zp9_ligand = PDB() _3zp9_ligand_pdb = os.path.join(data_dir(), "3zp9_ligand_hyd.pdb") _3zp9_ligand_pdbqt = os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt") _3zp9_ligand.load_from_files(_3zp9_ligand_pdb, _3zp9_ligand_pdbqt) ### 3bwf comes from PDBBind-CN _3bwf_protein = PDB() _3bwf_protein_pdb = os.path.join(data_dir(), "3bwf_protein_hyd.pdb") _3bwf_protein_pdbqt = os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt") _3bwf_protein.load_from_files(_3bwf_protein_pdb, _3bwf_protein_pdbqt) # The ligand is also specified by pdbbind _3bwf_ligand = PDB() _3bwf_ligand_pdb = os.path.join(data_dir(), "3bwf_ligand_hyd.pdb") _3bwf_ligand_pdbqt = os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt") _3bwf_ligand.load_from_files(_3bwf_ligand_pdb, _3bwf_ligand_pdbqt) self.test_cases = [("3bwf", _3bwf_protein, _3bwf_ligand), ("3zp9", _3zp9_protein, _3zp9_ligand)]
def featurize_dude(dude_dir, target, pickle_dir, num_jobs): """Featurize DUD-E docked poses and write features to pickle_out. Parameters ---------- dude_dir: string Path to DUD-E directory target: string Name of DUD-E target. pickle_dir: string Path to directory to output pickles """ target_dir = os.path.join(dude_dir, target) actives_dir = os.path.join(target_dir, "actives") decoys_dir = os.path.join(target_dir, "decoys") actives = [a for a in os.listdir(actives_dir)] decoys = [a for a in os.listdir(decoys_dir)] receptor = os.path.join(target_dir, "receptor.pdb") pickle_out = os.path.join(target_dir, "out.pkl.gz") # Just for debugging purposes actives = actives[:1] num_per_job = int(math.ceil(len(actives) / float(num_jobs))) print "Number per job: %d" % num_per_job protein_pdb_path = "/home/rbharath/DUD-E/aa2ar/receptor_hyd.pdb" protein_pdbqt_path = "/home/rbharath/DUD-E/aa2ar/receptor_hyd.pdbqt" print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) binana = Binana() feature_len = binana.num_features() feature_vectors = {} for compound in actives: compound_name = compound.split(".")[0] compound_pdbqt = compound_name + "_hyd_out.pdbqt" compound_pdbqt = os.path.join(actives_dir, compound_pdbqt) # Convert the pdbqt to pdb pdbqt_to_pdb(compound_pdbqt, actives_dir) compound_pdb = compound_name + "_hyd_out.pdb" compound_pdb = os.path.join(actives_dir, compound_pdb) structures = MultiStructure() structures.load_from_files(compound_pdb, compound_pdbqt) vectors = [] for key in sorted(structures.molecules.keys()): structure = structures.molecules[key] print "type(structure)" print type(structure) vectors.append( binana.compute_input_vector(structure, protein_pdb_obj)) feature_vectors[compound_name] = vectors with gzip.open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f) decoys = decoys[:1]
def featurize_job(docked_compounds): """Featurize all docked structures.""" # Instantiate copy of binana vector binana = Binana() feature_len = binana_num_features() feature_vectors = {} for count, compound in enumerate(docked_compounds): print "\nprocessing %d-th docked pdb %s" % (count, compound)
def featurize_fingerprint(pdb_directories, pickle_out): """Featurize all pdbs in provided directories.""" # Instantiate copy of binana vector binana = Binana() # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector # for derivation. feature_len = binana.num_features() feature_vectors = {} for count, pdb_dir in enumerate(pdb_directories): print "\nprocessing %d-th pdb %s" % (count, dir) print "About to extract ligand and protein input files" ligand_pdb, ligand_pdbqt = None, None protein_pdb, protein_pdbqt = None, None for f in os.listdir(pdb_dir): if re.search("_ligand_hyd.pdb$", f): ligand_pdb = f elif re.search("_ligand_hyd.pdbqt$", f): ligand_pdbqt = f elif re.search("_protein_hyd.pdb$", f): protein_pdb = f elif re.search("_protein_hyd.pdbqt$", f): protein_pdbqt = f print "Extracted Input Files:" print (ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt) if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not protein_pdbqt): raise ValueError("Required files not present for %s" % pdb_dir) ligand_pdb_path = os.path.join(pdb_dir, ligand_pdb) ligand_pdbqt_path = os.path.join(pdb_dir, ligand_pdbqt) protein_pdb_path = os.path.join(pdb_dir, protein_pdb) protein_pdbqt_path = os.path.join(pdb_dir, protein_pdbqt) print "About to load ligand from input files" ligand_pdb_obj = PDB() ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path) print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) print "About to generate feature vector." features = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj) if len(features) != feature_len: raise ValueError("Feature length incorrect on %s" % pdb_dir) print "Feature vector generated correctly." print "About to compute ligand smiles string." ligand_mol = Chem.MolFromPDBFile(ligand_pdb_path) # TODO(rbharath): Why does this fail sometimes? if ligand_mol is None: continue smiles = Chem.MolToSmiles(ligand_mol) print "About to compute sequence." protein = md.load(protein_pdb_path) seq = [r.name for r in protein.top.residues] # Write the computed quantities feature_vectors[pdb_dir] = (features, smiles, seq) print "About to write pickle to " + pickle_out with open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f)
def featurize_pdbbind(pdbbind_dir, pickle_out): """Featurize all entries in pdbbind_dir and write features to pickle_out pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand complex to be featurized. The ligand and receptor should each have a pdb and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}' while the receptor files should end in '_protein_hyd.${FILETYPE}' pdbbind_dir: string Path to pdbbind directory. pickle_out: string Path to write pickle output. """ assert os.path.isdir(pdbbind_dir) # Instantiate copy of binana vector binana = Binana() feature_vectors = {} # Extract the subdirectories in pdbbind_dir subdirs = [ d for d in os.listdir(pdbbind_dir) if os.path.isdir(os.path.join(pdbbind_dir, d)) ] # TODO(rbharath): ONLY FOR DEBUGGING! subdirs = subdirs[900:] num_atoms = len(Binana.atom_types) # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector # for derivation. feature_len = (3 * num_atoms * (num_atoms + 1) / 2 + num_atoms + 12 + 6 + 3 + 6 + 3 + 6 + 3 + 1) for count, d in enumerate(subdirs): print "\nprocessing %d-th pdb %s" % (count, d) subdir = os.path.join(pdbbind_dir, d) print "About to extract ligand and protein input files" ligand_pdb, ligand_pdbqt = None, None protein_pdb, protein_pdbqt = None, None for f in os.listdir(subdir): if re.search("_ligand_hyd.pdb$", f): ligand_pdb = f elif re.search("_ligand_hyd.pdbqt$", f): ligand_pdbqt = f elif re.search("_protein_hyd.pdb$", f): protein_pdb = f elif re.search("_protein_hyd.pdbqt$", f): protein_pdbqt = f print "Extracted Input Files:" print(ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt) if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not protein_pdbqt): raise ValueError("Required files not present for %s" % d) ligand_pdb_path = os.path.join(subdir, ligand_pdb) ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt) protein_pdb_path = os.path.join(subdir, protein_pdb) protein_pdbqt_path = os.path.join(subdir, protein_pdbqt) print "About to load ligand from input files" ligand_pdb_obj = PDB() ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path) print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) print "About to generate feature vector." vector = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj) feature_vectors[d] = vector if len(vector) != feature_len: raise ValueError("Feature length incorrect on %s" % d) print "Feature vector generated correctly." with open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f)
def featurize_pdbbind(pdbbind_dir, pickle_out): """Featurize all entries in pdbbind_dir and write features to pickle_out pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand complex to be featurized. The ligand and receptor should each have a pdb and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}' while the receptor files should end in '_protein_hyd.${FILETYPE}' pdbbind_dir: string Path to pdbbind directory. pickle_out: string Path to write pickle output. """ assert os.path.isdir(pdbbind_dir) # Instantiate copy of binana vector binana = Binana() feature_vectors = {} # Extract the subdirectories in pdbbind_dir subdirs = [d for d in os.listdir(pdbbind_dir) if os.path.isdir(os.path.join(pdbbind_dir, d))] # TODO(rbharath): ONLY FOR DEBUGGING! subdirs = subdirs[900:] num_atoms = len(Binana.atom_types) # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector # for derivation. feature_len = (3*num_atoms*(num_atoms+1)/2 + num_atoms + 12 + 6 + 3 + 6 + 3 + 6 + 3 + 1) for count, d in enumerate(subdirs): print "\nprocessing %d-th pdb %s" % (count, d) subdir = os.path.join(pdbbind_dir, d) print "About to extract ligand and protein input files" ligand_pdb, ligand_pdbqt = None, None protein_pdb, protein_pdbqt = None, None for f in os.listdir(subdir): if re.search("_ligand_hyd.pdb$", f): ligand_pdb = f elif re.search("_ligand_hyd.pdbqt$", f): ligand_pdbqt = f elif re.search("_protein_hyd.pdb$", f): protein_pdb = f elif re.search("_protein_hyd.pdbqt$", f): protein_pdbqt = f print "Extracted Input Files:" print (ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt) if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not protein_pdbqt): raise ValueError("Required files not present for %s" % d) ligand_pdb_path = os.path.join(subdir, ligand_pdb) ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt) protein_pdb_path = os.path.join(subdir, protein_pdb) protein_pdbqt_path = os.path.join(subdir, protein_pdbqt) print "About to load ligand from input files" ligand_pdb_obj = PDB() ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path) print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) print "About to generate feature vector." vector = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj) feature_vectors[d] = vector if len(vector) != feature_len: raise ValueError("Feature length incorrect on %s" % d) print "Feature vector generated correctly." with open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f)
def setUp(self): """ Instantiate local copy of Binana object. """ self.binana = Binana() # TODO(rbharath): This load sequence is getting unwieldy. Refactor. #### PRGR is from the DUD-E collection #prgr_receptor = PDB() #prgr_pdb = os.path.join(data_dir(), "prgr_hyd.pdb") #prgr_pdbqt = os.path.join(data_dir(), "prgr_hyd.pdbqt") #prgr_receptor.load_from_files(prgr_pdb, prgr_pdbqt) ## This compound is CHEMBL1164248 #prgr_active = PDB() #prgr_active_pdb = os.path.join(data_dir(), "prgr_active0_hyd.pdb") #prgr_active_pdbqt = os.path.join(data_dir(), "prgr_active0_hyd.pdbqt") #prgr_active.load_from_files(prgr_active_pdb, prgr_active_pdbqt) #### c-Abl is taken from the Autodock Vina examples #cabl_receptor = PDB() #cabl_receptor_pdb = os.path.join(data_dir(), "c-Abl_hyd.pdb") #cabl_receptor_pdbqt = os.path.join(data_dir(), "c-Abl_hyd.pdbqt") #cabl_receptor.load_from_files(cabl_receptor_pdb, # cabl_receptor_pdbqt) ## This compound is imatinib #cabl_active = PDB() #cabl_active_pdb = os.path.join(data_dir(), "imatinib_hyd.pdb") #cabl_active_pdbqt = os.path.join(data_dir(), "imatinib_hyd.pdbqt") #cabl_active.load_from_files(cabl_active_pdb, # cabl_active_pdbqt) #### 1zea comes from PDBBind-CN ## Python complains about variables starting with numbers, so put an ## underscore in front of everything. #_1zea_protein = PDB() #_1zea_protein_pdb = os.path.join(data_dir(), "1zea_protein_hyd.pdb") #_1zea_protein_pdbqt = os.path.join(data_dir(), "1zea_protein_hyd.pdbqt") #_1zea_protein.load_from_files(_1zea_protein_pdb, _1zea_protein_pdbqt) ## The ligand is also specified by pdbbind #_1zea_ligand = PDB() #_1zea_ligand_pdb = os.path.join(data_dir(), "1zea_ligand_hyd.pdb") #_1zea_ligand_pdbqt = os.path.join(data_dir(), "1zea_ligand_hyd.pdbqt") #_1zea_ligand.load_from_files(_1zea_ligand_pdb, _1zea_ligand_pdbqt) #### 1r5y comes from PDBBind-CN #_1r5y_protein = PDB() #_1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb") #_1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt") #_1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt) ## The ligand is also specified by pdbbind #_1r5y_ligand = PDB() #_1r5y_ligand_pdb = os.path.join(data_dir(), "1r5y_ligand_hyd.pdb") #_1r5y_ligand_pdbqt = os.path.join(data_dir(), "1r5y_ligand_hyd.pdbqt") #_1r5y_ligand.load_from_files(_1r5y_ligand_pdb, _1r5y_ligand_pdbqt) #### 3ao4 comes from PDBBind-CN #_3ao4_protein = PDB() #_3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb") #_3ao4_protein_pdbqt = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt") #_3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt) ## The ligand is also specified by pdbbind #_3ao4_ligand = PDB() #_3ao4_ligand_pdb = os.path.join(data_dir(), "3ao4_ligand_hyd.pdb") #_3ao4_ligand_pdbqt = os.path.join(data_dir(), "3ao4_ligand_hyd.pdbqt") #_3ao4_ligand.load_from_files(_3ao4_ligand_pdb, _3ao4_ligand_pdbqt) #### 2jdm comes from PDBBind-CN #_2jdm_protein = PDB() #_2jdm_protein_pdb = os.path.join(data_dir(), "2jdm_protein_hyd.pdb") #_2jdm_protein_pdbqt = os.path.join(data_dir(), "2jdm_protein_hyd.pdbqt") #_2jdm_protein.load_from_files(_2jdm_protein_pdb, _2jdm_protein_pdbqt) ## The ligand is also specified by pdbbind #_2jdm_ligand = PDB() #_2jdm_ligand_pdb = os.path.join(data_dir(), "2jdm_ligand_hyd.pdb") #_2jdm_ligand_pdbqt = os.path.join(data_dir(), "2jdm_ligand_hyd.pdbqt") #_2jdm_ligand.load_from_files(_2jdm_ligand_pdb, _2jdm_ligand_pdbqt) #### 3g5k comes from PDBBind-CN #_3g5k_protein = PDB() #_3g5k_protein_pdb = os.path.join(data_dir(), "3g5k_protein_hyd.pdb") #_3g5k_protein_pdbqt = os.path.join(data_dir(), "3g5k_protein_hyd.pdbqt") #_3g5k_protein.load_from_files(_3g5k_protein_pdb, _3g5k_protein_pdbqt) ## The ligand is also specified by pdbbind #_3g5k_ligand = PDB() #_3g5k_ligand_pdb = os.path.join(data_dir(), "3g5k_ligand_hyd.pdb") #_3g5k_ligand_pdbqt = os.path.join(data_dir(), "3g5k_ligand_hyd.pdbqt") #_3g5k_ligand.load_from_files(_3g5k_ligand_pdb, _3g5k_ligand_pdbqt) #### 3str comes from PDBBind-CN #_3str_protein = PDB() #_3str_protein_pdb = os.path.join(data_dir(), "3str_protein_hyd.pdb") #_3str_protein_pdbqt = os.path.join(data_dir(), "3str_protein_hyd.pdbqt") #_3str_protein.load_from_files(_3str_protein_pdb, _3str_protein_pdbqt) ## The ligand is also specified by pdbbind #_3str_ligand = PDB() #_3str_ligand_pdb = os.path.join(data_dir(), "3str_ligand_hyd.pdb") #_3str_ligand_pdbqt = os.path.join(data_dir(), "3str_ligand_hyd.pdbqt") #_3str_ligand.load_from_files(_3str_ligand_pdb, _3str_ligand_pdbqt) #### 1nu3 comes from PDBBind-CN #_1nu3_protein = PDB() #_1nu3_protein_pdb = os.path.join(data_dir(), "1nu3_protein_hyd.pdb") #_1nu3_protein_pdbqt = os.path.join(data_dir(), "1nu3_protein_hyd.pdbqt") #_1nu3_protein.load_from_files(_1nu3_protein_pdb, _1nu3_protein_pdbqt) ## The ligand is also specified by pdbbind #_1nu3_ligand = PDB() #_1nu3_ligand_pdb = os.path.join(data_dir(), "1nu3_ligand_hyd.pdb") #_1nu3_ligand_pdbqt = os.path.join(data_dir(), "1nu3_ligand_hyd.pdbqt") #_1nu3_ligand.load_from_files(_1nu3_ligand_pdb, _1nu3_ligand_pdbqt) #### 2rio comes from PDBBind-CN #_2rio_protein = PDB() #_2rio_protein_pdb = os.path.join(data_dir(), "2rio_protein_hyd.pdb") #_2rio_protein_pdbqt = os.path.join(data_dir(), "2rio_protein_hyd.pdbqt") #_2rio_protein.load_from_files(_2rio_protein_pdb, _2rio_protein_pdbqt) ## The ligand is also specified by pdbbind #_2rio_ligand = PDB() #_2rio_ligand_pdb = os.path.join(data_dir(), "2rio_ligand_hyd.pdb") #_2rio_ligand_pdbqt = os.path.join(data_dir(), "2rio_ligand_hyd.pdbqt") #_2rio_ligand.load_from_files(_2rio_ligand_pdb, _2rio_ligand_pdbqt) #### 2y2h comes from PDBBind-CN #_2y2h_protein = PDB() #_2y2h_protein_pdb = os.path.join(data_dir(), "2y2h_protein_hyd.pdb") #_2y2h_protein_pdbqt = os.path.join(data_dir(), "2y2h_protein_hyd.pdbqt") #_2y2h_protein.load_from_files(_2y2h_protein_pdb, _2y2h_protein_pdbqt) ## The ligand is also specified by pdbbind #_2y2h_ligand = PDB() #_2y2h_ligand_pdb = os.path.join(data_dir(), "2y2h_ligand_hyd.pdb") #_2y2h_ligand_pdbqt = os.path.join(data_dir(), "2y2h_ligand_hyd.pdbqt") #_2y2h_ligand.load_from_files(_2y2h_ligand_pdb, _2y2h_ligand_pdbqt) #### 1pi5 comes from PDBBind-CN #_1pi5_protein = PDB() #_1pi5_protein_pdb = os.path.join(data_dir(), "1pi5_protein_hyd.pdb") #_1pi5_protein_pdbqt = os.path.join(data_dir(), "1pi5_protein_hyd.pdbqt") #_1pi5_protein.load_from_files(_1pi5_protein_pdb, _1pi5_protein_pdbqt) ## The ligand is also specified by pdbbind #_1pi5_ligand = PDB() #_1pi5_ligand_pdb = os.path.join(data_dir(), "1pi5_ligand_hyd.pdb") #_1pi5_ligand_pdbqt = os.path.join(data_dir(), "1pi5_ligand_hyd.pdbqt") #_1pi5_ligand.load_from_files(_1pi5_ligand_pdb, _1pi5_ligand_pdbqt) #### 3fxz comes from PDBBind-CN #_3fxz_protein = PDB() #_3fxz_protein_pdb = os.path.join(data_dir(), "3fxz_protein_hyd.pdb") #_3fxz_protein_pdbqt = os.path.join(data_dir(), "3fxz_protein_hyd.pdbqt") #_3fxz_protein.load_from_files(_3fxz_protein_pdb, _3fxz_protein_pdbqt) ## The ligand is also specified by pdbbind #_3fxz_ligand = PDB() #_3fxz_ligand_pdb = os.path.join(data_dir(), "3fxz_ligand_hyd.pdb") #_3fxz_ligand_pdbqt = os.path.join(data_dir(), "3fxz_ligand_hyd.pdbqt") #_3fxz_ligand.load_from_files(_3fxz_ligand_pdb, _3fxz_ligand_pdbqt) #### 4i60 comes from PDBBind-CN #_4i60_protein = PDB() #_4i60_protein_pdb = os.path.join(data_dir(), "4i60_protein_hyd.pdb") #_4i60_protein_pdbqt = os.path.join(data_dir(), "4i60_protein_hyd.pdbqt") #_4i60_protein.load_from_files(_4i60_protein_pdb, _4i60_protein_pdbqt) ## The ligand is also specified by pdbbind #_4i60_ligand = PDB() #_4i60_ligand_pdb = os.path.join(data_dir(), "4i60_ligand_hyd.pdb") #_4i60_ligand_pdbqt = os.path.join(data_dir(), "4i60_ligand_hyd.pdbqt") #_4i60_ligand.load_from_files(_4i60_ligand_pdb, _4i60_ligand_pdbqt) #### 1hyv comes from PDBBind-CN #_1hyv_protein = PDB() #_1hyv_protein_pdb = os.path.join(data_dir(), "1hyv_protein_hyd.pdb") #_1hyv_protein_pdbqt = os.path.join(data_dir(), "1hyv_protein_hyd.pdbqt") #_1hyv_protein.load_from_files(_1hyv_protein_pdb, _1hyv_protein_pdbqt) ## The ligand is also specified by pdbbind #_1hyv_ligand = PDB() #_1hyv_ligand_pdb = os.path.join(data_dir(), "1hyv_ligand_hyd.pdb") #_1hyv_ligand_pdbqt = os.path.join(data_dir(), "1hyv_ligand_hyd.pdbqt") #_1hyv_ligand.load_from_files(_1hyv_ligand_pdb, _1hyv_ligand_pdbqt) #### 3m1j comes from PDBBind-CN #_3m1j_protein = PDB() #_3m1j_protein_pdb = os.path.join(data_dir(), "3m1j_protein_hyd.pdb") #_3m1j_protein_pdbqt = os.path.join(data_dir(), "3m1j_protein_hyd.pdbqt") #_3m1j_protein.load_from_files(_3m1j_protein_pdb, _3m1j_protein_pdbqt) ## The ligand is also specified by pdbbind #_3m1j_ligand = PDB() #_3m1j_ligand_pdb = os.path.join(data_dir(), "3m1j_ligand_hyd.pdb") #_3m1j_ligand_pdbqt = os.path.join(data_dir(), "3m1j_ligand_hyd.pdbqt") #_3m1j_ligand.load_from_files(_3m1j_ligand_pdb, _3m1j_ligand_pdbqt) #### 1y3g comes from PDBBind-CN #_1y3g_protein = PDB() #_1y3g_protein_pdb = os.path.join(data_dir(), "1y3g_protein_hyd.pdb") #_1y3g_protein_pdbqt = os.path.join(data_dir(), "1y3g_protein_hyd.pdbqt") #_1y3g_protein.load_from_files(_1y3g_protein_pdb, _1y3g_protein_pdbqt) ## The ligand is also specified by pdbbind #_1y3g_ligand = PDB() #_1y3g_ligand_pdb = os.path.join(data_dir(), "1y3g_ligand_hyd.pdb") #_1y3g_ligand_pdbqt = os.path.join(data_dir(), "1y3g_ligand_hyd.pdbqt") #_1y3g_ligand.load_from_files(_1y3g_ligand_pdb, _1y3g_ligand_pdbqt) #### 6rsa comes from PDBBind-CN #_6rsa_protein = PDB() #_6rsa_protein_pdb = os.path.join(data_dir(), "6rsa_protein_hyd.pdb") #_6rsa_protein_pdbqt = os.path.join(data_dir(), "6rsa_protein_hyd.pdbqt") #_6rsa_protein.load_from_files(_6rsa_protein_pdb, _6rsa_protein_pdbqt) ## The ligand is also specified by pdbbind #_6rsa_ligand = PDB() #_6rsa_ligand_pdb = os.path.join(data_dir(), "6rsa_ligand_hyd.pdb") #_6rsa_ligand_pdbqt = os.path.join(data_dir(), "6rsa_ligand_hyd.pdbqt") #_6rsa_ligand.load_from_files(_6rsa_ligand_pdb, _6rsa_ligand_pdbqt) #### 1lvk comes from PDBBind-CN #_1lvk_protein = PDB() #_1lvk_protein_pdb = os.path.join(data_dir(), "1lvk_protein_hyd.pdb") #_1lvk_protein_pdbqt = os.path.join(data_dir(), "1lvk_protein_hyd.pdbqt") #_1lvk_protein.load_from_files(_1lvk_protein_pdb, _1lvk_protein_pdbqt) ## The ligand is also specified by pdbbind #_1lvk_ligand = PDB() #_1lvk_ligand_pdb = os.path.join(data_dir(), "1lvk_ligand_hyd.pdb") #_1lvk_ligand_pdbqt = os.path.join(data_dir(), "1lvk_ligand_hyd.pdbqt") #_1lvk_ligand.load_from_files(_1lvk_ligand_pdb, _1lvk_ligand_pdbqt) #### 3rj7 comes from PDBBind-CN #_3rj7_protein = PDB() #_3rj7_protein_pdb = os.path.join(data_dir(), "3rj7_protein_hyd.pdb") #_3rj7_protein_pdbqt = os.path.join(data_dir(), "3rj7_protein_hyd.pdbqt") #_3rj7_protein.load_from_files(_3rj7_protein_pdb, _3rj7_protein_pdbqt) ## The ligand is also specified by pdbbind #_3rj7_ligand = PDB() #_3rj7_ligand_pdb = os.path.join(data_dir(), "3rj7_ligand_hyd.pdb") #_3rj7_ligand_pdbqt = os.path.join(data_dir(), "3rj7_ligand_hyd.pdbqt") #_3rj7_ligand.load_from_files(_3rj7_ligand_pdb, _3rj7_ligand_pdbqt) ### 3zp9 comes from PDBBind-CN _3zp9_protein = PDB() _3zp9_protein_pdb = os.path.join(data_dir(), "3zp9_protein_hyd.pdb") _3zp9_protein_pdbqt = os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt") _3zp9_protein.load_from_files(_3zp9_protein_pdb, _3zp9_protein_pdbqt) # The ligand is also specified by pdbbind _3zp9_ligand = PDB() _3zp9_ligand_pdb = os.path.join(data_dir(), "3zp9_ligand_hyd.pdb") _3zp9_ligand_pdbqt = os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt") _3zp9_ligand.load_from_files(_3zp9_ligand_pdb, _3zp9_ligand_pdbqt) ### 3bwf comes from PDBBind-CN _3bwf_protein = PDB() _3bwf_protein_pdb = os.path.join(data_dir(), "3bwf_protein_hyd.pdb") _3bwf_protein_pdbqt = os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt") _3bwf_protein.load_from_files(_3bwf_protein_pdb, _3bwf_protein_pdbqt) # The ligand is also specified by pdbbind _3bwf_ligand = PDB() _3bwf_ligand_pdb = os.path.join(data_dir(), "3bwf_ligand_hyd.pdb") _3bwf_ligand_pdbqt = os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt") _3bwf_ligand.load_from_files(_3bwf_ligand_pdb, _3bwf_ligand_pdbqt) #self.test_cases = [("prgr", prgr_receptor, prgr_active), # ("cabl", cabl_receptor, cabl_active), # ("1zea", _1zea_protein, _1zea_ligand), # ("1r5y", _1r5y_protein, _1r5y_ligand), # ("3ao4", _3ao4_protein, _3ao4_ligand), # ("2jdm", _2jdm_protein, _2jdm_ligand), # ("3g5k", _3g5k_protein, _3g5k_ligand), # ("3str", _3str_protein, _3str_ligand), # ("2rio", _2rio_protein, _2rio_ligand)] self.test_cases = [("3bwf", _3bwf_protein, _3bwf_ligand)]
class TestBinana(unittest.TestCase): """ Test Binana Binding Pose Featurizer. """ def setUp(self): """ Instantiate local copy of Binana object. """ self.binana = Binana() # TODO(rbharath): This load sequence is getting unwieldy. Refactor. #### PRGR is from the DUD-E collection #prgr_receptor = PDB() #prgr_pdb = os.path.join(data_dir(), "prgr_hyd.pdb") #prgr_pdbqt = os.path.join(data_dir(), "prgr_hyd.pdbqt") #prgr_receptor.load_from_files(prgr_pdb, prgr_pdbqt) ## This compound is CHEMBL1164248 #prgr_active = PDB() #prgr_active_pdb = os.path.join(data_dir(), "prgr_active0_hyd.pdb") #prgr_active_pdbqt = os.path.join(data_dir(), "prgr_active0_hyd.pdbqt") #prgr_active.load_from_files(prgr_active_pdb, prgr_active_pdbqt) #### c-Abl is taken from the Autodock Vina examples #cabl_receptor = PDB() #cabl_receptor_pdb = os.path.join(data_dir(), "c-Abl_hyd.pdb") #cabl_receptor_pdbqt = os.path.join(data_dir(), "c-Abl_hyd.pdbqt") #cabl_receptor.load_from_files(cabl_receptor_pdb, # cabl_receptor_pdbqt) ## This compound is imatinib #cabl_active = PDB() #cabl_active_pdb = os.path.join(data_dir(), "imatinib_hyd.pdb") #cabl_active_pdbqt = os.path.join(data_dir(), "imatinib_hyd.pdbqt") #cabl_active.load_from_files(cabl_active_pdb, # cabl_active_pdbqt) #### 1zea comes from PDBBind-CN ## Python complains about variables starting with numbers, so put an ## underscore in front of everything. #_1zea_protein = PDB() #_1zea_protein_pdb = os.path.join(data_dir(), "1zea_protein_hyd.pdb") #_1zea_protein_pdbqt = os.path.join(data_dir(), "1zea_protein_hyd.pdbqt") #_1zea_protein.load_from_files(_1zea_protein_pdb, _1zea_protein_pdbqt) ## The ligand is also specified by pdbbind #_1zea_ligand = PDB() #_1zea_ligand_pdb = os.path.join(data_dir(), "1zea_ligand_hyd.pdb") #_1zea_ligand_pdbqt = os.path.join(data_dir(), "1zea_ligand_hyd.pdbqt") #_1zea_ligand.load_from_files(_1zea_ligand_pdb, _1zea_ligand_pdbqt) #### 1r5y comes from PDBBind-CN #_1r5y_protein = PDB() #_1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb") #_1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt") #_1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt) ## The ligand is also specified by pdbbind #_1r5y_ligand = PDB() #_1r5y_ligand_pdb = os.path.join(data_dir(), "1r5y_ligand_hyd.pdb") #_1r5y_ligand_pdbqt = os.path.join(data_dir(), "1r5y_ligand_hyd.pdbqt") #_1r5y_ligand.load_from_files(_1r5y_ligand_pdb, _1r5y_ligand_pdbqt) #### 3ao4 comes from PDBBind-CN #_3ao4_protein = PDB() #_3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb") #_3ao4_protein_pdbqt = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt") #_3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt) ## The ligand is also specified by pdbbind #_3ao4_ligand = PDB() #_3ao4_ligand_pdb = os.path.join(data_dir(), "3ao4_ligand_hyd.pdb") #_3ao4_ligand_pdbqt = os.path.join(data_dir(), "3ao4_ligand_hyd.pdbqt") #_3ao4_ligand.load_from_files(_3ao4_ligand_pdb, _3ao4_ligand_pdbqt) #### 2jdm comes from PDBBind-CN #_2jdm_protein = PDB() #_2jdm_protein_pdb = os.path.join(data_dir(), "2jdm_protein_hyd.pdb") #_2jdm_protein_pdbqt = os.path.join(data_dir(), "2jdm_protein_hyd.pdbqt") #_2jdm_protein.load_from_files(_2jdm_protein_pdb, _2jdm_protein_pdbqt) ## The ligand is also specified by pdbbind #_2jdm_ligand = PDB() #_2jdm_ligand_pdb = os.path.join(data_dir(), "2jdm_ligand_hyd.pdb") #_2jdm_ligand_pdbqt = os.path.join(data_dir(), "2jdm_ligand_hyd.pdbqt") #_2jdm_ligand.load_from_files(_2jdm_ligand_pdb, _2jdm_ligand_pdbqt) #### 3g5k comes from PDBBind-CN #_3g5k_protein = PDB() #_3g5k_protein_pdb = os.path.join(data_dir(), "3g5k_protein_hyd.pdb") #_3g5k_protein_pdbqt = os.path.join(data_dir(), "3g5k_protein_hyd.pdbqt") #_3g5k_protein.load_from_files(_3g5k_protein_pdb, _3g5k_protein_pdbqt) ## The ligand is also specified by pdbbind #_3g5k_ligand = PDB() #_3g5k_ligand_pdb = os.path.join(data_dir(), "3g5k_ligand_hyd.pdb") #_3g5k_ligand_pdbqt = os.path.join(data_dir(), "3g5k_ligand_hyd.pdbqt") #_3g5k_ligand.load_from_files(_3g5k_ligand_pdb, _3g5k_ligand_pdbqt) #### 3str comes from PDBBind-CN #_3str_protein = PDB() #_3str_protein_pdb = os.path.join(data_dir(), "3str_protein_hyd.pdb") #_3str_protein_pdbqt = os.path.join(data_dir(), "3str_protein_hyd.pdbqt") #_3str_protein.load_from_files(_3str_protein_pdb, _3str_protein_pdbqt) ## The ligand is also specified by pdbbind #_3str_ligand = PDB() #_3str_ligand_pdb = os.path.join(data_dir(), "3str_ligand_hyd.pdb") #_3str_ligand_pdbqt = os.path.join(data_dir(), "3str_ligand_hyd.pdbqt") #_3str_ligand.load_from_files(_3str_ligand_pdb, _3str_ligand_pdbqt) #### 1nu3 comes from PDBBind-CN #_1nu3_protein = PDB() #_1nu3_protein_pdb = os.path.join(data_dir(), "1nu3_protein_hyd.pdb") #_1nu3_protein_pdbqt = os.path.join(data_dir(), "1nu3_protein_hyd.pdbqt") #_1nu3_protein.load_from_files(_1nu3_protein_pdb, _1nu3_protein_pdbqt) ## The ligand is also specified by pdbbind #_1nu3_ligand = PDB() #_1nu3_ligand_pdb = os.path.join(data_dir(), "1nu3_ligand_hyd.pdb") #_1nu3_ligand_pdbqt = os.path.join(data_dir(), "1nu3_ligand_hyd.pdbqt") #_1nu3_ligand.load_from_files(_1nu3_ligand_pdb, _1nu3_ligand_pdbqt) #### 2rio comes from PDBBind-CN #_2rio_protein = PDB() #_2rio_protein_pdb = os.path.join(data_dir(), "2rio_protein_hyd.pdb") #_2rio_protein_pdbqt = os.path.join(data_dir(), "2rio_protein_hyd.pdbqt") #_2rio_protein.load_from_files(_2rio_protein_pdb, _2rio_protein_pdbqt) ## The ligand is also specified by pdbbind #_2rio_ligand = PDB() #_2rio_ligand_pdb = os.path.join(data_dir(), "2rio_ligand_hyd.pdb") #_2rio_ligand_pdbqt = os.path.join(data_dir(), "2rio_ligand_hyd.pdbqt") #_2rio_ligand.load_from_files(_2rio_ligand_pdb, _2rio_ligand_pdbqt) #### 2y2h comes from PDBBind-CN #_2y2h_protein = PDB() #_2y2h_protein_pdb = os.path.join(data_dir(), "2y2h_protein_hyd.pdb") #_2y2h_protein_pdbqt = os.path.join(data_dir(), "2y2h_protein_hyd.pdbqt") #_2y2h_protein.load_from_files(_2y2h_protein_pdb, _2y2h_protein_pdbqt) ## The ligand is also specified by pdbbind #_2y2h_ligand = PDB() #_2y2h_ligand_pdb = os.path.join(data_dir(), "2y2h_ligand_hyd.pdb") #_2y2h_ligand_pdbqt = os.path.join(data_dir(), "2y2h_ligand_hyd.pdbqt") #_2y2h_ligand.load_from_files(_2y2h_ligand_pdb, _2y2h_ligand_pdbqt) #### 1pi5 comes from PDBBind-CN #_1pi5_protein = PDB() #_1pi5_protein_pdb = os.path.join(data_dir(), "1pi5_protein_hyd.pdb") #_1pi5_protein_pdbqt = os.path.join(data_dir(), "1pi5_protein_hyd.pdbqt") #_1pi5_protein.load_from_files(_1pi5_protein_pdb, _1pi5_protein_pdbqt) ## The ligand is also specified by pdbbind #_1pi5_ligand = PDB() #_1pi5_ligand_pdb = os.path.join(data_dir(), "1pi5_ligand_hyd.pdb") #_1pi5_ligand_pdbqt = os.path.join(data_dir(), "1pi5_ligand_hyd.pdbqt") #_1pi5_ligand.load_from_files(_1pi5_ligand_pdb, _1pi5_ligand_pdbqt) #### 3fxz comes from PDBBind-CN #_3fxz_protein = PDB() #_3fxz_protein_pdb = os.path.join(data_dir(), "3fxz_protein_hyd.pdb") #_3fxz_protein_pdbqt = os.path.join(data_dir(), "3fxz_protein_hyd.pdbqt") #_3fxz_protein.load_from_files(_3fxz_protein_pdb, _3fxz_protein_pdbqt) ## The ligand is also specified by pdbbind #_3fxz_ligand = PDB() #_3fxz_ligand_pdb = os.path.join(data_dir(), "3fxz_ligand_hyd.pdb") #_3fxz_ligand_pdbqt = os.path.join(data_dir(), "3fxz_ligand_hyd.pdbqt") #_3fxz_ligand.load_from_files(_3fxz_ligand_pdb, _3fxz_ligand_pdbqt) #### 4i60 comes from PDBBind-CN #_4i60_protein = PDB() #_4i60_protein_pdb = os.path.join(data_dir(), "4i60_protein_hyd.pdb") #_4i60_protein_pdbqt = os.path.join(data_dir(), "4i60_protein_hyd.pdbqt") #_4i60_protein.load_from_files(_4i60_protein_pdb, _4i60_protein_pdbqt) ## The ligand is also specified by pdbbind #_4i60_ligand = PDB() #_4i60_ligand_pdb = os.path.join(data_dir(), "4i60_ligand_hyd.pdb") #_4i60_ligand_pdbqt = os.path.join(data_dir(), "4i60_ligand_hyd.pdbqt") #_4i60_ligand.load_from_files(_4i60_ligand_pdb, _4i60_ligand_pdbqt) #### 1hyv comes from PDBBind-CN #_1hyv_protein = PDB() #_1hyv_protein_pdb = os.path.join(data_dir(), "1hyv_protein_hyd.pdb") #_1hyv_protein_pdbqt = os.path.join(data_dir(), "1hyv_protein_hyd.pdbqt") #_1hyv_protein.load_from_files(_1hyv_protein_pdb, _1hyv_protein_pdbqt) ## The ligand is also specified by pdbbind #_1hyv_ligand = PDB() #_1hyv_ligand_pdb = os.path.join(data_dir(), "1hyv_ligand_hyd.pdb") #_1hyv_ligand_pdbqt = os.path.join(data_dir(), "1hyv_ligand_hyd.pdbqt") #_1hyv_ligand.load_from_files(_1hyv_ligand_pdb, _1hyv_ligand_pdbqt) #### 3m1j comes from PDBBind-CN #_3m1j_protein = PDB() #_3m1j_protein_pdb = os.path.join(data_dir(), "3m1j_protein_hyd.pdb") #_3m1j_protein_pdbqt = os.path.join(data_dir(), "3m1j_protein_hyd.pdbqt") #_3m1j_protein.load_from_files(_3m1j_protein_pdb, _3m1j_protein_pdbqt) ## The ligand is also specified by pdbbind #_3m1j_ligand = PDB() #_3m1j_ligand_pdb = os.path.join(data_dir(), "3m1j_ligand_hyd.pdb") #_3m1j_ligand_pdbqt = os.path.join(data_dir(), "3m1j_ligand_hyd.pdbqt") #_3m1j_ligand.load_from_files(_3m1j_ligand_pdb, _3m1j_ligand_pdbqt) #### 1y3g comes from PDBBind-CN #_1y3g_protein = PDB() #_1y3g_protein_pdb = os.path.join(data_dir(), "1y3g_protein_hyd.pdb") #_1y3g_protein_pdbqt = os.path.join(data_dir(), "1y3g_protein_hyd.pdbqt") #_1y3g_protein.load_from_files(_1y3g_protein_pdb, _1y3g_protein_pdbqt) ## The ligand is also specified by pdbbind #_1y3g_ligand = PDB() #_1y3g_ligand_pdb = os.path.join(data_dir(), "1y3g_ligand_hyd.pdb") #_1y3g_ligand_pdbqt = os.path.join(data_dir(), "1y3g_ligand_hyd.pdbqt") #_1y3g_ligand.load_from_files(_1y3g_ligand_pdb, _1y3g_ligand_pdbqt) #### 6rsa comes from PDBBind-CN #_6rsa_protein = PDB() #_6rsa_protein_pdb = os.path.join(data_dir(), "6rsa_protein_hyd.pdb") #_6rsa_protein_pdbqt = os.path.join(data_dir(), "6rsa_protein_hyd.pdbqt") #_6rsa_protein.load_from_files(_6rsa_protein_pdb, _6rsa_protein_pdbqt) ## The ligand is also specified by pdbbind #_6rsa_ligand = PDB() #_6rsa_ligand_pdb = os.path.join(data_dir(), "6rsa_ligand_hyd.pdb") #_6rsa_ligand_pdbqt = os.path.join(data_dir(), "6rsa_ligand_hyd.pdbqt") #_6rsa_ligand.load_from_files(_6rsa_ligand_pdb, _6rsa_ligand_pdbqt) #### 1lvk comes from PDBBind-CN #_1lvk_protein = PDB() #_1lvk_protein_pdb = os.path.join(data_dir(), "1lvk_protein_hyd.pdb") #_1lvk_protein_pdbqt = os.path.join(data_dir(), "1lvk_protein_hyd.pdbqt") #_1lvk_protein.load_from_files(_1lvk_protein_pdb, _1lvk_protein_pdbqt) ## The ligand is also specified by pdbbind #_1lvk_ligand = PDB() #_1lvk_ligand_pdb = os.path.join(data_dir(), "1lvk_ligand_hyd.pdb") #_1lvk_ligand_pdbqt = os.path.join(data_dir(), "1lvk_ligand_hyd.pdbqt") #_1lvk_ligand.load_from_files(_1lvk_ligand_pdb, _1lvk_ligand_pdbqt) #### 3rj7 comes from PDBBind-CN #_3rj7_protein = PDB() #_3rj7_protein_pdb = os.path.join(data_dir(), "3rj7_protein_hyd.pdb") #_3rj7_protein_pdbqt = os.path.join(data_dir(), "3rj7_protein_hyd.pdbqt") #_3rj7_protein.load_from_files(_3rj7_protein_pdb, _3rj7_protein_pdbqt) ## The ligand is also specified by pdbbind #_3rj7_ligand = PDB() #_3rj7_ligand_pdb = os.path.join(data_dir(), "3rj7_ligand_hyd.pdb") #_3rj7_ligand_pdbqt = os.path.join(data_dir(), "3rj7_ligand_hyd.pdbqt") #_3rj7_ligand.load_from_files(_3rj7_ligand_pdb, _3rj7_ligand_pdbqt) ### 3zp9 comes from PDBBind-CN _3zp9_protein = PDB() _3zp9_protein_pdb = os.path.join(data_dir(), "3zp9_protein_hyd.pdb") _3zp9_protein_pdbqt = os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt") _3zp9_protein.load_from_files(_3zp9_protein_pdb, _3zp9_protein_pdbqt) # The ligand is also specified by pdbbind _3zp9_ligand = PDB() _3zp9_ligand_pdb = os.path.join(data_dir(), "3zp9_ligand_hyd.pdb") _3zp9_ligand_pdbqt = os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt") _3zp9_ligand.load_from_files(_3zp9_ligand_pdb, _3zp9_ligand_pdbqt) ### 3bwf comes from PDBBind-CN _3bwf_protein = PDB() _3bwf_protein_pdb = os.path.join(data_dir(), "3bwf_protein_hyd.pdb") _3bwf_protein_pdbqt = os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt") _3bwf_protein.load_from_files(_3bwf_protein_pdb, _3bwf_protein_pdbqt) # The ligand is also specified by pdbbind _3bwf_ligand = PDB() _3bwf_ligand_pdb = os.path.join(data_dir(), "3bwf_ligand_hyd.pdb") _3bwf_ligand_pdbqt = os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt") _3bwf_ligand.load_from_files(_3bwf_ligand_pdb, _3bwf_ligand_pdbqt) #self.test_cases = [("prgr", prgr_receptor, prgr_active), # ("cabl", cabl_receptor, cabl_active), # ("1zea", _1zea_protein, _1zea_ligand), # ("1r5y", _1r5y_protein, _1r5y_ligand), # ("3ao4", _3ao4_protein, _3ao4_ligand), # ("2jdm", _2jdm_protein, _2jdm_ligand), # ("3g5k", _3g5k_protein, _3g5k_ligand), # ("3str", _3str_protein, _3str_ligand), # ("2rio", _2rio_protein, _2rio_ligand)] self.test_cases = [("3bwf", _3bwf_protein, _3bwf_ligand)] def test_compute_hydrophobic(self): """ TestBinana: Test that hydrophobic contacts are established. """ hydrophobics_dict = {} for name, protein, ligand in self.test_cases: hydrophobics_dict[name] = compute_hydrophobic_contacts( ligand, protein) for name, hydrophobics in hydrophobics_dict.iteritems(): print "Processing hydrohobics for %s" % name assert len(hydrophobics) == 6 assert "BACKBONE_ALPHA" in hydrophobics assert "BACKBONE_BETA" in hydrophobics assert "BACKBONE_OTHER" in hydrophobics assert "SIDECHAIN_ALPHA" in hydrophobics assert "SIDECHAIN_BETA" in hydrophobics assert "SIDECHAIN_OTHER" in hydrophobics def test_compute_electrostatics(self): """ TestBinana: Test that electrostatic energies are computed. """ electrostatics_dict = {} for name, protein, ligand in self.test_cases: electrostatics_dict[name] = compute_electrostatic_energy( ligand, protein) for name, electrostatics in electrostatics_dict.iteritems(): print "Processing electrostatics for %s" % name # The keys of these dicts are pairs of atomtypes, but the keys are # sorted so that ("C", "O") is always written as "C_O". Thus, for N # atom types, there are N*(N+1)/2 unique pairs. num_atoms = len(Binana.atom_types) assert len(electrostatics) == num_atoms*(num_atoms+1)/2 # TODO(rbharath): Charges are not computed correctly for certain # ligands! (see 2y2h_ligand). Understand why this happens. #assert np.count_nonzero(np.array(electrostatics.values())) > 0 def test_compute_flexibility(self): """ TestBinana: Gather statistics about active site protein atoms. """ active_site_dict = {} for name, protein, ligand in self.test_cases: active_site_dict[name] = compute_active_site_flexibility( ligand, protein) for name, active_site_flexibility in active_site_dict.iteritems(): print "Processing active site flexibility for %s" % name assert len(active_site_flexibility.keys()) == 6 assert "BACKBONE_ALPHA" in active_site_flexibility assert "BACKBONE_BETA" in active_site_flexibility assert "BACKBONE_OTHER" in active_site_flexibility assert "SIDECHAIN_ALPHA" in active_site_flexibility assert "SIDECHAIN_BETA" in active_site_flexibility assert "SIDECHAIN_OTHER" in active_site_flexibility def test_compute_hydrogen_bonds(self): """ TestBinana: Compute the number of hydrogen bonds. TODO(rbharath): The hydrogen-bond angle cutoff seems like it's incorrect to me. The hydrogens are placed by openbabel and aren't optimized, so I'm pretty sure that this code will miss many hydrogen bonds. Here are some options: -) Find a method to optimize the hydrogen placement. -) Place a more permissive angle cutoff for hydrogens. -) Allow for "buckets": angles 0-20, 20-40, 40-60, etc. and count the number of hydrogen bonds in each bucket. """ hbonds_dict = {} for name, protein, ligand in self.test_cases: hbonds_dict[name] = compute_hydrogen_bonds( ligand, protein) for name, hbonds in hbonds_dict.iteritems(): print "Processing hydrogen bonds for %s" % name assert len(hbonds) == 12 assert "HDONOR-LIGAND_BACKBONE_ALPHA" in hbonds assert "HDONOR-LIGAND_BACKBONE_BETA" in hbonds assert "HDONOR-LIGAND_BACKBONE_OTHER" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_ALPHA" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_BETA" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_OTHER" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_ALPHA" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_BETA" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_OTHER" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_ALPHA" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_BETA" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_OTHER" in hbonds def test_compute_ligand_atom_counts(self): """ TestBinana: Compute the Number of Ligand Atom Counts. """ counts_dict = {} for name, _, ligand in self.test_cases: counts_dict[name] = compute_ligand_atom_counts( ligand) for name, counts in counts_dict.iteritems(): print "Processing ligand atom counts for %s" % name # TODO(rbharath): This code is useful for debugging. Remove once # codebase is stable enough. #for key in Binana.atom_types: # if key in counts: # del counts[key] #print "Residual counts:" #print counts assert len(counts) == len(Binana.atom_types) def test_compute_contacts(self): """ TestBinana: Compute contacts between Ligand and receptor. """ contacts_dict = {} for name, protein, ligand in self.test_cases: contacts_dict[name] = compute_contacts( ligand, protein) num_atoms = len(Binana.atom_types) for name, (close_contacts, contacts) in contacts_dict.iteritems(): print "Processing contacts for %s" % name print "close_contacts" for key, val in close_contacts.iteritems(): if val != 0: print (key, val) print "len(close_contacts): " + str(len(close_contacts)) print "contacts" for key, val in contacts.iteritems(): if val != 0: print (key, val) print "len(contacts): " + str(len(contacts)) print "Desired Number: " + str(num_atoms*(num_atoms+1)/2) # TODO(rbharath): The following code has proved very useful for # debugging. Remove once the code is stable enough that it's not # required. #if name == '1pi5': # for first, second in itertools.product(Binana.atom_types, # Binana.atom_types): # key = "_".join(sorted([first, second])) # if key in close_contacts: # del close_contacts[key] # if key in contacts: # del contacts[key] # print "Residuals close_contacts:" # print close_contacts # print "Residuals contacts:" # print contacts assert len(close_contacts) == num_atoms*(num_atoms+1)/2 assert len(contacts) == num_atoms*(num_atoms+1)/2 def test_compute_pi_pi_stacking(self): """ TestBinana: Compute Pi-Pi Stacking. """ # 1zea is the only example that has any pi-stacking. pi_stacking_dict = {} for name, protein, ligand in self.test_cases: pi_stacking_dict[name] = compute_pi_pi_stacking( ligand, protein) for name, pi_stacking in pi_stacking_dict.iteritems(): print "Processing pi-stacking for %s" % name assert len(pi_stacking) == 3 print pi_stacking assert "STACKING_ALPHA" in pi_stacking assert "STACKING_BETA" in pi_stacking assert "STACKING_OTHER" in pi_stacking def test_compute_pi_t(self): """ TestBinana: Compute Pi-T Interactions. TODO(rbharath): I believe that the imatininb-cabl complex has a pi-T interaction. This code has a bug since it reports that no such interaction is found. """ pi_t_dict = {} for name, protein, ligand in self.test_cases: pi_t_dict[name] = compute_pi_t( ligand, protein) for name, pi_t in pi_t_dict.iteritems(): print "Processing pi-T for %s" % name assert len(pi_t) == 3 assert "T-SHAPED_ALPHA" in pi_t assert "T-SHAPED_BETA" in pi_t assert "T-SHAPED_OTHER" in pi_t def test_compute_pi_cation(self): """ TestBinana: Compute Pi-Cation Interactions. """ pi_cation_dict = {} for name, protein, ligand in self.test_cases: pi_cation_dict[name] = compute_pi_cation( ligand, protein) for name, pi_cation in pi_cation_dict.iteritems(): print "Processing pi-cation for %s" % name assert len(pi_cation) == 6 assert 'PI-CATION_LIGAND-CHARGED_ALPHA' in pi_cation assert 'PI-CATION_LIGAND-CHARGED_BETA' in pi_cation assert 'PI-CATION_LIGAND-CHARGED_OTHER' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_ALPHA' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_BETA' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_OTHER' in pi_cation def test_compute_salt_bridges(self): """ TestBinana: Compute Salt Bridges. TODO(bramsundar): None of the examples contain salt-bridge interactions. Find a complex with an actual salt-bridge interaction. """ salt_bridges_dict = {} for name, protein, ligand in self.test_cases: salt_bridges_dict[name] = compute_salt_bridges( ligand, protein) for name, salt_bridges in salt_bridges_dict.iteritems(): print "Processing salt-bridges for %s" % name assert len(salt_bridges) == 3 print salt_bridges assert 'SALT-BRIDGE_ALPHA' in salt_bridges assert 'SALT-BRIDGE_BETA' in salt_bridges assert 'SALT-BRIDGE_OTHER' in salt_bridges def test_compute_input_vector(self): """ TestBinana: Compute Input Vector. """ features_dict = {} for name, protein, ligand in self.test_cases: features_dict[name] = self.binana.compute_input_vector( ligand, protein) num_atoms = len(Binana.atom_types) # Lengths: # ligand_receptor_close_contacts: N*(N+1)/2 # ligand_receptor_contacts: N*(N+1)/2 # ligand_receptor_electrostatics: N*(N+1)/2 # ligand_atom_counts: N # hbonds: 12 # hydrophobics: 6 # stacking: 3 # pi_cation: 6 # t_shaped: 3 # active_site_flexibility: 6 # salt_bridges: 3 # rotatable_boonds_count: 1 total_len = (3*num_atoms*(num_atoms+1)/2 + num_atoms + 12 + 6 + 3 + 6 + 3 + 6 + 3 + 1) for name, input_vector in features_dict.iteritems(): print "Processing input-vector for %s" % name assert len(input_vector) == total_len
def setUp(self): """ Instantiate local copy of Binana object. """ self.binana = Binana() # TODO(rbharath): This load sequence is getting unwieldy. Refactor. ### PRGR is from the DUD-E collection prgr_receptor = PDB() prgr_pdb = os.path.join(data_dir(), "prgr_hyd.pdb") prgr_pdbqt = os.path.join(data_dir(), "prgr_hyd.pdbqt") prgr_receptor.load_from_files(prgr_pdb, prgr_pdbqt) # This compound is CHEMBL1164248 prgr_active = PDB() prgr_active_pdb = os.path.join(data_dir(), "prgr_active0_hyd.pdb") prgr_active_pdbqt = os.path.join(data_dir(), "prgr_active0_hyd.pdbqt") prgr_active.load_from_files(prgr_active_pdb, prgr_active_pdbqt) ### c-Abl is taken from the Autodock Vina examples cabl_receptor = PDB() cabl_receptor_pdb = os.path.join(data_dir(), "c-Abl_hyd.pdb") cabl_receptor_pdbqt = os.path.join(data_dir(), "c-Abl_hyd.pdbqt") cabl_receptor.load_from_files(cabl_receptor_pdb, cabl_receptor_pdbqt) # This compound is imatinib cabl_active = PDB() cabl_active_pdb = os.path.join(data_dir(), "imatinib_hyd.pdb") cabl_active_pdbqt = os.path.join(data_dir(), "imatinib_hyd.pdbqt") cabl_active.load_from_files(cabl_active_pdb, cabl_active_pdbqt) ### 1zea comes from PDBBind-CN # Python complains about variables starting with numbers, so put an # underscore in front of everything. _1zea_protein = PDB() _1zea_protein_pdb = os.path.join(data_dir(), "1zea_protein_hyd.pdb") _1zea_protein_pdbqt = os.path.join(data_dir(), "1zea_protein_hyd.pdbqt") _1zea_protein.load_from_files(_1zea_protein_pdb, _1zea_protein_pdbqt) # The ligand is also specified by pdbbind _1zea_ligand = PDB() _1zea_ligand_pdb = os.path.join(data_dir(), "1zea_ligand_hyd.pdb") _1zea_ligand_pdbqt = os.path.join(data_dir(), "1zea_ligand_hyd.pdbqt") _1zea_ligand.load_from_files(_1zea_ligand_pdb, _1zea_ligand_pdbqt) ### 1r5y comes from PDBBind-CN _1r5y_protein = PDB() _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb") _1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt") _1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt) # The ligand is also specified by pdbbind _1r5y_ligand = PDB() _1r5y_ligand_pdb = os.path.join(data_dir(), "1r5y_ligand_hyd.pdb") _1r5y_ligand_pdbqt = os.path.join(data_dir(), "1r5y_ligand_hyd.pdbqt") _1r5y_ligand.load_from_files(_1r5y_ligand_pdb, _1r5y_ligand_pdbqt) ### 3ao4 comes from PDBBind-CN _3ao4_protein = PDB() _3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb") _3ao4_protein_pdbqt = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt") _3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt) # The ligand is also specified by pdbbind _3ao4_ligand = PDB() _3ao4_ligand_pdb = os.path.join(data_dir(), "3ao4_ligand_hyd.pdb") _3ao4_ligand_pdbqt = os.path.join(data_dir(), "3ao4_ligand_hyd.pdbqt") _3ao4_ligand.load_from_files(_3ao4_ligand_pdb, _3ao4_ligand_pdbqt) ### 2jdm comes from PDBBind-CN _2jdm_protein = PDB() _2jdm_protein_pdb = os.path.join(data_dir(), "2jdm_protein_hyd.pdb") _2jdm_protein_pdbqt = os.path.join(data_dir(), "2jdm_protein_hyd.pdbqt") _2jdm_protein.load_from_files(_2jdm_protein_pdb, _2jdm_protein_pdbqt) # The ligand is also specified by pdbbind _2jdm_ligand = PDB() _2jdm_ligand_pdb = os.path.join(data_dir(), "2jdm_ligand_hyd.pdb") _2jdm_ligand_pdbqt = os.path.join(data_dir(), "2jdm_ligand_hyd.pdbqt") _2jdm_ligand.load_from_files(_2jdm_ligand_pdb, _2jdm_ligand_pdbqt) ### 3g5k comes from PDBBind-CN _3g5k_protein = PDB() _3g5k_protein_pdb = os.path.join(data_dir(), "3g5k_protein_hyd.pdb") _3g5k_protein_pdbqt = os.path.join(data_dir(), "3g5k_protein_hyd.pdbqt") _3g5k_protein.load_from_files(_3g5k_protein_pdb, _3g5k_protein_pdbqt) # The ligand is also specified by pdbbind _3g5k_ligand = PDB() _3g5k_ligand_pdb = os.path.join(data_dir(), "3g5k_ligand_hyd.pdb") _3g5k_ligand_pdbqt = os.path.join(data_dir(), "3g5k_ligand_hyd.pdbqt") _3g5k_ligand.load_from_files(_3g5k_ligand_pdb, _3g5k_ligand_pdbqt) ### 3str comes from PDBBind-CN _3str_protein = PDB() _3str_protein_pdb = os.path.join(data_dir(), "3str_protein_hyd.pdb") _3str_protein_pdbqt = os.path.join(data_dir(), "3str_protein_hyd.pdbqt") _3str_protein.load_from_files(_3str_protein_pdb, _3str_protein_pdbqt) # The ligand is also specified by pdbbind _3str_ligand = PDB() _3str_ligand_pdb = os.path.join(data_dir(), "3str_ligand_hyd.pdb") _3str_ligand_pdbqt = os.path.join(data_dir(), "3str_ligand_hyd.pdbqt") _3str_ligand.load_from_files(_3str_ligand_pdb, _3str_ligand_pdbqt) ### 1nu3 comes from PDBBind-CN _1nu3_protein = PDB() _1nu3_protein_pdb = os.path.join(data_dir(), "1nu3_protein_hyd.pdb") _1nu3_protein_pdbqt = os.path.join(data_dir(), "1nu3_protein_hyd.pdbqt") _1nu3_protein.load_from_files(_1nu3_protein_pdb, _1nu3_protein_pdbqt) # The ligand is also specified by pdbbind _1nu3_ligand = PDB() _1nu3_ligand_pdb = os.path.join(data_dir(), "1nu3_ligand_hyd.pdb") _1nu3_ligand_pdbqt = os.path.join(data_dir(), "1nu3_ligand_hyd.pdbqt") _1nu3_ligand.load_from_files(_1nu3_ligand_pdb, _1nu3_ligand_pdbqt) ### 2rio comes from PDBBind-CN _2rio_protein = PDB() _2rio_protein_pdb = os.path.join(data_dir(), "2rio_protein_hyd.pdb") _2rio_protein_pdbqt = os.path.join(data_dir(), "2rio_protein_hyd.pdbqt") _2rio_protein.load_from_files(_2rio_protein_pdb, _2rio_protein_pdbqt) # The ligand is also specified by pdbbind _2rio_ligand = PDB() _2rio_ligand_pdb = os.path.join(data_dir(), "2rio_ligand_hyd.pdb") _2rio_ligand_pdbqt = os.path.join(data_dir(), "2rio_ligand_hyd.pdbqt") _2rio_ligand.load_from_files(_2rio_ligand_pdb, _2rio_ligand_pdbqt) self.test_cases = [("prgr", prgr_receptor, prgr_active), ("cabl", cabl_receptor, cabl_active), ("1zea", _1zea_protein, _1zea_ligand), ("1r5y", _1r5y_protein, _1r5y_ligand), ("3ao4", _3ao4_protein, _3ao4_ligand), ("2jdm", _2jdm_protein, _2jdm_ligand), ("3g5k", _3g5k_protein, _3g5k_ligand), ("3str", _3str_protein, _3str_ligand), ("2rio", _2rio_protein, _2rio_ligand)]
class TestBinana(unittest.TestCase): """ Test Binana Binding Pose Featurizer. """ def setUp(self): """ Instantiate local copy of Binana object. """ self.binana = Binana() ### 3zp9 comes from PDBBind-CN _3zp9_protein = PDB() _3zp9_protein_pdb = os.path.join(data_dir(), "3zp9_protein_hyd.pdb") _3zp9_protein_pdbqt = os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt") _3zp9_protein.load_from_files(_3zp9_protein_pdb, _3zp9_protein_pdbqt) # The ligand is also specified by pdbbind _3zp9_ligand = PDB() _3zp9_ligand_pdb = os.path.join(data_dir(), "3zp9_ligand_hyd.pdb") _3zp9_ligand_pdbqt = os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt") _3zp9_ligand.load_from_files(_3zp9_ligand_pdb, _3zp9_ligand_pdbqt) ### 3bwf comes from PDBBind-CN _3bwf_protein = PDB() _3bwf_protein_pdb = os.path.join(data_dir(), "3bwf_protein_hyd.pdb") _3bwf_protein_pdbqt = os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt") _3bwf_protein.load_from_files(_3bwf_protein_pdb, _3bwf_protein_pdbqt) # The ligand is also specified by pdbbind _3bwf_ligand = PDB() _3bwf_ligand_pdb = os.path.join(data_dir(), "3bwf_ligand_hyd.pdb") _3bwf_ligand_pdbqt = os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt") _3bwf_ligand.load_from_files(_3bwf_ligand_pdb, _3bwf_ligand_pdbqt) self.test_cases = [("3bwf", _3bwf_protein, _3bwf_ligand), ("3zp9", _3zp9_protein, _3zp9_ligand)] def test_compute_hydrophobic(self): """ TestBinana: Test that hydrophobic contacts are established. """ hydrophobics_dict = {} for name, protein, ligand in self.test_cases: hydrophobics_dict[name] = compute_hydrophobic_contacts( ligand, protein) for name, hydrophobics in hydrophobics_dict.iteritems(): print "Processing hydrohobics for %s" % name assert len(hydrophobics) == 6 assert "BACKBONE_ALPHA" in hydrophobics assert "BACKBONE_BETA" in hydrophobics assert "BACKBONE_OTHER" in hydrophobics assert "SIDECHAIN_ALPHA" in hydrophobics assert "SIDECHAIN_BETA" in hydrophobics assert "SIDECHAIN_OTHER" in hydrophobics def test_compute_electrostatics(self): """ TestBinana: Test that electrostatic energies are computed. """ electrostatics_dict = {} for name, protein, ligand in self.test_cases: electrostatics_dict[name] = compute_electrostatic_energy( ligand, protein) for name, electrostatics in electrostatics_dict.iteritems(): print "Processing electrostatics for %s" % name # The keys of these dicts are pairs of atomtypes, but the keys are # sorted so that ("C", "O") is always written as "C_O". Thus, for N # atom types, there are N*(N+1)/2 unique pairs. num_atoms = len(Binana.atom_types) assert len(electrostatics) == num_atoms * (num_atoms + 1) / 2 # TODO(rbharath): Charges are not computed correctly for certain # ligands! (see 2y2h_ligand). Understand why this happens. #assert np.count_nonzero(np.array(electrostatics.values())) > 0 def test_compute_flexibility(self): """ TestBinana: Gather statistics about active site protein atoms. """ active_site_dict = {} for name, protein, ligand in self.test_cases: active_site_dict[name] = compute_active_site_flexibility( ligand, protein) for name, active_site_flexibility in active_site_dict.iteritems(): print "Processing active site flexibility for %s" % name assert len(active_site_flexibility.keys()) == 6 assert "BACKBONE_ALPHA" in active_site_flexibility assert "BACKBONE_BETA" in active_site_flexibility assert "BACKBONE_OTHER" in active_site_flexibility assert "SIDECHAIN_ALPHA" in active_site_flexibility assert "SIDECHAIN_BETA" in active_site_flexibility assert "SIDECHAIN_OTHER" in active_site_flexibility def test_compute_hydrogen_bonds(self): """ TestBinana: Compute the number of hydrogen bonds. TODO(rbharath): The hydrogen-bond angle cutoff seems like it's incorrect to me. The hydrogens are placed by openbabel and aren't optimized, so I'm pretty sure that this code will miss many hydrogen bonds. Here are some options: -) Find a method to optimize the hydrogen placement. -) Place a more permissive angle cutoff for hydrogens. -) Allow for "buckets": angles 0-20, 20-40, 40-60, etc. and count the number of hydrogen bonds in each bucket. """ hbonds_dict = {} for name, protein, ligand in self.test_cases: hbonds_dict[name] = compute_hydrogen_bonds(ligand, protein) for name, hbonds in hbonds_dict.iteritems(): print "Processing hydrogen bonds for %s" % name assert len(hbonds) == 12 assert "HDONOR-LIGAND_BACKBONE_ALPHA" in hbonds assert "HDONOR-LIGAND_BACKBONE_BETA" in hbonds assert "HDONOR-LIGAND_BACKBONE_OTHER" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_ALPHA" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_BETA" in hbonds assert "HDONOR-LIGAND_SIDECHAIN_OTHER" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_ALPHA" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_BETA" in hbonds assert "HDONOR-RECEPTOR_BACKBONE_OTHER" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_ALPHA" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_BETA" in hbonds assert "HDONOR-RECEPTOR_SIDECHAIN_OTHER" in hbonds def test_compute_ligand_atom_counts(self): """ TestBinana: Compute the Number of Ligand Atom Counts. """ counts_dict = {} for name, _, ligand in self.test_cases: counts_dict[name] = compute_ligand_atom_counts(ligand) for name, counts in counts_dict.iteritems(): print "Processing ligand atom counts for %s" % name # TODO(rbharath): This code is useful for debugging. Remove once # codebase is stable enough. #for key in Binana.atom_types: # if key in counts: # del counts[key] #print "Residual counts:" #print counts assert len(counts) == len(Binana.atom_types) def test_compute_contacts(self): """ TestBinana: Compute contacts between Ligand and receptor. """ contacts_dict = {} for name, protein, ligand in self.test_cases: contacts_dict[name] = compute_contacts(ligand, protein) num_atoms = len(Binana.atom_types) for name, (close_contacts, contacts) in contacts_dict.iteritems(): print "Processing contacts for %s" % name print "close_contacts" for key, val in close_contacts.iteritems(): if val != 0: print(key, val) print "len(close_contacts): " + str(len(close_contacts)) print "contacts" for key, val in contacts.iteritems(): if val != 0: print(key, val) print "len(contacts): " + str(len(contacts)) print "Desired Number: " + str(num_atoms * (num_atoms + 1) / 2) # TODO(rbharath): The following code has proved very useful for # debugging. Remove once the code is stable enough that it's not # required. #if name == '1pi5': # for first, second in itertools.product(Binana.atom_types, # Binana.atom_types): # key = "_".join(sorted([first, second])) # if key in close_contacts: # del close_contacts[key] # if key in contacts: # del contacts[key] # print "Residuals close_contacts:" # print close_contacts # print "Residuals contacts:" # print contacts assert len(close_contacts) == num_atoms * (num_atoms + 1) / 2 assert len(contacts) == num_atoms * (num_atoms + 1) / 2 def test_compute_pi_pi_stacking(self): """ TestBinana: Compute Pi-Pi Stacking. """ # 1zea is the only example that has any pi-stacking. pi_stacking_dict = {} for name, protein, ligand in self.test_cases: pi_stacking_dict[name] = compute_pi_pi_stacking(ligand, protein) for name, pi_stacking in pi_stacking_dict.iteritems(): print "Processing pi-stacking for %s" % name assert len(pi_stacking) == 3 print pi_stacking assert "STACKING_ALPHA" in pi_stacking assert "STACKING_BETA" in pi_stacking assert "STACKING_OTHER" in pi_stacking def test_compute_pi_t(self): """ TestBinana: Compute Pi-T Interactions. TODO(rbharath): I believe that the imatininb-cabl complex has a pi-T interaction. This code has a bug since it reports that no such interaction is found. """ pi_t_dict = {} for name, protein, ligand in self.test_cases: pi_t_dict[name] = compute_pi_t(ligand, protein) for name, pi_t in pi_t_dict.iteritems(): print "Processing pi-T for %s" % name assert len(pi_t) == 3 assert "T-SHAPED_ALPHA" in pi_t assert "T-SHAPED_BETA" in pi_t assert "T-SHAPED_OTHER" in pi_t def test_compute_pi_cation(self): """ TestBinana: Compute Pi-Cation Interactions. """ pi_cation_dict = {} for name, protein, ligand in self.test_cases: pi_cation_dict[name] = compute_pi_cation(ligand, protein) for name, pi_cation in pi_cation_dict.iteritems(): print "Processing pi-cation for %s" % name assert len(pi_cation) == 6 assert 'PI-CATION_LIGAND-CHARGED_ALPHA' in pi_cation assert 'PI-CATION_LIGAND-CHARGED_BETA' in pi_cation assert 'PI-CATION_LIGAND-CHARGED_OTHER' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_ALPHA' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_BETA' in pi_cation assert 'PI-CATION_RECEPTOR-CHARGED_OTHER' in pi_cation def test_compute_salt_bridges(self): """ TestBinana: Compute Salt Bridges. TODO(bramsundar): None of the examples contain salt-bridge interactions. Find a complex with an actual salt-bridge interaction. """ salt_bridges_dict = {} for name, protein, ligand in self.test_cases: salt_bridges_dict[name] = compute_salt_bridges(ligand, protein) for name, salt_bridges in salt_bridges_dict.iteritems(): print "Processing salt-bridges for %s" % name assert len(salt_bridges) == 3 print salt_bridges assert 'SALT-BRIDGE_ALPHA' in salt_bridges assert 'SALT-BRIDGE_BETA' in salt_bridges assert 'SALT-BRIDGE_OTHER' in salt_bridges def test_compute_input_vector(self): """ TestBinana: Compute Input Vector. """ features_dict = {} for name, protein, ligand in self.test_cases: features_dict[name] = self.binana.compute_input_vector( ligand, protein) num_atoms = len(Binana.atom_types) # Lengths: # ligand_receptor_close_contacts: N*(N+1)/2 # ligand_receptor_contacts: N*(N+1)/2 # ligand_receptor_electrostatics: N*(N+1)/2 # ligand_atom_counts: N # hbonds: 12 # hydrophobics: 6 # stacking: 3 # pi_cation: 6 # t_shaped: 3 # active_site_flexibility: 6 # salt_bridges: 3 # rotatable_boonds_count: 1 total_len = (3 * num_atoms * (num_atoms + 1) / 2 + num_atoms + 12 + 6 + 3 + 6 + 3 + 6 + 3 + 1) for name, input_vector in features_dict.iteritems(): print "Processing input-vector for %s" % name assert len(input_vector) == total_len
def featurize_fingerprint(pdb_directories, pickle_out): """Featurize all pdbs in provided directories.""" # Instantiate copy of binana vector binana = Binana() # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector # for derivation. feature_len = binana.num_features() feature_vectors = {} for count, pdb_dir in enumerate(pdb_directories): print "\nprocessing %d-th pdb %s" % (count, dir) print "About to extract ligand and protein input files" ligand_pdb, ligand_pdbqt = None, None protein_pdb, protein_pdbqt = None, None for f in os.listdir(pdb_dir): if re.search("_ligand_hyd.pdb$", f): ligand_pdb = f elif re.search("_ligand_hyd.pdbqt$", f): ligand_pdbqt = f elif re.search("_protein_hyd.pdb$", f): protein_pdb = f elif re.search("_protein_hyd.pdbqt$", f): protein_pdbqt = f print "Extracted Input Files:" print(ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt) if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not protein_pdbqt): raise ValueError("Required files not present for %s" % pdb_dir) ligand_pdb_path = os.path.join(pdb_dir, ligand_pdb) ligand_pdbqt_path = os.path.join(pdb_dir, ligand_pdbqt) protein_pdb_path = os.path.join(pdb_dir, protein_pdb) protein_pdbqt_path = os.path.join(pdb_dir, protein_pdbqt) print "About to load ligand from input files" ligand_pdb_obj = PDB() ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path) print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) print "About to generate feature vector." features = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj) if len(features) != feature_len: raise ValueError("Feature length incorrect on %s" % pdb_dir) print "Feature vector generated correctly." print "About to compute ligand smiles string." ligand_mol = Chem.MolFromPDBFile(ligand_pdb_path) # TODO(rbharath): Why does this fail sometimes? if ligand_mol is None: continue smiles = Chem.MolToSmiles(ligand_mol) print "About to compute sequence." protein = md.load(protein_pdb_path) seq = [r.name for r in protein.top.residues] # Write the computed quantities feature_vectors[pdb_dir] = (features, smiles, seq) print "About to write pickle to " + pickle_out with gzip.open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f)
def featurize_dude(dude_dir, target, pickle_dir, num_jobs): """Featurize DUD-E docked poses and write features to pickle_out. Parameters ---------- dude_dir: string Path to DUD-E directory target: string Name of DUD-E target. pickle_dir: string Path to directory to output pickles """ target_dir = os.path.join(dude_dir, target) actives_dir = os.path.join(target_dir, "actives") decoys_dir = os.path.join(target_dir, "decoys") actives = [a for a in os.listdir(actives_dir)] decoys = [a for a in os.listdir(decoys_dir)] receptor = os.path.join(target_dir, "receptor.pdb") pickle_out = os.path.join(target_dir, "out.pkl.gz") # Just for debugging purposes actives = actives[:1] num_per_job = int(math.ceil(len(actives)/float(num_jobs))) print "Number per job: %d" % num_per_job protein_pdb_path = "/home/rbharath/DUD-E/aa2ar/receptor_hyd.pdb" protein_pdbqt_path = "/home/rbharath/DUD-E/aa2ar/receptor_hyd.pdbqt" print "About to load protein from input files" protein_pdb_obj = PDB() protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path) binana = Binana() feature_len = binana.num_features() feature_vectors = {} for compound in actives: compound_name = compound.split(".")[0] compound_pdbqt = compound_name + "_hyd_out.pdbqt" compound_pdbqt = os.path.join(actives_dir, compound_pdbqt) # Convert the pdbqt to pdb pdbqt_to_pdb(compound_pdbqt, actives_dir) compound_pdb = compound_name + "_hyd_out.pdb" compound_pdb = os.path.join(actives_dir, compound_pdb) structures = MultiStructure() structures.load_from_files(compound_pdb, compound_pdbqt) vectors = [] for key in sorted(structures.molecules.keys()): structure = structures.molecules[key] print "type(structure)" print type(structure) vectors.append(binana.compute_input_vector(structure, protein_pdb_obj)) feature_vectors[compound_name] = vectors with gzip.open(pickle_out, "wb") as f: pickle.dump(feature_vectors, f) decoys = decoys[:1]