def test_pdbqt_to_pdb(self):
    """Check that a PDBQT file converts back to a PDB block with matching atoms.

    The protein fixture is written out as PDB and as PDBQT. The PDBQT file is
    converted to a PDB block with `rdkit_util.pdbqt_to_pdb`, parsed, and
    compared atom by atom (atomic number) with the molecule loaded directly
    from the same PDBQT file.
    """
    # Local import keeps the block self-contained.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces fixed /tmp paths: it cannot
    # collide with other test runs and is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp:
        out_pdb = os.path.join(tmp, "mol.pdb")
        out_pdbqt = os.path.join(tmp, "mol.pdbqt")
        rdkit_util.write_molecule(mol, out_pdb)
        rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

        pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
        pdb_mol = Chem.MolFromPDBBlock(
            pdb_block, sanitize=False, removeHs=False)
        _, pdbqt_mol = rdkit_util.load_molecule(
            out_pdbqt, add_hydrogens=False, calc_charges=False)

        assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
        for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def test_pdbqt_to_pdb(self):
    """Check that a PDBQT file converts back to a PDB block with matching atoms.

    The protein fixture is written out as PDB and as PDBQT. The PDBQT file is
    converted to a PDB block with `rdkit_util.pdbqt_to_pdb`, parsed with RDKit,
    and compared atom by atom (atomic number) with the molecule loaded
    directly from the same PDBQT file.
    """
    # Local import keeps the block self-contained.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces fixed /tmp paths: it cannot
    # collide with other test runs and is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp:
        out_pdb = os.path.join(tmp, "mol.pdb")
        out_pdbqt = os.path.join(tmp, "mol.pdbqt")
        rdkit_util.write_molecule(mol, out_pdb)
        rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

        pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
        from rdkit import Chem
        pdb_mol = Chem.MolFromPDBBlock(
            pdb_block, sanitize=False, removeHs=False)
        _, pdbqt_mol = rdkit_util.load_molecule(
            out_pdbqt, add_hydrogens=False, calc_charges=False)

        assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
        for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def test_pdbqt_to_pdb(self):
    """Round-trip the protein fixture through PDBQT and back to a PDB block.

    The molecule parsed from the converted PDB block must have the same number
    of atoms, with the same atomic numbers in the same order, as the molecule
    loaded directly from the PDBQT file.
    """
    _, source_mol = rdkit_util.load_molecule(
        self.protein_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as workdir:
        pdb_path = os.path.join(workdir, "mol.pdb")
        pdbqt_path = os.path.join(workdir, "mol.pdbqt")
        # Both formats are written with the protein flag set.
        rdkit_util.write_molecule(source_mol, pdb_path, is_protein=True)
        rdkit_util.write_molecule(source_mol, pdbqt_path, is_protein=True)

        converted_block = pdbqt_utils.pdbqt_to_pdb(pdbqt_path)
        from rdkit import Chem
        from_block = Chem.MolFromPDBBlock(
            converted_block, sanitize=False, removeHs=False)
        _, from_pdbqt = rdkit_util.load_molecule(
            pdbqt_path, add_hydrogens=False, calc_charges=False)

        assert from_block.GetNumAtoms() == from_pdbqt.GetNumAtoms()
        # Atom counts are equal at this point, so pairing covers every atom.
        for block_atom, pdbqt_atom in zip(from_block.GetAtoms(),
                                          from_pdbqt.GetAtoms()):
            assert block_atom.GetAtomicNum() == pdbqt_atom.GetAtomicNum()
def prepare_ligand(args):
    """Write a ligand to SDF and produce its prepared PDB/PDBQT files.

    Parameters
    ----------
    args: sequence
      Indexed as `args[0]` (molecule name), `args[1]` (molecule object handed
      to `rdkit_util.write_molecule`) and `args[2]` (output directory).

    Returns
    -------
    None. Files are written under the output directory as a side effect.
    """
    mol_name = args[0]
    mol = args[1]
    save_dir = args[2]

    # The SDF is always (re)written, even when the prepared PDBQT exists.
    sdf_path = os.path.join(save_dir, str(mol_name) + ".sdf")
    rdkit_util.write_molecule(mol, sdf_path)

    hyd_path = os.path.join(save_dir, "%s_prepared.pdb" % mol_name)
    pdbqt_path = os.path.join(save_dir, "%s_prepared.pdbqt" % mol_name)

    # Only run the preparation step when no prepared PDBQT is on disk yet.
    if not os.path.exists(pdbqt_path):
        hydrogenate_and_compute_partial_charges(
            sdf_path,
            "sdf",
            hyd_output=hyd_path,
            pdbqt_output=pdbqt_path,
            verbose=False,
            protein=False)
def prepare_ligand(args):
    """Write a ligand to SDF and produce its prepared PDB/PDBQT files.

    Parameters
    ----------
    args: sequence
      Indexed as `args[0]` (molecule name), `args[1]` (molecule object handed
      to `rdkit_util.write_molecule`) and `args[2]` (output directory).

    Returns
    -------
    None. Files are written under the output directory as a side effect.
    """
    mol_name = args[0]
    mol = args[1]
    save_dir = args[2]

    # The SDF is always (re)written, even when the prepared PDBQT exists.
    ligand_sdf = os.path.join(save_dir, str(mol_name) + ".sdf")
    rdkit_util.write_molecule(mol, ligand_sdf)

    prepared_pdb = os.path.join(save_dir, "%s_prepared.pdb" % mol_name)
    prepared_pdbqt = os.path.join(save_dir, "%s_prepared.pdbqt" % mol_name)

    already_prepared = os.path.exists(prepared_pdbqt)
    if already_prepared:
        return

    hydrogenate_and_compute_partial_charges(
        ligand_sdf,
        "sdf",
        hyd_output=prepared_pdb,
        pdbqt_output=prepared_pdbqt,
        verbose=False,
        protein=False)
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Write a hydrogenated pdb and a pdbqt for the molecule in `input_file`.

    The molecule is loaded with `add_hydrogens=True` and `calc_charges=True`
    and then written twice: once to `hyd_output` and once to `pdbqt_output`.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
      Path to input file.
    input_format: String
      Name of input format. Not read by this function body.
    hyd_output: String, optional
      Destination of the hydrogenated file; passed through `str()`.
    pdbqt_output: String, optional
      Destination of the pdbqt file; passed through `str()`.
    protein: bool, optional (default True)
      Forwarded as `is_protein` to the writer. When true, the pdbqt file is
      additionally read back and rewritten.
    verbose: bool, optional (default True)
      If true, print progress messages for the two write steps.
    """
    loaded = rdkit_util.load_molecule(
        input_file, add_hydrogens=True, calc_charges=True)
    mol = loaded[1]

    if verbose:
        print("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)

    if verbose:
        print("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    # This message is printed regardless of `verbose`.
    print("Removing ROOT/ENDROOT/TORSDOF")
    # NOTE: every line is carried over unchanged; no filtering is applied.
    with open(pdbqt_output) as handle:
        kept_lines = list(handle)
    with open(pdbqt_output, "w") as handle:
        handle.writelines(kept_lines)
def test_write_molecule(self):
    """Check that a ligand survives a write/load round trip through SDF.

    The reloaded molecule must have the same number of atoms as the original,
    with the same atomic numbers in the same order.
    """
    # Local import keeps the block self-contained.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces the fixed /tmp path: it cannot
    # collide with other test runs and is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    # Compare the original against the reloaded molecule (mol2); comparing
    # mol with itself would always pass.
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Write a hydrogenated pdb and a pdbqt for the molecule in `input_file`.

    The molecule is loaded with `add_hydrogens=True` and `calc_charges=True`
    and then written twice: once to `hyd_output` and once to `pdbqt_output`.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
      Path to input file.
    input_format: String
      Name of input format. Not read by this function body.
    hyd_output: String, optional
      Destination of the hydrogenated file; passed through `str()`.
    pdbqt_output: String, optional
      Destination of the pdbqt file; passed through `str()`.
    protein: bool, optional (default True)
      Forwarded as `is_protein` to the writer. When true, the pdbqt file is
      additionally read back and rewritten.
    verbose: bool, optional (default True)
      If true, log progress messages for the two write steps.
    """
    loaded = rdkit_util.load_molecule(
        input_file, add_hydrogens=True, calc_charges=True)
    mol = loaded[1]

    if verbose:
        logging.info("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)

    if verbose:
        logging.info("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    # This message is logged regardless of `verbose`.
    logging.info("Removing ROOT/ENDROOT/TORSDOF")
    # NOTE: every line is carried over unchanged; no filtering is applied.
    with open(pdbqt_output) as handle:
        kept_lines = list(handle)
    with open(pdbqt_output, "w") as handle:
        handle.writelines(kept_lines)
def test_write_molecule(self):
    """Check that a ligand survives a write/load round trip through SDF.

    The reloaded molecule must have the same number of atoms as the original,
    with the same atomic numbers in the same order.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

        assert mol.GetNumAtoms() == mol2.GetNumAtoms()
        # Compare the original against the reloaded molecule (mol2); comparing
        # mol with itself would always pass.
        for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
            assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
def test_write_molecule(self):
    """Check that a ligand survives a write/load round trip through SDF.

    The reloaded molecule must have the same number of atoms as the original,
    with the same atomic numbers in the same order.
    """
    # Local import keeps the block self-contained.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces the fixed /tmp path: it cannot
    # collide with other test runs and is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    # Compare the original against the reloaded molecule (mol2); comparing
    # mol with itself would always pass.
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def generate_poses(self,
                   molecular_complex,
                   centroid=None,
                   box_dims=None,
                   exhaustiveness=10,
                   num_modes=9,
                   num_pockets=None,
                   out_dir=None,
                   generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and
    invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complex: tuple
      A `(protein_file, ligand_file)` pair of file paths.
    centroid: np.ndarray, optional
      The centroid to dock against. Only used when `box_dims` is also given;
      otherwise the centroid(s) are computed.
    box_dims: np.ndarray, optional
      Of shape `(3,)` holding the size of the box to dock. Only used when
      `centroid` is also given. If not specified and no pocket finder is set,
      it is the protein's coordinate range plus 5.0.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at each
      invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only generate
      poses for the first `num_pockets` returned by `self.pocket_finder`.
    out_dir: str, optional
      If specified, write generated poses to this directory. Otherwise a
      fresh temporary directory is created.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes. This
      is used typically when invoking external docking programs that compute
      scores.

    Returns
    -------
    `docked_poses` if `generate_scores` is false, otherwise the tuple
    `(docked_poses, scores)`. `docked_poses` is a list of
    `(protein_mol, ligand_mol)` pairs. `scores` is the list of scores read
    from the Vina output by `vina_utils.load_docked_ligands`.

    Raises
    ------
    `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
    or if `molecular_complex` has more than two entries.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    if num_pockets is not None and self.pocket_finder is None:
        raise ValueError(
            "If num_pockets is specified, pocket_finder must have been provided at construction time."
        )

    # Parse complex
    if len(molecular_complex) > 2:
        raise ValueError(
            "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
        )

    (protein_file, ligand_file) = molecular_complex

    # Prepare protein
    protein_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    # The Vina configuration written below always points at protein_pdbqt,
    # so the receptor files must exist no matter how the docking box is
    # chosen (explicit centroid, pocket finder, or whole protein).
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
        centroids = [centroid]
        dimensions = [box_dims]
    else:
        if self.pocket_finder is None:
            logger.info("Pockets not specified. Will use whole protein to dock")
            protein_centroid = geometry_utils.compute_centroid(protein_mol[0])
            protein_range = mol_xyz_util.get_molecule_range(protein_mol[0])
            box_dims = protein_range + 5.0
            centroids, dimensions = [protein_centroid], [box_dims]
        else:
            logger.info("About to find putative binding pockets")
            pockets = self.pocket_finder.find_pockets(protein_file)
            logger.info("%d pockets found in total" % len(pockets))
            logger.info("Computing centroid and size of proposed pockets.")
            centroids, dimensions = [], []
            for pocket in pockets:
                protein_centroid = pocket.center()
                (x_min, x_max), (y_min, y_max), (
                    z_min,
                    z_max) = pocket.x_range, pocket.y_range, pocket.z_range
                # TODO(rbharath: Does vina divide box dimensions by 2?
                x_box = (x_max - x_min) / 2.
                y_box = (y_max - y_min) / 2.
                z_box = (z_max - z_min) / 2.
                box_dims = (x_box, y_box, z_box)
                centroids.append(protein_centroid)
                dimensions.append(box_dims)

    if num_pockets is not None:
        logger.info(
            "num_pockets = %d so selecting this many pockets for docking." %
            num_pockets)
        centroids = centroids[:num_pockets]
        dimensions = dimensions[:num_pockets]

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    ligand_mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(ligand_mol[1], ligand_pdbqt)

    docked_complexes = []
    all_scores = []
    for i, (protein_centroid, box_dims) in enumerate(
            zip(centroids, dimensions)):
        logger.info("Docking in pocket %d/%d" % (i + 1, len(centroids)))
        logger.info("Docking with center: %s" % str(protein_centroid))
        logger.info("Box dimensions: %s" % str(box_dims))
        # Write Vina conf file. The conf, log and output paths are the same
        # for every pocket, so each iteration overwrites the previous files;
        # the docked ligands are loaded before the next iteration starts.
        conf_file = os.path.join(out_dir, "conf.txt")
        vina_utils.write_vina_conf(
            protein_pdbqt,
            ligand_pdbqt,
            protein_centroid,
            box_dims,
            conf_file,
            num_modes=num_modes,
            exhaustiveness=exhaustiveness)

        # Define locations of log and output files
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
        out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
        logger.info("About to call Vina")
        # NOTE(review): the command is an unquoted shell string, so paths
        # containing spaces or shell metacharacters will not be passed
        # through intact.
        call(
            "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                  log_file, out_pdbqt),
            shell=True)
        ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
        docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
        all_scores += scores

    if generate_scores:
        return docked_complexes, all_scores
    else:
        return docked_complexes