def write_grid_in_file(self, mode, grid_ligand='', xyz=False, receptor_file=''):
    """
    writes the glide grid input file to self.path + self.grid_in
    :param mode: (string) 'other_ligand' centers the grid on the centroid of the first structure in
                 grid_ligand, 'xyz' centers it on the given xyz coordinates, any other value centers
                 it on the centroid of the first structure in self.path + self.split_ligand
    :param grid_ligand: (string) structure file used when mode is 'other_ligand'
    :param xyz: (sequence) three grid center coordinates, used when mode is 'xyz'
    :param receptor_file: (string) receptor written to RECEP_FILE; self.split_protein when empty
    :return:
    """
    if mode == 'xyz':
        x, y, z = xyz
    else:
        # both remaining modes take the centroid of the first structure of a ligand file
        if mode == 'other_ligand':
            ligand_source = grid_ligand
        else:
            ligand_source = self.path + self.split_ligand
        centroid = get_centroid(next(StructureReader(ligand_source)))
        x, y, z = centroid[:3]

    receptor = self.split_protein if receptor_file == '' else receptor_file

    grid_lines = [
        'GRID_CENTER {},{},{}\n'.format(x, y, z),
        'GRIDFILE {}\n'.format(self.grid_file),
        'INNERBOX 15,15,15\n',
        'OUTERBOX 30,30,30\n',
        'RECEP_FILE {}\n'.format(receptor),
    ]
    with open(self.path + self.grid_in, 'w') as f:
        f.writelines(grid_lines)
def create_conformer_decoys(conformers, grid_size, start_lig_center, prot, pose_path, target,
                            max_poses, min_angle, max_angle):
    """
    creates decoys by randomly translating/rotating conformers until max_poses - 1 decoys are written
    :param conformers: (list) conformer structures; the chosen conformer is moved in place
    :param grid_size: (int) grid offsets run over range(-grid_size, grid_size) on each axis
    :param start_lig_center: (list) coordinates the conformer centroid is moved to before the
                             grid offset is added
    :param prot: protein structure used for the clash check
    :param pose_path: (string) directory where decoy files are written
    :param target: (string) target name used in the decoy file names
    :param max_poses: (int) decoy numbering starts at 1 and stops before max_poses
    :param min_angle: (float) lower bound of the uniformly sampled rotation angles
    :param max_angle: (float) upper bound of the uniformly sampled rotation angles
    :return:
    """
    axis = range(-grid_size, grid_size)
    # every entry is [offset, number of failed attempts at that offset]
    grid = [[[dx, dy, dz], 0] for dx in axis for dy in axis for dz in axis]

    misses_in_a_row = 0
    pose_number = 1
    while pose_number < max_poses:
        misses_in_a_row += 1
        conformer = random.choice(conformers)
        center = list(get_centroid(conformer))

        # translation
        index = random.randint(0, len(grid) - 1)
        offset = grid[index][0]
        shift = [start_lig_center[k] - center[k] + offset[k] for k in range(3)]
        transform.translate_structure(conformer, shift[0], shift[1], shift[2])
        center = list(get_centroid(conformer))

        # rotation
        x_angle = np.random.uniform(min_angle, max_angle)
        y_angle = np.random.uniform(min_angle, max_angle)
        z_angle = np.random.uniform(min_angle, max_angle)
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle, center)

        if not steric_clash.clash_volume(prot, struc2=conformer) < 200:
            if misses_in_a_row == 5 and len(grid) > 1:
                # five failures in a row: drop the offset with the most recorded failures
                grid.remove(max(grid, key=lambda entry: entry[1]))
                misses_in_a_row = 0
            else:
                grid[index][1] += 1
            continue

        # clash volume is acceptable: write the pose out as the next decoy
        decoy_file = os.path.join(pose_path, "{}_lig{}.mae".format(target, pose_number))
        with structure.StructureWriter(decoy_file) as decoy:
            decoy.append(conformer)
        modify_file(decoy_file, '_pro_ligand')
        modify_file(decoy_file, '{}_lig0.mae'.format(target))
        pose_number += 1
        grid[index][1] = 0
        misses_in_a_row = 0
def main():
    """
    prints (pose file, centroid, score_no_vdW value) tuples for the in-place docked cartesian
    poses of a protein, target, start pair listed in docked_prot_file

    NOTE(review): the indentation of this function was reconstructed from a source whose
    whitespace was collapsed; the trailing break is assumed to belong to the per-line loop,
    so only the first non-comment line is processed -- confirm against the original file.
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    args = parser.parse_args()
    docking_config = []
    scores = []
    with open(args.docked_prot_file) as fp:
        for line in fp:
            # lines starting with '#' are skipped
            if line[0] == '#':
                continue
            # every remaining line must hold exactly three whitespace-separated fields
            protein, target, start = line.strip().split()
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')
            # one docking job per pair: a single pose, glide docking method 'inplace'
            docking_config.append({'folder': pair_path,
                                   'name': '{}-to-{}_cartesian'.format(target, start),
                                   'grid_file': os.path.join(pair_path, '{}-to-{}.zip'.format(target, start)),
                                   'prepped_ligand_file': os.path.join(pair_path, '{}-to-{}_cartesian_merge_pv.mae'.format(target, start)),
                                   'glide_settings': {'num_poses': 1, 'docking_method': 'inplace'}})
            dock_set = Docking_Set()
            results = dock_set.get_docking_gscores(docking_config, mode='multi')
            # results are keyed by the docking job name, then by pose file
            results_by_ligand = results['{}-to-{}_cartesian'.format(target, start)]
            for file in results_by_ligand:
                s = list(structure.StructureReader(os.path.join(pose_path, file)))[0]
                # only the first result entry of each pose file is scored
                scores.append((file, get_centroid(s), score_no_vdW(results_by_ligand[file][0])))
            print(scores)
            break
def create_decoys(lig_file):
    """
    writes MAX_DECOYS randomly translated/rotated copies of a glide ligand pose next to it
    decoy files are named after lig_file with a letter ('a', 'b', ...) inserted before '.mae'
    :param lig_file: (string) file of glide ligand pose that will be translated/rotated
    :return:
    """
    lig_name = lig_file.split('/')[-1]
    is_first_pose = lig_name.split('_')[-1] == 'lig0.mae'
    modify_file(lig_file, '_pro_ligand' if is_first_pose else '_ligand')

    stem = lig_file[:-4]
    for decoy_num in range(MAX_DECOYS):
        # re-read the pose every time so each decoy starts from the unmodified ligand
        pose = list(structure.StructureReader(lig_file))[0]

        # translation
        ux, uy, uz = random_three_vector()
        shift = np.random.normal(MEAN_TRANSLATION, STDEV_TRANSLATION)
        transform.translate_structure(pose, ux * shift, uy * shift, uz * shift)

        # rotation
        angles = [np.random.uniform(MIN_ANGLE, MAX_ANGLE) for _ in range(3)]
        center = list(get_centroid(pose))
        transform.rotate_structure(pose, angles[0], angles[1], angles[2], center)

        decoy_file = stem + chr(ord('a') + decoy_num) + '.mae'
        with structure.StructureWriter(decoy_file) as decoy:
            decoy.append(pose)
        # the same call is made whether or not lig_file is the lig0 pose
        modify_file(decoy_file, lig_name)
def run_group_dist_check(grouped_files, raw_root, index, dist_dir, max_poses, max_decoys):
    """
    checks mean distance of displacement for decoys for each protein, target, start group
    for every pair, the centroid distance between each glide pose and each of its decoys is
    averaged per pose and then over poses; the per-pair means are pickled to
    dist_dir/<index>.pkl and printed
    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory where raw data will be placed
    :param index: (int) group number
    :param dist_dir: (string) directory to place distances
    :param max_poses: (int) maximum number of glide poses considered
    :param max_decoys: (int) maximum number of decoys created per glide pose
    :return:
    """
    save = []
    for protein, target, start in grouped_files[index]:
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
        pose_path = os.path.join(pair_path, 'ligand_poses')
        pv_file = os.path.join(pair_path, '{}-to-{}_pv.maegz'.format(target, start))
        num_poses = len(list(structure.StructureReader(pv_file)))
        means = []
        for i in range(num_poses):
            if i == max_poses:
                break
            lig_file = os.path.join(pose_path, '{}_lig{}.mae'.format(target, i))
            s = list(structure.StructureReader(lig_file))[0]
            c = get_centroid(s)
            dists = []
            for j in range(max_decoys):
                # decoys are named after the pose with a letter inserted before '.mae'
                decoy_file = lig_file[:-4] + chr(ord('a') + j) + '.mae'
                decoy = list(structure.StructureReader(decoy_file))[0]
                dists.append(transform.get_vector_magnitude(c - get_centroid(decoy)))
            means.append(statistics.mean(dists))
        save.append(statistics.mean(means))

    # the context manager guarantees the pickle is flushed and the handle released
    with open(os.path.join(dist_dir, '{}.pkl'.format(index)), 'wb') as outfile:
        pickle.dump(save, outfile)
    print(save)
def run_search(protein, target, start, index, raw_root, get_time, cutoff, rotation_search_step_size,
               grid, no_prot_h, pocket_only, test=False, x_rot=0, y_rot=0, z_rot=0):
    """
    loads the ligands and protein of a protein, target, start pair and either times the decoy
    operations or runs the conformer grid search
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param index: (int) passed on to create_conformer_decoys
    :param raw_root: (string) directory holding the raw data
    :param get_time: (bool) if True run time_conformer_decoys instead of the search
    :param cutoff: passed on to create_conformer_decoys
    :param rotation_search_step_size: (int) passed on to the timing or search function
    :param grid: (list) passed on to create_conformer_decoys
    :param no_prot_h: (bool) if True hydrogens are deleted from the protein
    :param pocket_only: (bool) if True get_pocket_res is called on the protein with the target ligand
    :param test: (bool) passed on to create_conformer_decoys
    :param x_rot: passed on to create_conformer_decoys
    :param y_rot: passed on to create_conformer_decoys
    :param z_rot: passed on to create_conformer_decoys
    :return: the value returned by create_conformer_decoys, or None when get_time is set
    """

    def load_first(mae_file):
        # only the first structure stored in the file is used
        return list(structure.StructureReader(mae_file))[0]

    pair_path = os.path.join(os.path.join(raw_root, protein), '{}-to-{}'.format(target, start))

    start_lig = load_first(os.path.join(pair_path, '{}_lig.mae'.format(start)))
    target_lig = load_first(os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(target)))
    build.delete_hydrogens(target_lig)
    start_lig_center = list(get_centroid(start_lig))

    prot = load_first(os.path.join(pair_path, '{}_prot.mae'.format(start)))
    # the atom count is printed again after every optional modification of the protein
    print(prot.atom_total)
    if pocket_only:
        get_pocket_res(prot, target_lig, 6)
        print(prot.atom_total)
    if no_prot_h:
        build.delete_hydrogens(prot)
        print(prot.atom_total)

    if get_time:
        time_conformer_decoys(pair_path, start_lig_center, target_lig, prot, rotation_search_step_size)
        return None

    return create_conformer_decoys(grid, target_lig, cutoff, rotation_search_step_size, protein,
                                   target, start, index, pair_path, test, x_rot, y_rot, z_rot)
def run_test_search(protein, target, start, raw_root, cutoff, rotation_search_step_size, pair_path,
                    no_prot_h, pocket_only, get_time):
    """
    checks the search by comparing the conformer returned by run_search in test mode against
    the first stored conformer translated and rotated by the same randomly chosen amounts
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param raw_root: (string) directory holding the raw data
    :param cutoff: passed on to run_search
    :param rotation_search_step_size: (int) step between candidate rotation angles, in degrees
    :param pair_path: (string) directory holding aligned_to_start_without_hydrogen_conformers.mae
    :param no_prot_h: (bool) passed on to run_search
    :param pocket_only: (bool) passed on to run_search
    :param get_time: (bool) passed on to run_search
    :return:
    """
    # only the first five angles of the search range are candidates
    candidate_angles = list(range(-30, 30 + rotation_search_step_size, rotation_search_step_size))[:5]
    x_rot = random.choice(candidate_angles)
    y_rot = random.choice(candidate_angles)
    z_rot = random.choice(candidate_angles)

    grid_points = list(range(-6, 7))
    grid = [[random.choice(grid_points) for _ in range(3)]]

    conformer = run_search(protein, target, start, 0, raw_root, get_time, cutoff,
                           rotation_search_step_size, grid, no_prot_h, pocket_only, True, x_rot,
                           y_rot, z_rot)

    # rebuild the expected pose from the first stored conformer
    conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    base_conf = list(structure.StructureReader(conformer_file))[0]
    dx, dy, dz = grid[0]
    translate_structure(base_conf, dx, dy, dz)
    base_conf_center = list(get_centroid(base_conf))
    rotated = rotate_structure(base_conf.getXYZ(copy=False), math.radians(x_rot),
                               math.radians(y_rot), math.radians(z_rot), base_conf_center)
    base_conf.setXYZ(rotated)

    rmsd_val = rmsd.calculate_in_place_rmsd(conformer, conformer.getAtomIndices(), base_conf,
                                            base_conf.getAtomIndices())
    if abs(rmsd_val) != 0:
        print("x_rot =", x_rot, "y_rot =", y_rot, "z_rot =", z_rot)
        print("RMSD =", rmsd_val, "but RMSD should equal 0")
    else:
        print("Search works properly", rmsd_val)
def run_group(grouped_files, raw_root, index, num_clusters):
    """
    selects a clustered subset of pose codes for each protein, target, start group
    codes whose last '_' field does not start with 'lig0' and that end in a letter are
    clustered on their centroids with k-means, and the first code seen for each cluster is
    kept; all other codes are always kept. If there are no more such codes than num_clusters,
    every code is kept. The selection is pickled to <pair_path>/<target>-to-<start>_clustered.pkl
    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory where raw data will be placed
    :param index: (int) group number
    :param num_clusters: (int) number of k-means clusters
    :return:
    """
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, pair)
        pose_path = os.path.join(pair_path, 'ligand_poses')
        graph_dir = '{}/{}-to-{}_graph.pkl'.format(pair_path, target, start)
        # NOTE: pickle.load must only be used on files produced by this pipeline
        with open(graph_dir, 'rb') as infile:
            graph_data = pickle.load(infile)

        centroids = []
        codes_to_include = []
        for _, pdb_code in tqdm(enumerate(graph_data), desc="pdb_codes"):
            if pdb_code.split('_')[-1][:4] != 'lig0' and pdb_code[-1].isalpha():
                file = os.path.join(pose_path, '{}.mae'.format(pdb_code))
                s = list(structure.StructureReader(file))[0]
                centroids.append((get_centroid(s), pdb_code))
            else:
                codes_to_include.append(pdb_code)

        if len(centroids) > num_clusters:
            X = np.zeros((len(centroids), 3))
            for i, (centroid, _) in enumerate(centroids):
                X[i] = centroid[:3]
            kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(X)
            # keep the first code encountered for every cluster label
            condensed = {}
            for i, label in enumerate(kmeans.labels_):
                if len(condensed) == num_clusters:
                    break
                if label not in condensed:
                    condensed[label] = centroids[i][1]
            codes_to_include.extend(list(condensed.values()))
        else:
            codes_to_include = list(graph_data.keys())

        # one output file per pair; close it before moving on to the next pair
        with open(os.path.join(pair_path, '{}_clustered.pkl'.format(pair)), 'wb') as outfile:
            pickle.dump(codes_to_include, outfile)
def run_group(protein, target, start, raw_root, save_path, run_path, min_angle, max_angle, index,
              rmsd_cutoff, grid, num_jobs_submitted):
    """
    loads the structures of one protein, target, start pair and runs create_conformer_decoys on them
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param raw_root: (string) directory holding the raw data
    :param save_path: (string) passed on to create_conformer_decoys
    :param run_path: (string) passed on to create_conformer_decoys
    :param min_angle: (float) passed on to create_conformer_decoys
    :param max_angle: (float) passed on to create_conformer_decoys
    :param index: (int) passed on to create_conformer_decoys
    :param rmsd_cutoff: (float) passed on to create_conformer_decoys
    :param grid: (list) passed on to create_conformer_decoys
    :param num_jobs_submitted: (int) passed on to create_conformer_decoys
    :return:
    """

    def first_structure(mae_file):
        # only the first structure stored in the file is used
        return list(structure.StructureReader(mae_file))[0]

    pair_path = os.path.join(os.path.join(raw_root, protein), '{}-to-{}'.format(target, start))

    start_lig = first_structure(os.path.join(pair_path, '{}_lig.mae'.format(start)))
    target_lig = first_structure(
        os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(target)))
    start_lig_center = list(get_centroid(start_lig))
    prot = first_structure(os.path.join(pair_path, '{}_prot.mae'.format(start)))

    # every aligned conformer is handed to the search
    conformers = list(structure.StructureReader(os.path.join(pair_path, "aligned_conformers.mae")))

    create_conformer_decoys(save_path, run_path, conformers, grid, num_jobs_submitted,
                            start_lig_center, target_lig, prot, min_angle, max_angle, rmsd_cutoff,
                            protein, target, start, index)
def create_conformer_decoys(grid, target_lig, cutoff, rotation_search_step_size, protein, target,
                            start, index, pair_path, test, x_rot, y_rot, z_rot):
    """
    exhaustively rotates every stored conformer at every grid location and records how many
    poses fall within cutoff RMSD of the target ligand, together with timing information
    one row per grid location is written to
    <cwd>/decoy_timing_data/<protein>_<target>-to-<start>/<index>.csv
    :param grid: (list) grid locations, each indexable as [x, y, z] translation
    :param target_lig: target ligand structure poses are compared against
    :param cutoff: (float) poses with RMSD below this value count as correct
    :param rotation_search_step_size: (int) step, in degrees, between rotation angles in
                                      range(-30, 30 + step, step) on each axis
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param index: (int) used as the name of the csv file
    :param pair_path: (string) directory holding aligned_to_start_without_hydrogen_conformers.mae
    :param test: (bool) if True, return the conformer as soon as it has been rotated by
                 x_rot, y_rot, z_rot
    :param x_rot: (int) x rotation, in degrees, looked for in test mode
    :param y_rot: (int) y rotation, in degrees, looked for in test mode
    :param z_rot: (int) z rotation, in degrees, looked for in test mode
    :return: the rotated conformer in test mode when the rotation is reached, otherwise None
    """
    # running total of poses whose RMSD was evaluated, accumulated over all grid locations
    counter = 0
    data_dict = {
        'protein': [],
        'target': [],
        'start': [],
        'num_conformers': [],
        'num_poses_searched': [],
        'num_correct_poses_found': [],
        'time_elapsed': [],
        'time_elapsed_per_conformer': [],
        'grid_loc_x': [],
        'grid_loc_y': [],
        'grid_loc_z': []
    }
    angles = range(-30, 30 + rotation_search_step_size, rotation_search_step_size)
    conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")

    for grid_loc in grid:
        num_correct_found = 0
        # conformers are re-read for every grid location because they are moved in place
        conformers = list(structure.StructureReader(conformer_file))
        decoy_start_time = time.time()

        for conformer in conformers:
            transform.translate_structure(conformer, grid_loc[0], grid_loc[1], grid_loc[2])
            conformer_center = list(get_centroid(conformer))
            # every rotation starts from this copy of the translated coordinates
            coords = conformer.getXYZ(copy=True)

            for x in angles:
                for y in angles:
                    for z in angles:
                        new_coords = rotate_structure(coords, math.radians(x), math.radians(y),
                                                      math.radians(z), conformer_center)
                        conformer.setXYZ(new_coords)

                        if test and x_rot == x and y_rot == y and z_rot == z:
                            return conformer

                        rmsd_val = rmsd.calculate_in_place_rmsd(conformer,
                                                                conformer.getAtomIndices(),
                                                                target_lig,
                                                                target_lig.getAtomIndices())
                        # previously never incremented, so the column always held 0
                        counter += 1
                        if rmsd_val < cutoff:
                            num_correct_found += 1

        decoy_end_time = time.time()
        elapsed = decoy_end_time - decoy_start_time

        data_dict['protein'].append(protein)
        data_dict['target'].append(target)
        data_dict['start'].append(start)
        data_dict['num_conformers'].append(len(conformers))
        data_dict['num_poses_searched'].append(counter)
        data_dict['num_correct_poses_found'].append(num_correct_found)
        data_dict['time_elapsed'].append(elapsed)
        data_dict['time_elapsed_per_conformer'].append(elapsed / len(conformers))
        data_dict['grid_loc_x'].append(grid_loc[0])
        data_dict['grid_loc_y'].append(grid_loc[1])
        data_dict['grid_loc_z'].append(grid_loc[2])

    df = pd.DataFrame.from_dict(data_dict)
    save_folder = os.path.join(os.getcwd(), 'decoy_timing_data',
                               '{}_{}-to-{}'.format(protein, target, start))
    # exist_ok avoids a FileExistsError when several jobs create the folders at the same time
    os.makedirs(save_folder, exist_ok=True)
    df.to_csv(os.path.join(save_folder, '{}.csv'.format(index)))
    return None
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot, rotation_search_step_size):
    """
    prints average timings of the operations used when searching conformer decoys
    three passes are made over the conformers stored in pair_path: the first times the
    schrodinger transform translate/rotate calls, the second times translate_structure and
    rotate_structure, and the third times clash_iterator, clash_volume and the in place rmsd

    NOTE(review): the nesting of the loops in the third pass was reconstructed from a source
    whose whitespace was collapsed -- confirm against the original file.
    :param pair_path: (string) directory holding aligned_to_start_without_hydrogen_conformers.mae
    :param start_lig_center: (list) coordinates the conformer centroid is translated to
    :param target_lig: target ligand structure used for the rmsd timing
    :param prot: protein structure used for the clash timings
    :param rotation_search_step_size: (int) rotation step, converted with math.radians
    :return:
    """
    # pass 1: schrodinger transform functions
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        # NOTE(review): conformer_center was computed before the translation -- confirm intended
        start = time.time()
        transform.rotate_structure(
            conformer, math.radians(-30 - rotation_search_step_size), 0, 0,
            conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average schrodinger translate time =",
          statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))
    # pass 2: translate_structure / rotate_structure on freshly loaded conformers
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        start = time.time()
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))
    # pass 3: clash and rmsd checks inside the nested rotation search
    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))
        # keep track of rotation angles
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        # x_so_far is only written, never read; y_so_far and z_so_far feed the
        # second rotation argument of the enclosing loop level
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0
        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation
            rotate_structure(
                conformer, rotation_search_step_size_rad,
                math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                conformer_center)
            x_so_far += 1
            y_so_far += -30 - rotation_search_step_size - y_so_far
            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation
                rotate_structure(
                    conformer, 0, rotation_search_step_size_rad,
                    math.radians(-30 - rotation_search_step_size - z_so_far),
                    conformer_center)
                y_so_far += 1
                z_so_far += -30 - rotation_search_step_size - z_so_far
                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0,
                                     rotation_search_step_size_rad,
                                     conformer_center)
                    z_so_far += 1
                    # get clash_iterator
                    start = time.time()
                    max([
                        x[2] for x in list(
                            steric_clash.clash_iterator(prot, struc2=conformer))
                    ])
                    end = time.time()
                    clash_iterator_times.append(end - start)
                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)
                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer,
                                                 conformer.getAtomIndices(),
                                                 target_lig,
                                                 target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)
                    # stop after 1000 timed poses and report the averages
                    if len(clash_iterator_times) == 1000:
                        print("Average clash iterator time =",
                              statistics.mean(clash_iterator_times))
                        print("Average clash volume time =",
                              statistics.mean(clash_volume_times))
                        print("Average rmsd time =", statistics.mean(rmsd_times))
                        return
def main():
    """
    command line entry point for creating and checking glide grids
    tasks:
      run    -- writes a glide grid input file for each pair and one grid<i>_in.sh script per
                group into run_path, then submits every script with sbatch
      check  -- prints the pairs of docked_prot_file whose <target>-to-<start>.zip grid is missing
      update -- writes the lines of docked_prot_file whose grid exists to --new_prot_file
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either run, check, or update')
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('run_path', type=str,
                        help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    parser.add_argument('--new_prot_file', type=str,
                        default=os.path.join(os.getcwd(), 'index.txt'),
                        help='for update task, name of new prot file')
    args = parser.parse_args()

    if args.task == 'run':
        # process = get_prots(args.docked_prot_file)
        # NOTE(review): the pair list is hard-coded, so docked_prot_file is ignored by this task
        process = [('P04746', '3old', '1xd0')]
        grouped_files = group_files(N, process)
        for i, group in enumerate(grouped_files):
            with open(os.path.join(args.run_path, 'grid{}_in.sh'.format(i)), 'w') as f:
                for protein, target, start in group:
                    pair = '{}-to-{}'.format(target, start)
                    protein_path = os.path.join(args.raw_root, protein)
                    pair_path = os.path.join(protein_path, pair)
                    pose_path = os.path.join(pair_path, 'ligand_poses')

                    # grid input file, centered on the centroid of the first target pose
                    with open(os.path.join(pair_path, '{}.in'.format(pair)), 'w') as f_in:
                        c = get_centroid(
                            list(structure.StructureReader(
                                os.path.join(pose_path, '{}_lig0.mae'.format(target))))[0])
                        x, y, z = c[:3]
                        f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
                        f_in.write('GRIDFILE {}.zip\n'.format(pair))
                        f_in.write('INNERBOX 15,15,15\n')
                        f_in.write('OUTERBOX 30,30,30\n')
                        f_in.write('RECEP_FILE {}\n'.format(
                            os.path.join(pair_path, '{}_prot.mae'.format(start))))

                    # grid commands; every command needs its own line in the script
                    f.write('#!/bin/bash\n')
                    f.write('cd {}\n'.format(pair_path))
                    f.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(pair))
                    f.write('rm {}.in\n'.format(pair))
                    f.write('rm {}.log\n'.format(pair))

            # the script is closed (and flushed) before it is submitted
            os.chdir(args.run_path)
            os.system('sbatch -p owners -t 00:30:00 -o grid{}.out grid{}_in.sh'.format(i, i))

    if args.task == 'check':
        process = []
        num_pairs = 0
        with open(args.docked_prot_file) as fp:
            for line in tqdm(fp, desc='going through protein, target, start groups'):
                # skip comments and blank lines
                if not line.strip() or line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                num_pairs += 1
                protein_path = os.path.join(args.raw_root, protein)
                pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))

                # check basic files
                if not os.path.exists('{}/{}-to-{}.zip'.format(pair_path, target, start)):
                    process.append((protein, target, start))

        print('Missing', len(process), '/', num_pairs)
        print(process)

    if args.task == 'update':
        text = []
        with open(args.docked_prot_file) as fp:
            for line in tqdm(fp, desc='files'):
                # skip comments and blank lines
                if not line.strip() or line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                protein_path = os.path.join(args.raw_root, protein)
                pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
                if os.path.exists('{}/{}-to-{}.zip'.format(pair_path, target, start)):
                    text.append(line)

        with open(args.new_prot_file, "w") as file:
            file.writelines(text)
def run(grouped_files, run_path, raw_root, decoy_type):
    """
    writes the missing glide grid input files of the first pair of the first group, writes the
    commands that build them to run_path/grid<i>_in.sh and submits that script with sbatch
    two grids are considered per pair: <target>-to-<start> with the start protein and
    <target>-to-<target> with the target protein

    NOTE(review): both loops stop after their first iteration; the position of the inner
    break was reconstructed from a source whose whitespace was collapsed -- confirm.
    :param grouped_files: (list) list of protein, target, start groups
    :param run_path: (string) directory where script and output files will be written
    :param raw_root: (string) directory where raw data will be placed
    :param decoy_type: (string) name of the pose directory inside each pair directory
    :return:
    """

    def queue_grid(script, pair_path, pose_path, target, grid_name, receptor):
        # write <grid_name>.in centered on the first target pose, then add its commands to script
        with open(os.path.join(pair_path, '{}.in'.format(grid_name)), 'w') as f_in:
            ligand_file = os.path.join(pose_path, '{}_lig0.mae'.format(target))
            ligand = list(structure.StructureReader(ligand_file))[0]
            x, y, z = get_centroid(ligand)[:3]
            f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
            f_in.write('GRIDFILE {}.zip\n'.format(grid_name))
            f_in.write('INNERBOX 15,15,15\n')
            f_in.write('OUTERBOX 30,30,30\n')
            f_in.write('RECEP_FILE {}\n'.format(
                os.path.join(pair_path, '{}_prot.mae'.format(receptor))))

            # create grid commands
            script.write('#!/bin/bash\n')
            script.write('cd {}\n'.format(pair_path))
            script.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(grid_name))
            script.write('rm {}/{}.in\n'.format(pair_path, grid_name))
            script.write('rm {}/{}.log\n'.format(pair_path, grid_name))

    for i, group in enumerate(grouped_files):
        script_file = os.path.join(run_path, 'grid{}_in.sh'.format(i))
        with open(script_file, 'w') as f:
            print(os.path.join(run_path, 'grid{}_in.sh'.format(i)))
            for protein, target, start in group:
                pair = '{}-to-{}'.format(target, start)
                target_pair = '{}-to-{}'.format(target, target)
                pair_path = os.path.join(os.path.join(raw_root, protein), pair)
                pose_path = os.path.join(pair_path, decoy_type)
                print(pair_path)

                # create in file for grid
                pair_grid_exists = os.path.exists(os.path.join(pair_path, '{}.zip'.format(pair)))
                if not pair_grid_exists:
                    queue_grid(f, pair_path, pose_path, target, pair, start)

                if not os.path.exists(os.path.join(pair_path, '{}.zip'.format(target_pair))):
                    print('hi')
                    queue_grid(f, pair_path, pose_path, target, target_pair, target)

                break

        os.chdir(run_path)
        os.system('sbatch -p rondror -t 02:00:00 -o grid{}.out grid{}_in.sh'.format(i, i))
        # print('sbatch -p owners -t 02:00:00 -o grid{}.out grid{}_in.sh'.format(i, i))
        break
def create_conformer_decoys(save_path, run_path, conformers, grid, num_jobs_submitted,
                            start_lig_center, target_lig, prot, min_angle, max_angle, rmsd_cutoff,
                            protein, target, start, index):
    """
    randomly places conformers until a pose with acceptable clash volume falls within
    rmsd_cutoff of the target ligand, the search space is exhausted, or the number of jobs
    in the queue no longer equals num_jobs_submitted
    progress is appended to run_path/<protein>_<target>_<start>_<index>.txt and the final
    counts are written to save_path/<protein>_<target>_<start>.txt if that file does not exist

    NOTE(review): the indentation of this function was reconstructed from a source whose
    whitespace was collapsed -- confirm against the original file.
    :param save_path: (string) directory for the final result file
    :param run_path: (string) directory for the progress file
    :param conformers: (list) conformer structures; they are moved in place
    :param grid: (list) entries of the form [location, failure count]; entries are removed in place
    :param num_jobs_submitted: (int) expected value of get_jobs_in_queue for this pair
    :param start_lig_center: (list) coordinates the conformer centroid is translated to
    :param target_lig: target ligand structure used for the rmsd check
    :param prot: protein structure used for the clash check
    :param min_angle: (float) minimum rotation angle; math.degrees is applied to it, so
                      presumably radians
    :param max_angle: (float) maximum rotation angle; math.degrees is applied to it, so
                      presumably radians
    :param rmsd_cutoff: (float) a pose with rmsd below this value ends the search
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param index: (int) used in the progress file name; the name is reused for the grid index below
    :return:
    """
    # each conformer and each rotation carries a failure count next to it
    conformer_ls = [[c, 0] for c in conformers]
    rot_ls = []
    # every whole-degree rotation triple between min_angle and max_angle, stored in radians
    for rot_x in range(int(math.degrees(min_angle)),
                       int(math.degrees(max_angle)) + 1):
        for rot_y in range(int(math.degrees(min_angle)),
                           int(math.degrees(max_angle)) + 1):
            for rot_z in range(int(math.degrees(min_angle)),
                               int(math.degrees(max_angle)) + 1):
                rot_ls.append([[
                    math.radians(rot_x),
                    math.radians(rot_y),
                    math.radians(rot_z)
                ], 0])
    output_file = os.path.join(
        run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0
    while True:
        num_iter_without_pose += 1
        num_total_poses += 1
        # every 1000 poses: log progress and stop if the queue size has changed
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(
                protein[0], target[0], start[0]))
            f = open(output_file, "a")
            f.write(
                "num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, num_jobs_in_queue: "
                "{}\n".format(num_total_poses, len(grid), len(conformer_ls),
                              len(rot_ls), num_jobs_in_queue))
            f.close()
            if num_jobs_in_queue != num_jobs_submitted:
                break
        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))
        # translation
        # NOTE: this rebinds the index parameter; output_file was already built from it
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))
        # rotation
        # continuous random angles while several grid locations remain, otherwise a
        # rotation drawn from the remaining entries of rot_ls
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle,
                                   conformer_center)
        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            num_valid_poses += 1
            # a pose close enough to the target ends the search
            if rmsd.calculate_in_place_rmsd(
                    conformer, conformer.getAtomIndices(), target_lig,
                    target_lig.getAtomIndices()) < rmsd_cutoff:
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.close()
                # the result file is only written if it does not exist yet
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                break
            grid[index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # five failures in a row: drop the grid location with the most failures
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            # one grid location left: give up once a single conformer and rotation remain,
            # otherwise drop the conformer or rotation with the most failures
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.write("No correct poses found\n")
                f.close()
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                        f.write("No correct poses found\n")
                break
            elif len(conformer_ls) > 1 and (len(rot_ls) == 1 or
                                            (len(conformer_ls) + len(rot_ls)) % 2 == 0):
                max_val = max(conformer_ls, key=lambda x: x[1])
                conformer_ls.remove(max_val)
            else:
                max_val = max(rot_ls, key=lambda x: x[1])
                rot_ls.remove(max_val)
            num_iter_without_pose = 0
        else:
            # record the failure against the grid location, conformer and rotation used
            grid[index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                rot_ls[rot_index][1] += 1
def _mentions_own_name(path):
    """
    checks whether some line of the file, once stripped, equals the file's own name
    :param path: (string) '/'-separated path of the file to scan
    :return: (bool) True if such a line exists
    """
    file_name = path.split('/')[-1]
    with open(path, "r") as f:
        return any(line.strip() == file_name for line in f)


def main():
    """
    command line entry point; the positional `task` argument selects what is run

    Tasks handled: all, group, check, all_dist_check, group_dist_check, check_dist_check, all_name_check,
    group_name_check, check_name_check and MAPK14. The all* tasks submit one sbatch job per group that re-runs
    this script with the matching group* task and --index.
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either all, group, check, or MAPK14')
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('run_path', type=str, help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    parser.add_argument('--index', type=int, default=-1, help='for group task, group number')
    parser.add_argument('--dist_dir', type=str, default=os.path.join(os.getcwd(), 'dists'),
                        help='for all_dist_check and group_dist_check task, directiory to place distances')
    parser.add_argument('--name_dir', type=str, default=os.path.join(os.getcwd(), 'names'),
                        help='for all_name_check and group_name_check task, directiory to place unfinished protein, '
                             'target, start groups')
    args = parser.parse_args()

    if args.task == 'all':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        for i in range(len(grouped_files)):
            cmd = 'sbatch -p owners -t 1:00:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group {} {} {} ' \
                  '--index {}"'
            os.system(cmd.format(os.path.join(args.run_path, 'decoy{}.out'.format(i)), args.docked_prot_file,
                                 args.run_path, args.raw_root, i))

    if args.task == 'group':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')
            pv_file = os.path.join(pair_path, '{}-to-{}_glide_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))
            print(num_poses)
            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path, '{}_lig{}.mae'.format(target, i))
                create_cartesian_decoys(lig_file)
            # NOTE(review): only the first protein, target, start group is processed; the nesting level of this
            # break was reconstructed from a flattened source - confirm it belongs to the outer loop
            break

    if args.task == 'check':
        process = []
        num_pairs = 0
        with open(args.docked_prot_file) as fp:
            for line in tqdm(fp, desc='going through protein, target, start groups'):
                if line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                num_pairs += 1
                protein_path = os.path.join(args.raw_root, protein)
                pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
                pose_path = os.path.join(pair_path, 'ligand_poses')

                # only the decoys of pose 0 are inspected
                num_poses = 0
                for i in range(MAX_DECOYS):
                    decoy_file = os.path.join(
                        pose_path, '{}_lig{}.mae'.format(target, str(num_poses) + chr(ord('a') + i)))
                    # the path has to be tested for existence; testing the path string itself is always truthy
                    if not os.path.exists(decoy_file):
                        process.append((protein, target, start))
                        print(decoy_file)
                        break

        print('Missing', len(process), '/', num_pairs)
        print(process)

    if args.task == 'all_dist_check':
        # jobs are submitted for this fixed list of group indices only, not for every group
        groups = [
            31, 32, 151, 176, 186, 187, 189, 194, 195, 198, 225, 226, 322, 332, 333, 341, 343, 452, 453, 460, 487,
            495
        ]

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        for i in groups:
            cmd = 'sbatch -p owners -t 0:20:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group_dist_check {} {} {} ' \
                  '--index {}"'
            os.system(cmd.format(os.path.join(args.run_path, 'decoy{}.out'.format(i)), args.docked_prot_file,
                                 args.run_path, args.raw_root, i))

    if args.task == 'group_dist_check':
        if not os.path.exists(args.dist_dir):
            os.mkdir(args.dist_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)
        save = []

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'ligand_poses')
            pv_file = os.path.join(pair_path, '{}-to-{}_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))
            means = []

            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path, '{}_lig{}.mae'.format(target, i))
                s = list(structure.StructureReader(lig_file))[0]
                c = get_centroid(s)
                dists = []

                # centroid distance between the pose and each of its decoys (suffix a, b, c, ...)
                for j in range(MAX_DECOYS):
                    decoy_file = lig_file[:-4] + chr(ord('a') + j) + '.mae'
                    decoy = list(structure.StructureReader(decoy_file))[0]
                    dists.append(transform.get_vector_magnitude(c - get_centroid(decoy)))

                means.append(statistics.mean(dists))

            save.append(statistics.mean(means))

        with open(os.path.join(args.dist_dir, '{}.pkl'.format(args.index)), 'wb') as outfile:
            pickle.dump(save, outfile)
        print(save)

    if args.task == 'check_dist_check':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if len(os.listdir(args.dist_dir)) != len(grouped_files):
            print('Not all files created')
        else:
            print('All files created')

        errors = []
        for i in range(len(grouped_files)):
            with open(os.path.join(args.dist_dir, '{}.pkl'.format(i)), 'rb') as infile:
                vals = pickle.load(infile)

            # a group is flagged when any of its mean distances is above 2 or below -1
            if any(j > 2 or j < -1 for j in vals):
                print(vals)
                errors.append(i)

        print('Potential errors', len(errors), '/', len(grouped_files))
        print(errors)

    if args.task == 'all_name_check':
        if not os.path.exists(args.name_dir):
            os.mkdir(args.name_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        for i in range(len(grouped_files)):
            cmd = 'sbatch -p owners -t 0:20:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group_name_check {} {} {} ' \
                  '--index {}"'
            os.system(cmd.format(os.path.join(args.run_path, 'name{}.out'.format(i)), args.docked_prot_file,
                                 args.run_path, args.raw_root, i))

    if args.task == 'group_name_check':
        if not os.path.exists(args.name_dir):
            os.mkdir(args.name_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)
        unfinished = []

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'ligand_poses')
            pv_file = os.path.join(pair_path, '{}-to-{}_glide_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))

            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path, '{}_lig{}.mae'.format(target, i))

                # the pose file first, then its decoys in order; scanning stops at the first file that
                # does not contain its own name
                candidates = [lig_file]
                candidates.extend(lig_file[:-4] + chr(ord('a') + j) + '.mae' for j in range(MAX_DECOYS))
                unnamed = next((path for path in candidates if not _mentions_own_name(path)), None)
                if unnamed is not None:
                    print(unnamed)
                    unfinished.append((protein, target, start))
                    break
            # NOTE(review): only the first protein, target, start group is checked; the nesting level of this
            # break was reconstructed from a flattened source - confirm it belongs to the outer loop
            break

        # the result is only printed, nothing is written to name_dir
        print(unfinished)

    if args.task == 'check_name_check':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if len(os.listdir(args.name_dir)) != len(grouped_files):
            print('Not all files created')
        else:
            print('All files created')

        errors = []
        for i in range(len(grouped_files)):
            with open(os.path.join(args.name_dir, '{}.pkl'.format(i)), 'rb') as infile:
                unfinished = pickle.load(infile)
            errors.extend(unfinished)

        print('Errors', len(errors), '/', len(process))
        print(errors)

    if args.task == 'MAPK14':
        protein = 'MAPK14'
        ligs = ['3D83', '4F9Y']
        for target in ligs:
            for start in ligs:
                if target != start:
                    pv_file = os.path.join(
                        args.raw_root,
                        '{}/{}-to-{}/{}-to-{}_pv.maegz'.format(protein, target, start, target, start))
                    num_poses = len(list(structure.StructureReader(pv_file)))
                    for i in range(num_poses):
                        if i == 101:
                            break
                        lig_file = '{}/{}/{}-to-{}/{}_lig{}.mae'.format(args.raw_root, protein, target, start,
                                                                        target, i)
                        create_decoys(lig_file)
def run_group(grouped_files, raw_root, data_root, index, max_poses, decoy_type, max_decoys, mean_translation,
              stdev_translation, min_angle, max_angle, num_conformers, grid_size):
    """
    creates decoys for each protein, target, start group
    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory where raw data will be placed
    :param data_root: (string) pdbbind directory where raw data will be obtained
    :param index: (int) group number
    :param max_poses: (int) maximum number of glide poses considered
    :param decoy_type: (string) 'ligand_poses', 'cartesian_poses' or 'conformer_poses'; also the name of the
                       folder, inside the pair folder, that receives the poses
    :param max_decoys: (int) maximum number of decoys created per glide pose
    :param mean_translation: (float) mean distance decoys are translated
    :param stdev_translation: (float) stdev of distance decoys are translated
    :param min_angle: (float) minimum angle decoys are rotated
    :param max_angle: (float) maximum angle decoys are rotated
    :param num_conformers: (int) passed to gen_ligand_conformers for the conformer_poses decoy type
    :param grid_size: (int) passed to create_conformer_decoys for the conformer_poses decoy type
    :return:
    """
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, pair)
        pose_path = os.path.join(pair_path, decoy_type)
        dock_root = os.path.join(data_root, '{}/docking/sp_es4/{}'.format(protein, pair))
        struct_root = os.path.join(data_root, '{}/structures/aligned'.format(protein))

        # create folders (raw_root, protein_path and pair_path are all ancestors of pose_path)
        os.makedirs(pose_path, exist_ok=True)

        # add basic files
        basic_files = [
            ('{}/{}_prot.mae'.format(struct_root, start), '{}/{}_prot.mae'.format(pair_path, start)),
            ('{}/{}_prot.mae'.format(struct_root, target), '{}/{}_prot.mae'.format(pair_path, target)),
            ('{}/{}_lig.mae'.format(struct_root, start), '{}/{}_lig.mae'.format(pair_path, start)),
        ]
        for src, dst in basic_files:
            if not os.path.exists(dst):
                os.system('cp {} {}'.format(src, dst))

        # the target ligand becomes pose 0
        lig0_file = '{}/{}_lig0.mae'.format(pose_path, target)
        if not os.path.exists(lig0_file):
            os.system('cp {}/{}_lig.mae {}'.format(struct_root, target, lig0_file))
            # NOTE(review): assumed to run only for a freshly copied file; nesting reconstructed - confirm
            modify_file(lig0_file, '_pro_ligand')

        # add combine glide poses
        pv_file = '{}/{}_glide_pv.maegz'.format(pair_path, pair)
        if not os.path.exists(pv_file):
            os.system('cp {}/{}_pv.maegz {}'.format(dock_root, pair, pv_file))

        if decoy_type == "ligand_poses" or decoy_type == "cartesian_poses":
            # extract glide poses and create decoys; the pose file is parsed once instead of once per pose
            glide_poses = list(structure.StructureReader(pv_file))
            for i, glide_pose in enumerate(glide_poses):
                if i == max_poses:
                    break
                lig_file = os.path.join(pose_path, '{}_lig{}.mae'.format(target, i))
                # pose 0 is the copied target ligand and is never overwritten
                if i != 0:
                    with structure.StructureWriter(lig_file) as all_file:
                        all_file.append(glide_pose)
                if decoy_type == 'cartesian_poses':
                    create_cartesian_decoys(lig_file)
                elif decoy_type == 'ligand_poses':
                    create_decoys(lig_file, max_decoys, mean_translation, stdev_translation, min_angle, max_angle)

        elif decoy_type == "conformer_poses":
            start_lig_file = os.path.join(pair_path, '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            target_lig_file = os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(target))
            start_lig_center = list(get_centroid(start_lig))
            prot_file = os.path.join(pair_path, '{}_prot.mae'.format(start))
            prot = list(structure.StructureReader(prot_file))[0]

            aligned_file = os.path.join(pair_path, "aligned_conformers.mae")
            conformer_file = os.path.join(pair_path, "{}_lig0-out.maegz".format(target))
            log_file = os.path.join(pair_path, '{}_lig0.log'.format(target))

            # conformers are generated and aligned only when no aligned file is present yet
            if not os.path.exists(aligned_file):
                if not os.path.exists(conformer_file):
                    gen_ligand_conformers(target_lig_file, pair_path, num_conformers)
                get_aligned_conformers(conformer_file, target_lig_file, aligned_file)

            conformers = list(structure.StructureReader(aligned_file))
            create_conformer_decoys(conformers, grid_size, start_lig_center, prot, pose_path, target, max_poses,
                                    min_angle, max_angle)

            # remove the intermediate conformer files
            # NOTE(review): assumed to belong to the conformer_poses branch; nesting reconstructed - confirm
            if os.path.exists(log_file):
                os.remove(log_file)
            if os.path.exists(conformer_file):
                os.remove(conformer_file)

        # combine ligands: first structure of every file in pose_path whose name ends in 'mae'
        merge_file = '{}/{}_{}_merge_pv.mae'.format(pair_path, pair, decoy_type)
        if os.path.exists(merge_file):
            os.remove(merge_file)
        with structure.StructureWriter(merge_file) as all_file:
            for file in os.listdir(pose_path):
                if file[-3:] == 'mae':
                    pv = list(structure.StructureReader(os.path.join(pose_path, file)))
                    all_file.append(pv[0])

        # compute mcss
        if not os.path.exists(os.path.join(pair_path, '{}_mcss.csv'.format(pair))):
            compute_protein_mcss([target, start], pair_path)
def main():
    """
    command line entry point; the positional `task` argument selects what is run

    Tasks handled: conformer_all, conformer_group, conformer_check, align_all, align_group, align_check,
    align_combine, run_search, search, check_search, test_search, get_grid_data, combine_search_data, get_dist,
    test_rotate_translate, get_rmsd and check_rotation.
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either align or search')
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('run_path', type=str, help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    parser.add_argument('--protein', type=str, default='', help='protein name')
    parser.add_argument('--target', type=str, default='', help='target ligand name')
    parser.add_argument('--start', type=str, default='', help='start ligand name')
    parser.add_argument('--align_n', type=int, default=10, help='number of alignments processed in each job')
    parser.add_argument('--rotation_search_step_size', type=int, default=1,
                        help='step size between each angle checked, in degrees')
    parser.add_argument('--index', type=int, default=-1, help='grid point group index')
    parser.add_argument('--rmsd_cutoff', type=int, default=2,
                        help='rmsd accuracy cutoff between predicted ligand pose and true ligand pose')
    parser.add_argument('--num_conformers', type=int, default=300, help='maximum number of conformers considered')
    parser.add_argument('--grid_size', type=int, default=6,
                        help='grid size in positive and negative x, y, z directions')
    parser.add_argument('--grid_n', type=int, default=30, help='number of grid_points processed in each job')
    parser.add_argument('--time', dest='get_time', action='store_true')
    parser.add_argument('--no_time', dest='get_time', action='store_false')
    parser.set_defaults(get_time=False)
    parser.add_argument('--remove_prot_h', dest='no_prot_h', action='store_true')
    parser.add_argument('--keep_prot_h', dest='no_prot_h', action='store_false')
    parser.set_defaults(no_prot_h=False)
    parser.add_argument('--prot_pocket_only', dest='pocket_only', action='store_true')
    parser.add_argument('--all_prot', dest='pocket_only', action='store_false')
    parser.set_defaults(pocket_only=False)
    args = parser.parse_args()

    # fixed seed, so random.shuffle(process) below gives the same order on every invocation
    random.seed(0)

    if not os.path.exists(args.run_path):
        os.mkdir(args.run_path)

    # paths of the pair named on the command line; the looping tasks rebuild them per group
    pair = '{}-to-{}'.format(args.target, args.start)
    protein_path = os.path.join(args.raw_root, args.protein)
    pair_path = os.path.join(protein_path, pair)

    if args.task == 'conformer_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_all(process, args.raw_root, args.run_path, args.docked_prot_file)

    elif args.task == 'conformer_group':
        target_lig_file = os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(args.target))
        gen_ligand_conformers(target_lig_file, pair_path, args.num_conformers)
        log_file = os.path.join(pair_path, '{}_lig0.log'.format(args.target))
        if os.path.exists(log_file):
            os.remove(log_file)

    elif args.task == 'conformer_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_check(process, args.raw_root)

    elif args.task == 'align_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_all(process, args.raw_root, args.run_path, args.docked_prot_file, args.align_n)

    elif args.task == 'align_group':
        grouped_files = get_conformer_groups(args.align_n, args.target, args.start, args.protein, args.raw_root)
        # the option is registered as --align_n; the parser defines no `n` attribute
        run_align_group(grouped_files, args.index, args.align_n, args.protein, args.target, args.start,
                        args.raw_root)

    elif args.task == 'align_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_check(process, args.raw_root)

    elif args.task == 'align_combine':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_combine(process, args.raw_root)

    elif args.task == 'run_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        search_system_caller(process, args.raw_root, args.run_path, args.docked_prot_file,
                             args.rotation_search_step_size, args.grid_size, grouped_files)

    elif args.task == 'search':
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        run_search(args.protein, args.target, args.start, args.index, args.raw_root, args.get_time,
                   args.rmsd_cutoff, args.rotation_search_step_size, grouped_files[args.index], args.no_prot_h,
                   args.pocket_only)

    elif args.task == 'check_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        counter = 0
        unfinished = []
        for protein, target, start in process:
            # only the first 10 groups with more than one conformer are inspected
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            counter += 1
            save_folder = os.path.join(os.getcwd(), 'decoy_timing_data',
                                       '{}_{}-to-{}'.format(protein, target, start))
            for i in range(len(grouped_files)):
                if not os.path.exists(os.path.join(save_folder, '{}.csv'.format(i))):
                    unfinished.append((protein, target, start, i))
        print("Missing:", len(unfinished))
        print(unfinished)

    elif args.task == 'test_search':
        run_test_search(args.protein, args.target, args.start, args.raw_root, args.rmsd_cutoff,
                        args.rotation_search_step_size, pair_path, args.no_prot_h, args.pocket_only,
                        args.get_time)

    elif args.task == 'get_grid_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size)

    elif args.task == 'combine_search_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size, True)

    elif args.task == 'get_dist':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        counter = 0
        for protein, target, start in process:
            # only the first 10 groups with more than one conformer are reported
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            counter += 1
            start_lig_file = os.path.join(pair_path, '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            start_lig_center = list(get_centroid(start_lig))
            target_lig_file = os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(target))
            target_lig = list(structure.StructureReader(target_lig_file))[0]
            target_lig_center = list(get_centroid(target_lig))
            # euclidean distance between the start and target ligand centroids
            dist = math.sqrt(((start_lig_center[0] - target_lig_center[0]) ** 2) +
                             ((start_lig_center[1] - target_lig_center[1]) ** 2) +
                             ((start_lig_center[2] - target_lig_center[2]) ** 2))
            print(protein, target, start, dist)

    elif args.task == 'test_rotate_translate':
        # compares this file's translate_structure / rotate_structure with the transform module's versions
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(args.start))
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        translation_vector = np.random.uniform(low=-100, high=100, size=(3))
        transform.translate_structure(schrodinger_prot, translation_vector[0], translation_vector[1],
                                      translation_vector[2])
        translate_structure(custom_prot, translation_vector[0], translation_vector[1], translation_vector[2])
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.array_equal(schrodinger_atoms, custom_atoms):
            print("Translate function works properly")
        else:
            print("Error in translate function")

        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        rotation_vector = np.random.uniform(low=-2 * np.pi, high=2 * np.pi, size=(3))
        rotation_center = np.random.uniform(low=-100, high=100, size=(3))
        rotation_center = [rotation_center[0], rotation_center[1], rotation_center[2]]
        transform.rotate_structure(schrodinger_prot, rotation_vector[0], rotation_vector[1], rotation_vector[2],
                                   rotation_center)
        coords = rotate_structure(custom_prot.getXYZ(copy=False), rotation_vector[0], rotation_vector[1],
                                  rotation_vector[2], rotation_center)
        custom_prot.setXYZ(coords)
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        # rotations are compared with a tolerance instead of exact equality
        if np.amax(np.absolute(schrodinger_atoms - custom_atoms)) < 10 ** -7:
            print("Rotate function works properly")
        else:
            print("Error in rotate function")

    elif args.task == 'get_rmsd':
        conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
        conformers = list(structure.StructureReader(conformer_file))
        target_lig_file = os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        build.delete_hydrogens(target_lig)
        start_lig_file = os.path.join(pair_path, '{}_lig.mae'.format(args.start))
        start_lig = list(structure.StructureReader(start_lig_file))[0]
        start_lig_center = list(get_centroid(start_lig))

        rmsds = []
        for i, conformer in tqdm(enumerate(conformers), desc='going through conformers'):
            # move the conformer centroid onto the start ligand centroid before measuring
            conformer_center = list(get_centroid(conformer))
            translate_structure(conformer, start_lig_center[0] - conformer_center[0],
                                start_lig_center[1] - conformer_center[1],
                                start_lig_center[2] - conformer_center[2])
            rmsds.append((conformer,
                          rmsd.calculate_in_place_rmsd(conformer, conformer.getAtomIndices(), target_lig,
                                                       target_lig.getAtomIndices()),
                          i))

        # NOTE(review): conformer 248 is hard-coded; this raises IndexError with fewer than 249 conformers
        print(rmsds[248][1], rmsds[248][2])
        file = os.path.join(pair_path, 'translated_conformer_248.mae')
        with structure.StructureWriter(file) as best_match:
            best_match.append(rmsds[248][0])

    elif args.task == 'check_rotation':
        # keeps only atom 1 of the target ligand and compares one combined rotation with three single-axis ones
        target_lig_file = os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        remove = [i for i in target_lig.getAtomIndices() if i != 1]
        target_lig.deleteAtoms(remove)
        center = list(get_centroid(target_lig))
        print("ROTATE 5,5,5")
        rotate_structure(target_lig, math.radians(5), math.radians(5), math.radians(5), center)

        target_lig_2 = list(structure.StructureReader(target_lig_file))[0]
        target_lig_2.deleteAtoms(remove)
        center = list(get_centroid(target_lig_2))
        print("ROTATE 5,0,0")
        rotate_structure(target_lig_2, math.radians(5), 0, 0, center)
        print("ROTATE 0,5,0")
        rotate_structure(target_lig_2, 0, math.radians(5), 0, center)
        print("ROTATE 0,0,5")
        rotate_structure(target_lig_2, 0, 0, math.radians(5), center)

        print(rmsd.calculate_in_place_rmsd(target_lig, target_lig.getAtomIndices(), target_lig_2,
                                           target_lig_2.getAtomIndices()))
        print(target_lig.getXYZ(copy=False))
        print(target_lig_2.getXYZ(copy=False))
# NOTE(review): fragment of a larger routine; `save`, `i`, `group`, `ligands` and `root` are defined outside
# the visible code and the `continue` below needs an enclosing loop over s_file. Nesting was reconstructed
# from a flattened source - confirm against the original file.
# Writes the shell script '<save>/run/grid<i>_in.sh' plus one '<out_f>.in' input file per entry of `group`,
# then submits the script with sbatch.
with open('{}/run/grid{}_in.sh'.format(save, i), 'w') as f:
    for s_file in group:
        # the first 12 characters of the file name serve as output folder and as input file stem
        out_f = s_file[:12]
        os.system('mkdir -p {}/{}'.format(save, out_f))
        with open('{}/{}/{}.in'.format(save, out_f, out_f), 'w') as f_in:
            # names that are not exactly 16 characters long are skipped; the .in file opened above
            # has already been created and stays empty
            if len(s_file) != 16:
                continue
            # the grid is centred on the centroid of the ligand named by the first 4 characters
            s = next(StructureReader(ligands + s_file[:4] + '_lig.mae'))
            c = get_centroid(s)
            x, y, z = c[:3]
            f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
            f_in.write('GRIDFILE {}.zip\n'.format(out_f))
            f_in.write('INNERBOX 15,15,15\n')
            f_in.write('OUTERBOX 30,30,30\n')
            f_in.write('RECEP_FILE {}/{}\n'.format(root, s_file))
        # one shebang / cd / glide triple is appended to the script per processed file
        f.write('#!/bin/bash\n')
        f.write('cd {}/{}\n'.format(save, out_f))
        f.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(out_f))
# changes the working directory of the whole process before submitting
os.chdir('{}/run'.format(save))
os.system(
    'sbatch -p owners -t 02:00:00 -o grid{}.out grid{}_in.sh'.format(
        i, i))