# Imports assumed by the functions below. The schrodinger.* module paths follow
# the Schrodinger Python API; the rotamers module path is an assumption and is
# aliased to avoid clashing with the rotamers() function defined further down.
import argparse
import math
import os
import pickle
import random
import statistics
import time

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from schrodinger import structure
from schrodinger.structutils import build, rmsd, transform
from schrodinger.structutils.interactions import steric_clash
from schrodinger.structutils.transform import get_centroid
from schrodinger.protein import rotamers as rotamer_module


def create_muts(ligand, ligands):
    """Find residues in `ligand`'s aligned structure that clash with every other
    ligand and mutate them to alanine, writing one mutated protein per
    (target, ligand) pair. Relies on the module-level globals `protein`,
    `combind_root`, and `save_location` defined elsewhere in the project.
    """
    cutoff = 2
    conflict_dict = {ligand: {}}
    for target in ligands:
        if ligand == target:
            continue
        start_ending = '{}/structures/aligned_files/{}/{}_out.mae'.format(protein, ligand, ligand)
        start_s = list(structure.StructureReader(combind_root + start_ending))[0]
        target_ending = '{}/structures/aligned_files/{}/{}_out.mae'.format(protein, target, target)
        target_s = list(structure.StructureReader(combind_root + target_ending))[0]
        mutate_list = []
        mutate_info = []
        for m in list(start_s.molecule):
            if len(m.residue) == 1:
                continue
            for r in list(m.residue):
                r_atoms = [a.index for a in list(r.atom)]
                target_lig = [a.index for a in target_s.atom if a.chain == 'L']
                clash = steric_clash.clash_volume(start_s, r_atoms, target_s, target_lig)
                if clash > cutoff:
                    mutate_info.append({'Name': r.pdbres, 'Number': r.resnum, 'Clash volume': clash})
                    mutate_list.append(list(r.atom)[0])
        conflict_dict[ligand][target] = mutate_info
        for atom in mutate_list:
            build.mutate(start_s, atom, 'ALA')
        # Keep only the protein (drop the ligand chain 'L') before writing.
        prot_st = start_s.extract([a.index for a in start_s.atom if a.chain != 'L'])
        prot_wr = structure.StructureWriter('{}/{}_to_{}.mae'.format(save_location, target, ligand))
        prot_wr.append(prot_st)
        prot_wr.close()
    with open('/home/users/sidhikab/flexibility_project/mutations/Data/conflict/' + ligand, 'wb') as outfile:
        pickle.dump(conflict_dict, outfile)
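# Hypothetical usage of create_muts (illustration only): the ligand identifiers
# are placeholders, and the module-level globals `protein`, `combind_root`, and
# `save_location` that create_muts reads are assumed to be set elsewhere.
#
#     ligands = ['2rkf', '2rkg']
#     for lig in ligands:
#         create_muts(lig, ligands)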
def create_conformer_decoys(conformers, grid_size, start_lig_center, prot, pose_path,
                            target, max_poses, min_angle, max_angle):
    """Randomly translate conformers onto a grid around the start ligand's centroid,
    apply a random rotation, and keep poses whose clash volume with the protein is
    below 200 A^3."""
    num_iter_without_pose = 0
    num_valid_poses = 1
    grid = []
    for dx in range(-grid_size, grid_size):
        for dy in range(-grid_size, grid_size):
            for dz in range(-grid_size, grid_size):
                grid.append([[dx, dy, dz], 0])

    while num_valid_poses < max_poses:
        num_iter_without_pose += 1
        conformer = random.choice(conformers)
        conformer_center = list(get_centroid(conformer))

        # translation
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        transform.translate_structure(conformer,
                                      start_lig_center[0] - conformer_center[0] + grid_loc[0],
                                      start_lig_center[1] - conformer_center[1] + grid_loc[1],
                                      start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # rotation
        x_angle = np.random.uniform(min_angle, max_angle)
        y_angle = np.random.uniform(min_angle, max_angle)
        z_angle = np.random.uniform(min_angle, max_angle)
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle, conformer_center)

        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            decoy_file = os.path.join(pose_path, "{}_lig{}.mae".format(target, num_valid_poses))
            with structure.StructureWriter(decoy_file) as decoy:
                decoy.append(conformer)
            modify_file(decoy_file, '_pro_ligand')
            modify_file(decoy_file, '{}_lig0.mae'.format(target))
            num_valid_poses += 1
            grid[index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # Drop the grid cell that has failed most often.
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        else:
            grid[index][1] += 1
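# Hedged usage sketch for the create_conformer_decoys above. The file names
# follow the '{start}_prot.mae' / conformer-file patterns used elsewhere in
# these scripts, but the start-ligand file name, grid size, pose count, and
# angle bounds are placeholders, and modify_file (called inside
# create_conformer_decoys) is assumed to be defined elsewhere. Note that a
# second create_conformer_decoys with a different signature appears later in
# this file (from another script); this example targets the version directly
# above.
def example_decoy_generation(pair_path, pose_path, target, start):
    conformers = list(structure.StructureReader(
        os.path.join(pair_path, 'aligned_to_start_without_hydrogen_conformers.mae')))
    prot = list(structure.StructureReader(
        os.path.join(pair_path, '{}_prot.mae'.format(start))))[0]
    start_lig = list(structure.StructureReader(
        os.path.join(pair_path, '{}_lig.mae'.format(start))))[0]  # assumed file name
    start_lig_center = list(get_centroid(start_lig))  # (x, y, z, 0.0)
    create_conformer_decoys(conformers, grid_size=6, start_lig_center=start_lig_center,
                            prot=prot, pose_path=pose_path, target=target,
                            max_poses=100, min_angle=-np.pi, max_angle=np.pi)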
def rotamers(rot_s, rot_r, s, r, cutoff):
    """For residue `rot_r` in structure `rot_s`, enumerate its rotamer library,
    count rotamers whose clash volume with the rest of the protein is below
    `cutoff`, and report average RMSDs to the reference residue `r` in `s`."""
    try:
        # The rotamer library module is imported as rotamer_module (see top of
        # file) so that it is not shadowed by this function's name.
        rotamer_lib = rotamer_module.Rotamers(rot_s, list(rot_r.atom)[0])
        a_ls = r.getAtomList()
        r_rmsd_ls = []
        rmsd_ls = []
        counter = 0
        for k, rotamer in enumerate(list(rotamer_lib.rotamers)):
            rotamer.apply()
            rot_a_ls = rot_r.getAtomList()
            no_r_a_ls = [a.index for a in rot_s.atom
                         if a.index not in rot_a_ls and a.chain != 'L']
            clash = steric_clash.clash_volume(rot_s, rot_a_ls, rot_s, no_r_a_ls)
            if 'LEU' in r.pdbres and r.resnum == 167:
                # Debug output for one specific residue.
                print(k, clash)
            if clash < cutoff:
                counter += 1
                r_rmsd_ls.append(rmsd.calculate_in_place_rmsd(s, a_ls, rot_s, rot_a_ls))
            rmsd_ls.append(rmsd.calculate_in_place_rmsd(s, a_ls, rot_s, rot_a_ls))
        num_rots = len(rotamer_lib.rotamers)
        avg_rot_rmsd = safeAvg(num_rots, rmsd_ls)
        num_r_rots = len(r_rmsd_ls)
        avg_r_rot_rmsd = safeAvg(num_r_rots, r_rmsd_ls)
    except Exception as e:
        # ALA, GLY, and PRO have no rotamer library, so failures there are expected.
        if 'ALA' not in r.pdbres and 'GLY' not in r.pdbres and 'PRO' not in r.pdbres:
            print(e)
        num_rots = 0
        avg_rot_rmsd = 0
        num_r_rots = 0
        avg_r_rot_rmsd = 0
    return (num_rots, avg_rot_rmsd, num_r_rots, avg_r_rot_rmsd)
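# safeAvg is called above but not defined in this file; a minimal sketch
# consistent with how it is used (returns 0 when there is nothing to average):
def safeAvg(num, values):
    if num == 0 or len(values) == 0:
        return 0
    return sum(values) / len(values)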
def run_group(grouped_files, raw_root, index, clash_dir):
    """For each (protein, target, start) pair in group `index`, compute the clash
    volume between the starting protein and each of the 100 conformer poses, and
    pickle the results per group."""
    clashes = {}
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, pair)
        pose_path = os.path.join(pair_path, 'conformer_poses')
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(start))
        prot = list(structure.StructureReader(prot_file))[0]
        clashes[(protein, target, start)] = []
        for i in range(100):
            decoy_file = os.path.join(pose_path, "{}_lig{}.mae".format(target, i))
            s = list(structure.StructureReader(decoy_file))[0]
            clashes[(protein, target, start)].append(steric_clash.clash_volume(prot, struc2=s))
    # Use a context manager so the pickle is flushed and closed.
    with open(os.path.join(clash_dir, '{}.pkl'.format(index)), 'wb') as outfile:
        pickle.dump(clashes, outfile)
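# group_files is assumed to split the full (protein, target, start) list into
# chunks of n so that each job can index its own group; a minimal sketch
# consistent with how run_group indexes grouped_files:
def group_files(n, process):
    return [process[i:i + n] for i in range(0, len(process), n)]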
def run_group(grouped_files, raw_root, index, clash_dir):
    """Variant of run_group from a separate script: records only the clash volume
    between the starting protein and the reference ligand pose (lig0) for each
    (protein, target, start) pair, and pickles the results per group.

    :param grouped_files: (list) list of (protein, target, start) groups
    :param raw_root: (string) directory where raw data is placed
    :param index: (int) group number
    :param clash_dir: (string) directory to place clash pickles
    :return:
    """
    clash_dict = {}
    for protein, target, start in grouped_files[index]:
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, '{}-to-{}'.format(target, start))
        pose_path = os.path.join(pair_path, 'ligand_poses')
        struct_path = os.path.join(pair_path, '{}_prot.mae'.format(start))
        lig_path = os.path.join(pose_path, '{}_lig0.mae'.format(target))
        s1 = list(structure.StructureReader(struct_path))[0]
        lig = list(structure.StructureReader(lig_path))[0]
        clash_dict[(protein, target, start)] = steric_clash.clash_volume(s1, struc2=lig)
    with open(os.path.join(clash_dir, '{}.pkl'.format(index)), 'wb') as outfile:
        pickle.dump(clash_dict, outfile)
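# Hedged sketch of reading the per-group pickles written by run_group back into
# one dictionary (the clash_dir layout and '{index}.pkl' naming follow the code
# above); the helper name is an assumption.
def load_clash_dicts(clash_dir):
    merged = {}
    for fname in os.listdir(clash_dir):
        with open(os.path.join(clash_dir, fname), 'rb') as infile:
            merged.update(pickle.load(infile))
    return merged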
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot, rotation_search_step_size):
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(conformer,
                                      start_lig_center[0] - conformer_center[0] + grid_loc[0],
                                      start_lig_center[1] - conformer_center[1] + grid_loc[1],
                                      start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        start = time.time()
        transform.rotate_structure(conformer, math.radians(-30 - rotation_search_step_size),
                                   0, 0, conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average schrodinger translate time =", statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))

    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(conformer,
                            start_lig_center[0] - conformer_center[0] + grid_loc[0],
                            start_lig_center[1] - conformer_center[1] + grid_loc[1],
                            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        start = time.time()
        rotate_structure(conformer, math.radians(-30 - rotation_search_step_size), 0, 0, conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))

    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)
    conformer_file = os.path.join(pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        translate_structure(conformer,
                            start_lig_center[0] - conformer_center[0] + grid_loc[0],
                            start_lig_center[1] - conformer_center[1] + grid_loc[1],
                            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))
        # keep track of rotation angles
        rotate_structure(conformer, math.radians(-30 - rotation_search_step_size), 0, 0, conformer_center)
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0
        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation
            rotate_structure(conformer, rotation_search_step_size_rad,
                             math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                             conformer_center)
            x_so_far += 1
            y_so_far += -30 - rotation_search_step_size - y_so_far
            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation
                rotate_structure(conformer, 0, rotation_search_step_size_rad,
                                 math.radians(-30 - rotation_search_step_size - z_so_far),
                                 conformer_center)
                y_so_far += 1
                z_so_far += -30 - rotation_search_step_size - z_so_far
                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0, rotation_search_step_size_rad, conformer_center)
                    z_so_far += 1
                    # get clash_iterator
                    start = time.time()
                    max([x[2] for x in list(steric_clash.clash_iterator(prot, struc2=conformer))])
                    end = time.time()
                    clash_iterator_times.append(end - start)
                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)
                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer, conformer.getAtomIndices(),
                                                 target_lig, target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)
                    if len(clash_iterator_times) == 1000:
                        print("Average clash iterator time =", statistics.mean(clash_iterator_times))
                        print("Average clash volume time =", statistics.mean(clash_volume_times))
                        print("Average rmsd time =", statistics.mean(rmsd_times))
                        return
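# The "custom" translate_structure/rotate_structure helpers timed above are not
# defined in this file. A minimal NumPy sketch consistent with how they are
# called (same argument order as transform.translate_structure/rotate_structure,
# angles in radians) is given below; it relies on the numpy import at the top of
# this file and on Structure.getXYZ()/setXYZ(), and is an assumption, not the
# project's actual implementation.
def translate_structure(st, x, y, z):
    # Shift every atom coordinate by the vector (x, y, z).
    xyz = st.getXYZ(copy=True)
    st.setXYZ(xyz + np.array([x, y, z]))


def rotate_structure(st, x_angle, y_angle, z_angle, rot_center):
    # Rotate all atoms about rot_center by Euler angles (radians), applied x, then y, then z.
    cx, sx = np.cos(x_angle), np.sin(x_angle)
    cy, sy = np.cos(y_angle), np.sin(y_angle)
    cz, sz = np.cos(z_angle), np.sin(z_angle)
    rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    rot = rz @ ry @ rx
    center = np.array(rot_center[:3])
    xyz = st.getXYZ(copy=True)
    st.setXYZ((xyz - center) @ rot.T + center)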
def create_conformer_decoys(save_path, run_path, conformers, grid, num_jobs_submitted,
                            start_lig_center, target_lig, prot, min_angle, max_angle,
                            rmsd_cutoff, protein, target, start, index):
    conformer_ls = [[c, 0] for c in conformers]
    rot_ls = []
    for rot_x in range(int(math.degrees(min_angle)), int(math.degrees(max_angle)) + 1):
        for rot_y in range(int(math.degrees(min_angle)), int(math.degrees(max_angle)) + 1):
            for rot_z in range(int(math.degrees(min_angle)), int(math.degrees(max_angle)) + 1):
                rot_ls.append([[math.radians(rot_x), math.radians(rot_y), math.radians(rot_z)], 0])
    output_file = os.path.join(run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0
    while True:
        num_iter_without_pose += 1
        num_total_poses += 1
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(protein[0], target[0], start[0]))
            f = open(output_file, "a")
            f.write("num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, "
                    "num_jobs_in_queue: {}\n".format(num_total_poses, len(grid), len(conformer_ls),
                                                     len(rot_ls), num_jobs_in_queue))
            f.close()
            if num_jobs_in_queue != num_jobs_submitted:
                break
        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))

        # translation
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        transform.translate_structure(conformer,
                                      start_lig_center[0] - conformer_center[0] + grid_loc[0],
                                      start_lig_center[1] - conformer_center[1] + grid_loc[1],
                                      start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # rotation
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle, conformer_center)

        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            num_valid_poses += 1
            if rmsd.calculate_in_place_rmsd(conformer, conformer.getAtomIndices(),
                                            target_lig, target_lig.getAtomIndices()) < rmsd_cutoff:
                save_file = os.path.join(save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(num_valid_poses))
                f.close()
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".format(num_valid_poses))
                break
            grid[index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                save_file = os.path.join(save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(num_valid_poses))
                f.write("No correct poses found\n")
                f.close()
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".format(num_valid_poses))
                        f.write("No correct poses found\n")
                break
            elif len(conformer_ls) > 1 and (len(rot_ls) == 1 or
                                            (len(conformer_ls) + len(rot_ls)) % 2 == 0):
                max_val = max(conformer_ls, key=lambda x: x[1])
                conformer_ls.remove(max_val)
            else:
                max_val = max(rot_ls, key=lambda x: x[1])
                rot_ls.remove(max_val)
            num_iter_without_pose = 0
        else:
            grid[index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                rot_ls[rot_index][1] += 1
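# get_jobs_in_queue is used above to decide when the sibling jobs have finished,
# but it is not defined in this file. A hedged sketch, assuming a SLURM scheduler
# and counting this user's queued jobs whose names start with the given prefix
# (the squeue flags used are standard SLURM options):
import getpass
import subprocess


def get_jobs_in_queue(prefix):
    out = subprocess.run(['squeue', '-u', getpass.getuser(), '-h', '-o', '%j'],
                         capture_output=True, text=True).stdout
    return sum(1 for name in out.splitlines() if name.strip().startswith(prefix))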
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either all, group, check, or graph')
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('run_path', type=str,
                        help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    parser.add_argument('save_path', type=str,
                        help='path where the clash distribution graph will be saved')
    parser.add_argument('--index', type=int, default=-1, help='group index')
    parser.add_argument('--clash_dir', type=str, default=os.path.join(os.getcwd(), 'clash'),
                        help='directory where clash pickles are written')
    parser.add_argument('--n', type=int, default=3,
                        help='number of protein, target, start groups processed in group task')
    args = parser.parse_args()

    if not os.path.exists(args.run_path):
        os.mkdir(args.run_path)
    if not os.path.exists(args.clash_dir):
        os.mkdir(args.clash_dir)

    if args.task == 'all':
        process = get_prots(args.docked_prot_file, args.raw_root)
        grouped_files = group_files(args.n, process)
        run_all(args.run_path, grouped_files, args.docked_prot_file, args.raw_root)

    if args.task == 'group':
        process = get_prots(args.docked_prot_file, args.raw_root)
        grouped_files = group_files(args.n, process)
        run_group(grouped_files, args.raw_root, args.index, args.clash_dir)

    if args.task == 'check':
        process = get_prots(args.docked_prot_file, args.raw_root)
        grouped_files = group_files(args.n, process)
        if len(os.listdir(args.clash_dir)) != len(grouped_files):
            print("Num expected: ", len(grouped_files))
            print("Num found: ", len(os.listdir(args.clash_dir)))
        else:
            print("Finished")

    if args.task == 'graph':
        # Single hard-coded example pair; the commented-out block below instead
        # aggregates the pickled clash volumes from all groups.
        protein = 'A0F7J4'
        target = '2rkf'
        start = '2rkg'
        pair = '{}-to-{}'.format(target, start)
        protein_path = os.path.join(args.raw_root, protein)
        pair_path = os.path.join(protein_path, pair)
        pose_path = os.path.join(pair_path, 'conformer_poses')
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(start))
        prot = list(structure.StructureReader(prot_file))[0]
        clashes = []
        for i in range(100):
            decoy_file = os.path.join(pose_path, "{}_lig{}.mae".format(target, i))
            s = list(structure.StructureReader(decoy_file))[0]
            clashes.append(steric_clash.clash_volume(prot, struc2=s))

        # clashes = []
        # for file in os.listdir(args.clash_dir):
        #     infile = open(os.path.join(args.clash_dir, file), 'rb')
        #     clash = pickle.load(infile)
        #     infile.close()
        #     for lig in clash:
        #         clashes.extend(clash[lig])

        fig, ax = plt.subplots()
        sns.distplot(clashes, hist=False)
        plt.title('Clash Distributions for A0F7J4 2rkf-to-2rkg')
        plt.xlabel('clash volume')
        plt.ylabel('frequency')
        fig.savefig(args.save_path)
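# Example invocation (the script name clash.py and all paths are placeholders);
# Schrodinger's bundled Python is assumed because the schrodinger.* modules are
# required:
#
#     $SCHRODINGER/run python3 clash.py group docked_prot_file.txt ./run ./raw ./clash.png --index 0 --n 3
#
# A standard entry-point guard is shown here for completeness; the original
# script presumably invokes main() the same way.
if __name__ == '__main__':
    main()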