# The functions below rely on the module-level imports of the surrounding file
# (assumed here, not shown in this section): math, os, sys, time, warnings,
# numpy as np, MDAnalysis as mda, scipy.spatial.cKDTree,
# scipy.spatial.distance.cdist, plus PyRod's helper functions and lookup
# dictionaries (setup_logger, update_user, update_progress, sel_cutoff_dict,
# hb_dist_dict, hb_angl_dict, grid_list_dict, ...).


def generate_exclusion_volumes(dmif, directory, debugging, shape_cutoff, restrictive):
    """ This function generates exclusion volumes. The exclusion volumes are described with a list of
    properties as follows.

    Format:
    ------------------------------------------------------------------------
    0   1   2   3               4    5   6    7
    ------------------------------------------------------------------------
    0   ev  M   [0.0,0.0,0.0]   1.0  []  0.0  1.0
    1   ev  M   [2.0,0.0,0.0]   1.0  []  0.0  1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance [not needed for exclusion volumes]
    5 - partner positions [not needed for exclusion volumes]
    6 - partner tolerance [not needed for exclusion volumes]
    7 - weight
    """
    logger = setup_logger('exclusion_volumes', directory, debugging)
    update_user('Generating exclusion volumes.', logger)
    grid_space = 0.5
    exclusion_volume_space = 4
    if restrictive:
        exclusion_volume_space = 2
    grid_tree = cKDTree([[x, y, z] for x, y, z in zip(dmif['x'], dmif['y'], dmif['z'])])
    dtype = [('x', float), ('y', float), ('z', float), ('shape', int), ('count', int)]
    dmif_shape = np.array([(x, y, z, shape, 0) for x, y, z, shape in
                           zip(dmif['x'], dmif['y'], dmif['z'], dmif['shape']) if shape < shape_cutoff],
                          dtype=dtype)
    positions = np.array([[x, y, z] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    shape_grid_size = len(dmif_shape)
    # store number of neighbors with shape score smaller than shape_cutoff for each grid point
    for index in range(shape_grid_size):
        dmif_shape['count'][index] = len(shape_tree.query_ball_point(positions[index], grid_space * 4))
    # sort by neighbor count
    dmif_shape = np.sort(dmif_shape, order='count')
    # rebuild positions and shape_tree
    positions = np.array([[x, y, z] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    used = []
    exclusion_volumes = []
    counter = 1
    start = time.time()
    for index in range(shape_grid_size):
        # grid point index should not be in used list
        if index not in used:
            neighbor_list = shape_tree.query_ball_point(positions[index], exclusion_volume_space / 2)
            # elements of neighbor_list should not be in used list
            if len(set(neighbor_list + used)) == len(neighbor_list) + len(used):
                # grid point should not be at border of grid
                if len(grid_tree.query_ball_point(positions[index], r=grid_space * 2)) == 33:
                    # grid point should not be directly at border of binding pocket
                    if len(shape_tree.query_ball_point(positions[index], r=grid_space)) == 7:
                        # grid point should not be surrounded by grid points outside the binding pocket
                        if len(shape_tree.query_ball_point(positions[index], r=grid_space * 2)) < 33:
                            exclusion_volumes.append([counter, 'ev', 'M', positions[index], 1.0, [], 0.0, 1.0])
                            counter += 1
                            used += neighbor_list
        eta = ((time.time() - start) / (index + 1)) * (shape_grid_size - (index + 1))
        update_progress(float(index + 1) / shape_grid_size, 'Progress of exclusion volume generation', eta)
        logger.debug('Passed grid index {}.'.format(index))
    update_user('Finished with generation of {} exclusion volumes.'.format(len(exclusion_volumes)), logger)
    return exclusion_volumes
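
# Usage sketch (illustrative only, not part of the module): 'dmif' is assumed
# to be the structured numpy array produced by PyRod's dMIF analysis with at
# least the fields 'x', 'y', 'z' and 'shape'; grid points whose shape score
# falls below 'shape_cutoff' are treated as part of the binding pocket.
#
#   exclusion_volumes = generate_exclusion_volumes(dmif, directory='.',
#                                                  debugging=False,
#                                                  shape_cutoff=1,
#                                                  restrictive=False)
#   print('Generated {} exclusion volumes.'.format(len(exclusion_volumes)))
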
def generate_library(pharmacophore_path, output_format, library_dict, library_path, pyrod_pharmacophore,
                     directory, debugging):
    """ This function writes a combinatorial pharmacophore library. """
    logger = setup_logger('library', directory, debugging)
    update_user('Starting library generation.', logger)
    template_pharmacophore = pharmacophore_reader(pharmacophore_path, pyrod_pharmacophore, logger)
    pharmacophore_library = []
    essential_hb, essential_hi, essential_ai, essential_ii = [], [], [], []
    optional_hb, optional_hi, optional_ai, optional_ii = [], [], [], []
    exclusion_volumes = []
    # analyzing pharmacophore
    for index, feature in enumerate(template_pharmacophore):
        if feature[1] == 'ev':
            exclusion_volumes.append(feature)
        else:
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                if feature[2] == 'O':
                    optional_hb.append(index)
                else:
                    essential_hb.append(index)
            elif feature[1] == 'hi':
                if feature[2] == 'O':
                    optional_hi.append(index)
                else:
                    essential_hi.append(index)
            elif feature[1] in ['pi', 'ni']:
                if feature[2] == 'O':
                    optional_ii.append(index)
                else:
                    essential_ii.append(index)
            elif feature[1] == 'ai':
                if feature[2] == 'O':
                    optional_ai.append(index)
                else:
                    essential_ai.append(index)
    essential_features = essential_hb + essential_hi + essential_ai + essential_ii
    for hb_combination in combine_features(optional_hb,
                                           library_dict['minimal hydrogen bonds'] - len(essential_hb),
                                           library_dict['maximal hydrogen bonds'] - len(essential_hb) + 1):
        for hi_combination in combine_features(optional_hi,
                                               library_dict['minimal hydrophobic interactions'] - len(essential_hi),
                                               library_dict['maximal hydrophobic interactions'] -
                                               len(essential_hi) + 1):
            for ai_combination in combine_features(optional_ai,
                                                   library_dict['minimal aromatic interactions'] - len(essential_ai),
                                                   library_dict['maximal aromatic interactions'] -
                                                   len(essential_ai) + 1):
                for ii_combination in combine_features(optional_ii,
                                                       library_dict['minimal ionizable interactions'] -
                                                       len(essential_ii),
                                                       library_dict['maximal ionizable interactions'] -
                                                       len(essential_ii) + 1):
                    pharmacophore = (essential_features + hb_combination + hi_combination + ai_combination +
                                     ii_combination)
                    if evaluate_pharmacophore(pharmacophore, template_pharmacophore, library_dict,
                                              pyrod_pharmacophore):
                        pharmacophore_library.append(pharmacophore)
    # estimate maximal library size and ask user if number and space of pharmacophores is okay
    pharmacophore_writer(template_pharmacophore, [output_format], 'template_pharmacophore', library_path, logger)
    pharmacophore_library_size = bytes_to_text(
        os.path.getsize('{}/{}.{}'.format(library_path, 'template_pharmacophore', output_format)) *
        len(pharmacophore_library))
    user_prompt = ''
    while user_prompt not in ['yes', 'no']:
        user_prompt = input('{} pharmacophores will be written taking about {} of space.\n'
                            'Do you want to continue? [yes/no]: '.format(len(pharmacophore_library),
                                                                         pharmacophore_library_size))
        if user_prompt == 'no':
            sys.exit()
    start = time.time()
    # write pharmacophores
    maximal_exclusion_volume_id = max([exclusion_volume[0] for exclusion_volume in exclusion_volumes])
    for counter, index_pharmacophore in enumerate(pharmacophore_library):
        extra_exclusion_volumes = []
        extra_ev_counter = 1
        pharmacophore = []
        for index_feature in index_pharmacophore:
            feature = template_pharmacophore[index_feature]
            feature[2] = 'M'
            pharmacophore.append(feature)
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                extra_exclusion_volumes.append([maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                                                feature[5][0], 1.0, [], 0.0, 0.0])
                extra_ev_counter += 1
                if feature[1] in ['ha2', 'hd2', 'hda']:
                    extra_exclusion_volumes.append([maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                                                    feature[5][1], 1.0, [], 0.0, 0.0])
                    extra_ev_counter += 1
        pharmacophore_writer(pharmacophore + exclusion_volumes + extra_exclusion_volumes, [output_format],
                             str(counter), library_path, logger)
        update_progress((counter + 1) / len(pharmacophore_library),
                        'Writing {} pharmacophores'.format(len(pharmacophore_library)),
                        ((time.time() - start) / (counter + 1)) *
                        (len(pharmacophore_library) - (counter + 1)))
    update_user('Wrote pharmacophores to {}.'.format(library_path), logger)
    return
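
# Usage sketch (illustrative; the keys shown are the ones this function itself
# reads from 'library_dict' -- evaluate_pharmacophore may consume additional
# keys, and the file names are placeholders):
#
#   library_dict = {'minimal hydrogen bonds': 1, 'maximal hydrogen bonds': 3,
#                   'minimal hydrophobic interactions': 1,
#                   'maximal hydrophobic interactions': 2,
#                   'minimal aromatic interactions': 0,
#                   'maximal aromatic interactions': 1,
#                   'minimal ionizable interactions': 0,
#                   'maximal ionizable interactions': 1}
#   generate_library('template.pml', 'pml', library_dict, './library',
#                    pyrod_pharmacophore=True, directory='.', debugging=False)
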
def trajectory_analysis(topology, trajectory, grid_score, grid_partners, frame_counter, total_number_of_frames,
                        first_frame, last_frame, step_size, metal_names, counter, directory, debugging,
                        get_partners, trajectory_time, results):
    logger = setup_logger('_'.join(['dmif_trajectory', str(counter)]), directory, debugging)
    logger.info('Started analysis of trajectory {}.'.format(counter))
    if debugging:
        u = mda.Universe(topology, trajectory)
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            u = mda.Universe(topology, trajectory)
    dtype = [('atomid', int), ('resname', 'U10'), ('resid', int), ('name', 'U10'), ('type', 'U10')]
    topology = np.array([(a, b, c, d, e) for a, b, c, d, e in
                         zip(range(len(u.atoms.resnames)), u.atoms.resnames, u.atoms.resids, u.atoms.names,
                             u.atoms.types)], dtype=dtype)
    positions = np.array([[x, y, z] for x, y, z in zip(grid_score['x'], grid_score['y'], grid_score['z'])])
    x_minimum, x_maximum, y_minimum, y_maximum, z_minimum, z_maximum = grid_characteristics(positions)[:-1]
    tree = cKDTree(positions)
    main_atoms = main_selection(topology)
    hd_atomids, hd_types, hd_hydrogen_atomid_lists = hd_selection(main_atoms)
    ha_atomids, ha_types = ha_selection(main_atoms)
    hi_atomids = hi_selection(main_atoms)
    ni_atomids = ni_selection(main_atoms)
    pi_atomids = pi_selection(main_atoms)
    ai_atomids = ai_selection(main_atoms)
    metal_atomids = metal_selection(topology, metal_names)
    for frame, _ in enumerate(u.trajectory[first_frame:last_frame:step_size]):
        # create index collectors
        shape_inds = []
        ha_inds = []
        ha2_inds = []
        hd_inds = []
        hd2_inds = []
        hda_inds = []
        tw_inds = []
        h2o_inds = []
        positions = u.atoms.positions
        h2os_os_box_inds = topology[((topology['resname'] == 'HOH') & (topology['name'] == 'O') &
                                     (positions[:, 0] >= x_minimum) & (positions[:, 0] <= x_maximum) &
                                     (positions[:, 1] >= y_minimum) & (positions[:, 1] <= y_maximum) &
                                     (positions[:, 2] >= z_minimum) & (positions[:, 2] <= z_maximum))]['atomid']
        if len(h2os_os_box_inds) > 0:
            tree_h2os = cKDTree(positions[h2os_os_box_inds])
            if len(hd_atomids) > 0:
                hd_positions = positions[hd_atomids]
                hd_lists = tree_h2os.query_ball_tree(cKDTree(hd_positions), sel_cutoff_dict['hb'])
            else:
                hd_positions = []
                hd_lists = [[]] * len(h2os_os_box_inds)
            if len(ha_atomids) > 0:
                ha_positions = positions[ha_atomids]
                ha_lists = tree_h2os.query_ball_tree(cKDTree(ha_positions), sel_cutoff_dict['hb'])
            else:
                ha_positions = []
                ha_lists = [[]] * len(h2os_os_box_inds)
            if len(hi_atomids) > 0:
                hi_positions = positions[hi_atomids]
                hi_lists = tree_h2os.query_ball_tree(cKDTree(hi_positions), sel_cutoff_dict['hi'])
            else:
                hi_positions = []
                hi_lists = [[]] * len(h2os_os_box_inds)
            if len(ni_atomids) > 0:
                ni_positions = [((x + y) / 2) for x, y in
                                zip(positions[ni_atomids[::2]], positions[ni_atomids[1::2]])]
                ni_lists = tree_h2os.query_ball_tree(cKDTree(ni_positions), sel_cutoff_dict['ii'])
            else:
                ni_positions = []
                ni_lists = [[]] * len(h2os_os_box_inds)
            if len(pi_atomids) > 0:
                pi_positions = [((x + y) / 2) for x, y in
                                zip(positions[pi_atomids[::2]], positions[pi_atomids[1::2]])]
                pi_lists = tree_h2os.query_ball_tree(cKDTree(pi_positions), sel_cutoff_dict['ii'])
            else:
                pi_positions = []
                pi_lists = [[]] * len(h2os_os_box_inds)
            if len(ai_atomids) > 0:
                ai_positions = [((x + y + z) / 3) for x, y, z in
                                zip(positions[ai_atomids[::3]], positions[ai_atomids[1::3]],
                                    positions[ai_atomids[2::3]])]
                ai_normals = [normal(a, b, c) for a, b, c in
                              zip(positions[ai_atomids[::3]], ai_positions, positions[ai_atomids[2::3]])]
                ai_lists = tree_h2os.query_ball_tree(cKDTree(ai_positions), sel_cutoff_dict['ai'])
            else:
                ai_positions = []
                ai_normals = []
                ai_lists = [[]] * len(h2os_os_box_inds)
            if len(metal_atomids) > 0:
                metal_positions = positions[metal_atomids]
                metal_lists = tree_h2os.query_ball_tree(cKDTree(metal_positions), sel_cutoff_dict['metal'])
            else:
                metal_positions = []
                metal_lists = [[]] * len(h2os_os_box_inds)
        else:
            h2os_os_box_inds = []
            hd_positions, ha_positions, hi_positions, ni_positions = [], [], [], []
            pi_positions, ai_positions, ai_normals, metal_positions = [], [], [], []
            hd_lists, ha_lists, hi_lists, ni_lists, pi_lists, ai_lists, metal_lists = [], [], [], [], [], [], []
        for o_ind, hd_list, ha_list, hi_list, ni_list, pi_list, ai_list, metal_list in \
                zip(h2os_os_box_inds, hd_lists, ha_lists, hi_lists, ni_lists, pi_lists, ai_lists, metal_lists):
            ha, ha_i, hd, hd_i, hi, pi, ni, ai, ai_i, ai_n = 0, [], 0, [], 0, 0, 0, 0, [], []
            o_coor, h1_coor, h2_coor = positions[o_ind], positions[o_ind + 1], positions[o_ind + 2]
            # hydrogen bond acceptor feature
            for hd_ind in hd_list:
                hd_coor, hd_type, hd_hydrogen_coors = [hd_positions[hd_ind], hd_types[hd_ind],
                                                       positions[hd_hydrogen_atomid_lists[hd_ind]]]
                if distance(o_coor, hd_coor) <= hb_dist_dict[hd_type]:
                    for hd_hydrogen_coor in hd_hydrogen_coors:
                        if angle(o_coor, hd_hydrogen_coor, hd_coor) >= hb_angl_dict[hd_type]:
                            ha += 1
                            ha_i += [float(x) for x in hd_coor]
            # hydrogen bond donor feature
            for ha_ind in ha_list:
                ha_coor, ha_type = ha_positions[ha_ind], ha_types[ha_ind]
                if distance(o_coor, ha_coor) <= hb_dist_dict[ha_type]:
                    for h_coor in [h1_coor, h2_coor]:
                        if angle(ha_coor, h_coor, o_coor) >= hb_angl_dict[ha_type]:
                            hd += 1
                            hd_i += [float(x) for x in ha_coor]
            # metals
            for metal_ind in metal_list:
                metal_position = metal_positions[metal_ind]
                ha += 1
                ha_i += [float(x) for x in metal_position]
                ni += 2.6 / distance(o_coor, metal_position)
            # indices of grid points close to the water molecule
            inds = tree.query_ball_point(o_coor, r=1.41)
            h2o_inds += inds
            # trapped water molecules
            if hd + ha > 2:
                tw_inds += inds
            # water molecule is replaceable/displaceable
            else:
                # shape
                shape_inds += inds
                # hydrogen bond features
                if hd == 0:
                    # single acceptor
                    if ha == 1:
                        ha_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][grid_list_dict['ha']] += ha_i
                    # double acceptor
                    elif ha == 2:
                        ha2_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][grid_list_dict['ha2']] += ha_i
                # single hydrogen bond donor
                elif hd == 1:
                    # single donor
                    if ha == 0:
                        hd_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][grid_list_dict['hd']] += hd_i
                    # mixed donor acceptor
                    elif ha == 1:
                        hda_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][grid_list_dict['hda']][0] += hd_i
                                grid_partners[ind][grid_list_dict['hda']][1] += ha_i
                # double hydrogen bond donor
                else:
                    hd2_inds += inds
                    if get_partners:
                        for ind in inds:
                            grid_partners[ind][grid_list_dict['hd2']] += hd_i
                # ionizable interactions and cation-pi interactions
                # negative ionizable and cation-pi interactions
                for pi_ind in pi_list:
                    pi_i = pi_positions[pi_ind]
                    # negative ionizable interaction
                    ni += 2.6 / distance(o_coor, pi_i)
                    # cation-pi interaction
                    for ind in inds:
                        grid_point = [grid_score['x'][ind], grid_score['y'][ind], grid_score['z'][ind]]
                        pi_distance = distance(grid_point, pi_i)
                        if 3.1 <= pi_distance <= 6.0:
                            grid_score['ai'][ind] += cation_pi_distance_score_dict[round(pi_distance, 1)]
                            if get_partners:
                                grid_partners[ind][grid_list_dict['ai']] += [float(x) for x in pi_i]
                # positive ionizable interaction
                for ni_ind in ni_list:
                    pi += 2.6 / distance(o_coor, ni_positions[ni_ind])
                # add ionizable interaction scores
                if pi > 0:
                    grid_score['pi'][inds] += pi
                    grid_score['ni'][inds] -= pi
                if ni > 0:
                    grid_score['ni'][inds] += ni
                    grid_score['pi'][inds] -= ni
                # hydrophobic interactions
                if len(hi_list) > 0:
                    hi += 1
                    if len(hi_list) > 1:
                        hi += buriedness(o_coor, hi_positions[hi_list])
                if hi > 0:
                    grid_score['hi_norm'][inds] += hi
                    # no charged environment
                    if ni < 0.65 > pi:
                        grid_score['hi'][inds] += hi
                # aromatic interactions, grid point wise
                for ai_ind in ai_list:
                    ai_i = ai_positions[ai_ind]
                    ai_n = ai_normals[ai_ind]
                    for ind in inds:
                        grid_point = [grid_score['x'][ind], grid_score['y'][ind], grid_score['z'][ind]]
                        ai_distance = distance(grid_point, ai_i)
                        if 3.1 <= ai_distance <= 6.0:
                            ai_vector = vector(ai_i, grid_point)
                            ai_n, alpha = ai_geometry(ai_vector, ai_n)
                            # cation-pi interactions
                            if alpha <= CATION_PI_ANGLE_CUTOFF:
                                grid_score['pi'][ind] += cation_pi_distance_score_dict[round(ai_distance, 1)]
                            # pi- and t-stacking
                            if ai_distance >= 3.3:
                                # pi- and t-stacking with pi-system of protein aromatic center
                                if alpha < 45:
                                    offset = opposite(alpha, ai_distance)
                                    # pi-stacking
                                    if ai_distance <= 4.7:
                                        # check offset between grid point and aromatic center
                                        if offset <= 2.0:
                                            grid_score['ai'][ind] += \
                                                pi_stacking_distance_score_dict[round(ai_distance, 1)]
                                            if get_partners:
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    pi_stacking_partner_position(grid_point, ai_n, ai_distance,
                                                                                 alpha)
                                    # t-stacking
                                    else:
                                        # check offset between grid point and aromatic center
                                        if offset <= 0.5:
                                            grid_score['ai'][ind] += \
                                                t_stacking_distance_score_dict[round(ai_distance, 1)]
                                            if get_partners:
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    t_stacking_partner_position(ai_i, grid_point, ai_n, offset,
                                                                                ai_distance, alpha, True)
                                # t-stacking with hydrogen of protein aromatic center
                                else:
                                    if ai_distance >= 4.6:
                                        # check offset between grid point and aromatic center
                                        offset = adjacent(alpha, ai_distance)
                                        if offset <= 0.5:
                                            grid_score['ai'][ind] += \
                                                t_stacking_distance_score_dict[round(ai_distance, 1)]
                                            if get_partners:
                                                ai_n2 = cross_product(ai_n, cross_product(ai_n, ai_vector))
                                                ai_n2, alpha = ai_geometry(ai_vector, ai_n2)
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    t_stacking_partner_position(ai_i, grid_point, ai_n2, offset,
                                                                                ai_distance, alpha)
        # adding scores to grid
        grid_score['shape'][shape_inds] += 1
        grid_score['ha'][ha_inds] += 1
        grid_score['ha2'][ha2_inds] += 1
        grid_score['hd'][hd_inds] += 1
        grid_score['hd2'][hd2_inds] += 1
        grid_score['hda'][hda_inds] += 1
        grid_score['tw'][tw_inds] += 1
        grid_score['h2o'][h2o_inds] += 1
        with frame_counter.get_lock():
            frame_counter.value += 1
        update_progress(frame_counter.value / total_number_of_frames, 'Progress of trajectory analysis',
                        ((time.time() - trajectory_time) / frame_counter.value) *
                        (total_number_of_frames - frame_counter.value))
        logger.debug('Trajectory {} finished with frame {}.'.format(counter, frame))
    logger.info('Finished analysis of trajectory {}.'.format(counter))
    # grid partners to numpy array
    grid_partners = grid_partners_to_array(grid_partners)
    results.append([grid_score, grid_partners])
    return
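
# Invocation sketch (illustrative): trajectory_analysis is written for
# multiprocessing, so 'frame_counter' must support get_lock() (e.g. a
# multiprocessing.Value) and 'results' must be a shared container (e.g. a
# multiprocessing.Manager().list()); file names and frame ranges below are
# placeholders, and 'grid_score'/'grid_partners' come from the dMIF grid setup.
#
#   import multiprocessing
#   manager = multiprocessing.Manager()
#   results = manager.list()
#   frame_counter = multiprocessing.Value('i', 0)
#   process = multiprocessing.Process(target=trajectory_analysis, args=(
#       'topology.pdb', 'trajectory_0.dcd', grid_score, grid_partners,
#       frame_counter, 1000, 0, 1000, 1, ['ZN'], 0, '.', False, True,
#       time.time(), results))
#   process.start()
#   process.join()
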
def generate_features(positions, feature_scores, feature_type, features_per_feature_type, directory, partner_path,
                      debugging, total_number_of_features, start, feature_counter, results):
    """ This function generates features with variable tolerance based on a global maximum search algorithm. The
    features are described with a list of properties as follows.

    Format:
    ------------------------------------------------------------------------
    0   1    2   3               4    5                             6           7
    ------------------------------------------------------------------------
    0   hi   M   [0.0,0.0,0.0]   1.5  []                            0.0         1.0
    1   pi   M   [0.0,0.0,0.0]   1.5  []                            0.0         1.0
    2   ni   M   [0.0,0.0,0.0]   1.5  []                            0.0         1.0
    3   hd   M   [0.0,0.0,0.0]   1.5  [[3.0,0.0,0.0]]               1.9499999   1.0
    4   ha   M   [0.0,0.0,0.0]   1.5  [[3.0,0.0,0.0]]               1.9499999   1.0
    5   hd2  M   [0.0,0.0,0.0]   1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]] 1.9499999   1.0
    6   ha2  M   [0.0,0.0,0.0]   1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]] 1.9499999   1.0
    7   hda  M   [0.0,0.0,0.0]   1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]] 1.9499999   1.0
    8   ai   M   [0.0,0.0,0.0]   1.5  [[1.0,0.0,0.0]]               0.43633232  1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance
    5 - partner positions (hda feature with coordinates for first donor, then acceptor)
    6 - partner tolerance
    7 - weight
    """
    logger = setup_logger('_'.join(['features', feature_type]), directory, debugging)
    if partner_path is None:
        partner_path = directory + '/data'
    if feature_type in grid_list_dict.keys():
        partners = pickle_reader(partner_path + '/' + feature_type + '.pkl', feature_type + '.pkl', logger)
    else:
        partners = [[]] * len(positions)
    score_minimum = 1
    tree = cKDTree(positions)
    generated_features = []
    not_used = list(range(len(feature_scores)))
    used = []
    # the length guard protects against indexing an empty candidate list
    while len(not_used) > 0 and feature_scores[not_used].max() >= score_minimum:
        feature_maximum = feature_scores[not_used].max()
        logger.debug('Feature {} maximum of remaining grid points at {}.'.format(feature_type, feature_maximum))
        indices_not_checked = np.where(abs(feature_scores - feature_maximum) < 1e-8)[0]
        indices = []
        # check if grid points within minimum tolerance were already used for features
        for index_not_checked in indices_not_checked:
            feature_indices = tree.query_ball_point(positions[index_not_checked], r=1.5)
            if len(feature_indices) + len(used) == len(set(feature_indices + used)):
                indices.append(index_not_checked)
            else:
                not_used = [x for x in not_used if x != index_not_checked]
        if len(indices) > 0:
            # check if only one grid point
            if len(indices) == 1:
                index = indices[0]
                core_tolerance, feature_indices = get_core_tolerance(positions[index], tree, feature_scores,
                                                                     feature_maximum)
            # if more than one grid point, search for the ones with the biggest tolerance
            else:
                core_tolerance, indices_maximal_tolerance, feature_indices_list = \
                    get_maximal_core_tolerance(indices, positions, tree, feature_scores, feature_maximum)
                # if more than one grid point with biggest tolerance, search for the one with the biggest score
                if len(indices_maximal_tolerance) > 1:
                    index, feature_indices = get_maximal_sum_of_scores(feature_scores, indices_maximal_tolerance,
                                                                       feature_indices_list)
                else:
                    index = indices_maximal_tolerance[0]
                    feature_indices = feature_indices_list[0]
            if len(feature_indices) + len(used) > len(set(feature_indices + used)):
                not_used = [x for x in not_used if x != index]
                used.append(index)
            else:
                generated_features.append([index, feature_type, 'M', positions[index], core_tolerance,
                                           get_partner_positions(feature_type, partners[index]),
                                           get_partner_tolerance(feature_type, core_tolerance), 1.0])
                not_used = [x for x in not_used if x not in feature_indices]
                used += feature_indices
                with feature_counter.get_lock():
                    feature_counter.value += 1
                update_progress(feature_counter.value / total_number_of_features,
                                'Progress of feature generation',
                                ((time.time() - start) / feature_counter.value) *
                                (total_number_of_features - feature_counter.value))
        if len(generated_features) >= features_per_feature_type:
            break
    if len(generated_features) < features_per_feature_type:
        with feature_counter.get_lock():
            feature_counter.value += features_per_feature_type - len(generated_features)
        update_progress(feature_counter.value / total_number_of_features, 'Progress of feature generation',
                        ((time.time() - start) / feature_counter.value) *
                        (total_number_of_features - feature_counter.value))
    results += generated_features
    return
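
# Invocation sketch (illustrative): like trajectory_analysis, this function is
# meant to run in parallel, one process per feature type; 'feature_counter' is
# a shared multiprocessing.Value and 'results' a shared list. Using the dMIF
# 'hi' scores as feature_scores here is an assumption for the example.
#
#   import multiprocessing
#   feature_counter = multiprocessing.Value('i', 0)
#   results = multiprocessing.Manager().list()
#   generate_features(positions, dmif['hi'], 'hi', features_per_feature_type=5,
#                     directory='.', partner_path=None, debugging=False,
#                     total_number_of_features=50, start=time.time(),
#                     feature_counter=feature_counter, results=results)
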
def screen_protein_conformations(topology, trajectory, pharmacophore_path, ligand_path, counter, first_frame,
                                 last_frame, step_size, metal_names, directory, output_name, debugging,
                                 total_number_of_frames, frame_counter, trajectory_time):
    dcd_name = 'ensemble_' + str(counter) + '.dcd'
    output_directory = '/'.join([directory, output_name])
    file_path(dcd_name, output_directory)
    logger = setup_logger('_'.join(['screen_protein_conformations', str(counter)]), directory, debugging)
    logger.info('Started screening of protein conformations in trajectory {}.'.format(counter))
    ligand_positions = None
    if debugging:
        u = mda.Universe(topology, trajectory)
        if ligand_path:
            ligand_positions = mda.Universe(ligand_path).atoms.positions
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            u = mda.Universe(topology, trajectory)
            if ligand_path:
                ligand_positions = mda.Universe(ligand_path).atoms.positions
    protein = u.select_atoms('protein')
    dtype = [('atomid', int), ('resname', 'U10'), ('resid', int), ('name', 'U10'), ('type', 'U10')]
    topology = np.array([(a, b, c, d, e) for a, b, c, d, e in
                         zip(range(len(u.atoms.resnames)), u.atoms.resnames, u.atoms.resids, u.atoms.names,
                             u.atoms.types)], dtype=dtype)
    main_atoms = main_selection(topology)
    main_atomids = main_atoms['atomid']
    heavy_atomids = heavy_atom_selection(main_atoms)
    hd_atomids, hd_types, hydrogen_atomid_lists = hd_selection(main_atoms)
    ha_atomids = ha_selection(main_atoms)[0]
    hi_atomids = hi_selection(main_atoms)
    ni_atomids = ni_selection(main_atoms)
    pi_atomids = pi_selection(main_atoms)
    ai_atomids = ai_selection(main_atoms)
    metal_atomids = metal_selection(topology, metal_names)
    features = [feature for feature in pharmacophore_reader(pharmacophore_path, False, logger)
                if feature[1] != 'ev']
    if counter == 0:
        file_path('protein.pdb', output_directory)
        if debugging:
            with mda.Writer('/'.join([output_directory, 'protein.pdb']), bonds=None,
                            n_atoms=protein.n_atoms) as PDB:
                PDB.write(protein)
        else:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                with mda.Writer('/'.join([output_directory, 'protein.pdb']), bonds=None,
                                n_atoms=protein.n_atoms) as PDB:
                    PDB.write(protein)
    frame_collector = []
    with mda.Writer('/'.join([output_directory, dcd_name]), n_atoms=protein.n_atoms) as DCD:
        for frame, _ in enumerate(u.trajectory[first_frame:last_frame:step_size]):
            positions = u.atoms.positions
            matched_features = 0
            for feature in features:
                ai, ha, hd, pi, ni, cation_pi, hi = 0, 0, 0, 0, 0, 0, 0
                feature_type = feature[1]
                feature_position = np.array(feature[3])
                partner_position = []
                if feature_type in ['ha', 'hd', 'ha2', 'hd2', 'ai']:
                    partner_position = np.array(feature[5][0])
                partner_tolerance = feature[6]
                feature_score = feature[7]
                # hydrogen bonds and metal interactions
                if feature_type == 'hd':
                    if len(ha_atomids) > 0:
                        ha_positions = positions[ha_atomids]
                        ha += np.sum((cdist(partner_position.reshape(1, 3), ha_positions) <=
                                      partner_tolerance)[0])
                    if ha == 0:
                        break
                    else:
                        matched_features += 1
                elif feature_type == 'ha':
                    if len(hd_atomids) > 0:
                        hd_positions = positions[hd_atomids]
                        hd_bools = (cdist(partner_position.reshape(1, 3), hd_positions) <=
                                    partner_tolerance)[0]
                        matched_hd_positions = hd_positions[hd_bools]
                        matched_hd_types = hd_types[hd_bools]
                        matched_hydrogen_atomid_lists = hydrogen_atomid_lists[hd_bools]
                        for matched_hd_position, matched_hd_type, matched_hydrogen_atomid_list in \
                                zip(matched_hd_positions, matched_hd_types, matched_hydrogen_atomid_lists):
                            for matched_hydrogen_atomid in matched_hydrogen_atomid_list:
                                if angle(feature_position, positions[matched_hydrogen_atomid],
                                         matched_hd_position) >= hb_angl_dict[matched_hd_type]:
                                    hd = 1
                    if len(metal_atomids) > 0:
                        metal_positions = positions[metal_atomids]
                        hd += np.sum((cdist(partner_position.reshape(1, 3), metal_positions) <=
                                      partner_tolerance)[0])
                    if hd == 0:
                        break
                    else:
                        matched_features += 1
                elif feature_type in ['hi', 'pi', 'ni']:
                    if len(ni_atomids) > 0:
                        ni_positions = np.array([((x + y) / 2) for x, y in
                                                 zip(positions[ni_atomids[::2]], positions[ni_atomids[1::2]])])
                        ni_positions = ni_positions[(cdist(feature_position.reshape(1, 3), ni_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for ni_position in ni_positions:
                            pi += 2.6 / distance(feature_position, ni_position)
                    if len(pi_atomids) > 0:
                        pi_positions = np.array([((x + y) / 2) for x, y in
                                                 zip(positions[pi_atomids[::2]], positions[pi_atomids[1::2]])])
                        pi_positions = pi_positions[(cdist(feature_position.reshape(1, 3), pi_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for pi_position in pi_positions:
                            ni += 2.6 / distance(feature_position, pi_position)
                    if len(metal_atomids) > 0:
                        metal_positions = positions[metal_atomids]
                        metal_booleans = (cdist(feature_position.reshape(1, 3), metal_positions) <=
                                          sel_cutoff_dict['metal'])[0]
                        metal_positions = metal_positions[metal_booleans]
                        for metal_position in metal_positions:
                            ni += 2.6 / distance(feature_position, metal_position)
                    if feature_type == 'hi':
                        if len(hi_atomids) > 0:
                            # no charged protein environment (break if pi and ni both reach 0.65)
                            if ni >= 0.65 <= pi:
                                break
                            hi_positions = positions[hi_atomids]
                            hi_positions = hi_positions[(cdist(feature_position.reshape(1, 3), hi_positions) <=
                                                         sel_cutoff_dict['hi'])[0]]
                            if len(hi_positions) > 0:
                                hi += 1
                                if len(hi_positions) > 1:
                                    hi += buriedness(feature_position, hi_positions)
                        if hi < feature_score:
                            break
                        else:
                            matched_features += 1
                    elif feature_type == 'pi':
                        if len(ai_atomids) > 0:
                            # cation-pi interactions
                            ai_positions = np.array([((x + y + z) / 3) for x, y, z in
                                                     zip(positions[ai_atomids[::3]], positions[ai_atomids[1::3]],
                                                         positions[ai_atomids[2::3]])])
                            ai_normals = np.array([normal(a, b, c) for a, b, c in
                                                   zip(positions[ai_atomids[::3]], ai_positions,
                                                       positions[ai_atomids[2::3]])])
                            ai_booleans = (cdist(feature_position.reshape(1, 3), ai_positions) <=
                                           sel_cutoff_dict['ai'])[0]
                            ai_positions = ai_positions[ai_booleans]
                            ai_normals = ai_normals[ai_booleans]
                            for ai_i, ai_n in zip(ai_positions, ai_normals):
                                ai_distance = distance(ai_i, feature_position)
                                if 3.1 <= ai_distance <= 6.0:
                                    ai_n, alpha = ai_geometry(vector(ai_i, feature_position), ai_n)
                                    if alpha <= CATION_PI_ANGLE_CUTOFF:
                                        cation_pi += cation_pi_distance_score_dict[round(ai_distance, 1)]
                        if pi + cation_pi - ni < feature_score:
                            break
                        else:
                            matched_features += 1
                    elif feature_type == 'ni':
                        if ni - pi < feature_score:
                            break
                        else:
                            matched_features += 1
                elif feature_type == 'ai':
                    if len(pi_atomids) > 0:
                        # cation-pi interactions
                        pi_positions = np.array([((x + y) / 2) for x, y in
                                                 zip(positions[pi_atomids[::2]], positions[pi_atomids[1::2]])])
                        pi_positions = pi_positions[(cdist(feature_position.reshape(1, 3), pi_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for pi_position in pi_positions:
                            pi_distance = distance(pi_position, feature_position)
                            if 3.1 <= pi_distance <= 6.0:
                                alpha = ai_geometry(vector(pi_position, feature_position), partner_position)[1]
                                if alpha <= CATION_PI_ANGLE_CUTOFF:
                                    ai += cation_pi_distance_score_dict[round(pi_distance, 1)]
                    if len(ai_atomids) > 0:
                        # aromatic interactions
                        ai_positions = np.array([((x + y + z) / 3) for x, y, z in
                                                 zip(positions[ai_atomids[::3]], positions[ai_atomids[1::3]],
                                                     positions[ai_atomids[2::3]])])
                        ai_normals = np.array([normal(a, b, c) for a, b, c in
                                               zip(positions[ai_atomids[::3]], ai_positions,
                                                   positions[ai_atomids[2::3]])])
                        ai_booleans = (cdist(feature_position.reshape(1, 3), ai_positions) <=
                                       sel_cutoff_dict['ai'])[0]
                        ai_positions = ai_positions[ai_booleans]
                        ai_normals = ai_normals[ai_booleans]
                        for ai_i, ai_n in zip(ai_positions, ai_normals):
                            ai_distance = distance(ai_i, feature_position)
                            if 3.3 <= ai_distance <= 6.0:
                                ai_vector = vector(ai_i, feature_position)
                                ai_n, alpha = ai_geometry(ai_vector, ai_n)
                                angle_tolerance = math.degrees(partner_tolerance)
                                # pi- and t-stacking with pi-system of protein aromatic center
                                if alpha < 45:
                                    offset = opposite(alpha, ai_distance)
                                    # pi-stacking
                                    if ai_distance <= 4.7:
                                        # check offset between feature position and aromatic center
                                        if offset <= 2.0:
                                            # check angle between normals
                                            if vector_angle(ai_n, partner_position) <= angle_tolerance:
                                                ai += pi_stacking_distance_score_dict[round(ai_distance, 1)]
                                    # t-stacking
                                    else:
                                        # check offset between feature position and aromatic center
                                        if offset <= 0.5:
                                            # check angle between normals lies within the tolerance band
                                            # around 90 degrees
                                            if (90 - angle_tolerance <= vector_angle(ai_n, partner_position) <=
                                                    90 + angle_tolerance):
                                                ai += t_stacking_distance_score_dict[round(ai_distance, 1)]
                                # t-stacking with hydrogen of protein aromatic center
                                else:
                                    if ai_distance >= 4.6:
                                        offset = adjacent(alpha, ai_distance)
                                        # check offset between feature position and aromatic center
                                        if offset <= 0.5:
                                            # check angle between normals lies within the tolerance band
                                            # around 90 degrees
                                            if (90 - angle_tolerance <= vector_angle(ai_n, partner_position) <=
                                                    90 + angle_tolerance):
                                                ai += t_stacking_distance_score_dict[round(ai_distance, 1)]
                    if ai < feature_score:
                        break
                    else:
                        matched_features += 1
            if matched_features == len(features):
                clash = False
                heavy_atom_positions = positions[heavy_atomids]
                for feature in features:
                    core_tolerance = feature[4]
                    if cdist(np.array([feature[3]]), heavy_atom_positions).min() < core_tolerance:
                        clash = True
                if not clash:
                    if ligand_path:
                        main_positions = positions[main_atomids]
                        if cdist(main_positions, ligand_positions).min() < CLASH_CUTOFF:
                            clash = True
                if not clash:
                    DCD.write(protein)
                    frame_collector.append(frame + first_frame)
            logger.debug('Trajectory {} finished with frame {}.'.format(counter, frame))
            with frame_counter.get_lock():
                frame_counter.value += 1
            update_progress(frame_counter.value / total_number_of_frames, 'Progress of trajectory analysis',
                            ((time.time() - trajectory_time) / frame_counter.value) *
                            (total_number_of_frames - frame_counter.value))
    logger.info('Finished screening of trajectory {}.'.format(counter))
    with open('{}/frames_{}.csv'.format(output_directory, counter), 'w') as csv:
        for frame in frame_collector:
            csv.write('{}\t{}\n'.format(counter, frame))
    return
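
# Output sketch (illustrative): for each trajectory index N the function
# writes the matching protein conformations to ensemble_N.dcd and records the
# accepted frame numbers in frames_N.csv (tab-separated trajectory index and
# frame number, matching the csv.write call above); protein.pdb is written
# once by the process with counter == 0. A hypothetical way to collect the
# accepted frames afterwards:
#
#   accepted = []
#   with open('{}/frames_{}.csv'.format(output_directory, 0)) as csv_file:
#       for line in csv_file:
#           trajectory_index, frame_index = line.split('\t')
#           accepted.append(int(frame_index))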