def pml_pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function reads LigandScout pharmacophores and stores them in an internal data
    structure. """
    pharmacophore = []
    pml_pharmacophore = et.parse(path)
    if pml_pharmacophore.getroot().tag == 'pharmacophore':
        pml_pharmacophore = pml_pharmacophore.getroot()
    else:
        pharmacophore_number = len(pml_pharmacophore.findall('pharmacophore'))
        if pharmacophore_number > 0:
            if pharmacophore_number == 1:
                pml_pharmacophore = pml_pharmacophore.findall('pharmacophore')[0]
            else:
                user_prompt = ''
                while user_prompt not in ['yes', 'no']:
                    user_prompt = input('Pharmacophore file contains {} pharmacophores. Only one pharmacophore '
                                        'can be processed at a time. Do you want to continue with the first '
                                        'pharmacophore in the pml file? [yes/no]: '.format(pharmacophore_number))
                    if user_prompt == 'no':
                        sys.exit()
                    elif user_prompt == 'yes':
                        pml_pharmacophore = pml_pharmacophore.findall('pharmacophore')[0]
        else:
            update_user('Cannot find any pharmacophore in the pml file.', logger)
            sys.exit()
    for feature in pml_pharmacophore:
        pharmacophore.append(pml_feature(feature, pyrod_pharmacophore, logger))
    if pyrod_pharmacophore:
        pharmacophore = merge_pyrod_hydrogen_bonds(pharmacophore, logger)
    return pharmacophore

def merge_pyrod_hydrogen_bonds(pharmacophore, logger):
    """ This function merges hydrogen bond acceptor and donor features into the pyrod features
    ha2, hd2 and hda. """
    valid_features = [feature for feature in pharmacophore if feature[1] not in ['ha2', 'hd2', 'hda', 'ev']]
    exclusion_volumes = [feature for feature in pharmacophore if feature[1] == 'ev']
    pyrod_hydrogen_bonds = [feature for feature in pharmacophore if feature[1] in ['ha2', 'hd2', 'hda']]
    pyrod_hydrogen_bond_ids = set([feature[0] for feature in pyrod_hydrogen_bonds])
    for index in pyrod_hydrogen_bond_ids:
        feature_pair = [feature for feature in pyrod_hydrogen_bonds if feature[0] == index]
        pyrod_hydrogen_bond = feature_pair[0]
        try:
            if len(pyrod_hydrogen_bond[5][0]) == 0:
                pyrod_hydrogen_bond[5][0] = feature_pair[1][5][0]
            elif len(pyrod_hydrogen_bond[5][1]) == 0:
                pyrod_hydrogen_bond[5][1] = feature_pair[1][5][1]
            valid_features.append(pyrod_hydrogen_bond)
        except IndexError:
            update_user('The given pharmacophore contains incomplete hydrogen bonding features. Feature type is {} '
                        'but only one interaction partner was found. Either create a new pharmacophore without '
                        'splitting ha2, hd2 and hda features or set pyrod pharmacophore to false.'.format(
                            pyrod_hydrogen_bond[1]), logger)
            sys.exit()
    valid_features.sort(key=operator.itemgetter(0))
    return valid_features + exclusion_volumes

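# Usage sketch (not part of the original module; the feature values below are assumptions).
# A split PyRod 'hda' feature appears as two half-features sharing the same index;
# merge_pyrod_hydrogen_bonds() recombines them into one feature with both partner positions filled:
#
# donor_half    = [3, 'hda', 'M', [0.0, 0.0, 0.0], 1.5, [[3.0, 0.0, 0.0], []], 1.9499999, 30.1]
# acceptor_half = [3, 'hda', 'M', [0.0, 0.0, 0.0], 1.5, [[], [0.0, 3.0, 0.0]], 1.9499999, 30.1]
# merge_pyrod_hydrogen_bonds([donor_half, acceptor_half], logger)
# # -> [[3, 'hda', 'M', [0.0, 0.0, 0.0], 1.5, [[3.0, 0.0, 0.0], [0.0, 3.0, 0.0]], 1.9499999, 30.1]]
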
def pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function reads pharmacophores and translates them into the internal pharmacophore format.

    Format:
    ----------------------------------------------------------------------------------------
    0  1    2  3              4    5                              6           7
    ----------------------------------------------------------------------------------------
    0  hi   M  [0.0,0.0,0.0]  1.5  []                             0.0         150.1
    1  pi   M  [0.0,0.0,0.0]  1.5  []                             0.0         30.1
    2  ni   M  [0.0,0.0,0.0]  1.5  []                             0.0         30.1
    3  hd   M  [0.0,0.0,0.0]  1.5  [[3.0,0.0,0.0]]                1.9499999   30.1
    4  ha   M  [0.0,0.0,0.0]  1.5  [[3.0,0.0,0.0]]                1.9499999   30.1
    5  hd2  M  [0.0,0.0,0.0]  1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   30.1
    6  ha2  M  [0.0,0.0,0.0]  1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   30.1
    7  hda  M  [0.0,0.0,0.0]  1.5  [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   30.1
    8  ai   M  [0.0,0.0,0.0]  1.5  [[1.0,0.0,0.0]]                0.43633232  30.1
    ----------------------------------------------------------------------------------------

    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance
    5 - partner positions (hda features list the donor position first, then the acceptor position)
    6 - partner tolerance
    7 - score
    """
    logger.info('Reading pharmacophore from {}.'.format(path))
    valid_formats = ['pml', 'pdb']
    pharmacophore_format = path.split('.')[-1]
    if pharmacophore_format not in valid_formats:
        update_user('Invalid pharmacophore format detected, only {} and {} are supported.'.format(
            ', '.join(valid_formats[0:-1]), valid_formats[-1]), logger)
        sys.exit()
    pharmacophore = []
    if pharmacophore_format == 'pml':
        pharmacophore = pml_pharmacophore_reader(path, pyrod_pharmacophore, logger)
    elif pharmacophore_format == 'pdb':
        pharmacophore = pdb_pharmacophore_reader(path, pyrod_pharmacophore, logger)
    if not pyrod_pharmacophore:
        pharmacophore = renumber_features(pharmacophore)
    return pharmacophore

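# Usage sketch (not part of the original module; the file name and logger settings are assumptions).
# Each entry returned by pharmacophore_reader() is one feature in the internal list format
# documented in the docstring above:
#
# logger = setup_logger('reader_example', '/tmp/pyrod', False)
# features = pharmacophore_reader('pyrod_pharmacophore.pml', True, logger)
# index, feature_type, flag, core, core_tolerance, partners, partner_tolerance, score = features[0]
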
def pdb_pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function reads pdb pharmacophores in pyrod format and stores them in an internal data
    structure. """
    if not pyrod_pharmacophore:
        update_user('This format is specific to pyrod pharmacophores. Make sure the format is correct.', logger)
    pharmacophore = []
    with open(path, 'r') as pharmacophore_file:
        for line in pharmacophore_file.readlines():
            if line[:6].strip() == 'ATOM':
                # 'C' records carry the core position, records containing 'P' carry partner positions
                if line[12:16].strip() == 'C':
                    feature_list = [0, 0, 0, 0, 0.0, [], 0.0, 0.0]
                    feature_list[0] = int(line[22:26])
                    feature_list[1] = line[17:20].strip()
                    feature_list[2] = line[21:22].strip()
                    feature_list[3] = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
                    feature_list[4] = float(line[54:60])
                    feature_list[7] = float(line[60:66])
                    pharmacophore.append(feature_list)
                if 'P' in line[12:16]:
                    # attach the partner position to the already collected feature with the same index
                    feature_list = [feature for feature in pharmacophore if feature[0] == int(line[22:26])][0]
                    pharmacophore = [feature for feature in pharmacophore if feature[0] != int(line[22:26])]
                    feature_list[5].append([float(line[30:38]), float(line[38:46]), float(line[46:54])])
                    feature_list[6] = float(line[54:60])
                    pharmacophore.append(feature_list)
    return pharmacophore

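# PDB column mapping used by pdb_pharmacophore_reader() above (derived from the slices in the code;
# listed here for reference only):
#
#   line[:6]      record name ('ATOM')
#   line[12:16]   atom name: 'C' marks a core record, names containing 'P' mark partner records
#   line[17:20]   residue name   -> feature type (e.g. 'hd', 'ha2')
#   line[21:22]   chain id       -> flag ('M' or 'O')
#   line[22:26]   residue number -> feature index
#   line[30:54]   x, y, z coordinates
#   line[54:60]   occupancy      -> core tolerance (core records) or partner tolerance (partner records)
#   line[60:66]   B-factor       -> score
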
def pml_feature(feature, pyrod_pharmacophore, logger):
    """ This function converts a pml feature into the internal pharmacophore feature format. """
    translate_features = {'H': 'hi', 'PI': 'pi', 'NI': 'ni', 'HBA': 'ha', 'HBD': 'hd', 'AR': 'ai',
                          'exclusion': 'ev'}
    feature_list = [0, 0, 0, 0, 0.0, [], 0.0, 0.0]
    feature_list[0] = feature.attrib['featureId']
    if feature.tag == 'volume':
        feature_list[1] = translate_features[feature.attrib['type']]
    else:
        feature_list[1] = translate_features[feature.attrib['name']]
    if pyrod_pharmacophore:
        try:
            feature_list[0] = int(feature.attrib['featureId'].split('_')[1])
            feature_list[1] = feature.attrib['featureId'].split('_')[0]
        except (IndexError, ValueError):
            update_user('You attempted to read a pml pharmacophore that was not generated by PyRod or that was '
                        'generated with an older version of PyRod. Please set pyrod pharmacophore to false!', logger)
            sys.exit()
    feature_list[2] = 'M'
    if feature.attrib['optional'] == 'true':
        feature_list[2] = 'O'
    if feature.tag in ['point', 'volume']:
        pml_feature_point_or_volume(feature_list, feature)
    elif feature.tag == 'vector':
        pml_feature_vector(feature_list, feature)
    elif feature.tag == 'plane':
        pml_feature_plane(feature_list, feature)
    return feature_list

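# Illustrative pml feature element as consumed by pml_feature() (a sketch only; the 'name',
# 'featureId' and 'optional' attributes are the ones read above, while the position child is
# handled by pml_feature_point_or_volume(), which is defined elsewhere, and its exact attributes
# are an assumption based on common LigandScout pml files):
#
# <point name="H" featureId="hi_1" optional="false" disabled="false" weight="1.0">
#   <position x3="0.0" y3="0.0" z3="0.0" tolerance="1.5"/>
# </point>
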
def generate_exclusion_volumes(dmif, directory, debugging, shape_cutoff, restrictive):
    """ This function generates exclusion volumes. The exclusion volumes are described with a list of
    properties as follows.

    Format:
    --------------------------------------------------------
    0  1   2  3              4    5   6    7
    --------------------------------------------------------
    0  ev  M  [0.0,0.0,0.0]  1.0  []  0.0  1.0
    1  ev  M  [2.0,0.0,0.0]  1.0  []  0.0  1.0
    --------------------------------------------------------

    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance [not needed for exclusion volumes]
    5 - partner positions [not needed for exclusion volumes]
    6 - partner tolerance [not needed for exclusion volumes]
    7 - weight
    """
    logger = setup_logger('exclusion_volumes', directory, debugging)
    update_user('Generating exclusion volumes.', logger)
    grid_space = 0.5
    exclusion_volume_space = 4
    if restrictive:
        exclusion_volume_space = 2
    grid_tree = cKDTree([[x, y, z] for x, y, z in zip(dmif['x'], dmif['y'], dmif['z'])])
    dtype = [('x', float), ('y', float), ('z', float), ('shape', int), ('count', int)]
    dmif_shape = np.array([(x, y, z, shape, 0) for x, y, z, shape in
                           zip(dmif['x'], dmif['y'], dmif['z'], dmif['shape']) if shape < shape_cutoff],
                          dtype=dtype)
    positions = np.array([[x, y, z] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    shape_grid_size = len(dmif_shape)
    # store number of neighbors with shape score smaller than shape_cutoff for grid points
    for index in range(shape_grid_size):
        dmif_shape['count'][index] = len(shape_tree.query_ball_point(positions[index], grid_space * 4))
    # sort by neighbor count
    dmif_shape = np.sort(dmif_shape, order='count')
    # rebuild positions and shape_tree
    positions = np.array([[x, y, z] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    used = []
    exclusion_volumes = []
    counter = 1
    start = time.time()
    for index in range(shape_grid_size):
        # grid point index should not be in used list
        if index not in used:
            neighbor_list = shape_tree.query_ball_point(positions[index], exclusion_volume_space / 2)
            # elements of neighbor_list should not be in used list
            if len(set(neighbor_list + used)) == len(neighbor_list) + len(used):
                # grid point should not be at border of grid
                if len(grid_tree.query_ball_point(positions[index], r=grid_space * 2)) == 33:
                    # grid point should not be directly at border of binding pocket
                    if len(shape_tree.query_ball_point(positions[index], r=grid_space)) == 7:
                        # grid point should not be surrounded by grid points outside the binding pocket
                        if len(shape_tree.query_ball_point(positions[index], r=grid_space * 2)) < 33:
                            exclusion_volumes.append([counter, 'ev', 'M', positions[index], 1.0, [], 0.0, 1.0])
                            counter += 1
                            used += neighbor_list
        eta = ((time.time() - start) / (index + 1)) * (shape_grid_size - (index + 1))
        update_progress(float(index + 1) / shape_grid_size, 'Progress of exclusion volume generation', eta)
        logger.debug('Passed grid index {}.'.format(index))
    update_user('Finished with generation of {} exclusion volumes.'.format(len(exclusion_volumes)), logger)
    return exclusion_volumes

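# Usage sketch (not part of the original module): the dmif argument is expected to be a numpy
# structured array exposing at least the 'x', 'y', 'z' and 'shape' fields accessed above; the
# array content and parameter values below are assumptions for illustration only.
#
# dtype = [('x', float), ('y', float), ('z', float), ('shape', int)]
# dmif = np.array([(0.5 * i, 0.0, 0.0, 0) for i in range(20)], dtype=dtype)
# exclusion_volumes = generate_exclusion_volumes(dmif, '/tmp/pyrod', False, shape_cutoff=1, restrictive=False)
# # each entry follows the format documented above, e.g. [1, 'ev', 'M', [0.0, 0.0, 0.0], 1.0, [], 0.0, 1.0]
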
def generate_library(pharmacophore_path, output_format, library_dict, library_path, pyrod_pharmacophore,
                     directory, debugging):
    """ This function writes a combinatorial pharmacophore library. """
    logger = setup_logger('library', directory, debugging)
    update_user('Starting library generation.', logger)
    template_pharmacophore = pharmacophore_reader(pharmacophore_path, pyrod_pharmacophore, logger)
    pharmacophore_library = []
    essential_hb, essential_hi, essential_ai, essential_ii = [], [], [], []
    optional_hb, optional_hi, optional_ai, optional_ii = [], [], [], []
    exclusion_volumes = []
    # analyzing pharmacophore
    for index, feature in enumerate(template_pharmacophore):
        if feature[1] == 'ev':
            exclusion_volumes.append(feature)
        else:
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                if feature[2] == 'O':
                    optional_hb.append(index)
                else:
                    essential_hb.append(index)
            elif feature[1] == 'hi':
                if feature[2] == 'O':
                    optional_hi.append(index)
                else:
                    essential_hi.append(index)
            elif feature[1] in ['pi', 'ni']:
                if feature[2] == 'O':
                    optional_ii.append(index)
                else:
                    essential_ii.append(index)
            elif feature[1] == 'ai':
                if feature[2] == 'O':
                    optional_ai.append(index)
                else:
                    essential_ai.append(index)
    essential_features = essential_hb + essential_hi + essential_ai + essential_ii
    for hb_combination in combine_features(optional_hb,
                                           library_dict['minimal hydrogen bonds'] - len(essential_hb),
                                           library_dict['maximal hydrogen bonds'] - len(essential_hb) + 1):
        for hi_combination in combine_features(optional_hi,
                                               library_dict['minimal hydrophobic interactions'] - len(essential_hi),
                                               library_dict['maximal hydrophobic interactions'] -
                                               len(essential_hi) + 1):
            for ai_combination in combine_features(optional_ai,
                                                   library_dict['minimal aromatic interactions'] - len(essential_ai),
                                                   library_dict['maximal aromatic interactions'] -
                                                   len(essential_ai) + 1):
                for ii_combination in combine_features(optional_ii,
                                                       library_dict['minimal ionizable interactions'] -
                                                       len(essential_ii),
                                                       library_dict['maximal ionizable interactions'] -
                                                       len(essential_ii) + 1):
                    pharmacophore = (essential_features + hb_combination + hi_combination + ai_combination +
                                     ii_combination)
                    if evaluate_pharmacophore(pharmacophore, template_pharmacophore, library_dict,
                                              pyrod_pharmacophore):
                        pharmacophore_library.append(pharmacophore)
    # estimate maximal library size and ask user if number and space of pharmacophores is okay
    pharmacophore_writer(template_pharmacophore, [output_format], 'template_pharmacophore', library_path, logger)
    pharmacophore_library_size = bytes_to_text(os.path.getsize('{}/{}.{}'.format(
        library_path, 'template_pharmacophore', output_format)) * len(pharmacophore_library))
    user_prompt = ''
    while user_prompt not in ['yes', 'no']:
        user_prompt = input('{} pharmacophores will be written taking about {} of space.\n'
                            'Do you want to continue? [yes/no]: '.format(len(pharmacophore_library),
                                                                         pharmacophore_library_size))
        if user_prompt == 'no':
            sys.exit()
    start = time.time()
    # write pharmacophores
    maximal_exclusion_volume_id = max([exclusion_volume[0] for exclusion_volume in exclusion_volumes])
    for counter, index_pharmacophore in enumerate(pharmacophore_library):
        extra_exclusion_volumes = []
        extra_ev_counter = 1
        pharmacophore = []
        for index_feature in index_pharmacophore:
            feature = template_pharmacophore[index_feature]
            feature[2] = 'M'
            pharmacophore.append(feature)
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                extra_exclusion_volumes.append([maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                                                feature[5][0], 1.0, [], 0.0, 0.0])
                extra_ev_counter += 1
                if feature[1] in ['ha2', 'hd2', 'hda']:
                    extra_exclusion_volumes.append([maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                                                    feature[5][1], 1.0, [], 0.0, 0.0])
                    extra_ev_counter += 1
        pharmacophore_writer(pharmacophore + exclusion_volumes + extra_exclusion_volumes, [output_format],
                             str(counter), library_path, logger)
        update_progress((counter + 1) / len(pharmacophore_library),
                        'Writing {} pharmacophores'.format(len(pharmacophore_library)),
                        ((time.time() - start) / (counter + 1)) * (len(pharmacophore_library) - (counter + 1)))
    update_user('Wrote pharmacophores to {}.'.format(library_path), logger)
    return

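# Illustrative library_dict sketch (the keys shown are the ones read in generate_library() above;
# evaluate_pharmacophore() may require additional keys defined elsewhere, and the numeric values
# as well as the file names and paths are assumptions):
#
# library_dict = {'minimal hydrogen bonds': 1, 'maximal hydrogen bonds': 3,
#                 'minimal hydrophobic interactions': 1, 'maximal hydrophobic interactions': 2,
#                 'minimal aromatic interactions': 0, 'maximal aromatic interactions': 1,
#                 'minimal ionizable interactions': 0, 'maximal ionizable interactions': 1}
# generate_library('template.pml', 'pml', library_dict, '/tmp/pyrod/library', True, '/tmp/pyrod', False)
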
parser = argparse.ArgumentParser(description='\n'.join(logo),
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('conf', help='path to configuration file')
parser.add_argument('--verbose', dest='debugging', action='store_true', help='verbose logging for debugging')
conf = parser.parse_args().conf
debugging = parser.parse_args().debugging
config = configparser.ConfigParser()
config.read(conf)
directory = config.get('directory', 'directory')
if len(directory) == 0:
    directory = os.getcwd() + '/pyrod'
logger = setup_logger('main', directory, debugging)
update_user('\n'.join(logo), logger)
logger.debug('\n'.join([': '.join(list(_)) for _ in config.items('directory')]))
# defining grid
if config.has_section('test grid parameters'):
    logger.debug('\n'.join([': '.join(list(_)) for _ in config.items('test grid parameters')]))
    center, edge_lengths, name = test_grid_parameters(config)
    # determine space resulting in less than 100000 grid points
    space = 0.5
    space_found = False
    while not space_found:
        grid = generate_grid(center, edge_lengths, space)
        if len(grid) < 100000:
            space_found = True
        else:
            # coarsen the grid spacing and try again
            space += 0.5

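# Minimal configuration sketch for the block above (illustrative; only the 'directory' option is read
# directly here, and the keys of the 'test grid parameters' section are parsed by test_grid_parameters(),
# which is defined elsewhere):
#
# [directory]
# directory = /tmp/pyrod
#
# [test grid parameters]
# ...
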
def ensemble_to_centroid(topology, trajectories, output_name, directory, debugging):
    """ This function merges the ensemble trajectories of a protein, determines the centroid
    conformation and writes it to centroid.pdb together with a frames.csv reference file. """
    logger = setup_logger('ensembles_to_centroid', directory, debugging)
    output_directory = '/'.join([directory, output_name])
    protein_topology = '/'.join([output_directory, 'protein.pdb'])
    protein_trajectories = []
    frames = []
    # check if frames in trajectory files, delete empty trajectory files, collect frames in list
    for x in range(len(trajectories)):
        with open('{}/frames_{}.csv'.format(output_directory, x), 'r') as csv:
            frames += csv.readlines()
        os.remove('{}/frames_{}.csv'.format(output_directory, x))
        try:
            mda.Universe(protein_topology, '/'.join([output_directory, 'ensemble_{}.dcd'.format(x)]))
            protein_trajectories.append(x)
        except OSError:
            os.remove('/'.join([output_directory, 'ensemble_{}.dcd'.format(x)]))
    if len(protein_trajectories) > 0:
        # info to user
        if len(frames) > 1:
            update_user('Getting centroid from {} protein conformations.'.format(len(frames)), logger)
        else:
            update_user('Found only 1 protein conformation.', logger)
        # merge trajectories into one file
        protein_trajectories = ['/'.join([output_directory, 'ensemble_{}.dcd'.format(x)]) for x in
                                protein_trajectories]
        u = mda.Universe(protein_topology, protein_trajectories)
        with mda.Writer('/'.join([output_directory, 'ensemble.dcd']), n_atoms=u.atoms.n_atoms) as DCD:
            for _ in u.trajectory:
                DCD.write(u.atoms)
        # remove sub trajectories
        for path in protein_trajectories:
            os.remove(path)
        # find centroid of frames
        if len(frames) > 1:
            u = mda.Universe(protein_topology, '/'.join([output_directory, 'ensemble.dcd']))
            conf_dist_matrix = encore.confdistmatrix.get_distance_matrix(u, selection='all', superimpose=True,
                                                                         n_job=1, weights='mass', metadata=False,
                                                                         verbose=False)
            centroid = conf_dist_matrix.as_array().sum(axis=1).argmin()
        else:
            centroid = 0
        # write centroid
        u = mda.Universe(topology, trajectories[int(frames[centroid].split()[0])])
        file_path('centroid.pdb', output_directory)
        with mda.Writer('/'.join([output_directory, 'centroid.pdb']), bonds=None, n_atoms=u.atoms.n_atoms) as PDB:
            for _ in u.trajectory[int(frames[centroid].split()[1]):int(frames[centroid].split()[1]) + 1]:
                PDB.write(u.atoms)
        # write csv with frame references
        file_path('frames.csv', output_directory)
        frames[centroid] = '{}\t{}\t{}\n'.format(frames[centroid].split()[0], frames[centroid].split()[1],
                                                 'centroid')
        with open('{}/frames.csv'.format(output_directory), 'w') as csv:
            csv.write(''.join(['trajectory\tframe\tcentroid\n'] + frames))
    else:
        update_user('No protein conformations found.', logger)
        sys.exit()
    return