Example #1
def generate_library(pharmacophore_path, output_format, library_dict,
                     library_path, pyrod_pharmacophore, directory, debugging):
    """ This function writes a combinatorial pharmacophore library. """
    logger = setup_logger('library', directory, debugging)
    update_user('Starting library generation.', logger)
    template_pharmacophore = pharmacophore_reader(pharmacophore_path,
                                                  pyrod_pharmacophore, logger)
    pharmacophore_library = []
    essential_hb, essential_hi, essential_ai, essential_ii = [], [], [], []
    optional_hb, optional_hi, optional_ai, optional_ii = [], [], [], []
    exclusion_volumes = []
    # analyzing pharmacophore
    for index, feature in enumerate(template_pharmacophore):
        if feature[1] == 'ev':
            exclusion_volumes.append(feature)
        else:
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                if feature[2] == 'O':
                    optional_hb.append(index)
                else:
                    essential_hb.append(index)
            elif feature[1] == 'hi':
                if feature[2] == 'O':
                    optional_hi.append(index)
                else:
                    essential_hi.append(index)
            elif feature[1] in ['pi', 'ni']:
                if feature[2] == 'O':
                    optional_ii.append(index)
                else:
                    essential_ii.append(index)
            elif feature[1] == 'ai':
                if feature[2] == 'O':
                    optional_ai.append(index)
                else:
                    essential_ai.append(index)
    essential_features = essential_hb + essential_hi + essential_ai + essential_ii
    for hb_combination in combine_features(
            optional_hb,
            library_dict['minimal hydrogen bonds'] - len(essential_hb),
            library_dict['maximal hydrogen bonds'] - len(essential_hb) + 1):
        for hi_combination in combine_features(
                optional_hi, library_dict['minimal hydrophobic interactions'] -
                len(essential_hi),
                library_dict['maximal hydrophobic interactions'] -
                len(essential_hi) + 1):
            for ai_combination in combine_features(
                    optional_ai,
                    library_dict['minimal aromatic interactions'] -
                    len(essential_ai),
                    library_dict['maximal aromatic interactions'] -
                    len(essential_ai) + 1):
                for ii_combination in combine_features(
                        optional_ii,
                        library_dict['minimal ionizable interactions'] -
                        len(essential_ii),
                        library_dict['maximal ionizable interactions'] -
                        len(essential_ii) + 1):
                    pharmacophore = (essential_features + hb_combination +
                                     hi_combination + ai_combination +
                                     ii_combination)
                    if evaluate_pharmacophore(pharmacophore,
                                              template_pharmacophore,
                                              library_dict,
                                              pyrod_pharmacophore):
                        pharmacophore_library.append(pharmacophore)
    # estimate maximal library size and ask the user whether the number of pharmacophores and required disk space are okay
    pharmacophore_writer(template_pharmacophore, [output_format],
                         'template_pharmacophore', library_path, logger)
    pharmacophore_library_size = bytes_to_text(
        os.path.getsize('{}/{}.{}'.format(
            library_path, 'template_pharmacophore', output_format)) *
        len(pharmacophore_library))
    user_prompt = ''
    while user_prompt not in ['yes', 'no']:
        user_prompt = input(
            '{} pharmacophores will be written taking about {} of space.\n'
            'Do you want to continue? [yes/no]: '.format(
                len(pharmacophore_library), pharmacophore_library_size))
        if user_prompt == 'no':
            sys.exit()
    start = time.time()
    # write pharmacophores
    maximal_exclusion_volume_id = max(
        [exclusion_volume[0] for exclusion_volume in exclusion_volumes])
    for counter, index_pharmacophore in enumerate(pharmacophore_library):
        extra_exclusion_volumes = []
        extra_ev_counter = 1
        pharmacophore = []
        for index_feature in index_pharmacophore:
            feature = template_pharmacophore[index_feature]
            feature[2] = 'M'
            pharmacophore.append(feature)
            if feature[1] in ['ha', 'hd', 'ha2', 'hd2', 'hda']:
                extra_exclusion_volumes.append([
                    maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                    feature[5][0], 1.0, [], 0.0, 0.0
                ])
                extra_ev_counter += 1
                if feature[1] in ['ha2', 'hd2', 'hda']:
                    extra_exclusion_volumes.append([
                        maximal_exclusion_volume_id + extra_ev_counter, 'ev',
                        'M', feature[5][1], 1.0, [], 0.0, 0.0
                    ])
                    extra_ev_counter += 1
        pharmacophore_writer(
            pharmacophore + exclusion_volumes + extra_exclusion_volumes,
            [output_format], str(counter), library_path, logger)
        update_progress(
            (counter + 1) / len(pharmacophore_library),
            'Writing {} pharmacophores'.format(len(pharmacophore_library)),
            ((time.time() - start) /
             (counter + 1)) * (len(pharmacophore_library) - (counter + 1)))
    update_user('Wrote pharmacophores to {}.'.format(library_path), logger)
    return
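Note: the combine_features helper used above is not shown in these examples. A minimal sketch, inferred only from the call sites (inclusive lower bound, exclusive upper bound, empty combination allowed once the essential features already satisfy the minimum), could look like this; the actual PyRod implementation may differ.

import itertools

def combine_features(features, minimum, maximum):
    """ Hypothetical sketch: yield every combination of the optional
    feature indices whose size lies in [max(minimum, 0), maximum). """
    for size in range(max(minimum, 0), max(maximum, 0)):
        for combination in itertools.combinations(features, size):
            yield list(combination)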
Example #2
def generate_exclusion_volumes(dmif, directory, debugging, shape_cutoff,
                               restrictive):
    """ This function generates exclusion volumes. The exclusion volumes are described with a list of properties as
    follows.

    Format:
    ------------------------------------------------------------------------
    0   1 2             3   4                             5          6     7
    ------------------------------------------------------------------------
    0  ev M [0.0,0.0,0.0] 1.0                            []        0.0   1.0
    1  ev M [2.0,0.0,0.0] 1.0                            []        0.0   1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance [not needed for exclusion volumes]
    5 - partner positions [not needed for exclusion volumes]
    6 - partner tolerance [not needed for exclusion volumes]
    7 - weight
    """
    logger = setup_logger('exclusion_volumes', directory, debugging)
    update_user('Generating exclusion volumes.', logger)
    grid_space = 0.5
    exclusion_volume_space = 4
    if restrictive:
        exclusion_volume_space = 2
    grid_tree = cKDTree([[x, y, z]
                         for x, y, z in zip(dmif['x'], dmif['y'], dmif['z'])])
    dtype = [('x', float), ('y', float), ('z', float), ('shape', int),
             ('count', int)]
    dmif_shape = np.array(
        [(x, y, z, shape, 0)
         for x, y, z, shape in zip(dmif['x'], dmif['y'], dmif['z'],
                                   dmif['shape']) if shape < shape_cutoff],
        dtype=dtype)
    positions = np.array([[
        x, y, z
    ] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    shape_grid_size = len(dmif_shape)
    # store for each grid point the number of neighboring grid points with shape score below shape_cutoff
    for index in range(shape_grid_size):
        dmif_shape['count'][index] = len(
            shape_tree.query_ball_point(positions[index], grid_space * 4))
    # sort for neighbor count
    dmif_shape = np.sort(dmif_shape, order='count')
    # rebuild positions and shape_tree
    positions = np.array([[
        x, y, z
    ] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    used = []
    exclusion_volumes = []
    counter = 1
    start = time.time()
    for index in range(shape_grid_size):
        # grid_point index should not be in used list
        if index not in used:
            neighbor_list = shape_tree.query_ball_point(
                positions[index], exclusion_volume_space / 2)
            # elements of neighbor_list should not be in used list
            if len(set(neighbor_list +
                       used)) == len(neighbor_list) + len(used):
                # grid_point should not be at border of grid
                if len(
                        grid_tree.query_ball_point(positions[index],
                                                   r=grid_space * 2)) == 33:
                    # grid_point should not be directly at border of binding pocket
                    if len(
                            shape_tree.query_ball_point(positions[index],
                                                        r=grid_space)) == 7:
                        # grid_point should not be surrounded by grid_points outside the binding pocket
                        if len(
                                shape_tree.query_ball_point(
                                    positions[index], r=grid_space * 2)) < 33:
                            exclusion_volumes.append([
                                counter, 'ev', 'M', positions[index], 1.0, [],
                                0.0, 1.0
                            ])
                            counter += 1
                            used += neighbor_list
        eta = ((time.time() - start) / (index + 1)) * (shape_grid_size -
                                                       (index + 1))
        update_progress(
            float(index + 1) / shape_grid_size,
            'Progress of exclusion volume generation', eta)
        logger.debug('Passed grid index {}.'.format(index))
    update_user(
        'Finished with generation of {} exclusion volumes.'.format(
            len(exclusion_volumes)), logger)
    return exclusion_volumes
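The magic numbers in the border checks above follow from the cubic grid geometry: with 0.5 A spacing, an interior grid point has exactly 33 lattice neighbors within 1.0 A (grid_space * 2, itself included) and 7 within 0.5 A. A quick self-contained check:

import numpy as np
from scipy.spatial import cKDTree

# build a small cubic grid with 0.5 A spacing around the origin
axis = np.arange(-2.0, 2.5, 0.5)
points = np.array([[x, y, z] for x in axis for y in axis for z in axis])
tree = cKDTree(points)
print(len(tree.query_ball_point([0.0, 0.0, 0.0], r=1.0)))  # 33
print(len(tree.query_ball_point([0.0, 0.0, 0.0], r=0.5)))  # 7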
Example #3
def generate_features(positions, feature_scores, feature_type,
                      features_per_feature_type, directory, partner_path,
                      debugging, total_number_of_features, start,
                      feature_counter, results):
    """ This function generates features with variable tolerance based on a global maximum search algorithm. The
    features are described with a list of properties as follows.

    Format:
    ------------------------------------------------------------------------
    0   1 2             3   4                             5          6     7
    ------------------------------------------------------------------------
    0  hi M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    1  pi M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    2  ni M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    3  hd M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999   1.0
    4  ha M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999   1.0
    5 hd2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    6 ha2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    7 hda M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    8  ai M [0.0,0.0,0.0] 1.5               [[1.0,0.0,0.0]] 0.43633232   1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance
    5 - partner positions (for hda features, donor coordinates first, then acceptor)
    6 - partner tolerance
    7 - weight
    """
    logger = setup_logger('_'.join(['features', feature_type]), directory,
                          debugging)
    if partner_path is None:
        partner_path = directory + '/data'
    if feature_type in grid_list_dict.keys():
        partners = pickle_reader(partner_path + '/' + feature_type + '.pkl',
                                 feature_type + '.pkl', logger)
    else:
        partners = [[]] * len(positions)
    score_minimum = 1
    tree = cKDTree(positions)
    generated_features = []
    not_used = list(range(len(feature_scores)))
    used = []
    while feature_scores[not_used].max() >= score_minimum:
        feature_maximum = feature_scores[not_used].max()
        logger.debug(
            'Feature {} maximum of remaining grid points at {}.'.format(
                feature_type, feature_maximum))
        indices_not_checked = np.where(
            abs(feature_scores - feature_maximum) < 1e-8)[0]
        indices = []
        # check if grid points within minimum tolerance are already used for features
        for index_not_checked in indices_not_checked:
            feature_indices = tree.query_ball_point(
                positions[index_not_checked], r=1.5)
            if len(feature_indices) + len(used) == len(
                    set(feature_indices + used)):
                indices.append(index_not_checked)
            else:
                not_used = [x for x in not_used if x != index_not_checked]
        if len(indices) > 0:
            # check if only one grid point
            if len(indices) == 1:
                index = indices[0]
                core_tolerance, feature_indices = get_core_tolerance(
                    positions[index], tree, feature_scores, feature_maximum)
            # if more than one grid point, search for the ones with the biggest tolerance
            else:
                core_tolerance, indices_maximal_tolerance, feature_indices_list = \
                    get_maximal_core_tolerance(indices, positions, tree, feature_scores, feature_maximum)
                # if more than one grid point with biggest tolerance, search for the one with the biggest sum of scores
                if len(indices_maximal_tolerance) > 1:
                    index, feature_indices = get_maximal_sum_of_scores(
                        feature_scores, indices_maximal_tolerance,
                        feature_indices_list)
                else:
                    index = indices_maximal_tolerance[0]
                    feature_indices = feature_indices_list[0]
            if len(feature_indices) + len(used) > len(
                    set(feature_indices + used)):
                not_used = [x for x in not_used if x != index]
                used.append(index)
            else:
                generated_features.append([
                    index, feature_type, 'M', positions[index], core_tolerance,
                    get_partner_positions(feature_type, partners[index]),
                    get_partner_tolerance(feature_type, core_tolerance), 1.0
                ])
                not_used = [x for x in not_used if x not in feature_indices]
                used += feature_indices
                with feature_counter.get_lock():
                    feature_counter.value += 1
                update_progress(
                    feature_counter.value / total_number_of_features,
                    'Progress of feature generation',
                    ((time.time() - start) / feature_counter.value) *
                    (total_number_of_features - feature_counter.value))
            if len(generated_features) >= features_per_feature_type:
                break
    if len(generated_features) < features_per_feature_type:
        with feature_counter.get_lock():
            feature_counter.value += features_per_feature_type - len(
                generated_features)
        update_progress(feature_counter.value / total_number_of_features,
                        'Progress of feature generation',
                        ((time.time() - start) / feature_counter.value) *
                        (total_number_of_features - feature_counter.value))
    results += generated_features
    return
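For reference, a record appended to generated_features above follows the documented layout; for example, a hydrogen bond donor feature (values illustrative only):

hd_feature = [3, 'hd', 'M', [0.0, 0.0, 0.0], 1.5, [[3.0, 0.0, 0.0]],
              1.9499999, 1.0]
# index, type, flag, core position, core tolerance, partner positions,
# partner tolerance, weight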
Example #4
 parser = argparse.ArgumentParser(
     prog='PyRod',
     description='\n'.join(logo),
     formatter_class=argparse.RawTextHelpFormatter)
 parser.add_argument('conf', help='path to configuration file')
 parser.add_argument('--verbose',
                     dest='debugging',
                     action='store_true',
                     help='verbose logging for debugging')
 args = parser.parse_args()
 conf = args.conf
 debugging = args.debugging
 config = configparser.ConfigParser()
 config.read(conf)
 directory = config.get('directory', 'directory')
 if len(directory) == 0:
     directory = os.getcwd() + '/pyrod'
 logger = setup_logger('main', directory, debugging)
 update_user('\n'.join(logo), logger)
 logger.debug('\n'.join(
     [': '.join(list(_)) for _ in config.items('directory')]))
 # defining grid
 if config.has_section('test grid parameters'):
     logger.debug('\n'.join([
         ': '.join(list(_)) for _ in config.items('test grid parameters')
     ]))
     center, edge_lengths, name = test_grid_parameters(config)
     # determine space resulting in less than 100000 grid points
     space = 0.5
     space_found = False
     while not space_found:
         grid = generate_grid(center, edge_lengths, space)
         if len(grid) < 100000:
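The snippet is truncated here. The loop presumably accepts the grid once it is below 100000 points and otherwise coarsens the spacing; a standalone sketch of that search (find_test_grid_space and the point-count estimate are assumptions, not the original code):

import numpy as np

def find_test_grid_space(edge_lengths, max_points=100000):
    """ Hypothetical sketch: coarsen the grid spacing until a regular grid
    over the given edge lengths stays below max_points. """
    space = 0.5
    while True:
        points_per_axis = [int(edge // space) + 1 for edge in edge_lengths]
        if np.prod(points_per_axis) < max_points:
            return space
        space += 0.5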
Example #5
def ensemble_to_centroid(topology, trajectories, output_name, directory,
                         debugging):
    logger = setup_logger('ensembles_to_centroid', directory, debugging)
    output_directory = '/'.join([directory, output_name])
    protein_topology = '/'.join([output_directory, 'protein.pdb'])
    protein_trajectories = []
    frames = []
    # check for frames in the trajectory files, delete empty trajectory files, collect frames in a list
    for x in range(len(trajectories)):
        with open('{}/frames_{}.csv'.format(output_directory, x), 'r') as csv:
            frames += csv.readlines()
        os.remove('{}/frames_{}.csv'.format(output_directory, x))
        try:
            mda.Universe(
                protein_topology,
                '/'.join([output_directory, 'ensemble_{}.dcd'.format(x)]))
            protein_trajectories.append(x)
        except OSError:
            os.remove('/'.join([output_directory,
                                'ensemble_{}.dcd'.format(x)]))
    if len(protein_trajectories) > 0:
        # info to user
        if len(frames) > 1:
            update_user(
                'Getting centroid from {} protein conformations.'.format(
                    len(frames)), logger)
        else:
            update_user('Found only 1 protein conformation.', logger)
        # merge trajectories into one file
        protein_trajectories = [
            '/'.join([output_directory, 'ensemble_{}.dcd'.format(x)])
            for x in protein_trajectories
        ]
        u = mda.Universe(protein_topology, protein_trajectories)
        with mda.Writer('/'.join([output_directory, 'ensemble.dcd']),
                        n_atoms=u.atoms.n_atoms) as DCD:
            for _ in u.trajectory:
                DCD.write(u.atoms)
        # remove sub trajectories
        for path in protein_trajectories:
            os.remove(path)
        # find centroid of frames
        if len(frames) > 1:
            u = mda.Universe(protein_topology,
                             '/'.join([output_directory, 'ensemble.dcd']))
            conf_dist_matrix = encore.confdistmatrix.get_distance_matrix(
                u,
                selection='all',
                superimpose=True,
                n_job=1,
                weights='mass',
                metadata=False,
                verbose=False)
            centroid = conf_dist_matrix.as_array().sum(axis=1).argmin()
        else:
            centroid = 0
        # write centroid
        u = mda.Universe(topology,
                         trajectories[int(frames[centroid].split()[0])])
        file_path('centroid.pdb', output_directory)
        with mda.Writer('/'.join([output_directory, 'centroid.pdb']),
                        bonds=None,
                        n_atoms=u.atoms.n_atoms) as PDB:
            for _ in u.trajectory[int(frames[centroid].split()[1]
                                      ):int(frames[centroid].split()[1]) + 1:]:
                PDB.write(u.atoms)
        # write csv with frame references
        file_path('frames.csv', output_directory)
        frames[centroid] = '{}\t{}\t{}\n'.format(frames[centroid].split()[0],
                                                 frames[centroid].split()[1],
                                                 'centroid')
        with open('{}/frames.csv'.format(output_directory), 'w') as csv:
            csv.write(''.join(['trajectory\tframe\tcentroid\n'] + frames))
    else:
        update_user('No protein conformations found.', logger)
        sys.exit()
    return
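The centroid rule above (conf_dist_matrix.as_array().sum(axis=1).argmin()) picks the conformation with the smallest summed distance to all other conformations in the pairwise distance matrix. A toy illustration with a made-up matrix:

import numpy as np

dist = np.array([[0.0, 1.0, 4.0],
                 [1.0, 0.0, 2.0],
                 [4.0, 2.0, 0.0]])
print(dist.sum(axis=1).argmin())  # row sums 5.0, 3.0, 6.0 -> centroid is 1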
Example #6
def trajectory_analysis(topology, trajectory, grid_score, grid_partners,
                        frame_counter, total_number_of_frames, first_frame,
                        last_frame, step_size, metal_names, counter, directory,
                        debugging, get_partners, trajectory_time, results):
    logger = setup_logger('_'.join(['dmif_trajectory',
                                    str(counter)]), directory, debugging)
    logger.info('Started analysis of trajectory {}.'.format(counter))
    if debugging:
        u = mda.Universe(topology, trajectory)
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            u = mda.Universe(topology, trajectory)
    dtype = [('atomid', int), ('resname', 'U10'), ('resid', int),
             ('name', 'U10'), ('type', 'U10')]
    topology = np.array([(a, b, c, d, e) for a, b, c, d, e in zip(
        range(len(u.atoms.resnames)), u.atoms.resnames, u.atoms.resids,
        u.atoms.names, u.atoms.types)],
                        dtype=dtype)
    positions = np.array([[
        x, y, z
    ] for x, y, z in zip(grid_score['x'], grid_score['y'], grid_score['z'])])
    x_minimum, x_maximum, y_minimum, y_maximum, z_minimum, z_maximum = grid_characteristics(
        positions)[:-1]
    tree = cKDTree(positions)
    main_atoms = main_selection(topology)
    hd_atomids, hd_types, hd_hydrogen_atomid_lists = hd_selection(main_atoms)
    ha_atomids, ha_types = ha_selection(main_atoms)
    hi_atomids = hi_selection(main_atoms)
    ni_atomids = ni_selection(main_atoms)
    pi_atomids = pi_selection(main_atoms)
    ai_atomids = ai_selection(main_atoms)
    metal_atomids = metal_selection(topology, metal_names)
    for frame, _ in enumerate(u.trajectory[first_frame:last_frame:step_size]):
        # create index collectors
        shape_inds = []
        ha_inds = []
        ha2_inds = []
        hd_inds = []
        hd2_inds = []
        hda_inds = []
        tw_inds = []
        h2o_inds = []
        positions = u.atoms.positions
        h2os_os_box_inds = topology[((topology['resname'] == 'HOH') &
                                     (topology['name'] == 'O') &
                                     (positions[:, 0] >= x_minimum) &
                                     (positions[:, 0] <= x_maximum) &
                                     (positions[:, 1] >= y_minimum) &
                                     (positions[:, 1] <= y_maximum) &
                                     (positions[:, 2] >= z_minimum) &
                                     (positions[:, 2] <= z_maximum))]['atomid']
        if len(h2os_os_box_inds) > 0:
            tree_h2os = cKDTree(positions[h2os_os_box_inds])
            if len(hd_atomids) > 0:
                hd_positions = positions[hd_atomids]
                hd_lists = tree_h2os.query_ball_tree(cKDTree(hd_positions),
                                                     sel_cutoff_dict['hb'])
            else:
                hd_positions = []
                hd_lists = [[]] * len(h2os_os_box_inds)
            if len(ha_atomids) > 0:
                ha_positions = positions[ha_atomids]
                ha_lists = tree_h2os.query_ball_tree(cKDTree(ha_positions),
                                                     sel_cutoff_dict['hb'])
            else:
                ha_positions = []
                ha_lists = [[]] * len(h2os_os_box_inds)
            if len(hi_atomids) > 0:
                hi_positions = positions[hi_atomids]
                hi_lists = tree_h2os.query_ball_tree(cKDTree(hi_positions),
                                                     sel_cutoff_dict['hi'])
            else:
                hi_positions = []
                hi_lists = [[]] * len(h2os_os_box_inds)
            if len(ni_atomids) > 0:
                ni_positions = [((x + y) / 2) for x, y in zip(
                    positions[ni_atomids[::2]], positions[ni_atomids[1::2]])]
                ni_lists = tree_h2os.query_ball_tree(cKDTree(ni_positions),
                                                     sel_cutoff_dict['ii'])
            else:
                ni_positions = []
                ni_lists = [[]] * len(h2os_os_box_inds)
            if len(pi_atomids) > 0:
                pi_positions = [((x + y) / 2) for x, y in zip(
                    positions[pi_atomids[::2]], positions[pi_atomids[1::2]])]
                pi_lists = tree_h2os.query_ball_tree(cKDTree(pi_positions),
                                                     sel_cutoff_dict['ii'])
            else:
                pi_positions = []
                pi_lists = [[]] * len(h2os_os_box_inds)
            if len(ai_atomids) > 0:
                ai_positions = [
                    ((x + y + z) / 3)
                    for x, y, z in zip(positions[ai_atomids[::3]], positions[
                        ai_atomids[1::3]], positions[ai_atomids[2::3]])
                ]
                ai_normals = [
                    normal(a, b, c) for a, b, c in zip(
                        positions[ai_atomids[::3]], ai_positions, positions[
                            ai_atomids[2::3]])
                ]
                ai_lists = tree_h2os.query_ball_tree(cKDTree(ai_positions),
                                                     sel_cutoff_dict['ai'])
            else:
                ai_positions = []
                ai_normals = []
                ai_lists = [[]] * len(h2os_os_box_inds)
            if len(metal_atomids) > 0:
                metal_positions = positions[metal_atomids]
                metal_lists = tree_h2os.query_ball_tree(
                    cKDTree(metal_positions), sel_cutoff_dict['metal'])
            else:
                metal_positions = []
                metal_lists = [[]] * len(h2os_os_box_inds)
        else:
            h2os_os_box_inds = []
            hd_positions, ha_positions, hi_positions, ni_positions = [], [], [], []
            pi_positions, ai_positions, ai_normals, metal_positions = [], [], [], []
            hd_lists, ha_lists, hi_lists, ni_lists, pi_lists, ai_lists, metal_lists = [], [], [], [], [], [], []
        for o_ind, hd_list, ha_list, hi_list, ni_list, pi_list, ai_list, metal_list in \
                zip(h2os_os_box_inds, hd_lists, ha_lists, hi_lists, ni_lists, pi_lists, ai_lists, metal_lists):
            ha, ha_i, hd, hd_i, hi, pi, ni, ai, ai_i, ai_n = 0, [], 0, [], 0, 0, 0, 0, [], []
            o_coor, h1_coor, h2_coor = positions[o_ind], positions[
                o_ind + 1], positions[o_ind + 2]
            # hydrogen bond acceptor feature
            for hd_ind in hd_list:
                hd_coor, hd_type, hd_hydrogen_coors = [
                    hd_positions[hd_ind], hd_types[hd_ind],
                    positions[hd_hydrogen_atomid_lists[hd_ind]]
                ]
                if distance(o_coor, hd_coor) <= hb_dist_dict[hd_type]:
                    for hd_hydrogen_coor in hd_hydrogen_coors:
                        if angle(o_coor, hd_hydrogen_coor,
                                 hd_coor) >= hb_angl_dict[hd_type]:
                            ha += 1
                            ha_i += [float(x) for x in hd_coor]
            # hydrogen bond donor feature
            for ha_ind in ha_list:
                ha_coor, ha_type = ha_positions[ha_ind], ha_types[ha_ind]
                if distance(o_coor, ha_coor) <= hb_dist_dict[ha_type]:
                    for h_coor in [h1_coor, h2_coor]:
                        if angle(ha_coor, h_coor,
                                 o_coor) >= hb_angl_dict[ha_type]:
                            hd += 1
                            hd_i += [float(x) for x in ha_coor]
            # metals
            for metal_ind in metal_list:
                metal_position = metal_positions[metal_ind]
                ha += 1
                ha_i += [float(x) for x in metal_position]
                ni += 2.6 / distance(o_coor, metal_position)
            # indices of points close to water
            inds = tree.query_ball_point(o_coor, r=1.41)
            h2o_inds += inds
            # trapped water molecules
            if hd + ha > 2:
                tw_inds += inds
            # water molecule is replaceable/displaceable
            else:
                # shape
                shape_inds += inds
                # hydrogen bond features
                if hd == 0:
                    # single
                    if ha == 1:
                        ha_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][
                                    grid_list_dict['ha']] += ha_i
                    # double
                    elif ha == 2:
                        ha2_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][
                                    grid_list_dict['ha2']] += ha_i
                # single hydrogen bond donors
                elif hd == 1:
                    # single donor
                    if ha == 0:
                        hd_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][
                                    grid_list_dict['hd']] += hd_i
                    # mixed donor acceptor
                    elif ha == 1:
                        hda_inds += inds
                        if get_partners:
                            for ind in inds:
                                grid_partners[ind][
                                    grid_list_dict['hda']][0] += hd_i
                                grid_partners[ind][
                                    grid_list_dict['hda']][1] += ha_i
                else:
                    # double hydrogen bond donor
                    hd2_inds += inds
                    if get_partners:
                        for ind in inds:
                            grid_partners[ind][grid_list_dict['hd2']] += hd_i
                # ionizable interactions and cation-pi interactions
                # negative ionizable and cation-pi interactions
                for pi_ind in pi_list:
                    pi_i = pi_positions[pi_ind]
                    # negative ionizable interaction
                    ni += 2.6 / distance(o_coor, pi_i)
                    # cation-pi interaction
                    for ind in inds:
                        grid_point = [
                            grid_score['x'][ind], grid_score['y'][ind],
                            grid_score['z'][ind]
                        ]
                        pi_distance = distance(grid_point, pi_i)
                        if 3.1 <= pi_distance <= 6.0:
                            grid_score['ai'][
                                ind] += cation_pi_distance_score_dict[round(
                                    pi_distance, 1)]
                            if get_partners:
                                grid_partners[ind][grid_list_dict['ai']] += [
                                    float(x) for x in pi_i
                                ]
                # positive ionizable
                for ni_ind in ni_list:
                    pi += 2.6 / distance(o_coor, ni_positions[ni_ind])
                # add ionizable interaction score
                if pi > 0:
                    grid_score['pi'][inds] += pi
                    grid_score['ni'][inds] -= pi
                if ni > 0:
                    grid_score['ni'][inds] += ni
                    grid_score['pi'][inds] -= ni
                # hydrophobic interactions
                if len(hi_list) > 0:
                    hi += 1
                    if len(hi_list) > 1:
                        hi += buriedness(o_coor, hi_positions[hi_list])
                if hi > 0:
                    grid_score['hi_norm'][inds] += hi
                    # no charged environment (chained comparison: ni < 0.65 and pi < 0.65)
                    if ni < 0.65 > pi:
                        grid_score['hi'][inds] += hi
                # aromatic interactions grid point wise
                for ai_ind in ai_list:
                    ai_i = ai_positions[ai_ind]
                    ai_n = ai_normals[ai_ind]
                    for ind in inds:
                        grid_point = [
                            grid_score['x'][ind], grid_score['y'][ind],
                            grid_score['z'][ind]
                        ]
                        ai_distance = distance(grid_point, ai_i)
                        if 3.1 <= ai_distance <= 6.0:
                            ai_vector = vector(ai_i, grid_point)
                            ai_n, alpha = ai_geometry(ai_vector, ai_n)
                            # cation-pi interactions
                            if alpha <= CATION_PI_ANGLE_CUTOFF:
                                grid_score['pi'][
                                    ind] += cation_pi_distance_score_dict[
                                        round(ai_distance, 1)]
                            # pi- and t-stacking
                            if ai_distance >= 3.3:
                                # pi- and t-stacking with pi-system of protein aromatic center
                                if alpha < 45:
                                    offset = opposite(alpha, ai_distance)
                                    # pi-stacking
                                    if ai_distance <= 4.7:
                                        # check offset between grid point and aromatic center
                                        if offset <= 2.0:
                                            grid_score['ai'][
                                                ind] += pi_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                                            if get_partners:
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    pi_stacking_partner_position(grid_point, ai_n, ai_distance, alpha)
                                    # t-stacking
                                    else:
                                        # check offset between grid point and aromatic center
                                        if offset <= 0.5:
                                            grid_score['ai'][
                                                ind] += t_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                                            if get_partners:
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    t_stacking_partner_position(ai_i, grid_point, ai_n, offset,
                                                                                ai_distance, alpha, True)
                                # t-stacking with hydrogen of protein aromatic center
                                else:
                                    if ai_distance >= 4.6:
                                        # check offset between grid point and aromatic center
                                        offset = adjacent(alpha, ai_distance)
                                        if offset <= 0.5:
                                            grid_score['ai'][
                                                ind] += t_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                                            if get_partners:
                                                ai_n2 = cross_product(
                                                    ai_n,
                                                    cross_product(
                                                        ai_n, ai_vector))
                                                ai_n2, alpha = ai_geometry(
                                                    ai_vector, ai_n2)
                                                grid_partners[ind][grid_list_dict['ai']] += \
                                                    t_stacking_partner_position(ai_i, grid_point, ai_n2, offset,
                                                                                ai_distance, alpha)
        # adding scores to grid
        grid_score['shape'][shape_inds] += 1
        grid_score['ha'][ha_inds] += 1
        grid_score['ha2'][ha2_inds] += 1
        grid_score['hd'][hd_inds] += 1
        grid_score['hd2'][hd2_inds] += 1
        grid_score['hda'][hda_inds] += 1
        grid_score['tw'][tw_inds] += 1
        grid_score['h2o'][h2o_inds] += 1
        with frame_counter.get_lock():
            frame_counter.value += 1
        update_progress(
            frame_counter.value / total_number_of_frames,
            'Progress of trajectory analysis',
            ((time.time() - trajectory_time) / frame_counter.value) *
            (total_number_of_frames - frame_counter.value))
        logger.debug('Trajectory {} finished with frame {}.'.format(
            counter, frame))
    logger.info('Finished analysis of trajectory {}.'.format(counter))
    # grid partners to numpy array
    grid_partners = grid_partners_to_array(grid_partners)
    results.append([grid_score, grid_partners])
    return
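Most of the per-frame bookkeeping above relies on cKDTree.query_ball_tree, which returns one list per point in the first tree holding the indices of points in the second tree within the cutoff. A minimal illustration of the pattern with made-up coordinates:

import numpy as np
from scipy.spatial import cKDTree

waters = cKDTree(np.array([[0.0, 0.0, 0.0], [10.0, 0.0, 0.0]]))
donors = cKDTree(np.array([[1.0, 0.0, 0.0], [9.5, 0.0, 0.0]]))
print(waters.query_ball_tree(donors, r=2.0))  # [[0], [1]]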
Example #7
def screen_protein_conformations(topology, trajectory, pharmacophore_path,
                                 ligand_path, counter, first_frame, last_frame,
                                 step_size, metal_names, directory,
                                 output_name, debugging,
                                 total_number_of_frames, frame_counter,
                                 trajectory_time):
    dcd_name = 'ensemble_' + str(counter) + '.dcd'
    output_directory = '/'.join([directory, output_name])
    file_path(dcd_name, output_directory)
    logger = setup_logger(
        '_'.join(['screen_protein_conformations',
                  str(counter)]), directory, debugging)
    logger.info(
        'Started screening of protein conformations in trajectory {}.'.format(
            counter))
    ligand_positions = None
    if debugging:
        u = mda.Universe(topology, trajectory)
        if ligand_path:
            ligand_positions = mda.Universe(ligand_path).atoms.positions
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            u = mda.Universe(topology, trajectory)
            if ligand_path:
                ligand_positions = mda.Universe(ligand_path).atoms.positions
    protein = u.select_atoms('protein')
    dtype = [('atomid', int), ('resname', 'U10'), ('resid', int),
             ('name', 'U10'), ('type', 'U10')]
    topology = np.array([(a, b, c, d, e) for a, b, c, d, e in zip(
        range(len(u.atoms.resnames)), u.atoms.resnames, u.atoms.resids,
        u.atoms.names, u.atoms.types)],
                        dtype=dtype)
    main_atoms = main_selection(topology)
    main_atomids = main_atoms['atomid']
    heavy_atomids = heavy_atom_selection(main_atoms)
    hd_atomids, hd_types, hydrogen_atomid_lists = hd_selection(main_atoms)
    ha_atomids = ha_selection(main_atoms)[0]
    hi_atomids = hi_selection(main_atoms)
    ni_atomids = ni_selection(main_atoms)
    pi_atomids = pi_selection(main_atoms)
    ai_atomids = ai_selection(main_atoms)
    metal_atomids = metal_selection(topology, metal_names)
    features = [
        feature
        for feature in pharmacophore_reader(pharmacophore_path, False, logger)
        if feature[1] != 'ev'
    ]
    if counter == 0:
        file_path('protein.pdb', output_directory)
        if debugging:
            with mda.Writer('/'.join([output_directory, 'protein.pdb']),
                            bonds=None,
                            n_atoms=protein.n_atoms) as PDB:
                PDB.write(protein)
        else:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                with mda.Writer('/'.join([output_directory, 'protein.pdb']),
                                bonds=None,
                                n_atoms=protein.n_atoms) as PDB:
                    PDB.write(protein)
    frame_collector = []
    with mda.Writer('/'.join([output_directory, dcd_name]),
                    n_atoms=protein.n_atoms) as DCD:
        for frame, _ in enumerate(
                u.trajectory[first_frame:last_frame:step_size]):
            positions = u.atoms.positions
            matched_features = 0
            for feature in features:
                ai, ha, hd, pi, ni, cation_pi, hi = 0, 0, 0, 0, 0, 0, 0
                feature_type = feature[1]
                feature_position = np.array(feature[3])
                partner_position = []
                if feature_type in ['ha', 'hd', 'ha2', 'hd2', 'ai']:
                    partner_position = np.array(feature[5][0])
                partner_tolerance = feature[6]
                feature_score = feature[7]
                # hydrogen bonds and metal interaction
                if feature_type == 'hd':
                    if len(ha_atomids) > 0:
                        ha_positions = positions[ha_atomids]
                        ha += np.sum((cdist(partner_position.reshape(
                            1, 3), ha_positions) <= partner_tolerance)[0])
                    if ha == 0:
                        break
                    else:
                        matched_features += 1
                elif feature_type == 'ha':
                    if len(hd_atomids) > 0:
                        hd_positions = positions[hd_atomids]
                        hd_bools = (cdist(partner_position.reshape(
                            1, 3), hd_positions) <= partner_tolerance)[0]
                        matched_hd_positions = hd_positions[hd_bools]
                        matched_hd_types = hd_types[hd_bools]
                        matched_hydrogen_atomid_lists = hydrogen_atomid_lists[
                            hd_bools]
                        for matched_hd_position, matched_hd_type, matched_hydrogen_atomid_list in \
                                zip(matched_hd_positions, matched_hd_types, matched_hydrogen_atomid_lists):
                            for matched_hydrogen_atomid in matched_hydrogen_atomid_list:
                                if angle(feature_position, positions[matched_hydrogen_atomid], matched_hd_position) >= \
                                        hb_angl_dict[matched_hd_type]:
                                    hd = 1
                    if len(metal_atomids) > 0:
                        metal_positions = positions[metal_atomids]
                        hd += np.sum((cdist(partner_position.reshape(
                            1, 3), metal_positions) <= partner_tolerance)[0])
                    if hd == 0:
                        break
                    else:
                        matched_features += 1
                elif feature_type in ['hi', 'pi', 'ni']:
                    if len(ni_atomids) > 0:
                        ni_positions = np.array([
                            ((x + y) / 2)
                            for x, y in zip(positions[ni_atomids[::2]],
                                            positions[ni_atomids[1::2]])
                        ])
                        ni_positions = ni_positions[(cdist(
                            feature_position.reshape(1, 3), ni_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for ni_position in ni_positions:
                            pi += 2.6 / distance(feature_position, ni_position)
                    if len(pi_atomids) > 0:
                        pi_positions = np.array([
                            ((x + y) / 2)
                            for x, y in zip(positions[pi_atomids[::2]],
                                            positions[pi_atomids[1::2]])
                        ])
                        pi_positions = pi_positions[(cdist(
                            feature_position.reshape(1, 3), pi_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for pi_position in pi_positions:
                            ni += 2.6 / distance(feature_position, pi_position)
                    if len(metal_atomids) > 0:
                        metal_positions = positions[metal_atomids]
                        metal_booleans = (cdist(feature_position.reshape(1, 3),
                                                metal_positions) <=
                                          sel_cutoff_dict['metal'])[0]
                        metal_positions = metal_positions[metal_booleans]
                        for metal_position in metal_positions:
                            ni += 2.6 / distance(feature_position,
                                                 metal_position)
                    if feature_type == 'hi':
                        if len(hi_atomids) > 0:
                            # require uncharged protein environment (break if ni >= 0.65 and pi >= 0.65)
                            if ni >= 0.65 <= pi:
                                break
                            hi_positions = positions[hi_atomids]
                            hi_positions = hi_positions[(cdist(
                                feature_position.reshape(1, 3),
                                hi_positions) <= sel_cutoff_dict['hi'])[0]]
                            if len(hi_positions) > 0:
                                hi += 1
                                if len(hi_positions) > 1:
                                    hi += buriedness(feature_position,
                                                     hi_positions)
                            # check if the hydrophobic score reaches the feature score
                            if hi < feature_score:
                                break
                            else:
                                matched_features += 1
                    elif feature_type == 'pi':
                        if len(ai_atomids) > 0:
                            # cation-pi interactions
                            ai_positions = np.array([
                                ((x + y + z) / 3)
                                for x, y, z in zip(positions[ai_atomids[::3]],
                                                   positions[ai_atomids[1::3]],
                                                   positions[ai_atomids[2::3]])
                            ])
                            ai_normals = np.array([
                                normal(a, b, c) for a, b, c in zip(
                                    positions[ai_atomids[::3]], ai_positions,
                                    positions[ai_atomids[2::3]])
                            ])
                            ai_booleans = (cdist(
                                feature_position.reshape(1, 3), ai_positions)
                                           <= sel_cutoff_dict['ai'])[0]
                            ai_positions = ai_positions[ai_booleans]
                            ai_normals = ai_normals[ai_booleans]
                            for ai_i, ai_n in zip(ai_positions, ai_normals):
                                ai_distance = distance(ai_i, feature_position)
                                if 3.1 <= ai_distance <= 6.0:
                                    ai_n, alpha = ai_geometry(
                                        vector(ai_i, feature_position), ai_n)
                                    if alpha <= CATION_PI_ANGLE_CUTOFF:
                                        cation_pi += cation_pi_distance_score_dict[
                                            round(ai_distance, 1)]
                        if pi + cation_pi - ni < feature_score:
                            break
                        else:
                            matched_features += 1
                    elif feature_type == 'ni':
                        if ni - pi < feature_score:
                            break
                        else:
                            matched_features += 1
                elif feature_type == 'ai':
                    if len(pi_atomids) > 0:
                        # cation-pi interaction
                        pi_positions = np.array([
                            ((x + y) / 2)
                            for x, y in zip(positions[pi_atomids[::2]],
                                            positions[pi_atomids[1::2]])
                        ])
                        pi_positions = pi_positions[(cdist(
                            feature_position.reshape(1, 3), pi_positions) <=
                                                     sel_cutoff_dict['ii'])[0]]
                        for pi_position in pi_positions:
                            pi_distance = distance(pi_position,
                                                   feature_position)
                            if 3.1 <= pi_distance <= 6.0:
                                alpha = ai_geometry(
                                    vector(pi_position, feature_position),
                                    partner_position)[1]
                                if alpha <= CATION_PI_ANGLE_CUTOFF:
                                    ai += cation_pi_distance_score_dict[round(
                                        pi_distance, 1)]
                    if len(ai_atomids) > 0:
                        # aromatic interactions
                        ai_positions = np.array([
                            ((x + y + z) / 3)
                            for x, y, z in zip(positions[
                                ai_atomids[::3]], positions[ai_atomids[1::3]],
                                               positions[ai_atomids[2::3]])
                        ])
                        ai_normals = np.array([
                            normal(a, b, c) for a, b, c in zip(
                                positions[ai_atomids[::3]], ai_positions,
                                positions[ai_atomids[2::3]])
                        ])
                        ai_booleans = (cdist(feature_position.reshape(
                            1, 3), ai_positions) <= sel_cutoff_dict['ai'])[0]
                        ai_positions = ai_positions[ai_booleans]
                        ai_normals = ai_normals[ai_booleans]
                        for ai_i, ai_n in zip(ai_positions, ai_normals):
                            ai_distance = distance(ai_i, feature_position)
                            if 3.3 <= ai_distance <= 6.0:
                                ai_vector = vector(ai_i, feature_position)
                                ai_n, alpha = ai_geometry(ai_vector, ai_n)
                                angle_tolerance = math.degrees(
                                    partner_tolerance)
                                # pi- and t-stacking with pi-system of protein aromatic center
                                if alpha < 45:
                                    offset = opposite(alpha, ai_distance)
                                    # pi-stacking
                                    if ai_distance <= 4.7:
                                        # check offset between grid point and aromatic center
                                        if offset <= 2.0:
                                            # check angle between normals
                                            if vector_angle(
                                                    ai_n, partner_position
                                            ) <= angle_tolerance:
                                                ai += pi_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                                    # t-stacking
                                    else:
                                        # check offset between grid point and aromatic center
                                        if offset <= 0.5:
                                            # check angle between normals
                                            if (90 - angle_tolerance <=
                                                    vector_angle(
                                                        ai_n, partner_position)
                                                    <= 90 + angle_tolerance):
                                                ai += t_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                                # t-stacking with hydrogen of protein aromatic center
                                else:
                                    if ai_distance >= 4.6:
                                        offset = adjacent(alpha, ai_distance)
                                        # check offset between grid point and aromatic center
                                        if offset <= 0.5:
                                            if (90 - angle_tolerance <=
                                                    vector_angle(
                                                        ai_n, partner_position)
                                                    <= 90 + angle_tolerance):
                                                ai += t_stacking_distance_score_dict[
                                                    round(ai_distance, 1)]
                    if ai < feature_score:
                        break
                    else:
                        matched_features += 1
            if matched_features == len(features):
                clash = False
                heavy_atom_positions = positions[heavy_atomids]
                for feature in features:
                    core_tolerance = feature[4]
                    if cdist(np.array([feature[3]]),
                             heavy_atom_positions).min() < core_tolerance:
                        clash = True
                if not clash:
                    if ligand_path:
                        main_positions = positions[main_atomids]
                        if cdist(main_positions,
                                 ligand_positions).min() < CLASH_CUTOFF:
                            clash = True
                if not clash:
                    DCD.write(protein)
                    frame_collector.append(frame + first_frame)
            logger.debug('Trajectory {} finished with frame {}.'.format(
                counter, frame))
            with frame_counter.get_lock():
                frame_counter.value += 1
            update_progress(
                frame_counter.value / total_number_of_frames,
                'Progress of trajectory analysis',
                ((time.time() - trajectory_time) / frame_counter.value) *
                (total_number_of_frames - frame_counter.value))
    logger.info('Finished screening of trajectory {}.'.format(counter))
    with open('{}/frames_{}.csv'.format(output_directory, counter),
              'w') as csv:
        for frame in frame_collector:
            csv.write('{}\t{}\n'.format(counter, frame))
    return
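The feature matching above repeatedly uses the same SciPy pattern: cdist between a single feature position (reshaped to 1 x 3) and an array of candidate atom positions, thresholded into a boolean mask. A minimal illustration with made-up coordinates:

import numpy as np
from scipy.spatial.distance import cdist

partner_position = np.array([1.0, 0.0, 0.0])
ha_positions = np.array([[0.5, 0.0, 0.0], [5.0, 0.0, 0.0]])
within = (cdist(partner_position.reshape(1, 3), ha_positions) <= 1.95)[0]
print(within, within.sum())  # [ True False] 1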