Пример #1
0
def pml_pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function parses a pml (LigandScout) pharmacophore file into the internal feature list.

    The pharmacophore element is either the root of the xml document or a direct child of the root. If several
    pharmacophore children are present, the user is asked whether to continue with the first one; the program exits
    if the answer is 'no' or if no pharmacophore element can be found.

    Parameters:
    path - path to the pml file
    pyrod_pharmacophore - if true, features are interpreted as pyrod features and split hydrogen bond features are
                          merged afterwards
    logger - logger handed to the helper functions for user messages

    Returns a list of features, one entry per child element of the selected pharmacophore element.
    """
    tree = et.parse(path)
    root = tree.getroot()
    if root.tag != 'pharmacophore':
        candidates = tree.findall('pharmacophore')
        if len(candidates) == 0:
            update_user('Cannot find any pharmacophore in the pml file.',
                        logger)
            sys.exit()
        if len(candidates) > 1:
            # only one pharmacophore can be handled, let the user decide how to proceed
            answer = ''
            while answer not in ['yes', 'no']:
                answer = input(
                    'Pharmacophore file contains {} pharmacophores. Only one pharmacophore '
                    'can be processed at a time. Do you want to continue with the first '
                    'pharmacophore in the pml file? [yes/no]: '.format(
                        len(candidates)))
            if answer == 'no':
                sys.exit()
        root = candidates[0]
    features = [
        pml_feature(element, pyrod_pharmacophore, logger) for element in root
    ]
    if pyrod_pharmacophore:
        features = merge_pyrod_hydrogen_bonds(features, logger)
    return features
Пример #2
0
def merge_pyrod_hydrogen_bonds(pharmacophore, logger):
    """ This function joins split ha2, hd2 and hda entries sharing the same index into a single feature.

    For every index found among the ha2, hd2 and hda entries, the first entry with that index is kept and its empty
    partner slot (position 5 of the feature) is filled in place from the second entry with the same index. If no
    second entry exists, the user is informed and the program exits.

    Parameters:
    pharmacophore - list of features in the internal pharmacophore format
    logger - logger used for the error message

    Returns all non exclusion volume features sorted by index, followed by the exclusion volumes in original order.
    """
    merged_types = ['ha2', 'hd2', 'hda']
    regular_features, exclusion_volumes, split_features = [], [], []
    # partition the pharmacophore in a single pass
    for entry in pharmacophore:
        if entry[1] == 'ev':
            exclusion_volumes.append(entry)
        elif entry[1] in merged_types:
            split_features.append(entry)
        else:
            regular_features.append(entry)
    for feature_id in set(entry[0] for entry in split_features):
        parts = [entry for entry in split_features if entry[0] == feature_id]
        merged = parts[0]
        try:
            partners = merged[5]
            # fill whichever partner slot is still empty with the data of the second part
            if len(partners[0]) == 0:
                partners[0] = parts[1][5][0]
            elif len(partners[1]) == 0:
                partners[1] = parts[1][5][1]
        except IndexError:
            update_user(
                'The given pharmacophore contains incomplete hydrogen bonding features. Feature type is {} '
                'but only one interaction partner was found. Either create a new pharmacophore without '
                'splitting ha2, hd2 and hda features or set pyrod pharmacophore to false.'
                .format(merged[1]), logger)
            sys.exit()
        else:
            regular_features.append(merged)
    regular_features.sort(key=operator.itemgetter(0))
    return regular_features + exclusion_volumes
Пример #3
0
def pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function dispatches to the reader matching the file extension and returns the pharmacophore in the
    internal pharmacophore format. Supported extensions are pml and pdb, any other extension ends the program. If
    pyrod_pharmacophore is false, the features are renumbered before they are returned.

    Format:
    ------------------------------------------------------------------------
    0   1 2             3   4                             5          6     7
    ------------------------------------------------------------------------
    0  hi M [0.0,0.0,0.0] 1.5                            []        0.0 150.1
    1  pi M [0.0,0.0,0.0] 1.5                            []        0.0  30.1
    2  ni M [0.0,0.0,0.0] 1.5                            []        0.0  30.1
    3  hd M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999  30.1
    4  ha M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999  30.1
    5 hd2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999  30.1
    6 ha2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999  30.1
    7 hda M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999  30.1
    8  ai M [0.0,0.0,0.0] 1.5               [[1.0,0.0,0.0]] 0.43633232  30.1
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance
    5 - partner positions (hda feature with coordinates for first donor then acceptor)
    6 - partner tolerance
    7 - score
    """
    logger.info('Reading pharmacophore from {}.'.format(path))
    valid_formats = ['pml', 'pdb']
    # the format is taken from the text following the last dot of the path
    extension = path.rsplit('.', 1)[-1]
    if extension not in valid_formats:
        update_user(
            'Invalid pharmacophore format detected, only {} and {} are supported.'
            .format(', '.join(valid_formats[:-1]), valid_formats[-1]), logger)
        sys.exit()
    features = []
    if extension == 'pml':
        features = pml_pharmacophore_reader(path, pyrod_pharmacophore, logger)
    elif extension == 'pdb':
        features = pdb_pharmacophore_reader(path, pyrod_pharmacophore, logger)
    if pyrod_pharmacophore:
        return features
    return renumber_features(features)
Пример #4
0
def pdb_pharmacophore_reader(path, pyrod_pharmacophore, logger):
    """ This function reads pdb pharmacophores in pyrod format and stores them in an internal data structure.

    Only ATOM records are evaluated. A record with atom name C describes the core of a feature (residue number as
    index, residue name as type, chain as flag, coordinates as core position, occupancy column as core tolerance and
    b-factor column as score). A record with a P in the atom name describes an interaction partner of the feature
    with the same residue number (coordinates as partner position, occupancy column as partner tolerance).

    Parameters:
    path - path to the pdb file
    pyrod_pharmacophore - if false, the user is warned that the format is specific to pyrod pharmacophores
    logger - logger used for user messages

    Returns a list of features in the internal pharmacophore format. A feature is moved to the end of the list
    each time one of its partner records is read. The program exits if a partner record refers to a feature whose
    core record has not been read.
    """
    if not pyrod_pharmacophore:
        update_user(
            'This format is specific to pyrod pharmacophores. Make sure the format is correct.',
            logger)
    pharmacophore = []
    with open(path, 'r') as pharmacophore_file:
        for line in pharmacophore_file:
            if line[:6].strip() != 'ATOM':
                continue
            atom_name = line[12:16]
            position = [
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54])
            ]
            if atom_name.strip() == 'C':
                # core record, partner positions and partner tolerance are filled by later P records
                pharmacophore.append([
                    int(line[22:26]), line[17:20].strip(),
                    line[21:22].strip(), position,
                    float(line[54:60]), [], 0.0,
                    float(line[60:66])
                ])
            elif 'P' in atom_name:
                feature_id = int(line[22:26])
                matches = [
                    feature for feature in pharmacophore
                    if feature[0] == feature_id
                ]
                if not matches:
                    update_user(
                        'Found interaction partner of feature {} without a preceding core atom in the pdb '
                        'pharmacophore.'.format(feature_id), logger)
                    sys.exit()
                # take the feature itself (not the list of matches) and remove it from the pharmacophore
                feature_list = matches[0]
                pharmacophore = [
                    feature for feature in pharmacophore
                    if feature[0] != feature_id
                ]
                feature_list[5].append(position)
                feature_list[6] = float(line[54:60])
                pharmacophore.append(feature_list)
    return pharmacophore
Пример #5
0
def pml_feature(feature, pyrod_pharmacophore, logger):
    """ This function translates a single pml element into a feature of the internal pharmacophore format.

    Index, type and flag are derived from the attributes of the element. For pyrod pharmacophores, type and index
    are taken from the featureId attribute, which is expected to look like type_index; the program exits if the
    featureId cannot be split that way. The remaining entries are filled by the helper matching the element tag
    (point, volume, vector or plane), elements with other tags keep the default values.

    Parameters:
    feature - xml element of the pml pharmacophore
    pyrod_pharmacophore - if true, type and index are read from the featureId attribute
    logger - logger used for the error message

    Returns the feature as list in the internal pharmacophore format.
    """
    pml_to_internal = {
        'H': 'hi',
        'PI': 'pi',
        'NI': 'ni',
        'HBA': 'ha',
        'HBD': 'hd',
        'AR': 'ai',
        'exclusion': 'ev'
    }
    attributes = feature.attrib
    tag = feature.tag
    feature_id = attributes['featureId']
    # volumes store their kind in the type attribute, all other elements in the name attribute
    feature_type = pml_to_internal[attributes['type' if tag == 'volume' else
                                              'name']]
    if pyrod_pharmacophore:
        try:
            id_parts = attributes['featureId'].split('_')
            feature_id = int(id_parts[1])
            feature_type = id_parts[0]
        except (IndexError, ValueError):
            update_user(
                'You attempted to read a pml pharmacophore, that was not generated by PyRod or with an older '
                'version of PyRod. Please set pyrod pharmacophore to false!',
                logger)
            sys.exit()
    flag = 'O' if attributes['optional'] == 'true' else 'M'
    feature_list = [feature_id, feature_type, flag, 0, 0.0, [], 0.0, 0.0]
    if tag == 'point' or tag == 'volume':
        pml_feature_point_or_volume(feature_list, feature)
    elif tag == 'vector':
        pml_feature_vector(feature_list, feature)
    elif tag == 'plane':
        pml_feature_plane(feature_list, feature)
    return feature_list
Пример #6
0
def generate_exclusion_volumes(dmif, directory, debugging, shape_cutoff,
                               restrictive):
    """ This function generates exclusion volumes. The exclusion volumes are described with a list of properties as
    follows.

    Format:
    ------------------------------------------------------------------------
    0   1 2             3   4                             5          6     7
    ------------------------------------------------------------------------
    0  ev M [0.0,0.0,0.0] 1.0                            []        0.0   1.0
    1  ev M [2.0,0.0,0.0] 1.0                            []        0.0   1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance [not needed for exclusion volumes]
    5 - partner positions [not needed for exclusion volumes]
    6 - partner tolerance [not needed for exclusion volumes]
    7 - weight

    Parameters:
    dmif - grid data providing the fields x, y, z and shape
    directory - directory handed to the logger setup
    debugging - debugging flag handed to the logger setup
    shape_cutoff - only grid points with a shape score below this value are candidates for exclusion volumes
    restrictive - if true, exclusion volumes are placed with a spacing of 2 instead of 4

    Returns the list of exclusion volumes, numbered consecutively starting with 1.
    """
    logger = setup_logger('exclusion_volumes', directory, debugging)
    update_user('Generating exclusion volumes.', logger)
    grid_space = 0.5
    exclusion_volume_space = 4
    if restrictive:
        exclusion_volume_space = 2
    grid_tree = cKDTree([[x, y, z]
                         for x, y, z in zip(dmif['x'], dmif['y'], dmif['z'])])
    dtype = [('x', float), ('y', float), ('z', float), ('shape', int),
             ('count', int)]
    dmif_shape = np.array(
        [(x, y, z, shape, 0)
         for x, y, z, shape in zip(dmif['x'], dmif['y'], dmif['z'],
                                   dmif['shape']) if shape < shape_cutoff],
        dtype=dtype)
    positions = np.array([[
        x, y, z
    ] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    shape_grid_size = len(dmif_shape)
    # store number of neighbors with shape score smaller than shape_cutoff for grid points
    for index in range(shape_grid_size):
        dmif_shape['count'][index] = len(
            shape_tree.query_ball_point(positions[index], grid_space * 4))
    # sort for neighbor count
    dmif_shape = np.sort(dmif_shape, order='count')
    # rebuild positions and shape_tree
    positions = np.array([[
        x, y, z
    ] for x, y, z in zip(dmif_shape['x'], dmif_shape['y'], dmif_shape['z'])])
    shape_tree = cKDTree(positions)
    # Indices of grid points already covered by an exclusion volume. A set keeps the membership and overlap tests
    # below independent of the number of covered points (a list made both tests linear in its length).
    used = set()
    exclusion_volumes = []
    counter = 1
    start = time.time()
    for index in range(shape_grid_size):
        # grid_point index should not be in used set
        if index not in used:
            neighbor_list = shape_tree.query_ball_point(
                positions[index], exclusion_volume_space / 2)
            # Elements of neighbor_list should not be in used set. Neither collection contains duplicates, hence
            # disjointness is the same condition as the former comparison of the combined lengths.
            if used.isdisjoint(neighbor_list):
                # NOTE(review): 33 and 7 look like the number of points within 2 * grid_space and 1 * grid_space
                # of a point on a regular cubic grid with grid_space spacing - confirm dmif uses that spacing.
                # grid_point should not be at border of grid
                inside_grid = len(
                    grid_tree.query_ball_point(positions[index],
                                               r=grid_space * 2)) == 33
                # grid_point should not be directly at border of binding pocket
                # grid_point should not be surrounded by grid_points outside the binding pocket
                if (inside_grid and len(
                        shape_tree.query_ball_point(
                            positions[index], r=grid_space)) == 7 and len(
                                shape_tree.query_ball_point(
                                    positions[index], r=grid_space * 2)) < 33):
                    exclusion_volumes.append([
                        counter, 'ev', 'M', positions[index], 1.0, [], 0.0,
                        1.0
                    ])
                    counter += 1
                    used.update(neighbor_list)
        eta = ((time.time() - start) / (index + 1)) * (shape_grid_size -
                                                       (index + 1))
        update_progress(
            float(index + 1) / shape_grid_size,
            'Progress of exclusion volume generation', eta)
        logger.debug('Passed grid index {}.'.format(index))
    update_user(
        'Finished with generation of {} exclusion volumes.'.format(
            len(exclusion_volumes)), logger)
    return exclusion_volumes
Пример #7
0
def generate_library(pharmacophore_path, output_format, library_dict,
                     library_path, pyrod_pharmacophore, directory, debugging):
    """ This function writes a combinatorial pharmacophore library.

    The template pharmacophore is read and its features are grouped into hydrogen bonds (ha, hd, ha2, hd2, hda),
    hydrophobic interactions (hi), aromatic interactions (ai) and ionizable interactions (pi, ni). Mandatory
    features are part of every pharmacophore, optional features are combined within the minimal and maximal
    numbers given in library_dict. Every combination accepted by evaluate_pharmacophore is written to library_path
    with all features flagged as mandatory, the exclusion volumes of the template and one additional exclusion
    volume per hydrogen bond partner position.

    Parameters:
    pharmacophore_path - path to the template pharmacophore
    output_format - format handed to the pharmacophore writer, also used as file extension for the size estimate
    library_dict - dictionary with the keys 'minimal ...' and 'maximal ...' for 'hydrogen bonds',
                   'hydrophobic interactions', 'aromatic interactions' and 'ionizable interactions'
    library_path - directory the template and the library pharmacophores are written to
    pyrod_pharmacophore - handed to the pharmacophore reader and to the evaluation of pharmacophores
    directory - directory handed to the logger setup
    debugging - debugging flag handed to the logger setup

    The estimated library size is presented to the user, the program exits if the user answers 'no'.
    """
    logger = setup_logger('library', directory, debugging)
    update_user('Starting library generation.', logger)
    template_pharmacophore = pharmacophore_reader(pharmacophore_path,
                                                  pyrod_pharmacophore, logger)
    hydrogen_bond_types = ['ha', 'hd', 'ha2', 'hd2', 'hda']
    feature_groups = {
        'ha': 'hb',
        'hd': 'hb',
        'ha2': 'hb',
        'hd2': 'hb',
        'hda': 'hb',
        'hi': 'hi',
        'pi': 'ii',
        'ni': 'ii',
        'ai': 'ai'
    }
    essential = {'hb': [], 'hi': [], 'ai': [], 'ii': []}
    optional = {'hb': [], 'hi': [], 'ai': [], 'ii': []}
    pharmacophore_library = []
    exclusion_volumes = []
    # analyzing pharmacophore, features are referenced by their position in the template
    for index, feature in enumerate(template_pharmacophore):
        if feature[1] == 'ev':
            exclusion_volumes.append(feature)
            continue
        group = feature_groups.get(feature[1])
        if group is None:
            # features of unknown type are not part of the library
            continue
        if feature[2] == 'O':
            optional[group].append(index)
        else:
            essential[group].append(index)
    essential_features = (essential['hb'] + essential['hi'] +
                          essential['ai'] + essential['ii'])

    def optional_combinations(group, interaction):
        # mandatory features already count towards the requested minimal and maximal numbers
        required = len(essential[group])
        return combine_features(
            optional[group], library_dict['minimal ' + interaction] - required,
            library_dict['maximal ' + interaction] - required + 1)

    for hb_combination in optional_combinations('hb', 'hydrogen bonds'):
        for hi_combination in optional_combinations(
                'hi', 'hydrophobic interactions'):
            for ai_combination in optional_combinations(
                    'ai', 'aromatic interactions'):
                for ii_combination in optional_combinations(
                        'ii', 'ionizable interactions'):
                    pharmacophore = (essential_features + hb_combination +
                                     hi_combination + ai_combination +
                                     ii_combination)
                    if evaluate_pharmacophore(pharmacophore,
                                              template_pharmacophore,
                                              library_dict,
                                              pyrod_pharmacophore):
                        pharmacophore_library.append(pharmacophore)
    # estimate maximal library size and ask user if number and space of pharmacophores is okay
    pharmacophore_writer(template_pharmacophore, [output_format],
                         'template_pharmacophore', library_path, logger)
    pharmacophore_library_size = bytes_to_text(
        os.path.getsize('{}/{}.{}'.format(
            library_path, 'template_pharmacophore', output_format)) *
        len(pharmacophore_library))
    user_prompt = ''
    while user_prompt not in ['yes', 'no']:
        user_prompt = input(
            '{} pharmacophores will be written taking about {} of space.\n'
            'Do you want to continue? [yes/no]: '.format(
                len(pharmacophore_library), pharmacophore_library_size))
        if user_prompt == 'no':
            sys.exit()
    start = time.time()
    # write pharmacophores
    # default avoids a ValueError for templates without exclusion volumes, extra volumes then start with index 1
    maximal_exclusion_volume_id = max(
        (exclusion_volume[0] for exclusion_volume in exclusion_volumes),
        default=0)
    for counter, index_pharmacophore in enumerate(pharmacophore_library):
        extra_exclusion_volumes = []
        extra_ev_counter = 1
        pharmacophore = []
        for index_feature in index_pharmacophore:
            # shallow copy, the flag is changed without touching the template feature
            feature = list(template_pharmacophore[index_feature])
            feature[2] = 'M'
            pharmacophore.append(feature)
            if feature[1] in hydrogen_bond_types:
                # place an exclusion volume on every partner position of the hydrogen bond
                extra_exclusion_volumes.append([
                    maximal_exclusion_volume_id + extra_ev_counter, 'ev', 'M',
                    feature[5][0], 1.0, [], 0.0, 0.0
                ])
                extra_ev_counter += 1
                if feature[1] in ['ha2', 'hd2', 'hda']:
                    extra_exclusion_volumes.append([
                        maximal_exclusion_volume_id + extra_ev_counter, 'ev',
                        'M', feature[5][1], 1.0, [], 0.0, 0.0
                    ])
                    extra_ev_counter += 1
        pharmacophore_writer(
            pharmacophore + exclusion_volumes + extra_exclusion_volumes,
            [output_format], str(counter), library_path, logger)
        update_progress(
            (counter + 1) / len(pharmacophore_library),
            'Writing {} pharmacophores'.format(len(pharmacophore_library)),
            ((time.time() - start) /
             (counter + 1)) * (len(pharmacophore_library) - (counter + 1)))
    update_user('Wrote pharmacophores to {}.'.format(library_path), logger)
    return
Пример #8
0
     description='\n'.join(logo),
     formatter_class=argparse.RawTextHelpFormatter)
 parser.add_argument('conf', help='path to configuration file')
 parser.add_argument('--verbose',
                     dest='debugging',
                     action='store_true',
                     help='verbose logging for debugging')
 conf = parser.parse_args().conf
 debugging = parser.parse_args().debugging
 config = configparser.ConfigParser()
 config.read(conf)
 directory = config.get('directory', 'directory')
 if len(directory) == 0:
     directory = os.getcwd() + '/pyrod'
 logger = setup_logger('main', directory, debugging)
 update_user('\n'.join(logo), logger)
 logger.debug('\n'.join(
     [': '.join(list(_)) for _ in config.items('directory')]))
 # defining grid
 if config.has_section('test grid parameters'):
     logger.debug('\n'.join([
         ': '.join(list(_)) for _ in config.items('test grid parameters')
     ]))
     center, edge_lengths, name = test_grid_parameters(config)
     # determine space resulting in less than 100000 grid points
     space = 0.5
     space_found = False
     while not space_found:
         grid = generate_grid(center, edge_lengths, space)
         if len(grid) < 100000:
             space_found = True
Пример #9
0
def ensemble_to_centroid(topology, trajectories, output_name, directory,
                         debugging):
    """ This function merges the protein sub trajectories of an ensemble and writes its centroid.

    For every trajectory the file frames_<index>.csv in the output directory is read and deleted, and
    ensemble_<index>.dcd is deleted if it cannot be loaded. The loadable sub trajectories are merged into
    ensemble.dcd and removed. The centroid is the frame with the smallest sum of distances in the conformational
    distance matrix (frame 0 if at most one frame was collected). It is written to centroid.pdb from the original
    trajectory and marked in frames.csv. The program exits if no sub trajectory could be loaded.

    Parameters:
    topology - topology used to load the original trajectories
    trajectories - list of original trajectories, indexed by the first column of the frames files
    output_name - name of the sub directory of directory containing the ensemble files
    directory - base directory, also handed to the logger setup
    debugging - debugging flag handed to the logger setup
    """
    logger = setup_logger('ensembles_to_centroid', directory, debugging)
    output_directory = '/'.join([directory, output_name])
    protein_topology = '/'.join([output_directory, 'protein.pdb'])
    ensemble_path = '/'.join([output_directory, 'ensemble.dcd'])
    loadable_trajectories = []
    frames = []
    # check if frames in trajectory files, delete empty trajectory files, collect frames in list
    for trajectory_index in range(len(trajectories)):
        frames_path = '{}/frames_{}.csv'.format(output_directory,
                                                trajectory_index)
        with open(frames_path, 'r') as frames_file:
            frames.extend(frames_file.readlines())
        os.remove(frames_path)
        sub_trajectory = '/'.join(
            [output_directory, 'ensemble_{}.dcd'.format(trajectory_index)])
        try:
            mda.Universe(protein_topology, sub_trajectory)
        except OSError:
            os.remove(sub_trajectory)
        else:
            loadable_trajectories.append(trajectory_index)
    if len(loadable_trajectories) == 0:
        update_user('No protein conformations found.', logger)
        sys.exit()
        return
    # info to user
    if len(frames) > 1:
        update_user(
            'Getting centroid from {} protein conformations.'.format(
                len(frames)), logger)
    else:
        update_user('Found only 1 protein conformations.', logger)
    # merge trajectories into one file
    sub_trajectories = [
        '/'.join([output_directory, 'ensemble_{}.dcd'.format(number)])
        for number in loadable_trajectories
    ]
    u = mda.Universe(protein_topology, sub_trajectories)
    with mda.Writer(ensemble_path, n_atoms=u.atoms.n_atoms) as merged_writer:
        for _ in u.trajectory:
            merged_writer.write(u.atoms)
        # remove sub trajectories (done while the writer is still open)
        for sub_trajectory in sub_trajectories:
            os.remove(sub_trajectory)
    # find centroid of frames
    centroid = 0
    if len(frames) > 1:
        u = mda.Universe(protein_topology, ensemble_path)
        conf_dist_matrix = encore.confdistmatrix.get_distance_matrix(
            u,
            selection='all',
            superimpose=True,
            n_job=1,
            weights='mass',
            metadata=False,
            verbose=False)
        centroid = conf_dist_matrix.as_array().sum(axis=1).argmin()
    # write centroid, the first two columns of a frames entry hold trajectory number and frame number
    centroid_columns = frames[centroid].split()
    u = mda.Universe(topology, trajectories[int(centroid_columns[0])])
    file_path('centroid.pdb', output_directory)
    with mda.Writer('/'.join([output_directory, 'centroid.pdb']),
                    bonds=None,
                    n_atoms=u.atoms.n_atoms) as centroid_writer:
        first_frame = int(centroid_columns[1])
        for _ in u.trajectory[first_frame:int(centroid_columns[1]) + 1]:
            centroid_writer.write(u.atoms)
    # write csv with frame references
    file_path('frames.csv', output_directory)
    frames[centroid] = '{}\t{}\t{}\n'.format(centroid_columns[0],
                                             centroid_columns[1], 'centroid')
    with open('{}/frames.csv'.format(output_directory), 'w') as frames_file:
        frames_file.write(''.join(['trajectory\tframe\tcentroid\n'] + frames))
    return