def process_water_structures(initial_pdb, main_chains, ligand,
                             reference_index=50):
    """
    Detect the waters we have to keep (important for the simulation) and
    return the structures holding them.

    For every chain listed in ``main_chains`` one water is kept: the one whose
    oxygen minimises (distance to the reference residue centre) + (distance to
    the binding-site centre). The reference residue is template residue 50
    (Ile); its amino acid type is not guaranteed to be conserved, so it is
    chosen by its position inside the chain rather than by name.

    :param initial_pdb: The pdb (prody structure) holding protein and waters.
    :param main_chains: Container of chain ids to process; other chains are
        ignored.
    :param ligand: Prody atoms defining the binding site. If None, the centre
        of the whole ``initial_pdb`` is used instead.
    :param reference_index: 0-based position (in iteration order) of the
        reference residue inside each chain. If the chain is shorter, its last
        residue is used. NOTE(review): the default 50 is the 51st residue
        iterated, while the original comment said "50th residue" -- confirm
        whether an offset is intended.

    :return: A dictionary indexed by the water id ("<res. num.>:<chain id>")
        holding a copy of the prody structure of that water. Empty if there
        are no waters or no chain matches.
    """
    hw = prody.HierView(initial_pdb.select("protein"))
    water_structs = {}

    chains = [chain for chain in hw.iterChains()
              if chain.getChid() in main_chains]
    waters = initial_pdb.select("name O and water")
    if not chains or waters is None:
        return water_structs

    # Everything below is independent of the chain, so compute it only once.
    water_coords = waters.getCoords()
    water_resnums = waters.getResnums()
    water_chids = waters.getChids()
    ligand_com = prody.calcCenter(initial_pdb if ligand is None else ligand)
    distance_to_BindSite = numpy.sqrt(
        ((ligand_com - water_coords) ** 2).sum(axis=1))

    for chain in chains:
        # We cannot do a direct selection, instead we iterate
        residue = None
        for i, residue in enumerate(chain.iterResidues()):
            if i == reference_index:
                break
        if residue is None:  # chain without residues: nothing to measure
            continue

        residue_com = prody.calcCenter(residue)
        distance_to_R50 = numpy.sqrt(
            ((residue_com - water_coords) ** 2).sum(axis=1))
        distances = distance_to_R50 + distance_to_BindSite

        # argmin yields one plain index (the first one on ties), so the
        # values below are scalars and can be used with "%d" safely.
        closest = int(numpy.argmin(distances))
        water_resnum = int(water_resnums[closest])
        water_chid = water_chids[closest]
        water_id = "%d:%s" % (water_resnum, water_chid)
        # We use a dict in order to get rid of repeats
        selection_string = "resnum %d and chain %s" % (water_resnum,
                                                       water_chid)
        water_structs[water_id] = initial_pdb.water.select(
            selection_string).copy()

    return water_structs
def process_water_structures(initial_pdb, main_chains, ligand,
                             reference_index=50):
    """
    Detect the waters we have to keep (important for the simulation) and
    return the structures holding them.

    For every chain listed in ``main_chains`` one water is kept: the one whose
    oxygen minimises (distance to the reference residue centre) + (distance to
    the binding-site centre). The reference residue is template residue 50
    (Ile); its amino acid type is not guaranteed to be conserved, so it is
    chosen by its position inside the chain rather than by name.

    :param initial_pdb: The pdb (prody structure) holding protein and waters.
    :param main_chains: Container of chain ids to process; other chains are
        ignored.
    :param ligand: Prody atoms defining the binding site. If None, the centre
        of the whole ``initial_pdb`` is used instead.
    :param reference_index: 0-based position (in iteration order) of the
        reference residue inside each chain. If the chain is shorter, its last
        residue is used. NOTE(review): the default 50 is the 51st residue
        iterated, while the original comment said "50th residue" -- confirm
        whether an offset is intended.

    :return: A dictionary indexed by the water id ("<res. num.>:<chain id>")
        holding a copy of the prody structure of that water. Empty if there
        are no waters or no chain matches.
    """
    hw = prody.HierView(initial_pdb.select("protein"))
    water_structs = {}

    chains = [chain for chain in hw.iterChains()
              if chain.getChid() in main_chains]
    waters = initial_pdb.select("name O and water")
    if not chains or waters is None:
        return water_structs

    # Everything below is independent of the chain, so compute it only once.
    water_coords = waters.getCoords()
    water_resnums = waters.getResnums()
    water_chids = waters.getChids()
    ligand_com = prody.calcCenter(initial_pdb if ligand is None else ligand)
    distance_to_BindSite = numpy.sqrt(
        ((ligand_com - water_coords) ** 2).sum(axis=1))

    for chain in chains:
        # We cannot do a direct selection, instead we iterate
        residue = None
        for i, residue in enumerate(chain.iterResidues()):
            if i == reference_index:
                break
        if residue is None:  # chain without residues: nothing to measure
            continue

        residue_com = prody.calcCenter(residue)
        distance_to_R50 = numpy.sqrt(
            ((residue_com - water_coords) ** 2).sum(axis=1))
        distances = distance_to_R50 + distance_to_BindSite

        # argmin yields one plain index (the first one on ties), so the
        # values below are scalars and can be used with "%d" safely.
        closest = int(numpy.argmin(distances))
        water_resnum = int(water_resnums[closest])
        water_chid = water_chids[closest]
        water_id = "%d:%s" % (water_resnum, water_chid)
        # We use a dict in order to get rid of repeats
        selection_string = "resnum %d and chain %s" % (water_resnum,
                                                       water_chid)
        water_structs[water_id] = initial_pdb.water.select(
            selection_string).copy()

    return water_structs
示例#3
0
 def _prepare_points(self):
     """Parse the complex, re-centre it on the ligand and build the grid.

     The structure at ``self.path`` is loaded, split into the ``WER`` ligand
     and everything that is neither ligand nor water, and translated so the
     ligand centre moves to the origin. The grid points are then generated
     around the ligand centre measured after the translation.
     """
     self.complex = parsePDB(self.path)

     receptor_atoms = self.complex.select("not (resname WER or water)")
     ligand_atoms = self.complex.select("resname WER")

     # Shift the whole complex by minus the ligand centre.
     shift = -calcCenter(ligand_atoms.getCoords())
     moveAtoms(self.complex, by=shift)

     self.protein.structure = receptor_atoms
     self.ligand.structure = ligand_atoms

     # Centre of the ligand as it stands after the move.
     moved_ligand = self.complex.select("resname WER")
     grid_center = calcCenter(moved_ligand.getCoords())
     grid_spacing = 24 / (self.size - 1)
     self.points = grid_around(grid_center, self.size, spacing=grid_spacing)
示例#4
0
    def make_query_coords(self):
        """Build every coordinate stack the query can be superposed with.

        For each of the two atom-name lists in ``self.query_lig_corr`` the
        first coordinate of every named atom is collected. A list flagged in
        ``self.query_cyclic`` is expanded into all of its cyclic rotations;
        otherwise it is used as given. ``self.query_coords`` receives one
        vertically stacked array per (first ordering, second ordering) pair,
        and ``self.query_distance`` is the largest distance from the centre
        of the first atom set to the second set's atoms, plus
        ``self.rmsd_threshold``.
        """

        def coords_of(atom_names):
            # First coordinate row of each individually selected atom name.
            return [
                self.query.select('name ' + atom_name).getCoords()[0]
                for atom_name in atom_names
            ]

        def orderings_of(coords, is_cyclic):
            # A cyclic set yields one rotation per starting offset.
            if not is_cyclic:
                return [coords]
            count = len(coords)
            return [[coords[pos - shift] for pos in range(count)]
                    for shift in range(count)]

        first_orderings = orderings_of(coords_of(self.query_lig_corr[0]),
                                       self.query_cyclic[0])
        second_orderings = orderings_of(coords_of(self.query_lig_corr[1]),
                                        self.query_cyclic[1])

        first_set = self.query.select(
            'name ' + ' '.join(self.query_lig_corr[0]))
        first_center = pr.calcCenter(first_set)
        farthest = np.max(cdist([first_center], second_orderings[0]))
        self.query_distance = farthest + self.rmsd_threshold

        self.query_coords = [
            np.vstack((first, second))
            for first in first_orderings
            for second in second_orderings
        ]
示例#5
0
def orient(pdb, selection='all'):
    """Re-orient ``pdb`` in place using the varimax result of a selection.

    The coordinates of ``selection`` are shifted by their centre and passed
    through ``varimax``. The transformation that maps the original selected
    coordinates onto that result is then applied to the whole ``pdb``.

    :param pdb: prody structure to transform (modified in place).
    :param selection: prody selection string naming the atoms that drive the
        orientation.
    :return: the same ``pdb`` object, after the transformation.
    """
    selected = pdb.select(selection)
    centroid = prody.calcCenter(selected)
    original_xyz = selected.getCoords()
    target_xyz = varimax(np.subtract(original_xyz, centroid))
    prody.calcTransformation(original_xyz, target_xyz).apply(pdb)
    return pdb
@author: victor
"""
import sys
import os
import glob
import prody 
from hivprotmut.structures.pdbcuration import CurationSelections

if __name__ == '__main__':
    # Usage: <script> FINAL_DB_FOLDER OUTPUT_FILE
    # Writes one line per PDB file found one level below FINAL_DB_FOLDER:
    #   "<ligand folder>/<pdb file> <x> <y> <z>"
    # where x, y, z is the centre of the heavy-atom ligand selection.
    final_db_folder = sys.argv[1]
    com_file = sys.argv[2]

    # 'with' closes (and flushes) the report even if parsing a PDB raises.
    with open(com_file, "w") as com_handler:
        for path in os.listdir(final_db_folder):  # first level are ligands
            pattern = os.path.join(final_db_folder, path, "*.pdb")
            for pdb_file in glob.glob(pattern):
                pdb = prody.parsePDB(pdb_file)
                ligand = pdb.select(CurationSelections.HEAVY_LIGAND_SELECTION)
                if ligand is None:
                    # No ligand atoms matched: report and keep processing the
                    # remaining files instead of aborting the whole batch.
                    sys.stderr.write(
                        "Skipping %s: no ligand atoms selected\n" % pdb_file)
                    continue
                com = prody.calcCenter(ligand)
                txt_pdb_file = os.path.join(path, os.path.split(pdb_file)[1])
                com_handler.write("%s %.3f %.3f %.3f\n" % (
                    txt_pdb_file, com[0], com[1], com[2]))
示例#7
0
def blast(pdb_path):
    """Run blastp on the chain-A sequence surrounding every ligand of a PDB.

    For each chemical listed in the PDB header, residues around the ligand
    atoms are collected with a growing radius (starting at 4) until the
    chain 'A' sequence of the collected residues has 100 letters, or until it
    covers all of chain 'A' when that chain is shorter. The sequence is written
    to ``sequence.fasta`` and searched with ``blastp`` against ``BLASTDB``;
    the XML output is parsed and the first HSP of every hit is reported.

    All files are created inside a temporary directory that is removed
    before returning; the caller's working directory is restored.

    :param pdb_path: path of the PDB file (absolute or relative to the
        current working directory).
    :return: list with one entry per hit:
        ``[receptor, hit_id, identity, align_len, query_len, midline, hseq,
        sequence]`` (numeric fields as strings).
    """
    import shutil

    # Resolve before chdir: a relative path would otherwise be looked up
    # inside the temporary directory.
    pdb_path = os.path.abspath(pdb_path)
    receptor = os.path.basename(os.path.splitext(pdb_path)[0])

    cdir = os.getcwd()
    tdir = tempfile.mkdtemp()
    os.chdir(tdir)
    try:
        pdbHead = prody.parsePDBHeader(pdb_path)
        pdbFile = prody.parsePDB(pdb_path)

        ligands = [(chem.chain, str(chem.resnum))
                   for chem in pdbHead.get('chemicals', [])]

        blast_result = []
        for chain, resnum in ligands:
            rec = pdbFile.select(
                'not (chain {} resnum {})'.format(chain, resnum))
            ligand = pdbFile.select('chain {} resnum {}'.format(chain, resnum))
            if rec is None or ligand is None:
                continue

            # Only chain 'A' contributes to the query, so its full length
            # bounds how long the sequence can ever get.
            chain_a = rec.getHierView()['A']
            if chain_a is None:
                print('no chain A around ligand', chain, resnum)
                continue
            target_len = min(100, len(chain_a.getSequence()))

            ligCoords = ligand.getCoords()
            print('lig_size', len(ligCoords))

            nearby = None  # running union of every selection found so far
            sequence = ''
            i = 4
            while len(sequence) < target_len:
                for center in ligCoords:
                    around_atoms = rec.select(
                        'same residue as within {} of center'.format(i),
                        center=center)
                    if around_atoms is None:
                        continue
                    nearby = (around_atoms if nearby is None
                              else nearby | around_atoms)
                if nearby is not None:
                    nearby_a = nearby.getHierView()['A']
                    if nearby_a is not None:
                        sequence = nearby_a.getSequence()
                print('sequence', i, len(sequence), sequence)
                i += 1

            with open('sequence.fasta', 'w') as fout:
                fout.write(">receptor\n" + sequence + '\n')

            # Make sure a result left by a previous ligand is never parsed
            # again if this blastp run fails to write its output.
            if os.path.exists('result'):
                os.remove('result')

            cmd = 'blastp -db {} -query sequence.fasta -outfmt 5 -out result'.format(
                BLASTDB)
            cl = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
            # communicate() drains the pipe; a bare wait() can block forever
            # once the pipe buffer fills up.
            cl.communicate()

            dtree = xml.dom.minidom.parse("result")
            hits = dtree.documentElement.getElementsByTagName("Hit")

            for hit in hits:
                hit_id = hit.getElementsByTagName(
                    'Hit_id')[0].childNodes[0].data
                hsps = hit.getElementsByTagName('Hit_hsps')[0]
                identity = hsps.getElementsByTagName(
                    'Hsp_identity')[0].childNodes[0].data
                align_len = hsps.getElementsByTagName(
                    'Hsp_align-len')[0].childNodes[0].data
                hseq = hsps.getElementsByTagName(
                    'Hsp_hseq')[0].childNodes[0].data
                midline = hsps.getElementsByTagName(
                    'Hsp_midline')[0].childNodes[0].data

                blast_result.append([
                    receptor, hit_id,
                    str(identity),
                    str(align_len),
                    str(len(sequence)), midline, hseq, sequence
                ])

        return blast_result
    finally:
        # Always give the caller its working directory back and clean up.
        os.chdir(cdir)
        shutil.rmtree(tdir, ignore_errors=True)
示例#8
0
def _atom_names_from_spec(spec):
    """Turn 'NAME' or '[NAME1,NAME2,...]' into (space separated names, is_multiple)."""
    if "[" in spec or "]" in spec:
        names = spec.strip("[").strip("]").split(",")
        return ' '.join(names), True
    return spec, False


def compute_atom_distances(pdb_target, res_file, output_report, chain="L"):
    """
    Compute ligand-residue distances described in an instruction file.

    Each instruction line has three whitespace separated fields:
    ``RESNUM LIGAND_ATOMS RESIDUE_ATOMS``. An atoms field is either a single
    atom name or a bracketed, comma separated list (``[ATOM1,ATOM2,ATOMN]``).
    The distance is measured between the centres of both selections (for a
    single atom the centre is the atom itself).

    :param pdb_target: input PDB file path
    :param res_file: path of the file with the instructions
    :param output_report: path where the report is written; nothing is
        written if it is empty/None
    :param chain: chain id that holds the ligand
    :return: the report as a single string, one line per instruction with the
        residue number, the joined ligand atom names, the joined residue atom
        names and the distance
    :raises SystemExit: if the ligand chain or one of the requested
        selections contains no atoms
    """
    import sys

    # Load PDB files
    target = pdb2prody(pdb_target)
    ligand = target.select("chain {}".format(chain))
    if ligand is None:
        sys.exit("No atoms found in chain {} of {}".format(chain, pdb_target))
    print(ligand.getNames())
    # Reading instructions from file
    list_of_instructions = read_selecteds_from_file(res_file)
    # Select the input atoms
    report = []
    for line in list_of_instructions:
        if not line.strip():  # tolerate blank lines in the instruction file
            continue
        resnum, atom_name_ref, atom_name_tar = line.split()

        # Ligand side
        ref_names, ref_multiple = _atom_names_from_spec(atom_name_ref)
        if ref_multiple:
            print("Multiple atom selection for the ligand")
        else:
            print("Single atom selection for the ligand")
        atom_ref_selected = ligand.select("name {}".format(ref_names))
        # Check before touching the selection: a failed select yields None
        # and any method call on it would hide the message below.
        if atom_ref_selected is None:
            sys.exit(
                "None atoms where selected. Please, check if the selected atoms exists in the ligand in {}"
                .format(pdb_target))
        if ref_multiple:
            print("Selected atoms: {}".format(atom_ref_selected.getNames()))
        else:
            print("Selected atom: {}".format(atom_ref_selected.getNames()))

        # Residue side
        tar_names, tar_multiple = _atom_names_from_spec(atom_name_tar)
        if tar_multiple:
            print("Multiple atom selection for the system")
        else:
            print("Single atom selection for the system")
        atom_tar_selected = select_atom_given_name_type_and_num(
            target, resnum, tar_names)
        if atom_tar_selected is None:
            sys.exit(
                "None atoms where selected. Please, check if the selected atoms exists in the residue {} in {}"
                .format(resnum, pdb_target))
        if tar_multiple:
            print("Selected atoms: {}".format(atom_tar_selected.getNames()))
        else:
            print("Selected atom: {}".format(atom_tar_selected.getNames()))

        # The centre of a one-atom selection is that atom's position, so a
        # single centre-to-centre formula covers the single/multiple
        # combinations and always gives a scalar for the "{:6.3f}" field.
        center_tar = prody.calcCenter(atom_tar_selected)
        center_ref = prody.calcCenter(atom_ref_selected)
        distance = float(np.linalg.norm(center_tar - center_ref))

        report_line = "{:4} {:10} {:10} {:6.3f}\n".format(
            resnum, ''.join(atom_ref_selected.getNames()),
            ''.join(atom_tar_selected.getNames()), distance)
        report.append(report_line)

    report_final = ''.join(report)

    if output_report:
        with open(output_report, "w") as report_file:
            report_file.write(report_final)
    print(report_final)
    return report_final
示例#9
0
    def bundle_ligand_data(self,
                           pick_one,
                           fake_ligand=True,
                           OUT=True,
                           compare_ResId_native='default',
                           Id_suffix='default',
                           filename=None,
                           benchmark=None):
        '''
        Register one ligand in ``self.heterodict`` together with a box of
        atom labels built around it.

        The method optionally writes the ligand to a PDB file and converts it,
        picks a box centre, marks the grid cells occupied by ligand atoms and
        by nearby protein atoms, and stores the result under the ligand id.
        For fake ligands it also stores the values returned by
        ``self._calcRMSD`` and ``self._calcQ`` against the native ligand.

        NOTE(review): ``pd`` is used for writePDB/calcCenter/defSelectionMacro,
        so it presumably is the ProDy module -- confirm against the imports.

        :param pick_one: atoms of the ligand to process.
        :param fake_ligand: if False the id is the ligand's residue index and
            its residue name / chain id are stored; if True the id is
            ``compare_ResId_native + '_' + Id_suffix`` and the ligand is
            compared against the entry stored under ``compare_ResId_native``.
        :param OUT: when no ``filename`` is given, write the ligand PDB and
            convert it with ``pdb_to_mol2``.
        :param compare_ResId_native: id of the native ligand entry in
            ``self.heterodict`` (used only for fake ligands).
        :param Id_suffix: suffix appended to build the id of a fake ligand.
        :param filename: path of an existing ligand file; if None a path under
            ``self.store_dir`` is generated.
        :param benchmark: forwarded to ``self._calcRMSD`` and ``self._calcQ``.
        :return: None. Returns early (without storing anything) if the file
            conversion fails, if the ligand is larger than the box, or if no
            protein residue is near the box centre.
        '''
        PDB = self.PDBname
        if fake_ligand == False:
            ResId = str(pick_one.getResindex())
        else:
            ResId = compare_ResId_native + '_' + str(Id_suffix)

        pdb_store_dir = self.store_dir
        other = self.receptor
        # Extract this ligand from protein (as input for openbabel)

        if filename is None:
            filename = pdb_store_dir + '/{1}/{0}_{1}_ligand.pdb'.format(
                PDB, ResId)
            # Make sure the per-ligand directory exists before writing.
            if not os.path.isfile(filename):
                if not os.path.exists(pdb_store_dir + '/' + ResId):
                    os.mkdir(pdb_store_dir + '/' + ResId)
            if OUT:
                try:
                    pd.writePDB(filename, pick_one)
                    # Same path with the last extension replaced by '.mol'.
                    tar_filename = ''.join(filename.split('.')[:-1])
                    tar_filename += '.mol'
                    pdb_to_mol2(filename, tar_filename)
                # NOTE(review): bare except -- any failure (including
                # KeyboardInterrupt) is reported as a conversion error.
                except:
                    print 'Unexpected Error!'
                    logging.error('Cannot convert {} to mol2 format!'.format(
                        filename.split('/')[-1]))
                    return

        # Also create the directory when a filename was supplied by the caller
        # but the file does not exist.
        if not os.path.isfile(filename):
            if not os.path.exists(pdb_store_dir + '/' + ResId):
                os.mkdir(pdb_store_dir + '/' + ResId)

        # NOTE(review): 'naming' is not used below; the same string is rebuilt
        # when the heterodict entry is created.
        naming = '{}_{}'.format(PDB, ResId)

        # Get coordinate of center
        xyz = pick_one.getCoords()
        middle = pd.calcCenter(pick_one)
        # in pi degree , the rotation of the box (if needed)
        rotation = [0, 0, 0]

        # Largest per-axis distance from the centre to any ligand atom.
        scale = max(
            max(xyz[:, 0]) - middle[0], middle[0] - min(xyz[:, 0]),
            max(xyz[:, 1]) - middle[1], middle[1] - min(xyz[:, 1]),
            max(xyz[:, 2]) - middle[2], middle[2] - min(xyz[:, 2]))

        # assert scale <= 10
        if scale > self.BOX_range / 2:
            logging.warning(
                'Warning! {} has a ligand out of box scale with {} atom distance to center'
                .format(PDB, scale))
            # Now shifting the boxes:
            # Largest per-axis extent of the ligand's bounding box.
            max_scale = max(
                max(xyz[:, 0]) - min(xyz[:, 0]),
                max(xyz[:, 1]) - min(xyz[:, 1]),
                max(xyz[:, 2]) - min(xyz[:, 2]))
            if max_scale > self.BOX_range:
                # NOTE(review): the message has a single placeholder, so
                # 'scale' is passed but never shown.
                logging.error(
                    'Assertion failed, {} has a ligand out of box completely with scale'
                    .format(PDB, scale))
                return
            # Try to move to the new center
            # (midpoint of the ligand's bounding box, rounded to 6 decimals)
            temp_mid = [(max(xyz[:, 0]) + min(xyz[:, 0])) / 2,
                        (max(xyz[:, 1]) + min(xyz[:, 1])) / 2,
                        (max(xyz[:, 2]) + min(xyz[:, 2])) / 2]

            temp_mid[0] = round(temp_mid[0], 6)
            temp_mid[1] = round(temp_mid[1], 6)
            temp_mid[2] = round(temp_mid[2], 6)
            middle = np.array(temp_mid)
            print middle

        # print middle
        # Grid of box_num points per axis, spanning +/- scale_extension
        # around the chosen centre.
        scale_extension = (self.BOX_range - self.BOX_size) / 2
        box_num = int(np.ceil(self.BOX_range / self.BOX_size))
        xx, yy, zz = np.meshgrid(
            np.linspace(middle[0] - scale_extension,
                        middle[0] + scale_extension, box_num),
            np.linspace(middle[1] - scale_extension,
                        middle[1] + scale_extension, box_num),
            np.linspace(middle[2] - scale_extension,
                        middle[2] + scale_extension, box_num))

        # print xx
        # One row per grid point; vector[0] is used below as the grid origin.
        vector = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]
        # Flat label array: 0 means "empty cell".
        num_vector = [0] * len(vector)

        #print len(vector), box_num
        for atom in pick_one.iterAtoms():
            x, y, z = atom.getCoords()
            # Cell index = rounded offset from the grid origin.
            # NOTE(review): this treats one coordinate unit as one cell --
            # confirm it matches the linspace spacing above.
            x_pos = int(round(x - vector[0][0]))
            # assert 0 <= x_pos <= 19
            y_pos = int(round(y - vector[0][1]))
            # assert 0 <= y_pos <= 19
            z_pos = int(round(z - vector[0][2]))
            # assert 0 <= z_pos <= 19
            if 0 <= x_pos < box_num and 0 <= y_pos < box_num and 0 <= z_pos < box_num:
                # Simply change here to fulfill the mark as 'H_1'
                # note (z(y(x))) follows from the autogrid map file format, otherwise the coordinate system does not correspond correctly
                num_vector[z_pos * box_num * box_num + y_pos * box_num +
                           x_pos] = atom.getName() + '_' + str(HETERO_PART)

        # quick,dirty way to find atoms of protein in cubic boxes
        # ('inbox' keeps atoms within BOX_size / 2 of the centre on each axis)
        pd.defSelectionMacro(
            'inbox',
            'abs(x-{1}) <= {0} and abs(y-{2}) <= {0} and abs(z-{3}) <= {0}'.
            format(self.BOX_size / 2, middle[0], middle[1], middle[2]))
        residues = other.select(
            'protein and same residue as within 18 of center', center=middle)

        if residues is None:
            logging.warning('{} in {} has no atoms nearby'.format(ResId, PDB))
            return

        # This place might have some potential problem
        # for ADP or ATP , they might either be part of nucleic and the ligand
        # This will cause a severe bug when calculating autovina score
        # TODO fix this issue
        nearby = residues.select('inbox')

        if nearby is not None:
            for atom in nearby.iterAtoms():
                x, y, z = atom.getCoords()
                x_pos = int(round(x - vector[0][0]))
                # assert 0 <= x_pos <= 19
                y_pos = int(round(y - vector[0][1]))
                # assert 0 <= y_pos <= 19
                z_pos = int(round(z - vector[0][2]))
                # assert 0 <= z_pos <= 19
                # 'temp' may be out of range here; it is only used as an index
                # after the bounds checks in the condition below have passed.
                temp = z_pos * box_num * box_num + y_pos * box_num + x_pos
                if 0 <= x_pos < box_num and 0 <= y_pos < box_num and 0 <= z_pos < box_num and num_vector[
                        temp] == 0:
                    # Simply change here to fulfill the mark as 'C_2'
                    num_vector[temp] = atom.getName() + '_' + str(PROTEIN_PART)
                else:
                    # Reached when the atom falls outside the grid or the cell
                    # is already labelled; the atom is only reported.
                    # num_vector[temp] += '|'+atom.getName() + '_' + str(PROTEIN_PART)
                    print atom.getName()
                    logging.warning('Coorinate {} {} {} found at {}'.format(
                        x_pos, y_pos, z_pos, self.PDBname))

        # Save into the dict for future locating
        # naming = '{}_{}'.format(PDB, ResId)

        # Do autogrid mapgeneration:
        # ligand_filename = os.path.join(temp_pdb_PREFIX, PDB + '/' + naming + '_ligand.pdb')
        # receptor_filename = os.path.join(temp_pdb_PREFIX, PDB + '/' + naming + '_receptor.pdb')
        # complex_filename = os.path.join(temp_pdb_PREFIX, PDB + '/' + naming + '_complex.pdb')
        # fake_ligand_filename = os.path.join(temp_pdb_PREFIX, 'fake-ligand.pdb')

        # NOTE(review): 'fake_ligand' is stored as True regardless of the
        # fake_ligand argument -- confirm this is intended.
        self.heterodict[ResId] = {
            'raw_vector': num_vector,
            'center': middle,
            'rotation': rotation,
            'naming': '{}_{}'.format(PDB, ResId),
            'chain': 'NA',
            'filename': filename,
            'id': ResId,
            'Resname': 'NA',
            'ligand': pick_one,
            'protein': residues,
            'vina_score': 'NA',
            'original_one': True,
            'file_generated': False,
            'fake_ligand': True,
            'RMSF': 0,
            'Contact Similarity': 1,
            'gridmap_protein': 'NA',
            'gridmap_ligand': 'NA',
            'gridmap_complex': 'NA'
        }

        if fake_ligand == True:
            # Compare the fake ligand with the native one already stored.
            try:
                dist = self._calcRMSD(
                    self.heterodict[compare_ResId_native]['ligand'],
                    pick_one,
                    benchmark=benchmark)
                print dist
                self.heterodict[ResId]['RMSF'] = dist
            # NOTE(review): bare except -- every failure (including a missing
            # native entry) is re-raised as a plain IOError.
            except:
                print 'oops'
                raise IOError
            self.heterodict[ResId]['Contact Similarity'] = self._calcQ(
                self.heterodict[compare_ResId_native]['ligand'],
                pick_one,
                benchmark=benchmark)
        else:
            # Real ligand: record where it comes from in the structure.
            self.heterodict[ResId]['Resname'] = pick_one.getResname()
            self.heterodict[ResId]['chain'] = pick_one.getChid()
    def process_residue_into_vector(self,
                                    prody_ligand,
                                    prody_residue,
                                    distance_cutoff_polar=3.5,
                                    distance_cutoff_greasy=4):
        """
        Describe a residue-ligand contact as a vector plus a set of flags.

        The closest residue/ligand atom pair is located with
        ``minimum_contact_distance``. If that pair is too far apart nothing
        is stored; otherwise two attributes are set:

        * ``self.contact_vector`` - position of the closest residue atom
          minus the ligand centre.
        * ``self.categorical_array`` - 0/1 flags, in this order:

          0. contact atom is side chain (0 when it is C, CA, N or O)
          1. ligand contact atom name starts with 'C'
          2. residue is an H-bond donor/acceptor type (DEHKNQRSTY)
          3. residue contact atom name starts with 'O' or 'N'
          4. hydrophobic, aliphatic (ALA, ILE, LEU, VAL, CYS)
          5. hydrophobic, aromatic (PHE, TYR, TRP)
          6. polar (ASN, CYS, GLN, MET, SER, THR)
          7. charged, acidic (ASP, GLU)
          8. charged, basic (HIS, LYS, ARG)
          9. glycine
          10. proline
          11. backbone carbonyl (contact atom is O)
          12. backbone amino (contact atom is N)
          13. backbone C / CA

        :param prody_ligand: prody atoms of the ligand/fragment.
        :param prody_residue: prody atoms of the residue.
        :param distance_cutoff_polar: maximum contact distance accepted when
            the residue is one of the H-bond donor/acceptor types.
        :param distance_cutoff_greasy: maximum contact distance accepted for
            any residue.
        :return: None when the contact is beyond the applicable cutoff,
            True otherwise.
        """
        hbond_residues = ('ASP', 'GLU', 'HIS', 'LYS', 'ASN', 'GLN', 'ARG',
                          'SER', 'THR', 'TYR')

        def flag(value, options):
            # 1 when value is one of options, else 0
            return 1 if value in options else 0

        closest, residue_index, ligand_index = minimum_contact_distance(
            prody_residue, prody_ligand, return_indices=True)

        # Guard clauses: reject contacts that are too long.
        residue_is_hbond_type = (
            prody_residue.getResnames()[0] in hbond_residues)
        if residue_is_hbond_type and closest > distance_cutoff_polar:
            return None
        if closest > distance_cutoff_greasy:
            return None

        # The returned indices are used against copies of the inputs.
        residue_atom = prody_residue.copy().select(
            'index {}'.format(residue_index))
        ligand_atom = prody_ligand.copy().select(
            'index {}'.format(ligand_index))

        residue_atom_name = residue_atom.getNames()[0]
        ligand_atom_name = ligand_atom.getNames()[0]

        # Vector from fragment centroid to closest residue atom
        offsets = residue_atom.getCoords() - prody.calcCenter(prody_ligand)
        contact_vector = offsets[0]

        residue_name = residue_atom.getResnames()[0]

        flags = [
            # side chain (1) versus backbone (0) contact
            0 if residue_atom_name in ('C', 'CA', 'N', 'O') else 1,
            # ligand contact atom is a carbon
            flag(ligand_atom_name[0], ('C',)),
            # residue can donate/accept hydrogen bonds
            flag(residue_name, hbond_residues),
            # polar atom on the residue is contacting the ligand
            flag(residue_atom_name[0], ('O', 'N')),
            # residue classes
            flag(residue_name, ('ALA', 'ILE', 'LEU', 'VAL', 'CYS')),
            flag(residue_name, ('PHE', 'TYR', 'TRP')),
            flag(residue_name, ('ASN', 'CYS', 'GLN', 'MET', 'SER', 'THR')),
            flag(residue_name, ('ASP', 'GLU')),
            flag(residue_name, ('HIS', 'LYS', 'ARG')),
            flag(residue_name, ('GLY',)),
            flag(residue_name, ('PRO',)),
            # backbone atom identity
            flag(residue_atom_name, ('O',)),
            flag(residue_atom_name, ('N',)),
            flag(residue_atom_name, ('C', 'CA')),
        ]

        self.categorical_array = np.asanyarray(flags)
        self.contact_vector = contact_vector

        return True
"""
Created on 1/9/2014

@author: victor
"""
import sys
import os
import glob
import prody
from hivprotmut.structures.pdbcuration import CurationSelections

if __name__ == '__main__':
    # Usage: <script> FINAL_DB_FOLDER COM_FILE
    #
    # Walks FINAL_DB_FOLDER/<ligand folder>/*.pdb and writes one line per
    # structure to COM_FILE: "<ligand folder>/<pdb name> x y z", where x, y, z
    # is the center (prody.calcCenter) of the atoms matched by
    # CurationSelections.HEAVY_LIGAND_SELECTION.
    final_db_folder = sys.argv[1]
    com_file = sys.argv[2]

    # The context manager guarantees the output file is flushed and closed
    # even if parsing one of the structures raises.
    with open(com_file, "w") as com_handler:
        for path in os.listdir(final_db_folder):  # first level are ligands
            pattern = os.path.join(final_db_folder, path, "*.pdb")
            for pdb_file in glob.glob(pattern):
                pdb = prody.parsePDB(pdb_file)
                ligand = pdb.select(CurationSelections.HEAVY_LIGAND_SELECTION)
                if ligand is None:
                    # select() yields None when nothing matches; report it
                    # and carry on instead of aborting the whole batch.
                    sys.stderr.write(
                        "Skipping %s: no atoms match the ligand selection\n" %
                        pdb_file)
                    continue
                com = prody.calcCenter(ligand)
                txt_pdb_file = os.path.join(path, os.path.split(pdb_file)[1])
                com_handler.write("%s %.3f %.3f %.3f\n" %
                                  (txt_pdb_file, com[0], com[1], com[2]))
示例#12
0
 def __init__(self, parsed_pdb, comb):
     """Build an IntFG from the next candidate selection held by ``parsed_pdb``.

     One selection is popped from ``parsed_pdb.possible_ifgs``. Its residue
     indices, names, numbers, per-residue atom names, chain id and center are
     stored, together with the surrounding fragment and its sequence. Every
     SASA, contact, hydrogen-bond and density attribute is created empty
     (``None`` or a fresh container); presumably they are filled in by later
     analysis steps.

     :param parsed_pdb: object providing ``possible_ifgs`` (a selection is
         popped from it) and ``prody_pdb`` (used to select the fragment).
     :param comb: object providing ``ifg_count`` and ``ifg_seq_str``.
     """
     self.sele = parsed_pdb.possible_ifgs.pop()

     # np.unique gives one entry per residue plus the index of that residue's
     # first atom, which is reused to pick the matching name and number.
     self.resindex, self._ind = np.unique(self.sele.getResindices(), return_index=True)
     self.resname = self.sele.getResnames()[self._ind]
     self.resnum = self.sele.getResnums()[self._ind]
     names_by_resname = {}
     for rname, rindex in zip(self.resname, self.resindex):
         names_by_resname[rname] = self.sele.select('resindex ' + str(rindex)).getNames()
     self.atom_names = names_by_resname
     self.chid = np.unique(self.sele.getChids())[0]
     self.center_coords = pr.calcCenter(self.sele)
     self.vdm_count, self.count = 1, comb.ifg_count

     # Solvent accessibility placeholders.
     self.sasa = self.residue_sasa = self.dssp_sasa = None
     self.sasa_3A_probe = self.sasa_4A_probe = self.sasa_5A_probe = None

     # Contact placeholders.
     self.contact_atoms_all = self.contact_atoms_protein = None
     self.contact_resnums = self.contact_chids = None
     self.contact_resindices = self.contact_segments = None
     self.contact_atoms_water = self.contact_atoms_ligand = self.contact_atoms_metal = None
     # Each list is a separate object, hence no chained assignment here.
     self.contact_info_water, self.contact_info_ligand = [], []
     self.contact_info_metal, self.contact_info_protein = [], []
     self.contact_dict = collections.defaultdict(set)
     self.contact_pair_dict = collections.defaultdict(list)
     self.probe_hbonds = []

     # Geometry / density placeholders.
     self.rotamer = None
     self.min_hull_dist_ifg = self.min_hull_dist_cb_ca = None
     self.cbeta_density = None
     self.heavy_atom_density_5A = self.heavy_atom_density_10A = None

     # Fragment around the iFG: for sequence-defined iFGs it spans one residue
     # number before the lowest and one after the highest; for 'element' iFGs
     # it is the selection itself and no sequence is recorded.
     has_sequence = comb.ifg_seq_str != 'element'
     if has_sequence:
         fragment_query = ''.join([
             'segment A and chain ', self.chid, ' and resnum `',
             str(np.min(self.resnum) - 1), 'to', str(np.max(self.resnum) + 1), '`',
         ])
         self.ifg_frag = parsed_pdb.prody_pdb.select(fragment_query)
     else:
         self.ifg_frag = self.sele
     self.frag_length = len(np.unique(self.ifg_frag.getResindices()))
     if has_sequence:
         self.sequence = ''.join(one_letter_code[rn] for rn in self.resname)
     else:
         self.sequence = ''

     # Secondary structure and per-partner contact summaries.
     self.sec_struct_dssp = self.sec_struct_phi_psi = None
     self.contact_number_water = self.per_res_contact_number_water = None
     self.contact_atom_names_water = self.contact_resnames_water = None
     self.contact_resnums_water = None
     self.contact_number_ligand = self.per_res_contact_number_ligand = None
     self.contact_atom_names_ligand = self.contact_resnames_ligand = None
     self.contact_resnums_ligand = None
     self.contact_number_metal = self.per_res_contact_number_metal = None
     self.contact_atom_names_metal = self.contact_resnames_metal = None
     self.contact_resnums_metal = None

     # Hydrogen-bond records (independent lists).
     self.hbond_atom_names, self.hbond_resnames, self.hbond_resnums = [], [], []
     self.hbond_angle, self.hbond_dist_acc_hyd, self.hbond_dist_heavy = [], [], []

     self.hbond_atom_names_water, self.hbond_number_water = [], []
     self.hbond_resnames_water, self.hbond_resnums_water = [], []
     self.hbond_angle_water = []
     self.hbond_dist_acc_hyd_water, self.hbond_dist_heavy_water = [], []

     self.hbond_number_ligand, self.hbond_atom_names_ligand = [], []
     self.hbond_resnames_ligand, self.hbond_resnums_ligand = [], []
     self.hbond_angle_ligand = []
     self.hbond_dist_acc_hyd_ligand, self.hbond_dist_heavy_ligand = [], []

     self.ca_hbond_atom_names, self.ca_hbond_resnames, self.ca_hbond_resnums = [], [], []
     self.ca_hbond_angle, self.ca_hbond_dist_acc_hyd, self.ca_hbond_dist_heavy = [], [], []

     self.bb_cb_atom_ind = self.get_bb_cb_atom_indices(parsed_pdb)
示例#13
0
def binding_pocket_selection(pose_store, p, ligand_name, selection_radius,
                             center):
    '''
    Collect the protein atoms lying within ``selection_radius`` of a ligand.

    Atoms are read from the fixed-column PDB text of the LAST entry of
    ``pose_store`` (earlier entries are ignored, as before). The sphere(s)
    used for the selection depend on ``center``:

    * ``'mass'``: one sphere centred on the mass-weighted center of
      ``p.select('not water and hetero')``.
    * ``'geometric'``: one sphere centred midway between the ligand atom with
      the smallest x coordinate and the one with the largest x coordinate.
    * ``'double'``: two spheres, centred on those two ligand atoms; an atom
      inside both spheres is reported once.

    Any other value of ``center`` selects no protein atoms.

    :param pose_store: mapping of pose id -> PDB text (lines joined by '\\n').
    :param p: object with a ``select`` method, only used for ``'mass'``
        (presumably a ProDy structure - confirm against the caller).
    :param ligand_name: residue name of the ligand (PDB columns 18-20).
    :param selection_radius: radius of the selection sphere(s).
    :param center: ``'mass'``, ``'geometric'`` or ``'double'``.

    :return: ``(binding_pocket, ligand)``; both are lists of tuples
        ``(resname, atom name, x, y, z, element)`` whose coordinates are the
        stripped text fields of the PDB line.
    :raises ValueError: if ``pose_store`` is empty, or if no ligand atom is
        found in ``'geometric'`` / ``'double'`` mode.
    '''
    pose_ids = list(pose_store)
    if not pose_ids:
        raise ValueError("pose_store is empty: there is no structure to read")
    structure = pose_store[pose_ids[-1]].split('\n')

    # Ligand atoms: numeric coordinates for the centres, text tuples for the
    # caller. Only coordinate records are considered so that other record
    # types sharing columns 18-20 cannot be mistaken for ligand atoms.
    wanted_resname = ligand_name.strip()
    ligand = []
    ligand_xyz = []
    for line in structure:
        if line[0:6].strip() not in _COORD_RECORDS:
            continue
        if line[17:20].strip() != wanted_resname:
            continue
        ligand_xyz.append(
            (float(line[30:38]), float(line[38:46]), float(line[46:54])))
        ligand.append((line[17:20].strip(), line[12:16].strip(),
                       line[30:38].strip(), line[38:46].strip(),
                       line[46:54].strip(), line[-2:].strip()))

    binding_pocket = []

    if center == 'double':
        print('\nDouble center of ligand selected')
        left_center, right_center = _ligand_x_extremes(ligand_xyz,
                                                       ligand_name)

        print('\n')
        print("Left sphere center coordinates: ", left_center[0],
              left_center[1], left_center[2])
        print("Right sphere center coordinates: ", right_center[0],
              right_center[1], right_center[2])
        print("Spheres radii: ", selection_radius)

        left_hits = _protein_atoms_in_sphere(structure, left_center,
                                             selection_radius)
        right_hits = _protein_atoms_in_sphere(structure, right_center,
                                              selection_radius)
        # Keep the historical order (left sphere first, then right sphere)
        # but do not repeat atoms that fall inside both spheres.
        already_selected = set(index for index, _ in left_hits)
        binding_pocket = [atom for _, atom in left_hits]
        binding_pocket.extend(atom for index, atom in right_hits
                              if index not in already_selected)
        _report_selection(binding_pocket, ligand)

    elif center == 'geometric':
        print("\nGeometric ligand center selected")
        left_center, right_center = _ligand_x_extremes(ligand_xyz,
                                                       ligand_name)
        sphere_center = tuple((high + low) / 2
                              for high, low in zip(right_center, left_center))

        print('\n')
        print("Sphere center coordinates: ", sphere_center[0],
              sphere_center[1], sphere_center[2])
        print("Sphere radius: ", selection_radius)

        binding_pocket = [
            atom for _, atom in _protein_atoms_in_sphere(
                structure, sphere_center, selection_radius)
        ]
        _report_selection(binding_pocket, ligand)

    elif center == 'mass':
        print('\nLigand mass center selected')
        ligand_selection = p.select('not water and hetero')

        weights = ligand_selection.getMasses()
        mass_center = calcCenter(ligand_selection, weights)
        sphere_center = (mass_center[0], mass_center[1], mass_center[2])

        print("\nSphere center coordinates: {}, {}, {}".format(
            sphere_center[0], sphere_center[1], sphere_center[2]))
        print("Sphere radius: {}".format(selection_radius))

        binding_pocket = [
            atom for _, atom in _protein_atoms_in_sphere(
                structure, sphere_center, selection_radius)
        ]
        _report_selection(binding_pocket, ligand)

    return binding_pocket, ligand


# Record names that carry atomic coordinates.
_COORD_RECORDS = ("ATOM", "HETATM")

# Residue names treated as protein.
_AMINO_ACIDS = frozenset([
    'CYS', 'ASP', 'SER', 'GLN', 'LYS', 'ILE', 'PRO', 'THR', 'PHE', 'ASN',
    'GLY', 'HIS', 'LEU', 'ARG', 'TRP', 'ALA', 'VAL', 'GLU', 'TYR', 'MET'
])

# Element fields kept with both letters; anything else is cut to one letter.
_TWO_LETTER_ELEMENTS = ('Br', 'FE')


def _ligand_x_extremes(ligand_xyz, ligand_name):
    '''Return the (x, y, z) of the ligand atoms with the lowest and highest x.'''
    if not ligand_xyz:
        raise ValueError(
            "no atoms found for ligand {!r}".format(ligand_name))
    # The extreme atom is looked up among whole atoms, never in a flattened
    # coordinate list, so a y or z value equal to the extreme x cannot be
    # picked by mistake. Ties resolve to the first atom in file order.
    leftmost = min(ligand_xyz, key=lambda xyz: xyz[0])
    rightmost = max(ligand_xyz, key=lambda xyz: xyz[0])
    return leftmost, rightmost


def _pocket_atom_record(line):
    '''Build the (resname, atom name, x, y, z, element) tuple of a PDB line.'''
    tail = line[-3:].strip()
    element = tail if tail in _TWO_LETTER_ELEMENTS else tail[:1]
    return (line[17:20].strip(), line[12:16].strip(), line[30:38].strip(),
            line[38:46].strip(), line[46:54].strip(), element)


def _protein_atoms_in_sphere(structure, sphere_center, radius):
    '''Return (line index, atom tuple) for amino-acid atoms inside a sphere.'''
    center_x, center_y, center_z = sphere_center
    radius_sq = radius**2
    hits = []
    for index, line in enumerate(structure):
        if line[0:6].strip() not in _COORD_RECORDS:
            continue
        if line[17:20].strip() not in _AMINO_ACIDS:
            continue
        dist_sq = ((float(line[30:38]) - center_x)**2 +
                   (float(line[38:46]) - center_y)**2 +
                   (float(line[46:54]) - center_z)**2)
        if dist_sq <= radius_sq:
            hits.append((index, _pocket_atom_record(line)))
    return hits


def _report_selection(binding_pocket, ligand):
    '''Print how many protein and ligand atoms were selected.'''
    print("Number of protein atoms selected: {}".format(
        len(binding_pocket)))
    print('Number of ligand atoms selected: {}'.format(len(ligand)))
    print('Total number of atoms selected: {}'.format(
        len(binding_pocket) + len(ligand)))
示例#14
0
    def append_vectors(self, hetero_file):
        '''
        Voxelise every docked pose found in a file and store it in the dict.

        The file is read line by line; whenever a line containing 'END' is
        read, the lines gathered so far are written to ``temp.pdb`` and parsed
        as one pose. Poses with 3 atoms or fewer are skipped. Each remaining
        pose is mapped onto a 20 x 20 x 20 grid of unit cells centred on the
        pose (or on the middle of its bounding box when an atom lies more than
        10 away from the center along an axis); poses whose bounding box
        exceeds 20 along any axis are skipped. Ligand atoms are marked first,
        then the atoms of ``self.protein`` inside the box fill the cells that
        are still free.

        :param hetero_file: file position
        :return: nothing; each accepted pose adds an entry to
            ``self.heterodict`` (keyed by ``str(self.ct)``) and increments
            ``self.ct``
        '''

        # need to split the files
        TEMP = 'temp.pdb'
        pose_lines = []
        # The context manager closes the input file on every exit path.
        with open(hetero_file, 'r') as docked:
            for line in docked:
                pose_lines.append(line)
                if 'END' not in line:
                    continue

                # Write a temporary file holding just this pose. Text mode is
                # required: the lines are str, and a binary handle rejects
                # str on Python 3.
                with open(TEMP, 'w') as w:
                    w.write(''.join(pose_lines))
                pose_lines = []
                pdb = pd.parsePDB(TEMP)
                if pdb.numAtoms() <= 3:
                    continue

                # Get coordinate of center
                xyz = pdb.getCoords()
                middle = pd.calcCenter(pdb)

                # Largest per-axis distance from the center to any atom.
                scale = max(
                    max(xyz[:, 0]) - middle[0], middle[0] - min(xyz[:, 0]),
                    max(xyz[:, 1]) - middle[1], middle[1] - min(xyz[:, 1]),
                    max(xyz[:, 2]) - middle[2], middle[2] - min(xyz[:, 2]))

                # assert scale <= 10
                if scale > 10:
                    logging.warning(
                        'Warning! {} has a ligand out of box scale with {} atom distance to center'
                        .format(self.PDBname, scale))
                    # Now shifting the boxes:
                    max_scale = max(
                        max(xyz[:, 0]) - min(xyz[:, 0]),
                        max(xyz[:, 1]) - min(xyz[:, 1]),
                        max(xyz[:, 2]) - min(xyz[:, 2]))
                    if max_scale > 20:
                        # The message now has a placeholder for the scale that
                        # was already being passed to format().
                        logging.error(
                            'Assertion failed, {} has a ligand out of box completely with scale {}'
                            .format(self.PDBname, scale))
                        continue
                    # Try to move to the new center
                    middle = [(max(xyz[:, 0]) + min(xyz[:, 0])) / 2,
                              (max(xyz[:, 1]) + min(xyz[:, 1])) / 2,
                              (max(xyz[:, 2]) + min(xyz[:, 2])) / 2]

                # Lowest corner of the grid: cell centers run from
                # middle - 9.5 to middle + 9.5 in 20 unit steps, so only this
                # corner is needed to turn a coordinate into a cell index.
                origin = (middle[0] - 9.5, middle[1] - 9.5, middle[2] - 9.5)

                num_vector = [0] * 8000
                for atom in pdb.iterAtoms():
                    x, y, z = atom.getCoords()
                    x_pos = int(round(x - origin[0]))
                    # assert 0 <= x_pos <= 19
                    y_pos = int(round(y - origin[1]))
                    # assert 0 <= y_pos <= 19
                    z_pos = int(round(z - origin[2]))
                    # assert 0 <= z_pos <= 19
                    if 0 <= x_pos <= 19 and 0 <= y_pos <= 19 and 0 <= z_pos <= 19:
                        # Simply change here to fulfill the mark as 'H_1'
                        num_vector[
                            x_pos * 400 + y_pos * 20 +
                            z_pos] = atom.getName() + '_' + str(HETERO_PART)

                # quick,dirty way to find atoms of protein in cubic boxes
                pd.defSelectionMacro(
                    'inbox',
                    'abs(x-{}) < 10 and abs(y-{}) < 10 and abs(z-{}) < 10'.
                    format(middle[0], middle[1], middle[2]))
                nearby = self.protein.select('inbox')

                if nearby is not None:
                    for atom in nearby.iterAtoms():
                        x, y, z = atom.getCoords()
                        x_pos = int(round(x - origin[0]))
                        # assert 0 <= x_pos <= 19
                        y_pos = int(round(y - origin[1]))
                        # assert 0 <= y_pos <= 19
                        z_pos = int(round(z - origin[2]))
                        # assert 0 <= z_pos <= 19
                        if 0 <= x_pos <= 19 and 0 <= y_pos <= 19 and 0 <= z_pos <= 19 and num_vector[
                                x_pos * 400 + y_pos * 20 + z_pos] == 0:
                            # Simply change here to fulfill the mark as 'C_2'
                            num_vector[x_pos * 400 + y_pos * 20 +
                                       z_pos] = atom.getName() + '_' + str(
                                           PROTEIN_PART)
                        else:
                            # Reached both when the atom falls outside the
                            # grid and when its cell is already occupied.
                            logging.warning(
                                'Coorinate {} {} {} found at {}'.format(
                                    x_pos, y_pos, z_pos, self.PDBname))

                # Save into the dict for future locating
                self.heterodict[str(self.ct)] = {
                    'raw_vector':
                    num_vector,
                    'center':
                    middle,
                    'filename':
                    hetero_file,
                    'id':
                    hetero_file.split('/')[-1].split('.')[0] + '_' +
                    str(self.ct)
                }
                self.ct += 1
示例#15
0
    def find_possible_ifgs_rmsd(self, comb, rmsd_threshold=1.0):
        """Return the query-2 selections whose geometry matches a query in ``comb``.

        Step 1 collects candidate iFGs: residues of ``self.pdb_chain``
        (segment A) matching ``comb.ifg_seq_str_query`` are taken one at a
        time (``comb.num_res_ifg_query == 1``) or in consecutive pairs, and a
        candidate is kept when every atom name listed in
        ``comb.ifg_sele_dict_query`` is present and all residue numbers are
        positive. ``comb.total_possible_ifgs`` is increased by the number of
        candidates.

        Step 2 looks, for each candidate, at residues having the
        ``comb.query_names[1]`` atoms within ``comb.query_distance`` of the
        center of the candidate's ``comb.query_names[0]`` atoms. Each entry of
        ``comb.query_coords`` is superposed on the combined coordinates, and
        the partner selection is kept as soon as one superposition has an
        RMSD not larger than ``rmsd_threshold``.

        :param comb: object holding the iFG / query definitions and counters.
        :param rmsd_threshold: largest accepted RMSD.
        :return: list of the accepted query-2 selections.
        """
        candidates = []
        single_residue = comb.num_res_ifg_query == 1
        sequence_hits = self.prody_pdb.select(''.join([
            'segment A and chain ', self.pdb_chain, ' sequence "',
            comb.ifg_seq_str_query, '"'
        ]))

        if sequence_hits is not None:
            # One entry per matched residue, with its residue name.
            hit_resindices, first_atoms = np.unique(
                sequence_hits.getResindices(), return_index=True)
            hit_resnames = sequence_hits.getResnames()[first_atoms]

            if single_residue:
                for resindex, resname in zip(hit_resindices, hit_resnames):
                    atom_names = comb.ifg_sele_dict_query[1][resname]
                    selection = self.prody_pdb.select(''.join(
                        ['resindex ', str(resindex), ' and name ',
                         atom_names]))
                    if selection is None:
                        continue
                    # Keep only residues where every requested atom exists.
                    if len(selection) != len(atom_names.split()):
                        continue
                    if all(selection.getResnums() > 0):
                        candidates.append(selection)
            else:
                # Matched residues are consumed two at a time.
                resindex_pairs = [
                    hit_resindices[start:start + 2]
                    for start in range(0, len(hit_resindices), 2)
                ]
                resname_pairs = [
                    hit_resnames[start:start + 2]
                    for start in range(0, len(hit_resnames), 2)
                ]
                for resindex_pair, resname_pair in zip(resindex_pairs,
                                                       resname_pairs):
                    resind1, resind2 = resindex_pair
                    resname1, resname2 = resname_pair
                    try:
                        selection = self.prody_pdb.select(''.join([
                            '(resindex ', str(resind1), ' and name ',
                            comb.ifg_sele_dict_query[1][resname1], ')',
                            ' or (resindex ', str(resind2), ' and name ',
                            comb.ifg_sele_dict_query[2][resname2], ')'
                        ]))
                    except KeyError:
                        print('Non-canonical residue in iFG, skipping.')
                        continue
                    if selection is None:
                        continue
                    expected_names = (
                        comb.ifg_sele_dict_query[1][resname1].split() +
                        comb.ifg_sele_dict_query[2][resname2].split())
                    if len(selection) != len(expected_names):
                        continue
                    if all(selection.getResnums() > 0):
                        candidates.append(selection)
            comb.total_possible_ifgs += len(candidates)

        accepted = []
        for pifg in candidates:
            anchor = pr.calcCenter(
                pifg.select('name ' + ' '.join(comb.query_names[0])))
            partners = self.prody_pdb.select(''.join([
                'name ', ' '.join(comb.query_names[1]), ' within ',
                str(comb.query_distance), ' of center'
            ]),
                                             center=anchor)
            if partners is None:
                continue
            partner_resindices = np.unique(partners.getResindices())
            first_coords = [
                pifg.select('name ' + atom_name).getCoords()[0]
                for atom_name in comb.query_names[0]
            ]
            for resind in partner_resindices:
                partner = self.prody_pdb.select(''.join([
                    'name ', ' '.join(comb.query_names[1]), ' and resindex ',
                    str(resind)
                ]))
                # Skip residues lacking any of the query-2 atoms.
                if len(partner) != len(comb.query_names[1]):
                    continue
                second_coords = [
                    partner.select('name ' + atom_name).getCoords()[0]
                    for atom_name in comb.query_names[1]
                ]
                target_coords = np.vstack((first_coords, second_coords))
                for coords in comb.query_coords:
                    R, m_com, t_com = get_rot_trans(coords, target_coords)
                    superposed = np.dot((coords - m_com), R) + t_com
                    if pr.calcRMSD(superposed,
                                   target_coords) <= rmsd_threshold:
                        # This only takes the query2 selection as the iFG.
                        accepted.append(partner)
                        break
        return accepted
示例#16
0
def compute_center_of_chain(pdb_object, chain="L"):
    """Return the center (``prody.calcCenter``) of the atoms of one chain.

    :param pdb_object: object whose ``select`` method accepts a selection
        string (presumably a ProDy structure).
    :param chain: chain identifier to select, "L" by default.
    :return: whatever ``prody.calcCenter`` returns for the selected atoms.
    """
    chain_selection = "chain {}".format(chain)
    return prody.calcCenter(pdb_object.select(chain_selection))