def compute_residue_pairs_ref(self, cutoff=5.0, save_file=True, filename=None):
    """Compute the contact residue pairs of the reference conformation.

    Args:
        cutoff (float, optional): cutoff for the contact atoms
        save_file (bool, optional): if True the residue pairs are pickled
            to disk instead of being returned
        filename (str, optional): name of the pickle file. If None, the
            name is the part of ``self.ref`` before its first '.'
            followed by 'residue_contact_pairs.pckl'

    Returns:
        dict or None: definition of the residue pairs when ``save_file``
        is False, None when the pairs were written to disk
    """
    sql_ref = pdb2sql(self.ref, sqlfile='mol2.db')
    try:
        residue_pairs_ref = sql_ref.get_contact_residue(
            cutoff=cutoff, return_contact_pairs=True, excludeH=True)
    finally:
        # release the database even if the contact search fails
        sql_ref.close()

    if not save_file:
        return residue_pairs_ref

    if filename is None:
        # NOTE(review): split('.')[0] cuts at the FIRST dot, so a path
        # such as './ref.pdb' yields an empty base name -- kept as is
        # because other code may rely on the resulting file name.
        filename = self.ref.split('.')[0] + 'residue_contact_pairs.pckl'

    # save as pickle; the context manager closes the file on error too
    with open(filename, 'wb') as f:
        pickle.dump(residue_pairs_ref, f)
    return None
def compute_izone(self, cutoff=5.0, save_file=True, filename=None):
    """Compute the zones for i-rmsd calculations.

    The zones are the (chainID, resSeq) of the residues owning the
    backbone contact atoms of the reference.

    Args:
        cutoff (float, optional): cutoff for the contact atoms
        save_file (bool, optional): if True write the zones to a file
            instead of returning them
        filename (str, optional): name of the zone file. If None, the
            name is the part of ``self.ref`` before its first '.'
            followed by '.izone'

    Returns:
        dict or None: ``{chainID: [resSeq, ...]}`` when ``save_file`` is
        False, None when the zones were written to disk
    """
    sql_ref = pdb2sql(self.ref)
    try:
        contact_ref = sql_ref.get_contact_atoms(
            cutoff=cutoff,
            extend_to_residue=True,
            return_only_backbone_atoms=True)
        index_contact_ref = contact_ref[0] + contact_ref[1]

        # unique (chainID, resSeq) of the contact atoms, sorted
        residues = sorted({
            tuple(data)
            for data in sql_ref.get('chainID,resSeq',
                                    rowID=index_contact_ref)
        })
    finally:
        # close the sql even if one of the queries fails
        sql_ref.close()

    if save_file:
        if filename is None:
            filename = self.ref.split('.')[0] + '.izone'
        # one single-residue zone per line
        with open(filename, 'w') as f:
            for chain, num in residues:
                f.write('zone %s%d-%s%d\n' % (chain, num, chain, num))
        return None

    # group the residue numbers per chain
    resData = {}
    for chain, num in residues:
        resData.setdefault(chain, []).append(num)
    return resData
def compute_Fnat_pdb2sql(self, cutoff=5.0):
    """Slow method to compute the FNAT using pdb2sql.

    Fnat is the number of residue pairs found both in the reference and
    in the decoy, divided by the number of residue pairs of the reference.

    Args:
        cutoff (float, optional): cutoff for the contact atoms

    Returns:
        float: Fnat value for the conformation

    Raises:
        ZeroDivisionError: if the reference has no contact residue pair
    """
    # contact residue pairs of the decoy
    sql_decoy = pdb2sql(self.decoy)
    try:
        residue_pairs_decoy = sql_decoy.get_contact_residue(
            cutoff=cutoff, return_contact_pairs=True, excludeH=True)
    finally:
        # close the db even if the contact search fails
        sql_decoy.close()

    # contact residue pairs of the reference
    sql_ref = pdb2sql(self.ref)
    try:
        residue_pairs_ref = sql_ref.get_contact_residue(
            cutoff=cutoff, return_contact_pairs=True, excludeH=True)
    finally:
        sql_ref.close()

    # flatten {resA: [resB, ...]} into a list of (resA, resB) pairs
    data_pair_decoy = [
        (resA, resB)
        for resA, resB_list in residue_pairs_decoy.items()
        for resB in resB_list
    ]
    data_pair_ref = [
        (resA, resB)
        for resA, resB_list in residue_pairs_ref.items()
        for resB in resB_list
    ]

    # number of residue pairs that ref and decoy have in common
    nCommon = len(set(data_pair_ref).intersection(data_pair_decoy))

    # normalize by the number of pairs in the reference
    return nCommon / len(data_pair_ref)
def compute_lzone(self, save_file=True, filename=None):
    """Compute the zone for L-RMSD calculation.

    The zone is made of the residues of the longest chain (A or B) of the
    reference; chain A is used when both chains have the same number of
    atoms.

    Args:
        save_file (bool, optional): if True write the zone to a file
            instead of returning it
        filename (str, optional): name of the zone file. If None, the
            name is the part of ``self.ref`` before its first '.'
            followed by '.lzone'

    Returns:
        dict or None: ``{chainID: [resSeq, ...]}`` when ``save_file`` is
        False, None when the zone was written to disk
    """
    sql_ref = pdb2sql(self.ref)
    try:
        # number of atoms in each chain
        nA = len(sql_ref.get('x,y,z', chainID='A'))
        nB = len(sql_ref.get('x,y,z', chainID='B'))

        # detect which chain is the longest
        long_chain = 'B' if nA < nB else 'A'

        # unique (chainID, resSeq) of the longest chain, sorted
        residues = sorted({
            tuple(data)
            for data in sql_ref.get('chainID,resSeq', chainID=long_chain)
        })
    finally:
        # close the sql even if one of the queries fails
        sql_ref.close()

    if save_file:
        if filename is None:
            filename = self.ref.split('.')[0] + '.lzone'
        # one single-residue zone per line
        with open(filename, 'w') as f:
            for chain, num in residues:
                f.write('zone %s%d-%s%d\n' % (chain, num, chain, num))
        return None

    # group the residue numbers per chain
    resData = {}
    for chain, num in residues:
        resData.setdefault(chain, []).append(num)
    return resData
def __init__(self, pdbfile, param_charge=None, param_vdw=None,
             patch_file=None, contact_distance=8.5, root_export='./',
             individual_directory=False, verbose=False):
    """Compute the Coulomb, van der Waals interaction and charges.

    The constructor stores the settings, loads the pdb in a sql database,
    reads the force field files and extracts the contact atoms.

    Args:
        pdbfile (str): pdb file of the molecule
        param_charge (str): file name of the force field file containing
            the charges e.g. protein-allhdg5.4_new.top.
            Must be of the format:
            * CYM atom O type=O charge=-0.500 end
            * ALA atom N type=NH1 charge=-0.570 end
        param_vdw (str): file name of the force field file containing
            the vdw parameters e.g protein-allhdg5.4_new.param.
            Must be of the format:
            * NONBonded CYAA 0.105 3.750 0.013 3.750
            * NONBonded CCIS 0.105 3.750 0.013 3.750
        patch_file (str): file name of a valid patch file for the
            parameters e.g. patch.top. The way we handle the patching is
            very manual and should be made more automatic. When None no
            patch is read and ``self.patch`` is None
        contact_distance (float): the maximum distance between 2 contact
            atoms
        root_export (str): root directory where the feature file will be
            exported (deprecated ?)
        individual_directory (bool): stored as
            ``self.individual_directory``; not used by the constructor
        verbose (bool): stored as ``self.verbose``; not used by the
            constructor

    Example :

    >>> pdb = '1AK4_100w.pdb'
    >>>
    >>> # get the force field included in deeprank
    >>> # if another FF has been used to compute the ref
    >>> # change also this path to the correct one
    >>> FF = pkg_resources.resource_filename('deeprank.features','') + '/forcefield/'
    >>>
    >>> # declare the feature calculator instance
    >>> atfeat = AtomicFeature(pdb,
    >>>    param_charge = FF + 'protein-allhdg5-4_new.top',
    >>>    param_vdw    = FF + 'protein-allhdg5-4_new.param',
    >>>    patch_file   = FF + 'patch.top')
    >>> # assign parameters
    >>> atfeat.assign_parameters()
    >>>
    >>> # only compute the pair interactions here
    >>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
    >>>
    >>> # close the db
    >>> atfeat.sqldb.close()
    """
    super().__init__("Atomic")

    # set a few things
    self.pdbfile = pdbfile
    self.param_charge = param_charge
    self.param_vdw = param_vdw
    self.patch_file = patch_file
    self.contact_distance = contact_distance
    self.individual_directory = individual_directory
    self.verbose = verbose

    # a few constants
    self.eps0 = 1
    # presumably the Coulomb conversion factor -- TODO confirm the units
    self.c = 332.0636

    # directory to export
    self.root_export = root_export

    # read the pdb as an sql
    self.sqldb = pdb2sql(self.pdbfile)

    # read the force field
    self.read_charge_file()

    # the patch is optional
    self.patch = self.read_patch() if patch_file is not None else None

    # read the vdw param file
    self.read_vdw_file()

    # get the contact atoms
    self.get_contact_atoms()
def compute_irmsd_pdb2sql(self, cutoff=10, method='svd', izone=None,
                          exportpath=None):
    """Slow method to compute the i-rmsd.

    Require the precalculation of the izone. A dedicated routine is
    implemented to compute the izone; if izone is not given in argument
    the routine will compute them automatically.

    i-RMSD is computed selecting the atoms of the contact residues of the
    reference (cutoff of 10A by default), keeping those that also exist
    in the decoy, aligning the two sets as best as possible and computing
    the RMSD.

    Ref : DockQ: A Quality Measure for Protein-Protein Docking Models
    http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879

    Args:
        cutoff (float, optional): cutoff for the contact atoms, only used
            when ``izone`` is None
        method (str, optional): Method to align the fragments
            ('svd','quaternion')
        izone (None, optional): file name of the zone. If None the zones
            will be calculated first
        exportpath (str, optional): directory where the aligned pdbs
            ('irmsd_decoy.pdb' and 'irmsd_ref.pdb') are exported

    Returns:
        float: i-RMSD value of the conformation

    Raises:
        ValueError: if no chain is left in the selected atoms of the
            decoy or of the reference
    """
    from contextlib import closing

    # closing() guarantees both databases are closed on every exit path
    with closing(pdb2sql(self.decoy)) as sql_decoy, \
            closing(pdb2sql(self.ref)) as sql_ref:

        # rowID of the contact atoms of the reference
        if izone is None:
            contact_ref = sql_ref.get_contact_atoms(
                cutoff=cutoff,
                extend_to_residue=True,
                return_only_backbone_atoms=False)
            index_contact_ref = contact_ref[0] + contact_ref[1]
        else:
            index_contact_ref = self.get_izone_rowID(
                sql_ref, izone, return_only_backbone_atoms=False)

        # xyz and atom identifier of the reference contact atoms
        xyz_contact_ref = sql_ref.get('x,y,z', rowID=index_contact_ref)
        data_contact_ref = sql_ref.get('chainID,resSeq,resName,name',
                                       rowID=index_contact_ref)

        # xyz and atom identifier of all the decoy atoms
        xyz_decoy = sql_decoy.get('x,y,z')
        data_decoy = sql_decoy.get('chainID,resSeq,resName,name')

        # position of the first occurrence of each atom identifier in
        # the decoy: same answer as list.index() without the linear scan
        decoy_position = {}
        for pos, atom in enumerate(data_decoy):
            decoy_position.setdefault(tuple(atom), pos)

        # loop through the ref atoms
        # if the atom is in the decoy -> keep the decoy xyz
        # if not -> remember to drop the atom from the ref selection
        xyz_contact_decoy = []
        index_contact_decoy = []
        missing = set()
        for iat, atom in enumerate(data_contact_ref):
            pos = decoy_position.get(tuple(atom))
            if pos is None:
                missing.add(iat)
            else:
                index_contact_decoy.append(pos)
                xyz_contact_decoy.append(xyz_decoy[pos])

        # drop the ref atoms that have no counterpart in the decoy
        if missing:
            xyz_contact_ref = [
                xyz for iat, xyz in enumerate(xyz_contact_ref)
                if iat not in missing
            ]
            index_contact_ref = [
                ind for iat, ind in enumerate(index_contact_ref)
                if iat not in missing
            ]

        # check that we still have atoms to work with
        # NOTE(review): the message talks about "only one chain" but the
        # test only fails when NO chain is left (< 1); '< 2' may have
        # been intended -- confirm before changing, it would reject
        # single-chain selections that are accepted today.
        chain_decoy = set(sql_decoy.get('chainID',
                                        rowID=index_contact_decoy))
        chain_ref = set(sql_ref.get('chainID', rowID=index_contact_ref))
        if len(chain_decoy) < 1 or len(chain_ref) < 1:
            raise ValueError(
                'Error in i-rmsd: only one chain represented in one chain')

        # get the translation vectors of both selections
        tr_decoy = self.get_trans_vect(xyz_contact_decoy)
        tr_ref = self.get_trans_vect(xyz_contact_ref)

        # translate everything
        xyz_contact_decoy = self.translation(xyz_contact_decoy, tr_decoy)
        xyz_contact_ref = self.translation(xyz_contact_ref, tr_ref)

        # get the ideal rotation matrix to superimpose the selections
        U = self.get_rotation_matrix(xyz_contact_decoy,
                                     xyz_contact_ref,
                                     method=method)

        # rotate the entire fragment
        xyz_contact_decoy = self.rotation_matrix(xyz_contact_decoy,
                                                 U,
                                                 center=False)

        # compute the RMSD
        irmsd = self.get_rmsd(xyz_contact_decoy, xyz_contact_ref)

        # export the pdb for verifications
        if exportpath is not None:

            # update the sql database
            sql_decoy.update_xyz(xyz_contact_decoy,
                                 index=index_contact_decoy)
            sql_ref.update_xyz(xyz_contact_ref, index=index_contact_ref)

            sql_decoy.exportpdb(exportpath + '/irmsd_decoy.pdb',
                                index=index_contact_decoy)
            sql_ref.exportpdb(exportpath + '/irmsd_ref.pdb',
                              index=index_contact_ref)

    return irmsd
def compute_lrmsd_pdb2sql(self, exportpath=None, method='svd'):
    """Slow routine to compute the L-RMSD.

    This routine parses the PDB directly using pdb2sql.

    L-RMSD is computed by aligning the longest chain of the decoy to the
    one of the reference and computing the RMSD of the shortest chain
    between decoy and reference.

    Ref : DockQ: A Quality Measure for Protein-Protein Docking Models
    http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879

    Args:
        exportpath (str, optional): directory where the aligned pdbs
            ('lrmsd_decoy.pdb' and 'lrmsd_aligned.pdb') are exported
        method (str, optional): Method to align the fragments
            ('svd','quaternion')

    Returns:
        float: L-RMSD value of the conformation
    """
    from contextlib import closing

    # closing() guarantees both databases are closed on every exit path
    with closing(pdb2sql(self.decoy, sqlfile='decoy.db')) as sql_decoy, \
            closing(pdb2sql(self.ref, sqlfile='ref.db')) as sql_ref:

        # extract the pos of chains A
        xyz_decoy_A = np.array(sql_decoy.get('x,y,z', chainID='A'))
        xyz_ref_A = np.array(sql_ref.get('x,y,z', chainID='A'))

        # extract the pos of chains B
        xyz_decoy_B = np.array(sql_decoy.get('x,y,z', chainID='B'))
        xyz_ref_B = np.array(sql_ref.get('x,y,z', chainID='B'))

        # when decoy and ref differ in size, only keep the atoms that
        # are present in both
        if len(xyz_decoy_A) != len(xyz_ref_A):
            xyz_decoy_A, xyz_ref_A = self.get_identical_atoms(
                sql_decoy, sql_ref, 'A')

        if len(xyz_decoy_B) != len(xyz_ref_B):
            xyz_decoy_B, xyz_ref_B = self.get_identical_atoms(
                sql_decoy, sql_ref, 'B')

        # detect which chain is the longest
        # (chain B is taken as the long one when both have the same size)
        if len(xyz_decoy_A) > len(xyz_decoy_B):
            xyz_decoy_long, xyz_ref_long = xyz_decoy_A, xyz_ref_A
            xyz_decoy_short, xyz_ref_short = xyz_decoy_B, xyz_ref_B
        else:
            xyz_decoy_long, xyz_ref_long = xyz_decoy_B, xyz_ref_B
            xyz_decoy_short, xyz_ref_short = xyz_decoy_A, xyz_ref_A

        # get the translation vectors computed on the long chains
        tr_decoy = self.get_trans_vect(xyz_decoy_long)
        tr_ref = self.get_trans_vect(xyz_ref_long)

        # translate everything for the decoy
        xyz_decoy_short = self.translation(xyz_decoy_short, tr_decoy)
        xyz_decoy_long = self.translation(xyz_decoy_long, tr_decoy)

        # translate everything for the reference
        xyz_ref_short = self.translation(xyz_ref_short, tr_ref)
        xyz_ref_long = self.translation(xyz_ref_long, tr_ref)

        # get the ideal rotation matrix to superimpose the long chains
        U = self.get_rotation_matrix(xyz_decoy_long,
                                     xyz_ref_long,
                                     method=method)

        # apply that rotation to the short chain of the decoy
        xyz_decoy_short = self.rotation_matrix(xyz_decoy_short,
                                               U,
                                               center=False)

        # compute the RMSD on the short chains
        lrmsd = self.get_rmsd(xyz_decoy_short, xyz_ref_short)

        # export the pdb for verifications
        if exportpath is not None:

            # extract the pos of the dimer
            xyz_decoy = np.array(sql_decoy.get('x,y,z'))
            xyz_ref = np.array(sql_ref.get('x,y,z'))

            # translate
            xyz_ref = self.translation(xyz_ref, tr_ref)
            xyz_decoy = self.translation(xyz_decoy, tr_decoy)

            # rotate decoy
            xyz_decoy = self.rotation_matrix(xyz_decoy, U, center=False)

            # update the sql database
            sql_decoy.update_xyz(xyz_decoy)
            sql_ref.update_xyz(xyz_ref)

            # export (the translated reference goes to lrmsd_aligned.pdb)
            sql_decoy.exportpdb(exportpath + '/lrmsd_decoy.pdb')
            sql_ref.exportpdb(exportpath + '/lrmsd_aligned.pdb')

    return lrmsd