def __init__(self, pdb_data, chainA='A', chainB='B'):
    """Set up the buried surface area (BSA) feature calculator.

    Freesasa is required for this feature. Starting with Freesasa
    2.0.3 the Python bindings are distributed as a separate module,
    which can be installed with

    >>> pip install freesasa

    Args:
        pdb_data (list(byte) or str): pdb data or pdb filename
        chainA (str, optional): name of the first chain
        chainB (str, optional): name of the second chain

    Example:
        >>> bsa = BSA('1AK4.pdb')
        >>> bsa.get_structure()
        >>> bsa.get_contact_residue_sasa()
        >>> bsa.sql._close()
    """
    self.pdb_data = pdb_data

    # SQL view of the structure; the caller is expected to close it
    # (see the example above).
    self.sql = pdb2sql.interface(pdb_data)

    self.chains_label = [chainA, chainB]

    # Containers filled later with the computed feature values.
    self.feature_data, self.feature_data_xyz = {}, {}

    # Set freesasa's verbosity to its `nowarnings` level.
    freesasa.setVerbosity(freesasa.nowarnings)
def test_database_consistency(self):
    """Verify initializing interface with an updated pdb2sql database."""
    source_db = pdb2sql(self.pdb)
    source_db.update_column('temp', [99] * 10)
    expected_rows = source_db.get('*')

    # The interface built from the modified database must expose the
    # same rows as the database it was built from.
    self.db = interface(source_db)
    self.assertEqual(expected_rows, self.db.get('*'))
def test_get_contact_atoms_exludeH(self):
    """Verify get_contact_atoms(excludeH=True)."""
    db = interface('pdb/3CRO_H.pdb')
    atoms_by_chain = db.get_contact_atoms(excludeH=True)
    expected_counts = {'A': 341, 'B': 333}

    self.assertIsInstance(atoms_by_chain, dict)
    self.assertEqual(len(atoms_by_chain), 2)
    self.assertEqual(list(atoms_by_chain.keys()), list(expected_counts))
    for chain_id, count in expected_counts.items():
        self.assertEqual(len(atoms_by_chain[chain_id]), count)
def test_get_contact_residues_default(self):
    """Verify get_contact_residues with its default arguments."""
    self.db = interface(pdb2sql(self.pdb))
    residues_by_chain = self.db.get_contact_residues()
    expected_counts = {'A': 20, 'B': 20}

    self.assertIsInstance(residues_by_chain, dict)
    self.assertEqual(len(residues_by_chain), 2)
    self.assertEqual(list(residues_by_chain.keys()), list(expected_counts))
    for chain_id, count in expected_counts.items():
        self.assertEqual(len(residues_by_chain[chain_id]), count)
def test_get_contact_residues_exludeH(self):
    """Verify get_contact_residues(allchains=True, excludeH=True)."""
    db = interface('pdb/3CRO_H.pdb')
    residues_by_chain = db.get_contact_residues(allchains=True,
                                                excludeH=True)
    expected_counts = {'A': 20, 'B': 20, 'L': 47, 'R': 48}

    self.assertIsInstance(residues_by_chain, dict)
    self.assertEqual(len(residues_by_chain), 4)
    self.assertEqual(list(residues_by_chain.keys()), list(expected_counts))
    for chain_id, count in expected_counts.items():
        self.assertEqual(len(residues_by_chain[chain_id]), count)
def test_get_contact_residues_alltrue(self):
    """Verify get_contact_residues with every boolean option enabled."""
    db = interface('pdb/3CRO_H.pdb')
    options = dict(allchains=True, only_backbone_atoms=True, excludeH=True)

    with self.assertWarns(UserWarning) as caught:
        residues_by_chain = db.get_contact_residues(**options)

    # The warning details are only inspected once the context has exited.
    self.assertEqual(len(caught.warnings), 2)
    self.assertEqual(caught.warning.args[0], 'SQL query get an empty')

    expected_counts = {'A': 0, 'B': 0, 'L': 9, 'R': 8}
    self.assertIsInstance(residues_by_chain, dict)
    self.assertEqual(len(residues_by_chain), 4)
    self.assertEqual(list(residues_by_chain.keys()), list(expected_counts))
    for chain_id, count in expected_counts.items():
        self.assertEqual(len(residues_by_chain[chain_id]), count)
def test_get_contact_atoms_alltrue(self):
    """Verify get_contact_atoms with every boolean option enabled."""
    db = interface('pdb/3CRO_H.pdb')
    options = dict(allchains=True,
                   extend_to_residue=True,
                   only_backbone_atoms=True,
                   excludeH=True)

    with self.assertWarns(UserWarning) as caught:
        atoms_by_chain = db.get_contact_atoms(**options)

    # The warning details are only inspected once the context has exited.
    self.assertEqual(len(caught.warnings), 2)
    self.assertEqual(caught.warning.args[0], 'SQL query get an empty')

    self.assertIsInstance(atoms_by_chain, dict)
    self.assertEqual(len(atoms_by_chain), 4)
    self.assertEqual(list(atoms_by_chain.keys()), ['A', 'B', 'L', 'R'])

    # Reference selection in pymol:
    # `select catoms, name CA+C+N+O and byres((chain L and name CA+C+N+O )
    #  within 8.5 of (chain R and name CA+C+N+O))`
    for chain_id, count in (('A', 0), ('B', 0), ('L', 36), ('R', 32)):
        self.assertEqual(len(atoms_by_chain[chain_id]), count)
def get_feature_value(self, contact_only=True):
    """Compute the 'pssm' feature and store it on the instance.

    Each entry of ``self.pssm_data`` (paired with the residue at the
    same position in ``self.res_data``) is multiplied by the matching
    entry of ``self.sasa`` and stored twice: keyed by residue in
    ``self.feature_data['pssm']`` and keyed by
    ``(chain_index, x, y, z)`` of the residue's CB atom in
    ``self.feature_data_xyz['pssm']``.

    Args:
        contact_only (bool, optional): when True, skip residues that
            are not returned as contact residues of chains 'A'/'B'
            (cutoff 5.5). Defaults to True.
    """
    sql = pdb2sql.interface(self.pdbfile)
    try:
        xyz_info = sql.get('chainID,resSeq,resName', name='CB')
        xyz = sql.get('x,y,z', name='CB')
        # NOTE(review): other callers in this code base use
        # `get_contact_residues`; confirm which name the pinned
        # pdb2sql version provides.
        contact_residue = sql.get_contact_residue(cutoff=5.5)
    finally:
        # Release the database even when one of the queries fails.
        sql._close()

    # (chainID, resSeq, resName) -> [x, y, z] of the CB atom.
    # NOTE(review): residues without a CB atom never get an anchor here
    # and are therefore reported as "not found" below.
    xyz_dict = {tuple(info): pos for pos, info in zip(xyz, xyz_info)}
    contact_residue = contact_residue["A"] + contact_residue["B"]

    chain_index = {'A': 0, 'B': 1}
    pssm_data_xyz = {}
    pssm_data = {}
    for res, data in zip(self.res_data, self.pssm_data):
        if contact_only and res not in contact_residue:
            continue

        res_key = tuple(res)
        if res_key not in xyz_dict:
            printif([res_key, ' not found in the pdbfile'], self.debug)
            continue

        key = tuple([chain_index[res[0]]] + xyz_dict[res_key])
        sasa = self.sasa[res_key]
        pssm_data[res] = [data * sasa]
        pssm_data_xyz[key] = [data * sasa]

    # If there are no contact atoms, emit one zero-valued placeholder
    # per chain so the xyz feature is never empty.
    if not pssm_data_xyz:
        pssm_data_xyz[(0, 0., 0., 0.)] = [0.0]
        pssm_data_xyz[(1, 0., 0., 0.)] = [0.0]

    self.feature_data['pssm'] = pssm_data
    self.feature_data_xyz['pssm'] = pssm_data_xyz
def __init__(self, pdb_data, chainA='A', chainB='B'):
    """Prepare the polar/apolar/charged residue-contact calculator.

    Args:
        pdb_data (list(byte) or str): pdb data or pdb filename
        chainA (str, optional): name of the first chain
        chainB (str, optional): name of the second chain

    Example:
        >>> rcd = ResidueDensity('1EWY_100w.pdb')
        >>> rcd.get(cutoff=5.5)
        >>> rcd.extract_features()
    """
    self.pdb_data = pdb_data

    # SQL view of the structure used by the contact queries.
    self.sql = pdb2sql.interface(pdb_data)

    self.chains_label = [chainA, chainB]

    # Containers filled later with the computed feature values.
    self.feature_data, self.feature_data_xyz = {}, {}

    # Residue property table taken from the package configuration.
    self.residue_types = config.AA_properties
def __init__(self, pdb_data, chain1='A', chain2='B'):
    """Prepare the polar/apolar/charged residue-contact calculator.

    Args:
        pdb_data (list(byte) or str): pdb data or pdb filename
        chain1 (str): First chain ID. Defaults to 'A'
        chain2 (str): Second chain ID. Defaults to 'B'

    Example:
        >>> rcd = ResidueDensity('1EWY_100w.pdb')
        >>> rcd.get(cutoff=5.5)
        >>> rcd.extract_features()
    """
    self.pdb_data = pdb_data

    # SQL view of the structure used by the contact queries.
    self.sql = pdb2sql.interface(pdb_data)

    # The chain IDs are kept both as a pair and individually.
    self.chains_label = [chain1, chain2]
    self.chain1, self.chain2 = chain1, chain2

    # Containers filled later with the computed feature values.
    self.feature_data, self.feature_data_xyz = {}, {}

    # Residue property table taken from the package configuration.
    self.residue_types = config.AA_properties
def __init__(self, pdb=None, pssm=None, contact_distance=8.5,
             internal_contact_distance=3, pssm_align='res',
             biopython=False):
    """Class from which Residue features are computed.

    Builds the graph and its node/edge features as part of
    construction, printing the time spent in each step.

    Args:
        pdb (str, optional): path to pdb file. Defaults to None.
            NOTE(review): the path is passed to ``os.path.basename``
            unconditionally, so None is not actually usable here.
        pssm (str, optional): path to pssm file. Defaults to None.
        contact_distance (float, optional): cutoff distance for
            external edges. Defaults to 8.5.
        internal_contact_distance (int, optional): cutoff distance
            for internal edges. Defaults to 3.
        pssm_align (str, optional): forwarded to
            ``PSSM.PSSM_aligned`` as its ``style`` argument.
            Defaults to 'res'.
        biopython (bool, optional): stored on ``self.biopython``; how
            it is consumed is not visible in this method.
            Defaults to False.
    """
    super().__init__()
    self.type = 'residue'
    self.pdb = pdb
    self.name = os.path.splitext(os.path.basename(pdb))[0]

    if pssm is not None:
        self.pssm, self.ic = PSSM.PSSM_aligned(pssm, style=pssm_align)
    else:
        self.pssm, self.ic = None, None

    self.contact_distance = contact_distance
    self.internal_contact_distance = internal_contact_distance

    self.residue_charge = {
        'CYS': -0.64, 'HIS': -0.29, 'ASN': -1.22, 'GLN': -1.22,
        'SER': -0.80, 'THR': -0.80, 'TYR': -0.80, 'TRP': -0.79,
        'ALA': -0.37, 'PHE': -0.37, 'GLY': -0.37, 'ILE': -0.37,
        'VAL': -0.37, 'MET': -0.37, 'PRO': 0.0, 'LEU': -0.37,
        'GLU': -1.37, 'ASP': -1.37, 'LYS': -0.36, 'ARG': -1.65,
    }

    self.residue_names = {
        'CYS': 0, 'HIS': 1, 'ASN': 2, 'GLN': 3,
        'SER': 4, 'THR': 5, 'TYR': 6, 'TRP': 7,
        'ALA': 8, 'PHE': 9, 'GLY': 10, 'ILE': 11,
        'VAL': 12, 'MET': 13, 'PRO': 14, 'LEU': 15,
        'GLU': 16, 'ASP': 17, 'LYS': 18, 'ARG': 19,
    }

    # Lysine carries a positive side-chain charge, like arginine; it
    # was previously mislabelled as 'neg_charged'.
    self.residue_polarity = {
        'CYS': 'polar', 'HIS': 'polar', 'ASN': 'polar', 'GLN': 'polar',
        'SER': 'polar', 'THR': 'polar', 'TYR': 'polar', 'TRP': 'polar',
        'ALA': 'apolar', 'PHE': 'apolar', 'GLY': 'apolar',
        'ILE': 'apolar', 'VAL': 'apolar', 'MET': 'apolar',
        'PRO': 'apolar', 'LEU': 'apolar',
        'GLU': 'neg_charged', 'ASP': 'neg_charged',
        'LYS': 'pos_charged', 'ARG': 'pos_charged',
    }

    self.pssm_pos = {
        'CYS': 4, 'HIS': 8, 'ASN': 2, 'GLN': 5,
        'SER': 15, 'THR': 16, 'TYR': 18, 'TRP': 17,
        'ALA': 0, 'PHE': 13, 'GLY': 7, 'ILE': 9,
        'VAL': 19, 'MET': 12, 'PRO': 14, 'LEU': 10,
        'GLU': 6, 'ASP': 3, 'LYS': 11, 'ARG': 1,
    }

    self.polarity_encoding = {
        'apolar': 0,
        'polar': 1,
        'neg_charged': 2,
        'pos_charged': 3,
    }

    self.biopython = biopython

    # check if external execs are installed
    self.check_execs()

    # create the sqldb
    db = interface(self.pdb)
    try:
        # get the graphs
        t0 = time()
        self.get_graph(db)
        print('Graph %f' % (time() - t0))

        # get the nodes/edge attributes
        t0 = time()
        self.get_node_features(db)
        print('Node %f' % (time() - t0))

        t0 = time()
        self.get_edge_features()
        print('Edge %f' % (time() - t0))
    finally:
        # close the db even when one of the steps above fails
        db._close()
def construct_graph(self, verbose=False, print_res_pairs=False):
    """Construct the graph corresponding to a given PDB.

    Queries the contact residue pairs of ``self.pdbfile`` within
    ``self.cutoff``, drops every residue whose name is not in
    ``self.resmap_inv`` or that is missing from ``self.aligned_pssm``,
    and stores the result as ``self.nodes`` (list of residues) and
    ``self.edges`` (list of ``[index_in_nodes, index_in_nodes]``).

    Args:
        verbose (bool, optional): kept for backward compatibility;
            not used by this method.
        print_res_pairs (bool, optional): print the residue contact
            pairs for debug
    """
    db = interface(self.pdbfile)
    try:
        res_contact_pairs = db.get_contact_residues(
            cutoff=self.cutoff,
            allchains=True,
            return_contact_pairs=True)
    finally:
        # The database is only needed for the query above.
        db._close()

    # tag the non residues
    keys_to_pop = []
    for res in res_contact_pairs:
        if res[2] not in self.resmap_inv:
            keys_to_pop.append(res)
            print(res)
            warnings.warn('--> Residue not valid')

    # tag the ones that are not in PSSM
    for res in res_contact_pairs:
        if res not in self.aligned_pssm:
            keys_to_pop.append(res)
            print(res)
            warnings.warn('--> Residue not found in PSSM file')

    # Remove the tagged residues; a residue may have been tagged twice,
    # `pop` with a default makes the second removal a no-op.
    for res in keys_to_pop:
        res_contact_pairs.pop(res, None)

    # get a list of residues of chain B
    # automatically remove the ones that are not proper res
    # and the ones that are not in the PSSM
    nodesB = []
    for k, reslist in list(res_contact_pairs.items()):
        tmp_reslist = []
        for res in reslist:
            if res[2] in self.resmap_inv and res in self.aligned_pssm:
                nodesB.append(res)
                tmp_reslist.append(res)
            else:
                print(res)
                warnings.warn(
                    '--> Residue not found in PSSM file or Residue not recognized'
                )
        res_contact_pairs[k] = tmp_reslist

    nodesB = sorted(set(nodesB))

    # make a list of nodes
    self.nodes = list(res_contact_pairs.keys()) + nodesB

    # Position of each node in `self.nodes`; `setdefault` keeps the
    # first occurrence, matching what `list.index` would return.
    node_index = {}
    for position, node in enumerate(self.nodes):
        node_index.setdefault(node, position)

    # get the edge numbering
    self.edges = [
        [node_index[key], node_index[v]]
        for key, val in res_contact_pairs.items()
        for v in val
    ]

    if print_res_pairs:
        for k, vs in res_contact_pairs.items():
            print(k)
            for v in vs:
                print('\t\t', v)
def get_feature_value(self, cutoff=5.5):
    """Fill the per-residue PSSM features for the interface residues.

    For every interface residue of chains 'A' and 'B' that has an
    entry in ``self.pssm``, each value of that entry is stored under
    the matching name of ``self.feature_names`` in
    ``self.feature_data`` (keyed by residue) and in
    ``self.feature_data_xyz`` (keyed by ``(chain_index, x, y, z)`` of
    the residue anchor atom: CB, or CA for glycine).

    Args:
        cutoff (float, optional): distance cutoff passed to
            ``get_contact_residues``. Defaults to 5.5.

    Raises:
        ValueError: if no interface residue is found, or if none of
            the interface residues has a pssm value.
    """
    sql = pdb2sql.interface(self.pdb_file)
    try:
        # set anchors for all residues and get their xyz
        xyz_info = sql.get('chainID,resSeq,resName', name='CB')
        xyz_info += sql.get('chainID,resSeq,resName', name='CA',
                            resName='GLY')

        xyz = sql.get('x,y,z', name='CB')
        xyz += sql.get('x,y,z', name='CA', resName='GLY')

        # get interface contact residues
        # ctc_res = {"A":[chain 1 residues], "B": [chain2 residues]}
        ctc_res = sql.get_contact_residues(cutoff=cutoff)
    finally:
        # Release the database even when one of the queries fails.
        sql._close()

    xyz_dict = {tuple(info): pos for pos, info in zip(xyz, xyz_info)}
    ctc_res = ctc_res["A"] + ctc_res["B"]

    # handle with small interface or no interface
    total_res = len(ctc_res)
    if total_res == 0:
        raise ValueError(
            f"{self.mol_name}: No interface residue found with the "
            f"cutoff {cutoff}Å."
            f" Failed to calculate the features of FullPSSM/PSSM_IC.")
    elif total_res < 5:  # this is an empirical value
        warnings.warn(
            f"{self.mol_name}: Only {total_res} interface residues found"
            f" with cutoff {cutoff}Å. Be careful with"
            f" using the features FullPSSM/PSSM_IC")

    # check if interface residues have pssm values
    ctc_res_set = set(ctc_res)
    pssm_res_set = set(self.pssm.keys())
    ctc_res_wo_pssm = ctc_res_set.difference(pssm_res_set)
    if len(ctc_res_set.intersection(pssm_res_set)) == 0:
        raise ValueError(
            f"{self.mol_name}: All interface residues have no pssm values."
            f" Check residue chainID/ID/name consistency "
            f"between PDB and PSSM files")
    elif ctc_res_wo_pssm:
        warnings.warn(
            f"{self.mol_name}: The following interface residues have "
            f" no pssm value:\n {ctc_res_wo_pssm}")

    # Filter in the original contact order rather than iterating a set,
    # so the insertion order of the feature dicts is the same on every
    # run regardless of string hash randomization.
    ctc_res_with_pssm = [res for res in ctc_res if res in pssm_res_set]

    # get feature values
    chain_index = {'A': 0, 'B': 1}
    for res in ctc_res_with_pssm:
        key = tuple([chain_index[res[0]]] + xyz_dict[res])
        for name, value in zip(self.feature_names, self.pssm[res]):
            # Make sure the feature_names and pssm[res] have
            # consistent order of the 20 residue types
            # name: PSSM_ALA
            # value: -3.0
            # res: ('B', 573, 'HIS')
            # key: (1, -19.346, 6.156, -3.44)
            self.feature_data[name][res] = [value]
            self.feature_data_xyz[name][key] = [value]
def read_pdb(self):
    """Create a sql database for the pdb."""
    # `[()]` reads the full content stored under the 'complex' entry.
    complex_data = self.molgrp['complex'][()]
    self.sqldb = pdb2sql.interface(complex_data)
def setUp(self):
    """Point the test case at the reference pdb and open its interface."""
    pdb_path = 'pdb/3CRO.pdb'
    self.pdb = pdb_path
    self.db = interface(pdb_path)
def __init__(self, pdb=None, pssm=None, contact_distance=8.5,
             internal_contact_distance=3, pssm_align='res'):
    """Class from which Residue features are computed.

    Builds the graph and its node/edge features as part of
    construction, printing the time spent in each step.

    Args:
        pdb (str, optional): path to pdb file. Defaults to None.
            NOTE(review): the path is passed to ``os.path.basename``
            unconditionally, so None is not actually usable here.
        pssm (str, optional): path to pssm file. Defaults to None.
        contact_distance (float, optional): cutoff distance for
            external edges. Defaults to 8.5.
        internal_contact_distance (int, optional): cutoff distance
            for internal edges. Defaults to 3.
        pssm_align (str, optional): forwarded to
            ``PSSM.PSSM_aligned`` as its ``style`` argument.
            Defaults to 'res'.
    """
    super().__init__()
    self.type = 'residue'
    self.pdb = pdb
    self.name = os.path.splitext(os.path.basename(pdb))[0]

    if pssm is not None:
        self.pssm, self.ic = PSSM.PSSM_aligned(pssm, style=pssm_align)
    else:
        self.pssm, self.ic = None, None

    self.contact_distance = contact_distance
    self.internal_contact_distance = internal_contact_distance

    self.residue_charge = {
        'CYS': -0.64, 'HIS': -0.29, 'ASN': -1.22, 'GLN': -1.22,
        'SER': -0.80, 'THR': -0.80, 'TYR': -0.80, 'TRP': -0.79,
        'ALA': -0.37, 'PHE': -0.37, 'GLY': -0.37, 'ILE': -0.37,
        'VAL': -0.37, 'MET': -0.37, 'PRO': 0.0, 'LEU': -0.37,
        'GLU': -1.37, 'ASP': -1.37, 'LYS': -0.36, 'ARG': -1.65,
    }

    # Contiguous indices 0..19, one per residue type. ARG was
    # previously 20, leaving index 19 unused.
    self.residue_names = {
        'CYS': 0, 'HIS': 1, 'ASN': 2, 'GLN': 3,
        'SER': 4, 'THR': 5, 'TYR': 6, 'TRP': 7,
        'ALA': 8, 'PHE': 9, 'GLY': 10, 'ILE': 11,
        'VAL': 12, 'MET': 13, 'PRO': 14, 'LEU': 15,
        'GLU': 16, 'ASP': 17, 'LYS': 18, 'ARG': 19,
    }

    self.residue_polarity = {
        'CYS': 'polar', 'HIS': 'polar', 'ASN': 'polar', 'GLN': 'polar',
        'SER': 'polar', 'THR': 'polar', 'TYR': 'polar', 'TRP': 'polar',
        'ALA': 'apolar', 'PHE': 'apolar', 'GLY': 'apolar',
        'ILE': 'apolar', 'VAL': 'apolar', 'MET': 'apolar',
        'PRO': 'apolar', 'LEU': 'apolar',
        'GLU': 'charged', 'ASP': 'charged',
        'LYS': 'charged', 'ARG': 'charged',
    }

    self.pssm_pos = {
        'CYS': 4, 'HIS': 8, 'ASN': 2, 'GLN': 5,
        'SER': 15, 'THR': 16, 'TYR': 18, 'TRP': 17,
        'ALA': 0, 'PHE': 13, 'GLY': 7, 'ILE': 9,
        'VAL': 19, 'MET': 12, 'PRO': 14, 'LEU': 10,
        'GLU': 6, 'ASP': 3, 'LYS': 11, 'ARG': 1,
    }

    self.polarity_encoding = {
        'apolar': 0, 'polar': -1, 'charged': 1}

    # Give every unordered pair of polarity codes its own integer id,
    # numbered in order of first appearance.
    self.edge_polarity_encoding, iencod = {}, 0
    for v1 in self.polarity_encoding.values():
        for v2 in self.polarity_encoding.values():
            key = tuple(np.sort([v1, v2]))
            if key not in self.edge_polarity_encoding:
                self.edge_polarity_encoding[key] = iencod
                iencod += 1

    # check if external execs are installed
    self.check_execs()

    # create the sqldb
    db = interface(self.pdb)
    try:
        # get the graphs
        t0 = time()
        self.get_graph(db)
        print('Graph %f' % (time() - t0))

        # get the nodes/edge attributes
        t0 = time()
        self.get_node_features(db)
        print('Node %f' % (time() - t0))

        t0 = time()
        self.get_edge_features()
        print('Edge %f' % (time() - t0))
    finally:
        # close the db even when one of the steps above fails
        db._close()