def pp_from_pdb_id(self, pdb_id):
    """Yield protein-protein interaction records for one PDB entry.

    The structure and its 'reference' header are parsed for ``pdb_id``,
    the atom group is mapped through ``pdb_mapping``, and the interactions
    found by ``self.get_interactions_list`` are mapped with
    ``self.map_interactions``. One dict is yielded for every key of the
    mapped interactions, except when either protein is shorter than
    ``MIN_PROTEIN_LEN``.

    Args:
        pdb_id: Identifier handed to ``parsePDB`` / ``parsePDBHeader``.

    Yields:
        dict: A record with 'interaction_type' set to 'protein-protein',
        holding accession, length, sequence, secondary structure, Pfam
        and chain data for both partners plus the interacting residues.
    """
    structure = parsePDB(pdb_id)
    header_ref = parsePDBHeader(pdb_id, 'reference')
    # A missing PubMed id in the header is recorded as 0.
    if 'pmid' in header_ref:
        pmid = int(header_ref['pmid'])
    else:
        pmid = 0

    pdb_mapping.map_atom_group(structure)
    raw_interactions = self.get_interactions_list(structure)
    up_interactions = self.map_interactions(pdb_id, raw_interactions)

    # Drop the None placeholder from the 'uids' data before the lookup.
    uids = set(structure.getData('uids'))
    uids.discard(None)
    uid2data = self.get_up_data(uids)
    uid2struct = pdb_mapping.extract_secondary_structures(pdb_id, uid2data)

    # Each key is a pair of (uid, chain) pairs.
    for first, second in up_interactions:
        first_uid, first_chain = first
        second_uid, second_chain = second

        first_entry = uid2data[first_uid]
        second_entry = uid2data[second_uid]
        first_struct = uid2struct[first_uid]
        second_struct = uid2struct[second_uid]

        # Skip the pair as soon as one partner is too short.
        if any(entry['length'] < MIN_PROTEIN_LEN
               for entry in (first_entry, second_entry)):
            continue

        residue_pairs = list(up_interactions[(first, second)])
        record = {
            'pdb_id': pdb_id,
            'p1_uni_id': first_entry['accession'],
            'p2_uni_id': second_entry['accession'],
            'p1_len': first_entry['length'],
            'p2_len': second_entry['length'],
            'p1_seq': first_entry['sequence'],
            'p2_seq': second_entry['sequence'],
            'p1_struct': first_struct,
            'p2_struct': second_struct,
            # The organism is taken from the second protein only.
            'organism': second_entry['organism'],
            'pmid': pmid,
            'interaction_type': 'protein-protein',
            'p1_pfam': first_entry['pfam'],
            'p2_pfam': second_entry['pfam'],
            'p1_pdb_chain': first_chain,
            'p2_pdb_chain': second_chain,
            'interacting_residues': residue_pairs,
        }
        yield record
def pl_from_pdb_id(self, pdb_id):
    """Yield protein-ligand interaction records for one PDB entry.

    The 'chemicals' header of ``pdb_id`` is reduced to one chemical per
    residue name (the first occurrence wins). For each of them
    ``self.get_ligand_interactions`` is queried, and one dict is yielded
    per UniProt id it returns.

    Args:
        pdb_id: Identifier handed to ``parsePDB`` / ``parsePDBHeader``.

    Yields:
        dict: A record with 'interaction_type' set to 'protein-ligand',
        holding the ligand name and formula, the protein's accession,
        length, sequence, secondary structure, Pfam and organism, and
        the interacting residues.
    """
    pdb_struct = parsePDB(pdb_id)
    reference = parsePDBHeader(pdb_id, 'reference')
    # A missing PubMed id in the header is recorded as 0.
    pmid = int(reference['pmid']) if 'pmid' in reference else 0
    pdb_mapping.map_atom_group(pdb_struct)

    # NOTE(review): the result of this call is not used in this method.
    # The call is kept in case it has side effects -- confirm and remove
    # it if it is pure.
    self.map_interactions(pdb_id, self.get_interactions_list(pdb_struct))

    # Drop the None placeholder from the 'uids' data before the lookup.
    uids = set(pdb_struct.getData('uids'))
    uids.discard(None)
    uid2data = self.get_up_data(uids)
    uid2struct = pdb_mapping.extract_secondary_structures(pdb_id, uid2data)

    # Ligands interactions: keep the first chemical seen per residue name.
    chemicals = {}
    for chem in parsePDBHeader(pdb_id, 'chemicals'):
        chemicals.setdefault(chem.resname, chem)

    # .values() instead of .itervalues() so this runs on Python 2 and 3.
    for chemical in chemicals.values():
        ligand_interactions = self.get_ligand_interactions(
            pdb_id, chemical.resname, pdb_struct)
        for uid in ligand_interactions:
            # Fetch UniProt data lazily for ids missing from the bulk lookup.
            if uid not in uid2data:
                uid2data[uid] = self.up_parser.get_uniprot_entry(uid)
            p = uid2data[uid]
            # NOTE(review): uid2struct was built before any lazy fetch
            # above, so this lookup presumably raises KeyError for such
            # ids -- confirm against extract_secondary_structures.
            p_struct = uid2struct[uid]
            yield {
                'ligand_name': chemical.name,
                'ligand_formula': chemical.formula,
                'pdb_id': pdb_id,
                'p_uni_id': p['accession'],
                'p_len': p['length'],
                'p_seq': p['sequence'],
                'p_struct': p_struct,
                'p_pfam': p['pfam'],
                'organism': p['organism'],
                'pmid': pmid,
                'interaction_type': 'protein-ligand',
                'interacting_residues': list(ligand_interactions[uid]),
            }
def ptm_from_pdb_id(self, pdb_id):
    """Yield post-translational-modification records for one PDB entry.

    For every UniProt entry mapped to the structure, its 'ptm_features'
    are grouped by their first element. Each feature contributes the
    inclusive integer range ``ptm[2][0]..ptm[2][1]`` to its group, and
    one dict is yielded per (protein, group) combination.

    Args:
        pdb_id: Identifier handed to ``parsePDB`` / ``parsePDBHeader``.

    Yields:
        dict: A record with 'interaction_type' set to 'ptm', holding the
        PTM type, the protein's accession, length, sequence, secondary
        structure, Pfam and organism, and the affected residue numbers.
    """
    pdb_struct = parsePDB(pdb_id)
    reference = parsePDBHeader(pdb_id, 'reference')
    # A missing PubMed id in the header is recorded as 0.
    pmid = int(reference['pmid']) if 'pmid' in reference else 0
    pdb_mapping.map_atom_group(pdb_struct)

    # NOTE(review): the result of this call is not used in this method.
    # The call is kept in case it has side effects -- confirm and remove
    # it if it is pure.
    self.map_interactions(pdb_id, self.get_interactions_list(pdb_struct))

    # Drop the None placeholder from the 'uids' data before the lookup.
    uids = set(pdb_struct.getData('uids'))
    uids.discard(None)
    uid2data = self.get_up_data(uids)
    uid2struct = pdb_mapping.extract_secondary_structures(pdb_id, uid2data)

    # PTMs. .items() instead of .iteritems() so this runs on Python 2 and 3.
    for u, p in uid2data.items():
        ptm_residues = {}
        for ptm in p['ptm_features']:
            # presumably ptm[0] is the PTM type and ptm[2] a (start, end)
            # pair; the end position is included in the range.
            first_res = int(ptm[2][0])
            last_res = int(ptm[2][1])
            ptm_residues.setdefault(ptm[0], []).extend(
                range(first_res, last_res + 1))
        p_struct = uid2struct[u]
        for ptm_type in ptm_residues:
            yield {
                'ptm_type': ptm_type,
                'pdb_id': pdb_id,
                'p_uni_id': p['accession'],
                'p_len': p['length'],
                'p_seq': p['sequence'],
                'p_struct': p_struct,
                'p_pfam': p['pfam'],
                'organism': p['organism'],
                'pmid': pmid,
                'interaction_type': 'ptm',
                'affected_residues': ptm_residues[ptm_type],
            }