def run(self):
    '''Refresh the activation distance stored in every structure yaml file.

    Every file found in ``self.path`` is loaded, the structure named by its
    ``pdb`` key is run through ``PdbStateIdentifier``, the stored and the newly
    computed values are printed, and the file is rewritten with the new
    ``distance``. A failure on one file is printed next to the file name and
    does not stop the remaining files from being processed.
    '''
    for f in os.listdir(self.path):
        yaml_path = os.path.join(self.path, f)
        try:
            with open(yaml_path, 'r') as yf:
                # safe_load instead of a bare yaml.load(): recent PyYAML
                # releases require an explicit Loader, and the plain loader
                # can construct arbitrary Python objects.
                y = yaml.safe_load(yf)
            ps = PdbStateIdentifier(Structure.objects.get(pdb_code__index=y['pdb']))
            ps.run()
            distance = round(float(ps.activation_value), 2)
            print(f, y['distance'], y['state'], distance, ps.state)
            y['distance'] = distance
            with open(yaml_path, 'w') as syf:
                yaml.dump(y, syf, indent=4, default_flow_style=False)
        except Exception as msg:
            # Best-effort batch job: report the offending file and carry on.
            print(f, msg)
def handle(self, *args, **options):
    '''Print the activation value and state of one structure or homology model.

    ``options['s']`` is first looked up as a PDB code among ``Structure``
    objects; when no such structure exists it is used as a protein entry name
    and looked up among ``StructureModel`` objects with state slug
    ``options['state']``.

    ``options['gns']`` optionally overrides the generic numbers passed to
    ``PdbStateIdentifier``: the first character of each value (2, 6, 3 or 7)
    selects which of the four positions it replaces. ``options['cutoffs']``
    optionally gives the inactive and the intermediate cutoff, in that order.
    '''
    try:
        s = Structure.objects.get(pdb_code__index=options['s'])
    except Structure.DoesNotExist:
        # Only a missing structure triggers the homology-model fallback; any
        # other failure (database error, Ctrl-C) is allowed to surface.
        s = StructureModel.objects.get(protein__entry_name=options['s'],
                                       state__slug=options['state'])

    identifier_kwargs = {}
    if options['gns']:
        family_slug = s.protein_conformation.protein.family.slug
        # Defaults per helix; family slugs starting with 002 or 003 use
        # different TM6/TM7 positions.
        if family_slug.startswith(('002', '003')):
            gns = {'2': '2x41', '6': '6x33', '3': '3x44', '7': '7x51'}
        else:
            gns = {'2': '2x41', '6': '6x38', '3': '3x44', '7': '7x52'}
        for value in options['gns']:
            helix = value[:1]
            if helix in gns:
                gns[helix] = value
        identifier_kwargs.update(tm2_gn=gns['2'], tm6_gn=gns['6'],
                                 tm3_gn=gns['3'], tm7_gn=gns['7'])
        # NOTE: cutoffs are only applied together with custom generic numbers;
        # without --gns the identifier runs entirely on its own defaults.
        if options['cutoffs']:
            identifier_kwargs.update(
                inactive_cutoff=float(options['cutoffs'][0]),
                intermediate_cutoff=float(options['cutoffs'][1]),
            )

    psi = PdbStateIdentifier(s, **identifier_kwargs)
    psi.run()
    print(options['s'], psi.activation_value, psi.state)
def run(self):
    '''Tally how often computed activation states agree with their reference.

    Unrefined structures are processed first, skipping those whose family
    ancestor slug is 001, 004 or 005. With ``self.only_xtals`` set, the state
    computed by ``PdbStateIdentifier`` is compared with the state stored on
    the structure. Otherwise the state computed for the ``<pdb>_refined``
    structure is compared with the one computed for the unrefined structure,
    and afterwards every homology model (same slug exclusion) is compared with
    its main template.

    Counts are kept in ``self.match``, ``self.mismatch`` and
    ``self.exceptions``. They are printed at the end, followed by a summary
    line holding the generic numbers, the counts and the cutoffs.

    Returns:
        0, always.
    '''
    excluded_slugs = ['001', '004', '005']

    def identify(target):
        # Run the state identifier on `target` with this command's settings.
        identifier = PdbStateIdentifier(target, self.tm2_gn, self.tm6_gn,
                                        self.tm3_gn, self.tm7_gn,
                                        self.inact_cutoff, self.inter_cutoff)
        identifier.run()
        return identifier

    strs = Structure.objects.filter(refined=False).exclude(
        protein_conformation__protein__parent__family__parent__parent__parent__slug__in=excluded_slugs)
    self.match, self.mismatch, self.exceptions = 0, 0, 0

    for s in strs:
        try:
            if self.only_xtals:
                psis = identify(s)
                if psis.state != s.state:
                    print(s, s.state, s.distance, psis.state,
                          psis.activation_value, 'mismatch')
                    self.mismatch += 1
                else:
                    print(s, ",", s.state, ',', psis.state,
                          psis.activation_value, self.inact_cutoff,
                          self.inter_cutoff)
                    self.match += 1
            else:
                r_s = Structure.objects.get(
                    pdb_code__index=s.pdb_code.index + '_refined')
                psi = identify(r_s)
                psis = identify(s)
                if psi.state != psis.state:
                    print(s, psis.state.slug, psis.activation_value,
                          psi.state.slug, psi.activation_value)
                    self.mismatch += 1
                else:
                    self.match += 1
        except Exception as msg:
            # Exception (not a bare except) keeps Ctrl-C working during the
            # long loop; the reason is printed instead of being discarded.
            print('Exception: ', s, msg)
            self.exceptions += 1

    if not self.only_xtals:
        hommods = StructureModel.objects.all().exclude(
            protein__family__parent__parent__parent__slug__in=excluded_slugs)
        for h in hommods:
            try:
                psih = identify(h)
                psiss = identify(h.main_template)
                if psih.state != psiss.state:
                    print(h, psiss.state.slug, psiss.activation_value,
                          psih.state.slug, psih.activation_value)
                    self.mismatch += 1
                else:
                    self.match += 1
            except Exception as msg:
                print('Exception hommod:', h, msg)
                self.exceptions += 1

    print('match:', self.match, 'mismatch:', self.mismatch, 'exceptions:',
          self.exceptions)
    print('{}-{}-{}-{},{},{},{},{},{}'.format(
        self.tm2_gn, self.tm6_gn, self.tm3_gn, self.tm7_gn, self.mismatch,
        self.match, self.exceptions, self.inact_cutoff, self.inter_cutoff))
    return 0
def run(self):
    '''Count agreements and disagreements between computed activation states.

    Walks all unrefined structures (family ancestor slugs 001, 004 and 005 are
    left out). When ``self.only_xtals`` is set, the computed state is checked
    against the state stored on the structure. Otherwise the refined
    counterpart (``<pdb>_refined``) is checked against the unrefined
    structure, after which each homology model is checked against its main
    template. Totals end up in ``self.match``, ``self.mismatch`` and
    ``self.exceptions``, are printed, and 0 is returned.
    '''
    def state_of(entry):
        # Build and run an identifier using the configured numbers and cutoffs.
        worker = PdbStateIdentifier(entry, self.tm2_gn, self.tm6_gn, self.tm3_gn, self.tm7_gn, self.inact_cutoff, self.inter_cutoff)
        worker.run()
        return worker

    xtals = Structure.objects.filter(refined=False).exclude(protein_conformation__protein__parent__family__parent__parent__parent__slug__in=['001','004','005'])
    self.match = self.mismatch = self.exceptions = 0

    for xtal in xtals:
        try:
            if not self.only_xtals:
                # Refined structure versus its unrefined original.
                refined = Structure.objects.get(pdb_code__index=xtal.pdb_code.index+'_refined')
                refined_id = state_of(refined)
                xtal_id = state_of(xtal)
                if refined_id.state!=xtal_id.state:
                    print(xtal, xtal_id.state.slug, xtal_id.activation_value, refined_id.state.slug, refined_id.activation_value)
                    self.mismatch+=1
                else:
                    self.match+=1
                continue

            # Computed state versus the state stored on the structure.
            xtal_id = state_of(xtal)
            if xtal_id.state!=xtal.state:
                print(xtal, xtal.state, xtal.distance, xtal_id.state, xtal_id.activation_value, 'mismatch')
                self.mismatch+=1
            else:
                print(xtal,",",xtal.state,',', xtal_id.state, xtal_id.activation_value, self.inact_cutoff, self.inter_cutoff)
                self.match+=1
        except:
            print('Exception: ', xtal)
            self.exceptions+=1

    if not self.only_xtals:
        models = StructureModel.objects.all().exclude(protein__family__parent__parent__parent__slug__in=['001','004','005'])
        for model in models:
            try:
                model_id = state_of(model)
                template_id = state_of(model.main_template)
                if model_id.state!=template_id.state:
                    print(model, template_id.state.slug, template_id.activation_value, model_id.state.slug, model_id.activation_value)
                    self.mismatch+=1
                else:
                    self.match+=1
            except:
                print('Exception hommod:', model)
                self.exceptions+=1

    print('match:', self.match, 'mismatch:', self.mismatch, 'exceptions:', self.exceptions)
    summary = '{}-{}-{}-{},{},{},{},{},{}'.format(self.tm2_gn, self.tm6_gn, self.tm3_gn, self.tm7_gn, self.mismatch, self.match, self.exceptions, self.inact_cutoff, self.inter_cutoff)
    print(summary)
    return 0
def new_xtals(self, uniprot):
    ''' List GPCR crystal structures missing from GPCRdb and the yaml files. Adds missing structures to DB.

    For each PDB code returned by self.pdb_request_by_uniprot(uniprot) the code
    is appended to self.db_list when no Structure with that code can be fetched
    and to self.yaml_list when it is not in self.yamls; in both cases only if
    it is not listed in self.exceptions and self.pdb_request_by_pdb returns 1.
    Codes missing from the database are then fetched with fetch_pdb_info,
    written out as construct and structure yaml files, built through the
    build_structures management command and annotated with the state computed
    by PdbStateIdentifier. An exception raised while adding one code is printed
    together with the code, and the loop moves on to the next code.

    uniprot -- accession passed to pdb_request_by_uniprot and used to look up
               the Protein that new structures are attached to
    '''
    structs = self.pdb_request_by_uniprot(uniprot)
    # A failed lookup leaves protein as None; the attribute accesses further
    # down (protein.family, ...) then fail inside the per-structure try block.
    try:
        protein = Protein.objects.get(accession=uniprot)
    except:
        protein = None
    # Residues of the protein carrying one of the seven x50 generic numbers.
    try:
        x50s = Residue.objects.filter(protein_conformation__protein=protein,generic_number__label__in=['1x50','2x50','3x50','4x50','5x50','6x50','7x50'])
    except:
        x50s = None
    # ['null'] is skipped entirely - presumably the helper's "no structures" marker (confirm).
    if structs!=['null']:
        for s in structs:
            missing_from_db, missing_yaml = False, False
            # Any failure to fetch the Structure is treated as "not in the database".
            try:
                st_obj = Structure.objects.get(pdb_code__index=s)
            except:
                if s not in self.exceptions:
                    # presumably 1 means the entry passed the PDB-side check - confirm against pdb_request_by_pdb
                    check = self.pdb_request_by_pdb(s)
                    if check==1:
                        self.db_list.append(s)
                        missing_from_db = True
            if s not in self.yamls and s not in self.exceptions:
                # Codes already in db_list are not sent to pdb_request_by_pdb a second time.
                if s not in self.db_list:
                    check = self.pdb_request_by_pdb(s)
                else:
                    check = 1
                if check==1:
                    self.yaml_list.append(s)
                    missing_yaml = True
            # Only codes missing from the database are processed any further.
            if not missing_from_db:
                continue
            try:
                pdb_data_dict = fetch_pdb_info(s, protein, new_xtal=True)
                exp_method = pdb_data_dict['experimental_method']
                # st_type is looked up but not used again within this method.
                if exp_method=='Electron Microscopy':
                    st_type = StructureType.objects.get(slug='electron-microscopy')
                elif exp_method=='X-ray diffraction':
                    st_type = StructureType.objects.get(slug='x-ray-diffraction')
                if 'deletions' in pdb_data_dict:
                    for d in pdb_data_dict['deletions']:
                        # x50 residues lying outside the open interval (start, end) of this deletion
                        presentx50s = []
                        for x in x50s:
                            if not d['start']<x.sequence_number<d['end']:
                                presentx50s.append(x)
                        # Filter out ones without all 7 x50 positions present in the xtal
                        if len(presentx50s)!=7:
                            # list.index raises once s has already been removed
                            # (or was never in yaml_list); that is ignored here.
                            try:
                                del self.db_list[self.db_list.index(s)]
                                missing_from_db = False
                                del self.yaml_list[self.yaml_list.index(s)]
                            except:
                                pass
                else:
                    print('Warning: no deletions in pdb info, check {}'.format(s))
                    continue
                if missing_from_db:
                    # pref_chain and resolution are assigned but not used again within this method.
                    pref_chain = ''
                    resolution = pdb_data_dict['resolution']
                    pdb_code, created = WebLink.objects.get_or_create(index=s, web_resource=WebResource.objects.get(slug='pdb'))
                    # Download the PDB file into the current working directory and read it back.
                    pdbl = PDB.PDBList()
                    pdbl.retrieve_pdb_file(s, pdir='./', file_format="pdb")
                    # .lower() is applied to the whole formatted path, not only to the code.
                    with open('./pdb{}.ent'.format(s).lower(),'r') as f:
                        lines = f.readlines()
                    pdb_file = ''
                    publication_date, pubmed, doi = '','',''
                    state = ProteinState.objects.get(slug='inactive')
                    # Protein and conformation entries for the structure itself, named after the lower-cased PDB code.
                    new_prot, created = Protein.objects.get_or_create(entry_name=s.lower(), accession=None, name=s.lower(), sequence=pdb_data_dict['wt_seq'], family=protein.family, parent=protein, residue_numbering_scheme=protein.residue_numbering_scheme, sequence_type=ProteinSequenceType.objects.get(slug='mod'), source=ProteinSource.objects.get(name='OTHER'), species=protein.species)
                    new_prot_conf, created = ProteinConformation.objects.get_or_create(protein=new_prot, state=state, template_structure=None)
                    # NOTE(review): the slices below assume fixed PDB columns; confirm that the
                    # prefixes carry the exact spacing of the PDB file format, otherwise
                    # nothing matches and strptime below fails on the empty date.
                    for line in lines:
                        if line.startswith('REVDAT 1'):
                            publication_date = line[13:22]
                        if line.startswith('JRNL PMID'):
                            pubmed = line[19:].strip()
                        if line.startswith('JRNL DOI'):
                            doi = line[19:].strip()
                        pdb_file+=line
                    pdb_data, created = PdbData.objects.get_or_create(pdb=pdb_file)
                    # The reformatted publication_date is not used again within this method.
                    d = datetime.strptime(publication_date,'%d-%b-%y')
                    publication_date = d.strftime('%Y-%m-%d')
                    # Best-effort publication lookup/creation, preferring the DOI over
                    # the PubMed id; every failure in here is silently ignored.
                    try:
                        if doi!='':
                            try:
                                publication = Publication.objects.get(web_link__index=doi)
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=doi, web_resource__slug='doi')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=doi, web_resource = WebResource.objects.get(slug='doi'))
                                    p.web_link = wl
                                p.update_from_doi(doi=doi)
                                p.save()
                                publication = p
                        elif pubmed!='':
                            try:
                                publication = Publication.objects.get(web_link__index=pubmed)
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=pubmed, web_resource__slug='pubmed')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=pubmed, web_resource = WebResource.objects.get(slug='pubmed'))
                                    p.web_link = wl
                                p.update_from_pubmed_data(index=pubmed)
                                p.save()
                                publication = p
                    except:
                        pass
                    pcs = PdbChainSelector(s, protein)
                    pcs.run_dssp()
                    preferred_chain = pcs.select_chain()
                    # Run state identification
                    # Create yaml files
                    # (paths are relative to the current working directory)
                    with open('../../data/protwis/gpcr/structure_data/constructs/{}.yaml'.format(pdb_code.index), 'w') as construct_file:
                        yaml.dump({'name': pdb_code.index.lower(), 'protein': protein.entry_name}, construct_file, indent=4)
                    with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as structure_file:
                        # Placeholder values; ligand, auxiliary protein and signaling protein are filled in below.
                        struct_yaml_dict = {'construct': pdb_code.index.lower(), 'pdb': pdb_code.index, 'preferred_chain': preferred_chain, 'auxiliary_protein': '', 'ligand': {'name': 'None', 'pubchemId': 'None', 'title': 'None', 'role': '.nan', 'type': 'None'}, 'signaling_protein': 'None', 'state': 'Inactive'}
                        auxiliary_proteins, ligands = [], []
                        if pdb_data_dict['ligands']!='None':
                            for key, values in pdb_data_dict['ligands'].items():
                                # Component ids in this list are never recorded as ligands.
                                if key in ['SO4','NA','CLR','OLA','OLB','OLC','TAR','NAG','EPE','BU1','ACM','GOL','PEG','PO4','TLA','BOG','CIT','PLM','BMA','MAN','MLI','PGE']:
                                    continue
                                else:
                                    ligands.append({'name': key, 'pubchemId': 'None', 'title': pdb_data_dict['ligands'][key]['comp_name'], 'role': '.nan', 'type': 'None'})
                        for key, values in pdb_data_dict['auxiliary'].items():
                            if pdb_data_dict['auxiliary'][key]['subtype'] in ['Expression tag', 'Linker']:
                                continue
                            else:
                                auxiliary_proteins.append(pdb_data_dict['auxiliary'][key]['subtype'])
                        for key, values in pdb_data_dict['construct_sequences'].items():
                            if key!=protein.entry_name and key not in struct_yaml_dict['auxiliary_protein']:
                                if 'arrestin' in key:
                                    struct_yaml_dict['signaling_protein'] = key
                        # NOTE(review): both checks use >1, so a single auxiliary protein or a
                        # single ligand leaves the placeholder untouched - confirm this is intended.
                        if len(auxiliary_proteins)>1:
                            struct_yaml_dict['auxiliary_protein'] = ', '.join(auxiliary_proteins)
                        if len(ligands)>1:
                            struct_yaml_dict['ligand'] = ligands
                        yaml.dump(struct_yaml_dict, structure_file, indent=4, default_flow_style=False)
                    # Build residue table for structure
                    build_structure_command = shlex.split('/env/bin/python3 manage.py build_structures -f {}.yaml'.format(pdb_code.index))
                    subprocess.call(build_structure_command)
                    # Check state
                    struct = Structure.objects.get(pdb_code__index=pdb_code.index)
                    pi = PdbStateIdentifier(struct)
                    pi.run()
                    if pi.state!=None:
                        Structure.objects.filter(pdb_code__index=pdb_code.index).update(state=pi.state)
                        print(pi.state, pi.activation_value)
                        # Write the identified state and distance back into the structure yaml.
                        # NOTE(review): yaml.load is called without a Loader argument - confirm
                        # that the installed PyYAML version accepts that.
                        with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'r') as yf:
                            struct_yaml = yaml.load(yf)
                        struct_yaml['state'] = pi.state.name
                        # An activation value that cannot be converted to float is stored as None.
                        try:
                            struct_yaml['distance'] = round(float(pi.activation_value), 2)
                        except:
                            struct_yaml['distance'] = None
                        with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as struct_yaml_file:
                            yaml.dump(struct_yaml, struct_yaml_file, indent=4, default_flow_style=False)
                    # Check sodium pocket
                    new_prot_conf.sodium_pocket()
                    print('{} added to db (preferred_chain chain: {})'.format(s, preferred_chain))
            except Exception as msg:
                # One failing structure must not stop the remaining ones.
                print(s, msg)
def handle(self, *args, **options):
    '''Run state identification for ``options['s']`` and print the outcome.

    The value of ``options['s']`` is handed to ``PdbStateIdentifier`` as is;
    it is then printed together with the resulting activation value and state.
    '''
    target = options['s']
    identifier = PdbStateIdentifier(target)
    identifier.run()
    print(target, identifier.activation_value, identifier.state)
def new_xtals(self, uniprot):
    ''' List GPCR crystal structures missing from GPCRdb and the yaml files. Adds missing structures to DB.

    For each PDB code returned by self.pdb_request_by_uniprot(uniprot) the code
    is appended to self.db_list when no Structure with that code exists and to
    self.yaml_list when it is not in self.yamls; in both cases only if it is
    not listed in self.exceptions and self.pdb_request_by_pdb returns 1.
    Codes missing from the database are then fetched with fetch_pdb_info,
    written out as construct and structure yaml files below settings.DATA_DIR,
    built through the build_structures management command and annotated with
    the state computed by PdbStateIdentifier.

    Errors while adding a structure are not caught here; they propagate to the
    caller.

    uniprot -- accession passed to pdb_request_by_uniprot and used to look up
               the Protein that new structures are attached to
    '''
    # Component ids that are never recorded as ligands.
    skipped_ligands = ('SO4','NA','CLR','OLA','OLB','OLC','TAR','NAG','EPE','BU1','ACM','GOL','PEG','PO4','TLA','BOG','CIT','PLM','BMA','MAN','MLI','PGE','SIN','PGO','MES','ZN','NO3','NI','MG','PG4')

    structs = self.pdb_request_by_uniprot(uniprot)
    try:
        protein = Protein.objects.get(accession=uniprot)
    except Protein.DoesNotExist:
        protein = None
    # Building the queryset is lazy and does not touch the database, so the
    # former try/except around it could never trigger.
    x50s = Residue.objects.filter(protein_conformation__protein=protein,generic_number__label__in=['1x50','2x50','3x50','4x50','5x50','6x50','7x50'])

    # ['null'] means there is nothing to process.
    if structs==['null']:
        return

    for s in structs:
        missing_from_db = False
        try:
            Structure.objects.get(pdb_code__index=s)
        except Structure.DoesNotExist:
            if s not in self.exceptions and self.pdb_request_by_pdb(s)==1:
                self.db_list.append(s)
                missing_from_db = True
        if s not in self.yamls and s not in self.exceptions:
            # Codes already in db_list are not sent to pdb_request_by_pdb again.
            if s in self.db_list or self.pdb_request_by_pdb(s)==1:
                self.yaml_list.append(s)
        # Only codes missing from the database are processed any further.
        if not missing_from_db:
            continue

        pdb_data_dict = fetch_pdb_info(s, protein, new_xtal=True)
        exp_method = pdb_data_dict['experimental_method']
        # Kept for the lookup itself: a missing StructureType still fails early.
        if exp_method=='Electron Microscopy':
            StructureType.objects.get(slug='electron-microscopy')
        elif exp_method=='X-ray diffraction':
            StructureType.objects.get(slug='x-ray-diffraction')

        if 'deletions' not in pdb_data_dict:
            print('Warning: no deletions in pdb info, check {}'.format(s))
            continue
        for deletion in pdb_data_dict['deletions']:
            # x50 residues outside the open interval (start, end) of this deletion
            presentx50s = [x for x in x50s if not deletion['start']<x.sequence_number<deletion['end']]
            # Filter out ones without all 7 x50 positions present in the xtal
            if len(presentx50s)!=7:
                try:
                    del self.db_list[self.db_list.index(s)]
                    missing_from_db = False
                    del self.yaml_list[self.yaml_list.index(s)]
                except ValueError:
                    # s was already removed by an earlier deletion, or was
                    # never in yaml_list.
                    pass
        if not missing_from_db:
            continue

        # Kept from the original: a missing 'resolution' key still fails here.
        pdb_data_dict['resolution']
        pdb_code, created = WebLink.objects.get_or_create(index=s, web_resource=WebResource.objects.get(slug='pdb'))
        # Download the PDB file into the current working directory and read it back.
        pdbl = PDB.PDBList()
        pdbl.retrieve_pdb_file(s, pdir='./', file_format="pdb")
        with open('./pdb{}.ent'.format(s).lower(),'r') as f:
            lines = f.readlines()
        state = ProteinState.objects.get(slug='inactive')
        # Protein and conformation entries for the structure itself, named
        # after the lower-cased PDB code.
        new_prot, created = Protein.objects.get_or_create(entry_name=s.lower(), accession=None, name=s.lower(), sequence=pdb_data_dict['wt_seq'], family=protein.family, parent=protein, residue_numbering_scheme=protein.residue_numbering_scheme, sequence_type=ProteinSequenceType.objects.get(slug='mod'), source=ProteinSource.objects.get(name='OTHER'), species=protein.species)
        new_prot_conf, created = ProteinConformation.objects.get_or_create(protein=new_prot, state=state, template_structure=None)

        publication_date, pubmed, doi = '','',''
        for line in lines:
            # PDB records are column-formatted: compare the leading fields
            # instead of a prefix with an exact run of spaces, then slice the
            # fixed columns as before.
            fields = line.split(None, 2)[:2]
            if fields==['REVDAT', '1']:
                publication_date = line[13:22]
            elif fields==['JRNL', 'PMID']:
                pubmed = line[19:].strip()
            elif fields==['JRNL', 'DOI']:
                doi = line[19:].strip()
        pdb_file = ''.join(lines)
        PdbData.objects.get_or_create(pdb=pdb_file)
        # A file without a first REVDAT record has no date to reformat.
        if publication_date:
            publication_date = datetime.strptime(publication_date,'%d-%b-%y').strftime('%Y-%m-%d')

        # Best-effort publication lookup/creation, preferring the DOI over the
        # PubMed id. It depends on external services, so a failure here must
        # not prevent the structure from being added.
        try:
            if doi!='':
                try:
                    Publication.objects.get(web_link__index=doi)
                except Publication.DoesNotExist:
                    p = Publication()
                    try:
                        p.web_link = WebLink.objects.get(index=doi, web_resource__slug='doi')
                    except WebLink.DoesNotExist:
                        p.web_link = WebLink.objects.create(index=doi, web_resource=WebResource.objects.get(slug='doi'))
                    p.update_from_doi(doi=doi)
                    p.save()
            elif pubmed!='':
                try:
                    Publication.objects.get(web_link__index=pubmed)
                except Publication.DoesNotExist:
                    p = Publication()
                    try:
                        p.web_link = WebLink.objects.get(index=pubmed, web_resource__slug='pubmed')
                    except WebLink.DoesNotExist:
                        p.web_link = WebLink.objects.create(index=pubmed, web_resource=WebResource.objects.get(slug='pubmed'))
                    p.update_from_pubmed_data(index=pubmed)
                    p.save()
        except Exception:
            pass

        pcs = PdbChainSelector(s, protein)
        pcs.run_dssp()
        preferred_chain = pcs.select_chain()

        # Create yaml files
        yaml_name = '{}.yaml'.format(pdb_code.index)
        construct_yaml_path = os.sep.join([settings.DATA_DIR, 'structure_data','constructs', yaml_name])
        # One path for writing, re-reading and rewriting the structure yaml;
        # the state update used to go through a hard-coded relative path that
        # only agreed with settings.DATA_DIR by coincidence.
        structure_yaml_path = os.sep.join([settings.DATA_DIR, 'structure_data','structures', yaml_name])

        with open(construct_yaml_path, 'w') as construct_file:
            yaml.dump({'name': pdb_code.index.lower(), 'protein': protein.entry_name}, construct_file, indent=4)

        # Placeholder values; ligand, auxiliary protein and signaling protein
        # are filled in below.
        struct_yaml_dict = {'construct': pdb_code.index.lower(), 'pdb': pdb_code.index, 'preferred_chain': preferred_chain, 'auxiliary_protein': '', 'ligand': {'name': 'None', 'pubchemId': 'None', 'title': 'None', 'role': '.nan', 'type': 'None'}, 'signaling_protein': 'None', 'state': 'Inactive'}
        auxiliary_proteins, ligands = [], []
        if pdb_data_dict['ligands']!='None':
            for key, values in pdb_data_dict['ligands'].items():
                if key not in skipped_ligands:
                    ligands.append({'name': key, 'pubchemId': 'None', 'title': values['comp_name'], 'role': '.nan', 'type': 'None'})
        for values in pdb_data_dict['auxiliary'].values():
            if values['subtype'] not in ['Expression tag', 'Linker']:
                auxiliary_proteins.append(values['subtype'])
        for key in pdb_data_dict['construct_sequences']:
            if key!=protein.entry_name and key not in struct_yaml_dict['auxiliary_protein']:
                if 'arrestin' in key:
                    struct_yaml_dict['signaling_protein'] = key
        # NOTE(review): both checks use >1, so a single auxiliary protein or a
        # single ligand leaves the placeholder untouched. Kept as is; confirm
        # whether that is intended.
        if len(auxiliary_proteins)>1:
            struct_yaml_dict['auxiliary_protein'] = ', '.join(auxiliary_proteins)
        if len(ligands)>1:
            struct_yaml_dict['ligand'] = ligands
        with open(structure_yaml_path, 'w') as structure_file:
            yaml.dump(struct_yaml_dict, structure_file, indent=4, default_flow_style=False)

        # Build residue table for structure
        build_structure_command = shlex.split('/env/bin/python3 manage.py build_structures -f {}.yaml'.format(pdb_code.index))
        subprocess.call(build_structure_command)

        # Check state
        struct = Structure.objects.get(pdb_code__index=pdb_code.index)
        pi = PdbStateIdentifier(struct)
        pi.run()
        if pi.state is not None:
            Structure.objects.filter(pdb_code__index=pdb_code.index).update(state=pi.state)
            print(pi.state, pi.activation_value)
            with open(structure_yaml_path, 'r') as yf:
                # safe_load: a bare yaml.load() needs an explicit Loader on
                # recent PyYAML releases.
                struct_yaml = yaml.safe_load(yf)
            struct_yaml['state'] = pi.state.name
            try:
                struct_yaml['distance'] = round(float(pi.activation_value), 2)
            except (TypeError, ValueError):
                # No usable activation value was computed.
                struct_yaml['distance'] = None
            with open(structure_yaml_path, 'w') as struct_yaml_file:
                yaml.dump(struct_yaml, struct_yaml_file, indent=4, default_flow_style=False)

        # Check sodium pocket
        new_prot_conf.sodium_pocket()
        print('{} added to db (preferred_chain chain: {})'.format(s, preferred_chain))