def pfam_annotate(data):
    """Annotate every row of *data* with the Pfam domains covering its residue.

    Two columns are added to *data* in place, both defaulting to ``"-"``:

    * ``PFAM_domains`` -- comma-separated ``<id>:<start>-<end>`` entries, one
      for every Pfam location whose range contains the row's residue position.
    * ``Link_to_pfam`` -- Pfam protein page URL of the most recent accession
      that contributed at least one entry to ``PFAM_domains``.

    For each row the comma-separated accession list is read from ``row[18]``
    and the residue position from ``row[12]``; accessions equal to ``"-"``
    are skipped.

    :param data: pandas DataFrame to annotate (modified in place).
    :return: the same DataFrame object.
    """
    pfam_url = "https://pfam.xfam.org/protein/"

    # Create the columns up front so they exist even if a row fails to parse.
    data["PFAM_domains"] = "-"
    data["Link_to_pfam"] = "-"

    # Results are collected per row and assigned positionally at the end, so
    # the outcome does not depend on the DataFrame having a 0..n-1 index.
    domain_column = []
    link_column = []

    for _, row in data.iterrows():
        # NOTE(review): integer keys are kept exactly as before; whether they
        # resolve by label or by position depends on the column labels.
        accessions = row[18].split(",")
        aa_pos = int(row[12])

        pfam_domains = []
        link = "-"
        for seq in accessions:
            if seq == "-":
                continue
            found_before = len(pfam_domains)
            try:
                for family in prody.searchPfam(seq).values():
                    # A family can occur several times on one protein, so
                    # every reported location is tested, not just the first.
                    for location in family["locations"]:
                        start_pos = int(location["start"])
                        end_pos = int(location["end"])
                        if start_pos <= aa_pos <= end_pos:
                            pfam_domains.append(
                                "{0}:{1}-{2}".format(
                                    family["id"], start_pos, end_pos
                                )
                            )
            except Exception:
                # Deliberately best-effort: a failed or malformed lookup for
                # one accession must not abort annotation of the others.
                pass
            if len(pfam_domains) > found_before:
                # Link only to an accession that actually produced a hit.
                # Plain concatenation: os.path.join is for filesystem paths.
                link = pfam_url + seq

        domain_column.append(",".join(pfam_domains) if pfam_domains else "-")
        link_column.append(link)

    data["PFAM_domains"] = domain_column
    data["Link_to_pfam"] = link_column
    return data
def _searchPfam(self, refresh=False, **kwargs):
    """Return the Pfam search result stored on ``self.Pfam``.

    The stored value is returned as-is unless *refresh* is ``True`` or
    ``self.Pfam`` is ``None``; in those cases ``searchPfam`` is called with
    ``self.uniq_acc`` and *kwargs*, and its result is stored first.

    If the search raises, ``self.Pfam`` is set to an empty dict and the
    exception is re-raised unchanged.
    """
    assert isinstance(refresh, bool)

    # Guard clause: nothing to do when a stored result exists and no
    # refresh was requested.
    if refresh is not True and self.Pfam is not None:
        return self.Pfam

    try:
        hits = searchPfam(self.uniq_acc, **kwargs)
    except BaseException:
        # Leave an empty (non-None) result behind, then propagate the error.
        self.Pfam = {}
        raise
    self.Pfam = hits
    return self.Pfam
def evol_search(query, **kwargs):
    """Search Pfam for *query* and write the hits as a delimited table.

    All keyword arguments are forwarded to ``prody.searchPfam``. Two of them
    are also read here:

    * ``outname`` -- path of the output file; when missing or empty the
      table is written to ``sys.stdout``.
    * ``delimiter`` -- column separator, tab by default.

    One line is written per hit with the columns ``acc``, ``id``, ``type``
    and ``e-value``, where the e-value is the smallest one found among the
    hit's locations (empty when no location carries one).

    Returns ``None``; nothing is written when the search returns ``None``.
    """
    import sys
    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return

    outname = kwargs.get('outname', None)
    delimiter = kwargs.get('delimiter', '\t')

    # Build the whole table first so that a malformed hit cannot leave a
    # half-written file behind.
    lines = [delimiter.join(['acc', 'id', 'type', 'e-value']) + '\n']
    for val in pfam_results.values():
        raw_evalues = (loc.get('evalue', None)
                       for loc in val.get('locations', []))
        # Falsy e-values are ignored. Taking min() over the rest avoids the
        # float-versus-'' comparison that fails on Python 3 when the first
        # location has no e-value.
        evalues = [float(raw) for raw in raw_evalues if raw]
        evalue = min(evalues) if evalues else ''
        lines.append(delimiter.join([
            val.get('accession', '    '),
            val.get('id', '    '),
            val.get('type', '    '),
            str(evalue),
        ]) + '\n')

    if outname:
        folder, outname = split(outname)
        filepath = join(prody.utilities.makePath(folder), outname)
        # Text mode: the table is str, which a 'wb' handle rejects on
        # Python 3. The context manager closes the file even on error.
        with open(filepath, 'w') as out:
            out.writelines(lines)
        prody.LOGGER.info('Search results written in {0}.'.format(filepath))
    else:
        sys.stdout.writelines(lines)
def evol_search(query, **kwargs):
    """Run a Pfam search for *query* and emit the hits as delimited text.

    Every keyword argument is passed on to ``prody.searchPfam``. In addition
    this function reads:

    * ``outname`` -- output file path; if absent or empty, output goes to
      ``sys.stdout``.
    * ``delimiter`` -- field separator (default: tab).

    The output has a header line ``acc``/``id``/``type``/``e-value`` followed
    by one line per hit. The e-value column holds the minimum e-value over
    the hit's locations, or an empty string when none is available.

    Returns ``None``. If the search itself returns ``None`` nothing is
    written.
    """
    import sys
    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return

    outname = kwargs.get('outname', None)
    delimiter = kwargs.get('delimiter', '\t')

    def best_evalue(hit):
        # Smallest truthy e-value among the hit's locations, '' if none.
        # Using min() over the parsed values avoids comparing a float with
        # the '' placeholder, which raises TypeError on Python 3.
        parsed = [float(loc['evalue'])
                  for loc in hit.get('locations', [])
                  if loc.get('evalue', None)]
        return min(parsed) if parsed else ''

    # Format everything before touching the output so a bad record does not
    # leave a truncated file.
    lines = [delimiter.join(['acc', 'id', 'type', 'e-value']) + '\n']
    for val in pfam_results.values():
        lines.append(delimiter.join([val.get('accession', '    '),
                                     val.get('id', '    '),
                                     val.get('type', '    '),
                                     str(best_evalue(val))]) + '\n')

    if not outname:
        sys.stdout.writelines(lines)
        return

    folder, outname = split(outname)
    filepath = join(prody.utilities.makePath(folder), outname)
    # 'w' rather than 'wb': the lines are str, not bytes. ``with`` guarantees
    # the handle is closed even if writing fails.
    with open(filepath, 'w') as out:
        out.writelines(lines)
    prody.LOGGER.info('Search results written in {0}.'.format(filepath))
def evol_search(query, **kwargs):
    """Query Pfam for *query* and write a delimited table of the hits.

    Keyword arguments are forwarded unchanged to ``prody.searchPfam``. Two
    of them also control the output:

    * ``outname`` -- file to write to; standard output is used when it is
      missing or empty.
    * ``delimiter`` -- separator placed between columns (tab by default).

    The table starts with the header ``acc``, ``id``, ``type``, ``e-value``
    and has one line per hit. The last column is the lowest e-value reported
    for any of the hit's locations, or empty if no location has one.

    Returns ``None``. Nothing is written if the search returns ``None``.
    """
    import sys
    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return

    outname = kwargs.get("outname", None)
    delimiter = kwargs.get("delimiter", "\t")

    # Assemble the complete table up front; the output is only opened once
    # every record has been formatted successfully.
    lines = [delimiter.join(["acc", "id", "type", "e-value"]) + "\n"]
    for val in pfam_results.values():
        evalue = ""
        for location in val.get("locations", []):
            temp = location.get("evalue", None)
            if not temp:
                continue
            candidate = float(temp)
            # The placeholder "" is replaced by the first usable value. It is
            # never compared with a float, which would raise on Python 3.
            if evalue == "" or candidate < evalue:
                evalue = candidate
        fields = [
            val.get("accession", "    "),
            val.get("id", "    "),
            val.get("type", "    "),
            str(evalue),
        ]
        lines.append(delimiter.join(fields) + "\n")

    if outname:
        folder, outname = split(outname)
        filepath = join(prody.utilities.makePath(folder), outname)
        # Text mode, because str is written; the context manager closes the
        # file on both success and failure.
        with open(filepath, "w") as out:
            out.writelines(lines)
        prody.LOGGER.info("Search results written in {0}.".format(filepath))
    else:
        sys.stdout.writelines(lines)
if (len(uniprot_code)): ### get the uniprot canonical sequence from the database (full kinase sequence) raw_sequence = o3i.getCanonicalSequenceFromUniprotCode(uniprot_code) uniprot_sequence = Seq(raw_sequence, generic_protein) seq_id = o3i.getCanonicalSeqIdFromUniprotCode(uniprot_code) # get a list of start and end positions for a given annotation on the sequence # if none is returned, then the annotation is not found for this particular sequence cur_domain_idx = 1 #first let's get all domain annotations from PFAM, if available # #print(uniprot_code) try: pfam_annotations = prody.searchPfam(uniprot_code) except Exception: continue pfam_families = pfam_annotations.keys() #for pfam_family in pfam_families: # if pfam_family in expected_pfam_families: # print(pfam_annotations[pfam_family]) for family in expected_pfam_families: if (family in pfam_annotations): subSequence = "" if (len(pfam_annotations[family]["locations"]) > 1): # print(uniprot_code,pfam_annotations[family]["locations"]) for loc_idx, location in enumerate( pfam_annotations[family]["locations"]):