def download_aa_dist_per_gene(UPID_list_fname, cutoff):
    """Fetch Swiss-Prot sequences and count their amino acids.

    The UniProt ID of each entry is taken from columns 48-53 of every row of
    ``UPID_list_fname`` (the slice ``row[48:54]``).

    Args:
        UPID_list_fname: path of the text file listing the entries.
        cutoff: when greater than 0, only the first ``cutoff`` IDs are used.

    Returns:
        dict mapping each UniProt ID to an ``np.array`` that holds, for every
        letter of ``AA_LETTERS`` (in that order), how often it occurs in the
        downloaded sequence.
    """
    with open(UPID_list_fname, 'r') as id_file:
        UPID_list = [row[48:54].strip() for row in id_file]
    # Rows too short to hold an ID slice to '' -- drop them instead of
    # sending an empty query to ExPASy.
    UPID_list = [UPID for UPID in UPID_list if UPID]
    if cutoff > 0:
        UPID_list = UPID_list[:cutoff]

    # a dictionary containing the aa_dist for each uniprot ID
    UPID_to_aa_dist = {}
    for i, UPID in enumerate(UPID_list):
        print("%s \t%s" % (i, UPID))

        # fetch the amino-acid sequence of this entry from Swiss-Prot
        handle = ExPASy.get_sprot_raw(UPID)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        # Every known letter starts at 0; letters outside AA_LETTERS (such
        # as 'X') are skipped rather than raising KeyError.
        aa_dist = dict((aa, 0) for aa in AA_LETTERS)
        for aa in seq_record.seq:
            if aa in aa_dist:
                aa_dist[aa] += 1

        UPID_to_aa_dist[UPID] = np.array([aa_dist[aa] for aa in AA_LETTERS])
    return UPID_to_aa_dist
def get_SwissProt(dict, accession):
    """Fetch one Swiss-Prot record and store it in ``dict`` under its accession.

    Args:
        dict: mapping that receives ``accession -> SwissProt record``.
        accession: accession of the entry to download.

    When the download raises ``urllib2.HTTPError`` a message is printed and
    ``dict`` is left untouched.
    """
    try:
        handle = ExPASy.get_sprot_raw(accession)
    except urllib2.HTTPError:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(accession + ": protein not found on UniProt . ")
        return
    try:
        dict[accession] = SwissProt.read(handle)
    finally:
        handle.close()
def main():
    """Write the GO biological processes of one UniProt entry to a file.

    Reads the UniProt ID from 'problem_datasets/rosalind_dbpr.txt', fetches
    the entry from ExPASy, writes the process names (one per line) to
    'output/rosalind_dbpr_out.txt' and prints a short summary.
    """
    # Read the UniProt ID from a txt file.
    with open('problem_datasets/rosalind_dbpr.txt', 'r') as infile:
        uni_id = infile.read().strip()

    # Retrieve the data from UniProt; SwissProt.read handles one entry
    # (SwissProt.parse would be needed for several proteins).
    with ExPASy.get_sprot_raw(uni_id) as raw_data:
        record = SwissProt.read(raw_data)

    # Only GO cross-references are inspected, so another database whose third
    # field happens to start with 'P:' cannot end up in the answer.
    go = [
        ref[2][2:]
        for ref in record.cross_references
        if ref[0] == 'GO' and ref[2].startswith('P:')
    ]

    # Output answer.
    with open('output/rosalind_dbpr_out.txt', 'w') as outfile:
        outfile.write('\n'.join(go))

    # Optional: Print answer and gene ID/name.
    # NOTE(review): assumes record.gene_name is a string that starts with
    # 'Name=' -- confirm against the installed Biopython version.
    name = record.gene_name.split(' ')[0][5:]
    print('Gene:\n', name, ' (UniProt ID = ', uni_id,
          ')\n\nBiological Processes:\n', '\n'.join(go), sep='')
def download_sequences(accessions):
    """Return a dict mapping each accession to its Swiss-Prot sequence.

    Args:
        accessions: iterable of accessions to download.

    Returns:
        dict ``accession -> record.sequence``.
    """
    records = {}
    for accession in accessions:
        handle = ExPASy.get_sprot_raw(accession)
        try:
            records[accession] = SwissProt.read(handle).sequence
        finally:
            # release the connection even when parsing fails
            handle.close()
    return records
def download_from_swissprot(id_file, output_file, rettype="swiss",
                            save_format="swiss"):
    """Download the entries listed in ``id_file`` and save them to ``output_file``.

    :param id_file: path of a text file with one query ID per line
    :type id_file: basestring
    :param output_file: path of the file the records are written to
    :type output_file: basestring
    :param rettype: format name passed to ``SeqIO.read`` for the download
    :param save_format: format name passed to ``SeqIO.write``
    """
    cnt = 0
    with open(id_file, 'r') as f, open(output_file, 'w') as output_file_handle:
        for line in f:
            query_id = line.strip()
            if not query_id:
                # blank line: nothing to query
                continue
            try:
                handle = ExPASy.get_sprot_raw(query_id)
            except urllib2.HTTPError:
                log.warning('{0} query failed'.format(query_id))
                # No handle was obtained, so move on to the next ID instead
                # of parsing an unbound (or already closed) handle.
                continue
            try:
                seq_record = SeqIO.read(handle, rettype)
            finally:
                handle.close()
            SeqIO.write(seq_record, output_file_handle, save_format)
            cnt += 1
            log.info('#{1} Processed {0}'.format(seq_record.id, cnt))
    log.info('Total {0} queries done!'.format(cnt))
def access_sequence(accession):
    """Return the sequence of a Swiss-Prot entry, or None when it is not found.

    Args:
        accession: accession of the entry to download.

    Returns:
        ``record.sequence`` of the entry, or ``None`` after printing a
        warning when fetching or parsing raises ``ValueError``.
    """
    try:
        handle = ExPASy.get_sprot_raw(accession)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()
    except ValueError:
        # The original caught the non-existent name ``ValueException`` and
        # then fell through to use an unbound ``record``.
        print("WARNING: Accession %s not found" % accession)
        return None
    return record.sequence
def main(input_string):
    """Print the GO biological-process names of the entry ``input_string``.

    Args:
        input_string: UniProt ID passed to ``ExPASy.get_sprot_raw``.
    """
    handle = ExPASy.get_sprot_raw(input_string)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    for ref in record.cross_references:
        # keep Gene Ontology references that refer to a biological process
        if ref[0] == 'GO' and ref[2].startswith('P:'):
            print(ref[2][2:])
def find_COG2(self):
    """Find records from uniprotIDs without use of keggIDs.

    Looks up ``self.uprotID`` on ExPASy, turns the record's gene-name line
    into a query and returns the body of
    ``http://rest.genome.jp/oc/?<query>``.
    """
    handle = ExPASy.get_sprot_raw(self.uprotID)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()

    # str.strip("Name=;") removes any of the characters N, a, m, e, = and ;
    # from both ends, so it also ate letters of the gene name itself
    # ("Name=eno;" became "no").  Remove the literal prefix/suffix instead.
    query = record.gene_name
    if query.startswith("Name="):
        query = query[len("Name="):]
    if query.endswith(";"):
        query = query[:-1]

    url_open = urllib.urlopen("http://rest.genome.jp/oc/?" + query)
    try:
        return url_open.read()
    finally:
        url_open.close()
def main(filename):
    """Print the GO biological processes of the UniProt ID stored in ``filename``.

    Args:
        filename: path of a text file whose content is the UniProt ID.
    """
    with open(filename) as fin:
        my_seq = fin.read().strip()
    handle = ExPASy.get_sprot_raw(my_seq)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    for ref in record.cross_references:
        if ref[0] == 'GO' and ref[2].startswith('P'):
            # maxsplit=1 keeps names that themselves contain a colon intact
            print(ref[2].split(':', 1)[1])
def parseBlast():
    """Collect hit titles and Swiss-Prot keywords from './output/blastOut.xml'.

    Returns:
        tuple ``(blastHits, accessions)``: ``blastHits`` maps a query ID to a
        '; '-joined string of hit titles and keywords, ``accessions`` maps a
        query ID to the list of hit accessions.
    """
    E_VALUE_THRESH = 1
    blastHits = {}
    accessions = {}

    # The records are consumed while the file is still open, and the file is
    # closed afterwards (it used to stay open).
    with open("./output/blastOut.xml") as result_handle:
        # Loop through each protein query result
        for blast_record in NCBIXML.parse(result_handle):
            keyword_list = []  # stores running keyword list
            # parse for the query protein ID
            queryID = blast_record.query.split()[0].split(':')[1]

            # Loop through the hits associated with this sequence
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    # Hit must have e-value < threshold to be considered
                    if hsp.expect < E_VALUE_THRESH:
                        title = alignment.title  # title of hit
                        splittitle = title.split()
                        # specific keywords in title
                        raw_protein_title = title.split('OS')[0]
                        protein_title = " ".join(raw_protein_title.split()[2:])
                        keyword_list.append(protein_title)

                        # parse for the accession number
                        accession = splittitle[1].split('|')[1]
                        accessions.setdefault(queryID, []).append(accession)

                        handle = ExPASy.get_sprot_raw(accession)
                        try:
                            record = SwissProt.read(handle)
                        finally:
                            handle.close()
                        keyword_list += record.keywords

                        keyword_string = '; '.join(keyword_list)
                        blastHits[queryID] = keyword_string
                # NOTE(review): reconstructed at alignment level to match the
                # original comment -- confirm against the real indentation.
                break  # only take top hit for now
    return (blastHits, accessions)
def sequence_file(*args):
    '''Save the sequence of the protein in FASTA format.

    The Uniprot code is read from ``code``, the entry is downloaded, a FASTA
    header is built from it, and the user picks the destination through a
    save dialog.  An error window is shown (and nothing is saved) when the
    code is empty or the entry cannot be retrieved.
    '''
    from Bio import ExPASy
    from Bio import SwissProt

    a = code.get()
    try:
        with ExPASy.get_sprot_raw(a) as handle:
            record = SwissProt.read(handle)
    except Exception:
        if a == "":
            open_window("No Code", "Please Insert an Uniprot Code", "#FFC3C3",
                        '200x30')
        else:
            open_window("No Valid Code", "Please Insert a valid Uniprot Code",
                        "#FFC3C3", '200x30')
        # There is no record to save; the original carried on and failed on
        # the unbound ``record``.
        return

    descrip = record.description.split(";")[0]
    num = descrip.find("Full=") + 5
    descrip = descrip[num:]
    fasta_header = (">sp|" + a + "|" + record.entry_name + " " + descrip +
                    " OS=" + record.organism)

    filename = filedialog.asksaveasfilename(defaultextension='.fasta',
                                            filetypes=[("fasta", "*.fasta")])
    if not filename:
        # dialog returned no path, so there is nowhere to write
        return
    with open(filename, "w") as TextFile:
        TextFile.write(fasta_header + '\n')
        TextFile.write(record.sequence)
def gen_uniprot_features_for_pdb(infile):
    """Print UniProt features for every entry that references its PDB domain.

    Each line of ``infile`` holds three tab-separated fields: a PDB domain,
    a count (unused) and '|'-separated UniProt IDs.  For every ID whose
    record carries a 'PDB' cross-reference equal to the domain
    (case-insensitive), one tab-separated row is printed with: domain, ID,
    '|'-joined GO ids, InterPro id, EvolutionaryTrace id, organism and the
    'SUBCELLULAR LOCATION' comment.

    Args:
        infile: path of the tab-separated input file.
    """
    with open(infile, 'r') as rows:
        for line in rows:
            (pdb_dom, _count, uniprot_ids) = line.replace('\n', '').split('\t')
            for uniprot_id in uniprot_ids.split('|'):
                handle = ExPASy.get_sprot_raw(uniprot_id)
                try:
                    record = SwissProt.read(handle)
                finally:
                    handle.close()

                keep = False
                go = []
                interpro = ''
                evo_trace = ''
                for xref in record.cross_references:
                    if xref[0] == 'GO':
                        go.append(xref[1])
                    if xref[0] == 'InterPro':
                        interpro = xref[1]
                    if xref[0] == 'EvolutionaryTrace':
                        evo_trace = xref[1]
                    if xref[0] == 'PDB' and xref[1].lower() == pdb_dom.lower():
                        keep = True
                if not keep:
                    continue

                organism = record.organism
                loc = ''
                for comment in record.comments:
                    if comment.startswith('SUBCELLULAR LOCATION'):
                        loc = comment
                # print(...) with a single formatted string is identical on
                # Python 2 and Python 3.
                print('%s\t%s\t%s\t%s\t%s\t%s\t%s' % (
                    pdb_dom, uniprot_id, '|'.join(go), interpro, evo_trace,
                    organism, loc))
def get_protein_EC(gene, retry=0):
    """
    Look up a gene in Uniprot and pull the EC out of its description.

    A failed query is repeated after a 5 second pause until the try counter
    reaches 10.

    > Input
    gene : str => the gene code to be queried
    retry : int => number of tries already made. Max 10.

    > Output
    - 'EC=a.b.c.d' for GENE, if its description contains one
    - None, if the entry was read but holds no EC
    - the exception object, if the query still fails once the counter is at
      10, or the KeyboardInterrupt object if one is received while querying
    """
    ec_pattern = re.compile(r"EC=\d+\.\d+\.\d+\.\d+")
    attempt = retry
    while True:
        try:
            with ExPASy.get_sprot_raw(gene) as source:
                entry = SeqIO.read(source, "swiss")
            found = ec_pattern.search(entry.description)
            return found.group(0) if found is not None else None
        except Exception as failure:
            if attempt >= 10:
                return failure
            time.sleep(5)  # cool down time 5s
            print("\nGENE NOT FOUND. RETRYING (%d)" % attempt)
            attempt += 1
        except KeyboardInterrupt as interrupt:
            print("\nKeyBoard Interrupt Signal received. Aborting")
            return interrupt
def get_records(ids):
    """Return the sequences of the given Swiss-Prot entries, in input order.

    Args:
        ids: iterable of IDs to download.

    Returns:
        list of ``record.sequence`` values, one per ID.
    """
    records = []
    for entry_id in ids:  # renamed so the builtin ``id`` is not shadowed
        handle = ExPASy.get_sprot_raw(entry_id)
        try:
            records.append(SwissProt.read(handle).sequence)
        finally:
            handle.close()
    return records
def fetch_swp_expasy(uniprot_acc):
    """
    Fetch information on SwissProt accession (manually reviewed UniProt entry).
    http://biopython.org/DIST/docs/api/Bio.SwissProt.Record-class.html

    Parameters
    ----------
    uniprot_acc : str
        SwissProt accession or identifier.

    Returns
    -------
    tuple
        ``(attrib_names, swp_info_list)``: two parallel lists holding the
        names of the attributes and their values.
    """
    # generate the record object for the SwissProt identifier
    handle = ExPASy.get_sprot_raw(uniprot_acc)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()

    created_on = record.created[0]

    # NOTE(review): 'data created' looks like a typo for 'date created'; it
    # is left unchanged because callers may match on the label.
    attrib_names = [
        'accessions', 'data created', 'date created (ISO)', 'organism',
        'gene names', 'description', 'comments', 'keywords'
    ]
    swp_info_list = [
        record.accessions, created_on, dating(created_on), record.organism,
        record.gene_name, record.description, record.comments,
        record.keywords
    ]
    return (attrib_names, swp_info_list)
def Uniprot_records(self):
    """Download the UniProt XML file of this instance's UniProt ID.

    The file is saved as 'Uniprot<ID>.xml' in the current directory.
    """
    # The handle is only needed for the URL it was opened from, so it is
    # closed as soon as the URL has been read.
    handle = ExPASy.get_sprot_raw(self.__uniprot_id)
    try:
        url = handle.url
    finally:
        handle.close()
    # ask for the XML flavour of the same entry
    url = url.replace('txt', 'xml')
    response = requests.get(url)
    # 'b' to write in binary mode
    with open('Uniprot' + self.__uniprot_id + '.xml', 'wb') as xml_file:
        xml_file.write(response.content)
def get_SwissProt(dict, accession):
    """Fetch one Swiss-Prot record and store it in ``dict`` under its accession.

    Args:
        dict: mapping that receives ``accession -> SwissProt record``.
        accession: accession of the entry to download.

    When the download raises ``urllib2.HTTPError`` a message is printed and
    ``dict`` is left untouched.
    """
    try:
        handle = ExPASy.get_sprot_raw(accession)
    except urllib2.HTTPError:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(accession + ": protein not found on UniProt . ")
        return
    try:
        dict[accession] = SwissProt.read(handle)
    finally:
        handle.close()
def fetch_genbank(sid):
    """Download entry ``sid`` and save it as '<sid>.genbank'.

    Prints the sequence length on success.  Any exception raised while
    fetching, parsing or writing is reported as '<sid> not found'.

    Args:
        sid: ID passed to ``ExPASy.get_sprot_raw``.
    """
    try:
        handle = ExPASy.get_sprot_raw(sid)
        try:
            seq = SeqIO.read(handle, 'swiss')
        finally:
            # close the download even when parsing fails
            handle.close()
        SeqIO.write(seq, sid + '.genbank', 'genbank')
        print(sid, 'sequence length', len(seq))
    except Exception:
        print(sid, 'not found')
def get_pro_from_SwissProt(id):
    '''return protein sequence of id from swiss protein database

    module used: Bio.ExPASy, SeqIO.read

    Returns the sequence as a string, or None when no handle is obtained.
    '''
    handle = ExPASy.get_sprot_raw(id)
    if not handle:
        return None
    try:
        pro_record = SeqIO.read(handle, 'swiss')
    finally:
        handle.close()
    return str(pro_record.seq)
def main(argv):
    """Read a UniProt ID from stdin and print its GO biological processes.

    Args:
        argv: not used.
    """
    # input() reads stdin
    with ExPASy.get_sprot_raw(input().strip()) as handle:
        # SwissProt.read handles one entry; use SwissProt.parse for several
        record = SwissProt.read(handle)

    for ref in record.cross_references:
        if ref[0] == 'GO' and ref[2].startswith('P:'):
            print(ref[2][2:])
def getgo(id):
    """Print the GO biological-process names of entry ``id``, one per line.

    Args:
        id: UniProt ID passed to ``ExPASy.get_sprot_raw``.
    """
    handle = ExPASy.get_sprot_raw(id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    go = [
        # maxsplit=1 keeps names that themselves contain a colon intact
        r[2].split(":", 1)[1]
        for r in record.cross_references
        if r[0] == "GO" and r[2].startswith("P")
    ]
    print("\n".join(go))
def dbpr(UniProt_ID):
    """Return the GO biological-process names found in a raw Swiss-Prot entry.

    The raw text is scanned for 'DR   GO; GO:<id>; P:<name>; <EVIDENCE>:...'
    lines.  The original pattern only matched the 'IEA' evidence code, so
    processes annotated with any other evidence were silently dropped.

    Args:
        UniProt_ID: ID passed to ``ExPASy.get_sprot_raw``.

    Returns:
        list of process names in the order they appear in the entry.
    """
    handle = ExPASy.get_sprot_raw(UniProt_ID)
    try:
        record = handle.read()
    finally:
        handle.close()
    bp_pattern = r"^DR\s+GO;\s*GO:\d+;\s*P:(.*?);\s*[A-Z]+:"
    return re.findall(bp_pattern, str(record), re.MULTILINE)
def swissprot_search():
    """Print the Swiss-Prot record of every accession in 'output/seq_accession.txt'.

    The first line of the file is skipped; every following non-blank line is
    used as one accession.
    """
    with open('output/seq_accession.txt') as f:
        f.readline()  # first line is not an accession
        # strip the newline that used to be sent to ExPASy with the accession
        accessions = [line.strip() for line in f]

    for accession in accessions:
        if not accession:
            continue
        handle = ExPASy.get_sprot_raw(accession)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()
        print(record)
def main(id):
    """Print the GO biological-process names of entry ``id``.

    Args:
        id: UniProt ID passed to ``ExPASy.get_sprot_raw``.
    """
    handle = ExPASy.get_sprot_raw(id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    for cr in record.cross_references:
        if cr[0] == "GO":
            # partition splits at the first colon only, so a name that
            # contains a colon is printed in full
            aspect, _, term = cr[2].partition(":")
            if aspect == "P":
                print(term)
def get(self, id):
    """Read a Swiss-Prot record over the internet from the ExPASy database.

    Input must be an accession number stored on the swissprot site.

    Returns:
        the record produced by ``SwissProt.read``.
    """
    handle = ExPASy.get_sprot_raw(id)
    try:
        return SwissProt.read(handle)
    finally:
        # the record is fully parsed by now, the connection can go
        handle.close()
def write_to_file(identifier):
    """Fetch entry ``identifier`` from ExPASy and save it as FASTA.

    The record is written to '<identifier>.txt' in the current directory.
    """
    source = ExPASy.get_sprot_raw(identifier)
    entry = SeqIO.read(source, 'swiss')
    target_path = '%s.txt' % identifier
    with open(target_path, 'w') as out:
        SeqIO.write(entry, out, 'fasta')
    source.close()
def MouseHomolog(self, dfs):
    """Flag peptides that also occur in the mouse homolog and export them.

    For every accession in ``self.accs`` the entry name is looked up and the
    '<NAME>_MOUSE' entry is fetched.  The data frame at the same position in
    ``dfs`` gets a 'Mouse' column holding the strings 'True'/'False',
    depending on whether each value of its 'Sequence' column occurs in the
    mouse sequence.  Accessions without an entry or without a mouse entry
    are skipped.  The flagged frames are concatenated and written to
    '<self.out_folder>/MouseHomologPeptides.xlsx'.

    Args:
        dfs: sequence of data frames, parallel to ``self.accs``; the frames
            are modified in place.
    """
    print('\nFinding mouse homologs')
    new_dfs = []
    for ind, acc in enumerate(self.accs):
        try:
            with ExPASy.get_sprot_raw(acc) as handle:
                name = SwissProt.read(handle).entry_name
        except Exception:
            print('\nNo entry for', acc, ',continuing')
            continue

        mname = name.split('_')[0] + '_MOUSE'
        try:
            with ExPASy.get_sprot_raw(mname) as mhandle:
                mseq = SwissProt.read(mhandle).sequence
        except Exception:
            print(f'\nNo mouse gene entry for {acc}-{name}, continuing')
            continue
        print(f'\nFound mouse homolog for {name}: {mname}')

        df = dfs[ind]
        mcol = []
        for pepseq in df.Sequence:
            print(pepseq)
            mcol.append('True' if str(pepseq) in mseq else 'False')
        df['Mouse'] = mcol
        new_dfs.append(df)

    df_final = pd.concat(new_dfs, sort=True)
    df_final.to_excel(self.out_folder + '/' + 'MouseHomologPeptides.xlsx',
                      index=True)
def main():
    """Write the GO biological processes of the ID in file 'dbpr' to 'dbpr.out'.

    The first line of 'dbpr' is the UniProt ID; the process names are
    written one per line.
    """
    with open("dbpr") as f:
        uniprot_id = f.readline().strip()
    handle = ExPASy.get_sprot_raw(uniprot_id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()

    terms = [x[2] for x in record.cross_references if x[0] == 'GO']
    processes = [term[2:] for term in terms if term.startswith('P')]

    # Write to the file directly: the original rebound sys.stdout to a file
    # that was never closed, redirecting every later print of the process.
    with open("dbpr.out", "w") as out:
        out.write("\n".join(processes) + "\n")
def protfunction(query_proteins):
    """Shows the proteins function given their names or ids

    str -> list

    Returns a list of ``(protein, function_text)`` tuples.  The function text
    is taken from the first comment that starts with 'FUNCTION: ' (the
    original blindly sliced the first comment, whatever its topic, and
    raised IndexError for records without comments); it is '' when the
    record has no such comment.
    """
    prefix = "FUNCTION: "
    function_list = []
    for prot in query_proteins:
        with ExPASy.get_sprot_raw(prot) as handle:
            record = SwissProt.read(handle)
        function = ""
        for comment in record.comments:
            if comment.startswith(prefix):
                function = comment[len(prefix):]
                break
        function_list.append((prot, function))
    return function_list
def main(argv):
    """Print the GO biological processes of the ID read from ``argv[0]``.

    Args:
        argv: argument list; ``argv[0]`` is handed to ``files.read_line``.
    """
    line = files.read_line(argv[0])
    handle = ExPASy.get_sprot_raw(line)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    # startswith, not a substring test: "'P:' in x" is also true for
    # molecular functions such as 'F:ATP:ADP antiporter activity'.
    go = [
        x[2] for x in record.cross_references
        if x[0] == 'GO' and x[2].startswith('P:')
    ]
    print('\n'.join(term.split(':', 1)[1] for term in go))
def print_bio_process(file):
    """Print the GO biological processes of the UniProt ID held in ``file``.

    Args:
        file: readable object whose whole content is the UniProt ID.
    """
    Uniprot_id = file.read().rstrip()
    handle = ExPASy.get_sprot_raw(Uniprot_id)
    try:
        rec = SwissProt.read(handle)
    finally:
        handle.close()
    bio_process = [
        i[2][2:] for i in rec.cross_references
        if i[0] == 'GO' and i[2].startswith('P')
    ]
    print('\n'.join(bio_process))
def test_get_sprot_raw(self):
    """Bio.ExPASy.get_sprot_raw("O23729")"""
    identifier = "O23729"
    stream = ExPASy.get_sprot_raw(identifier)
    entry = SeqIO.read(stream, "swiss")
    stream.close()
    # the parsed entry must be the one that was asked for
    self.assertEqual(entry.id, identifier)
    self.assertEqual(len(entry), 394)
    checksum = seguid(entry.seq)
    self.assertEqual(checksum, "5Y08l+HJRDIlhLKzFEfkcKd1dkM")
def dbpr():
    """Print the GO biological processes of the ID in 'rosalind_dbpr.txt'."""
    with open("rosalind_dbpr.txt") as id_file:
        uniprot_id = id_file.read().strip()
    handle = ExPASy.get_sprot_raw(uniprot_id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    # print the biological processes, one per line
    for ref in record.cross_references:
        if ref[0] == 'GO' and ref[2].startswith('P:'):
            print(ref[2][2:])
def __init__(self, seq_id=None, seq_type=None):
    """Set ``self.seq_record`` by downloading ``seq_id``.

    Args:
        seq_id: identifier of the sequence to download.
        seq_type: 'uniprot' (fetched from ExPASy, parsed as "swiss") or
            'genbank' (fetched with Entrez.efetch, parsed as "genbank").
            For any other value nothing is downloaded and ``seq_record`` is
            not set.
    """
    # Strings are compared with ==; ``is`` tests object identity and only
    # worked when the interpreter happened to intern both strings.
    if seq_type == 'uniprot':
        handle = ExPASy.get_sprot_raw(seq_id)
        seq_format = "swiss"
    elif seq_type == 'genbank':
        handle = Entrez.efetch(db='protein', rettype='genbank', id=seq_id)
        seq_format = "genbank"
    else:
        # no handle was opened, so there is nothing to read or close
        return
    try:
        self.seq_record = SeqIO.read(handle, seq_format)
    finally:
        handle.close()
def DBPR(id):
    """Return the GO biological-process names of entry ``id``.

    Args:
        id: UniProt ID passed to ``ExPASy.get_sprot_raw``.

    Returns:
        list of process names (the text after 'P:').
    """
    handle = ExPASy.get_sprot_raw(id)
    try:
        # SwissProt.read handles one entry; use SwissProt.parse for several
        record = SwissProt.read(handle)
    finally:
        handle.close()
    GO = []
    for item in record.cross_references:
        if item[0] != 'GO':
            continue
        # split at the first colon only so names containing ':' stay whole
        aspect, _, term = item[2].partition(':')
        if aspect == 'P':
            GO.append(term)
    return GO
def find_function(prot):
    """Return the GO biological-process names of entry ``prot``.

    Every matching cross-reference is also printed.

    Args:
        prot: UniProt ID passed to ``ExPASy.get_sprot_raw``.

    Returns:
        list of process names (the text after 'P:').
    """
    handle = ExPASy.get_sprot_raw(prot)
    try:
        # SwissProt.read handles one entry; use SwissProt.parse for several
        record = SwissProt.read(handle)
    finally:
        handle.close()
    functions = []
    for ref in record.cross_references:
        if ref[0] == 'GO' and ref[2].startswith('P'):
            print(ref)
            functions.append(ref[2][2:])
    return functions
def acession(self):
    """Fetch the Swiss-Prot record of every ID in ``self.ids``.

    The result is stored in ``self.rec`` and returned.  IDs equal to 'ND'
    are not looked up; the string 'ND' is kept in their place.
    """
    self.rec = []
    for ide in self.ids:
        if ide == 'ND':
            self.rec.append('ND')
            continue
        results = ExPASy.get_sprot_raw(ide)
        try:
            self.rec.append(SwissProt.read(results))
        finally:
            results.close()
    return self.rec
def BiologicalProcesses(UniProtID):
    """Return the GO biological-process names of an entry, one per line.

    Args:
        UniProtID: ID passed to ``ExPASy.get_sprot_raw``.

    Returns:
        the process names joined with newlines.
    """
    Handle = ExPASy.get_sprot_raw(UniProtID)
    try:
        Record = SwissProt.read(Handle)
    finally:
        Handle.close()
    Processes = []
    for ref in Record.cross_references:
        if "GO" not in ref:
            continue
        for field in ref:
            if field.startswith("P:"):
                # Drop only the 'P:' prefix; cutting at the last colon
                # truncated names that contain a colon.
                Processes.append(field[2:])
    return "\n".join(Processes)
def get_keywords(lookup):
    """Fetch and parse the Swiss-Prot entry ``lookup``.

    Exits the program with status 1 when the download fails or when
    ``SwissProt.read`` raises ``ValueError``.

    NOTE(review): nothing is returned although the name suggests the
    keywords should be -- confirm whether the function is complete.
    """
    try:
        handle = ExPASy.get_sprot_raw(lookup)
    except Exception:
        print("Error in ExPASy")
        sys.exit(1)
    try:
        record = SwissProt.read(handle)
    except ValueError as error:
        print(error)
        sys.exit(1)
    finally:
        handle.close()
def get_prot(id):
    """Describe a Swiss-Prot entry as one '|'-separated line.

    The line holds the ID, the sequence, its length, the taxonomy and the
    organism of the entry fetched for ``id``.
    """
    with ExPASy.get_sprot_raw(id) as source:
        entry = SeqIO.read(source, 'swiss')

    residues = entry.seq
    length = len(residues)
    lineage = entry.annotations["taxonomy"]
    species = entry.annotations["organism"]
    # host = entry.annotations["organism_host"]

    # The pieces are still joined with "+" around the sequence object, so
    # that object's own addition decides the type of the result.
    head = 'ID:' + id + '|' + 'SEQUENCE:'
    tail = ('|' + 'SEQUENCE LENGTH:' + str(length) + 'bp' + '|' +
            'TAXONOMY:' + str(lineage) + '|' + 'ORGANISM:' + species)
    return head + residues + tail
def test_get_sprot_raw(self):
    """Bio.ExPASy.get_sprot_raw("O23729")"""
    identifier = "O23729"
    stream = UndoHandle(ExPASy.get_sprot_raw(identifier))
    # An HTML page here means the proxy answered with an error page
    # instead of the entry.
    first_line = _as_string(stream.peekline())
    if first_line.startswith("<!DOCTYPE HTML"):
        raise IOError
    entry = SeqIO.read(stream, "swiss")
    stream.close()
    self.assertEqual(entry.id, identifier)
    self.assertEqual(len(entry), 394)
    checksum = seguid(entry.seq)
    self.assertEqual(checksum, "5Y08l+HJRDIlhLKzFEfkcKd1dkM")
def main(protein_id):
    """Return the GO biological-process names of ``protein_id``, one per line.

    Every cross-reference of the record is printed while it is scanned.

    Args:
        protein_id: UniProt ID passed to ``ExPASy.get_sprot_raw``.

    Returns:
        the process names joined with newlines, stripped of surrounding
        whitespace.
    """
    handle = ExPASy.get_sprot_raw(protein_id)
    try:
        # SwissProt.read handles one entry; use SwissProt.parse for several
        record = SwissProt.read(handle)
    finally:
        handle.close()
    processes = []
    for r in record.cross_references:
        print(r)
        if r[0] == "GO":
            # split at the first colon only so names containing ':' stay whole
            aspect, _, term = r[2].partition(":")
            if aspect == 'P':
                processes.append(term)
    return "\n".join(processes).strip()
def get_seq(source, fmt):
    """Return an iterator over the sequence records of ``source``.

    Args:
        source: path of a local file for 'fasta' and 'genbank', or an ID
            passed to ``ExPASy.get_sprot_raw`` for 'swiss'.
        fmt: 'fasta', 'genbank' or 'swiss'; also handed to ``SeqIO.parse``.

    Returns:
        the iterator produced by ``SeqIO.parse``.

    Raises:
        TypeError: if ``fmt`` is none of the supported formats.
    """
    if fmt in ('fasta', 'genbank'):
        # The genbank branch used to assign to the misspelled ``hanlde`` and
        # read the undefined ``sourc``, so it always raised NameError.
        handle = open(source)
    elif fmt == 'swiss':
        handle = ExPASy.get_sprot_raw(source)
    else:
        raise TypeError('Need to choose correct file format')
    # The handle is left open on purpose: it is still being read while the
    # caller iterates over the returned records.
    record_iterator = SeqIO.parse(handle, fmt)
    return record_iterator
def download_entry(self, accession):
    """Download a Swiss-Prot record and cache it in ``self.records``.

    Args:
        accession: accession of the entry to download.

    Returns:
        the downloaded record.

    Raises:
        KeyError: if the entry cannot be fetched or parsed, or if
            ``self.organism`` does not occur in the record's lower-cased
            organism line.
    """
    try:
        handle = ExPASy.get_sprot_raw(accession)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()
    except Exception:
        # ``except Exception`` instead of a bare except, so Ctrl-C and
        # SystemExit are no longer turned into KeyError.
        raise KeyError('{}'.format(accession))

    record_org = record.organism.strip().lower()
    if self.organism not in record_org:
        message = '{} ortholog of {} not found.'.format(self.organism,
                                                        accession)
        print(message)
        raise KeyError(message)
    self.records[accession] = record
    return record
def main():
    """Print the GO biological processes of the ID named by the input argument.

    The ID is obtained with ``get_uniprot_id_from_file(arguments['<input>'])``.
    """
    # Grab our input id value
    uniprot_id = get_uniprot_id_from_file(arguments['<input>'])

    # Fetch and parse the data for the uniprot id
    handle = ExPASy.get_sprot_raw(uniprot_id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()

    # Process out the stuff of interest, GO values in this case
    for ref in record.cross_references:
        if ref[0] != 'GO':
            continue
        # partition never fails to unpack; ``pre, val = x.split(':')`` raised
        # ValueError for names that contain a colon themselves.
        pre, _, val = ref[2].partition(':')
        if pre == 'P':
            print(val)
def main(fichier):
    """ navigate into protein database

    Reads a UniProt ID from the first line of ``fichier`` and prints the GO
    biological processes of that entry, one per line.
    """
    from Bio import ExPASy
    from Bio import SwissProt

    with open(fichier, 'r') as f:
        fline = f.readline().strip()
    handle = ExPASy.get_sprot_raw(fline)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    # Slice off the two-character 'P:' prefix.  lstrip('P:') removes any run
    # of leading 'P' and ':' characters, so it also ate the first letters of
    # names that begin with a capital P.
    go = [
        i[2][2:] for i in record.cross_references
        if i[0] == 'GO' and i[2].startswith('P:')
    ]
    print('\n'.join(go))
def test_get_sprot_raw(self):
    """Bio.ExPASy.get_sprot_raw("O23729")"""
    identifier = "O23729"
    try:
        stream = UndoHandle(ExPASy.get_sprot_raw(identifier))
        # An HTML page here means the proxy answered with an error page
        # instead of the entry.
        first_line = _as_string(stream.peekline())
        if first_line.startswith("<!DOCTYPE HTML"):
            raise IOError
        entry = SeqIO.read(stream, "swiss")
        stream.close()
    except IOError:
        raise MissingExternalDependencyError(
            "internet (or maybe just ExPASy) not available")
    self.assertEqual(entry.id, identifier)
    self.assertEqual(len(entry), 394)
    checksum = seguid(entry.seq)
    self.assertEqual(checksum, "5Y08l+HJRDIlhLKzFEfkcKd1dkM")
def __getitem__(self, id):
    """__getitem__(self, id) -> object

    Look up one SwissProt entry on ExPASy; ``id`` may be the id or the
    accession of the entry.  The result is ``self.parser.parse(handle)``
    when a parser is set, otherwise the raw text read from the handle.
    A KeyError is raised when fetching fails with IOError.

    """
    from Bio import ExPASy

    # Let the limiter decide whether enough time has passed since the
    # previous query.
    self.limiter.wait()

    try:
        source = ExPASy.get_sprot_raw(id)
    except IOError:
        raise KeyError(id)

    # NOTE(review): the handle is handed over / read without being closed
    # here, exactly as before -- confirm who owns it.
    if self.parser is None:
        return source.read()
    return self.parser.parse(source)
def main():
    """Write the GO biological processes of one UniProt entry to a file.

    Reads the UniProt ID from 'problem_datasets/rosalind_dbpr.txt', fetches
    the entry from ExPASy, writes the process names (one per line) to
    'output/rosalind_dbpr_out.txt' and prints a short summary.
    """
    # Read the UniProt ID from a txt file.
    with open('problem_datasets/rosalind_dbpr.txt', 'r') as infile:
        uni_id = infile.read().strip()

    # Retrieve the data from UniProt; SwissProt.read handles one entry
    # (SwissProt.parse would be needed for several proteins).
    with ExPASy.get_sprot_raw(uni_id) as raw_data:
        record = SwissProt.read(raw_data)

    # Only GO cross-references are inspected, so another database whose third
    # field happens to start with 'P:' cannot end up in the answer.
    go = [
        ref[2][2:]
        for ref in record.cross_references
        if ref[0] == 'GO' and ref[2].startswith('P:')
    ]

    # Output answer.
    with open('output/rosalind_dbpr_out.txt', 'w') as outfile:
        outfile.write('\n'.join(go))

    # Optional: Print answer and gene ID/name.
    # NOTE(review): assumes record.gene_name is a string that starts with
    # 'Name=' -- confirm against the installed Biopython version.
    name = record.gene_name.split(' ')[0][5:]
    print('Gene:\n', name, ' (UniProt ID = ', uni_id,
          ')\n\nBiological Processes:\n', '\n'.join(go), sep='')
def snp_uniprot(uniprotname, selection='(all)', label=1, name='', quiet=0):
    '''
DESCRIPTION

    Selects all UniProt annotated nsSNPs (natural variants) in given
    structure. Does a sequence alignment of UniProt sequence and PDB
    sequence.

USAGE

    snp_uniprot uniprotname [, selection [, label [, name [, quiet]]]]

ARGUMENTS

    uniprotname = string: UniProt reference (like HBB_HUMAN or P68871)

    selection = string: atom selection

    label = 0 or 1: Label CA atoms of nsSNPs with mutation {default: 1}

    name = string: name of new selection {default: nsSNPs}

EXAMPLE

    fetch 3HBT
    snp_uniprot ACTG_HUMAN, chain A

SEE ALSO

    snp_ncbi
    '''
    from Bio import ExPASy
    from Bio import SwissProt

    handle = ExPASy.get_sprot_raw(uniprotname)
    try:
        record = SwissProt.read(handle)
    finally:
        # the record is fully parsed; release the connection before the
        # record is handed on
        handle.close()
    snp_common(record, selection, label, name, quiet)
#!/usr/bin/python
# Download the raw Swiss-Prot text of a few entries and concatenate it into
# ``all_results``.
from Bio import ExPASy

ids = ['O23729', 'O23730', 'O23731']

pieces = []
for entry_id in ids:  # renamed so the builtin ``id`` is not shadowed
    results = ExPASy.get_sprot_raw(entry_id)
    try:
        pieces.append(results.read())
    finally:
        results.close()
all_results = ''.join(pieces)
# Print the GO biological processes of the Swiss-Prot entry Q5SLP9.
from Bio import ExPASy
from Bio import SwissProt

uniprot_id = "Q5SLP9"  # renamed so the builtin ``id`` is not shadowed
handle = ExPASy.get_sprot_raw(uniprot_id)
try:
    record = SwissProt.read(handle)
finally:
    handle.close()

for x in record.cross_references:
    # only GO references carry the 'P:' (biological process) prefix of
    # interest; checking the database avoids matching unrelated entries
    if x[0] == 'GO' and x[2].startswith('P:'):
        print(x[2][2:])
def protein_record(protein):
    """Return the SwissProt record of a protein with id protein."""
    handle = ExPASy.get_sprot_raw(protein)
    try:
        # SwissProt.read handles one entry; use SwissProt.parse for several
        return SwissProt.read(handle)
    finally:
        handle.close()
'''
Created on Mar 5, 2013

@author: Mike
'''
from Bio import ExPASy
from Bio import SwissProt

if __name__ == '__main__':
    # Print the GO biological processes of the entry below.
    protein = 'Q9JT70'
    handle = ExPASy.get_sprot_raw(protein)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()

    refs = [r[2] for r in record.cross_references if r[0] == "GO"]
    # startswith, not a substring test: "'P:' in term" is also true for
    # molecular functions such as 'F:ATP:ADP antiporter activity'.
    refs = [term for term in refs if term.startswith("P:")]
    for term in refs:
        print(term[2:])
from Bio import ExPASy, SwissProt
from Bio.SwissProt import KeyWList

# test ID: Q5SLP9
# get GO-Biological process info
# DNA recombination
# DNA repair
# DNA replication

handle = ExPASy.get_sprot_raw('Q9HAV7')
try:
    record = SwissProt.read(handle)
finally:
    handle.close()

for i in record.cross_references:
    if i[0] == 'GO' and i[2].startswith('P'):
        # Remove only the leading 'P:'; replace('P:', '') would also delete
        # any later 'P:' inside the process name.
        term = i[2]
        print(term[2:] if term.startswith('P:') else term)
def checksum_summary(record) :
    """Return a one-line summary of ``record.seq``.

    The summary holds the sequence text (only its first 19 and last 3
    letters, joined by '...', when the sequence has 25 letters or more),
    the value of ``seguid(record.seq)`` in brackets and the sequence length.
    """
    if len(record.seq) < 25 :
        short = record.seq.tostring()
    else :
        short = record.seq.tostring()[:19] \
              + "..." + record.seq.tostring()[-3:]
    return "%s [%s] len %i" \
           % (short, seguid(record.seq), len(record.seq))

#####################################################################

# Fetch each listed entry from ExPASy and check that exactly one record with
# the requested id comes back.
print "Checking Bio.ExPASy.get_sprot_raw()"
id_list = ["O23729"]
for identifier in id_list :
    print "- Fetching %s" % identifier
    handle = ExPASy.get_sprot_raw(identifier)
    records = list(SeqIO.parse(handle, "swiss"))
    assert len(records)==1
    record = records[0]
    print " Got " + checksum_summary(record)
    assert record.id == identifier
# drop the names used by this check
del id_list, handle, identifier, records, record

#####################################################################

# NOTE(review): the statement below is cut off in the visible source; it is
# reproduced unchanged.
print "Checking Bio.Entrez.efetch()"
for database, format, entry in [("genome","fasta","X52960"),
                                ("genome","gb","X52960"),
                                ("nucleotide", "fasta", "6273291"),
                                ("nucleotide", "gb", "6273291"),
                                ("protein", "fasta", "16130152"),
def getgo(id):
    """Print the GO biological-process names of entry ``id``, one per line.

    Args:
        id: UniProt ID passed to ``ExPASy.get_sprot_raw``.
    """
    handle = ExPASy.get_sprot_raw(id)
    try:
        record = SwissProt.read(handle)
    finally:
        handle.close()
    go = [
        # maxsplit=1 keeps names that themselves contain a colon intact
        r[2].split(":", 1)[1]
        for r in record.cross_references
        if r[0] == "GO" and r[2].startswith('P')
    ]
    # print(...) with one argument behaves the same on Python 2 and 3.
    print("\n".join(go))
#!/usr/bin/python
# Print the keywords of the Swiss-Prot entry CERU_HUMAN.
#
# NOTE(review): the original imported Bio.WWW and Bio.SwissProt.SProt
# (SProt.Iterator / SProt.RecordParser).  Those modules are no longer part of
# Biopython, so the entry is parsed with Bio.SwissProt.read instead --
# confirm nothing else relied on names from the removed modules.
from Bio import ExPASy
from Bio import SwissProt

expasy = ExPASy.get_sprot_raw('CERU_HUMAN')
try:
    record = SwissProt.read(expasy)
finally:
    expasy.close()
print(record.keywords)