def search(self):
    """Fill ``self.items`` with one ``DBItem`` per PubMed search hit.

    Nothing happens unless ``self.database`` equals ``'PubMed'``.  Otherwise
    PubMed is searched for ``self.searchTerm`` (at most ``self.maxResults``
    ids), and for every returned id a Medline record and a GenBank record
    are looked up and wrapped in a ``DBItem`` stored under that id.

    Returns:
        None.  Results are stored in ``self.items``.
    """
    if self.database != 'PubMed':
        return

    # Biopython is imported lazily, only when a PubMed search is requested.
    from Bio import PubMed
    from Bio import GenBank
    hit_ids = PubMed.search_for(self.searchTerm, max_ids=self.maxResults)

    # GenBank records come from the NCBI database named by ``self.type``.
    feature_parser = GenBank.FeatureParser()
    genbank_lookup = GenBank.NCBIDictionary(
        self.type, 'genbank', parser=feature_parser)

    from Bio import Medline
    medline_parser = Medline.RecordParser()
    medline_lookup = PubMed.Dictionary(delay=1.0, parser=medline_parser)

    for hit_id in hit_ids:
        # NOTE(review): the same id keys both the Medline and the GenBank
        # lookup -- confirm that PubMed ids are valid GenBank keys here.
        medline_record = medline_lookup[hit_id]
        genbank_record = genbank_lookup[hit_id]
        self.items[hit_id] = DBItem(
            self.project,
            seq=genbank_record.seq,
            descript=genbank_record.description,
            id=hit_id,
            record=medline_record,
        )
org = rec.annotations.get('organism', '') date = rec.annotations.get('date', '') head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date) body = '\n'.join(textwrap.wrap(rec.seq.data, width=80)) return head, body if __name__ == '__main__': mode = sys.argv[1] text = sys.argv[2] output_file = sys.argv[3] print('Searching for %s <br>' % text) # check if inputs are all numbers try: gi_list = text.split() [int(_) for _ in gi_list] except ValueError: gi_list = GenBank.search_for(text, max_ids=10) fp = open(output_file, 'wt') record_parser = GenBank.FeatureParser() ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser=record_parser) for gid in gi_list: res = ncbi_dict[gid] head, body = make_fasta(res) fp.write(head + body + '\n') print(head) fp.close()
def run(self):
    """Resolve ``self.query_string`` to a GenBank record in ``self.result``.

    A query starting with ``GI:`` or ``gi:`` has that prefix removed (the
    stripped value is written back to ``self.query_string``) and is passed
    to ``self.search`` unchanged.  Any other query is tried as up to three
    progressively broader searches, stopping at the first one with hits:

    1. ``"mycobacterium phage " + query + " AND Hatfull GF[AUTH]"``
    2. ``query + " AND Hatfull GF[AUTH]"``
    3. ``query``

    When ``self.allowRefSeqs`` is false, ``" NOT srcdb_refseq[prop]"`` is
    appended to each of the three.

    If more than one id is found, the ids are listed and the user is asked
    to pick one by number; the prompt repeats until the answer is ``0`` or a
    listed number.

    Outcomes:
        * record fetched: ``self.result`` is ``ncbi_dict[<chosen id>]``.
        * text query without hits: ``self.result`` is ``None`` (when
          RefSeqs are allowed, ``self.results`` is also set to the empty
          search result, as before).
        * ``GI:`` query without hits, or the user answers ``0``:
          ``self.result`` is ``0``.

    Returns:
        None.

    The ``print(...)`` calls take a single argument each so the method
    behaves the same under Python 2 and Python 3.
    """
    allow_refseqs = bool(self.allowRefSeqs)
    print('ALLOWING REFSEQS' if allow_refseqs else 'NOT ALLOWING REFSEQS')

    if self.query_string.startswith(('GI:', 'gi:')):
        self.query_string = self.query_string[3:]
        gi_list = self.search(self.query_string)
    else:
        refseq_filter = '' if allow_refseqs else ' NOT srcdb_refseq[prop]'
        name = self.query_string
        attempts = (
            'mycobacterium phage ' + name + ' AND Hatfull GF[AUTH]'
            + refseq_filter,
            name + ' AND Hatfull GF[AUTH]' + refseq_filter,
            name + refseq_filter,
        )
        gi_list = []
        for attempt, q in enumerate(attempts):
            if attempt:
                print('Got no results. Changing search criteria')
            print('search query: %s' % q)
            gi_list = self.search(q)
            if gi_list:
                break

        if not gi_list:
            if allow_refseqs:
                print('no results found')
                # Legacy attribute, kept for callers that read it.
                self.results = gi_list
            else:
                print('found no results other than refSeq(s), '
                      'which you refused')
            self.result = None
            return

        if not allow_refseqs and attempt == len(attempts) - 1:
            # Only the broadest RefSeq-excluding query matched.
            print('found GenBank Direct Submission(s)')

    print('gi_list: %s' % (gi_list,))

    if not gi_list:
        # A GI lookup that matched nothing; previously this fell through to
        # gi_list[0] and raised IndexError.
        print('non-existent phage query')
        self.result = 0
        return

    selection = 0
    if len(gi_list) > 1:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        for number, gi in enumerate(gi_list, 1):
            print('%d\t%s' % (number, gi))
        while True:
            answer = read_line(
                "Your search returned multiple results. "
                "Please type the number for your selection: ")
            try:
                selection = int(answer) - 1
            except ValueError:
                print('Please enter a whole number.')
                continue
            # -1 (an answer of 0) is kept as the "no selection" value.
            if -1 <= selection < len(gi_list):
                break
            print('Please enter a number between 1 and %d.' % len(gi_list))

    if selection == -1:
        print('non-existent phage query')
        self.result = 0
        return

    print('creating parser...')
    feature_parser = GenBank.FeatureParser()
    print('creating dict')
    ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank',
                                       parser=feature_parser)
    print('got result')
    self.result = ncbi_dict[gi_list[selection]]