Exemplo n.º 1
0
    def search(self):
        """Search PubMed for ``self.searchTerm`` and store one DBItem per hit.

        ``PubMed.search_for`` is queried with ``self.searchTerm`` (capped at
        ``self.maxResults`` IDs).  For every returned ID the Medline record
        and the GenBank record are looked up, wrapped in a ``DBItem`` and
        stored in ``self.items`` keyed by that ID.

        Raises:
            ValueError: if ``self.database`` is not ``'PubMed'``.  The
                original code imported ``PubMed``/``GenBank`` only inside
                the ``'PubMed'`` branch but used them unconditionally, so
                any other value failed with an accidental
                ``UnboundLocalError``; the failure is now explicit.

        NOTE(review): ``Bio.PubMed`` and ``GenBank.NCBIDictionary`` appear to
        have been removed from current Biopython releases in favour of
        ``Bio.Entrez`` -- confirm the Biopython version this project pins.
        """
        if self.database != 'PubMed':
            raise ValueError('Unsupported database: %r' % (self.database,))

        # Function-scope imports are kept, as in the original, so the module
        # can be imported without Biopython's legacy modules being present.
        from Bio import GenBank
        from Bio import Medline
        from Bio import PubMed

        search_ids = PubMed.search_for(self.searchTerm,
                                       max_ids=self.maxResults)

        genbank_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(self.type,
                                           'genbank',
                                           parser=genbank_parser)

        medline_parser = Medline.RecordParser()
        medline_dict = PubMed.Dictionary(delay=1.0, parser=medline_parser)

        for record_id in search_ids:
            medline_record = medline_dict[record_id]
            # NOTE(review): the same ID indexes both the Medline and the
            # GenBank dictionary -- confirm PubMed IDs are valid keys for the
            # GenBank lookup.
            genbank_record = ncbi_dict[record_id]
            self.items[record_id] = DBItem(self.project,
                                           seq=genbank_record.seq,
                                           descript=genbank_record.description,
                                           id=record_id,
                                           record=medline_record)
Exemplo n.º 2
0
    org = rec.annotations.get('organism', '')
    date = rec.annotations.get('date', '')
    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
    return head, body


if __name__ == '__main__':
    # Command line: MODE TEXT OUTPUT_FILE
    #   MODE        -- passed as the first argument to GenBank.NCBIDictionary
    #   TEXT        -- whitespace-separated GI numbers, or a free-text query
    #   OUTPUT_FILE -- path the FASTA-formatted records are written to
    mode = sys.argv[1]
    text = sys.argv[2]
    output_file = sys.argv[3]

    print('Searching for %s <br>' % text)

    # If every whitespace-separated token parses as an integer, treat the
    # input as a list of GI numbers; otherwise fall back to a text search.
    gi_list = text.split()
    try:
        for token in gi_list:
            int(token)
    except ValueError:
        gi_list = GenBank.search_for(text, max_ids=10)

    # The context manager guarantees the output file is closed even when a
    # lookup or make_fasta() raises part-way through the loop.
    with open(output_file, 'wt') as fp:
        record_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(mode,
                                           'genbank',
                                           parser=record_parser)
        for gid in gi_list:
            res = ncbi_dict[gid]
            head, body = make_fasta(res)
            fp.write(head + body + '\n')
            print(head)
Exemplo n.º 3
0
    def run(self):
        """Search for GenBank IDs matching ``self.query_string``.

        The query is passed to ``self.search`` with progressively looser
        criteria until a non-empty result comes back:

        * A query starting with ``GI:`` or ``gi:`` has that prefix stripped
          (``self.query_string`` is updated in place) and the bare value is
          searched first.
        * Otherwise the first attempt is
          ``"mycobacterium phage <query> AND Hatfull GF[AUTH]"``.
        * Fallbacks drop the ``mycobacterium phage`` prefix, then the author
          restriction.
        * When ``self.allowRefSeqs`` is false every query additionally
          carries ``NOT srcdb_refseq[prop]``.

        Side effects:
            * ``self.results`` is always set to the final hit list, which
              may be empty.
            * ``self.result`` is set to ``None`` when RefSeqs are disallowed
              and nothing was found.  Previously this path returned without
              touching ``self.results``, unlike every other exit.
            * Progress messages are printed to stdout.

        The interactive selection / record download that used to follow the
        unconditional ``return`` could never execute and has been removed.

        Print calls use the single-argument parenthesised form, which emits
        the same text under Python 2 and also parses under Python 3.
        """
        if not self.allowRefSeqs:
            print('NOT ALLOWING REFSEQS')
            if self.query_string.startswith(('GI:', 'gi:')):
                # Direct GI lookup: drop the 3-character prefix.
                self.query_string = self.query_string[3:]
                gi_list = self.search(self.query_string)
            else:
                q = ("mycobacterium phage " + self.query_string +
                     " AND Hatfull GF[AUTH] NOT srcdb_refseq[prop]")
                print('%s %s' % ("search query:", q))
                gi_list = self.search(q)
                print('%s %s' % ('gi_list:', gi_list))
            if len(gi_list) == 0:
                print('Got no results.  Changing search criteria')
                q = (self.query_string +
                     " AND Hatfull GF[AUTH] NOT srcdb_refseq[prop]")
                print('%s %s' % ("search query:", q))
                gi_list = self.search(q)
            if len(gi_list) == 0:
                print('Got no results.  Changing search criteria')
                q = self.query_string + " NOT srcdb_refseq[prop]"
                print('%s %s' % ("search query:", q))
                gi_list = self.search(q)
            if len(gi_list) == 0:
                print('found no results other than refSeq(s), which you refused')
                self.result = None
            else:
                print('found GenBank Direct Submission(s)')
                print(gi_list)
        else:  # allowing refSeqs
            print('ALLOWING REFSEQS')
            if self.query_string.startswith(('GI:', 'gi:')):
                # Direct GI lookup: drop the 3-character prefix.
                self.query_string = self.query_string[3:]
                gi_list = self.search(self.query_string)
            else:
                q = ("mycobacterium phage " + self.query_string +
                     " AND Hatfull GF[AUTH]")
                print('%s %s' % ("search query:", q))
                gi_list = self.search(q)
            if len(gi_list) == 0:
                # This fallback is intentionally silent, as in the original.
                q = self.query_string + " AND Hatfull GF[AUTH]"
                gi_list = self.search(q)
            if len(gi_list) == 0:
                print('Got no results.  Changing search criteria')
                print('%s %s' % ('search query:', self.query_string))
                gi_list = self.search(self.query_string)

            if len(gi_list) == 0:
                print('no results found')

        # Every exit path publishes the hit list under the same attribute.
        self.results = gi_list
        return