Example #1
    def testParsing(self):

        # Parse the sample PubMed search result into the test database.
        src = os.path.join(base, "search-0.xml")

        r = Reader()
        r.parse(Compat.ElementTree.ElementTree(file=open(src)), self.db)

        # Dump the database back to XML and compare it with the
        # expected reference file.
        tmp = pybut.dbname()
        fd = open(tmp, "w")
        self.db.xmlwrite(fd)
        fd.close()

        pybut.fileeq(tmp, pybut.fp("ut_pubmed", "result.bip"))
Example #2
    def __init__(self, db):

        self.db = db
        self.reader = Reader()

        self._pending = None
Example #3
class PubMed(object):
    """ A connection to the PubMed database """

    schema = 'org.pybliographer/pubmed/0.1'

    baseURL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils'

    BATCH_SIZE = 500
    
    toolName = 'pybliographer'
    adminEmail = '*****@*****.**'

    log = logging.getLogger('pyblio.external.pubmed')

    SRV_SEARCH = '/esearch.fcgi'
    SRV_FETCH = '/efetch.fcgi'

    def __init__(self, db):

        self.db = db
        self.reader = Reader()

        self._pending = None

    def count(self, query, db='PubMed'):
        assert self._pending is None, 'no more than one search at a time per connection'

        data = {'db': db, 'term': query}
        req = self._send_query(self.SRV_SEARCH, data, rettype='count')

        def success(data):
            return int(data.find('./Count').text)
        return req.addCallback(success)
    
    def search(self, query, maxhits=500, db='PubMed'):

        query = query.strip()
        
        # The result set that will contain the data
        self._rs = self.db.rs.new()
        self._rs.name = _('Imported from PubMed')

        # Special case for no query: this would cause an error from
        # the server if we do not catch it first.
        if not query:
            results = defer.Deferred()
            def autofire():
                results.callback(0)
            reactor.callLater(0, autofire)
            return results, self._rs

        self._query = query
        self._pubmed_db = db
        self._total = None
        self._webenv = None
        self._query_key = None

        self._batch = batch.Batch(maxhits, self.BATCH_SIZE)
        return self._batch.fetch(self._runner), self._rs

    def cancel(self):
        """ Cancel a running query.

        The database is not reverted to its original state."""
        if self._pending:
            self._pending.cancel()

    def _send_query(self, service, args, **kargs):

        all = {'email': self.adminEmail,
               'tool': self.toolName,
               'retmode': 'xml'}
        
        all.update(args)
        all.update(kargs)

        # ensure all arguments are utf8 encoded
        for k, v in all.items():
            if isinstance(v, unicode):
                all[k] = v.encode('utf-8')
                
        url = self.baseURL + service + '?' + urllib.urlencode(all)

        self.log.debug('sending query %r' % url)

        # We have the charge of setting and cleaning self._pending
        self._pending = HTTPRetrieve(url)

        def done(data):
            self._pending = None
            return data
        
        return self._pending.deferred.addBoth(done).addCallback(_xml)

    def _runner(self, start, count):

        if self._total is None:
            # for the first iteration of the query, we send a
            # "Search", which prepares the query, returns the total
            # number of results, and gives us a key to access the
            # content. The following calls will use "Fetch" to
            # actually gather the results.
            data = {'db': self._pubmed_db, 'term': self._query}
            d = self._send_query(self.SRV_SEARCH, data, usehistory='y')

            def _got_summary(data):
                # Total number of results
                self._total = int(data.find('./Count').text)
            
                # Parameters necessary to fetch the content of the result set
                self._webenv = data.find('./WebEnv').text
                self._query_key = data.find('./QueryKey').text

                # next run, we will actually start fetching the
                # results, starting at 0
                return 0, self._total
            return d.addCallback(_got_summary)

        else:
            def _received(data):
                previously = len(self._rs)
                self.reader.parse(data, self.db, self._rs)
                freshly_parsed = len(self._rs) - previously
                if freshly_parsed <= 0:
                    self.log.warn("what happend? I increased the result set by %d" % freshly_parsed)
                    # pretend there has been at least one parsing, so
                    # that we ensure that the task
                    # progresses. Otherwise we might loop forever on
                    # an entry we cannot parse.
                    freshly_parsed = 1
                return freshly_parsed, self._total

            fetchdata = {
                'db': self._pubmed_db,
                'WebEnv': self._webenv,
                'query_key': self._query_key,
            }
            d = self._send_query(self.SRV_FETCH, fetchdata,
                                 retstart=start, retmax=count)
            return d.addCallback(_received)
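
For reference, here is a minimal usage sketch of the class in Example #3 (not taken from the source). It assumes an already opened Pyblio database in `db`, that `PubMed` has been imported from its module, and a running Twisted reactor driving the deferreds; the query string and `maxhits` value are purely illustrative:

from twisted.internet import reactor

pubmed = PubMed(db)   # `db` is assumed to be an open Pyblio database

def got_count(n):
    # count() fires its deferred with the number of matching records
    print 'PubMed reports %d matching records' % n

    # only one request at a time per connection, so start the actual
    # search once the count has arrived
    done, rs = pubmed.search('aspirin', maxhits=50)

    def finished(result):
        # rs has been filled incrementally while the batches were parsed
        print 'fetched %d records into %r' % (len(rs), rs.name)
        reactor.stop()

    done.addCallbacks(finished, lambda failure: reactor.stop())

pubmed.count('aspirin').addCallback(got_count)
reactor.run()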