Пример #1
0
    def iter_page_info(self):
        """Yield a ``PageInfo`` for every entry produced by a hash/info stream.

        A ``HashInfoStream`` is opened on the underlying page database and
        advanced for as long as ``hashinfo_stream_next`` reports
        ``stream_state_next``.  The native stream is released when iteration
        ends for any reason: exhaustion, the consumer abandoning the
        generator early, or an exception raised while iterating.

        Yields:
            PageInfo: built from the page hash and the ``PageInfo *`` written
            by ``hashinfo_stream_next``.

        Raises:
            AduanaException: if the stream cannot be created.
        """
        st = ffi.new('HashInfoStream **')
        ret = self._c_aduana.hashinfo_stream_new(st, self._page_db[0])
        if ret != 0:
            raise AduanaException.from_error(self._page_db[0].error)

        try:
            page_hash = ffi.new('uint64_t *')
            pi = ffi.new('PageInfo **')
            while True:
                ss = self._c_aduana.hashinfo_stream_next(st[0], page_hash, pi)
                if ss != self._c_aduana.stream_state_next:
                    break
                yield PageInfo(page_hash[0], pi[0])
        finally:
            # Also runs when the generator is closed before exhaustion, so
            # the native stream is not leaked by a partial iteration.
            self._c_aduana.hashinfo_stream_delete(st[0])
Пример #2
0
    def iter_page_info(self):
        """Yield a ``PageInfo`` for every entry produced by a hash/info stream.

        A ``HashInfoStream`` is opened on the underlying page database and
        advanced for as long as ``hashinfo_stream_next`` reports
        ``stream_state_next``.  The native stream is released when iteration
        ends for any reason: exhaustion, the consumer abandoning the
        generator early, or an exception raised while iterating.

        Yields:
            PageInfo: built from the page hash and the ``PageInfo *`` written
            by ``hashinfo_stream_next``.

        Raises:
            PageDBException: if the stream cannot be created.
        """
        st = ffi.new('HashInfoStream **')
        ret = self._c_aduana.hashinfo_stream_new(st, self._page_db[0])
        if ret != 0:
            raise PageDBException.from_error(self._page_db[0].error)

        try:
            page_hash = ffi.new('uint64_t *')
            pi = ffi.new('PageInfo **')
            while True:
                ss = self._c_aduana.hashinfo_stream_next(st[0], page_hash, pi)
                if ss != self._c_aduana.stream_state_next:
                    break
                yield PageInfo(page_hash[0], pi[0])
        finally:
            # Also runs when the generator is closed before exhaustion, so
            # the native stream is not leaked by a partial iteration.
            self._c_aduana.hashinfo_stream_delete(st[0])
Пример #3
0
    def __init__(self, page_db, persist=0, path=None):
        """Create a native frequency scheduler bound to *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db`` and a writable ``persist`` attribute.
            persist: value stored both on *page_db* and on the ``persist``
                field of the native scheduler.
            path: forwarded to ``freq_scheduler_new``; ``ffi.NULL`` is sent
                when it is falsy.

        Raises:
            AduanaException: if ``freq_scheduler_new`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._closed = False
        self._page_db = page_db
        self._page_db.persist = persist

        self._sch = ffi.new('FreqScheduler **')
        self._core = SchedulerCore(
            self._sch,
            self._c_aduana.freq_scheduler_add,
            self._c_aduana.freq_scheduler_request
        )

        ret = self._c_aduana.freq_scheduler_new(
            self._sch,
            self._page_db._page_db[0],
            path or ffi.NULL
        )
        if ret != 0:
            # ``self._sch`` is the freshly allocated pointer-to-pointer and is
            # never NULL itself; the slot it points to is what must be
            # checked before reading the scheduler's ``error`` field.
            if self._sch[0] != ffi.NULL:
                raise AduanaException.from_error(self._sch[0].error)
            else:
                raise AduanaException("Error inside freq_scheduler_new", ret)

        self._sch[0].persist = persist
Пример #4
0
    def __init__(self, page_db, persist=0, scorer=None, path=None):
        """Create a native best-first scheduler bound to *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db`` and a writable ``persist`` attribute.
            persist: value stored on *page_db* and passed to
                ``bf_scheduler_set_persist``.
            scorer: optional object; when truthy it is kept as
                ``self._scorer``, its ``setup`` method is called with the
                scheduler's ``scorer`` field and
                ``bf_scheduler_update_start`` is invoked.
            path: forwarded to ``bf_scheduler_new``; ``ffi.NULL`` is sent
                when it is falsy.

        Raises:
            PageDBException: if ``bf_scheduler_new`` or
                ``bf_scheduler_update_start`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._page_db = page_db
        self._page_db.persist = persist

        self._sch = ffi.new('BFScheduler **')
        self._core = SchedulerCore(
            self._sch,
            self._c_aduana.bf_scheduler_add,
            self._c_aduana.bf_scheduler_request
        )

        ret = self._c_aduana.bf_scheduler_new(
            self._sch,
            self._page_db._page_db[0],
            path or ffi.NULL
        )
        if ret != 0:
            # ``self._sch`` is the freshly allocated pointer-to-pointer and is
            # never NULL itself; the slot it points to is what must be
            # checked before reading the scheduler's ``error`` field.
            if self._sch[0] != ffi.NULL:
                raise PageDBException.from_error(self._sch[0].error)
            else:
                raise PageDBException("Error inside bf_scheduler_new", ret)

        self._c_aduana.bf_scheduler_set_persist(self._sch[0], persist)

        if scorer:
            self._scorer = scorer
            self._scorer.setup(self._sch[0].scorer)
            ret = self._c_aduana.bf_scheduler_update_start(self._sch[0])
            if ret != 0:
                raise PageDBException.from_error(self._sch[0].error)
Пример #5
0
    def __init__(self, page_db, persist=0, scorer=None, path=None):
        """Create a native best-first scheduler bound to *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db`` and a writable ``persist`` attribute.
            persist: value stored on *page_db* and passed to
                ``bf_scheduler_set_persist``.
            scorer: optional object; when truthy it is kept as
                ``self._scorer``, its ``setup`` method is called with the
                scheduler's ``scorer`` field and
                ``bf_scheduler_update_start`` is invoked.
            path: forwarded to ``bf_scheduler_new``; ``ffi.NULL`` is sent
                when it is falsy.

        Raises:
            AduanaException: if ``bf_scheduler_new`` or
                ``bf_scheduler_update_start`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._closed = False
        self._page_db = page_db
        self._page_db.persist = persist

        self._sch = ffi.new('BFScheduler **')
        self._core = SchedulerCore(
            self._sch,
            self._c_aduana.bf_scheduler_add,
            self._c_aduana.bf_scheduler_request
        )

        ret = self._c_aduana.bf_scheduler_new(
            self._sch,
            self._page_db._page_db[0],
            path or ffi.NULL
        )
        if ret != 0:
            # ``self._sch`` is the freshly allocated pointer-to-pointer and is
            # never NULL itself; the slot it points to is what must be
            # checked before reading the scheduler's ``error`` field.
            if self._sch[0] != ffi.NULL:
                raise AduanaException.from_error(self._sch[0].error)
            else:
                raise AduanaException("Error inside bf_scheduler_new", ret)

        self._c_aduana.bf_scheduler_set_persist(self._sch[0], persist)

        if scorer:
            self._scorer = scorer
            self._scorer.setup(self._sch[0].scorer)
            ret = self._c_aduana.bf_scheduler_update_start(self._sch[0])
            if ret != 0:
                raise AduanaException.from_error(self._sch[0].error)
Пример #6
0
    def __init__(self, page_db, persist=0, path=None):
        """Create a native frequency scheduler bound to *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db`` and a writable ``persist`` attribute.
            persist: value stored both on *page_db* and on the ``persist``
                field of the native scheduler.
            path: forwarded to ``freq_scheduler_new``; ``ffi.NULL`` is sent
                when it is falsy.

        Raises:
            PageDBException: if ``freq_scheduler_new`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._page_db = page_db
        self._page_db.persist = persist

        self._sch = ffi.new('FreqScheduler **')
        self._core = SchedulerCore(
            self._sch,
            self._c_aduana.freq_scheduler_add,
            self._c_aduana.freq_scheduler_request
        )

        ret = self._c_aduana.freq_scheduler_new(
            self._sch,
            self._page_db._page_db[0],
            path or ffi.NULL
        )
        if ret != 0:
            # ``self._sch`` is the freshly allocated pointer-to-pointer and is
            # never NULL itself; the slot it points to is what must be
            # checked before reading the scheduler's ``error`` field.
            if self._sch[0] != ffi.NULL:
                raise PageDBException.from_error(self._sch[0].error)
            else:
                raise PageDBException("Error inside freq_scheduler_new", ret)

        self._sch[0].persist = persist
Пример #7
0
 def page_info(self, page_hash):
     """Return the ``PageInfo`` stored in the page database for *page_hash*.

     Args:
         page_hash: page identifier; cast to ``uint64_t`` before the call.

     Raises:
         AduanaException: if ``page_db_get_info`` returns non-zero.
     """
     info_ref = ffi.new('PageInfo **')
     lookup = self._c_aduana.page_db_get_info
     status = lookup(
         self._page_db[0],
         ffi.cast('uint64_t', page_hash),
         info_ref
     )
     if status == 0:
         return PageInfo(page_hash, info_ref[0])
     raise AduanaException.from_error(self._page_db[0].error)
Пример #8
0
 def page_info(self, page_hash):
     """Return the ``PageInfo`` stored in the page database for *page_hash*.

     Args:
         page_hash: page identifier; cast to ``uint64_t`` before the call.

     Raises:
         PageDBException: if ``page_db_get_info`` returns non-zero.
     """
     info_ref = ffi.new('PageInfo **')
     lookup = self._c_aduana.page_db_get_info
     status = lookup(
         self._page_db[0],
         ffi.cast('uint64_t', page_hash),
         info_ref
     )
     if status == 0:
         return PageInfo(page_hash, info_ref[0])
     raise PageDBException.from_error(self._page_db[0].error)
Пример #9
0
 def requests(self, n_pages):
     """Request pages from the scheduler and return their URLs.

     Args:
         n_pages: forwarded unchanged to the scheduler's request function.

     Returns:
         list: one ``ffi.string`` result per URL in the native request.

     Raises:
         PageDBException: if the scheduler's request function returns
             non-zero.
     """
     pReq = ffi.new('PageRequest **')
     ret = self._scheduler_request(self._sch[0], n_pages, pReq)
     if ret != 0:
         raise PageDBException.from_error(self._sch[0].error)
     try:
         return [
             ffi.string(pReq[0].urls[i]) for i in xrange(pReq[0].n_urls)
         ]
     finally:
         # Release the native request even if copying a URL raises.
         self._c_aduana.page_request_delete(pReq[0])
Пример #10
0
 def requests(self, n_pages):
     """Request pages from the scheduler and return their URLs.

     Args:
         n_pages: forwarded unchanged to the scheduler's request function.

     Returns:
         list: one ``ffi.string`` result per URL in the native request.

     Raises:
         AduanaException: if the scheduler's request function returns
             non-zero.
     """
     pReq = ffi.new('PageRequest **')
     ret = self._scheduler_request(self._sch[0], n_pages, pReq)
     if ret != 0:
         raise AduanaException.from_error(self._sch[0].error)
     try:
         return [
             ffi.string(pReq[0].urls[i]) for i in xrange(pReq[0].n_urls)
         ]
     finally:
         # Release the native request even if copying a URL raises.
         self._c_aduana.page_request_delete(pReq[0])
Пример #11
0
    def __init__(self, path, persist=0):
        """Open a native page database at *path*.

        Args:
            path: forwarded unchanged to ``page_db_new``.
            persist: assigned to ``self.persist`` once the database has
                been created.

        Raises:
            PageDBException: if ``page_db_new`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._page_db = ffi.new('PageDB **')
        ret = self._c_aduana.page_db_new(self._page_db, path)
        if ret != 0:
            # ``self._page_db`` is the freshly allocated pointer-to-pointer
            # and is never NULL itself; the slot it points to is what must
            # be checked before reading the database's ``error`` field.
            if self._page_db[0] != ffi.NULL:
                raise PageDBException.from_error(self._page_db[0].error)
            else:
                raise PageDBException("Error inside page_db_new", ret)

        self.persist = persist
Пример #12
0
    def __init__(self, path, persist=0):
        """Open a native page database at *path*.

        Args:
            path: forwarded unchanged to ``page_db_new``.
            persist: assigned to ``self.persist`` once the database has
                been created.

        Raises:
            AduanaException: if ``page_db_new`` returns non-zero.
        """
        # save to make sure lib is available at destruction time
        self._c_aduana = C_ADUANA

        self._closed = False
        self._page_db = ffi.new('PageDB **')
        ret = self._c_aduana.page_db_new(self._page_db, path)
        if ret != 0:
            # ``self._page_db`` is the freshly allocated pointer-to-pointer
            # and is never NULL itself; the slot it points to is what must
            # be checked before reading the database's ``error`` field.
            if self._page_db[0] != ffi.NULL:
                raise AduanaException.from_error(self._page_db[0].error)
            else:
                raise AduanaException("Error inside page_db_new", ret)

        self.persist = persist
Пример #13
0
    def load(self, freq_iter):
        """Write every ``(page_hash, page_freq)`` pair through one cursor.

        A cursor is opened on the scheduler, each pair from *freq_iter* is
        written with the hash cast to ``uint64_t``, and the cursor is then
        committed.

        Args:
            freq_iter: iterable of ``(page_hash, page_freq)`` pairs.

        Raises:
            PageDBException: if opening or committing the cursor returns
                non-zero.
        """
        cursor_ref = ffi.new('void **')
        lib = self._c_aduana
        if lib.freq_scheduler_cursor_open(self._sch[0], cursor_ref) != 0:
            raise PageDBException.from_error(self._sch[0].error)

        # NOTE(review): the result of freq_scheduler_cursor_write is not
        # inspected here -- confirm whether it can report an error.
        for entry in freq_iter:
            page_hash, page_freq = entry
            hash_value = ffi.cast('uint64_t', page_hash)
            lib.freq_scheduler_cursor_write(
                self._sch[0], cursor_ref[0], hash_value, page_freq)

        status = lib.freq_scheduler_cursor_commit(self._sch[0], cursor_ref[0])
        if status != 0:
            raise PageDBException.from_error(self._sch[0].error)
Пример #14
0
    def load(self, freq_iter):
        """Write every ``(page_hash, page_freq)`` pair through one cursor.

        A cursor is opened on the scheduler, each pair from *freq_iter* is
        written with the hash cast to ``uint64_t``, and the cursor is then
        committed.

        Args:
            freq_iter: iterable of ``(page_hash, page_freq)`` pairs.

        Raises:
            AduanaException: if opening or committing the cursor returns
                non-zero.
        """
        cursor_ref = ffi.new('void **')
        lib = self._c_aduana
        if lib.freq_scheduler_cursor_open(self._sch[0], cursor_ref) != 0:
            raise AduanaException.from_error(self._sch[0].error)

        # NOTE(review): the result of freq_scheduler_cursor_write is not
        # inspected here -- confirm whether it can report an error.
        for entry in freq_iter:
            page_hash, page_freq = entry
            hash_value = ffi.cast('uint64_t', page_hash)
            lib.freq_scheduler_cursor_write(
                self._sch[0], cursor_ref[0], hash_value, page_freq)

        status = lib.freq_scheduler_cursor_commit(self._sch[0], cursor_ref[0])
        if status != 0:
            raise AduanaException.from_error(self._sch[0].error)
Пример #15
0
    def __init__(self, page_db):
        """Allocate a native ``PageRankScorer`` for *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db``.
        """
        lib = C_ADUANA
        self._c_aduana = lib

        self._closed = False
        scorer_ref = ffi.new('PageRankScorer **')
        self._scorer = scorer_ref
        # NOTE(review): the result of page_rank_scorer_new is discarded --
        # confirm whether it can signal a failure.
        native_db = page_db._page_db[0]
        lib.page_rank_scorer_new(scorer_ref, native_db)
Пример #16
0
    def __init__(self, page_db):
        """Allocate a native ``HitsScorer`` for *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db``.
        """
        lib = C_ADUANA
        self._c_aduana = lib

        self._closed = False
        scorer_ref = ffi.new('HitsScorer **')
        self._scorer = scorer_ref
        # NOTE(review): the result of hits_scorer_new is discarded --
        # confirm whether it can signal a failure.
        native_db = page_db._page_db[0]
        lib.hits_scorer_new(scorer_ref, native_db)
Пример #17
0
    def __init__(self, page_db):
        """Allocate a native ``HitsScorer`` for *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db``.
        """
        lib = C_ADUANA
        self._c_aduana = lib

        scorer_ref = ffi.new('HitsScorer **')
        self._scorer = scorer_ref
        # NOTE(review): the result of hits_scorer_new is discarded --
        # confirm whether it can signal a failure.
        native_db = page_db._page_db[0]
        lib.hits_scorer_new(scorer_ref, native_db)
Пример #18
0
    def __init__(self, page_db):
        """Allocate a native ``PageRankScorer`` for *page_db*.

        Args:
            page_db: wrapper object exposing its native handle as
                ``_page_db``.
        """
        lib = C_ADUANA
        self._c_aduana = lib

        scorer_ref = ffi.new('PageRankScorer **')
        self._scorer = scorer_ref
        # NOTE(review): the result of page_rank_scorer_new is discarded --
        # confirm whether it can signal a failure.
        native_db = page_db._page_db[0]
        lib.page_rank_scorer_new(scorer_ref, native_db)