Пример #1
0
        def got_coll_params(params):
            """Merge collection parameters into the fetcher and start fetching.

            This is a closure: ``self``, ``template_exclusion_category``,
            ``print_template_pattern`` and ``pages`` come from the enclosing
            scope, which is not part of this function.

            ``params`` is merged straight into ``self.__dict__``, so every key
            it contains becomes (or overwrites) an instance attribute.
            """
            self.__dict__.update(params)

            # Explicitly supplied values take precedence over whatever the
            # merged params provided; falsy values leave the params untouched.
            if template_exclusion_category:
                self.template_exclusion_category = template_exclusion_category
            if print_template_pattern:
                self.print_template_pattern = print_template_pattern

            # A template maker only exists when a pattern is configured.
            if not self.print_template_pattern:
                self.make_print_template = None
            else:
                self.make_print_template = utils.get_print_template_maker(
                    self.print_template_pattern)

            titles, revids = self._split_titles_revids(pages)

            # Same four calls, in the same order, as a job table. The method
            # is looked up right before each call.
            jobs = (
                ("fetch_html", "page", titles),
                ("fetch_html", "oldid", revids),
                ("fetch_used", "titles", titles),
                ("fetch_used", "revids", revids),
            )
            for method_name, kind, items in jobs:
                getattr(self, method_name)(kind, items)

            self.report()
            self.dispatch()
Пример #2
0
    def __init__(self, api, fsout, pages, licenses,
                 status=None,
                 progress=None,
                 print_template_pattern=None,
                 template_exclusion_category=None,
                 cover_image=None,
                 imagesize=800, fetch_images=True):
        """Initialise fetcher state and schedule the initial fetch jobs.

        Arguments:
          api -- API client; its ``apiurl`` keys the api cache and its
              ``report`` attribute is replaced with ``self.report``.
          fsout -- output object; receives the site info through
              ``write_siteinfo``.
          pages -- handed to ``self._split_titles_revids`` to obtain the
              titles and revision ids to fetch.
          licenses -- stored unchanged.
          status -- stored; also passed to ``shared_progress`` when no
              ``progress`` object is given.
          progress -- progress tracker; a new ``shared_progress`` is created
              when this is falsy.
          print_template_pattern -- when truthy, overrides the value obtained
              from the collection params.
          template_exclusion_category -- when truthy, overrides the value
              obtained from the collection params.
          cover_image, imagesize, fetch_images -- stored unchanged.
        """
        self.dispatch_event = gevent.event.Event()
        self.api_semaphore = gevent.coros.Semaphore(20)

        # Defaults; may be replaced below by the collection params and then
        # by the explicit keyword arguments.
        self.print_template_pattern = None
        self.template_exclusion_category = None
        self.template_blacklist = None
        self.cover_image = cover_image

        self.pages = pages

        self.image_download_pool = gevent.pool.Pool(10)

        self.fatal_error = "stopped by signal"

        self.api = api
        self.api.report = self.report
        self.api_cache = {self.api.apiurl: self.api,}

        self.fsout = fsout
        self.licenses = licenses
        self.status = status
        self.progress = progress or shared_progress(status=status)

        self.imagesize = imagesize
        self.fetch_images = fetch_images

        self.scheduled = set()

        self.count_total = 0
        self.count_done = 0
        self.redirects = {}
        self.cat2members = {}

        self.img_max_retries = 2

        self.title2latest = {}

        self.pages_todo = []
        self.revids_todo = []
        self.imageinfo_todo = []
        self.imagedescription_todo = {}  # base path -> list
        self._nshandler = None

        siteinfo = self.get_siteinfo_for(self.api)
        self.fsout.write_siteinfo(siteinfo)
        self.nshandler = nshandling.nshandler(siteinfo)

        # Every key of params becomes (or overwrites) an instance attribute.
        params = mwapi.get_collection_params(api)
        self.__dict__.update(params)
        if template_exclusion_category:
            self.template_exclusion_category = template_exclusion_category

        if print_template_pattern:
            self.print_template_pattern = print_template_pattern

        # Validate the category only now that its effective value is known.
        # This check used to run right after the attribute was reset to None
        # and therefore could never fire.
        category = self.template_exclusion_category
        if category:
            # 14 is passed as the default namespace; any other result means
            # the name does not resolve to a category.
            ns = self.nshandler.splitname(category, 14)[0]
            if ns != 14:
                # Single parenthesised expression: same output as the former
                # print statement, and also valid under Python 3.
                print("bad category name: %r" % (category,))

        if self.print_template_pattern:
            self.make_print_template = utils.get_print_template_maker(self.print_template_pattern)
        else:
            self.make_print_template = None

        titles, revids = self._split_titles_revids(pages)

        # The pools must exist before the first _refcall below.
        self.pool = gevent.pool.Pool()
        self.refcall_pool = gevent.pool.Pool(1024)

        self._refcall(self.fetch_html, "page", titles)
        self._refcall(self.fetch_html, "oldid", revids)

        self._refcall(self.fetch_used, "titles", titles)
        self._refcall(self.fetch_used, "revids", revids)
Пример #3
0
 def set_make_print_template(self):
     """Set ``self.make_print_template`` from ``self.nfo``.

     Reads the ``"print_template_pattern"`` entry of ``self.nfo``. A maker
     built by ``utils.get_print_template_maker`` is installed only when the
     pattern is truthy and contains the literal ``"$1"`` (presumably the
     placeholder for the template name -- confirm against ``utils``).
     Otherwise the attribute is set to ``None``.
     """
     pattern = self.nfo.get("print_template_pattern")
     if not pattern or "$1" not in pattern:
         self.make_print_template = None
         return
     self.make_print_template = utils.get_print_template_maker(pattern)
Пример #4
0
 def set_make_print_template(self):
     """Derive the print-template maker from the configured pattern.

     The pattern is looked up under ``"print_template_pattern"`` in
     ``self.nfo``. It is considered usable only when it is truthy and
     contains the literal ``"$1"``; in that case
     ``utils.get_print_template_maker`` builds the maker. In every other
     case ``self.make_print_template`` becomes ``None``.
     """
     pattern = self.nfo.get("print_template_pattern")
     # Short-circuits on a missing/empty pattern before the substring test.
     usable = bool(pattern) and "$1" in pattern
     self.make_print_template = (
         utils.get_print_template_maker(pattern) if usable else None
     )