Example #1
def get_info(widgets, url, opener):
    debug = widgets.debug
    html = yield asyncjobs.ProgressDownloadThreadedTask(
        url, opener, headers=HEADERS,
        elapsed_cb=functools.partial(on_elapsed, widgets, "info"))
    try:
        info = pysheng.get_info(html)
    except ValueError as detail:
        debug("Error parsing page HTML: %s" % str(detail))
        raise
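
Example #1 is a generator-based job: it yields a ProgressDownloadThreadedTask to the asyncjobs scheduler, which performs the download off the main loop and resumes the generator with the fetched HTML as the value of the yield expression (the parsed info then reaches callers through the `_from` delegation seen in Example #2). Below is a minimal, synchronous sketch of that driving pattern; `FakeTask` and `run_job` are illustrative names, not part of asyncjobs.

# Minimal illustration of the generator-as-coroutine pattern used above.
# FakeTask and run_job are hypothetical stand-ins for asyncjobs internals.
class FakeTask(object):
    def __init__(self, url):
        self.url = url

    def run(self):
        # A real task would download self.url in a worker thread; fake it.
        return "<html>contents of %s</html>" % self.url


def run_job(generator):
    """Drive a job generator: run each yielded task and send its result
    back into the generator as the value of the `yield` expression."""
    try:
        task = next(generator)             # advance to the first yield
        while True:
            result = task.run()            # execute the yielded task
            task = generator.send(result)  # resume the job with the result
    except StopIteration:
        pass


def job(url):
    html = yield FakeTask(url)  # same shape as get_info above
    print("got %d bytes" % len(html))


run_job(job("http://example.com"))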
Example #2
def download_book(widgets, state, url, page_start=0, page_end=None):
    """Yield (info, page, image_data) for pages from page_start to page_end"""
    try:
        set_sensitivity(widgets,
                        start=False,
                        pause=True,
                        cancel=True,
                        browse_destdir=False,
                        page_start=False,
                        page_end=False)
        destdir = widgets.destdir.get_text()
        debug = widgets.debug
        set_sensitivity(widgets, check=False, savepdf=False)

        debug("Output directory: %s" % destdir)
        debug("Page_start: %s, Page end: %s" %
              (adj_int(page_start, +1, 1), adj_int(page_end, +1, "last")))
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        widgets.progress_all.set_fraction(0.0)
        widgets.progress_all.set_text('')
        widgets.progress_current.set_pulse_step(0.04)
        state.downloaded_images = None
        info = yield _from(get_info(widgets, cover_url, opener))

        if not widgets.page_start.get_text():
            widgets.page_start.set_text(str(1))
        if not widgets.page_end.get_text():
            widgets.page_end.set_text(str(len(info["page_ids"])))
        page_ids = info["page_ids"][page_start:adj_int(page_end, +1)]
        namespace = dict(title=info["title"], attribution=info["attribution"])
        images = []

        for page, page_id in enumerate(page_ids, page_start):
            if namespace["attribution"]:
                image_file_template = "%(attribution)s - %(title)s - %(page)03d"
            else:
                image_file_template = "%(title)s - %(page)03d"
            filename0 = image_file_template % dict(namespace, page=page + 1)
            filename = string_to_valid_filename(filename0.encode("utf-8"), 240)
            output_path = os.path.join(destdir, filename)
            existing_files = glob.glob(escape_glob(output_path) + ".*")
            if existing_files:
                debug("Skip existing image: %s" % existing_files[0])
                images.append(output_path)
                continue
            relative_page = page - page_start + 1
            widgets.progress_all.set_fraction(
                float(relative_page - 1) / len(page_ids))
            widgets.progress_all.set_text(
                "Total: %d%%" %
                (int(100 * float(relative_page - 1) / len(page_ids))))
            header = "[%d/%d] " % (relative_page, len(page_ids))
            debug(header + "Start page: %d (page_id: %s)" %
                  (page + 1, page_id))
            page_url = pysheng.get_page_url(info["prefix"], page_id)
            debug(header + "Download page contents: %s" % (page_url))
            widgets.progress_current.set_fraction(0.0)
            page_html = yield asyncjobs.ProgressDownloadThreadedTask(
                page_url,
                opener,
                headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "page"))

            image_url0 = pysheng.get_image_url_from_page(page_html)
            if not image_url0:
                debug("No image for this page, probably access is restricted")
                continue
            width, height = info["max_resolution"]
            image_url = re.sub(r"w=(\d+)", "w=" + str(width), image_url0)
            debug(header + "Download page image: %s" % image_url)
            widgets.progress_current.set_fraction(0.0)
            image_data = yield asyncjobs.ProgressDownloadThreadedTask(
                image_url,
                opener,
                headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "image"))
            image_format = imghdr.what(StringIO.StringIO(image_data)) or "png"
            debug(header + "Image downloaded (size=%d, format=%s)" %
                  (len(image_data), image_format))
            output_path_with_extension = output_path + "." + image_format
            createfile(output_path_with_extension, image_data)
            debug(header + "Image written: %s" % output_path_with_extension)
            images.append(output_path_with_extension)

        widgets.progress_all.set_fraction(1.0)
        widgets.progress_all.set_text("Done")
        debug("Done!")
        restart_buttons(widgets)
        state.downloaded_images = images

        if namespace["attribution"]:
            state.pdf_filename = "%(attribution)s - %(title)s.pdf" % namespace
        else:
            state.pdf_filename = "%(title)s.pdf" % namespace
        set_sensitivity(widgets, savepdf=True)
    except asyncjobs.JobCancelled:
        return
    except Exception as detail:
        traceback.print_exc()
        debug("job error: %s" % detail)
        restart_buttons(widgets)
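
The `info = yield _from(get_info(widgets, cover_url, opener))` line is worth a note: Python 2 has no `yield from`, so `_from` presumably wraps a sub-generator so the scheduler can run it to completion and hand its result back to the caller. A rough sketch of how such delegation can be driven follows; the `SubJob`/`Return` wrappers and the `run` trampoline are assumptions for illustration, not the actual asyncjobs API.

# Hypothetical sketch of sub-generator delegation without `yield from`
# (which Python 2 lacks). None of these names come from asyncjobs.
class SubJob(object):
    """Marks a yielded value as a sub-generator to run to completion."""
    def __init__(self, generator):
        self.generator = generator


class Return(object):
    """Yielded by a sub-generator to pass a result back to its caller."""
    def __init__(self, value):
        self.value = value


def run(generator):
    stack, value = [generator], None
    while stack:
        try:
            yielded = stack[-1].send(value)
        except StopIteration:
            stack.pop()
            value = None
            continue
        if isinstance(yielded, SubJob):
            stack.append(yielded.generator)  # delegate: drive the child
            value = None
        elif isinstance(yielded, Return):
            stack.pop()                      # child is done
            value = yielded.value            # caller resumes with the result
        else:
            value = yielded                  # a real task would run here


def child():
    yield Return(42)


def parent():
    result = yield SubJob(child())  # plays the role of `yield _from(...)`
    print("child returned %s" % result)


run(parent())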
Example #3
def threaded_task(state, loop, url):
    state.result = None
    cb = functools.partial(elapsed_cb, state)
    state.callback = []
    state.result = yield asyncjobs.ProgressDownloadThreadedTask(
        url, elapsed_cb=cb)
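
Example #3, which looks like a test helper, exercises the `elapsed_cb` hook: the task invokes the callback periodically while the download runs, which is how the GUI examples above update their progress bars through `on_elapsed`. A hypothetical sketch of the idea behind such a task, assuming a `(bytes_downloaded, total_or_None)` callback signature that is not taken from asyncjobs:

# Hypothetical sketch of the progress-callback idea behind
# ProgressDownloadThreadedTask; not the actual asyncjobs code.
import threading
import urllib2  # Python 2 stdlib (urllib.request on Python 3)


def download_with_progress(url, elapsed_cb, chunk_size=16 * 1024):
    """Read `url` in chunks, reporting progress after each chunk."""
    response = urllib2.urlopen(url)
    total = response.info().getheader("Content-Length")  # may be None
    chunks, downloaded = [], 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        chunks.append(chunk)
        downloaded += len(chunk)
        elapsed_cb(downloaded, int(total) if total else None)
    return "".join(chunks)


# The "threaded" part: run the download off the main (GUI) loop.
thread = threading.Thread(target=download_with_progress,
                          args=("http://example.com/",
                                lambda done, total: None))
thread.start()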