Пример #1
0
def wikibooks_espri(wiki_url):
    """Convert a wikibook to an epub using the external wikibooks2epub script.

    The script (by Jan Gerber) is run as a separate process, wrapped in
    the configured timeout command.  It writes the epub into a freshly
    created working directory under ``config.DATA_ROOT/tmp``; that epub
    can then be turned into a bookizip via the espri function.

    Side effects: sets ``oxCACHE`` and ``LANG`` in ``os.environ`` for the
    whole process, creates the working directory, and logs the command,
    the environment and the current directory.

    Raises TimeoutError when the command exits with status 137, and
    re-raises CalledProcessError for any other non-zero exit status.
    """
    os.environ['oxCACHE'] = os.path.abspath(config.WIKIBOOKS_CACHE)
    os.environ['LANG'] = 'en_NZ.UTF-8'

    # The last URL path component names the book; it comes from the caller,
    # so it is passed through super_bleach before being used in file names.
    tainted_name = unquote(os.path.basename(urlsplit(wiki_url).path))
    bookid = "%s-%s" % (super_bleach(tainted_name),
                        time.strftime('%Y.%m.%d-%H.%M.%S'))
    workdir = tempfile.mkdtemp(prefix=bookid,
                               dir=os.path.join(config.DATA_ROOT, "tmp"))
    # mkdtemp creates the directory owner-only; widen it to rwxr-xr-x.
    os.chmod(workdir, 0o755)
    epub_file = os.path.join(workdir, bookid + '.epub')
    # NOTE(review): epub_url is not used in the lines visible here --
    # confirm whether a later step consumes it.
    epub_url = path2url(epub_file)

    #the wikibooks importer is a separate process, so run that, then collect the epub.
    cmd = [
        config.TIMEOUT_CMD, config.WIKIBOOKS_TIMEOUT, config.WIKIBOOKS_CMD,
        '-i', wiki_url, '-o', epub_file
    ]
    log(cmd)
    log(os.environ)
    log(os.getcwd())

    try:
        check_call(cmd)
    except CalledProcessError as e:
        # 137 is treated as "killed by the timeout wrapper" (presumably
        # 128 + SIGKILL -- confirm against config.TIMEOUT_CMD).
        if e.returncode == 137:
            # Read the limit from config, exactly as the command line does,
            # so building the message cannot itself fail with a NameError.
            raise TimeoutError('Wikibooks took too long (over %s seconds)' %
                               config.WIKIBOOKS_TIMEOUT)
        raise
Пример #2
0
def wikibooks_espri(wiki_url):
    """Wikibooks import using the wikibooks2epub script by Jan Gerber.

    The wikibook is first converted to an epub by running the script as
    a separate, time-limited process; the epub can then be turned into a
    bookizip via the espri function.

    The epub is written to a new directory created under
    ``config.DATA_ROOT/tmp``.  ``os.environ`` is modified (``oxCACHE``
    and ``LANG``) and stays modified after the call.

    Raises TimeoutError if the command exits with status 137; any other
    CalledProcessError is propagated unchanged.
    """
    os.environ['oxCACHE'] = os.path.abspath(config.WIKIBOOKS_CACHE)
    os.environ['LANG'] = 'en_NZ.UTF-8'

    # Book name = last component of the URL path, sanitised by super_bleach
    # because it ends up in directory and file names.
    tainted_name = unquote(os.path.basename(urlsplit(wiki_url).path))
    bookid = "%s-%s" % (super_bleach(tainted_name),
                        time.strftime('%Y.%m.%d-%H.%M.%S'))
    workdir = tempfile.mkdtemp(prefix=bookid, dir=os.path.join(config.DATA_ROOT, "tmp"))
    # mkdtemp makes the directory private to the owner; open it up to 0755.
    os.chmod(workdir, 0o755)
    epub_file = os.path.join(workdir, bookid + '.epub')
    # NOTE(review): epub_url has no reader in the visible lines -- verify
    # whether the function continues beyond this excerpt.
    epub_url = path2url(epub_file)

    #the wikibooks importer is a separate process, so run that, then collect the epub.
    cmd = [config.TIMEOUT_CMD, config.WIKIBOOKS_TIMEOUT,
           config.WIKIBOOKS_CMD,
           '-i', wiki_url,
           '-o', epub_file
           ]
    log(cmd)
    log(os.environ)
    log(os.getcwd())

    try:
        check_call(cmd)
    except CalledProcessError as e:
        # Exit status 137 is interpreted as the timeout wrapper having
        # killed the importer (presumably 128 + SIGKILL -- TODO confirm).
        if e.returncode == 137:
            # Use config.WIKIBOOKS_TIMEOUT, the same name the command line
            # uses, so the message cannot fail on an unbound bare name.
            raise TimeoutError('Wikibooks took too long (over %s seconds)'
                               % config.WIKIBOOKS_TIMEOUT)
        raise
Пример #3
0
    def get_boilerplate(self, requested):
        """Copy the boilerplate templates into self.tmpdir and return their URLs.

        ``requested`` is looked up in ``config.BOILERPLATE_HTML``; a missing
        key falls back to ``config.DEFAULT_BOILERPLATE_HTML``.  The entry is
        unpacked as a (footer template, header template) pair, either of
        which may be None.

        Returns a two-item list ``[footer url, header url]`` with None in
        the place of any template that was None.
        """
        footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(
            requested, config.DEFAULT_BOILERPLATE_HTML)
        html = []
        for fn in (footer_tmpl, header_tmpl):
            if fn is None:
                html.append(None)
                continue

            # Context managers close the files even if read/write raises.
            with open(fn) as src:
                s = src.read()

            #XXX can manipulate footer here, for CSS etc

            # The copy keeps only the template's basename, so two templates
            # with the same basename would land on the same file.
            fn2 = os.path.join(self.tmpdir, os.path.basename(fn))
            with open(fn2, 'w') as dest:
                dest.write(s)
            html.append(path2url(fn2, full=True))

        return html
Пример #4
0
    def get_boilerplate(self, requested):
        """Copy the boilerplate templates into self.workdir and return their URLs.

        ``requested`` selects an entry of ``config.BOILERPLATE_HTML``
        (default: ``config.DEFAULT_BOILERPLATE_HTML``), unpacked as a
        (footer template, header template) pair.  Template paths are
        joined onto ``config.TEMPLATE_ROOT``; either may be None.

        Returns a two-item list ``[footer url, header url]``; an item is
        None when the corresponding template is None.
        """
        footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(requested, config.DEFAULT_BOILERPLATE_HTML)
        html = []
        for templ_path in (footer_tmpl, header_tmpl):
            if templ_path is None:
                html.append(None)
                continue

            # "with" guarantees the handle is closed even if read() fails.
            with open(os.path.join(config.TEMPLATE_ROOT, templ_path)) as template_file:
                template_text = template_file.read()

            #XXX can manipulate footer here, for CSS etc

            # Only the basename is kept for the copy in the work directory.
            out_path = os.path.join(self.workdir, os.path.basename(templ_path))
            with open(out_path, 'w') as out_file:
                out_file.write(template_text)

            html.append(path2url(out_path))

        return html
Пример #5
0
    def get_boilerplate(self, requested):
        """Return [footer url, header url] for the requested boilerplate.

        The (footer, header) template pair is taken from
        ``config.BOILERPLATE_HTML[requested]``, falling back to
        ``config.DEFAULT_BOILERPLATE_HTML``.  Each template that is not
        None is read from under ``config.TEMPLATE_ROOT``, written to
        ``self.workdir`` under its basename, and represented in the
        result by ``path2url`` of that copy; a None template yields None.
        """
        footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(
            requested, config.DEFAULT_BOILERPLATE_HTML)
        html = []
        for templ_path in (footer_tmpl, header_tmpl):
            if templ_path is None:
                html.append(None)
                continue

            # Files are handled with "with" so they are closed on any error.
            source_path = os.path.join(config.TEMPLATE_ROOT, templ_path)
            with open(source_path) as source:
                template_text = source.read()

            #XXX can manipulate footer here, for CSS etc

            out_path = os.path.join(self.workdir,
                                    os.path.basename(templ_path))
            with open(out_path, 'w') as target:
                target.write(template_text)

            html.append(path2url(out_path))

        return html
Пример #6
0
    def make_raw_pdf(self, html, pdf, outline=False, outline_file=None, page_num=None):
        """Render the html file to pdf, laid out in self.columns columns.

        With one column, the engine-specific command builder (the method
        named ``_<engine>_command``) is called and its command is run.
        With more columns, a temporary PageSettings renders a narrower
        single-column pdf, which pdfnup then tiles side by side into pdf.
        """
        if self.columns == 1:
            source_url = path2url(html)
            build_command = getattr(self, '_%s_command' % self.engine)
            run(build_command(source_url, pdf, outline=outline,
                              outline_file=outline_file, page_num=page_num))
            return

        #For multiple columns, generate a narrower single column pdf, and
        #paste it into columns using pdfnup.
        #
        # The debug dump below logs every bound local by name, so the four
        # locals computed here keep their names and no others are added
        # before the dump.
        printable_width = self.width - 2.0 * self.side_margin - self.gutter
        column_width = (
            printable_width - (self.columns - 1) * self.column_margin
        ) / self.columns
        page_width = column_width + self.column_margin
        side_margin = self.column_margin * 0.5
        if 'PDFGEN' in config.DEBUG_MODES:
            log("making columns with:")
            for name, value in locals().iteritems():
                log("%s: %r" % (name, value))
            for attr in ('width', 'side_margin', 'gutter', 'column_margin', 'columns', 'height'):
                log("self.%s: %r" % (attr, getattr(self, attr)))

        # A throwaway PageSettings whose page is one column wide.
        columnmaker = PageSettings(
            self.workdir,
            (page_width, self.height),
            gutter=0,
            top_margin=self.top_margin,
            side_margin=side_margin,
            bottom_margin=self.bottom_margin,
            grey_scale=self.grey_scale,
            engine=self.engine,
        )

        # Swap the last four characters of the name (assumed to be ".pdf").
        single_column_pdf = pdf[:-4] + '-single-column.pdf'
        columnmaker.make_raw_pdf(html, single_column_pdf, outline=outline,
                                 outline_file=outline_file, page_num=None)
        columnmaker.reshape_pdf(single_column_pdf)

        # pdfnup seems to round down to an even number of output
        # pages.  For example, if a book fills 13 pages, it will
        # clip it to 12.  So blank pages are appended until the input
        # page count is a multiple of (self.columns * 2).
        leftover = count_pdf_pages(single_column_pdf) % (self.columns * 2)
        padding = self.columns * 2 - leftover if leftover else 0

        run([
            config.PDFNUP,
            '--nup', '%sx1' % int(self.columns),
            #'--paper', papersize.lower() + 'paper',
            '--outfile', pdf,
            '--noautoscale', 'true',
            '--orient', 'portrait',
            '--paperwidth', '%smm' % int(self.width * POINT_2_MM),
            '--paperheight', '%smm' % int(self.height * POINT_2_MM),
            #'--tidy', 'false',
            # each ",{}" asks pdfnup for one blank page
            '--pages', '1-last' + ',{}' * padding,
            #'--columnstrict', 'true',
            #'--column', 'true',
            single_column_pdf,
        ])
Пример #7
0
    def make_raw_pdf(self, html, pdf, outline=False, outline_file=None, page_num=None):
        """Produce pdf from the html file, using self.columns columns.

        Single column: run the command returned by the engine's
        ``_<engine>_command`` method.  Multiple columns: render a narrow
        one-column pdf through a temporary PageSettings, then combine
        its pages side by side with pdfnup to make pdf.
        """
        if self.columns == 1:
            page_url = path2url(html, full=True)
            command_for_engine = getattr(self, '_%s_command' % self.engine)
            run(command_for_engine(page_url, pdf, outline=outline,
                                   outline_file=outline_file,
                                   page_num=page_num))
            return

        #For multiple columns, generate a narrower single column pdf, and
        #paste it into columns using pdfnup.
        #
        # These four locals are reported by name in the debug dump that
        # follows (it iterates locals()), so they are kept exactly as named
        # and nothing else is bound before the dump.
        printable_width = self.width - 2.0 * self.side_margin - self.gutter
        column_width = (
            printable_width - (self.columns - 1) * self.column_margin
        ) / self.columns
        page_width = column_width + self.column_margin
        side_margin = self.column_margin * 0.5
        if 'PDFGEN' in config.DEBUG_MODES:
            log("making columns with:")
            for local_name, local_value in locals().iteritems():
                log("%s: %r" % (local_name, local_value))
            for field in ('width', 'side_margin', 'gutter', 'column_margin', 'columns', 'height'):
                log("self.%s: %r" % (field, getattr(self, field)))

        # Page settings for a page that is a single column wide.
        columnmaker = PageSettings(
            self.tmpdir,
            (page_width, self.height),
            gutter=0,
            top_margin=self.top_margin,
            side_margin=side_margin,
            bottom_margin=self.bottom_margin,
            grey_scale=self.grey_scale,
            engine=self.engine,
        )

        # Drops the final four characters of pdf (assumed to be ".pdf").
        narrow_pdf = pdf[:-4] + '-single-column.pdf'
        columnmaker.make_raw_pdf(html, narrow_pdf, outline=outline,
                                 outline_file=outline_file, page_num=None)
        columnmaker.reshape_pdf(narrow_pdf)

        # pdfnup seems to round down to an even number of output
        # pages.  For example, if a book fills 13 pages, it will
        # clip it to 12.  So the input is padded with blank pages up to
        # a multiple of (self.columns * 2) pages.
        remainder = count_pdf_pages(narrow_pdf) % (self.columns * 2)
        blank_pages = self.columns * 2 - remainder if remainder else 0

        run([
            PDFNUP,
            '--nup', '%sx1' % int(self.columns),
            #'--paper', papersize.lower() + 'paper',
            '--outfile', pdf,
            '--noautoscale', 'true',
            '--orient', 'portrait',
            '--paperwidth', '%smm' % int(self.width * POINT_2_MM),
            '--paperheight', '%smm' % int(self.height * POINT_2_MM),
            #'--tidy', 'false',
            # one ",{}" per blank page to append
            '--pages', '1-last' + ',{}' * blank_pages,
            #'--columnstrict', 'true',
            #'--column', 'true',
            narrow_pdf,
        ])