def wikibooks_espri(wiki_url):
    """Wikibooks import using the wikibooks2epub script by Jan Gerber
    to first convert the wikibook to an epub, which can then be turned
    into a bookizip via the espri function.

    wiki_url is the address of the wikibook.  The last component of
    its path, unquoted and passed through super_bleach, is combined
    with a timestamp to name both a fresh working directory under
    <config.DATA_ROOT>/tmp and the epub file written inside it.

    Side effects: sets the oxCACHE and LANG environment variables for
    this process (and therefore for the child process).

    Raises TimeoutError if the converter exits with status 137; any
    other CalledProcessError is re-raised unchanged.
    """
    os.environ['oxCACHE'] = os.path.abspath(config.WIKIBOOKS_CACHE)
    os.environ['LANG'] = 'en_NZ.UTF-8'

    # The name comes straight from the URL, so it is cleaned before it
    # is used in file names.
    tainted_name = unquote(os.path.basename(urlsplit(wiki_url).path))
    bookid = "%s-%s" % (super_bleach(tainted_name),
                        time.strftime('%Y.%m.%d-%H.%M.%S'))
    workdir = tempfile.mkdtemp(prefix=bookid,
                               dir=os.path.join(config.DATA_ROOT, "tmp"))
    # mkdtemp creates the directory owner-only; widen it to 0755.
    os.chmod(workdir, 0o755)
    epub_file = os.path.join(workdir, bookid + '.epub')
    # NOTE(review): epub_url is not used in the part of the function
    # visible here; presumably it is handed on to espri -- confirm.
    epub_url = path2url(epub_file)

    #the wikibooks importer is a separate process, so run that, then
    #collect the epub.
    cmd = [config.TIMEOUT_CMD, config.WIKIBOOKS_TIMEOUT,
           config.WIKIBOOKS_CMD,
           '-i', wiki_url,
           '-o', epub_file,
           ]
    log(cmd)
    log(os.environ)
    log(os.getcwd())

    try:
        check_call(cmd)
    except CalledProcessError as e:
        # Status 137 is treated as "killed by the timeout wrapper"
        # (presumably 128 + SIGKILL -- confirm against TIMEOUT_CMD).
        if e.returncode == 137:
            raise TimeoutError('Wikibooks took too long (over %s seconds)'
                               % config.WIKIBOOKS_TIMEOUT)
        raise
def wikibooks_espri(wiki_url):
    """Wikibooks import using the wikibooks2epub script by Jan Gerber
    to first convert the wikibook to an epub, which can then be turned
    into a bookizip via the espri function.

    The book id is built from the cleaned-up final path component of
    wiki_url plus the current time; it names a new working directory
    below <config.DATA_ROOT>/tmp and the '.epub' file inside it.

    The oxCACHE and LANG environment variables of the current process
    are overwritten before the converter is started.

    Raises TimeoutError when the converter command exits with status
    137, and lets every other CalledProcessError propagate.
    """
    os.environ['oxCACHE'] = os.path.abspath(config.WIKIBOOKS_CACHE)
    os.environ['LANG'] = 'en_NZ.UTF-8'

    # wiki_url is outside input: only the bleached form of its last
    # path component ever reaches the file system.
    tainted_name = unquote(os.path.basename(urlsplit(wiki_url).path))
    timestamp = time.strftime('%Y.%m.%d-%H.%M.%S')
    bookid = "%s-%s" % (super_bleach(tainted_name), timestamp)

    tmp_root = os.path.join(config.DATA_ROOT, "tmp")
    workdir = tempfile.mkdtemp(prefix=bookid, dir=tmp_root)
    # mkdtemp makes the directory private to the owner; set it to 0755.
    os.chmod(workdir, 0o755)

    epub_file = os.path.join(workdir, bookid + '.epub')
    # NOTE(review): unused in the visible body; the docstring suggests a
    # later hand-off to espri -- confirm before removing.
    epub_url = path2url(epub_file)

    #the wikibooks importer is a separate process, so run that, then
    #collect the epub.
    cmd = [
        config.TIMEOUT_CMD, config.WIKIBOOKS_TIMEOUT,
        config.WIKIBOOKS_CMD,
        '-i', wiki_url,
        '-o', epub_file,
    ]
    log(cmd)
    log(os.environ)
    log(os.getcwd())

    try:
        check_call(cmd)
    except CalledProcessError as e:
        if e.returncode != 137:
            raise
        # 137 is the status this code treats as a timeout (presumably
        # 128 + SIGKILL from the timeout wrapper -- confirm).
        raise TimeoutError('Wikibooks took too long (over %s seconds)'
                           % config.WIKIBOOKS_TIMEOUT)
def get_boilerplate(self, requested):
    """Return [footer url, header url] for the requested boilerplate.

    The pair of template paths is looked up in config.BOILERPLATE_HTML
    under the key `requested`, falling back to
    config.DEFAULT_BOILERPLATE_HTML when the key is absent.  Each
    template that is not None is copied into self.tmpdir under its own
    base name, and the result of path2url(copy, full=True) is put in
    the returned list; a None template yields None in that position.
    """
    footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(
        requested, config.DEFAULT_BOILERPLATE_HTML)
    html = []
    for fn in (footer_tmpl, header_tmpl):
        if fn is None:
            html.append(None)
            continue
        # 'with' closes the handles even if the read or write fails.
        with open(fn) as f:
            s = f.read()
        #XXX can manipulate footer here, for CSS etc
        fn2 = os.path.join(self.tmpdir, os.path.basename(fn))
        with open(fn2, 'w') as f:
            f.write(s)
        html.append(path2url(fn2, full=True))
    return html
def get_boilerplate(self, requested):
    """Return [footer url, header url] for the requested boilerplate.

    config.BOILERPLATE_HTML maps `requested` to a (footer, header) pair
    of template paths; config.DEFAULT_BOILERPLATE_HTML is used when the
    key is missing.  Every template that is not None is read from
    below config.TEMPLATE_ROOT, written into self.workdir under its
    base name, and represented in the returned list by path2url() of
    that copy.  A None template is represented by None.
    """
    footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(
        requested, config.DEFAULT_BOILERPLATE_HTML)
    html = []
    for templ_path in (footer_tmpl, header_tmpl):
        if templ_path is None:
            html.append(None)
            continue
        # Context managers guarantee both files are closed on error.
        with open(os.path.join(config.TEMPLATE_ROOT, templ_path)) as f:
            template_text = f.read()
        #XXX can manipulate footer here, for CSS etc
        out_path = os.path.join(self.workdir, os.path.basename(templ_path))
        with open(out_path, 'w') as f:
            f.write(template_text)
        html.append(path2url(out_path))
    return html
def get_boilerplate(self, requested):
    """Return a two-item list: [footer url, header url].

    The template paths come from config.BOILERPLATE_HTML[requested],
    or from config.DEFAULT_BOILERPLATE_HTML if there is no such key.
    A template given as None produces None in the result.  Any other
    template is opened relative to config.TEMPLATE_ROOT, copied into
    self.workdir (keeping only its base name), and the copy's
    path2url() value is returned in its place.
    """
    footer_tmpl, header_tmpl = config.BOILERPLATE_HTML.get(
        requested, config.DEFAULT_BOILERPLATE_HTML)

    html = []
    for templ_path in (footer_tmpl, header_tmpl):
        if templ_path is not None:
            src_path = os.path.join(config.TEMPLATE_ROOT, templ_path)
            # Using 'with' so a failed read/write cannot leak a handle.
            with open(src_path) as src:
                template_text = src.read()
            #XXX can manipulate footer here, for CSS etc
            out_path = os.path.join(self.workdir,
                                    os.path.basename(templ_path))
            with open(out_path, 'w') as dest:
                dest.write(template_text)
            html.append(path2url(out_path))
        else:
            html.append(None)
    return html
def make_raw_pdf(self, html, pdf, outline=False, outline_file=None, page_num=None):
    """Render the html file to the pdf file, honouring self.columns.

    With one column the command built by self._<engine>_command is run
    directly.  Otherwise a narrower single column pdf is made with a
    temporary PageSettings object, and pdfnup pastes its pages side by
    side into the final pdf.  Nothing is returned.
    """
    if self.columns == 1:
        source_url = path2url(html)
        build_command = getattr(self, '_%s_command' % self.engine)
        run(build_command(source_url, pdf, outline=outline,
                          outline_file=outline_file, page_num=page_num))
        return

    # The four locals below are reported by the locals() dump in debug
    # mode, so their names and order are part of the debug output.
    printable_width = self.width - 2.0 * self.side_margin - self.gutter
    column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
    page_width = column_width + self.column_margin
    side_margin = self.column_margin * 0.5

    if 'PDFGEN' in config.DEBUG_MODES:
        log("making columns with:")
        for name, value in locals().iteritems():
            log("%s: %r" % (name, value))
        for attr in ('width', 'side_margin', 'gutter', 'column_margin',
                     'columns', 'height'):
            log("self.%s: %r" % (attr, getattr(self, attr)))

    # A one-off settings object describing a single column as a page.
    narrow_settings = PageSettings(
        self.workdir, (page_width, self.height),
        gutter=0,
        top_margin=self.top_margin,
        side_margin=side_margin,
        bottom_margin=self.bottom_margin,
        grey_scale=self.grey_scale,
        engine=self.engine,
    )
    narrow_pdf = pdf[:-4] + '-single-column.pdf'
    narrow_settings.make_raw_pdf(html, narrow_pdf, outline=outline,
                                 outline_file=outline_file, page_num=None)
    narrow_settings.reshape_pdf(narrow_pdf)

    # pdfnup seems to round down to an even number of output pages.
    # For example, if a book fills 13 pages, it will clip it to 12.
    # So blank pages are appended to bring the input up to a multiple
    # of (self.columns * 2) pages.
    leftover = count_pdf_pages(narrow_pdf) % (self.columns * 2)
    padding = self.columns * 2 - leftover if leftover else 0

    # Options tried in the past and left switched off:
    # --paper, --tidy false, --columnstrict true, --column true
    nup_command = [config.PDFNUP, '--nup', '%sx1' % int(self.columns)]
    nup_command += ['--outfile', pdf]
    nup_command += ['--noautoscale', 'true']
    nup_command += ['--orient', 'portrait']
    nup_command += ['--paperwidth', '%smm' % int(self.width * POINT_2_MM)]
    nup_command += ['--paperheight', '%smm' % int(self.height * POINT_2_MM)]
    nup_command += ['--pages', '1-last%s' % (',{}' * padding,)]
    nup_command.append(narrow_pdf)
    run(nup_command)
def make_raw_pdf(self, html, pdf, outline=False, outline_file=None, page_num=None):
    """Turn the html file into the pdf file, in one or several columns.

    For a single column, the engine-specific command (looked up as
    self._<engine>_command) is built from the full url of the html
    file and run.  For several columns, a narrow one-column pdf is
    produced by a helper PageSettings object and then laid out in
    columns by pdfnup.  The method returns None.
    """
    if self.columns == 1:
        page_url = path2url(html, full=True)
        engine_command = getattr(self, '_%s_command' % self.engine)
        run(engine_command(page_url, pdf, outline=outline,
                           outline_file=outline_file, page_num=page_num))
        return

    #For multiple columns, generate a narrower single column pdf, and
    #paste it into columns using pdfnup.
    # (These four names show up verbatim in the debug dump of locals()
    # below, so they must keep their names and order.)
    printable_width = self.width - 2.0 * self.side_margin - self.gutter
    column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
    page_width = column_width + self.column_margin
    side_margin = self.column_margin * 0.5

    if 'PDFGEN' in config.DEBUG_MODES:
        log("making columns with:")
        for key, val in locals().iteritems():
            log("%s: %r" % (key, val))
        reported = ('width', 'side_margin', 'gutter', 'column_margin',
                    'columns', 'height')
        for key in reported:
            log("self.%s: %r" % (key, getattr(self, key)))

    # Settings for rendering one column as if it were a whole page.
    column_settings = dict(
        gutter=0,
        top_margin=self.top_margin,
        side_margin=side_margin,
        bottom_margin=self.bottom_margin,
        grey_scale=self.grey_scale,
        engine=self.engine,
    )
    one_column = PageSettings(self.tmpdir, (page_width, self.height),
                              **column_settings)
    one_column_pdf = pdf[:-4] + '-single-column.pdf'
    one_column.make_raw_pdf(html, one_column_pdf, outline=outline,
                            outline_file=outline_file, page_num=None)
    one_column.reshape_pdf(one_column_pdf)

    # pdfnup seems to round down to an even number of output
    # pages.  For example, if a book fills 13 pages, it will
    # clip it to 12.  So it is necessary to add blank pages to
    # round it up to an even number of output pages, which is
    # to say a multiple of (self.columns * 2) input pages.
    surplus = count_pdf_pages(one_column_pdf) % (self.columns * 2)
    blanks = 0
    if surplus:
        blanks = self.columns * 2 - surplus

    # Disabled options kept for reference:
    # --paper, --tidy false, --columnstrict true, --column true
    layout = '%sx1' % int(self.columns)
    paper_width = '%smm' % int(self.width * POINT_2_MM)
    paper_height = '%smm' % int(self.height * POINT_2_MM)
    page_spec = '1-last%s' % (',{}' * blanks,)
    run([PDFNUP, '--nup', layout,
         '--outfile', pdf,
         '--noautoscale', 'true',
         '--orient', 'portrait',
         '--paperwidth', paper_width,
         '--paperheight', paper_height,
         '--pages', page_spec,
         one_column_pdf,
         ])