def test_specificity(self):
    """Check that a class rule wins over a long group of element selectors.

    The first stylesheet colours the div red through an eleven-selector
    group; the second colours `.foo` blue. The expected inlined style is
    the blue one.
    """
    markup = """<div class="foo"></div>"""
    element_rules = """div,a,b,c,d,e,f,g,h,i,j { color: red; }"""
    class_rules = """.foo { color: blue; }"""
    expected = u"""<div class="foo" style="color: blue"></div>"""

    # Stylesheets are attached in the same order as before: element rules
    # first, class rules second.
    inliner = pynliner.Pynliner().from_string(markup)
    inliner = inliner.with_cssString(element_rules)
    inliner = inliner.with_cssString(class_rules)
    output = inliner.run()

    self.assertEqual(output, expected)
def render(self, context):
    """Render the child nodes and inline a stylesheet template into them.

    The stylesheet path comes from resolving `self.arg` against `context`.
    When it resolves to None, an empty stylesheet is used.
    """
    rendered = self.nodes.render(context)
    template_path = self.arg.resolve(context, True)
    if template_path is None:
        stylesheet = ''
    else:
        stylesheet = get_template(template_path).render()

    inliner = pynliner.Pynliner().from_string(rendered)
    inliner.with_cssString(stylesheet)
    return inliner.run()
def render_html_email(name, context):
    """Render the subject, text body and CSS-inlined HTML body of an email.

    Templates are looked up by `name` with the suffixes `.subj.txt`,
    `.body.txt`, `.body.html` and `.css`; the stylesheet falls back to
    'shop/notification/email/base.css'.

    Returns a `(subject, text_body, html_body)` tuple.
    """
    import pynliner

    render = loader.render_to_string
    subject = render('%s.subj.txt' % name, context).strip()
    text_body = render('%s.body.txt' % name, context)
    raw_html = render('%s.body.html' % name, context)
    # The stylesheet is rendered without the context, as a list of
    # candidate template names.
    stylesheet = render(['%s.css' % name, 'shop/notification/email/base.css'])

    # convert to inline-css
    inliner = pynliner.Pynliner().from_string(raw_html)
    html_body = inliner.with_cssString(stylesheet).run()
    return subject, text_body, html_body
def test_identical_element(self):
    """Check that two identical sibling elements both receive the inlined rule."""
    stylesheet = """ .text-right { text-align: right; } .box { width:200px; border: 1px solid #000; } """
    markup = """<div class="box"><p>Hello World</p><p class="text-right">Hello World on right</p><p class="text-right">Hello World on right</p></div>"""
    expected = """<div class="box" style="width: 200px; border: 1px solid #000"><p>Hello World</p><p class="text-right" style="text-align: right">Hello World on right</p><p class="text-right" style="text-align: right">Hello World on right</p></div>"""

    inliner = pynliner.Pynliner().from_string(markup)
    output = inliner.with_cssString(stylesheet).run()

    self.assertEqual(expected, output)
def test():
    """Inline 'style.css' into a small highlighted snippet and print the result.

    Reads the stylesheet from 'style.css' in the current directory, inlines
    it into a fixed HTML fragment with pynliner, strips any remaining
    <style> elements and prints the resulting HTML.
    """
    with open('style.css') as f:
        css = f.read()

    syntax_hl = """ <div class="input_area"> <div class=" highlight hl-ipython3"> <pre> <span class="kn">Hello world</span> </pre> </div> </div>"""
    html = syntax_hl

    inliner = pynliner.Pynliner()
    inliner = inliner.from_string(html).with_cssString(css)
    html = inliner.run()

    # The flags must be passed by keyword: the fourth positional parameter
    # of re.sub() is `count`, so passing them positionally silently
    # disabled IGNORECASE/DOTALL and capped the number of replacements.
    html = re.sub(r'<style.*?</style>', '', html, flags=re.I | re.S | re.M)

    # Fallback: drop everything up to a leftover closing tag, if any.
    if '</style>' in html:
        pos = html.find('</style>')
        html = html[pos+8:]
    print(html)
def render(self, fileto, pretty=False, quoted_printable=True, **args):
    """Render the template and write `<fileto>.html` and `<fileto>.txt`.

    Extra keyword arguments are merged into `self.args` before rendering.
    `pretty` is forwarded to `self._transpile_css`. `quoted_printable` is
    accepted but not read by this method.
    """
    self.args.update(args)
    if self.template is None:
        self._get_template()

    rendered = self.template.render(**self.args)
    self.pyl = pynliner.Pynliner().from_string(rendered)

    # transform template (forced inline CSS)
    self._parse_head()
    html_out, text_out = self._transpile_css(pretty=pretty)

    with open(fileto + '.html', 'w') as html_file:
        html_file.write(html_out)

    # dict.fromkeys() serves as an ordered set here: duplicate lines are
    # dropped and each remaining line appears once.
    unique_lines = dict.fromkeys(text_out.split('\n'))
    with open(fileto + '.txt', 'w') as text_file:
        text_file.write('\n'.join(unique_lines))
def _apply_styling(html):
    '''
    Return the given HTML with the rules from DEFAULT_CSS inlined into
    its tags.
    '''
    inliner = pynliner.Pynliner().from_string(html)
    inliner = inliner.with_cssString(DEFAULT_CSS)
    return inliner.run()
def assert_pynlined(self, html, css, expected):
    """Assert that inlining `css` into `html` produces exactly `expected`."""
    inliner = pynliner.Pynliner().from_string(html)
    actual = inliner.with_cssString(css).run()
    self.assertEqual(actual, expected)
def nb2wp(nbfile, out_dir='', template='full', css_files=['style.css'],
          save_img=True, img_dir='img', img_url_prefix='img', latex='wp',
          remove_attrs=True, footer=True, save_css=False, save_html=False,
          quiet=False):
    """
    Convert Jupyter notebook file to Wordpress.com HTML.

    Parameters:
    nbfile:         The Jupyter notebook file
    out_dir:        Specify output directory. If empty, a directory with
                    the same name as the notebook file will be created.
    template:       (Optional) nbconvert template file. The default is
                    "full". You may specify standard nbconvert template
                    names such as "full" or "basic", or the path of custom
                    nbconvert .TPL file.
    css_files:      Specify list of CSS files to use. The files will be
                    appended after one another. Default is ['style.css'].
                    If not specified, the CSS provided by nbconvert will
                    be used. The list is only read, never modified.
    save_img:       Save inline images to external image files.
                    Default: True. Setting this to False will cause failure
                    in loading the image because Wordpress.com disallows
                    "data:" URI.
    img_dir:        The local directory to save images. The path may be
                    relative or absolute. If relative, the directory will
                    be under out_dir. Default: "img".
    img_url_prefix: The root/parent directory of the images as seen from
                    HTTP. One trailing '/' is stripped.
    latex:          Specify how to convert Latex directives. Default is
                    "wp". If empty, no Latex conversion will be performed
                    (the directives will be left unchanged).
    remove_attrs:   Remove various HTML attributes such as "class", "id"
                    from the output HTML file to simplify the file.
                    Default: True
    footer:         Add conversion footer. Default: True
    save_css:       Save the CSS that is used to 'style.css' file in
                    out_dir, for debugging. Default: False
    save_html:      Save the HTML before it is processed to 'input.html'
                    file in out_dir, for debugging. Default: False
    quiet:          No output to stdout if true. Default: False

    Raises:
    RuntimeError:   An inline "data:" image cannot be parsed or uses an
                    encoding other than base64.
    """
    # NOTE(review): `t0` and `footer` are not read by the code below.
    t0 = time.time()
    file = os.path.basename(nbfile)
    filename = os.path.splitext(file)[0]

    if not out_dir:
        out_dir = filename
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # endswith() is safe for an empty prefix, unlike indexing with [-1].
    if img_url_prefix.endswith('/'):
        img_url_prefix = img_url_prefix[:-1]

    def debug(msg):
        if not quiet:
            print(msg)

    with open(nbfile, 'r') as f:
        notebook = nbformat.read(f, as_version=4)

    html_exporter = nbconvert.HTMLExporter()
    if template:
        html_exporter.template_file = template
    debug('Using template: {}'.format(html_exporter.template_file))
    (html, res) = html_exporter.from_notebook_node(notebook)

    #
    # Preprocess CSS and HTML
    #
    if css_files:
        debug('Using CSS files {}'.format(css_files))
        css_parts = []
        for css_file in css_files:
            with open(css_file, 'r') as f:
                css_parts.append(f.read())
        css = ''.join(css_parts)
    else:
        if res['inlining'] and res['inlining']['css']:
            css = '\n'.join(res['inlining']['css']) + '\n'
        else:
            debug('Warning: no CSS is generated by nbconvert')
            css = ''

    # Replace/remove string patterns in CSS
    patterns = [# comments may contain HTML tags that confuses our regex.
                (r'/\*.*?\*/', '', re.I|re.S|re.M),
                # cssutils not able to handle '(' in CSS selector
                # But alas.. this regex removal is broken
                #(r'[_0-9a-zA-Z-#.:*]+\(.*?}', '', re.I|re.S|re.M),
                # cssutils not able to handle ~ in CSS selector
                # But alas.. this regex removal is broken
                #(r'[_0-9a-zA-Z-#.:*]+\s*~', '', re.I|re.S|re.M),
                ]
    for str_pat, repl, flag in patterns:
        pat = re.compile(str_pat, flag)
        css = pat.sub(repl, css)

    if save_css:
        out_css_file = os.path.join(out_dir, 'style.css')
        debug('Saving CSS to {}'.format(out_css_file))
        with open(out_css_file, 'w') as f:
            f.write(css)

    patterns = [# silly character after headings
                (r'¶', '', re.I|re.S|re.M),
                # link to local file custom.css, which pynliner couldn't handle
                (r'<link rel="stylesheet" .*?>', '', re.I|re.S|re.M),
                # remove the whole <head> as it contains duplicate CSS with full template
                (r'<head.*</head>', '', re.I|re.S|re.M)
                ]
    for str_pat, repl, flag in patterns:
        pat = re.compile(str_pat, flag)
        html = pat.sub(repl, html)

    if save_html:
        out_html_file = os.path.join(out_dir, 'input.html')
        debug('Saving tmp HTML to {}'.format(out_html_file))
        with open(out_html_file, 'w') as f:
            f.write(html)

    #
    # CSS inlining
    #
    inliner = pynliner.Pynliner()
    if css:
        inliner = inliner.from_string(html).with_cssString(css)
    else:
        inliner = inliner.from_string(html)
    html = inliner.run()

    #
    # Process images
    #
    if save_img:
        soup = BeautifulSoup(html, 'html.parser')
        images = soup.find_all('img')
        img_parent_path = os.path.join(out_dir, img_dir)
        if images and not os.path.exists(img_parent_path):
            os.makedirs(img_parent_path)

        for img_i, img in enumerate(images):
            # An <img> without a usable src has nothing to save or rewrite.
            src = img.get('src')
            if not src:
                continue

            if 'data:' in src.lower():
                # data: URI. Parse type, encoding and payload in one pass so
                # that the subtype is matched consistently (digits allowed).
                m = re.search(r'data:image/([a-z0-9]+);([a-z0-9]+),(.*)',
                              src, re.I)
                if m is None:
                    raise RuntimeError(
                        'Unsupported image data URI in image #{}'.format(img_i))
                img_type, img_encoding, data = m.groups()
                # Validate before opening the file so that an unsupported
                # encoding does not leave an empty image file behind.
                if img_encoding != 'base64':
                    raise RuntimeError(
                        'Unsupported image encoding "{}"'.format(img_encoding))

                img_file = 'img{}.{}'.format(img_i, img_type)
                img_path = os.path.join(img_parent_path, img_file)
                with open(img_path, 'wb') as f:
                    f.write(base64.b64decode(data))
                img['src'] = img_url_prefix + '/' + img_file
            elif 'http:' not in src.lower() and 'https:' not in src.lower():
                # Local file
                img_filename = os.path.basename(src)
                img_ext = os.path.splitext(img_filename)[1]
                img_file = 'img{}{}'.format(img_i, img_ext)
                img_path = os.path.join(img_parent_path, img_file)
                copyfile(src, img_path)
                img['src'] = img_url_prefix + '/' + img_file

        html = str(soup)

    #
    # clean up the HTML
    #
    patterns = [# Remove inline <style> still in the HTML (in the body)
                (r'<style.*?</style>', '', re.I|re.S|re.M),
                ]
    for str_pat, repl, flag in patterns:
        pat = re.compile(str_pat, flag)
        html = pat.sub(repl, html)

    #
    # Remove classes and ids
    #
    if remove_attrs:
        soup = BeautifulSoup(html, 'html.parser')
        elements = soup.find_all()
        for el in elements:
            del el['class']
            del el['id']
        html = str(soup)

    #
    # Process latex last (otherwise '&' will be escaped)
    #
    if latex == "wp":
        # Stage 1: replace "$ formula $" into "@beginlatex@ formula @endlatex1@",
        # and "$$ formula $$" into "@beginlatex@ formula @endlatex2@"
        pat = re.compile(r'(\${1,2})((?:\\.|[\s\S])*?)\1')
        # The search deliberately restarts from the beginning after every
        # replacement, so '$' characters left inside an already converted
        # formula are examined again.
        while True:
            m = pat.search(html)
            if m is None:
                break
            formula = m.group(2)
            html = html[:m.start()] + '@beginlatex@' + formula + \
                   ('@endlatex2@' if m.group(1)=='$$' else '@endlatex1@') + \
                   html[m.end():]

        # Stage 2: replace '@beginlatex@' and '@endlatex@@'
        html = html.replace('@beginlatex@', '$latex ') \
                   .replace('@endlatex1@', ' &bg=ffffff&s=2 $') \
                   .replace('@endlatex2@', ' &bg=ffffff&s=4 $')
def render(self):
    """Return `self.html` with the rules from `self.css` inlined by pynliner."""
    return (
        pynliner.Pynliner()
        .from_string(self.html)
        .with_cssString(self.css)
        .run()
    )