def run(self, parent, blocks):
    """Compile the formula of the first pending block and attach it to ``parent``.

    The first entry of ``blocks`` is consumed, the ``formula`` group matched
    by ``self.RE`` is handed to ``compile_latex`` and the returned markup is
    parsed into an element.

    Returns:
        The element that was appended to ``parent``.
    """
    formula = self.RE.search(blocks.pop(0)).group('formula')
    # The meaning of the second positional flag is defined by compile_latex;
    # this processor always passes False.
    markup = compile_latex(formula, False, self.cache_directory)
    element = etree.fromstring(markup)
    parent.append(element)
    return element
def svg_rewrite(svg):
    """Inline every ``<use>`` reference of an SVG document and drop ``<defs>``.

    Namespace declarations are stripped and ``xlink:href`` is renamed to
    ``href`` so the document can be queried with unqualified names. Each
    ``<use>`` element is then turned into a copy of the ``<path>`` it
    references: it takes over the path's tag and attributes (except ``id``).

    The resulting ``transform`` is, in order: the ``<use>`` element's own
    transform, a ``translate(x, y)`` built from its ``x``/``y`` attributes,
    and the referenced path's transform. The parts are joined with a space.

    Args:
        svg: SVG document as a string.

    Returns:
        The rewritten SVG document as a string.

    Raises:
        RaphidocException: if a ``<use>`` element carries attributes other
            than ``href``, ``transform``, ``x`` and ``y``.
    """
    svg = re.sub(r'xmlns(:[a-z]*)?="[^"]+"', '', svg)
    svg = svg.replace('xlink:href', 'href')
    tree = etree.fromstring(svg)

    # Snapshot the matches: the loop rewrites the tag being iterated over.
    for use in list(tree.iter('use')):
        ref_id = use.get('href')[1:]  # drop the leading '#'
        ref = tree.find('.//path[@id=\'%s\']' % ref_id)
        use.attrib.pop('href')
        use.tag = ref.tag

        transforms = [use.attrib.pop('transform', '')]
        x = use.attrib.pop('x', 0)
        y = use.attrib.pop('y', 0)
        transforms.append('translate(%s, %s)' % (x, y))

        if use.attrib:
            # Report the attributes left on the <use> element itself.
            raise RaphidocException(
                'Unexpected attribute(s) on "use" element found: %s'
                % list(use.attrib))

        for key, value in ref.attrib.items():
            if key == 'transform':
                # Keep the referenced path's own transform instead of
                # letting it be overwritten below.
                transforms.append(value)
            elif key != 'id':
                use.attrib[key] = value

        use.attrib['transform'] = ' '.join(t for t in transforms if t)

    defs = tree.find('defs')
    if defs is not None:
        tree.remove(defs)
    return etree.tostring(tree).decode()
def run(self, parent, blocks):
    """Render a PlantUML block as inline SVG and append it to ``parent``.

    Blocks are consumed from ``blocks`` until one matches ``self.RE_END``.
    The header (``self.RE``) and footer (``self.RE_END``) are removed from
    the collected text, which is passed to ``self.generate_uml_image``. The
    returned SVG has its namespace declarations and ``textLength``
    attributes stripped before it is parsed and appended to ``parent``.

    Raises:
        RuntimeError: if no consumed block contains the end marker.
    """
    block = blocks.pop(0)
    text = block

    # Read blocks until end marker found
    while blocks and not self.RE_END.search(block):
        block = blocks.pop(0)
        text += '\n' + block

    # Decide on the last block read, not on whether input is exhausted:
    # a diagram whose end marker is in the final block is properly closed.
    if not self.RE_END.search(block):
        raise RuntimeError("UML block not closed")

    # Remove block header and footer
    text = self.RE.sub("", self.RE_END.sub("", text))

    # Generate image from PlantUML script
    imagesrc = self.generate_uml_image(text).replace(
        'xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"',
        '')
    imagesrc = re.sub(r'textLength="\d+"', '', imagesrc)

    # NOTE(review): the header's classes/alt values were parsed but never
    # applied to the generated element; they are no longer computed here.
    etree.register_namespace('', 'http://www.w3.org/2000/svg')
    etree.register_namespace('xlink', 'http://www.w3.org/1999/xlink')
    parent.append(etree.fromstring(imagesrc))
def translate(self, catalog, translations, root):
    """Translate the children of ``root`` in place, recursing where needed.

    For every direct child whose tag matches ``TRANSLATE_TAGS_RE`` the
    child's text plus its serialised sub-elements form the message. The
    message is unescaped with ``self.parser.unescape``, added to
    ``catalog`` and looked up through ``translations.gettext``. The child
    is then replaced by a new element parsed from the translated markup.
    Children with other tags are processed recursively.

    Args:
        catalog: object with an ``add(message)`` method.
        translations: object with a ``gettext(message)`` method.
        root: element whose children are translated.
    """
    # Iterate over a snapshot; the loop replaces children of ``root``.
    for idx, child in enumerate(list(root)):
        if not re.match(TRANSLATE_TAGS_RE, child.tag):
            self.translate(catalog, translations, child)
            continue

        pieces = []
        for sub in child:
            serialized = etree.tostring(sub)
            # tostring() returns bytes on Python 3; joining needs text.
            if isinstance(serialized, binary_type):
                serialized = serialized.decode('utf-8')
            pieces.append(serialized)
        translatable = (child.text or '') + '\n'.join(pieces)
        if not translatable:
            continue

        translatable = self.parser.unescape(translatable)
        catalog.add(translatable)
        translated = translations.gettext(translatable)
        if isinstance(translated, binary_type):
            translated = translated.decode('utf-8')

        content = '<{0}>{1}</{0}>'.format(child.tag, translated)
        try:
            new_node = etree.fromstring(content.encode('utf-8'))
        except etree.ParseError:
            # Best effort: a translation that is not well-formed markup
            # leaves the original element untouched.
            continue

        # Copy attributes on the element instead of re-serialising them,
        # so values containing quotes or '&' cannot break the markup.
        new_node.attrib.update(child.attrib)
        # Keep the text that follows the element in the document.
        new_node.tail = child.tail
        root.remove(child)
        root.insert(idx, new_node)
def run(self, text):
    """Post-process rendered markup and return the serialised result.

    ``text`` is wrapped in the document tag, parsed, and passed through
    ``self._iterate`` once for each of the three element callbacks. The
    tree is serialised with ``self._markdown.serializer``; when
    ``stripTopLevelTags`` is set the wrapping document tag is removed again.

    Raises:
        ValueError: if the top-level tags cannot be located in the
            serialised output and the document is not empty.
    """
    md = self._markdown
    tag = md.doc_tag
    wrapped = '<{tag}>{text}</{tag}>'.format(tag=tag, text=text)
    root = etree.fromstring(wrapped.encode('utf-8'))

    for callback_name in ('_add_color_code', '_add_border_table',
                          '_to_absolute_url'):
        self._iterate(root, getattr(self, callback_name))

    output = md.serializer(root)
    if not md.stripTopLevelTags:
        return output

    try:
        start = output.index('<%s>' % tag) + len(tag) + 2
        end = output.rindex('</%s>' % tag)
    except ValueError:
        if output.strip().endswith('<%s />' % tag):
            # We have an empty document
            return ''
        # We have a serious problem
        raise ValueError(
            'Markdown failed to strip top-level tags. Document=%r'
            % output.strip())
    return output[start:end].strip()
def run(self, parent, blocks):
    """Render a PlantUML block as inline SVG and append it to ``parent``.

    Blocks are consumed from ``blocks`` until one matches ``self.RE_END``.
    The header (``self.RE``) and footer (``self.RE_END``) are removed from
    the collected text, which is passed to ``self.generate_uml_image``. The
    returned SVG has its namespace declarations and ``textLength``
    attributes stripped before it is parsed and appended to ``parent``.

    Raises:
        RuntimeError: if no consumed block contains the end marker.
    """
    block = blocks.pop(0)
    text = block

    # Read blocks until end marker found
    while blocks and not self.RE_END.search(block):
        block = blocks.pop(0)
        text += '\n' + block

    # Decide on the last block read, not on whether input is exhausted:
    # a diagram whose end marker is in the final block is properly closed.
    if not self.RE_END.search(block):
        raise RuntimeError("UML block not closed")

    # Remove block header and footer
    text = self.RE.sub("", self.RE_END.sub("", text))

    # Generate image from PlantUML script
    imagesrc = self.generate_uml_image(text).replace(
        'xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"',
        '')
    imagesrc = re.sub(r'textLength="\d+"', '', imagesrc)

    # NOTE(review): the header's classes/alt values were parsed but never
    # applied to the generated element; they are no longer computed here.
    etree.register_namespace('', 'http://www.w3.org/2000/svg')
    etree.register_namespace('xlink', 'http://www.w3.org/1999/xlink')
    parent.append(etree.fromstring(imagesrc))
def run(self, parent, blocks):
    """Render a notification block, or extend the previous notification.

    The first entry of ``blocks`` is consumed. When it matches
    ``self.line_match`` its first line is removed and a new notification
    ``<div>`` is rendered from a jinja2 template and appended to ``parent``.
    Otherwise the text is appended as an extra
    ``<span class="p-notification__line">`` to the response paragraph found
    in the last child of ``parent``.
    """
    sibling = self.lastChild(parent)
    block = blocks.pop(0)
    match = self.line_match.search(block)

    if match:
        block = block[match.end() + 1:]  # removes the first line

    block, _rest = self.detab(block)
    contents = block.replace('\n', ' ').replace('\r', '').strip()

    if not match:
        # Continuation: add another line to the existing notification.
        response_paragraph = sibling.find(
            "p[@class='p-notification__response']")
        line_markup = ''.join(
            ('<span class="p-notification__line">', contents, '</span>'))
        response_paragraph.append(etree.fromstring(line_markup))
        return

    template = jinja2.Template(
        '<div class="{{ class }}">'
        ' <p class="p-notification__response">'
        ' {% if title %}'
        ' <span class="p-notification__status">'
        ' {{ title }}:'
        ' </span>'
        ' {% endif %}'
        ' <span class="p-notification__line">{{body}}</span>'
        ' </p>'
        '</div>')

    notification_type, title = self.get_type_and_title(match)
    type_classes = {
        'warning': 'p-notification--caution',
        'positive': 'p-notification--positive',
        'negative': 'p-notification--negative',
        'information': 'p-notification--information',
    }
    # Unknown notification types fall back to the base class.
    css_class = type_classes.get(notification_type, 'p-notification')
    markup = template.render({
        'class': css_class,
        'title': title,
        'body': contents,
    })
    parent.append(etree.fromstring(markup))
def _expected(self, Cls, ctx):
    """Return the normalised markup ``Cls.render_template(ctx)`` should give.

    The rendered template is wrapped in a ``<p>`` element and round-tripped
    through the element tree parser and serialiser, so details such as
    attribute formatting match what the serialiser emits.
    """
    from markdown.util import etree

    wrapped = '<p>%s</p>' % Cls.render_template(ctx)
    # "tree-ify" it which causes some stuff like re-arranging properties
    normalised = etree.tostring(etree.fromstring(wrapped))
    return normalised.decode('utf-8')
def handleMatch(self, m):
    """Build the centred hidden ``<input>`` holding the matched schematic.

    The ``data`` group of the match is passed through ``escape`` and
    embedded as the ``value`` attribute of a 640x480 ``schematic ctrls``
    input wrapped in a ``<div>``.
    """
    value = escape(m.group('data'))
    markup = ''.join((
        "<div align='center'><input type='hidden' parts='' value='",
        value,
        "' analyses='' class='schematic ctrls' width='640' height='480'/></div>",
    ))
    return etree.fromstring(markup)
def handleMatch(self, m):
    """Return the matched code as a syntax-highlighted element.

    Group 2 of the match names the language (``'text'`` when absent) and
    group 3 holds the code. The code is highlighted with an
    ``HtmlFormatter`` using inline styles, and the resulting markup is
    parsed into an element.
    """
    language = m.group(2) or 'text'
    code = m.group(3)
    try:
        lexer = get_lexer_by_name(language)
    except ValueError:
        # Pygments signals an unknown lexer name with ClassNotFound, a
        # ValueError subclass; fall back to plain text. Catching only this
        # keeps KeyboardInterrupt and genuine errors visible.
        lexer = get_lexer_by_name('text')
    formatter = HtmlFormatter(cssclass='source', noclasses=True)
    return etree.fromstring(highlight(code, lexer, formatter))
def construal_link(ss1, ss2, href, cls):
    """Build an ``<a>`` element showing two supersenses joined by an arrow.

    Each supersense is rendered as a ``<span>`` containing its current
    revision's name, or ``INVALIDSS`` when the supersense is falsy. A span
    gets the ``supersense-deprecated`` class when ``ss_is_deprecated``
    reports the supersense as deprecated.

    The element is assembled with the element tree API rather than parsed
    from an interpolated string, so names, ``href`` or ``cls`` containing
    characters such as ``&``, ``<`` or ``"`` cannot break the markup.

    Args:
        ss1: first supersense, or a falsy value.
        ss2: second supersense, or a falsy value.
        href: link target.
        cls: value of the link's ``class`` attribute.

    Returns:
        The ``<a>`` element.
    """
    first = _supersense_span(ss1)
    second = _supersense_span(ss2)
    # The arrow sits between the two spans, i.e. in the first span's tail.
    first.tail = '↝'

    link_elt = etree.Element('a', {'href': f'{href}', 'class': f'{cls}'})
    link_elt.append(first)
    link_elt.append(second)
    return link_elt


def _supersense_span(ss):
    """Return the ``<span>`` for one supersense of a construal link."""
    span = etree.Element('span')
    if ss_is_deprecated(ss):
        span.set('class', 'supersense-deprecated')
    span.text = ss.current_revision.metadatarevision.name if ss else 'INVALIDSS'
    return span
def handleMatch(self, m):
    """Turn the matched expression into a ``<span class="iced">`` element.

    Group 2 of the match is rewritten with the ``TRANSLATE_TABLE``
    replacements and the four substitution patterns, which insert
    ``<sup>``/``<sub>`` markup, and the result is parsed as the span's
    content.
    """
    expr = m.group(2)
    for from_s, to_s in TRANSLATE_TABLE:
        expr = expr.replace(from_s, to_s)
    expr = ZERO_PATTERN.sub(r'\1<sup>0</sup>', expr)
    expr = CHARGE_PATTERN.sub(r'\1<sup>\2</sup>\3', expr)
    expr = MULTIPLIER_PATTERN.sub(r'\1<sub>\2</sub>', expr)
    expr = ADDITIONAL_PATTERN.sub(r'<sup>\1</sup>', expr)
    # Encode the finished document, not the fragment: interpolating bytes
    # into a str template embeds their repr (b'...') on Python 3.
    markup = '<span class="iced">%s</span>' % expr
    return etree.fromstring(markup.encode('utf-8'))
def make_el(cls, object_context, path_spec, identity, args=None):
    """Render the class template for an object and parse it into an element.

    Args:
        object_context: context object for the template; when falsy it is
            looked up in ``hubpath_objects`` by ``path_spec``.
        path_spec: key used for the lookup above.
        identity: value passed to the template as ``id``.
        args: extra template arguments; defaults to an empty list.

    Returns:
        The element parsed from the rendered template.
    """
    # A fresh list per call; a list literal as the default would be one
    # object shared by every call.
    if args is None:
        args = []
    if not object_context:
        # need to look up object_context
        object_context = hubpath_objects.get(path_spec)
    rendered = cls.render_template({
        'object': object_context,
        'id': identity,
        'args': args,
    })
    return etree.fromstring(rendered)
def handleMatch(self, m):
    """Parse the included HTML file named by the match into an element.

    Group 3 of the match is the include file name; an underscore is
    prepended when it does not already start with one. The file is read
    from ``self._reader_dir`` through ``pelican_open``.

    Returns:
        The parsed element, or ``None`` when parsing fails (the failure is
        logged).
    """
    fname = m.group(3)
    # Prepend '_' if not specified in include
    if fname[:1] != "_":
        fname = "_" + fname
    include_path = os.path.join(self._reader_dir, fname)

    with pelican_open(include_path) as text:
        try:
            return etree.fromstring(text)
        except Exception as e:
            logger.error("Cannot parse included html %s: %s" %
                         (include_path, e))
            return None
def handleMatch(self, m):
    """Parse the included HTML file named by the match into an element.

    Group 3 of the match is the include file name; an underscore is
    prepended when it does not already start with one. The file is read
    from ``self._reader_dir`` through ``pelican_open``.

    Returns:
        The parsed element, or ``None`` when parsing fails (the failure is
        logged).
    """
    fname = m.group(3)
    # Prepend '_' if not specified in include
    if fname[:1] != '_':
        fname = '_' + fname
    include_path = os.path.join(self._reader_dir, fname)

    with pelican_open(include_path) as text:
        try:
            return etree.fromstring(text)
        except Exception as e:
            logger.error('Cannot parse included html %s: %s' %
                         (include_path, e))
            return None
def handleMatch(self, m):
    """Build the embed markup for the SWF game named by the match.

    The ``name`` group selects the file under ``games/ugl/``. Each call
    takes the next value of the module-level ``COUNTER`` to give the
    placeholder element a distinct id.
    """
    global COUNTER
    groups = m.groupdict()
    swf_path = "games/ugl/" + groups.get('name') + ".swf"
    element_id = "uglgame_%d_swf" % COUNTER
    COUNTER += 1

    template = (
        '<div class="swf"> <div id="%s"> '
        '<b>Need flash to run this :(</b> </div> '
        '<script> swfobject.embedSWF("%s", "%s", 480, 480, "11.8"); '
        '</script> </div>'
    )
    return etree.fromstring(template % (element_id, swf_path, element_id))
def handleMatch(self, m):
    """Build the markup that loads the UGL game scripts.

    Each call takes the next value of the module-level ``COUNTER`` to form
    the generated name that is written into the ``gameName`` script
    variable.

    The placeholder ``<div>`` and the ``<script>`` elements are wrapped in
    a single ``<div>``: ``etree.fromstring`` accepts exactly one root
    element, so parsing the four sibling elements on their own always
    fails.
    """
    global COUNTER
    name = "uglgame_%d_swf" % COUNTER
    COUNTER += 1
    # NOTE(review): gameName receives the generated id; the ``name`` group
    # of the match is not used here -- confirm this is what ugl.js expects.
    content = (
        '<div> <div id="uglgame"></div> '
        '<script> var gameName = "%s"; </script> '
        '<script src="//ajax.googleapis.com/ajax/libs/swfobject/2.2/swfobject.js"></script> '
        '<script src="//ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script> '
        '<script src="http://fserb.com/vault/static/ugl.js"></script> </div>'
    ) % name
    return etree.fromstring(content)
def pspecial(self, **kwargs):
    """Render a link to an adposition or to one of its usages.

    Positional macro arguments are recovered with ``argize_kwargs``:
    ``args[0]`` is the link text, ``args[1]`` the adposition path (its last
    two ``/``-separated segments are the language name and the
    adposition), ``args[2]`` an optional construal (``'-'`` means none) and
    ``args[3]`` an optional CSS class.

    The adposition segment is replaced by its normalised form when
    ``models.Adposition.normalize_adp`` returns one. Only that last segment
    is replaced, so a language name containing the same characters is left
    alone.

    Returns:
        A link element, or the element from ``self.errormsg()`` when
        anything goes wrong while building it.
    """
    try:
        args = argize_kwargs(kwargs)
        cl = None
        text = args[0]
        prep = args[1]

        segments = prep.split('/')
        p = models.Adposition.normalize_adp(
            cls=models.Adposition,
            adp=segments[-1],
            language_name=segments[-2])
        if p:
            segments[-1] = p
            prep = '/'.join(segments)

        if len(args) >= 4:
            cl = args[3]

        if len(args) > 2 and not '-' == args[2]:
            construal = args[2]
            if '`' in construal:
                return link(text, '/' + prep + '/' + construal,
                            cl if cl else 'usage')
            elif "'" in construal or '?' in construal:
                return link(text.replace('--', '↝'),
                            '/' + prep + '/' + construal,
                            cl if cl else 'usage')
            else:
                cl = cl if cl else 'usage'
                if '--' in construal:
                    # 2 supersenses specified: role, function
                    ss1, ss2 = get_supersenses_for_construal(construal)
                    supersenses = (ss1, ss2)
                else:
                    ss = get_supersense(construal)
                    if ss is None:
                        # special (backtick) labels are represented as
                        # construals with no role or function
                        supersenses = ()
                    else:
                        # single supersense specified, so it will be both
                        # role and function in the construal
                        supersenses = (ss, )
                        construal = construal + '--' + construal
                cl += " usage-deprecated" if any(
                    ss_is_deprecated(ss) for ss in supersenses) else ""
                text = text.replace('--', '↝')
                href = '/' + prep + '/' + construal
                # Set attributes on the element so special characters in
                # the class or href cannot produce malformed markup.
                link_elt = etree.Element('a', {'class': cl, 'href': href})
                link_elt.text = text
                return link_elt

        return link(text, '/' + prep, cl if cl else 'adposition')
    except Exception:
        # Macro boundary: show an error instead of failing the page, but
        # let KeyboardInterrupt/SystemExit propagate.
        return self.errormsg()
def run(self, parent: etree.Element, blocks: List[str]) -> None:
    """Expand a stashed placeholder block, or continue the previous sibling.

    The first entry of ``blocks`` is consumed. When it contains a
    placeholder (``PLACEHOLDER_RE``) the stashed markup with that index is
    parsed and handed to ``self._parse``; if it is not well-formed, a
    ``<pre>`` holding the parse error and the escaped markup is appended
    to ``parent`` instead. Without a placeholder the block is parsed as a
    chunk into the last child of ``parent``, when there is one.
    """
    sibling = self.lastChild(parent)
    block = blocks.pop(0)
    m = PLACEHOLDER_RE.search(block)
    if m:
        index = int(m.group(1))
        block = getattr(self.md, 'bootstrap_stash')[index]
        try:
            tree = etree.fromstring(block)
        except etree.ParseError as e:
            pre = etree.SubElement(parent, 'pre')
            pre.text = f'{e}\n{escape(block)}'
        else:
            self._parse(parent, tree)
    elif sibling is not None:
        # An element without children is falsy, so a plain truth test
        # would silently drop the block for a childless sibling.
        self.parser.parseChunk(sibling, block)
def run(self, parent, blocks):
    """Append an inline scoreboard, optionally limited to one side of a rank.

    The first entry of ``blocks`` is matched against ``self.PATTERN``.
    Without a ``type`` group the whole scoreboard is rendered. ``before``
    keeps the entries up to index ``n``, ``after`` keeps those from index
    ``n + 1``. Any other type renders nothing.
    """
    match = self.PATTERN.match(blocks.pop(0))
    kind = match.group('type')

    bounds = slice(None, None)
    if kind is not None:
        n = int(match.group('n'))
        if kind == 'before':
            bounds = slice(None, n)
        elif kind == 'after':
            bounds = slice(n + 1, None)
        else:
            return None

    container = etree.SubElement(parent, 'div', {'class': 'scoreboard'})
    template = get_template('archives/inline-scoreboard.html')
    html = template.render({'scoreboard': self.scoreboard[bounds]})
    container.append(etree.fromstring(html))
def process(self) -> None:
    """Load ``self.source`` and split it into metadata and body markup.

    The file is parsed as XML and must have an ``<html>`` root. The
    ``<title>`` text and every ``<meta name=... content=...>`` pair found
    in ``<head>`` are stored in ``self.metadata``. The ``<body>`` element
    is detached and its serialised form is stored in ``self.body``.

    Raises:
        AssertionError: if the root element is not ``<html>`` or there is
            no ``<body>`` element.
    """
    meta = self.metadata
    with open(self.source) as fh:
        data = fh.read()
    html = etree.fromstring(data)
    assert html.tag == 'html'

    head = html.find('head')
    # Compare against None: an element without children is falsy.
    if head is not None:
        title = head.find('title')
        if title is not None:
            meta['title'] = title.text
        for elm in head.iter('meta'):
            name = elm.attrib.get('name')
            content = elm.attrib.get('content')
            # Skip <meta> elements that are not name/content pairs.
            if name is None or content is None:
                continue
            meta[name] = content

    body: Optional[etree.Element] = html.find('body')
    # A body holding only text has no children and would fail a plain
    # truth test even though it exists.
    assert body is not None
    html.remove(body)

    buffer = BytesIO()
    etree.ElementTree(body).write(buffer, encoding='utf-8',
                                  xml_declaration=False)
    self.body = buffer.getvalue().decode('utf-8')
def run(self, text):
    """Post-process rendered markup and return the serialised result.

    ``text`` is wrapped in the document tag, parsed, and passed through
    ``self._iterate`` once for each of the three element callbacks. The
    tree is serialised with ``self._markdown.serializer``; when
    ``stripTopLevelTags`` is set the wrapping document tag is removed again.

    Raises:
        ValueError: if the top-level tags cannot be located in the
            serialised output and the document is not empty.
    """
    md = self._markdown
    tag = md.doc_tag
    wrapped = '<{tag}>{text}</{tag}>'.format(tag=tag, text=text)
    root = etree.fromstring(wrapped.encode('utf-8'))

    for callback_name in ('_add_color_code', '_add_border_table',
                          '_to_absolute_url'):
        self._iterate(root, getattr(self, callback_name))

    output = md.serializer(root)
    if not md.stripTopLevelTags:
        return output

    try:
        start = output.index('<%s>' % tag) + len(tag) + 2
        end = output.rindex('</%s>' % tag)
    except ValueError:
        if output.strip().endswith('<%s />' % tag):
            # We have an empty document
            return ''
        # We have a serious problem
        raise ValueError(
            'Markdown failed to strip top-level tags. Document=%r'
            % output.strip())
    return output[start:end].strip()
def parse_html(html, tree):
    """Parse an HTML fragment wrapped in a minimal html/head/body document.

    Parses html as an
    - lxml.etree (@tree='lxml')
    - markdown.util.etree (any other value, e.g. @tree='md')

    When parsing fails and the exception carries a ``position``, the
    offending line and its neighbours are logged before the exception is
    re-raised.

    Args:
        html: markup to place inside ``<body>``.
        tree: ``'lxml'`` to use ``lxml.etree`` with its HTML parser,
            anything else to use ``etree``.

    Returns:
        The root ``<html>`` element.
    """
    prefix = u'<html><head></head><body>'
    document = (prefix + html + u'</body></html>').encode('utf-8')
    try:
        if tree == 'lxml':
            return lxml.etree.fromstring(
                document, lxml.etree.HTMLParser(encoding='utf-8'))
        return etree.fromstring(document)
    except Exception as e:
        position = getattr(e, 'position', None)
        if position is not None:
            _log_parse_error(html, tree, position, len(prefix))
        raise


def _log_parse_error(html, tree, position, prefix_len):
    """Log the failing line of ``html`` with up to three lines of context."""
    ln = position[0] - 1
    col = position[1]
    if ln == 0:
        # The wrapper prefix shares the first line with the fragment.
        col = max(col - prefix_len, 0)
    lns = html.split('\n')

    logger.warning("XML PARSE ERROR (" + str(tree) + "): Line: " + str(ln)
                   + ", Col: " + str(col))
    logger.warning("----------------")
    for i in range(-3, 4):
        idx = ln + i
        # Skip context lines that fall outside the fragment instead of
        # wrapping around or raising IndexError.
        if not 0 <= idx < len(lns):
            continue
        line = lns[idx]
        if i == 0:
            logger.warning(str(i) + ' ***: ' + line[:col] + '->'
                           + line[col:col + 1] + '<-' + line[col + 1:])
        else:
            logger.warning(str(i) + ' : ' + line)
    logger.warning("----------------")
def handleMatch(self, m):
    """Return the replacement for the matched emoticon.

    The ``emoticon`` group of the match is looked up in the ``aliases``
    configuration. If the alias parses as markup, that element is used.
    Otherwise the alias is plain text; it is wrapped in a ``<span>`` only
    when a ``span_class`` or a per-emoticon style is configured.

    Returns:
        An element when the alias is markup or needs a class or style,
        otherwise the alias string itself.
    """
    # Get the preferred Unicode emoticon, or override
    emoticon = self.ext.getConfig('aliases')[m.group('emoticon')]

    # Try to parse it as HTML in case it's overridden
    try:
        element = etree.fromstring(emoticon.encode('utf-8'))
    except etree.ParseError:
        element = None

    span_class = self.ext.getConfig('span_class')
    style = self.ext.getConfig('styles').get(emoticon)

    # A wrapper is needed as soon as there is a class or a style to carry;
    # previously a style without a class hit an unbound local.
    if element is None and (span_class or style):
        element = etree.Element('span')
        element.text = emoticon

    # Apply class name if needed
    if span_class:
        element.set('class', span_class)
    # Apply style formatting
    if style:
        element.set('style', style)

    return emoticon if element is None else element
def mk_doc(self, s):
    """Parse ``s``, stripped and wrapped in a ``<div>``, into an element."""
    wrapped = "".join(("<div>", s.strip(), "</div>"))
    return etree.fromstring(wrapped)
def errormsg(self):
    """Return the ``<span class="error">`` element shown for a failed macro."""
    markup = '<span class="error">Macro Error: please see example usage</span>'
    return etree.fromstring(markup)
def _replace_block(self, text): # Parse configuration params m = self.FENCED_BLOCK_RE.search(text) if not m: m = self.BLOCK_RE.search(text) if not m: return text, False # Parse configuration params img_format = m.group('format') if m.group( 'format') else self.config['format'] classes = m.group('classes') if m.group( 'classes') else self.config['classes'] alt = m.group('alt') if m.group('alt') else self.config['alt'] title = m.group('title') if m.group('title') else self.config['title'] width = m.group('width') if m.group('width') else None height = m.group('height') if m.group('height') else None # Extract diagram source end convert it code = m.group('code') diagram = self.generate_uml_image(code, img_format) if img_format == 'txt': # logger.debug(diagram) img = etree.Element('pre') code = etree.SubElement(img, 'code') code.attrib['class'] = 'text' code.text = AtomicString(diagram.decode('UTF-8')) else: # These are images if img_format == 'svg_inline': data = self.ADAPT_SVG_REGEX.sub('<svg \\1\\2>', diagram.decode('UTF-8')) img = etree.fromstring(data) # remove width and height in style attribute img.attrib['style'] = re.sub(r'\b(?:width|height):\d+px;', '', img.attrib['style']) elif img_format == 'svg': # Firefox handles only base64 encoded SVGs data = 'data:image/svg+xml;base64,{0}'.format( base64.b64encode(diagram).decode('ascii')) img = etree.Element('img') img.attrib['src'] = data elif img_format == 'svg_object': # Firefox handles only base64 encoded SVGs data = 'data:image/svg+xml;base64,{0}'.format( base64.b64encode(diagram).decode('ascii')) img = etree.Element('object') img.attrib['data'] = data else: # png format, explicitly set or as a default when format is not recognized data = 'data:image/png;base64,{0}'.format( base64.b64encode(diagram).decode('ascii')) img = etree.Element('img') img.attrib['src'] = data styles = [] if width: styles.append("max-width:" + width) if height: styles.append("max-height:" + height) if styles: style = img.attrib[ 'style'] + 
';' if 'style' in img.attrib and img.attrib[ 'style'] != '' else '' img.attrib['style'] = style + ";".join(styles) img.attrib['width'] = '100%' if 'height' in img.attrib: img.attrib.pop('height') img.attrib['class'] = classes img.attrib['alt'] = alt img.attrib['title'] = title return text[:m.start()] + etree.tostring( img).decode() + text[m.end():], True
def handleMatch(self, m):
    """Compile the matched inline formula and return it as an element.

    Group 2 of the match, stripped of surrounding whitespace, is passed to
    ``compile_latex`` and the returned markup is parsed.
    """
    formula = m.group(2).strip()
    # The meaning of the second positional flag is defined by compile_latex;
    # this pattern always passes True.
    markup = compile_latex(formula, True, self.cache_directory)
    return etree.fromstring(markup)
def _render(self, import_str: str) -> etree.Element:
    """Render the object named by ``import_str`` and parse the result.

    The object is resolved with ``import_from_string``, turned into a
    descriptor by ``self._make_descriptor`` and rendered through
    ``self.template``.
    """
    target = import_from_string(import_str)
    rendered = self.template.render(obj=self._make_descriptor(target))
    return etree.fromstring(rendered)
def handle_match(m):
    """Convert group 3 of the match with ``katex.convert`` and parse the output."""
    rendered = katex.convert(m.group(3))
    return etree.fromstring(rendered)
def render_from_HTML(self, html):
    """Parse ``html`` (encoded as UTF-8) and delegate to ``render_from_dom``."""
    encoded = html.encode('utf-8')
    return self.render_from_dom(etree.fromstring(encoded))
def _replace_block(self, text): # Parse configuration params m = self.FENCED_BLOCK_RE.search(text) if not m: m = self.BLOCK_RE.search(text) if not m: return text, False # Parse configuration params img_format = m.group('format') if m.group('format') else self.config['format'] classes = m.group('classes') if m.group('classes') else self.config['classes'] alt = m.group('alt') if m.group('alt') else self.config['alt'] title = m.group('title') if m.group('title') else self.config['title'] width = m.group('width') if m.group('width') else None height = m.group('height') if m.group('height') else None source = m.group('source') if m.group('source') else None base_dir = self.config['base_dir'] if self.config['base_dir'] else None # Convert image type in PlantUML image format if img_format == 'png': requested_format = "png" elif img_format in ['svg', 'svg_object', 'svg_inline']: requested_format = "svg" elif img_format == 'txt': requested_format = "txt" else: # logger.error("Bad uml image format '"+imgformat+"', using png") requested_format = "png" if source and base_dir: # Load diagram source from external file with open(os.path.join(base_dir, source), 'r') as f: code = f.read() else: # Extract diagram source from markdown text code = m.group('code') # Extract diagram source end convert it (if not external) diagram = self._render_diagram(code, requested_format) self_closed = True # tags are always self closing if img_format == 'txt': # logger.debug(diagram) img = etree.Element('pre') code = etree.SubElement(img, 'code') code.attrib['class'] = 'text' code.text = AtomicString(diagram.decode('UTF-8')) else: # These are images if img_format == 'svg_inline': data = self.ADAPT_SVG_REGEX.sub('<svg \\1\\2>', diagram.decode('UTF-8')) img = etree.fromstring(data.encode('UTF-8')) # remove width and height in style attribute img.attrib['style'] = re.sub(r'\b(?:width|height):\d+px;', '', img.attrib['style']) elif img_format == 'svg': # Firefox handles only base64 encoded SVGs data = 
'data:image/svg+xml;base64,{0}'.format(base64.b64encode(diagram).decode('ascii')) img = etree.Element('img') img.attrib['src'] = data elif img_format == 'svg_object': # Firefox handles only base64 encoded SVGs data = 'data:image/svg+xml;base64,{0}'.format(base64.b64encode(diagram).decode('ascii')) img = etree.Element('object') img.attrib['data'] = data self_closed = False # object tag must be explicitly closed else: # png format, explicitly set or as a default when format is not recognized data = 'data:image/png;base64,{0}'.format(base64.b64encode(diagram).decode('ascii')) img = etree.Element('img') img.attrib['src'] = data styles = [] if width: styles.append("max-width:"+width) if height: styles.append("max-height:"+height) if styles: style = img.attrib['style']+';' if 'style' in img.attrib and img.attrib['style'] != '' else '' img.attrib['style'] = style+";".join(styles) img.attrib['width'] = '100%' if 'height' in img.attrib: img.attrib.pop('height') img.attrib['class'] = classes img.attrib['alt'] = alt img.attrib['title'] = title return text[:m.start()] + etree.tostring(img, short_empty_elements=self_closed).decode() \ + text[m.end():], True
def handleMatch(self, m):
    """Build the centred hidden ``<input>`` holding the matched schematic.

    The ``data`` group of the match is passed through ``escape`` and
    embedded as the ``value`` attribute of a 400x220 ``schematic ctrls``
    input wrapped in a ``<div>``.
    """
    value = escape(m.group('data'))
    markup = ''.join((
        "<div align='center'><input type='hidden' parts='' value='",
        value,
        "' analyses='' class='schematic ctrls' width='400' height='220'/></div>",
    ))
    return etree.fromstring(markup)
def test_etree():
    """Parse a minimal ``<div>`` document and print the resulting element."""
    element = etree.fromstring('<div>Hello</div>')
    print(element)
def htmlToString(self, html):
    """Run ``self.proc`` over ``html`` and return the processed markup.

    The markup is wrapped in a ``<div>`` for parsing; the children of the
    element returned by ``self.proc.run`` are serialised and concatenated,
    so the wrapper itself is not part of the result.
    """
    inputRoot = etree.fromstring('<div>' + html + '</div>')
    outputRoot = self.proc.run(inputRoot)
    chunks = [etree.tostring(child, encoding="utf-8") for child in outputRoot]
    return b''.join(chunks).decode("utf-8")