def _factory(cls, type_input, type_output, **kw):
    """
    Create a converter for type_input if a Pygments lexer can be found.

    The lexer name is searched in two passes: first among the mimetypes
    reported by pygments.lexers.get_all_lexers(), then among a fixed list
    of moin 1.x format names.

    :param type_input: content type of the input data
    :param type_output: requested output type (not inspected here)
    :returns: cls wrapping a new lexer instance; None (implicitly) when no
              lexer name was found
    """
    lexer_name = None

    # pass 1: compare the input type with every mimetype pygments knows
    for name, short_names, patterns, mime_types in pygments.lexers.get_all_lexers():
        for mime_type in mime_types:
            if Type(mime_type).issupertype(type_input):
                lexer_name = name
                break
        if lexer_name:
            break

    # pass 2: formats that were supported by special parsers in moin 1.x
    if lexer_name is None:
        legacy_formats = (
            ('python', 'Python'),
            ('diff', 'Diff'),
            ('irssi', 'IRC logs'),
            ('irc', 'IRC logs'),
            ('java', 'Java'),
            ('cplusplus', 'C++'),
            ('pascal', 'Delphi'),
        )
        for moin_format, candidate in legacy_formats:
            legacy_type = Type('x-moin/format;name={0}'.format(moin_format))
            if legacy_type.issupertype(type_input):
                lexer_name = candidate
                break

    logging.debug("pygments_name: %r" % lexer_name)

    if lexer_name:
        lexer_class = pygments.lexers.find_lexer_class(lexer_name)
        return cls(lexer_class())
def parser(self, name, args, content):
    """
    Build a moin_page.part element describing a parser invocation.

    :param name: either a full content type (contains a '/') or a bare
                 format name, which is wrapped as x-moin/format;name=<name>
    :param args: mapping of argument names to values; falsy means no
                 arguments element is added
    :param content: children for the body element; falsy means no body
    :returns: the moin_page.part element
    """
    if '/' in name:
        content_type = Type(name)
    else:
        content_type = Type(type='x-moin', subtype='format', parameters={'name': name})
    logging.debug("parser type: %r" % (content_type, ))

    part = moin_page.part(attrib={moin_page.content_type: content_type})

    if args:
        arguments = moin_page.arguments()
        part.append(arguments)
        for key, value in args.items():
            # arguments with a falsy key are emitted without a name attribute
            attrib = {moin_page.name: key} if key else {}
            arguments.append(moin_page.argument(attrib=attrib, children=(value, )))

    if content:
        part.append(moin_page.body(children=content))

    return part
def eval_object_type(self, mimetype, href):
    """
    Map a mimetype to the kind of element used to embed it.

    :param mimetype: the type to classify
    :param href: not used by this method
    :returns: one of the str values "img", "video", "audio" or "object"
    """
    # checked in this order; the first major type that matches wins
    known_kinds = (
        ('image/', "img"),
        ('video/', "video"),
        ('audio/', "audio"),
    )
    for major_type, kind in known_kinds:
        if Type(major_type).issupertype(mimetype):
            return kind
    # Nothing else worked...try using <object>
    return "object"
def visit_moinpage_object(self, element):
    """
    Turn an image, video or audio object into a DocBook inline media object.

    Input::

        <object type='image/' xlink:href='uri'/>

    Output::

        <inlinemediaobject>
            <imageobject>
                <imagedata fileref="uri" />
            </imageobject>
        </inlinemediaobject>

    Video and audio objects are handled the same way with the matching
    videoobject/videodata and audioobject/audiodata elements.

    :returns: the inlinemediaobject element, or None when the element has
              no href or its type is not image, video or audio
    """
    href = element.get(xlink.href, None)
    mimetype = Type(_type=element.get(moin_page.type_, CONTENTTYPE_NONEXISTENT))

    if not href:
        return

    if Type('image/').issupertype(mimetype):
        data_tag, object_tag = docbook.imagedata, docbook.imageobject
    elif Type('video/').issupertype(mimetype):
        data_tag, object_tag = docbook.videodata, docbook.videoobject
    elif Type('audio/').issupertype(mimetype):
        data_tag, object_tag = docbook.audiodata, docbook.audioobject
    else:
        return

    data_element = self.new(data_tag, attrib={docbook.fileref: href}, children=[])
    media_element = self.new(object_tag, attrib={}, children=[data_element])
    return self.new(docbook.inlinemediaobject, attrib={}, children=[media_element])
def __init__(self, lexer=None, contenttype=None):
    """
    Create a Pygments Converter.

    When no lexer is passed but a contenttype is, the lexer is looked up
    by mimetype, falling back to the 'text/plain' lexer if pygments does
    not know the mimetype.

    :param lexer: pygments lexer instance
    :param contenttype: contenttype to get a lexer for
    """
    if lexer is None and contenttype is not None:
        parsed = Type(contenttype)
        # pygments can't process parameters (like e.g. ...;charset=utf-8):
        bare_mimetype = '{0}/{1}'.format(parsed.type, parsed.subtype)
        # TODO: fix pygments and remove this workaround for missing mimetypes; see issue #16
        aliases = {
            'text/x.moin.wiki': 'text/x-trac-wiki',
            'text/x.moin.creole': 'text/x-trac-wiki',
            'application/docbook+xml': 'application/xml',
        }
        bare_mimetype = aliases.get(bare_mimetype, bare_mimetype)
        try:
            lexer = pygments.lexers.get_lexer_for_mimetype(bare_mimetype)
        except pygments.util.ClassNotFound:
            lexer = pygments.lexers.get_lexer_for_mimetype('text/plain')
    self.lexer = lexer
def _render_data_highlight(self):
    """
    Render this item's data as syntax-highlighted markup.

    The data is run through the pygments input converter, then through the
    converter registered for 'application/x-xhtml-moin-page', and the
    result is serialized.

    :returns: the value produced by conv_serialize
    """
    from moin.converter import default_registry as reg
    text = self.data_storage_to_internal(self.data)
    # TODO: use registry as soon as it is in there
    from moin.converter.pygments_in import Converter as PygmentsConverter
    highlighter = PygmentsConverter(contenttype=self.contenttype)
    dom = highlighter(text)
    # TODO: Real output format
    to_html = reg.get(type_moin_document, Type('application/x-xhtml-moin-page'))
    return conv_serialize(to_html(dom), {html.namespace: ''})
def handle_macro(self, elem, page):
    """
    Execute the macro described by elem and attach its output to elem.

    Elements without a content type, or whose content type is not
    x-moin/macro, are left untouched. The macro result is appended to a
    body element and any error text to an error element; each of the two
    is attached to elem only if it ended up non-empty.

    :param elem: element carrying the macro content type and arguments
    :param page: passed through to the macro
    """
    logging.debug("handle_macro elem: %r" % elem)
    raw_type = elem.get(moin_page.content_type)
    alt = elem.get(moin_page.alt)

    if not raw_type:
        return

    macro_type = Type(raw_type)
    if macro_type.type != 'x-moin' or macro_type.subtype != 'macro':
        logging.debug("not a macro, skipping: %r" % (macro_type, ))
        return

    name = macro_type.parameters['name']
    context_block = elem.tag == moin_page.part
    args = elem[0] if len(elem) else None

    # NOTE(review): with this and/or form, inline_body() is also chosen if
    # the freshly created body() evaluates as false - confirm how empty
    # elements behave in boolean context before rewriting this expression.
    elem_body = context_block and moin_page.body() or moin_page.inline_body()
    elem_error = moin_page.error()

    try:
        macro_class = plugins.importPlugin(app.cfg, 'macro', name, function='Macro')
        macro = macro_class()
        result = macro((), args, page, alt, context_block)
        elem_body.append(result)
    except PluginMissingError:
        elem_error.append('<<%s>> %s' % (name, _('Error: invalid macro name.')))
    except Exception as e:
        # a faulty macro must not abort rendering of the page and make the
        # wiki UI unusable (by emitting a Server Error); so for any other
        # exception the problem is logged and a standard text is shown.
        logging.exception("Macro {0} raised an exception:".format(name))
        message = _(
            '<<%(macro_name)s: execution failed [%(error_msg)s] (see also the log)>>',
            macro_name=name,
            error_msg=unicode(e),
        )
        elem_error.append(message)

    if len(elem_body):
        elem.append(elem_body)
    if len(elem_error):
        elem.append(elem_error)
def __call__(self, value, start_pos=0, positions=False, **kwargs):
    """
    Tokenize a content type string.

    Examples::

        Input:  u"text/x.moin.wiki;charset=utf-8"
        Output: u"text/x.moin.wiki;charset=utf-8", u"text", u"x.moin.wiki",
                u"charset=utf-8"

        Input:  u"application/pdf"
        Output: u"application/pdf", u"application", u"pdf"

    The same Token object is yielded every time, with its text (and
    optionally pos) updated before each yield.

    :param value: String for tokenization
    :param start_pos: The position number of the first token. For example,
                      if you set start_pos=2, the tokens will be numbered
                      2,3,4,... instead of 0,1,2,...
    :param positions: Whether to record token positions in the token.
    """
    assert isinstance(value, unicode), "{0!r} is not unicode".format(value)
    if u'/' not in value:
        # Add '/' if user forgot do this
        value += u'/'
    tp = Type(value)

    def token_texts():
        # the complete contenttype comes first, in one piece, so it can be
        # found with Term(CONTENTTYPE, contenttype)
        if tp.type is not None and tp.subtype is not None:
            # "value" is not used directly, so Type.__unicode__ can normalize it
            yield unicode(tp)
        # then the individual pieces
        yield tp.type
        if tp.subtype is not None:
            yield tp.subtype
        for key, param_value in tp.parameters.items():
            yield u"{0}={1}".format(key, param_value)

    pos = start_pos
    tk = Token()
    for text in token_texts():
        tk.text = text
        if positions:
            tk.pos = pos
            pos += 1
        yield tk
def _convert(self, doc):
    """
    Convert doc to DocBook XML and send it as the response.

    :param doc: internal document tree
    :returns: the result of send_file for the serialized DocBook document
    """
    from emeraldtree import ElementTree as ET
    from moin.converter import default_registry as reg

    expanded = self._expand_document(doc)

    # internal representation --> DocBook document
    to_docbook = reg.get(type_moin_document, Type('application/docbook+xml'))
    docbook_doc = to_docbook(expanded)

    # namespaces (and their prefixes) of the output form
    output_namespaces = {
        docbook.namespace: '',
        xlink.namespace: 'xlink',
    }

    # serialize into a StringIO object with the appropriate namespaces
    # TODO: Some other operation should probably be done here too
    # like adding a doctype
    file_to_send = StringIO()
    ET.ElementTree(docbook_doc).write(file_to_send, namespaces=output_namespaces)

    # parameters for the reply
    mt = MimeType(mimestr='application/docbook+xml;charset=utf-8')
    content_type = mt.content_type()
    as_attachment = mt.as_attachment(app.cfg)

    # after writing, the position is at the end of the buffer, i.e. it is
    # the size of the data (the value is not passed on below); then rewind
    # to the beginning so the data can be read
    content_length = file_to_send.tell()
    file_to_send.seek(0)

    # Important: empty filename keeps flask from trying to autodetect filename,
    # as this would not work for us, because our file's are not necessarily fs files.
    return send_file(
        file=file_to_send,
        mimetype=content_type,
        as_attachment=as_attachment,
        attachment_filename=None,
        cache_timeout=10,  # wiki data can change rapidly
        add_etags=False,
        etag=None,
        conditional=True,
    )
def _render_data(self):
    """
    Render the item's data via the x-xhtml-moin-page converter.

    Any exception raised on the way is logged together with a fresh
    error id, and the 'crash.html' template is rendered instead.

    :returns: the serialized document, or the rendered crash template
    """
    try:
        from moin.converter import default_registry as reg
        # TODO: Real output format
        dom = self._expand_document(self.internal_representation())
        flaskg.clock.start('conv_dom_html')
        to_html = reg.get(type_moin_document, Type('application/x-xhtml-moin-page'))
        dom = to_html(dom)
        flaskg.clock.stop('conv_dom_html')
        return conv_serialize(dom, {html.namespace: ''})
    except Exception:
        # invalid data or a malfunctioning converter must not crash the
        # item view (otherwise a user might not be able to fix it from
        # the UI).
        error_id = uuid.uuid4()
        logging.exception("An exception happened in _render_data (error_id = %s ):" % error_id)
        return render_template(
            'crash.html',
            server_time=time.strftime("%Y-%m-%d %H:%M:%S %Z"),
            url=request.url,
            error_id=error_id,
        )
def internal_representation(self, attributes=None):
    """
    Return the internal representation of a document using a DOM Tree

    If the revision metadata has a hash for HASH_ALGORITHM, the result is
    looked up in (and afterwards stored to) app.cache under a key derived
    from that hash and the attributes.

    :param attributes: passed to the input converter as "arguments"
    :raises TypeError: if no converter from this item's contenttype to the
                       DOM tree is registered
    :returns: the document tree
    """
    hash_name = HASH_ALGORITHM
    hash_hexdigest = self.rev.meta.get(hash_name)
    if hash_hexdigest:
        cid = cache_key(usage="internal_representation",
                        hash_name=hash_name,
                        hash_hexdigest=hash_hexdigest,
                        attrs=repr(attributes))
        cached = app.cache.get(cid)
    else:
        # likely a non-existing item
        cached = cid = None

    if cached is not None:
        return cached

    # Check whether the conversion FROM_mimetype --> DOM is possible;
    # if so do the transformation, otherwise give up.
    from moin.converter import default_registry as reg
    input_conv = reg.get(Type(self.contenttype), type_moin_document)
    if not input_conv:
        raise TypeError("We cannot handle the conversion from {0} to the DOM tree".format(self.contenttype))
    smiley_conv = reg.get(type_moin_document, type_moin_document, icon='smiley')

    # The conversion can be processed
    links = Iri(scheme='wiki', authority='', path='/' + self.name)
    doc = input_conv(self.rev, self.contenttype, arguments=attributes)
    # XXX is the following assuming that the top element of the doc tree
    # is a moin_page.page element? if yes, this is the wrong place to do that
    # as not every doc will have that element (e.g. for images, we just get
    # moin_page.object, for a tar item, we get a moin_page.table):
    doc.set(moin_page.page_href, unicode(links))

    smiley_contenttypes = (u'text/x.moin.wiki', u'text/x-mediawiki', u'text/x.moin.creole', )
    if self.contenttype.startswith(smiley_contenttypes):
        doc = smiley_conv(doc)

    if cid:
        app.cache.set(cid, doc)
    return doc
def contenttype_validator(element, state):
    """ a supported content type """
    # If nothing was submitted, fill the element from state: first the
    # current contenttype, then the guessed one, then CONTENTTYPE_DEFAULT.
    if element.raw is Unset:
        fallback = state.get('contenttype_current')
        if fallback is None:
            fallback = state.get('contenttype_guessed')
        if fallback is None:
            fallback = CONTENTTYPE_DEFAULT
        element.set(fallback)

    candidate = element.value
    if not isinstance(candidate, unicode):
        return False

    ct = Type(candidate)
    supported_major_types = ('text', 'image', 'audio', 'video', 'application', )
    if ct.type not in supported_major_types:
        return False
    if not ct.subtype:
        return False

    if ct.type == 'text':
        charset = ct.parameters.get('charset')
        if charset is None:
            # we must have the charset, otherwise decoding is impossible
            return False
        if charset.lower() not in ('ascii', 'utf-8', ):
            # currently we do not recode
            return False

    return True
def decode_data(data, contenttype=None):
    """
    read and decode data, return unicode text

    supported types for data:
    - rev object
    - str
    - unicode

    file-like objects and str need to be either utf-8 (or ascii, which is a
    subset of utf-8) encoded or contenttype (including a charset parameter)
    needs to be given.

    :param data: rev object, str or unicode
    :param contenttype: optional content type; its charset parameter, if
                        present, selects the codec used for decoding
    :raises TypeError: if the result is not unicode
    :returns: unicode text
    """
    if not isinstance(data, (str, unicode)):
        # neither str nor unicode: read the content from its data attribute
        data = data.data.read()

    if isinstance(data, str):
        if contenttype is None:
            coding = 'utf-8'
        else:
            coding = Type(contenttype).parameters.get('charset', 'utf-8')
        data = data.decode(coding)

    if isinstance(data, unicode):
        return data

    raise TypeError("data must be rev or str (requires contenttype with charset) or unicode, "
                    "but we got {0!r}".format(data))
def validate_data(meta, data):
    """
    validate the data contents, if possible

    Only text content with an ascii or utf-8 charset is checked, by trying
    to decode it. A text content type without a charset parameter is
    treated as utf-8 instead of failing with a KeyError.

    :param meta: metadata dict
    :param data: data file; it is read completely and rewound afterwards
    :return: validation ok [bool]
    """
    ct = Type(meta[keys.CONTENTTYPE])
    if ct.type != 'text':
        return True  # we can't validate non-text mimetypes, so assume it is ok

    # a missing charset must not crash validation; assume utf-8
    coding = ct.parameters.get('charset', 'utf-8').lower()
    if coding not in ['ascii', 'utf-8', ]:
        return True  # checking 8bit encodings this way is pointless, decoding never raises

    text_bytes = data.read()
    data.seek(0)  # rewind, so it can be read again
    try:
        text_bytes.decode(coding)
    except UnicodeDecodeError:
        return False
    return True
self.indent_repl(iter_content, stack, line, **data) else: self.indent_repl(iter_content, stack, line, '', line) return body def parse_inline(self, text, stack, inline_re): """Recognize inline elements within the given text""" lines = text.split('\n') text = [] for line in lines: text.append(self.preprocessor(line)) text = '\n'.join(text) pos = 0 for match in inline_re.finditer(text): # Handle leading text stack.top_append_ifnotempty(text[pos:match.start()]) pos = match.end() self._apply(match, 'inline', stack) # Handle trailing text stack.top_append_ifnotempty(text[pos:]) default_registry.register(Converter.factory, Type('x-moin/format;name=mediawiki'), type_moin_document) default_registry.register(Converter.factory, Type('text/x-mediawiki'), type_moin_document)
def register(cls):
    """
    Register cls in content_registry and return it unchanged.

    The registry entry is built from the class attributes _factory,
    contenttype, default_contenttype_params, display_name and
    ingroup_order, with middle priority, and is filed under cls.group.

    :param cls: the class to register
    :returns: cls
    """
    entry = RegistryContent.Entry(
        cls._factory,
        Type(cls.contenttype),
        cls.default_contenttype_params,
        cls.display_name,
        cls.ingroup_order,
        RegistryContent.PRIORITY_MIDDLE,
    )
    content_registry.register(entry, cls.group)
    return cls
separator.append(':----:') elif th.attrib.get(moin_page.class_, None) == 'left': separator.append(':-----') elif th.attrib.get(moin_page.class_, None) == 'right': separator.append('-----:') else: separator.append('------') separator = Markdown.table_marker.join(separator) ret = self.open_children(elem) ret = ret + u'{0}{1}{0}\n'.format(Markdown.table_marker, separator) return ret def open_moinpage_table_body(self, elem): ret = self.open_children(elem) return ret def open_moinpage_table_row(self, elem): ret = self.open_children(elem, join_char=Markdown.table_marker) return u'{0}{1}{0}\n'.format(Markdown.table_marker, ret) def open_moinpage_table_of_content(self, elem): return u"\n[TOC]\n" def open_xinclude(self, elem): """Processing of transclusions is similar to objects.""" return self.open_moinpage_object(elem) default_registry.register(Converter._factory, type_moin_document, Type("text/x-markdown")) default_registry.register(Converter._factory, type_moin_document, Type('x-moin/format;name=markdown'))
def parse_inline(self, text, stack, inline_re=inline_re):
    """
    Recognize inline elements within the given text

    Text between matches (and after the last match) is appended to the
    top of the stack if not empty; each match is dispatched via _apply.

    :param text: the text to scan
    :param stack: receives plain text and the elements created for matches
    :param inline_re: compiled pattern used to find inline markup
    """
    cursor = 0
    for match in inline_re.finditer(text):
        start, end = match.span()
        # Handle leading text
        stack.top_append_ifnotempty(text[cursor:start])
        cursor = end
        self._apply(match, 'inline', stack)
    # Handle trailing text
    stack.top_append_ifnotempty(text[cursor:])


def macro_text(self, text):
    """
    Return an ET tree branch representing the markup present in the input text.

    Used for FootNotes, etc.

    :param text: text with inline markup
    :returns: a moin_page.p element filled by parse_inline
    """
    paragraph = moin_page.p()
    stack = _Stack(paragraph, iter_content=_Iter(text))
    self.parse_inline(text, stack, self.inline_re)
    return paragraph


default_registry.register(Converter.factory, type_moin_creole, type_moin_document)
default_registry.register(Converter.factory, Type('x-moin/format;name=creole'), type_moin_document)
\s | [,.:;!?()] (\s | $) ) ) ) """ % dict(uri_schemes='|'.join(URI_SCHEMES)) def inline_url_repl(self, stack, url, url_target): url = Iri(url_target) attrib = {xlink.href: url} element = moin_page.a(attrib=attrib, children=[url_target]) stack.top_append(element) inline = Converter.inline + ( inline_freelink, inline_url, ) inline_re = re.compile('|'.join(inline), re.X | re.U) from . import default_registry from moin.util.mime import Type, type_moin_document, type_moin_wiki default_registry.register(ConverterFormat19.factory, Type('text/x.moin.wiki;format=1.9'), type_moin_document) default_registry.register(ConverterFormat19.factory, Type('x-moin/format;name=wiki;format=1.9'), type_moin_document)
# Run the tree-processors for treeprocessor in self.markdown.treeprocessors.values(): new_md_root = treeprocessor.run(md_root) if new_md_root is not None: md_root = new_md_root # }}} end stolen from Markdown.convert add_lineno = bool(flaskg and flaskg.add_lineno_attr) # run markdown post processors and convert from ElementTree to an EmeraldTree object converted = self.do_children(md_root, add_lineno=add_lineno) # convert html embedded in text strings to EmeraldTree nodes self.convert_embedded_markup(converted) # convert P-tags containing block elements to DIV-tags self.convert_invalid_p_nodes(converted) body = moin_page.body(children=converted) root = moin_page.page(children=[body]) return root default_registry.register(Converter._factory, Type("text/x-markdown"), type_moin_document) default_registry.register(Converter._factory, Type('x-moin/format;name=markdown'), type_moin_document)
from moin.util.tree import moin_page

from ._util import decode_data, normalize_split_text


class Converter(object):
    """
    Turn raw text into a document tree consisting of one code block.
    """
    @classmethod
    def _factory(cls, type_input, type_output, **kw):
        """Return a new converter; the arguments are not inspected."""
        return cls()

    def __call__(self, data, contenttype=None, arguments=None):
        """
        Convert data into page > body > blockcode.

        :param data: input accepted by decode_data
        :param contenttype: passed to decode_data
        :param arguments: not used
        :returns: a moin_page.page element
        """
        lines = normalize_split_text(decode_data(data, contenttype))
        code = moin_page.blockcode()
        for line in lines:
            # separate lines by a newline once the block has content
            if len(code) > 0:
                code.append('\n')
            code.append(line.expandtabs())
        return moin_page.page(children=(moin_page.body(children=(code, )), ))


from . import default_registry
from moin.util.mime import Type, type_moin_document
# Assign a lower priority (= bigger number) so that it is tried after pygments_in
default_registry.register(Converter._factory, Type(type='text'), type_moin_document,
                          default_registry.PRIORITY_MIDDLE + 1)
""" tag_to_return = self.simple_tags[element.tag.name] return self.new_copy(tag_to_return, element, depth, attrib={}) def start_dom_tree(self, element, depth): """ Return the root element of the DOM tree, with all the children. We also add a <table-of-content> element if needed. """ attrib = {} if self.standard_attribute: attrib.update(self.standard_attribute) self.standard_attribute = {} children = [] children.append(self.visit(element, depth)) # We show the table of content only if it is not empty if self.is_section: children.insert( 0, self.new(moin_page('table-of-content'), attrib={}, children={})) body = self.new(moin_page.body, attrib={}, children=children) return self.new(moin_page.page, attrib=attrib, children=[body]) from . import default_registry from moin.util.mime import Type, type_moin_document default_registry.register(Converter._factory, Type('application/docbook+xml'), type_moin_document)
self.__text = [] def write_text(self, text): self.__text.append(text) def read_result(self): return u''.join(self.__text) class PDFIndexingConverter(object): @classmethod def _factory(cls, input, output, **kw): return cls() def __call__(self, rev, contenttype=None, arguments=None): rsrcmgr = PDFResourceManager() device = UnicodeConverter(rsrcmgr, laparams=LAPARAMS) try: interpreter = PDFPageInterpreter(rsrcmgr, device) for page in PDFPage.get_pages(rev): interpreter.process_page(page) return device.read_result() finally: device.close() from . import default_registry from moin.util.mime import Type, type_text_plain default_registry.register(PDFIndexingConverter._factory, Type('application/pdf'), type_text_plain)
Support listing zip files. """ def list_contents(self, fileobj): try: rows = [] zf = zipfile.ZipFile(fileobj, mode='r') for zinfo in zf.filelist: if not (zinfo.file_size == 0 and zinfo.filename.endswith('/')): # display only normal files, not directories rows.append(( zinfo.file_size, datetime(*zinfo.date_time), # y,m,d,h,m,s zinfo.filename, )) return rows except (RuntimeError, zipfile.BadZipfile) as err: # RuntimeError is raised by zipfile stdlib module in case of # problems (like inconsistent slash and backslash usage in the # archive or a defective zip file). raise ArchiveException(str(err)) from . import default_registry from moin.util.mime import Type, type_moin_document default_registry.register(TarConverter._factory, Type('application/x-tar'), type_moin_document) default_registry.register(TarConverter._factory, Type('application/x-gtar'), type_moin_document) default_registry.register(ZipConverter._factory, Type('application/zip'), type_moin_document)
def __call__(self, content_type, *args, **kw):
    """
    Invoke the factory if this entry's content type covers content_type.

    :param content_type: content type to check and hand to the factory
    :returns: the factory's result, or None if this entry's content type
              is not a supertype of content_type
    """
    if not self.content_type.issupertype(Type(content_type)):
        return None
    return self.factory(content_type, *args, **kw)
self.table_rowstyle = u'' if table_cellclass: attrib.append(u'class="{0}"'.format(table_cellclass)) if table_cellstyle: attrib.append(u'style="{0}"'.format(table_cellstyle)) if number_rows_spanned: attrib.append(u'rowspan="{0}"'.format(number_rows_spanned)) if number_columns_spanned > 1: attrib.append(u'colspan="{0}"'.format(number_columns_spanned)) attrib = u' '.join(attrib) if attrib: ret += u'<{0}>'.format(attrib) childrens_output = self.open_children(elem) return ret + childrens_output def open_moinpage_table_of_content(self, elem): return u"<<TableOfContents({0})>>\n".format( elem.get(moin_page.outline_level, u"")) def open_xinclude(self, elem): """Processing of transclusions is similar to objects.""" return self.open_moinpage_object(elem) default_registry.register(Converter.factory, type_moin_document, type_moin_wiki) default_registry.register(Converter.factory, type_moin_document, Type('x-moin/format;name=wiki'))
from emeraldtree import ElementTree as ET

from moin.util.iri import Iri
from moin.util.tree import moin_page, xlink


class Converter(object):
    """
    Convert a unsupported item to DOM Tree.

    The resulting page only contains a download link for the revision.
    """
    @classmethod
    def _factory(cls, input, output, **kw):
        """Return a new converter; the arguments are not inspected."""
        return cls()

    def __call__(self, rev, contenttype=None, arguments=None):
        """
        :param rev: revision; its item name (or, failing that, the first
                    name in its metadata) and its revid are used
        :param contenttype: not used
        :param arguments: not used
        :returns: a moin_page.page element wrapping the download link
        """
        item_name = rev.item.name or rev.meta['name'][0]
        download_iri = Iri(scheme='wiki', authority='', path='/' + item_name,
                           query='do=get&rev={0}'.format(rev.revid))
        link = moin_page.a(attrib={xlink.href: download_iri},
                           children=[u"Download {0}.".format(item_name)])
        return moin_page.page(children=(moin_page.body(children=(link, )), ))


from . import default_registry
from moin.util.mime import Type, type_moin_document
default_registry.register(Converter._factory, Type('application/octet-stream'), type_moin_document)
default_registry.register(Converter._factory, Type(type=None, subtype=None), type_moin_document)
from emeraldtree import ElementTree as ET

from moin.constants.contenttypes import CONTENTTYPE_NONEXISTENT
from moin.i18n import _, L_, N_
from moin.util.iri import Iri
from moin.util.tree import moin_page, xlink
from moin.util.mime import Type, type_moin_document

from . import default_registry


class Converter(object):
    """
    Convert a non-existing item to DOM Tree.

    The resulting page only contains a link that opens the item for
    modification.
    """
    @classmethod
    def _factory(cls, input, output, **kw):
        """Return a new converter; the arguments are not inspected."""
        return cls()

    def __call__(self, rev, contenttype=None, arguments=None):
        """
        :param rev: revision whose item's fully qualified name is used
        :param contenttype: not used
        :param arguments: not used
        :returns: a moin_page.page element wrapping the "create it" link
        """
        item_name = rev.item.fqname.value
        modify_iri = Iri(scheme='wiki', authority='', path='/' + item_name, query='do=modify')
        label = _("%(item_name)s does not exist. Create it?", item_name=item_name)
        link = moin_page.a(attrib={xlink.href: modify_iri}, children=[label])
        return moin_page.page(children=(moin_page.body(children=(link, )), ))


default_registry.register(Converter._factory, Type(CONTENTTYPE_NONEXISTENT), type_moin_document)
class Converter(object):
    """
    Convert a non-existing item to DOM Tree.

    The resulting page only contains a link that opens the item for
    modification.
    """
    @classmethod
    def _factory(cls, input, output, **kw):
        """Return a new converter; the arguments are not inspected."""
        return cls()

    def __call__(self, rev, contenttype=None, arguments=None):
        """
        :param rev: revision whose item's fully qualified name is used
        :param contenttype: not used
        :param arguments: not used
        :returns: a moin_page.page element wrapping the "create it" link
        """
        item_name = rev.item.fqname.value
        modify_iri = Iri(scheme='wiki', authority='', path='/' + item_name, query='do=modify')
        label = _("%(item_name)s does not exist. Create it?", item_name=item_name)
        link = moin_page.a(attrib={xlink.href: modify_iri}, children=[label])
        return moin_page.page(children=(moin_page.body(children=(link, )), ))


from . import default_registry
from moin.util.mime import Type, type_moin_document
default_registry.register(Converter._factory, Type(CONTENTTYPE_NONEXISTENT), type_moin_document)
logging = log.getLogger(__name__)

# matches one tag: '<', then as few non-'>' characters as possible, then '>'
RX_STRIPXML = re.compile(u"<[^>]*?>", re.U | re.DOTALL | re.MULTILINE)


def strip_xml(text):
    """
    Replace every XML tag in text by a space and collapse whitespace.

    :param text: text containing XML markup
    :returns: the text without tags, words separated by single spaces
    """
    without_tags = RX_STRIPXML.sub(u" ", text)
    return u' '.join(without_tags.split())


class XMLIndexingConverter(object):
    """
    We try to generically extract contents from XML documents by just throwing
    away all XML tags. This is for indexing, so this might be good enough.
    """
    @classmethod
    def _factory(cls, input, output, **kw):
        """Return a new converter; the arguments are not inspected."""
        return cls()

    def __call__(self, rev, contenttype=None, arguments=None):
        """
        :param rev: input accepted by decode_data
        :param contenttype: passed to decode_data
        :param arguments: not used
        :returns: the decoded text with all tags stripped
        """
        return strip_xml(decode_data(rev, contenttype))


from . import default_registry
from moin.util.mime import Type, type_text_plain
default_registry.register(XMLIndexingConverter._factory, Type('text/xml'), type_text_plain)