예제 #1
0
def call_macro(wcontext, name, elements):
    """
    Invoke the macro called `name`, passing `elements` as positional args.

    Lookup order:
      1. wcontext.macro_handler, if set (lets the caller override everything)
      2. callables defined in embedded wikitext (skipped in restricted mode)
      3. unsafe plugin macros (skipped in restricted mode)
      4. safe plugin macros

    Returns whatever process_macro_result() produces from the macro's return
    value. Raises WikError if the macro is unknown/not allowed, or if the
    macro (or the handler) raises while being called.
    """
    plugins = load_plugins(wcontext.plugin_dirs)

    # Give the caller-supplied handler first shot at the macro.
    handled = False
    if wcontext.macro_handler is not None:
        try:
            handled, result = wcontext.macro_handler(name, wcontext, *elements)
        except:
            message = "ERROR CALLING MACRO <<%s>> with args:\n%s\n\n" % (
                name, xmltrace(elements))
            raise WikError(message, '', exception_to_text(), '')

    # The lookup below runs whether or not the handler took the call;
    # 'handled' is only consulted afterwards.
    func = None
    if not wcontext.restricted_mode:
        # functions defined in embedded wikitext come first; anything
        # there that is not callable is ignored
        embedded = getattr(wcontext.mod_embedded, name, None)
        if callable(embedded):
            func = embedded
        else:
            # then the unsafe plugin macros
            func = plugins.unsafe.get(name, None)

    # safe macros are the last resort and are always permitted
    if func is None:
        func = plugins.safe.get(name, None)

    if handled:
        # the handler already produced the result
        return process_macro_result(wcontext, name, result)

    if func is None:
        raise WikError("Macro '%s' not defined, or not allowed to call." %
                       name)

    # call with positional args
    try:
        result = func(wcontext, *elements)
    except:
        message = "ERROR CALLING MACRO <<%s>> with args:\n%s\n\n" % (
            name, xmltrace(elements))
        raise WikError(message, '', exception_to_text(), '')

    return process_macro_result(wcontext, name, result)
예제 #2
0
def codebox(context, title, codetext):
    """
	|!Usage|{{{<<codebox title arg [arg ...]>>}}}|
	|!Description|Like a regular code box, except with a title.|
	|!Example|{{{<<codebox "A Title" '''
for i in range(10):
	print i'''>>}}}|
	|!Result|<<codebox "A Title" '''
for i in range(10):
	print i'''>>|
	"""
    # The code itself has to arrive as one plain text argument.
    if codetext.tag != 'TextMacroArg':
        raise WikError("'codetext' must be a single text argument - got %s" %
                       codetext.tag)

    # Title bar: the title argument is evaluated as wiki text.
    titlediv = DIV('wikkly-codebox-title')
    for piece in eval_wiki_macro_args(context, [title]):
        titlediv.append(piece)

    # Body: wrap the raw text in a TextCode node so it gets the proper
    # code-style escaping when rendered.
    body = DIV('wikkly-codebox-body')
    body.append(Text(codetext.text, 'TextCode'))

    # Outer container holds title first, then body.
    container = DIV('wikkly-codebox-container')
    for part in (titlediv, body):
        container.append(part)

    return [container]
예제 #3
0
def insert_pycode(wcontext, txt):
    """
    Execute embedded Python source in the wiki's embedded-code namespace.

    `txt` is executed with `wcontext.mod_embedded.__dict__` as its globals,
    so any functions and imported modules it defines land in that namespace.
    Afterwards a set of default names (Element, SubElement, Text, WikError,
    FS_CWD, plus everything in the plugins' `embed` mapping) is written into
    the same namespace, overwriting any same-named definitions from `txt`.

    Raises WikError if executing `txt` raises.
    """
    try:
        # Run the code with mod_embedded's dict as globals. The tuple form
        # exec(code, globals) is equivalent to the Python 2 statement
        # "exec code in globals" and is also the correct call on Python 3,
        # where "exec(txt) in ns" would execute in the local scope and then
        # merely evaluate a membership test.
        # NOTE(review): this executes arbitrary Python taken from wiki
        # content -- callers must only reach here for trusted input.
        exec(txt, wcontext.mod_embedded.__dict__)
    except:
        # Intentionally broad (unchanged): any failure in the embedded code
        # is reported as a WikError carrying the start of the source.
        raise WikError("Error compiling <?py ... ?> macro", txt[:80],
                       exception_to_text(), '')

    # add some default globals
    eglobals = {
        'Element': Element,
        'SubElement': SubElement,
        'Text': Text,
        'WikError': WikError,
        'FS_CWD': wcontext.var_get_text('$FS_CWD'),
    }

    # add names from __all__ in plugins as well
    plugins = load_plugins(wcontext.plugin_dirs)
    eglobals.update(plugins.embed)

    wcontext.mod_embedded.__dict__.update(eglobals)
예제 #4
0
    def beginHighlight(self, style=None):
        """
        Open a new 'Highlight' node, optionally with an inline style.

        Supported forms of `style`:
          1. None                          - standard highlight, no 'style'
                                             attribute is set
          2. '@@prop1: v1; prop2: v2; ...' - the property list is used as-is
          3. '@@color(color):'             - becomes 'color: <color>;'
          4. '@@bgcolor(color):'           - becomes 'background: <color>;'

        Raises WikError if `style` is given but matches none of the forms.
        """
        self.pushnew('Highlight')
        if style is None:
            return

        # case #2 (sync w/wikklytext.lexer:t_HIGHLIGHT_CSS)
        # Raw strings keep the regex escapes (\s, \() out of Python's own
        # string-escape processing.
        m2 = re.match(r'@@((\s*[a-zA-Z][a-zA-Z0-9-]*\s*:.+?;)+)', style)
        # case #3
        m3 = re.match(r'@@color\((.+?)\):', style)
        # case #4
        m4 = re.match(r'@@bgcolor\((.+?)\):', style)

        # Precedence is case #2, then #3, then #4.
        if m2:
            # @@prop1: style1; prop2: style2; ... ;
            css = m2.group(1)
        elif m3:
            # @@color(..): ... @@
            css = "color: %s;" % m3.group(1)
        elif m4:
            # @@bgcolor(..): ... @@
            css = "background: %s;" % m4.group(1)
        else:
            raise WikError("Unknown style: %s" % repr(style))

        # set style info into the Highlight node just opened
        self.curnode.set('style', css)
예제 #5
0
    def endNoWiki(self):
        """
        Close a <NoWiki> section.

        As with RawHTML, the <NoWiki> wrapper is dropped from the tree and
        its single <TextNoWiki> child is appended to the wrapper's parent.
        Raises WikError if the wrapper does not hold exactly one TextNoWiki.
        """
        self.popto('NoWiki')
        wrapper = self.curnode

        # sanity: exactly one child, and it must be the TextNoWiki node
        well_formed = len(wrapper) == 1 and wrapper[0].tag == 'TextNoWiki'
        if not well_formed:
            raise WikError("Internal error - bad nodes under <NoWiki>")

        inner = wrapper[0]

        # step up to the parent, then swap the wrapper for its text node
        self.popnode('NoWiki')
        parent = self.curnode
        parent.remove(wrapper)
        parent.append(inner)
예제 #6
0
def process_macro_result(wcontext, name, result):
    """
    Normalise the return value of macro `name` into an ElementList.

    Accepted return values: a single Element, a single unicode/str, or a
    list/tuple of those (an 'ElementList' element is iterated directly).
    Elements are kept as they are; text values are evaluated as wiki text
    and the resulting elements are appended.

    Raises WikError if the result, or any item in it, has another type.
    """
    from wikklytext.eval import eval_wiki_text

    # Step 1: make 'result' iterable.
    if iselement(result):
        # an ElementList is already list-like; any other element is wrapped
        if result.tag != 'ElementList':
            result = [result]
    elif isinstance(result, (unicode, str)):
        result = [result]
    elif not isinstance(result, (list, tuple)):
        raise WikError("Calling <<%s>>\nMacros must return Elements or Unicode\nGot: %s, '%s'" % \
          (name, type(result), repr(result)))

    # Step 2: collect each item into the output list.
    outnode = ElementList()
    for val in result:
        if iselement(val):
            # leave Elements alone
            outnode.append(val)
            continue

        if not isinstance(val, (unicode, str)):
            raise WikError("Calling <<%s>>\nMacros must return Elements or Unicode\nGot: %s, '%s'" % \
              (name, type(val), repr(val)))

        # Byte strings are implicitly converted with unicode() as a
        # convenience; macros should really return Unicode for the most
        # robust code.
        if isinstance(val, unicode):
            wikitext = val
        else:
            wikitext = unicode(val)

        # parse text -> Elements
        for parsed in eval_wiki_text(wcontext, wikitext):
            outnode.append(parsed)

    return outnode
예제 #7
0
    def endRawHTML(self):
        """
        Close a <RawHTML> section.

        Expected shape: <RawHTML><TextHTML> ... </TextHTML></RawHTML>.
        <RawHTML> was only needed so that the right kind of text node got
        created; since <TextHTML> captures everything, the wrapper is
        removed and the TextHTML node is appended to the wrapper's parent.
        Raises WikError if the wrapper does not hold exactly one TextHTML.
        """
        self.popto('RawHTML')
        wrapper = self.curnode

        # sanity: exactly one child, and it must be the TextHTML node
        well_formed = len(wrapper) == 1 and wrapper[0].tag == 'TextHTML'
        if not well_formed:
            raise WikError("Internal error - bad nodes under <RawHTML>")

        inner = wrapper[0]

        # step up to the parent, then swap the wrapper for its text node
        self.popnode('RawHTML')
        parent = self.curnode
        parent.remove(wrapper)
        parent.append(inner)
예제 #8
0
def make_html(node, wcontext, parent_map, prevsib_map, add_classes):
	"""
	Render `node` and, recursively, its child nodes to an HTML string.

	Parameters:
	  node        -- element to render; the output is chosen by node.tag.
	  wcontext    -- rendering context. Read here: .stoptime (time limit),
	                 .restricted_mode (drops raw HTML, inline styles and
	                 unsafe URLs) and .var_get_int('$LINKS_NEW_WINDOW').
	  parent_map  -- mapping node -> parent; only used to build the trace
	                 for the "Bad TableCell" error.
	  prevsib_map -- mapping node -> previous sibling; only used to shorten
	                 BlankLines that directly follow a Heading.
	  add_classes -- list of extra CSS class names handed to strclass().
	                 A CSSBlock pushes its class while its children are
	                 rendered and pops it again afterwards.

	Returns the HTML text. Once the time limit has passed, the string
	'Max runtime exceeded!!<p>' is returned for the node instead.

	Raises WikError for an unknown tag or a TableCell without a 'type'
	attribute, and Exception for a link type resolve_URL() is not expected
	to return.
	"""
	from time import time

	# check time limit at each node
	if time() > wcontext.stoptime:
		return 'Max runtime exceeded!!<p>'

	css = 'wikkly'

	# node tag -> (HTML tag, CSS class or None)
	simpletags = {
		'Bold': ('b',None),
		'Italic': ('i',None),
		'Strikethrough': ('span','%s-strike' % css),
		'Underline': ('span','%s-u' % css),
		'Superscript': ('sup',None),
		'Subscript': ('sub',None),
		'NumberedList': ('ol', '%s-ol' % css),
		'UnnumberedList': ('ul', '%s-ul' % css),
		}

	html = ''

	if node.tag in simpletags:
		tag, cssclass = simpletags[node.tag]
		#if cssclass is None:
		#	html += '<%s>' % tag
		#else:
		#	html += '<%s class="%s">' % (tag,cssclass)
		html += '<%s class="%s">' % (tag, strclass(cssclass, add_classes))

		close = '</%s>' % tag

	elif node.tag == 'NumberedListItem':
		c = "%s-ol-li%d" % (css, int(node.get('level')))
		html += '<li class="%s">' % strclass(c, add_classes)
		close = '</li>'

	elif node.tag == 'UnnumberedListItem':
		c = "%s-ul-li%d" % (css, int(node.get('level')))
		html += '<li class="%s">' % strclass(c, add_classes)
		close = '</li>'

	elif node.tag == 'Heading':
		c = "%s-h%d" % (css, int(node.get('level')))
		html += '<h%d class="%s">' % (int(node.get('level')), strclass(c, add_classes))
		close = '</h%d>' % int(node.get('level'))

	elif node.tag == 'BlockIndent':
		c = "%s-block-indent" % css
		html += '<div class="%s">' % strclass(c, add_classes)
		close = '</div>'

	elif node.tag == 'LineIndent':
		c = "%s-line-indent" % css
		html += '<div class="%s">' % strclass(c, add_classes)
		close = '</div>'

	elif node.tag == 'CodeBlock':
		c = "%s-code-block" % css
		html += '<div class="%s">' % strclass(c, add_classes)
		close = '</div>'

	elif node.tag == 'CodeInline':
		c = "%s-code-inline" % css
		html += '<span class="%s">' % strclass(c, add_classes)
		close = '</span>'

	elif node.tag == 'ErrorsList':
		close = ''

	elif node.tag == 'Error':
		# I don't apply add_classes here since these are internally generated nodes
		html += '<div class="%s-error-container">' % css
		close = '</div>'

	elif node.tag == 'ErrorMessage':
		# I don't apply add_classes here since these are internally generated nodes
		html += '<div class="%s-error-head">Error Message</div>' % css
		html += '<div class="wikkly-error-body">'
		close = '</div>'

	elif node.tag == 'ErrorLookingAt':
		# I don't apply add_classes here since these are internally generated nodes
		html += '<div class="wikkly-error-head">Looking at:</div>'
		html += '<div class="wikkly-error-body">'
		close = '</div>'

	elif node.tag == 'ErrorTrace':
		# I don't apply add_classes here since these are internally generated nodes
		html += '<div class="wikkly-error-head">Traceback:</div>'
		html += '<div class="wikkly-error-body">'
		close = '</div>'

	elif node.tag in ['Text', 'TextNoWiki']:
		# These are not allowed to have inner tags. Enforce this by
		# escaping & returning text immediately, not checking for subnodes.
		return escapeText(node.text or '')

	elif node.tag == 'TextCode':
		return escapeTextCode(node.text or '')

	elif node.tag == 'TextHTML':
		if wcontext.restricted_mode:
			return '' # <html> not allowed in Safe mode

		text = node.text or ''
		# no escaping, leave as raw HTML
		return text

	# these are just for structure in XML stream, can ignore here
	elif node.tag in ['WikklyContent', 'Content', 'ElementList']:
		close = ''

	elif node.tag == 'BlankLines':
		# no inner tags allowed, can return immediately
		#
		# NOTE: HTML headers seem to include some extra builtin padding (even
		# more that the margin), so after <Heading>, decrement the BlankLines count
		if prevsib_map[node].tag == 'Heading':
			return '<br/>' * (int(node.get('count')))
		else:
			return '<br/>' * (int(node.get('count'))+1)

	elif node.tag == 'Highlight':
		if node_contains_block_elements(node):
			tagopen = 'div'
			close = '</div>'
		else:
			tagopen = 'span'
			close = '</span>'

		# use style if given, else use default highlight style
		if 'style' in node.keys():
			if wcontext.restricted_mode:
				# remove inline styling in safe mode (to remove XSS issues)
				html += '<%s>' % tagopen
			else:
				html += '<%s style="%s">' % (tagopen, node.get('style'))
		else:
			c = "%s-highlight" % css
			html += '<%s class="%s">' % (tagopen, strclass(c, add_classes))

	elif node.tag == 'Table':
		c = "%s-table" % css
		html += '<table class="%s">' % strclass(c, add_classes)
		close = '</table>'

	elif node.tag == 'TableCaption':
		c = "%s-caption" % css
		html += '<caption class="%s">' % strclass(c, add_classes)
		close = '</caption>'

	elif node.tag == 'TableRow':
		c = "%s-tr" % css
		html += '<tr class="%s">' % strclass(c, add_classes)
		close = '</tr>'

	elif node.tag == 'TableCell':
		# cells flagged with a non-zero 'skip' attribute produce no output
		if int(node.get('skip','0')):
			return ''

		if 'type' not in node.keys():
			raise WikError("Bad TableCell", xmltrace(parent_map[node]))

		if node.get('type') == 'data':
			tag = 'td'
			close = '</td>'
		else:
			tag = 'th'
			close = '</th>'

		style = 'text-align: %s;' % sanitize_text_align(node.get('text-align'))
		if 'bgcolor' in node.keys() and not wcontext.restricted_mode:
			style += 'background: %s;' % node.get('bgcolor')

		c = "%s-%s" % (css, tag)
		html += '<%s class="%s" colspan="%d" rowspan="%d" style="%s">' % \
				(tag, strclass(c, add_classes), int(node.get('colspan','1')),
				int(node.get('rowspan','1')), style)

	elif node.tag == 'DefinitionList':
		c = "%s-dl" % css
		html += '<dl class="%s">' % strclass(c, add_classes)
		close = '</dl>'

	elif node.tag == 'DefinitionEntry':
		# for XML structure only, no HTML rendering
		close = ''

	elif node.tag == 'DefinitionTerm':
		c = "%s-dt" % css
		html += '<dt class="%s">' % strclass(c, add_classes)
		close = '</dt>'

	elif node.tag == 'DefinitionDef':
		c = "%s-dd" % css
		html += '<dd class="%s">' % strclass(c, add_classes)
		close = '</dd>'

	elif node.tag == 'Link':
		# write entire block here and return
		source = node.find('LinkSource').find('Text').text
		dest = node.find('LinkDest').find('Text').text

		url,linktype = resolve_URL(wcontext, dest)

		if wcontext.restricted_mode:
			# if anything potentially unsafe in URL, don't make link.
			if unsafe_url(url):
				#print "** REMOVE UNSAFE URL",url
				return '<span class="wikkly-highlight">Unsafe URL removed</span>'

		parts = urlparse(url)
		if len(parts[0]) and len(parts[1]):
			helptext = 'Link to %s://%s' % (parts[0],parts[1])
		else:
			helptext = url

		# style URL as appropriate
		if linktype == 'external':
			if wcontext.var_get_int('$LINKS_NEW_WINDOW'):
				onclick = 'onclick="window.open(this.href);return false;"'
				linkclass = "%s-a-www" % css # style as external link
			else:
				onclick = ''
				linkclass = "%s-a-internal" % css # style as internal link

		elif linktype == 'internal':
			onclick = ''
			linkclass = "%s-a-internal" % css # style as internal link
		elif linktype == 'newitem':
			onclick = ''
			linkclass = "%s-a-unknown-item" % css # style as unknown link
		else:
			# report the offending value itself; no 'urlnode' exists in this
			# function, so referencing it here would raise NameError instead
			raise Exception("INTERNAL ERROR - unknown linktype '%s'" % linktype)

		linkclass = 'class="%s"' % strclass(linkclass, add_classes)

		# show domain next to link so user can see if it's suspicious
		# (skip if its a local link or relative link)
		# NOTE: the leading '0 and' keeps this branch disabled.
		if 0 and wcontext.restricted_mode and len(parts[0]) and len(parts[1]) and \
			url[0] != '#' and parts[1][:7] != '127.0.0':
			#print "** ADDING DOMAIN",parts
			pass
			#html += '<a title="%s" %s href="%s" %s>%s</a> [%s]' % \
			#		(helptext, linkclass, url,
			#		target,
			#		# allow complex structure under LinkElement
			#		make_html(linkelem, wcontext, parent_map, prevsib_map, add_classes),
			#		('%s://%s' % (parts[0],parts[1])))

		else:
			html += '<a title="%s" %s href="%s" %s>%s</a>' % \
							(helptext, linkclass,
								# use quoting to take care of " and ' as well
								escquotes(escapeURL(url)),
								onclick,
								escapeText(source))
								# allow complex structure under LinkElement

		return html

	#elif node.tag in ['LinkElement', 'LinkURL']:
	#	# no HTML for these - only for inner Text
	#	close = ''

	elif node.tag == 'Image':
		title = node.find('ImageTitle')
		url = node.find('ImageLink')
		filename = node.find('ImageFilename').find('Text').text

		if url is not None:
			url,linktype = resolve_URL(wcontext, url.find('Text').text)

			if wcontext.restricted_mode:
				# if anything potentially unsafe in URL, don't make link.
				if unsafe_url(url):
					# don't apply add_classes
					return '<span class="wikkly-highlight">Unsafe image source removed</span>'

		if wcontext.restricted_mode:
			# if anything potentially unsafe in URL, don't make link.
			if unsafe_url(filename):
				#print "** REMOVE UNSAFE URL",url
				# don't apply add_classes
				return '<span class="wikkly-highlight">Unsafe URL removed</span>'

		if title is not None:
			title = title.find('Text').text

		# when a link target was given, wrap the <img> in an <a>
		if url is not None:
			html += '<a href="%s" ' % escquotes(escapeURL(url))

			if wcontext.var_get_int('$LINKS_NEW_WINDOW'):
				html += 'onclick="window.open(this.href);return false;" '

			if title is not None:
				html += 'title=%s ' % escapeAttr(title)

			html += '>'

		c = "%s-img" % css
		html += '<img class="%s" src=%s alt=%s />' % \
			(strclass(c, add_classes), escapeAttr(filename), escapeAttr(filename))

		if url is not None:
			html += '</a>'

		return html

	elif node.tag == 'CreateAnchor':
		name = node.find('Text')
		c = "%s-a-internal" % css # style as internal link
		html += '<a name="%s" class="%s"> </a>' % (name.text, strclass(c, add_classes))
		return html

	elif node.tag == 'MacroBlock':
		# <MacroBlock> is a block-level element created by a macro.

		# NOTE: The macro that created the element is responsible for not letting
		#       unsafe user-data be injected into 'style'.
		#      'class' and 'id' are sanity checked.

		html += '<div '
		if 'class' in node.keys():
			# NOTE - do NOT add 'css' prefix - assume macro has given full classname
			html += 'class="%s" ' % strclass(node.get('class'), add_classes)

		if 'style' in node.keys():
			html += 'style="%s" ' % node.get('style')

		if 'id' in node.keys():
			html += 'id="%s" ' % sanitize_id(node.get('id'))

		html += '>'

		close = '</div>'

	elif node.tag == 'MacroInline':
		# <MacroInline> is an inline element created by a macro.

		# NOTE: The macro that created the element is responsible for not letting
		#       unsafe user-data be injected into 'style'.
		#      'class' and 'id' are sanity checked.
		html += '<span '
		if 'class' in node.keys():
			# NOTE - do NOT add 'css' prefix - assume macro has given full classname
			html += 'class="%s" ' % strclass(node.get('class'), add_classes)

		if 'style' in node.keys():
			html += 'style="%s" ' % node.get('style')

		if 'id' in node.keys():
			html += 'id="%s" ' % sanitize_id(node.get('id'))

		html += '>'

		close = '</span>'

	elif node.tag == 'CSSBlock':
		c = "%s-%s" % (css, sanitize_classname(node.get('class')))
		if node_contains_block_elements(node):
			# CSS seems to behave better if these are in a DIV vs. SPAN
			html += '<div class="%s">' % strclass(c, add_classes)
			close = '</div>'
		else:
			html += '<span class="%s">' % strclass(c, add_classes)
			close = '</span>'

		# append class to inner nodes as well (handle here and return, for simplicity)
		add_classes.append('%s-%s' % (css,node.get('class')))
		for subnode in node:
			html += make_html(subnode, wcontext, parent_map, prevsib_map, add_classes)

		html += close
		add_classes.pop()
		return html

	elif node.tag == 'Separator':
		c = "%s-separator" % css
		html += '<hr class="%s"/>' % strclass(c, add_classes)
		close = ''

	elif node.tag == 'LineBreak':
		html += '<br/>'
		close = ''

	elif node.tag == 'DashChar':
		html += ' &mdash; '
		close = ''

	else:
		raise WikError("Bad tag %s" % node.tag)

	# if subnodes not handled already, handle them now
	for subnode in node:
		html += make_html(subnode, wcontext, parent_map, prevsib_map, add_classes)

	html += close

	return html
예제 #9
0
    def parse(self, txt, wcontext):
        from time import time

        # txt should really be unicode, but accept str if it is plain ASCII
        if isinstance(txt, str):
            txt = unicode(txt)

        # sanity
        if not isinstance(txt, unicode):
            raise WikError("Unicode value required in parse() - got '%s'" %
                           repr(txt))

        # flags:
        #   * need to use re.M so beginning-of-line matches will work as expected
        #   * use re.I for case-insensitive as well
        #   * use re.S so '.' will match newline also
        self.lexer = lex.lex(object=self, reflags=re.M | re.I | re.S)

        self.wcontext = wcontext

        # shortcut for below
        parser = self.wcontext.parser

        # state vars - most of these are local context only, but some are set
        # into self if they are needed above
        in_bold = 0
        in_italic = 0
        in_strikethrough = 0
        in_underline = 0
        in_superscript = 0
        in_subscript = 0
        in_highlight = 0
        in_block_indent = 0
        in_line_indent = 0  # if > 0 this is the nesting level
        # the top of stack is the _currently_ opened listitem + level
        # e.g. for <ul>, item "###" is ('U',3), for <ol>, item '##' is ('N',2)
        list_stack = [('X', 0)]  # no currently opened list
        #in_Nlistitem = 0
        #in_Ulistitem = 0
        in_heading = 0
        in_deflist = 0  # tiddlywiki does not let DL/DT/DD nest apparently, so don't worry about it
        in_defterm = 0  # in <DT>?
        in_defdef = 0  # in <DD>?
        #in_imglink = 0
        self.in_strip_ccomment = 0  # inside /*** ... ***/ block
        in_html_comment = 0  # inside <!--- ... ---> block
        # since CSS blocks can nest, this is a list of currently open blocks, by CSS name
        css_stack = []
        # allow <html> blocks to nest
        #self.in_html_block = 0
        #self.in_code = 0
        self.in_table = 0
        self.in_tablerow = 0
        self.in_tablecell = 0
        last_token = (None, None)  # type,value

        self.prepare_input(txt)

        parser.beginDoc()

        while 1:
            tok = self.lexer.token()

            # check for EOF or over time limit
            if tok is None or time() > wcontext.stoptime:
                if tok is not None:
                    parser.characters('ERROR: TIME LIMIT EXCEEDED!')
                    parser.linebreak()

                #print "EOF LIST CHECK"
                #print "STACK ",list_stack

                # close any open lists
                while list_stack[-1][0] in "NU":
                    kind, n = list_stack.pop()
                    if kind == 'N':
                        parser.endNListItem()
                        parser.endNList()
                    else:
                        parser.endUListItem()
                        parser.endUList()

                # close any open tables
                if self.in_tablecell:
                    parser.endTableCell()
                if self.in_tablerow:
                    parser.endTableRow()
                if self.in_table:
                    parser.endTable()

                # close any opened line-indents
                while in_line_indent:
                    parser.endLineIndent()
                    in_line_indent -= 1

                # close any open definition list
                if in_defterm:
                    parser.endDefinitionTerm()

                if in_defdef:
                    parser.endDefinitionDef()

                if in_deflist:
                    parser.endDefinitionList()

                # watch out for ending inside of a structured item
                for v, s in [
                    (in_bold, "'' ... ''"),
                    (in_italic, "// ... //"),
                    (in_strikethrough, "-- ... --"),
                    (in_underline, "__ .. .__"),
                    (in_superscript, "^^ ... ^^"),
                    (in_subscript, "~~ ... ~~"),
                    (in_highlight, "@@ ... @@"),
                    (in_block_indent, "block-indent (<<<)"),
                        #(in_imglink, "[img[ ... ]]"),
                        #(self.in_html_block, "<html> ... </html>"),
                        #(self.in_code, "{{{ ... }}}")]:
                ]:
                    if v:
                        parser.error("ERROR input ended inside %s" % s, '', '')

                parser.endDoc()
                break

            # while in blockquote, hand parser raw chars
            #if self.in_blockquote and tok.type != 'BLOCKQUOTE':
            #	if hasattr(tok,'rawtext'):
            #		parser.characters(tok.rawtext)
            #	else:
            #		parser.characters(tok.value)
            #
            #	continue

            # while in code, hand parser raw chars
            #if self.in_code and tok.type != 'CODE_END':
            #	if hasattr(tok,'rawtext'):
            #		parser.characters(tok.rawtext)
            #	else:
            #		parser.characters(tok.value)
            #
            #	continue

            # while in <html>, hand parser raw chars, checking for nesting
            #if self.in_html_block:
            #	if tok.type == 'HTML_END':
            #		self.in_html_block -= 1
            #	elif tok.type == 'HTML_START':
            #		self.in_html_block += 1
            #	else:
            #		if hasattr(tok,'rawtext'):
            #			val = tok.rawtext
            #		else:
            #			val = tok.value
            #
            #		parser.characters(val)
            #
            #	continue

            # if just ended a line, and inside a table, and NOT starting a new tablerow, end table
            #if last_token[0] == 'EOLS' and in_table:
            #	if tok.type != 'TABLEROW_START' or len(last_token[1]) > 1:
            #		parser.endTable()
            #		in_table = 0

            # if just ended a line, and inside a line-indent, and NOT starting a new
            # line-indent, end indented section
            if last_token[0] == 'EOLS' and in_line_indent:
                if tok.type != 'LINE_INDENT':
                    # close all nested blocks
                    while in_line_indent:
                        parser.endLineIndent()
                        in_line_indent -= 1

            # if just ended a line, and inside a definition list, and NOT starting a new definition item, end list
            if last_token[0] == 'EOLS' and in_deflist:
                if tok.type not in ['D_TERM', 'D_DEFINITION'
                                    ] or len(last_token[1]) > 1:
                    parser.endDefinitionList()
                    in_deflist = 0

            # if just saw TABLEROW_END or TABLEROW_CAPTION and next token not
            # TABLEROW_CAPTION or TABLEROW_START, then end table
            if self.in_table and last_token[0] in ['TABLEROW_END','TABLEROW_CAPTION'] and \
             tok.type not in ['TABLEROW_CAPTION', 'TABLEROW_START']:
                if self.in_tablecell:
                    parser.endTableCell()
                    self.in_tablecell = 0

                if self.in_tablerow:
                    parser.endTableRow()
                    self.in_tablerow = 0

                parser.endTable()
                self.in_table = 0

            # if I just ended a line, and am inside a listitem, then check next token.
            # if not a listitem, pop & close all currently opened lists
            if last_token[0] == "EOLS" and list_stack[-1][1] >= 1:
                # if new token not a listitem or there were multiple EOLs, close all lists
                if tok.type not in ['N_LISTITEM', 'U_LISTITEM'
                                    ] or len(last_token[1]) > 1:
                    #print "EOL CLOSE LISTS"
                    #print "STACK ",list_stack

                    # close all open lists
                    while list_stack[-1][0] in "NU":
                        kind, n = list_stack.pop()
                        if kind == 'N':
                            parser.endNListItem()
                            parser.endNList()
                        else:
                            parser.endUListItem()
                            parser.endUList()

            if tok.type == 'TEXT':
                #parser.characters(self.no_tags(tok.value))
                parser.characters(tok.value)

            #elif tok.type == 'RAWTEXT': # internally generated type that tells me not to escape text
            #	parser.characters(tok.value)

            #elif tok.type == 'HTML_START':
            #	self.in_html_block += 1

            elif tok.type == 'BOLD':
                if in_bold:
                    parser.endBold()
                    in_bold = 0
                else:
                    parser.beginBold()
                    in_bold = 1

            elif tok.type == 'ITALIC':
                if in_italic:
                    parser.endItalic()
                    in_italic = 0
                else:
                    parser.beginItalic()
                    in_italic = 1

            elif tok.type == 'STRIKETHROUGH':
                if in_strikethrough:
                    parser.endStrikethrough()
                    in_strikethrough = 0
                else:
                    parser.beginStrikethrough()
                    in_strikethrough = 1

            elif tok.type == 'UNDERLINE':
                if in_underline:
                    parser.endUnderline()
                    in_underline = 0
                else:
                    parser.beginUnderline()
                    in_underline = 1

            elif tok.type == 'SUPERSCRIPT':
                if in_superscript:
                    parser.endSuperscript()
                    in_superscript = 0
                else:
                    parser.beginSuperscript()
                    in_superscript = 1

            elif tok.type == 'SUBSCRIPT':
                if in_subscript:
                    parser.endSubscript()
                    in_subscript = 0
                else:
                    parser.beginSubscript()
                    in_subscript = 1

            elif tok.type == 'HIGHLIGHT_DEFAULT':
                # can be end of any other "@@" style, or the start of the default style
                if in_highlight:
                    parser.endHighlight()
                    in_highlight = 0
                else:
                    # begin default highlight style
                    parser.beginHighlight()
                    in_highlight = 1

            elif tok.type in [
                    'HIGHLIGHT_CSS', 'HIGHLIGHT_COLOR', 'HIGHLIGHT_BG'
            ]:
                #print "TOKEN",tok.type,tok.value
                if in_highlight:
                    # the '@@' is the end of the highlight - reparse remainder
                    txt = self.lexer.lexdata[self.lexer.lexpos:]
                    self.lexer.input(tok.value[2:] + txt)
                    parser.endHighlight()
                    in_highlight = 0
                else:
                    # send style to parser so it knows what kind of element
                    # to create
                    parser.beginHighlight(tok.value)
                    in_highlight = 1

            #elif tok.type == 'BLOCKQUOTE':
            elif tok.type == 'BLOCK_INDENT':
                if in_block_indent:
                    parser.endBlockIndent()
                    in_block_indent = 0
                else:
                    parser.beginBlockIndent()
                    in_block_indent = 1

            elif tok.type == 'LINE_INDENT':
                # get >> chars
                m = re.match(self.t_LINE_INDENT, tok.value)
                # adjust new new nesting level
                nr = len(m.group(1))
                while nr > in_line_indent:
                    parser.beginLineIndent()
                    in_line_indent += 1

                while nr < in_line_indent:
                    parser.endLineIndent()
                    in_line_indent -= 1

            elif tok.type == 'HTML_ESCAPE':
                m = re.match(self.t_HTML_ESCAPE, tok.value, re.M | re.I | re.S)
                parser.beginRawHTML()
                parser.characters(m.group(1))
                parser.endRawHTML()

            elif tok.type == 'WIKI_ESCAPE':
                m = re.match(self.t_WIKI_ESCAPE, tok.value, re.M | re.I | re.S)
                # <nowiki> gets its own Text type to prevent camelwording
                parser.beginNoWiki()
                parser.characters(m.group(1))
                parser.endNoWiki()

            elif tok.type == 'D_TERM':
                if not in_deflist:
                    parser.beginDefinitionList()
                    in_deflist = 1

                parser.beginDefinitionTerm()
                in_defterm = 1

            elif tok.type == 'D_DEFINITION':
                if not in_deflist:
                    parser.beginDefinitionList()
                    in_deflist = 1

                parser.beginDefinitionDef()
                in_defdef = 1

            elif tok.type == 'N_LISTITEM':
                #print "N_LISTITEM, VALUE ",tok.value, "STACK ",list_stack

                # (see file 'stack' for more detailed derivation)
                #
                # remember:
                #    Top of stack is CURRENTLY opened listitem (the one before me)
                # cases:
                #   1. top of stack is my same type AND level:
                #        Close current listitem and start new one (leave stack alone)
                #   2. top of stack is LOWER level, ANY type:
                #        I'm a sublist of current item - open a new list, leaving current list open
                #        Push self to TOS
                #   3. top of stack is HIGHER level, ANY type:
                #        Current item is sublist of MY previous sibling. Close lists till I find
                #        my same type AND level at TOS (watch for emptying stack!)
                #        Start new item or new list (push to TOS).
                #   4. different type, same level:
                #        Close current list, pop TOS and start new list (push self to TOS)

                # case 1:
                if list_stack[-1][0] == 'N' and list_stack[-1][1] == len(
                        tok.value):
                    parser.endNListItem()
                    parser.beginNListItem(tok.value)

                # case 2:
                elif list_stack[-1][1] < len(tok.value):
                    parser.beginNList()
                    parser.beginNListItem(tok.value)
                    list_stack.append(('N', len(tok.value)))

                # case 3:
                elif list_stack[-1][1] > len(tok.value):
                    while (not(list_stack[-1][0] == 'N' and list_stack[-1][1] == len(tok.value))) and \
                     list_stack[-1][0] in 'NU':  # watch for end of stack as well
                        # close TOS list
                        if list_stack[-1][0] == 'N':
                            parser.endNListItem()
                            parser.endNList()
                        else:
                            parser.endUListItem()
                            parser.endUList()

                        list_stack.pop()

                    # did I empty the stack?
                    if list_stack[-1][0] != 'N':
                        # yes, start new list
                        parser.beginNList()
                    else:
                        # close current item
                        parser.endNListItem()

                    parser.beginNListItem(tok.value)

                    # do NOT push to stack since TOS is already correct

                # case 4:
                elif list_stack[-1][0] == 'U' and list_stack[-1][1] == len(
                        tok.value):
                    # close current list & pop TOS
                    parser.endUListItem()
                    parser.endUList()
                    list_stack.pop()

                    # start new list & item
                    parser.beginNList()
                    parser.beginNListItem(tok.value)

                    list_stack.append(('N', len(tok.value)))

                else:
                    # cannot reach ... if my logic is correct :-)
                    raise WikError("** INTERNAL ERROR in N_LISTITEM **")

            elif tok.type == 'U_LISTITEM':
                # (see comments in N_LISTITEM)

                #print "U_LISTITEM, VALUE ",tok.value, "STACK ",list_stack

                # case 1:
                if list_stack[-1][0] == 'U' and list_stack[-1][1] == len(
                        tok.value):
                    parser.endUListItem()
                    parser.beginUListItem(tok.value)

                # case 2:
                elif list_stack[-1][1] < len(tok.value):
                    parser.beginUList()
                    parser.beginUListItem(tok.value)
                    list_stack.append(('U', len(tok.value)))

                # case 3:
                elif list_stack[-1][1] > len(tok.value):
                    while (not(list_stack[-1][0] == 'U' and list_stack[-1][1] == len(tok.value))) and \
                     list_stack[-1][0] in 'NU':  # watch for end of stack as well
                        # close TOS list
                        if list_stack[-1][0] == 'U':
                            parser.endUListItem()
                            parser.endUList()
                        else:
                            parser.endNListItem()
                            parser.endNList()

                        list_stack.pop()

                    # did I empty the stack?
                    if list_stack[-1][0] != 'U':
                        # yes, start new list
                        parser.beginUList()
                    else:
                        # close current item
                        parser.endUListItem()

                    parser.beginUListItem(tok.value)

                    # do NOT push to stack since TOS is already correct

                # case 4:
                elif list_stack[-1][0] == 'N' and list_stack[-1][1] == len(
                        tok.value):
                    # close current list & pop TOS
                    parser.endNListItem()
                    parser.endNList()
                    list_stack.pop()

                    # start new list & item
                    parser.beginUList()
                    parser.beginUListItem(tok.value)

                    list_stack.append(('U', len(tok.value)))

                else:
                    # cannot reach ... if my logic is correct :-)
                    raise WikError("** INTERNAL ERROR in N_LISTITEM **")

            elif tok.type == 'HEADING':
                # inside a table, this is a regular char (so parser can see it and
                # know to switch to <th>, etc.)
                if self.in_table:
                    #print "RAWTEXT HEADING"
                    parser.characters(tok.rawtext)
                    continue

                parser.beginHeading(len(tok.value))
                in_heading = 1

            elif tok.type == 'LINK_AB':
                parser.handleLink(tok.value[0], tok.value[1])

            elif tok.type == 'LINK_A':
                parser.handleLink(tok.value)

            elif tok.type in [
                    'IMGLINK_TFU', 'IMGLINK_TF', 'IMGLINK_FU', 'IMGLINK_F'
            ]:
                parser.handleImgLink(*tok.value)

            elif tok.type == 'CSS_BLOCK_START':
                m = re.match(self.t_CSS_BLOCK_START, tok.value,
                             re.M | re.S | re.I)
                name = m.group(1)
                # push on stack
                css_stack.append(name)
                # inform parser
                parser.beginCSSBlock(name)

            elif tok.type == 'CSS_BLOCK_END':
                if len(css_stack):
                    # pop name and inform parser
                    name = css_stack.pop()
                    parser.endCSSBlock()
                else:
                    # regular chars outside of a CSS block
                    parser.characters(tok.value)

            elif tok.type == 'C_COMMENT_START':
                #print "******** C_COMMENT_START"
                if self.in_strip_ccomment:
                    # already in C-comment, treat as normal chars
                    parser.characters(tok.value)
                else:
                    # begin C-comment (strip comment markers)
                    self.in_strip_ccomment = 1

            #elif tok.type == 'C_COMMENT_END':
            #	print "************* C_COMMENT_END"
            #	if not self.in_strip_comment:
            #		# not in C-comment, treat as normal chars
            #		parser.characters(tok.value)
            #	else:
            #		self.in_strip_comment = 0

            elif tok.type == 'HTML_COMMENT_START':
                #print "******** C_COMMENT_START"
                if in_html_comment:
                    # already in HTML comment, treat as normal chars
                    parser.characters(tok.value)
                else:
                    # begin HTML comment (strip comment markers)
                    in_html_comment = 1

            elif tok.type == 'HTML_COMMENT_END':
                #print "************* C_COMMENT_END"
                if not in_html_comment:
                    # not in HTML-comment, treat as normal chars
                    parser.characters(tok.value)
                else:
                    # strip end markers
                    in_html_comment = 0

            elif tok.type == 'CODE_BLOCK':
                # regex grabs entire block since no nesting allowed
                m = re.match(self.t_CODE_BLOCK, tok.value, re.M | re.I | re.S)
                text = m.group(1)

                self.handle_codeblock(parser, text)

            elif tok.type == 'CODE_BLOCK_CSS':
                # regex grabs entire block since no nesting allowed
                m = re.match(self.t_CODE_BLOCK_CSS, tok.value,
                             re.M | re.I | re.S)
                text = m.group(1)

                self.handle_codeblock(parser, text)

            elif tok.type == 'CODE_BLOCK_CPP':
                # regex grabs entire block since no nesting allowed
                m = re.match(self.t_CODE_BLOCK_CPP, tok.value,
                             re.M | re.I | re.S)
                text = m.group(1)

                self.handle_codeblock(parser, text)

            elif tok.type == 'CODE_BLOCK_HTML':
                # regex grabs entire block since no nesting allowed
                m = re.match(self.t_CODE_BLOCK_HTML, tok.value,
                             re.M | re.I | re.S)
                text = m.group(1)

                self.handle_codeblock(parser, text)

            #elif tok.type == 'CODE_START':
            #	# note: while in code, nothing else comes here (see above),
            #	# so don't have to test for nesting
            #	parser.beginCode()
            #	self.in_code = 1

            #elif tok.type == 'CODE_END':
            #	# is it a code block?
            #	if self.in_code:
            #		parser.endCode()
            #		self.in_code = 0
            #	# else, might be a CSS block ending
            #	elif len(css_stack):
            #		# pop name and inform parser
            #		name = css_stack.pop()
            #		parser.endCSSBlock(name)
            #	# otherwise, it's just regular text
            #	else:
            #		parser.characters(tok.value)

            elif tok.type == 'TABLEROW_START':
                if not self.in_table:
                    parser.beginTable()
                    self.in_table = 1

                parser.beginTableRow()
                self.in_tablerow = 1
                parser.beginTableCell()
                self.in_tablecell = 1
                #in_tablerow = 1

            elif tok.type == 'TABLEROW_END':
                if not self.in_table:
                    # split | portion from "\n" portion
                    m = re.match(self.t_TABLEROW_END, tok.value,
                                 re.M | re.I | re.S)
                    parser.characters(m.group(1))
                    # feed \n back to parser
                    txt = self.lexer.lexdata[self.lexer.lexpos:]
                    self.lexer.input('\n' + txt)
                else:
                    parser.endTableCell()
                    self.in_tablecell = 0
                    parser.endTableRow()
                    self.in_tablerow = 0

            elif tok.type == 'TABLE_END':
                if not self.in_table:
                    # split | portion from "\n" portion
                    m = re.match(self.t_TABLE_END, tok.value,
                                 re.M | re.I | re.S)
                    parser.characters(m.group(1))
                    # feed \n's back to parser
                    txt = self.lexer.lexdata[self.lexer.lexpos:]
                    self.lexer.input(m.group(2) + txt)
                else:
                    parser.endTableCell()
                    self.in_tablecell = 0
                    parser.endTableRow()
                    self.in_tablerow = 0
                    parser.endTable()
                    self.in_table = 0

            elif tok.type == 'TABLEROW_CAPTION':
                # watch for caption as first row of table
                if not self.in_table:
                    parser.beginTable()
                    self.in_table = 1

                m = re.match(self.t_TABLEROW_CAPTION, tok.value,
                             re.M | re.I | re.S)
                parser.setTableCaption(m.group(1))

                txt = self.lexer.lexdata[self.lexer.lexpos:]

                # have to check for table ending since I grabbed the \n
                if re.match(r"[\t ]*[\n]", txt):
                    parser.endTable()
                    self.in_table = 0

            elif tok.type == 'PIPECHAR':
                if self.in_table:
                    parser.endTableCell()

                    # Start next cell UNLESS this is the end of the buffer.
                    # Prevents having a false empty cell at the end of the
                    # table if the row ends in EOF
                    txt = self.lexer.lexdata[self.lexer.lexpos:]
                    if not only_spaces(txt):
                        parser.beginTableCell()
                    else:
                        self.in_tablecell = 0

                else:
                    parser.characters(tok.value)

            elif tok.type == 'SEPARATOR':
                parser.separator()

            elif tok.type == 'CATCH_URL':
                # turn bare URL into link like: [[URL|URL]]
                parser.handleLink(tok.value, tok.value)

            elif tok.type == 'NULLDOT':
                pass  # nothing

            #elif tok.type == 'DELETE_ME':
            #	pass # nothing

            elif tok.type == 'XHTML_ENTITY':
                s = tok.value
                if s[-1] == ';':  # remove ; if present
                    addsemi = u';'  # remember to add back (below), if needed
                    s = s[:-1]
                else:
                    addsemi = u''

                s = s[1:]  # strip &

                if s == '#DeleteMe':
                    continue

                # check for hex entity
                m = re.match(r'\#x([0-9a-h]+)', s, re.M | re.I | re.S)
                if m:
                    if m.group(1) in ['200b', '200B']:
                        # &#x200b; is special - pass to XML layer
                        parser.characters('&#x200b;')
                    else:
                        parser.characters(unichr(hex2int(m.group(1))))

                    continue

                # check for decimal entity
                m = re.match(r'\#([0-9]+)', s, re.M | re.I | re.S)
                if m:
                    parser.characters(unichr(int(m.group(1))))
                    continue

                # see if name defined in htmlentitydefs
                import htmlentitydefs as hed
                if hed.name2codepoint.has_key(s):
                    parser.characters(unichr(hed.name2codepoint[s]))
                else:
                    # else, return as raw text (will be escaped in final output)
                    parser.characters(u'&' + s + addsemi)

            #elif tok.type == 'HTML_HEX_ENTITY':
            #	# reparse hex part
            #	m = re.match(self.t_HTML_HEX_ENTITY, tok.value, re.M|re.I|re.S)

            elif tok.type == 'DASH':
                parser.dash()

            #elif tok.type == 'MACRO':
            #	# macro has already run, insert text ...
            #	#parser.characters(self.no_tags(tok.value))
            #	parser.characters(tok.value)

            elif tok.type == 'PYTHON_EMBED':
                if self.wcontext.restricted_mode:
                    self.wcontext.parser.error(
                        "Not allowed to define macros in Safe Mode",
                        tok.rawtext, '')

                else:
                    parser.beginPyCode()
                    parser.characters(tok.value)
                    parser.endPyCode()

            #elif tok.type == 'RAWHTML':
            #	print "** RAWHTML **",tok.value
            #	parser.characters(tok.value)

            elif tok.type == "HTML_BREAK":
                parser.linebreak()

            elif tok.type == 'EOLS':
                # Do NOT handle lists here - they have complex nesting rules so must be
                # handled separately (above)

                if in_heading:
                    parser.endHeading()
                    in_heading = 0

                #if in_tablerow:
                #	parser.endTableRow()
                #	in_tablerow = 0

                #if not in_table:
                parser.EOLs(tok.value)

                if in_defterm:
                    parser.endDefinitionTerm()
                    in_defterm = 0

                if in_defdef:
                    parser.endDefinitionDef()
                    in_defdef = 0

            # remember for next pass
            last_token = (tok.type, tok.value)
예제 #10
0
def parse_macro_call(text):
    r"""
    Parse a macro call of the form:
         <<text text ...>>

    The input text must begin with '<<'. The parser reads up to and including
    the closing '>>'.

    Returns:
        (macrocall, txt_remainder)

    Where:
        * macrocall: Element('MacroCall') with the macro args stored as
          subnodes, in the order they appear (the first one is really the
          macro name). Each subnode is either:
            a. <TextMacroArg> - a literal text arg
            b. <MacroCall> - an inner (nested) macro call
        * txt_remainder: text after the macro call. Whitespace directly
          following the closing '>>' is normalized: dropped entirely when the
          next thing is another macro call or a '/%' comment, otherwise
          collapsed to a blank line, a single newline or a single space.

    Handles:
        * Unquoted args (args delimited by whitespace)
        * Args quoted with ', ", triple-' or triple-" (triple quotes are a
          WikklyText extension)
        * Args quoted with <quote> ... </quote> (WikklyText extension)
        * Python-style escapes inside of quoted strings (decoded by
          char_escape()):
              \a \b \f \n \r \t \v \' \" \\
              \xHH (hex, 1-2 chars)
              \NNN (octal, 1-3 chars)
              \uHHHH unicode escape (only 4-digit unicode is supported on
              all Pythons)

              \{ANYCHAR} = ANYCHAR is passed through, if not in above list

          There is no need for the Python-style backslash+newline
          continuation since quoted strings are allowed to span lines. The
          [\r\n] chars are saved as-is. Outside of a quoted string a
          backslash is a regular character.
        * Linebreaks preserved inside quoted strings (outside of quoted
          strings, they are delimiters)
        * Allows string concatenation: 'aaa'bbb"ccc"ddd -> 'aaabbbcccddd'
        * Preserves empty args ('' and "")

    Raises:
        * Exception if text does not begin with '<<' (caller error).
        * WikError on:
            - Unterminated quotes.
            - Unterminated "\" inside a quote.
            - No closing >>
            - Any error raised while parsing a nested macro call.
    """
    # sanity check
    if text[:2] != '<<':
        # should never happen, so let it flow to top level
        raise Exception("Not a macro call.")

    # args are stored here as subnodes
    macrocall = Element('MacroCall')

    def add_text_arg(value):
        # NOTE: Using a special tag 'TextMacroArg' (instead of 'Text') here.
        # The purpose of this is to catch any macro args that aren't processed
        # by the macro. i.e. if a "TextMacroArg" tag makes it through to the
        # HTML writer, something is wrong.
        elem = Element('TextMacroArg')
        elem.text = value
        macrocall.append(elem)

    # Start of a nested macro call. Compiled once and matched in place at
    # position i, so the tail of the input is not copied for every character.
    nested_call = re.compile('<<[a-z]', re.I)

    in_arg = 0  # inside an arg
    in_quotechar = None  # which quote I'm inside (', ", triple, <quote>, or None)
    out = u''  # current chunk of text
    i = 2  # skip leading '<<'

    while i < len(text):
        # end-of-macro and nested calls are only recognized outside of quotes
        if in_quotechar is None:
            # check for end of macro
            if text[i:i + 2] == '>>':
                break

            # look for nested macro call
            if nested_call.match(text, i):
                # if I have a partial arg, close before opening new macro call
                if in_arg:
                    add_text_arg(out)
                    out = u''
                    in_arg = 0

                # parse inner macro and add it as an arg
                inner_macrocall, txt_remainder = parse_macro_call(text[i:])
                macrocall.append(inner_macrocall)
                # continue scanning from the start of whatever the inner
                # parser left over
                text = txt_remainder
                i = 0
                continue  # skip +=1 at bottom for clarity here

        if text[i] == '\\':
            in_arg = 1
            if in_quotechar:
                try:
                    c, skip = char_escape(text[i + 1:])
                except IndexError:
                    # string ended with "\" inside a quoted string
                    raise WikError(
                        "Macro statement ended inside unterminated quoted string",
                        text[:80], '', text[i:])

                out += c
                i += skip  # skip extra char(s)
            else:
                out += u'\\'  # '\' is a regular char outside of a quoted string

        elif text[i] in '\t \r\n':
            if not in_arg:
                i += 1  # skip whitespace outside of quoted strings
                continue

            if in_quotechar:
                out += text[i]  # preserve whitespace inside a quoted string
            else:
                # end of arg - convert to Element and store
                add_text_arg(out)
                out = u''
                in_arg = 0

        # look for triple quotes
        elif text[i:i + 3] in ["'''", '"""']:
            in_arg = 1
            if in_quotechar == text[i:i + 3]:  # end of quote?
                # don't end arg - wait for whitespace break. this gives
                # concatenation behaviour like:
                #     "aaa"bbb'ccc'ddd ==> 'aaabbbcccddd'
                in_quotechar = None
            elif in_quotechar:
                out += text[i:i + 3]  # regular text inside of another quote
            else:
                in_quotechar = text[i:i + 3]  # begin quote

            # skip two extra chars
            i += 2

        # look for single-quotes
        elif text[i] in ["'", '"']:
            in_arg = 1
            if in_quotechar == text[i]:
                # end of quote, but don't end arg (see triple quotes above)
                in_quotechar = None
            elif in_quotechar:
                out += text[i]  # regular char inside another quote
            else:
                in_quotechar = text[i]  # begin quote

        # look for <quote>
        elif text[i:i + 7] == '<quote>':
            in_arg = 1
            if in_quotechar:
                out += text[i:i + 7]  # regular text inside another quote
            else:
                in_quotechar = text[i:i + 7]  # begin quote

            # skip 6 extra chars
            i += 6

        # look for </quote>
        elif text[i:i + 8] == '</quote>':
            in_arg = 1
            if in_quotechar == '<quote>':
                # end of quote, but don't end arg (see triple quotes above)
                in_quotechar = None
            else:
                out += text[i:i + 8]  # regular text

            # skip 7 extra chars
            i += 7

        else:
            in_arg = 1
            out += text[i]  # regular char

        i += 1

    if text[i:i + 2] != '>>':
        raise WikError("No closing >> in macro statement", text[:80], '',
                       text[i:])

    # the user might not have intended this, so let them know ...
    if in_quotechar:
        raise WikError(
            "Macro statement ended inside a quoted string (current quote=%s)" %
            in_quotechar, text[:80], '', text[i:])

    # finish final partial arg if any
    if in_arg:
        add_text_arg(out)

    # skip ending '>>'
    i += 2

    # collect all whitespace after this point
    k = i
    while k < len(text) and text[k] in ' \t\n':
        k += 1

    trailing = text[i:k]
    rest = text[k:]

    # if there is another macro call or a comment (i.e. non-visible markup),
    # remove all intervening whitespace.
    # else, restore either a single space or a single \n depending on
    # what I found.
    # NOTE(review): unlike the nested-call check in the loop above, this match
    # is case-sensitive -- confirm whether that is intended.
    if re.match(r'<<[a-z_]+', rest) or rest.startswith('/%'):
        remainder = rest
    elif trailing.find('\n\n') >= 0:  # check before '\n'
        remainder = u'\n\n' + rest
    elif '\n' in trailing:
        remainder = u'\n' + rest
    elif len(trailing):
        remainder = u' ' + rest
    else:
        remainder = rest

    # the first subnode is really the macro name; callers split it off
    return (macrocall, remainder)
예제 #11
0
def WikklyText_to_XML(content,
                      encoding,
                      safe_mode,
                      setvars=None,
                      max_runtime=-1,
                      url_resolver=None,
                      tree_posthook=None,
                      plugin_dirs=None,
                      rendercache=None,
                      macro_handler=None,
                      **kwargs):
    """
	Convert the given wikitext to XML.
	
	|>|!Inputs|
		|content\
			|Wikitext (//unicode//), usually from {{tt{wikklytext.base.load_wikitext()}}}|
		|encoding\
			|Desired output encoding (i.e. {{{'utf-8'}}})|
		|safe_mode\
			|True/False, whether to use Safe mode.|
		|setvars\
			|Variables to set into ~WikContext, as dict of:<br>{{{name: <str, unicode or int>}}}<br> \
				where 'name' can have a leading '$' to set sysvars.|
		|max_runtime\
			|Maximum time (in seconds) to run, or -1 for unlimited.|
		|url_resolver\
			|URL resolver. Must be a callable like [[default_URL_resolver()|#FUNC_default_URL_resolver]]. \
			If None, a default resolver will be used.|
		|tree_posthook\
			|Hook to call after ~ElementTree is complete, before generating XML. Will be called as: <br>\
				{{{tree_posthook(rootnode, context)}}} <br> Hook should modify tree in-place.|
		|plugin_dirs\
			|Paths to search for plugins. Can be a list of strings, a single string, or None.|
		|rendercache\
			|Instance of class in wikklytext.cache to perform caching.|
		|macro_handler\
			|Caller-defined macro handler. Will be called as:<br>\
			{{{
			handled, result = macro_handler(name, context, *elements)
			}}}\
			Returns:\
			* ''handled'': True/False if macro call was handled.
			* ''result'' is the macro return value, ready to be processed \
			by ''macro.process_macro_result''().|
		|kwargs\
			|//Undocumented// -- for backward compatibility with earlier keyword args. Do not \
			pass this directly.|
	
	|>|!Returns: {{{(xml, context)}}} |
		|xml\
			|Generated XML as an encoded bytestring|
		|context\
			|~WikContext that was used, in case user wants to inspect it.|
	
	|>|!Raises|
		|~WikError\
			|If a value in ''setvars'' is not a str, unicode or int.|
	"""
    from wikklytext.base import WikContext
    from wikklytext.eval import eval_wiki_elem

    setvars = setvars or {}

    # 'in' instead of the deprecated dict.has_key() -- same result
    if 'plugin_dir' in kwargs:
        # in 1.4.0 this was named 'plugin_dir' so accept if plugin_dirs not given
        plugin_dirs = plugin_dirs or kwargs.get('plugin_dir', None)
        deprecation(
            "Change 'plugin_dir' to 'plugin_dirs' in args to WikklyText_to_XML()"
        )

    wcontext = WikContext(restricted_mode=safe_mode,
                          max_runtime=max_runtime,
                          url_resolver=url_resolver,
                          plugin_dirs=plugin_dirs,
                          rendercache=rendercache,
                          macro_handler=macro_handler)

    # set any passed vars into the context, dispatching on the value type
    for name, value in setvars.items():
        if isinstance(value, (str, unicode)):
            wcontext.var_set_text(name, value)
        elif isinstance(value, int):
            wcontext.var_set_int(name, value)
        else:
            raise WikError("Bad value in setvars")

    # parse to a tree, then evaluate it
    elem = WikklyText_to_Tree(wcontext, content)
    elem = eval_wiki_elem(wcontext, elem)

    # include errors from wcontext
    elist = elem.find('ErrorsList')
    for e in wcontext.parser.getErrors():
        elist.append(e)

    # call hook to postprocess tree before making XML
    if tree_posthook is not None:
        tree_posthook(elem, wcontext)

    xml = dumpxml(elem)
    return (xml, wcontext)