def write_esis(doc, ofp, knownempty):
    """Write the children of the DOM node 'doc' to 'ofp' as ESIS lines.

    Parameters:
        doc        -- node whose childNodes are written (recursively).
        ofp        -- output object; only its write() method is used.
        knownempty -- callable taking an element name and returning a
                      true value when that element is declared empty.

    Each element produces an optional "e" line (declared-empty
    elements), one "A<name> <type> <value>" line per attribute, a
    "(<name>" line, its children, and a ")<name>" line.  Text nodes
    produce "-<data>" and entity references produce "&<name>".

    Raises ValueError when a declared-empty element has children and
    RuntimeError when a node of any other type is encountered.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    # Call-style raise: valid on both Python 2 and 3.
                    raise ValueError(
                        "declared-empty node <%s> has children" % gi)
                ofp.write("e\n")
            for k, value in node.attributes.items():
                # Attribute lines are emitted before the element is opened.
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
def dump_attr(self, pentry, value):
    """Write one attribute line for 'pentry' carrying 'value'.

    Nothing is written when either the parameter entry's name or the
    value is false.  Otherwise the line has the form
    "A<name> <type> <encoded value>", where <type> is "TOKEN" if the
    value matches _token_rx and "CDATA" if it does not.
    """
    if not pentry.name or not value:
        return
    # Assume character data unless the value matches the token pattern.
    dtype = "CDATA"
    if _token_rx.match(value):
        dtype = "TOKEN"
    fields = (pentry.name, dtype, encode(value))
    self.write("A%s %s %s\n" % fields)
def subconvert(self, endchar=None, depth=0):
    """Convert the LaTeX markup in self.line, emitting via self.write.

    Parses content, including sub-structures, until the character
    'endchar' is found (with no open structures), or until the end
    of the input data if endchar is None.

    Parameters:
        endchar -- character that stops this call when it is seen while
                   the local element stack is empty; None means consume
                   all remaining input.
        depth   -- value handed on (increased) to recursive calls; it is
                   not otherwise used here.

    Returns the unconsumed input, beginning with 'endchar' (the same
    string is stored in self.line), when 'endchar' stops the parse.
    Returns None when the input is exhausted.

    Raises LaTeXStackError when an environment close does not match the
    innermost open element, and LaTeXFormatError for markup that cannot
    be interpreted, for truncated input, or when elements are still open
    at the end of the input.
    """
    stack = new_stack()
    line = self.line
    while line:
        if line[0] == endchar and not stack:
            self.line = line
            return line
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                           % encode(text))
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            name = m.group(1)
            # The result is unused; the lookup is kept for whatever
            # checking get_env_entry() performs on the name.
            entry = self.get_env_entry(name)
            # re-write to use the macro handler
            line = r"\%s %s" % (name, line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            entry = self.get_entry(envname)
            # Implicitly close elements this environment end may close.
            while stack and envname != stack[-1] \
                  and stack[-1] in entry.endcloses:
                self.write(")%s\n" % stack.pop())
            if stack and envname == stack[-1]:
                self.write(")%s\n" % entry.outputname)
                del stack[-1]
            else:
                raise LaTeXStackError(envname, stack)
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "c":
                # Ugh!  This is a combining character...
                endpos = m.end()
                if endpos >= len(line):
                    # Input ended before the character to combine with.
                    raise LaTeXFormatError(
                        "missing character after \\c")
                self.combining_char("c", line[endpos])
                line = line[endpos + 1:]
                continue
            entry = self.get_entry(macroname)
            if entry.verbatim:
                # magic case!
                endtag = "\\end{%s}" % macroname
                pos = line.find(endtag)
                if pos < 0:
                    # find() returns -1 when the terminator is missing;
                    # slicing with -1 would emit truncated text and then
                    # re-parse part of the verbatim content as markup.
                    raise LaTeXFormatError(
                        "missing %s for verbatim content" % endtag)
                text = line[m.end(1):pos]
                stack.append(entry.name)
                self.write("(%s\n" % entry.outputname)
                self.write("-%s\n" % encode(text))
                self.write(")%s\n" % entry.outputname)
                stack.pop()
                line = line[pos + len(endtag):]
                continue
            # Close any open elements that this macro implicitly ends.
            while stack and stack[-1] in entry.closes:
                top = stack.pop()
                topentry = self.get_entry(top)
                if topentry.outputname:
                    self.write(")%s\n-\\n\n" % topentry.outputname)
            #
            if entry.outputname and entry.empty:
                self.write("e\n")
            #
            params, optional, empty = self.start_macro(macroname)
            # rip off the macroname
            if params:
                line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            opened = 0
            implied_content = 0

            # handle attribute mappings here:
            for pentry in params:
                if pentry.type == "attribute":
                    if pentry.optional:
                        m = _optional_rx.match(line)
                        if m and entry.outputname:
                            line = line[m.end():]
                            self.dump_attr(pentry, m.group(1))
                    elif pentry.text and entry.outputname:
                        # value supplied by conversion spec:
                        self.dump_attr(pentry, pentry.text)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %r"
                                % (pentry.name, macroname, line[:100]))
                        if entry.outputname:
                            self.dump_attr(pentry, m.group(1))
                        line = line[m.end():]
                elif pentry.type == "child":
                    if pentry.optional:
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(macroname)
                            stack.append(pentry.name)
                            self.write("(%s\n" % pentry.name)
                            self.write("-%s\n" % encode(m.group(1)))
                            self.write(")%s\n" % pentry.name)
                            stack.pop()
                    else:
                        if entry.outputname and not opened:
                            opened = 1
                            self.write("(%s\n" % entry.outputname)
                            stack.append(entry.name)
                        self.write("(%s\n" % pentry.name)
                        stack.append(pentry.name)
                        # Drop the character after the whitespace (the
                        # argument's opening delimiter) and recurse; the
                        # trailing [1:] drops the "}" that ended it.
                        self.line = skip_white(line)[1:]
                        line = self.subconvert(
                            "}", len(stack) + depth + 1)[1:]
                        self.write(")%s\n" % stack.pop())
                elif pentry.type == "content":
                    if pentry.implied:
                        implied_content = 1
                    else:
                        if entry.outputname and not opened:
                            opened = 1
                            self.write("(%s\n" % entry.outputname)
                            stack.append(entry.name)
                        line = skip_white(line)
                        # Slice rather than index so that exhausted
                        # input reports the format error below.
                        if line[:1] != "{":
                            raise LaTeXFormatError(
                                "missing content for " + macroname)
                        self.line = line[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)
                        if line and line[0] == "}":
                            line = line[1:]
                elif pentry.type == "text" and pentry.text:
                    if entry.outputname and not opened:
                        opened = 1
                        stack.append(entry.name)
                        self.write("(%s\n" % entry.outputname)
                    #dbgmsg("--- text: %r" % pentry.text)
                    self.write("-%s\n" % encode(pentry.text))
                elif pentry.type == "entityref":
                    self.write("&%s\n" % pentry.name)
            if entry.outputname:
                if not opened:
                    self.write("(%s\n" % entry.outputname)
                    stack.append(entry.name)
                if not implied_content:
                    # Explicit content was handled above, so the element
                    # can be closed right away.
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
            continue
        # Same condition as the test at the top of the loop; neither
        # 'line' nor 'stack' has changed since then on this path.
        if line[0] == endchar and not stack:
            self.line = line[1:]
            return self.line
        if line[0] == "}":
            # end of macro or group
            if not stack:
                # Nothing is open, so this brace has no partner.
                raise LaTeXFormatError(
                    "unbalanced '}': %r" % line[:100])
            macroname = stack[-1]
            if macroname:
                conversion = self.table[macroname]
                if conversion.outputname:
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % conversion.outputname)
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "~":
            # don't worry about the "tie" aspect of this command
            line = line[1:]
            self.write("- \n")
            continue
        if line[0] == "{":
            # Bare group: the empty name marks it on the stack.
            stack.append("")
            line = line[1:]
            continue
        # The length test keeps a lone trailing backslash from raising
        # IndexError; it falls through to the markup error below.
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            self.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            self.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        if line[:2] == r"\_":
            line = "_" + line[2:]
            continue
        if line[:2] in (r"\'", r'\"'):
            # combining characters...
            if len(line) < 3:
                # Input ended before the character to combine with.
                raise LaTeXFormatError(
                    "missing character after " + line[:2])
            self.combining_char(line[1], line[2])
            line = line[3:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            self.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        # XXX can we axe this???
        if line[0] == "]":
            self.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %r%s"
                               % (line[:100], extra))
    # Input exhausted: close trailing elements whose entries have a
    # non-empty 'closes' setting; anything else left open is an error.
    while stack:
        entry = self.get_entry(stack[-1])
        if entry.closes:
            self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
            del stack[-1]
        else:
            break
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + ", ".join(stack))
def subconvert(self, endchar=None, depth=0):
    """Convert the LaTeX markup in self.line, emitting via self.write.

    Parses content, including sub-structures, until the character
    'endchar' is found (with no open structures), or until the end
    of the input data if endchar is None.

    Parameters:
        endchar -- character that stops this call when it is seen while
                   the local element stack is empty; None means consume
                   all remaining input.
        depth   -- value handed on (increased) to recursive calls; it is
                   not otherwise used here.

    Returns the unconsumed input, beginning with 'endchar' (the same
    string is stored in self.line), when 'endchar' stops the parse.
    Returns None when the input is exhausted.

    Raises LaTeXStackError when an environment close does not match the
    innermost open element, and LaTeXFormatError for markup that cannot
    be interpreted, for truncated input, or when elements are still open
    at the end of the input.
    """
    stack = new_stack()
    line = self.line
    while line:
        if line[0] == endchar and not stack:
            self.line = line
            return line
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                           % encode(text))
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            name = m.group(1)
            # The result is unused; the lookup is kept for whatever
            # checking get_env_entry() performs on the name.
            entry = self.get_env_entry(name)
            # re-write to use the macro handler
            line = r"\%s %s" % (name, line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            entry = self.get_entry(envname)
            # Implicitly close elements this environment end may close.
            while stack and envname != stack[-1] \
                  and stack[-1] in entry.endcloses:
                self.write(")%s\n" % stack.pop())
            if stack and envname == stack[-1]:
                self.write(")%s\n" % entry.outputname)
                del stack[-1]
            else:
                raise LaTeXStackError(envname, stack)
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "c":
                # Ugh!  This is a combining character...
                endpos = m.end()
                if endpos >= len(line):
                    # Input ended before the character to combine with.
                    raise LaTeXFormatError(
                        "missing character after \\c")
                self.combining_char("c", line[endpos])
                line = line[endpos + 1:]
                continue
            entry = self.get_entry(macroname)
            if entry.verbatim:
                # magic case!
                endtag = "\\end{%s}" % macroname
                pos = line.find(endtag)
                if pos < 0:
                    # find() returns -1 when the terminator is missing;
                    # slicing with -1 would emit truncated text and then
                    # re-parse part of the verbatim content as markup.
                    raise LaTeXFormatError(
                        "missing %s for verbatim content" % endtag)
                text = line[m.end(1):pos]
                stack.append(entry.name)
                self.write("(%s\n" % entry.outputname)
                self.write("-%s\n" % encode(text))
                self.write(")%s\n" % entry.outputname)
                stack.pop()
                line = line[pos + len(endtag):]
                continue
            # Close any open elements that this macro implicitly ends.
            while stack and stack[-1] in entry.closes:
                top = stack.pop()
                topentry = self.get_entry(top)
                if topentry.outputname:
                    self.write(")%s\n-\\n\n" % topentry.outputname)
            #
            if entry.outputname and entry.empty:
                self.write("e\n")
            #
            params, optional, empty = self.start_macro(macroname)
            # rip off the macroname
            if params:
                line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            opened = 0
            implied_content = 0

            # handle attribute mappings here:
            for pentry in params:
                if pentry.type == "attribute":
                    if pentry.optional:
                        m = _optional_rx.match(line)
                        if m and entry.outputname:
                            line = line[m.end():]
                            self.dump_attr(pentry, m.group(1))
                    elif pentry.text and entry.outputname:
                        # value supplied by conversion spec:
                        self.dump_attr(pentry, pentry.text)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %r"
                                % (pentry.name, macroname, line[:100]))
                        if entry.outputname:
                            self.dump_attr(pentry, m.group(1))
                        line = line[m.end():]
                elif pentry.type == "child":
                    if pentry.optional:
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(macroname)
                            stack.append(pentry.name)
                            self.write("(%s\n" % pentry.name)
                            self.write("-%s\n" % encode(m.group(1)))
                            self.write(")%s\n" % pentry.name)
                            stack.pop()
                    else:
                        if entry.outputname and not opened:
                            opened = 1
                            self.write("(%s\n" % entry.outputname)
                            stack.append(entry.name)
                        self.write("(%s\n" % pentry.name)
                        stack.append(pentry.name)
                        # Drop the character after the whitespace (the
                        # argument's opening delimiter) and recurse; the
                        # trailing [1:] drops the "}" that ended it.
                        self.line = skip_white(line)[1:]
                        line = self.subconvert(
                            "}", len(stack) + depth + 1)[1:]
                        self.write(")%s\n" % stack.pop())
                elif pentry.type == "content":
                    if pentry.implied:
                        implied_content = 1
                    else:
                        if entry.outputname and not opened:
                            opened = 1
                            self.write("(%s\n" % entry.outputname)
                            stack.append(entry.name)
                        line = skip_white(line)
                        # Slice rather than index so that exhausted
                        # input reports the format error below.
                        if line[:1] != "{":
                            raise LaTeXFormatError(
                                "missing content for " + macroname)
                        self.line = line[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)
                        if line and line[0] == "}":
                            line = line[1:]
                elif pentry.type == "text" and pentry.text:
                    if entry.outputname and not opened:
                        opened = 1
                        stack.append(entry.name)
                        self.write("(%s\n" % entry.outputname)
                    #dbgmsg("--- text: %r" % pentry.text)
                    self.write("-%s\n" % encode(pentry.text))
                elif pentry.type == "entityref":
                    self.write("&%s\n" % pentry.name)
            if entry.outputname:
                if not opened:
                    self.write("(%s\n" % entry.outputname)
                    stack.append(entry.name)
                if not implied_content:
                    # Explicit content was handled above, so the element
                    # can be closed right away.
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
            continue
        # Same condition as the test at the top of the loop; neither
        # 'line' nor 'stack' has changed since then on this path.
        if line[0] == endchar and not stack:
            self.line = line[1:]
            return self.line
        if line[0] == "}":
            # end of macro or group
            if not stack:
                # Nothing is open, so this brace has no partner.
                raise LaTeXFormatError(
                    "unbalanced '}': %r" % line[:100])
            macroname = stack[-1]
            if macroname:
                conversion = self.table[macroname]
                if conversion.outputname:
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % conversion.outputname)
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "~":
            # don't worry about the "tie" aspect of this command
            line = line[1:]
            self.write("- \n")
            continue
        if line[0] == "{":
            # Bare group: the empty name marks it on the stack.
            stack.append("")
            line = line[1:]
            continue
        # The length test keeps a lone trailing backslash from raising
        # IndexError; it falls through to the markup error below.
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            self.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            self.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        if line[:2] == r"\_":
            line = "_" + line[2:]
            continue
        if line[:2] in (r"\'", r'\"'):
            # combining characters...
            if len(line) < 3:
                # Input ended before the character to combine with.
                raise LaTeXFormatError(
                    "missing character after " + line[:2])
            self.combining_char(line[1], line[2])
            line = line[3:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            self.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        # XXX can we axe this???
        if line[0] == "]":
            self.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %r%s"
                               % (line[:100], extra))
    # Input exhausted: close trailing elements whose entries have a
    # non-empty 'closes' setting; anything else left open is an error.
    while stack:
        entry = self.get_entry(stack[-1])
        if entry.closes:
            self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
            del stack[-1]
        else:
            break
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + ", ".join(stack))