def __init__(self, env, caller):
    """Initialise the base parser and the annotation-tracking flags.

    env and caller are passed unchanged to OdfParser.__init__.
    """
    OdfParser.__init__(self, env, caller)
    # Result string, empty at start
    self.res = u''
    # Must we avoid dumping the current tag/content ?
    self.ignore = False
    # Are we parsing an annotation ?
    self.inAnnotation = False
    # Within an annotation, have we already met a text ?
    self.textEncountered = False
def __init__(self, env, caller):
    """Build the parser: delegate to OdfParser, then reset the local state.

    env and caller are forwarded as-is to OdfParser.__init__.
    """
    OdfParser.__init__(self, env, caller)
    # Flags driving what gets dumped:
    #  - inAnnotation: are we parsing an annotation ?
    #  - textEncountered: within an annotation, have we already met a text ?
    #  - ignore: must we avoid dumping the current tag/content ?
    self.inAnnotation = False
    self.textEncountered = False
    self.ignore = False
    # Result string, starts empty
    self.res = ""
def endElement(self, elem):
    """Append the closing tag of elem to self.res unless dumping is disabled.

    When elem is the annotation tag of the office namespace, both
    self.inAnnotation and self.ignore are reset first, so that closing tag
    is always dumped. The base-class handler is called last.
    """
    env = self.env
    annotationTag = '%s:annotation' % env.ns(env.NS_OFFICE)
    if elem == annotationTag:
        # Leaving the annotation: dumping is enabled again
        self.ignore = False
        self.inAnnotation = False
    if not self.ignore:
        self.res += '</%s>' % elem
    OdfParser.endElement(self, elem)
def startElement(self, elem, attrs):
    """Update the parsing state for an opening tag and dump it if needed.

    The environment returned by OdfParser.startElement carries the state:
    ignorable elements switch it to IGNORING, an annotation switches it to
    READING_STATEMENT, and change-start / conditional-text switch it to
    READING_EXPRESSION. Any other element is handled according to the
    current state. e.manageInserts() is always called at the end.
    """
    e = OdfParser.startElement(self, elem, attrs)
    ns = e.onStartElement()
    officeNs = ns[e.NS_OFFICE]
    textNs = ns[e.NS_TEXT]
    # Qualified names of the tags that switch the parsing state
    annotationTag = '%s:annotation' % officeNs
    expressionTags = ('%s:change-start' % textNs,
                      '%s:conditional-text' % textNs)
    if elem in e.ignorableElements:
        e.state = e.IGNORING
    elif elem == annotationTag:
        e.state = e.READING_STATEMENT
    elif elem in expressionTags:
        e.state = e.READING_EXPRESSION
        e.exprHasStyle = False
    elif e.state == e.IGNORING:
        # Nothing is dumped while ignoring
        pass
    elif e.state == e.READING_CONTENT:
        if elem in e.impactableElements:
            if e.mode == e.ADD_IN_SUBBUFFER:
                e.addSubBuffer()
            e.currentBuffer.addElement(e.currentElem.name)
        e.currentBuffer.dumpStartElement(elem, attrs)
    elif e.state == e.READING_STATEMENT:
        # Tags met while reading a statement are not dumped
        pass
    elif e.state == e.READING_EXPRESSION:
        # A span met before any expression text is dumped and remembered
        # through e.exprHasStyle
        isSpan = (elem == ('%s:span' % textNs))
        if isSpan and not e.currentContent.strip():
            e.currentBuffer.dumpStartElement(elem, attrs)
            e.exprHasStyle = True
    e.manageInserts()
def startElement(self, elem, attrs):
    """Record style definitions met while parsing.

    On a style tag, a Style is created from the name and family attributes
    (plus optional class, display-name and default-outline-level), stored
    in e.styles and set as e.currentStyle. On a text-properties tag met
    while a style is being parsed, the font size, if present, is set on
    the current style.
    """
    e = OdfParser.startElement(self, elem, attrs)
    styleNs = e.ns(e.NS_STYLE)
    self.styleTag = '%s:style' % styleNs
    if elem != self.styleTag:
        # Only tags nested in a style definition are of interest here
        if e.state == PARSING_STYLE:
            if elem == ('%s:text-properties' % styleNs):
                fontSizeKey = '%s:font-size' % e.ns(e.NS_FO)
                if fontSizeKey in attrs:
                    e.currentStyle.setFontSize(attrs[fontSizeKey])
        return
    e.state = PARSING_STYLE
    # Create the style from its mandatory attributes
    style = Style(name=attrs['%s:name' % styleNs],
                  family=attrs['%s:family' % styleNs])
    # Optional attributes
    classAttr = '%s:class' % styleNs
    if classAttr in attrs:
        style.styleClass = attrs[classAttr]
    displayNameAttr = '%s:display-name' % styleNs
    if displayNameAttr in attrs:
        style.displayName = attrs[displayNameAttr]
    # Record this style in the environment
    e.styles[style.name] = style
    e.currentStyle = style
    # A blank outline level is treated as absent
    levelKey = '%s:default-outline-level' % styleNs
    if levelKey in attrs and attrs[levelKey].strip():
        style.outlineLevel = int(attrs[levelKey])
def startElement(self, elem, attrs):
    """Record style definitions met while parsing.

    On a style tag, a Style is created from the name and family attributes
    (plus optional class, display-name and default-outline-level), stored
    in e.styles and set as e.currentStyle. On a text-properties tag met
    while a style is being parsed, the font size, if present, is set on
    the current style.

    Raises KeyError if a style tag lacks its name or family attribute.
    """
    e = OdfParser.startElement(self, elem, attrs)
    styleNs = e.ns(e.NS_STYLE)
    self.styleTag = '%s:style' % styleNs
    if elem == self.styleTag:
        e.state = PARSING_STYLE
        nameAttr = '%s:name' % styleNs
        familyAttr = '%s:family' % styleNs
        classAttr = '%s:class' % styleNs
        displayNameAttr = '%s:display-name' % styleNs
        # Create the style
        style = Style(name=attrs[nameAttr], family=attrs[familyAttr])
        # Membership is tested with "in": has_key() does not exist on
        # Python 3 mappings, while "in" works on both Python 2 and 3.
        if classAttr in attrs:
            style.styleClass = attrs[classAttr]
        if displayNameAttr in attrs:
            style.displayName = attrs[displayNameAttr]
        # Record this style in the environment
        e.styles[style.name] = style
        e.currentStyle = style
        # A blank outline level is treated as absent
        levelKey = '%s:default-outline-level' % styleNs
        if levelKey in attrs and attrs[levelKey].strip():
            style.outlineLevel = int(attrs[levelKey])
    elif e.state == PARSING_STYLE:
        # I am parsing tags within the style.
        if elem == ('%s:text-properties' % styleNs):
            fontSizeKey = '%s:font-size' % e.ns(e.NS_FO)
            if fontSizeKey in attrs:
                e.currentStyle.setFontSize(attrs[fontSizeKey])
def endElement(self, elem):
    """Append the closing tag of elem to self.res unless dumping is disabled.

    The base-class handler runs first and returns the environment. When
    elem is the annotation tag of the office namespace, self.inAnnotation
    and self.ignore are reset before the check, so that closing tag is
    always dumped.
    """
    env = OdfParser.endElement(self, elem)
    leavingAnnotation = (elem == "%s:annotation" % env.ns(env.NS_OFFICE))
    if leavingAnnotation:
        self.inAnnotation = False
        self.ignore = False
    if self.ignore:
        # Still inside a part that must not be dumped
        return
    self.res += "</%s>" % elem
def characters(self, content):
    """Append content to self.res, replacing XML special characters.

    Each character found in XML_SPECIAL_CHARS is replaced by its mapped
    value; other characters are copied unchanged. Nothing is appended
    while self.ignore is set.
    """
    OdfParser.characters(self, content)
    if self.ignore:
        return
    # "in" replaces has_key(), which does not exist on Python 3 mappings.
    # The escaped text is joined once instead of growing self.res one
    # character at a time.
    escaped = [XML_SPECIAL_CHARS[c] if c in XML_SPECIAL_CHARS else c
               for c in content]
    self.res += ''.join(escaped)
def startElement(self, elem, attrs):
    """Update the parsing state for an opening tag and dump it if needed.

    - Ignorable elements switch the state to IGNORING.
    - An annotation switches it to READING_STATEMENT.
    - change-start / conditional-text switch it to READING_EXPRESSION.
    - A table cell holding a string formula starting with 'of:="' is dumped
      without its formula, string-value and value-type attributes, and its
      string-value is remembered as the current ODS expression.
    - Any other element is handled according to the current state.

    e.manageInserts() is always called at the end.
    """
    e = OdfParser.startElement(self, elem, attrs)
    # The returned namespace mapping is not needed here: qualified tag
    # names are read from e.tags.
    e.onStartElement()
    tags = e.tags
    if elem in e.ignorableElements:
        e.state = e.IGNORING
    elif elem == tags['annotation']:
        # Be it in an ODT or ODS template, an annotation is considered to
        # contain a POD statement.
        e.state = e.READING_STATEMENT
    elif elem in (tags['change-start'], tags['conditional-text']):
        # In an ODT template, any text in track-changes or any conditional
        # field is considered to contain a POD expression.
        e.state = e.READING_EXPRESSION
        e.exprHasStyle = False
    elif (elem == tags['table-cell']) and \
         (tags['formula'] in attrs) and \
         (tags['value-type'] in attrs) and \
         (attrs[tags['value-type']] == 'string') and \
         attrs[tags['formula']].startswith('of:="'):
        # In an ODS template, any cell containing a formula of type "string"
        # and whose content is expressed as a string between double quotes
        # (="...") is considered to contain a POD expression. But here it
        # is a special case: we need to dump the cell; the expression is not
        # directly contained within this cell; the expression will be
        # contained in the next inner paragraph. So we must here dump the
        # cell, but without some attributes, because the "formula" will be
        # converted to the result of evaluating the POD expression.
        if e.mode == e.ADD_IN_SUBBUFFER:
            e.addSubBuffer()
        e.currentBuffer.addElement(e.currentElem.name)
        hook = e.currentBuffer.dumpStartElement(
            elem, attrs,
            ignoreAttrs=(tags['formula'], tags['string-value'],
                         tags['value-type']),
            hook=True)
        # We already have the POD expression: remember it on the env.
        e.currentOdsExpression = attrs[tags['string-value']]
        e.currentOdsHook = hook
    else:
        if e.state == e.IGNORING:
            pass
        elif e.state == e.READING_CONTENT:
            if elem in e.impactableElements:
                if e.mode == e.ADD_IN_SUBBUFFER:
                    e.addSubBuffer()
                e.currentBuffer.addElement(e.currentElem.name)
            e.currentBuffer.dumpStartElement(elem, attrs)
        elif e.state == e.READING_STATEMENT:
            pass
        elif e.state == e.READING_EXPRESSION:
            # A span met before any expression text is dumped and
            # remembered through e.exprHasStyle
            if (elem == tags['span']) and not e.currentContent.strip():
                e.currentBuffer.dumpStartElement(elem, attrs)
                e.exprHasStyle = True
    e.manageInserts()
def startElement(self, elem, attrs):
    """Update the parsing state for an opening tag and dump it if needed.

    - Elements in e.ignorableElems switch the state to IGNORING.
    - An annotation switches it to READING_STATEMENT.
    - Elements in e.exprStartElems switch it to READING_EXPRESSION.
    - A table cell holding a string formula starting with 'of:="' is dumped
      without its formula, string-value and value-type attributes, and its
      string-value is remembered as the current ODS expression.
    - Any other element is handled according to the current state.

    e.manageInserts() is always called at the end.
    """
    e = OdfParser.startElement(self, elem, attrs)
    # The returned namespace mapping is not needed here: qualified tag
    # names are read from e.tags.
    e.onStartElement()
    tags = e.tags
    if elem in e.ignorableElems:
        e.state = e.IGNORING
    elif elem == tags['annotation']:
        # Be it in an ODT or ODS template, an annotation is considered to
        # contain a POD statement.
        e.state = e.READING_STATEMENT
    elif elem in e.exprStartElems:
        # Any track-changed text or being in a conditional or input field is
        # considered to be a POD expression.
        e.state = e.READING_EXPRESSION
        e.exprHasStyle = False
    elif (elem == tags['table-cell']) and \
         (tags['formula'] in attrs) and \
         (tags['value-type'] in attrs) and \
         (attrs[tags['value-type']] == 'string') and \
         attrs[tags['formula']].startswith('of:="'):
        # In an ODS template, any cell containing a formula of type "string"
        # and whose content is expressed as a string between double quotes
        # (="...") is considered to contain a POD expression. But here it
        # is a special case: we need to dump the cell; the expression is not
        # directly contained within this cell; the expression will be
        # contained in the next inner paragraph. So we must here dump the
        # cell, but without some attributes, because the "formula" will be
        # converted to the result of evaluating the POD expression.
        if e.mode == e.ADD_IN_SUBBUFFER:
            e.addSubBuffer()
        e.currentBuffer.addElement(e.currentElem.name)
        hook = e.currentBuffer.dumpStartElement(
            elem, attrs,
            ignoreAttrs=(tags['formula'], tags['string-value'],
                         tags['value-type']),
            hook=True)
        # We already have the POD expression: remember it on the env.
        e.currentOdsExpression = attrs[tags['string-value']]
        e.currentOdsHook = hook
    else:
        if e.state == e.IGNORING:
            pass
        elif e.state == e.READING_CONTENT:
            if elem in e.impactableElems:
                if e.mode == e.ADD_IN_SUBBUFFER:
                    e.addSubBuffer()
                e.currentBuffer.addElement(e.currentElem.name)
            e.currentBuffer.dumpStartElement(elem, attrs)
        elif e.state == e.READING_STATEMENT:
            pass
        elif e.state == e.READING_EXPRESSION:
            # A span met before any expression text is dumped and
            # remembered through e.exprHasStyle
            if (elem == tags['span']) and not e.currentContent.strip():
                e.currentBuffer.dumpStartElement(elem, attrs)
                e.exprHasStyle = True
    e.manageInserts()
def endElement(self, elem):
    """Go back to the READING state when a tracked section is closed.

    Closing a style or a page-layout also clears the corresponding
    "current" object on the environment; closing master-styles only resets
    the state. Any other closing tag leaves the environment untouched.
    """
    e = OdfParser.endElement(self, elem)
    tags = e.tags
    if elem == tags['style']:
        e.currentStyle = None
    elif elem == tags['page-layout']:
        e.currentPageLayout = None
    elif elem != tags['master-styles']:
        # Not one of the three tracked tags
        return
    e.state = READING
def characters(self, content):
    """Route character data according to the current parsing state.

    - IGNORING: content is dropped.
    - READING_CONTENT: content is dumped into the current buffer.
    - READING_STATEMENT: content is accumulated in e.currentContent only
      when the current element name starts with the text namespace.
    - READING_EXPRESSION: content is accumulated in e.currentContent.
    """
    e = OdfParser.characters(self, content)
    if e.state == e.IGNORING:
        return
    if e.state == e.READING_CONTENT:
        e.currentBuffer.dumpContent(content)
    elif e.state == e.READING_STATEMENT:
        inTextElement = e.currentElem.elem.startswith(e.namespaces[e.NS_TEXT])
        if inTextElement:
            e.currentContent += content
    elif e.state == e.READING_EXPRESSION:
        e.currentContent += content
def characters(self, content):
    """Route character data according to the current parsing state.

    - IGNORING: content is dropped.
    - READING_CONTENT: content is dumped into the current buffer, unless an
      ODS expression is pending.
    - READING_STATEMENT: content is accumulated in e.currentContent only
      when the current element name starts with the text namespace.
    - READING_EXPRESSION: content is accumulated in e.currentContent.
    """
    e = OdfParser.characters(self, content)
    if e.state == e.IGNORING:
        return
    if e.state == e.READING_CONTENT:
        # Do not write content if we have encountered an ODS expression:
        # this content will be replaced with the expression's result.
        if not e.currentOdsExpression:
            e.currentBuffer.dumpContent(content)
    elif e.state == e.READING_STATEMENT:
        inTextElement = e.currentElem.elem.startswith(e.namespaces[e.NS_TEXT])
        if inTextElement:
            e.currentContent += content
    elif e.state == e.READING_EXPRESSION:
        e.currentContent += content
def startElement(self, elem, attrs):
    """Collect styles, page layouts and the master layout name.

    - A style tag creates a Style (name, family, optional class,
      display-name and default-outline-level), stores it in e.styles and
      makes it e.currentStyle.
    - A page-layout tag creates a PageLayout, stores it in e.pageLayouts
      and makes it e.currentPageLayout.
    - A master-styles tag switches to PARSING_MASTER_STYLES.
    - Other tags are inspected according to the current state: font size
      within a style, properties within a page layout, page-layout-name of
      a master page within master-styles.

    Raises KeyError if a style tag lacks its name or family attribute, or
    a page-layout tag lacks its name attribute.
    """
    e = OdfParser.startElement(self, elem, attrs)
    # The returned namespace mapping is not needed here: qualified names
    # are read from e.tags.
    e.onStartElement()
    tags = e.tags
    if elem == tags['style']:
        e.state = PARSING_STYLE
        # Create the style
        style = Style(name=attrs[tags['name']],
                      family=attrs[tags['family']])
        # Membership is tested with "in": has_key() does not exist on
        # Python 3 mappings, while "in" works on both Python 2 and 3.
        classAttr = tags['class']
        if classAttr in attrs:
            style.styleClass = attrs[classAttr]
        dnAttr = tags['display-name']
        if dnAttr in attrs:
            style.displayName = attrs[dnAttr]
        # A blank outline level is treated as absent
        dolAttr = tags['default-outline-level']
        if dolAttr in attrs and attrs[dolAttr].strip():
            style.outlineLevel = int(attrs[dolAttr])
        # Record this style in the environment
        e.styles[style.name] = style
        e.currentStyle = style
    elif elem == tags['page-layout']:
        e.state = PARSING_PAGE_LAYOUT
        pageLayout = PageLayout(attrs[tags['name']])
        # Record this page layout in the environment
        e.pageLayouts[pageLayout.name] = pageLayout
        e.currentPageLayout = pageLayout
    elif elem == tags['master-styles']:
        e.state = PARSING_MASTER_STYLES
    elif e.state == PARSING_STYLE:
        # Find properties within this style definition
        if elem == tags['text-properties']:
            fontSizeAttr = tags['font-size']
            if fontSizeAttr in attrs:
                e.currentStyle.setFontSize(attrs[fontSizeAttr])
    elif e.state == PARSING_PAGE_LAYOUT:
        # Find properties within this page layout definition
        if elem == tags['page-layout-properties']:
            e.currentPageLayout.setProperties(e, attrs)
    elif e.state == PARSING_MASTER_STYLES:
        # I am parsing section "master-styles"
        if elem == tags['master-page']:
            plnAttr = tags['page-layout-name']
            if plnAttr in attrs:
                e.masterLayoutName = attrs[plnAttr]
def startElement(self, elem, attrs):
    """Append the opening tag of elem to self.res unless dumping is disabled.

    Entering an annotation sets self.inAnnotation and clears
    self.textEncountered. Within an annotation, the first text paragraph
    only sets self.textEncountered; any further one sets self.ignore, which
    stops the dumping of tags, starting with that paragraph.
    """
    env = OdfParser.startElement(self, elem, attrs)
    # Do we enter into an annotation ?
    if elem == "%s:annotation" % env.ns(env.NS_OFFICE):
        self.inAnnotation = True
        self.textEncountered = False
    elif elem == "%s:p" % env.ns(env.NS_TEXT) and self.inAnnotation:
        if self.textEncountered:
            # Not the first paragraph of the annotation: stop dumping
            self.ignore = True
        else:
            self.textEncountered = True
    if self.ignore:
        return
    # NOTE(review): attribute values are written as-is, without XML
    # escaping - confirm that this is intended.
    pieces = ["<%s" % elem]
    for attrName, attrValue in attrs.items():
        pieces.append(' %s="%s"' % (attrName, attrValue))
    pieces.append(">")
    self.res += "".join(pieces)
def startElement(self, elem, attrs):
    """Append the opening tag of elem to self.res unless dumping is disabled.

    Entering an annotation sets self.inAnnotation and clears
    self.textEncountered. Within an annotation, the first text paragraph
    only sets self.textEncountered; any further one sets self.ignore, which
    stops the dumping of tags, starting with that paragraph.
    """
    env = OdfParser.startElement(self, elem, attrs)
    # Do we enter into an annotation ?
    if elem == '%s:annotation' % env.ns(env.NS_OFFICE):
        self.inAnnotation = True
        self.textEncountered = False
    elif elem == '%s:p' % env.ns(env.NS_TEXT) and self.inAnnotation:
        if self.textEncountered:
            # A second paragraph within the annotation: stop dumping
            self.ignore = True
        else:
            self.textEncountered = True
    if self.ignore:
        return
    # NOTE(review): attribute values are written as-is, without XML
    # escaping - confirm that this is intended.
    dumpedAttrs = [' %s="%s"' % (attrName, attrValue)
                   for attrName, attrValue in attrs.items()]
    self.res += '<%s' % elem
    self.res += ''.join(dumpedAttrs)
    self.res += '>'
def characters(self, content):
    """Route character data according to the current parsing state.

    - IGNORING: content is dropped.
    - READING_CONTENT: content is dumped into the current buffer, unless an
      ODS expression is pending.
    - READING_STATEMENT: content is accumulated in e.currentContent only
      when the current element is one of e.NOTE_TAGS.
    - READING_EXPRESSION: content is accumulated in e.currentContent.
    """
    e = OdfParser.characters(self, content)
    if e.state == e.IGNORING:
        return
    if e.state == e.READING_CONTENT:
        # Do not write content if we have encountered an ODS expression:
        # this content will be replaced with the expression's result.
        if not e.currentOdsExpression:
            e.currentBuffer.dumpContent(content)
    elif e.state == e.READING_STATEMENT:
        # Ignore note meta-data: creator, date, sender-initials.
        keepContent = e.currentElem.elem in e.NOTE_TAGS
        if keepContent:
            e.currentContent += content
    elif e.state == e.READING_EXPRESSION:
        e.currentContent += content
def endDocument(self):
    """Hand the styles collected in the environment over to the caller."""
    env = OdfParser.endDocument(self)
    collectedStyles = env.styles
    self.caller.styles = collectedStyles
def __init__(self, env, caller):
    """Initialise the base parser; the style tag name is not known yet.

    env and caller are passed unchanged to OdfParser.__init__.
    """
    OdfParser.__init__(self, env, caller)
    # No style tag name has been stored yet
    self.styleTag = None
def endElement(self, elem):
    """Leave style-parsing mode when the style tag is closed.

    Any other closing tag leaves the environment untouched.
    """
    env = OdfParser.endElement(self, elem)
    if elem != self.styleTag:
        return
    # The style definition is over: back to plain reading
    env.state = READING
    env.currentStyle = None
def __init__(self, env, caller):
    """Initialise the base parser and copy caller.raiseOnError onto env."""
    OdfParser.__init__(self, env, caller)
    # The environment gets the same raiseOnError value as the caller
    mustRaise = caller.raiseOnError
    env.raiseOnError = mustRaise
def __init__(self, env, caller):
    """Initialise the parser by delegating entirely to OdfParser.

    env and caller are passed unchanged; no extra state is set here.
    """
    OdfParser.__init__(self, env, caller)
def endElement(self, elem):
    """Handle a closing tag: finish statements/expressions, dump end tags.

    - Closing an ignorable element goes back to READING_CONTENT.
    - Closing an annotation turns the collected statement lines into an
      action on the current buffer, then goes back to READING_CONTENT.
    - Any other closing tag is handled according to the current state:
      dumped while reading content (with buffer bookkeeping for impactable
      elements), used to collect statement lines while reading a statement,
      or used to finish the expression while reading an expression.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm the placement of the dedented
    statements against the original file.
    """
    e = OdfParser.endElement(self, elem)
    ns = e.onEndElement()
    officeNs = ns[e.NS_OFFICE]
    textNs = ns[e.NS_TEXT]
    if elem in e.ignorableElements:
        e.state = e.READING_CONTENT
    elif elem == e.tags['annotation']:
        # Manage statement
        oldCb = e.currentBuffer
        actionElemIndex = oldCb.createAction(e.currentStatement)
        e.currentStatement = []
        # -1 is treated as "no content to transfer"
        if actionElemIndex != -1:
            e.currentBuffer = oldCb.\
                transferActionIndependentContent(actionElemIndex)
            # The mode depends on whether the transfer returned the same
            # buffer or another one
            if e.currentBuffer == oldCb:
                e.mode = e.ADD_IN_SUBBUFFER
            else:
                e.mode = e.ADD_IN_BUFFER
        e.state = e.READING_CONTENT
    else:
        if e.state == e.IGNORING:
            pass
        elif e.state == e.READING_CONTENT:
            # Dump the ODS POD expression if any
            if e.currentOdsExpression:
                e.currentBuffer.addExpression(e.currentOdsExpression,
                                              tiedHook=e.currentOdsHook)
                e.currentOdsExpression = None
                e.currentOdsHook = None
            # Dump the ending tag
            e.currentBuffer.dumpEndElement(elem)
            if elem in e.impactableElements:
                if isinstance(e.currentBuffer, MemoryBuffer):
                    isMainElement = e.currentBuffer.isMainElement(elem)
                    # Unreference the element among the 'elements' attribute
                    e.currentBuffer.unreferenceElement(elem)
                    if isMainElement:
                        parent = e.currentBuffer.parent
                        if not e.currentBuffer.action:
                            # Delete this buffer and transfer content to
                            # parent.
                            e.currentBuffer.transferAllContent()
                            parent.removeLastSubBuffer()
                            e.currentBuffer = parent
                        else:
                            if isinstance(parent, FileBuffer):
                                # Execute buffer action and delete the
                                # buffer.
                                e.currentBuffer.action.execute()
                                parent.removeLastSubBuffer()
                                e.currentBuffer = parent
                        # NOTE(review): assumed to apply to every main
                        # element, whatever branch was taken above - confirm
                        e.mode = e.ADD_IN_SUBBUFFER
        elif e.state == e.READING_STATEMENT:
            if e.currentElem.elem == Text.OD.elem:
                # Keep non-blank lines only, then start a new line
                statementLine = e.currentContent.strip()
                if statementLine:
                    e.currentStatement.append(statementLine)
                e.currentContent = ''
        elif e.state == e.READING_EXPRESSION:
            if (elem == e.tags['change-end']) or \
               (elem == e.tags['conditional-text']):
                expression = e.currentContent.strip()
                e.currentContent = ''
                # Manage expression
                e.currentBuffer.addExpression(expression)
                # Close the span remembered through e.exprHasStyle
                if e.exprHasStyle:
                    e.currentBuffer.dumpEndElement(e.tags['span'])
                e.state = e.READING_CONTENT
def endDocument(self):
    """Hand the collected styles and the master page layout to the caller.

    The page layout is looked up in e.pageLayouts under e.masterLayoutName;
    the lookup fails if that name has no entry.
    """
    env = OdfParser.endDocument(self)
    caller = self.caller
    caller.styles = env.styles
    masterName = env.masterLayoutName
    caller.pageLayout = env.pageLayouts[masterName]
def characters(self, content):
    """Append the XML-escaped content to self.res unless dumping is disabled."""
    OdfParser.characters(self, content)
    if self.ignore:
        # Content met while ignoring is dropped
        return
    self.res += escapeXml(content)