def new_elem(self, name, attribOD=None):
    """
    Build a new Element and record its qualified names.

    name may be in path format ('table:table') OR in tag format
    ('{urn:oasis:names:tc:opendocument:xmlns:table:1.0}table'); it is
    converted with self.NS before the Element is created.

    attribOD, when given and non-empty, is an OrderedDict (so attribute
    order is preserved) that is converted with self.NS_attrib.

    The element tag and every attribute name of the form '{uri}local' are
    stored in self.qnameOD as 'prefix:local', with the prefix looked up in
    self.nsOD.  Returns the new Element.
    """
    full_tag = self.NS(name)

    if attribOD:
        my_new_elem = ET.Element(full_tag, attrib=self.NS_attrib(attribOD))
    else:
        my_new_elem = ET.Element(full_tag)

    # The tag is handled first, then the attribute names in their own order.
    clark_names = [my_new_elem.tag]
    clark_names.extend(key for key, _ in list(my_new_elem.attrib.items()))

    for clark in clark_names:
        pieces = clark.split('}')
        if len(pieces) != 2:
            continue  # not of the form '{uri}local', nothing to record
        local = pieces[1]
        uri = pieces[0][1:]  # drop the leading '{'
        self.qnameOD[clark] = '%s:%s' % (self.nsOD[uri], local)

    return my_new_elem
def tostring(self):
    """
    Serialize the tree rooted at self.root and return it utf-8 encoded.

    When self.xml_header is non-empty it is placed first, followed by a
    newline.  Names are resolved through self.qnameOD and self.nsOD.
    """
    pieces = []
    if self.xml_header:
        pieces.append(self.xml_header + '\n')

    running_py2 = sys.version_info < (3, )

    def collect(chunk):
        # Under python2 every chunk is decoded from utf-8 before it is stored.
        if running_py2:
            chunk = chunk.decode('utf-8')
        pieces.append(chunk)

    # There are differences between the python2 and python3 serialize routines
    if running_py2:
        ET._serialize_xml(collect, self.root, "utf-8", self.qnameOD, self.nsOD)
    else:
        # Last argument True => use short format for empty elements
        ET._serialize_xml(collect, self.root, self.qnameOD, self.nsOD, True)

    return "".join(pieces).encode('utf-8')
def elem_tostring(self, elem, include_ns=False, use_linebreaks=True, include_header=False):
    """
    Serialize a single element (and its subtree) and return it utf-8 encoded.

    elem            Element to serialize; names are resolved through
                    self.qnameOD and self.nsOD.
    include_ns      when False, every serializer chunk whose stripped text
                    starts with 'xmlns:' is dropped from the output.
    use_linebreaks  when True, every chunk that ends with '>' gets a newline
                    inserted after each '>' it contains.
    include_header  when True and self.xml_header is non-empty, the header
                    plus a newline is placed in front of the output.

    The header is joined to the text BEFORE encoding, so the result is always
    a single utf-8 encoded value (the previous code added the text header to
    already-encoded output, which fails on python3).
    """
    pieces = []
    running_py2 = sys.version_info < (3,)

    def collect(chunk):
        # Under python2 every chunk is decoded from utf-8 before it is examined.
        if running_py2:
            chunk = chunk.decode('utf-8')

        stripped = chunk.strip()
        if stripped.startswith(u'xmlns:') and not include_ns:
            return  # namespace declaration not wanted

        if use_linebreaks and stripped.endswith(u'>'):
            chunk = chunk.replace(u'>', u'>\n')

        pieces.append(chunk)

    # There are differences between the python2 and python3 serialize routines
    if running_py2:
        ET._serialize_xml(collect, elem, "utf-8", self.qnameOD, self.nsOD)
    else:
        short_empty_elements = True  # use short format for empty elements
        ET._serialize_xml(collect, elem, self.qnameOD, self.nsOD, short_empty_elements)

    sOut = u"".join(pieces)
    if include_header and self.xml_header:
        sOut = self.xml_header + u'\n' + sOut

    return sOut.encode('utf-8')
def elem_tostring(self, elem, include_ns=False, use_linebreaks=True, include_header=False):
    """
    Serialize a single element (and its subtree) and return it utf-8 encoded.

    elem            Element to serialize; names are resolved through
                    self.qnameOD and self.nsOD.
    include_ns      when False, every serializer chunk whose stripped text
                    starts with 'xmlns:' is dropped from the output.
    use_linebreaks  when True, every chunk that ends with '>' gets a newline
                    inserted after each '>' it contains.
    include_header  when True and self.xml_header is non-empty, the header
                    plus a newline is placed in front of the output.

    The header is joined to the text BEFORE encoding, so the result is always
    a single utf-8 encoded value (the previous code added the text header to
    already-encoded output, which fails on python3).
    """
    xml_dataL = []
    is_py2 = sys.version_info < (3, )

    def write(sInp):
        # Under python2 every chunk is decoded from utf-8 before it is examined.
        if is_py2:
            sInp = sInp.decode('utf-8')

        trimmed = sInp.strip()
        if not include_ns and trimmed.startswith('xmlns:'):
            return  # namespace declaration not wanted

        if use_linebreaks and trimmed.endswith('>'):
            sInp = sInp.replace('>', '>\n')

        xml_dataL.append(sInp)

    # There are differences between the python2 and python3 serialize routines
    if is_py2:
        ET._serialize_xml(write, elem, "utf-8", self.qnameOD, self.nsOD)
    else:
        short_empty_elements = True  # use short format for empty elements
        ET._serialize_xml(write, elem, self.qnameOD, self.nsOD, short_empty_elements)

    sOut = "".join(xml_dataL)
    if include_header and self.xml_header:
        sOut = self.xml_header + '\n' + sOut

    return sOut.encode('utf-8')
def tostring(self):
    """
    Return the complete document (self.root and below) as utf-8 encoded xml.

    A non-empty self.xml_header is written first, followed by a newline.
    Names are resolved through self.qnameOD and self.nsOD.
    """
    is_py2 = sys.version_info < (3,)
    chunks = [self.xml_header + '\n'] if self.xml_header else []

    def sink(text):
        # Under python2 every chunk is decoded from utf-8 before it is stored.
        chunks.append(text.decode('utf-8') if is_py2 else text)

    # There are differences between the python2 and python3 serialize routines
    if is_py2:
        ET._serialize_xml(sink, self.root, "utf-8", self.qnameOD, self.nsOD)
    else:
        use_short_empty = True  # use short format for empty elements
        ET._serialize_xml(sink, self.root, self.qnameOD, self.nsOD, use_short_empty)

    joined = u"".join(chunks)
    return joined.encode('utf-8')
def __init__(self, xml_file_name_or_src):
    """
    Read and parse xml file using modified version of standard python xml.etree.ElementTree.

    xml_file_name_or_src can be a file name like: "content.xml"
    OR can be xml source.  A value that ends with '.xml' and is shorter than
    256 characters is treated as a file name and read as utf-8 text; anything
    else is used directly as the xml source.

    After parsing, lookup tables are built for every element reachable from
    the root: parent, depth, original child position, short path and short
    path counters.
    """
    if xml_file_name_or_src.endswith('.xml') and len(xml_file_name_or_src) < 256:
        self.xml_file_name_or_src = xml_file_name_or_src
        # "with" closes the file even when read() raises
        with io.open(xml_file_name_or_src, 'rt', encoding='utf-8') as fInp:
            xml_src = fInp.read()
    else:
        self.xml_file_name_or_src = None
        xml_src = xml_file_name_or_src

    self.xml_header = ''  # Assume no header unless found at head of file
    match = header_re.match(xml_src)
    if match:
        self.xml_header = match.group(0)  # will need \n when serialized

    # ns entries like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
    self.nsOD = OrderedDict()
    # rev_ns entries like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')
    self.rev_nsOD = OrderedDict()
    # qname entries like: ('{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content', u'office:document-content')
    self.qnameOD = OrderedDict()

    events = ("start", "end", "start-ns", "end-ns")
    context = ET.iterparse(StringIO(xml_src), events=events)
    for event, elem in context:
        if event == "start":
            # Record 'prefix:local' for the tag first, then for each attribute name.
            for clark in [elem.tag] + list(elem.attrib):
                sL = clark.split('}')
                if len(sL) == 2:
                    name = sL[1]
                    uri = sL[0][1:]  # drop the leading '{'
                    self.qnameOD[clark] = '%s:%s' % (self.nsOD[uri], name)

        if event == "start-ns":
            # for "start-ns" the payload is indexed as (prefix, uri)
            self.nsOD[elem[1]] = elem[0]  # like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
            self.rev_nsOD[elem[0]] = elem[1]  # like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')

    self.context = context
    self.root = context.root

    self.parentD = {}  # index=child Element object, value=parent Element object
    self.depthD = {}  # index=Element object, value = depth in xml tree
    self.original_posD = {}  # index=Element object, value=tuple of child position (e.g. (0,3,1))
    self.get_elem_from_orig_posD = {}  # reverse lookup of "original_posD"
    self.max_depth = 0
    self.short_pathD = {}  # index=Element, value = short name (like: "ns0:name1/ns1:xyz/ns3:abc")

    # After building tree, create self.parentD for all Elements
    self.parentD[self.root] = None
    self.depthD[self.root] = 0
    self.short_pathD[self.root] = self.qnameOD[self.root.tag]  # no calc req'd... just = qname
    self.original_posD[self.root] = (0, )  # tuple of position

    temp_short_path_counterD = {}  # just used here to help count occurrences of short path
    self.short_path_counterD = {}  # index=Element, value=short path counter value
    self.short_path_parent_counterD = {}  # index=Element, value=parent's short path counter value
    self.short_path_counterD[self.root] = 1  # 1st (and only) occurrence
    self.short_path_parent_counterD[self.root] = 1  # 1st (and only) occurrence

    for parent in self.root.iter():
        try:
            # Iterate the element itself: Element.getchildren() no longer
            # exists in the standard library from python 3.9 on.
            for ichild, child in enumerate(parent):
                self.parentD[child] = parent
                self.depthD[child] = self.depthD[parent] + 1
                self.max_depth = max(self.max_depth, self.depthD[child])

                self.original_posD[child] = self.original_posD[parent] + (ichild, )

                short_path = self.get_short_path(child)
                self.short_pathD[child] = short_path

                counter_key = (parent, short_path)
                temp_short_path_counterD[counter_key] = temp_short_path_counterD.get(counter_key, 0) + 1
                self.short_path_counterD[child] = '%s,%s' % (
                    self.short_path_counterD[parent],
                    temp_short_path_counterD[counter_key])
                self.short_path_parent_counterD[child] = '%s' % self.short_path_counterD[parent]
        except Exception:
            # Best effort is kept, but KeyboardInterrupt/SystemExit now propagate.
            # NOTE(review): any error raised in the loop body lands here, not
            # only "no children"; the notice text is kept unchanged.
            print('NOTICE: No children for:', parent)

    for key, item in self.original_posD.items():
        self.get_elem_from_orig_posD[item] = key  # get elem from original_posD

    # set up dictionaries to hold style:name info (if init_all_annn_style8name)
    self.annn_style8nameD = {}  # index=style:name ("a123"), value=style elem
    self.style_refD = {}  # index=xxxxx:style-name (e.g. "a123"), value=elem with ref (not style itself)
    self.id_draw8idD = {}  # index=draw:id (e.g. "id123"), value=elem
def __init__(self, xml_file_name_or_src):
    """
    Read and parse xml file using modified version of standard python xml.etree.ElementTree.

    xml_file_name_or_src can be a file name like: "content.xml"
    OR can be xml source.  A value that ends with '.xml' and is shorter than
    256 characters is treated as a file name and read as utf-8 text; anything
    else is used directly as the xml source.

    After parsing, lookup tables are built for every element reachable from
    the root: parent, depth, original child position, short path and short
    path counters.
    """
    if xml_file_name_or_src.endswith('.xml') and len(xml_file_name_or_src) < 256:
        self.xml_file_name_or_src = xml_file_name_or_src
        # "with" closes the file even when read() raises
        with io.open(xml_file_name_or_src, 'rt', encoding='utf-8') as fInp:
            xml_src = fInp.read()
    else:
        self.xml_file_name_or_src = None
        xml_src = xml_file_name_or_src

    self.xml_header = ''  # Assume no header unless found at head of file
    match = header_re.match(xml_src)
    if match:
        self.xml_header = match.group(0)  # will need \n when serialized

    # ns entries like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
    self.nsOD = OrderedDict()
    # rev_ns entries like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')
    self.rev_nsOD = OrderedDict()
    # qname entries like: ('{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content', u'office:document-content')
    self.qnameOD = OrderedDict()

    events = ("start", "end", "start-ns", "end-ns")
    context = ET.iterparse(StringIO(xml_src), events=events)
    for event, elem in context:
        if event == "start":
            # Record 'prefix:local' for the tag first, then for each attribute name.
            for clark in [elem.tag] + list(elem.attrib):
                sL = clark.split('}')
                if len(sL) == 2:
                    name = sL[1]
                    uri = sL[0][1:]  # drop the leading '{'
                    self.qnameOD[clark] = '%s:%s' % (self.nsOD[uri], name)

        if event == "start-ns":
            # for "start-ns" the payload is indexed as (prefix, uri)
            self.nsOD[elem[1]] = elem[0]  # like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
            self.rev_nsOD[elem[0]] = elem[1]  # like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')

    self.context = context
    self.root = context.root

    self.parentD = {}  # index=child Element object, value=parent Element object
    self.depthD = {}  # index=Element object, value = depth in xml tree
    self.original_posD = {}  # index=Element object, value=tuple of child position (e.g. (0,3,1))
    self.get_elem_from_orig_posD = {}  # reverse lookup of "original_posD"
    self.max_depth = 0
    self.short_pathD = {}  # index=Element, value = short name (like: "ns0:name1/ns1:xyz/ns3:abc")

    # After building tree, create self.parentD for all Elements
    self.parentD[self.root] = None
    self.depthD[self.root] = 0
    self.short_pathD[self.root] = self.qnameOD[self.root.tag]  # no calc req'd... just = qname
    self.original_posD[self.root] = (0,)  # tuple of position

    temp_short_path_counterD = {}  # just used here to help count occurrences of short path
    self.short_path_counterD = {}  # index=Element, value=short path counter value
    self.short_path_parent_counterD = {}  # index=Element, value=parent's short path counter value
    self.short_path_counterD[self.root] = 1  # 1st (and only) occurrence
    self.short_path_parent_counterD[self.root] = 1  # 1st (and only) occurrence

    for parent in self.root.iter():
        try:
            # Iterate the element itself: Element.getchildren() no longer
            # exists in the standard library from python 3.9 on.
            for ichild, child in enumerate(parent):
                self.parentD[child] = parent
                self.depthD[child] = self.depthD[parent] + 1
                self.max_depth = max(self.max_depth, self.depthD[child])

                self.original_posD[child] = self.original_posD[parent] + (ichild,)

                short_path = self.get_short_path(child)
                self.short_pathD[child] = short_path

                counter_key = (parent, short_path)
                temp_short_path_counterD[counter_key] = temp_short_path_counterD.get(counter_key, 0) + 1
                self.short_path_counterD[child] = '%s,%s' % (
                    self.short_path_counterD[parent],
                    temp_short_path_counterD[counter_key])
                self.short_path_parent_counterD[child] = '%s' % self.short_path_counterD[parent]
        except Exception:
            # Best effort is kept, but KeyboardInterrupt/SystemExit now propagate.
            # NOTE(review): any error raised in the loop body lands here, not
            # only "no children"; the notice text is kept unchanged.
            print('NOTICE: No children for:', parent)

    for key, item in self.original_posD.items():
        self.get_elem_from_orig_posD[item] = key  # get elem from original_posD

    # set up dictionaries to hold style:name info (if init_all_annn_style8name)
    self.annn_style8nameD = {}  # index=style:name ("a123"), value=style elem
    self.style_refD = {}  # index=xxxxx:style-name (e.g. "a123"), value=elem with ref (not style itself)
    self.id_draw8idD = {}  # index=draw:id (e.g. "id123"), value=elem
def get_final_presentation_elem():
    """Return a new Element whose tag is force_to_tag('presentation:settings')."""
    settings_tag = force_to_tag('presentation:settings')
    return ET.Element(settings_tag)