Пример #1
0
    def new_elem(self, name, attribOD=None):
        """
        Build a new Element and record its qualified names.

        name can be path format ('table:table') OR tag format
        ('{urn:oasis:names:tc:opendocument:xmlns:table:1.0}table'); it is
        converted to a tag with self.NS.
        attribOD is an optional OrderedDict of attributes (an OrderedDict in
        order to preserve attribute order); it is converted with
        self.NS_attrib before being handed to the Element.

        Every '{uri}local' tag or attribute name found on the new Element is
        stored in self.qnameOD as 'prefix:local', where the prefix is looked
        up in self.nsOD.  Returns the new Element.
        """
        tag = self.NS(name)

        if not attribOD:
            created = ET.Element(tag)
        else:
            created = ET.Element(tag, attrib=self.NS_attrib(attribOD))

        # The element tag is handled first, then each attribute name in order.
        for qualified in [created.tag] + list(created.attrib.keys()):
            pieces = qualified.split('}')
            if len(pieces) != 2:
                continue  # not in '{uri}local' form, nothing to record
            uri, local = pieces[0][1:], pieces[1]
            self.qnameOD[qualified] = '%s:%s' % (self.nsOD[uri], local)

        return created
Пример #2
0
    def tostring(self):
        """
        Serialize the tree under self.root and return it UTF-8 encoded.

        When self.xml_header is non-empty the output starts with that header
        followed by a newline.  self.qnameOD and self.nsOD are handed to the
        serializer as its qname and namespace tables.
        """
        running_py2 = sys.version_info < (3, )

        pieces = [self.xml_header + '\n'] if self.xml_header else []

        def collect(sInp):
            # Write callback given to the serializer: gathers every fragment.
            if running_py2:
                sInp = sInp.decode('utf-8')
            pieces.append(sInp)

        # The python2 and python3 serialize routines take different arguments
        if running_py2:
            ET._serialize_xml(collect, self.root, "utf-8", self.qnameOD,
                              self.nsOD)
        else:
            # trailing True: use short format for empty elements
            ET._serialize_xml(collect, self.root, self.qnameOD, self.nsOD,
                              True)

        return "".join(pieces).encode('utf-8')
Пример #3
0
    def elem_tostring(self, elem, include_ns=False, use_linebreaks=True, include_header=False):
        """
        Serialize a single Element (and its subtree) to UTF-8 encoded XML.

        elem            Element to serialize.
        include_ns      when False, serializer fragments that start with
                        'xmlns:' (after stripping whitespace) are dropped.
        use_linebreaks  when True, a newline is inserted after every '>' in
                        each fragment that ends with '>'.
        include_header  when True and self.xml_header is non-empty, the
                        header plus a newline is placed before the XML.

        Returns the encoded result of the joined fragments.
        """
        running_py2 = sys.version_info < (3,)
        xml_dataL = []

        def write(sInp):
            # Write callback given to the serializer; filters and collects
            # each fragment.
            if running_py2:
                sInp = sInp.decode('utf-8')
            stripped = sInp.strip()
            if stripped.startswith(u'xmlns:') and not include_ns:
                return
            if stripped.endswith(u'>') and use_linebreaks:
                sInp = sInp.replace('>', '>\n')
            xml_dataL.append(sInp)

        # There are differences between the python2 and python3 serialize routines
        if running_py2:
            ET._serialize_xml(write, elem, "utf-8", self.qnameOD, self.nsOD)
        else:
            short_empty_elements = True  # use short format for empty elements
            ET._serialize_xml(write, elem, self.qnameOD, self.nsOD, short_empty_elements)

        sOut = u"".join(xml_dataL)
        # Prepend the header while the data is still text: adding it after
        # encode() mixed text with bytes and raised TypeError on python3.
        if include_header and self.xml_header:
            sOut = self.xml_header + '\n' + sOut

        return sOut.encode('utf-8')
Пример #4
0
    def elem_tostring(self,
                      elem,
                      include_ns=False,
                      use_linebreaks=True,
                      include_header=False):
        """
        Serialize a single Element (and its subtree) to UTF-8 encoded XML.

        elem            Element to serialize.
        include_ns      when False, serializer fragments that start with
                        'xmlns:' (after stripping whitespace) are dropped.
        use_linebreaks  when True, a newline is inserted after every '>' in
                        each fragment that ends with '>'.
        include_header  when True and self.xml_header is non-empty, the
                        header plus a newline is placed before the XML.

        Returns the encoded result of the joined fragments.
        """
        running_py2 = sys.version_info < (3, )
        xml_dataL = []

        def write(sInp):
            # Write callback given to the serializer; filters and collects
            # each fragment.
            if running_py2:
                sInp = sInp.decode('utf-8')
            stripped = sInp.strip()
            if stripped.startswith('xmlns:') and not include_ns:
                return
            if stripped.endswith('>') and use_linebreaks:
                sInp = sInp.replace('>', '>\n')
            xml_dataL.append(sInp)

        # There are differences between the python2 and python3 serialize routines
        if running_py2:
            ET._serialize_xml(write, elem, "utf-8", self.qnameOD, self.nsOD)
        else:
            short_empty_elements = True  # use short format for empty elements
            ET._serialize_xml(write, elem, self.qnameOD, self.nsOD,
                              short_empty_elements)

        sOut = "".join(xml_dataL)
        # Prepend the header while the data is still text: adding it after
        # encode() mixed text with bytes and raised TypeError on python3.
        if include_header and self.xml_header:
            sOut = self.xml_header + '\n' + sOut

        return sOut.encode('utf-8')
Пример #5
0
    def tostring(self):
        """
        Serialize the tree under self.root and return it UTF-8 encoded.

        When self.xml_header is non-empty the output starts with that header
        followed by a newline.  self.qnameOD and self.nsOD are handed to the
        serializer as its qname and namespace tables.
        """
        on_python2 = sys.version_info < (3,)

        fragments = []
        if self.xml_header:
            fragments.append(self.xml_header + '\n')

        def gather(sInp):
            # Write callback given to the serializer: stores every fragment.
            if on_python2:
                sInp = sInp.decode('utf-8')
            fragments.append(sInp)

        # The python2 and python3 serialize routines take different arguments
        if not on_python2:
            # trailing True: use short format for empty elements
            ET._serialize_xml(gather, self.root, self.qnameOD, self.nsOD, True)
        else:
            ET._serialize_xml(gather, self.root, "utf-8", self.qnameOD, self.nsOD)

        joined = u"".join(fragments)
        return joined.encode('utf-8')
Пример #6
0
    def __init__(self, xml_file_name_or_src):
        """
        Read and parse xml file using modified version of standard python
        xml.etree.ElementTree.

        xml_file_name_or_src can be a file name like: "content.xml" OR
        can be xml source.  It is treated as a file name when it ends with
        '.xml' and is shorter than 256 characters; anything else is parsed
        directly as xml source.

        Besides parsing, this fills the namespace maps (nsOD, rev_nsOD), the
        qualified-name map (qnameOD) and the per-Element lookup tables for
        parent, depth, original child position, short path and short path
        counters.
        """
        if xml_file_name_or_src.endswith(
                '.xml') and len(xml_file_name_or_src) < 256:
            self.xml_file_name_or_src = xml_file_name_or_src

            # "with" closes the file even if read() raises
            with io.open(xml_file_name_or_src, 'rt',
                         encoding='utf-8') as fInp:
                xml_src = fInp.read()
        else:
            self.xml_file_name_or_src = None
            xml_src = xml_file_name_or_src

        self.xml_header = ''  # Assume no header unless found at head of file
        match = header_re.match(xml_src)
        if match:
            self.xml_header = match.group(0)  # will need \n when serialized

        # ns entries like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
        self.nsOD = OrderedDict()

        # rev_ns entries like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')
        self.rev_nsOD = OrderedDict()

        # qname entries like: ('{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content', u'office:document-content')
        self.qnameOD = OrderedDict()

        events = ("start", "end", "start-ns", "end-ns")
        context = ET.iterparse(StringIO(xml_src), events=events)

        for event, elem in context:
            if event == "start":
                # Record 'prefix:local' for the tag first, then for every
                # attribute name, whenever the name is in '{uri}local' form.
                for qualified in [elem.tag] + list(elem.attrib):
                    sL = qualified.split('}')
                    if len(sL) == 2:
                        uri = sL[0][1:]
                        self.qnameOD[qualified] = '%s:%s' % (self.nsOD[uri],
                                                             sL[1])
            elif event == "start-ns":
                # elem is a (prefix, uri) pair for this event
                self.nsOD[elem[1]] = elem[0]
                self.rev_nsOD[elem[0]] = elem[1]

        self.context = context
        self.root = context.root

        self.parentD = {}  # index=child Element object, value=parent Element object
        self.depthD = {}  # index=Element object, value = depth in xml tree
        self.original_posD = {}  # index=Element object, value=tuple of child position (e.g. (0,3,1))
        self.get_elem_from_orig_posD = {}  # reverse lookup of "original_posD"

        self.max_depth = 0
        self.short_pathD = {}  # index=Element, value = short name (like: "ns0:name1/ns1:xyz/ns3:abc")

        # After building tree, create self.parentD for all Elements
        self.parentD[self.root] = None
        self.depthD[self.root] = 0
        self.short_pathD[self.root] = self.qnameOD[
            self.root.tag]  # no calc req'd... just = qname

        self.original_posD[self.root] = (0, )  # tuple of position

        temp_short_path_counterD = {}  # just used here to help count occurrences of short path
        self.short_path_counterD = {}  # index=Element, value=short path counter value
        self.short_path_parent_counterD = {}  # index=Element, value=parent's short path counter value
        self.short_path_counterD[self.root] = 1  # 1st (and only) occurrence
        self.short_path_parent_counterD[self.root] = 1  # 1st (and only) occurrence

        for parent in self.root.iter():
            # Best effort: a failure while indexing one parent's children is
            # reported and the remaining parents are still processed.
            try:
                # Iterate the Element itself; Element.getchildren() is
                # deprecated and was removed from the standard library
                # ElementTree in python 3.9.
                for ichild, child in enumerate(parent):
                    self.parentD[child] = parent
                    self.depthD[child] = self.depthD[parent] + 1
                    self.max_depth = max(self.max_depth, self.depthD[child])

                    self.original_posD[child] = (
                        self.original_posD[parent] + (ichild, ))

                    short_path = self.get_short_path(child)
                    self.short_pathD[child] = short_path

                    counter_key = (parent, short_path)
                    temp_short_path_counterD[counter_key] = (
                        temp_short_path_counterD.get(counter_key, 0) + 1)
                    self.short_path_counterD[child] = '%s,%s' % (
                        self.short_path_counterD[parent],
                        temp_short_path_counterD[counter_key])
                    self.short_path_parent_counterD[
                        child] = '%s' % self.short_path_counterD[parent]

            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit still propagate.
                print('NOTICE: No children for:', parent)

        for key, item in self.original_posD.items():
            self.get_elem_from_orig_posD[
                item] = key  # get elem from original_posD

        # set up dictionaries to hold style:name info (if init_all_annn_style8name)
        self.annn_style8nameD = {}  # index=style:name ("a123"), value=style elem
        self.style_refD = {}  # index=xxxxx:style-name (e.g. "a123"), value=elem with ref (not style itself)
        self.id_draw8idD = {}  # index=draw:id (e.g. "id123"), value=elem
Пример #7
0
    def __init__(self, xml_file_name_or_src):
        """
        Read and parse xml file using modified version of standard python
        xml.etree.ElementTree.

        xml_file_name_or_src can be a file name like: "content.xml" OR
        can be xml source.  It is treated as a file name when it ends with
        '.xml' and is shorter than 256 characters; anything else is parsed
        directly as xml source.

        Besides parsing, this fills the namespace maps (nsOD, rev_nsOD), the
        qualified-name map (qnameOD) and the per-Element lookup tables for
        parent, depth, original child position, short path and short path
        counters.
        """
        if xml_file_name_or_src.endswith('.xml') and len(xml_file_name_or_src)<256:
            self.xml_file_name_or_src = xml_file_name_or_src

            # "with" closes the file even if read() raises
            with io.open(xml_file_name_or_src, 'rt', encoding='utf-8') as fInp:
                xml_src = fInp.read()
        else:
            self.xml_file_name_or_src = None
            xml_src = xml_file_name_or_src

        self.xml_header = '' # Assume no header unless found at head of file
        match = header_re.match(xml_src)
        if match:
            self.xml_header = match.group(0) # will need \n when serialized

        # ns entries like: ('urn:oasis:names:tc:opendocument:xmlns:table:1.0', u'table')
        self.nsOD = OrderedDict()

        # rev_ns entries like: (u'table', 'urn:oasis:names:tc:opendocument:xmlns:table:1.0')
        self.rev_nsOD = OrderedDict()

        # qname entries like: ('{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content', u'office:document-content')
        self.qnameOD = OrderedDict()

        events = ("start", "end", "start-ns", "end-ns")
        context = ET.iterparse(StringIO(xml_src), events=events)

        for event, elem in context:
            if event=="start":
                # Record 'prefix:local' for the tag first, then for every
                # attribute name, whenever the name is in '{uri}local' form.
                for qualified in [elem.tag] + list(elem.attrib):
                    sL = qualified.split('}')
                    if len(sL) == 2:
                        uri = sL[0][1:]
                        self.qnameOD[qualified] = '%s:%s'%(self.nsOD[uri], sL[1])
            elif event=="start-ns":
                # elem is a (prefix, uri) pair for this event
                self.nsOD[elem[1]] = elem[0]
                self.rev_nsOD[elem[0]] = elem[1]

        self.context = context
        self.root = context.root

        self.parentD = {} # index=child Element object, value=parent Element object
        self.depthD = {}  # index=Element object, value = depth in xml tree
        self.original_posD = {}  # index=Element object, value=tuple of child position (e.g. (0,3,1))
        self.get_elem_from_orig_posD = {}  # reverse lookup of "original_posD"

        self.max_depth = 0
        self.short_pathD = {} # index=Element, value = short name (like: "ns0:name1/ns1:xyz/ns3:abc")
        # After building tree, create self.parentD for all Elements
        self.parentD[self.root] = None
        self.depthD[self.root] = 0
        self.short_pathD[self.root] = self.qnameOD[ self.root.tag ] # no calc req'd... just = qname

        self.original_posD[self.root] = (0,) # tuple of position

        temp_short_path_counterD = {} # just used here to help count occurrences of short path
        self.short_path_counterD = {} # index=Element, value=short path counter value
        self.short_path_parent_counterD = {} #  index=Element, value=parent's short path counter value
        self.short_path_counterD[self.root] = 1 # 1st (and only) occurrence
        self.short_path_parent_counterD[self.root] = 1 # 1st (and only) occurrence

        for parent in self.root.iter():
            # Best effort: a failure while indexing one parent's children is
            # reported and the remaining parents are still processed.
            try:
                # Iterate the Element itself; Element.getchildren() is
                # deprecated and was removed from the standard library
                # ElementTree in python 3.9.
                for ichild, child in enumerate(parent):
                    self.parentD[child] = parent
                    self.depthD[child] = self.depthD[parent] + 1
                    self.max_depth = max(self.max_depth, self.depthD[child])

                    self.original_posD[child] = self.original_posD[parent] + (ichild,)

                    short_path = self.get_short_path( child )
                    self.short_pathD[child] = short_path

                    counter_key = (parent, short_path)
                    temp_short_path_counterD[counter_key] = temp_short_path_counterD.get(counter_key, 0) + 1
                    self.short_path_counterD[child] = '%s,%s'%(self.short_path_counterD[parent],
                                                       temp_short_path_counterD[counter_key])
                    self.short_path_parent_counterD[child] = '%s'%self.short_path_counterD[parent]

            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit still propagate.
                print( 'NOTICE: No children for:', parent )

        for key,item in self.original_posD.items():
            self.get_elem_from_orig_posD[item] = key # get elem from original_posD

        # set up dictionaries to hold style:name info (if init_all_annn_style8name)
        self.annn_style8nameD = {} # index=style:name ("a123"), value=style elem
        self.style_refD = {} # index=xxxxx:style-name (e.g. "a123"), value=elem with ref (not style itself)
        self.id_draw8idD = {} # index=draw:id (e.g. "id123"), value=elem
def get_final_presentation_elem():
    """Return a new Element whose tag is force_to_tag('presentation:settings')."""
    settings_tag = force_to_tag('presentation:settings')
    return ET.Element(settings_tag)