Exemplo n.º 1
0
    def _parse_node(self, node):
        """Convert an ElementTree element into a nested structure.

        Args:
            node: an ``xml.etree.ElementTree.Element``.

        Returns:
            * the element text (``str``) for a leaf that has text and no
              attributes,
            * ``''`` when nothing at all was collected,
            * otherwise an ``object_dict`` holding the text under ``value``,
              each attribute as ``object_dict({'value': v})`` and each child
              under its tag (repeated tags are gathered into a list).

        Keys and values are passed through ``self._namespace_split`` before
        being stored. An attribute and a child sharing the same name end up
        in the same slot (the child is appended to a list).
        """
        # Element.getchildren() was removed in Python 3.9; list(node) yields
        # the same direct children on every version.
        children = list(node)
        node_tree = object_dict()
        if node.text:
            if children or node.attrib:
                # A dict is required so attributes/children can be attached;
                # collapsing to a bare string here used to raise TypeError for
                # elements such as <a x="1">text</a>.
                node_tree.value = node.text
            else:
                node_tree = node.text
        for k, v in node.attrib.items():
            k, v = self._namespace_split(k, object_dict({'value': v}))
            node_tree[k] = v
        # Save children
        for child in children:
            tag, tree = self._namespace_split(child.tag, self._parse_node(child))
            if tag not in node_tree:  # the first time, so store it in dict
                node_tree[tag] = tree
                continue
            old = node_tree[tag]
            if not isinstance(old, list):
                # seen before: turn the single entry into a list
                node_tree.pop(tag)
                node_tree[tag] = [old]
            node_tree[tag].append(tree)  # add the new one

        return '' if node_tree == {} else node_tree
Exemplo n.º 2
0
 def fromstring(self, s):
     """Parse the XML string *s* into an ``object_dict`` keyed by the root tag.

     The input is first passed through ``self.convert_to_utf``. This is a
     best-effort API: if parsing or conversion of the tree raises, the
     result is ``object_dict({'parse_error': 'parse error'})`` instead of an
     exception.
     """
     xmlstring = self.convert_to_utf(s)
     try:
         t = ET.fromstring(xmlstring)
         root_tag, root_tree = self._namespace_split(t.tag, self._parse_node(t))
         return object_dict({root_tag.encode(): root_tree})
     except Exception:
         # Deliberately broad, but no longer a bare ``except`` so that
         # KeyboardInterrupt / SystemExit still propagate to the caller.
         return object_dict({'parse_error': 'parse error'})
Exemplo n.º 3
0
 def _append_attribute(self, node, node_tree=None):
     """Copy the attributes of a DOM *node* into ``node_tree[XML_ATTRIBUTE]``.

     A fresh ``object_dict`` is created when *node_tree* is ``None``. The
     ``XML_ATTRIBUTE`` entry is only created when the node has at least one
     attribute. Names and values are ``.encode()``-d and run through
     ``self._namespace_split`` before being stored. Returns *node_tree*.
     """
     if node_tree is None:
         node_tree = object_dict()
     for position, attr_name in enumerate(node.attributes.keys()):
         if position == 0:
             # First attribute seen: start with an empty attribute container.
             node_tree[XML_ATTRIBUTE] = object_dict()
         encoded_name = attr_name.encode()
         encoded_value = node.getAttribute(attr_name).encode()
         key, value = self._namespace_split(encoded_name, encoded_value)
         node_tree[XML_ATTRIBUTE][key] = value
     return node_tree
Exemplo n.º 4
0
 def fromstring(self, s):
     """Parse the XML string *s* with minidom into an ``object_dict``.

     The input is first passed through ``self.convert_to_utf``; the result
     is keyed by the (encoded) root tag. This is a best-effort API: if
     parsing or conversion of the tree raises, the result is
     ``object_dict({'parse_error': 'parse error'})`` instead of an exception.
     """
     s = self.convert_to_utf(s)
     try:
         doc = parseString(s)
         t = doc.documentElement
         root_tag, root_tree = self._namespace_split(t.nodeName, self._parse_node(t))
         return object_dict({root_tag.encode(): root_tree})
     except Exception:
         # Deliberately broad, but no longer a bare ``except`` so that
         # KeyboardInterrupt / SystemExit still propagate to the caller.
         return object_dict({'parse_error': 'parse error'})
Exemplo n.º 5
0
 def _append_attribute(self, node, node_tree=None):
     """Copy the attributes of an ElementTree *node* into *node_tree*.

     A fresh ``object_dict`` is created when *node_tree* is ``None``. When
     the element has no attributes *node_tree* is returned untouched;
     otherwise ``node_tree[XML_ATTRIBUTE]`` is replaced by a new
     ``object_dict`` filled with the attributes after they have been run
     through ``self._namespace_split``.
     """
     if node_tree is None:
         node_tree = object_dict()
     if not node.attrib:
         # Nothing to record, leave the tree as it is.
         return node_tree
     node_tree[XML_ATTRIBUTE] = object_dict()
     for name, raw_value in node.attrib.items():
         key, value = self._namespace_split(name, raw_value)
         node_tree[XML_ATTRIBUTE][key] = value
     return node_tree
 def _parse_node(self, node):
     """Convert an ElementTree element into text, an ``object_dict`` or None.

     Args:
         node: an ``xml.etree.ElementTree.Element``; it is not modified.

     Returns:
         * the element text when it has non-blank text and no attributes,
         * otherwise an ``object_dict`` of attributes and children (keys and
           values run through ``self._namespace_split``); when the element
           has both text and attributes, the text is stored under the
           element's own tag name,
         * ``None`` when nothing was collected.

     Raises:
         ValueError: the element has text and an attribute whose name equals
             the element's tag, so the text cannot be stored under that key.

     Fair warning: if a child has the same name as an attribute, the value
     becomes a list.
     """
     text = node.text
     # Work on a copy: the original code rewrote node.attrib / node.text,
     # silently altering the caller's parsed tree.
     attributes = dict(node.attrib)
     if text and attributes:
         if node.tag in attributes:
             raise ValueError("Name conflict: Attribute name conflicts with "
                              "tag name. Check the documentation.")
         attributes[node.tag] = text
         text = ''
     if text and text.strip():
         return text

     node_tree = object_dict()
     for k, v in attributes.items():
         k, v = self._namespace_split(k, v)
         node_tree[k] = v
     # Save children. Iterating the element replaces getchildren(), which
     # was removed in Python 3.9.
     for child in node:
         tag, tree = self._namespace_split(child.tag, self._parse_node(child))
         if tag not in node_tree:  # First encounter, store it in dict.
             node_tree[tag] = tree
             continue
         old = node_tree[tag]
         if not isinstance(old, list):
             # Multiple encounters, change dict to a list
             node_tree.pop(tag)
             node_tree[tag] = [old]
         node_tree[tag].append(tree)  # Add the new one.
     if not node_tree:
         node_tree = None
     return node_tree
Exemplo n.º 7
0
def __parse_node(node):
    """Convert an ElementTree element into text or a nested ``object_dict``.

    Args:
        node: an ``xml.etree.ElementTree.Element``.

    Returns:
        The element text for a leaf with text and no attributes; otherwise
        an ``object_dict`` with one entry per attribute and per child tag
        (repeated child tags are gathered into a list). Non-blank text of an
        element that also has attributes or children is kept under the
        ``'value'`` key; an attribute named ``value`` overrides it.
    """
    # Element.getchildren() was removed in Python 3.9.
    children = list(node)
    text = node.text
    if text and not children and not node.attrib:
        # Plain leaf: collapse to the text itself.
        return text

    # Previously the result was rebound to the text string here, so storing
    # attributes or children raised TypeError (e.g. any indented document).
    tmp = object_dict()
    if text and text.strip():
        tmp['value'] = text
    for k, v in node.attrib.items():
        tmp[k] = v

    for ch in children:
        cht = ch.tag
        chp = __parse_node(ch)

        if cht not in tmp:  # the first time, so store it in dict
            tmp[cht] = chp
            continue

        old = tmp[cht]
        if not isinstance(old, list):
            # seen before: turn the single entry into a list
            tmp.pop(cht)
            tmp[cht] = [old]
        tmp[cht].append(chp)  # add the new one

    return tmp
Exemplo n.º 8
0
 def _parse_node(self, node):
     """Convert a DOM element (``childNodes``/``attributes`` API) to a tree.

     Children are collected into ``node_tree`` itself when the element has
     no attributes, or into ``node_tree[XML_VALUE]`` when it has some.
     Text/CDATA children whose encoded value is not in
     ``self.XML_SUPERWORD`` replace ``node_tree`` with that text.
     Element children are parsed recursively and stored under the tag
     returned by ``self._namespace_split``.

     Returns whatever ``node_tree`` is bound to at the end: the
     ``object_dict`` built here, or a text value if a text/CDATA child
     rebound it.
     """
     node_tree = object_dict()
     # Record attributes first.
     # NOTE(review): presumably this fills node_tree[XML_ATTRIBUTE]; the
     # helper is not part of this block - confirm.
     self._append_attribute(node, node_tree)
     # node_dicts is the container that receives child elements.
     if len(node.attributes.keys()) <1:
         node_dicts = node_tree
     else:
         node_tree[XML_VALUE] ={}
         node_dicts = node_tree[XML_VALUE]
     for child in node.childNodes:
         nodename = child.nodeName.encode()
         if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
             nodeval = child.nodeValue.encode()
             if nodeval not in self.XML_SUPERWORD:
                 '''coding may be had error'''
                 # Rebinds node_tree to the text; node_dicts still refers to
                 # the dict created above, which is then no longer returned.
                 # NOTE(review): .decode() on nodeValue looks Python 2
                 # specific - confirm the target interpreter.
                 node_tree = child.nodeValue.decode().encode()
             continue
         # Save children
         tag, tree = self._namespace_split(nodename, self._parse_node(child))
         """
         # the first time, so store it in dict
         if  tag not in node_tree: 
             node_tree[tag] = tree
             continue
         """
         # First time this tag is seen: store the subtree directly when the
         # child has no child nodes or exactly one text/CDATA child,
         # otherwise wrap it in a one-element list.
         if  tag not in node_dicts:
             node_dicts[tag] = tree
             nodenum = len(child.childNodes)
             if nodenum<1:
                 node_dicts[tag] = tree
             else:
                 if nodenum ==1:
                     if child.childNodes[0].nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
                         node_dicts[tag] = tree
                     else:
                         node_dicts.pop(tag)
                         node_dicts[tag] = [tree]
                 else:
                     node_dicts.pop(tag)
                     node_dicts[tag] = [tree]
             continue

         # Tag already present: make sure the slot is a list, then append.
         old = node_dicts[tag]
         if not isinstance(old, list):
             node_dicts.pop(tag)
             # seen several times, so change the old entry to a list
             node_dicts[tag] = [old]
         # add the new one
         node_dicts[tag].append(tree)
     return  node_tree
Exemplo n.º 9
0
    def _parse_node(self, node):
        """Convert an ElementTree element into text or a nested ``object_dict``.

        Args:
            node: an ``xml.etree.ElementTree.Element``.

        Returns:
            The element text when it is non-empty and not listed in
            ``self.XML_SUPERWORD`` (children and attributes are then
            ignored). Otherwise an ``object_dict`` whose children live
            directly in it when the element has no attributes, or under
            ``XML_VALUE`` when it has some; ``self._append_attribute`` is
            called on the result last.

        A child that itself has child elements is always stored as a list,
        even on first occurrence; a child without child elements is stored
        directly and only turned into a list when its tag repeats.
        """
        text = node.text
        if text and text not in self.XML_SUPERWORD:
            return text

        node_tree = object_dict()
        if node.attrib:
            node_tree[XML_VALUE] = {}
            node_dicts = node_tree[XML_VALUE]
        else:
            node_dicts = node_tree
        # Iterating the element / len(element) replace getchildren(), which
        # was removed in Python 3.9.
        for child in node:
            # Save children
            tag, tree = self._namespace_split(child.tag, self._parse_node(child))
            if tag not in node_dicts:
                # First occurrence: nested elements go into a list at once.
                node_dicts[tag] = [tree] if len(child) else tree
                continue

            old = node_dicts[tag]
            if not isinstance(old, list):
                node_dicts.pop(tag)
                # seen several times, so change the old entry to a list
                node_dicts[tag] = [old]
            # add the new one
            node_dicts[tag].append(tree)
        # append attribute
        self._append_attribute(node, node_tree)
        return node_tree
Exemplo n.º 10
0
 def fromstring(self, s):
     """Build an ``object_dict`` from XML text, keyed by the root tag.

     The root tag and the parsed tree are passed through
     ``self._namespace_split`` before being stored.
     """
     root = ET.fromstring(s)
     split_result = self._namespace_split(root.tag, self._parse_node(root))
     root_tag, root_tree = split_result
     result = {root_tag: root_tree}
     return object_dict(result)
Exemplo n.º 11
0
def fromstring(s):
    """Build an ``object_dict`` from XML text, keyed by the root element tag."""
    root = ET.fromstring(s)
    parsed = {root.tag: __parse_node(root)}
    return object_dict(parsed)
Exemplo n.º 12
0
def parse(file):
    """Parse the XML file at path *file* into an ``object_dict``.

    The result has a single key, the root element's tag, mapped to the
    tree produced by ``__parse_node``.
    """
    # The context manager closes the handle even if parsing raises; the
    # original left the file open.
    with open(file, 'r') as f:
        t = ET.parse(f).getroot()
    return object_dict({t.tag: __parse_node(t)})
Exemplo n.º 13
0
def fromstring(s):
    """Turn an XML string into an ``object_dict`` with the root tag as key."""
    document_root = ET.fromstring(s)
    root_name = document_root.tag
    converted = __parse_node(document_root)
    return object_dict({root_name: converted})
Exemplo n.º 14
0
def parse(file):
    """Read the XML file at path *file* and convert it to an ``object_dict``.

    The returned mapping has one entry: the root element's tag mapped to
    the structure built by ``__parse_node``.
    """
    # Use a context manager so the file is closed on success and on error;
    # previously the handle was never closed.
    with open(file, 'r') as f:
        t = ET.parse(f).getroot()
    return object_dict({t.tag: __parse_node(t)})
Exemplo n.º 15
0
 def fromstring(self, s):
     """Convert XML text into an ``object_dict`` with one root-tag entry.

     Both the root tag and its parsed tree go through
     ``self._namespace_split`` before being placed in the result.
     """
     element = ET.fromstring(s)
     key, subtree = self._namespace_split(element.tag, self._parse_node(element))
     wrapped = object_dict({key: subtree})
     return wrapped