Пример #1
0
 def test_xml_text_parents(self):
     # The token following the opening <root> tag (expected to be the text
     # content "Hello, world!") must report <root> as its only parent tag.
     print("test_xmlparser.py: test_xml_text_parents start")
     document = io.BytesIO(b"<root>Hello, world!</root>")
     xml = XmlParser(document)
     # skip the first token (the opening <root> tag), it is not under test:
     xml.next_token()
     inner_token = xml.next_token()
     parent_count = len(inner_token.get_parent_tags())
     self.assertEqual(parent_count, 1)
     first_parent = inner_token.get_parent_tags()[0]
     self.assertEqual(first_parent, b"root")
     print("test_xmlparser.py: test_xml_text_parents end")
Пример #2
0
 def test_xml_pure_text(self):
     # Input that contains no tag at all must make the parser raise
     # XmlParserInvalidSyntaxError on the very first token request.
     print("test_xmlparser.py: test_xml_pure_text start")
     parser = XmlParser(io.BytesIO(b"hello world"))
     # assertRaises fails the test (with the given message) if the block
     # finishes without raising the expected exception.
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "parsing \"hello world\" as XML"):
         parser.next_token()
     print("test_xmlparser.py: test_xml_pure_text end")
Пример #3
0
 def test_xml_attributes_unfinished(self):
     # An attribute with '=' but no value (<child myattr=/>) must make the
     # parser raise XmlParserInvalidSyntaxError.
     print("test_xmlparser.py: test_xml_attributes_unfinished start")
     parser = XmlParser(io.BytesIO(b"<root><child myattr=/></root>"))
     # the opening <root> tag itself is fine and must still be delivered;
     # assertEqual reports both values on failure, unlike assertTrue(a == b)
     self.assertEqual(parser.next_token().get_tag(), b"root")
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "due to unfinished attribute (= character but no value)"):
         parser.next_token()
     print("test_xmlparser.py: test_xml_attributes_unfinished end")
Пример #4
0
 def test_xml_invalid_xml_tag_position(self):
     # An <?xml ... ?> declaration placed inside the root tag must make the
     # parser raise XmlParserInvalidSyntaxError.
     print("test_xmlparser.py: test_xml_invalid_xml_tag_position start")
     parser = XmlParser(io.BytesIO(
         b"<root><?xml version=\"1.0\"?></root>"))
     # consume the first token (the opening <root> tag):
     parser.next_token()
     # the failure message now describes this test instead of repeating the
     # text of the self-closing-tag test
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "after encountering an <?xml ... ?> declaration inside "
             "the root tag"):
         parser.next_token()
     print("test_xmlparser.py: test_xml_invalid_xml_tag_position end")
Пример #5
0
 def test_xml_selfclosing_invalid_followup(self):
     # Any data between the '/' of a self-closing tag and its '>'
     # (<child/test>) must make the parser raise
     # XmlParserInvalidSyntaxError.
     print("test_xmlparser.py: test_xml_selfclosing_invalid_followup "
         "start")
     parser = XmlParser(io.BytesIO(b"<root><child/test></root>"))
     # consume the first token (the opening <root> tag):
     parser.next_token()
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "after encountering invalid followup data after closing '/'"):
         parser.next_token()
     print("test_xmlparser.py: test_xml_selfclosing_invalid_followup end")
Пример #6
0
 def test_xml_unfinished(self):
     # A document that ends while <root> is still open must make the parser
     # raise XmlParserInvalidSyntaxError once the input is exhausted.
     print("test_xmlparser.py: test_xml_unfinished start")
     parser = XmlParser(io.BytesIO(b"<root><child/>"))
     self.assertEqual(parser.next_token().get_tag(), b"root")
     # the self-closing <child/> is delivered as two tokens with that tag:
     self.assertEqual(parser.next_token().get_tag(), b"child")
     self.assertEqual(parser.next_token().get_tag(), b"child")
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "due to unfinished XML"):
         token = parser.next_token()
         # only reached if the parser wrongly returned a token; show it to
         # help debugging before assertRaises fails the test
         print("TOKEN: " + str(token))
     print("test_xmlparser.py: test_xml_unfinished end")
Пример #7
0
 def test_xml_attributes_malformed(self):
     # A quoted value without an attribute name (<child ""/>) must make the
     # parser raise XmlParserInvalidSyntaxError.
     print("test_xmlparser.py: test_xml_attributes_malformed start")
     parser = XmlParser(io.BytesIO(b"<root><child \"\"/></root>"))
     # the opening <root> tag precedes the malformed tag and must parse:
     root_begin = parser.next_token()
     self.assertEqual(root_begin.get_type(), "begin_tag")
     self.assertEqual(root_begin.get_tag(), b"root")
     with self.assertRaises(XmlParserInvalidSyntaxError,
             msg="verify the parser has aborted "
             "due to malformed attribute"):
         token = parser.next_token()
         # only reached if the parser wrongly accepted the tag; dump what it
         # produced before assertRaises fails the test
         print("token: " + str(token))
         print("attributes: " + str(token.attributes))
     print("test_xmlparser.py: test_xml_attributes_malformed end")
Пример #8
0
 def test_xml_parents(self):
     # Walk through all six tokens of a three-level document and verify the
     # parent tag list each token reports (outermost parent first).
     #
     # self.assert* is used instead of bare assert statements so the checks
     # are not stripped when Python runs with -O and failures show the
     # compared values.
     print("test_xmlparser.py: test_xml_parents start")
     parser = XmlParser(io.BytesIO(
         b"<root><child1><child2/></child1></root>"))
     root_begin = parser.next_token()
     self.assertEqual(len(root_begin.get_parent_tags()), 0)
     child1_begin = parser.next_token()
     self.assertEqual(list(child1_begin.get_parent_tags()), [b"root"])
     child2_begin = parser.next_token()
     self.assertEqual(list(child2_begin.get_parent_tags()),
         [b"root", b"child1"])
     # the self-closing <child2/> also yields a closing token:
     child2_end = parser.next_token()
     self.assertEqual(child2_end.get_tag(), b"child2")
     # this previously re-checked child2_begin, so the number of parents of
     # the closing token was never verified
     self.assertEqual(list(child2_end.get_parent_tags()),
         [b"root", b"child1"])
     child1_end = parser.next_token()
     self.assertEqual(child1_end.get_tag(), b"child1")
     self.assertEqual(list(child1_end.get_parent_tags()), [b"root"])
     root_end = parser.next_token()
     self.assertEqual(len(root_end.get_parent_tags()), 0)
     print("test_xmlparser.py: test_xml_parents end")
Пример #9
0
 def deserialize(self, xml_file, expected_root_tag, expected_namespace,
         expected_max_file_size=None):
     """ Parse and deserialize an XML file.

         The XML file must have the root tag you expect
         (expected_root_tag), which is checked for existence.

         Inside the root tag, every tag prefixed with
         "<expected_namespace>:" becomes a key of the surrounding
         dictionary. Such a tag contains either further namespaced tags
         (a nested dictionary), a value tag (<int>, <Decimal>, <str>,
         <bool> or <None>), or a <list> of value tags. A <list> directly
         inside another <list> is rejected.

         Parameters:
             xml_file: file object handed to XmlParser. It must be
                 seekable if expected_max_file_size is given.
             expected_root_tag: non-empty name of the root tag.
             expected_namespace: prefix every dictionary tag must carry.
             expected_max_file_size: optional upper limit for the file
                 size in bytes. None (default) disables the check.

         Returns the resulting dictionary with all the nested dictionaries
         and values inside.

         Raises ValueError if expected_root_tag is empty, or if a size
         limit is given for a file that is not seekable.

         Raises XmlParserParseError if the file is longer than expected,
         doesn't have the expected root tag or namespace, or nests tags
         in a way described as invalid above.

         In addition, errors of the XmlParser (XmlParserParseError,
         XmlParserInvalidSyntaxError) are passed through, as are the
         conversion errors of int() and Decimal() for malformed numbers.

         Example with expected_root_tag="my_settings_storage",
                     expected_namespace="my_game_settings":
             <?xml version="1.0"?>
             <my_settings_storage>
                 <my_game_settings:resolution>
                     <list>
                         <int>800</int>
                         <int>600</int>
                     </list>
                 </my_game_settings:resolution>
             </my_settings_storage>
         which results in {"resolution": [800, 600]}.
     """
     if len(expected_root_tag) == 0:
         raise ValueError("need expected root tag")
     if expected_max_file_size is not None:
         if not xml_file.seekable():
             raise ValueError("cannot truncate non-seekable file")
         xml_file.seek(0, io.SEEK_END)
         if xml_file.tell() > expected_max_file_size:
             # str() is required: concatenating the int directly raised a
             # TypeError instead of the intended parse error
             raise XmlParserParseError("file larger than expected - " +
                 str(expected_max_file_size) + " bytes")
         xml_file.seek(0, io.SEEK_SET)
     # information about the dictionary structure:
     self.result_dict = dict()
     self.current_parent = self.result_dict
     self.current_parent_parents = []
     self.current_parent_name_in_parents = []
     # if we're inside a value tag and expect a value:
     self.expected_value_type = None
     self.value_was_set = False
     # set while a value was stored but its value tag is not closed yet:
     self.next_end_tag_closes_value = False
     self.next_end_tag_closes_list = False
     # recall whether we are in a list:
     self.in_a_list = False
     self.current_list = None
     # set up the parser
     self.parser = XmlParser(xml_file)
     self.token = self.parser.next_token()

     def apply_value_to_current(value):
         # Store a finished value: append it to the open list if there is
         # one, otherwise replace the placeholder dict that was created
         # for the enclosing dictionary tag.
         self.next_end_tag_closes_value = True
         if self.in_a_list:
             self.current_list.append(value)
             return
         name = self.current_parent_name_in_parents[-1]
         self.current_parent_parents[-1][name] = value
         self.current_parent = value

     while self.token.get_type() != "end_document":
         token_type = self.token.get_type()
         if token_type == "begin_tag":
             tag = self.token.get_tag().decode("utf-8")
             if len(self.token.get_parent_tags()) == 0:
                 # root tag.
                 if tag != expected_root_tag:
                     raise XmlParserParseError('wrong root tag. got "'
                         + tag + '", expected "' +
                         expected_root_tag + '"')
             elif tag.find(":") >= 0:
                 # dict tag. verify namespace:
                 if not tag.startswith(expected_namespace + ":"):
                     raise XmlParserParseError("invalid namespace " +
                         "found - should always be " +
                         expected_namespace)
                 if self.expected_value_type is not None or \
                         self.in_a_list:
                     # going deeper here would corrupt the parent
                     # bookkeeping, so reject it explicitly
                     raise XmlParserParseError("dictionary tag not " +
                         "allowed inside a value or list")
                 # truncate namespace:
                 tag = tag.split(":", 1)[1]
                 # go deeper one level, assume it is of type dict for now.
                 # if it is actually a value or a list, the placeholder is
                 # replaced by apply_value_to_current later.
                 self.current_parent_parents.append(self.current_parent)
                 self.current_parent_name_in_parents.append(tag)
                 self.current_parent[tag] = dict()
                 self.current_parent = self.current_parent[tag]
             elif tag == "list":
                 if self.in_a_list:
                     raise XmlParserParseError("list inside list " +
                         "is not allowed")
                 if self.next_end_tag_closes_value or \
                         self.next_end_tag_closes_list:
                     raise XmlParserParseError("this tag already " +
                         "has contents, cannot add list")
                 # was "== None", which is never true for a length
                 if len(self.current_parent_parents) == 0:
                     raise XmlParserParseError("can only have " +
                         "list inside a dictionary tag, not inside " +
                         "root")
                 self.in_a_list = True
                 self.current_list = list()
             else:
                 # value tag:
                 if self.expected_value_type:
                     raise XmlParserParseError("cannot nest values")
                 if self.next_end_tag_closes_value or \
                         self.next_end_tag_closes_list:
                     raise XmlParserParseError("this tag already " +
                         "has contents, cannot add value")
                 # was "== None", which is never true for a length
                 if len(self.current_parent_parents) == 0:
                     raise XmlParserParseError("can only have " +
                         "value inside a dictionary tag, not inside " +
                         "root")
                 # don't make a child dict, just remember the type:
                 self.expected_value_type = tag
                 self.value_was_set = False
         elif token_type == "end_tag":
             # get_tag() returns bytes; decode once so the comparisons
             # below are str against str
             end_tag = self.token.get_tag().decode("utf-8")
             if self.expected_value_type is not None:
                 # leave value tag (e.g. </int>):
                 if not self.value_was_set:
                     raise XmlParserParseError(
                         "invalid empty value tag found")
                 assert(self.expected_value_type == end_tag)
                 assert(self.next_end_tag_closes_value)
                 self.expected_value_type = None
                 self.next_end_tag_closes_value = False
             elif end_tag == "list":
                 # leave list tag </list>: the collected items become the
                 # value of the enclosing dictionary tag
                 self.in_a_list = False
                 apply_value_to_current(self.current_list)
                 self.current_list = None
                 self.next_end_tag_closes_value = False
                 self.next_end_tag_closes_list = False
             elif len(self.current_parent_parents) > 0:
                 # leave dictionary tag, go up one level:
                 assert(end_tag.startswith(expected_namespace + ":"))
                 self.current_parent = self.current_parent_parents.pop()
                 self.current_parent_name_in_parents.pop()
             else:
                 assert(end_tag == expected_root_tag)
                 return self.result_dict
         elif token_type == "text":
             text = self.token.get_text().strip()
             if self.expected_value_type is None:
                 if len(text) > 0:
                     raise XmlParserParseError("invalid text content " +
                         "in no value tag")
                 # whitespace between tags: just skip over this
             else:
                 # NOTE(review): tags are delivered as bytes; in case the
                 # text is too, decode it so <str> yields str and Decimal()
                 # accepts it - confirm against XmlParser
                 if isinstance(text, bytes):
                     text = text.decode("utf-8")
                 value_type = self.expected_value_type
                 if value_type == "str":
                     value = text
                 elif value_type == "int":
                     value = int(text)
                 elif value_type == "Decimal":
                     value = Decimal(text)
                 elif value_type == "bool":
                     # bool(text) was True for any non-empty text,
                     # including "False"
                     lowered = text.lower()
                     if lowered in ("true", "1"):
                         value = True
                     elif lowered in ("false", "0"):
                         value = False
                     else:
                         raise XmlParserParseError("invalid bool " +
                             "value '" + text + "' encountered")
                 elif value_type == "None":
                     value = None
                 else:
                     raise XmlParserParseError("unsupported value " +
                         "tag '" + value_type + "' encountered")
                 # now set this value accordingly:
                 self.value_was_set = True
                 apply_value_to_current(value)
         self.token = self.parser.next_token()
     return self.result_dict
Пример #10
0
class XmlSimpleDictionarySerializer(object):
    """ Converts nested dictionaries to a simple XML format and back.

        Dictionary keys become tags prefixed with a namespace, values are
        wrapped in unprefixed type tags such as <int> or <str>.
    """

    def serialize(self, dictionary, root_tag, namespace="serialized"):
        """ Serialize a given dictionary with values or nested
            dictionaries inside to XML and return it as a string.

            All dictionaries will be serialized to tags with the keys being
            the tag names, and the values being the contents. Keys must be
            strings and are written to the tag names unescaped.

            The whole serialization will use one unified namespace, as if
            the whole thing was one object. (also see deserialize() for an
            example)

            Any sort of actual non-dictionary value will be serialized to
            special non-prefixed <int>, <Decimal>, <list>, <str> etc. tags.
            Supported types are:
               list, int, Decimal, float (written as <Decimal>), str, bool,
               None

            Lists may contain lists, but not dictionaries. Note that
            deserialize() rejects a list directly inside another list.

            Raises ValueError if the given object is not a dictionary or a
            dictionary is found inside a list, and TypeError if a value of
            an unsupported type is encountered.
        """
        if not isinstance(dictionary, dict):
            raise ValueError("not a dictionary")
        return '<?xml version="1.0"?>\n' + \
            "<" + root_tag + ">\n" + \
            self._serialize(dictionary, namespace, 1) + \
            "</" + root_tag + ">\n"

    def _serialize(self, obj, namespace, indent=0, no_dict=False):
        """ Return the XML lines for obj, indented by indent levels of four
            spaces. With no_dict set, dictionaries raise ValueError (used
            for everything nested inside a list).
        """
        current_indent = ' ' * (4 * indent)
        if obj is None:
            # this used to test for the string "None", so a real None
            # raised TypeError and the string "None" lost its type
            return current_indent + "<None>None</None>\n"
        if isinstance(obj, dict):
            if no_dict:
                raise ValueError("dictionary not allowed here - can only " +
                    "list dictionary in dictionaries, not inside other " +
                    "types (e.g. list)")
            parts = []
            for key in obj:
                parts.append(current_indent + "<" + namespace + ":" +
                    key + ">\n")
                parts.append(self._serialize(obj[key], namespace,
                    indent + 1))
                parts.append(current_indent + "</" + namespace + ":" +
                    key + ">\n")
            return "".join(parts)
        if isinstance(obj, list):
            parts = [current_indent + "<list>\n"]
            for item in obj:
                parts.append(self._serialize(item, namespace, indent + 1,
                    no_dict=True))
            parts.append(current_indent + "</list>\n")
            return "".join(parts)
        # bool must be tested before int: bool is a subclass of int, so
        # True would otherwise be written as <int>True</int>
        if isinstance(obj, bool):
            return current_indent + "<bool>" + str(obj) + "</bool>\n"
        if isinstance(obj, int):
            return current_indent + "<int>" + str(obj) + "</int>\n"
        if isinstance(obj, str):
            # "&" has to be escaped first, otherwise the "&" of the other
            # entities would be escaped again
            escaped = obj.replace("&", "&amp;").replace(">", "&gt;").\
                replace("<", "&lt;")
            return current_indent + "<str>" + escaped + "</str>\n"
        if isinstance(obj, (Decimal, float)):
            return current_indent + "<Decimal>" + str(obj) + "</Decimal>\n"
        raise TypeError("unsupported value type - cannot serialize")

    def deserialize(self, xml_file, expected_root_tag, expected_namespace,
            expected_max_file_size=None):
        """ Parse and deserialize an XML file.

            The XML file must have the root tag you expect
            (expected_root_tag), which is checked for existence.

            Inside the root tag, every tag prefixed with
            "<expected_namespace>:" becomes a key of the surrounding
            dictionary. Such a tag contains either further namespaced tags
            (a nested dictionary), a value tag (<int>, <Decimal>, <str>,
            <bool> or <None>), or a <list> of value tags. A <list> directly
            inside another <list> is rejected.

            Parameters:
                xml_file: file object handed to XmlParser. It must be
                    seekable if expected_max_file_size is given.
                expected_root_tag: non-empty name of the root tag.
                expected_namespace: prefix every dictionary tag must carry.
                expected_max_file_size: optional upper limit for the file
                    size in bytes. None (default) disables the check.

            Returns the resulting dictionary with all the nested
            dictionaries and values inside.

            Raises ValueError if expected_root_tag is empty, or if a size
            limit is given for a file that is not seekable.

            Raises XmlParserParseError if the file is longer than expected,
            doesn't have the expected root tag or namespace, or nests tags
            in a way described as invalid above.

            In addition, errors of the XmlParser (XmlParserParseError,
            XmlParserInvalidSyntaxError) are passed through, as are the
            conversion errors of int() and Decimal() for malformed numbers.

            Example with expected_root_tag="my_settings_storage",
                        expected_namespace="my_game_settings":
                <?xml version="1.0"?>
                <my_settings_storage>
                    <my_game_settings:resolution>
                        <list>
                            <int>800</int>
                            <int>600</int>
                        </list>
                    </my_game_settings:resolution>
                </my_settings_storage>
            which results in {"resolution": [800, 600]}.
        """
        if len(expected_root_tag) == 0:
            raise ValueError("need expected root tag")
        if expected_max_file_size is not None:
            if not xml_file.seekable():
                raise ValueError("cannot truncate non-seekable file")
            xml_file.seek(0, io.SEEK_END)
            if xml_file.tell() > expected_max_file_size:
                # str() is required: concatenating the int directly raised
                # a TypeError instead of the intended parse error
                raise XmlParserParseError("file larger than expected - " +
                    str(expected_max_file_size) + " bytes")
            xml_file.seek(0, io.SEEK_SET)
        # information about the dictionary structure:
        self.result_dict = dict()
        self.current_parent = self.result_dict
        self.current_parent_parents = []
        self.current_parent_name_in_parents = []
        # if we're inside a value tag and expect a value:
        self.expected_value_type = None
        self.value_was_set = False
        # set while a value was stored but its value tag is not closed yet:
        self.next_end_tag_closes_value = False
        self.next_end_tag_closes_list = False
        # recall whether we are in a list:
        self.in_a_list = False
        self.current_list = None
        # set up the parser
        self.parser = XmlParser(xml_file)
        self.token = self.parser.next_token()

        def apply_value_to_current(value):
            # Store a finished value: append it to the open list if there
            # is one, otherwise replace the placeholder dict that was
            # created for the enclosing dictionary tag.
            self.next_end_tag_closes_value = True
            if self.in_a_list:
                self.current_list.append(value)
                return
            name = self.current_parent_name_in_parents[-1]
            self.current_parent_parents[-1][name] = value
            self.current_parent = value

        while self.token.get_type() != "end_document":
            token_type = self.token.get_type()
            if token_type == "begin_tag":
                tag = self.token.get_tag().decode("utf-8")
                if len(self.token.get_parent_tags()) == 0:
                    # root tag.
                    if tag != expected_root_tag:
                        raise XmlParserParseError('wrong root tag. got "'
                            + tag + '", expected "' +
                            expected_root_tag + '"')
                elif tag.find(":") >= 0:
                    # dict tag. verify namespace:
                    if not tag.startswith(expected_namespace + ":"):
                        raise XmlParserParseError("invalid namespace " +
                            "found - should always be " +
                            expected_namespace)
                    if self.expected_value_type is not None or \
                            self.in_a_list:
                        # going deeper here would corrupt the parent
                        # bookkeeping, so reject it explicitly
                        raise XmlParserParseError("dictionary tag not " +
                            "allowed inside a value or list")
                    # truncate namespace:
                    tag = tag.split(":", 1)[1]
                    # go deeper one level, assume it is of type dict for
                    # now. if it is actually a value or a list, the
                    # placeholder is replaced by apply_value_to_current
                    # later.
                    self.current_parent_parents.append(self.current_parent)
                    self.current_parent_name_in_parents.append(tag)
                    self.current_parent[tag] = dict()
                    self.current_parent = self.current_parent[tag]
                elif tag == "list":
                    if self.in_a_list:
                        raise XmlParserParseError("list inside list " +
                            "is not allowed")
                    if self.next_end_tag_closes_value or \
                            self.next_end_tag_closes_list:
                        raise XmlParserParseError("this tag already " +
                            "has contents, cannot add list")
                    # was "== None", which is never true for a length
                    if len(self.current_parent_parents) == 0:
                        raise XmlParserParseError("can only have " +
                            "list inside a dictionary tag, not inside " +
                            "root")
                    self.in_a_list = True
                    self.current_list = list()
                else:
                    # value tag:
                    if self.expected_value_type:
                        raise XmlParserParseError("cannot nest values")
                    if self.next_end_tag_closes_value or \
                            self.next_end_tag_closes_list:
                        raise XmlParserParseError("this tag already " +
                            "has contents, cannot add value")
                    # was "== None", which is never true for a length
                    if len(self.current_parent_parents) == 0:
                        raise XmlParserParseError("can only have " +
                            "value inside a dictionary tag, not inside " +
                            "root")
                    # don't make a child dict, just remember the type:
                    self.expected_value_type = tag
                    self.value_was_set = False
            elif token_type == "end_tag":
                # get_tag() returns bytes; decode once so the comparisons
                # below are str against str
                end_tag = self.token.get_tag().decode("utf-8")
                if self.expected_value_type is not None:
                    # leave value tag (e.g. </int>):
                    if not self.value_was_set:
                        raise XmlParserParseError(
                            "invalid empty value tag found")
                    assert(self.expected_value_type == end_tag)
                    assert(self.next_end_tag_closes_value)
                    self.expected_value_type = None
                    self.next_end_tag_closes_value = False
                elif end_tag == "list":
                    # leave list tag </list>: the collected items become
                    # the value of the enclosing dictionary tag
                    self.in_a_list = False
                    apply_value_to_current(self.current_list)
                    self.current_list = None
                    self.next_end_tag_closes_value = False
                    self.next_end_tag_closes_list = False
                elif len(self.current_parent_parents) > 0:
                    # leave dictionary tag, go up one level:
                    assert(end_tag.startswith(expected_namespace + ":"))
                    self.current_parent = self.current_parent_parents.pop()
                    self.current_parent_name_in_parents.pop()
                else:
                    assert(end_tag == expected_root_tag)
                    return self.result_dict
            elif token_type == "text":
                text = self.token.get_text().strip()
                if self.expected_value_type is None:
                    if len(text) > 0:
                        raise XmlParserParseError("invalid text content " +
                            "in no value tag")
                    # whitespace between tags: just skip over this
                else:
                    # NOTE(review): tags are delivered as bytes; in case
                    # the text is too, decode it so <str> yields str and
                    # Decimal() accepts it - confirm against XmlParser
                    if isinstance(text, bytes):
                        text = text.decode("utf-8")
                    value_type = self.expected_value_type
                    if value_type == "str":
                        value = text
                    elif value_type == "int":
                        value = int(text)
                    elif value_type == "Decimal":
                        value = Decimal(text)
                    elif value_type == "bool":
                        # bool(text) was True for any non-empty text,
                        # including "False"
                        lowered = text.lower()
                        if lowered in ("true", "1"):
                            value = True
                        elif lowered in ("false", "0"):
                            value = False
                        else:
                            raise XmlParserParseError("invalid bool " +
                                "value '" + text + "' encountered")
                    elif value_type == "None":
                        value = None
                    else:
                        raise XmlParserParseError("unsupported value " +
                            "tag '" + value_type + "' encountered")
                    # now set this value accordingly:
                    self.value_was_set = True
                    apply_value_to_current(value)
            self.token = self.parser.next_token()
        return self.result_dict