Пример #1
0
def xml_escape(string):
    """ formatter function for writing to file """
    # it seems to be a problem form bs to escape stuff and selfclosing tags
    ret = EntitySubstitution.substitute_xml(string)
    ret = ret.replace('"', '"')
    ret = ret.replace("'", ''')
    return ret
Пример #2
0
    def decode(self,
               indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a Unicode representation of this tag and its contents.

        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        """
        attrs = []
        if self.attrs:
            for key, val in sorted(self.attrs.items()):
                if val is None:
                    decoded = key
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
                    elif not isinstance(val, basestring):
                        val = str(val)
                    if (self.contains_substitutions
                            and eventual_encoding is not None
                            and '%SOUP-ENCODING%' in val):
                        val = self.substitute_encoding(val, eventual_encoding)

                    decoded = (str(key) + '=' +
                               EntitySubstitution.substitute_xml(val, True))
                attrs.append(decoded)
        close = ''
        closeTag = ''
        if self.is_empty_element:
            close = '/'
        else:
            closeTag = '</%s>' % self.name

        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"

        pretty_print = (indent_level is not None)
        if pretty_print:
            space = (' ' * (indent_level - 1))
            indent_contents = indent_level + 1
        else:
            space = ''
            indent_contents = None
        contents = self.decode_contents(indent_contents, eventual_encoding,
                                        formatter)

        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if pretty_print:
                s.append(space)
            s.append('<%s%s%s%s>' %
                     (prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if pretty_print and closeTag and self.next_sibling:
                s.append("\n")
            s = ''.join(s)
        return s
Пример #3
0
    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a Unicode representation of this tag and its contents.

        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        """
        attrs = []
        if self.attrs:
            for key, val in sorted(self.attrs.items()):
                if val is None:
                    decoded = key
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
                    elif not isinstance(val, basestring):
                        val = str(val)
                    if (self.contains_substitutions
                        and eventual_encoding is not None
                        and '%SOUP-ENCODING%' in val):
                        val = self.substitute_encoding(val, eventual_encoding)

                    decoded = (str(key) + '='
                               + EntitySubstitution.substitute_xml(val, True))
                attrs.append(decoded)
        close = ''
        closeTag = ''
        if self.is_empty_element:
            close = '/'
        else:
            closeTag = '</%s>' % self.name

        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"

        pretty_print = (indent_level is not None)
        if pretty_print:
            space = (' ' * (indent_level - 1))
            indent_contents = indent_level + 1
        else:
            space = ''
            indent_contents = None
        contents = self.decode_contents(
            indent_contents, eventual_encoding, formatter)

        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if pretty_print:
                s.append(space)
            s.append('<%s%s%s%s>' % (
                    prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if pretty_print and closeTag and self.next_sibling:
                s.append("\n")
            s = ''.join(s)
        return s