Пример #1
0
    def feed(self, raw_data):
        """Clean *raw_data*, push it through the HTML parser, return the root.

        The input is stripped of surrounding whitespace, every match of
        ``block_re`` and ``inline_re`` is replaced by the result of
        ``self._pre_cut_out`` (the <pre>/<tt> areas), and the remainder is
        passed through ``strip_html`` before being fed to ``HTMLParser``.

        :param raw_data: HTML source; must be an instance of ``TEXT_TYPE``.
        :return: ``self.root`` after the parser has consumed the data.
        """
        assert isinstance(raw_data, TEXT_TYPE), "feed data must be unicode!"

        stripped = raw_data.strip()

        # Cut the <pre>/<tt> areas out first (block-level pattern, then the
        # inline pattern) so the whitespace cleanup below does not see them.
        without_blocks = block_re.sub(self._pre_cut_out, stripped)
        without_inline = inline_re.sub(self._pre_cut_out, without_blocks)

        # Remove whitespace from the remaining html code.
        cleaned = strip_html(without_inline)

        if self.debugging:
            debug_lines = (
                "_" * 79,
                "raw data:",
                repr(raw_data),
                " -" * 40,
                "cleaned data:",
                cleaned,
                "-" * 79,
            )
            for line in debug_lines:
                print(line)

        HTMLParser.feed(self, cleaned)

        return self.root
Пример #2
0
    def feed(self, raw_data):
        """Parse *raw_data* after pre-processing and return the document root.

        Pre-processing steps, in order: strip surrounding whitespace, replace
        ``block_re`` matches and then ``inline_re`` matches using
        ``self._pre_cut_out`` (the <pre>/<tt> areas), and run ``strip_html``
        on what is left.

        :param raw_data: HTML source; must be an instance of ``TEXT_TYPE``.
        :return: ``self.root`` once ``HTMLParser.feed`` has processed the data.
        """
        assert isinstance(raw_data, TEXT_TYPE), "feed data must be unicode!"

        html = raw_data.strip()

        # Cut out <pre> and <tt> areas: block pattern first, inline second.
        for pattern in (block_re, inline_re):
            html = pattern.sub(self._pre_cut_out, html)

        # Delete whitespace from the html code.
        html = strip_html(html)

        if self.debugging:
            # Dump both the untouched input and the cleaned text.
            report = [
                "_" * 79,
                "raw data:",
                repr(raw_data),
                " -" * 40,
                "cleaned data:",
                html,
                "-" * 79,
            ]
            for entry in report:
                print(entry)

        HTMLParser.feed(self, html)

        return self.root
Пример #3
0
    def __init__(self, debug=False):
        """Initialise the parser state.

        :param debug: when truthy, a warning is emitted and ``self.result``
            becomes a ``DebugList`` bound to this parser instead of a plain
            list.
        """
        HTMLParser.__init__(self)

        self.debugging = debug
        if not self.debugging:
            self.result = []
        else:
            warnings.warn(
                message="Html2Creole debug is on! warn every data append.")
            self.result = DebugList(self)

        # The document tree starts as a lone root node, which is also the
        # current node.
        self.root = DocNode("document", None)
        self.cur = self.root

        self.blockdata = []
        self.__list_level = 0
Пример #4
0
    def __init__(self, debug=False):
        """Set up the base ``HTMLParser`` and this parser's own attributes.

        :param debug: stored as ``self.debugging``; when truthy a warning is
            issued and ``self.result`` is a ``DebugList`` wrapping this
            parser, otherwise ``self.result`` is an empty list.
        """
        HTMLParser.__init__(self)

        self.debugging = debug
        if not self.debugging:
            self.result = []
        else:
            debug_notice = "Html2Creole debug is on! warn every data append."
            warnings.warn(message=debug_notice)
            self.result = DebugList(self)

        self.blockdata = []

        # Root of the document tree; parsing starts with it as current node.
        document_root = DocNode("document", None)
        self.root = document_root
        self.cur = document_root

        self.__list_level = 0