Exemplo n.º 1
0
    def __init__(self, the_page):
        """Set up the thread/parser object and parse ``the_page`` right away.

        Both base-class initialisers are invoked explicitly, the parsing
        state is set to its starting values, and then ``the_page`` is passed
        to ``feed`` followed by a call to ``parse_to_sentances``. All of
        this happens inside the constructor itself.

        Args:
            the_page: Markup handed unchanged to ``self.feed`` (presumably
                the HTML text of a fetched page -- confirm with callers).
        """
        # Explicit base-class calls: Thread first, then HTMLParser.
        threading.Thread.__init__(self)
        HTMLParser.HTMLParser.__init__(self)

        self.sentencesplitter = SentenceSplitter()

        # Boolean state flags, all initially off.  Chained assignment is safe
        # here because False is immutable.
        self.in_paragraph = self.end_of_intro = False
        self.cant_find_page = self.gotimage = False

        # Paragraph bookkeeping.  The counter starts at -1 so that, per the
        # original author's note, it reads 0 once the first <p> tag is met.
        self.num_of_para = -1
        # Two distinct list objects (tuple assignment, not a shared list).
        self.paraString, self.paraSentances = [], []

        # State must be in place before feeding, since parsing runs now.
        self.feed(the_page)
        self.parse_to_sentances()