def parseContent(self, rawData, url, level):
    """Parse fetched page content and persist it to the database.

    Parameters:
        rawData: raw page markup fetched for *url* (string; may be empty).
        url: source URL the content came from.
        level: crawl depth recorded with the stored row.

    Returns:
        The id returned by write_to_database, or 0 when *rawData* is empty.
    """
    # py3-compatible print (was a Python 2 print statement); same output text.
    print('parseContent rawData %d' % len(rawData))
    row_id = 0  # renamed from `id`, which shadowed the builtin
    if rawData:  # truthiness test replaces len(rawData) != 0
        title = get_title(rawData)
        if not title:
            title = 'Default' + url
        # NOTE(review): an earlier revision called partPage(rawData, 0);
        # start_fetch below still passes 0 as the second argument — confirm
        # which partPage signature is intended.
        result = partPage(rawData, url)
        row_id = self.write_to_database(1, 1, title, result, url, level)
    return row_id
def start_fetch(self, url, level=1):
    """Fetch *url*, store its parsed content, and return the sub-URLs found.

    Side effects: sets self.url, and (for non-empty pages) writes one row
    via write_to_database.

    Parameters:
        url: page URL to fetch; credentials come from self.uid / self.upwd.
        level: crawl depth recorded with the stored row (default 1).

    Returns:
        The sub-URL collection produced by getSubTotalURLS for this page.
    """
    self.url = url
    result = getFetcher(self.url, self.uid, self.upwd).fetchData()
    # Sub-URLs are extracted from the raw markup before it is rewritten below.
    subURLs = getSubTotalURLS(result, url)
    if result:  # truthiness test replaces len(result) != 0
        title = get_title(result)
        if not title:
            title = 'Default' + self.url
        result = partPage(result, 0)
        self.write_to_database(1, 1, title, result, self.url, level)
    return subURLs
def start_fetch(self, url, level=1):
    """Fetch *url*, store its parsed content, and return the sub-URLs found.

    NOTE(review): this is a near-verbatim duplicate re-definition of
    start_fetch (it silently shadows the earlier one at import time);
    one of the two definitions should be removed.

    Parameters:
        url: page URL to fetch; credentials come from self.uid / self.upwd.
        level: crawl depth recorded with the stored row (default 1).

    Returns:
        The sub-URL collection produced by getSubTotalURLS for this page.
    """
    self.url = url
    result = getFetcher(self.url, self.uid, self.upwd).fetchData()
    # Sub-URLs are extracted from the raw markup before it is rewritten below.
    subURLs = getSubTotalURLS(result, url)
    if result:  # truthiness test replaces len(result) != 0
        title = get_title(result)
        if not title:
            title = 'Default' + self.url
        result = partPage(result, 0)
        self.write_to_database(1, 1, title, result, self.url, level)
    return subURLs