示例#1
0
	def parseContent(self, rawData, url, level):
		"""Extract a title and content from rawData and hand them to write_to_database.

		Args:
			rawData: Fetched page data; its length is logged, and it is passed
				to get_title and partPage when non-empty.
			url: Address associated with rawData; forwarded to partPage and
				write_to_database, and appended to 'Default' as the fallback title.
			level: Forwarded unchanged to write_to_database.

		Returns:
			0 when rawData is empty, otherwise whatever write_to_database
			returns (presumably the stored record's id - confirm).
		"""
		size = len(rawData)
		# One pre-formatted argument prints the same text under both the
		# Python 2 print statement and the Python 3 print function.
		print('parseContent rawData %d' % size)
		if size == 0:
			return 0
		title = get_title(rawData)
		if len(title) == 0:
			title = 'Default' + url
		content = partPage(rawData, url)
		# NOTE(review): the meaning of the two leading 1 arguments is not
		# visible here - confirm against write_to_database.
		return self.write_to_database(1, 1, title, content, url, level)
示例#2
0
 def parseContent(self, rawData, url, level):
     """Extract a title and content from rawData and hand them to write_to_database.

     Args:
         rawData: Fetched page data; its length is logged, and it is passed
             to get_title and partPage when non-empty.
         url: Address associated with rawData; forwarded to partPage and
             write_to_database, and appended to 'Default' as the fallback title.
         level: Forwarded unchanged to write_to_database.

     Returns:
         0 when rawData is empty, otherwise whatever write_to_database
         returns (presumably the stored record's id - confirm).
     """
     size = len(rawData)
     # One pre-formatted argument prints the same text under both the
     # Python 2 print statement and the Python 3 print function.
     print('parseContent rawData %d' % size)
     if size == 0:
         return 0
     title = get_title(rawData)
     if len(title) == 0:
         title = 'Default' + url
     content = partPage(rawData, url)
     # NOTE(review): the meaning of the two leading 1 arguments is not
     # visible here - confirm against write_to_database.
     return self.write_to_database(1, 1, title, content, url, level)
示例#3
0
	def start_fetch(self, url, level = 1):
		"""Fetch url, store the page when it is non-empty, and return its sub-URLs.

		Args:
			url: Address to fetch; also recorded on self.url.
			level: Forwarded unchanged to write_to_database (defaults to 1).

		Returns:
			The result of getSubTotalURLS for the fetched data, whether or not
			anything was written to the database.
		"""
		self.url = url
		fetcher = getFetcher(self.url, self.uid, self.upwd)
		page = fetcher.fetchData()
		# Sub-URLs are collected from the raw fetch result, before partPage runs.
		links = getSubTotalURLS(page, url)
		if len(page) == 0:
			return links
		heading = get_title(page)
		if len(heading) == 0:
			heading = 'Default' + self.url
		# NOTE(review): the meaning of the literal 0 given to partPage and of
		# the two leading 1 arguments is not visible here - confirm.
		body = partPage(page, 0)
		self.write_to_database(1, 1, heading, body, self.url, level)
		# A reindex step (do_syscmd_reindexer) was left disabled at this point.
		return links
示例#4
0
 def start_fetch(self, url, level=1):
     """Fetch url, store the page when it is non-empty, and return its sub-URLs.

     Args:
         url: Address to fetch; also recorded on self.url.
         level: Forwarded unchanged to write_to_database (defaults to 1).

     Returns:
         The result of getSubTotalURLS for the fetched data, whether or not
         anything was written to the database.
     """
     self.url = url
     fetcher = getFetcher(self.url, self.uid, self.upwd)
     page = fetcher.fetchData()
     # Sub-URLs are collected from the raw fetch result, before partPage runs.
     links = getSubTotalURLS(page, url)
     if len(page) == 0:
         return links
     heading = get_title(page)
     if len(heading) == 0:
         heading = 'Default' + self.url
     # NOTE(review): the meaning of the literal 0 given to partPage and of
     # the two leading 1 arguments is not visible here - confirm.
     body = partPage(page, 0)
     self.write_to_database(1, 1, heading, body, self.url, level)
     # A reindex step (do_syscmd_reindexer) was left disabled at this point.
     return links