示例#1
0
	def saveAllFileExtensions(self):
		"""Persist the file extensions discovered by the crawler.

		Writes the list returned by self._getCrawlerFileExts() into the
		'FileExtensions' section of a CrawlerFile built for self.url.
		Failures are reported to stdout instead of raised, matching the
		best-effort style of the other save* methods.
		"""
		try:
			cf = CrawlerFile(url=self.url)
			contentlist = self._getCrawlerFileExts()
			cf.saveSection('FileExtensions', contentlist)
		except Exception as e:
			# 'except Exception, e' is Python-2-only syntax; 'as e' works
			# on Python 2.6+ and 3.x alike.
			print('Exception:\t%s' % e)
示例#2
0
	def saveAllHrefsToFile(self, nonehtml=True):
		"""Persist every known href (visited and unvisited) to file.

		Combines self.visitedHrefs and self.unvisitedHrefs and writes them
		into the 'Hrefs' section of a CrawlerFile for self.url, overwriting
		any existing file (coverfile=True).

		nonehtml -- when True (default), links ending in '.html' are skipped.
		"""
		try:
			cf = CrawlerFile(url=self.url)
			# list() is the idiomatic copy; no need for identity comprehensions.
			hrefs = list(self.visitedHrefs) + list(self.unvisitedHrefs)
			contentlist = [href for href in hrefs
			               if not (nonehtml and href.endswith('.html'))]
			cf.saveSection('Hrefs', contentlist, coverfile=True)
		except Exception as e:
			# The original bare 'except: pass' silently swallowed every
			# failure; report it like the sibling save* methods do.
			print('Exception:\t%s' % e)
示例#3
0
 def saveAllFileExtensions(self):
     """Persist the file extensions discovered by the crawler.

     Writes the list returned by self._getCrawlerFileExts() into the
     'FileExtensions' section of a CrawlerFile built for self.url.
     Failures are reported to stdout instead of raised, matching the
     best-effort style of the other save* methods.
     """
     try:
         cf = CrawlerFile(url=self.url)
         contentlist = self._getCrawlerFileExts()
         cf.saveSection('FileExtensions', contentlist)
     except Exception as e:
         # 'except Exception, e' is Python-2-only syntax; 'as e' works
         # on Python 2.6+ and 3.x alike.
         print('Exception:\t%s' % e)
示例#4
0
 def saveAllHrefsToFile(self, nonehtml=True):
     """Persist every known href (visited and unvisited) to file.

     Combines self.visitedHrefs and self.unvisitedHrefs and writes them
     into the 'Hrefs' section of a CrawlerFile for self.url, overwriting
     any existing file (coverfile=True).

     nonehtml -- when True (default), links ending in '.html' are skipped.
     """
     try:
         cf = CrawlerFile(url=self.url)
         # list() is the idiomatic copy; no need for identity comprehensions.
         hrefs = list(self.visitedHrefs) + list(self.unvisitedHrefs)
         contentlist = [href for href in hrefs
                        if not (nonehtml and href.endswith('.html'))]
         cf.saveSection('Hrefs', contentlist, coverfile=True)
     except Exception as e:
         # The original bare 'except: pass' silently swallowed every
         # failure; report it like the sibling save* methods do.
         print('Exception:\t%s' % e)
示例#5
0
	def saveAllPaths(self):
		"""Persist the crawler-discovered paths for self.url.

		Writes the list returned by self._getCrawlerPaths(self.url) into
		the 'Paths' section of a CrawlerFile for self.url. Failures are
		reported to stdout instead of raised, matching the best-effort
		style of the other save* methods.
		"""
		try:
			cf = CrawlerFile(url=self.url)
			contentlist = self._getCrawlerPaths(self.url)
			cf.saveSection('Paths', contentlist)
		except Exception as e:
			# 'except Exception, e' is Python-2-only syntax; 'as e' works
			# on Python 2.6+ and 3.x alike.
			print('Exception:\t%s' % e)
示例#6
0
    def saveAllPaths(self):
        """Persist the crawler-discovered paths for self.url.

        Writes the list returned by self._getCrawlerPaths(self.url) into
        the 'Paths' section of a CrawlerFile for self.url. Failures are
        reported to stdout instead of raised, matching the best-effort
        style of the other save* methods.
        """
        try:
            cf = CrawlerFile(url=self.url)
            contentlist = self._getCrawlerPaths(self.url)
            cf.saveSection('Paths', contentlist)
        except Exception as e:
            # 'except Exception, e' is Python-2-only syntax; 'as e' works
            # on Python 2.6+ and 3.x alike.
            print('Exception:\t%s' % e)