Exemplo n.º 1
0
	def loop(self, url, next, post=None, cb=None, cc = 1, deep=2, debug=0, allow_external = False, link_filter=None, start_now=True,  **options):
		"""Queue `url` and keep following the links matched by `next`.

		Every loaded page is queried with ``doc.q(next)``; the node value of
		each match is treated as a URL and queued as a new request until the
		depth limit is reached.

		Args:
			url: start URL; requested at depth 1.
			next: query handed to ``doc.q`` to locate follow-up links.
			post: POST data, sent with the first request only.
			cb: optional callable invoked with every loaded document.
			cc: value assigned to ``self.downloader.cc`` (presumably the
				concurrency level -- confirm against the downloader).
			deep: links are only followed from pages whose depth is below this.
			debug: accepted for backward compatibility; not used here.
			allow_external: when true, links on other domains are followed
				too; by default only links on the start domain are queued.
			link_filter: optional callable called as ``link_filter(url=...)``;
				a falsy result skips the link.
			start_now: when true, ``self.downloader.start()`` is called
				before returning.
			**options: forwarded to every ``Request`` created here.
		"""

		# only membership is ever tested, so a set keeps the check O(1)
		doneurls = set()
		doneurls.add(common.md5(url))

		domain = common.get_domain(url).lower()

		def page_loaded(doc):
			"""Queue the unseen links of `doc`, then hand `doc` to `cb`."""
			level = doc.req['meta']['deep']

			if level < deep:
				for n in doc.q(next):
					nexturl = n.nodevalue()

					if not allow_external:
						# host names are case-insensitive, so normalise the link's
						# domain the same way the start domain was normalised
						if domain != (common.get_domain(nexturl) or '').lower():
							continue
					if link_filter and not link_filter(url=nexturl):
						continue

					key = common.md5(nexturl)
					if key in doneurls:
						continue

					# mark as seen before queueing so a link is requested only once
					doneurls.add(key)
					req = Request(url=nexturl, meta=dict(deep=level+1), use_cache=True, cb=page_loaded, **options)
					self.downloader.put(req)

			# let the loop caller process each loaded page
			if cb:
				cb(doc)

		self.downloader.put(Request(url=url, post=post, meta=dict(deep=1), use_cache=True, cb=page_loaded, **options))

		self.downloader.cc = cc
		if start_now:
			self.downloader.start()
Exemplo n.º 2
0
	def loop(self, url, next, post=None, cb=None, cc = 1, deep=2, debug=0, allow_external = False, link_filter=None, start_now=True,  **options):
		"""Crawl outwards from `url`, following the links selected by `next`.

		Each loaded document is queried with ``doc.q(next)``; the node value
		of every match is taken as a URL and queued, one level deeper, until
		the depth limit stops the traversal.

		Args:
			url: start URL; requested at depth 1.
			next: query passed to ``doc.q`` to find follow-up links.
			post: POST data, used for the first request only.
			cb: optional callable that receives every loaded document.
			cc: value stored in ``self.downloader.cc`` (presumably the
				concurrency level -- confirm against the downloader).
			deep: a page's links are followed only while its depth is below
				this value.
			debug: kept for backward compatibility; not used here.
			allow_external: when true, links pointing to other domains are
				queued as well; otherwise they are skipped.
			link_filter: optional callable called as ``link_filter(url=...)``;
				the link is skipped when it returns a falsy value.
			start_now: when true, ``self.downloader.start()`` is called
				before returning.
			**options: passed through to every ``Request`` created here.
		"""

		# a set, because the collection is only used for "already seen?" checks
		doneurls = set([common.md5(url)])

		domain = common.get_domain(url).lower()

		def is_external(link):
			"""Tell whether `link` lives on a domain other than the start one."""
			# compare case-insensitively: the start domain was lower-cased above
			return (common.get_domain(link) or '').lower() != domain

		def page_loaded(doc):
			"""Queue the unseen links of `doc`, then hand `doc` to `cb`."""
			current = doc.req['meta']['deep']

			if current < deep:
				for n in doc.q(next):
					nexturl = n.nodevalue()

					if not allow_external and is_external(nexturl):
						continue
					if link_filter and not link_filter(url=nexturl):
						continue

					fingerprint = common.md5(nexturl)
					if fingerprint not in doneurls:
						# record first, so the same link is never queued twice
						doneurls.add(fingerprint)
						req = Request(url=nexturl, meta=dict(deep=current+1), use_cache=True, cb=page_loaded, **options)
						self.downloader.put(req)

			# let the loop caller process each loaded page
			if cb:
				cb(doc)

		self.downloader.put(Request(url=url, post=post, meta=dict(deep=1), use_cache=True, cb=page_loaded, **options))

		self.downloader.cc = cc
		if start_now:
			self.downloader.start()
Exemplo n.º 3
0
    def make_key(self, url, post=''):
        """Return a key of the form ``<md5>.htm`` derived from `url` and `post`.

        `post` may be a string, a dict or a ``common.MyDict``; dict-like
        values are url-encoded from their sorted items before hashing.
        """
        body = post

        # unwrap the MyDict wrapper into what its dict() method returns
        if body and isinstance(body, common.MyDict):
            body = body.dict()

        # encode the items in sorted order so the key ignores dict ordering
        if body and isinstance(body, dict):
            pairs = sorted(body.items())
            body = urllib.urlencode(pairs)

        if not body:
            body = ''

        raw = url + body
        return common.md5(raw.encode('utf8')) + '.htm'
Exemplo n.º 4
0
	def make_key(self, url, post = ''):
		"""Hash `url` together with the normalised `post` into ``<md5>.htm``.

		A ``common.MyDict`` is first converted with its ``dict()`` method; a
		dict is serialised by url-encoding its sorted items; any other value
		is concatenated to the url unchanged.
		"""
		data = post
		if data and isinstance(data, common.MyDict):
			data = data.dict()
		if data and isinstance(data, dict):
			# sorted items keep the serialisation independent of dict ordering
			data = urllib.urlencode(sorted(data.items()))

		# a falsy post contributes nothing to the hashed text
		suffix = data if data else ''
		digest = common.md5((url + suffix).encode('utf8'))
		return digest + '.htm'
Exemplo n.º 5
0
		def page_loaded(doc):
			"""Queue the unseen links found in `doc`, then pass `doc` to `cb`.

			Links are taken from ``doc.q(next)`` and are only followed while
			the depth stored in the request's meta is below `deep`.
			"""
			depth = doc.req['meta']['deep']

			if depth < deep:
				for node in doc.q(next):
					target = node.nodevalue()

					# skip links whose domain differs from `domain`
					if common.get_domain(target) != domain:
						continue
					# skip links rejected by the optional caller-supplied filter
					if link_filter and not link_filter(url=target):
						continue

					digest = common.md5(target)
					if digest in doneurls:
						continue

					# remember the link before queueing it, one level deeper
					doneurls.append(digest)
					self.downloader.put(Request(url=target, meta=dict(deep=depth+1), use_cache=True, cb=page_loaded, **options))

			# let the loop caller process each loaded page
			if cb:
				cb(doc)
Exemplo n.º 6
0
		def page_loaded(doc):
			"""Follow the links of a loaded page and notify the caller's callback.

			Every match of ``doc.q(next)`` is treated as a URL; it is queued as a
			new request when it passes the domain check and the optional
			`link_filter`, and its md5 is not yet recorded in `doneurls`.
			"""
			current_deep = doc.req['meta']['deep']

			# pages at or beyond the depth limit contribute no further links
			links = doc.q(next) if current_deep < deep else []

			for link in links:
				href = link.nodevalue()

				if common.get_domain(href) != domain:
					continue
				if link_filter and not link_filter(url=href):
					continue

				fingerprint = common.md5(href)
				if fingerprint not in doneurls:
					doneurls.append(fingerprint)
					follow_up = Request(url=href, meta=dict(deep=current_deep+1), use_cache=True, cb=page_loaded, **options)
					self.downloader.put(follow_up)

			# let the loop caller process each loaded page
			if cb:
				cb(doc)
Exemplo n.º 7
0
    def append_line(self, filename, line, dedup=False):
        """Append `line` followed by CRLF to the file named `filename`.

        The target path is obtained from ``self.join_path(filename)``.
        With ``dedup=True`` the md5 of every written line is remembered in
        ``self._data_lines`` and a line whose md5 is already recorded is
        skipped.

        ``self.writingflag`` guards the write: the call spins while the flag
        is set, sets it for the duration of the write and always clears it
        afterwards, even when the write raises.
        """
        # wait while another writer holds the flag
        # NOTE(review): the check and the assignment are separate steps, so
        # this is a best-effort guard rather than a real lock
        while self.writingflag:
            pass
        # hold the flag
        self.writingflag = True
        try:
            path = self.join_path(filename)

            if dedup:
                if not hasattr(self, '_data_lines'):
                    self._data_lines = []

                digest = common.md5(line)
                if digest in self._data_lines:
                    return
                self._data_lines.append(digest)

            common.append_file(path, line + '\r\n')
        finally:
            # always free the flag; leaving it set after an error would make
            # every later call spin forever
            self.writingflag = False
Exemplo n.º 8
0
	def append_line(self, filename, line, dedup=False):
		"""Write `line` plus CRLF to the end of the file named `filename`.

		The path comes from ``self.join_path(filename)``. When `dedup` is
		true, the md5 of each written line is kept in ``self._data_lines``
		and lines whose md5 is already present are not written again.

		``self.writingflag`` is used as a busy-wait guard around the write
		and is released in a ``finally`` clause, so a failing write cannot
		leave it set.
		"""
		# wait while another writer holds the flag
		# NOTE(review): testing and setting the flag are two separate steps,
		# so this is a best-effort guard rather than a real lock
		while self.writingflag:
			pass
		# hold the flag
		self.writingflag = True
		try:
			path = self.join_path(filename)
			write_it = True

			if dedup:
				if not hasattr(self, '_data_lines'):
					self._data_lines = []

				digest = common.md5(line)
				if digest in self._data_lines:
					write_it = False
				else:
					self._data_lines.append(digest)

			if write_it:
				common.append_file(path, line + '\r\n')
		finally:
			# free the flag on every exit path; otherwise an exception above
			# would leave all later callers spinning forever
			self.writingflag = False