Пример #1
0
class RedirectorManager (object):
	def __init__ (self,configuration,poller):
		self.configuration = configuration

		self.low = configuration.redirector.minimum       # minimum number of workers at all time
		self.high = configuration.redirector.maximum      # maximum numbe of workers at all time
		self.program = configuration.redirector.program   # what program speaks the squid redirector API

		self.nextid = 1                   # incremental number to make the name of the next worker
		self.queue = Queue()              # queue with HTTP headers to process
		self.poller = poller              # poller interface that checks for events on sockets
		self.worker = {}                  # our workers threads
		self.closing = set()              # workers that are currently closing
		self.running = True               # we are running

		self.log = Logger('manager', configuration.log.manager)

	def _getid(self):
		id = str(self.nextid)
		self.nextid +=1
		return id

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()

		worker = Redirector(self.configuration,wid,self.queue,self.program)
		self.poller.addReadSocket('read_workers', worker.response_box_read)
		self.worker[wid] = worker
		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker),self.low,self.high))
		self.worker[wid].start()

	def spawn (self,number=1):
		"""create the set number of worker"""
		self.log.info("spawning %d more worker" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		number = max(min(len(self.worker),self.high),self.low)
		for wid in set(self.worker):
			self.reap(wid)
		self.spawn(number)

	def reap (self,wid):
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]
		self.closing.add(wid)
		worker.stop()  # will cause the worker to stop when it can

	def decrease (self):
		if self.low < len(self.worker):
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def increase (self):
		if len(self.worker) < self.high:
			self.spawn()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0,self.low-len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""
		self.running = False
		threads = self.worker.values()
		if len(self.worker):
			self.log.info("stopping %d workers." % len(self.worker))
			for wid in set(self.worker):
				self.reap(wid)
			for thread in threads:
				self.request(None, None, None, 'nop')
			for thread in threads:
				thread.destroyProcess()
				thread.join()

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			if creation < past and wid not in self.closing:
				past = creation
				oldest = self.worker[wid]
		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low-num_workers))
			self.spawn(self.low-num_workers)

		size = self.queue.qsize()

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def request(self, client_id, peer, request, source):
		return self.queue.put((client_id,peer,request,source,False))

	def getDecision(self, box):
		# NOTE: reads may block if we send badly formatted data
		try:
			r_buffer = box.read(3)
			while r_buffer.isdigit():
				r_buffer += box.read(1)

			if ':' in r_buffer:
				size, response = r_buffer.split(':', 1)
				if size.isdigit():
					size = int(size)
				else:
					size, response = None, None
			else:   # not a netstring
				size, response = None, None

			if size is not None:
				required = size + 1 - len(response)
				response += box.read(required)

			if response is not None:
				if response.endswith(','):
					response = response[:-1]
				else:
					response = None

		except ValueError:  # I/O operation on closed file
			worker = self.worker.get(box, None)
			if worker is not None:
				worker.destroyProcess()

			response = None
		except TypeError:
			response = None

		try:
			if response:
				client_id, command, decision = response.split('\0', 2)
			else:
				client_id = None
				command = None
				decision = None

		except (ValueError, TypeError):
			client_id = None
			command = None
			decision = None

		if command == 'requeue':
			_client_id, _peer, _source, _header = response.split('\0', 3)
			self.queue.put((_client_id,_peer,_header,_source,True))

			client_id = None
			command = None
			decision = None

		elif command == 'hangup':
			wid = decision
			client_id = None
			command = None
			decision = None

			worker = self.worker.pop(wid, None)

			if worker:
				self.poller.removeReadSocket('read_workers', worker.response_box_read)
				if wid in self.closing:
					self.closing.remove(wid)
				worker.shutdown()
				worker.join()

		elif command == 'stats':
			wid, timestamp, stats = decision
			self.storeStats(timestamp, wid, stats)

			client_id = None
			command = None
			decision = None

		return client_id, command, decision

	def showInternalError(self):
		return 'file', '\0'.join(('200', 'internal_error.html'))

	def requestStats(self):
		for wid, worker in self.worker.iteritems():
			worker.requestStats()

	def storeStats(self, timestamp, wid, stats):
		pairs = (d.split('=',1) for d in stats.split('?', 1).split('&'))
		d = self.cache.setdefault(timestamp, {})

		for k, v in pairs:
			d.setdefault(k, []).append(v)
Пример #2
0
class Supervisor (object):
	alarm_time = 0.1                           # regular backend work
	second_frequency = int(1/alarm_time)       # when we record history
	minute_frequency = int(60/alarm_time)      # when we want to average history
	increase_frequency = int(5/alarm_time)     # when we add workers
	decrease_frequency = int(60/alarm_time)    # when we remove workers
	saturation_frequency = int(20/alarm_time)  # when we report connection saturation
	interface_frequency = int(300/alarm_time)  # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		self.poller.setupRead('read_proxy')           # Listening proxy sockets
		self.poller.setupRead('read_web')             # Listening webserver sockets
		self.poller.setupRead('read_icap')             # Listening icap sockets
		self.poller.setupRead('read_workers')         # Pipes carrying responses from the child processes
		self.poller.setupRead('read_resolver')        # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')          # Active clients
		self.poller.setupRead('opening_client')       # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')        # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')      # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')        # Established connections
		self.poller.setupWrite('write_download')      # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')    # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.manager = RedirectorManager(
			self.configuration,
			self.poller,
		)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()


	def sigquit (self,signum, frame):
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True


	def sigusr1 (self,signum, frame):
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1


	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		self.signal_log.debug('SIG ALRM received, timed actions')
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)


	def interfaces (self):
		local = set(['127.0.0.1','::1'])
		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			if interface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (interface.address,interface.name))
			local.add(interface.address)
		for ip in self.local:
			if ip not in local:
				self.log.info('removed local ip %s' % ip)
		if local == self.local:
			self.log.info('no ip change')
		else:
			self.local = local

	def run (self):
		if self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		elif not self.initialise():
			self._shutdown = True

		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_increase = 0
		count_decrease = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_increase = (count_increase + 1) % self.increase_frequency
			count_decrease = (count_decrease + 1) % self.decrease_frequency
			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				self.reactor.run()


				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._increase_spawn_limit:
					number = self._increase_spawn_limit
					self._increase_spawn_limit = 0
					self.manager.low += number
					self.manager.high = max(self.manager.low,self.manager.high)
					for _ in range(number):
						self.manager.increase()

				if self._decrease_spawn_limit:
					number = self._decrease_spawn_limit
					self._decrease_spawn_limit = 0
					self.manager.high = max(1,self.manager.high-number)
					self.manager.low = min(self.manager.high,self.manager.low)
					for _ in range(number):
						self.manager.decrease()


				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
					self.reactor.log.debug('events : ' + ', '.join('%s:%d' % (k,len(v)) for (k,v) in self.reactor.events.items()))
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# make sure we have enough workers
				if count_increase == 0:
					self.manager.provision()
				# and every so often remove useless workers
				if count_decrease == 0:
					self.manager.deprovision()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Пример #3
0
class Supervisor (object):
	alarm_time = 0.1                           # regular backend work
	second_frequency = int(1/alarm_time)       # when we record history
	minute_frequency = int(60/alarm_time)      # when we want to average history
	increase_frequency = int(5/alarm_time)     # when we add workers
	decrease_frequency = int(60/alarm_time)    # when we remove workers
	saturation_frequency = int(20/alarm_time)  # when we report connection saturation
	interface_frequency = int(300/alarm_time)  # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		sys.exitfunc = self.log_writer.writeMessages

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		self.poller.setupRead('read_proxy')       # Listening proxy sockets
		self.poller.setupRead('read_web')         # Listening webserver sockets
		self.poller.setupRead('read_icap')        # Listening icap sockets
		self.poller.setupRead('read_redirector')  # Pipes carrying responses from the redirector process
		self.poller.setupRead('read_resolver')    # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')      # Active clients
		self.poller.setupRead('opening_client')   # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')    # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')  # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')      # Established connections
		self.poller.setupWrite('write_download')    # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')  # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		if not self.initialise():
			self._shutdown = True

		elif self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		# fork the redirector process before performing any further setup
		redirector = fork_redirector(self.poller, self.configuration)

		# create threads _after_ all forking is done
		self.redirector = redirector_message_thread(redirector)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()

	def exit (self):
		sys.exit()

	def sigquit (self,signum, frame):
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True


	def sigusr1 (self,signum, frame):
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1


	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)


	def interfaces (self):
		local = set(['127.0.0.1','::1'])
		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			if interface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (interface.address,interface.name))
			local.add(interface.address)
		for ip in self.local:
			if ip not in local:
				self.log.info('removed local ip %s' % ip)
		if local == self.local:
			self.log.info('no ip change')
		else:
			self.local = local

	def run (self):
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				status = self.reactor.run()
				if status is False:
					self._shutdown = True

				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._decrease_spawn_limit:
					count = self._decrease_spawn_limit
					self.redirector.decreaseSpawnLimit(count)
					self._decrease_spawn_limit = 0

				if self._increase_spawn_limit:
					count = self._increase_spawn_limit
					self.redirector.increaseSpawnLimit(count)
					self._increase_spawn_limit = 0

				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Пример #4
0
class RedirectorManager (object):
	def __init__ (self, configuration, poller):
		self.low = configuration.redirector.minimum 		# minimum concurrent redirector workers
		self.high = configuration.redirector.maximum		# maximum concurrent redirector workers

		self.poller = poller
		self.configuration = configuration
		self.queue = Queue()    # store requests we do not immediately have the resources to process

		self.nextid = 1			# unique id to give to the next spawned worker
		self.worker = {}		# worker tasks for each spawned child
		self.processes = {}		# worker tasks indexed by file descriptors we can poll
		self.available = set()	# workers that are currently available to handle new requests
		self.active = {}        # workers that are currently busy waiting for a response from the spawned process
		self.stopping = set()   # workers we want to stop as soon as they stop being active

		program = configuration.redirector.program
		protocol = configuration.redirector.protocol
		self.redirector_factory = RedirectorFactory(configuration, program, protocol)

		self.log = Logger('manager', configuration.log.manager)

	def _getid(self):
		wid = str(self.nextid)
		self.nextid += 1
		return wid

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()

		worker = self.redirector_factory.create(wid)
		self.worker[wid] = worker
		self.available.add(wid)

		if worker.process is not None:
			identifier = worker.process.stdout
			self.processes[identifier] = worker
			self.poller.addReadSocket('read_workers', identifier)

		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker), self.low, self.high))

	def spawn (self, number=1):
		"""create the request number of worker processes"""
		self.log.info("spawning %d more workers" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		number = max(min(len(self.worker), self.high), self.low)

		for wid in set(self.worker):
			self.stopWorker(wid)

		self.spawn(number)

	def stopWorker (self, wid):
		self.log.info('want worker %s to go away' % wid)

		if wid not in self.active:
			self.reap(wid)

		else:
			self.stopping.add(wid)

	def reap (self, wid):
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]

		if wid in self.active:
			self.log.error('reaping worker %s even though it is still active' % wid)
			self.active.pop(wid)

		if wid in self.stopping:
			self.stopping.remove(wid)

		if wid in self.available:
			self.available.remove(wid)

		if worker.process is not None:
			self.poller.removeReadSocket('read_workers', worker.process.stdout)
			self.processes.pop(worker.process.stdout)

		worker.shutdown()
		self.worker.pop(wid)

	def _decrease (self):
		if self.low < len(self.worker):
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)

	def _increase (self):
		if len(self.worker) < self.high:
			self.spawn()

	def decrease (self, count=1):
		for _ in xrange(count):
			self._decrease()

	def increase (self, count=1):
		for _ in xrange(count):
			self._increase()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0,self.low-len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""

		for wid in self.worker:
			self.reap(wid)

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			if creation < past and wid not in self.stopping:
				past = creation
				oldest = wid

		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low - num_workers))
			self.spawn(self.low - num_workers)

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)



	def acquire (self):
		if self.available:
			identifier = self.available.pop()
			worker = self.worker[identifier]

		else:
			worker = None

		return worker

	def release (self, wid):
		if wid not in self.stopping:
			self.available.add(wid)

		else:
			self.reap(wid)

	def persist (self, wid, client_id, peer, data, header, subheader, source, tainted):
		self.active[wid] = client_id, peer, data, header, subheader, source, tainted

	def progress (self, wid):
		return self.active.pop(wid)

	def doqueue (self):
		if self.available and not self.queue.isempty():
			client_id, peer, header, subheader, source, tainted = self.queue.get()
			_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=tainted)

		else:
			client_id, command, decision = None, None, None

		return client_id, command, decision


	def request (self, client_id, peer, header, subheader, source, tainted=False):
		worker = self.acquire()

		if worker is not None:
			try:
				_, command, decision = worker.decide(client_id, peer, header, subheader, source)

			except:
				command, decision = None, None

			if command is None:
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
			command, decision = None, None
			self.queue.put((client_id, peer, header, subheader, source, tainted))

		if command == 'defer':
			self.persist(worker.wid, client_id, peer, decision, header, subheader, source, tainted)
			command, decision = None, None

		elif worker is not None:
			self.release(worker.wid)

		return client_id, command, decision


	def getDecision (self, pipe_in):
		worker = self.processes.get(pipe_in, None)

		if worker is not None and worker.wid in self.active:
			client_id, peer, request, header, subheader, source, tainted = self.progress(worker.wid)
			try:
				_, command, decision = worker.progress(client_id, peer, request, header, subheader, source)

			except Exception, e:
				command, decision = None, None

			self.release(worker.wid)

			if command is None:
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
Пример #5
0
	def poll(self):
		try:
			res = self.master.control(None, self.max_events, self.speed)
		except EnvironmentError, e:
			if e.errno != errno.EINTR:
				log.critical('KQueue master poller - unexpected error')
				raise

			log.warning('KQueue master poller - got EINTR, ignoring it.')
			res = []
			response = {}
		else:
			# response['poller1']=[] ; response['poller2']=[] etc.
			response = dict((name, []) for (name, _, _, _) in self.pollers.values())
			if (len(res) == self.max_events):
				log.warning("polled max_events from master kqueue")

		for events in res:
			fd = events.ident

			name, poller, sockets, fdtosock = self.pollers[fd]
			events = poller.control(None, self.max_events, 0)

			if (len(events) == self.max_events):
				log.warning("polled max_events from queue %s" % (name))

			for sock_events in events:
				sock_fd = sock_events.ident
				try:
					response[name].append(fdtosock[sock_fd])
				except KeyError, e:
Пример #6
0
	def poll(self):
		try:
			res = self.master.control(None, self.max_events, self.speed)
		except EnvironmentError, e:
			if e.errno != errno.EINTR:
				log.critical('KQueue master poller - unexpected error')
				raise

			log.warning('KQueue master poller - got EINTR, ignoring it.')
			res = []
			response = {}
		else:
			# response['poller1']=[] ; response['poller2']=[] etc.
			response = dict((name, []) for (name, _, _, _) in self.pollers.values())
			if len(res) == self.max_events:
				log.warning("polled max_events from master kqueue")

		for events in res:
			fd = events.ident

			name, poller, sockets, fdtosock = self.pollers[fd]
			events = poller.control(None, self.max_events, 0)

			if len(events) == self.max_events:
				log.warning("polled max_events from queue %s" % name)

			for sock_events in events:
				sock_fd = sock_events.ident
				try:
					response[name].append(fdtosock[sock_fd])
				except KeyError:
Пример #7
0
class Supervisor(object):
    alarm_time = 0.1  # regular backend work
    second_frequency = int(1 / alarm_time)  # when we record history
    minute_frequency = int(60 / alarm_time)  # when we want to average history
    increase_frequency = int(5 / alarm_time)  # when we add workers
    decrease_frequency = int(60 / alarm_time)  # when we remove workers
    saturation_frequency = int(
        20 / alarm_time)  # when we report connection saturation
    interface_frequency = int(300 /
                              alarm_time)  # when we check for new interfaces

    # import os
    # clear = [hex(ord(c)) for c in os.popen('clear').read()]
    # clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

    def __init__(self, configuration):
        self.configuration = configuration

        # Only here so the introspection code can find them
        self.log = Logger('supervisor', configuration.log.supervisor)
        self.log.error('Starting exaproxy version %s' %
                       configuration.proxy.version)

        self.signal_log = Logger('signal', configuration.log.signal)
        self.log_writer = SysLogWriter('log',
                                       configuration.log.destination,
                                       configuration.log.enable,
                                       level=configuration.log.level)
        self.usage_writer = UsageWriter('usage',
                                        configuration.usage.destination,
                                        configuration.usage.enable)

        sys.exitfunc = self.log_writer.writeMessages

        self.log_writer.setIdentifier(configuration.daemon.identifier)
        #self.usage_writer.setIdentifier(configuration.daemon.identifier)

        if configuration.debug.log:
            self.log_writer.toggleDebug()
            self.usage_writer.toggleDebug()

        self.log.error('python version %s' %
                       sys.version.replace(os.linesep, ' '))
        self.log.debug('starting %s' % sys.argv[0])

        self.pid = PID(self.configuration)

        self.daemon = Daemon(self.configuration)
        self.poller = Poller(self.configuration.daemon)

        self.poller.setupRead('read_proxy')  # Listening proxy sockets
        self.poller.setupRead('read_web')  # Listening webserver sockets
        self.poller.setupRead('read_icap')  # Listening icap sockets
        self.poller.setupRead('read_tls')  # Listening tls sockets
        self.poller.setupRead('read_passthrough')  # Listening raw data sockets
        self.poller.setupRead(
            'read_redirector'
        )  # Pipes carrying responses from the redirector process
        self.poller.setupRead(
            'read_resolver')  # Sockets currently listening for DNS responses

        self.poller.setupRead('read_client')  # Active clients
        self.poller.setupRead(
            'opening_client')  # Clients we have not yet read a request from
        self.poller.setupWrite(
            'write_client')  # Active clients with buffered data to send
        self.poller.setupWrite(
            'write_resolver')  # Active DNS requests with buffered data to send

        self.poller.setupRead('read_download')  # Established connections
        self.poller.setupWrite(
            'write_download'
        )  # Established connections we have buffered data to send to
        self.poller.setupWrite('opening_download')  # Opening connections

        self.poller.setupRead('read_interrupt')  # Scheduled events
        self.poller.setupRead(
            'read_control'
        )  # Responses from commands sent to the redirector process

        self.monitor = Monitor(self)
        self.page = Page(self)
        self.content = ContentManager(self, configuration)
        self.client = ClientManager(self.poller, configuration)
        self.resolver = ResolverManager(self.poller, self.configuration,
                                        configuration.dns.retries * 10)
        self.proxy = Server('http proxy', self.poller, 'read_proxy',
                            configuration.http)
        self.web = Server('web server', self.poller, 'read_web',
                          configuration.web)
        self.icap = Server('icap server', self.poller, 'read_icap',
                           configuration.icap)
        self.tls = Server('tls server', self.poller, 'read_tls',
                          configuration.tls)
        self.passthrough = InterceptServer('passthrough server', self.poller,
                                           'read_passthrough',
                                           configuration.passthrough)

        self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
        self._softstop = False  # stop once all current connection have been dealt with
        self._reload = False  # unimplemented
        self._toggle_debug = False  # start logging a lot
        self._decrease_spawn_limit = 0
        self._increase_spawn_limit = 0
        self._refork = False  # unimplemented
        self._pdb = False  # turn on pdb debugging
        self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
        self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
        self.local = set()  # what addresses are on our local interfaces

        if not self.initialise():
            self._shutdown = True

        elif self.daemon.drop_privileges():
            self.log.critical(
                'Could not drop privileges to \'%s\'. Refusing to run as root'
                % self.daemon.user)
            self.log.critical(
                'Set the environment value USER to change the unprivileged user'
            )
            self._shutdown = True

        # fork the redirector process before performing any further setup
        redirector = fork_redirector(self.poller, self.configuration)

        # use simple blocking IO for communication with the redirector process
        self.redirector = redirector_message_thread(redirector)

        # NOTE: create threads _after_ all forking is done

        # regularly interrupt the reactor for maintenance
        self.interrupt_scheduler = alarm_thread(self.poller, self.alarm_time)

        self.reactor = Reactor(self.configuration, self.web, self.proxy,
                               self.passthrough, self.icap, self.tls,
                               self.redirector, self.content, self.client,
                               self.resolver, self.log_writer,
                               self.usage_writer, self.poller)

        self.interfaces()

        signal.signal(signal.SIGQUIT, self.sigquit)
        signal.signal(signal.SIGINT, self.sigterm)
        signal.signal(signal.SIGTERM, self.sigterm)
        # signal.signal(signal.SIGABRT, self.sigabrt)
        # signal.signal(signal.SIGHUP, self.sighup)

        signal.signal(signal.SIGTRAP, self.sigtrap)

        signal.signal(signal.SIGUSR1, self.sigusr1)
        signal.signal(signal.SIGUSR2, self.sigusr2)
        signal.signal(signal.SIGTTOU, self.sigttou)
        signal.signal(signal.SIGTTIN, self.sigttin)

        # make sure we always have data in history
        # (done in zero for dependencies reasons)

        if self._shutdown is False:
            self.redirector.requestStats()
            command, control_data = self.redirector.readResponse()
            stats_data = control_data if command == 'STATS' else None

            stats = self.monitor.statistics(stats_data)
            ok = self.monitor.zero(stats)

            if ok:
                self.redirector.requestStats()

            else:
                self._shutdown = True

    def exit(self):
        sys.exit()

    def sigquit(self, signum, frame):
        if self._softstop:
            self.signal_log.critical('multiple SIG INT received, shutdown')
            self._shutdown = True
        else:
            self.signal_log.critical('SIG INT received, soft-stop')
            self._softstop = True
            self._listen = False

    def sigterm(self, signum, frame):
        self.signal_log.critical('SIG TERM received, shutdown request')
        if os.environ.get('PDB', False):
            self._pdb = True
        else:
            self._shutdown = True

    # def sigabrt (self,signum, frame):
    # 	self.signal_log.info('SIG INFO received, refork request')
    # 	self._refork = True

    # def sighup (self,signum, frame):
    # 	self.signal_log.info('SIG HUP received, reload request')
    # 	self._reload = True

    def sigtrap(self, signum, frame):
        self.signal_log.critical('SIG TRAP received, toggle debug')
        self._toggle_debug = True

    def sigusr1(self, signum, frame):
        self.signal_log.critical('SIG USR1 received, decrease worker number')
        self._decrease_spawn_limit += 1

    def sigusr2(self, signum, frame):
        self.signal_log.critical('SIG USR2 received, increase worker number')
        self._increase_spawn_limit += 1

    def sigttou(self, signum, frame):
        self.signal_log.critical('SIG TTOU received, stop listening')
        self._listen = False

    def sigttin(self, signum, frame):
        self.signal_log.critical('SIG IN received, star listening')
        self._listen = True

    def interfaces(self):
        local = {'127.0.0.1', '::1'}
        for interface in getifaddrs():
            if interface.family not in (AF_INET, AF_INET6):
                continue
            if interface.address not in self.local:
                self.log.info('found new local ip %s (%s)' %
                              (interface.address, interface.name))
            local.add(interface.address)
        for ip in self.local:
            if ip not in local:
                self.log.info('removed local ip %s' % ip)
        if local == self.local:
            self.log.info('no ip change')
        else:
            self.local = local

    def run(self):
        count_second = 0
        count_minute = 0
        count_saturation = 0
        count_interface = 0

        events = {'read_interrupt'}

        while True:
            count_second = (count_second + 1) % self.second_frequency
            count_minute = (count_minute + 1) % self.minute_frequency

            count_saturation = (count_saturation +
                                1) % self.saturation_frequency
            count_interface = (count_interface + 1) % self.interface_frequency

            try:
                if self._pdb:
                    self._pdb = False
                    import pdb
                    pdb.set_trace()

                # prime the alarm
                if 'read_interrupt' in events:
                    self.interrupt_scheduler.setAlarm()

                # check for IO change with select
                status, events = self.reactor.run()

                # shut down the server if a child process disappears
                if status is False:
                    self._shutdown = True

                # respond to control responses immediately
                if 'read_control' in events:
                    command, control_data = self.redirector.readResponse()

                    if command == 'STATS':
                        ok = self.doStats(count_second, count_minute,
                                          control_data)

                    if ok is False:
                        self._shutdown = True

                    # jump straight back into the reactor if we haven't yet received an
                    # interrupt event
                    if 'read_interrupt' not in events:
                        continue

                # clear the alarm condition
                self.interrupt_scheduler.acknowledgeAlarm()

                # must follow the reactor so we are sure to go through the reactor at least once
                # and flush any logs
                if self._shutdown:
                    self._shutdown = False
                    self.shutdown()
                    break
                elif self._reload:
                    self._reload = False
                    self.reload()
                elif self._refork:
                    self._refork = False
                    self.signal_log.warning('refork not implemented')
                    # stop listening to new connections
                    # refork the program (as we have been updated)
                    # just handle current open connection

                # ask the redirector process for stats
                self.redirector.requestStats()

                if self._softstop:
                    if self._listen == False:
                        self.proxy.rejecting()
                        self._listen = None
                    if self.client.softstop():
                        self._shutdown = True
                # only change listening if we are not shutting down
                elif self._listen is not None:
                    if self._listen:
                        self._shutdown = not self.proxy.accepting()
                        self._listen = None
                    else:
                        self.proxy.rejecting()
                        self._listen = None

                if self._toggle_debug:
                    self._toggle_debug = False
                    self.log_writer.toggleDebug()

                if self._decrease_spawn_limit:
                    count = self._decrease_spawn_limit
                    self.redirector.decreaseSpawnLimit(count)
                    self._decrease_spawn_limit = 0

                if self._increase_spawn_limit:
                    count = self._increase_spawn_limit
                    self.redirector.increaseSpawnLimit(count)
                    self._increase_spawn_limit = 0

                # cleanup idle connections
                # TODO: track all idle connections, not just the ones that have never sent data
                expired = self.reactor.client.expire()

                for expire_source, expire_count in expired.items():
                    if expire_source == 'proxy':
                        self.proxy.notifyClose(None, count=expire_count)

                    elif expire_source == 'icap':
                        self.icap.notifyClose(None, count=expire_count)

                    elif expire_source == 'passthrough':
                        self.passthrough.notifyClose(None, count=expire_count)

                    elif expire_source == 'tls':
                        self.tls.notifyClose(None, count=expire_count)

                    elif expire_source == 'web':
                        self.web.notifyClose(None, count=expire_count)

                # report if we saw too many connections
                if count_saturation == 0:
                    self.proxy.saturation()
                    self.web.saturation()

                if self.configuration.daemon.poll_interfaces and count_interface == 0:
                    self.interfaces()

            except KeyboardInterrupt:
                self.log.critical('^C received')
                self._shutdown = True

            except OSError, e:
                # This shoould never happen as we are limiting how many connections we accept
                if e.errno == 24:  # Too many open files
                    self.log.critical('Too many opened files, shutting down')
                    for line in traceback.format_exc().split('\n'):
                        self.log.critical(line)
                    self._shutdown = True
                else:
                    self.log.critical('unrecoverable io error')
                    for line in traceback.format_exc().split('\n'):
                        self.log.critical(line)
                    self._shutdown = True

            finally: