def _client_loop(self):
    """Run IRC connections back to back until one exits gracefully.

    Each pass builds a new ``girc.Client`` from ``self.irc_kwargs``, joins
    every channel in ``self.all_open_channels``, registers
    ``self._client_recv`` for Privmsg messages and publishes the client via
    ``self._client``. If the connection dies with an exception, the loop
    sleeps for the current backoff interval and tries again. If the client
    stops without raising, ``self.stop()`` is called and the loop returns.
    Any exception escaping the loop itself is passed to ``self.stop``.
    """
    try:
        retry = Backoff(start=0.1, limit=10, rate=5)
        while True:
            self.logger.info("Starting new irc connection")
            conn = girc.Client(**self.irc_kwargs)

            open_channels = self.all_open_channels
            self.logger.debug("Joining channels: {}".format(open_channels))
            for name in open_channels:
                conn.channel(name).join()

            conn.handler(self._client_recv, command=girc.message.Privmsg)
            self._client.set(conn)

            try:
                conn.start()
                self.logger.debug("Started new irc connection")
                # The connection came up, so the next failure starts from the
                # smallest delay again.
                retry.reset()
                conn.wait_for_stop()
            except Exception:
                self.logger.warning(
                    "irc connection died, retrying in {}".format(retry.peek()),
                    exc_info=True,
                )
                # clear _client if no-one else has
                if self._client.ready():
                    assert self._client.get() is conn
                    self._client = AsyncResult()
                gevent.sleep(retry.get())
            else:
                self.logger.info("irc connection exited gracefully, stopping")
                self.stop()  # graceful exit
                return
    except Exception as ex:
        self.stop(ex)
def main(config, interval=10, restart_in_progress=False, restart_errors=False,
         restart_all=False, no_update_state=False, log_level='DEBUG',
         one_pass=False, ignore_not_ready=False):
    """Repeatedly start jobs from the configured sheet until interrupted.

    ``config`` is the path of a JSON file providing ``sheet_id``,
    ``worksheet_title`` and ``creds``. The sheet is opened and ``start_jobs``
    is called every ``interval`` seconds. Any ``Exception`` in that cycle is
    logged and the sheet is re-opened after a backoff delay.

    ``restart_in_progress`` and ``restart_all`` only apply to the first pass.
    With ``one_pass`` the loop ends after the first successful pass. In every
    exit path the function waits for the remaining jobs before returning; on
    KeyboardInterrupt the jobs are killed first.
    """
    with open(config) as config_file:
        config = json.load(config_file)

    backdoor(6666)

    # Derives from BaseException so the retry handler below (which catches
    # Exception) lets it propagate to the outer try.
    class Stop(BaseException):
        pass

    logging.getLogger().setLevel(log_level)

    pool = gevent.pool.Pool(MAX_JOBS)
    retry_delay = Backoff(1, 60)

    try:
        while True:
            try:
                worksheet = open_sheet(
                    config['sheet_id'],
                    config['worksheet_title'],
                    config['creds'],
                )
                retry_delay.reset()
                while True:
                    start_jobs(
                        pool,
                        worksheet,
                        restart_in_progress=restart_in_progress,
                        restart_errors=restart_errors,
                        restart_all=restart_all,
                        no_update_state=no_update_state,
                        ignore_not_ready=ignore_not_ready,
                    )
                    if one_pass:
                        raise Stop
                    # restart in progress on first pass only (if at all)
                    restart_in_progress = restart_all = False
                    gevent.sleep(interval)
            except Exception:
                logging.exception("Main loop failure")
                gevent.sleep(retry_delay.get())
    except KeyboardInterrupt:
        logging.warning("Interrupt recieved")
        pool.kill(block=True)
    except Stop:
        pass

    logging.info("Waiting for {} jobs".format(len(pool.greenlets)))
    pool.join()
def test_custom_max_backoff(self):
    """Bumping from 4096 with ``max_backoff=7200`` is expected to yield 7200."""
    _backoff = Backoff(max_backoff=7200)
    _backoff.backoff_time = 4096
    _backoff._bump_backoff()
    self.assertEqual(7200, _backoff.backoff_time)
def test_max_backoff(self):
    """Bumping a default ``Backoff`` already at 3600 is expected to stay at 3600."""
    _backoff = Backoff()
    _backoff.backoff_time = 3600
    _backoff._bump_backoff()
    self.assertEqual(3600, _backoff.backoff_time)
def test_reset(self):
    """A successful decorated call is expected to clear the backoff state.

    The backoff is primed with a ``start_time`` three seconds in the past
    and a ``backoff_time`` of 2; after calling a decorated no-op both
    attributes are expected to be ``None``.
    """
    guard = Backoff()
    guard.start_time = time.time() - 3
    guard.backoff_time = 2

    @guard
    def succeed():
        pass

    succeed()

    self.assertEqual(None, guard.backoff_time)
    self.assertEqual(None, guard.start_time)
def test_reset(self):
    """A successful decorated call is expected to clear the backoff state.

    The backoff is primed with a ``start_time`` three seconds in the past
    and a ``backoff_time`` of 2; after calling a decorated no-op both
    attributes are expected to be ``None``.
    """
    guard = Backoff()
    guard.start_time = time.time() - 3
    guard.backoff_time = 2

    @guard
    def succeed():
        pass

    succeed()

    self.assertEqual(None, guard.backoff_time)
    self.assertEqual(None, guard.start_time)
def test_backoff_elapsed(self):
    """After one failure, a call two seconds later is expected to go through.

    The decorated function raises when called without an argument. Its first
    call fails, ``start_time`` is then moved two seconds into the past, and
    the following call with ``1`` is expected to return ``1``.
    """
    guard = Backoff()

    @guard
    def flaky(a=None):
        if a is None:
            raise Exception
        return a

    self._call_and_toss_exception(flaky)

    guard.start_time = time.time() - 2
    self.assertEqual(1, flaky(1))
def test_backoff_elapsed(self):
    """After one failure, a call two seconds later is expected to go through.

    The decorated function raises when called without an argument. Its first
    call fails, ``start_time`` is then moved two seconds into the past, and
    the following call with ``1`` is expected to return ``1``.
    """
    guard = Backoff()

    @guard
    def flaky(a=None):
        if a is None:
            raise Exception
        return a

    self._call_and_toss_exception(flaky)

    guard.start_time = time.time() - 2
    self.assertEqual(1, flaky(1))
def test_backoff_elapsed_raises_again(self):
    """A second failure two seconds after the first is expected to give 4.

    The decorated function fails once, ``start_time`` is moved two seconds
    into the past, and the function fails again. The second call must raise
    and ``backoff_time`` is then expected to equal 4.
    """
    guard = Backoff()

    @guard
    def flaky(a=None):
        if a is None:
            raise Exception
        return a

    self._call_and_toss_exception(flaky)

    guard.start_time = time.time() - 2
    with self.assertRaises(Exception):
        flaky()
    self.assertEqual(4, guard.backoff_time)
def test_backoff_elapsed_raises_again(self):
    """A second failure two seconds after the first is expected to give 4.

    The decorated function fails once, ``start_time`` is moved two seconds
    into the past, and the function fails again. The second call must raise
    and ``backoff_time`` is then expected to equal 4.
    """
    guard = Backoff()

    @guard
    def flaky(a=None):
        if a is None:
            raise Exception
        return a

    self._call_and_toss_exception(flaky)

    guard.start_time = time.time() - 2
    with self.assertRaises(Exception):
        flaky()
    self.assertEqual(4, guard.backoff_time)
def test_raises_exception(self):
    """One failing decorated call is expected to leave ``backoff_time`` at 2."""
    guard = Backoff()

    @guard
    def always_fails(a):
        raise Exception

    self._call_and_toss_exception(always_fails, 1)

    self.assertEqual(2, guard.backoff_time)
def test_raises_in_backoff(self):
    """Calling again right after a failure is expected to raise ``InBackoff``."""
    guard = Backoff()

    @guard
    def always_fails(a):
        raise Exception

    self._call_and_toss_exception(always_fails, 1)

    with self.assertRaises(InBackoff):
        always_fails(1)
def main(interval=10, restart_in_progress=False, restart_errors=False,
         restart_all=False, no_update_state=False, log_level='DEBUG',
         one_pass=False):
    """Repeatedly start jobs from the configured sheet until interrupted.

    The sheet named by the module-level ``CONFIG`` (``sheet_id``,
    ``worksheet_title``, ``creds``) is opened and ``start_jobs`` is called
    every ``interval`` seconds. Any ``Exception`` in that cycle is logged and
    the sheet is re-opened after a backoff delay.

    ``restart_in_progress`` and ``restart_all`` only apply to the first pass.
    With ``one_pass`` the loop ends after the first successful pass. In every
    exit path the function waits for the remaining jobs before returning; on
    KeyboardInterrupt the jobs are killed first.
    """
    backdoor(6666)

    # Derives from BaseException so the retry handler below (which catches
    # Exception) lets it propagate to the outer try.
    class Stop(BaseException):
        pass

    logging.basicConfig(level=log_level)

    pool = gevent.pool.Pool(MAX_JOBS)
    retry_delay = Backoff(1, 60)

    try:
        while True:
            try:
                worksheet = open_sheet(
                    CONFIG['sheet_id'],
                    CONFIG['worksheet_title'],
                    CONFIG['creds'],
                )
                retry_delay.reset()
                while True:
                    start_jobs(
                        pool,
                        worksheet,
                        restart_in_progress=restart_in_progress,
                        restart_errors=restart_errors,
                        restart_all=restart_all,
                        no_update_state=no_update_state,
                    )
                    if one_pass:
                        raise Stop
                    # restart in progress on first pass only (if at all)
                    restart_in_progress = restart_all = False
                    gevent.sleep(interval)
            except Exception:
                logging.exception("Main loop failure")
                gevent.sleep(retry_delay.get())
    except KeyboardInterrupt:
        logging.warning("Interrupt recieved")
        pool.kill(block=True)
    except Stop:
        pass

    logging.info("Waiting for {} jobs".format(len(pool.greenlets)))
    pool.join()
def __init__(self):
    """Set up callbacks, worker-thread state, log filtering and retry settings.

    No thread is started and no network call is made here.
    """
    # Callbacks; both default to no-ops.
    self.on_active_breakpoints_changed = lambda x: None
    self.on_idle = lambda: None

    self._debuggee_labels = {}
    self._service_account_auth = False
    self._debuggee_id = None
    self._wait_token = 'init'
    self._breakpoints = []
    self._main_thread = None
    self._transmission_thread = None
    self._transmission_thread_startup_lock = threading.Lock()
    # Bounded queue: once 100 updates are pending, appending a new one
    # discards the oldest.
    self._transmission_queue = deque(maxlen=100)
    self._new_updates = threading.Event()

    # Disable logging in the discovery API to avoid excessive logging.
    class _ChildLogFilter(logging.Filter):
        """Filter to eliminate info-level logging when called from this module."""

        def __init__(self, filter_levels=None):
            super(_ChildLogFilter, self).__init__()
            # A set literal is required here: set(logging.INFO) would try to
            # iterate over an int and raise TypeError.
            self._filter_levels = filter_levels or {logging.INFO}
            # Get name without extension to avoid .py vs .pyc issues
            self._my_filename = os.path.splitext(
                inspect.getmodule(_ChildLogFilter).__file__)[0]

        def filter(self, record):
            """Return False for filtered-level records emitted beneath this module."""
            if record.levelno not in self._filter_levels:
                return True
            # Drop the record if any frame on the current call stack belongs
            # to this module's source file.
            callerframes = inspect.getouterframes(inspect.currentframe())
            for f in callerframes:
                if os.path.splitext(f[1])[0] == self._my_filename:
                    return False
            return True

    self._log_filter = _ChildLogFilter({logging.INFO})
    discovery.logger.addFilter(self._log_filter)

    #
    # Configuration options (constants only modified by unit test)
    #

    # Delay before retrying failed request.
    self.register_backoff = Backoff()  # Register debuggee.
    self.list_backoff = Backoff()  # Query active breakpoints.
    self.update_backoff = Backoff()  # Update breakpoint.

    # Maximum number of times that the message is re-transmitted before it
    # is assumed to be poisonous and discarded
    self.max_transmit_attempts = 10
class GcpHubClient(object):
    """Controller API client.

    Registers the debuggee, queries the active breakpoints and sends
    breakpoint updates to the backend.

    This class supports two types of authentication: metadata service and
    service account. The mode is selected by calling EnableGceAuth or one of
    the EnableServiceAccountAuthJson / EnableServiceAccountAuthP12 methods.

    GcpHubClient creates a worker thread that communicates with the backend.
    The thread can be stopped with a Stop function, but it is optional since
    the worker thread is marked as daemon.
    """

    def __init__(self):
        """Set up callbacks, worker-thread state, log filtering and retry settings.

        No thread is started and no network call is made here.
        """
        # Callbacks; both default to no-ops.
        self.on_active_breakpoints_changed = lambda x: None
        self.on_idle = lambda: None

        self._debuggee_labels = {}
        self._service_account_auth = False
        self._debuggee_id = None
        self._wait_token = 'init'
        self._breakpoints = []
        # Defined up front so that the transmission thread, which can be
        # started by EnqueueBreakpointUpdate before Start is called, always
        # finds the attribute.
        self._shutdown = False
        self._main_thread = None
        self._transmission_thread = None
        self._transmission_thread_startup_lock = threading.Lock()
        # Bounded queue: once 100 updates are pending, appending a new one
        # discards the oldest.
        self._transmission_queue = deque(maxlen=100)
        self._new_updates = threading.Event()

        # Disable logging in the discovery API to avoid excessive logging.
        class _ChildLogFilter(logging.Filter):
            """Filter to eliminate info-level logging when called from this module."""

            def __init__(self, filter_levels=None):
                super(_ChildLogFilter, self).__init__()
                # A set literal is required here: set(logging.INFO) would try
                # to iterate over an int and raise TypeError.
                self._filter_levels = filter_levels or {logging.INFO}
                # Get name without extension to avoid .py vs .pyc issues
                self._my_filename = os.path.splitext(
                    inspect.getmodule(_ChildLogFilter).__file__)[0]

            def filter(self, record):
                """Return False for filtered-level records emitted beneath this module."""
                if record.levelno not in self._filter_levels:
                    return True
                # Drop the record if any frame on the current call stack
                # belongs to this module's source file.
                callerframes = inspect.getouterframes(inspect.currentframe())
                for f in callerframes:
                    if os.path.splitext(f[1])[0] == self._my_filename:
                        return False
                return True

        self._log_filter = _ChildLogFilter({logging.INFO})
        discovery.logger.addFilter(self._log_filter)

        #
        # Configuration options (constants only modified by unit test)
        #

        # Delay before retrying failed request.
        self.register_backoff = Backoff()  # Register debuggee.
        self.list_backoff = Backoff()  # Query active breakpoints.
        self.update_backoff = Backoff()  # Update breakpoint.

        # Maximum number of times that the message is re-transmitted before it
        # is assumed to be poisonous and discarded
        self.max_transmit_attempts = 10

    def InitializeDebuggeeLabels(self, flags):
        """Initialize debuggee labels from environment variables and flags.

        The caller passes all the flags that the debuglet got. This function
        will only use the flags used to label the debuggee. Flags take
        precedence over environment variables.

        Debuggee description is formatted from available flags.

        The 'projectid' label is filled in by calling self._project_id(), so
        one of the Enable*Auth methods must have been called beforehand.

        Args:
          flags: dictionary of debuglet command line flags.
        """
        self._debuggee_labels = {}

        for (label, env) in _DEBUGGEE_LABELS.items():
            if env and env in os.environ:
                # Special case for GAE_MODULE_NAME. We omit the "default" module
                # to stay consistent with AppEngine.
                if env == 'GAE_MODULE_NAME' and os.environ[env] == 'default':
                    continue
                self._debuggee_labels[label] = os.environ[env]

        if flags:
            self._debuggee_labels.update(
                {name: value for (name, value) in flags.items()
                 if name in _DEBUGGEE_LABELS})

        self._debuggee_labels['projectid'] = self._project_id()

    def EnableServiceAccountAuthP12(self, project_id, project_number,
                                    email, p12_file):
        """Selects service account authentication with a p12 file.

        Using this function is not recommended. Use
        EnableServiceAccountAuthJson for authentication, instead. The p12 file
        format is no longer recommended.

        Args:
          project_id: GCP project ID (e.g. myproject).
          project_number: numeric GCP project ID (e.g. 72386324623).
          email: service account identifier (email address) for use with
              p12_file.
          p12_file: (deprecated) path to an old-style p12 file with the
              private key.

        Raises:
          NotImplementedError indicates that the installed version of
          oauth2client does not support using a p12 file.
        """
        try:
            with open(p12_file, 'rb') as f:
                self._credentials = (
                    oauth2client.client.SignedJwtAssertionCredentials(
                        email, f.read(), scope=_CLOUD_PLATFORM_SCOPE))
        except AttributeError:
            raise NotImplementedError(
                'P12 key files are no longer supported. Please use a JSON '
                'credentials file instead.')
        self._project_id = lambda: project_id
        self._project_number = lambda: project_number

    def EnableServiceAccountAuthJson(self, project_id, project_number,
                                     auth_json_file):
        """Selects service account authentication using Json credentials.

        Args:
          project_id: GCP project ID (e.g. myproject).
          project_number: numeric GCP project ID (e.g. 72386324623).
          auth_json_file: the JSON keyfile
        """
        self._credentials = (
            service_account.ServiceAccountCredentials
            .from_json_keyfile_name(auth_json_file,
                                    scopes=_CLOUD_PLATFORM_SCOPE))
        self._project_id = lambda: project_id
        self._project_number = lambda: project_number

    def EnableGceAuth(self):
        """Selects to use local metadata service for authentication.

        The project ID and project number are also retrieved from the metadata
        service. It is done lazily from the worker thread. The motivation is
        to speed up initialization and be able to recover from failures.
        """
        self._credentials = AppAssertionCredentials()
        self._project_id = lambda: self._QueryGcpProject('project-id')
        self._project_number = (
            lambda: self._QueryGcpProject('numeric-project-id'))

    def Start(self):
        """Starts the worker thread."""
        self._shutdown = False

        self._main_thread = threading.Thread(target=self._MainThreadProc)
        self._main_thread.name = 'Cloud Debugger main worker thread'
        self._main_thread.daemon = True
        self._main_thread.start()

    def Stop(self):
        """Signals the worker threads to shut down and waits until it exits."""
        self._shutdown = True
        self._new_updates.set()  # Wake up the transmission thread.

        if self._main_thread is not None:
            self._main_thread.join()
            self._main_thread = None

        if self._transmission_thread is not None:
            self._transmission_thread.join()
            self._transmission_thread = None

    def EnqueueBreakpointUpdate(self, breakpoint):
        """Asynchronously updates the specified breakpoint on the backend.

        This function returns immediately. The worker thread is actually doing
        all the work. The worker thread is responsible to retry the
        transmission in case of transient errors.

        Args:
          breakpoint: breakpoint in either final or non-final state.
        """
        # The transmission thread is created lazily on the first update; the
        # lock keeps concurrent callers from starting it twice.
        with self._transmission_thread_startup_lock:
            if self._transmission_thread is None:
                self._transmission_thread = threading.Thread(
                    target=self._TransmissionThreadProc)
                self._transmission_thread.name = (
                    'Cloud Debugger transmission thread')
                self._transmission_thread.daemon = True
                self._transmission_thread.start()

        # Second tuple element is the retry counter.
        self._transmission_queue.append((breakpoint, 0))
        self._new_updates.set()  # Wake up the worker thread to send immediately.

    def _BuildService(self):
        """Builds an authorized client for the 'clouddebugger' v2 controller API."""
        http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS)
        http = self._credentials.authorize(http)

        api = apiclient.discovery.build('clouddebugger', 'v2', http=http)
        return api.controller()

    def _MainThreadProc(self):
        """Entry point for the worker thread."""
        registration_required = True
        while not self._shutdown:
            if registration_required:
                service = self._BuildService()
                registration_required, delay = self._RegisterDebuggee(service)

            if not registration_required:
                registration_required, delay = (
                    self._ListActiveBreakpoints(service))

            if self.on_idle is not None:
                self.on_idle()

            if not self._shutdown:
                time.sleep(delay)

    def _TransmissionThreadProc(self):
        """Entry point for the transmission worker thread."""
        reconnect = True

        while not self._shutdown:
            self._new_updates.clear()

            if reconnect:
                service = self._BuildService()
                reconnect = False

            reconnect, delay = self._TransmitBreakpointUpdates(service)

            # A delay of None waits until the next update is enqueued (or
            # Stop is called).
            self._new_updates.wait(delay)

    def _RegisterDebuggee(self, service):
        """Single attempt to register the debuggee.

        If the registration succeeds, sets self._debuggee_id to the registered
        debuggee ID.

        Args:
          service: client to use for API calls

        Returns:
          (registration_required, delay) tuple
        """
        try:
            request = {'debuggee': self._GetDebuggee()}

            try:
                response = service.debuggees().register(body=request).execute()

                self._debuggee_id = response['debuggee']['id']
                native.LogInfo('Debuggee registered successfully, ID: %s' % (
                    self._debuggee_id))
                self.register_backoff.Succeeded()
                return (False, 0)  # Proceed immediately to list active breakpoints.
            except BaseException:
                native.LogInfo('Failed to register debuggee: %s, %s' %
                               (request, traceback.format_exc()))
        except BaseException:
            native.LogWarning('Debuggee information not available: ' +
                              traceback.format_exc())

        return (True, self.register_backoff.Failed())

    def _ListActiveBreakpoints(self, service):
        """Single attempt query the list of active breakpoints.

        Must not be called before the debuggee has been registered. If the
        request fails, this function resets self._debuggee_id, which triggers
        repeated debuggee registration.

        Args:
          service: client to use for API calls

        Returns:
          (registration_required, delay) tuple
        """
        try:
            response = service.debuggees().breakpoints().list(
                debuggeeId=self._debuggee_id, waitToken=self._wait_token,
                successOnTimeout=True).execute()
            breakpoints = response.get('breakpoints') or []
            self._wait_token = response.get('nextWaitToken')
            if self._breakpoints != breakpoints:
                self._breakpoints = breakpoints
                native.LogInfo(
                    'Breakpoints list changed, %d active, wait token: %s' % (
                        len(self._breakpoints), self._wait_token))
                # Hand the callback its own copy so it cannot alter the list
                # kept for the next comparison.
                self.on_active_breakpoints_changed(
                    copy.deepcopy(self._breakpoints))
        except Exception:
            native.LogInfo('Failed to query active breakpoints: ' +
                           traceback.format_exc())

            # Forget debuggee ID to trigger repeated debuggee registration.
            # Once the registration succeeds, the worker thread will retry
            # this query
            self._debuggee_id = None

            return (True, self.list_backoff.Failed())

        self.list_backoff.Succeeded()
        return (False, 0)

    def _TransmitBreakpointUpdates(self, service):
        """Tries to send pending breakpoint updates to the backend.

        Sends all the pending breakpoint updates. In case of transient
        failures, the breakpoint is inserted back to the top of the queue.
        Application failures are not retried (for example updating breakpoint
        in a final state).

        Each pending breakpoint maintains a retry counter. After repeated
        transient failures the breakpoint is discarded and dropped from the
        queue.

        Args:
          service: client to use for API calls

        Returns:
          (reconnect, timeout) tuple. The first element ("reconnect") is set
          to true on unexpected HTTP responses. The caller should discard the
          HTTP connection and create a new one. The second element ("timeout")
          is set to None if all pending breakpoints were sent successfully.
          Otherwise returns time interval in seconds to stall before retrying.
        """
        reconnect = False
        retry_list = []

        # There is only one consumer, so two step pop is safe.
        while self._transmission_queue:
            breakpoint, retry_count = self._transmission_queue.popleft()

            try:
                service.debuggees().breakpoints().update(
                    debuggeeId=self._debuggee_id, id=breakpoint['id'],
                    body={'breakpoint': breakpoint}).execute()

                native.LogInfo(
                    'Breakpoint %s update transmitted successfully' % (
                        breakpoint['id']))
            except apiclient.errors.HttpError as err:
                # Treat 4xx error codes (except timeout) as application error
                # that will not be retried. All other errors are assumed to be
                # transient.
                status = err.resp.status
                is_transient = ((status >= 500) or (status == 408))
                if (is_transient and
                        retry_count < self.max_transmit_attempts - 1):
                    native.LogInfo('Failed to send breakpoint %s update: %s' % (
                        breakpoint['id'], traceback.format_exc()))
                    retry_list.append((breakpoint, retry_count + 1))
                elif is_transient:
                    native.LogWarning(
                        'Breakpoint %s retry count exceeded maximum' %
                        breakpoint['id'])
                else:
                    # This is very common if multiple instances are sending
                    # final update simultaneously.
                    native.LogInfo(
                        '%s, breakpoint: %s' % (err, breakpoint['id']))
            except Exception:
                native.LogWarning(
                    'Fatal error sending breakpoint %s update: %s' % (
                        breakpoint['id'], traceback.format_exc()))
                reconnect = True

        self._transmission_queue.extend(retry_list)

        if not self._transmission_queue:
            self.update_backoff.Succeeded()
            # Nothing to send, wait until next breakpoint update.
            return (reconnect, None)
        else:
            return (reconnect, self.update_backoff.Failed())

    def _QueryGcpProject(self, resource):
        """Queries project resource on a local metadata service.

        Args:
          resource: name appended to the metadata service project URL.

        Returns:
          Response body.

        Raises:
          RuntimeError: the metadata service replied with a status other
          than 200.
        """
        url = _LOCAL_METADATA_SERVICE_PROJECT_URL + resource
        http = httplib2.Http()
        response, content = http.request(
            url, headers={'Metadata-Flavor': 'Google'})
        if response['status'] != '200':
            raise RuntimeError(
                'HTTP error %s %s when querying local metadata service at %s' %
                (response['status'], content, url))

        return content

    def _GetDebuggee(self):
        """Builds the debuggee structure."""
        major_version = version.__version__.split('.')[0]

        debuggee = {
            'project': self._project_number(),
            'description': self._GetDebuggeeDescription(),
            'labels': self._debuggee_labels,
            'agentVersion': 'google.com/python2.7-' + major_version
        }

        source_context = self._ReadAppJsonFile('source-context.json')
        if source_context:
            debuggee['sourceContexts'] = [source_context]

        # An explicit source-contexts.json wins; otherwise the single source
        # context (if any) is wrapped into the extended form.
        source_contexts = self._ReadAppJsonFile('source-contexts.json')
        if source_contexts:
            debuggee['extSourceContexts'] = source_contexts
        elif source_context:
            debuggee['extSourceContexts'] = [{'context': source_context}]

        debuggee['uniquifier'] = self._ComputeUniquifier(debuggee)

        return debuggee

    def _GetDebuggeeDescription(self):
        """Formats debuggee description based on debuggee labels."""
        return '-'.join(self._debuggee_labels[label]
                        for label in _DESCRIPTION_LABELS
                        if label in self._debuggee_labels)

    def _ComputeUniquifier(self, debuggee):
        """Computes debuggee uniquifier.

        The debuggee uniquifier has to be identical on all instances.
        Therefore the uniquifier should not include any random numbers and
        should only be based on inputs that are guaranteed to be the same on
        all instances.

        Args:
          debuggee: complete debuggee message without the uniquifier

        Returns:
          Hex string of SHA1 hash of project information, debuggee labels and
          debuglet version.
        """
        uniquifier = hashlib.sha1()

        # Project information.
        uniquifier.update(self._project_id())
        uniquifier.update(self._project_number())

        # Debuggee information.
        uniquifier.update(str(debuggee))

        # Compute hash of application files if we don't have source context.
        # This way we can still distinguish between different deployments.
        if ('minorversion' not in debuggee.get('labels', []) and
                'sourceContexts' not in debuggee and
                'extSourceContexts' not in debuggee):
            uniquifier_computer.ComputeApplicationUniquifier(uniquifier)

        return uniquifier.hexdigest()

    def _ReadAppJsonFile(self, relative_path):
        """Reads JSON file from an application directory.

        Args:
          relative_path: file name relative to application root directory.

        Returns:
          Parsed JSON data or None if the file does not exist, can't be read
          or not a valid JSON file.
        """
        try:
            # sys.path[0] is used as the application root directory.
            with open(os.path.join(sys.path[0], relative_path), 'r') as f:
                return json.load(f)
        except (IOError, ValueError):
            return None
class ClientManager(gevent.Greenlet):
    """Wrapper for a client to manage clean restarts, etc"""
    # We mostly handle state via a synchronous main function, hence we base off Greenlet

    # Client option names that are forwarded from config to the client constructor.
    INIT_ARGS = {'hostname', 'nick', 'port', 'password', 'ident', 'real_name', 'twitch'}

    client = None
    _can_signal = False  # indicates if main loop is in good state to get a stop/restart
    _stop = False  # indicates to quit after next client quit

    class _Restart(Exception):
        """Indicates the client manager should cleanly disconnect and reconnect"""

    def __init__(self, name, handoff_data=None):
        """Create a manager for the client called ``name``.

        ``handoff_data``, if given, is used once by the main loop to adopt an
        existing connection instead of creating a new client.
        """
        self.name = name
        self.handoff_data = handoff_data
        self.logger = main_logger.getChild(name)
        super(ClientManager, self).__init__()

    def stop(self, message):
        """Gracefully stop the client.

        Note that ``message`` is currently not used: the quit message sent is
        always "Shutting down".
        """
        self._stop = True
        if self._can_signal:
            self.client.quit("Shutting down", block=False)
        else:
            # we are mid-restart or similar, just kill the main loop
            self.kill(block=False)

    def restart(self, message):
        """Gracefully restart the client, using ``message`` as the restart reason."""
        # if can_signal is false, restarting isn't a valid operation (ie. we're already restarting)
        # otherwise, send a _Restart exception to the main loop
        if self._can_signal:
            self.kill(self._Restart(message), block=False)

    def handoff(self):
        """Gracefully shut down and prepare for handoff.

        This stops the client and returns a dict suitable to pass as
        config.handoff_data[name] to a child or re-exec()ed process.

        However, if the client is not currently in a good state for handoff
        (eg. it is currently restarting) this method will still stop the
        client manager, but will return None. In this case, there was no state
        to handoff so the best thing to do is let the child re-create a new
        client.
        """
        # Note this method intentionally leaks an fd so we can't accidentially close it
        # due to destructors. This fd is then passed onto the child / re-exec()ed process.

        self.logger.info("Attempting to handoff")

        if not self._can_signal:
            self.logger.info("Handoff aborted - client is not running")
            # we are mid-restart or similar, just kill the main loop
            self.kill(block=False)
            return

        try:
            self.client._prepare_for_handoff()
        except Exception:
            # this can happen if we're mid-start, best thing to do is just abort
            self.logger.info("Handoff aborted - client in bad state")
            self.client.stop()
            self.kill(block=False)
            return

        data = self.client._get_handoff_data()
        data['fd'] = os.dup(self.client._socket.fileno())

        self.logger.info("Handoff initiated with data {!r}".format(data))

        # this will gracefully stop, which will cause the main loop to exit
        self.client._finalize_handoff()

        return data

    def _parse_config_plugins(self):
        """Return the configured plugins as a list of ``(name, args)`` pairs.

        A config entry of the form ``name:a,b`` gives ``(name, ['a', 'b'])``;
        an entry without a colon gives ``(name, ())``.
        """
        plugins = []
        for name in config.clients_with_defaults[self.name].get('plugins', []):
            args = ()
            if ':' in name:
                name, args = name.split(':', 1)
                args = args.split(',')
            plugins.append((name, args))
        return plugins

    def _run(self):
        """Main loop: keep a client running until stopped or it exits cleanly.

        Registers this manager in ``clients`` for the duration of the loop.
        Each pass builds a client (adopting handoff data on the first pass if
        present), enables plugins, joins channels and waits for the client to
        stop. Failures are retried after a backoff delay; a ``_Restart``
        reconnects immediately.
        """
        if self.name in clients:
            return  # already running, ignore second attempt to start
        clients[self.name] = self

        try:
            self.retry_timer = Backoff(RETRY_START, RETRY_LIMIT, RETRY_FACTOR)

            while not self._stop:
                if self.name not in config.clients_with_defaults:
                    raise Exception("No such client {!r}".format(self.name))
                options = config.clients_with_defaults[self.name]
                channels = options.get('channels', [])
                plugins = self._parse_config_plugins()

                try:
                    if self.handoff_data:
                        # Consume the handoff data before using it. If the
                        # handoff fails part-way, the next pass must start a
                        # fresh client rather than retry with a dict that no
                        # longer contains 'fd'.
                        handoff_data, self.handoff_data = self.handoff_data, None
                        self.logger.info("Accepting handoff with data {!r}".format(handoff_data))
                        client_sock = socket.fromfd(handoff_data.pop('fd'),
                                                    socket.AF_INET, socket.SOCK_STREAM)
                        self.client = EkimbotClient._from_handoff(
                            client_sock, name=self.name, logger=self.logger,
                            **handoff_data)
                    else:
                        self.logger.info("Starting client")
                        self.client = EkimbotClient(
                            self.name, logger=self.logger,
                            **{key: options[key] for key in self.INIT_ARGS if key in options})

                    self.logger.info("Enabling {} plugins".format(len(plugins)))
                    for plugin, args in plugins:
                        self.logger.debug("Enabling plugin {} with args {}".format(plugin, args))
                        ClientPlugin.enable(plugin, self.client, *args)

                    self.logger.info("Joining {} channels".format(len(channels)))
                    for channel in channels:
                        self.logger.debug("Joining channel {}".format(channel))
                        self.client.channel(channel).join()

                    try:
                        self._can_signal = True
                        self.client.start()
                        self.logger.debug("Client started")
                        self.retry_timer.reset()
                        self.client.wait_for_stop()
                        self.logger.info("Client exited cleanly, not re-connecting")
                        break
                    finally:
                        self._can_signal = False

                except Exception as ex:
                    if isinstance(ex, self._Restart):
                        self.logger.info("Client gracefully restarting: {}".format(ex))
                        try:
                            self.client.quit(str(ex))
                        except Exception:
                            self.logger.warning("Client failed during graceful restart",
                                                exc_info=True)
                    else:
                        self.logger.warning(
                            "Client failed, re-connecting in {}s".format(self.retry_timer.peek()),
                            exc_info=True)

                    # Only real failures back off; a requested restart
                    # reconnects straight away.
                    if not self._stop and not isinstance(ex, self._Restart):
                        gevent.sleep(self.retry_timer.get())

        except Exception:
            self.logger.critical("run_client failed with unhandled exception")
            raise
        finally:
            assert clients[self.name] is self
            del clients[self.name]
def _run(self):
    """Main loop: keep a client running until stopped or it exits cleanly.

    Registers this manager in ``clients`` for the duration of the loop. Each
    pass builds a client (adopting handoff data on the first pass if
    present), enables plugins, joins channels and waits for the client to
    stop. Failures are retried after a backoff delay; a ``_Restart``
    reconnects immediately.
    """
    if self.name in clients:
        return  # already running, ignore second attempt to start
    clients[self.name] = self

    try:
        self.retry_timer = Backoff(RETRY_START, RETRY_LIMIT, RETRY_FACTOR)

        while not self._stop:
            if self.name not in config.clients_with_defaults:
                raise Exception("No such client {!r}".format(self.name))
            options = config.clients_with_defaults[self.name]
            channels = options.get('channels', [])
            plugins = self._parse_config_plugins()

            try:
                if self.handoff_data:
                    # Consume the handoff data before using it. If the
                    # handoff fails part-way, the next pass must start a
                    # fresh client rather than retry with a dict that no
                    # longer contains 'fd'.
                    handoff_data, self.handoff_data = self.handoff_data, None
                    self.logger.info("Accepting handoff with data {!r}".format(handoff_data))
                    client_sock = socket.fromfd(handoff_data.pop('fd'),
                                                socket.AF_INET, socket.SOCK_STREAM)
                    self.client = EkimbotClient._from_handoff(
                        client_sock, name=self.name, logger=self.logger,
                        **handoff_data)
                else:
                    self.logger.info("Starting client")
                    self.client = EkimbotClient(
                        self.name, logger=self.logger,
                        **{key: options[key] for key in self.INIT_ARGS if key in options})

                self.logger.info("Enabling {} plugins".format(len(plugins)))
                for plugin, args in plugins:
                    self.logger.debug("Enabling plugin {} with args {}".format(plugin, args))
                    ClientPlugin.enable(plugin, self.client, *args)

                self.logger.info("Joining {} channels".format(len(channels)))
                for channel in channels:
                    self.logger.debug("Joining channel {}".format(channel))
                    self.client.channel(channel).join()

                try:
                    self._can_signal = True
                    self.client.start()
                    self.logger.debug("Client started")
                    self.retry_timer.reset()
                    self.client.wait_for_stop()
                    self.logger.info("Client exited cleanly, not re-connecting")
                    break
                finally:
                    self._can_signal = False

            except Exception as ex:
                if isinstance(ex, self._Restart):
                    self.logger.info("Client gracefully restarting: {}".format(ex))
                    try:
                        self.client.quit(str(ex))
                    except Exception:
                        self.logger.warning("Client failed during graceful restart",
                                            exc_info=True)
                else:
                    self.logger.warning(
                        "Client failed, re-connecting in {}s".format(self.retry_timer.peek()),
                        exc_info=True)

                # Only real failures back off; a requested restart reconnects
                # straight away.
                if not self._stop and not isinstance(ex, self._Restart):
                    gevent.sleep(self.retry_timer.get())

    except Exception:
        self.logger.critical("run_client failed with unhandled exception")
        raise
    finally:
        assert clients[self.name] is self
        del clients[self.name]
def main():
    """Run the interactive IRC client, reconnecting with backoff on failure.

    Reads settings from ``CONF``, configures logging, resolves the password
    (prompting if needed), optionally starts a backdoor, picks the server to
    connect to when ``twitch`` is set, then runs the client in a retry loop
    until it stops without raising.
    """
    # loads from the default config file, then argv and env. setting --conf allows you
    # to specify a conf file at "argv level" priority, overriding the defaults.
    # NOTE(review): the second path component is absolute, so on POSIX
    # os.path.join discards the dirname and this is just '/etc/mikeirc.conf'.
    CONF.load_all(conf_file=os.path.join(os.path.dirname(__file__), '/etc/mikeirc.conf'))  # this is horrible

    # required keys
    host = CONF['host']
    CONF['nick']  # just check it's there
    CONF['channel']

    # optional keys. note that defaults are None (which works as False)
    port = int(CONF.port or 6667)
    backdoor = CONF.backdoor
    twitch = CONF.twitch
    password = CONF.password

    log_args = {
        'level': CONF.get('log', 'WARNING').upper(),
    }
    if CONF.log_file:
        log_args['filename'] = CONF.log_file
    logging.basicConfig(**log_args)

    # resolve password config options to actual password values
    if password is None and not CONF.no_auth:
        password = getpass("Password for {}: ".format(CONF.nick))
    if not password:  # password == '' is different to password == None
        password = None
    if twitch:
        nickserv_password = None
    elif password:
        nickserv_password = "******".format(CONF.email, password) if CONF.email else password
        password = None
    else:
        nickserv_password = None

    if backdoor:
        if backdoor is True:
            backdoor = 1235
        gtools.backdoor(backdoor)

    if twitch:
        # make changes to host
        if not isinstance(twitch, basestring):
            print("Loading chat server for channel...")
            resp = requests.get('http://tmi.twitch.tv/servers',
                                params={'channel': CONF.channel.lstrip('#')})
            resp.raise_for_status()
            servers = resp.json()['servers']
            server = random.choice(servers)
            host, _ = server.split(':')
            print("Using twitch server: %s" % host)
        elif twitch == 'event':
            host = random.choice(TWITCH_EVENT_SERVERS)
            print('Using twitch event server: %s' % host)
        else:
            host = twitch
            print('Using custom twitch server: %s' % host)
        # make channel owner bold
        USER_HIGHLIGHTS[CONF.channel.lstrip('#').lower()] = '1'

    backoff = Backoff(0.2, 10, 2)
    while True:
        # Start every attempt without a client, so that a failure while
        # constructing the new one does not make the cleanup below quit the
        # previous attempt's client a second time.
        client = None
        try:
            client = Client(host, CONF.nick, port, real_name=CONF.real_name,
                            password=password, nickserv_password=nickserv_password,
                            twitch=twitch, ssl=CONF.ssl)
            channel = client.channel(CONF.channel)
            channel.join()

            editor = LineEditing(
                input_fn=read,
                completion=lambda prefix: complete_from(channel.users.users)(prefix.lower()),
                gevent_handle_sigint=True)

            client.handler(lambda client, msg: generic_recv(editor, client, msg))

            client.start()

            # spawn input greenlet in client's Group, linking its lifecycle to the client
            client._group.spawn(in_worker, client, editor)

            backoff.reset()  # successful startup

            client.wait_for_stop()
        except Exception:
            traceback.print_exc()
            delay = backoff.get()
            print("retrying in %.2f seconds..." % delay)
            gevent.sleep(delay)
        else:
            break
        finally:
            if client:
                try:
                    with gevent.Timeout(CLEAN_QUIT_TIMEOUT):
                        client.quit("Quitting")
                except (Exception, KeyboardInterrupt, gevent.Timeout) as ex:
                    # Clean quit failed or timed out: force the client down.
                    # This is best-effort cleanup, so a failure here is
                    # deliberately ignored.
                    try:
                        client.stop(ex)
                    except Exception:
                        pass