def _report_failure(self, server):
    """Mark the server as faulty and report a failure.

    The thread is created so that the built-in failure detector can
    continue monitoring the servers: if reporting the failure hangs,
    the detector still kills all connections to faulty servers, thus
    eventually freeing the thread. Note, though, that reporting a
    failure is not crash-safe, so it might fail without promoting a
    new server to master. In the future, we will circumvent this
    limitation.
    """
    try:
        _persistence.init_thread()

        server.status = MySQLServer.FAULTY
        self.__connection_manager.purge_connections(server)

        procedures = trigger(
            "REPORT_FAILURE", None, str(server.uuid),
            threading.current_thread().name,
            MySQLServer.FAULTY, False
        )
        executor = _executor.Executor()
        for procedure in procedures:
            executor.wait_for_procedure(procedure)

        _persistence.deinit_thread()
    finally:
        self.__thread_report_failure = False
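# The detector dispatches _report_failure through a helper so that a hanging
# report never blocks the monitoring loop. The method below is a minimal
# sketch, assuming the helper guards re-entry with the same
# __thread_report_failure flag that _report_failure clears in its finally
# block; the real _spawn_report_failure is not shown in this section.
def _spawn_report_failure(self, server):
    """Run _report_failure in a daemon thread (hypothetical sketch)."""
    if self.__thread_report_failure:
        # A report is already in flight; let it finish first.
        return
    self.__thread_report_failure = True
    report = threading.Thread(
        target=self._report_failure, args=(server, ),
        name="ReportFailure"
    )
    report.daemon = True
    report.start()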
def _run(self):
    """Function that verifies servers' availabilities.
    """
    ignored_status = [MySQLServer.FAULTY]
    quarantine = {}
    interval = FailureDetector._DETECTION_INTERVAL
    detections = FailureDetector._DETECTIONS
    detection_timeout = FailureDetector._DETECTION_TIMEOUT
    _persistence.init_thread()

    while self.__check:
        try:
            unreachable = set()
            group = Group.fetch(self.__group_id)
            if group is not None:
                for server in group.servers():
                    if server.status in ignored_status or \
                        MySQLServer.is_alive(server, detection_timeout):
                        if server.status == MySQLServer.FAULTY:
                            self.__connection_manager.purge_connections(
                                server
                            )
                        continue

                    unreachable.add(server.uuid)
                    _LOGGER.warning(
                        "Server (%s) in group (%s) is unreachable.",
                        server.uuid, self.__group_id
                    )

                    unstable = False
                    failed_attempts = 0
                    if server.uuid not in quarantine:
                        quarantine[server.uuid] = failed_attempts = 1
                    else:
                        failed_attempts = quarantine[server.uuid] + 1
                        quarantine[server.uuid] = failed_attempts
                    if failed_attempts >= detections:
                        unstable = True

                    can_set_faulty = group.can_set_server_faulty(
                        server, get_time()
                    )
                    if unstable and can_set_faulty:
                        self._spawn_report_failure(server)

            for uuid in quarantine.keys():
                if uuid not in unreachable:
                    del quarantine[uuid]
        except (_errors.ExecutorError, _errors.DatabaseError):
            pass
        except Exception as error:
            _LOGGER.exception(error)
        time.sleep(interval / detections)

    _persistence.deinit_thread()
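# Worked timing example for the loop above (illustrative numbers, not the
# shipped defaults): suppose FailureDetector._DETECTION_INTERVAL is 6
# seconds and FailureDetector._DETECTIONS is 3. The loop then sleeps
# interval / detections = 2 seconds per pass, so a server must fail three
# consecutive probes, roughly one full interval, before it is considered
# unstable and reported as faulty.
interval, detections = 6.0, 3                 # assumed values
probe_period = interval / detections          # 2.0 seconds between probes
time_to_report = probe_period * detections    # ~6.0 seconds of silence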
def check_credentials(group, command, config, protocol):
    """Check credentials using the configuration.

    :raises errors.CredentialError: if the login failed or the user
        has no permission.
    """
    if group not in ('user', 'role'):
        return

    _configure_connections(config)
    _persistence.init_thread()

    if not protocol:
        protocol = FABRIC_DEFAULT_PROTOCOL

    section = 'protocol.' + protocol
    username = config.get(section, 'user')
    password = config.get(section, 'password')
    realm = config.get(section, 'realm', vars=FABRIC_PROTOCOL_DEFAULTS)

    user = User.fetch_user(username, protocol=protocol)
    password_hash = _hash_password(username, password, protocol, config,
                                   realm)

    if user is None or user.password_hash != password_hash:
        _LOGGER.info("Failed login for user %s/%s", username, protocol)
        raise _errors.CredentialError("Login failed")
    elif not user.has_permission('core', group, command):
        _LOGGER.info("Permission denied for user %s/%s", username, protocol)
        raise _errors.CredentialError("No permission")
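# Hypothetical call site for check_credentials. The configuration file
# path and the group/command names below are illustrative only; a real
# deployment reads them from its own fabric.cfg.
from ConfigParser import SafeConfigParser

config = SafeConfigParser()
config.read("/etc/mysql/fabric.cfg")    # assumed location
try:
    check_credentials('user', 'add', config, 'xmlrpc')
except _errors.CredentialError as error:
    print("Access denied: %s" % (error, ))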
def run(self):
    """Process registered requests.
    """
    _LOGGER.info("Started XML-RPC-Session.")
    try:
        _persistence.init_thread()
    except Exception as error:
        _LOGGER.warning("Error connecting to backing store: (%s).", error)

    SessionThread.local_thread.thread = self

    while True:
        request, client_address = self.__server.dequeue_request()
        _LOGGER.debug(
            "Processing request (%s) from (%s) through thread (%s).",
            request, client_address, self
        )
        # There is no need to catch exceptions here because the method
        # process_request_thread already does so. It is the main entry
        # point in the code, which means that any uncaught exception
        # will be reported as xmlrpclib.Fault.
        self.__server.process_request_thread(request, client_address)
        _LOGGER.debug(
            "Finishing request (%s) from (%s) through thread (%s).",
            request, client_address, self
        )
        if self.__is_shutdown:
            self.__server.shutdown_now()
            try:
                _persistence.deinit_thread()
            except Exception as error:
                _LOGGER.warning(
                    "Error connecting to backing store: (%s).", error
                )
def setup_xmlrpc(options, config):
    # Set up the persistence.
    from mysql.fabric import persistence

    # Set up the manager.
    from mysql.fabric.services.manage import (
        _start,
        _configure_connections,
    )
    _configure_connections(config)
    persistence.setup()
    persistence.init_thread()
    _start(options, config)

    # Set up the client.
    url = "http://%s" % (config.get("protocol.xmlrpc", "address"),)
    proxy = xmlrpclib.ServerProxy(url)

    while True:
        try:
            proxy.manage.ping()
            break
        except Exception:
            pass

    return proxy
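# Example use of setup_xmlrpc (illustrative): the returned proxy is an
# ordinary xmlrpclib.ServerProxy, so remote calls look like attribute
# access. Which methods exist depends on the services _start loaded;
# manage.ping is the one the setup loop itself relies on.
proxy = setup_xmlrpc(options, config)    # options/config as in the caller
print(proxy.manage.ping())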
def run(self):
    """Process registered requests.
    """
    _LOGGER.info("Started XML-RPC-Session.")
    try:
        _persistence.init_thread()
    except Exception as error:
        _LOGGER.warning("Error connecting to backing store: (%s).", error)

    SessionThread.local_thread.thread = self

    while True:
        request, client_address = self.__server.dequeue_request()
        _LOGGER.debug(
            "Processing request (%s) from (%s) through thread (%s).",
            request, client_address, self
        )
        # There is no need to catch exceptions here because the method
        # process_request_thread already does so. It is the main entry
        # point in the code, which means that any uncaught exception
        # will be reported as xmlrpclib.Fault.
        self.__server.process_request_thread(request, client_address)
        _LOGGER.debug(
            "Finishing request (%s) from (%s) through thread (%s).",
            request, client_address, self
        )
        if self.__is_shutdown:
            self.__server.shutdown_now()
            try:
                _persistence.deinit_thread()
            except Exception as error:
                _LOGGER.warning(
                    "Error connecting to backing store: (%s).", error
                )
def setup(self):
    """Set up the MySQLRPC request handler."""
    self._handshaked = False
    self._authenticated = False
    self._curr_pktnr = 1
    persistence.init_thread()
    self._store = persistence.current_persister()
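# A matching teardown is needed so the per-thread state-store connection
# opened in setup() is released. This is a minimal sketch, assuming the
# handler follows the init_thread()/deinit_thread() pairing used elsewhere
# in this section; the real handler's finish() is not shown here.
def finish(self):
    """Close the per-thread state-store connection (hypothetical sketch)."""
    persistence.deinit_thread()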
def dispatch(self, *args):
    """Set up the Fabric storage system.
    """
    # Configure connections.
    _configure_connections(self.config)
    _persistence.init_thread()
    self.persister = _persistence.current_persister()

    self.execute(*args)
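# Hypothetical command built on dispatch(), assuming dispatch() above
# belongs to a Command base class: it wires up the connections and the
# per-thread persister before delegating to execute(), so a subclass only
# implements execute(). The class name and the persister call below are
# assumptions for illustration.
class PingStore(Command):
    """Check that the state store answers a trivial statement."""
    def execute(self):
        # self.persister was set by dispatch(); exec_stmt is assumed to
        # be the persister's statement interface.
        return self.persister.exec_stmt("SELECT 1")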
def _start(options, config):
    """Start the Fabric server.
    """
    # Remove temporary defaults files that might have been left behind
    # by former runs of Fabric.
    _backup.cleanup_temp_defaults_files()

    # Configure TTL.
    _setup_ttl(config)

    # Configure modules that are not dynamically loaded.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load information on all providers.
    providers.find_providers()

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Configure the Fabric node.
    fabric = FabricNode()
    reported = _utils.get_time()
    _LOGGER.info(
        "Fabric node version (%s) started.", fabric.version,
        extra={
            'subject' : str(fabric.uuid),
            'category' : MySQLHandler.NODE,
            'type' : MySQLHandler.START,
            'reported' : reported
        }
    )
    fabric.startup = reported

    # Start the executor, failure detector and then the service manager.
    # In this scenario, recovery is executed sequentially after starting
    # the executor and before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def _start(options, config):
    """Start the Fabric server.
    """
    # Remove temporary defaults files that might have been left behind
    # by former runs of Fabric.
    _backup.cleanup_temp_defaults_files()

    # Configure TTL.
    _setup_ttl(config)

    # Configure modules that are not dynamically loaded.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load information on all providers.
    providers.find_providers()

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Configure the Fabric node.
    fabric = FabricNode()
    reported = _utils.get_time()
    _LOGGER.info("Fabric node version (%s) started.", fabric.version,
        extra={
            'subject': str(fabric.uuid),
            'category': MySQLHandler.NODE,
            'type': MySQLHandler.START,
            'reported': reported
        })
    fabric.startup = reported

    # Start the executor, failure detector and then the service manager.
    # In this scenario, recovery is executed sequentially after starting
    # the executor and before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def _start(options, config):
    """Start the Fabric server.
    """
    # Configure modules that are not dynamically loaded.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Configure the Fabric node.
    fabric = FabricNode()
    reported = _utils.get_time()
    _LOGGER.info(
        "Fabric node starting.",
        extra={
            'subject' : str(fabric.uuid),
            'category' : MySQLHandler.NODE,
            'type' : MySQLHandler.START,
            'reported' : reported
        }
    )
    fabric.startup = reported

    # Start the executor, failure detector and then the service manager.
    # In this scenario, recovery is executed sequentially after starting
    # the executor and before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def _run(self):
    """Function that verifies servers' availabilities.
    """
    from mysql.fabric.server import (
        Group,
        MySQLServer,
        ConnectionManager,
    )

    ignored_status = [MySQLServer.FAULTY]
    quarantine = {}
    interval = FailureDetector._DETECTION_INTERVAL
    detections = FailureDetector._DETECTIONS
    detection_timeout = FailureDetector._DETECTION_TIMEOUT
    connection_manager = ConnectionManager()
    slave_deep_checks = FailureDetector._SLAVE_DEEP_CHECKS
    _persistence.init_thread()

    while self.__check:
        try:
            unreachable = set()
            group = Group.fetch(self.__group_id)
            if group is not None:
                for server in group.servers():
                    if server.status in ignored_status:
                        # Server is FAULTY: drop its connections and
                        # skip any further checks.
                        connection_manager.kill_connections(server)
                        continue
                    else:
                        # Server is not FAULTY.
                        if MySQLServer.is_alive(server, detection_timeout):
                            # Server is alive; how deeply slaves are
                            # checked depends on slave_deep_checks.
                            if slave_deep_checks:
                                is_master = (group.master == server.uuid)
                                if not is_master:
                                    # Check whether the master is dead
                                    # or alive.
                                    master_server = \
                                        MySQLServer.fetch(group.master)
                                    if MySQLServer.is_alive(
                                        master_server, detection_timeout):
                                        # The master is alive, so check
                                        # whether replication is healthy.
                                        server.connect()
                                        slave_issues, why_slave_issues = \
                                            _replication.check_slave_issues(
                                                server
                                            )
                                        if slave_issues:
                                            if why_slave_issues['io_error'] \
                                                and why_slave_issues[
                                                    'io_errno'] == 2003:
                                                # The slave is reconnecting
                                                # to the master; nothing to
                                                # do but log it.
                                                _LOGGER.info(why_slave_issues)
                                            else:
                                                # The slave threads are not
                                                # running: set the status
                                                # to SPARE.
                                                server.status = \
                                                    MySQLServer.SPARE
                                        server.disconnect()
                            continue
                        else:
                            unreachable.add(server.uuid)
                            _LOGGER.warning(
                                "Server (%s) in group (%s) is unreachable.",
                                server.uuid, self.__group_id
                            )

                            unstable = False
                            failed_attempts = 0
                            if server.uuid not in quarantine:
                                quarantine[server.uuid] = failed_attempts = 1
                            else:
                                failed_attempts = quarantine[server.uuid] + 1
                                quarantine[server.uuid] = failed_attempts
                            if failed_attempts >= detections:
                                unstable = True

                            can_set_faulty = group.can_set_server_faulty(
                                server, get_time()
                            )
                            if unstable and can_set_faulty:
                                # We have to make this transactional and
                                # make the failover (i.e. report failure)
                                # robust to failures. Otherwise, a master
                                # might be set to faulty and a new one
                                # never promoted.
                                server.status = MySQLServer.FAULTY
                                connection_manager.kill_connections(server)

                                procedures = trigger(
                                    "REPORT_FAILURE", None,
                                    str(server.uuid),
                                    threading.current_thread().name,
                                    MySQLServer.FAULTY, False
                                )
                                executor = _executor.Executor()
                                for procedure in procedures:
                                    executor.wait_for_procedure(procedure)

            for uuid in quarantine.keys():
                if uuid not in unreachable:
                    del quarantine[uuid]
        except (_errors.ExecutorError, _errors.DatabaseError):
            pass
        except Exception as error:
            _LOGGER.exception(error)
        time.sleep(interval)

    _persistence.deinit_thread()
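# For reference, the result of _replication.check_slave_issues() as
# consumed above, inferred only from the keys this function reads (the
# full dict may carry more fields). MySQL client error 2003 is
# CR_CONN_HOST_ERROR, "Can't connect to MySQL server", which is why it is
# treated as a transient reconnect instead of a reason to demote the
# slave to SPARE.
#
#   slave_issues, why_slave_issues = _replication.check_slave_issues(server)
#   # why_slave_issues contains at least:
#   #   'io_error': whether the I/O thread reported an error
#   #   'io_errno': the MySQL errno behind that error (e.g. 2003)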
def _run(self):
    """Function that verifies servers' availabilities.
    """
    from mysql.fabric.server import (
        Group,
        MySQLServer,
        ConnectionManager,
    )

    ignored_status = [MySQLServer.FAULTY]
    quarantine = {}
    interval = FailureDetector._DETECTION_INTERVAL
    detections = FailureDetector._DETECTIONS
    detection_timeout = FailureDetector._DETECTION_TIMEOUT
    connection_manager = ConnectionManager()
    _persistence.init_thread()

    while self.__check:
        try:
            unreachable = set()
            group = Group.fetch(self.__group_id)
            if group is not None:
                for server in group.servers():
                    if server.status in ignored_status or \
                        MySQLServer.is_alive(server, detection_timeout):
                        if server.status == MySQLServer.FAULTY:
                            connection_manager.kill_connections(server)
                        continue

                    unreachable.add(server.uuid)
                    _LOGGER.warning(
                        "Server (%s) in group (%s) is unreachable.",
                        server.uuid, self.__group_id
                    )

                    unstable = False
                    failed_attempts = 0
                    if server.uuid not in quarantine:
                        quarantine[server.uuid] = failed_attempts = 1
                    else:
                        failed_attempts = quarantine[server.uuid] + 1
                        quarantine[server.uuid] = failed_attempts
                    if failed_attempts >= detections:
                        unstable = True

                    can_set_faulty = group.can_set_server_faulty(
                        server, get_time()
                    )
                    if unstable and can_set_faulty:
                        # We have to make this transactional and make the
                        # failover (i.e. report failure) robust to
                        # failures. Otherwise, a master might be set to
                        # faulty and a new one never promoted.
                        server.status = MySQLServer.FAULTY
                        connection_manager.kill_connections(server)

                        procedures = trigger(
                            "REPORT_FAILURE", None, str(server.uuid),
                            threading.current_thread().name,
                            MySQLServer.FAULTY, False
                        )
                        executor = _executor.Executor()
                        for procedure in procedures:
                            executor.wait_for_procedure(procedure)

            for uuid in quarantine.keys():
                if uuid not in unreachable:
                    del quarantine[uuid]
        except (_errors.ExecutorError, _errors.DatabaseError):
            pass
        except Exception as error:
            _LOGGER.exception(error)
        time.sleep(interval / detections)

    _persistence.deinit_thread()