Exemplo n.º 1
0
    def test_init(self):
        """Check how an ErrorLog object is built.

        The object must expose the very same values it was created from,
        and creating it from `whens` and `reporters` lists of different
        lengths must raise an AssertionError.
        """
        # Well-formed input: attributes mirror the constructor arguments.
        period = get_time_delta(1)
        reference = get_time()
        whens = [30, 40]
        reporters = ["reporter", "reporter"]
        error_log = ErrorLog(self.server, period, reference, whens, reporters)
        self.assertEqual(error_log.server_uuid, self.server.uuid)
        self.assertEqual(error_log.interval, period)
        self.assertEqual(error_log.now, reference)
        self.assertEqual(error_log.whens, whens)
        self.assertEqual(error_log.reporters, reporters)

        # Mismatched input: four timestamps but no reporter at all.
        period = get_time_delta(1)
        reference = get_time()
        mismatched = (self.server, period, reference, [0, 0, 0, 0], [])
        self.assertRaises(AssertionError, ErrorLog, *mismatched)
Exemplo n.º 2
0
 def test_node_view(self):
     """Test that node events written to the log can be queried back.

     Three node events are emitted through `_LOGGER`, each one carrying
     the MySQLHandler-specific `extra` fields: a start and a stop for a
     first node and a start for a second node. The `log` table is then
     queried for node start events only, so two rows are expected.

     Note that there is no specific view to retrieve information on a
     Fabric node; the statement built below plays that role.
     """
     # First node: started and then stopped.
     node_id_1 = _uuid.uuid4()
     node_startup_1 = _utils.get_time()
     _LOGGER.debug("Fabric Node started.",
         extra={
             'subject' : str(node_id_1),
             'category' : MySQLHandler.NODE,
             'type' : MySQLHandler.START,
             'reported' : node_startup_1,
         }
     )
     node_stop_1 = _utils.get_time()
     # The message used to say "started" for this STOP event, which made
     # the log misleading; the query below only looks at START events, so
     # the test outcome does not depend on the message text.
     _LOGGER.debug("Fabric Node stopped.",
         extra={
             'subject' : str(node_id_1),
             'category' : MySQLHandler.NODE,
             'type' : MySQLHandler.STOP,
             'reported' : node_stop_1,
         }
     )

     # Second node: only started.
     node_id_2 = _uuid.uuid4()
     node_startup_2 = _utils.get_time()
     _LOGGER.debug("Fabric Node started.",
         extra={
             'subject' : str(node_id_2),
             'category' : MySQLHandler.NODE,
             'type' : MySQLHandler.START,
             'reported' : node_startup_2,
         }
     )

     # One row per logged event matching the category and type given as
     # parameters, ordered by node and startup time.
     node_view = ("SELECT subject as node_id, "
         "TIMEDIFF(UTC_TIMESTAMP(), reported) as node_uptime, "
         "reported as node_startup FROM log WHERE category = %s "
         "and type = %s ORDER BY node_id, node_startup"
     )
     persister = _persistence.current_persister()
     res = persister.exec_stmt(
         node_view, {
             "params" : (
                 MySQLHandler.idx_category(MySQLHandler.NODE),
                 MySQLHandler.idx_type(MySQLHandler.START)
             )
         }
     )

     # Two START events were logged above; the STOP event is filtered out.
     self.assertEqual(len(res), 2)
    def _run(self):
        """Function that verifies servers' availabilities.

        While the detector is enabled, the group is fetched and each of its
        servers is probed. A server that does not answer is counted in a
        local quarantine; once the number of consecutive failed checks
        reaches the configured number of detections and the group allows
        the server to be set faulty, a failure report is spawned. Servers
        that were reachable in the current round are removed from the
        quarantine.

        Executor and database errors are ignored, any other exception is
        logged, and in both cases the loop goes on after sleeping.
        """
        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                if group is not None:
                    for server in group.servers():
                        if server.status in ignored_status or \
                            MySQLServer.is_alive(server, detection_timeout):
                            # Nothing to detect here; connections to a
                            # faulty server are purged though.
                            if server.status == MySQLServer.FAULTY:
                                self.__connection_manager.purge_connections(
                                    server
                                )
                            continue

                        unreachable.add(server.uuid)

                        _LOGGER.warning(
                            "Server (%s) in group (%s) is unreachable.",
                            server.uuid, self.__group_id
                        )

                        # Count the consecutive rounds in which the server
                        # did not answer.
                        failed_attempts = quarantine.get(server.uuid, 0) + 1
                        quarantine[server.uuid] = failed_attempts
                        unstable = failed_attempts >= detections

                        can_set_faulty = group.can_set_server_faulty(
                            server, get_time()
                        )
                        if unstable and can_set_faulty:
                            self._spawn_report_failure(server)

                # Iterate over a snapshot of the keys: entries are deleted
                # in the loop and, in Python 3, changing the size of a
                # dictionary while iterating over it raises RuntimeError.
                for uuid in list(quarantine):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            # Several checks are done per detection interval.
            time.sleep(interval / detections)

        _persistence.deinit_thread()
Exemplo n.º 4
0
 def test_check_instability(self):
     """Test whether a server can be considered unstable or not.

     Three errors are registered for the server, reported by three
     different clients at `now`, `now` - 5 and `now` - 11 time units,
     while the interval of interest is 10 time units. After refreshing
     the ErrorLog object, `is_unstable` is checked against different
     thresholds. The expected results below are consistent with only the
     first two errors being taken into account.
     """
     interval = get_time_delta(10)
     now = get_time()
     st = ErrorLog(self.server, interval, now, [], [])
     ErrorLog.add(self.server, now, "client:1000", "error")
     ErrorLog.add(self.server, now - get_time_delta(5), "client:2000",
                  "error")
     ErrorLog.add(self.server, now - get_time_delta(11), "client:3000",
                  "error")
     st.refresh()

     # Thresholds that the registered errors are expected to reach.
     self.assertEqual(
         st.is_unstable(n_notifications=1,
                        n_reporters=1,
                        filter_reporter=None), True)
     self.assertEqual(
         st.is_unstable(n_notifications=2,
                        n_reporters=2,
                        filter_reporter=None), True)

     # Thresholds asking for more notifications or more reporters than
     # expected to be available.
     self.assertEqual(
         st.is_unstable(n_notifications=3,
                        n_reporters=2,
                        filter_reporter=None), False)
     self.assertEqual(
         st.is_unstable(n_notifications=2,
                        n_reporters=3,
                        filter_reporter=None), False)

     # Same check with a reporter filter in place.
     self.assertEqual(
         st.is_unstable(n_notifications=1,
                        n_reporters=1,
                        filter_reporter=["client:2000"]), True)
Exemplo n.º 5
0
def _node_view():
    """Build the single-row view that describes this Fabric node.

    :return: List with one row made of the node's identifier, the time
             elapsed since its startup and its startup time, each one
             converted to a string.
    """
    node = FabricNode()
    identifier = node.uuid
    started_at = node.startup
    row = (identifier, _utils.get_time() - started_at, started_at)

    return [[str(column) for column in row]]
Exemplo n.º 6
0
def _append_error_log(server_id, reporter, error):
    """Retrieve the server and register an error reported against it.

    Note that the server's status is not inspected here.

    :param server_id: Identifier used to retrieve the server.
    :param reporter: Who reported the error.
    :param error: Error that was reported.
    :return: Tuple with the time the error was registered at and the
             server object.
    """
    target = _retrieve_server(server_id)
    reported_at = get_time()
    _error_log.ErrorLog.add(target, reported_at, reporter, error)

    _LOGGER.warning(
        "Reported issue (%s) for server (%s).", error, target.uuid
    )

    return reported_at, target
Exemplo n.º 7
0
def _append_error_log(server_id, reporter, error):
    """Register in the error log an issue reported for a server.

    The server is retrieved from its identifier; its status is not
    checked by this function.

    :param server_id: Identifier used to retrieve the server.
    :param reporter: Who reported the issue.
    :param error: Issue that was reported.
    :return: Pair (registration time, server object).
    """
    faulty_candidate = _retrieve_server(server_id)
    when = get_time()
    _error_log.ErrorLog.add(faulty_candidate, when, reporter, error)

    server_uuid = faulty_candidate.uuid
    _LOGGER.warning("Reported issue (%s) for server (%s).", error, server_uuid)

    result = (when, faulty_candidate)
    return result
Exemplo n.º 8
0
def _start(options, config):
    """Bring the Fabric server up.

    The statically loaded modules are configured, providers and services
    are loaded, the state store is initialized, the node startup is logged
    and finally the executor, the recovery, the failure detector and the
    service manager are started, in this order.

    :param options: Options handed over to the service manager when the
                    services are loaded.
    :param config: Configuration handed over to the modules being
                   configured and to the service manager.
    """
    # Get rid of temporary defaults files that former runs of Fabric
    # might have left behind.
    _backup.cleanup_temp_defaults_files()

    # Configure the TTL.
    _setup_ttl(config)

    # Configure the modules that are not dynamically loaded.
    for module in (_server, _error_log, _failure_detector):
        module.configure(config)

    # Load information on all providers.
    providers.find_providers()

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Log the startup of this Fabric node and remember when it happened.
    node = FabricNode()
    started_at = _utils.get_time()
    node_version = node.version
    start_event = {
        'subject': str(node.uuid),
        'category': MySQLHandler.NODE,
        'type': MySQLHandler.START,
        'reported': started_at,
    }
    _LOGGER.info(
        "Fabric node version (%s) started. ", node_version, extra=start_event
    )
    node.startup = started_at

    # Start the executor, failure detector and then service manager. The
    # recovery runs sequentially, after the executor has been started and
    # before the service manager is started.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
Exemplo n.º 9
0
    def test_init(self):
        """Exercise the ErrorLog constructor.

        First it is checked that the object reports back the parameters
        it was given, then that lists of `whens` and `reporters` which
        differ in length are rejected with an AssertionError.
        """
        # The input parameters must be found unchanged in the object.
        delta = get_time_delta(1)
        moment = get_time()
        given_whens = [30, 40]
        given_reporters = ["reporter", "reporter"]
        log = ErrorLog(self.server, delta, moment, given_whens,
                       given_reporters)
        checks = (
            (log.server_uuid, self.server.uuid),
            (log.interval, delta),
            (log.now, moment),
            (log.whens, given_whens),
            (log.reporters, given_reporters),
        )
        for found, wanted in checks:
            self.assertEqual(found, wanted)

        # Lists with different lengths must raise an exception.
        delta = get_time_delta(1)
        moment = get_time()
        given_whens = [0, 0, 0, 0]
        given_reporters = []
        self.assertRaises(
            AssertionError, ErrorLog,
            self.server, delta, moment, given_whens, given_reporters
        )
Exemplo n.º 10
0
def _append_error_log(server_id, reporter, error):
    """Register an error reported against a server that is not faulty.

    :param server_id: Identifier used to retrieve the server.
    :param reporter: Who reported the error.
    :param error: Error that was reported.
    :return: Tuple with the time the error was registered at and the
             server object.
    :raises ServerError: If the server is already marked as faulty.
    """
    target = _retrieve_server(server_id)
    already_faulty = target.status == _server.MySQLServer.FAULTY
    if already_faulty:
        message = "Server (%s) is already marked as faulty." % (target.uuid, )
        raise _errors.ServerError(message)

    _LOGGER.warning(
        "Reported issue (%s) for server (%s).", error, target.uuid
    )

    reported_at = get_time()
    _error_log.ErrorLog.add(target, reported_at, reporter, error)

    return reported_at, target
Exemplo n.º 11
0
def _start(options, config):
    """Start the Fabric server.

    Steps are executed sequentially: cleanup of leftovers, configuration
    of the modules, loading of providers and services, initialization of
    the state store, logging of the node startup and, at last, startup of
    the executor, recovery, failure detector and service manager.

    :param options: Options passed on when the services are loaded.
    :param config: Configuration passed on to the modules and to the
                   service manager.
    """
    # Temporary defaults files might have been left behind by former
    # runs of Fabric: remove them.
    _backup.cleanup_temp_defaults_files()

    # Set the TTL up.
    _setup_ttl(config)

    # Modules that are not dynamically loaded are configured one by one.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Find out information on all providers.
    providers.find_providers()

    # Have the service manager load all services.
    service_loader = _services.ServiceManager()
    service_loader.load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Verify the maximum number of threads.
    _utils.check_number_threads()

    # Configure the Fabric node and log that it has been started.
    this_node = FabricNode()
    startup_time = _utils.get_time()
    version = this_node.version
    extra_fields = dict((
        ('subject', str(this_node.uuid)),
        ('category', MySQLHandler.NODE),
        ('type', MySQLHandler.START),
        ('reported', startup_time),
    ))
    _LOGGER.info("Fabric node version (%s) started. ", version,
                 extra=extra_fields)
    this_node.startup = startup_time

    # The executor goes first, then the recovery is sequentially executed,
    # then the failure detector registers the groups and only after that
    # the service manager is started.
    executor_handler = _events.Handler()
    executor_handler.start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    service_manager = _services.ServiceManager()
    service_manager.start()
Exemplo n.º 12
0
def _append_error_log(server_id, reporter, error):
    """Make sure the server is not faulty yet and register the error.

    :param server_id: Identifier used to retrieve the server.
    :param reporter: Who reported the issue.
    :param error: Issue that was reported.
    :return: Pair (registration time, server object).
    :raises ServerError: If the server is already marked as faulty.
    """
    reported_server = _retrieve_server(server_id)
    server_uuid_args = (reported_server.uuid, )

    if reported_server.status == _server.MySQLServer.FAULTY:
        raise _errors.ServerError(
            "Server (%s) is already marked as faulty." % server_uuid_args
        )

    _LOGGER.warning(
        "Reported issue (%s) for server (%s).", error, reported_server.uuid
    )

    when = get_time()
    _error_log.ErrorLog.add(reported_server, when, reporter, error)

    outcome = (when, reported_server)
    return outcome
Exemplo n.º 13
0
    def fetch(server, interval, now=None, persister=None):
        """Build the ErrorLog object that corresponds to a server.

        :param server: Server whose error has been reported.
        :param interval: Interval of interest.
        :param now: Consider from `now` until `now` - `interval`. When it
                    is not provided, the current time is used.
        :param persister: Accepted but not used by this method.
        :return: ErrorLog object.
        """
        # Imported here rather than at module level, presumably to avoid
        # a circular import.
        from mysql.fabric.server import MySQLServer
        assert isinstance(server, MySQLServer)

        if not now:
            now = get_time()
        whens, reporters = ErrorLog.compute(server.uuid, interval, now)
        return ErrorLog(server, interval, now, whens, reporters)
Exemplo n.º 14
0
    def execute(self):
        """Statistics on the Fabric node.

        It returns information on the Fabric node, specifically a result
        set with a single row and the following fields: node
        identification, how long it has been running and when it was
        started.
        """
        node = FabricNode()
        identifier = node.uuid
        started_at = node.startup
        running_for = _utils.get_time() - started_at

        columns = ('node_id', 'node_uptime', 'node_startup')
        result_set = ResultSet(names=columns, types=(str, str, str))
        result_set.append_row([identifier, running_for, started_at])

        return CommandResult(None, results=result_set)
Exemplo n.º 15
0
    def fetch(server, interval, now=None, persister=None):
        """Return the ErrorLog object that corresponds to the server.

        :param server: Server whose error has been reported.
        :param interval: Interval of interest.
        :param now: Consider from `now` until `now` - `interval`. The
                    current time is used if nothing is given.
        :param persister: Not used by this method.
        :return: ErrorLog object.
        """
        # Function-level import, presumably there to avoid a circular
        # import between the modules.
        from mysql.fabric.server import MySQLServer
        assert isinstance(server, MySQLServer)

        reference = now if now else get_time()
        history = ErrorLog.compute(server.uuid, interval, reference)
        (whens, reporters) = history
        return ErrorLog(server, interval, reference, whens, reporters)
Exemplo n.º 16
0
    def execute(self):
        """Report statistics on the Fabric node.

        The outcome is a result set with one row whose fields are: the
        node identification, how long the node has been running and when
        it was started.
        """
        fabric_node = FabricNode()
        uuid_value = fabric_node.uuid
        startup_value = fabric_node.startup
        uptime_value = _utils.get_time() - startup_value

        row = [uuid_value, uptime_value, startup_value]
        statistics = ResultSet(
            names=('node_id', 'node_uptime', 'node_startup'),
            types=(str, str, str),
        )
        statistics.append_row(row)

        return CommandResult(None, results=statistics)
Exemplo n.º 17
0
 def test_check_instability(self):
     """Test whether a server can be considered unstable or not.

     Three errors are registered for the server, reported by three
     different clients at `now`, `now` - 5 and `now` - 11 time units,
     while the interval of interest is 10 time units. After refreshing
     the ErrorLog object, `is_unstable` is checked against different
     thresholds. The expected results are consistent with only the first
     two errors being taken into account.
     """
     interval = get_time_delta(10)
     now = get_time()
     st = ErrorLog(self.server, interval, now, [], [])
     ErrorLog.add(self.server, now, "client:1000", "error")
     ErrorLog.add(self.server, now - get_time_delta(5),
                  "client:2000", "error")
     ErrorLog.add(self.server, now - get_time_delta(11),
                  "client:3000", "error")
     st.refresh()

     # Each entry is made of the arguments given to `is_unstable` and of
     # the result that is expected from it.
     cases = (
         # n_notifications, n_reporters, filter_reporter, expected
         (1, 1, None, True),
         (2, 2, None, True),
         (3, 2, None, False),
         (2, 3, None, False),
         (1, 1, ["client:2000"], True),
     )
     for n_notifications, n_reporters, filter_reporter, expected in cases:
         self.assertEqual(
             st.is_unstable(n_notifications=n_notifications,
                            n_reporters=n_reporters,
                            filter_reporter=filter_reporter),
             expected
         )
Exemplo n.º 18
0
def _start(options, config):
    """Start the Fabric server.

    The statically loaded modules are configured, the services are
    loaded, the state store is initialized, the node startup is logged
    and then the executor, the recovery, the failure detector and the
    service manager are started, in this order.

    :param options: Options passed on when the services are loaded.
    :param config: Configuration passed on to the modules and to the
                   service manager.
    """
    # Configure the modules that are not dynamically loaded.
    for module in (_server, _error_log, _failure_detector):
        module.configure(config)

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Log that this Fabric node is starting and keep the reported time.
    node = FabricNode()
    started_at = _utils.get_time()
    start_event = {
        'subject': str(node.uuid),
        'category': MySQLHandler.NODE,
        'type': MySQLHandler.START,
        'reported': started_at,
    }
    _LOGGER.info("Fabric node starting.", extra=start_event)
    node.startup = started_at

    # Start the executor, failure detector and then service manager. The
    # recovery runs sequentially, after the executor has been started and
    # before the service manager is started.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
Exemplo n.º 19
0
    def test_persistence(self):
        """Test how ErrorLog entries are added, fetched, refreshed and
        removed.
        """
        # Add two errors and fetch: the fetched object must match the one
        # built by hand.
        period = get_time_delta(1)
        now = get_time()
        expected = ErrorLog(
            self.server, period, now, [now, now],
            ["client:1000", "client:2000"]
        )
        for reporter in ("client:1000", "client:2000"):
            ErrorLog.add(self.server, now, reporter, "error")
        fetched = ErrorLog.fetch(self.server, period, now)
        self.assertEqual(expected.reporters, fetched.reporters)
        self.assertEqual(expected.whens, fetched.whens)

        # Remove everything: both objects must be empty and match.
        period = get_time_delta(1)
        now = get_time()
        ErrorLog.remove(self.server)
        expected = ErrorLog(self.server, period, now, [], [])
        fetched = ErrorLog.fetch(self.server, period, now)
        self.assertEqual(expected.reporters, fetched.reporters)
        self.assertEqual(expected.whens, fetched.whens)

        # Add three errors and refresh an object that was created empty:
        # only the two errors listed below are expected to show up.
        period = get_time_delta(10)
        now = get_time()
        earlier = now - get_time_delta(5)
        much_earlier = now - get_time_delta(11)
        expected_whens = [now, earlier]
        expected_reporters = ["client:1000", "client:2000"]
        refreshed = ErrorLog(self.server, period, now, [], [])
        ErrorLog.add(self.server, now, "client:1000", "error")
        ErrorLog.add(self.server, earlier, "client:2000", "error")
        ErrorLog.add(self.server, much_earlier, "client:3000", "error")
        refreshed.refresh()
        self.assertEqual(set(refreshed.reporters), set(expected_reporters))
        self.assertEqual(set(refreshed.whens), set(expected_whens))

        # Check whether a statement similar to the one used in the
        # event is fine.
        select_stmt = (
            "SELECT reported, UTC_TIMESTAMP() as now, "
            "TIMEDIFF(UTC_TIMESTAMP(), reported - MAKETIME(2,0,0)) as diff "
            "FROM error_log"
        )
        delete_stmt = (
            "DELETE FROM error_log WHERE "
            "TIMEDIFF(UTC_TIMESTAMP(), reported - MAKETIME(2,0,0)) > "
            "MAKETIME(1,0,0)"
        )
        ErrorLog.remove(self.server)
        ErrorLog.add(self.server, now, "client:1000", "error")
        ErrorLog.add(self.server, now, "client:2000", "error")
        persister = _persistence.current_persister()

        rows = persister.exec_stmt(select_stmt)
        _LOGGER.debug("Output test persistence %s.", rows)
        self.assertEqual(len(rows), 2)

        deleted = persister.exec_stmt(delete_stmt)
        _LOGGER.debug("Output test persistence %s.", deleted)

        rows = persister.exec_stmt(select_stmt)
        _LOGGER.debug("Output test persistence %s.", rows)
        self.assertEqual(len(rows), 0)
Exemplo n.º 20
0
    def _run(self):
        """Function that verifies servers' availabilities.

        While the detector is enabled, the group is fetched and each of its
        servers is handled as follows:

        * A faulty server has its connections killed and is skipped.
        * A server that is alive is skipped too. Before that, if slave
          deep checks are enabled, the server is not the group's master
          and the master is alive, its replication is checked: issues
          other than a failure to connect are handled by setting the
          server's status to SPARE.
        * A server that does not answer is counted in a local quarantine.
          Once the number of consecutive failed checks reaches the
          configured number of detections and the group allows the server
          to be set faulty, the server is set to FAULTY, its connections
          are killed and the "REPORT_FAILURE" event is triggered and
          waited for.

        Servers that were not unreachable in the current round are removed
        from the quarantine. Executor and database errors are ignored, any
        other exception is logged, and in both cases the loop goes on after
        sleeping.
        """
        from mysql.fabric.server import (
            Group,
            MySQLServer,
            ConnectionManager,
        )

        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT
        connection_manager = ConnectionManager()
        slave_deep_checks = FailureDetector._SLAVE_DEEP_CHECKS

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                servers = group.servers() if group is not None else ()
                for server in servers:
                    # Server is FAULTY.
                    if server.status in ignored_status:
                        connection_manager.kill_connections(server)
                        continue

                    # Server is not FAULTY and is alive.
                    if MySQLServer.is_alive(server, detection_timeout):
                        is_master = (group.master == server.uuid)
                        if slave_deep_checks and not is_master:
                            master_server = MySQLServer.fetch(group.master)

                            # Replication is only checked if the master
                            # is alive.
                            if MySQLServer.is_alive(master_server,
                                                    detection_timeout):
                                server.connect()
                                try:
                                    slave_issues, why_slave_issues = \
                                        _replication.check_slave_issues(
                                            server
                                        )
                                    if slave_issues:
                                        # 2003 is the MySQL client error
                                        # for a failure to connect to the
                                        # server.
                                        if (why_slave_issues['io_error'] and
                                            why_slave_issues['io_errno'] ==
                                            2003):
                                            # Nothing to do during
                                            # reconnecting, just logging.
                                            _LOGGER.info(why_slave_issues)
                                        else:
                                            # If slave threads are not
                                            # running, set status to SPARE.
                                            server.status = MySQLServer.SPARE
                                finally:
                                    # Disconnect even if the check or the
                                    # status change raised an exception.
                                    server.disconnect()
                        continue

                    # Server is not FAULTY and is not alive.
                    unreachable.add(server.uuid)

                    _LOGGER.warning(
                        "Server (%s) in group (%s) is unreachable.",
                        server.uuid, self.__group_id
                    )

                    # Count the consecutive rounds in which the server did
                    # not answer.
                    failed_attempts = quarantine.get(server.uuid, 0) + 1
                    quarantine[server.uuid] = failed_attempts
                    unstable = failed_attempts >= detections

                    can_set_faulty = group.can_set_server_faulty(
                        server, get_time()
                    )
                    if unstable and can_set_faulty:
                        # We have to make this transactional and make the
                        # failover (i.e. report failure) robust to failures.
                        # Otherwise, a master might be set to faulty and
                        # a new one never promoted.
                        server.status = MySQLServer.FAULTY
                        connection_manager.kill_connections(server)

                        procedures = trigger("REPORT_FAILURE", None,
                            str(server.uuid),
                            threading.current_thread().name,
                            MySQLServer.FAULTY, False
                        )
                        executor = _executor.Executor()
                        for procedure in procedures:
                            executor.wait_for_procedure(procedure)

                # Iterate over a snapshot of the keys: entries are deleted
                # in the loop and, in Python 3, changing the size of a
                # dictionary while iterating over it raises RuntimeError.
                for uuid in list(quarantine):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            time.sleep(interval)

        _persistence.deinit_thread()
Exemplo n.º 21
0
    def test_persistence(self):
        """Test that what is stored through ErrorLog can be read back.
        """
        server = self.server

        # Update/Notify and fetch, they should match.
        window = get_time_delta(1)
        now = get_time()
        whens = [now, now]
        reporters = ["client:1000", "client:2000"]
        built = ErrorLog(server, window, now, whens, reporters)
        ErrorLog.add(server, now, "client:1000", "error")
        ErrorLog.add(server, now, "client:2000", "error")
        stored = ErrorLog.fetch(server, window, now)
        self.assertEqual(built.reporters, stored.reporters)
        self.assertEqual(built.whens, stored.whens)

        # Call remove, they should be empty and match.
        window = get_time_delta(1)
        now = get_time()
        whens = []
        reporters = []
        ErrorLog.remove(server)
        built = ErrorLog(server, window, now, whens, reporters)
        stored = ErrorLog.fetch(server, window, now)
        self.assertEqual(built.reporters, stored.reporters)
        self.assertEqual(built.whens, stored.whens)

        # Update/Notify and refresh, they should match.
        window = get_time_delta(10)
        now = get_time()
        whens = [now, now - get_time_delta(5)]
        reporters = ["client:1000", "client:2000"]
        built = ErrorLog(server, window, now, [], [])
        entries = (
            (now, "client:1000"),
            (now - get_time_delta(5), "client:2000"),
            (now - get_time_delta(11), "client:3000"),
        )
        for when, reporter in entries:
            ErrorLog.add(server, when, reporter, "error")
        built.refresh()
        self.assertEqual(set(built.reporters), set(reporters))
        self.assertEqual(set(built.whens), set(whens))

        # Check whether a statement similar to the one used in the
        # event is fine.
        ErrorLog.remove(server)
        ErrorLog.add(server, now, "client:1000", "error")
        ErrorLog.add(server, now, "client:2000", "error")
        persister = _persistence.current_persister()

        def select_error_log():
            """Return the rows currently found in the error log table."""
            return persister.exec_stmt(
                "SELECT reported, UTC_TIMESTAMP() as now, "
                "TIMEDIFF(UTC_TIMESTAMP(), reported - MAKETIME(2,0,0)) as diff "
                "FROM error_log"
            )

        before = select_error_log()
        _LOGGER.debug("Output test persistence %s.", before)
        self.assertEqual(len(before), 2)

        deletion = persister.exec_stmt(
            "DELETE FROM error_log WHERE "
            "TIMEDIFF(UTC_TIMESTAMP(), reported - MAKETIME(2,0,0)) > "
            "MAKETIME(1,0,0)"
        )
        _LOGGER.debug("Output test persistence %s.", deletion)

        after = select_error_log()
        _LOGGER.debug("Output test persistence %s.", after)
        self.assertEqual(len(after), 0)
Exemplo n.º 22
0
    def _run(self):
        """Function that verifies servers' availabilities.

        While the detector is enabled, the group is fetched and each of its
        servers is probed. A faulty server has its connections killed and
        is skipped, as is a server that is alive. A server that does not
        answer is counted in a local quarantine; once the number of
        consecutive failed checks reaches the configured number of
        detections and the group allows the server to be set faulty, the
        server is set to FAULTY, its connections are killed and the
        "REPORT_FAILURE" event is triggered and waited for.

        Servers that were not unreachable in the current round are removed
        from the quarantine. Executor and database errors are ignored, any
        other exception is logged, and in both cases the loop goes on after
        sleeping.
        """
        from mysql.fabric.server import (
            Group,
            MySQLServer,
            ConnectionManager,
        )

        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT
        connection_manager = ConnectionManager()

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                if group is not None:
                    for server in group.servers():
                        if server.status in ignored_status or \
                            MySQLServer.is_alive(server, detection_timeout):
                            # Nothing to detect here; connections to a
                            # faulty server are killed though.
                            if server.status == MySQLServer.FAULTY:
                                connection_manager.kill_connections(server)
                            continue

                        unreachable.add(server.uuid)

                        _LOGGER.warning(
                            "Server (%s) in group (%s) is unreachable.",
                            server.uuid, self.__group_id
                        )

                        # Count the consecutive rounds in which the server
                        # did not answer.
                        failed_attempts = quarantine.get(server.uuid, 0) + 1
                        quarantine[server.uuid] = failed_attempts
                        unstable = failed_attempts >= detections

                        can_set_faulty = group.can_set_server_faulty(
                            server, get_time()
                        )
                        if unstable and can_set_faulty:
                            # We have to make this transactional and make the
                            # failover (i.e. report failure) robust to failures.
                            # Otherwise, a master might be set to faulty and
                            # a new one never promoted.
                            server.status = MySQLServer.FAULTY
                            connection_manager.kill_connections(server)

                            procedures = trigger("REPORT_FAILURE", None,
                                str(server.uuid),
                                threading.current_thread().name,
                                MySQLServer.FAULTY, False
                            )
                            executor = _executor.Executor()
                            for procedure in procedures:
                                executor.wait_for_procedure(procedure)

                # Iterate over a snapshot of the keys: entries are deleted
                # in the loop and, in Python 3, changing the size of a
                # dictionary while iterating over it raises RuntimeError.
                for uuid in list(quarantine):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            # Several checks are done per detection interval.
            time.sleep(interval / detections)

        _persistence.deinit_thread()