def _setup_replication(shard_id, source_group_id, destn_group_id, split_value,
                       prune_limit, cmd):
    """Make the destination group's master replicate from the source
    group's master and then trigger the SETUP_SYNC step.

    :param shard_id: The shard ID of the shard that needs to be moved.
    :param source_group_id: The group_id of the source shard.
    :param destn_group_id: The ID of the group to which the shard needs to
                           be moved.
    :param split_value: Indicates the value at which the range for the
                        particular shard will be split. Will be set only
                        for shard split operations.
    :param prune_limit: The number of DELETEs that should be done in one
                        batch.
    :param cmd: Indicates the type of re-sharding operation
    :raises _errors.ShardingError: If one of the groups, or the master of
                                   one of the groups, cannot be fetched.
    """
    src_group = Group.fetch(source_group_id)
    if src_group is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_NOT_FOUND % (source_group_id, )
        )

    dst_group = Group.fetch(destn_group_id)
    if dst_group is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_NOT_FOUND % (destn_group_id, )
        )

    # The source group's master plays the role of replication master.
    src_master = MySQLServer.fetch(src_group.master)
    if src_master is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_MASTER_NOT_FOUND
        )
    src_master.connect()

    # The destination group's master plays the role of replication slave.
    dst_master = MySQLServer.fetch(dst_group.master)
    if dst_master is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_MASTER_NOT_FOUND
        )
    dst_master.connect()

    # Offline mode is switched on for the whole reconfiguration and
    # switched off again once replication has been started.
    _utils.set_offline_mode(dst_master, True)  ### TODO: if forced offline_mode

    # Drop whatever replication setup the destination server had before.
    _replication.stop_slave(dst_master, wait=True)
    _replication.reset_slave(dst_master, clean=True)

    # Re-point the destination server at the shard group master and let
    # the data start flowing.
    _replication.switch_master(
        dst_master, src_master, src_master.repl_user, src_master.repl_pass
    )
    _replication.start_slave(dst_master, wait=True)

    _utils.set_offline_mode(dst_master, False)  ### TODO: if forced offline_mode

    # Hand over to the step that synchronizes source and destination.
    _events.trigger_within_procedure(
        SETUP_SYNC,
        shard_id,
        source_group_id,
        destn_group_id,
        split_value,
        prune_limit,
        cmd
    )
def switch_master(slave, master):
    """Re-point a slave at the given master and resume replication.

    Replication is stopped on the slave, its master is changed using the
    master's own user and password, the slave is flagged as read-only and
    replication is started again.

    :param slave: Slave.
    :param master: Master.
    """
    _replication.stop_slave(slave, wait=True)

    # The master object carries the credentials used for replication.
    credentials = (master.user, master.passwd)
    _replication.switch_master(slave, master, *credentials)

    slave.read_only = True
    _replication.start_slave(slave, wait=True)
def process_slave_backlog(slave):
    """Wait until slave processes its backlog.

    The slave is restarted with only its SQL thread and the function then
    waits until it reaches the Master_Log_File / Read_Master_Log_Pos
    coordinates reported by its own status.

    :param slave: slave.
    """
    # Restart replication with the SQL thread only.
    _replication.stop_slave(slave, wait=True)
    _replication.start_slave(slave, threads=("SQL_THREAD", ), wait=True)

    # The first row of the slave status holds the target coordinates.
    status = _replication.get_slave_status(slave)[0]
    target_file = status.Master_Log_File
    target_pos = status.Read_Master_Log_Pos
    _replication.wait_for_slave(slave, target_file, target_pos)
def process_slave_backlog(slave):
    """Wait until slave processes its backlog.

    Only the SQL thread is restarted; the call returns once the slave has
    reached the log file and position (Master_Log_File and
    Read_Master_Log_Pos) taken from its status.

    :param slave: slave.
    """
    _replication.stop_slave(slave, wait=True)
    _replication.start_slave(slave, wait=True, threads=("SQL_THREAD", ))

    # Coordinates to wait for come from the first status row.
    first_row = _replication.get_slave_status(slave)[0]
    coordinates = (first_row.Master_Log_File, first_row.Read_Master_Log_Pos)
    _replication.wait_for_slave(slave, *coordinates)
def configure_instances(self, topology, user, passwd):
    """Configure a replication topology using the MySQL Instances
    previously registered.

    :param topology: Topology to be configured.
    :param user: MySQL Instances' user.
    :param passwd: MySQL Instances' password.
    :return: The server object created for the master of this topology
             level.

    This method can be used as follows::

      import tests.utils as _test_utils

      topology = {1 : [{2 : []}, {3 : []}]}
      instances = _test_utils.MySQLInstances()
      user = instances.user
      passwd = instances.passwd
      instances.configure_instances(topology, user, passwd)

    Each instance in the topology is represented as a dictionary whose
    keys are references to addresses that will be retrieved through
    the :meth:`get_address` method and values are a list of slaves.

    So after calling :meth:`configure_instances` method, one can get a
    reference to an object, MySQLServer, through the :meth:`get_instance`
    method.
    """
    for number in topology.keys():
        address = self.get_address(number)

        # Discover the server's UUID and build a writable master object.
        discovered = _server.MySQLServer.discover_uuid(address=address)
        master = _server.MySQLServer(
            _uuid.UUID(discovered), address, user, passwd
        )
        master.connect()
        master.read_only = False
        self.__instances[number] = master

        # Each entry in the value list is itself a topology; configure it
        # recursively and attach the resulting server as a read-only slave.
        for sub_topology in topology[number]:
            slave = self.configure_instances(sub_topology, user, passwd)
            slave.read_only = True
            _replication.switch_master(slave, master, user, passwd)
            _replication.start_slave(slave, wait=True)

        # NOTE(review): the return sits inside the loop, so only the first
        # key of a topology dictionary is configured -- confirm against
        # the original indentation.
        return master
def configure_instances(self, topology, user, passwd):
    """Configure a replication topology using the MySQL Instances
    previously registered.

    :param topology: Topology to be configured.
    :param user: MySQL Instances' user.
    :param passwd: MySQL Instances' password.
    :return: The server object created for the master of this topology
             level.

    This method can be used as follows::

      import tests.utils as _test_utils

      topology = {1 : [{2 : []}, {3 : []}]}
      instances = _test_utils.MySQLInstances()
      user = instances.user
      passwd = instances.passwd
      instances.configure_instances(topology, user, passwd)

    Each instance in the topology is represented as a dictionary whose
    keys are references to addresses that will be retrieved through
    the :meth:`get_address` method and values are a list of slaves.

    So after calling :meth:`configure_instances` method, one can get a
    reference to an object, MySQLServer, through the :meth:`get_instance`
    method.
    """
    for number in topology.keys():
        address = self.get_address(number)

        # Discover the server's UUID and build a writable master object.
        discovered = _server.MySQLServer.discover_uuid(address=address)
        master = _server.MySQLServer(
            uuid.UUID(discovered), address, user, passwd
        )
        master.connect()
        master.read_only = False
        self.__instances[number] = master

        # Each entry in the value list is itself a topology; configure it
        # recursively and attach the resulting server as a read-only slave.
        for sub_topology in topology[number]:
            slave = self.configure_instances(sub_topology, user, passwd)
            slave.read_only = True
            _replication.switch_master(slave, master, user, passwd)
            _replication.start_slave(slave, wait=True)

        # NOTE(review): the return sits inside the loop, so only the first
        # key of a topology dictionary is configured -- confirm against
        # the original indentation.
        return master
def test_check_no_healthy_slave(self):
    """Test promoting when there is no healthy slave.
    """
    # Configure replication: server 0 is the master of servers 1 and 2.
    instances = tests.utils.MySQLInstances()
    user = instances.user
    passwd = instances.passwd
    instances.configure_instances({0 : [{1 : []}, {2 : []}]}, user, passwd)
    master = instances.get_instance(0)
    slave_1 = instances.get_instance(1)
    slave_2 = instances.get_instance(2)
    slaves = (slave_1, slave_2)

    # Register the three servers in a fresh group.
    self.proxy.group.create("group_id", "")
    for server in (master, slave_1, slave_2):
        self.proxy.group.add("group_id", server.address)

    # Promote a master.
    status = self.proxy.group.promote("group_id", str(master.uuid))
    self.check_xmlrpc_command_result(status)

    # Check replication.
    status = self.proxy.group.health("group_id")
    for index, role in (
            (2, _server.MySQLServer.SECONDARY),
            (1, _server.MySQLServer.SECONDARY),
            (0, _server.MySQLServer.PRIMARY)):
        self.check_xmlrpc_simple(
            status, {'status': role}, index=index, rowcount=3
        )

    # Inject some events that make slaves break: the table is created on
    # each slave without being written to its binary log.
    slave_statements = (
        "CREATE DATABASE IF NOT EXISTS test",
        "USE test",
        "DROP TABLE IF EXISTS test",
        "CREATE TABLE test (id INTEGER)",
    )
    for slave in slaves:
        slave.set_session_binlog(False)
        for statement in slave_statements:
            slave.exec_stmt(statement)
        slave.set_session_binlog(True)

    # On the master only the DROP is hidden from the binary log, so the
    # CREATE TABLE is the statement that gets logged.
    master_statements = (
        "CREATE DATABASE IF NOT EXISTS test",
        "USE test",
        "SET sql_log_bin=0",
        "DROP TABLE IF EXISTS test",
        "SET sql_log_bin=1",
        "CREATE TABLE test (id INTEGER)",
    )
    for statement in master_statements:
        master.exec_stmt(statement)

    # Synchronize replicas.
    for slave in slaves:
        self.assertRaises(
            _errors.DatabaseError, _repl.sync_slave_with_master,
            slave, master, timeout=0
        )

    # Check replication.
    status = self.proxy.group.health("group_id")
    for info in self.check_xmlrpc_iter(status):
        server_uuid = info['uuid']
        if server_uuid in (str(slave_2.uuid), str(slave_1.uuid)):
            self.assertEqual(
                info['status'], _server.MySQLServer.SECONDARY
            )
            self.assertEqual(info['sql_not_running'], True)
        elif server_uuid == str(master.uuid):
            self.assertEqual(
                info['status'], _server.MySQLServer.PRIMARY
            )

    # Try to choose a new master through switch over.
    status = self.proxy.group.promote("group_id")
    self.check_xmlrpc_command_result(status, has_error=True)

    # Try to reset the slave and restart slave.
    _repl.stop_slave(slave_1, wait=True)
    _repl.reset_slave(slave_1, clean=False)

    try:
        _repl.start_slave(slave_1, wait=True)
    except _errors.DatabaseError as error:
        expected_message = (
            "Error 'Table 'test' already exists' "
            "on query. Default database: 'test'. Query: 'CREATE "
            "TABLE test (id INTEGER)'"
        )
        self.assertEqual(str(error), expected_message)

    # Synchronize replica.
    self.assertRaises(
        _errors.DatabaseError, _repl.sync_slave_with_master,
        slave_1, master, timeout=0
    )

    # Check replication.
    status = self.proxy.group.health("group_id")
    broken_slave = {
        'status': _server.MySQLServer.SECONDARY,
        "sql_not_running": True,
    }
    healthy_master = {
        'status': _server.MySQLServer.PRIMARY,
    }
    for index, expected in (
            (2, dict(broken_slave)),
            (1, dict(broken_slave)),
            (0, healthy_master)):
        self.check_xmlrpc_simple(status, expected, index=index, rowcount=3)

    # Try to drop the table on the slave.
    for slave in slaves:
        _repl.stop_slave(slave, wait=True)
        _repl.reset_slave(slave, clean=False)
        slave.set_session_binlog(False)
        slave.exec_stmt("DROP TABLE IF EXISTS test")
        slave.set_session_binlog(True)
        _repl.start_slave(slave, wait=True)

    # Synchronize replicas.
    for slave in slaves:
        _repl.sync_slave_with_master(slave, master, timeout=0)

    # Check replication.
    status = self.proxy.group.health("group_id")
    for info in self.check_xmlrpc_iter(status, rowcount=3):
        server_uuid = info['uuid']
        if server_uuid in (str(slave_2.uuid), str(slave_1.uuid)):
            self.assertEqual(
                info['status'], _server.MySQLServer.SECONDARY
            )
            self.assertEqual(info['sql_not_running'], False)
        elif server_uuid == str(master.uuid):
            self.assertEqual(
                info['status'], _server.MySQLServer.PRIMARY
            )
def test_promote_to(self):
    # Create topology: M1 ---> S2, M1 ---> S3
    instances = tests.utils.MySQLInstances()
    user = instances.user
    passwd = instances.passwd
    instances.configure_instances({0: [{1: []}, {2: []}]}, user, passwd)
    master = instances.get_instance(0)
    slave_1 = instances.get_instance(1)
    slave_2 = instances.get_instance(2)

    def promote_slave_1_must_fail():
        # Promoting slave_1 is expected to be reported as an error.
        result = self.proxy.group.promote("group_id", str(slave_1.uuid))
        self.check_xmlrpc_command_result(result, has_error=True)

    def expected_lookup(primary):
        # Expected lookup_servers() result when `primary` is the master
        # of the group and the other two servers are secondaries.
        rows = []
        for server in (master, slave_1, slave_2):
            if server is primary:
                role = _server.MySQLServer.PRIMARY
                mode = _server.MySQLServer.READ_WRITE
            else:
                role = _server.MySQLServer.SECONDARY
                mode = _server.MySQLServer.READ_ONLY
            rows.append([
                str(server.uuid), server.address, role, mode,
                _server.MySQLServer.DEFAULT_WEIGHT
            ])
        return tests.utils.make_servers_lookup_result(rows)

    # Try to use a group that does not exist.
    promote_slave_1_must_fail()

    # Try to use a slave that does not exist with the group.
    self.proxy.group.create("group_id", "")
    promote_slave_1_must_fail()

    # Try to use a server that is already a master.
    for server in (master, slave_1, slave_2):
        self.proxy.group.add("group_id", server.address)
    group = _server.Group.fetch("group_id")
    tests.utils.configure_decoupled_master(group, slave_1)
    promote_slave_1_must_fail()

    # Try to use a slave whose replication is not properly configured.
    tests.utils.configure_decoupled_master(group, master)
    _repl.stop_slave(slave_1, wait=True)
    _repl.reset_slave(slave_1, clean=True)
    promote_slave_1_must_fail()

    # Try to use a slave whose replication is not properly running.
    _repl.switch_master(slave_1, master, user, passwd)
    promote_slave_1_must_fail()

    # Start the slave.
    _repl.start_slave(slave_1, wait=True)

    # Look up servers.
    expected = expected_lookup(master)
    servers = self.proxy.group.lookup_servers("group_id")
    self.check_xmlrpc_result(servers, expected)

    # Do the promote.
    status = self.proxy.group.promote("group_id", str(slave_1.uuid))
    self.check_xmlrpc_command_result(status)

    # Look up servers.
    expected = expected_lookup(slave_1)
    servers = self.proxy.group.lookup_servers("group_id")
    self.check_xmlrpc_result(servers, expected)

    # Do the promote.
    # Note that it is using HOST:PORT instead of UUID.
    status = self.proxy.group.promote("group_id", master.address)
    self.check_xmlrpc_command_result(status)

    # Look up servers.
    servers = self.proxy.group.lookup_servers("group_id")
    expected = expected_lookup(master)
    self.check_xmlrpc_result(servers, expected)
def setup_group_replication(group_master_id, group_slave_id):
    """Sets up replication between the masters of the two groups and
    updates the references to the groups in each other.

    :param group_master_id: The group whose master will act as the master
                            in the replication setup.
    :param group_slave_id: The group whose master will act as the slave in
                           the replication setup.
    :raises _errors.GroupError: If a group or its master cannot be found,
                                if a master is not running, or if
                                connecting to a master fails.
    """
    group_master = Group.fetch(group_master_id)
    group_slave = Group.fetch(group_slave_id)

    if group_master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_NOT_FOUND_ERROR % (group_master_id, )
        )

    if group_slave is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_NOT_FOUND_ERROR % (group_slave_id, )
        )

    if group_master.master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR % ""
        )

    if group_slave.master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR % ""
        )

    #Master is the master of the Global Group. We replicate from here to
    #the masters of all the shard Groups.
    master = MySQLServer.fetch(group_master.master)
    if master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR %
            (group_master.master, )
        )

    #Get the master of the shard Group.
    slave = MySQLServer.fetch(group_slave.master)
    if slave is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR %
            (group_slave.master, )
        )

    if not server_running(master):
        #The server is already down. We cannot connect to it to setup
        #replication.
        raise _errors.GroupError(
            GROUP_MASTER_NOT_RUNNING % (group_master.group_id, )
        )

    try:
        master.connect()
    except _errors.DatabaseError as error:
        #Server is not accessible, unable to connect to the server.
        #Report the server that actually failed, i.e. the master of the
        #master group.
        raise _errors.GroupError(
            GROUP_REPLICATION_SERVER_ERROR % (group_master.master, error)
        )

    if not server_running(slave):
        #The server is already down. We cannot connect to it to setup
        #replication.
        raise _errors.GroupError(
            GROUP_MASTER_NOT_RUNNING % (group_slave.group_id, )
        )

    try:
        slave.connect()
    except _errors.DatabaseError as error:
        #Report the server that actually failed, i.e. the master of the
        #slave group.
        raise _errors.GroupError(
            GROUP_REPLICATION_SERVER_ERROR % (group_slave.master, error)
        )

    _replication.stop_slave(slave, wait=True)

    #clear references to old masters in the slave
    _replication.reset_slave(slave, clean=True)

    _replication.switch_master(slave, master, master.user, master.passwd)

    _replication.start_slave(slave, wait=True)

    try:
        group_master.add_slave_group_id(group_slave_id)
        group_slave.add_master_group_id(group_master_id)
    except _errors.DatabaseError:
        #If there is an error while adding a reference to
        #the slave group or a master group, it means that
        #the slave group was already added and the error
        #is happening because the group was already registered.
        #Ignore this error.
        pass
def test_promote_to(self):
    # Create topology: M1 ---> S2, M1 ---> S3
    instances = tests.utils.MySQLInstances()
    user = instances.user
    passwd = instances.passwd
    instances.configure_instances({0 : [{1 : []}, {2 : []}]}, user, passwd)
    master = instances.get_instance(0)
    slave_1 = instances.get_instance(1)
    slave_2 = instances.get_instance(2)

    def check_promote(candidate, job_status, description):
        # Promote `candidate` and verify the outcome of the last job.
        status = self.proxy.group.promote("group_id", candidate)
        self.assertStatus(status, job_status)
        last_job = status[1][-1]
        self.assertEqual(last_job["state"], _executor.Job.COMPLETE)
        self.assertEqual(last_job["description"], description)

    def check_lookup(primary):
        # Look the servers up and compare them with the rows expected
        # when `primary` is the group's master.
        servers = self.proxy.group.lookup_servers("group_id")
        self.assertEqual(servers[0], True)
        self.assertEqual(servers[1], "")
        retrieved = servers[2]
        expected = []
        for server in (master, slave_1, slave_2):
            if server is primary:
                role = _server.MySQLServer.PRIMARY
                mode = _server.MySQLServer.READ_WRITE
            else:
                role = _server.MySQLServer.SECONDARY
                mode = _server.MySQLServer.READ_ONLY
            expected.append({
                "server_uuid" : str(server.uuid),
                "address" : server.address,
                "status" : role,
                "mode" : mode,
                "weight" : _server.MySQLServer.DEFAULT_WEIGHT,
            })
        retrieved.sort()
        expected.sort()
        self.assertEqual(retrieved, expected)

    # Try to use a group that does not exist.
    check_promote(
        str(slave_1.uuid), _executor.Job.ERROR,
        "Tried to execute action (_define_ha_operation)."
    )

    # Try to use a slave that does not exist with the group.
    self.proxy.group.create("group_id", "")
    check_promote(
        str(slave_1.uuid), _executor.Job.ERROR,
        "Tried to execute action (_check_candidate_fail)."
    )

    # Try to use a server that is already a master.
    for server in (master, slave_1, slave_2):
        self.proxy.group.add("group_id", server.address)
    group = _server.Group.fetch("group_id")
    tests.utils.configure_decoupled_master(group, slave_1)
    check_promote(
        str(slave_1.uuid), _executor.Job.ERROR,
        "Tried to execute action (_check_candidate_switch)."
    )

    # Try to use a slave whose replication is not properly configured.
    tests.utils.configure_decoupled_master(group, master)
    _repl.stop_slave(slave_1, wait=True)
    _repl.reset_slave(slave_1, clean=True)
    check_promote(
        str(slave_1.uuid), _executor.Job.ERROR,
        "Tried to execute action (_check_candidate_switch)."
    )

    # Try to use a slave whose replication is not properly running.
    _repl.switch_master(slave_1, master, user, passwd)
    check_promote(
        str(slave_1.uuid), _executor.Job.ERROR,
        "Tried to execute action (_check_candidate_switch)."
    )

    # Start the slave.
    _repl.start_slave(slave_1, wait=True)

    # Look up servers.
    check_lookup(master)

    # Do the promote.
    check_promote(
        str(slave_1.uuid), _executor.Job.SUCCESS,
        "Executed action (_change_to_candidate)."
    )

    # Look up servers.
    check_lookup(slave_1)

    # Do the promote.
    # Note that it is using HOST:PORT instead of UUID.
    check_promote(
        master.address, _executor.Job.SUCCESS,
        "Executed action (_change_to_candidate)."
    )

    # Look up servers.
    check_lookup(master)
def test_update_only(self):
    """Test the update_only parameter while adding a slave.
    """
    def check_health(expected, **where):
        # Fetch the group's health and compare one row with `expected`.
        health = self.proxy.group.health("group")
        self.check_xmlrpc_simple(health, expected, **where)

    # Prepare group and servers
    self.proxy.group.create("group", "Testing group...")
    address_1 = tests.utils.MySQLInstances().get_address(0)
    address_2 = tests.utils.MySQLInstances().get_address(1)
    address_3 = tests.utils.MySQLInstances().get_address(2)
    user = tests.utils.MySQLInstances().user
    passwd = tests.utils.MySQLInstances().passwd

    uuids = []
    servers = []
    for address in (address_1, address_2, address_3):
        status = self.proxy.server.lookup_uuid(address)
        found_uuid = self.check_xmlrpc_get_uuid(status, False)
        server = _server.MySQLServer(
            _uuid.UUID(found_uuid), address, user, passwd
        )
        server.connect()
        uuids.append(found_uuid)
        servers.append(server)
    uuid_1, uuid_2, uuid_3 = uuids
    server_1, server_2, server_3 = servers

    # Add a server and check that replication is not configured. Since
    # there is no master configured, it does not matter whether the
    # update_only parameter is set or not.
    self.proxy.group.add("group", address_1, 5, True)
    check_health({
        'status': _server.MySQLServer.SECONDARY,
        'is_not_configured': True,
    })
    self.proxy.group.remove("group", uuid_1)
    self.proxy.group.add("group", address_1, 5, False)
    check_health({
        "status": _server.MySQLServer.SECONDARY,
        "is_not_configured": True,
    })

    # Try to make the previous server a master, i.e. --update-only = False.
    status = self.proxy.server.set_status(
        uuid_1, _server.MySQLServer.PRIMARY
    )
    self.check_xmlrpc_command_result(status, True)

    # Try to make the previous server a master, i.e. --update-only = True.
    status = self.proxy.server.set_status(
        uuid_1, _server.MySQLServer.PRIMARY, True
    )
    self.check_xmlrpc_command_result(status, True)
    self.proxy.group.promote("group", uuid_1)

    # Add a slave but notice that it is not properly configured, i.e.
    # --update-only = True.
    self.proxy.group.add("group", address_2, 5, True)
    check_health({
        "status": _server.MySQLServer.SECONDARY,
        "is_not_configured": True,
    }, rowcount=2, index=1)

    # Properly configure the previous slave.
    _replication.switch_master(
        slave=server_2, master=server_1,
        master_user=server_1.user, master_passwd=server_1.passwd
    )
    _replication.start_slave(server_2, wait=True)
    check_health({
        "status": _server.MySQLServer.SECONDARY,
    }, rowcount=2, index=1)

    # Add a slave but notice that it is properly configured, i.e.
    # --update-only = False.
    self.proxy.group.add("group", address_3)
    check_health({
        "status": _server.MySQLServer.SECONDARY,
    }, index=1)

    # Stop replication, set slave's status to faulty and add it
    # back as a spare, --update-only = False. Note that it is
    # properly configured.
    _replication.stop_slave(server_3, wait=True)
    server_3.status = _server.MySQLServer.FAULTY
    check_health({
        'status': _server.MySQLServer.FAULTY,
        "io_not_running": True,
        "sql_not_running": True,
    }, rowcount=3, index=2)
    status = self.proxy.server.set_status(
        uuid_3, _server.MySQLServer.SPARE
    )
    check_health({
        "status": _server.MySQLServer.SPARE,
    }, rowcount=3, index=2)

    # Stop replication, set slave's status to faulty and add it
    # back as a spare, --update-only = True. Note that it is not
    # properly configured.
    _replication.stop_slave(server_3, wait=True)
    server_3.status = _server.MySQLServer.FAULTY
    check_health({
        "status": _server.MySQLServer.FAULTY,
        "io_not_running": True,
        "sql_not_running": True,
    }, rowcount=3, index=2)
    status = self.proxy.server.set_status(
        uuid_3, _server.MySQLServer.SPARE, True
    )
    check_health({
        "status": _server.MySQLServer.SPARE,
        "io_not_running": True,
        "sql_not_running": True,
    }, rowcount=3, index=2)

    # Try to set slave's status to faulty, i.e. --update-only = False.
    status = self.proxy.server.set_status(
        uuid_3, _server.MySQLServer.FAULTY
    )
    self.check_xmlrpc_command_result(status, True)
    check_health({
        "status": _server.MySQLServer.SPARE,
        "io_not_running": True,
        "sql_not_running": True,
    }, rowcount=3, index=2)

    # Try to set slave's status to faulty, i.e. --update-only = True.
    status = self.proxy.server.set_status(
        uuid_3, _server.MySQLServer.FAULTY, True
    )
    self.check_xmlrpc_command_result(status, has_error=True)
    check_health({
        "status": _server.MySQLServer.SPARE,
        "io_not_running": True,
        "sql_not_running": True,
    }, rowcount=3, index=2)
def setup_group_replication(group_master_id, group_slave_id):
    """Sets up replication between the masters of the two groups and
    updates the references to the groups in each other.

    :param group_master_id: The group whose master will act as the master
                            in the replication setup.
    :param group_slave_id: The group whose master will act as the slave in
                           the replication setup.
    :raises _errors.GroupError: If a group or its master cannot be found,
                                if a master is not running, or if
                                connecting to a master fails.
    """
    group_master = Group.fetch(group_master_id)
    group_slave = Group.fetch(group_slave_id)

    if group_master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_NOT_FOUND_ERROR % (group_master_id, )
        )

    if group_slave is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_NOT_FOUND_ERROR % (group_slave_id, )
        )

    if group_master.master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR % ""
        )

    if group_slave.master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR % ""
        )

    #Master is the master of the Global Group. We replicate from here to
    #the masters of all the shard Groups.
    master = MySQLServer.fetch(group_master.master)
    if master is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR %
            (group_master.master, )
        )

    #Get the master of the shard Group.
    slave = MySQLServer.fetch(group_slave.master)
    if slave is None:
        raise _errors.GroupError(
            GROUP_REPLICATION_GROUP_MASTER_NOT_FOUND_ERROR %
            (group_slave.master, )
        )

    if not server_running(master):
        #The server is already down. We cannot connect to it to setup
        #replication.
        raise _errors.GroupError(
            GROUP_MASTER_NOT_RUNNING % (group_master.group_id, )
        )

    try:
        master.connect()
    except _errors.DatabaseError as error:
        #Server is not accessible, unable to connect to the server.
        #The failing server is the master of the master group, so that is
        #the one named in the error.
        raise _errors.GroupError(
            GROUP_REPLICATION_SERVER_ERROR % (group_master.master, error)
        )

    if not server_running(slave):
        #The server is already down. We cannot connect to it to setup
        #replication.
        raise _errors.GroupError(
            GROUP_MASTER_NOT_RUNNING % (group_slave.group_id, )
        )

    try:
        slave.connect()
    except _errors.DatabaseError as error:
        #The failing server is the master of the slave group, so that is
        #the one named in the error.
        raise _errors.GroupError(
            GROUP_REPLICATION_SERVER_ERROR % (group_slave.master, error)
        )

    _replication.stop_slave(slave, wait=True)

    #clear references to old masters in the slave
    _replication.reset_slave(slave, clean=True)

    _replication.switch_master(slave, master, master.user, master.passwd)

    _replication.start_slave(slave, wait=True)

    try:
        group_master.add_slave_group_id(group_slave_id)
        group_slave.add_master_group_id(group_master_id)
    except _errors.DatabaseError:
        #If there is an error while adding a reference to
        #the slave group or a master group, it means that
        #the slave group was already added and the error
        #is happening because the group was already registered.
        #Ignore this error.
        pass
def _setup_move_sync(shard_id, source_group_id, destn_group_id, split_value,
                     cmd):
    """Setup replication between the source and the destination groups and
    ensure that they are in sync.

    Once the destination master has caught up with the source master,
    replication is stopped and reset again and the procedure that switches
    the shard mappings is triggered.

    :param shard_id: The shard ID of the shard that needs to be moved.
    :param source_group_id: The group_id of the source shard.
    :param destn_group_id: The ID of the group to which the shard needs to
                           be moved.
    :param split_value: Indicates the value at which the range for the
                        particular shard will be split. Will be set only
                        for shard split operations.
    :param cmd: Indicates the type of re-sharding operation
    :raises ShardingError: If the source or destination group cannot be
                           fetched, or if the master of either group cannot
                           be fetched.
    """
    source_group = Group.fetch(source_group_id)
    if source_group is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_NOT_FOUND % (source_group_id, )
        )

    destination_group = Group.fetch(destn_group_id)
    if destination_group is None:
        #Use the parameter name here; the previously referenced
        #"destination_group_id" is not defined in this function.
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_NOT_FOUND % (destn_group_id, )
        )

    master = MySQLServer.fetch(source_group.master)
    if master is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_MASTER_NOT_FOUND
        )
    master.connect()

    slave = MySQLServer.fetch(destination_group.master)
    if slave is None:
        raise _errors.ShardingError(
            _services_sharding.SHARD_GROUP_MASTER_NOT_FOUND
        )
    slave.connect()

    #Stop and reset any slave that might be running on the slave server.
    _replication.stop_slave(slave, wait=True)
    _replication.reset_slave(slave, clean=True)

    #Change the master to the shard group master.
    _replication.switch_master(slave, master, master.user, master.passwd)

    #Start the slave so that syncing of the data begins
    _replication.start_slave(slave, wait=True)

    #Synchronize until the slave catches up with the master.
    _replication.synchronize_with_read_only(slave, master)

    #Reset replication once the syncing is done.
    _replication.stop_slave(slave, wait=True)
    _replication.reset_slave(slave, clean=True)

    #Trigger changing the mappings for the shard that was copied
    _events.trigger_within_procedure(
        SETUP_RESHARDING_SWITCH,
        shard_id,
        source_group_id,
        destn_group_id,
        split_value,
        cmd
    )
def test_check_no_healthy_slave(self):
    """Test promoting when there is no healthy slave.

    Both slaves are made to fail on a CREATE TABLE issued on the master (the
    table is created on each slave beforehand, outside the binary log). The
    test then checks that a promote without a candidate fails, that
    resetting a slave without removing the table does not help, and that
    dropping the table on the slaves lets them synchronize again.
    """
    # Error expected from a slave while it cannot apply the master's
    # CREATE TABLE statement.
    table_exists_error = (
        "Error 'Table 'test' already exists' on query. Default database: "
        "'test'. Query: 'CREATE TABLE test (id INTEGER)'"
    )
    broken_threads = {"sql_running": False, "sql_error": table_exists_error}

    # Configure replication.
    instances = tests.utils.MySQLInstances()
    user = instances.user
    passwd = instances.passwd
    instances.configure_instances({0 : [{1 : []}, {2 : []}]}, user, passwd)
    master = instances.get_instance(0)
    slave_1 = instances.get_instance(1)
    slave_2 = instances.get_instance(2)
    slaves = (slave_1, slave_2)

    def check_health(expected_threads):
        """Check the group health: both slaves report `expected_threads`
        and are secondaries, and the master is the primary.
        """
        status = self.proxy.group.health("group_id")
        for slave in slaves:
            slave_health = status[2][str(slave.uuid)]
            self.assertEqual(slave_health["threads"], expected_threads)
            self.assertEqual(slave_health["status"],
                             _server.MySQLServer.SECONDARY)
        self.assertEqual(status[2][str(master.uuid)]["status"],
                         _server.MySQLServer.PRIMARY)

    def exec_without_binlog(server, statements):
        """Run `statements` on `server` with session binary logging
        switched off, switching it back on afterwards.
        """
        server.set_session_binlog(False)
        for statement in statements:
            server.exec_stmt(statement)
        server.set_session_binlog(True)

    self.proxy.group.create("group_id", "")
    self.proxy.group.add("group_id", master.address)
    self.proxy.group.add("group_id", slave_1.address)
    self.proxy.group.add("group_id", slave_2.address)

    # Promote a master.
    status = self.proxy.group.promote("group_id", str(master.uuid))
    self.assertStatus(status, _executor.Job.SUCCESS)
    self.assertEqual(status[1][-1]["state"], _executor.Job.COMPLETE)
    self.assertEqual(status[1][-1]["description"],
                     "Executed action (_change_to_candidate).")

    # Check replication.
    check_health({})

    # Inject some events that make slaves break.
    for slave in slaves:
        exec_without_binlog(slave, (
            "CREATE DATABASE IF NOT EXISTS test",
            "USE test",
            "DROP TABLE IF EXISTS test",
            "CREATE TABLE test (id INTEGER)",
        ))

    master.exec_stmt("CREATE DATABASE IF NOT EXISTS test")
    master.exec_stmt("USE test")
    master.exec_stmt("SET sql_log_bin=0")
    master.exec_stmt("DROP TABLE IF EXISTS test")
    master.exec_stmt("SET sql_log_bin=1")
    master.exec_stmt("CREATE TABLE test (id INTEGER)")

    # Synchronize replicas.
    for slave in slaves:
        self.assertRaises(_errors.DatabaseError,
                          _repl.sync_slave_with_master,
                          slave, master, timeout=0)

    # Check replication.
    check_health(broken_threads)

    # Try to choose a new master through switch over.
    status = self.proxy.group.promote("group_id")
    self.assertStatus(status, _executor.Job.ERROR)
    self.assertEqual(status[1][-1]["state"], _executor.Job.COMPLETE)
    self.assertEqual(status[1][-1]["description"],
                     "Tried to execute action (_find_candidate_switch).")

    # Try to reset the slave and restart slave.
    _repl.stop_slave(slave_1, wait=True)
    _repl.reset_slave(slave_1, clean=False)

    try:
        _repl.start_slave(slave_1, wait=True)
    except _errors.DatabaseError as error:
        self.assertEqual(str(error), table_exists_error)

    # Synchronize replica.
    self.assertRaises(_errors.DatabaseError,
                      _repl.sync_slave_with_master,
                      slave_1, master, timeout=0)

    # Check replication.
    check_health(broken_threads)

    # Try to drop the table on the slave.
    for slave in slaves:
        _repl.stop_slave(slave, wait=True)
        _repl.reset_slave(slave, clean=False)
        exec_without_binlog(slave, ("DROP TABLE IF EXISTS test", ))
        _repl.start_slave(slave, wait=True)

    # Synchronize replicas.
    for slave in slaves:
        _repl.sync_slave_with_master(slave, master, timeout=0)

    # Check replication.
    check_health({})