def implicit_reparent(self, keyspace, shard, num_shards):
  """Performs an implicit reparent.

  This function will call borg restart on the current master task and
  verify that a new task was selected to be the master.

  Args:
    keyspace: Name of the keyspace to reparent (string)
    shard: Numeric ID of the shard to reparent (zero based int)
    num_shards: Total number of shards (int)
  """
  shard_name = sharding_utils.get_shard_name(shard, num_shards)
  original_master_name = self.env.get_current_master_name(keyspace, shard_name)
  original_master_cell = self.env.get_tablet_cell(original_master_name)
  master_task_num = self.env.get_tablet_task_number(original_master_name)
  logging.info(
      "Restarting %s/%s, current master: %s, task: %d",
      keyspace, shard_name, original_master_name, master_task_num)
  ret_val = self.env.restart_mysql_task(
      original_master_cell, keyspace, shard, master_task_num,
      "replica", "mysql-alloc", True)
  # assertEqual replaces the deprecated assertEquals alias.
  self.assertEqual(
      ret_val, 0, msg="restartalloc failed (returned %d)" % ret_val)

  # Poll until a different tablet becomes master or we time out.
  # Pre-initialize so the assertion below cannot hit a NameError when
  # reparent_timeout_threshold <= 0 and the loop body never runs.
  new_master_name = original_master_name
  start_time = time.time()
  while time.time() - start_time < self.reparent_timeout_threshold:
    new_master_name = self.env.get_current_master_name(keyspace, shard_name)
    if new_master_name != original_master_name:
      break
    time.sleep(1)
  self.assertNotEqual(
      new_master_name, original_master_name,
      msg="Expected master tablet to change, but it remained as %s"
      % (new_master_name))
  # Fetch the task number once, after the master has changed, instead of
  # re-issuing the lookup on every poll iteration.
  new_master_task_num = self.env.get_tablet_task_number(new_master_name)
  logging.info(
      "restartalloc on %s/%s resulted in new master: %s, task: %d",
      keyspace, shard_name, new_master_name, new_master_task_num)
def implicit_reparent(self, keyspace, shard, num_shards):
  """Performs an implicit reparent.

  This function will call borg restart on the current master task and
  verify that a new task was selected to be the master.

  Args:
    keyspace: Name of the keyspace to reparent (string)
    shard: Numeric ID of the shard to reparent (zero based int)
    num_shards: Total number of shards (int)
  """
  shard_name = sharding_utils.get_shard_name(shard, num_shards)
  original_master_name = self.env.get_current_master_name(keyspace, shard_name)
  original_master_cell = self.env.get_tablet_cell(original_master_name)
  master_task_num = self.env.get_tablet_task_number(original_master_name)
  logging.info('Restarting %s/%s, current master: %s, task: %d',
               keyspace, shard_name, original_master_name, master_task_num)
  ret_val = self.env.restart_mysql_task(
      original_master_cell, keyspace, shard, master_task_num,
      'replica', 'mysql-alloc', True)
  # assertEqual replaces the deprecated assertEquals alias.
  self.assertEqual(
      ret_val, 0, msg='restartalloc failed (returned %d)' % ret_val)

  # Poll until a different tablet becomes master or we time out.
  # Pre-initialize so the assertion below cannot hit a NameError when
  # reparent_timeout_threshold <= 0 and the loop body never runs.
  new_master_name = original_master_name
  start_time = time.time()
  while time.time() - start_time < self.reparent_timeout_threshold:
    new_master_name = self.env.get_current_master_name(keyspace, shard_name)
    if new_master_name != original_master_name:
      break
    time.sleep(1)
  self.assertNotEqual(
      new_master_name, original_master_name,
      msg='Expected master tablet to change, but it remained as %s'
      % (new_master_name))
  # Fetch the task number once, after the master has changed, instead of
  # re-issuing the lookup on every poll iteration.
  new_master_task_num = self.env.get_tablet_task_number(new_master_name)
  logging.info(
      'restartalloc on %s/%s resulted in new master: %s, task: %d',
      keyspace, shard_name, new_master_name, new_master_task_num)
def explicit_reparent(self, keyspace, num_shards, external=False,
                      cross_cell=False):
  """Performs an explicit reparent.

  This function will explicitly select a new master and verify that the
  topology is updated.

  Args:
    keyspace: Name of the keyspace to reparent (string)
    num_shards: Total number of shards (int)
    external: Whether the reparent should be external or through vtctl (bool)
    cross_cell: Whether to reparent to a different cell (bool)

  Returns:
    How long we waited for the reparent.  The time begins just before
    calling an explicit reparent.  This is a list of floats, one for each
    shard.  For cross-cell reparents, it returns [].
  """
  # NOTE(review): the docstring says cross-cell reparents return [], but
  # durations are appended unconditionally below — confirm intended behavior.
  durations = []

  for shard in xrange(num_shards):
    shard_name = sharding_utils.get_shard_name(shard, num_shards)
    original_master = self.env.get_current_master_name(keyspace, shard_name)
    next_master = self.env.get_next_master(keyspace, shard_name, cross_cell)
    self.env.wait_for_good_failover_status(keyspace, shard_name)

    # Call Reparent in a separate thread so we can time how long the
    # topology takes to converge while the reparent is in flight.
    def reparent_shard(shard, shard_name, original_master, next_master):
      logging.info('Reparenting %s/%s from %s to %s', keyspace, shard_name,
                   original_master, next_master[2])
      if external:
        return_code, return_output = self.env.external_reparent(
            keyspace, next_master[0], shard, new_task_num=next_master[1])
      else:
        return_code, return_output = self.env.internal_reparent(
            keyspace, shard_name, next_master[2])
      logging.info('Reparent returned %d for %s/%s: %s', return_code,
                   keyspace, shard_name, return_output)

    thread = threading.Thread(
        target=reparent_shard,
        args=[shard, shard_name, original_master, next_master])
    start_time = time.time()
    thread.start()

    # Wait for the reparent: poll the designated new master's streaming
    # health until it reports itself as MASTER.  Transient parse/RPC errors
    # are expected while the tablet transitions, so they are retried.
    while time.time() - start_time < self.reparent_timeout_threshold:
      try:
        tablet_health = json.loads(
            self.env.vtctl_helper.execute_vtctl_command(
                ['VtTabletStreamHealth', next_master[2]]))
        if tablet_health['target']['tablet_type'] == topodata_pb2.MASTER:
          duration = time.time() - start_time
          durations.append(duration)
          logging.info('Reparent took %f seconds', duration)
          break
      except (IndexError, KeyError, vtctl_helper.VtctlClientError):
        pass
    else:
      self.fail('Timed out waiting for reparent on %s/%s' % (
          keyspace, shard_name))
    thread.join()

  return durations
def explicit_reparent(self, keyspace, num_shards, external=False,
                      cross_cell=False):
  """Performs an explicit reparent.

  This function will explicitly select a new master and verify that the
  topology is updated.

  Args:
    keyspace: Name of the keyspace to reparent (string)
    num_shards: Total number of shards (int)
    external: Whether the reparent should be external or through vtctl (bool)
    cross_cell: Whether to reparent to a different cell (bool)

  Returns:
    How long we waited for the reparent.  The time begins just before
    calling an explicit reparent.  This is a list of floats, one for each
    shard.  For cross-cell reparents, it returns [].
  """
  # NOTE(review): the docstring says cross-cell reparents return [], but
  # durations are appended unconditionally below — confirm intended behavior.
  durations = []

  for shard in xrange(num_shards):
    shard_name = sharding_utils.get_shard_name(shard, num_shards)
    original_master = self.env.get_current_master_name(keyspace, shard_name)
    next_master = self.env.get_next_master(keyspace, shard_name, cross_cell)
    self.env.wait_for_good_failover_status(keyspace, shard_name)

    # Call Reparent in a separate thread so we can time how long the
    # topology takes to converge while the reparent is in flight.
    def reparent_shard(shard, shard_name, original_master, next_master):
      logging.info("Reparenting %s/%s from %s to %s", keyspace, shard_name,
                   original_master, next_master[2])
      if external:
        return_code, return_output = self.env.external_reparent(
            keyspace, next_master[0], shard, new_task_num=next_master[1])
      else:
        return_code, return_output = self.env.internal_reparent(
            keyspace, shard_name, next_master[2])
      logging.info("Reparent returned %d for %s/%s: %s", return_code,
                   keyspace, shard_name, return_output)

    thread = threading.Thread(
        target=reparent_shard,
        args=[shard, shard_name, original_master, next_master])
    start_time = time.time()
    thread.start()

    # Wait for the reparent: poll the designated new master's streaming
    # health until it reports itself as MASTER.  Transient parse/RPC errors
    # are expected while the tablet transitions, so they are retried.
    while time.time() - start_time < self.reparent_timeout_threshold:
      try:
        tablet_health = json.loads(
            self.env.vtctl_helper.execute_vtctl_command(
                ["VtTabletStreamHealth", next_master[2]]))
        if tablet_health["target"]["tablet_type"] == topodata_pb2.MASTER:
          duration = time.time() - start_time
          durations.append(duration)
          logging.info("Reparent took %f seconds", duration)
          break
      except (IndexError, KeyError, vtctl_helper.VtctlClientError):
        pass
    else:
      self.fail("Timed out waiting for reparent on %s/%s" % (
          keyspace, shard_name))
    thread.join()

  return durations