def test_service_disabled(self):
  # perform some inserts, then change state to stop the invalidator
  self.perform_insert(500)
  inv_before = self.replica_stats()['Totals']['Invalidations']
  invStats_before = self.replica_vars()
  utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'spare'])

  # wait until it's stopped
  timeout = 30
  while True:
    invStats_after = self.replica_vars()
    if invStats_after['RowcacheInvalidatorState'] == 'Stopped':
      break
    timeout = utils.wait_step(
        'RowcacheInvalidatorState, got %s expecting Stopped' %
        invStats_after['RowcacheInvalidatorState'], timeout, sleep_time=0.1)

  # check all data is right
  inv_after = self.replica_stats()['Totals']['Invalidations']
  invStats_after = self.replica_vars()
  logging.debug(
      'Tablet Replica->Spare\n\tBefore: Invalidations: %d InvalidatorStats '
      '%s\n\tAfter: Invalidations: %d InvalidatorStats %s',
      inv_before, invStats_before['RowcacheInvalidatorPosition'],
      inv_after, invStats_after['RowcacheInvalidatorPosition'])
  self.assertEqual(
      inv_after, 0,
      'Row-cache invalid. should be disabled, no invalidations')
  self.assertEqual(invStats_after['RowcacheInvalidatorState'], 'Stopped',
                   'Row-cache invalidator should be disabled')

  # and restore the type
  utils.run_vtctl(
      ['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
def test_restart(self):
  """test_restart tests that when starting a second vttablet with the same
  configuration as another one, it will kill the previous process
  and take over listening on the socket.

  If vttablet listens to other ports (like gRPC), this feature will
  break. We believe it is not widely used, so we're OK with this for now.
  (container based installations usually handle tablet restarts
  by using a different set of servers, and do not rely on this
  feature at all).
  """
  if environment.topo_server().flavor() != 'zookeeper':
    logging.info('Skipping this test in non-github tree')
    return
  if tablet_62344.grpc_enabled():
    logging.info('Skipping this test as second gRPC port interferes')
    return

  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  proc1 = tablet_62344.start_vttablet()
  proc2 = tablet_62344.start_vttablet()
  for timeout in xrange(20):
    logging.debug('Sleeping waiting for first process to die')
    time.sleep(1.0)
    proc1.poll()
    if proc1.returncode is not None:
      break
  if proc1.returncode is None:
    self.fail('proc1 still running')
  tablet_62344.kill_vttablet()
def test_stop_replication(self):
  utils.debug("===========test_stop_replication=========")
  utils.run_vtctl('ChangeSlaveType test_nj-0000062345 replica')
  time.sleep(10)
  perform_insert(100)
  master_position = utils.mysql_query(62344, 'vt_test_keyspace',
                                      'show master status')
  # The sleep is needed here, so the invalidator can catch up and the number
  # can be tested.
  replica_tablet.mquery(
      'vt_test_keyspace',
      "select MASTER_POS_WAIT('%s', %d)" % (master_position[0][0],
                                            master_position[0][1]), 5)
  time.sleep(5)
  inv_count1 = framework.MultiDict(
      json.load(urllib2.urlopen(
          "http://%s/debug/table_stats" % replica_host)))['Totals']['Invalidations']
  replica_tablet.mquery('vt_test_keyspace', "stop slave")
  perform_insert(100)
  # EOF is returned after 30s, sleeping a bit more to ensure we catch the EOF
  # and can test replication stop effectively.
  time.sleep(35)
  replica_tablet.mquery('vt_test_keyspace', "start slave")
  master_position = utils.mysql_query(62344, 'vt_test_keyspace',
                                      'show master status')
  # The sleep is needed here, so the invalidator can catch up and the number
  # can be tested.
  replica_tablet.mquery(
      'vt_test_keyspace',
      "select MASTER_POS_WAIT('%s', %d)" % (master_position[0][0],
                                            master_position[0][1]), 5)
  time.sleep(10)
  invalidatorStats = framework.MultiDict(
      json.load(urllib2.urlopen(
          "http://%s/debug/vars" % replica_host)))['CacheInvalidationProcessor']
  utils.debug("invalidatorStats %s" % invalidatorStats)
  inv_count2 = framework.MultiDict(
      json.load(urllib2.urlopen(
          "http://%s/debug/table_stats" % replica_host)))['Totals']['Invalidations']
  utils.debug("invalidator count1 %d count2 %d" % (inv_count1, inv_count2))
  self.assertEqual(invalidatorStats["States"]["Current"], "Enabled",
                   "Row-cache invalidator should be enabled")
  self.assertTrue(inv_count2 - inv_count1 > 0,
                  "invalidator was able to restart after a small pause in replication")
def check_throttler_service_maxrates(self, throttler_server, names, rate):
  """Checks the vtctl ThrottlerMaxRates and ThrottlerSetRate commands."""
  # Avoid flakes by waiting for all throttlers. (Necessary because filtered
  # replication on vttablet will register the throttler asynchronously.)
  timeout_s = 10
  while True:
    stdout, _ = utils.run_vtctl(['ThrottlerMaxRates', '--server',
                                 throttler_server],
                                auto_log=True, trap_output=True)
    if '%d active throttler(s)' % len(names) in stdout:
      break
    timeout_s = utils.wait_step('all throttlers registered', timeout_s)
  for name in names:
    self.assertIn('| %s | %d |' % (name, rate), stdout)
  self.assertIn('%d active throttler(s)' % len(names), stdout)

  # Check that it's possible to change the max rate on the throttler.
  new_rate = 'unlimited'
  stdout, _ = utils.run_vtctl(['ThrottlerSetMaxRate', '--server',
                               throttler_server, new_rate],
                              auto_log=True, trap_output=True)
  self.assertIn('%d active throttler(s)' % len(names), stdout)

  stdout, _ = utils.run_vtctl(['ThrottlerMaxRates', '--server',
                               throttler_server],
                              auto_log=True, trap_output=True)
  for name in names:
    self.assertIn('| %s | %s |' % (name, new_rate), stdout)
  self.assertIn('%d active throttler(s)' % len(names), stdout)
def setup_unsharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', UNSHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', UNSHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
  unsharded_master.init_tablet('master', keyspace=UNSHARDED_KEYSPACE,
                               shard='0')
  unsharded_replica.init_tablet('replica', keyspace=UNSHARDED_KEYSPACE,
                                shard='0')
  unsharded_rdonly.init_tablet('rdonly', keyspace=UNSHARDED_KEYSPACE,
                               shard='0')

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE], auto_log=True)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.create_db('vt_test_keyspace_unsharded')
    t.mquery(unsharded_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', '%s/0' % UNSHARDED_KEYSPACE,
                   unsharded_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE],
                  auto_log=True)

  utils.check_srv_keyspace('test_nj', UNSHARDED_KEYSPACE,
                           'Partitions(master): -\n' +
                           'Partitions(rdonly): -\n' +
                           'Partitions(replica): -\n' +
                           'TabletTypes: master,rdonly,replica')
def _verify_vtctl_set_shard_tablet_control(self):
  """Test that manually editing the blacklisted tables works correctly.

  TODO(mberlin): This is more an integration test and should be moved to the
  Go codebase eventually.
  """
  # check 'vtctl SetShardTabletControl' command works as expected:
  # clear the rdonly entry:
  utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                   'rdonly'], auto_log=True)
  self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA])

  # re-add rdonly:
  utils.run_vtctl(['SetShardTabletControl', '--tables=moving.*,view1',
                   'source_keyspace/0', 'rdonly'], auto_log=True)
  self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA,
                                topodata_pb2.RDONLY])

  # and then clear all entries:
  utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                   'rdonly'], auto_log=True)
  utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                   'replica'], auto_log=True)
  utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                   'master'], auto_log=True)
  shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
  self.assertNotIn('tablet_controls', shard_json)
def test_service_switch(self):
  """Tests the service switch from disable -> enable -> disable."""
  self._test_service_disabled()
  self._test_service_enabled()
  # The above tests leave the service in a disabled state, hence enabling it.
  utils.run_vtctl(
      ['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
  utils.wait_for_tablet_type(
      replica_tablet.tablet_alias,
      tablet.Tablet.tablet_type_value['REPLICA'], 30)
def copy_schema_to_destination_shards(self):
  for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
    utils.run_vtctl(['CopySchemaShard',
                     '--exclude_tables', 'unrelated',
                     shard_rdonly1.tablet_alias,
                     keyspace_shard],
                    auto_log=True)
def _create_source_schema(self):
  create_table_template = '''create table %s(
id bigint not null,
msg varchar(64),
primary key (id),
index by_msg (msg)
) Engine=InnoDB'''
  create_view_template = 'create view %s(id, msg) as select id, msg from %s'

  for t in ['moving1', 'moving2', 'staying1', 'staying2']:
    utils.run_vtctl(['ApplySchema',
                     '-sql=' + create_table_template % (t),
                     'source_keyspace'],
                    auto_log=True)
  utils.run_vtctl(['ApplySchema',
                   '-sql=' + create_view_template % ('view1', 'moving1'),
                   'source_keyspace'],
                  auto_log=True)
  for t in [source_master, source_replica, source_rdonly1, source_rdonly2]:
    utils.run_vtctl(['ReloadSchema', t.tablet_alias])

  # Add a table to the destination keyspace which should be ignored.
  utils.run_vtctl(['ApplySchema',
                   '-sql=' + create_table_template % 'extra1',
                   'destination_keyspace'],
                  auto_log=True)
  for t in [destination_master, destination_replica,
            destination_rdonly1, destination_rdonly2]:
    utils.run_vtctl(['ReloadSchema', t.tablet_alias])
def test_vtaction_dies_hard(self):
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

  # start a 'vtctl Sleep' command, don't wait for it
  action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep',
                                    tablet_62344.tablet_alias, '60s'],
                                   trap_output=True)
  action_path = action_path.strip()

  # wait for the action to be 'Running', capture its pid
  timeout = 10
  while True:
    an = utils.run_vtctl_json(['ReadTabletAction', action_path])
    if an.get('State', None) == 'Running':
      pid = an['Pid']
      logging.info('Action is running with pid %u, good', pid)
      break
    timeout = utils.wait_step('sleep action to run', timeout)

  # let's kill it hard, wait until it's gone for good
  os.kill(pid, signal.SIGKILL)
  try:
    os.waitpid(pid, 0)
  except OSError:
    # this means the process doesn't exist any more, we're good
    pass

  # Then let's make sure the next action cleans up properly and can execute.
  # If that doesn't work, this will time out and the test will fail.
  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

  tablet_62344.kill_vttablet()
def test_vtgate_qps(self): # create the topology utils.run_vtctl('CreateKeyspace test_keyspace') t = tablet.Tablet(tablet_uid=1, cell="nj") t.init_tablet("master", "test_keyspace", "0") t.update_addrs() utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True) # start vtgate and the qps-er vtgate_proc, vtgate_port = utils.vtgate_start( extra_args=['-cpu_profile', os.path.join(environment.tmproot, 'vtgate.pprof')]) qpser = utils.run_bg(environment.binary_args('zkclient2') + [ '-server', 'localhost:%u' % vtgate_port, '-mode', 'qps', '-zkclient_cpu_profile', os.path.join(environment.tmproot, 'zkclient2.pprof'), 'test_nj', 'test_keyspace']) qpser.wait() # get the vtgate vars, make sure we have what we need v = utils.get_vars(vtgate_port) # some checks on performance / stats rpcCalls = v['TopoReaderRpcQueryCount']['test_nj'] if rpcCalls < MIN_QPS * 10: self.fail('QPS is too low: %u < %u' % (rpcCalls / 10, MIN_QPS)) else: logging.debug("Recorded qps: %u", rpcCalls / 10) utils.vtgate_kill(vtgate_proc)
def test_sigterm(self):
  utils.run_vtctl('CreateKeyspace test_keyspace')

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

  # start a 'vtctl Sleep' command in the background
  args = [environment.binary_path('vtctl'),
          '-log_dir', environment.vtlogroot,
          '--alsologtostderr']
  args.extend(environment.topo_server_flags())
  args.extend(environment.tablet_manager_protocol_flags())
  args.extend(['Sleep', tablet_62344.tablet_alias, '60s'])
  sp = utils.run_bg(args, stdout=PIPE, stderr=PIPE)

  # wait for it to start, and let's kill it
  time.sleep(4.0)
  utils.run(['pkill', 'vtaction'])
  out, err = sp.communicate()

  # check the vtctl command got the right remote error back
  if 'vtaction interrupted by signal' not in err:
    self.fail('cannot find expected output in error: ' + err)
  logging.debug('vtaction was interrupted correctly:\n' + err)

  tablet_62344.kill_vttablet()
def test_sigterm(self):
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

  # start a 'vtctl Sleep' command, don't wait for it
  action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep',
                                    tablet_62344.tablet_alias, '60s'],
                                   trap_output=True)
  action_path = action_path.strip()

  # wait for the action to be 'Running', capture its pid
  timeout = 10
  while True:
    an = utils.run_vtctl_json(['ReadTabletAction', action_path])
    if an.get('State', None) == 'Running':
      pid = an['Pid']
      logging.info('Action is running with pid %u, good', pid)
      break
    timeout = utils.wait_step('sleep action to run', timeout)

  # let's kill the vtaction process with a regular SIGTERM
  os.kill(pid, signal.SIGTERM)

  # check the vtctl command got the right remote error back
  out, err = utils.run_vtctl(['WaitForAction', action_path],
                             trap_output=True, raise_on_error=False)
  if 'vtaction interrupted by signal' not in err:
    self.fail('cannot find expected output in error: ' + err)
  logging.debug('vtaction was interrupted correctly:\n' + err)

  tablet_62344.kill_vttablet()
def _test_vtctl_copyschemashard(self, source):
  # Apply initial schema to the whole keyspace before creating shard 2.
  self._apply_initial_schema()

  _setup_shard_2()

  try:
    # InitShardMaster creates the db, but there shouldn't be any tables yet.
    self._check_tables(shard_2_master, 0)
    self._check_tables(shard_2_replica1, 0)

    # Run the command twice to make sure it's idempotent.
    for _ in range(2):
      utils.run_vtctl(['CopySchemaShard', source, 'test_keyspace/2'],
                      auto_log=True)

    # shard_2_master should look the same as the replica we copied from
    self._check_tables(shard_2_master, 4)
    utils.wait_for_replication_pos(shard_2_master, shard_2_replica1)
    self._check_tables(shard_2_replica1, 4)
    shard_0_schema = self._get_schema(shard_0_master.tablet_alias)
    shard_2_schema = self._get_schema(shard_2_master.tablet_alias)
    self.assertEqual(shard_0_schema, shard_2_schema)
  finally:
    _teardown_shard_2()
def check_stream_health_equals_binlog_player_vars(self, tablet_obj, count):
  """Checks the variables exported by streaming health check match vars.

  Args:
    tablet_obj: the tablet to check.
    count: number of binlog players to expect.
  """
  blp_stats = utils.get_vars(tablet_obj.port)
  self.assertEqual(blp_stats['BinlogPlayerMapSize'], count)

  # Enforce health check because it's not running by default as
  # tablets may not be started with it, or may not run it in time.
  utils.run_vtctl(['RunHealthCheck', tablet_obj.tablet_alias])
  stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                        '-count', '1',
                                        tablet_obj.tablet_alias])
  logging.debug('Got health: %s', str(stream_health))
  self.assertNotIn('serving', stream_health)
  self.assertIn('realtime_stats', stream_health)
  self.assertNotIn('health_error', stream_health['realtime_stats'])
  self.assertIn('binlog_players_count', stream_health['realtime_stats'])
  self.assertEqual(blp_stats['BinlogPlayerMapSize'],
                   stream_health['realtime_stats']['binlog_players_count'])
  self.assertEqual(blp_stats['BinlogPlayerSecondsBehindMaster'],
                   stream_health['realtime_stats'].get(
                       'seconds_behind_master_filtered_replication', 0))
def test_multisnapshot_mysqlctl():
  populate = sum([[
      "insert into vt_insert_test_%s (msg) values ('test %s')" % (i, x)
      for x in xrange(4)] for i in range(6)], [])
  create = ['''create table vt_insert_test_%s (
id bigint auto_increment,
msg varchar(64),
primary key (id)
) Engine=InnoDB''' % i for i in range(6)]

  utils.zk_wipe()

  # Start up a master mysql and vttablet
  utils.run_vtctl('CreateKeyspace -force test_keyspace')

  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  utils.run_vtctl('RebuildShardGraph test_keyspace/0')
  utils.validate_topology()

  tablet_62344.populate('vt_test_keyspace', create, populate)

  tablet_62344.start_vttablet()
  err = tablet_62344.mysqlctl(
      '-port %u -mysql-port %u multisnapshot --tables=vt_insert_test_1,'
      'vt_insert_test_2,vt_insert_test_3 --spec=-0000000000000003- '
      'vt_test_keyspace id' %
      (tablet_62344.port, tablet_62344.mysql_port)).wait()
  if err != 0:
    raise utils.TestError('mysqlctl multisnapshot failed')
  if os.path.exists(os.path.join(
      utils.vtdataroot,
      'snapshot/vt_0000062344/data/vt_test_keyspace-,0000000000000003/'
      'vt_insert_test_4.csv.gz')):
    raise utils.TestError("Table vt_insert_test_4 wasn't supposed to be dumped.")
  for kr in ('vt_test_keyspace-,0000000000000003',
             'vt_test_keyspace-0000000000000003,'):
    path = os.path.join(utils.vtdataroot,
                        'snapshot/vt_0000062344/data/', kr,
                        'vt_insert_test_1.0.csv.gz')
    with gzip.open(path) as f:
      if len(f.readlines()) != 2:
        raise utils.TestError('Data looks wrong in %s' % path)
def start_tablets():
  global __tablets
  # start tablets
  for t in __tablets:
    t.start_vttablet(wait_for_state=None)

  # wait for them to come in serving state
  for t in __tablets:
    t.wait_for_vttablet_state('SERVING')

  # InitShardMaster for master tablets
  for t in __tablets:
    if t.tablet_type == 'master':
      utils.run_vtctl(['InitShardMaster', t.keyspace + '/' + t.shard,
                       t.tablet_alias], auto_log=True)

  for ks in topo_schema.keyspaces:
    ks_name = ks[0]
    ks_type = ks[1]
    utils.run_vtctl(['RebuildKeyspaceGraph', ks_name],
                    auto_log=True)
    if ks_type == shard_constants.RANGE_SHARDED:
      utils.check_srv_keyspace('test_nj', ks_name,
                               'Partitions(master): -80 80-\n'
                               'Partitions(rdonly): -80 80-\n'
                               'Partitions(replica): -80 80-\n')
def test_actions_and_timeouts(self):
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  utils.validate_topology()
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.start_vttablet()

  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

  # schedule long action in the background, sleep a little bit to make sure
  # it started to run
  args = (environment.binary_args('vtctl') +
          environment.topo_server().flags() +
          ['-tablet_manager_protocol',
           protocols_flavor().tablet_manager_protocol(),
           '-tablet_protocol', protocols_flavor().tabletconn_protocol(),
           '-log_dir', environment.vtlogroot,
           'Sleep', tablet_62344.tablet_alias, '10s'])
  bg = utils.run_bg(args)
  time.sleep(3)

  # try a frontend RefreshState that should timeout as the tablet is busy
  # running the other one
  _, stderr = utils.run_vtctl(
      ['-wait-time', '3s', 'RefreshState', tablet_62344.tablet_alias],
      expect_fail=True)
  self.assertIn(protocols_flavor().rpc_timeout_message(), stderr)

  # wait for the background vtctl
  bg.wait()

  tablet_62344.kill_vttablet()
def run_test_sigterm():
  utils.zk_wipe()
  utils.run_vtctl('CreateKeyspace -force test_keyspace')

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

  # start a 'vtctl Sleep' command in the background
  sp = utils.run_bg(utils.vtroot +
                    '/bin/vtctl -logfile=/dev/null Sleep %s 60s' %
                    tablet_62344.tablet_alias,
                    stdout=PIPE, stderr=PIPE)

  # wait for it to start, and let's kill it
  time.sleep(2.0)
  utils.run(['pkill', 'vtaction'])
  out, err = sp.communicate()

  # check the vtctl command got the right remote error back
  if "vtaction interrupted by signal" not in err:
    raise utils.TestError("cannot find expected output in error:", err)
  utils.debug("vtaction was interrupted correctly:\n" + err)

  tablet_62344.kill_vttablet()
def set_up(self):
  try:
    environment.topo_server_setup()
    utils.wait_procs([t.init_mysql() for t in self.tablets])
    utils.run_vtctl(['CreateKeyspace', self.keyspace])
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace,
                     'keyspace_id', 'uint64'])
    for t in self.tablets:
      t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard)
    utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
    for t in self.tablets:
      t.create_db('vt_' + self.keyspace)
      t.mquery(t.dbname, create_table)
      t.start_vttablet(wait_for_state=None)
    for t in self.tablets:
      t.wait_for_vttablet_state('SERVING')
    for t in self.tablets:
      if t.type == 'master':
        utils.run_vtctl(['ReparentShard', '-force',
                         self.keyspace + '/' + t.shard, t.tablet_alias],
                        auto_log=True)
    utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
    self.vtgate_server, self.vtgate_port = utils.vtgate_start()
    vtgate_client = zkocc.ZkOccConnection(
        'localhost:%u' % self.vtgate_port, 'test_nj', 30.0)
    topology.read_topology(vtgate_client)
  except:
    self.shutdown()
    raise
def _test_reparent_from_outside_check(self, brutal, base_time):
  # make sure the shard replication graph is fine
  shard_replication = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                            'test_keyspace/0'])
  hashed_nodes = {}
  for node in shard_replication['nodes']:
    key = (node['tablet_alias']['cell'] + '-' +
           str(node['tablet_alias']['uid']))
    hashed_nodes[key] = True
  logging.debug('Got shard replication nodes: %s', str(hashed_nodes))
  expected_nodes = {
      'test_nj-41983': True,
      'test_nj-62044': True,
  }
  if not brutal:
    expected_nodes['test_nj-62344'] = True
  self.assertEqual(expected_nodes, hashed_nodes,
                   'Got unexpected nodes: %s != %s' % (str(expected_nodes),
                                                       str(hashed_nodes)))

  # make sure the master status page says it's the master
  tablet_62044_master_status = tablet_62044.get_status()
  self.assertIn('Serving graph: test_keyspace 0 master',
                tablet_62044_master_status)

  # make sure the master health stream says it's the master too
  # (health check is disabled on these servers, force it first)
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'])
  health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                 tablet_62044.tablet_alias])
  self.assertEqual(health['target']['tablet_type'], topodata_pb2.MASTER)
  # have to compare the int version, or the rounding errors can break
  self.assertTrue(
      health['tablet_externally_reparented_timestamp'] >= int(base_time))
def setup_tablets():
  # Start up a master mysql and vttablet
  logging.debug('Setting up tablets')
  utils.run_vtctl('CreateKeyspace test_keyspace')
  master_tablet.init_tablet('master', 'test_keyspace', '0')
  replica_tablet.init_tablet('replica', 'test_keyspace', '0')
  utils.run_vtctl('RebuildShardGraph test_keyspace/0')
  utils.validate_topology()
  master_tablet.create_db('vt_test_keyspace')
  replica_tablet.create_db('vt_test_keyspace')

  utils.run_vtctl('RebuildKeyspaceGraph test_keyspace')

  zkocc_server = utils.zkocc_start()

  master_tablet.start_vttablet()
  replica_tablet.start_vttablet()
  utils.run_vtctl('SetReadWrite ' + master_tablet.tablet_alias)
  utils.check_db_read_write(62344)

  for t in [master_tablet, replica_tablet]:
    t.reset_replication()
  utils.run_vtctl('ReparentShard -force test_keyspace/0 ' +
                  master_tablet.tablet_alias, auto_log=True)

  # reset counter so tests don't assert
  tablet.Tablet.tablets_running = 0

  setup_schema()
  master_tablet.vquery('set vt_schema_reload_time=86400',
                       path='test_keyspace/0')
  replica_tablet.vquery('set vt_schema_reload_time=86400',
                        path='test_keyspace/0')
def init_tablet(self, tablet_type, keyspace, shard,
                start=False, dbname=None, parent=True,
                wait_for_start=True, include_mysql_port=True, **kwargs):
  self.tablet_type = tablet_type
  self.keyspace = keyspace
  self.shard = shard
  self.dbname = dbname or ('vt_' + self.keyspace)

  args = ['InitTablet',
          '-hostname', 'localhost',
          '-port', str(self.port)]
  if include_mysql_port:
    args.extend(['-mysql_port', str(self.mysql_port)])
  if parent:
    args.append('-parent')
  if dbname:
    args.extend(['-db_name_override', dbname])
  if keyspace:
    args.extend(['-keyspace', keyspace])
  if shard:
    args.extend(['-shard', shard])
  args.extend([self.tablet_alias, tablet_type])
  utils.run_vtctl(args)

  if start:
    if not wait_for_start:
      expected_state = None
    elif (tablet_type == 'master' or tablet_type == 'replica' or
          tablet_type == 'rdonly' or tablet_type == 'batch'):
      expected_state = 'SERVING'
    else:
      expected_state = 'NOT_SERVING'
    self.start_vttablet(wait_for_state=expected_state, **kwargs)
def test_vtgate_qps(self): # create the topology utils.run_vtctl('CreateKeyspace test_keyspace') t = tablet.Tablet(tablet_uid=1, cell="nj") t.init_tablet("master", "test_keyspace", "0") t.update_addrs() utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True) # start vtgate and the qps-er vtgate_proc, vtgate_port = utils.vtgate_start() qpser = utils.run_bg(environment.binary_path('zkclient2')+' -server localhost:%u -mode qps2 test_nj test_keyspace' % vtgate_port) time.sleep(10) utils.kill_sub_process(qpser) # get the vtgate vars, make sure we have what we need v = utils.get_vars(vtgate_port) # some checks on performance / stats # a typical workstation will do 38-40k QPS, check we have more than 10k rpcCalls = v['TopoReaderRpcQueryCount']['test_nj'] if rpcCalls < 100000: self.fail('QPS is too low: %u < 10000' % (rpcCalls / 10)) else: logging.debug("Recorded qps: %u", rpcCalls / 10) utils.vtgate_kill(vtgate_proc)
def _check_query_service(self, tablet, serving, tablet_control_disabled):
  """Checks that the query service is enabled or disabled on the tablet.

  It also checks whether the tablet control status is the reason for
  being enabled / disabled.

  It also runs a remote RunHealthCheck to make sure it doesn't change
  the serving state.
  """
  tablet_vars = utils.get_vars(tablet.port)
  if serving:
    expected_state = 'SERVING'
  else:
    expected_state = 'NOT_SERVING'
  self.assertEqual(
      tablet_vars['TabletStateName'], expected_state,
      'tablet %s is not in the right serving state: got %s expected %s' %
      (tablet.tablet_alias, tablet_vars['TabletStateName'], expected_state))

  status = tablet.get_status()
  if tablet_control_disabled:
    self.assertIn('Query Service disabled by TabletControl', status)
  else:
    self.assertNotIn('Query Service disabled by TabletControl', status)

  if tablet.tablet_type == 'rdonly':
    utils.run_vtctl(['RunHealthCheck', tablet.tablet_alias, 'rdonly'],
                    auto_log=True)

    tablet_vars = utils.get_vars(tablet.port)
    if serving:
      expected_state = 'SERVING'
    else:
      expected_state = 'NOT_SERVING'
    self.assertEqual(
        tablet_vars['TabletStateName'], expected_state,
        'tablet %s is not in the right serving state after health check: '
        'got %s expected %s' %
        (tablet.tablet_alias, tablet_vars['TabletStateName'],
         expected_state))
def setUpClass(klass):
  utils.run_vtctl('CreateKeyspace test_keyspace')

  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_0_spare.init_tablet('spare', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
  idle.init_tablet('idle')
  scrap.init_tablet('idle')

  utils.run_vtctl(
      'RebuildShardGraph /zk/global/vt/keyspaces/test_keyspace/shards/*',
      auto_log=True)
  utils.run_vtctl('RebuildKeyspaceGraph /zk/global/vt/keyspaces/*',
                  auto_log=True)

  for t in assigned:
    t.create_db('vt_test_keyspace')
    t.start_vttablet()

  for t in scrap, idle, shard_0_spare:
    t.start_vttablet(wait_for_state='NOT_SERVING')

  scrap.scrap()

  utils.run_vtctl('ReparentShard -force test_keyspace/-80 ' +
                  shard_0_master.tablet_alias, auto_log=True)
  utils.run_vtctl('ReparentShard -force test_keyspace/80- ' +
                  shard_1_master.tablet_alias, auto_log=True)

  # run checks now that the tablets are started
  utils.validate_topology()
def test_service_disabled(self):
  # perform some inserts, then change state to stop the invalidator
  self.perform_insert(500)
  inv_before = self.replica_stats()['Totals']['Invalidations']
  invStats_before = self.replica_vars()
  utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'spare'])

  # wait until it's stopped
  for timeout in xrange(300):
    time.sleep(0.1)
    invStats_after = self.replica_vars()
    logging.debug("Got state %s" %
                  invStats_after["RowcacheInvalidationState"])
    if invStats_after["RowcacheInvalidationState"] == "Disabled":
      break

  # check all data is right
  inv_after = self.replica_stats()['Totals']['Invalidations']
  invStats_after = self.replica_vars()
  logging.debug(
      "Tablet Replica->Spare\n\tBefore: Invalidations: %d "
      "InvalidatorStats %s\n\tAfter: Invalidations: %d InvalidatorStats %s" %
      (inv_before, invStats_before['RowcacheInvalidationCheckPoint'],
       inv_after, invStats_after['RowcacheInvalidationCheckPoint']))
  self.assertEqual(inv_after, 0,
                   "Row-cache invalid. should be disabled, no invalidations")
  self.assertEqual(invStats_after["RowcacheInvalidationState"], "Disabled",
                   "Row-cache invalidator should be disabled")

  # and restore the type
  utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
def setUpClass(klass): utils.run_vtctl("CreateKeyspace test_keyspace") shard_0_master.init_tablet("master", "test_keyspace", "-80") shard_0_replica.init_tablet("replica", "test_keyspace", "-80") shard_0_spare.init_tablet("spare", "test_keyspace", "-80") shard_1_master.init_tablet("master", "test_keyspace", "80-") shard_1_replica.init_tablet("replica", "test_keyspace", "80-") idle.init_tablet("idle") scrap.init_tablet("idle") utils.run_vtctl("RebuildShardGraph /zk/global/vt/keyspaces/test_keyspace/shards/*", auto_log=True) utils.run_vtctl("RebuildKeyspaceGraph /zk/global/vt/keyspaces/*", auto_log=True) for t in assigned: t.create_db("vt_test_keyspace") t.start_vttablet() for t in scrap, idle, shard_0_spare: t.start_vttablet(wait_for_state="NOT_SERVING") scrap.scrap() for t in [shard_0_master, shard_0_replica, shard_0_spare, shard_1_master, shard_1_replica, idle, scrap]: t.reset_replication() utils.run_vtctl("ReparentShard -force test_keyspace/-80 " + shard_0_master.tablet_alias, auto_log=True) utils.run_vtctl("ReparentShard -force test_keyspace/80- " + shard_1_master.tablet_alias, auto_log=True) # run checks now before we start the tablets utils.validate_topology()
def init_tablet(self, tablet_type, keyspace=None, shard=None, force=True,
                start=False, dbname=None, parent=True, wait_for_start=True,
                **kwargs):
  self.keyspace = keyspace
  self.shard = shard

  if dbname is None:
    self.dbname = 'vt_' + (self.keyspace or 'database')
  else:
    self.dbname = dbname

  args = ['InitTablet',
          '-hostname', 'localhost',
          '-port', str(self.port),
          '-mysql_port', str(self.mysql_port),
          ]
  if force:
    args.append('-force')
  if parent:
    args.append('-parent')
  if dbname:
    args.extend(['-db-name-override', dbname])
  if keyspace:
    args.extend(['-keyspace', keyspace])
  if shard:
    args.extend(['-shard', shard])
  args.extend([self.tablet_alias, tablet_type])
  utils.run_vtctl(args)

  if start:
    if not wait_for_start:
      expected_state = None
    elif (tablet_type == 'master' or tablet_type == 'replica' or
          tablet_type == 'rdonly' or tablet_type == 'batch'):
      expected_state = 'SERVING'
    else:
      expected_state = 'NOT_SERVING'
    self.start_vttablet(wait_for_state=expected_state, **kwargs)
def test_topo_read_threshold(self):
  before_topo_rtt = get_topo_rtt()
  # Check original state.
  keyspace_obj = topology.get_keyspace('test_keyspace')
  self.assertNotEqual(keyspace_obj, None, 'test_keyspace should not be None')
  self.assertEqual(keyspace_obj.sharding_col_type,
                   keyrange_constants.KIT_UINT64,
                   'ShardingColumnType should be %s' %
                   keyrange_constants.KIT_UINT64)

  # Change the keyspace object.
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                   'keyspace_id', keyrange_constants.KIT_BYTES])
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # sleep throttle interval and check values again.
  # the keyspace should have changed and also caused a rtt to topo server.
  time.sleep(self.keyspace_fetch_throttle)
  topology.refresh_keyspace(self.vtgate_client, 'test_keyspace')
  keyspace_obj = topology.get_keyspace('test_keyspace')
  after_1st_clear = get_topo_rtt()
  self.assertEqual(after_1st_clear - before_topo_rtt, 1,
                   'One additional round-trip to topo server')
  self.assertEqual(keyspace_obj.sharding_col_type,
                   keyrange_constants.KIT_BYTES,
                   'ShardingColumnType should be %s' %
                   keyrange_constants.KIT_BYTES)

  # Refresh without sleeping for throttle time shouldn't cause additional rtt.
  topology.refresh_keyspace(self.vtgate_client, 'test_keyspace')
  keyspace_obj = topology.get_keyspace('test_keyspace')
  after_2nd_clear = get_topo_rtt()
  self.assertEqual(after_2nd_clear - after_1st_clear, 0,
                   'No additional round-trips to topo server')
def test_delete_keyspace(self):
  utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
  utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
  utils.run_vtctl(
      ['InitTablet', '-keyspace=test_delete_keyspace', '-shard=0',
       'test_nj-0000000100', 'master'])

  # Can't delete keyspace if there are shards present.
  utils.run_vtctl(['DeleteKeyspace', 'test_delete_keyspace'],
                  expect_fail=True)
  # Can't delete shard if there are tablets present.
  utils.run_vtctl(
      ['DeleteShard', '-even_if_serving', 'test_delete_keyspace/0'],
      expect_fail=True)

  # Use recursive DeleteShard to remove tablets.
  utils.run_vtctl(
      ['DeleteShard', '-even_if_serving', '-recursive',
       'test_delete_keyspace/0'])
  # Now non-recursive DeleteKeyspace should work.
  utils.run_vtctl(['DeleteKeyspace', 'test_delete_keyspace'])

  # Start over and this time use recursive DeleteKeyspace to do everything.
  utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
  utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=0', 'test_nj-0000000100', 'master'])

  # Create the serving/replication entries and check that they exist,
  # so we can later check they're deleted.
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])
  utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])

  # Recursive DeleteKeyspace
  utils.run_vtctl(['DeleteKeyspace', '-recursive', 'test_delete_keyspace'])

  # Check that everything is gone.
  utils.run_vtctl(['GetKeyspace', 'test_delete_keyspace'], expect_fail=True)
  utils.run_vtctl(['GetShard', 'test_delete_keyspace/0'], expect_fail=True)
  utils.run_vtctl(['GetTablet', 'test_nj-0000000100'], expect_fail=True)
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
      expect_fail=True)
  utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'],
                  expect_fail=True)
def test_health_check_drained_state_does_not_shutdown_query_service(self):
  # This test is similar to test_health_check, but has the following
  # differences:
  # - the second tablet is an 'rdonly' and not a 'replica'
  # - the second tablet will be set to 'drained' and we expect that
  #   the query service won't be shutdown

  # Setup master and rdonly tablets.
  tablet_62344.init_tablet('replica', 'test_keyspace', '0')

  for t in tablet_62344, tablet_62044:
    t.create_db('vt_test_keyspace')

  # Note we only have a master and a rdonly. So we can't enable
  # semi-sync in this case, as the rdonly slaves don't semi-sync ack.
  tablet_62344.start_vttablet(wait_for_state=None, enable_semi_sync=False)
  tablet_62044.start_vttablet(wait_for_state=None,
                              init_tablet_type='rdonly',
                              init_keyspace='test_keyspace',
                              init_shard='0',
                              enable_semi_sync=False)

  tablet_62344.wait_for_vttablet_state('NOT_SERVING')
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')
  self.check_healthz(tablet_62044, False)

  # Enable replication.
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                   tablet_62344.tablet_alias])
  # Trigger healthcheck to save time waiting for the next interval.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  tablet_62044.wait_for_vttablet_state('SERVING')
  self.check_healthz(tablet_62044, True)

  # Change from rdonly to drained and stop replication. (These
  # actions are similar to the SplitClone vtworker command
  # implementation.) The tablet will stay healthy, and the
  # query service is still running.
  utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
  # Trying to drain the same tablet again should raise an error.
  try:
    utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
  except Exception as e:
    s = str(e)
    self.assertIn('already drained', s)

  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
  # Trigger healthcheck explicitly to avoid waiting for the next interval.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'drained')
  self.check_healthz(tablet_62044, True)
  # Query service is still running.
  tablet_62044.wait_for_vttablet_state('SERVING')

  # Restart replication. Tablet will become healthy again.
  utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'rdonly'])
  utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  self.check_healthz(tablet_62044, True)

  # kill the tablets
  tablet.kill_tablets([tablet_62344, tablet_62044])
def test_health_check(self):
  # one master, one replica that starts not initialized
  # (for the replica, we let vttablet do the InitTablet)
  tablet_62344.init_tablet('replica', 'test_keyspace', '0')

  for t in tablet_62344, tablet_62044:
    t.create_db('vt_test_keyspace')

  tablet_62344.start_vttablet(wait_for_state=None)
  tablet_62044.start_vttablet(wait_for_state=None,
                              lameduck_period='5s',
                              init_tablet_type='replica',
                              init_keyspace='test_keyspace',
                              init_shard='0')

  tablet_62344.wait_for_vttablet_state('NOT_SERVING')
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')
  self.check_healthz(tablet_62044, False)

  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                   tablet_62344.tablet_alias])

  # make sure the unhealthy slave goes to healthy
  tablet_62044.wait_for_vttablet_state('SERVING')
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  self.check_healthz(tablet_62044, True)

  # make sure the master is still master
  ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
  self.assertEqual(ti['type'], topodata_pb2.MASTER,
                   'unexpected master type: %s' % ti['type'])

  # stop replication at the mysql level.
  tablet_62044.mquery('', 'stop slave')
  # vttablet replication_reporter should restart it.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  # insert something on the master and wait for it on the slave.
  tablet_62344.mquery('vt_test_keyspace', [
      'create table repl_test_table (id int)',
      'insert into repl_test_table values (123)'], write=True)
  timeout = 10.0
  while True:
    try:
      result = tablet_62044.mquery('vt_test_keyspace',
                                   'select * from repl_test_table')
      if result:
        self.assertEqual(result[0][0], 123L)
        break
    except MySQLdb.ProgrammingError:
      # Maybe the create table hasn't gone through yet, we wait more
      logging.exception('got this exception waiting for data, ignoring it')
    timeout = utils.wait_step(
        'slave replication repaired by replication_reporter', timeout)

  # stop replication, make sure we don't go unhealthy.
  # (we have a baseline as well, so the time should be good).
  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  self.check_healthz(tablet_62044, True)

  # make sure status web page is healthy
  self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)

  # make sure the health stream is updated
  health = utils.run_vtctl_json(
      ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
  self.assertTrue(
      ('seconds_behind_master' not in health['realtime_stats']) or
      (health['realtime_stats']['seconds_behind_master'] < 30),
      'got unexpected health: %s' % str(health))
  self.assertIn('serving', health)

  # then restart replication, make sure we stay healthy
  utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])

  # make sure status web page is healthy
  self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)

  # now test VtTabletStreamHealth returns the right thing
  stdout, _ = utils.run_vtctl(['VtTabletStreamHealth', '-count', '2',
                               tablet_62044.tablet_alias],
                              trap_output=True, auto_log=True)
  lines = stdout.splitlines()
  self.assertEqual(len(lines), 2)
  for line in lines:
    logging.debug('Got health: %s', line)
    data = json.loads(line)
    self.assertIn('realtime_stats', data)
    self.assertIn('serving', data)
    self.assertTrue(data['serving'])
    self.assertNotIn('health_error', data['realtime_stats'])
    self.assertNotIn('tablet_externally_reparented_timestamp', data)
    self.assertEqual('test_keyspace', data['target']['keyspace'])
    self.assertEqual('0', data['target']['shard'])
    self.assertEqual(topodata_pb2.REPLICA, data['target']['tablet_type'])

  # Test that VtTabletStreamHealth reports a QPS >0.0.
  # Therefore, issue several reads first.
  # NOTE: This may be potentially flaky because we'll observe a QPS >0.0
  #       exactly "once" for the duration of one sampling interval (5s) and
  #       after that we'll see 0.0 QPS rates again. If this becomes actually
  #       flaky, we need to read continuously in a separate thread.
  for _ in range(10):
    tablet_62044.execute('select 1 from dual')

  # This may take up to 5 seconds to become true because we sample the query
  # counts for the rates only every 5 seconds (see query_service_stats.go).
  timeout = 10
  while True:
    health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                   tablet_62044.tablet_alias])
    if health['realtime_stats'].get('qps', 0.0) > 0.0:
      break
    timeout = utils.wait_step('QPS >0.0 seen', timeout)

  # kill the tablets
  tablet.kill_tablets([tablet_62344, tablet_62044])
def _test_sanity(self):
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
  utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
  tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
  utils.validate_topology()

  # if these statements don't run before the tablet it will wedge
  # waiting for the db to become accessible. this is more a bug than
  # a feature.
  tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                        self._populate_vt_select_test)

  tablet_62344.start_vttablet()

  # make sure the query service is started right away.
  qr = tablet_62344.execute('select id, msg from vt_select_test')
  self.assertEqual(len(qr['rows']), 4,
                   'expected 4 rows in vt_select_test: %s' % str(qr))
  self.assertEqual(qr['fields'][0]['name'], 'id')
  self.assertEqual(qr['fields'][1]['name'], 'msg')

  # test exclude_field_names to vttablet works as expected.
  qr = tablet_62344.execute('select id, msg from vt_select_test',
                            execute_options='included_fields:TYPE_ONLY ')
  self.assertEqual(len(qr['rows']), 4,
                   'expected 4 rows in vt_select_test: %s' % str(qr))
  self.assertNotIn('name', qr['fields'][0])
  self.assertNotIn('name', qr['fields'][1])

  # make sure direct dba queries work
  query_result = utils.run_vtctl_json(
      ['ExecuteFetchAsDba', '-json', tablet_62344.tablet_alias,
       'select * from vt_test_keyspace.vt_select_test'])
  self.assertEqual(
      len(query_result['rows']), 4,
      'expected 4 rows in vt_select_test: %s' % str(query_result))
  self.assertEqual(
      len(query_result['fields']), 2,
      'expected 2 fields in vt_select_test: %s' % str(query_result))

  # check Ping / RefreshState / RefreshStateByShard
  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
  utils.run_vtctl(['RefreshState', tablet_62344.tablet_alias])
  utils.run_vtctl(['RefreshStateByShard', 'test_keyspace/0'])
  utils.run_vtctl(
      ['RefreshStateByShard', '--cells=test_nj', 'test_keyspace/0'])

  # Quickly check basic actions.
  utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
  utils.wait_db_read_only(62344)

  utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
  utils.check_db_read_write(62344)

  utils.validate_topology()
  utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
  # not pinging tablets, as it enables replication checks, and they
  # break because we only have a single master, no slaves
  utils.run_vtctl(['ValidateShard', '-ping-tablets=false', 'test_keyspace/0'])

  tablet_62344.kill_vttablet()
def test_sharding(self):
  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # run checks now before we start the tablets
  utils.validate_topology()

  # create databases, start the tablets, wait for them to start
  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None)
  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  # apply the schema on the first shard through vtctl, so all tablets
  # are the same (replication is not enabled yet, so allow_replication=false
  # is just there to be tested)
  utils.run_vtctl(['ApplySchema',
                   '-stop-replication',
                   '-sql=' + create_vt_select_test.replace("\n", ""),
                   shard_0_master.tablet_alias])
  utils.run_vtctl(['ApplySchema',
                   '-stop-replication',
                   '-sql=' + create_vt_select_test.replace("\n", ""),
                   shard_0_replica.tablet_alias])

  if environment.topo_server_implementation == 'zookeeper':
    # start zkocc, we'll use it later, indirectly with the vtdb-zkocc driver
    zkocc_server = utils.zkocc_start()

  # start vtgate, we'll use it later
  vtgate_server, vtgate_port = utils.vtgate_start()

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.reset_replication()
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  # apply the schema on the second shard using a simple schema upgrade
  utils.run_vtctl(['ApplySchemaShard',
                   '-simple',
                   '-sql=' + create_vt_select_test_reverse.replace("\n", ""),
                   'test_keyspace/80-'])

  # insert some values directly (db is RO after minority reparent)
  # FIXME(alainjobart) these values don't match the shard map
  utils.run_vtctl(['SetReadWrite', shard_0_master.tablet_alias])
  utils.run_vtctl(['SetReadWrite', shard_1_master.tablet_alias])
  shard_0_master.mquery(
      'vt_test_keyspace',
      "insert into vt_select_test (id, msg) values (1, 'test 1')",
      write=True)
  shard_1_master.mquery(
      'vt_test_keyspace',
      "insert into vt_select_test (id, msg) values (10, 'test 10')",
      write=True)

  utils.validate_topology(ping_tablets=True)

  utils.pause("Before the sql scatter query")

  # note the order of the rows is not guaranteed, as the go routines
  # doing the work can go out of order
  self._check_rows(["Index\tid\tmsg", "1\ttest 1", "10\ttest 10"])

  # write a value, re-read them all
  utils.vtclient2(
      3803, "/test_nj/test_keyspace/master",
      "insert into vt_select_test (id, msg) values (:keyspace_id, 'test 2')",
      bindvars='{"keyspace_id": 2}', driver="vtdb", verbose=True)
  self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                    "10\ttest 10"])

  # make sure the '2' value was written on first shard
  rows = shard_0_master.mquery(
      'vt_test_keyspace', "select id, msg from vt_select_test order by id")
  self.assertEqual(rows, ((1, 'test 1'), (2, 'test 2'),),
                   'wrong mysql_query output: %s' % str(rows))

  utils.pause("After db writes")

  # now use various topo servers and streaming or both for the same query
  self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                    "10\ttest 10"],
                   driver="vtdb-streaming")
  if environment.topo_server_implementation == 'zookeeper':
    self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                      "10\ttest 10"],
                     driver="vtdb-zk")
    self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                      "10\ttest 10"],
                     driver="vtdb-zk-streaming")
    self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                      "10\ttest 10"],
                     driver="vtdb-zkocc")
    self._check_rows(["Index\tid\tmsg", "1\ttest 1", "2\ttest 2",
                      "10\ttest 10"],
                     driver="vtdb-zkocc-streaming")

  # make sure the schema checking works
  self._check_rows_schema_diff("vtdb")
  if environment.topo_server_implementation == 'zookeeper':
    self._check_rows_schema_diff("vtdb-zk")
    self._check_rows_schema_diff("vtdb-zkocc")

  # throw in some schema validation step
  # we created the schema differently, so it should show
  utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/-80'])
  utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/80-'])
  out, err = utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                             trap_output=True, raise_on_error=False)
  if ('test_nj-0000062344 and test_nj-0000062346 disagree on schema '
      'for table vt_select_test:\nCREATE TABLE' not in err or
      'test_nj-0000062344 and test_nj-0000062347 disagree on schema '
      'for table vt_select_test:\nCREATE TABLE' not in err):
    self.fail('wrong ValidateSchemaKeyspace output: ' + err)

  # validate versions
  utils.run_vtctl(['ValidateVersionShard', 'test_keyspace/-80'],
                  auto_log=True)
  utils.run_vtctl(['ValidateVersionKeyspace', 'test_keyspace'],
                  auto_log=True)

  # show and validate permissions
  utils.run_vtctl(['GetPermissions', 'test_nj-0000062344'], auto_log=True)
  utils.run_vtctl(['ValidatePermissionsShard', 'test_keyspace/-80'],
                  auto_log=True)
  utils.run_vtctl(['ValidatePermissionsKeyspace', 'test_keyspace'],
                  auto_log=True)

  if environment.topo_server_implementation == 'zookeeper':
    # and create zkns on this complex keyspace, make sure a few files
    # are created
    utils.run_vtctl(['ExportZknsForKeyspace', 'test_keyspace'])
    out, err = utils.run(environment.binary_argstr('zk') +
                         ' ls -R /zk/test_nj/zk?s/vt/test_keysp*',
                         trap_output=True)
    lines = out.splitlines()
    for base in ['-80', '80-']:
      for db_type in ['master', 'replica']:
        for sub_path in ['', '.vdns', '/0', '/_vtocc.vdns']:
          expected = ('/zk/test_nj/zkns/vt/test_keyspace/' + base + '/' +
                      db_type + sub_path)
          if expected not in lines:
            self.fail('missing zkns part:\n%s\nin:%s' % (expected, out))

  # now try to connect using the python client and shard-aware connection
  # to both shards

  # first get the topology and check it
  vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                        "test_nj", 30.0)
  topology.read_keyspaces(vtgate_client)

  shard_0_master_addrs = topology.get_host_port_by_name(
      vtgate_client, "test_keyspace.-80.master:_vtocc")
  if len(shard_0_master_addrs) != 1:
    self.fail(
        'topology.get_host_port_by_name failed for '
        '"test_keyspace.-80.master:_vtocc", got: %s' %
        " ".join(["%s:%u(%s)" % (h, p, str(e))
                  for (h, p, e) in shard_0_master_addrs]))
  logging.debug("shard 0 master addrs: %s",
                " ".join(["%s:%u(%s)" % (h, p, str(e))
                          for (h, p, e) in shard_0_master_addrs]))

  # connect to shard -80
  conn = tablet3.TabletConnection(
      "%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]),
      "", "test_keyspace", "-80", 10.0)
  conn.dial()
  (results, rowcount, lastrowid, fields) = conn._execute(
      "select id, msg from vt_select_test order by id", {})
  self.assertEqual(results, [(1, 'test 1'), (2, 'test 2'),],
                   'wrong conn._execute output: %s' % str(results))

  # connect to shard 80-
  shard_1_master_addrs = topology.get_host_port_by_name(
      vtgate_client, "test_keyspace.80-.master:_vtocc")
  conn = tablet3.TabletConnection(
      "%s:%u" % (shard_1_master_addrs[0][0], shard_1_master_addrs[0][1]),
      "", "test_keyspace", "80-", 10.0)
  conn.dial()
  (results, rowcount, lastrowid, fields) = conn._execute(
      "select id, msg from vt_select_test order by id", {})
  self.assertEqual(results, [(10, 'test 10'),],
                   'wrong conn._execute output: %s' % str(results))
  vtgate_client.close()

  # try to connect with bad shard
  try:
    conn = tablet3.TabletConnection(
        "localhost:%u" % shard_0_master.port,
        "", "test_keyspace", "-90", 10.0)
    conn.dial()
    self.fail('expected an exception')
  except Exception as e:
    if "fatal: Shard mismatch, expecting -80, received -90" not in str(e):
      self.fail('unexpected exception: ' + str(e))

  utils.vtgate_kill(vtgate_server)
  if environment.topo_server_implementation == 'zookeeper':
    utils.kill_sub_process(zkocc_server)
  tablet.kill_tablets([shard_0_master, shard_0_replica,
                       shard_1_master, shard_1_replica])
def test_get_srv_keyspace_names(self):
  stdout, _ = utils.run_vtctl(['GetSrvKeyspaceNames', 'test_nj'],
                              trap_output=True)
  self.assertEqual(set(stdout.splitlines()),
                   {SHARDED_KEYSPACE, UNSHARDED_KEYSPACE})
def test_remove_keyspace_cell(self):
  utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
  utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
  utils.run_vtctl(['CreateShard', 'test_delete_keyspace/1'])
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=0', 'test_ca-0000000100', 'master'])
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=1', 'test_ca-0000000101', 'master'])
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=0', 'test_nj-0000000100', 'replica'])
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=1', 'test_nj-0000000101', 'replica'])

  # Create the serving/replication entries and check that they exist,
  # so we can later check they're deleted.
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'])
  utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])
  utils.run_vtctl(['GetSrvKeyspace', 'test_ca', 'test_delete_keyspace'])

  # Just remove the shard from one cell (including tablets),
  # but leaving the global records and other cells/shards alone.
  utils.run_vtctl(
      ['RemoveShardCell', '-recursive', 'test_delete_keyspace/0', 'test_nj'])

  # Check that the shard is gone from test_nj.
  srv_keyspace = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])
  for partition in srv_keyspace['partitions']:
    self.assertEqual(
        len(partition['shard_references']), 1,
        'RemoveShardCell should have removed one shard from the target '
        'cell: ' + json.dumps(srv_keyspace))

  # Make sure the shard is still serving in test_ca.
  srv_keyspace = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_ca', 'test_delete_keyspace'])
  for partition in srv_keyspace['partitions']:
    self.assertEqual(
        len(partition['shard_references']), 2,
        'RemoveShardCell should not have changed other cells: ' +
        json.dumps(srv_keyspace))

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])

  utils.run_vtctl(['GetKeyspace', 'test_delete_keyspace'])
  utils.run_vtctl(['GetShard', 'test_delete_keyspace/0'])
  utils.run_vtctl(['GetTablet', 'test_ca-0000000100'])
  utils.run_vtctl(['GetTablet', 'test_nj-0000000100'], expect_fail=True)
  utils.run_vtctl(['GetTablet', 'test_nj-0000000101'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_ca', 'test_delete_keyspace/0'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
      expect_fail=True)
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'])
  utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])

  # Add it back to do another test.
  utils.run_vtctl(
      ['InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
       '-shard=0', 'test_nj-0000000100', 'replica'])
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])

  # Now use RemoveKeyspaceCell to remove all shards.
  utils.run_vtctl(
      ['RemoveKeyspaceCell', '-recursive', 'test_delete_keyspace',
       'test_nj'])
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_ca', 'test_delete_keyspace/0'])
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
      expect_fail=True)
  utils.run_vtctl(
      ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'],
      expect_fail=True)

  # Clean up.
  utils.run_vtctl(['DeleteKeyspace', '-recursive', 'test_delete_keyspace'])
def _delete_swap(self, swap_uuid):
  """Delete the schema swap with the given uuid."""
  utils.run_vtctl(['WorkflowDelete', swap_uuid], auto_log=True)
def setUpClass(cls):
  utils.run_vtctl(['CreateKeyspace',
                   '--sharding_column_name', 'keyspace_id',
                   '--sharding_column_type', 'uint64',
                   'test_keyspace'])
  utils.run_vtctl(['CreateKeyspace',
                   '--served_from',
                   'master:test_keyspace,replica:test_keyspace,'
                   'rdonly:test_keyspace',
                   'redirected_keyspace'])

  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('spare', 'test_keyspace', '-80')
  shard_0_spare.init_tablet('spare', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
  utils.run_vtctl(['RebuildKeyspaceGraph', 'redirected_keyspace'],
                  auto_log=True)

  # start running all the tablets
  for t in [shard_0_master, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None,
                     extra_args=utils.vtctld.process_args())
  shard_0_replica.create_db('vt_test_keyspace')
  shard_0_replica.start_vttablet(extra_args=utils.vtctld.process_args(),
                                 target_tablet_type='replica',
                                 wait_for_state=None)
  shard_0_spare.start_vttablet(wait_for_state=None,
                               extra_args=utils.vtctld.process_args())

  # wait for the right states
  for t in [shard_0_master, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')
  for t in [shard_0_replica, shard_0_spare]:
    t.wait_for_vttablet_state('NOT_SERVING')

  for t in [shard_0_master, shard_0_replica, shard_0_spare,
            shard_1_master, shard_1_replica]:
    t.reset_replication()
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  shard_0_replica.wait_for_vttablet_state('SERVING')

  # run checks now
  utils.validate_topology()
def test_sharding(self): utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'keyspace_id', '--sharding_column_type', 'uint64', 'test_keyspace' ]) shard_0_master.init_tablet('master', 'test_keyspace', '-80') shard_0_replica.init_tablet('replica', 'test_keyspace', '-80') shard_1_master.init_tablet('master', 'test_keyspace', '80-') shard_1_replica.init_tablet('replica', 'test_keyspace', '80-') # run checks now before we start the tablets utils.validate_topology() # create databases, start the tablets, wait for them to start for t in [ shard_0_master, shard_0_replica, shard_1_master, shard_1_replica ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None) for t in [shard_0_master, shard_1_master]: t.wait_for_vttablet_state('SERVING') for t in [shard_0_replica, shard_1_replica]: t.wait_for_vttablet_state('NOT_SERVING') # apply the schema on the first shard through vtctl, so all tablets # are the same. shard_0_master.mquery('vt_test_keyspace', create_vt_select_test.replace('\n', ''), write=True) shard_0_replica.mquery('vt_test_keyspace', create_vt_select_test.replace('\n', ''), write=True) # apply the schema on the second shard. shard_1_master.mquery('vt_test_keyspace', create_vt_select_test_reverse.replace('\n', ''), write=True) shard_1_replica.mquery('vt_test_keyspace', create_vt_select_test_reverse.replace('\n', ''), write=True) for t in [ shard_0_master, shard_0_replica, shard_1_master, shard_1_replica ]: utils.run_vtctl(['ReloadSchema', t.tablet_alias]) for t in [ shard_0_master, shard_0_replica, shard_1_master, shard_1_replica ]: t.reset_replication() utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) # insert some values directly (db is RO after minority reparent) # FIXME(alainjobart) these values don't match the shard map utils.run_vtctl(['SetReadWrite', shard_0_master.tablet_alias]) utils.run_vtctl(['SetReadWrite', shard_1_master.tablet_alias]) shard_0_master.mquery( 'vt_test_keyspace', "insert into vt_select_test (id, msg) values (1, 'test 1')", write=True) shard_1_master.mquery( 'vt_test_keyspace', "insert into vt_select_test (id, msg) values (10, 'test 10')", write=True) utils.validate_topology(ping_tablets=True) utils.pause('Before the sql scatter query') # make sure the '1' value was written on first shard rows = shard_0_master.mquery( 'vt_test_keyspace', 'select id, msg from vt_select_test order by id') self.assertEqual(rows, ((1, 'test 1'), ), 'wrong mysql_query output: %s' % str(rows)) utils.pause('After db writes') # throw in some schema validation step # we created the schema differently, so it should show utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/-80']) utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/80-']) _, err = utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], trap_output=True, raise_on_error=False) if ('test_nj-0000062344 and test_nj-0000062346 disagree on schema ' 'for table vt_select_test:\nCREATE TABLE' not in err or 'test_nj-0000062344 and test_nj-0000062347 disagree on schema ' 'for table vt_select_test:\nCREATE TABLE' not in err): self.fail('wrong ValidateSchemaKeyspace output: ' + err) # validate versions utils.run_vtctl(['ValidateVersionShard', 'test_keyspace/-80'], auto_log=True) utils.run_vtctl(['ValidateVersionKeyspace', 'test_keyspace'], auto_log=True) # show and validate permissions utils.run_vtctl(['GetPermissions', 'test_nj-0000062344'], 
auto_log=True) utils.run_vtctl(['ValidatePermissionsShard', 'test_keyspace/-80'], auto_log=True) utils.run_vtctl(['ValidatePermissionsKeyspace', 'test_keyspace'], auto_log=True) # connect to the tablets directly, make sure they know / validate # their own shard sql = 'select id, msg from vt_select_test order by id' qr = shard_0_master.execute(sql) self.assertEqual(qr['rows'], [ [1, 'test 1'], ]) qr = shard_1_master.execute(sql) self.assertEqual(qr['rows'], [ [10, 'test 10'], ]) # make sure that if we use a wrong target, the destination rejects # the query. _, stderr = utils.run_vtctl([ 'VtTabletExecute', '-json', '-keyspace', 'test_keyspace', '-shard', '-90', '-tablet_type', 'master', shard_0_master.tablet_alias, sql ], expect_fail=True) self.assertIn('retry: Invalid shard -90', stderr) tablet.kill_tablets( [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica])
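# The module-level schema strings referenced above are not shown. A sketch of
# what they presumably contain: the same table with its columns in a different
# order, which is what makes ValidateSchemaKeyspace report a disagreement
# between the two shards (assumption: the real definitions may differ):
create_vt_select_test = '''create table vt_select_test (
id bigint not null,
msg varchar(64),
primary key (id)
) Engine=InnoDB'''

create_vt_select_test_reverse = '''create table vt_select_test (
msg varchar(64),
id bigint not null,
primary key (id)
) Engine=InnoDB'''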
v = utils.get_vars(replica_tablet.port) if v['UpdateStreamState'] != 'Enabled': self.fail("Update stream service should be 'Enabled' but is '%s'" % v['UpdateStreamState']) self.assertTrue('DML' in v['UpdateStreamEvents']) self.assertTrue('POS' in v['UpdateStreamEvents']) logging.debug('Testing enable -> disable switch starting @ %s', start_position) replica_conn = self._get_replica_stream_conn() replica_conn.dial() disabled_err = False txn_count = 0 try: data = replica_conn.stream_start(start_position) utils.run_vtctl( ['ChangeSlaveType', replica_tablet.tablet_alias, 'spare']) utils.wait_for_tablet_type(replica_tablet.tablet_alias, 'spare', 30) while data: data = replica_conn.stream_next() if data is not None and data['Category'] == 'POS': txn_count += 1 logging.debug('Test Service Switch: FAIL') return except dbexceptions.DatabaseError, e: self.assertEqual( 'Fatal Service Error: Disconnecting because the Update Stream ' 'service has been disabled', str(e)) except Exception, e: logging.error('Exception: %s', str(e)) logging.error('Traceback: %s', traceback.format_exc())
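# Once the enable -> disable switch has been verified, the tablet type is
# typically restored so later tests see a serving replica again; a sketch of
# that cleanup step (assumption about where it belongs in the test):
utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
utils.wait_for_tablet_type(replica_tablet.tablet_alias, 'replica', 30)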
def _stop_swap(self, swap_uuid): """Stop the running schema swap with the given uuid.""" utils.run_vtctl(['WorkflowStop', swap_uuid], auto_log=True)
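# Hypothetical usage sketch: a tearDown that cleans up a schema swap workflow,
# assuming the uuid returned when the workflow was created was stored on self
# (self.swap_uuid is an illustrative name, not taken from this file):
def tearDown(self):
  if getattr(self, 'swap_uuid', None):
    self._stop_swap(self.swap_uuid)
    self._delete_swap(self.swap_uuid)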
def setUpModule(): try: environment.topo_server().setup() logging.debug("Creating certificates") os.makedirs(cert_dir) # Create CA certificate ca_key = cert_dir + "/ca-key.pem" ca_cert = cert_dir + "/ca-cert.pem" openssl(["genrsa", "-out", cert_dir + "/ca-key.pem"]) ca_config = cert_dir + "/ca.config" with open(ca_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql CA emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl(["req", "-new", "-x509", "-nodes", "-days", "3600", "-batch", "-config", ca_config, "-key", ca_key, "-out", ca_cert]) # Create mysql server certificate, remove passphrase, and sign it server_key = cert_dir + "/server-key.pem" server_cert = cert_dir + "/server-cert.pem" server_req = cert_dir + "/server-req.pem" server_config = cert_dir + "/server.config" with open(server_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql Server emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl(["req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-config", server_config, "-keyout", server_key, "-out", server_req]) openssl(["rsa", "-in", server_key, "-out", server_key]) openssl(["x509", "-req", "-in", server_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "01", "-out", server_cert]) # Create mysql client certificate, remove passphrase, and sign it client_key = cert_dir + "/client-key.pem" client_cert = cert_dir + "/client-cert.pem" client_req = cert_dir + "/client-req.pem" client_config = cert_dir + "/client.config" with open(client_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql Client emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl(["req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-config", client_config, "-keyout", client_key, "-out", client_req]) openssl(["rsa", "-in", client_key, "-out", client_key]) openssl(["x509", "-req", "-in", client_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "02", "-out", client_cert]) # Create vt server certificate, remove passphrase, and sign it vt_server_key = cert_dir + "/vt-server-key.pem" vt_server_cert = cert_dir + "/vt-server-cert.pem" vt_server_req = cert_dir + "/vt-server-req.pem" vt_server_config = cert_dir + "/server.config" with open(vt_server_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Vitess Server emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl(["req", 
"-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-config", vt_server_config, "-keyout", vt_server_key, "-out", vt_server_req]) openssl(["rsa", "-in", vt_server_key, "-out", vt_server_key]) openssl(["x509", "-req", "-in", vt_server_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "03", "-out", vt_server_cert]) extra_my_cnf = cert_dir + "/secure.cnf" fd = open(extra_my_cnf, "w") fd.write("ssl-ca=" + ca_cert + "\n") fd.write("ssl-cert=" + server_cert + "\n") fd.write("ssl-key=" + server_key + "\n") fd.close() setup_procs = [ shard_0_master.init_mysql(extra_my_cnf=extra_my_cnf), shard_0_slave.init_mysql(extra_my_cnf=extra_my_cnf), ] utils.wait_procs(setup_procs) utils.run_vtctl('CreateKeyspace test_keyspace') shard_0_master.init_tablet('master', 'test_keyspace', '0') shard_0_slave.init_tablet('replica', 'test_keyspace', '0') utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True) # create databases so vttablet can start behaving normally shard_0_master.create_db('vt_test_keyspace') shard_0_slave.create_db('vt_test_keyspace') except: tearDownModule() raise
def setUpModule(): global vtgate_server global vtgate_port global vtgate_socket_file global master_start_position try: environment.topo_server().setup() # start mysql instance external to the test setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()] utils.wait_procs(setup_procs) # Start up a master mysql and vttablet logging.debug('Setting up tablets') utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) master_tablet.init_tablet('master', 'test_keyspace', '0') replica_tablet.init_tablet('replica', 'test_keyspace', '0') utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0']) utils.validate_topology() master_tablet.create_db('vt_test_keyspace') master_tablet.create_db('other_database') replica_tablet.create_db('vt_test_keyspace') replica_tablet.create_db('other_database') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace']) vtgate_socket_file = environment.tmproot + '/vtgate.sock' vtgate_server, vtgate_port = utils.vtgate_start( socket_file=vtgate_socket_file) master_tablet.start_vttablet() replica_tablet.start_vttablet() utils.run_vtctl(['SetReadWrite', master_tablet.tablet_alias]) utils.check_db_read_write(master_tablet.tablet_uid) for t in [master_tablet, replica_tablet]: t.reset_replication() utils.run_vtctl([ 'ReparentShard', '-force', 'test_keyspace/0', master_tablet.tablet_alias ], auto_log=True) # reset counter so tests don't assert tablet.Tablet.tablets_running = 0 master_start_position = _get_master_current_position() master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test) master_tablet.mquery('vt_test_keyspace', _create_vt_a) master_tablet.mquery('vt_test_keyspace', _create_vt_b) utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias]) utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias]) except: tearDownModule() raise
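# A sketch of the matching tearDownModule (assumptions: teardown_mysql mirrors
# init_mysql, wait_procs accepts raise_on_error, and the kill helpers are the
# ones used elsewhere in these tests):
def tearDownModule():
  utils.vtgate_kill(vtgate_server)
  tablet.kill_tablets([master_tablet, replica_tablet])
  utils.wait_procs([master_tablet.teardown_mysql(),
                    replica_tablet.teardown_mysql()],
                   raise_on_error=False)
  environment.topo_server().teardown()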
def test_reparent_with_down_slave(self, shard_id='0'): utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) # create the database so vttablets start, as they are serving tablet_62344.create_db('vt_test_keyspace') tablet_62044.create_db('vt_test_keyspace') tablet_41983.create_db('vt_test_keyspace') tablet_31981.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True, wait_for_start=False) # Create a few slaves for testing reparenting. tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_41983.init_tablet('spare', 'test_keyspace', shard_id, start=True, wait_for_start=False) # wait for all tablets to start for t in [tablet_62344, tablet_62044, tablet_31981]: t.wait_for_vttablet_state('SERVING') tablet_41983.wait_for_vttablet_state('NOT_SERVING') # Recompute the shard layout node - until you do that, it might not be # valid. utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id]) utils.validate_topology() # Force the slaves to reparent assuming that all the datasets are identical. for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.reset_replication() utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/' + shard_id, tablet_62344.tablet_alias ]) utils.validate_topology(ping_tablets=True) tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test) utils.wait_procs([tablet_41983.shutdown_mysql()]) # Perform a graceful reparent operation. It will fail as one tablet is down. _, stderr = utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/' + shard_id, tablet_62044.tablet_alias ], expect_fail=True) self.assertIn('TabletManager.SetMaster on test_nj-0000041983 error', stderr) # insert data into the new master, check the connected slaves work self._populate_vt_insert_test(tablet_62044, 3) self._check_vt_insert_test(tablet_31981, 3) self._check_vt_insert_test(tablet_62344, 3) # restart mysql on the old slave, should still be connecting to the # old master utils.wait_procs([tablet_41983.start_mysql()]) utils.pause('check orphan') # reparent the tablet (will not start replication, so we have to # do it ourselves), then it should catch up on replication really quickly utils.run_vtctl(['ReparentTablet', tablet_41983.tablet_alias]) utils.run_vtctl(['StartSlave', tablet_41983.tablet_alias]) # wait until it gets the data self._check_vt_insert_test(tablet_41983, 3) tablet.kill_tablets( [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
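# The reparent tests rely on _create_vt_insert_test, _populate_vt_insert_test
# and _check_vt_insert_test, which are not shown here. A minimal sketch of
# what they could look like (assumption: the real helpers may differ):
_create_vt_insert_test = '''create table vt_insert_test (
id bigint auto_increment,
msg varchar(64),
primary key (id)
) Engine=InnoDB'''

def _populate_vt_insert_test(self, master_tablet, index):
  q = "insert into vt_insert_test(msg) values ('test %s')" % index
  master_tablet.mquery('vt_test_keyspace', q, write=True)

def _check_vt_insert_test(self, t, index):
  # Poll the tablet until the row shows up, i.e. replication has caught up.
  timeout = 10.0
  while True:
    result = t.mquery(
        'vt_test_keyspace',
        'select msg from vt_insert_test where id=%d' % index)
    if len(result) == 1:
      break
    timeout = utils.wait_step(
        'waiting for replication to catch up on %s' % t.tablet_alias,
        timeout, sleep_time=0.1)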
def test_resharding(self): utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'bad_column', '--sharding_column_type', 'bytes', '--split_shard_count', '2', 'test_keyspace' ]) utils.run_vtctl([ 'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64' ], expect_fail=True) utils.run_vtctl([ 'SetKeyspaceShardingInfo', '-force', '-split_shard_count', '4', 'test_keyspace', 'keyspace_id', keyspace_id_type ]) shard_0_master.init_tablet('master', 'test_keyspace', '-80') shard_0_replica.init_tablet('replica', 'test_keyspace', '-80') shard_0_ny_slave.init_tablet('spare', 'test_keyspace', '-80') shard_1_master.init_tablet('master', 'test_keyspace', '80-') shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-') shard_1_ny_slave.init_tablet('spare', 'test_keyspace', '80-') shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) ks = utils.run_vtctl_json( ['GetSrvKeyspace', 'test_nj', 'test_keyspace']) self.assertEqual(ks['SplitShardCount'], 4) # we set full_mycnf_args to True as a test in the KIT_BYTES case full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES # create databases so vttablet can start behaving normally for t in [ shard_0_master, shard_0_replica, shard_0_ny_slave, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args) # wait for the tablets shard_0_master.wait_for_vttablet_state('SERVING') shard_0_replica.wait_for_vttablet_state('SERVING') shard_0_ny_slave.wait_for_vttablet_state('NOT_SERVING') # spare shard_1_master.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('SERVING') shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') # spare shard_1_ny_slave.wait_for_vttablet_state('NOT_SERVING') # spare shard_1_rdonly.wait_for_vttablet_state('SERVING') # reparent to make the tablets work utils.run_vtctl([ 'ReparentShard', '-force', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'ReparentShard', '-force', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) # create the tables self._create_schema() self._insert_startup_values() self._test_keyrange_constraints() # create the split shards shard_2_master.init_tablet('master', 'test_keyspace', '80-c0') shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0') shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0') shard_3_master.init_tablet('master', 'test_keyspace', 'c0-') shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-') shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'c0-') # start vttablet on the split shards (no db created, # so they're all not serving) shard_3_master.start_vttablet(wait_for_state=None, target_tablet_type='replica') for t in [ shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_replica, shard_3_rdonly ]: t.start_vttablet(wait_for_state=None) for t in [ shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly ]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl([ 'ReparentShard', '-force', 'test_keyspace/80-c0', shard_2_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'ReparentShard', '-force', 'test_keyspace/c0-', shard_3_master.tablet_alias ], auto_log=True) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) 
utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' + 'Partitions(rdonly): -80 80-\n' + 'Partitions(replica): -80 80-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) if use_clone_worker: # the worker will do everything. We test with source_reader_count=10 # (down from default=20) as connection pool is not big enough for 20. # min_table_size_for_split is set to 1 as to force a split even on the # small table we have. utils.run_vtworker([ '--cell', 'test_nj', '--command_display_interval', '10ms', 'SplitClone', '--exclude_tables', 'unrelated', '--strategy', 'populateBlpCheckpoint', '--source_reader_count', '10', '--min_table_size_for_split', '1', 'test_keyspace/80-c0' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'], auto_log=True) # TODO(alainjobart): experiment with the dontStartBinlogPlayer option else: # take the snapshot for the split utils.run_vtctl([ 'MultiSnapshot', '--spec=80-c0-', '--exclude_tables=unrelated', shard_1_slave1.tablet_alias ], auto_log=True) # the snapshot_copy hook will copy the snapshot files to # VTDATAROOT/tmp/... as a test. We want to use these for one half, # but not for the other, so we test both scenarios. os.unlink( os.path.join( environment.tmproot, "snapshot-from-%s-for-%s.tar" % (shard_1_slave1.tablet_alias, "80-c0"))) # wait for tablet's binlog server service to be enabled after snapshot shard_1_slave1.wait_for_binlog_server_state("Enabled") # perform the restores: first one from source tablet. We removed the # storage backup, so it's coming from the tablet itself. # we also delay starting the binlog player, then enable it. utils.run_vtctl([ 'ShardMultiRestore', '-strategy=populateBlpCheckpoint,dontStartBinlogPlayer', 'test_keyspace/80-c0', shard_1_slave1.tablet_alias ], auto_log=True) timeout = 10 while True: shard_2_master_status = shard_2_master.get_status() if not "not starting because flag 'DontStart' is set" in shard_2_master_status: timeout = utils.wait_step( 'shard 2 master has not failed starting yet', timeout) continue logging.debug( "shard 2 master is waiting on flag removal, good") break qr = utils.run_vtctl_json([ 'ExecuteFetch', shard_2_master.tablet_alias, 'update _vt.blp_checkpoint set flags="" where source_shard_uid=0' ]) self.assertEqual(qr['RowsAffected'], 1) timeout = 10 while True: shard_2_master_status = shard_2_master.get_status() if "not starting because flag 'DontStart' is set" in shard_2_master_status: timeout = utils.wait_step( 'shard 2 master has not started replication yet', timeout) continue logging.debug("shard 2 master has started replication, good") break # second restore from storage: to be sure, we stop vttablet, and restart # it afterwards shard_1_slave1.kill_vttablet() utils.run_vtctl([ 'ShardMultiRestore', '-strategy=populateBlpCheckpoint', 'test_keyspace/c0-', shard_1_slave1.tablet_alias ], auto_log=True) shard_1_slave1.start_vttablet(wait_for_state=None) shard_1_slave1.wait_for_binlog_server_state("Enabled") # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl([ 'ValidateSchemaKeyspace', '--exclude_tables=unrelated', 'test_keyspace' ], auto_log=True) # check the binlog players are running and exporting vars shard_2_master.wait_for_binlog_player_count(1) shard_3_master.wait_for_binlog_player_count(1) self._check_binlog_player_vars(shard_2_master) self._check_binlog_player_vars(shard_3_master) # check that binlog server exported the stats vars 
self._check_binlog_server_vars(shard_1_slave1) # testing filtered replication: insert a bunch of data on shard 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. logging.debug("Inserting lots of data on source shard") self._insert_lots(1000) logging.debug("Checking 80 percent of data is sent quickly") self._check_lots_timeout(1000, 80, 5) logging.debug("Checking all data goes through eventually") self._check_lots_timeout(1000, 100, 20) logging.debug("Checking no data was sent the wrong way") self._check_lots_not_present(1000) self._check_binlog_player_vars(shard_2_master, seconds_behind_master_max=30) self._check_binlog_player_vars(shard_3_master, seconds_behind_master_max=30) # use the vtworker checker to compare the data logging.debug("Running vtworker SplitDiff") utils.run_vtworker( ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.pause("Good time to test vtworker for diffs") # get status for a destination master tablet, make sure we have it all shard_2_master_status = shard_2_master.get_status() self.assertIn('Binlog player state: Running', shard_2_master_status) self.assertIn( '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>', shard_2_master_status) self.assertIn('</html>', shard_2_master_status) # start a thread to insert data into shard_1 in the background # with current time, and monitor the delay insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000, 0x9000000000000000) insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001, 0xD000000000000000) monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low") monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high") # tests a failover switching serving to a different replica utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica']) utils.run_vtctl( ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('NOT_SERVING') # test data goes through again logging.debug("Inserting lots of data on source shard") self._insert_lots(1000, base=1000) logging.debug("Checking 80 percent of data was sent quickly") self._check_lots_timeout(1000, 80, 5, base=1000) # check we can't migrate the master just yet utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], expect_fail=True) # check query service is off on master 2 and master 3, as filtered # replication is enabled. Even health check that is enabled on # master 3 should not interfere. 
shard_2_master_vars = utils.get_vars(shard_2_master.port) self.assertEqual(shard_2_master_vars['TabletStateName'], 'NOT_SERVING') shard_3_master_vars = utils.get_vars(shard_3_master.port) self.assertEqual(shard_3_master_vars['TabletStateName'], 'NOT_SERVING') # now serve rdonly from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' + 'Partitions(rdonly): -80 80-c0 c0-\n' + 'Partitions(replica): -80 80-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) # then serve replica from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' + 'Partitions(rdonly): -80 80-c0 c0-\n' + 'Partitions(replica): -80 80-c0 c0-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) # move replica back and forth utils.run_vtctl( ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' + 'Partitions(rdonly): -80 80-c0 c0-\n' + 'Partitions(replica): -80 80-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' + 'Partitions(rdonly): -80 80-c0 c0-\n' + 'Partitions(replica): -80 80-c0 c0-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) # reparent shard_2 to shard_2_replica1, then insert more data and # see it flow through still utils.run_vtctl([ 'ReparentShard', 'test_keyspace/80-c0', shard_2_replica1.tablet_alias ]) logging.debug( "Inserting lots of data on source shard after reparenting") self._insert_lots(3000, base=2000) logging.debug("Checking 80 percent of data was sent fairly quickly") self._check_lots_timeout(3000, 80, 10, base=2000) # use the vtworker checker to compare the data again logging.debug("Running vtworker SplitDiff") utils.run_vtworker( ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'], auto_log=True) # going to migrate the master now, check the delays monitor_thread_1.done = True monitor_thread_2.done = True insert_thread_1.done = True insert_thread_2.done = True logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u", monitor_thread_1.object_name, monitor_thread_1.max_lag, monitor_thread_1.lag_sum / monitor_thread_1.sample_count) logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u", monitor_thread_2.object_name, monitor_thread_2.max_lag, monitor_thread_2.lag_sum / monitor_thread_2.sample_count) # then serve master from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-c0 c0-\n' + 'Partitions(rdonly): -80 80-c0 c0-\n' + 'Partitions(replica): -80 80-c0 c0-\n' + 'TabletTypes: master,rdonly,replica', keyspace_id_type=keyspace_id_type) # check the binlog players are gone now shard_2_master.wait_for_binlog_player_count(0) shard_3_master.wait_for_binlog_player_count(0) # get status for a destination master tablet, make sure it's good 
shard_2_master_status = shard_2_master.get_status() self.assertIn('No binlog player is running', shard_2_master_status) self.assertIn('</html>', shard_2_master_status) # scrap the original tablets in the original shard for t in [ shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly ]: utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True) tablet.kill_tablets([ shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly ]) for t in [ shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly ]: utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True) # rebuild the serving graph, all mentions of the old shards should be gone utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # test RemoveShardCell utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True, expect_fail=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True) shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-']) if shard['Cells']: self.fail("Non-empty Cells record for shard: %s" % str(shard)) # delete the original shard utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True) # kill everything tablet.kill_tablets([ shard_0_master, shard_0_replica, shard_0_ny_slave, shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly ])
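# Data is driven through helpers like _insert_value, which are not shown here.
# A simplified sketch of a row insert that carries the keyspace_id the
# filtered-replication rules route on (assumption: the real helper also tags
# the statement with routing comments):
def _insert_value(self, t, table, row_id, msg, keyspace_id):
  t.mquery(
      'vt_test_keyspace',
      ['begin',
       "insert into %s(id, msg, keyspace_id) values(%d, '%s', 0x%x)" %
       (table, row_id, msg, keyspace_id),
       'commit'],
      write=True)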
def _test_reparent_slave_offline(self, shard_id='0'): utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) # create the database so vttablets start, as they are serving tablet_62344.create_db('vt_test_keyspace') tablet_62044.create_db('vt_test_keyspace') tablet_41983.create_db('vt_test_keyspace') tablet_31981.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True, wait_for_start=False) # Create a few slaves for testing reparenting. tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) # wait for all tablets to start for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.wait_for_vttablet_state('SERVING') # Recompute the shard layout node - until you do that, it might not be # valid. utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id]) utils.validate_topology() # Force the slaves to reparent assuming that all the datasets are # identical. for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.reset_replication() utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/' + shard_id, tablet_62344.tablet_alias ]) utils.validate_topology(ping_tablets=True) self._check_db_addr(shard_id, 'master', tablet_62344.port) # Kill one tablet so we seem offline tablet_31981.kill_vttablet() # Perform a graceful reparent operation. utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/' + shard_id, tablet_62044.tablet_alias ]) self._check_db_addr(shard_id, 'master', tablet_62044.port) tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983])
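# _check_db_addr is used by all the reparent tests but is not shown. A hedged
# sketch (assumptions: the serving address is read back with GetEndPoints and
# the vt port is stored under 'named_port_map'; the real helper may differ):
def _check_db_addr(self, shard_id, db_type, expected_port, cell='test_nj'):
  ep = utils.run_vtctl_json(
      ['GetEndPoints', cell, 'test_keyspace/' + shard_id, db_type])
  self.assertEqual(len(ep['entries']), 1,
                   'wrong number of endpoints: %s' % str(ep))
  port = ep['entries'][0]['named_port_map']['vt']
  self.assertEqual(port, expected_port,
                   'unexpected port: %d != %d' % (port, expected_port))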
def test_secure(self): vtgate_server, vtgate_port = utils.vtgate_start(cache_ttl='0s') # start the tablets shard_0_master.start_vttablet(cert=cert_dir + "/vt-server-cert.pem", key=cert_dir + "/vt-server-key.pem") shard_0_slave.start_vttablet(cert=cert_dir + "/vt-server-cert.pem", key=cert_dir + "/vt-server-key.pem", repl_extra_flags={ 'flags': "2048", 'ssl-ca': cert_dir + "/ca-cert.pem", 'ssl-cert': cert_dir + "/client-cert.pem", 'ssl-key': cert_dir + "/client-key.pem", }) # Reparent using SSL for t in [shard_0_master, shard_0_slave]: t.reset_replication() utils.run_vtctl('ReparentShard -force test_keyspace/0 ' + shard_0_master.tablet_alias, auto_log=True) # then get the topology and check it topo_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port, "test_nj", 30.0) topology.read_keyspaces(topo_client) shard_0_master_addrs = topology.get_host_port_by_name(topo_client, "test_keyspace.0.master:_vts") if len(shard_0_master_addrs) != 1: self.fail('topology.get_host_port_by_name failed for "test_keyspace.0.master:_vts", got: %s' % " ".join(["%s:%u(%s)" % (h, p, str(e)) for (h, p, e) in shard_0_master_addrs])) if shard_0_master_addrs[0][2] != True: self.fail('topology.get_host_port_by_name failed for "test_keyspace.0.master:_vts" is not encrypted') logging.debug("shard 0 master addrs: %s", " ".join(["%s:%u(%s)" % (h, p, str(e)) for (h, p, e) in shard_0_master_addrs])) # make sure asking for optionally secure connections works too auto_addrs = topology.get_host_port_by_name(topo_client, "test_keyspace.0.master:_vtocc", encrypted=True) if auto_addrs != shard_0_master_addrs: self.fail('topology.get_host_port_by_name doesn\'t resolve encrypted addresses properly: %s != %s' % (str(shard_0_master_addrs), str(auto_addrs))) # try to connect with regular client try: conn = tablet3.TabletConnection("%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]), "", "test_keyspace", "0", 10.0) conn.dial() self.fail("No exception raised to secure port") except dbexceptions.FatalError as e: if not e.args[0][0].startswith('Unexpected EOF in handshake to'): self.fail("Unexpected exception: %s" % str(e)) sconn = utils.get_vars(shard_0_master.port)["SecureConnections"] if sconn != 0: self.fail("unexpected conns %s" % sconn) # connect to encrypted port conn = tablet3.TabletConnection("%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]), "", "test_keyspace", "0", 5.0, encrypted=True) conn.dial() (results, rowcount, lastrowid, fields) = conn._execute("select 1 from dual", {}) self.assertEqual(results, [(1,),], 'wrong conn._execute output: %s' % str(results)) sconn = utils.get_vars(shard_0_master.port)["SecureConnections"] if sconn != 1: self.fail("unexpected conns %s" % sconn) saccept = utils.get_vars(shard_0_master.port)["SecureAccepts"] if saccept == 0: self.fail("unexpected accepts %s" % saccept) # trigger a time out on a secure connection, see what exception we get try: conn._execute("select sleep(100) from dual", {}) self.fail("No timeout exception") except dbexceptions.TimeoutError as e: logging.debug("Got the right exception for SSL timeout: %s", str(e)) # start a vtgate to connect to that tablet gate_proc, gate_port, gate_secure_port = utils.vtgate_start( tablet_bson_encrypted=True, cert=cert_dir + "/vt-server-cert.pem", key=cert_dir + "/vt-server-key.pem") # try to connect to vtgate with regular client timeout = 2.0 try: conn = vtgatev2.connect(["localhost:%s" % (gate_secure_port),], timeout) self.fail("No exception raised to VTGate secure port") except 
dbexceptions.OperationalError as e: exception_type = e.args[2] exception_msg = str(e.args[2][0][0]) self.assertIsInstance(exception_type, dbexceptions.FatalError, "unexpected exception type") if not exception_msg.startswith('Unexpected EOF in handshake to'): self.fail("Unexpected exception message: %s" % exception_msg) sconn = utils.get_vars(gate_port)["SecureConnections"] if sconn != 0: self.fail("unexpected conns %s" % sconn) # connect to vtgate with encrypted port conn = vtgatev2.connect(["localhost:%s" % (gate_secure_port),], timeout, encrypted=True) (results, rowcount, lastrowid, fields) = conn._execute( "select 1 from dual", {}, "test_keyspace", "master", keyranges=[keyrange.KeyRange(keyrange_constants.NON_PARTIAL_KEYRANGE),]) self.assertEqual(rowcount, 1, "want 1, got %d" % (rowcount)) self.assertEqual(len(fields), 1, "want 1, got %d" % (len(fields))) self.assertEqual(results, [(1,),], 'wrong conn._execute output: %s' % str(results)) sconn = utils.get_vars(gate_port)["SecureConnections"] if sconn != 1: self.fail("unexpected conns %s" % sconn) saccept = utils.get_vars(gate_port)["SecureAccepts"] if saccept == 0: self.fail("unexpected accepts %s" % saccept) # trigger a time out on a vtgate secure connection, see what exception we get try: conn._execute("select sleep(4) from dual", {}, "test_keyspace", "master", keyranges=[keyrange.KeyRange(keyrange_constants.NON_PARTIAL_KEYRANGE),]) self.fail("No timeout exception") except dbexceptions.TimeoutError as e: logging.debug("Got the right exception for SSL timeout: %s", str(e)) conn.close() utils.vtgate_kill(gate_proc) # kill everything utils.vtgate_kill(vtgate_server)
def test_reparent_graceful(self): utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) self._test_reparent_graceful('0')
def _test_reparent_from_outside(self, brutal=False): """This test will start a master and 3 slaves. Then: - one slave will be the new master - one slave will be reparented to that new master - one slave will be busted and dead in the water and we'll call TabletExternallyReparented. Args: brutal: kills the old master first """ utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) # create the database so vttablets start, as they are serving for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True, wait_for_start=False) # Create a few slaves for testing reparenting. tablet_62044.init_tablet('replica', 'test_keyspace', '0', start=True, wait_for_start=False) tablet_41983.init_tablet('replica', 'test_keyspace', '0', start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', '0', start=True, wait_for_start=False) # wait for all tablets to start for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.wait_for_vttablet_state('SERVING') # Reparent as a starting point for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.reset_replication() utils.run_vtctl( ['InitShardMaster', 'test_keyspace/0', tablet_62344.tablet_alias], auto_log=True) # now manually reparent 1 out of 2 tablets # 62044 will be the new master # 31981 won't be re-parented, so it will be busted tablet_62044.mquery('', mysql_flavor().promote_slave_commands()) new_pos = mysql_flavor().master_position(tablet_62044) logging.debug('New master position: %s', str(new_pos)) # Use 'localhost' as hostname because Travis CI worker hostnames # are too long for MySQL replication. change_master_cmds = mysql_flavor().change_master_commands( 'localhost', tablet_62044.mysql_port, new_pos) # 62344 will now be a slave of 62044 tablet_62344.mquery('', ['RESET MASTER', 'RESET SLAVE'] + change_master_cmds + ['START SLAVE']) # 41983 will be a slave of 62044 tablet_41983.mquery('', ['STOP SLAVE'] + change_master_cmds + ['START SLAVE']) # in brutal mode, we kill the old master first # and delete its tablet record if brutal: tablet_62344.kill_vttablet() utils.run_vtctl( ['DeleteTablet', '-allow_master', tablet_62344.tablet_alias], auto_log=True) base_time = time.time() # update topology with the new server utils.run_vtctl( ['TabletExternallyReparented', tablet_62044.tablet_alias], mode=utils.VTCTL_VTCTL, auto_log=True) self._test_reparent_from_outside_check(brutal, base_time) # RebuildReplicationGraph will rebuild the topo data from # the tablet records. It is an emergency command only. utils.run_vtctl( ['RebuildReplicationGraph', 'test_nj', 'test_keyspace']) self._test_reparent_from_outside_check(brutal, base_time) if not brutal: tablet_62344.kill_vttablet() tablet.kill_tablets([tablet_31981, tablet_62044, tablet_41983])
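# The public entry points for this helper are not shown; presumably they just
# toggle the brutal flag, in the same wrapper style as test_reparent_graceful
# (method names are assumed):
def test_reparent_from_outside(self):
  self._test_reparent_from_outside(brutal=False)

def test_reparent_from_outside_brutal(self):
  self._test_reparent_from_outside(brutal=True)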
def test_reparent_cross_cell(self, shard_id='0'): utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) # create the database so vttablets start, as they are serving tablet_62344.create_db('vt_test_keyspace') tablet_62044.create_db('vt_test_keyspace') tablet_41983.create_db('vt_test_keyspace') tablet_31981.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True, wait_for_start=False) shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Create a few slaves for testing reparenting. tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.wait_for_vttablet_state('SERVING') shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj', 'test_ny'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Recompute the shard layout node - until you do that, it might not be # valid. utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id]) utils.validate_topology() # Force the slaves to reparent assuming that all the datasets are # identical. for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.reset_replication() utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/' + shard_id, tablet_62344.tablet_alias ], auto_log=True) utils.validate_topology(ping_tablets=True) self._check_db_addr(shard_id, 'master', tablet_62344.port) # Verify MasterCell is properly set self._check_master_cell('test_nj', shard_id, 'test_nj') self._check_master_cell('test_ny', shard_id, 'test_nj') # Perform a graceful reparent operation to another cell. utils.pause('test_reparent_cross_cell PlannedReparentShard') utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/' + shard_id, tablet_31981.tablet_alias ], auto_log=True) utils.validate_topology() self._check_db_addr(shard_id, 'master', tablet_31981.port, cell='test_ny') # Verify MasterCell is set to new cell. self._check_master_cell('test_nj', shard_id, 'test_ny') self._check_master_cell('test_ny', shard_id, 'test_ny') tablet.kill_tablets( [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
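# _check_master_cell is referenced by the cross-cell checks above. A hedged
# sketch (assumption: the master cell is read from the SrvShard record of the
# given cell; the command and field name may differ in the real helper):
def _check_master_cell(self, cell, shard_id, master_cell):
  srv_shard = utils.run_vtctl_json(
      ['GetSrvShard', cell, 'test_keyspace/' + shard_id])
  self.assertEqual(srv_shard['master_cell'], master_cell)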
def _test_reparent_graceful(self, shard_id): # create the database so vttablets start, as they are serving tablet_62344.create_db('vt_test_keyspace') tablet_62044.create_db('vt_test_keyspace') tablet_41983.create_db('vt_test_keyspace') tablet_31981.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True) if environment.topo_server().flavor() == 'zookeeper': shard = utils.run_vtctl_json( ['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Create a few slaves for testing reparenting. tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) for t in [tablet_62044, tablet_41983, tablet_31981]: t.wait_for_vttablet_state('SERVING') if environment.topo_server().flavor() == 'zookeeper': shard = utils.run_vtctl_json( ['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj', 'test_ny'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Recompute the shard layout node - until you do that, it might not be # valid. utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id]) utils.validate_topology() # Force the slaves to reparent assuming that all the datasets are # identical. for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.reset_replication() utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/' + shard_id, tablet_62344.tablet_alias ]) utils.validate_topology(ping_tablets=True) tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test) self._check_db_addr(shard_id, 'master', tablet_62344.port) # Verify MasterCell is set to new cell. self._check_master_cell('test_nj', shard_id, 'test_nj') self._check_master_cell('test_ny', shard_id, 'test_nj') # Convert two replica to spare. That should leave only one node # serving traffic, but still needs to appear in the replication # graph. utils.run_vtctl( ['ChangeSlaveType', tablet_41983.tablet_alias, 'spare']) utils.run_vtctl( ['ChangeSlaveType', tablet_31981.tablet_alias, 'spare']) utils.validate_topology() self._check_db_addr(shard_id, 'replica', tablet_62044.port) # Run this to make sure it succeeds. utils.run_vtctl( ['ShardReplicationPositions', 'test_keyspace/' + shard_id], stdout=utils.devnull) # Perform a graceful reparent operation. utils.pause('_test_reparent_graceful PlannedReparentShard') utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/' + shard_id, tablet_62044.tablet_alias ], auto_log=True) utils.validate_topology() self._check_db_addr(shard_id, 'master', tablet_62044.port) # insert data into the new master, check the connected slaves work self._populate_vt_insert_test(tablet_62044, 1) self._check_vt_insert_test(tablet_41983, 1) self._check_vt_insert_test(tablet_62344, 1) # Verify MasterCell is set to new cell. self._check_master_cell('test_nj', shard_id, 'test_nj') self._check_master_cell('test_ny', shard_id, 'test_nj') tablet.kill_tablets( [tablet_62344, tablet_62044, tablet_41983, tablet_31981]) # Test address correction. new_port = environment.reserve_ports(1) tablet_62044.start_vttablet(port=new_port) # Wait until the new address registers. 
timeout = 30.0 while True: try: self._check_db_addr(shard_id, 'master', new_port) break except protocols_flavor().client_error_exception_type(): timeout = utils.wait_step('waiting for new port to register', timeout, sleep_time=0.1) tablet_62044.kill_vttablet()
def test_vertical_split(self): utils.run_vtctl(['CopySchemaShard', '--tables', '/moving/,view1', source_rdonly1.tablet_alias, 'destination_keyspace/0'], auto_log=True) utils.run_vtworker(['--cell', 'test_nj', '--command_display_interval', '10ms', '--use_v3_resharding_mode=false', 'VerticalSplitClone', '--tables', '/moving/,view1', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'destination_keyspace/0'], auto_log=True) # test Cancel first utils.run_vtctl(['CancelResharding', 'destination_keyspace/0'], auto_log=True) self.check_no_binlog_player(destination_master) # redo VerticalSplitClone utils.run_vtworker(['--cell', 'test_nj', '--command_display_interval', '10ms', '--use_v3_resharding_mode=false', 'VerticalSplitClone', '--tables', '/moving/,view1', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'destination_keyspace/0'], auto_log=True) # check values are present self._check_values(destination_master, 'vt_destination_keyspace', 'moving1', self.moving1_first, 100) self._check_values(destination_master, 'vt_destination_keyspace', 'moving2', self.moving2_first, 100) self._check_values(destination_master, 'vt_destination_keyspace', 'view1', self.moving1_first, 100) if base_sharding.use_rbr: self._check_values(destination_master, 'vt_destination_keyspace', 'moving3_no_pk', self.moving3_no_pk_first, 100) # Verify vreplication table entries result = destination_master.mquery('_vt', 'select * from vreplication') self.assertEqual(len(result), 1) self.assertEqual(result[0][1], 'SplitClone') self.assertEqual(result[0][2], 'keyspace:"source_keyspace" shard:"0" tables:"/moving/" tables:"view1" ') # check the binlog player is running and exporting vars self.check_destination_master(destination_master, ['source_keyspace/0']) # check that binlog server exported the stats vars self.check_binlog_server_vars(source_replica, horizontal=False) # add values to source, make sure they're replicated moving1_first_add1 = self._insert_values('moving1', 100) _ = self._insert_values('staying1', 100) moving2_first_add1 = self._insert_values('moving2', 100) self._check_values_timeout(destination_master, 'vt_destination_keyspace', 'moving1', moving1_first_add1, 100) self._check_values_timeout(destination_master, 'vt_destination_keyspace', 'moving2', moving2_first_add1, 100) self.check_binlog_player_vars(destination_master, ['source_keyspace/0'], seconds_behind_master_max=30) self.check_binlog_server_vars(source_replica, horizontal=False, min_statements=100, min_transactions=100) # use vtworker to compare the data logging.debug('Running vtworker VerticalSplitDiff') utils.run_vtworker(['-cell', 'test_nj', '--use_v3_resharding_mode=false', 'VerticalSplitDiff', '--min_healthy_rdonly_tablets', '1', 'destination_keyspace/0'], auto_log=True) utils.pause('Good time to test vtworker for diffs') # get status for destination master tablet, make sure we have it all self.check_running_binlog_player(destination_master, 700, 300, extra_text='moving') # check query service is off on destination master, as filtered # replication is enabled. Even health check should not interfere. 
destination_master_vars = utils.get_vars(destination_master.port) self.assertEqual(destination_master_vars['TabletStateName'], 'NOT_SERVING') # check we can't migrate the master just yet utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'], expect_fail=True) # migrate rdonly only in test_ny cell, make sure nothing is migrated # in test_nj utils.run_vtctl(['MigrateServedFrom', '--cells=test_ny', 'destination_keyspace/0', 'rdonly'], auto_log=True) self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' 'ServedFrom(rdonly): source_keyspace\n' 'ServedFrom(replica): source_keyspace\n') self._check_blacklisted_tables(source_master, None) self._check_blacklisted_tables(source_replica, None) self._check_blacklisted_tables(source_rdonly1, None) self._check_blacklisted_tables(source_rdonly2, None) # migrate test_nj only, using command line manual fix command, # and restore it back. keyspace_json = utils.run_vtctl_json( ['GetKeyspace', 'destination_keyspace']) found = False for ksf in keyspace_json['served_froms']: if ksf['tablet_type'] == topodata_pb2.RDONLY: found = True self.assertEqual(ksf['cells'], ['test_nj']) self.assertTrue(found) utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace', '-remove', '-cells=test_nj', 'destination_keyspace', 'rdonly'], auto_log=True) keyspace_json = utils.run_vtctl_json( ['GetKeyspace', 'destination_keyspace']) found = False for ksf in keyspace_json['served_froms']: if ksf['tablet_type'] == topodata_pb2.RDONLY: found = True self.assertFalse(found) utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace', 'destination_keyspace', 'rdonly'], auto_log=True) keyspace_json = utils.run_vtctl_json( ['GetKeyspace', 'destination_keyspace']) found = False for ksf in keyspace_json['served_froms']: if ksf['tablet_type'] == topodata_pb2.RDONLY: found = True self.assertTrue('cells' not in ksf or not ksf['cells']) self.assertTrue(found) # now serve rdonly from the destination shards utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'rdonly'], auto_log=True) self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' 'ServedFrom(replica): source_keyspace\n') self._check_blacklisted_tables(source_master, None) self._check_blacklisted_tables(source_replica, None) self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1']) self._check_client_conn_redirection( 'destination_keyspace', ['master', 'replica'], ['moving1', 'moving2']) # then serve replica from the destination shards utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'], auto_log=True) self._check_srv_keyspace('ServedFrom(master): source_keyspace\n') self._check_blacklisted_tables(source_master, None) self._check_blacklisted_tables(source_replica, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1']) self._check_client_conn_redirection( 'destination_keyspace', ['master'], ['moving1', 'moving2']) # move replica back and forth utils.run_vtctl(['MigrateServedFrom', '-reverse', 'destination_keyspace/0', 'replica'], auto_log=True) self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' 'ServedFrom(replica): source_keyspace\n') self._check_blacklisted_tables(source_master, None) self._check_blacklisted_tables(source_replica, None) self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1']) 
self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1']) utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'], auto_log=True) self._check_srv_keyspace('ServedFrom(master): source_keyspace\n') self._check_blacklisted_tables(source_master, None) self._check_blacklisted_tables(source_replica, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1']) self._check_client_conn_redirection( 'destination_keyspace', ['master'], ['moving1', 'moving2']) # Cancel should fail now utils.run_vtctl(['CancelResharding', 'destination_keyspace/0'], auto_log=True, expect_fail=True) # then serve master from the destination shards utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'], auto_log=True) self._check_srv_keyspace('') self._check_blacklisted_tables(source_master, ['/moving/', 'view1']) self._check_blacklisted_tables(source_replica, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1']) self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1']) # check the binlog player is gone now self.check_no_binlog_player(destination_master) # check the stats are correct self._check_stats() # now remove the tables on the source shard. The blacklisted tables # in the source shard won't match any table, make sure that works. utils.run_vtctl(['ApplySchema', '-sql=drop view view1', 'source_keyspace'], auto_log=True) for t in ['moving1', 'moving2']: utils.run_vtctl(['ApplySchema', '-sql=drop table %s' % (t), 'source_keyspace'], auto_log=True) for t in [source_master, source_replica, source_rdonly1, source_rdonly2]: utils.run_vtctl(['ReloadSchema', t.tablet_alias]) qr = source_master.execute('select count(1) from staying1') self.assertEqual(len(qr['rows']), 1, 'cannot read staying1: got %s' % str(qr)) # test SetShardTabletControl self._verify_vtctl_set_shard_tablet_control()
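# _check_blacklisted_tables is used heavily above but not shown. A minimal
# sketch (assumption: the tablet status page lists the blacklisted tables and
# get_status() returns that page, as it does elsewhere in these tests):
def _check_blacklisted_tables(self, t, expected):
  status = t.get_status()
  if expected:
    self.assertIn('BlacklistedTables: %s' % ' '.join(expected), status)
  else:
    self.assertNotIn('BlacklistedTables', status)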
def test_reparent_graceful_range_based(self): utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'keyspace_id', '--sharding_column_type', 'uint64', 'test_keyspace' ]) self._test_reparent_graceful('0000000000000000-ffffffffffffffff')
def _test_vtctl_snapshot_restore(self, server_mode): if server_mode: snapshot_flags = ['-server-mode', '-concurrency=8'] restore_flags = ['-dont-wait-for-slave-start'] else: snapshot_flags = ['-concurrency=4'] restore_flags = [] # Start up a master mysql and vttablet utils.run_vtctl(['CreateKeyspace', 'snapshot_test']) tablet_62344.init_tablet('master', 'snapshot_test', '0') utils.run_vtctl(['RebuildShardGraph', 'snapshot_test/0']) utils.validate_topology() tablet_62344.populate('vt_snapshot_test', self._create_vt_insert_test, self._populate_vt_insert_test) tablet_62044.create_db('vt_snapshot_test') tablet_62344.start_vttablet() # Need to force snapshot since this is a master db. out, err = utils.run_vtctl(['Snapshot', '-force'] + snapshot_flags + [tablet_62344.tablet_alias], trap_output=True) results = {} for name in [ 'Manifest', 'ParentAlias', 'SlaveStartRequired', 'ReadOnly', 'OriginalType' ]: sepPos = err.find(name + ": ") if sepPos != -1: results[name] = err[sepPos + len(name) + 2:].splitlines()[0] if "Manifest" not in results: self.fail("Snapshot didn't echo Manifest file: %s" % str(err)) if "ParentAlias" not in results: self.fail("Snapshot didn't echo ParentAlias: %s" % str(err)) utils.pause("snapshot finished: " + results['Manifest'] + " " + results['ParentAlias']) if server_mode: if "SlaveStartRequired" not in results: self.fail("Snapshot didn't echo SlaveStartRequired: %s" % err) if "ReadOnly" not in results: self.fail("Snapshot didn't echo ReadOnly %s" % err) if "OriginalType" not in results: self.fail("Snapshot didn't echo OriginalType: %s" % err) if (results['SlaveStartRequired'] != 'false' or results['ReadOnly'] != 'true' or results['OriginalType'] != 'master'): self.fail("Bad values returned by Snapshot: %s" % err) tablet_62044.init_tablet('idle', start=True) # do not specify a MANIFEST, see if 'default' works call(["touch", "/tmp/vtSimulateFetchFailures"]) utils.run_vtctl( ['Restore', '-fetch-concurrency=2', '-fetch-retry-count=4'] + restore_flags + [ tablet_62344.tablet_alias, 'default', tablet_62044.tablet_alias, results['ParentAlias'] ], auto_log=True) utils.pause("restore finished") tablet_62044.assert_table_count('vt_snapshot_test', 'vt_insert_test', 4) utils.validate_topology() # in server_mode, get the server out of it and check it if server_mode: utils.run_vtctl([ 'SnapshotSourceEnd', tablet_62344.tablet_alias, results['OriginalType'] ], auto_log=True) tablet_62344.assert_table_count('vt_snapshot_test', 'vt_insert_test', 4) utils.validate_topology() tablet.kill_tablets([tablet_62344, tablet_62044])
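# The public test methods presumably call this helper once per mode, in the
# same wrapper style used by the reparent tests (method names are assumed):
def test_vtctl_snapshot_restore(self):
  self._test_vtctl_snapshot_restore(server_mode=False)

def test_vtctl_snapshot_restore_server_mode(self):
  self._test_vtctl_snapshot_restore(server_mode=True)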
def test_reparent_down_master(self):
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as they are serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62044.create_db('vt_test_keyspace')
  tablet_41983.create_db('vt_test_keyspace')
  tablet_31981.create_db('vt_test_keyspace')

  # Start up a master mysql and vttablet
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True,
                           wait_for_start=False)

  # Create a few slaves for testing reparenting.
  tablet_62044.init_tablet('replica', 'test_keyspace', '0', start=True,
                           wait_for_start=False)
  tablet_41983.init_tablet('replica', 'test_keyspace', '0', start=True,
                           wait_for_start=False)
  tablet_31981.init_tablet('replica', 'test_keyspace', '0', start=True,
                           wait_for_start=False)

  # wait for all tablets to start
  for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
    t.wait_for_vttablet_state('SERVING')

  # Recompute the shard layout node - until you do that, it might not be
  # valid.
  utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
  utils.validate_topology()

  # Force the slaves to reparent assuming that all the datasets are
  # identical.
  for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
    t.reset_replication()
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                   tablet_62344.tablet_alias], auto_log=True)
  utils.validate_topology()
  tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

  # Make the current master agent and database unavailable.
  tablet_62344.kill_vttablet()
  tablet_62344.shutdown_mysql().wait()

  self._check_db_addr('0', 'master', tablet_62344.port)

  # Perform a planned reparent operation, will try to contact
  # the current master and fail somewhat quickly
  _, stderr = utils.run_vtctl(['-wait-time', '5s',
                               'PlannedReparentShard', 'test_keyspace/0',
                               tablet_62044.tablet_alias],
                              expect_fail=True)
  self.assertIn('DemoteMaster failed', stderr)

  # Run forced reparent operation, this should now proceed unimpeded.
  utils.run_vtctl(['EmergencyReparentShard', 'test_keyspace/0',
                   tablet_62044.tablet_alias], auto_log=True)

  utils.validate_topology()
  self._check_db_addr('0', 'master', tablet_62044.port)

  # insert data into the new master, check the connected slaves work
  self._populate_vt_insert_test(tablet_62044, 2)
  self._check_vt_insert_test(tablet_41983, 2)
  self._check_vt_insert_test(tablet_31981, 2)

  tablet.kill_tablets([tablet_62044, tablet_41983, tablet_31981])

  # so the other tests don't have any surprise
  tablet_62344.start_mysql().wait()
def verify_successful_worker_copy_with_reparent(self, mysql_down=False):
  """Verifies that vtworker can successfully copy data for a SplitClone.

  Order of operations:
  1. Run a background vtworker
  2. Wait until the worker successfully resolves the destination masters.
  3. Reparent the destination tablets
  4. Wait until the vtworker copy is finished
  5. Verify that the worker was forced to re-resolve topology and retry
     writes due to the reparent.
  6. Verify that the data was copied successfully to both new shards

  Args:
    mysql_down: boolean. If True, we take down the MySQL instances on the
      destination masters at first, then bring them back and reparent away.

  Raises:
    AssertionError if things didn't go as expected.
  """
  if mysql_down:
    logging.debug('Shutting down mysqld on destination masters.')
    utils.wait_procs([shard_0_master.shutdown_mysql(),
                      shard_1_master.shutdown_mysql()])

  worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
      ['--cell', 'test_nj'], auto_log=True)

  # --max_tps is only specified to enable the throttler and ensure that the
  # code is executed. But the intent here is not to throttle the test, hence
  # the rate limit is set very high.
  workerclient_proc = utils.run_vtworker_client_bg(
      ['SplitClone',
       '--source_reader_count', '1',
       '--destination_writer_count', '1',
       '--write_query_max_rows', '1',
       '--min_healthy_rdonly_tablets', '1',
       '--max_tps', '9999',
       'test_keyspace/0'],
      worker_rpc_port)

  if mysql_down:
    # If MySQL is down, we wait until vtworker retried at least once to make
    # sure it reached the point where a write failed due to MySQL being down.
    # There should be two retries at least, one for each destination shard.
    utils.poll_for_vars(
        'vtworker', worker_port,
        'WorkerRetryCount >= 2',
        condition_fn=lambda v: v.get('WorkerRetryCount') >= 2)
    logging.debug('Worker has retried at least twice, starting reparent now')

    # vtworker is blocked at this point. This is a good time to test that its
    # throttler server is reacting to RPCs.
    self.check_binlog_throttler('localhost:%d' % worker_rpc_port,
                                ['test_keyspace/-80', 'test_keyspace/80-'],
                                9999)

    # Bring back masters. Since we test with semi-sync now, we need at least
    # one replica for the new master. This test is already quite expensive,
    # so we bring back the old master as a replica rather than having a third
    # replica up the whole time.
    logging.debug('Restarting mysqld on destination masters')
    utils.wait_procs([shard_0_master.start_mysql(),
                      shard_1_master.start_mysql()])

    # Reparent away from the old masters.
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/-80',
                     shard_0_replica.tablet_alias], auto_log=True)
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-',
                     shard_1_replica.tablet_alias], auto_log=True)
  else:
    # NOTE: There is a race condition around this:
    #   It's possible that the SplitClone vtworker command finishes before the
    #   PlannedReparentShard vtctl command, which we start below, succeeds.
    #   Then the test would fail because vtworker did not have to retry.
    #
    # To work around this, the test takes a parameter to increase the number
    # of rows that the worker has to copy (with the idea being to slow the
    # worker down).
    # You should choose a value for num_insert_rows such that this test
    # passes for your environment (trial-and-error...)

    # Make sure that vtworker got past the point where it picked a master
    # for each destination shard ("finding targets" state).
    utils.poll_for_vars(
        'vtworker', worker_port,
        'WorkerState == cloning the data (online)',
        condition_fn=lambda v: v.get('WorkerState') ==
        'cloning the data (online)')
    logging.debug('Worker is in copy state, starting reparent now')

    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/-80',
                     shard_0_replica.tablet_alias], auto_log=True)
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-',
                     shard_1_replica.tablet_alias], auto_log=True)

  utils.wait_procs([workerclient_proc])

  # Verify that we were forced to re-resolve and retry.
  worker_vars = utils.get_vars(worker_port)
  # There should be two retries at least, one for each destination shard.
  self.assertGreater(worker_vars['WorkerRetryCount'], 1)
  self.assertNotEqual(worker_vars['WorkerRetryCount'], {},
                      "expected vtworker to retry, but it didn't")
  utils.kill_sub_process(worker_proc, soft=True)

  # Make sure that everything is caught up to the same replication point
  self.run_split_diff('test_keyspace/-80', all_shard_tablets, shard_0_tablets)
  self.run_split_diff('test_keyspace/80-', all_shard_tablets, shard_1_tablets)

  self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica)
  self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
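# A minimal sketch of how the helper above might be driven by concrete test
# methods, covering both the default path and the mysql_down branch. The
# method names below are illustrative assumptions, not taken from this file.
def test_worker_copy_with_reparent(self):
  # Default path: reparent the destination shards while the worker is in the
  # "cloning the data (online)" state.
  self.verify_successful_worker_copy_with_reparent()


def test_worker_copy_with_reparent_and_mysql_down(self):
  # mysql_down branch: mysqld on the destination masters is shut down first,
  # brought back, and then reparented away from while the worker retries.
  self.verify_successful_worker_copy_with_reparent(mysql_down=True)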
def _init_keyspaces_and_tablets(self):
  utils.run_vtctl(['CreateKeyspace', 'source_keyspace'])
  utils.run_vtctl(
      ['CreateKeyspace', '--served_from',
       'master:source_keyspace,replica:source_keyspace,rdonly:'
       'source_keyspace', 'destination_keyspace'])

  source_master.init_tablet(
      'replica', keyspace='source_keyspace', shard='0', tablet_index=0)
  source_replica.init_tablet(
      'replica', keyspace='source_keyspace', shard='0', tablet_index=1)
  source_rdonly1.init_tablet(
      'rdonly', keyspace='source_keyspace', shard='0', tablet_index=2)
  source_rdonly2.init_tablet(
      'rdonly', keyspace='source_keyspace', shard='0', tablet_index=3)
  destination_master.init_tablet(
      'replica', keyspace='destination_keyspace', shard='0', tablet_index=0)
  destination_replica.init_tablet(
      'replica', keyspace='destination_keyspace', shard='0', tablet_index=1)
  destination_rdonly1.init_tablet(
      'rdonly', keyspace='destination_keyspace', shard='0', tablet_index=2)
  destination_rdonly2.init_tablet(
      'rdonly', keyspace='destination_keyspace', shard='0', tablet_index=3)

  utils.run_vtctl(
      ['RebuildKeyspaceGraph', 'source_keyspace'], auto_log=True)
  utils.run_vtctl(
      ['RebuildKeyspaceGraph', 'destination_keyspace'], auto_log=True)

  self._create_source_schema()

  for t in [source_master, source_replica,
            destination_master, destination_replica]:
    t.start_vttablet(wait_for_state=None)
  for t in [source_rdonly1, source_rdonly2,
            destination_rdonly1, destination_rdonly2]:
    t.start_vttablet(wait_for_state=None)

  # wait for the tablets
  master_tablets = [source_master, destination_master]
  replica_tablets = [
      source_replica, source_rdonly1, source_rdonly2,
      destination_replica, destination_rdonly1, destination_rdonly2]
  for t in master_tablets + replica_tablets:
    t.wait_for_vttablet_state('NOT_SERVING')

  # check SrvKeyspace
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(rdonly): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')

  # reparent to make the tablets work (we use health check, fix their types)
  utils.run_vtctl(['InitShardMaster', '-force', 'source_keyspace/0',
                   source_master.tablet_alias], auto_log=True)
  source_master.tablet_type = 'master'
  utils.run_vtctl(['InitShardMaster', '-force', 'destination_keyspace/0',
                   destination_master.tablet_alias], auto_log=True)
  destination_master.tablet_type = 'master'

  for t in [source_replica, destination_replica]:
    utils.wait_for_tablet_type(t.tablet_alias, 'replica')
  for t in [source_rdonly1, source_rdonly2,
            destination_rdonly1, destination_rdonly2]:
    utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')
  for t in master_tablets + replica_tablets:
    t.wait_for_vttablet_state('SERVING')
def run_shard_tablets(self, shard_name, shard_tablets, create_table=True):
  """Handles all the necessary work for initially running a shard's tablets.

  This encompasses the following steps:
    1. (optional) Create db
    2. Starting vttablets and letting them initialize themselves
    3. Waiting for the appropriate vttablet state
    4. Force reparent to the master tablet
    5. RebuildKeyspaceGraph
    6. (optional) Running initial schema setup

  Args:
    shard_name: the name of the shard to start tablets in
    shard_tablets: an instance of ShardTablets for the given shard
    create_table: boolean, True iff we should create a table on the tablets
  """
  # Start tablets.
  #
  # NOTE: The future master has to be started with type 'replica'.
  shard_tablets.master.start_vttablet(wait_for_state=None,
                                      init_tablet_type='replica',
                                      init_keyspace='test_keyspace',
                                      init_shard=shard_name)
  for t in shard_tablets.replicas:
    t.start_vttablet(wait_for_state=None,
                     init_tablet_type='replica',
                     init_keyspace='test_keyspace',
                     init_shard=shard_name)
  for t in shard_tablets.rdonlys:
    t.start_vttablet(wait_for_state=None,
                     init_tablet_type='rdonly',
                     init_keyspace='test_keyspace',
                     init_shard=shard_name)

  # Block until tablets are up and we can enable replication.
  # All tablets should be NOT_SERVING until we run InitShardMaster.
  for t in shard_tablets.all_tablets:
    t.wait_for_vttablet_state('NOT_SERVING')

  # Reparent to choose an initial master and enable replication.
  utils.run_vtctl(['InitShardMaster', '-force',
                   'test_keyspace/%s' % shard_name,
                   shard_tablets.master.tablet_alias], auto_log=True)
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # Enforce a health check instead of waiting for the next periodic one.
  # (saves up to 1 second execution time on average)
  for t in shard_tablets.replicas:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
  for t in shard_tablets.rdonlys:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

  # Wait for tablet state to change after starting all tablets. This allows
  # us to start all tablets at once, instead of sequentially waiting.
  # NOTE: Replication has to be enabled first or the health check will
  #       set a replica or rdonly tablet back to NOT_SERVING.
  for t in shard_tablets.all_tablets:
    t.wait_for_vttablet_state('SERVING')

  create_table_sql = (
      'create table worker_test('
      'id bigint unsigned,'
      'msg varchar(64),'
      'keyspace_id bigint(20) unsigned not null,'
      'primary key (id),'
      'index by_msg (msg)'
      ') Engine=InnoDB')

  if create_table:
    utils.run_vtctl(['ApplySchema', '-sql=' + create_table_sql,
                     'test_keyspace'], auto_log=True)
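# A hedged sketch of how run_shard_tablets might be invoked for the two
# destination shards referenced by the reparent test above. The method name
# is an illustrative assumption; shard_0_tablets and shard_1_tablets are the
# module-level ShardTablets fixtures used elsewhere in this file.
def example_setup_destination_shards(self):
  self.run_shard_tablets('-80', shard_0_tablets)
  self.run_shard_tablets('80-', shard_1_tablets)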
def init_keyspace():
  """Creates a `test_keyspace` keyspace with a sharding key."""
  utils.run_vtctl(['CreateKeyspace', '-sharding_column_name', 'keyspace_id',
                   '-sharding_column_type', KEYSPACE_ID_TYPE,
                   'test_keyspace'])
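# A minimal usage sketch: init_keyspace() would typically be called from a
# module-level setup hook before any tablets are initialized. The hook below
# is an illustrative assumption; the mysqld/tablet bootstrapping that the real
# setup performs is deliberately elided.
def setUpModule():
  init_keyspace()
  # ... start mysqld instances and init/start the tablets here ...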